VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 54315

Last change on this file since 54315 was 54315, checked in by vboxsync, 10 years ago

SUPDrv: Eliminated the need for the GIP master to have a zero TSC delta value. Fixed incorrect RTCPUSET indexing (don't use idCpu!). Documented the TSC delta thread states. Made it transition from terminating to destroyed. Renamed supdrvMeasureTscDeltas to supdrvMeasureInitialTscDeltas and dropped the unused pidxMaster parameter.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 316.4 KB
Line 
1/* $Id: SUPDrv.c 54315 2015-02-19 21:33:21Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63#include <VBox/vmm/hm_svm.h>
64#include <VBox/vmm/hm_vmx.h>
65
66#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
67# include "dtrace/SUPDrv.h"
68#else
69# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
70# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
71# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
72# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
73#endif
74
75/*
76 * Logging assignments:
77 * Log - useful stuff, like failures.
78 * LogFlow - program flow, except the really noisy bits.
79 * Log2 - Cleanup.
80 * Log3 - Loader flow noise.
81 * Log4 - Call VMMR0 flow noise.
82 * Log5 - Native yet-to-be-defined noise.
83 * Log6 - Native ioctl flow noise.
84 *
85 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
86 * instantiation in log-vbox.c(pp).
87 */
88
89
90/*******************************************************************************
91* Defined Constants And Macros *
92*******************************************************************************/
93/** The frequency by which we recalculate the u32UpdateHz and
94 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
95 *
96 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
97 */
98#define GIP_UPDATEHZ_RECALC_FREQ 0x800
99
100/** A reserved TSC value used for synchronization as well as measurement of
101 * TSC deltas. */
102#define GIP_TSC_DELTA_RSVD UINT64_MAX
103/** The number of TSC delta measurement loops in total (includes primer and
104 * read-time loops). */
105#define GIP_TSC_DELTA_LOOPS 96
106/** The number of cache primer loops. */
107#define GIP_TSC_DELTA_PRIMER_LOOPS 4
108/** The number of loops until we keep computing the minimum read time. */
109#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
110/** Stop measurement of TSC delta. */
111#define GIP_TSC_DELTA_SYNC_STOP 0
112/** Start measurement of TSC delta. */
113#define GIP_TSC_DELTA_SYNC_START 1
114/** Worker thread is ready for reading the TSC. */
115#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
116/** Worker thread is done updating TSC delta info. */
117#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
118/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
119 * with a timeout. */
120#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
121/** When IPRT isn't concurrent safe: Worker is ready after waiting for
122 * master with a timeout. */
123#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
124/** The TSC-refinement interval in seconds. */
125#define GIP_TSC_REFINE_INTERVAL 5
126/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
127#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
128/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
129#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
130
131AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
132AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
133
134/** @def VBOX_SVN_REV
135 * The makefile should define this if it can. */
136#ifndef VBOX_SVN_REV
137# define VBOX_SVN_REV 0
138#endif
139
140#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
141# define DO_NOT_START_GIP
142#endif
143
144/*******************************************************************************
145* Internal Functions *
146*******************************************************************************/
147static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
148static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
149static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
150static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
151static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
152static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
153static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
154static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt);
155static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
156static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
157static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
158static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
159static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
160static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
161DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
162DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
163static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
164static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
165static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
166static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq);
167static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq);
168static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
169static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
170static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
171static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
172static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
173static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
174 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
175static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
176static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
177static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
178static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
179 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
180static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
181static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
182static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
183static int supdrvIOCtl_ResumeSuspendedKbds(void);
184
185
186/*******************************************************************************
187* Global Variables *
188*******************************************************************************/
189DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
190
191
192/**
193 * Array of the R0 SUP API.
194 *
195 * While making changes to these exports, make sure to update the IOC
196 * minor version (SUPDRV_IOC_VERSION).
197 */
198static SUPFUNC g_aFunctions[] =
199{
200/* SED: START */
201 /* name function */
202 /* Entries with absolute addresses determined at runtime, fixup
203 code makes ugly ASSUMPTIONS about the order here: */
204 { "SUPR0AbsIs64bit", (void *)0 },
205 { "SUPR0Abs64bitKernelCS", (void *)0 },
206 { "SUPR0Abs64bitKernelSS", (void *)0 },
207 { "SUPR0Abs64bitKernelDS", (void *)0 },
208 { "SUPR0AbsKernelCS", (void *)0 },
209 { "SUPR0AbsKernelSS", (void *)0 },
210 { "SUPR0AbsKernelDS", (void *)0 },
211 { "SUPR0AbsKernelES", (void *)0 },
212 { "SUPR0AbsKernelFS", (void *)0 },
213 { "SUPR0AbsKernelGS", (void *)0 },
214 /* Normal function pointers: */
215 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
216 { "SUPGetGIP", (void *)SUPGetGIP },
217 { "SUPReadTscWithDelta", (void *)SUPReadTscWithDelta },
218 { "SUPGetTscDeltaSlow", (void *)SUPGetTscDeltaSlow },
219 { "SUPGetCpuHzFromGipForAsyncMode", (void *)SUPGetCpuHzFromGipForAsyncMode },
220 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
221 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
222 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
223 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
224 { "SUPR0ContFree", (void *)SUPR0ContFree },
225 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
226 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
227 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
228 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
229 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
230 { "SUPR0GetSvmUsability", (void *)SUPR0GetSvmUsability },
231 { "SUPR0GetVmxUsability", (void *)SUPR0GetVmxUsability },
232 { "SUPR0LockMem", (void *)SUPR0LockMem },
233 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
234 { "SUPR0LowFree", (void *)SUPR0LowFree },
235 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
236 { "SUPR0MemFree", (void *)SUPR0MemFree },
237 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
238 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
239 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
240 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
241 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
242 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
243 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
244 { "SUPR0PageFree", (void *)SUPR0PageFree },
245 { "SUPR0Printf", (void *)SUPR0Printf },
246 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
247 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
248 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
249 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
250 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
251 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
252 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
253 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
254 { "SUPSemEventClose", (void *)SUPSemEventClose },
255 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
256 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
257 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
258 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
259 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
260 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
261 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
262 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
263 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
264 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
265 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
266 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
267 { "SUPSemEventWait", (void *)SUPSemEventWait },
268 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
269 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
270 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
271
272 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
273 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
274 { "RTAssertMsg1", (void *)RTAssertMsg1 },
275 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
276 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
277 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
278 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
279 { "RTCrc32", (void *)RTCrc32 },
280 { "RTCrc32Finish", (void *)RTCrc32Finish },
281 { "RTCrc32Process", (void *)RTCrc32Process },
282 { "RTCrc32Start", (void *)RTCrc32Start },
283 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
284 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
285 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
286 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
287 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
288 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
289 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
290 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
291 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
292 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
293 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
294 { "RTLogPrintfV", (void *)RTLogPrintfV },
295 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
296 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
297 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
298 { "RTMemAllocTag", (void *)RTMemAllocTag },
299 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
300 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
301 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
302 { "RTMemDupExTag", (void *)RTMemDupExTag },
303 { "RTMemDupTag", (void *)RTMemDupTag },
304 { "RTMemFree", (void *)RTMemFree },
305 { "RTMemFreeEx", (void *)RTMemFreeEx },
306 { "RTMemReallocTag", (void *)RTMemReallocTag },
307 { "RTMpCpuId", (void *)RTMpCpuId },
308 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
309 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
310 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
311 { "RTMpGetCount", (void *)RTMpGetCount },
312 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
313 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
314 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
315 { "RTMpGetSet", (void *)RTMpGetSet },
316 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
317 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
318 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
319 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
320 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
321 { "RTMpOnAll", (void *)RTMpOnAll },
322 { "RTMpOnOthers", (void *)RTMpOnOthers },
323 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
324 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
325 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
326 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
327 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
328 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
329 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
330 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
331 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
332 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
333 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
334 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
335 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
336 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
337 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
338 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
339 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
340 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
341 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
342 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
343 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
344 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
345 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
346 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
347 { "RTProcSelf", (void *)RTProcSelf },
348 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
349 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
350 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
351 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
352 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
353 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
354 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
355 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
356 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
357 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
358 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
359 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
360 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
361 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
362 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
363 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
364 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
365 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
366 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
367 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
368 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
369 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
370 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
371 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
372 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
373 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
374 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
375 { "RTSemEventCreate", (void *)RTSemEventCreate },
376 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
377 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
378 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
379 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
380 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
381 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
382 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
383 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
384 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
385 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
386 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
387 { "RTSemEventSignal", (void *)RTSemEventSignal },
388 { "RTSemEventWait", (void *)RTSemEventWait },
389 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
390 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
391 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
392 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
393 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
394 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
395 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
396 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
397 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
398 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
399 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
400 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
401 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
402 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
403 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
404 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
405 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
406 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
407 { "RTStrCopy", (void *)RTStrCopy },
408 { "RTStrDupTag", (void *)RTStrDupTag },
409 { "RTStrFormat", (void *)RTStrFormat },
410 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
411 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
412 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
413 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
414 { "RTStrFormatV", (void *)RTStrFormatV },
415 { "RTStrFree", (void *)RTStrFree },
416 { "RTStrNCmp", (void *)RTStrNCmp },
417 { "RTStrPrintf", (void *)RTStrPrintf },
418 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
419 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
420 { "RTStrPrintfV", (void *)RTStrPrintfV },
421 { "RTThreadCreate", (void *)RTThreadCreate },
422 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
423 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
424 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
425 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
426 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
427 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
428 { "RTThreadGetName", (void *)RTThreadGetName },
429 { "RTThreadGetNative", (void *)RTThreadGetNative },
430 { "RTThreadGetType", (void *)RTThreadGetType },
431 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
432 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
433 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
434 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
435 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
436 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
437 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
438 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
439 { "RTThreadSelf", (void *)RTThreadSelf },
440 { "RTThreadSelfName", (void *)RTThreadSelfName },
441 { "RTThreadSleep", (void *)RTThreadSleep },
442 { "RTThreadUserReset", (void *)RTThreadUserReset },
443 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
444 { "RTThreadUserWait", (void *)RTThreadUserWait },
445 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
446 { "RTThreadWait", (void *)RTThreadWait },
447 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
448 { "RTThreadYield", (void *)RTThreadYield },
449 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
450 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
451 { "RTTimeNow", (void *)RTTimeNow },
452 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
453 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
454 { "RTTimerCreate", (void *)RTTimerCreate },
455 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
456 { "RTTimerDestroy", (void *)RTTimerDestroy },
457 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
458 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
459 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
460 { "RTTimerStart", (void *)RTTimerStart },
461 { "RTTimerStop", (void *)RTTimerStop },
462 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
463 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
464 { "RTUuidCompare", (void *)RTUuidCompare },
465 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
466 { "RTUuidFromStr", (void *)RTUuidFromStr },
467/* SED: END */
468};
469
470#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
471/**
472 * Drag in the rest of IRPT since we share it with the
473 * rest of the kernel modules on darwin.
474 */
475PFNRT g_apfnVBoxDrvIPRTDeps[] =
476{
477 /* VBoxNetAdp */
478 (PFNRT)RTRandBytes,
479 /* VBoxUSB */
480 (PFNRT)RTPathStripFilename,
481 NULL
482};
483#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_SOLARIS */
484
485
486/**
487 * Initializes the device extentsion structure.
488 *
489 * @returns IPRT status code.
490 * @param pDevExt The device extension to initialize.
491 * @param cbSession The size of the session structure. The size of
492 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
493 * defined because we're skipping the OS specific members
494 * then.
495 */
496int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
497{
498 int rc;
499
500#ifdef SUPDRV_WITH_RELEASE_LOGGER
501 /*
502 * Create the release log.
503 */
504 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
505 PRTLOGGER pRelLogger;
506 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
507 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
508 if (RT_SUCCESS(rc))
509 RTLogRelSetDefaultInstance(pRelLogger);
510 /** @todo Add native hook for getting logger config parameters and setting
511 * them. On linux we should use the module parameter stuff... */
512#endif
513
514 /*
515 * Initialize it.
516 */
517 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
518 pDevExt->Spinlock = NIL_RTSPINLOCK;
519 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
520 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
521 pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
522 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
523 if (RT_SUCCESS(rc))
524 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
525 if (RT_SUCCESS(rc))
526 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
527
528 if (RT_SUCCESS(rc))
529#ifdef SUPDRV_USE_MUTEX_FOR_LDR
530 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
531#else
532 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
533#endif
534 if (RT_SUCCESS(rc))
535 {
536 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
537 if (RT_SUCCESS(rc))
538 {
539#ifdef SUPDRV_USE_MUTEX_FOR_LDR
540 rc = RTSemMutexCreate(&pDevExt->mtxGip);
541#else
542 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
543#endif
544 if (RT_SUCCESS(rc))
545 {
546 rc = supdrvGipCreate(pDevExt);
547 if (RT_SUCCESS(rc))
548 {
549 rc = supdrvTracerInit(pDevExt);
550 if (RT_SUCCESS(rc))
551 {
552 pDevExt->pLdrInitImage = NULL;
553 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
554 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
555 pDevExt->cbSession = (uint32_t)cbSession;
556
557 /*
558 * Fixup the absolute symbols.
559 *
560 * Because of the table indexing assumptions we'll have a little #ifdef orgy
561 * here rather than distributing this to OS specific files. At least for now.
562 */
563#ifdef RT_OS_DARWIN
564# if ARCH_BITS == 32
565 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
566 {
567 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
568 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
569 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
570 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
571 }
572 else
573 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
574 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
575 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
576 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
577 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
578 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
579 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
580# else /* 64-bit darwin: */
581 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
582 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
583 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
584 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
585 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
586 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
587 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
588 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
589 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
590 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
591
592# endif
593#else /* !RT_OS_DARWIN */
594# if ARCH_BITS == 64
595 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
596 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
597 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
598 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
599# else
600 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
601# endif
602 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
603 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
604 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
605 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
606 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
607 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
608#endif /* !RT_OS_DARWIN */
609 return VINF_SUCCESS;
610 }
611
612 supdrvGipDestroy(pDevExt);
613 }
614
615#ifdef SUPDRV_USE_MUTEX_FOR_GIP
616 RTSemMutexDestroy(pDevExt->mtxGip);
617 pDevExt->mtxGip = NIL_RTSEMMUTEX;
618#else
619 RTSemFastMutexDestroy(pDevExt->mtxGip);
620 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
621#endif
622 }
623 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
624 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
625 }
626#ifdef SUPDRV_USE_MUTEX_FOR_LDR
627 RTSemMutexDestroy(pDevExt->mtxLdr);
628 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
629#else
630 RTSemFastMutexDestroy(pDevExt->mtxLdr);
631 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
632#endif
633 }
634
635 RTSpinlockDestroy(pDevExt->Spinlock);
636 pDevExt->Spinlock = NIL_RTSPINLOCK;
637 RTSpinlockDestroy(pDevExt->hGipSpinlock);
638 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
639 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
640 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
641
642#ifdef SUPDRV_WITH_RELEASE_LOGGER
643 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
644 RTLogDestroy(RTLogSetDefaultInstance(NULL));
645#endif
646
647 return rc;
648}
649
650
651/**
652 * Delete the device extension (e.g. cleanup members).
653 *
654 * @param pDevExt The device extension to delete.
655 */
656void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
657{
658 PSUPDRVOBJ pObj;
659 PSUPDRVUSAGE pUsage;
660
661 /*
662 * Kill mutexes and spinlocks.
663 */
664#ifdef SUPDRV_USE_MUTEX_FOR_GIP
665 RTSemMutexDestroy(pDevExt->mtxGip);
666 pDevExt->mtxGip = NIL_RTSEMMUTEX;
667#else
668 RTSemFastMutexDestroy(pDevExt->mtxGip);
669 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
670#endif
671#ifdef SUPDRV_USE_MUTEX_FOR_LDR
672 RTSemMutexDestroy(pDevExt->mtxLdr);
673 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
674#else
675 RTSemFastMutexDestroy(pDevExt->mtxLdr);
676 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
677#endif
678 RTSpinlockDestroy(pDevExt->Spinlock);
679 pDevExt->Spinlock = NIL_RTSPINLOCK;
680 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
681 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
682 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
683 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
684
685 /*
686 * Free lists.
687 */
688 /* objects. */
689 pObj = pDevExt->pObjs;
690 Assert(!pObj); /* (can trigger on forced unloads) */
691 pDevExt->pObjs = NULL;
692 while (pObj)
693 {
694 void *pvFree = pObj;
695 pObj = pObj->pNext;
696 RTMemFree(pvFree);
697 }
698
699 /* usage records. */
700 pUsage = pDevExt->pUsageFree;
701 pDevExt->pUsageFree = NULL;
702 while (pUsage)
703 {
704 void *pvFree = pUsage;
705 pUsage = pUsage->pNext;
706 RTMemFree(pvFree);
707 }
708
709 /* kill the GIP. */
710 supdrvGipDestroy(pDevExt);
711 RTSpinlockDestroy(pDevExt->hGipSpinlock);
712 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
713
714 supdrvTracerTerm(pDevExt);
715
716#ifdef SUPDRV_WITH_RELEASE_LOGGER
717 /* destroy the loggers. */
718 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
719 RTLogDestroy(RTLogSetDefaultInstance(NULL));
720#endif
721}
722
723
724/**
725 * Create session.
726 *
727 * @returns IPRT status code.
728 * @param pDevExt Device extension.
729 * @param fUser Flag indicating whether this is a user or kernel
730 * session.
731 * @param fUnrestricted Unrestricted access (system) or restricted access
732 * (user)?
733 * @param ppSession Where to store the pointer to the session data.
734 */
735int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
736{
737 int rc;
738 PSUPDRVSESSION pSession;
739
740 if (!SUP_IS_DEVEXT_VALID(pDevExt))
741 return VERR_INVALID_PARAMETER;
742
743 /*
744 * Allocate memory for the session data.
745 */
746 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
747 if (pSession)
748 {
749 /* Initialize session data. */
750 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
751 if (!rc)
752 {
753 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
754 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
755 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
756 if (RT_SUCCESS(rc))
757 {
758 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
759 pSession->pDevExt = pDevExt;
760 pSession->u32Cookie = BIRD_INV;
761 pSession->fUnrestricted = fUnrestricted;
762 /*pSession->fInHashTable = false; */
763 pSession->cRefs = 1;
764 /*pSession->pCommonNextHash = NULL;
765 pSession->ppOsSessionPtr = NULL; */
766 if (fUser)
767 {
768 pSession->Process = RTProcSelf();
769 pSession->R0Process = RTR0ProcHandleSelf();
770 }
771 else
772 {
773 pSession->Process = NIL_RTPROCESS;
774 pSession->R0Process = NIL_RTR0PROCESS;
775 }
776 /*pSession->pLdrUsage = NULL;
777 pSession->pVM = NULL;
778 pSession->pUsage = NULL;
779 pSession->pGip = NULL;
780 pSession->fGipReferenced = false;
781 pSession->Bundle.cUsed = 0; */
782 pSession->Uid = NIL_RTUID;
783 pSession->Gid = NIL_RTGID;
784 /*pSession->uTracerData = 0;*/
785 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
786 RTListInit(&pSession->TpProviders);
787 /*pSession->cTpProviders = 0;*/
788 /*pSession->cTpProbesFiring = 0;*/
789 RTListInit(&pSession->TpUmods);
790 /*RT_ZERO(pSession->apTpLookupTable);*/
791
792 VBOXDRV_SESSION_CREATE(pSession, fUser);
793 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
794 return VINF_SUCCESS;
795 }
796
797 RTSpinlockDestroy(pSession->Spinlock);
798 }
799 RTMemFree(pSession);
800 *ppSession = NULL;
801 Log(("Failed to create spinlock, rc=%d!\n", rc));
802 }
803 else
804 rc = VERR_NO_MEMORY;
805
806 return rc;
807}
808
809
810/**
811 * Cleans up the session in the context of the process to which it belongs, the
812 * caller will free the session and the session spinlock.
813 *
814 * This should normally occur when the session is closed or as the process
815 * exits. Careful reference counting in the OS specfic code makes sure that
816 * there cannot be any races between process/handle cleanup callbacks and
817 * threads doing I/O control calls.
818 *
819 * @param pDevExt The device extension.
820 * @param pSession Session data.
821 */
822static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
823{
824 int rc;
825 PSUPDRVBUNDLE pBundle;
826 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
827
828 Assert(!pSession->fInHashTable);
829 Assert(!pSession->ppOsSessionPtr);
830 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
831 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
832
833 /*
834 * Remove logger instances related to this session.
835 */
836 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
837
838 /*
839 * Destroy the handle table.
840 */
841 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
842 AssertRC(rc);
843 pSession->hHandleTable = NIL_RTHANDLETABLE;
844
845 /*
846 * Release object references made in this session.
847 * In theory there should be noone racing us in this session.
848 */
849 Log2(("release objects - start\n"));
850 if (pSession->pUsage)
851 {
852 PSUPDRVUSAGE pUsage;
853 RTSpinlockAcquire(pDevExt->Spinlock);
854
855 while ((pUsage = pSession->pUsage) != NULL)
856 {
857 PSUPDRVOBJ pObj = pUsage->pObj;
858 pSession->pUsage = pUsage->pNext;
859
860 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
861 if (pUsage->cUsage < pObj->cUsage)
862 {
863 pObj->cUsage -= pUsage->cUsage;
864 RTSpinlockRelease(pDevExt->Spinlock);
865 }
866 else
867 {
868 /* Destroy the object and free the record. */
869 if (pDevExt->pObjs == pObj)
870 pDevExt->pObjs = pObj->pNext;
871 else
872 {
873 PSUPDRVOBJ pObjPrev;
874 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
875 if (pObjPrev->pNext == pObj)
876 {
877 pObjPrev->pNext = pObj->pNext;
878 break;
879 }
880 Assert(pObjPrev);
881 }
882 RTSpinlockRelease(pDevExt->Spinlock);
883
884 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
885 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
886 if (pObj->pfnDestructor)
887 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
888 RTMemFree(pObj);
889 }
890
891 /* free it and continue. */
892 RTMemFree(pUsage);
893
894 RTSpinlockAcquire(pDevExt->Spinlock);
895 }
896
897 RTSpinlockRelease(pDevExt->Spinlock);
898 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
899 }
900 Log2(("release objects - done\n"));
901
902 /*
903 * Do tracer cleanups related to this session.
904 */
905 Log2(("release tracer stuff - start\n"));
906 supdrvTracerCleanupSession(pDevExt, pSession);
907 Log2(("release tracer stuff - end\n"));
908
909 /*
910 * Release memory allocated in the session.
911 *
912 * We do not serialize this as we assume that the application will
913 * not allocated memory while closing the file handle object.
914 */
915 Log2(("freeing memory:\n"));
916 pBundle = &pSession->Bundle;
917 while (pBundle)
918 {
919 PSUPDRVBUNDLE pToFree;
920 unsigned i;
921
922 /*
923 * Check and unlock all entries in the bundle.
924 */
925 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
926 {
927 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
928 {
929 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
930 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
931 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
932 {
933 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
934 AssertRC(rc); /** @todo figure out how to handle this. */
935 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
936 }
937 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
938 AssertRC(rc); /** @todo figure out how to handle this. */
939 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
940 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
941 }
942 }
943
944 /*
945 * Advance and free previous bundle.
946 */
947 pToFree = pBundle;
948 pBundle = pBundle->pNext;
949
950 pToFree->pNext = NULL;
951 pToFree->cUsed = 0;
952 if (pToFree != &pSession->Bundle)
953 RTMemFree(pToFree);
954 }
955 Log2(("freeing memory - done\n"));
956
957 /*
958 * Deregister component factories.
959 */
960 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
961 Log2(("deregistering component factories:\n"));
962 if (pDevExt->pComponentFactoryHead)
963 {
964 PSUPDRVFACTORYREG pPrev = NULL;
965 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
966 while (pCur)
967 {
968 if (pCur->pSession == pSession)
969 {
970 /* unlink it */
971 PSUPDRVFACTORYREG pNext = pCur->pNext;
972 if (pPrev)
973 pPrev->pNext = pNext;
974 else
975 pDevExt->pComponentFactoryHead = pNext;
976
977 /* free it */
978 pCur->pNext = NULL;
979 pCur->pSession = NULL;
980 pCur->pFactory = NULL;
981 RTMemFree(pCur);
982
983 /* next */
984 pCur = pNext;
985 }
986 else
987 {
988 /* next */
989 pPrev = pCur;
990 pCur = pCur->pNext;
991 }
992 }
993 }
994 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
995 Log2(("deregistering component factories - done\n"));
996
997 /*
998 * Loaded images needs to be dereferenced and possibly freed up.
999 */
1000 supdrvLdrLock(pDevExt);
1001 Log2(("freeing images:\n"));
1002 if (pSession->pLdrUsage)
1003 {
1004 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1005 pSession->pLdrUsage = NULL;
1006 while (pUsage)
1007 {
1008 void *pvFree = pUsage;
1009 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1010 if (pImage->cUsage > pUsage->cUsage)
1011 pImage->cUsage -= pUsage->cUsage;
1012 else
1013 supdrvLdrFree(pDevExt, pImage);
1014 pUsage->pImage = NULL;
1015 pUsage = pUsage->pNext;
1016 RTMemFree(pvFree);
1017 }
1018 }
1019 supdrvLdrUnlock(pDevExt);
1020 Log2(("freeing images - done\n"));
1021
1022 /*
1023 * Unmap the GIP.
1024 */
1025 Log2(("umapping GIP:\n"));
1026 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1027 {
1028 SUPR0GipUnmap(pSession);
1029 pSession->fGipReferenced = 0;
1030 }
1031 Log2(("umapping GIP - done\n"));
1032}
1033
1034
1035/**
1036 * Common code for freeing a session when the reference count reaches zero.
1037 *
1038 * @param pDevExt Device extension.
1039 * @param pSession Session data.
1040 * This data will be freed by this routine.
1041 */
1042static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1043{
1044 VBOXDRV_SESSION_CLOSE(pSession);
1045
1046 /*
1047 * Cleanup the session first.
1048 */
1049 supdrvCleanupSession(pDevExt, pSession);
1050 supdrvOSCleanupSession(pDevExt, pSession);
1051
1052 /*
1053 * Free the rest of the session stuff.
1054 */
1055 RTSpinlockDestroy(pSession->Spinlock);
1056 pSession->Spinlock = NIL_RTSPINLOCK;
1057 pSession->pDevExt = NULL;
1058 RTMemFree(pSession);
1059 LogFlow(("supdrvDestroySession: returns\n"));
1060}
1061
1062
1063/**
1064 * Inserts the session into the global hash table.
1065 *
1066 * @retval VINF_SUCCESS on success.
1067 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1068 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1069 * session (asserted).
1070 * @retval VERR_DUPLICATE if there is already a session for that pid.
1071 *
1072 * @param pDevExt The device extension.
1073 * @param pSession The session.
1074 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1075 * available and used. This will set to point to the
1076 * session while under the protection of the session
1077 * hash table spinlock. It will also be kept in
1078 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1079 * cleanup use.
1080 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1081 */
1082int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1083 void *pvUser)
1084{
1085 PSUPDRVSESSION pCur;
1086 unsigned iHash;
1087
1088 /*
1089 * Validate input.
1090 */
1091 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1092 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1093
1094 /*
1095 * Calculate the hash table index and acquire the spinlock.
1096 */
1097 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1098
1099 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1100
1101 /*
1102 * If there are a collisions, we need to carefully check if we got a
1103 * duplicate. There can only be one open session per process.
1104 */
1105 pCur = pDevExt->apSessionHashTab[iHash];
1106 if (pCur)
1107 {
1108 while (pCur && pCur->Process != pSession->Process)
1109 pCur = pCur->pCommonNextHash;
1110
1111 if (pCur)
1112 {
1113 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1114 if (pCur == pSession)
1115 {
1116 Assert(pSession->fInHashTable);
1117 AssertFailed();
1118 return VERR_WRONG_ORDER;
1119 }
1120 Assert(!pSession->fInHashTable);
1121 if (pCur->R0Process == pSession->R0Process)
1122 return VERR_RESOURCE_IN_USE;
1123 return VERR_DUPLICATE;
1124 }
1125 }
1126 Assert(!pSession->fInHashTable);
1127 Assert(!pSession->ppOsSessionPtr);
1128
1129 /*
1130 * Insert it, doing a callout to the OS specific code in case it has
1131 * anything it wishes to do while we're holding the spinlock.
1132 */
1133 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1134 pDevExt->apSessionHashTab[iHash] = pSession;
1135 pSession->fInHashTable = true;
1136 ASMAtomicIncS32(&pDevExt->cSessions);
1137
1138 pSession->ppOsSessionPtr = ppOsSessionPtr;
1139 if (ppOsSessionPtr)
1140 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1141
1142 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1143
1144 /*
1145 * Retain a reference for the pointer in the session table.
1146 */
1147 ASMAtomicIncU32(&pSession->cRefs);
1148
1149 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1150 return VINF_SUCCESS;
1151}
1152
1153
1154/**
1155 * Removes the session from the global hash table.
1156 *
1157 * @retval VINF_SUCCESS on success.
1158 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1159 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1160 * session (asserted).
1161 *
1162 * @param pDevExt The device extension.
1163 * @param pSession The session. The caller is expected to have a reference
1164 * to this so it won't croak on us when we release the hash
1165 * table reference.
1166 * @param pvUser OS specific context value for the
1167 * supdrvOSSessionHashTabInserted callback.
1168 */
1169int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1170{
1171 PSUPDRVSESSION pCur;
1172 unsigned iHash;
1173 int32_t cRefs;
1174
1175 /*
1176 * Validate input.
1177 */
1178 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1179 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1180
1181 /*
1182 * Calculate the hash table index and acquire the spinlock.
1183 */
1184 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1185
1186 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1187
1188 /*
1189 * Unlink it.
1190 */
1191 pCur = pDevExt->apSessionHashTab[iHash];
1192 if (pCur == pSession)
1193 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1194 else
1195 {
1196 PSUPDRVSESSION pPrev = pCur;
1197 while (pCur && pCur != pSession)
1198 {
1199 pPrev = pCur;
1200 pCur = pCur->pCommonNextHash;
1201 }
1202 if (pCur)
1203 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1204 else
1205 {
1206 Assert(!pSession->fInHashTable);
1207 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1208 return VERR_NOT_FOUND;
1209 }
1210 }
1211
1212 pSession->pCommonNextHash = NULL;
1213 pSession->fInHashTable = false;
1214
1215 ASMAtomicDecS32(&pDevExt->cSessions);
1216
1217 /*
1218 * Clear OS specific session pointer if available and do the OS callback.
1219 */
1220 if (pSession->ppOsSessionPtr)
1221 {
1222 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1223 pSession->ppOsSessionPtr = NULL;
1224 }
1225
1226 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1227
1228 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1229
1230 /*
1231 * Drop the reference the hash table had to the session. This shouldn't
1232 * be the last reference!
1233 */
1234 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1235 Assert(cRefs > 0 && cRefs < _1M);
1236 if (cRefs == 0)
1237 supdrvDestroySession(pDevExt, pSession);
1238
1239 return VINF_SUCCESS;
1240}
1241
1242
1243/**
1244 * Looks up the session for the current process in the global hash table or in
1245 * OS specific pointer.
1246 *
1247 * @returns Pointer to the session with a reference that the caller must
1248 * release. If no valid session was found, NULL is returned.
1249 *
1250 * @param pDevExt The device extension.
1251 * @param Process The process ID.
1252 * @param R0Process The ring-0 process handle.
1253 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1254 * this is used instead of the hash table. For
1255 * additional safety it must then be equal to the
1256 * SUPDRVSESSION::ppOsSessionPtr member.
1257 * This can be NULL even if the OS has a session
1258 * pointer.
1259 */
1260PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1261 PSUPDRVSESSION *ppOsSessionPtr)
1262{
1263 PSUPDRVSESSION pCur;
1264 unsigned iHash;
1265
1266 /*
1267 * Validate input.
1268 */
1269 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1270
1271 /*
1272 * Calculate the hash table index and acquire the spinlock.
1273 */
1274 iHash = SUPDRV_SESSION_HASH(Process);
1275
1276 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1277
1278 /*
1279 * If an OS session pointer is provided, always use it.
1280 */
1281 if (ppOsSessionPtr)
1282 {
1283 pCur = *ppOsSessionPtr;
1284 if ( pCur
1285 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1286 || pCur->Process != Process
1287 || pCur->R0Process != R0Process) )
1288 pCur = NULL;
1289 }
1290 else
1291 {
1292 /*
1293 * Otherwise, do the hash table lookup.
1294 */
1295 pCur = pDevExt->apSessionHashTab[iHash];
1296 while ( pCur
1297 && ( pCur->Process != Process
1298 || pCur->R0Process != R0Process) )
1299 pCur = pCur->pCommonNextHash;
1300 }
1301
1302 /*
1303 * Retain the session.
1304 */
1305 if (pCur)
1306 {
1307 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1308 NOREF(cRefs);
1309 Assert(cRefs > 1 && cRefs < _1M);
1310 }
1311
1312 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1313
1314 return pCur;
1315}
1316
1317
1318/**
1319 * Retain a session to make sure it doesn't go away while it is in use.
1320 *
1321 * @returns New reference count on success, UINT32_MAX on failure.
1322 * @param pSession Session data.
1323 */
1324uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1325{
1326 uint32_t cRefs;
1327 AssertPtrReturn(pSession, UINT32_MAX);
1328 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1329
1330 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1331 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1332 return cRefs;
1333}
1334
1335
1336/**
1337 * Releases a given session.
1338 *
1339 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1340 * @param pSession Session data.
1341 */
1342uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1343{
1344 uint32_t cRefs;
1345 AssertPtrReturn(pSession, UINT32_MAX);
1346 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1347
1348 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1349 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1350 if (cRefs == 0)
1351 supdrvDestroySession(pSession->pDevExt, pSession);
1352 return cRefs;
1353}
1354
1355
1356/**
1357 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1358 *
1359 * @returns IPRT status code, see SUPR0ObjAddRef.
1360 * @param hHandleTable The handle table handle. Ignored.
1361 * @param pvObj The object pointer.
1362 * @param pvCtx Context, the handle type. Ignored.
1363 * @param pvUser Session pointer.
1364 */
1365static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1366{
1367 NOREF(pvCtx);
1368 NOREF(hHandleTable);
1369 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1370}
1371
1372
1373/**
1374 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1375 *
1376 * @param hHandleTable The handle table handle. Ignored.
1377 * @param h The handle value. Ignored.
1378 * @param pvObj The object pointer.
1379 * @param pvCtx Context, the handle type. Ignored.
1380 * @param pvUser Session pointer.
1381 */
1382static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1383{
1384 NOREF(pvCtx);
1385 NOREF(h);
1386 NOREF(hHandleTable);
1387 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1388}
1389
1390
1391/**
1392 * Fast path I/O Control worker.
1393 *
1394 * @returns VBox status code that should be passed down to ring-3 unchanged.
1395 * @param uIOCtl Function number.
1396 * @param idCpu VMCPU id.
1397 * @param pDevExt Device extention.
1398 * @param pSession Session data.
1399 */
1400int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1401{
1402 /*
1403 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1404 */
1405 if (RT_LIKELY( RT_VALID_PTR(pSession)
1406 && pSession->pVM
1407 && pDevExt->pfnVMMR0EntryFast))
1408 {
1409 switch (uIOCtl)
1410 {
1411 case SUP_IOCTL_FAST_DO_RAW_RUN:
1412 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1413 break;
1414 case SUP_IOCTL_FAST_DO_HM_RUN:
1415 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1416 break;
1417 case SUP_IOCTL_FAST_DO_NOP:
1418 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1419 break;
1420 default:
1421 return VERR_INTERNAL_ERROR;
1422 }
1423 return VINF_SUCCESS;
1424 }
1425 return VERR_INTERNAL_ERROR;
1426}
1427
1428
1429/**
1430 * Helper for supdrvIOCtl. Check if pszStr contains any character of pszChars.
1431 * We would use strpbrk here if this function would be contained in the RedHat kABI white
1432 * list, see http://www.kerneldrivers.org/RHEL5.
1433 *
1434 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
1435 * @param pszStr String to check
1436 * @param pszChars Character set
1437 */
1438static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
1439{
1440 int chCur;
1441 while ((chCur = *pszStr++) != '\0')
1442 {
1443 int ch;
1444 const char *psz = pszChars;
1445 while ((ch = *psz++) != '\0')
1446 if (ch == chCur)
1447 return 1;
1448
1449 }
1450 return 0;
1451}
1452
1453
1454
1455/**
1456 * I/O Control inner worker (tracing reasons).
1457 *
1458 * @returns IPRT status code.
1459 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1460 *
1461 * @param uIOCtl Function number.
1462 * @param pDevExt Device extention.
1463 * @param pSession Session data.
1464 * @param pReqHdr The request header.
1465 */
1466static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1467{
1468 /*
1469 * Validation macros
1470 */
1471#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1472 do { \
1473 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1474 { \
1475 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1476 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1477 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1478 } \
1479 } while (0)
1480
1481#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1482
1483#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1484 do { \
1485 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1486 { \
1487 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1488 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1489 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1490 } \
1491 } while (0)
1492
1493#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1494 do { \
1495 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1496 { \
1497 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1498 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1499 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1500 } \
1501 } while (0)
1502
1503#define REQ_CHECK_EXPR(Name, expr) \
1504 do { \
1505 if (RT_UNLIKELY(!(expr))) \
1506 { \
1507 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1508 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1509 } \
1510 } while (0)
1511
1512#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1513 do { \
1514 if (RT_UNLIKELY(!(expr))) \
1515 { \
1516 OSDBGPRINT( fmt ); \
1517 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1518 } \
1519 } while (0)
1520
1521 /*
1522 * The switch.
1523 */
1524 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1525 {
1526 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1527 {
1528 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1529 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1530 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1531 {
1532 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1533 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1534 return 0;
1535 }
1536
1537#if 0
1538 /*
1539 * Call out to the OS specific code and let it do permission checks on the
1540 * client process.
1541 */
1542 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1543 {
1544 pReq->u.Out.u32Cookie = 0xffffffff;
1545 pReq->u.Out.u32SessionCookie = 0xffffffff;
1546 pReq->u.Out.u32SessionVersion = 0xffffffff;
1547 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1548 pReq->u.Out.pSession = NULL;
1549 pReq->u.Out.cFunctions = 0;
1550 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1551 return 0;
1552 }
1553#endif
1554
1555 /*
1556 * Match the version.
1557 * The current logic is very simple, match the major interface version.
1558 */
1559 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1560 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1561 {
1562 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1563 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1564 pReq->u.Out.u32Cookie = 0xffffffff;
1565 pReq->u.Out.u32SessionCookie = 0xffffffff;
1566 pReq->u.Out.u32SessionVersion = 0xffffffff;
1567 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1568 pReq->u.Out.pSession = NULL;
1569 pReq->u.Out.cFunctions = 0;
1570 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1571 return 0;
1572 }
1573
1574 /*
1575 * Fill in return data and be gone.
1576 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1577 * u32SessionVersion <= u32ReqVersion!
1578 */
1579 /** @todo Somehow validate the client and negotiate a secure cookie... */
1580 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1581 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1582 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1583 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1584 pReq->u.Out.pSession = pSession;
1585 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1586 pReq->Hdr.rc = VINF_SUCCESS;
1587 return 0;
1588 }
1589
1590 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1591 {
1592 /* validate */
1593 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1594 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1595
1596 /* execute */
1597 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1598 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1599 pReq->Hdr.rc = VINF_SUCCESS;
1600 return 0;
1601 }
1602
1603 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1604 {
1605 /* validate */
1606 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1607 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1608 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1609 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1610 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1611
1612 /* execute */
1613 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1614 if (RT_FAILURE(pReq->Hdr.rc))
1615 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1616 return 0;
1617 }
1618
1619 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1620 {
1621 /* validate */
1622 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1623 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1624
1625 /* execute */
1626 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1627 return 0;
1628 }
1629
1630 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1631 {
1632 /* validate */
1633 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1634 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1635
1636 /* execute */
1637 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1638 if (RT_FAILURE(pReq->Hdr.rc))
1639 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1640 return 0;
1641 }
1642
1643 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1644 {
1645 /* validate */
1646 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1647 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1648
1649 /* execute */
1650 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1651 return 0;
1652 }
1653
1654 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1655 {
1656 /* validate */
1657 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1658 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1659 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1660 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1661 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1662 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1663 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1664 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1665 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1666 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1667 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1668
1669 /* execute */
1670 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1671 return 0;
1672 }
1673
1674 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1675 {
1676 /* validate */
1677 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1678 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1679 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1680 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1681 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1682 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1683 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1684 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1685 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1686 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1687 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1688 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1689 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1690 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1691 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1692
1693 if (pReq->u.In.cSymbols)
1694 {
1695 uint32_t i;
1696 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1697 for (i = 0; i < pReq->u.In.cSymbols; i++)
1698 {
1699 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1700 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1701 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1702 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1703 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1704 pReq->u.In.cbStrTab - paSyms[i].offName),
1705 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1706 }
1707 }
1708
1709 /* execute */
1710 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1711 return 0;
1712 }
1713
        /* Release a loader image reference (and the image itself when the last ref drops). */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
        {
            /* validate */
            PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
            return 0;
        }

        /* Lock down the loader so no further images can be loaded (device-extension wide). */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOCK_DOWN):
        {
            /* validate */
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_LOCK_DOWN);

            /* execute */
            pReqHdr->rc = supdrvIOCtl_LdrLockDown(pDevExt);
            return 0;
        }

        /* Look up a symbol address in a loaded image by name. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
        {
            /* validate */
            PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
            /* The symbol name must be terminated within its buffer. */
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
            return 0;
        }
1746
        /*
         * Dispatch a call into the VMMR0 module.  Two request shapes exist: a
         * bare call (no embedded packet) and one carrying a SUPVMMR0REQHDR
         * packet in abReqPkt[], distinguished by the input size.
         */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
        {
            /* validate */
            PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
            Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                  pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));

            if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
            {
                /* No embedded request packet. */
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));

                /* execute */
                if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
                    pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
                else
                    pReq->Hdr.rc = VERR_WRONG_ORDER; /* VMMR0 not loaded/registered yet */
            }
            else
            {
                /* Embedded request packet: verify its magic and that the declared
                   packet size matches the ioctl buffer sizes. */
                PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
                REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
                                   ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
                REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));

                /* execute */
                if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
                    pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
                else
                    pReq->Hdr.rc = VERR_WRONG_ORDER;
            }

            /* Log real failures loudly; VERR_INTERRUPTED/VERR_TIMEOUT are routine. */
            if (    RT_FAILURE(pReq->Hdr.rc)
                &&  pReq->Hdr.rc != VERR_INTERRUPTED
                &&  pReq->Hdr.rc != VERR_TIMEOUT)
                Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                     pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            else
                Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                      pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            return 0;
        }

        /*
         * Same as SUP_IOCTL_CALL_VMMR0 but for large request packets; the
         * embedded SUPVMMR0REQHDR is mandatory here.
         */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
        {
            /* validate */
            PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
            PSUPVMMR0REQHDR pVMMReq;
            Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                  pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));

            pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
            REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
                               ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
            REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
            REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));

            /* execute */
            if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
                pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
            else
                pReq->Hdr.rc = VERR_WRONG_ORDER;

            /* Log real failures loudly; VERR_INTERRUPTED/VERR_TIMEOUT are routine. */
            if (    RT_FAILURE(pReq->Hdr.rc)
                &&  pReq->Hdr.rc != VERR_INTERRUPTED
                &&  pReq->Hdr.rc != VERR_TIMEOUT)
                Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                     pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            else
                Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                      pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            return 0;
        }
1820
        /* Query the host's paging mode. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
        {
            /* validate */
            PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);

            /* execute */
            pReq->Hdr.rc = VINF_SUCCESS;
            pReq->u.Out.enmMode = SUPR0GetPagingMode();
            return 0;
        }

        /* Allocate memory below 4GB, mapped into both ring-0 and ring-3. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
        {
            /* validate */
            PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
            /* Input is fixed size; output size depends on the page count. */
            REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
            REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));

            /* execute */
            pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr); /* only the header is copied back on failure */
            return 0;
        }

        /* Free memory allocated by SUP_IOCTL_LOW_ALLOC (identified by its R3 address). */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
        {
            /* validate */
            PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
            return 0;
        }

        /* Map the Global Information Page (GIP) into the calling process. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
        {
            /* validate */
            PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);

            /* execute */
            pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
            if (RT_SUCCESS(pReq->Hdr.rc))
                pReq->u.Out.pGipR0 = pDevExt->pGip; /* also hand out the ring-0 address */
            return 0;
        }

        /* Unmap the GIP from the calling process. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
        {
            /* validate */
            PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);

            /* execute */
            pReq->Hdr.rc = SUPR0GipUnmap(pSession);
            return 0;
        }
1881
        /* Associate a VM structure with this session for use by the fast I/O control path. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
        {
            /* validate */
            PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
            /* NULL clears the association; otherwise the pointer must be valid and page aligned. */
            REQ_CHECK_EXPR_FMT(     !pReq->u.In.pVMR0
                               ||   (   VALID_PTR(pReq->u.In.pVMR0)
                                     && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
                               ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
            /* execute */
            pSession->pVM = pReq->u.In.pVMR0;
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        /* Allocate pages with selectable ring-0 and/or ring-3 mappings. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
        {
            /* validate */
            PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
            REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
            REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
            /* At least one mapping is required, and the user mapping is currently mandatory. */
            REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
                               ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
            REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
                               ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
            REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
                               ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));

            /* execute */
            pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
                                            pReq->u.In.fUserMapping   ? &pReq->u.Out.pvR3 : NULL,
                                            pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
                                            &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr); /* only the header is copied back on failure */
            return 0;
        }
1919
        /* Map a (sub)range of a ring-3 allocation into kernel space. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
        {
            /* validate */
            PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
            REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
            /* Subrange offset and size must be page aligned; size must be non-zero. */
            REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
            REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
                               ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));

            /* execute */
            pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
                                              pReq->u.In.fFlags, &pReq->u.Out.pvR0);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr); /* only the header is copied back on failure */
            return 0;
        }

        /* Change the protection of a (sub)range of an allocation. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
        {
            /* validate */
            PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
            /* Only the defined protection bits may be set. */
            REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
                               ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
            /* Subrange offset and size must be page aligned; size must be non-zero. */
            REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
            REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
                               ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));

            /* execute */
            pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
            return 0;
        }

        /* Free pages allocated by SUP_IOCTL_PAGE_ALLOC_EX (identified by the R3 address). */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
        {
            /* validate */
            PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
            return 0;
        }
1964
        /*
         * Call a registered component/service module.  Like SUP_IOCTL_CALL_VMMR0,
         * the request may optionally carry an embedded SUPR0SERVICEREQHDR packet.
         */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
        {
            /* validate */
            PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
            Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                  pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));

            if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
                /* No embedded request packet. */
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
            else
            {
                /* Embedded packet: verify magic and declared size against the buffer. */
                PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
                REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
                                   ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
                REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
            }
            REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
            return 0;
        }

        /*
         * Configure one of the driver loggers.  The variable sized string table
         * at the end holds the group/flags/destination strings; all offsets must
         * point inside it and it must be zero terminated.
         */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
        {
            /* validate */
            PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
            size_t cbStrTab;
            REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
            REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
            cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
            REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups      < cbStrTab);
            REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags       < cbStrTab);
            REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
            REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
                               ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
                                pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
            REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
            REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
            return 0;
        }
2010
        /*
         * Two-argument semaphore operations (wait/signal/close/reset) on the
         * session's event and multi-release-event semaphores.
         */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
        {
            /* validate */
            PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
            REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
            REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);

            /* execute */
            switch (pReq->u.In.uType)
            {
                case SUP_SEM_TYPE_EVENT:
                {
                    SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
                    switch (pReq->u.In.uOp)
                    {
                        case SUPSEMOP2_WAIT_MS_REL:
                            pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
                            break;
                        case SUPSEMOP2_WAIT_NS_ABS:
                            pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
                            break;
                        case SUPSEMOP2_WAIT_NS_REL:
                            pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
                            break;
                        case SUPSEMOP2_SIGNAL:
                            pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
                            break;
                        case SUPSEMOP2_CLOSE:
                            pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
                            break;
                        case SUPSEMOP2_RESET: /* reset is not supported for single-release events */
                        default:
                            pReq->Hdr.rc = VERR_INVALID_FUNCTION;
                            break;
                    }
                    break;
                }

                case SUP_SEM_TYPE_EVENT_MULTI:
                {
                    SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
                    switch (pReq->u.In.uOp)
                    {
                        case SUPSEMOP2_WAIT_MS_REL:
                            pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
                            break;
                        case SUPSEMOP2_WAIT_NS_ABS:
                            pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
                            break;
                        case SUPSEMOP2_WAIT_NS_REL:
                            pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
                            break;
                        case SUPSEMOP2_SIGNAL:
                            pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
                            break;
                        case SUPSEMOP2_CLOSE:
                            pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
                            break;
                        case SUPSEMOP2_RESET:
                            pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
                            break;
                        default:
                            pReq->Hdr.rc = VERR_INVALID_FUNCTION;
                            break;
                    }
                    break;
                }

                default:
                    pReq->Hdr.rc = VERR_INVALID_PARAMETER;
                    break;
            }
            return 0;
        }
2085
2086 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2087 {
2088 /* validate */
2089 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2090 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2091 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2092
2093 /* execute */
2094 switch (pReq->u.In.uType)
2095 {
2096 case SUP_SEM_TYPE_EVENT:
2097 {
2098 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2099 switch (pReq->u.In.uOp)
2100 {
2101 case SUPSEMOP3_CREATE:
2102 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2103 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2104 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2105 break;
2106 case SUPSEMOP3_GET_RESOLUTION:
2107 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2108 pReq->Hdr.rc = VINF_SUCCESS;
2109 pReq->Hdr.cbOut = sizeof(*pReq);
2110 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2111 break;
2112 default:
2113 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2114 break;
2115 }
2116 break;
2117 }
2118
2119 case SUP_SEM_TYPE_EVENT_MULTI:
2120 {
2121 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2122 switch (pReq->u.In.uOp)
2123 {
2124 case SUPSEMOP3_CREATE:
2125 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2126 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2127 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2128 break;
2129 case SUPSEMOP3_GET_RESOLUTION:
2130 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2131 pReq->Hdr.rc = VINF_SUCCESS;
2132 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2133 break;
2134 default:
2135 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2136 break;
2137 }
2138 break;
2139 }
2140
2141 default:
2142 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2143 break;
2144 }
2145 return 0;
2146 }
2147
        /* Query hardware virtualization (VT-x/AMD-V) capabilities. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
        {
            /* validate */
            PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);

            /* execute */
            pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr); /* only the header is copied back on failure */
            return 0;
        }

        /* Open the tracer for this session. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
        {
            /* validate */
            PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
            return 0;
        }

        /* Close the tracer for this session. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
        {
            /* validate */
            REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);

            /* execute */
            pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
            return 0;
        }

        /* Forward a command to the tracer; the tracer's own return value goes in iRetVal. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
        {
            /* validate */
            PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);

            /* execute */
            pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
            return 0;
        }
2192
        /* Register a user-mode module with the tracer. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
        {
            /* validate */
            PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
            /* The module name must be terminated within its buffer. */
            if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
                return VERR_INVALID_PARAMETER;

            /* execute */
            pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
                                                         pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
                                                         pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
                                                         pReq->u.In.szName, pReq->u.In.fFlags);
            return 0;
        }

        /* Deregister a user-mode module from the tracer. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
        {
            /* validate */
            PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);

            /* execute */
            pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
            return 0;
        }

        /* Fire a user-mode tracer probe; always reports success to the caller. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
        {
            /* validate */
            PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);

            supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
            pReqHdr->rc = VINF_SUCCESS;
            return 0;
        }

        /* Probe an MSR (read/write/modify) for diagnostics. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
        {
            /* validate */
            PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
            /* The operation must be one of the defined SUPMSRPROBEROP values. */
            REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
                           pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);

            pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
            return 0;
        }

        /* Resume keyboard devices suspended by a panic/debugger (host specific workaround). */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
        {
            /* validate */
            REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);

            pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
            return 0;
        }
2251
        /* Measure the TSC delta for a given CPU (GIP TSC-delta machinery). */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
        {
            /* validate */
            PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);

            pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pSession, pReq);
            return 0;
        }

        /* Read the delta-adjusted TSC value. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
        {
            /* validate */
            PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);

            pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pSession, pReq);
            return 0;
        }

        default:
            Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
            break;
2275 }
2276 return VERR_GENERAL_FAILURE;
2277}
2278
2279
/**
 * I/O Control inner worker for the restricted operations.
 *
 * Sessions without the unrestricted flag (e.g. hardened/limited clients) are
 * only allowed the handshake and a capability query; everything else falls
 * through to VERR_GENERAL_FAILURE.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_PARAMETER if the request is invalid.
 *
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 */
static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
{
    /*
     * The switch.
     */
    switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
    {
        /* The cookie handshake: magic + version negotiation. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
        {
            PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
            if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
                pReq->Hdr.rc = VERR_INVALID_MAGIC;
                return 0;
            }

            /*
             * Match the version.
             * The current logic is very simple, match the major interface version.
             */
            if (    pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
                ||  (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
                            pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
                /* On mismatch, still report the driver version so the client can diagnose. */
                pReq->u.Out.u32Cookie         = 0xffffffff;
                pReq->u.Out.u32SessionCookie  = 0xffffffff;
                pReq->u.Out.u32SessionVersion = 0xffffffff;
                pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
                pReq->u.Out.pSession          = NULL;
                pReq->u.Out.cFunctions        = 0;
                pReq->Hdr.rc = VERR_VERSION_MISMATCH;
                return 0;
            }

            /*
             * Fill in return data and be gone.
             * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
             *      u32SessionVersion <= u32ReqVersion!
             */
            /** @todo Somehow validate the client and negotiate a secure cookie... */
            pReq->u.Out.u32Cookie         = pDevExt->u32Cookie;
            pReq->u.Out.u32SessionCookie  = pSession->u32Cookie;
            pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
            pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
            pReq->u.Out.pSession          = pSession;
            pReq->u.Out.cFunctions        = 0; /* restricted sessions get no function table */
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        /* Query hardware virtualization capabilities - allowed for restricted sessions. */
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
        {
            /* validate */
            PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);

            /* execute */
            pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr); /* only the header is copied back on failure */
            return 0;
        }

        default:
            Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
            break;
    }
    return VERR_GENERAL_FAILURE;
}
2363
2364
/**
 * I/O Control worker.
 *
 * Performs the common request header validation (size bounds, magic flags and
 * cookies) before dispatching to the unrestricted or restricted inner worker
 * depending on the session's privileges.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_PARAMETER if the request is invalid.
 *
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 * @param   cbReq       Size of the request buffer as reported by the OS
 *                      specific entry point; bounds cbIn/cbOut.
 */
int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
{
    int rc;
    VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);

    /*
     * Validate the request.
     */
    /* The buffer must at least hold the common header. */
    if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    /* Header magic must be present and the declared in/out sizes must fit the buffer. */
    if (RT_UNLIKELY(    (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
                    ||  pReqHdr->cbIn < sizeof(*pReqHdr)
                    ||  pReqHdr->cbIn > cbReq
                    ||  pReqHdr->cbOut < sizeof(*pReqHdr)
                    ||  pReqHdr->cbOut > cbReq))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
                    (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
    {
        OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }
    /* The cookie request uses the well-known initial cookie; all other requests
       must present the driver and session cookies negotiated at handshake. */
    if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
    {
        if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
        {
            OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
            VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
            return VERR_INVALID_PARAMETER;
        }
    }
    else if (RT_UNLIKELY(    pReqHdr->u32Cookie != pDevExt->u32Cookie
                         ||  pReqHdr->u32SessionCookie != pSession->u32Cookie))
    {
        OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
        VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
     */
    if (pSession->fUnrestricted)
        rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
    else
        rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);

    VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
    return rc;
}
2435
2436
2437/**
2438 * Inter-Driver Communication (IDC) worker.
2439 *
2440 * @returns VBox status code.
2441 * @retval VINF_SUCCESS on success.
2442 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2443 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2444 *
2445 * @param uReq The request (function) code.
2446 * @param pDevExt Device extention.
2447 * @param pSession Session data.
2448 * @param pReqHdr The request header.
2449 */
2450int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2451{
2452 /*
2453 * The OS specific code has already validated the pSession
2454 * pointer, and the request size being greater or equal to
2455 * size of the header.
2456 *
2457 * So, just check that pSession is a kernel context session.
2458 */
2459 if (RT_UNLIKELY( pSession
2460 && pSession->R0Process != NIL_RTR0PROCESS))
2461 return VERR_INVALID_PARAMETER;
2462
2463/*
2464 * Validation macro.
2465 */
2466#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2467 do { \
2468 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2469 { \
2470 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2471 (long)pReqHdr->cb, (long)(cbExpect))); \
2472 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2473 } \
2474 } while (0)
2475
2476 switch (uReq)
2477 {
        /*
         * IDC connect: validate the magic cookie, negotiate the interface
         * version and create a kernel session for the connecting driver.
         */
        case SUPDRV_IDC_REQ_CONNECT:
        {
            PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));

            /*
             * Validate the cookie and other input.
             */
            if (pReq->Hdr.pSession != NULL)
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }
            if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
                            (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }
            /* The min/req versions must be coherent: min <= req and same major version. */
            if (    pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
                ||  (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
                            pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }
            /* A connect request must not already be bound to a session. */
            if (pSession != NULL)
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
                return pReqHdr->rc = VERR_INVALID_PARAMETER;
            }

            /*
             * Match the version.
             * The current logic is very simple, match the major interface version.
             */
            if (    pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
                ||  (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
                            pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
                /* Report the driver version anyway so the client can diagnose. */
                pReq->u.Out.pSession        = NULL;
                pReq->u.Out.uSessionVersion = 0xffffffff;
                pReq->u.Out.uDriverVersion  = SUPDRV_IDC_VERSION;
                pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
                pReq->Hdr.rc = VERR_VERSION_MISMATCH;
                return VINF_SUCCESS;
            }

            pReq->u.Out.pSession        = NULL;
            pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
            pReq->u.Out.uDriverVersion  = SUPDRV_IDC_VERSION;
            pReq->u.Out.uDriverRevision = VBOX_SVN_REV;

            /* Create an unrestricted kernel (non-user) session for the peer driver. */
            pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
            if (RT_FAILURE(pReq->Hdr.rc))
            {
                OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
                return VINF_SUCCESS;
            }

            pReq->u.Out.pSession = pSession;
            pReq->Hdr.pSession = pSession;

            return VINF_SUCCESS;
        }

        /* IDC disconnect: drop the session reference established by connect. */
        case SUPDRV_IDC_REQ_DISCONNECT:
        {
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));

            supdrvSessionRelease(pSession);
            return pReqHdr->rc = VINF_SUCCESS;
        }

        /* IDC symbol lookup in a loaded image. */
        case SUPDRV_IDC_REQ_GET_SYMBOL:
        {
            PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));

            pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
            return VINF_SUCCESS;
        }
2561
2562 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2563 {
2564 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2565 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2566
2567 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2568 return VINF_SUCCESS;
2569 }
2570
2571 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2572 {
2573 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2574 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2575
2576 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2577 return VINF_SUCCESS;
2578 }
2579
2580 default:
2581 Log(("Unknown IDC %#lx\n", (long)uReq));
2582 break;
2583 }
2584
2585#undef REQ_CHECK_IDC_SIZE
2586 return VERR_NOT_SUPPORTED;
2587}
2588
2589
2590/**
2591 * Register a object for reference counting.
2592 * The object is registered with one reference in the specified session.
2593 *
2594 * @returns Unique identifier on success (pointer).
2595 * All future reference must use this identifier.
2596 * @returns NULL on failure.
2597 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2598 * @param pvUser1 The first user argument.
2599 * @param pvUser2 The second user argument.
2600 */
2601SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2602{
2603 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2604 PSUPDRVOBJ pObj;
2605 PSUPDRVUSAGE pUsage;
2606
2607 /*
2608 * Validate the input.
2609 */
2610 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2611 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2612 AssertPtrReturn(pfnDestructor, NULL);
2613
2614 /*
2615 * Allocate and initialize the object.
2616 */
2617 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2618 if (!pObj)
2619 return NULL;
2620 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2621 pObj->enmType = enmType;
2622 pObj->pNext = NULL;
2623 pObj->cUsage = 1;
2624 pObj->pfnDestructor = pfnDestructor;
2625 pObj->pvUser1 = pvUser1;
2626 pObj->pvUser2 = pvUser2;
2627 pObj->CreatorUid = pSession->Uid;
2628 pObj->CreatorGid = pSession->Gid;
2629 pObj->CreatorProcess= pSession->Process;
2630 supdrvOSObjInitCreator(pObj, pSession);
2631
2632 /*
2633 * Allocate the usage record.
2634 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2635 */
2636 RTSpinlockAcquire(pDevExt->Spinlock);
2637
2638 pUsage = pDevExt->pUsageFree;
2639 if (pUsage)
2640 pDevExt->pUsageFree = pUsage->pNext;
2641 else
2642 {
2643 RTSpinlockRelease(pDevExt->Spinlock);
2644 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2645 if (!pUsage)
2646 {
2647 RTMemFree(pObj);
2648 return NULL;
2649 }
2650 RTSpinlockAcquire(pDevExt->Spinlock);
2651 }
2652
2653 /*
2654 * Insert the object and create the session usage record.
2655 */
2656 /* The object. */
2657 pObj->pNext = pDevExt->pObjs;
2658 pDevExt->pObjs = pObj;
2659
2660 /* The session record. */
2661 pUsage->cUsage = 1;
2662 pUsage->pObj = pObj;
2663 pUsage->pNext = pSession->pUsage;
2664 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2665 pSession->pUsage = pUsage;
2666
2667 RTSpinlockRelease(pDevExt->Spinlock);
2668
2669 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2670 return pObj;
2671}
2672
2673
2674/**
2675 * Increment the reference counter for the object associating the reference
2676 * with the specified session.
2677 *
2678 * @returns IPRT status code.
2679 * @param pvObj The identifier returned by SUPR0ObjRegister().
2680 * @param pSession The session which is referencing the object.
2681 *
2682 * @remarks The caller should not own any spinlocks and must carefully protect
2683 * itself against potential race with the destructor so freed memory
2684 * isn't accessed here.
2685 */
2686SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2687{
2688 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2689}
2690
2691
2692/**
2693 * Increment the reference counter for the object associating the reference
2694 * with the specified session.
2695 *
2696 * @returns IPRT status code.
2697 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2698 * couldn't be allocated. (If you see this you're not doing the right
2699 * thing and it won't ever work reliably.)
2700 *
2701 * @param pvObj The identifier returned by SUPR0ObjRegister().
2702 * @param pSession The session which is referencing the object.
2703 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2704 * first reference to an object in a session with this
2705 * argument set.
2706 *
2707 * @remarks The caller should not own any spinlocks and must carefully protect
2708 * itself against potential race with the destructor so freed memory
2709 * isn't accessed here.
2710 */
2711SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2712{
2713 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2714 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2715 int rc = VINF_SUCCESS;
2716 PSUPDRVUSAGE pUsagePre;
2717 PSUPDRVUSAGE pUsage;
2718
2719 /*
2720 * Validate the input.
2721 * Be ready for the destruction race (someone might be stuck in the
2722 * destructor waiting a lock we own).
2723 */
2724 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2725 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2726 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2727 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2728 VERR_INVALID_PARAMETER);
2729
2730 RTSpinlockAcquire(pDevExt->Spinlock);
2731
2732 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2733 {
2734 RTSpinlockRelease(pDevExt->Spinlock);
2735
2736 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2737 return VERR_WRONG_ORDER;
2738 }
2739
2740 /*
2741 * Preallocate the usage record if we can.
2742 */
2743 pUsagePre = pDevExt->pUsageFree;
2744 if (pUsagePre)
2745 pDevExt->pUsageFree = pUsagePre->pNext;
2746 else if (!fNoBlocking)
2747 {
2748 RTSpinlockRelease(pDevExt->Spinlock);
2749 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2750 if (!pUsagePre)
2751 return VERR_NO_MEMORY;
2752
2753 RTSpinlockAcquire(pDevExt->Spinlock);
2754 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2755 {
2756 RTSpinlockRelease(pDevExt->Spinlock);
2757
2758 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2759 return VERR_WRONG_ORDER;
2760 }
2761 }
2762
2763 /*
2764 * Reference the object.
2765 */
2766 pObj->cUsage++;
2767
2768 /*
2769 * Look for the session record.
2770 */
2771 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2772 {
2773 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2774 if (pUsage->pObj == pObj)
2775 break;
2776 }
2777 if (pUsage)
2778 pUsage->cUsage++;
2779 else if (pUsagePre)
2780 {
2781 /* create a new session record. */
2782 pUsagePre->cUsage = 1;
2783 pUsagePre->pObj = pObj;
2784 pUsagePre->pNext = pSession->pUsage;
2785 pSession->pUsage = pUsagePre;
2786 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2787
2788 pUsagePre = NULL;
2789 }
2790 else
2791 {
2792 pObj->cUsage--;
2793 rc = VERR_TRY_AGAIN;
2794 }
2795
2796 /*
2797 * Put any unused usage record into the free list..
2798 */
2799 if (pUsagePre)
2800 {
2801 pUsagePre->pNext = pDevExt->pUsageFree;
2802 pDevExt->pUsageFree = pUsagePre;
2803 }
2804
2805 RTSpinlockRelease(pDevExt->Spinlock);
2806
2807 return rc;
2808}
2809
2810
2811/**
2812 * Decrement / destroy a reference counter record for an object.
2813 *
2814 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2815 *
2816 * @returns IPRT status code.
2817 * @retval VINF_SUCCESS if not destroyed.
2818 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2819 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2820 * string builds.
2821 *
2822 * @param pvObj The identifier returned by SUPR0ObjRegister().
2823 * @param pSession The session which is referencing the object.
2824 */
2825SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2826{
2827 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2828 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2829 int rc = VERR_INVALID_PARAMETER;
2830 PSUPDRVUSAGE pUsage;
2831 PSUPDRVUSAGE pUsagePrev;
2832
2833 /*
2834 * Validate the input.
2835 */
2836 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2837 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2838 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2839 VERR_INVALID_PARAMETER);
2840
2841 /*
2842 * Acquire the spinlock and look for the usage record.
2843 */
2844 RTSpinlockAcquire(pDevExt->Spinlock);
2845
2846 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2847 pUsage;
2848 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2849 {
2850 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2851 if (pUsage->pObj == pObj)
2852 {
2853 rc = VINF_SUCCESS;
2854 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2855 if (pUsage->cUsage > 1)
2856 {
2857 pObj->cUsage--;
2858 pUsage->cUsage--;
2859 }
2860 else
2861 {
2862 /*
2863 * Free the session record.
2864 */
2865 if (pUsagePrev)
2866 pUsagePrev->pNext = pUsage->pNext;
2867 else
2868 pSession->pUsage = pUsage->pNext;
2869 pUsage->pNext = pDevExt->pUsageFree;
2870 pDevExt->pUsageFree = pUsage;
2871
2872 /* What about the object? */
2873 if (pObj->cUsage > 1)
2874 pObj->cUsage--;
2875 else
2876 {
2877 /*
2878 * Object is to be destroyed, unlink it.
2879 */
2880 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2881 rc = VINF_OBJECT_DESTROYED;
2882 if (pDevExt->pObjs == pObj)
2883 pDevExt->pObjs = pObj->pNext;
2884 else
2885 {
2886 PSUPDRVOBJ pObjPrev;
2887 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2888 if (pObjPrev->pNext == pObj)
2889 {
2890 pObjPrev->pNext = pObj->pNext;
2891 break;
2892 }
2893 Assert(pObjPrev);
2894 }
2895 }
2896 }
2897 break;
2898 }
2899 }
2900
2901 RTSpinlockRelease(pDevExt->Spinlock);
2902
2903 /*
2904 * Call the destructor and free the object if required.
2905 */
2906 if (rc == VINF_OBJECT_DESTROYED)
2907 {
2908 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2909 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2910 if (pObj->pfnDestructor)
2911 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2912 RTMemFree(pObj);
2913 }
2914
2915 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2916 return rc;
2917}
2918
2919
2920/**
2921 * Verifies that the current process can access the specified object.
2922 *
2923 * @returns The following IPRT status code:
2924 * @retval VINF_SUCCESS if access was granted.
2925 * @retval VERR_PERMISSION_DENIED if denied access.
2926 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2927 *
2928 * @param pvObj The identifier returned by SUPR0ObjRegister().
2929 * @param pSession The session which wishes to access the object.
2930 * @param pszObjName Object string name. This is optional and depends on the object type.
2931 *
2932 * @remark The caller is responsible for making sure the object isn't removed while
2933 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2934 */
2935SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2936{
2937 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2938 int rc;
2939
2940 /*
2941 * Validate the input.
2942 */
2943 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2944 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2945 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2946 VERR_INVALID_PARAMETER);
2947
2948 /*
2949 * Check access. (returns true if a decision has been made.)
2950 */
2951 rc = VERR_INTERNAL_ERROR;
2952 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2953 return rc;
2954
2955 /*
2956 * Default policy is to allow the user to access his own
2957 * stuff but nothing else.
2958 */
2959 if (pObj->CreatorUid == pSession->Uid)
2960 return VINF_SUCCESS;
2961 return VERR_PERMISSION_DENIED;
2962}
2963
2964
2965/**
2966 * Lock pages.
2967 *
2968 * @returns IPRT status code.
2969 * @param pSession Session to which the locked memory should be associated.
2970 * @param pvR3 Start of the memory range to lock.
2971 * This must be page aligned.
2972 * @param cPages Number of pages to lock.
2973 * @param paPages Where to put the physical addresses of locked memory.
2974 */
2975SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2976{
2977 int rc;
2978 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2979 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2980 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2981
2982 /*
2983 * Verify input.
2984 */
2985 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2986 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2987 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2988 || !pvR3)
2989 {
2990 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
2991 return VERR_INVALID_PARAMETER;
2992 }
2993
2994 /*
2995 * Let IPRT do the job.
2996 */
2997 Mem.eType = MEMREF_TYPE_LOCKED;
2998 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
2999 if (RT_SUCCESS(rc))
3000 {
3001 uint32_t iPage = cPages;
3002 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
3003 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
3004
3005 while (iPage-- > 0)
3006 {
3007 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3008 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
3009 {
3010 AssertMsgFailed(("iPage=%d\n", iPage));
3011 rc = VERR_INTERNAL_ERROR;
3012 break;
3013 }
3014 }
3015 if (RT_SUCCESS(rc))
3016 rc = supdrvMemAdd(&Mem, pSession);
3017 if (RT_FAILURE(rc))
3018 {
3019 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3020 AssertRC(rc2);
3021 }
3022 }
3023
3024 return rc;
3025}
3026
3027
3028/**
3029 * Unlocks the memory pointed to by pv.
3030 *
3031 * @returns IPRT status code.
3032 * @param pSession Session to which the memory was locked.
3033 * @param pvR3 Memory to unlock.
3034 */
3035SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3036{
3037 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3038 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3039 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3040}
3041
3042
3043/**
3044 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3045 * backing.
3046 *
3047 * @returns IPRT status code.
3048 * @param pSession Session data.
3049 * @param cPages Number of pages to allocate.
3050 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3051 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3052 * @param pHCPhys Where to put the physical address of allocated memory.
3053 */
3054SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3055{
3056 int rc;
3057 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3058 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3059
3060 /*
3061 * Validate input.
3062 */
3063 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3064 if (!ppvR3 || !ppvR0 || !pHCPhys)
3065 {
3066 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3067 pSession, ppvR0, ppvR3, pHCPhys));
3068 return VERR_INVALID_PARAMETER;
3069
3070 }
3071 if (cPages < 1 || cPages >= 256)
3072 {
3073 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3074 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3075 }
3076
3077 /*
3078 * Let IPRT do the job.
3079 */
3080 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3081 if (RT_SUCCESS(rc))
3082 {
3083 int rc2;
3084 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3085 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3086 if (RT_SUCCESS(rc))
3087 {
3088 Mem.eType = MEMREF_TYPE_CONT;
3089 rc = supdrvMemAdd(&Mem, pSession);
3090 if (!rc)
3091 {
3092 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3093 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3094 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3095 return 0;
3096 }
3097
3098 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3099 AssertRC(rc2);
3100 }
3101 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3102 AssertRC(rc2);
3103 }
3104
3105 return rc;
3106}
3107
3108
3109/**
3110 * Frees memory allocated using SUPR0ContAlloc().
3111 *
3112 * @returns IPRT status code.
3113 * @param pSession The session to which the memory was allocated.
3114 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3115 */
3116SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3117{
3118 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3119 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3120 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3121}
3122
3123
3124/**
3125 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3126 *
3127 * The memory isn't zeroed.
3128 *
3129 * @returns IPRT status code.
3130 * @param pSession Session data.
3131 * @param cPages Number of pages to allocate.
3132 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3133 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3134 * @param paPages Where to put the physical addresses of allocated memory.
3135 */
3136SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3137{
3138 unsigned iPage;
3139 int rc;
3140 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3141 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3142
3143 /*
3144 * Validate input.
3145 */
3146 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3147 if (!ppvR3 || !ppvR0 || !paPages)
3148 {
3149 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3150 pSession, ppvR3, ppvR0, paPages));
3151 return VERR_INVALID_PARAMETER;
3152
3153 }
3154 if (cPages < 1 || cPages >= 256)
3155 {
3156 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3157 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3158 }
3159
3160 /*
3161 * Let IPRT do the work.
3162 */
3163 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3164 if (RT_SUCCESS(rc))
3165 {
3166 int rc2;
3167 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3168 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3169 if (RT_SUCCESS(rc))
3170 {
3171 Mem.eType = MEMREF_TYPE_LOW;
3172 rc = supdrvMemAdd(&Mem, pSession);
3173 if (!rc)
3174 {
3175 for (iPage = 0; iPage < cPages; iPage++)
3176 {
3177 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3178 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3179 }
3180 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3181 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3182 return 0;
3183 }
3184
3185 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3186 AssertRC(rc2);
3187 }
3188
3189 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3190 AssertRC(rc2);
3191 }
3192
3193 return rc;
3194}
3195
3196
3197/**
3198 * Frees memory allocated using SUPR0LowAlloc().
3199 *
3200 * @returns IPRT status code.
3201 * @param pSession The session to which the memory was allocated.
3202 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3203 */
3204SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3205{
3206 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3207 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3208 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3209}
3210
3211
3212
3213/**
3214 * Allocates a chunk of memory with both R0 and R3 mappings.
3215 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3216 *
3217 * @returns IPRT status code.
3218 * @param pSession The session to associated the allocation with.
3219 * @param cb Number of bytes to allocate.
3220 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3221 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3222 */
3223SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3224{
3225 int rc;
3226 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3227 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3228
3229 /*
3230 * Validate input.
3231 */
3232 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3233 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3234 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3235 if (cb < 1 || cb >= _4M)
3236 {
3237 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3238 return VERR_INVALID_PARAMETER;
3239 }
3240
3241 /*
3242 * Let IPRT do the work.
3243 */
3244 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3245 if (RT_SUCCESS(rc))
3246 {
3247 int rc2;
3248 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3249 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3250 if (RT_SUCCESS(rc))
3251 {
3252 Mem.eType = MEMREF_TYPE_MEM;
3253 rc = supdrvMemAdd(&Mem, pSession);
3254 if (!rc)
3255 {
3256 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3257 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3258 return VINF_SUCCESS;
3259 }
3260
3261 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3262 AssertRC(rc2);
3263 }
3264
3265 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3266 AssertRC(rc2);
3267 }
3268
3269 return rc;
3270}
3271
3272
3273/**
3274 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3275 *
3276 * @returns IPRT status code.
3277 * @param pSession The session to which the memory was allocated.
3278 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3279 * @param paPages Where to store the physical addresses.
3280 */
3281SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3282{
3283 PSUPDRVBUNDLE pBundle;
3284 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3285
3286 /*
3287 * Validate input.
3288 */
3289 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3290 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3291 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3292
3293 /*
3294 * Search for the address.
3295 */
3296 RTSpinlockAcquire(pSession->Spinlock);
3297 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3298 {
3299 if (pBundle->cUsed > 0)
3300 {
3301 unsigned i;
3302 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3303 {
3304 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3305 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3306 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3307 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3308 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3309 )
3310 )
3311 {
3312 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3313 size_t iPage;
3314 for (iPage = 0; iPage < cPages; iPage++)
3315 {
3316 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3317 paPages[iPage].uReserved = 0;
3318 }
3319 RTSpinlockRelease(pSession->Spinlock);
3320 return VINF_SUCCESS;
3321 }
3322 }
3323 }
3324 }
3325 RTSpinlockRelease(pSession->Spinlock);
3326 Log(("Failed to find %p!!!\n", (void *)uPtr));
3327 return VERR_INVALID_PARAMETER;
3328}
3329
3330
3331/**
3332 * Free memory allocated by SUPR0MemAlloc().
3333 *
3334 * @returns IPRT status code.
3335 * @param pSession The session owning the allocation.
3336 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3337 */
3338SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3339{
3340 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3341 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3342 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3343}
3344
3345
3346/**
3347 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3348 *
3349 * The memory is fixed and it's possible to query the physical addresses using
3350 * SUPR0MemGetPhys().
3351 *
3352 * @returns IPRT status code.
3353 * @param pSession The session to associated the allocation with.
3354 * @param cPages The number of pages to allocate.
3355 * @param fFlags Flags, reserved for the future. Must be zero.
3356 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3357 * NULL if no ring-3 mapping.
3358 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3359 * NULL if no ring-0 mapping.
3360 * @param paPages Where to store the addresses of the pages. Optional.
3361 */
3362SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3363{
3364 int rc;
3365 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3366 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3367
3368 /*
3369 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3370 */
3371 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3372 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3373 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3374 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3375 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3376 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3377 {
3378 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3379 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3380 }
3381
3382 /*
3383 * Let IPRT do the work.
3384 */
3385 if (ppvR0)
3386 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3387 else
3388 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3389 if (RT_SUCCESS(rc))
3390 {
3391 int rc2;
3392 if (ppvR3)
3393 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3394 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3395 else
3396 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3397 if (RT_SUCCESS(rc))
3398 {
3399 Mem.eType = MEMREF_TYPE_PAGE;
3400 rc = supdrvMemAdd(&Mem, pSession);
3401 if (!rc)
3402 {
3403 if (ppvR3)
3404 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3405 if (ppvR0)
3406 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3407 if (paPages)
3408 {
3409 uint32_t iPage = cPages;
3410 while (iPage-- > 0)
3411 {
3412 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3413 Assert(paPages[iPage] != NIL_RTHCPHYS);
3414 }
3415 }
3416 return VINF_SUCCESS;
3417 }
3418
3419 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3420 AssertRC(rc2);
3421 }
3422
3423 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3424 AssertRC(rc2);
3425 }
3426 return rc;
3427}
3428
3429
/**
 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
 * space.
 *
 * The memory object to map is looked up by its ring-3 address among the
 * session's memory bundles.  No explicit unmap API is needed: the mapping is
 * cleaned up together with the underlying object when that is freed (see the
 * "include mappings" note below).
 *
 * @returns IPRT status code.
 * @param   pSession    The session to associated the allocation with.
 * @param   pvR3        The ring-3 address returned by SUPR0PageAllocEx.
 * @param   offSub      Where to start mapping. Must be page aligned.
 * @param   cbSub       How much to map. Must be page aligned.
 * @param   fFlags      Flags, MBZ.
 * @param   ppvR0       Where to return the address of the ring-0 mapping on
 *                      success.
 */
SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
                                  uint32_t fFlags, PRTR0PTR ppvR0)
{
    int             rc;
    PSUPDRVBUNDLE   pBundle;
    RTR0MEMOBJ      hMemObj = NIL_RTR0MEMOBJ;
    LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));

    /*
     * Validate input.  The offSub/cbSub pair is range-checked against the
     * actual object size further down, once the object has been found.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
    AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
    AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    AssertReturn(cbSub, VERR_INVALID_PARAMETER);

    /*
     * Find the memory object.  A MEMREF_TYPE_PAGE allocation is matched via
     * its ring-3 mapping object, while a MEMREF_TYPE_LOCKED entry (locked
     * user memory, which has no separate ring-3 mapping) is matched via the
     * memory object's own ring-3 address.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                if (    (   pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
                         && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                         && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                         && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
                    ||  (   pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
                         && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                         && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
                         && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
                {
                    hMemObj = pBundle->aMem[i].MemObj;
                    break;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);

    rc = VERR_INVALID_PARAMETER;
    if (hMemObj != NIL_RTR0MEMOBJ)
    {
        /*
         * Do some further input validations before calling IPRT.
         * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
         */
        size_t cbMemObj = RTR0MemObjSize(hMemObj);
        if (    offSub < cbMemObj
            &&  cbSub <= cbMemObj
            &&  offSub + cbSub <= cbMemObj)
        {
            RTR0MEMOBJ hMapObj;
            rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
                                       RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
            if (RT_SUCCESS(rc))
                *ppvR0 = RTR0MemObjAddress(hMapObj);
        }
        else
            SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);

    }
    return rc;
}
3513
3514
3515/**
3516 * Changes the page level protection of one or more pages previously allocated
3517 * by SUPR0PageAllocEx.
3518 *
3519 * @returns IPRT status code.
3520 * @param pSession The session to associated the allocation with.
3521 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3522 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3523 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3524 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3525 * @param offSub Where to start changing. Must be page aligned.
3526 * @param cbSub How much to change. Must be page aligned.
3527 * @param fProt The new page level protection, see RTMEM_PROT_*.
3528 */
3529SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3530{
3531 int rc;
3532 PSUPDRVBUNDLE pBundle;
3533 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3534 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3535 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3536
3537 /*
3538 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3539 */
3540 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3541 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3542 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3543 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3544 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3545
3546 /*
3547 * Find the memory object.
3548 */
3549 RTSpinlockAcquire(pSession->Spinlock);
3550 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3551 {
3552 if (pBundle->cUsed > 0)
3553 {
3554 unsigned i;
3555 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3556 {
3557 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3558 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3559 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3560 || pvR3 == NIL_RTR3PTR)
3561 && ( pvR0 == NIL_RTR0PTR
3562 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3563 && ( pvR3 == NIL_RTR3PTR
3564 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3565 {
3566 if (pvR0 != NIL_RTR0PTR)
3567 hMemObjR0 = pBundle->aMem[i].MemObj;
3568 if (pvR3 != NIL_RTR3PTR)
3569 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3570 break;
3571 }
3572 }
3573 }
3574 }
3575 RTSpinlockRelease(pSession->Spinlock);
3576
3577 rc = VERR_INVALID_PARAMETER;
3578 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3579 || hMemObjR3 != NIL_RTR0MEMOBJ)
3580 {
3581 /*
3582 * Do some further input validations before calling IPRT.
3583 */
3584 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3585 if ( offSub < cbMemObj
3586 && cbSub <= cbMemObj
3587 && offSub + cbSub <= cbMemObj)
3588 {
3589 rc = VINF_SUCCESS;
3590 if (hMemObjR3 != NIL_RTR0PTR)
3591 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3592 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3593 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3594 }
3595 else
3596 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3597
3598 }
3599 return rc;
3600
3601}
3602
3603
3604/**
3605 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3606 *
3607 * @returns IPRT status code.
3608 * @param pSession The session owning the allocation.
3609 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3610 * SUPR0PageAllocEx().
3611 */
3612SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3613{
3614 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3615 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3616 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3617}
3618
3619
3620/**
3621 * Gets the paging mode of the current CPU.
3622 *
3623 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3624 */
3625SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3626{
3627 SUPPAGINGMODE enmMode;
3628
3629 RTR0UINTREG cr0 = ASMGetCR0();
3630 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3631 enmMode = SUPPAGINGMODE_INVALID;
3632 else
3633 {
3634 RTR0UINTREG cr4 = ASMGetCR4();
3635 uint32_t fNXEPlusLMA = 0;
3636 if (cr4 & X86_CR4_PAE)
3637 {
3638 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3639 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3640 {
3641 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3642 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3643 fNXEPlusLMA |= RT_BIT(0);
3644 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3645 fNXEPlusLMA |= RT_BIT(1);
3646 }
3647 }
3648
3649 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3650 {
3651 case 0:
3652 enmMode = SUPPAGINGMODE_32_BIT;
3653 break;
3654
3655 case X86_CR4_PGE:
3656 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3657 break;
3658
3659 case X86_CR4_PAE:
3660 enmMode = SUPPAGINGMODE_PAE;
3661 break;
3662
3663 case X86_CR4_PAE | RT_BIT(0):
3664 enmMode = SUPPAGINGMODE_PAE_NX;
3665 break;
3666
3667 case X86_CR4_PAE | X86_CR4_PGE:
3668 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3669 break;
3670
3671 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3672 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3673 break;
3674
3675 case RT_BIT(1) | X86_CR4_PAE:
3676 enmMode = SUPPAGINGMODE_AMD64;
3677 break;
3678
3679 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3680 enmMode = SUPPAGINGMODE_AMD64_NX;
3681 break;
3682
3683 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3684 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3685 break;
3686
3687 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3688 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3689 break;
3690
3691 default:
3692 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3693 enmMode = SUPPAGINGMODE_INVALID;
3694 break;
3695 }
3696 }
3697 return enmMode;
3698}
3699
3700
3701/**
3702 * Enables or disabled hardware virtualization extensions using native OS APIs.
3703 *
3704 * @returns VBox status code.
3705 * @retval VINF_SUCCESS on success.
3706 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3707 *
3708 * @param fEnable Whether to enable or disable.
3709 */
3710SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3711{
3712#ifdef RT_OS_DARWIN
3713 return supdrvOSEnableVTx(fEnable);
3714#else
3715 return VERR_NOT_SUPPORTED;
3716#endif
3717}
3718
3719
3720/**
3721 * Suspends hardware virtualization extensions using the native OS API.
3722 *
3723 * This is called prior to entering raw-mode context.
3724 *
3725 * @returns @c true if suspended, @c false if not.
3726 */
3727SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3728{
3729#ifdef RT_OS_DARWIN
3730 return supdrvOSSuspendVTxOnCpu();
3731#else
3732 return false;
3733#endif
3734}
3735
3736
3737/**
3738 * Resumes hardware virtualization extensions using the native OS API.
3739 *
3740 * This is called after to entering raw-mode context.
3741 *
3742 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3743 */
3744SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3745{
3746#ifdef RT_OS_DARWIN
3747 supdrvOSResumeVTxOnCpu(fSuspended);
3748#else
3749 Assert(!fSuspended);
3750#endif
3751}
3752
3753
/**
 * Checks if Intel VT-x feature is usable on this CPU.
 *
 * Inspects MSR_IA32_FEATURE_CONTROL; if the BIOS has not locked it yet, we
 * lock it ourselves with both VMXON permission bits set.
 *
 * @returns VBox status code.
 * @param   pfIsSmxModeAmbiguous    Where to write whether the SMX mode causes
 *                                  ambiguity that makes us unsure whether we
 *                                  really can use VT-x or not.
 *
 * @remarks Must be called with preemption disabled.
 */
SUPR0DECL(int) SUPR0GetVmxUsability(bool *pfIsSmxModeAmbiguous)
{
    uint64_t u64FeatMsr;
    bool     fMaybeSmxMode;
    bool     fMsrLocked;
    bool     fSmxVmxAllowed;
    bool     fVmxAllowed;
    bool     fIsSmxModeAmbiguous;
    int      rc;

    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));

    /* Snapshot the feature control MSR and the relevant CR4 bit. */
    u64FeatMsr          = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
    fMaybeSmxMode       = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
    fMsrLocked          = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
    fSmxVmxAllowed      = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
    fVmxAllowed         = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
    fIsSmxModeAmbiguous = false;
    rc                  = VERR_INTERNAL_ERROR_5;

    /* Check if the LOCK bit is set but excludes the required VMXON bit. */
    if (fMsrLocked)
    {
        if (fVmxAllowed && fSmxVmxAllowed)
            rc = VINF_SUCCESS;
        else if (!fVmxAllowed && !fSmxVmxAllowed)
            rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
        else if (!fMaybeSmxMode)
        {
            /* Not in SMX mode, so only the plain VMXON bit matters. */
            if (fVmxAllowed)
                rc = VINF_SUCCESS;
            else
                rc = VERR_VMX_MSR_VMXON_DISABLED;
        }
        else
        {
            /*
             * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
             * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
             * See @bugref{6873}.
             */
            Assert(fMaybeSmxMode == true);
            fIsSmxModeAmbiguous = true;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        /*
         * MSR is not yet locked; we can change it ourselves here.
         * Once the lock bit is set, this MSR can no longer be modified.
         *
         * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
         * accurately. See @bugref{6873}.
         */
        u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
                    | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
                    | MSR_IA32_FEATURE_CONTROL_VMXON;
        ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);

        /* Verify.  The write may silently fail to stick on some systems, so
           re-read the MSR and require the lock + both VMXON bits. */
        u64FeatMsr     = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
        fMsrLocked     = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
        fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
        fVmxAllowed    = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
        if (fSmxVmxAllowed && fVmxAllowed)
            rc = VINF_SUCCESS;
        else
            rc = VERR_VMX_MSR_LOCKING_FAILED;
    }

    if (pfIsSmxModeAmbiguous)
        *pfIsSmxModeAmbiguous = fIsSmxModeAmbiguous;

    return rc;
}
3840
3841
/**
 * Checks if AMD-V SVM feature is usable on this CPU.
 *
 * SVM is considered unusable when the BIOS has set VM_CR.SVMDIS.  When
 * fInitSvm is true, the function additionally verifies that EFER.SVME can be
 * set (and restores the original EFER value afterwards).
 *
 * @returns VBox status code.
 * @param   fInitSvm    If usable, try to initialize SVM on this CPU.
 *
 * @remarks Must be called with preemption disabled.
 */
SUPR0DECL(int) SUPR0GetSvmUsability(bool fInitSvm)
{
    int      rc;
    uint64_t fVmCr;
    uint64_t fEfer;

    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    fVmCr = ASMRdMsr(MSR_K8_VM_CR);
    if (!(fVmCr & MSR_K8_VM_CR_SVM_DISABLE))
    {
        rc = VINF_SUCCESS;
        if (fInitSvm)
        {
            /* Turn on SVM in the EFER MSR. */
            fEfer = ASMRdMsr(MSR_K6_EFER);
            if (fEfer & MSR_K6_EFER_SVME)
                rc = VERR_SVM_IN_USE; /* someone else already enabled it */
            else
            {
                ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME);

                /* Paranoia: re-read EFER to check the SVME write took effect. */
                fEfer = ASMRdMsr(MSR_K6_EFER);
                if (fEfer & MSR_K6_EFER_SVME)
                {
                    /* Restore previous value (this is only a usability probe,
                       not a permanent enable). */
                    ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME);
                }
                else
                    rc = VERR_SVM_ILLEGAL_EFER_MSR;
            }
        }
    }
    else
        rc = VERR_SVM_DISABLED;
    return rc;
}
3887
3888
/**
 * Queries the AMD-V and VT-x capabilities of the calling CPU.
 *
 * @returns VBox status code.
 * @retval  VERR_VMX_NO_VMX
 * @retval  VERR_VMX_MSR_ALL_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_VMXON_DISABLED
 * @retval  VERR_VMX_MSR_LOCKING_FAILED
 * @retval  VERR_SVM_NO_SVM
 * @retval  VERR_SVM_DISABLED
 * @retval  VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
 *          (centaur) CPU.
 *
 * @param   pSession        The session handle.
 * @param   pfCaps          Where to store the capabilities (SUPVTCAPS_XXX).
 */
SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
{
    int  rc = VERR_UNSUPPORTED_CPU;
    bool fIsSmxModeAmbiguous = false;
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;

    /*
     * Input validation.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);

    *pfCaps = 0;
    /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
    RTThreadPreemptDisable(&PreemptState);
    if (ASMHasCpuId())
    {
        uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
        uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;

        ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
        ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);

        /* Intel and VIA/Centaur: check the VT-x route. */
        if (   ASMIsValidStdRange(uMaxId)
            && (   ASMIsIntelCpuEx(     uVendorEBX, uVendorECX, uVendorEDX)
                || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
           )
        {
            if (    (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
                 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
                 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                /* Note: strict VINF_SUCCESS check - informational statuses from
                   SUPR0GetVmxUsability are not treated as usable here. */
                rc = SUPR0GetVmxUsability(&fIsSmxModeAmbiguous);
                if (rc == VINF_SUCCESS)
                {
                    VMXCAPABILITY vtCaps;

                    *pfCaps |= SUPVTCAPS_VT_X;

                    /* EPT availability requires the secondary exec controls. */
                    vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
                    if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
                    {
                        vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
                        if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
                            *pfCaps |= SUPVTCAPS_NESTED_PAGING;
                    }
                }
            }
            else
                rc = VERR_VMX_NO_VMX;
        }
        /* AMD: check the AMD-V route. */
        else if (   ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
                 && ASMIsValidStdRange(uMaxId))
        {
            uint32_t fExtFeaturesEcx, uExtMaxId;
            ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
            ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);

            /* Check if SVM is available. */
            if (   ASMIsValidExtRange(uExtMaxId)
                && uExtMaxId >= 0x8000000a
                && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
                && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_MSR)
                && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_FXSR)
               )
            {
                rc = SUPR0GetSvmUsability(false /* fInitSvm */);
                if (RT_SUCCESS(rc))
                {
                    uint32_t fSvmFeatures;
                    *pfCaps |= SUPVTCAPS_AMD_V;

                    /* Query AMD-V features. */
                    ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
                    if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
                        *pfCaps |= SUPVTCAPS_NESTED_PAGING;
                }
            }
            else
                rc = VERR_SVM_NO_SVM;
        }
    }

    RTThreadPreemptRestore(&PreemptState);
    if (fIsSmxModeAmbiguous)
        SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
    return rc;
}
3994
3995
/**
 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
 * updating.
 *
 * Seeds u64TSC with "now minus one update interval" so that the first update
 * sees a plausible interval, and stamps the current nano timestamp.
 *
 * @param   pGip         Pointer to the GIP.
 * @param   pGipCpu      The per CPU structure for this CPU.
 * @param   u64NanoTS    The current time.
 */
static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
{
    /*
     * Here we don't really care about applying the TSC delta. The re-initialization of this
     * value is not relevant especially while (re)starting the GIP as the first few ones will
     * be ignored anyway, see supdrvGipDoUpdateCpu().
     */
    pGipCpu->u64TSC    = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
    pGipCpu->u64NanoTS = u64NanoTS;
}
4014
4015
/**
 * Set the current TSC and NanoTS value for the CPU.
 *
 * RTMpOnAll worker: looks up this CPU's GIP entry via its APIC ID and
 * re-initializes it.  CPUs whose APIC ID maps to an entry that does not
 * match idCpu (e.g. not yet registered) are silently skipped.
 *
 * @param   idCpu       The CPU ID. Unused - we have to use the APIC ID.
 * @param   pvUser1     Pointer to the ring-0 GIP mapping.
 * @param   pvUser2     Pointer to the variable holding the current time.
 */
static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE  pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    unsigned            iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];

    if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
        supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);

    NOREF(pvUser2);
    NOREF(idCpu);
}
4034
4035
/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 *
 * Shared (and concurrently updated with atomic operations) by all CPUs while
 * the detection callback runs on each of them via RTMpOnAll.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that has been seen (initialized to zero).
     * Used to detect duplicate APIC IDs (paranoia). */
    uint8_t volatile    bmApicId[256 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     * initially). The callback clears the methods not detected. */
    uint32_t volatile   fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     * NIL_RTCPUID). */
    RTCPUID volatile    idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
4053
4054
4055/**
4056 * Checks for alternative ways of getting the CPU ID.
4057 *
4058 * This also checks the APIC ID, CPU ID and CPU set index values against the
4059 * GIP tables.
4060 *
4061 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
4062 * @param pvUser1 Pointer to the state structure.
4063 * @param pvUser2 Pointer to the GIP.
4064 */
4065static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4066{
4067 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
4068 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
4069 uint32_t fSupported = 0;
4070 uint16_t idApic;
4071 int iCpuSet;
4072
4073 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
4074
4075 /*
4076 * Check that the CPU ID and CPU set index are interchangable.
4077 */
4078 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
4079 if ((RTCPUID)iCpuSet == idCpu)
4080 {
4081 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
4082 if ( iCpuSet >= 0
4083 && iCpuSet < RTCPUSET_MAX_CPUS
4084 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
4085 {
4086 /*
4087 * Check whether the IDTR.LIMIT contains a CPU number.
4088 */
4089#ifdef RT_ARCH_X86
4090 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
4091#else
4092 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
4093#endif
4094 RTIDTR Idtr;
4095 ASMGetIDTR(&Idtr);
4096 if (Idtr.cbIdt >= cbIdt)
4097 {
4098 uint32_t uTmp = Idtr.cbIdt - cbIdt;
4099 uTmp &= RTCPUSET_MAX_CPUS - 1;
4100 if (uTmp == idCpu)
4101 {
4102 RTIDTR Idtr2;
4103 ASMGetIDTR(&Idtr2);
4104 if (Idtr2.cbIdt == Idtr.cbIdt)
4105 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
4106 }
4107 }
4108
4109 /*
4110 * Check whether RDTSCP is an option.
4111 */
4112 if (ASMHasCpuId())
4113 {
4114 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
4115 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
4116 {
4117 uint32_t uAux;
4118 ASMReadTscWithAux(&uAux);
4119 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
4120 {
4121 ASMNopPause();
4122 ASMReadTscWithAux(&uAux);
4123 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
4124 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
4125 }
4126 }
4127 }
4128 }
4129 }
4130
4131 /*
4132 * Check that the APIC ID is unique.
4133 */
4134 idApic = ASMGetApicId();
4135 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
4136 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
4137 fSupported |= SUPGIPGETCPU_APIC_ID;
4138 else
4139 {
4140 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
4141 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4142 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
4143 idCpu, iCpuSet, idApic));
4144 }
4145
4146 /*
4147 * Check that the iCpuSet is within the expected range.
4148 */
4149 if (RT_UNLIKELY( iCpuSet < 0
4150 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
4151 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
4152 {
4153 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4154 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
4155 idCpu, iCpuSet, idApic));
4156 }
4157 else
4158 {
4159 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
4160 if (RT_UNLIKELY(idCpu2 != idCpu))
4161 {
4162 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
4163 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
4164 idCpu, iCpuSet, idApic, idCpu2));
4165 }
4166 }
4167
4168 /*
4169 * Update the supported feature mask before we return.
4170 */
4171 ASMAtomicAndU32(&pState->fSupported, fSupported);
4172
4173 NOREF(pvUser2);
4174}
4175
4176
/**
 * Increase the timer frequency on hosts where this is possible (NT).
 *
 * The idea is that more interrupts is better for us... Also, it's better that
 * we raise the timer frequency ourselves than have some other party do it,
 * because we might otherwise end up getting inaccurate callbacks.
 *
 * @param   pDevExt   Sets u32SystemTimerGranularityGrant if increased.
 */
static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
{
    if (pDevExt->u32SystemTimerGranularityGrant == 0)
    {
        uint32_t u32SystemResolution;
        /* Try the finest granularity first, falling back step by step. */
        if (   RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity(1000000 /* 1000 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity(1953125 /*  512 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity(2000000 /*  500 HZ */, &u32SystemResolution))
           )
        {
            Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
            /* Remember the grant so it can be released later. */
            pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
        }
    }
}
4202
4203
4204/**
4205 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
4206 *
4207 * @param pDevExt Clears u32SystemTimerGranularityGrant.
4208 */
4209static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
4210{
4211 if (pDevExt->u32SystemTimerGranularityGrant)
4212 {
4213 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
4214 AssertRC(rc2);
4215 pDevExt->u32SystemTimerGranularityGrant = 0;
4216 }
4217}
4218
4219
/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession        Session to which the GIP mapping should belong.
 * @param   ppGipR3         Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip      Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this
 *          function counts globally as one reference.  One call to
 *          SUPR0GipUnmap() will unmap the GIP and remove the session as a GIP
 *          user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again. On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * When resuming (i.e. this is not the very first mapping),
                 * round every CPU's transaction ID up to the next
                 * GIP_UPDATEHZ_RECALC_FREQ*2 boundary and reset the update-Hz
                 * recalculation timestamp.  Presumably this makes the update
                 * code treat the next samples as a fresh recalc window rather
                 * than mixing in stale pre-suspend data -- TODO confirm
                 * against supdrvGipDoUpdateCpu/supdrvGipUpdate.
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * Re-seed the per-CPU TSC/NanoTS baselines, backdated by one
                 * update interval.  For invariant/sync TSC modes, or when only
                 * one CPU is online, re-initializing aCPUs[0] appears to be
                 * sufficient; otherwise every online CPU is visited via
                 * RTMpOnAll.
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context.  Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    SUPDRVGIPDETECTGETCPU DetectState;
                    RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
                    DetectState.fSupported   = UINT32_MAX;
                    DetectState.idCpuProblem = NIL_RTCPUID;
                    rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
                    if (DetectState.idCpuProblem == NIL_RTCPUID)
                    {
                        if (   DetectState.fSupported != UINT32_MAX
                            && DetectState.fSupported != 0)
                        {
                            /* Publish the detected methods if they changed. */
                            if (pGipR0->fGetGipCpu != DetectState.fSupported)
                            {
                                pGipR0->fGetGipCpu = DetectState.fSupported;
                                LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                    DetectState.fSupported));
                            rc = VERR_UNSUPPORTED_CPU;
                        }
                    }
                    else
                    {
                        LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                DetectState.idCpuProblem, DetectState.idCpuProblem));
                        rc = VERR_INVALID_CPU_ID;
                    }
                }

                /*
                 * Start the GIP timer if all is well..
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error: undo the reference counting and tear down
                 * any ring-3 mapping created above.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow((   "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
4413
4414
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * Drops the session's global GIP reference; when the last user goes away the
 * GIP update timer is stopped and the raised system timer granularity is
 * released again.
 *
 * @returns IPRT status code.
 * @param   pSession        Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int             rc = VINF_SUCCESS;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  (Only done when the unmap above, if any,
     * succeeded; rc is still VINF_SUCCESS when there was nothing to unmap.)
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            /* Last user: pause GIP updating until someone maps it again. */
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
4478
4479
/**
 * Gets the GIP pointer.
 *
 * @returns Pointer to the GIP or NULL (when the GIP has not been set up yet).
 */
SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
{
    return g_pSUPGlobalInfoPage;
}
4489
4490
4491/**
4492 * Register a component factory with the support driver.
4493 *
4494 * This is currently restricted to kernel sessions only.
4495 *
4496 * @returns VBox status code.
4497 * @retval VINF_SUCCESS on success.
4498 * @retval VERR_NO_MEMORY if we're out of memory.
4499 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4500 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4501 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4502 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4503 *
4504 * @param pSession The SUPDRV session (must be a ring-0 session).
4505 * @param pFactory Pointer to the component factory registration structure.
4506 *
4507 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4508 */
4509SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4510{
4511 PSUPDRVFACTORYREG pNewReg;
4512 const char *psz;
4513 int rc;
4514
4515 /*
4516 * Validate parameters.
4517 */
4518 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4519 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4520 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4521 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4522 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4523 AssertReturn(psz, VERR_INVALID_PARAMETER);
4524
4525 /*
4526 * Allocate and initialize a new registration structure.
4527 */
4528 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4529 if (pNewReg)
4530 {
4531 pNewReg->pNext = NULL;
4532 pNewReg->pFactory = pFactory;
4533 pNewReg->pSession = pSession;
4534 pNewReg->cchName = psz - &pFactory->szName[0];
4535
4536 /*
4537 * Add it to the tail of the list after checking for prior registration.
4538 */
4539 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4540 if (RT_SUCCESS(rc))
4541 {
4542 PSUPDRVFACTORYREG pPrev = NULL;
4543 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4544 while (pCur && pCur->pFactory != pFactory)
4545 {
4546 pPrev = pCur;
4547 pCur = pCur->pNext;
4548 }
4549 if (!pCur)
4550 {
4551 if (pPrev)
4552 pPrev->pNext = pNewReg;
4553 else
4554 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4555 rc = VINF_SUCCESS;
4556 }
4557 else
4558 rc = VERR_ALREADY_EXISTS;
4559
4560 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4561 }
4562
4563 if (RT_FAILURE(rc))
4564 RTMemFree(pNewReg);
4565 }
4566 else
4567 rc = VERR_NO_MEMORY;
4568 return rc;
4569}
4570
4571
4572/**
4573 * Deregister a component factory.
4574 *
4575 * @returns VBox status code.
4576 * @retval VINF_SUCCESS on success.
4577 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4578 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4579 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4580 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4581 *
4582 * @param pSession The SUPDRV session (must be a ring-0 session).
4583 * @param pFactory Pointer to the component factory registration structure
4584 * previously passed SUPR0ComponentRegisterFactory().
4585 *
4586 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4587 */
4588SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4589{
4590 int rc;
4591
4592 /*
4593 * Validate parameters.
4594 */
4595 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4596 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4597 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4598
4599 /*
4600 * Take the lock and look for the registration record.
4601 */
4602 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4603 if (RT_SUCCESS(rc))
4604 {
4605 PSUPDRVFACTORYREG pPrev = NULL;
4606 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4607 while (pCur && pCur->pFactory != pFactory)
4608 {
4609 pPrev = pCur;
4610 pCur = pCur->pNext;
4611 }
4612 if (pCur)
4613 {
4614 if (!pPrev)
4615 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4616 else
4617 pPrev->pNext = pCur->pNext;
4618
4619 pCur->pNext = NULL;
4620 pCur->pFactory = NULL;
4621 pCur->pSession = NULL;
4622 rc = VINF_SUCCESS;
4623 }
4624 else
4625 rc = VERR_NOT_FOUND;
4626
4627 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4628
4629 RTMemFree(pCur);
4630 }
4631 return rc;
4632}
4633
4634
/**
 * Queries a component factory.
 *
 * Walks the registered factories looking for ones matching @a pszName and
 * asks each of them for the interface identified by @a pszInterfaceUuid.
 * The first factory that returns a non-NULL interface wins.
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_PARAMETER on invalid parameter.
 * @retval  VERR_INVALID_POINTER on invalid pointer parameter.
 * @retval  VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
 * @retval  VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
 *
 * @param   pSession            The SUPDRV session.
 * @param   pszName             The name of the component factory.
 * @param   pszInterfaceUuid    The UUID of the factory interface (stringified).
 * @param   ppvFactoryIf        Where to store the factory interface.
 */
SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
{
    const char *pszEnd;
    size_t cchName;
    int rc;

    /*
     * Validate parameters.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

    AssertPtrReturn(pszName, VERR_INVALID_POINTER);
    pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
    AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
    cchName = pszEnd - pszName;

    AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
    pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
    AssertReturn(pszEnd, VERR_INVALID_PARAMETER);

    AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
    *ppvFactoryIf = NULL;

    /*
     * Take the lock and try all factories by this name.
     */
    rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
    if (RT_SUCCESS(rc))
    {
        PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
        rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
        while (pCur)
        {
            if (    pCur->cchName == cchName
                &&  !memcmp(pCur->pFactory->szName, pszName, cchName))
            {
                /* The factory callback is invoked while holding the lock. */
                void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
                if (pvFactory)
                {
                    *ppvFactoryIf = pvFactory;
                    rc = VINF_SUCCESS;
                    break;
                }
                /* Name matched but the interface didn't; this status sticks
                   unless a later factory with the same name supports it. */
                rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
            }

            /* next */
            pCur = pCur->pNext;
        }

        RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
    }
    return rc;
}
4703
4704
/**
 * Adds a memory object to the session.
 *
 * The session tracks allocations in bundles (fixed-size arrays of tracking
 * records) linked off the session structure and protected by the session
 * spinlock.
 *
 * @returns IPRT status code.
 * @retval  VERR_NO_MEMORY if a new bundle could not be allocated.
 * @param   pMem        Memory tracking structure containing the
 *                      information to track.
 * @param   pSession    The session.
 */
static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Find free entry and record the allocation.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                /* A NIL MemObj marks a free slot. */
                if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
                {
                    pBundle->cUsed++;
                    pBundle->aMem[i] = *pMem;
                    RTSpinlockRelease(pSession->Spinlock);
                    return VINF_SUCCESS;
                }
            }
            AssertFailed(); /* !!this can't be happening!!! */
        }
    }
    RTSpinlockRelease(pSession->Spinlock);

    /*
     * Need to allocate a new bundle.
     * Insert into the last entry in the bundle.
     * (The allocation is done with the spinlock released.)
     */
    pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
    if (!pBundle)
        return VERR_NO_MEMORY;

    /* take last entry. */
    pBundle->cUsed++;
    pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;

    /* insert into list. */
    RTSpinlockAcquire(pSession->Spinlock);
    pBundle->pNext = pSession->Bundle.pNext;
    pSession->Bundle.pNext = pBundle;
    RTSpinlockRelease(pSession->Spinlock);

    return VINF_SUCCESS;
}
4761
4762
/**
 * Releases a memory object referenced by pointer and type.
 *
 * The tracking record is detached from the session under the spinlock,
 * while the actual freeing of the ring-0 and ring-3 mappings happens
 * after the spinlock has been released.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_PARAMETER if uPtr is NULL or no matching record was found.
 * @param   pSession    Session data.
 * @param   uPtr        Pointer to memory. This is matched against both the R0 and R3 addresses.
 * @param   eType       Memory type.
 */
static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
{
    PSUPDRVBUNDLE pBundle;

    /*
     * Validate input.
     */
    if (!uPtr)
    {
        Log(("Illegal address %p\n", (void *)uPtr));
        return VERR_INVALID_PARAMETER;
    }

    /*
     * Search for the address.
     */
    RTSpinlockAcquire(pSession->Spinlock);
    for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    {
        if (pBundle->cUsed > 0)
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
            {
                /* Match on type and either the ring-0 address or, when a
                   ring-3 mapping exists, the ring-3 address. */
                if (    pBundle->aMem[i].eType == eType
                    &&  pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
                    &&  (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
                         || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
                             && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
                   )
                {
                    /* Make a copy of it and release it outside the spinlock. */
                    SUPDRVMEMREF Mem = pBundle->aMem[i];
                    pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
                    pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                    RTSpinlockRelease(pSession->Spinlock);

                    if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MapObjR3, false);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    if (Mem.MemObj != NIL_RTR0MEMOBJ)
                    {
                        int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
                        AssertRC(rc); /** @todo figure out how to handle this. */
                    }
                    return VINF_SUCCESS;
                }
            }
        }
    }
    RTSpinlockRelease(pSession->Spinlock);
    Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
    return VERR_INVALID_PARAMETER;
}
4828
4829
4830/**
4831 * Opens an image. If it's the first time it's opened the call must upload
4832 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4833 *
4834 * This is the 1st step of the loading.
4835 *
4836 * @returns IPRT status code.
4837 * @param pDevExt Device globals.
4838 * @param pSession Session data.
4839 * @param pReq The open request.
4840 */
4841static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4842{
4843 int rc;
4844 PSUPDRVLDRIMAGE pImage;
4845 void *pv;
4846 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4847 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4848
4849 /*
4850 * Check if we got an instance of the image already.
4851 */
4852 supdrvLdrLock(pDevExt);
4853 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4854 {
4855 if ( pImage->szName[cchName] == '\0'
4856 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4857 {
4858 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4859 {
4860 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4861 pImage->cUsage++;
4862 pReq->u.Out.pvImageBase = pImage->pvImage;
4863 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4864 pReq->u.Out.fNativeLoader = pImage->fNative;
4865 supdrvLdrAddUsage(pSession, pImage);
4866 supdrvLdrUnlock(pDevExt);
4867 return VINF_SUCCESS;
4868 }
4869 supdrvLdrUnlock(pDevExt);
4870 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4871 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4872 }
4873 }
4874 /* (not found - add it!) */
4875
4876 /* If the loader interface is locked down, make userland fail early */
4877 if (pDevExt->fLdrLockedDown)
4878 {
4879 supdrvLdrUnlock(pDevExt);
4880 Log(("supdrvIOCtl_LdrOpen: Not adding '%s' to image list, loader interface is locked down!\n", pReq->u.In.szName));
4881 return VERR_PERMISSION_DENIED;
4882 }
4883
4884 /*
4885 * Allocate memory.
4886 */
4887 Assert(cchName < sizeof(pImage->szName));
4888 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4889 if (!pv)
4890 {
4891 supdrvLdrUnlock(pDevExt);
4892 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4893 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4894 }
4895
4896 /*
4897 * Setup and link in the LDR stuff.
4898 */
4899 pImage = (PSUPDRVLDRIMAGE)pv;
4900 pImage->pvImage = NULL;
4901 pImage->pvImageAlloc = NULL;
4902 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4903 pImage->cbImageBits = pReq->u.In.cbImageBits;
4904 pImage->cSymbols = 0;
4905 pImage->paSymbols = NULL;
4906 pImage->pachStrTab = NULL;
4907 pImage->cbStrTab = 0;
4908 pImage->pfnModuleInit = NULL;
4909 pImage->pfnModuleTerm = NULL;
4910 pImage->pfnServiceReqHandler = NULL;
4911 pImage->uState = SUP_IOCTL_LDR_OPEN;
4912 pImage->cUsage = 1;
4913 pImage->pDevExt = pDevExt;
4914 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4915
4916 /*
4917 * Try load it using the native loader, if that isn't supported, fall back
4918 * on the older method.
4919 */
4920 pImage->fNative = true;
4921 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4922 if (rc == VERR_NOT_SUPPORTED)
4923 {
4924 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4925 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4926 pImage->fNative = false;
4927 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4928 }
4929 if (RT_FAILURE(rc))
4930 {
4931 supdrvLdrUnlock(pDevExt);
4932 RTMemFree(pImage);
4933 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4934 return rc;
4935 }
4936 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4937
4938 /*
4939 * Link it.
4940 */
4941 pImage->pNext = pDevExt->pLdrImages;
4942 pDevExt->pLdrImages = pImage;
4943
4944 supdrvLdrAddUsage(pSession, pImage);
4945
4946 pReq->u.Out.pvImageBase = pImage->pvImage;
4947 pReq->u.Out.fNeedsLoading = true;
4948 pReq->u.Out.fNativeLoader = pImage->fNative;
4949 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4950
4951 supdrvLdrUnlock(pDevExt);
4952 return VINF_SUCCESS;
4953}
4954
4955
4956/**
4957 * Worker that validates a pointer to an image entrypoint.
4958 *
4959 * @returns IPRT status code.
4960 * @param pDevExt The device globals.
4961 * @param pImage The loader image.
4962 * @param pv The pointer into the image.
4963 * @param fMayBeNull Whether it may be NULL.
4964 * @param pszWhat What is this entrypoint? (for logging)
4965 * @param pbImageBits The image bits prepared by ring-3.
4966 *
4967 * @remarks Will leave the lock on failure.
4968 */
4969static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4970 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4971{
4972 if (!fMayBeNull || pv)
4973 {
4974 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4975 {
4976 supdrvLdrUnlock(pDevExt);
4977 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4978 return VERR_INVALID_PARAMETER;
4979 }
4980
4981 if (pImage->fNative)
4982 {
4983 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4984 if (RT_FAILURE(rc))
4985 {
4986 supdrvLdrUnlock(pDevExt);
4987 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4988 return rc;
4989 }
4990 }
4991 }
4992 return VINF_SUCCESS;
4993}
4994
4995
/**
 * Loads the image bits.
 *
 * This is the 2nd step of the loading; the image must already have been
 * opened with supdrvIOCtl_LdrOpen() by the same session.  Copies the symbol
 * and string tables, copies the image bits (or hands them to the native
 * loader), registers the requested entry points and finally invokes the
 * module init callback.  On failure after init the image is rolled back to
 * the OPEN state.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_HANDLE if the image isn't known to this session or
 *          the sizes don't match the open request.
 * @retval  VERR_ALREADY_LOADED if the image isn't in the OPEN state.
 * @retval  VERR_PERMISSION_DENIED if the loader interface is locked down.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
{
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    int             rc;
    LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));

    /*
     * Find the ldr image (must be referenced by this session).
     */
    supdrvLdrLock(pDevExt);
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
        pUsage = pUsage->pNext;
    if (!pUsage)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }
    pImage = pUsage->pImage;

    /*
     * Validate input: sizes must match what was given at open time.
     */
    if (    pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
        ||  pImage->cbImageBits != pReq->u.In.cbImageBits)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
             pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
        return VERR_INVALID_HANDLE;
    }

    if (pImage->uState != SUP_IOCTL_LDR_OPEN)
    {
        unsigned uState = pImage->uState;
        supdrvLdrUnlock(pDevExt);
        if (uState != SUP_IOCTL_LDR_LOAD)
            AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
        return VERR_ALREADY_LOADED;
    }

    /* If the loader interface is locked down, don't load new images */
    if (pDevExt->fLdrLockedDown)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_LOAD: Not loading '%s' image bits, loader interface is locked down!\n", pImage->szName));
        return VERR_PERMISSION_DENIED;
    }

    /*
     * Validate the requested entry points before touching anything.
     * Note: supdrvLdrValidatePointer() leaves the loader lock on failure.
     */
    switch (pReq->u.In.eEPType)
    {
        case SUPLDRLOADEP_NOTHING:
            break;

        case SUPLDRLOADEP_VMMR0:
            rc = supdrvLdrValidatePointer(    pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0,          false, pReq->u.In.abImage, "pvVMMR0");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,  false, pReq->u.In.abImage, "pvVMMR0EntryInt");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
            if (RT_SUCCESS(rc))
                rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx,   false, pReq->u.In.abImage, "pvVMMR0EntryEx");
            if (RT_FAILURE(rc))
                return rc;
            break;

        case SUPLDRLOADEP_SERVICE:
            rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
            if (RT_FAILURE(rc))
                return rc;
            if (    pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
                ||  pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
            {
                supdrvLdrUnlock(pDevExt);
                Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
                     pImage->pvImage, pReq->u.In.cbImageWithTabs,
                     pReq->u.In.EP.Service.apvReserved[0],
                     pReq->u.In.EP.Service.apvReserved[1],
                     pReq->u.In.EP.Service.apvReserved[2]));
                return VERR_INVALID_PARAMETER;
            }
            break;

        default:
            supdrvLdrUnlock(pDevExt);
            Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
            return VERR_INVALID_PARAMETER;
    }

    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
    if (RT_FAILURE(rc))
        return rc;
    rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
    if (RT_FAILURE(rc))
        return rc;

    /*
     * Allocate and copy the tables.
     * (No need to do try/except as this is a buffered request.)
     */
    pImage->cbStrTab = pReq->u.In.cbStrTab;
    if (pImage->cbStrTab)
    {
        pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
        if (pImage->pachStrTab)
            memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
    }

    pImage->cSymbols = pReq->u.In.cSymbols;
    if (RT_SUCCESS(rc) && pImage->cSymbols)
    {
        size_t  cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
        pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
        if (pImage->paSymbols)
            memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
        else
            rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
    }

    /*
     * Copy the bits / complete native loading.
     */
    if (RT_SUCCESS(rc))
    {
        pImage->uState = SUP_IOCTL_LDR_LOAD;
        pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
        pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;

        if (pImage->fNative)
            rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
        else
        {
            memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
            Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
        }
    }

    /*
     * Update any entry points.
     */
    if (RT_SUCCESS(rc))
    {
        switch (pReq->u.In.eEPType)
        {
            default:
            case SUPLDRLOADEP_NOTHING:
                rc = VINF_SUCCESS;
                break;
            case SUPLDRLOADEP_VMMR0:
                rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
                                          pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
                break;
            case SUPLDRLOADEP_SERVICE:
                pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
                rc = VINF_SUCCESS;
                break;
        }
    }

    /*
     * On success call the module initialization.
     * The image/thread is published in pLdrInitImage/hLdrInitThread for the
     * duration of the callback.
     */
    LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
    if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
    {
        Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
        pDevExt->pLdrInitImage  = pImage;
        pDevExt->hLdrInitThread = RTThreadNativeSelf();
        rc = pImage->pfnModuleInit(pImage);
        pDevExt->pLdrInitImage  = NULL;
        pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
        /* If this was the VMMR0 image and init failed, drop the entry points again. */
        if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
            supdrvLdrUnsetVMMR0EPs(pDevExt);
    }
    SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);

    if (RT_FAILURE(rc))
    {
        /* Inform the tracing component in case ModuleInit registered TPs. */
        supdrvTracerModuleUnloading(pDevExt, pImage);

        /* Roll the image back to the OPEN state and free the tables. */
        pImage->uState              = SUP_IOCTL_LDR_OPEN;
        pImage->pfnModuleInit       = NULL;
        pImage->pfnModuleTerm       = NULL;
        pImage->pfnServiceReqHandler= NULL;
        pImage->cbStrTab            = 0;
        RTMemFree(pImage->pachStrTab);
        pImage->pachStrTab          = NULL;
        RTMemFree(pImage->paSymbols);
        pImage->paSymbols           = NULL;
        pImage->cSymbols            = 0;
    }

    supdrvLdrUnlock(pDevExt);
    return rc;
}
5207
5208
/**
 * Frees a previously loaded (prep'ed) image.
 *
 * When this is the last reference (both image-wide and for this session's
 * usage record), the image is only freed if no live objects have their
 * destructor inside it; otherwise the free is deferred to the session
 * cleanup so destruction happens in the right order.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_HANDLE if the image isn't referenced by this session.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request.
 */
static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
{
    int             rc;
    PSUPDRVLDRUSAGE pUsagePrev;
    PSUPDRVLDRUSAGE pUsage;
    PSUPDRVLDRIMAGE pImage;
    LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));

    /*
     * Find the ldr image.
     */
    supdrvLdrLock(pDevExt);
    pUsagePrev = NULL;
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
    {
        pUsagePrev = pUsage;
        pUsage = pUsage->pNext;
    }
    if (!pUsage)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }

    /*
     * Check if we can remove anything.
     */
    rc = VINF_SUCCESS;
    pImage = pUsage->pImage;
    if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
    {
        /*
         * Check if there are any objects with destructors in the image, if
         * so leave it for the session cleanup routine so we get a chance to
         * clean things up in the right order and not leave them all dangling.
         */
        RTSpinlockAcquire(pDevExt->Spinlock);
        if (pImage->cUsage <= 1)
        {
            /* Last reference anywhere: scan all objects in the driver. */
            PSUPDRVOBJ pObj;
            for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
                if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
                {
                    rc = VERR_DANGLING_OBJECTS;
                    break;
                }
        }
        else
        {
            /* Only this session's last reference: scan this session's objects. */
            PSUPDRVUSAGE pGenUsage;
            for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
                if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
                {
                    rc = VERR_DANGLING_OBJECTS;
                    break;
                }
        }
        RTSpinlockRelease(pDevExt->Spinlock);
        if (rc == VINF_SUCCESS)
        {
            /* unlink it */
            if (pUsagePrev)
                pUsagePrev->pNext = pUsage->pNext;
            else
                pSession->pLdrUsage = pUsage->pNext;

            /* free it */
            pUsage->pImage = NULL;
            pUsage->pNext = NULL;
            RTMemFree(pUsage);

            /*
             * Dereference the image.
             */
            if (pImage->cUsage <= 1)
                supdrvLdrFree(pDevExt, pImage);
            else
                pImage->cUsage--;
        }
        else
        {
            Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
            rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
        }
    }
    else
    {
        /*
         * Dereference both image and usage.
         */
        pImage->cUsage--;
        pUsage->cUsage--;
    }

    supdrvLdrUnlock(pDevExt);
    return rc;
}
5316
5317
5318/**
5319 * Lock down the image loader interface.
5320 *
5321 * @returns IPRT status code.
5322 * @param pDevExt Device globals.
5323 */
5324static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt)
5325{
5326 LogFlow(("supdrvIOCtl_LdrLockDown:\n"));
5327
5328 supdrvLdrLock(pDevExt);
5329 if (!pDevExt->fLdrLockedDown)
5330 {
5331 pDevExt->fLdrLockedDown = true;
5332 Log(("supdrvIOCtl_LdrLockDown: Image loader interface locked down\n"));
5333 }
5334 supdrvLdrUnlock(pDevExt);
5335
5336 return VINF_SUCCESS;
5337}
5338
5339
/**
 * Gets the address of a symbol in an open image.
 *
 * Does a linear scan of the image's symbol table, comparing the full
 * NUL-terminated names.
 *
 * @returns IPRT status code.
 * @retval  VERR_INVALID_HANDLE if the image isn't referenced by this session.
 * @retval  VERR_ALREADY_LOADED if the image isn't in the LOAD state.
 * @retval  VERR_GENERAL_FAILURE if the symbol wasn't found.
 * @param   pDevExt     Device globals.
 * @param   pSession    Session data.
 * @param   pReq        The request buffer.
 */
static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
{
    PSUPDRVLDRIMAGE pImage;
    PSUPDRVLDRUSAGE pUsage;
    uint32_t        i;
    PSUPLDRSYM      paSyms;
    const char     *pchStrings;
    const size_t    cbSymbol = strlen(pReq->u.In.szSymbol) + 1; /* incl. the terminator */
    void           *pvSymbol = NULL;
    int             rc = VERR_GENERAL_FAILURE;
    Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));

    /*
     * Find the ldr image.
     */
    supdrvLdrLock(pDevExt);
    pUsage = pSession->pLdrUsage;
    while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
        pUsage = pUsage->pNext;
    if (!pUsage)
    {
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
        return VERR_INVALID_HANDLE;
    }
    pImage = pUsage->pImage;
    if (pImage->uState != SUP_IOCTL_LDR_LOAD)
    {
        unsigned uState = pImage->uState;
        supdrvLdrUnlock(pDevExt);
        Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
        return VERR_ALREADY_LOADED;
    }

    /*
     * Search the symbol strings.
     *
     * Note! The int32_t is for native loading on solaris where the data
     *       and text segments are in very different places.
     */
    pchStrings = pImage->pachStrTab;
    paSyms = pImage->paSymbols;
    for (i = 0; i < pImage->cSymbols; i++)
    {
        /* The offName bound check also guards the memcmp below. */
        if (    paSyms[i].offName + cbSymbol <= pImage->cbStrTab
            &&  !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
        {
            pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
            rc = VINF_SUCCESS;
            break;
        }
    }
    supdrvLdrUnlock(pDevExt);
    pReq->u.Out.pvSymbol = pvSymbol;
    return rc;
}
5404
5405
5406/**
5407 * Gets the address of a symbol in an open image or the support driver.
5408 *
5409 * @returns VINF_SUCCESS on success.
5410 * @returns
5411 * @param pDevExt Device globals.
5412 * @param pSession Session data.
5413 * @param pReq The request buffer.
5414 */
5415static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5416{
5417 int rc = VINF_SUCCESS;
5418 const char *pszSymbol = pReq->u.In.pszSymbol;
5419 const char *pszModule = pReq->u.In.pszModule;
5420 size_t cbSymbol;
5421 char const *pszEnd;
5422 uint32_t i;
5423
5424 /*
5425 * Input validation.
5426 */
5427 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5428 pszEnd = RTStrEnd(pszSymbol, 512);
5429 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5430 cbSymbol = pszEnd - pszSymbol + 1;
5431
5432 if (pszModule)
5433 {
5434 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5435 pszEnd = RTStrEnd(pszModule, 64);
5436 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5437 }
5438 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5439
5440
5441 if ( !pszModule
5442 || !strcmp(pszModule, "SupDrv"))
5443 {
5444 /*
5445 * Search the support driver export table.
5446 */
5447 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5448 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5449 {
5450 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5451 break;
5452 }
5453 }
5454 else
5455 {
5456 /*
5457 * Find the loader image.
5458 */
5459 PSUPDRVLDRIMAGE pImage;
5460
5461 supdrvLdrLock(pDevExt);
5462
5463 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5464 if (!strcmp(pImage->szName, pszModule))
5465 break;
5466 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5467 {
5468 /*
5469 * Search the symbol strings.
5470 */
5471 const char *pchStrings = pImage->pachStrTab;
5472 PCSUPLDRSYM paSyms = pImage->paSymbols;
5473 for (i = 0; i < pImage->cSymbols; i++)
5474 {
5475 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5476 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5477 {
5478 /*
5479 * Found it! Calc the symbol address and add a reference to the module.
5480 */
5481 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5482 rc = supdrvLdrAddUsage(pSession, pImage);
5483 break;
5484 }
5485 }
5486 }
5487 else
5488 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5489
5490 supdrvLdrUnlock(pDevExt);
5491 }
5492 return rc;
5493}
5494
5495
5496/**
5497 * Updates the VMMR0 entry point pointers.
5498 *
5499 * @returns IPRT status code.
5500 * @param pDevExt Device globals.
5501 * @param pSession Session data.
5502 * @param pVMMR0 VMMR0 image handle.
5503 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5504 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5505 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5506 * @remark Caller must own the loader mutex.
5507 */
5508static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5509{
5510 int rc = VINF_SUCCESS;
5511 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5512
5513
5514 /*
5515 * Check if not yet set.
5516 */
5517 if (!pDevExt->pvVMMR0)
5518 {
5519 pDevExt->pvVMMR0 = pvVMMR0;
5520 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5521 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5522 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5523 }
5524 else
5525 {
5526 /*
5527 * Return failure or success depending on whether the values match or not.
5528 */
5529 if ( pDevExt->pvVMMR0 != pvVMMR0
5530 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5531 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5532 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5533 {
5534 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5535 rc = VERR_INVALID_PARAMETER;
5536 }
5537 }
5538 return rc;
5539}
5540
5541
5542/**
5543 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5544 *
5545 * @param pDevExt Device globals.
5546 */
5547static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5548{
5549 pDevExt->pvVMMR0 = NULL;
5550 pDevExt->pfnVMMR0EntryInt = NULL;
5551 pDevExt->pfnVMMR0EntryFast = NULL;
5552 pDevExt->pfnVMMR0EntryEx = NULL;
5553}
5554
5555
5556/**
5557 * Adds a usage reference in the specified session of an image.
5558 *
5559 * Called while owning the loader semaphore.
5560 *
5561 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5562 * @param pSession Session in question.
5563 * @param pImage Image which the session is using.
5564 */
5565static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5566{
5567 PSUPDRVLDRUSAGE pUsage;
5568 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5569
5570 /*
5571 * Referenced it already?
5572 */
5573 pUsage = pSession->pLdrUsage;
5574 while (pUsage)
5575 {
5576 if (pUsage->pImage == pImage)
5577 {
5578 pUsage->cUsage++;
5579 return VINF_SUCCESS;
5580 }
5581 pUsage = pUsage->pNext;
5582 }
5583
5584 /*
5585 * Allocate new usage record.
5586 */
5587 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5588 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5589 pUsage->cUsage = 1;
5590 pUsage->pImage = pImage;
5591 pUsage->pNext = pSession->pLdrUsage;
5592 pSession->pLdrUsage = pUsage;
5593 return VINF_SUCCESS;
5594}
5595
5596
5597/**
5598 * Frees a load image.
5599 *
5600 * @param pDevExt Pointer to device extension.
5601 * @param pImage Pointer to the image we're gonna free.
5602 * This image must exit!
5603 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5604 */
5605static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5606{
5607 PSUPDRVLDRIMAGE pImagePrev;
5608 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5609
5610 /*
5611 * Warn if we're releasing images while the image loader interface is
5612 * locked down -- we won't be able to reload them!
5613 */
5614 if (pDevExt->fLdrLockedDown)
5615 Log(("supdrvLdrFree: Warning: unloading '%s' image, while loader interface is locked down!\n", pImage->szName));
5616
5617 /* find it - arg. should've used doubly linked list. */
5618 Assert(pDevExt->pLdrImages);
5619 pImagePrev = NULL;
5620 if (pDevExt->pLdrImages != pImage)
5621 {
5622 pImagePrev = pDevExt->pLdrImages;
5623 while (pImagePrev->pNext != pImage)
5624 pImagePrev = pImagePrev->pNext;
5625 Assert(pImagePrev->pNext == pImage);
5626 }
5627
5628 /* unlink */
5629 if (pImagePrev)
5630 pImagePrev->pNext = pImage->pNext;
5631 else
5632 pDevExt->pLdrImages = pImage->pNext;
5633
5634 /* check if this is VMMR0.r0 unset its entry point pointers. */
5635 if (pDevExt->pvVMMR0 == pImage->pvImage)
5636 supdrvLdrUnsetVMMR0EPs(pDevExt);
5637
5638 /* check for objects with destructors in this image. (Shouldn't happen.) */
5639 if (pDevExt->pObjs)
5640 {
5641 unsigned cObjs = 0;
5642 PSUPDRVOBJ pObj;
5643 RTSpinlockAcquire(pDevExt->Spinlock);
5644 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5645 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5646 {
5647 pObj->pfnDestructor = NULL;
5648 cObjs++;
5649 }
5650 RTSpinlockRelease(pDevExt->Spinlock);
5651 if (cObjs)
5652 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5653 }
5654
5655 /* call termination function if fully loaded. */
5656 if ( pImage->pfnModuleTerm
5657 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5658 {
5659 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5660 pImage->pfnModuleTerm(pImage);
5661 }
5662
5663 /* Inform the tracing component. */
5664 supdrvTracerModuleUnloading(pDevExt, pImage);
5665
5666 /* do native unload if appropriate. */
5667 if (pImage->fNative)
5668 supdrvOSLdrUnload(pDevExt, pImage);
5669
5670 /* free the image */
5671 pImage->cUsage = 0;
5672 pImage->pDevExt = NULL;
5673 pImage->pNext = NULL;
5674 pImage->uState = SUP_IOCTL_LDR_FREE;
5675 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5676 pImage->pvImageAlloc = NULL;
5677 RTMemFree(pImage->pachStrTab);
5678 pImage->pachStrTab = NULL;
5679 RTMemFree(pImage->paSymbols);
5680 pImage->paSymbols = NULL;
5681 RTMemFree(pImage);
5682}
5683
5684
5685/**
5686 * Acquires the loader lock.
5687 *
5688 * @returns IPRT status code.
5689 * @param pDevExt The device extension.
5690 */
5691DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5692{
5693#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5694 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5695#else
5696 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5697#endif
5698 AssertRC(rc);
5699 return rc;
5700}
5701
5702
5703/**
5704 * Releases the loader lock.
5705 *
5706 * @returns IPRT status code.
5707 * @param pDevExt The device extension.
5708 */
5709DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5710{
5711#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5712 return RTSemMutexRelease(pDevExt->mtxLdr);
5713#else
5714 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5715#endif
5716}
5717
5718
5719/**
5720 * Implements the service call request.
5721 *
5722 * @returns VBox status code.
5723 * @param pDevExt The device extension.
5724 * @param pSession The calling session.
5725 * @param pReq The request packet, valid.
5726 */
5727static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5728{
5729#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5730 int rc;
5731
5732 /*
5733 * Find the module first in the module referenced by the calling session.
5734 */
5735 rc = supdrvLdrLock(pDevExt);
5736 if (RT_SUCCESS(rc))
5737 {
5738 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5739 PSUPDRVLDRUSAGE pUsage;
5740
5741 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5742 if ( pUsage->pImage->pfnServiceReqHandler
5743 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5744 {
5745 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5746 break;
5747 }
5748 supdrvLdrUnlock(pDevExt);
5749
5750 if (pfnServiceReqHandler)
5751 {
5752 /*
5753 * Call it.
5754 */
5755 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5756 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5757 else
5758 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5759 }
5760 else
5761 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5762 }
5763
5764 /* log it */
5765 if ( RT_FAILURE(rc)
5766 && rc != VERR_INTERRUPTED
5767 && rc != VERR_TIMEOUT)
5768 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5769 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5770 else
5771 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5772 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5773 return rc;
5774#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5775 return VERR_NOT_IMPLEMENTED;
5776#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5777}
5778
5779
5780/**
5781 * Implements the logger settings request.
5782 *
5783 * @returns VBox status code.
5784 * @param pDevExt The device extension.
5785 * @param pSession The caller's session.
5786 * @param pReq The request.
5787 */
5788static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5789{
5790 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5791 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5792 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5793 PRTLOGGER pLogger = NULL;
5794 int rc;
5795
5796 /*
5797 * Some further validation.
5798 */
5799 switch (pReq->u.In.fWhat)
5800 {
5801 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5802 case SUPLOGGERSETTINGS_WHAT_CREATE:
5803 break;
5804
5805 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5806 if (*pszGroup || *pszFlags || *pszDest)
5807 return VERR_INVALID_PARAMETER;
5808 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5809 return VERR_ACCESS_DENIED;
5810 break;
5811
5812 default:
5813 return VERR_INTERNAL_ERROR;
5814 }
5815
5816 /*
5817 * Get the logger.
5818 */
5819 switch (pReq->u.In.fWhich)
5820 {
5821 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5822 pLogger = RTLogGetDefaultInstance();
5823 break;
5824
5825 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5826 pLogger = RTLogRelDefaultInstance();
5827 break;
5828
5829 default:
5830 return VERR_INTERNAL_ERROR;
5831 }
5832
5833 /*
5834 * Do the job.
5835 */
5836 switch (pReq->u.In.fWhat)
5837 {
5838 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5839 if (pLogger)
5840 {
5841 rc = RTLogFlags(pLogger, pszFlags);
5842 if (RT_SUCCESS(rc))
5843 rc = RTLogGroupSettings(pLogger, pszGroup);
5844 NOREF(pszDest);
5845 }
5846 else
5847 rc = VERR_NOT_FOUND;
5848 break;
5849
5850 case SUPLOGGERSETTINGS_WHAT_CREATE:
5851 {
5852 if (pLogger)
5853 rc = VERR_ALREADY_EXISTS;
5854 else
5855 {
5856 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5857
5858 rc = RTLogCreate(&pLogger,
5859 0 /* fFlags */,
5860 pszGroup,
5861 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5862 ? "VBOX_LOG"
5863 : "VBOX_RELEASE_LOG",
5864 RT_ELEMENTS(s_apszGroups),
5865 s_apszGroups,
5866 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5867 NULL);
5868 if (RT_SUCCESS(rc))
5869 {
5870 rc = RTLogFlags(pLogger, pszFlags);
5871 NOREF(pszDest);
5872 if (RT_SUCCESS(rc))
5873 {
5874 switch (pReq->u.In.fWhich)
5875 {
5876 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5877 pLogger = RTLogSetDefaultInstance(pLogger);
5878 break;
5879 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5880 pLogger = RTLogRelSetDefaultInstance(pLogger);
5881 break;
5882 }
5883 }
5884 RTLogDestroy(pLogger);
5885 }
5886 }
5887 break;
5888 }
5889
5890 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5891 switch (pReq->u.In.fWhich)
5892 {
5893 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5894 pLogger = RTLogSetDefaultInstance(NULL);
5895 break;
5896 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5897 pLogger = RTLogRelSetDefaultInstance(NULL);
5898 break;
5899 }
5900 rc = RTLogDestroy(pLogger);
5901 break;
5902
5903 default:
5904 {
5905 rc = VERR_INTERNAL_ERROR;
5906 break;
5907 }
5908 }
5909
5910 return rc;
5911}
5912
5913
5914/**
5915 * Implements the MSR prober operations.
5916 *
5917 * @returns VBox status code.
5918 * @param pDevExt The device extension.
5919 * @param pReq The request.
5920 */
5921static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5922{
5923#ifdef SUPDRV_WITH_MSR_PROBER
5924 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5925 int rc;
5926
5927 switch (pReq->u.In.enmOp)
5928 {
5929 case SUPMSRPROBEROP_READ:
5930 {
5931 uint64_t uValue;
5932 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5933 if (RT_SUCCESS(rc))
5934 {
5935 pReq->u.Out.uResults.Read.uValue = uValue;
5936 pReq->u.Out.uResults.Read.fGp = false;
5937 }
5938 else if (rc == VERR_ACCESS_DENIED)
5939 {
5940 pReq->u.Out.uResults.Read.uValue = 0;
5941 pReq->u.Out.uResults.Read.fGp = true;
5942 rc = VINF_SUCCESS;
5943 }
5944 break;
5945 }
5946
5947 case SUPMSRPROBEROP_WRITE:
5948 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5949 if (RT_SUCCESS(rc))
5950 pReq->u.Out.uResults.Write.fGp = false;
5951 else if (rc == VERR_ACCESS_DENIED)
5952 {
5953 pReq->u.Out.uResults.Write.fGp = true;
5954 rc = VINF_SUCCESS;
5955 }
5956 break;
5957
5958 case SUPMSRPROBEROP_MODIFY:
5959 case SUPMSRPROBEROP_MODIFY_FASTER:
5960 rc = supdrvOSMsrProberModify(idCpu, pReq);
5961 break;
5962
5963 default:
5964 return VERR_INVALID_FUNCTION;
5965 }
5966 return rc;
5967#else
5968 return VERR_NOT_IMPLEMENTED;
5969#endif
5970}
5971
5972#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5973
5974/**
5975 * Switches the TSC-delta measurement thread into the butchered state.
5976 *
5977 * @returns VBox status code.
5978 * @param pDevExt Pointer to the device instance data.
5979 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5980 * @param pszFailed An error message to log.
5981 * @param rcFailed The error code to exit the thread with.
5982 */
5983static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5984{
5985 if (!fSpinlockHeld)
5986 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5987
5988 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
5989 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5990 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5991 return rcFailed;
5992}
5993
5994
5995/**
5996 * The TSC-delta measurement thread.
5997 *
5998 * @returns VBox status code.
5999 * @param hThread The thread handle.
6000 * @param pvUser Opaque pointer to the device instance data.
6001 */
6002static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
6003{
6004 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6005 bool fInitialMeasurement = true;
6006 uint32_t cConsecutiveTimeouts = 0;
6007 int rc = VERR_INTERNAL_ERROR_2;
6008 for (;;)
6009 {
6010 /*
6011 * Switch on the current state.
6012 */
6013 SUPDRVTSCDELTATHREADSTATE enmState;
6014 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6015 enmState = pDevExt->enmTscDeltaThreadState;
6016 switch (enmState)
6017 {
6018 case kTscDeltaThreadState_Creating:
6019 {
6020 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
6021 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
6022 if (RT_FAILURE(rc))
6023 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
6024 /* fall thru */
6025 }
6026
6027 case kTscDeltaThreadState_Listening:
6028 {
6029 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6030
6031 /* Simple adaptive timeout. */
6032 if (cConsecutiveTimeouts++ == 10)
6033 {
6034 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
6035 pDevExt->cMsTscDeltaTimeout = 10;
6036 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
6037 pDevExt->cMsTscDeltaTimeout = 100;
6038 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
6039 pDevExt->cMsTscDeltaTimeout = 500;
6040 cConsecutiveTimeouts = 0;
6041 }
6042 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
6043 if ( RT_FAILURE(rc)
6044 && rc != VERR_TIMEOUT)
6045 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
6046 RTThreadUserReset(pDevExt->hTscDeltaThread);
6047 break;
6048 }
6049
6050 case kTscDeltaThreadState_WaitAndMeasure:
6051 {
6052 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
6053 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
6054 if (RT_FAILURE(rc))
6055 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
6056 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6057 pDevExt->cMsTscDeltaTimeout = 1;
6058 RTThreadSleep(10);
6059 /* fall thru */
6060 }
6061
6062 case kTscDeltaThreadState_Measuring:
6063 {
6064 cConsecutiveTimeouts = 0;
6065 if (fInitialMeasurement)
6066 {
6067 int cTries = 8;
6068 int cMsWaitPerTry = 10;
6069 fInitialMeasurement = false;
6070 do
6071 {
6072 rc = supdrvMeasureInitialTscDeltas(pDevExt);
6073 if ( RT_SUCCESS(rc)
6074 || ( RT_FAILURE(rc)
6075 && rc != VERR_TRY_AGAIN
6076 && rc != VERR_CPU_OFFLINE))
6077 {
6078 break;
6079 }
6080 RTThreadSleep(cMsWaitPerTry);
6081 } while (cTries-- > 0);
6082 }
6083 else
6084 {
6085 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6086 unsigned iCpu;
6087
6088 /* Measure TSC-deltas only for the CPUs that are in the set. */
6089 rc = VINF_SUCCESS;
6090 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6091 {
6092 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
6093 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
6094 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
6095 {
6096 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
6097 }
6098 }
6099 }
6100 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6101 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
6102 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
6103 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6104 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as the initial value. */
6105 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
6106 break;
6107 }
6108
6109 case kTscDeltaThreadState_Terminating:
6110 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
6111 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6112 return VINF_SUCCESS;
6113
6114 case kTscDeltaThreadState_Butchered:
6115 default:
6116 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
6117 }
6118 }
6119
6120 return rc;
6121}
6122
6123
6124/**
6125 * Waits for the TSC-delta measurement thread to respond to a state change.
6126 *
6127 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
6128 * other error code on internal error.
6129 *
6130 * @param pThis Pointer to the grant service instance data.
6131 * @param enmCurState The current state.
6132 * @param enmNewState The new state we're waiting for it to enter.
6133 */
6134static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
6135 SUPDRVTSCDELTATHREADSTATE enmNewState)
6136{
6137 /*
6138 * Wait a short while for the expected state transition.
6139 */
6140 int rc;
6141 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
6142 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6143 if (pDevExt->enmTscDeltaThreadState == enmNewState)
6144 {
6145 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6146 rc = VINF_SUCCESS;
6147 }
6148 else if (pDevExt->enmTscDeltaThreadState == enmCurState)
6149 {
6150 /*
6151 * Wait longer if the state has not yet transitioned to the one we want.
6152 */
6153 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6154 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
6155 if ( RT_SUCCESS(rc)
6156 || rc == VERR_TIMEOUT)
6157 {
6158 /*
6159 * Check the state whether we've succeeded.
6160 */
6161 SUPDRVTSCDELTATHREADSTATE enmState;
6162 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6163 enmState = pDevExt->enmTscDeltaThreadState;
6164 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6165 if (enmState == enmNewState)
6166 rc = VINF_SUCCESS;
6167 else if (enmState == enmCurState)
6168 {
6169 rc = VERR_TIMEOUT;
6170 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
6171 enmNewState));
6172 }
6173 else
6174 {
6175 rc = VERR_INTERNAL_ERROR;
6176 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
6177 enmState, enmNewState));
6178 }
6179 }
6180 else
6181 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
6182 }
6183 else
6184 {
6185 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6186 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
6187 rc = VERR_INTERNAL_ERROR;
6188 }
6189
6190 return rc;
6191}
6192
6193
6194/**
6195 * Terminates the TSC-delta measurement thread.
6196 *
6197 * @param pDevExt Pointer to the device instance data.
6198 */
6199static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
6200{
6201 int rc;
6202 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6203 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
6204 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6205 RTThreadUserSignal(pDevExt->hTscDeltaThread);
6206 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
6207 if (RT_FAILURE(rc))
6208 {
6209 /* Signal a few more times before giving up. */
6210 int cTriesLeft = 5;
6211 while (--cTriesLeft > 0)
6212 {
6213 RTThreadUserSignal(pDevExt->hTscDeltaThread);
6214 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
6215 if (rc != VERR_TIMEOUT)
6216 break;
6217 }
6218 }
6219}
6220
6221
6222/**
6223 * Initializes and spawns the TSC-delta measurement thread.
6224 *
6225 * A thread is required for servicing re-measurement requests from events like
6226 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
6227 * under all contexts on all OSs.
6228 *
6229 * @returns VBox status code.
6230 * @param pDevExt Pointer to the device instance data.
6231 *
6232 * @remarks Must only be called -after- initializing GIP and setting up MP
6233 * notifications!
6234 */
6235static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
6236{
6237 int rc;
6238 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
6239 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
6240 if (RT_SUCCESS(rc))
6241 {
6242 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
6243 if (RT_SUCCESS(rc))
6244 {
6245 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
6246 pDevExt->cMsTscDeltaTimeout = 1;
6247 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
6248 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
6249 if (RT_SUCCESS(rc))
6250 {
6251 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
6252 if (RT_SUCCESS(rc))
6253 {
6254 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
6255 return rc;
6256 }
6257
6258 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
6259 supdrvTscDeltaThreadTerminate(pDevExt);
6260 }
6261 else
6262 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
6263 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
6264 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
6265 }
6266 else
6267 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
6268 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
6269 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
6270 }
6271 else
6272 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
6273
6274 return rc;
6275}
6276
6277
6278/**
6279 * Terminates the TSC-delta measurement thread and cleanup.
6280 *
6281 * @param pDevExt Pointer to the device instance data.
6282 */
6283static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
6284{
6285 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
6286 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6287 {
6288 supdrvTscDeltaThreadTerminate(pDevExt);
6289 }
6290
6291 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
6292 {
6293 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
6294 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
6295 }
6296
6297 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
6298 {
6299 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
6300 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
6301 }
6302
6303 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
6304}
6305
6306
6307/**
6308 * Waits for TSC-delta measurements to be completed for all online CPUs.
6309 *
6310 * @returns VBox status code.
6311 * @param pDevExt Pointer to the device instance data.
6312 */
6313static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
6314{
6315 int cTriesLeft = 5;
6316 int cMsTotalWait;
6317 int cMsWaited = 0;
6318 int cMsWaitGranularity = 1;
6319
6320 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6321 AssertReturn(pGip, VERR_INVALID_POINTER);
6322
6323 if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
6324 return VERR_THREAD_NOT_WAITABLE;
6325
6326 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
6327 while (cTriesLeft-- > 0)
6328 {
6329 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
6330 return VINF_SUCCESS;
6331 RTThreadSleep(cMsWaitGranularity);
6332 cMsWaited += cMsWaitGranularity;
6333 if (cMsWaited >= cMsTotalWait)
6334 break;
6335 }
6336
6337 return VERR_TIMEOUT;
6338}
6339
6340#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
6341
6342/**
6343 * Applies the TSC delta to the supplied raw TSC value.
6344 *
6345 * @returns VBox status code. (Ignored by all users, just FYI.)
6346 * @param pGip Pointer to the GIP.
6347 * @param puTsc Pointer to a valid TSC value before the TSC delta has been applied.
6348 * @param idApic The APIC ID of the CPU @c puTsc corresponds to.
6349 * @param fDeltaApplied Where to store whether the TSC delta was succesfully
6350 * applied or not (optional, can be NULL).
6351 *
6352 * @remarks Maybe called with interrupts disabled in ring-0!
6353 *
6354 * @note Don't you dare change the delta calculation. If you really do, make
6355 * sure you update all places where it's used (IPRT, SUPLibAll.cpp,
6356 * SUPDrv.c, supdrvGipMpEvent, and more).
6357 */
6358DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
6359{
6360 int rc;
6361
6362 /*
6363 * Validate input.
6364 */
6365 AssertPtr(puTsc);
6366 AssertPtr(pGip);
6367 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
6368
6369 /*
6370 * Carefully convert the idApic into a GIPCPU entry.
6371 */
6372 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
6373 {
6374 uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
6375 if (RT_LIKELY(iCpu < pGip->cCpus))
6376 {
6377 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
6378
6379 /*
6380 * Apply the delta if valid.
6381 */
6382 if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
6383 {
6384 *puTsc -= pGipCpu->i64TSCDelta;
6385 if (pfDeltaApplied)
6386 *pfDeltaApplied = true;
6387 return VINF_SUCCESS;
6388 }
6389
6390 rc = VINF_SUCCESS;
6391 }
6392 else
6393 {
6394 AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
6395 rc = VERR_INVALID_CPU_INDEX;
6396 }
6397 }
6398 else
6399 {
6400 AssertMsgFailed(("idApic=%u\n", idApic));
6401 rc = VERR_INVALID_CPU_ID;
6402 }
6403 if (pfDeltaApplied)
6404 *pfDeltaApplied = false;
6405 return rc;
6406}
6407
6408
6409/**
6410 * Measures the TSC frequency of the system.
6411 *
6412 * Uses a busy-wait method for the async. case as it is intended to help push
6413 * the CPU frequency up, while for the invariant cases using a sleeping method.
6414 *
6415 * The TSC frequency can vary on systems which are not reported as invariant.
6416 * On such systems the object of this function is to find out what the nominal,
6417 * maximum TSC frequency under 'normal' CPU operation.
6418 *
6419 * @returns VBox status code.
6420 * @param pDevExt Pointer to the device instance.
6421 *
6422 * @remarks Must be called only -after- measuring the TSC deltas.
6423 */
6424static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6425{
6426 int cTriesLeft = 4;
6427 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6428
6429 /* Assert order. */
6430 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6431 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6432
6433 while (cTriesLeft-- > 0)
6434 {
6435 RTCCUINTREG uFlags;
6436 uint64_t u64NanoTsBefore;
6437 uint64_t u64NanoTsAfter;
6438 uint64_t u64TscBefore;
6439 uint64_t u64TscAfter;
6440 uint8_t idApicBefore;
6441 uint8_t idApicAfter;
6442
6443 /*
6444 * Synchronize with the host OS clock tick before reading the TSC.
6445 * Especially important on older Windows version where the granularity is terrible.
6446 */
6447 u64NanoTsBefore = RTTimeSystemNanoTS();
6448 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6449 ASMNopPause();
6450
6451 uFlags = ASMIntDisableFlags();
6452 idApicBefore = ASMGetApicId();
6453 u64TscBefore = ASMReadTSC();
6454 u64NanoTsBefore = RTTimeSystemNanoTS();
6455 ASMSetFlags(uFlags);
6456
6457 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6458 {
6459 /*
6460 * Sleep-wait since the TSC frequency is constant, it eases host load.
6461 * Shorter interval produces more variance in the frequency (esp. Windows).
6462 */
6463 RTThreadSleep(200);
6464 u64NanoTsAfter = RTTimeSystemNanoTS();
6465 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6466 ASMNopPause();
6467 u64NanoTsAfter = RTTimeSystemNanoTS();
6468 }
6469 else
6470 {
6471 /* Busy-wait keeping the frequency up and measure. */
6472 for (;;)
6473 {
6474 u64NanoTsAfter = RTTimeSystemNanoTS();
6475 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6476 ASMNopPause();
6477 else
6478 break;
6479 }
6480 }
6481
6482 uFlags = ASMIntDisableFlags();
6483 idApicAfter = ASMGetApicId();
6484 u64TscAfter = ASMReadTSC();
6485 ASMSetFlags(uFlags);
6486
6487 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6488 {
6489 int rc;
6490 bool fAppliedBefore;
6491 bool fAppliedAfter;
6492 rc = supdrvTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6493 rc = supdrvTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6494
6495 if ( !fAppliedBefore
6496 || !fAppliedAfter)
6497 {
6498#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6499 /*
6500 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6501 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6502 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6503 * proceed. This should be triggered just once if we're rather unlucky.
6504 */
6505 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6506 if (rc == VERR_TIMEOUT)
6507 {
6508 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
6509 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6510 }
6511#else
6512 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6513 idApicBefore, idApicAfter, cTriesLeft);
6514#endif
6515 continue;
6516 }
6517 }
6518
6519 /*
6520 * Update GIP.
6521 */
6522 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6523 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6524 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6525 return VINF_SUCCESS;
6526 }
6527
6528 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6529}
6530
6531
6532/**
6533 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6534 *
6535 * @param pTimer The timer.
6536 * @param pvUser Opaque pointer to the device instance data.
6537 * @param iTick The timer tick.
6538 */
6539static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6540{
6541 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6542 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6543 bool fDeltaApplied = false;
6544 uint8_t idApic;
6545 uint64_t u64DeltaNanoTS;
6546 uint64_t u64DeltaTsc;
6547 uint64_t u64NanoTS;
6548 uint64_t u64Tsc;
6549 RTCCUINTREG uFlags;
6550
6551 /* Paranoia. */
6552 Assert(pGip);
6553 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6554
6555#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
6556 u64NanoTS = RTTimeSystemNanoTS();
6557 while (RTTimeSystemNanoTS() == u64NanoTS)
6558 ASMNopPause();
6559#endif
6560 uFlags = ASMIntDisableFlags();
6561 idApic = ASMGetApicId();
6562 u64Tsc = ASMReadTSC();
6563 u64NanoTS = RTTimeSystemNanoTS();
6564 ASMSetFlags(uFlags);
6565 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
6566 supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6567 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
6568 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
6569
6570 if (RT_UNLIKELY( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
6571 && !fDeltaApplied))
6572 {
6573 Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6574 GIP_TSC_REFINE_INTERVAL));
6575 return;
6576 }
6577
6578 /* Calculate the TSC frequency. */
6579 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6580 && u64DeltaNanoTS < UINT32_MAX)
6581 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
6582 else
6583 {
6584 RTUINT128U CpuHz, Tmp, Divisor;
6585 CpuHz.s.Lo = CpuHz.s.Hi = 0;
6586 RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
6587 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
6588 pGip->u64CpuHz = CpuHz.s.Lo;
6589 }
6590
6591 /* Update rest of GIP. */
6592 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6593 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6594}
6595
6596
/**
 * Starts the TSC-frequency refinement phase asynchronously.
 *
 * Records the current TSC/NanoTS pair as an anchor point in the device
 * extension and arms a one-shot timer (supdrvRefineTscTimer) that fires
 * GIP_TSC_REFINE_INTERVAL seconds later to recompute the TSC frequency over
 * that longer interval.
 *
 * @param   pDevExt     Pointer to the device instance data.
 */
static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
{
    uint64_t u64NanoTS;
    RTCCUINTREG uFlags;
    uint8_t idApic;
    int rc;
    PSUPGLOBALINFOPAGE pGip;

    /* Validate. */
    Assert(pDevExt);
    Assert(pDevExt->pGip);
    pGip = pDevExt->pGip;

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * If the TSC-delta thread is created, wait until it's done calculating
     * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
     */
    if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
        && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
    {
        rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
        if (rc == VERR_TIMEOUT)
        {
            SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
            return;
        }
    }
#endif

    /*
     * Record the TSC and NanoTS as the starting anchor point for refinement of the
     * TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
     * reading of the TSC and the NanoTS as close as possible.
     */
    /* Align with the start of a fresh NanoTS value to minimize jitter. */
    u64NanoTS = RTTimeSystemNanoTS();
    while (RTTimeSystemNanoTS() == u64NanoTS)
        ASMNopPause();
    /* Take the pair with interrupts disabled so nothing runs in between. */
    uFlags = ASMIntDisableFlags();
    idApic = ASMGetApicId();
    pDevExt->u64TscAnchor = ASMReadTSC();
    pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
    ASMSetFlags(uFlags);
    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
        supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);

    rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        /*
         * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
         * interval as small as possible while gaining the most consistent and accurate frequency
         * (compared to what the host OS might have measured).
         *
         * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
         * same TSC frequency whenever possible so we need to keep the interval short.
         */
        rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
        AssertRC(rc);
    }
    else
        OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
}
6665
6666
6667/**
6668 * Creates the GIP.
6669 *
6670 * @returns VBox status code.
6671 * @param pDevExt Instance data. GIP stuff may be updated.
6672 */
6673static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6674{
6675 PSUPGLOBALINFOPAGE pGip;
6676 RTHCPHYS HCPhysGip;
6677 uint32_t u32SystemResolution;
6678 uint32_t u32Interval;
6679 uint32_t u32MinInterval;
6680 uint32_t uMod;
6681 unsigned cCpus;
6682 int rc;
6683
6684 LogFlow(("supdrvGipCreate:\n"));
6685
6686 /* Assert order. */
6687 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6688 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6689 Assert(!pDevExt->pGipTimer);
6690
6691 /*
6692 * Check the CPU count.
6693 */
6694 cCpus = RTMpGetArraySize();
6695 if ( cCpus > RTCPUSET_MAX_CPUS
6696 || cCpus > 256 /* ApicId is used for the mappings */)
6697 {
6698 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6699 return VERR_TOO_MANY_CPUS;
6700 }
6701
6702 /*
6703 * Allocate a contiguous set of pages with a default kernel mapping.
6704 */
6705 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6706 if (RT_FAILURE(rc))
6707 {
6708 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6709 return rc;
6710 }
6711 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6712 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6713
6714 /*
6715 * Allocate the TSC-delta sync struct on a separate cache line.
6716 */
6717 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
6718 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
6719 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
6720
6721 /*
6722 * Find a reasonable update interval and initialize the structure.
6723 */
6724 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
6725 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6726 * See @bugref{6710}. */
6727 u32MinInterval = RT_NS_10MS;
6728 u32SystemResolution = RTTimerGetSystemGranularity();
6729 u32Interval = u32MinInterval;
6730 uMod = u32MinInterval % u32SystemResolution;
6731 if (uMod)
6732 u32Interval += u32SystemResolution - uMod;
6733
6734 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6735
6736 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
6737 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6738 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6739 {
6740 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
6741 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6742 return VERR_INTERNAL_ERROR_2;
6743 }
6744
6745 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
6746 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
6747#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6748 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
6749 {
6750 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6751 rc = supdrvTscDeltaThreadInit(pDevExt);
6752 }
6753#endif
6754 if (RT_SUCCESS(rc))
6755 {
6756 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6757 if (RT_SUCCESS(rc))
6758 {
6759 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6760 if (RT_SUCCESS(rc))
6761 {
6762#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6763 uint16_t iCpu;
6764 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
6765 {
6766 /*
6767 * Measure the TSC deltas now that we have MP notifications.
6768 */
6769 int cTries = 5;
6770 do
6771 {
6772 rc = supdrvMeasureInitialTscDeltas(pDevExt);
6773 if ( rc != VERR_TRY_AGAIN
6774 && rc != VERR_CPU_OFFLINE)
6775 break;
6776 } while (--cTries > 0);
6777 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6778 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6779 }
6780 else
6781 {
6782 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6783 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
6784 }
6785#endif
6786 if (RT_SUCCESS(rc))
6787 {
6788 rc = supdrvGipMeasureTscFreq(pDevExt);
6789 if (RT_SUCCESS(rc))
6790 {
6791 /*
6792 * Create the timer.
6793 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6794 */
6795 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6796 {
6797 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6798 pDevExt);
6799 if (rc == VERR_NOT_SUPPORTED)
6800 {
6801 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6802 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6803 }
6804 }
6805 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6806 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
6807 supdrvGipSyncAndInvariantTimer, pDevExt);
6808 if (RT_SUCCESS(rc))
6809 {
6810 /*
6811 * We're good.
6812 */
6813 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6814 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
6815
6816 g_pSUPGlobalInfoPage = pGip;
6817 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6818 supdrvRefineTscFreq(pDevExt);
6819 return VINF_SUCCESS;
6820 }
6821
6822 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6823 Assert(!pDevExt->pGipTimer);
6824 }
6825 else
6826 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6827 }
6828 else
6829 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
6830 }
6831 else
6832 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6833 }
6834 else
6835 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6836 }
6837 else
6838 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6839
6840 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
6841 return rc;
6842}
6843
6844
/**
 * Terminates the GIP.
 *
 * Tears down everything supdrvGipCreate() set up; every step is guarded, so
 * it is safe to call on partially initialized state (it is the common error
 * path of supdrvGipCreate()).
 *
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Destroy the TSC-refinement one-shot timer.
     */
    if (pDevExt->pTscRefineTimer)
    {
        RTTimerDestroy(pDevExt->pTscRefineTimer);
        pDevExt->pTscRefineTimer = NULL;
    }

    /* Free the TSC-delta sync allocation.  Note: pTscDeltaSync is only an
       aligned pointer into pvTscDeltaSync, so just the latter is freed. */
    if (pDevExt->pvTscDeltaSync)
    {
        RTMemFree(pDevExt->pvTscDeltaSync);
        pDevExt->pTscDeltaSync = NULL;
        pDevExt->pvTscDeltaSync = NULL;
    }

    /*
     * Invalid the GIP data.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the timer and free the GIP memory object.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've release the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
}
6918
6919
/**
 * Timer callback function for the sync and invariant GIP modes.
 *
 * Samples the TSC and NanoTS with interrupts disabled, applies the per-CPU
 * TSC delta when in delta mode, and feeds the pair to supdrvGipUpdate().
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device extension.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    RTCCUINTREG uFlags;
    uint64_t u64TSC;
    uint64_t u64NanoTS;
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    u64TSC = ASMReadTSC();
    u64NanoTS = RTTimeSystemNanoTS();

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    {
        /*
         * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
         * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
         * affected a bit until we get proper TSC deltas than implementing options like
         * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
         *
         * The likely hood of this happening is really low. On Windows, Linux, and Solaris
         * timers fire on the CPU they were registered/started on. Darwin timers doesn't
         * necessarily (they are high priority threads waiting).
         */
        Assert(!ASMIntAreEnabled());
        supdrvTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
    }

    supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);

    ASMSetFlags(uFlags);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /* Kick the TSC-delta thread into measurement mode if there are CPUs whose
       delta still needs to be determined. */
    if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
        && !RTCpuSetIsEmpty(&pDevExt->TscDeltaCpuSet))
    {
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
            || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        /** @todo Do the actual poking using -- RTThreadUserSignal() */
    }
#endif
}
6972
6973
6974/**
6975 * Timer callback function for async GIP mode.
6976 * @param pTimer The timer.
6977 * @param pvUser Opaque pointer to the device extension.
6978 * @param iTick The timer tick.
6979 */
6980static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6981{
6982 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6983 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6984 RTCPUID idCpu = RTMpCpuId();
6985 uint64_t u64TSC = ASMReadTSC();
6986 uint64_t NanoTS = RTTimeSystemNanoTS();
6987
6988 /** @todo reset the transaction number and whatnot when iTick == 1. */
6989 if (pDevExt->idGipMaster == idCpu)
6990 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6991 else
6992 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6993
6994 ASMSetFlags(fOldFlags);
6995}
6996
6997
6998/**
6999 * Finds our (@a idCpu) entry, or allocates a new one if not found.
7000 *
7001 * @returns Index of the CPU in the cache set.
7002 * @param pGip The GIP.
7003 * @param idCpu The CPU ID.
7004 */
7005static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
7006{
7007 uint32_t i, cTries;
7008
7009 /*
7010 * ASSUMES that CPU IDs are constant.
7011 */
7012 for (i = 0; i < pGip->cCpus; i++)
7013 if (pGip->aCPUs[i].idCpu == idCpu)
7014 return i;
7015
7016 cTries = 0;
7017 do
7018 {
7019 for (i = 0; i < pGip->cCpus; i++)
7020 {
7021 bool fRc;
7022 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
7023 if (fRc)
7024 return i;
7025 }
7026 } while (cTries++ < 32);
7027 AssertReleaseFailed();
7028 return i - 1;
7029}
7030
7031
7032/**
7033 * Finds the GIP CPU index corresponding to @a idCpu.
7034 *
7035 * @returns GIP CPU array index, UINT32_MAX if not found.
7036 * @param pGip The GIP.
7037 * @param idCpu The CPU ID.
7038 */
7039static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
7040{
7041 uint32_t i;
7042 for (i = 0; i < pGip->cCpus; i++)
7043 if (pGip->aCPUs[i].idCpu == idCpu)
7044 return i;
7045 return UINT32_MAX;
7046}
7047
7048
/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 *
 * @remarks Must be invoked on the CPU being onlined (asserted below), since
 *          the APIC ID is read from the current CPU.
 */
static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet = 0;
    uint16_t idApic = UINT16_MAX;
    uint32_t i = 0;
    uint64_t u64NanoTS = 0;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    AssertRelease(idCpu == RTMpCpuId());
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.
     */
    /* Back-date the timestamp by one update interval so the first real tick
       doesn't appear to be in the future. */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
    supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
    idApic = ASMGetApicId();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, iCpuSet);
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
        /* Switch the delta thread into measurement mode so it picks the new CPU up. */
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
            || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
        {
            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
        }
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
#endif
    }

    /* commit it */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
7128
7129
/**
 * The CPU should be accounted as offline, update the GIP accordingly.
 *
 * This is used by supdrvGipMpEvent.
 *
 * @param   pDevExt     The device extension.
 * @param   idCpu       The CPU ID.
 *
 * @remarks Can fire on any CPU, not just the one going offline.
 */
static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet;
    unsigned i;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    AssertReturnVoid(iCpuSet >= 0);

    /* Translate the CPU-set index to the GIP CPU array index and sanity check it. */
    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturnVoid(i < pGip->cCpus);
    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);

    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
    if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
    {
        ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
        ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
    }

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        /* Reset the TSC delta, we will recalculate it lazily. */
        ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
        /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
        RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
    }

    /* commit it */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
7181
7182
7183/**
7184 * Multiprocessor event notification callback.
7185 *
7186 * This is used to make sure that the GIP master gets passed on to
7187 * another CPU. It also updates the associated CPU data.
7188 *
7189 * @param enmEvent The event.
7190 * @param idCpu The cpu it applies to.
7191 * @param pvUser Pointer to the device extension.
7192 *
7193 * @remarks This function -must- fire on the newly online'd CPU for the
7194 * RTMPEVENT_ONLINE case and can fire on any CPU for the
7195 * RTMPEVENT_OFFLINE case.
7196 */
7197static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
7198{
7199 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
7200 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7201
7202 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7203
7204 /*
7205 * Update the GIP CPU data.
7206 */
7207 if (pGip)
7208 {
7209 switch (enmEvent)
7210 {
7211 case RTMPEVENT_ONLINE:
7212 AssertRelease(idCpu == RTMpCpuId());
7213 supdrvGipMpEventOnline(pDevExt, idCpu);
7214 break;
7215 case RTMPEVENT_OFFLINE:
7216 supdrvGipMpEventOffline(pDevExt, idCpu);
7217 break;
7218 }
7219 }
7220
7221 /*
7222 * Make sure there is a master GIP.
7223 */
7224 if (enmEvent == RTMPEVENT_OFFLINE)
7225 {
7226 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
7227 if (idGipMaster == idCpu)
7228 {
7229 /*
7230 * The GIP master is going offline, find a new one.
7231 */
7232 bool fIgnored;
7233 unsigned i;
7234 RTCPUID idNewGipMaster = NIL_RTCPUID;
7235 RTCPUSET OnlineCpus;
7236 RTMpGetOnlineSet(&OnlineCpus);
7237
7238 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
7239 {
7240 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
7241 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
7242 && idCurCpu != idGipMaster)
7243 {
7244 idNewGipMaster = idCurCpu;
7245 break;
7246 }
7247 }
7248
7249 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
7250 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
7251 NOREF(fIgnored);
7252 }
7253 }
7254}
7255
7256
/**
 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
 * and compute the delta between them.
 *
 * @param idCpu The CPU we are current scheduled on.
 * @param pvUser1 Opaque pointer to the device instance data.
 * @param pvUser2 Pointer to the SUPGIPCPU entry of the worker CPU.
 *
 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
 *     read the TSC at exactly the same time on both the master and the worker
 *     CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
 *     pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
 *     try to minimize the measurement error by computing the minimum read time
 *     of the compare statement in the worker by taking TSC measurements across
 *     it.
 *
 *     We ignore the first few runs of the loop in order to prime the cache.
 *     Also, be careful about using 'pause' instruction in critical busy-wait
 *     loops in this code - it can cause undesired behaviour with
 *     hyperthreading.
 *
 *     It must be noted that the computed minimum read time is mostly to
 *     eliminate huge deltas when the worker is too early and doesn't by itself
 *     help produce more accurate deltas. We allow two times the computed
 *     minimum as an arbibtrary acceptable threshold. Therefore, it is still
 *     possible to get negative deltas where there are none when the worker is
 *     earlier. As long as these occasional negative deltas are lower than the
 *     time it takes to exit guest-context and the OS to reschedule EMT on a
 *     different CPU we won't expose a TSC that jumped backwards. It is because
 *     of the existence of the negative deltas we don't recompute the delta with
 *     the master and worker interchanged to eliminate the remaining measurement
 *     error.
 */
static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    PSUPGIPCPU pGipCpuWorker = (PSUPGIPCPU)pvUser2;
    uint32_t idWorker = pGipCpuWorker->idCpu;
    RTCPUID idMaster = ASMAtomicUoReadU32(&pDevExt->idTscDeltaInitiator);
    unsigned idxMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
    PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
    int cTriesLeft;

    /* This fires on all CPUs (RTMpOnAll); only the master and the chosen
       worker take part, everyone else bails out immediately. */
    if (   idCpu != idMaster
        && idCpu != idWorker)
        return;

    /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
       with a timeout to avoid deadlocking the entire system. */
    if (!RTMpOnAllIsConcurrentSafe())
    {
        /** @todo This was introduced for Windows, but since Windows doesn't use this
         *        code path any longer (as DPC timeouts BSOD regardless of interrupts,
         *        see @bugref{6710} comment 81), eventually phase it out. */
        uint64_t uTscNow;
        uint64_t uTscStart;
        uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */

        ASMSerializeInstruction();
        uTscStart = ASMReadTSC();
        if (idCpu == idMaster)
        {
            /* Master announces itself, then waits (bounded by cWaitTicks) for
               the worker to acknowledge. */
            ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
            while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Set the worker delta to indicate failure, not the master. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
        }
        else
        {
            /* Worker waits for the master's announcement, then acknowledges. */
            while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
            ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
        }
    }

    /*
     * Measurement loop: master and worker lock-step through
     * GIP_TSC_DELTA_LOOPS iterations per try (the first
     * GIP_TSC_DELTA_PRIMER_LOOPS prime the cache, the next
     * GIP_TSC_DELTA_READ_TIME_LOOPS establish the minimum compare-read time)
     * and keep the smallest delta observed.  Retry up to 12 times until a
     * usable sample is produced.
     */
    Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    cTriesLeft = 12;
    while (cTriesLeft-- > 0)
    {
        unsigned i;
        uint64_t uMinCmpReadTime = UINT64_MAX;
        for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
        {
            if (idCpu == idMaster)
            {
                /*
                 * The master.
                 */
                RTCCUINTREG uFlags;
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);

                /* Disable interrupts only in the master for as short a period
                   as possible, thanks again to Windows. See @bugref{6710} comment #73. */
                uFlags = ASMIntDisableFlags();

                /* Wait until the worker has seen the start signal (it moves
                   the sync variable on to WORKER_READY). */
                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
                { /* nothing */ }

                /* Publish our TSC sample; retry in the theoretical case it
                   equals the reserved sentinel value. */
                do
                {
                    ASMSerializeInstruction();
                    ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

                ASMSetFlags(uFlags);

                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
                { /* nothing */ }

                /* Only the iterations after the priming/read-time phases
                   contribute delta samples. */
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                    {
                        int64_t iDelta = pGipCpuWorker->u64TSCSample
                                       - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                        /** @todo r=bird: Why isn't this code using absolute values? Guess it mostly works fine because
                         *        the detection code is biased thowards positive deltas (see tstSupTscDelta.cpp output) ... */
                        if (iDelta < pGipCpuWorker->i64TSCDelta)
                            pGipCpuWorker->i64TSCDelta = iDelta;
                    }
                }

                /* Reset the sample and release the worker for the next iteration. */
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
            }
            else
            {
                /*
                 * The worker.
                 */
                uint64_t uTscWorker;
                uint64_t uTscWorkerFlushed;
                uint64_t uCmpReadTime;

                ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
                { /* nothing */ }
                Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);

                /*
                 * Keep reading the TSC until we notice that the master has read his. Reading
                 * the TSC -after- the master has updated the memory is way too late. We thus
                 * compensate by trying to measure how long it took for the worker to notice
                 * the memory flushed from the master.
                 */
                do
                {
                    ASMSerializeInstruction();
                    uTscWorker = ASMReadTSC();
                } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
                ASMSerializeInstruction();
                uTscWorkerFlushed = ASMReadTSC();

                uCmpReadTime = uTscWorkerFlushed - uTscWorker;
                if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
                {
                    /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                    if (uCmpReadTime < (uMinCmpReadTime << 1))
                    {
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                        if (uCmpReadTime < uMinCmpReadTime)
                            uMinCmpReadTime = uCmpReadTime;
                    }
                    else
                        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
                }
                else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
                {
                    /* Read-time calibration phase: just track the minimum. */
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }

                /* Signal completion and wait for the master to reset the sync var. */
                ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
                while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
                    ASMNopPause();
            }
        }

        /* A usable delta was recorded this try: update the bookkeeping sets
           (each side updates its own entry) and stop retrying. */
        if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
        {
            if (idCpu == idMaster)
            {
                RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuMaster->iCpuSet);
                RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuMaster->iCpuSet);
            }
            else
            {
                RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
                RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
            }
            break;
        }
    }
}
7477
7478
7479/**
7480 * Clears TSC delta related variables.
7481 *
7482 * Clears all TSC samples as well as the delta synchronization variable on the
7483 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
7484 *
7485 * @param pDevExt Pointer to the device instance data.
7486 * @param fClearDeltas Whether the deltas are also to be cleared.
7487 */
7488DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
7489{
7490 unsigned iCpu;
7491 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7492 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7493 {
7494 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7495 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7496 if (fClearDeltas)
7497 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7498 }
7499 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7500}
7501
7502
/**
 * Measures the TSC delta between the master GIP CPU and one specified worker
 * CPU.
 *
 * @returns VBox status code.
 * @retval  VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if the measurement ran but
 *          no delta could be established.
 * @retval  VERR_CPU_OFFLINE if the worker CPU is no longer online.
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   idxWorker       The index of the worker CPU from the GIP's array of
 *                          CPUs.
 *
 * @remarks This can be called with preemption disabled!
 */
static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
{
    int                 rc;
    PSUPGLOBALINFOPAGE  pGip;
    PSUPGIPCPU          pGipCpuWorker;
    RTCPUID             idMaster;

    AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
    AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);

    pGip          = pDevExt->pGip;
    idMaster      = pDevExt->idGipMaster;
    pGipCpuWorker = &pGip->aCPUs[idxWorker];

    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);

    /*
     * Don't attempt measuring the delta for the GIP master.
     * Its delta is zero by definition; only patch it up if needed.
     */
    if (pGipCpuWorker->idCpu == idMaster)
    {
        if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
            ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
        return VINF_SUCCESS;
    }

    /* Set the master TSC as the initiator.  This serializes concurrent
       measurements: only one master/worker pair may use pTscDeltaSync. */
    while (!ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID))
    {
        /*
         * Sleep here rather than spin as there is a parallel measurement
         * being executed and that can take a good while to be done.
         */
        RTThreadSleep(1);
    }

    if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
    {
        /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
        ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
        ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
        rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pDevExt, pGipCpuWorker);
        if (RT_SUCCESS(rc))
        {
            if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
            {
                /*
                 * Work the TSC delta applicability rating.  It starts
                 * optimistic in supdrvGipInit, we downgrade it here.
                 */
                SUPGIPUSETSCDELTA enmRating;
                if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
                    || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
                    enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
                else if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
                         || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
                    enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
                else
                    enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
                /* Ratings only ever get worse (numerically larger), never better. */
                if (pGip->enmUseTscDelta < enmRating)
                {
                    AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
                    ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
                }
            }
            else
                rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
        }
    }
    else
        rc = VERR_CPU_OFFLINE;

    /* Release the initiator slot so other measurements may proceed. */
    ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
    return rc;
}
7589
7590
7591/**
7592 * Performs the initial measurements of the TSC deltas between CPUs.
7593 *
7594 * This is called by supdrvGipCreate or triggered by it if threaded.
7595 *
7596 * @returns VBox status code.
7597 * @param pDevExt Pointer to the device instance data.
7598 *
7599 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7600 * idCpu, GIP's online CPU set which are populated in
7601 * supdrvGipInitOnCpu().
7602 */
7603static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
7604{
7605 PSUPGIPCPU pGipCpuMaster;
7606 unsigned iCpu;
7607 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7608 uint32_t idxMaster = UINT32_MAX;
7609 int rc = VINF_SUCCESS;
7610 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
7611
7612 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
7613
7614 /*
7615 * Pick the first CPU online as the master TSC and make it the new GIP master based
7616 * on the APIC ID.
7617 *
7618 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7619 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7620 * master as this point since the sync/async timer isn't created yet.
7621 */
7622 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
7623 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7624 {
7625 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7626 if (idxCpu != UINT16_MAX)
7627 {
7628 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7629 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7630 {
7631 idxMaster = idxCpu;
7632 pGipCpu->i64TSCDelta = 0;
7633 break;
7634 }
7635 }
7636 }
7637 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7638 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7639 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7640
7641 /*
7642 * If there is only a single CPU online we have nothing to do.
7643 */
7644 if (pGip->cOnlineCpus <= 1)
7645 {
7646 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7647 return VINF_SUCCESS;
7648 }
7649
7650 /*
7651 * Loop thru the GIP CPU array and get deltas for each CPU (except the master).
7652 */
7653 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7654 {
7655 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7656 if ( iCpu != idxMaster
7657 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
7658 {
7659 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7660 if (RT_FAILURE(rc))
7661 {
7662 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7663 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7664 break;
7665 }
7666
7667 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
7668 {
7669 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
7670 rc = VERR_TRY_AGAIN;
7671 break;
7672 }
7673 }
7674 }
7675
7676 return rc;
7677}
7678
7679
7680/**
7681 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7682 *
7683 * @param idCpu Ignored.
7684 * @param pvUser1 Where to put the TSC.
7685 * @param pvUser2 Ignored.
7686 */
7687static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7688{
7689 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7690}
7691
7692
/**
 * Determine if Async GIP mode is required because of TSC drift.
 *
 * When using the default/normal timer code it is essential that the time stamp counter
 * (TSC) runs never backwards, that is, a read operation to the counter should return
 * a bigger value than any previous read operation. This is guaranteed by the latest
 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
 * case we have to choose the asynchronous timer mode.
 *
 * @param   poffMin     Pointer to the determined difference between different
 *                      cores (optional, can be NULL).
 * @return  false if the time stamp counters appear to be synchronized, true otherwise.
 */
static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
{
    /*
     * Just iterate all the cpus 8 times and make sure that the TSC is
     * ever increasing. We don't bother taking TSC rollover into account.
     */
    int      iEndCpu = RTMpGetArraySize();
    int      iCpu;
    int      cLoops  = 8;
    bool     fAsync  = false;
    int      rc      = VINF_SUCCESS;
    uint64_t offMax  = 0;
    uint64_t offMin  = ~(uint64_t)0;
    uint64_t PrevTsc = ASMReadTSC();

    while (cLoops-- > 0)
    {
        for (iCpu = 0; iCpu < iEndCpu; iCpu++)
        {
            uint64_t CurTsc;
            /* Hop to the target CPU and sample its TSC there. */
            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
            if (RT_SUCCESS(rc))
            {
                if (CurTsc <= PrevTsc)
                {
                    /* The TSC went backwards across CPUs => not synchronized. */
                    fAsync = true;
                    offMin = offMax = PrevTsc - CurTsc;
                    Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
                         iCpu, cLoops, CurTsc, PrevTsc));
                    break;
                }

                /* Gather statistics (except the first time). */
                if (iCpu != 0 || cLoops != 7)
                {
                    uint64_t off = CurTsc - PrevTsc;
                    if (off < offMin)
                        offMin = off;
                    if (off > offMax)
                        offMax = off;
                    Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
                }

                /* Next */
                PrevTsc = CurTsc;
            }
            else if (rc == VERR_NOT_SUPPORTED)
                break;
            else
                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
        }

        /* broke out of the loop. */
        if (iCpu < iEndCpu)
            break;
    }

    if (poffMin)
        *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
         fAsync, iEndCpu, rc, offMin, offMax));
#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
#endif
    return fAsync;
}
7772
7773
/**
 * supdrvGipInit() worker that determines the GIP TSC mode.
 *
 * @returns The most suitable TSC mode.
 * @param   pDevExt     Pointer to the device instance data.
 */
static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
{
    uint64_t u64DiffCoresIgnored;
    uint32_t uEAX, uEBX, uECX, uEDX;

    /*
     * Establish whether the CPU advertises TSC as invariant, we need that in
     * a couple of places below.  (CPUID leaf 0x80000007, EDX TscInvariant bit.)
     */
    bool fInvariantTsc = false;
    if (ASMHasCpuId())
    {
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
                fInvariantTsc = true;
        }
    }

    /*
     * On single CPU systems, we don't need to consider ASYNC mode.
     */
    if (RTMpGetCount() <= 1)
        return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;

    /*
     * Allow the user and/or OS specific bits to force async mode.
     */
    if (supdrvOSGetForcedAsyncTscMode(pDevExt))
        return SUPGIPMODE_ASYNC_TSC;


#if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
    /*
     * Use invariant mode if the CPU says TSC is invariant.
     */
    if (fInvariantTsc)
        return SUPGIPMODE_INVARIANT_TSC;
#endif

    /*
     * TSC is not invariant and we're on SMP, this presents two problems:
     *
     * (1) There might be a skew between the CPU, so that cpu0
     *     returns a TSC that is slightly different from cpu1.
     *     This skew may be due to (2), bad TSC initialization
     *     or slightly different TSC rates.
     *
     * (2) Power management (and other things) may cause the TSC
     *     to run at a non-constant speed, and cause the speed
     *     to be different on the cpus. This will result in (1).
     *
     * If any of the above is detected, we will have to use ASYNC mode.
     */

    /* (1). Try check for current differences between the cpus. */
    if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
        return SUPGIPMODE_ASYNC_TSC;

#if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
    if (fInvariantTsc)
        return SUPGIPMODE_INVARIANT_TSC;
#endif

    /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    if (   ASMIsValidStdRange(uEAX)
        && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
    {
        /* Check for APM support. */
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
                return SUPGIPMODE_ASYNC_TSC;
        }
    }

    return SUPGIPMODE_SYNC_TSC;
}
7863
7864
7865/**
7866 * Initializes per-CPU GIP information.
7867 *
7868 * @param pDevExt Pointer to the device instance data.
7869 * @param pGip Pointer to the GIP.
7870 * @param pCpu Pointer to which GIP CPU to initalize.
7871 * @param u64NanoTS The current nanosecond timestamp.
7872 */
7873static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7874{
7875 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
7876 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
7877 pCpu->u32TransactionId = 2;
7878 pCpu->u64NanoTS = u64NanoTS;
7879 pCpu->u64TSC = ASMReadTSC();
7880 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7881 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
7882
7883 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7884 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7885 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7886 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7887
7888 /*
7889 * We don't know the following values until we've executed updates.
7890 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7891 * the 2nd timer callout.
7892 */
7893 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7894 pCpu->u32UpdateIntervalTSC
7895 = pCpu->au32TSCHistory[0]
7896 = pCpu->au32TSCHistory[1]
7897 = pCpu->au32TSCHistory[2]
7898 = pCpu->au32TSCHistory[3]
7899 = pCpu->au32TSCHistory[4]
7900 = pCpu->au32TSCHistory[5]
7901 = pCpu->au32TSCHistory[6]
7902 = pCpu->au32TSCHistory[7]
7903 = (uint32_t)(_4G / pGip->u32UpdateHz);
7904}
7905
7906
7907/**
7908 * Initializes the GIP data.
7909 *
7910 * @param pDevExt Pointer to the device instance data.
7911 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7912 * @param HCPhys The physical address of the GIP.
7913 * @param u64NanoTS The current nanosecond timestamp.
7914 * @param uUpdateHz The update frequency.
7915 * @param uUpdateIntervalNS The update interval in nanoseconds.
7916 * @param cCpus The CPU count.
7917 */
7918static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7919 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7920{
7921 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7922 unsigned i;
7923#ifdef DEBUG_DARWIN_GIP
7924 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7925#else
7926 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7927#endif
7928
7929 /*
7930 * Initialize the structure.
7931 */
7932 memset(pGip, 0, cbGip);
7933
7934 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7935 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7936 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
7937 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
7938 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
7939 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
7940 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
7941 else
7942 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
7943 pGip->cCpus = (uint16_t)cCpus;
7944 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7945 pGip->u32UpdateHz = uUpdateHz;
7946 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7947 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
7948 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7949 RTCpuSetEmpty(&pGip->PresentCpuSet);
7950 RTMpGetSet(&pGip->PossibleCpuSet);
7951 pGip->cOnlineCpus = RTMpGetOnlineCount();
7952 pGip->cPresentCpus = RTMpGetPresentCount();
7953 pGip->cPossibleCpus = RTMpGetCount();
7954 pGip->idCpuMax = RTMpGetMaxCpuId();
7955 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7956 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7957 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7958 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7959 for (i = 0; i < cCpus; i++)
7960 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
7961
7962 /*
7963 * Link it to the device extension.
7964 */
7965 pDevExt->pGip = pGip;
7966 pDevExt->HCPhysGip = HCPhys;
7967 pDevExt->cGipUsers = 0;
7968}
7969
7970
7971/**
7972 * On CPU initialization callback for RTMpOnAll.
7973 *
7974 * @param idCpu The CPU ID.
7975 * @param pvUser1 The device extension.
7976 * @param pvUser2 The GIP.
7977 */
7978static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7979{
7980 /* This is good enough, even though it will update some of the globals a
7981 bit to much. */
7982 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7983}
7984
7985
7986/**
7987 * Invalidates the GIP data upon termination.
7988 *
7989 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7990 */
7991static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7992{
7993 unsigned i;
7994 pGip->u32Magic = 0;
7995 for (i = 0; i < pGip->cCpus; i++)
7996 {
7997 pGip->aCPUs[i].u64NanoTS = 0;
7998 pGip->aCPUs[i].u64TSC = 0;
7999 pGip->aCPUs[i].iTSCHistoryHead = 0;
8000 pGip->aCPUs[i].u64TSCSample = 0;
8001 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
8002 }
8003}
8004
8005
/**
 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
 * updates all the per cpu data except the transaction id.
 *
 * @param   pDevExt     The device extension.
 * @param   pGipCpu     Pointer to the per cpu data.
 * @param   u64NanoTS   The current time stamp.
 * @param   u64TSC      The current TSC.
 * @param   iTick       The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!  The caller has already
 *          opened an update transaction (odd u32TransactionId), so readers
 *          will retry while we write.
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t    u64TSCDelta;
    uint32_t    u32UpdateIntervalTSC;
    uint32_t    u32UpdateIntervalTSCSlack;
    unsigned    iTSCHistoryHead;
    uint64_t    u64CpuHz;
    uint32_t    u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /* We don't need to keep recalculating the frequency when it's invariant. */
    if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        return;

    /* An interval that doesn't fit in 32 bits is bogus (missed ticks,
       suspend/resume, ...); substitute the previous interval. */
    if (u64TSCDelta >> 32)
    {
        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
        pGipCpu->cErrors++;
    }

    /*
     * On the 2nd and 3rd callout, reset the history with the current TSC
     * interval since the values entered by supdrvGipInit are totally off.
     * The interval on the 1st callout completely unreliable, the 2nd is a bit
     * better, while the 3rd should be most reliable.
     */
    u32TransactionId = pGipCpu->u32TransactionId;
    if (RT_UNLIKELY(   (   u32TransactionId == 5
                        || u32TransactionId == 7)
                    && (   iTick == 2
                        || iTick == 3) ))
    {
        unsigned i;
        for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
            ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
    }

    /*
     * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
     * Wait until we have at least one full history since the above history reset. The
     * assumption is that the majority of the previous history values will be tolerable.
     * See @bugref{6710} comment #67.
     *
     * If the timer fired off-schedule, replace the measured TSC interval with
     * the weighted average of the recorded history instead of polluting it.
     */
    if (   u32TransactionId > 23 /* 7 + (8 * 2) */
        && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
        if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
            || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
        {
            uint32_t u32;
            u32 = pGipCpu->au32TSCHistory[0];
            u32 += pGipCpu->au32TSCHistory[1];
            u32 += pGipCpu->au32TSCHistory[2];
            u32 += pGipCpu->au32TSCHistory[3];
            u32 >>= 2;
            u64TSCDelta = pGipCpu->au32TSCHistory[4];
            u64TSCDelta += pGipCpu->au32TSCHistory[5];
            u64TSCDelta += pGipCpu->au32TSCHistory[6];
            u64TSCDelta += pGipCpu->au32TSCHistory[7];
            u64TSCDelta >>= 2;
            u64TSCDelta += u32;
            u64TSCDelta >>= 1;
        }
    }

    /*
     * TSC History.
     */
    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
    ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

    /*
     * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
     *
     * On Windows, we have an occasional (but recurring) sour value that messed up
     * the history but taking only 1 interval reduces the precision overall.
     * However, this problem existed before the invariant mode was introduced.
     */
    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
        || pGip->u32UpdateHz >= 1000)
    {
        uint32_t u32;
        u32 = pGipCpu->au32TSCHistory[0];
        u32 += pGipCpu->au32TSCHistory[1];
        u32 += pGipCpu->au32TSCHistory[2];
        u32 += pGipCpu->au32TSCHistory[3];
        u32 >>= 2;
        u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
        u32UpdateIntervalTSC >>= 2;
        u32UpdateIntervalTSC += u32;
        u32UpdateIntervalTSC >>= 1;

        /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    }
    else if (pGip->u32UpdateHz >= 90)
    {
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
        u32UpdateIntervalTSC >>= 1;

        /* value chosen on a 2GHz thinkpad running windows */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    }
    else
    {
        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

        /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    }
    ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

    /*
     * CpuHz = ticks-per-interval scaled to one second.
     */
    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
    u64CpuHz /= pGip->u32UpdateIntervalNS;
    ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
}
8160
8161
/**
 * Updates the GIP.
 *
 * @param   pDevExt     The device extension.
 * @param   u64NanoTS   The current nanosecond timesamp.
 * @param   u64TSC      The current TSC timesamp.
 * @param   idCpu       The CPU ID.
 * @param   iTick       The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     * In non-async modes all CPUs share entry 0; in async mode each CPU has
     * its own entry, looked up via the APIC ID.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0];
    else
    {
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return; /* APIC ID not registered (yet) - skip this tick. */
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return; /* Stale mapping - skip this tick. */
    }

    /*
     * Start update transaction.  An odd transaction id tells readers an
     * update is in progress so they retry.
     */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC   /* cuz we're not recalculating the frequency on invariant hosts. */
        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30) /* sanity range */
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
            }
#endif
        }
        /* The |1 keeps the field non-zero so the branch above is taken next time. */
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction (transaction id becomes even again).
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
8241
8242
/**
 * Updates the per cpu GIP data for the calling cpu.
 *
 * @param   pDevExt     The device extension.
 * @param   u64NanoTS   The current nanosecond timesamp.
 * @param   u64TSC      The current TSC timesamp.
 * @param   idCpu       The CPU ID.
 * @param   idApic      The APIC id for the CPU index.
 * @param   iTick       The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnline(pDevExt, idCpu);
    }

    /* Resolve the per-CPU entry via the APIC ID and double-check it really
       belongs to the calling CPU before touching it. */
    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction (odd transaction id => readers retry).
             */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction.
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
8302
8303
/**
 * Resume built-in keyboard on MacBook Air and Pro hosts.
 * If there is no built-in keyboard device, return success anyway.
 *
 * All the real work is done in the Darwin-specific backend; on every other
 * platform this I/O control is simply not implemented.
 *
 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
 */
static int supdrvIOCtl_ResumeSuspendedKbds(void)
{
#if defined(RT_OS_DARWIN)
    return supdrvDarwinResumeSuspendedKbds();
#else
    return VERR_NOT_IMPLEMENTED;
#endif
}
8318
8319
8320/**
8321 * Service a TSC-delta measurement request.
8322 *
8323 * @returns VBox status code.
8324 * @param pDevExt Pointer to the device instance data.
8325 * @param pSession The support driver session.
8326 * @param pReq Pointer to the TSC-delta measurement request.
8327 */
8328static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
8329{
8330 PSUPGLOBALINFOPAGE pGip;
8331 RTCPUID idCpuWorker;
8332 int rc;
8333 int16_t cTries;
8334 RTMSINTERVAL cMsWaitRetry;
8335 uint16_t iCpu;
8336
8337 /*
8338 * Validate.
8339 */
8340 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
8341 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
8342 return VERR_WRONG_ORDER;
8343 pGip = pDevExt->pGip;
8344 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
8345
8346 idCpuWorker = pReq->u.In.idCpu;
8347 if (idCpuWorker == NIL_RTCPUID)
8348 return VERR_INVALID_CPU_ID;
8349 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
8350 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
8351
8352 /*
8353 * The request is a noop if the TSC delta isn't being used.
8354 */
8355 pGip = pDevExt->pGip;
8356 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
8357 return VINF_SUCCESS;
8358
8359 rc = VERR_CPU_NOT_FOUND;
8360 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
8361 {
8362 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
8363 if (pGipCpuWorker->idCpu == idCpuWorker)
8364 {
8365 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
8366 && !pReq->u.In.fForce)
8367 return VINF_SUCCESS;
8368
8369#ifdef SUPDRV_USE_TSC_DELTA_THREAD
8370 if (pReq->u.In.fAsync)
8371 {
8372 /** @todo Async. doesn't implement options like retries, waiting. We'll need
8373 * to pass those options to the thread somehow and implement it in the
8374 * thread. Check if anyone uses/needs fAsync before implementing this. */
8375 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
8376 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
8377 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
8378 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
8379 {
8380 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
8381 }
8382 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
8383 RTThreadUserSignal(pDevExt->hTscDeltaThread);
8384 return VINF_SUCCESS;
8385 }
8386
8387 /*
8388 * If a TSC-delta measurement request is already being serviced by the thread,
8389 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
8390 */
8391 while (cTries-- > 0)
8392 {
8393 SUPDRVTSCDELTATHREADSTATE enmState;
8394 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
8395 enmState = pDevExt->enmTscDeltaThreadState;
8396 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
8397
8398 if ( enmState == kTscDeltaThreadState_Measuring
8399 || enmState == kTscDeltaThreadState_WaitAndMeasure)
8400 {
8401 if ( !cTries
8402 || !cMsWaitRetry)
8403 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
8404 if (cMsWaitRetry)
8405 RTThreadSleep(cMsWaitRetry);
8406 }
8407 }
8408 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
8409#endif
8410
8411 while (cTries-- > 0)
8412 {
8413 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
8414 if (RT_SUCCESS(rc))
8415 {
8416 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
8417 break;
8418 }
8419
8420 if (cMsWaitRetry)
8421 RTThreadSleep(cMsWaitRetry);
8422 }
8423
8424 break;
8425 }
8426 }
8427 return rc;
8428}
8429
8430
8431/**
8432 * Reads TSC with delta applied.
8433 *
8434 * Will try to resolve delta value INT64_MAX before applying it. This is the
8435 * main purpose of this function, to handle the case where the delta needs to be
8436 * determined.
8437 *
8438 * @returns VBox status code.
8439 * @param pDevExt Pointer to the device instance data.
8440 * @param pSession The support driver session.
8441 * @param pReq Pointer to the TSC-read request.
8442 */
8443static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
8444{
8445 PSUPGLOBALINFOPAGE pGip;
8446 int rc;
8447
8448 /*
8449 * Validate. We require the client to have mapped GIP (no asserting on
8450 * ring-3 preconditions).
8451 */
8452 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
8453 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
8454 return VERR_WRONG_ORDER;
8455 pGip = pDevExt->pGip;
8456 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
8457
8458 /*
8459 * We're usually here because we need to apply delta, but we shouldn't be
8460 * upset if the GIP is some different mode.
8461 */
8462 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
8463 {
8464 uint32_t cTries = 0;
8465 for (;;)
8466 {
8467 /*
8468 * Start by gathering the data, using CLI for disabling preemption
8469 * while we do that.
8470 */
8471 RTCCUINTREG uFlags = ASMIntDisableFlags();
8472 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
8473 int iGipCpu;
8474 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
8475 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
8476 {
8477 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
8478 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
8479 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
8480 ASMSetFlags(uFlags);
8481
8482 /*
8483 * If we're lucky we've got a delta, but no predicitions here
8484 * as this I/O control is normally only used when the TSC delta
8485 * is set to INT64_MAX.
8486 */
8487 if (i64Delta != INT64_MAX)
8488 {
8489 pReq->u.Out.u64AdjustedTsc -= i64Delta;
8490 rc = VINF_SUCCESS;
8491 break;
8492 }
8493
8494 /* Give up after a few times. */
8495 if (cTries >= 4)
8496 {
8497 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
8498 break;
8499 }
8500
8501 /* Need to measure the delta an try again. */
8502 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
8503 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
8504 }
8505 else
8506 {
8507 /* This really shouldn't happen. */
8508 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
8509 pReq->u.Out.idApic = ASMGetApicId();
8510 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
8511 ASMSetFlags(uFlags);
8512 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
8513 break;
8514 }
8515 }
8516 }
8517 else
8518 {
8519 /*
8520 * No delta to apply. Easy. Deal with preemption the lazy way.
8521 */
8522 RTCCUINTREG uFlags = ASMIntDisableFlags();
8523 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
8524 int iGipCpu;
8525 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
8526 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
8527 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
8528 else
8529 pReq->u.Out.idApic = ASMGetApicId();
8530 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
8531 ASMSetFlags(uFlags);
8532 rc = VINF_SUCCESS;
8533 }
8534
8535 return rc;
8536}
8537
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette