VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@ 53430

Last change on this file since 53430 was 53430, checked in by vboxsync, 10 years ago

VMM/TM: First step in introducing the invariant TM mode.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 295.7 KB
Line 
1/* $Id: SUPDrv.c 53430 2014-12-03 13:18:41Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code.
4 */
5
6/*
7 * Copyright (C) 2006-2014 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/x86.h>
58
59#include <VBox/param.h>
60#include <VBox/log.h>
61#include <VBox/err.h>
62#include <VBox/vmm/hm_svm.h>
63#include <VBox/vmm/hm_vmx.h>
64
/*
 * Static probe macros (VBOXDRV_*).
 *
 * On Solaris and Darwin dtrace/SUPDrv.h is included (presumably it supplies
 * the real probe macros - confirm against that header).  On every other host
 * the four probe macros are defined here as empty statements, so call sites
 * need no #ifdefs of their own.
 */
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68# define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
69# define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
70# define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
71# define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
72#endif
73
74/*
75 * Logging assignments:
76 * Log - useful stuff, like failures.
77 * LogFlow - program flow, except the really noisy bits.
78 * Log2 - Cleanup.
79 * Log3 - Loader flow noise.
80 * Log4 - Call VMMR0 flow noise.
81 * Log5 - Native yet-to-be-defined noise.
82 * Log6 - Native ioctl flow noise.
83 *
84 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
85 * instantiation in log-vbox.c(pp).
86 */
87
88
89/*******************************************************************************
90* Defined Constants And Macros *
91*******************************************************************************/
92/** The frequency by which we recalculate the u32UpdateHz and
93 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
94 *
95 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
96 */
97#define GIP_UPDATEHZ_RECALC_FREQ 0x800
98
99/** A reserved TSC value used for synchronization as well as measurement of
100 * TSC deltas. */
101#define GIP_TSC_DELTA_RSVD UINT64_MAX
102/** The number of TSC delta measurement loops in total (includes primer and
103 * read-time loops). */
104#define GIP_TSC_DELTA_LOOPS 96
105/** The number of cache primer loops. */
106#define GIP_TSC_DELTA_PRIMER_LOOPS 4
107/** The number of loops until we keep computing the minimum read time. */
108#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
/* The GIP_TSC_DELTA_SYNC_* values below are the states stored in
 * SUPTSCDELTASYNC::u. */
109/** Stop measurement of TSC delta. */
110#define GIP_TSC_DELTA_SYNC_STOP 0
111/** Start measurement of TSC delta. */
112#define GIP_TSC_DELTA_SYNC_START 1
113/** Worker thread is ready for reading the TSC. */
114#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
115/** Worker thread is done updating TSC delta info. */
116#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
117/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
118 * with a timeout. */
119#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
120/** When IPRT isn't concurrent safe: Worker is ready after waiting for
121 * master with a timeout. */
122#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
123/** The TSC-refinement interval in seconds. */
124#define GIP_TSC_REFINE_INTERVAL 5
125
/* Compile-time sanity checks on the relative sizes of the loop counts above:
 * the primer loops must be fewer than the read-time loops, and the two
 * together must be fewer than the total number of measurement loops. */
126AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
127AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
128
129/** @def VBOX_SVN_REV
130 * The makefile should define this if it can.
 * Falls back to 0 when the build did not supply a revision. */
131#ifndef VBOX_SVN_REV
132# define VBOX_SVN_REV 0
133#endif
134
135#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
136# define DO_NOT_START_GIP
137#endif
138
139/** Whether the application of TSC-deltas is required.
 *
 * Evaluates to true when the GIP is in SUPGIPMODE_INVARIANT_TSC mode and
 * g_fOsTscDeltasInSync is clear, i.e. the host OS has not already normalized
 * the TSC deltas across CPUs.
 *
 * @param a_pGip  Pointer to the GIP whose u32Mode member is examined. */
140#define GIP_ARE_TSC_DELTAS_APPLICABLE(a_pGip) ((a_pGip)->u32Mode == SUPGIPMODE_INVARIANT_TSC && !g_fOsTscDeltasInSync)
141
142
143/*******************************************************************************
144* Internal Functions *
145*******************************************************************************/
/* Forward declarations of the file-local helpers, so they can be used before
 * their definitions further down in the file. */
/* Session object handle table callbacks and session memory tracking. */
146static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
147static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
148static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
149static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
/* Loader (Ldr) requests and helpers. */
150static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
151static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
152static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
153static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
154static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
155static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
156static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
157static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
158static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
159DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
160DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
/* Miscellaneous I/O control request workers. */
161static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
162static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
163static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
164static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq);
165static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq);
/* Global information page (GIP) creation, timers, updating and TSC delta
 * measurement. */
166static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
167static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
168static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
169static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
170static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
171static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
172 unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
173static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
174static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
175static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
176static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
177 RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
178static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
179static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster);
180static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
181static int supdrvIOCtl_ResumeSuspendedKbds(void);
182
183
184/*******************************************************************************
185* Global Variables *
186*******************************************************************************/
/** Pointer to the global information page (GIP).  Exported, and its address is
 * also published by name through the g_aFunctions table.  Starts out NULL. */
187DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
188
189/**
190 * The TSC delta synchronization struct, rounded to cache line size.
 *
 * The union overlays the 32-bit synchronization variable with 64 bytes of
 * padding; the AssertCompileSize below verifies the total size is 64 bytes.
191 */
192typedef union SUPTSCDELTASYNC
193{
194 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
195 volatile uint32_t u;
196 /** Padding to cache line size. */
197 uint8_t u8Padding[64];
198} SUPTSCDELTASYNC;
199AssertCompileSize(SUPTSCDELTASYNC, 64);
/** Pointer to a TSC delta synchronization struct. */
200typedef SUPTSCDELTASYNC *PSUPTSCDELTASYNC;
201
202/** Pointer to the TSC delta sync. struct.
 * NOTE(review): presumably the raw (unaligned) allocation that backs
 * g_pTscDeltaSync - confirm against the allocation code. */
203static void *g_pvTscDeltaSync;
204/** Aligned pointer to the TSC delta sync. struct. */
205static PSUPTSCDELTASYNC g_pTscDeltaSync;
206/** The TSC delta measurement initiator Cpu Id.  NIL_RTCPUID initially. */
207static volatile RTCPUID g_idTscDeltaInitiator = NIL_RTCPUID;
208/** Number of online/offline events, incremented each time a CPU goes online
209 * or offline. */
210static volatile uint32_t g_cMpOnOffEvents;
211/** TSC reading during start of TSC frequency refinement phase. */
212static uint64_t g_u64TscAnchor;
213/** Timestamp (in nanosec) during start of TSC frequency refinement phase. */
214static uint64_t g_u64NanoTSAnchor;
215/** Pointer to the timer used to refine the TSC frequency. */
216static PRTTIMER g_pTscRefineTimer;
217/** Whether the host OS has already normalized the hardware TSC deltas across
218 * CPUs.  Consulted by GIP_ARE_TSC_DELTAS_APPLICABLE(). */
219static bool g_fOsTscDeltasInSync;
220
221/**
222 * Array of the R0 SUP API.
 *
 * Each entry pairs a symbol name with a pointer value.  The first ten entries
 * (SUPR0AbsIs64bit .. SUPR0AbsKernelGS) are placeholders initialized to 0;
 * supdrvInitDevExt() overwrites their pfn members at runtime, addressing them
 * by array index (0..9), so their position and order must not change.  That
 * runtime fixup is also why the table is not const.
 *
 * The g_pSUPGlobalInfoPage entry holds the address of a variable rather than
 * of a function.
 *
 * NOTE(review): the "SED: START", "SED: DATA" and "SED: END" comments look
 * like markers consumed by external tooling - keep them intact; confirm
 * against the build scripts.
223 */
224static SUPFUNC g_aFunctions[] =
225{
226/* SED: START */
227 /* name function */
228 /* Entries with absolute addresses determined at runtime, fixup
229 code makes ugly ASSUMPTIONS about the order here: */
230 { "SUPR0AbsIs64bit", (void *)0 },
231 { "SUPR0Abs64bitKernelCS", (void *)0 },
232 { "SUPR0Abs64bitKernelSS", (void *)0 },
233 { "SUPR0Abs64bitKernelDS", (void *)0 },
234 { "SUPR0AbsKernelCS", (void *)0 },
235 { "SUPR0AbsKernelSS", (void *)0 },
236 { "SUPR0AbsKernelDS", (void *)0 },
237 { "SUPR0AbsKernelES", (void *)0 },
238 { "SUPR0AbsKernelFS", (void *)0 },
239 { "SUPR0AbsKernelGS", (void *)0 },
240 /* Normal function pointers: */
241 { "g_pSUPGlobalInfoPage", (void *)&g_pSUPGlobalInfoPage }, /* SED: DATA */
242 { "SUPGetGIP", (void *)SUPGetGIP },
243 { "SUPR0ComponentDeregisterFactory", (void *)SUPR0ComponentDeregisterFactory },
244 { "SUPR0ComponentQueryFactory", (void *)SUPR0ComponentQueryFactory },
245 { "SUPR0ComponentRegisterFactory", (void *)SUPR0ComponentRegisterFactory },
246 { "SUPR0ContAlloc", (void *)SUPR0ContAlloc },
247 { "SUPR0ContFree", (void *)SUPR0ContFree },
248 { "SUPR0EnableVTx", (void *)SUPR0EnableVTx },
249 { "SUPR0SuspendVTxOnCpu", (void *)SUPR0SuspendVTxOnCpu },
250 { "SUPR0ResumeVTxOnCpu", (void *)SUPR0ResumeVTxOnCpu },
251 { "SUPR0GetPagingMode", (void *)SUPR0GetPagingMode },
252 { "SUPR0GetKernelFeatures", (void *)SUPR0GetKernelFeatures },
253 { "SUPR0LockMem", (void *)SUPR0LockMem },
254 { "SUPR0LowAlloc", (void *)SUPR0LowAlloc },
255 { "SUPR0LowFree", (void *)SUPR0LowFree },
256 { "SUPR0MemAlloc", (void *)SUPR0MemAlloc },
257 { "SUPR0MemFree", (void *)SUPR0MemFree },
258 { "SUPR0MemGetPhys", (void *)SUPR0MemGetPhys },
259 { "SUPR0ObjAddRef", (void *)SUPR0ObjAddRef },
260 { "SUPR0ObjAddRefEx", (void *)SUPR0ObjAddRefEx },
261 { "SUPR0ObjRegister", (void *)SUPR0ObjRegister },
262 { "SUPR0ObjRelease", (void *)SUPR0ObjRelease },
263 { "SUPR0ObjVerifyAccess", (void *)SUPR0ObjVerifyAccess },
264 { "SUPR0PageAllocEx", (void *)SUPR0PageAllocEx },
265 { "SUPR0PageFree", (void *)SUPR0PageFree },
266 { "SUPR0Printf", (void *)SUPR0Printf },
267 { "SUPR0TracerDeregisterDrv", (void *)SUPR0TracerDeregisterDrv },
268 { "SUPR0TracerDeregisterImpl", (void *)SUPR0TracerDeregisterImpl },
269 { "SUPR0TracerFireProbe", (void *)SUPR0TracerFireProbe },
270 { "SUPR0TracerRegisterDrv", (void *)SUPR0TracerRegisterDrv },
271 { "SUPR0TracerRegisterImpl", (void *)SUPR0TracerRegisterImpl },
272 { "SUPR0TracerRegisterModule", (void *)SUPR0TracerRegisterModule },
273 { "SUPR0TracerUmodProbeFire", (void *)SUPR0TracerUmodProbeFire },
274 { "SUPR0UnlockMem", (void *)SUPR0UnlockMem },
275 { "SUPSemEventClose", (void *)SUPSemEventClose },
276 { "SUPSemEventCreate", (void *)SUPSemEventCreate },
277 { "SUPSemEventGetResolution", (void *)SUPSemEventGetResolution },
278 { "SUPSemEventMultiClose", (void *)SUPSemEventMultiClose },
279 { "SUPSemEventMultiCreate", (void *)SUPSemEventMultiCreate },
280 { "SUPSemEventMultiGetResolution", (void *)SUPSemEventMultiGetResolution },
281 { "SUPSemEventMultiReset", (void *)SUPSemEventMultiReset },
282 { "SUPSemEventMultiSignal", (void *)SUPSemEventMultiSignal },
283 { "SUPSemEventMultiWait", (void *)SUPSemEventMultiWait },
284 { "SUPSemEventMultiWaitNoResume", (void *)SUPSemEventMultiWaitNoResume },
285 { "SUPSemEventMultiWaitNsAbsIntr", (void *)SUPSemEventMultiWaitNsAbsIntr },
286 { "SUPSemEventMultiWaitNsRelIntr", (void *)SUPSemEventMultiWaitNsRelIntr },
287 { "SUPSemEventSignal", (void *)SUPSemEventSignal },
288 { "SUPSemEventWait", (void *)SUPSemEventWait },
289 { "SUPSemEventWaitNoResume", (void *)SUPSemEventWaitNoResume },
290 { "SUPSemEventWaitNsAbsIntr", (void *)SUPSemEventWaitNsAbsIntr },
291 { "SUPSemEventWaitNsRelIntr", (void *)SUPSemEventWaitNsRelIntr },
292
293 { "RTAssertAreQuiet", (void *)RTAssertAreQuiet },
294 { "RTAssertMayPanic", (void *)RTAssertMayPanic },
295 { "RTAssertMsg1", (void *)RTAssertMsg1 },
296 { "RTAssertMsg2AddV", (void *)RTAssertMsg2AddV },
297 { "RTAssertMsg2V", (void *)RTAssertMsg2V },
298 { "RTAssertSetMayPanic", (void *)RTAssertSetMayPanic },
299 { "RTAssertSetQuiet", (void *)RTAssertSetQuiet },
300 { "RTCrc32", (void *)RTCrc32 },
301 { "RTCrc32Finish", (void *)RTCrc32Finish },
302 { "RTCrc32Process", (void *)RTCrc32Process },
303 { "RTCrc32Start", (void *)RTCrc32Start },
304 { "RTErrConvertFromErrno", (void *)RTErrConvertFromErrno },
305 { "RTErrConvertToErrno", (void *)RTErrConvertToErrno },
306 { "RTHandleTableAllocWithCtx", (void *)RTHandleTableAllocWithCtx },
307 { "RTHandleTableCreate", (void *)RTHandleTableCreate },
308 { "RTHandleTableCreateEx", (void *)RTHandleTableCreateEx },
309 { "RTHandleTableDestroy", (void *)RTHandleTableDestroy },
310 { "RTHandleTableFreeWithCtx", (void *)RTHandleTableFreeWithCtx },
311 { "RTHandleTableLookupWithCtx", (void *)RTHandleTableLookupWithCtx },
312 { "RTLogDefaultInstance", (void *)RTLogDefaultInstance },
313 { "RTLogGetDefaultInstance", (void *)RTLogGetDefaultInstance },
314 { "RTLogLoggerExV", (void *)RTLogLoggerExV },
315 { "RTLogPrintfV", (void *)RTLogPrintfV },
316 { "RTLogRelDefaultInstance", (void *)RTLogRelDefaultInstance },
317 { "RTLogSetDefaultInstanceThread", (void *)RTLogSetDefaultInstanceThread },
318 { "RTMemAllocExTag", (void *)RTMemAllocExTag },
319 { "RTMemAllocTag", (void *)RTMemAllocTag },
320 { "RTMemAllocVarTag", (void *)RTMemAllocVarTag },
321 { "RTMemAllocZTag", (void *)RTMemAllocZTag },
322 { "RTMemAllocZVarTag", (void *)RTMemAllocZVarTag },
323 { "RTMemDupExTag", (void *)RTMemDupExTag },
324 { "RTMemDupTag", (void *)RTMemDupTag },
325 { "RTMemFree", (void *)RTMemFree },
326 { "RTMemFreeEx", (void *)RTMemFreeEx },
327 { "RTMemReallocTag", (void *)RTMemReallocTag },
328 { "RTMpCpuId", (void *)RTMpCpuId },
329 { "RTMpCpuIdFromSetIndex", (void *)RTMpCpuIdFromSetIndex },
330 { "RTMpCpuIdToSetIndex", (void *)RTMpCpuIdToSetIndex },
331 { "RTMpGetArraySize", (void *)RTMpGetArraySize },
332 { "RTMpGetCount", (void *)RTMpGetCount },
333 { "RTMpGetMaxCpuId", (void *)RTMpGetMaxCpuId },
334 { "RTMpGetOnlineCount", (void *)RTMpGetOnlineCount },
335 { "RTMpGetOnlineSet", (void *)RTMpGetOnlineSet },
336 { "RTMpGetSet", (void *)RTMpGetSet },
337 { "RTMpIsCpuOnline", (void *)RTMpIsCpuOnline },
338 { "RTMpIsCpuPossible", (void *)RTMpIsCpuPossible },
339 { "RTMpIsCpuWorkPending", (void *)RTMpIsCpuWorkPending },
340 { "RTMpNotificationDeregister", (void *)RTMpNotificationDeregister },
341 { "RTMpNotificationRegister", (void *)RTMpNotificationRegister },
342 { "RTMpOnAll", (void *)RTMpOnAll },
343 { "RTMpOnOthers", (void *)RTMpOnOthers },
344 { "RTMpOnSpecific", (void *)RTMpOnSpecific },
345 { "RTMpPokeCpu", (void *)RTMpPokeCpu },
346 { "RTNetIPv4AddDataChecksum", (void *)RTNetIPv4AddDataChecksum },
347 { "RTNetIPv4AddTCPChecksum", (void *)RTNetIPv4AddTCPChecksum },
348 { "RTNetIPv4AddUDPChecksum", (void *)RTNetIPv4AddUDPChecksum },
349 { "RTNetIPv4FinalizeChecksum", (void *)RTNetIPv4FinalizeChecksum },
350 { "RTNetIPv4HdrChecksum", (void *)RTNetIPv4HdrChecksum },
351 { "RTNetIPv4IsDHCPValid", (void *)RTNetIPv4IsDHCPValid },
352 { "RTNetIPv4IsHdrValid", (void *)RTNetIPv4IsHdrValid },
353 { "RTNetIPv4IsTCPSizeValid", (void *)RTNetIPv4IsTCPSizeValid },
354 { "RTNetIPv4IsTCPValid", (void *)RTNetIPv4IsTCPValid },
355 { "RTNetIPv4IsUDPSizeValid", (void *)RTNetIPv4IsUDPSizeValid },
356 { "RTNetIPv4IsUDPValid", (void *)RTNetIPv4IsUDPValid },
357 { "RTNetIPv4PseudoChecksum", (void *)RTNetIPv4PseudoChecksum },
358 { "RTNetIPv4PseudoChecksumBits", (void *)RTNetIPv4PseudoChecksumBits },
359 { "RTNetIPv4TCPChecksum", (void *)RTNetIPv4TCPChecksum },
360 { "RTNetIPv4UDPChecksum", (void *)RTNetIPv4UDPChecksum },
361 { "RTNetIPv6PseudoChecksum", (void *)RTNetIPv6PseudoChecksum },
362 { "RTNetIPv6PseudoChecksumBits", (void *)RTNetIPv6PseudoChecksumBits },
363 { "RTNetIPv6PseudoChecksumEx", (void *)RTNetIPv6PseudoChecksumEx },
364 { "RTNetTCPChecksum", (void *)RTNetTCPChecksum },
365 { "RTNetUDPChecksum", (void *)RTNetUDPChecksum },
366 { "RTPowerNotificationDeregister", (void *)RTPowerNotificationDeregister },
367 { "RTPowerNotificationRegister", (void *)RTPowerNotificationRegister },
368 { "RTProcSelf", (void *)RTProcSelf },
369 { "RTR0AssertPanicSystem", (void *)RTR0AssertPanicSystem },
370 { "RTR0MemAreKrnlAndUsrDifferent", (void *)RTR0MemAreKrnlAndUsrDifferent },
371 { "RTR0MemKernelIsValidAddr", (void *)RTR0MemKernelIsValidAddr },
372 { "RTR0MemKernelCopyFrom", (void *)RTR0MemKernelCopyFrom },
373 { "RTR0MemKernelCopyTo", (void *)RTR0MemKernelCopyTo },
374 { "RTR0MemObjAddress", (void *)RTR0MemObjAddress },
375 { "RTR0MemObjAddressR3", (void *)RTR0MemObjAddressR3 },
376 { "RTR0MemObjAllocContTag", (void *)RTR0MemObjAllocContTag },
377 { "RTR0MemObjAllocLowTag", (void *)RTR0MemObjAllocLowTag },
378 { "RTR0MemObjAllocPageTag", (void *)RTR0MemObjAllocPageTag },
379 { "RTR0MemObjAllocPhysExTag", (void *)RTR0MemObjAllocPhysExTag },
380 { "RTR0MemObjAllocPhysNCTag", (void *)RTR0MemObjAllocPhysNCTag },
381 { "RTR0MemObjAllocPhysTag", (void *)RTR0MemObjAllocPhysTag },
382 { "RTR0MemObjEnterPhysTag", (void *)RTR0MemObjEnterPhysTag },
383 { "RTR0MemObjFree", (void *)RTR0MemObjFree },
384 { "RTR0MemObjGetPagePhysAddr", (void *)RTR0MemObjGetPagePhysAddr },
385 { "RTR0MemObjIsMapping", (void *)RTR0MemObjIsMapping },
386 { "RTR0MemObjLockUserTag", (void *)RTR0MemObjLockUserTag },
387 { "RTR0MemObjMapKernelExTag", (void *)RTR0MemObjMapKernelExTag },
388 { "RTR0MemObjMapKernelTag", (void *)RTR0MemObjMapKernelTag },
389 { "RTR0MemObjMapUserTag", (void *)RTR0MemObjMapUserTag },
390 { "RTR0MemObjProtect", (void *)RTR0MemObjProtect },
391 { "RTR0MemObjSize", (void *)RTR0MemObjSize },
392 { "RTR0MemUserCopyFrom", (void *)RTR0MemUserCopyFrom },
393 { "RTR0MemUserCopyTo", (void *)RTR0MemUserCopyTo },
394 { "RTR0MemUserIsValidAddr", (void *)RTR0MemUserIsValidAddr },
395 { "RTR0ProcHandleSelf", (void *)RTR0ProcHandleSelf },
396 { "RTSemEventCreate", (void *)RTSemEventCreate },
397 { "RTSemEventDestroy", (void *)RTSemEventDestroy },
398 { "RTSemEventGetResolution", (void *)RTSemEventGetResolution },
399 { "RTSemEventMultiCreate", (void *)RTSemEventMultiCreate },
400 { "RTSemEventMultiDestroy", (void *)RTSemEventMultiDestroy },
401 { "RTSemEventMultiGetResolution", (void *)RTSemEventMultiGetResolution },
402 { "RTSemEventMultiReset", (void *)RTSemEventMultiReset },
403 { "RTSemEventMultiSignal", (void *)RTSemEventMultiSignal },
404 { "RTSemEventMultiWait", (void *)RTSemEventMultiWait },
405 { "RTSemEventMultiWaitEx", (void *)RTSemEventMultiWaitEx },
406 { "RTSemEventMultiWaitExDebug", (void *)RTSemEventMultiWaitExDebug },
407 { "RTSemEventMultiWaitNoResume", (void *)RTSemEventMultiWaitNoResume },
408 { "RTSemEventSignal", (void *)RTSemEventSignal },
409 { "RTSemEventWait", (void *)RTSemEventWait },
410 { "RTSemEventWaitEx", (void *)RTSemEventWaitEx },
411 { "RTSemEventWaitExDebug", (void *)RTSemEventWaitExDebug },
412 { "RTSemEventWaitNoResume", (void *)RTSemEventWaitNoResume },
413 { "RTSemFastMutexCreate", (void *)RTSemFastMutexCreate },
414 { "RTSemFastMutexDestroy", (void *)RTSemFastMutexDestroy },
415 { "RTSemFastMutexRelease", (void *)RTSemFastMutexRelease },
416 { "RTSemFastMutexRequest", (void *)RTSemFastMutexRequest },
417 { "RTSemMutexCreate", (void *)RTSemMutexCreate },
418 { "RTSemMutexDestroy", (void *)RTSemMutexDestroy },
419 { "RTSemMutexRelease", (void *)RTSemMutexRelease },
420 { "RTSemMutexRequest", (void *)RTSemMutexRequest },
421 { "RTSemMutexRequestDebug", (void *)RTSemMutexRequestDebug },
422 { "RTSemMutexRequestNoResume", (void *)RTSemMutexRequestNoResume },
423 { "RTSemMutexRequestNoResumeDebug", (void *)RTSemMutexRequestNoResumeDebug },
424 { "RTSpinlockAcquire", (void *)RTSpinlockAcquire },
425 { "RTSpinlockCreate", (void *)RTSpinlockCreate },
426 { "RTSpinlockDestroy", (void *)RTSpinlockDestroy },
427 { "RTSpinlockRelease", (void *)RTSpinlockRelease },
428 { "RTStrCopy", (void *)RTStrCopy },
429 { "RTStrDupTag", (void *)RTStrDupTag },
430 { "RTStrFormat", (void *)RTStrFormat },
431 { "RTStrFormatNumber", (void *)RTStrFormatNumber },
432 { "RTStrFormatTypeDeregister", (void *)RTStrFormatTypeDeregister },
433 { "RTStrFormatTypeRegister", (void *)RTStrFormatTypeRegister },
434 { "RTStrFormatTypeSetUser", (void *)RTStrFormatTypeSetUser },
435 { "RTStrFormatV", (void *)RTStrFormatV },
436 { "RTStrFree", (void *)RTStrFree },
437 { "RTStrNCmp", (void *)RTStrNCmp },
438 { "RTStrPrintf", (void *)RTStrPrintf },
439 { "RTStrPrintfEx", (void *)RTStrPrintfEx },
440 { "RTStrPrintfExV", (void *)RTStrPrintfExV },
441 { "RTStrPrintfV", (void *)RTStrPrintfV },
442 { "RTThreadCreate", (void *)RTThreadCreate },
443 { "RTThreadCtxHooksAreRegistered", (void *)RTThreadCtxHooksAreRegistered },
444 { "RTThreadCtxHooksCreate", (void *)RTThreadCtxHooksCreate },
445 { "RTThreadCtxHooksDeregister", (void *)RTThreadCtxHooksDeregister },
446 { "RTThreadCtxHooksRegister", (void *)RTThreadCtxHooksRegister },
447 { "RTThreadCtxHooksRelease", (void *)RTThreadCtxHooksRelease },
448 { "RTThreadCtxHooksRetain", (void *)RTThreadCtxHooksRetain },
449 { "RTThreadGetName", (void *)RTThreadGetName },
450 { "RTThreadGetNative", (void *)RTThreadGetNative },
451 { "RTThreadGetType", (void *)RTThreadGetType },
452 { "RTThreadIsInInterrupt", (void *)RTThreadIsInInterrupt },
453 { "RTThreadNativeSelf", (void *)RTThreadNativeSelf },
454 { "RTThreadPreemptDisable", (void *)RTThreadPreemptDisable },
455 { "RTThreadPreemptIsEnabled", (void *)RTThreadPreemptIsEnabled },
456 { "RTThreadPreemptIsPending", (void *)RTThreadPreemptIsPending },
457 { "RTThreadPreemptIsPendingTrusty", (void *)RTThreadPreemptIsPendingTrusty },
458 { "RTThreadPreemptIsPossible", (void *)RTThreadPreemptIsPossible },
459 { "RTThreadPreemptRestore", (void *)RTThreadPreemptRestore },
460 { "RTThreadSelf", (void *)RTThreadSelf },
461 { "RTThreadSelfName", (void *)RTThreadSelfName },
462 { "RTThreadSleep", (void *)RTThreadSleep },
463 { "RTThreadUserReset", (void *)RTThreadUserReset },
464 { "RTThreadUserSignal", (void *)RTThreadUserSignal },
465 { "RTThreadUserWait", (void *)RTThreadUserWait },
466 { "RTThreadUserWaitNoResume", (void *)RTThreadUserWaitNoResume },
467 { "RTThreadWait", (void *)RTThreadWait },
468 { "RTThreadWaitNoResume", (void *)RTThreadWaitNoResume },
469 { "RTThreadYield", (void *)RTThreadYield },
470 { "RTTimeMilliTS", (void *)RTTimeMilliTS },
471 { "RTTimeNanoTS", (void *)RTTimeNanoTS },
472 { "RTTimeNow", (void *)RTTimeNow },
473 { "RTTimerCanDoHighResolution", (void *)RTTimerCanDoHighResolution },
474 { "RTTimerChangeInterval", (void *)RTTimerChangeInterval },
475 { "RTTimerCreate", (void *)RTTimerCreate },
476 { "RTTimerCreateEx", (void *)RTTimerCreateEx },
477 { "RTTimerDestroy", (void *)RTTimerDestroy },
478 { "RTTimerGetSystemGranularity", (void *)RTTimerGetSystemGranularity },
479 { "RTTimerReleaseSystemGranularity", (void *)RTTimerReleaseSystemGranularity },
480 { "RTTimerRequestSystemGranularity", (void *)RTTimerRequestSystemGranularity },
481 { "RTTimerStart", (void *)RTTimerStart },
482 { "RTTimerStop", (void *)RTTimerStop },
483 { "RTTimeSystemMilliTS", (void *)RTTimeSystemMilliTS },
484 { "RTTimeSystemNanoTS", (void *)RTTimeSystemNanoTS },
485 { "RTUuidCompare", (void *)RTUuidCompare },
486 { "RTUuidCompareStr", (void *)RTUuidCompareStr },
487 { "RTUuidFromStr", (void *)RTUuidFromStr },
488/* SED: END */
489};
490
491#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
492/**
493 * Drag in the rest of IPRT since we share it with the
494 * rest of the kernel modules on darwin.
 *
 * The array exists only to reference the listed IPRT functions; each group is
 * labelled with the module that needs it.  The list is NULL terminated.
 *
 * NOTE(review): the comment mentions only darwin, but the preprocessor guard
 * enables this table on Solaris and FreeBSD as well.
495 */
496PFNRT g_apfnVBoxDrvIPRTDeps[] =
497{
498 /* VBoxNetAdp */
499 (PFNRT)RTRandBytes,
500 /* VBoxUSB */
501 (PFNRT)RTPathStripFilename,
502 NULL
503};
504#endif /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
505
506
507/**
508 * Initializes the device extentsion structure.
509 *
510 * @returns IPRT status code.
511 * @param pDevExt The device extension to initialize.
512 * @param cbSession The size of the session structure. The size of
513 * SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
514 * defined because we're skipping the OS specific members
515 * then.
516 */
517int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
518{
519 int rc;
520
521#ifdef SUPDRV_WITH_RELEASE_LOGGER
522 /*
523 * Create the release log.
524 */
525 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
526 PRTLOGGER pRelLogger;
527 rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
528 "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
529 if (RT_SUCCESS(rc))
530 RTLogRelSetDefaultInstance(pRelLogger);
531 /** @todo Add native hook for getting logger config parameters and setting
532 * them. On linux we should use the module parameter stuff... */
533#endif
534
535 /*
536 * Initialize it.
537 */
538 memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
539 pDevExt->Spinlock = NIL_RTSPINLOCK;
540 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
541 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
542 rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
543 if (RT_SUCCESS(rc))
544 rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
545 if (RT_SUCCESS(rc))
546 rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
547
548 if (RT_SUCCESS(rc))
549#ifdef SUPDRV_USE_MUTEX_FOR_LDR
550 rc = RTSemMutexCreate(&pDevExt->mtxLdr);
551#else
552 rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
553#endif
554 if (RT_SUCCESS(rc))
555 {
556 rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
557 if (RT_SUCCESS(rc))
558 {
559#ifdef SUPDRV_USE_MUTEX_FOR_LDR
560 rc = RTSemMutexCreate(&pDevExt->mtxGip);
561#else
562 rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
563#endif
564 if (RT_SUCCESS(rc))
565 {
566 rc = supdrvGipCreate(pDevExt);
567 if (RT_SUCCESS(rc))
568 {
569 rc = supdrvTracerInit(pDevExt);
570 if (RT_SUCCESS(rc))
571 {
572 pDevExt->pLdrInitImage = NULL;
573 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
574 pDevExt->u32Cookie = BIRD; /** @todo make this random? */
575 pDevExt->cbSession = (uint32_t)cbSession;
576
577 /*
578 * Fixup the absolute symbols.
579 *
580 * Because of the table indexing assumptions we'll have a little #ifdef orgy
581 * here rather than distributing this to OS specific files. At least for now.
582 */
583#ifdef RT_OS_DARWIN
584# if ARCH_BITS == 32
585 if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
586 {
587 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
588 g_aFunctions[1].pfn = (void *)0x80; /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
589 g_aFunctions[2].pfn = (void *)0x88; /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
590 g_aFunctions[3].pfn = (void *)0x88; /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
591 }
592 else
593 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
594 g_aFunctions[4].pfn = (void *)0x08; /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
595 g_aFunctions[5].pfn = (void *)0x10; /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
596 g_aFunctions[6].pfn = (void *)0x10; /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
597 g_aFunctions[7].pfn = (void *)0x10; /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
598 g_aFunctions[8].pfn = (void *)0x10; /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
599 g_aFunctions[9].pfn = (void *)0x48; /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
600# else /* 64-bit darwin: */
601 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
602 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
603 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
604 g_aFunctions[3].pfn = (void *)0; /* SUPR0Abs64bitKernelDS */
605 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
606 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
607 g_aFunctions[6].pfn = (void *)0; /* SUPR0AbsKernelDS */
608 g_aFunctions[7].pfn = (void *)0; /* SUPR0AbsKernelES */
609 g_aFunctions[8].pfn = (void *)0; /* SUPR0AbsKernelFS */
610 g_aFunctions[9].pfn = (void *)0; /* SUPR0AbsKernelGS */
611
612# endif
613#else /* !RT_OS_DARWIN */
614# if ARCH_BITS == 64
615 g_aFunctions[0].pfn = (void *)1; /* SUPR0AbsIs64bit */
616 g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0Abs64bitKernelCS */
617 g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0Abs64bitKernelSS */
618 g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0Abs64bitKernelDS */
619# else
620 g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[4].pfn = (void *)0;
621# endif
622 g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS(); /* SUPR0AbsKernelCS */
623 g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS(); /* SUPR0AbsKernelSS */
624 g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS(); /* SUPR0AbsKernelDS */
625 g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES(); /* SUPR0AbsKernelES */
626 g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS(); /* SUPR0AbsKernelFS */
627 g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS(); /* SUPR0AbsKernelGS */
628#endif /* !RT_OS_DARWIN */
629 return VINF_SUCCESS;
630 }
631
632 supdrvGipDestroy(pDevExt);
633 }
634
635#ifdef SUPDRV_USE_MUTEX_FOR_GIP
636 RTSemMutexDestroy(pDevExt->mtxGip);
637 pDevExt->mtxGip = NIL_RTSEMMUTEX;
638#else
639 RTSemFastMutexDestroy(pDevExt->mtxGip);
640 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
641#endif
642 }
643 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
644 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
645 }
646#ifdef SUPDRV_USE_MUTEX_FOR_LDR
647 RTSemMutexDestroy(pDevExt->mtxLdr);
648 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
649#else
650 RTSemFastMutexDestroy(pDevExt->mtxLdr);
651 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
652#endif
653 }
654
655 RTSpinlockDestroy(pDevExt->Spinlock);
656 pDevExt->Spinlock = NIL_RTSPINLOCK;
657 RTSpinlockDestroy(pDevExt->hGipSpinlock);
658 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
659 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
660 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
661
662#ifdef SUPDRV_WITH_RELEASE_LOGGER
663 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
664 RTLogDestroy(RTLogSetDefaultInstance(NULL));
665#endif
666
667 return rc;
668}
669
670
671/**
672 * Delete the device extension (e.g. cleanup members).
673 *
674 * @param pDevExt The device extension to delete.
675 */
676void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
677{
678 PSUPDRVOBJ pObj;
679 PSUPDRVUSAGE pUsage;
680
681 /*
682 * Kill mutexes and spinlocks.
683 */
684#ifdef SUPDRV_USE_MUTEX_FOR_GIP
685 RTSemMutexDestroy(pDevExt->mtxGip);
686 pDevExt->mtxGip = NIL_RTSEMMUTEX;
687#else
688 RTSemFastMutexDestroy(pDevExt->mtxGip);
689 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
690#endif
691#ifdef SUPDRV_USE_MUTEX_FOR_LDR
692 RTSemMutexDestroy(pDevExt->mtxLdr);
693 pDevExt->mtxLdr = NIL_RTSEMMUTEX;
694#else
695 RTSemFastMutexDestroy(pDevExt->mtxLdr);
696 pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
697#endif
698 RTSpinlockDestroy(pDevExt->Spinlock);
699 pDevExt->Spinlock = NIL_RTSPINLOCK;
700 RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
701 pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
702 RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
703 pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
704
705 /*
706 * Free lists.
707 */
708 /* objects. */
709 pObj = pDevExt->pObjs;
710 Assert(!pObj); /* (can trigger on forced unloads) */
711 pDevExt->pObjs = NULL;
712 while (pObj)
713 {
714 void *pvFree = pObj;
715 pObj = pObj->pNext;
716 RTMemFree(pvFree);
717 }
718
719 /* usage records. */
720 pUsage = pDevExt->pUsageFree;
721 pDevExt->pUsageFree = NULL;
722 while (pUsage)
723 {
724 void *pvFree = pUsage;
725 pUsage = pUsage->pNext;
726 RTMemFree(pvFree);
727 }
728
729 /* kill the GIP. */
730 supdrvGipDestroy(pDevExt);
731 RTSpinlockDestroy(pDevExt->hGipSpinlock);
732 pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
733
734 supdrvTracerTerm(pDevExt);
735
736#ifdef SUPDRV_WITH_RELEASE_LOGGER
737 /* destroy the loggers. */
738 RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
739 RTLogDestroy(RTLogSetDefaultInstance(NULL));
740#endif
741}
742
743
744/**
745 * Create session.
746 *
747 * @returns IPRT status code.
748 * @param pDevExt Device extension.
749 * @param fUser Flag indicating whether this is a user or kernel
750 * session.
751 * @param fUnrestricted Unrestricted access (system) or restricted access
752 * (user)?
753 * @param ppSession Where to store the pointer to the session data.
754 */
755int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
756{
757 int rc;
758 PSUPDRVSESSION pSession;
759
760 if (!SUP_IS_DEVEXT_VALID(pDevExt))
761 return VERR_INVALID_PARAMETER;
762
763 /*
764 * Allocate memory for the session data.
765 */
766 pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
767 if (pSession)
768 {
769 /* Initialize session data. */
770 rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
771 if (!rc)
772 {
773 rc = RTHandleTableCreateEx(&pSession->hHandleTable,
774 RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
775 1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
776 if (RT_SUCCESS(rc))
777 {
778 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
779 pSession->pDevExt = pDevExt;
780 pSession->u32Cookie = BIRD_INV;
781 pSession->fUnrestricted = fUnrestricted;
782 /*pSession->fInHashTable = false; */
783 pSession->cRefs = 1;
784 /*pSession->pCommonNextHash = NULL;
785 pSession->ppOsSessionPtr = NULL; */
786 if (fUser)
787 {
788 pSession->Process = RTProcSelf();
789 pSession->R0Process = RTR0ProcHandleSelf();
790 }
791 else
792 {
793 pSession->Process = NIL_RTPROCESS;
794 pSession->R0Process = NIL_RTR0PROCESS;
795 }
796 /*pSession->pLdrUsage = NULL;
797 pSession->pVM = NULL;
798 pSession->pUsage = NULL;
799 pSession->pGip = NULL;
800 pSession->fGipReferenced = false;
801 pSession->Bundle.cUsed = 0; */
802 pSession->Uid = NIL_RTUID;
803 pSession->Gid = NIL_RTGID;
804 /*pSession->uTracerData = 0;*/
805 pSession->hTracerCaller = NIL_RTNATIVETHREAD;
806 RTListInit(&pSession->TpProviders);
807 /*pSession->cTpProviders = 0;*/
808 /*pSession->cTpProbesFiring = 0;*/
809 RTListInit(&pSession->TpUmods);
810 /*RT_ZERO(pSession->apTpLookupTable);*/
811
812 VBOXDRV_SESSION_CREATE(pSession, fUser);
813 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
814 return VINF_SUCCESS;
815 }
816
817 RTSpinlockDestroy(pSession->Spinlock);
818 }
819 RTMemFree(pSession);
820 *ppSession = NULL;
821 Log(("Failed to create spinlock, rc=%d!\n", rc));
822 }
823 else
824 rc = VERR_NO_MEMORY;
825
826 return rc;
827}
828
829
830/**
831 * Cleans up the session in the context of the process to which it belongs, the
832 * caller will free the session and the session spinlock.
833 *
834 * This should normally occur when the session is closed or as the process
835 * exits. Careful reference counting in the OS specfic code makes sure that
836 * there cannot be any races between process/handle cleanup callbacks and
837 * threads doing I/O control calls.
838 *
839 * @param pDevExt The device extension.
840 * @param pSession Session data.
841 */
842static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
843{
844 int rc;
845 PSUPDRVBUNDLE pBundle;
846 LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
847
848 Assert(!pSession->fInHashTable);
849 Assert(!pSession->ppOsSessionPtr);
850 AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
851 ("R0Process=%p cur=%p; Process=%u curpid=%u\n", RTR0ProcHandleSelf(), RTProcSelf()));
852
853 /*
854 * Remove logger instances related to this session.
855 */
856 RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
857
858 /*
859 * Destroy the handle table.
860 */
861 rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
862 AssertRC(rc);
863 pSession->hHandleTable = NIL_RTHANDLETABLE;
864
865 /*
866 * Release object references made in this session.
867 * In theory there should be noone racing us in this session.
868 */
869 Log2(("release objects - start\n"));
870 if (pSession->pUsage)
871 {
872 PSUPDRVUSAGE pUsage;
873 RTSpinlockAcquire(pDevExt->Spinlock);
874
875 while ((pUsage = pSession->pUsage) != NULL)
876 {
877 PSUPDRVOBJ pObj = pUsage->pObj;
878 pSession->pUsage = pUsage->pNext;
879
880 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
881 if (pUsage->cUsage < pObj->cUsage)
882 {
883 pObj->cUsage -= pUsage->cUsage;
884 RTSpinlockRelease(pDevExt->Spinlock);
885 }
886 else
887 {
888 /* Destroy the object and free the record. */
889 if (pDevExt->pObjs == pObj)
890 pDevExt->pObjs = pObj->pNext;
891 else
892 {
893 PSUPDRVOBJ pObjPrev;
894 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
895 if (pObjPrev->pNext == pObj)
896 {
897 pObjPrev->pNext = pObj->pNext;
898 break;
899 }
900 Assert(pObjPrev);
901 }
902 RTSpinlockRelease(pDevExt->Spinlock);
903
904 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
905 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
906 if (pObj->pfnDestructor)
907 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
908 RTMemFree(pObj);
909 }
910
911 /* free it and continue. */
912 RTMemFree(pUsage);
913
914 RTSpinlockAcquire(pDevExt->Spinlock);
915 }
916
917 RTSpinlockRelease(pDevExt->Spinlock);
918 AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
919 }
920 Log2(("release objects - done\n"));
921
922 /*
923 * Do tracer cleanups related to this session.
924 */
925 Log2(("release tracer stuff - start\n"));
926 supdrvTracerCleanupSession(pDevExt, pSession);
927 Log2(("release tracer stuff - end\n"));
928
929 /*
930 * Release memory allocated in the session.
931 *
932 * We do not serialize this as we assume that the application will
933 * not allocated memory while closing the file handle object.
934 */
935 Log2(("freeing memory:\n"));
936 pBundle = &pSession->Bundle;
937 while (pBundle)
938 {
939 PSUPDRVBUNDLE pToFree;
940 unsigned i;
941
942 /*
943 * Check and unlock all entries in the bundle.
944 */
945 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
946 {
947 if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
948 {
949 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
950 (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
951 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
952 {
953 rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
954 AssertRC(rc); /** @todo figure out how to handle this. */
955 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
956 }
957 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
958 AssertRC(rc); /** @todo figure out how to handle this. */
959 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
960 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
961 }
962 }
963
964 /*
965 * Advance and free previous bundle.
966 */
967 pToFree = pBundle;
968 pBundle = pBundle->pNext;
969
970 pToFree->pNext = NULL;
971 pToFree->cUsed = 0;
972 if (pToFree != &pSession->Bundle)
973 RTMemFree(pToFree);
974 }
975 Log2(("freeing memory - done\n"));
976
977 /*
978 * Deregister component factories.
979 */
980 RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
981 Log2(("deregistering component factories:\n"));
982 if (pDevExt->pComponentFactoryHead)
983 {
984 PSUPDRVFACTORYREG pPrev = NULL;
985 PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
986 while (pCur)
987 {
988 if (pCur->pSession == pSession)
989 {
990 /* unlink it */
991 PSUPDRVFACTORYREG pNext = pCur->pNext;
992 if (pPrev)
993 pPrev->pNext = pNext;
994 else
995 pDevExt->pComponentFactoryHead = pNext;
996
997 /* free it */
998 pCur->pNext = NULL;
999 pCur->pSession = NULL;
1000 pCur->pFactory = NULL;
1001 RTMemFree(pCur);
1002
1003 /* next */
1004 pCur = pNext;
1005 }
1006 else
1007 {
1008 /* next */
1009 pPrev = pCur;
1010 pCur = pCur->pNext;
1011 }
1012 }
1013 }
1014 RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
1015 Log2(("deregistering component factories - done\n"));
1016
1017 /*
1018 * Loaded images needs to be dereferenced and possibly freed up.
1019 */
1020 supdrvLdrLock(pDevExt);
1021 Log2(("freeing images:\n"));
1022 if (pSession->pLdrUsage)
1023 {
1024 PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
1025 pSession->pLdrUsage = NULL;
1026 while (pUsage)
1027 {
1028 void *pvFree = pUsage;
1029 PSUPDRVLDRIMAGE pImage = pUsage->pImage;
1030 if (pImage->cUsage > pUsage->cUsage)
1031 pImage->cUsage -= pUsage->cUsage;
1032 else
1033 supdrvLdrFree(pDevExt, pImage);
1034 pUsage->pImage = NULL;
1035 pUsage = pUsage->pNext;
1036 RTMemFree(pvFree);
1037 }
1038 }
1039 supdrvLdrUnlock(pDevExt);
1040 Log2(("freeing images - done\n"));
1041
1042 /*
1043 * Unmap the GIP.
1044 */
1045 Log2(("umapping GIP:\n"));
1046 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
1047 {
1048 SUPR0GipUnmap(pSession);
1049 pSession->fGipReferenced = 0;
1050 }
1051 Log2(("umapping GIP - done\n"));
1052}
1053
1054
1055/**
1056 * Common code for freeing a session when the reference count reaches zero.
1057 *
1058 * @param pDevExt Device extension.
1059 * @param pSession Session data.
1060 * This data will be freed by this routine.
1061 */
1062static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1063{
1064 VBOXDRV_SESSION_CLOSE(pSession);
1065
1066 /*
1067 * Cleanup the session first.
1068 */
1069 supdrvCleanupSession(pDevExt, pSession);
1070 supdrvOSCleanupSession(pDevExt, pSession);
1071
1072 /*
1073 * Free the rest of the session stuff.
1074 */
1075 RTSpinlockDestroy(pSession->Spinlock);
1076 pSession->Spinlock = NIL_RTSPINLOCK;
1077 pSession->pDevExt = NULL;
1078 RTMemFree(pSession);
1079 LogFlow(("supdrvDestroySession: returns\n"));
1080}
1081
1082
1083/**
1084 * Inserts the session into the global hash table.
1085 *
1086 * @retval VINF_SUCCESS on success.
1087 * @retval VERR_WRONG_ORDER if the session was already inserted (asserted).
1088 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1089 * session (asserted).
1090 * @retval VERR_DUPLICATE if there is already a session for that pid.
1091 *
1092 * @param pDevExt The device extension.
1093 * @param pSession The session.
1094 * @param ppOsSessionPtr Pointer to the OS session pointer, if any is
1095 * available and used. This will set to point to the
1096 * session while under the protection of the session
1097 * hash table spinlock. It will also be kept in
1098 * PSUPDRVSESSION::ppOsSessionPtr for lookup and
1099 * cleanup use.
1100 * @param pvUser Argument for supdrvOSSessionHashTabInserted.
1101 */
1102int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
1103 void *pvUser)
1104{
1105 PSUPDRVSESSION pCur;
1106 unsigned iHash;
1107
1108 /*
1109 * Validate input.
1110 */
1111 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1112 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1113
1114 /*
1115 * Calculate the hash table index and acquire the spinlock.
1116 */
1117 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1118
1119 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1120
1121 /*
1122 * If there are a collisions, we need to carefully check if we got a
1123 * duplicate. There can only be one open session per process.
1124 */
1125 pCur = pDevExt->apSessionHashTab[iHash];
1126 if (pCur)
1127 {
1128 while (pCur && pCur->Process != pSession->Process)
1129 pCur = pCur->pCommonNextHash;
1130
1131 if (pCur)
1132 {
1133 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1134 if (pCur == pSession)
1135 {
1136 Assert(pSession->fInHashTable);
1137 AssertFailed();
1138 return VERR_WRONG_ORDER;
1139 }
1140 Assert(!pSession->fInHashTable);
1141 if (pCur->R0Process == pSession->R0Process)
1142 return VERR_RESOURCE_IN_USE;
1143 return VERR_DUPLICATE;
1144 }
1145 }
1146 Assert(!pSession->fInHashTable);
1147 Assert(!pSession->ppOsSessionPtr);
1148
1149 /*
1150 * Insert it, doing a callout to the OS specific code in case it has
1151 * anything it wishes to do while we're holding the spinlock.
1152 */
1153 pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
1154 pDevExt->apSessionHashTab[iHash] = pSession;
1155 pSession->fInHashTable = true;
1156 ASMAtomicIncS32(&pDevExt->cSessions);
1157
1158 pSession->ppOsSessionPtr = ppOsSessionPtr;
1159 if (ppOsSessionPtr)
1160 ASMAtomicWritePtr(ppOsSessionPtr, pSession);
1161
1162 supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
1163
1164 /*
1165 * Retain a reference for the pointer in the session table.
1166 */
1167 ASMAtomicIncU32(&pSession->cRefs);
1168
1169 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1170 return VINF_SUCCESS;
1171}
1172
1173
1174/**
1175 * Removes the session from the global hash table.
1176 *
1177 * @retval VINF_SUCCESS on success.
1178 * @retval VERR_NOT_FOUND if the session was already removed (asserted).
1179 * @retval VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
1180 * session (asserted).
1181 *
1182 * @param pDevExt The device extension.
1183 * @param pSession The session. The caller is expected to have a reference
1184 * to this so it won't croak on us when we release the hash
1185 * table reference.
1186 * @param pvUser OS specific context value for the
1187 * supdrvOSSessionHashTabInserted callback.
1188 */
1189int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
1190{
1191 PSUPDRVSESSION pCur;
1192 unsigned iHash;
1193 int32_t cRefs;
1194
1195 /*
1196 * Validate input.
1197 */
1198 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1199 AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
1200
1201 /*
1202 * Calculate the hash table index and acquire the spinlock.
1203 */
1204 iHash = SUPDRV_SESSION_HASH(pSession->Process);
1205
1206 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1207
1208 /*
1209 * Unlink it.
1210 */
1211 pCur = pDevExt->apSessionHashTab[iHash];
1212 if (pCur == pSession)
1213 pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
1214 else
1215 {
1216 PSUPDRVSESSION pPrev = pCur;
1217 while (pCur && pCur != pSession)
1218 {
1219 pPrev = pCur;
1220 pCur = pCur->pCommonNextHash;
1221 }
1222 if (pCur)
1223 pPrev->pCommonNextHash = pCur->pCommonNextHash;
1224 else
1225 {
1226 Assert(!pSession->fInHashTable);
1227 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1228 return VERR_NOT_FOUND;
1229 }
1230 }
1231
1232 pSession->pCommonNextHash = NULL;
1233 pSession->fInHashTable = false;
1234
1235 ASMAtomicDecS32(&pDevExt->cSessions);
1236
1237 /*
1238 * Clear OS specific session pointer if available and do the OS callback.
1239 */
1240 if (pSession->ppOsSessionPtr)
1241 {
1242 ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
1243 pSession->ppOsSessionPtr = NULL;
1244 }
1245
1246 supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
1247
1248 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1249
1250 /*
1251 * Drop the reference the hash table had to the session. This shouldn't
1252 * be the last reference!
1253 */
1254 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1255 Assert(cRefs > 0 && cRefs < _1M);
1256 if (cRefs == 0)
1257 supdrvDestroySession(pDevExt, pSession);
1258
1259 return VINF_SUCCESS;
1260}
1261
1262
1263/**
1264 * Looks up the session for the current process in the global hash table or in
1265 * OS specific pointer.
1266 *
1267 * @returns Pointer to the session with a reference that the caller must
1268 * release. If no valid session was found, NULL is returned.
1269 *
1270 * @param pDevExt The device extension.
1271 * @param Process The process ID.
1272 * @param R0Process The ring-0 process handle.
1273 * @param ppOsSessionPtr The OS session pointer if available. If not NULL,
1274 * this is used instead of the hash table. For
1275 * additional safety it must then be equal to the
1276 * SUPDRVSESSION::ppOsSessionPtr member.
1277 * This can be NULL even if the OS has a session
1278 * pointer.
1279 */
1280PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
1281 PSUPDRVSESSION *ppOsSessionPtr)
1282{
1283 PSUPDRVSESSION pCur;
1284 unsigned iHash;
1285
1286 /*
1287 * Validate input.
1288 */
1289 AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
1290
1291 /*
1292 * Calculate the hash table index and acquire the spinlock.
1293 */
1294 iHash = SUPDRV_SESSION_HASH(Process);
1295
1296 RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
1297
1298 /*
1299 * If an OS session pointer is provided, always use it.
1300 */
1301 if (ppOsSessionPtr)
1302 {
1303 pCur = *ppOsSessionPtr;
1304 if ( pCur
1305 && ( pCur->ppOsSessionPtr != ppOsSessionPtr
1306 || pCur->Process != Process
1307 || pCur->R0Process != R0Process) )
1308 pCur = NULL;
1309 }
1310 else
1311 {
1312 /*
1313 * Otherwise, do the hash table lookup.
1314 */
1315 pCur = pDevExt->apSessionHashTab[iHash];
1316 while ( pCur
1317 && ( pCur->Process != Process
1318 || pCur->R0Process != R0Process) )
1319 pCur = pCur->pCommonNextHash;
1320 }
1321
1322 /*
1323 * Retain the session.
1324 */
1325 if (pCur)
1326 {
1327 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
1328 NOREF(cRefs);
1329 Assert(cRefs > 1 && cRefs < _1M);
1330 }
1331
1332 RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
1333
1334 return pCur;
1335}
1336
1337
1338/**
1339 * Retain a session to make sure it doesn't go away while it is in use.
1340 *
1341 * @returns New reference count on success, UINT32_MAX on failure.
1342 * @param pSession Session data.
1343 */
1344uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
1345{
1346 uint32_t cRefs;
1347 AssertPtrReturn(pSession, UINT32_MAX);
1348 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1349
1350 cRefs = ASMAtomicIncU32(&pSession->cRefs);
1351 AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1352 return cRefs;
1353}
1354
1355
1356/**
1357 * Releases a given session.
1358 *
1359 * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
1360 * @param pSession Session data.
1361 */
1362uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
1363{
1364 uint32_t cRefs;
1365 AssertPtrReturn(pSession, UINT32_MAX);
1366 AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
1367
1368 cRefs = ASMAtomicDecU32(&pSession->cRefs);
1369 AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
1370 if (cRefs == 0)
1371 supdrvDestroySession(pSession->pDevExt, pSession);
1372 return cRefs;
1373}
1374
1375
1376/**
1377 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1378 *
1379 * @returns IPRT status code, see SUPR0ObjAddRef.
1380 * @param hHandleTable The handle table handle. Ignored.
1381 * @param pvObj The object pointer.
1382 * @param pvCtx Context, the handle type. Ignored.
1383 * @param pvUser Session pointer.
1384 */
1385static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
1386{
1387 NOREF(pvCtx);
1388 NOREF(hHandleTable);
1389 return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
1390}
1391
1392
1393/**
1394 * RTHandleTableDestroy callback used by supdrvCleanupSession.
1395 *
1396 * @param hHandleTable The handle table handle. Ignored.
1397 * @param h The handle value. Ignored.
1398 * @param pvObj The object pointer.
1399 * @param pvCtx Context, the handle type. Ignored.
1400 * @param pvUser Session pointer.
1401 */
1402static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
1403{
1404 NOREF(pvCtx);
1405 NOREF(h);
1406 NOREF(hHandleTable);
1407 SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
1408}
1409
1410
1411/**
1412 * Fast path I/O Control worker.
1413 *
1414 * @returns VBox status code that should be passed down to ring-3 unchanged.
1415 * @param uIOCtl Function number.
1416 * @param idCpu VMCPU id.
1417 * @param pDevExt Device extention.
1418 * @param pSession Session data.
1419 */
1420int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
1421{
1422 /*
1423 * We check the two prereqs after doing this only to allow the compiler to optimize things better.
1424 */
1425 if (RT_LIKELY( RT_VALID_PTR(pSession)
1426 && pSession->pVM
1427 && pDevExt->pfnVMMR0EntryFast))
1428 {
1429 switch (uIOCtl)
1430 {
1431 case SUP_IOCTL_FAST_DO_RAW_RUN:
1432 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
1433 break;
1434 case SUP_IOCTL_FAST_DO_HM_RUN:
1435 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
1436 break;
1437 case SUP_IOCTL_FAST_DO_NOP:
1438 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
1439 break;
1440 default:
1441 return VERR_INTERNAL_ERROR;
1442 }
1443 return VINF_SUCCESS;
1444 }
1445 return VERR_INTERNAL_ERROR;
1446}
1447
1448
1449/**
1450 * Helper for supdrvIOCtl. Check if pszStr contains any character of pszChars.
1451 * We would use strpbrk here if this function would be contained in the RedHat kABI white
1452 * list, see http://www.kerneldrivers.org/RHEL5.
1453 *
1454 * @returns 1 if pszStr does contain any character of pszChars, 0 otherwise.
1455 * @param pszStr String to check
1456 * @param pszChars Character set
1457 */
1458static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
1459{
1460 int chCur;
1461 while ((chCur = *pszStr++) != '\0')
1462 {
1463 int ch;
1464 const char *psz = pszChars;
1465 while ((ch = *psz++) != '\0')
1466 if (ch == chCur)
1467 return 1;
1468
1469 }
1470 return 0;
1471}
1472
1473
1474
1475/**
1476 * I/O Control inner worker (tracing reasons).
1477 *
1478 * @returns IPRT status code.
1479 * @retval VERR_INVALID_PARAMETER if the request is invalid.
1480 *
1481 * @param uIOCtl Function number.
1482 * @param pDevExt Device extention.
1483 * @param pSession Session data.
1484 * @param pReqHdr The request header.
1485 */
1486static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
1487{
1488 /*
1489 * Validation macros
1490 */
1491#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
1492 do { \
1493 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
1494 { \
1495 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
1496 (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1497 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1498 } \
1499 } while (0)
1500
1501#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
1502
1503#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
1504 do { \
1505 if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
1506 { \
1507 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
1508 (long)pReqHdr->cbIn, (long)(cbInExpect))); \
1509 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1510 } \
1511 } while (0)
1512
1513#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
1514 do { \
1515 if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
1516 { \
1517 OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
1518 (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
1519 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1520 } \
1521 } while (0)
1522
1523#define REQ_CHECK_EXPR(Name, expr) \
1524 do { \
1525 if (RT_UNLIKELY(!(expr))) \
1526 { \
1527 OSDBGPRINT(( #Name ": %s\n", #expr)); \
1528 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1529 } \
1530 } while (0)
1531
1532#define REQ_CHECK_EXPR_FMT(expr, fmt) \
1533 do { \
1534 if (RT_UNLIKELY(!(expr))) \
1535 { \
1536 OSDBGPRINT( fmt ); \
1537 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
1538 } \
1539 } while (0)
1540
1541 /*
1542 * The switch.
1543 */
1544 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
1545 {
1546 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
1547 {
1548 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
1549 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
1550 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
1551 {
1552 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
1553 pReq->Hdr.rc = VERR_INVALID_MAGIC;
1554 return 0;
1555 }
1556
1557#if 0
1558 /*
1559 * Call out to the OS specific code and let it do permission checks on the
1560 * client process.
1561 */
1562 if (!supdrvOSValidateClientProcess(pDevExt, pSession))
1563 {
1564 pReq->u.Out.u32Cookie = 0xffffffff;
1565 pReq->u.Out.u32SessionCookie = 0xffffffff;
1566 pReq->u.Out.u32SessionVersion = 0xffffffff;
1567 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1568 pReq->u.Out.pSession = NULL;
1569 pReq->u.Out.cFunctions = 0;
1570 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
1571 return 0;
1572 }
1573#endif
1574
1575 /*
1576 * Match the version.
1577 * The current logic is very simple, match the major interface version.
1578 */
1579 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
1580 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
1581 {
1582 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
1583 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
1584 pReq->u.Out.u32Cookie = 0xffffffff;
1585 pReq->u.Out.u32SessionCookie = 0xffffffff;
1586 pReq->u.Out.u32SessionVersion = 0xffffffff;
1587 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1588 pReq->u.Out.pSession = NULL;
1589 pReq->u.Out.cFunctions = 0;
1590 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
1591 return 0;
1592 }
1593
1594 /*
1595 * Fill in return data and be gone.
1596 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
1597 * u32SessionVersion <= u32ReqVersion!
1598 */
1599 /** @todo Somehow validate the client and negotiate a secure cookie... */
1600 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
1601 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
1602 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
1603 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
1604 pReq->u.Out.pSession = pSession;
1605 pReq->u.Out.cFunctions = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
1606 pReq->Hdr.rc = VINF_SUCCESS;
1607 return 0;
1608 }
1609
1610 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
1611 {
1612 /* validate */
1613 PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
1614 REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
1615
1616 /* execute */
1617 pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
1618 memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
1619 pReq->Hdr.rc = VINF_SUCCESS;
1620 return 0;
1621 }
1622
1623 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
1624 {
1625 /* validate */
1626 PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
1627 REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
1628 REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
1629 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
1630 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
1631
1632 /* execute */
1633 pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
1634 if (RT_FAILURE(pReq->Hdr.rc))
1635 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1636 return 0;
1637 }
1638
1639 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
1640 {
1641 /* validate */
1642 PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
1643 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
1644
1645 /* execute */
1646 pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
1647 return 0;
1648 }
1649
1650 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
1651 {
1652 /* validate */
1653 PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
1654 REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
1655
1656 /* execute */
1657 pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
1658 if (RT_FAILURE(pReq->Hdr.rc))
1659 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1660 return 0;
1661 }
1662
1663 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
1664 {
1665 /* validate */
1666 PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
1667 REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
1668
1669 /* execute */
1670 pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1671 return 0;
1672 }
1673
1674 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
1675 {
1676 /* validate */
1677 PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
1678 REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
1679 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
1680 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
1681 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1682 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
1683 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
1684 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
1685 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1686 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));
1687 REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
1688
1689 /* execute */
1690 pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
1691 return 0;
1692 }
1693
1694 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
1695 {
1696 /* validate */
1697 PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
1698 REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
1699 REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
1700 REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
1701 REQ_CHECK_EXPR_FMT( !pReq->u.In.cSymbols
1702 || ( pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
1703 && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
1704 ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
1705 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
1706 REQ_CHECK_EXPR_FMT( !pReq->u.In.cbStrTab
1707 || ( pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
1708 && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
1709 && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
1710 ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
1711 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
1712
1713 if (pReq->u.In.cSymbols)
1714 {
1715 uint32_t i;
1716 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
1717 for (i = 0; i < pReq->u.In.cSymbols; i++)
1718 {
1719 REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
1720 ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
1721 REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
1722 ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1723 REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
1724 pReq->u.In.cbStrTab - paSyms[i].offName),
1725 ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
1726 }
1727 }
1728
1729 /* execute */
1730 pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
1731 return 0;
1732 }
1733
1734 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
1735 {
1736 /* validate */
1737 PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
1738 REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
1739
1740 /* execute */
1741 pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
1742 return 0;
1743 }
1744
1745 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
1746 {
1747 /* validate */
1748 PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
1749 REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
1750 REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
1751
1752 /* execute */
1753 pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
1754 return 0;
1755 }
1756
1757 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
1758 {
1759 /* validate */
1760 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1761 Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1762 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1763
1764 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
1765 {
1766 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
1767
1768 /* execute */
1769 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1770 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
1771 else
1772 pReq->Hdr.rc = VERR_WRONG_ORDER;
1773 }
1774 else
1775 {
1776 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1777 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
1778 ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
1779 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1780 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
1781
1782 /* execute */
1783 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1784 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1785 else
1786 pReq->Hdr.rc = VERR_WRONG_ORDER;
1787 }
1788
1789 if ( RT_FAILURE(pReq->Hdr.rc)
1790 && pReq->Hdr.rc != VERR_INTERRUPTED
1791 && pReq->Hdr.rc != VERR_TIMEOUT)
1792 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1793 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1794 else
1795 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1796 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1797 return 0;
1798 }
1799
1800 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
1801 {
1802 /* validate */
1803 PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
1804 PSUPVMMR0REQHDR pVMMReq;
1805 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1806 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1807
1808 pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
1809 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
1810 ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
1811 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
1812 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
1813
1814 /* execute */
1815 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
1816 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
1817 else
1818 pReq->Hdr.rc = VERR_WRONG_ORDER;
1819
1820 if ( RT_FAILURE(pReq->Hdr.rc)
1821 && pReq->Hdr.rc != VERR_INTERRUPTED
1822 && pReq->Hdr.rc != VERR_TIMEOUT)
1823 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1824 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1825 else
1826 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1827 pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1828 return 0;
1829 }
1830
1831 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
1832 {
1833 /* validate */
1834 PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
1835 REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
1836
1837 /* execute */
1838 pReq->Hdr.rc = VINF_SUCCESS;
1839 pReq->u.Out.enmMode = SUPR0GetPagingMode();
1840 return 0;
1841 }
1842
1843 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
1844 {
1845 /* validate */
1846 PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
1847 REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
1848 REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
1849
1850 /* execute */
1851 pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
1852 if (RT_FAILURE(pReq->Hdr.rc))
1853 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1854 return 0;
1855 }
1856
1857 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
1858 {
1859 /* validate */
1860 PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
1861 REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
1862
1863 /* execute */
1864 pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
1865 return 0;
1866 }
1867
1868 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
1869 {
1870 /* validate */
1871 PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
1872 REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
1873
1874 /* execute */
1875 pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
1876 if (RT_SUCCESS(pReq->Hdr.rc))
1877 pReq->u.Out.pGipR0 = pDevExt->pGip;
1878 return 0;
1879 }
1880
1881 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
1882 {
1883 /* validate */
1884 PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
1885 REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
1886
1887 /* execute */
1888 pReq->Hdr.rc = SUPR0GipUnmap(pSession);
1889 return 0;
1890 }
1891
1892 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
1893 {
1894 /* validate */
1895 PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
1896 REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
1897 REQ_CHECK_EXPR_FMT( !pReq->u.In.pVMR0
1898 || ( VALID_PTR(pReq->u.In.pVMR0)
1899 && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
1900 ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
1901 /* execute */
1902 pSession->pVM = pReq->u.In.pVMR0;
1903 pReq->Hdr.rc = VINF_SUCCESS;
1904 return 0;
1905 }
1906
1907 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
1908 {
1909 /* validate */
1910 PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
1911 REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
1912 REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
1913 REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
1914 ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
1915 REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
1916 ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
1917 REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
1918 ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
1919
1920 /* execute */
1921 pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
1922 pReq->u.In.fUserMapping ? &pReq->u.Out.pvR3 : NULL,
1923 pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
1924 &pReq->u.Out.aPages[0]);
1925 if (RT_FAILURE(pReq->Hdr.rc))
1926 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1927 return 0;
1928 }
1929
1930 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
1931 {
1932 /* validate */
1933 PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
1934 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
1935 REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
1936 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
1937 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1938 ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
1939
1940 /* execute */
1941 pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
1942 pReq->u.In.fFlags, &pReq->u.Out.pvR0);
1943 if (RT_FAILURE(pReq->Hdr.rc))
1944 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
1945 return 0;
1946 }
1947
1948 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
1949 {
1950 /* validate */
1951 PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
1952 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
1953 REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
1954 ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
1955 REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
1956 REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
1957 ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
1958
1959 /* execute */
1960 pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
1961 return 0;
1962 }
1963
1964 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
1965 {
1966 /* validate */
1967 PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
1968 REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
1969
1970 /* execute */
1971 pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
1972 return 0;
1973 }
1974
1975 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
1976 {
1977 /* validate */
1978 PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
1979 Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
1980 pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
1981
1982 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
1983 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
1984 else
1985 {
1986 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
1987 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
1988 ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
1989 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
1990 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
1991 }
1992 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
1993
1994 /* execute */
1995 pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
1996 return 0;
1997 }
1998
1999 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
2000 {
2001 /* validate */
2002 PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
2003 size_t cbStrTab;
2004 REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
2005 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
2006 cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
2007 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups < cbStrTab);
2008 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags < cbStrTab);
2009 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
2010 REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
2011 ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
2012 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
2013 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
2014 REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat <= SUPLOGGERSETTINGS_WHAT_DESTROY);
2015
2016 /* execute */
2017 pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
2018 return 0;
2019 }
2020
2021 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
2022 {
2023 /* validate */
2024 PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
2025 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
2026 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
2027
2028 /* execute */
2029 switch (pReq->u.In.uType)
2030 {
2031 case SUP_SEM_TYPE_EVENT:
2032 {
2033 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2034 switch (pReq->u.In.uOp)
2035 {
2036 case SUPSEMOP2_WAIT_MS_REL:
2037 pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
2038 break;
2039 case SUPSEMOP2_WAIT_NS_ABS:
2040 pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
2041 break;
2042 case SUPSEMOP2_WAIT_NS_REL:
2043 pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
2044 break;
2045 case SUPSEMOP2_SIGNAL:
2046 pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
2047 break;
2048 case SUPSEMOP2_CLOSE:
2049 pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
2050 break;
2051 case SUPSEMOP2_RESET:
2052 default:
2053 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2054 break;
2055 }
2056 break;
2057 }
2058
2059 case SUP_SEM_TYPE_EVENT_MULTI:
2060 {
2061 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2062 switch (pReq->u.In.uOp)
2063 {
2064 case SUPSEMOP2_WAIT_MS_REL:
2065 pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
2066 break;
2067 case SUPSEMOP2_WAIT_NS_ABS:
2068 pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
2069 break;
2070 case SUPSEMOP2_WAIT_NS_REL:
2071 pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
2072 break;
2073 case SUPSEMOP2_SIGNAL:
2074 pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
2075 break;
2076 case SUPSEMOP2_CLOSE:
2077 pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
2078 break;
2079 case SUPSEMOP2_RESET:
2080 pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
2081 break;
2082 default:
2083 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2084 break;
2085 }
2086 break;
2087 }
2088
2089 default:
2090 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2091 break;
2092 }
2093 return 0;
2094 }
2095
2096 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
2097 {
2098 /* validate */
2099 PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
2100 REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
2101 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
2102
2103 /* execute */
2104 switch (pReq->u.In.uType)
2105 {
2106 case SUP_SEM_TYPE_EVENT:
2107 {
2108 SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
2109 switch (pReq->u.In.uOp)
2110 {
2111 case SUPSEMOP3_CREATE:
2112 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2113 pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
2114 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
2115 break;
2116 case SUPSEMOP3_GET_RESOLUTION:
2117 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
2118 pReq->Hdr.rc = VINF_SUCCESS;
2119 pReq->Hdr.cbOut = sizeof(*pReq);
2120 pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
2121 break;
2122 default:
2123 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2124 break;
2125 }
2126 break;
2127 }
2128
2129 case SUP_SEM_TYPE_EVENT_MULTI:
2130 {
2131 SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
2132 switch (pReq->u.In.uOp)
2133 {
2134 case SUPSEMOP3_CREATE:
2135 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2136 pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
2137 pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
2138 break;
2139 case SUPSEMOP3_GET_RESOLUTION:
2140 REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
2141 pReq->Hdr.rc = VINF_SUCCESS;
2142 pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
2143 break;
2144 default:
2145 pReq->Hdr.rc = VERR_INVALID_FUNCTION;
2146 break;
2147 }
2148 break;
2149 }
2150
2151 default:
2152 pReq->Hdr.rc = VERR_INVALID_PARAMETER;
2153 break;
2154 }
2155 return 0;
2156 }
2157
2158 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2159 {
2160 /* validate */
2161 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2162 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2163
2164 /* execute */
2165 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2166 if (RT_FAILURE(pReq->Hdr.rc))
2167 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2168 return 0;
2169 }
2170
2171 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
2172 {
2173 /* validate */
2174 PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
2175 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
2176
2177 /* execute */
2178 pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
2179 return 0;
2180 }
2181
2182 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
2183 {
2184 /* validate */
2185 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
2186
2187 /* execute */
2188 pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
2189 return 0;
2190 }
2191
2192 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
2193 {
2194 /* validate */
2195 PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
2196 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
2197
2198 /* execute */
2199 pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
2200 return 0;
2201 }
2202
2203 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
2204 {
2205 /* validate */
2206 PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
2207 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
2208 if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
2209 return VERR_INVALID_PARAMETER;
2210
2211 /* execute */
2212 pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
2213 pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
2214 pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
2215 pReq->u.In.szName, pReq->u.In.fFlags);
2216 return 0;
2217 }
2218
2219 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
2220 {
2221 /* validate */
2222 PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
2223 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
2224
2225 /* execute */
2226 pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
2227 return 0;
2228 }
2229
2230 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
2231 {
2232 /* validate */
2233 PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
2234 REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
2235
2236 supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
2237 pReqHdr->rc = VINF_SUCCESS;
2238 return 0;
2239 }
2240
2241 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
2242 {
2243 /* validate */
2244 PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
2245 REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
2246 REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
2247 pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
2248
2249 pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
2250 return 0;
2251 }
2252
2253 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
2254 {
2255 /* validate */
2256 REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
2257
2258 pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
2259 return 0;
2260 }
2261
2262 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
2263 {
2264 /* validate */
2265 PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
2266 REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
2267
2268 pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pReq);
2269 return 0;
2270 }
2271
2272 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
2273 {
2274 /* validate */
2275 PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
2276 REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
2277
2278 pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pReq);
2279 return 0;
2280 }
2281
2282 default:
2283 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2284 break;
2285 }
2286 return VERR_GENERAL_FAILURE;
2287}
2288
2289
2290/**
2291 * I/O Control inner worker for the restricted operations.
2292 *
2293 * @returns IPRT status code.
2294 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2295 *
2296 * @param uIOCtl Function number.
2297 * @param pDevExt Device extention.
2298 * @param pSession Session data.
2299 * @param pReqHdr The request header.
2300 */
2301static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
2302{
2303 /*
2304 * The switch.
2305 */
2306 switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
2307 {
2308 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
2309 {
2310 PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
2311 REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
2312 if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
2313 {
2314 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
2315 pReq->Hdr.rc = VERR_INVALID_MAGIC;
2316 return 0;
2317 }
2318
2319 /*
2320 * Match the version.
2321 * The current logic is very simple, match the major interface version.
2322 */
2323 if ( pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
2324 || (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
2325 {
2326 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2327 pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
2328 pReq->u.Out.u32Cookie = 0xffffffff;
2329 pReq->u.Out.u32SessionCookie = 0xffffffff;
2330 pReq->u.Out.u32SessionVersion = 0xffffffff;
2331 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2332 pReq->u.Out.pSession = NULL;
2333 pReq->u.Out.cFunctions = 0;
2334 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2335 return 0;
2336 }
2337
2338 /*
2339 * Fill in return data and be gone.
2340 * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
2341 * u32SessionVersion <= u32ReqVersion!
2342 */
2343 /** @todo Somehow validate the client and negotiate a secure cookie... */
2344 pReq->u.Out.u32Cookie = pDevExt->u32Cookie;
2345 pReq->u.Out.u32SessionCookie = pSession->u32Cookie;
2346 pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
2347 pReq->u.Out.u32DriverVersion = SUPDRV_IOC_VERSION;
2348 pReq->u.Out.pSession = pSession;
2349 pReq->u.Out.cFunctions = 0;
2350 pReq->Hdr.rc = VINF_SUCCESS;
2351 return 0;
2352 }
2353
2354 case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
2355 {
2356 /* validate */
2357 PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
2358 REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
2359
2360 /* execute */
2361 pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
2362 if (RT_FAILURE(pReq->Hdr.rc))
2363 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
2364 return 0;
2365 }
2366
2367 default:
2368 Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
2369 break;
2370 }
2371 return VERR_GENERAL_FAILURE;
2372}
2373
2374
2375/**
2376 * I/O Control worker.
2377 *
2378 * @returns IPRT status code.
2379 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2380 *
2381 * @param uIOCtl Function number.
2382 * @param pDevExt Device extention.
2383 * @param pSession Session data.
2384 * @param pReqHdr The request header.
2385 */
2386int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
2387{
2388 int rc;
2389 VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
2390
2391 /*
2392 * Validate the request.
2393 */
2394 if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
2395 {
2396 OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
2397 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2398 return VERR_INVALID_PARAMETER;
2399 }
2400 if (RT_UNLIKELY( (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
2401 || pReqHdr->cbIn < sizeof(*pReqHdr)
2402 || pReqHdr->cbIn > cbReq
2403 || pReqHdr->cbOut < sizeof(*pReqHdr)
2404 || pReqHdr->cbOut > cbReq))
2405 {
2406 OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
2407 (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
2408 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2409 return VERR_INVALID_PARAMETER;
2410 }
2411 if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
2412 {
2413 OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
2414 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2415 return VERR_INVALID_PARAMETER;
2416 }
2417 if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
2418 {
2419 if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
2420 {
2421 OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
2422 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2423 return VERR_INVALID_PARAMETER;
2424 }
2425 }
2426 else if (RT_UNLIKELY( pReqHdr->u32Cookie != pDevExt->u32Cookie
2427 || pReqHdr->u32SessionCookie != pSession->u32Cookie))
2428 {
2429 OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
2430 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
2431 return VERR_INVALID_PARAMETER;
2432 }
2433
2434 /*
2435 * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
2436 */
2437 if (pSession->fUnrestricted)
2438 rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2439 else
2440 rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
2441
2442 VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
2443 return rc;
2444}
2445
2446
2447/**
2448 * Inter-Driver Communication (IDC) worker.
2449 *
2450 * @returns VBox status code.
2451 * @retval VINF_SUCCESS on success.
2452 * @retval VERR_INVALID_PARAMETER if the request is invalid.
2453 * @retval VERR_NOT_SUPPORTED if the request isn't supported.
2454 *
2455 * @param uReq The request (function) code.
2456 * @param pDevExt Device extension.
2457 * @param pSession Session data.
2458 * @param pReqHdr The request header.
2459 */
2460int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
2461{
2462 /*
2463 * The OS specific code has already validated the pSession
2464 * pointer, and the request size being greater or equal to
2465 * size of the header.
2466 *
2467 * So, just check that pSession is a kernel context session.
2468 */
2469 if (RT_UNLIKELY( pSession
2470 && pSession->R0Process != NIL_RTR0PROCESS))
2471 return VERR_INVALID_PARAMETER;
2472
2473/*
2474 * Validation macro.
2475 */
2476#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
2477 do { \
2478 if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
2479 { \
2480 OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
2481 (long)pReqHdr->cb, (long)(cbExpect))); \
2482 return pReqHdr->rc = VERR_INVALID_PARAMETER; \
2483 } \
2484 } while (0)
2485
2486 switch (uReq)
2487 {
2488 case SUPDRV_IDC_REQ_CONNECT:
2489 {
2490 PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
2491 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
2492
2493 /*
2494 * Validate the cookie and other input.
2495 */
2496 if (pReq->Hdr.pSession != NULL)
2497 {
2498 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
2499 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2500 }
2501 if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
2502 {
2503 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
2504 (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
2505 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2506 }
2507 if ( pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
2508 || (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
2509 {
2510 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
2511 pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
2512 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2513 }
2514 if (pSession != NULL)
2515 {
2516 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
2517 return pReqHdr->rc = VERR_INVALID_PARAMETER;
2518 }
2519
2520 /*
2521 * Match the version.
2522 * The current logic is very simple, match the major interface version.
2523 */
2524 if ( pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
2525 || (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
2526 {
2527 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x Min: %#x Current: %#x\n",
2528 pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
2529 pReq->u.Out.pSession = NULL;
2530 pReq->u.Out.uSessionVersion = 0xffffffff;
2531 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2532 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2533 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
2534 return VINF_SUCCESS;
2535 }
2536
2537 pReq->u.Out.pSession = NULL;
2538 pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
2539 pReq->u.Out.uDriverVersion = SUPDRV_IDC_VERSION;
2540 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
2541
2542 pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
2543 if (RT_FAILURE(pReq->Hdr.rc))
2544 {
2545 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
2546 return VINF_SUCCESS;
2547 }
2548
2549 pReq->u.Out.pSession = pSession;
2550 pReq->Hdr.pSession = pSession;
2551
2552 return VINF_SUCCESS;
2553 }
2554
2555 case SUPDRV_IDC_REQ_DISCONNECT:
2556 {
2557 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
2558
2559 supdrvSessionRelease(pSession);
2560 return pReqHdr->rc = VINF_SUCCESS;
2561 }
2562
2563 case SUPDRV_IDC_REQ_GET_SYMBOL:
2564 {
2565 PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
2566 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
2567
2568 pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
2569 return VINF_SUCCESS;
2570 }
2571
2572 case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
2573 {
2574 PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
2575 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
2576
2577 pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
2578 return VINF_SUCCESS;
2579 }
2580
2581 case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
2582 {
2583 PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
2584 REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
2585
2586 pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
2587 return VINF_SUCCESS;
2588 }
2589
2590 default:
2591 Log(("Unknown IDC %#lx\n", (long)uReq));
2592 break;
2593 }
2594
2595#undef REQ_CHECK_IDC_SIZE
2596 return VERR_NOT_SUPPORTED;
2597}
2598
2599
2600/**
2601 * Register a object for reference counting.
2602 * The object is registered with one reference in the specified session.
2603 *
2604 * @returns Unique identifier on success (pointer).
2605 * All future reference must use this identifier.
2606 * @returns NULL on failure.
2607 * @param pfnDestructor The destructore function which will be called when the reference count reaches 0.
2608 * @param pvUser1 The first user argument.
2609 * @param pvUser2 The second user argument.
2610 */
2611SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
2612{
2613 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2614 PSUPDRVOBJ pObj;
2615 PSUPDRVUSAGE pUsage;
2616
2617 /*
2618 * Validate the input.
2619 */
2620 AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
2621 AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
2622 AssertPtrReturn(pfnDestructor, NULL);
2623
2624 /*
2625 * Allocate and initialize the object.
2626 */
2627 pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
2628 if (!pObj)
2629 return NULL;
2630 pObj->u32Magic = SUPDRVOBJ_MAGIC;
2631 pObj->enmType = enmType;
2632 pObj->pNext = NULL;
2633 pObj->cUsage = 1;
2634 pObj->pfnDestructor = pfnDestructor;
2635 pObj->pvUser1 = pvUser1;
2636 pObj->pvUser2 = pvUser2;
2637 pObj->CreatorUid = pSession->Uid;
2638 pObj->CreatorGid = pSession->Gid;
2639 pObj->CreatorProcess= pSession->Process;
2640 supdrvOSObjInitCreator(pObj, pSession);
2641
2642 /*
2643 * Allocate the usage record.
2644 * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
2645 */
2646 RTSpinlockAcquire(pDevExt->Spinlock);
2647
2648 pUsage = pDevExt->pUsageFree;
2649 if (pUsage)
2650 pDevExt->pUsageFree = pUsage->pNext;
2651 else
2652 {
2653 RTSpinlockRelease(pDevExt->Spinlock);
2654 pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
2655 if (!pUsage)
2656 {
2657 RTMemFree(pObj);
2658 return NULL;
2659 }
2660 RTSpinlockAcquire(pDevExt->Spinlock);
2661 }
2662
2663 /*
2664 * Insert the object and create the session usage record.
2665 */
2666 /* The object. */
2667 pObj->pNext = pDevExt->pObjs;
2668 pDevExt->pObjs = pObj;
2669
2670 /* The session record. */
2671 pUsage->cUsage = 1;
2672 pUsage->pObj = pObj;
2673 pUsage->pNext = pSession->pUsage;
2674 /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
2675 pSession->pUsage = pUsage;
2676
2677 RTSpinlockRelease(pDevExt->Spinlock);
2678
2679 Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
2680 return pObj;
2681}
2682
2683
2684/**
2685 * Increment the reference counter for the object associating the reference
2686 * with the specified session.
2687 *
2688 * @returns IPRT status code.
2689 * @param pvObj The identifier returned by SUPR0ObjRegister().
2690 * @param pSession The session which is referencing the object.
2691 *
2692 * @remarks The caller should not own any spinlocks and must carefully protect
2693 * itself against potential race with the destructor so freed memory
2694 * isn't accessed here.
2695 */
2696SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
2697{
2698 return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
2699}
2700
2701
2702/**
2703 * Increment the reference counter for the object associating the reference
2704 * with the specified session.
2705 *
2706 * @returns IPRT status code.
2707 * @retval VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
2708 * couldn't be allocated. (If you see this you're not doing the right
2709 * thing and it won't ever work reliably.)
2710 *
2711 * @param pvObj The identifier returned by SUPR0ObjRegister().
2712 * @param pSession The session which is referencing the object.
2713 * @param fNoBlocking Set if it's not OK to block. Never try to make the
2714 * first reference to an object in a session with this
2715 * argument set.
2716 *
2717 * @remarks The caller should not own any spinlocks and must carefully protect
2718 * itself against potential race with the destructor so freed memory
2719 * isn't accessed here.
2720 */
2721SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
2722{
2723 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2724 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2725 int rc = VINF_SUCCESS;
2726 PSUPDRVUSAGE pUsagePre;
2727 PSUPDRVUSAGE pUsage;
2728
2729 /*
2730 * Validate the input.
2731 * Be ready for the destruction race (someone might be stuck in the
2732 * destructor waiting a lock we own).
2733 */
2734 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2735 AssertPtrReturn(pObj, VERR_INVALID_POINTER);
2736 AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
2737 ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
2738 VERR_INVALID_PARAMETER);
2739
2740 RTSpinlockAcquire(pDevExt->Spinlock);
2741
2742 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2743 {
2744 RTSpinlockRelease(pDevExt->Spinlock);
2745
2746 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2747 return VERR_WRONG_ORDER;
2748 }
2749
2750 /*
2751 * Preallocate the usage record if we can.
2752 */
2753 pUsagePre = pDevExt->pUsageFree;
2754 if (pUsagePre)
2755 pDevExt->pUsageFree = pUsagePre->pNext;
2756 else if (!fNoBlocking)
2757 {
2758 RTSpinlockRelease(pDevExt->Spinlock);
2759 pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2760 if (!pUsagePre)
2761 return VERR_NO_MEMORY;
2762
2763 RTSpinlockAcquire(pDevExt->Spinlock);
2764 if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
2765 {
2766 RTSpinlockRelease(pDevExt->Spinlock);
2767
2768 AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
2769 return VERR_WRONG_ORDER;
2770 }
2771 }
2772
2773 /*
2774 * Reference the object.
2775 */
2776 pObj->cUsage++;
2777
2778 /*
2779 * Look for the session record.
2780 */
2781 for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
2782 {
2783 /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2784 if (pUsage->pObj == pObj)
2785 break;
2786 }
2787 if (pUsage)
2788 pUsage->cUsage++;
2789 else if (pUsagePre)
2790 {
2791 /* create a new session record. */
2792 pUsagePre->cUsage = 1;
2793 pUsagePre->pObj = pObj;
2794 pUsagePre->pNext = pSession->pUsage;
2795 pSession->pUsage = pUsagePre;
2796 /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
2797
2798 pUsagePre = NULL;
2799 }
2800 else
2801 {
2802 pObj->cUsage--;
2803 rc = VERR_TRY_AGAIN;
2804 }
2805
2806 /*
2807 * Put any unused usage record into the free list..
2808 */
2809 if (pUsagePre)
2810 {
2811 pUsagePre->pNext = pDevExt->pUsageFree;
2812 pDevExt->pUsageFree = pUsagePre;
2813 }
2814
2815 RTSpinlockRelease(pDevExt->Spinlock);
2816
2817 return rc;
2818}
2819
2820
2821/**
2822 * Decrement / destroy a reference counter record for an object.
2823 *
2824 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
2825 *
2826 * @returns IPRT status code.
2827 * @retval VINF_SUCCESS if not destroyed.
2828 * @retval VINF_OBJECT_DESTROYED if it's destroyed by this release call.
2829 * @retval VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
2830 * string builds.
2831 *
2832 * @param pvObj The identifier returned by SUPR0ObjRegister().
2833 * @param pSession The session which is referencing the object.
2834 */
2835SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
2836{
2837 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2838 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2839 int rc = VERR_INVALID_PARAMETER;
2840 PSUPDRVUSAGE pUsage;
2841 PSUPDRVUSAGE pUsagePrev;
2842
2843 /*
2844 * Validate the input.
2845 */
2846 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2847 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2848 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2849 VERR_INVALID_PARAMETER);
2850
2851 /*
2852 * Acquire the spinlock and look for the usage record.
2853 */
2854 RTSpinlockAcquire(pDevExt->Spinlock);
2855
2856 for (pUsagePrev = NULL, pUsage = pSession->pUsage;
2857 pUsage;
2858 pUsagePrev = pUsage, pUsage = pUsage->pNext)
2859 {
2860 /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
2861 if (pUsage->pObj == pObj)
2862 {
2863 rc = VINF_SUCCESS;
2864 AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
2865 if (pUsage->cUsage > 1)
2866 {
2867 pObj->cUsage--;
2868 pUsage->cUsage--;
2869 }
2870 else
2871 {
2872 /*
2873 * Free the session record.
2874 */
2875 if (pUsagePrev)
2876 pUsagePrev->pNext = pUsage->pNext;
2877 else
2878 pSession->pUsage = pUsage->pNext;
2879 pUsage->pNext = pDevExt->pUsageFree;
2880 pDevExt->pUsageFree = pUsage;
2881
2882 /* What about the object? */
2883 if (pObj->cUsage > 1)
2884 pObj->cUsage--;
2885 else
2886 {
2887 /*
2888 * Object is to be destroyed, unlink it.
2889 */
2890 pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
2891 rc = VINF_OBJECT_DESTROYED;
2892 if (pDevExt->pObjs == pObj)
2893 pDevExt->pObjs = pObj->pNext;
2894 else
2895 {
2896 PSUPDRVOBJ pObjPrev;
2897 for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
2898 if (pObjPrev->pNext == pObj)
2899 {
2900 pObjPrev->pNext = pObj->pNext;
2901 break;
2902 }
2903 Assert(pObjPrev);
2904 }
2905 }
2906 }
2907 break;
2908 }
2909 }
2910
2911 RTSpinlockRelease(pDevExt->Spinlock);
2912
2913 /*
2914 * Call the destructor and free the object if required.
2915 */
2916 if (rc == VINF_OBJECT_DESTROYED)
2917 {
2918 Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
2919 pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
2920 if (pObj->pfnDestructor)
2921 pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
2922 RTMemFree(pObj);
2923 }
2924
2925 AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
2926 return rc;
2927}
2928
2929
2930/**
2931 * Verifies that the current process can access the specified object.
2932 *
2933 * @returns The following IPRT status code:
2934 * @retval VINF_SUCCESS if access was granted.
2935 * @retval VERR_PERMISSION_DENIED if denied access.
2936 * @retval VERR_INVALID_PARAMETER if invalid parameter.
2937 *
2938 * @param pvObj The identifier returned by SUPR0ObjRegister().
2939 * @param pSession The session which wishes to access the object.
2940 * @param pszObjName Object string name. This is optional and depends on the object type.
2941 *
2942 * @remark The caller is responsible for making sure the object isn't removed while
2943 * we're inside this function. If uncertain about this, just call AddRef before calling us.
2944 */
2945SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
2946{
2947 PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
2948 int rc;
2949
2950 /*
2951 * Validate the input.
2952 */
2953 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2954 AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
2955 ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
2956 VERR_INVALID_PARAMETER);
2957
2958 /*
2959 * Check access. (returns true if a decision has been made.)
2960 */
2961 rc = VERR_INTERNAL_ERROR;
2962 if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
2963 return rc;
2964
2965 /*
2966 * Default policy is to allow the user to access his own
2967 * stuff but nothing else.
2968 */
2969 if (pObj->CreatorUid == pSession->Uid)
2970 return VINF_SUCCESS;
2971 return VERR_PERMISSION_DENIED;
2972}
2973
2974
2975/**
2976 * Lock pages.
2977 *
2978 * @returns IPRT status code.
2979 * @param pSession Session to which the locked memory should be associated.
2980 * @param pvR3 Start of the memory range to lock.
2981 * This must be page aligned.
2982 * @param cPages Number of pages to lock.
2983 * @param paPages Where to put the physical addresses of locked memory.
2984 */
2985SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2986{
2987 int rc;
2988 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
2989 const size_t cb = (size_t)cPages << PAGE_SHIFT;
2990 LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
2991
2992 /*
2993 * Verify input.
2994 */
2995 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2996 AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
2997 if ( RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
2998 || !pvR3)
2999 {
3000 Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
3001 return VERR_INVALID_PARAMETER;
3002 }
3003
3004 /*
3005 * Let IPRT do the job.
3006 */
3007 Mem.eType = MEMREF_TYPE_LOCKED;
3008 rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
3009 if (RT_SUCCESS(rc))
3010 {
3011 uint32_t iPage = cPages;
3012 AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
3013 AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
3014
3015 while (iPage-- > 0)
3016 {
3017 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3018 if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
3019 {
3020 AssertMsgFailed(("iPage=%d\n", iPage));
3021 rc = VERR_INTERNAL_ERROR;
3022 break;
3023 }
3024 }
3025 if (RT_SUCCESS(rc))
3026 rc = supdrvMemAdd(&Mem, pSession);
3027 if (RT_FAILURE(rc))
3028 {
3029 int rc2 = RTR0MemObjFree(Mem.MemObj, false);
3030 AssertRC(rc2);
3031 }
3032 }
3033
3034 return rc;
3035}
3036
3037
3038/**
3039 * Unlocks the memory pointed to by pv.
3040 *
3041 * @returns IPRT status code.
3042 * @param pSession Session to which the memory was locked.
3043 * @param pvR3 Memory to unlock.
3044 */
3045SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3046{
3047 LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3048 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3049 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
3050}
3051
3052
3053/**
3054 * Allocates a chunk of page aligned memory with contiguous and fixed physical
3055 * backing.
3056 *
3057 * @returns IPRT status code.
3058 * @param pSession Session data.
3059 * @param cPages Number of pages to allocate.
3060 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
3061 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
3062 * @param pHCPhys Where to put the physical address of allocated memory.
3063 */
3064SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
3065{
3066 int rc;
3067 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3068 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
3069
3070 /*
3071 * Validate input.
3072 */
3073 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3074 if (!ppvR3 || !ppvR0 || !pHCPhys)
3075 {
3076 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
3077 pSession, ppvR0, ppvR3, pHCPhys));
3078 return VERR_INVALID_PARAMETER;
3079
3080 }
3081 if (cPages < 1 || cPages >= 256)
3082 {
3083 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3084 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3085 }
3086
3087 /*
3088 * Let IPRT do the job.
3089 */
3090 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
3091 if (RT_SUCCESS(rc))
3092 {
3093 int rc2;
3094 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3095 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3096 if (RT_SUCCESS(rc))
3097 {
3098 Mem.eType = MEMREF_TYPE_CONT;
3099 rc = supdrvMemAdd(&Mem, pSession);
3100 if (!rc)
3101 {
3102 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3103 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3104 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
3105 return 0;
3106 }
3107
3108 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3109 AssertRC(rc2);
3110 }
3111 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3112 AssertRC(rc2);
3113 }
3114
3115 return rc;
3116}
3117
3118
3119/**
3120 * Frees memory allocated using SUPR0ContAlloc().
3121 *
3122 * @returns IPRT status code.
3123 * @param pSession The session to which the memory was allocated.
3124 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3125 */
3126SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3127{
3128 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3129 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3130 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
3131}
3132
3133
3134/**
3135 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
3136 *
3137 * The memory isn't zeroed.
3138 *
3139 * @returns IPRT status code.
3140 * @param pSession Session data.
3141 * @param cPages Number of pages to allocate.
3142 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
3143 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
3144 * @param paPages Where to put the physical addresses of allocated memory.
3145 */
3146SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
3147{
3148 unsigned iPage;
3149 int rc;
3150 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3151 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
3152
3153 /*
3154 * Validate input.
3155 */
3156 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3157 if (!ppvR3 || !ppvR0 || !paPages)
3158 {
3159 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
3160 pSession, ppvR3, ppvR0, paPages));
3161 return VERR_INVALID_PARAMETER;
3162
3163 }
3164 if (cPages < 1 || cPages >= 256)
3165 {
3166 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
3167 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3168 }
3169
3170 /*
3171 * Let IPRT do the work.
3172 */
3173 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
3174 if (RT_SUCCESS(rc))
3175 {
3176 int rc2;
3177 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3178 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3179 if (RT_SUCCESS(rc))
3180 {
3181 Mem.eType = MEMREF_TYPE_LOW;
3182 rc = supdrvMemAdd(&Mem, pSession);
3183 if (!rc)
3184 {
3185 for (iPage = 0; iPage < cPages; iPage++)
3186 {
3187 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
3188 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
3189 }
3190 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3191 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3192 return 0;
3193 }
3194
3195 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3196 AssertRC(rc2);
3197 }
3198
3199 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3200 AssertRC(rc2);
3201 }
3202
3203 return rc;
3204}
3205
3206
3207/**
3208 * Frees memory allocated using SUPR0LowAlloc().
3209 *
3210 * @returns IPRT status code.
3211 * @param pSession The session to which the memory was allocated.
3212 * @param uPtr Pointer to the memory (ring-3 or ring-0).
3213 */
3214SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3215{
3216 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3217 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3218 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
3219}
3220
3221
3222
3223/**
3224 * Allocates a chunk of memory with both R0 and R3 mappings.
3225 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
3226 *
3227 * @returns IPRT status code.
3228 * @param pSession The session to associated the allocation with.
3229 * @param cb Number of bytes to allocate.
3230 * @param ppvR0 Where to store the address of the Ring-0 mapping.
3231 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3232 */
3233SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
3234{
3235 int rc;
3236 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3237 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
3238
3239 /*
3240 * Validate input.
3241 */
3242 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3243 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
3244 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
3245 if (cb < 1 || cb >= _4M)
3246 {
3247 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
3248 return VERR_INVALID_PARAMETER;
3249 }
3250
3251 /*
3252 * Let IPRT do the work.
3253 */
3254 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
3255 if (RT_SUCCESS(rc))
3256 {
3257 int rc2;
3258 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3259 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3260 if (RT_SUCCESS(rc))
3261 {
3262 Mem.eType = MEMREF_TYPE_MEM;
3263 rc = supdrvMemAdd(&Mem, pSession);
3264 if (!rc)
3265 {
3266 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3267 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3268 return VINF_SUCCESS;
3269 }
3270
3271 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3272 AssertRC(rc2);
3273 }
3274
3275 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3276 AssertRC(rc2);
3277 }
3278
3279 return rc;
3280}
3281
3282
3283/**
3284 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
3285 *
3286 * @returns IPRT status code.
3287 * @param pSession The session to which the memory was allocated.
3288 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3289 * @param paPages Where to store the physical addresses.
3290 */
3291SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
3292{
3293 PSUPDRVBUNDLE pBundle;
3294 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
3295
3296 /*
3297 * Validate input.
3298 */
3299 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3300 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
3301 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
3302
3303 /*
3304 * Search for the address.
3305 */
3306 RTSpinlockAcquire(pSession->Spinlock);
3307 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3308 {
3309 if (pBundle->cUsed > 0)
3310 {
3311 unsigned i;
3312 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3313 {
3314 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
3315 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3316 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
3317 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3318 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
3319 )
3320 )
3321 {
3322 const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
3323 size_t iPage;
3324 for (iPage = 0; iPage < cPages; iPage++)
3325 {
3326 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
3327 paPages[iPage].uReserved = 0;
3328 }
3329 RTSpinlockRelease(pSession->Spinlock);
3330 return VINF_SUCCESS;
3331 }
3332 }
3333 }
3334 }
3335 RTSpinlockRelease(pSession->Spinlock);
3336 Log(("Failed to find %p!!!\n", (void *)uPtr));
3337 return VERR_INVALID_PARAMETER;
3338}
3339
3340
3341/**
3342 * Free memory allocated by SUPR0MemAlloc().
3343 *
3344 * @returns IPRT status code.
3345 * @param pSession The session owning the allocation.
3346 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
3347 */
3348SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
3349{
3350 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
3351 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3352 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
3353}
3354
3355
3356/**
3357 * Allocates a chunk of memory with a kernel or/and a user mode mapping.
3358 *
3359 * The memory is fixed and it's possible to query the physical addresses using
3360 * SUPR0MemGetPhys().
3361 *
3362 * @returns IPRT status code.
3363 * @param pSession The session to associated the allocation with.
3364 * @param cPages The number of pages to allocate.
3365 * @param fFlags Flags, reserved for the future. Must be zero.
3366 * @param ppvR3 Where to store the address of the Ring-3 mapping.
3367 * NULL if no ring-3 mapping.
3368 * @param ppvR3 Where to store the address of the Ring-0 mapping.
3369 * NULL if no ring-0 mapping.
3370 * @param paPages Where to store the addresses of the pages. Optional.
3371 */
3372SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
3373{
3374 int rc;
3375 SUPDRVMEMREF Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
3376 LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
3377
3378 /*
3379 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3380 */
3381 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3382 AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
3383 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3384 AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
3385 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3386 if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
3387 {
3388 Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
3389 return VERR_PAGE_COUNT_OUT_OF_RANGE;
3390 }
3391
3392 /*
3393 * Let IPRT do the work.
3394 */
3395 if (ppvR0)
3396 rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
3397 else
3398 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
3399 if (RT_SUCCESS(rc))
3400 {
3401 int rc2;
3402 if (ppvR3)
3403 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
3404 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
3405 else
3406 Mem.MapObjR3 = NIL_RTR0MEMOBJ;
3407 if (RT_SUCCESS(rc))
3408 {
3409 Mem.eType = MEMREF_TYPE_PAGE;
3410 rc = supdrvMemAdd(&Mem, pSession);
3411 if (!rc)
3412 {
3413 if (ppvR3)
3414 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
3415 if (ppvR0)
3416 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
3417 if (paPages)
3418 {
3419 uint32_t iPage = cPages;
3420 while (iPage-- > 0)
3421 {
3422 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
3423 Assert(paPages[iPage] != NIL_RTHCPHYS);
3424 }
3425 }
3426 return VINF_SUCCESS;
3427 }
3428
3429 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
3430 AssertRC(rc2);
3431 }
3432
3433 rc2 = RTR0MemObjFree(Mem.MemObj, false);
3434 AssertRC(rc2);
3435 }
3436 return rc;
3437}
3438
3439
3440/**
3441 * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
3442 * space.
3443 *
3444 * @returns IPRT status code.
3445 * @param pSession The session to associated the allocation with.
3446 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3447 * @param offSub Where to start mapping. Must be page aligned.
3448 * @param cbSub How much to map. Must be page aligned.
3449 * @param fFlags Flags, MBZ.
3450 * @param ppvR0 Where to return the address of the ring-0 mapping on
3451 * success.
3452 */
3453SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
3454 uint32_t fFlags, PRTR0PTR ppvR0)
3455{
3456 int rc;
3457 PSUPDRVBUNDLE pBundle;
3458 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
3459 LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
3460
3461 /*
3462 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3463 */
3464 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3465 AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
3466 AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
3467 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3468 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3469 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3470
3471 /*
3472 * Find the memory object.
3473 */
3474 RTSpinlockAcquire(pSession->Spinlock);
3475 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3476 {
3477 if (pBundle->cUsed > 0)
3478 {
3479 unsigned i;
3480 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3481 {
3482 if ( ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3483 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3484 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3485 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
3486 || ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
3487 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3488 && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
3489 && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
3490 {
3491 hMemObj = pBundle->aMem[i].MemObj;
3492 break;
3493 }
3494 }
3495 }
3496 }
3497 RTSpinlockRelease(pSession->Spinlock);
3498
3499 rc = VERR_INVALID_PARAMETER;
3500 if (hMemObj != NIL_RTR0MEMOBJ)
3501 {
3502 /*
3503 * Do some further input validations before calling IPRT.
3504 * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
3505 */
3506 size_t cbMemObj = RTR0MemObjSize(hMemObj);
3507 if ( offSub < cbMemObj
3508 && cbSub <= cbMemObj
3509 && offSub + cbSub <= cbMemObj)
3510 {
3511 RTR0MEMOBJ hMapObj;
3512 rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
3513 RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
3514 if (RT_SUCCESS(rc))
3515 *ppvR0 = RTR0MemObjAddress(hMapObj);
3516 }
3517 else
3518 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3519
3520 }
3521 return rc;
3522}
3523
3524
3525/**
3526 * Changes the page level protection of one or more pages previously allocated
3527 * by SUPR0PageAllocEx.
3528 *
3529 * @returns IPRT status code.
3530 * @param pSession The session to associated the allocation with.
3531 * @param pvR3 The ring-3 address returned by SUPR0PageAllocEx.
3532 * NIL_RTR3PTR if the ring-3 mapping should be unaffected.
3533 * @param pvR0 The ring-0 address returned by SUPR0PageAllocEx.
3534 * NIL_RTR0PTR if the ring-0 mapping should be unaffected.
3535 * @param offSub Where to start changing. Must be page aligned.
3536 * @param cbSub How much to change. Must be page aligned.
3537 * @param fProt The new page level protection, see RTMEM_PROT_*.
3538 */
3539SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
3540{
3541 int rc;
3542 PSUPDRVBUNDLE pBundle;
3543 RTR0MEMOBJ hMemObjR0 = NIL_RTR0MEMOBJ;
3544 RTR0MEMOBJ hMemObjR3 = NIL_RTR0MEMOBJ;
3545 LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
3546
3547 /*
3548 * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
3549 */
3550 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3551 AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
3552 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3553 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
3554 AssertReturn(cbSub, VERR_INVALID_PARAMETER);
3555
3556 /*
3557 * Find the memory object.
3558 */
3559 RTSpinlockAcquire(pSession->Spinlock);
3560 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
3561 {
3562 if (pBundle->cUsed > 0)
3563 {
3564 unsigned i;
3565 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
3566 {
3567 if ( pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
3568 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
3569 && ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
3570 || pvR3 == NIL_RTR3PTR)
3571 && ( pvR0 == NIL_RTR0PTR
3572 || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
3573 && ( pvR3 == NIL_RTR3PTR
3574 || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
3575 {
3576 if (pvR0 != NIL_RTR0PTR)
3577 hMemObjR0 = pBundle->aMem[i].MemObj;
3578 if (pvR3 != NIL_RTR3PTR)
3579 hMemObjR3 = pBundle->aMem[i].MapObjR3;
3580 break;
3581 }
3582 }
3583 }
3584 }
3585 RTSpinlockRelease(pSession->Spinlock);
3586
3587 rc = VERR_INVALID_PARAMETER;
3588 if ( hMemObjR0 != NIL_RTR0MEMOBJ
3589 || hMemObjR3 != NIL_RTR0MEMOBJ)
3590 {
3591 /*
3592 * Do some further input validations before calling IPRT.
3593 */
3594 size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
3595 if ( offSub < cbMemObj
3596 && cbSub <= cbMemObj
3597 && offSub + cbSub <= cbMemObj)
3598 {
3599 rc = VINF_SUCCESS;
3600 if (hMemObjR3 != NIL_RTR0PTR)
3601 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
3602 if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
3603 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
3604 }
3605 else
3606 SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
3607
3608 }
3609 return rc;
3610
3611}
3612
3613
3614/**
3615 * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
3616 *
3617 * @returns IPRT status code.
3618 * @param pSession The session owning the allocation.
3619 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc() or
3620 * SUPR0PageAllocEx().
3621 */
3622SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
3623{
3624 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
3625 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3626 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
3627}
3628
3629
3630/**
3631 * Gets the paging mode of the current CPU.
3632 *
3633 * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
3634 */
3635SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
3636{
3637 SUPPAGINGMODE enmMode;
3638
3639 RTR0UINTREG cr0 = ASMGetCR0();
3640 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3641 enmMode = SUPPAGINGMODE_INVALID;
3642 else
3643 {
3644 RTR0UINTREG cr4 = ASMGetCR4();
3645 uint32_t fNXEPlusLMA = 0;
3646 if (cr4 & X86_CR4_PAE)
3647 {
3648 uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
3649 if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
3650 {
3651 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3652 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3653 fNXEPlusLMA |= RT_BIT(0);
3654 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3655 fNXEPlusLMA |= RT_BIT(1);
3656 }
3657 }
3658
3659 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3660 {
3661 case 0:
3662 enmMode = SUPPAGINGMODE_32_BIT;
3663 break;
3664
3665 case X86_CR4_PGE:
3666 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3667 break;
3668
3669 case X86_CR4_PAE:
3670 enmMode = SUPPAGINGMODE_PAE;
3671 break;
3672
3673 case X86_CR4_PAE | RT_BIT(0):
3674 enmMode = SUPPAGINGMODE_PAE_NX;
3675 break;
3676
3677 case X86_CR4_PAE | X86_CR4_PGE:
3678 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3679 break;
3680
3681 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3682 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3683 break;
3684
3685 case RT_BIT(1) | X86_CR4_PAE:
3686 enmMode = SUPPAGINGMODE_AMD64;
3687 break;
3688
3689 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3690 enmMode = SUPPAGINGMODE_AMD64_NX;
3691 break;
3692
3693 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3694 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3695 break;
3696
3697 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3698 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3699 break;
3700
3701 default:
3702 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3703 enmMode = SUPPAGINGMODE_INVALID;
3704 break;
3705 }
3706 }
3707 return enmMode;
3708}
3709
3710
3711/**
3712 * Enables or disabled hardware virtualization extensions using native OS APIs.
3713 *
3714 * @returns VBox status code.
3715 * @retval VINF_SUCCESS on success.
3716 * @retval VERR_NOT_SUPPORTED if not supported by the native OS.
3717 *
3718 * @param fEnable Whether to enable or disable.
3719 */
3720SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
3721{
3722#ifdef RT_OS_DARWIN
3723 return supdrvOSEnableVTx(fEnable);
3724#else
3725 return VERR_NOT_SUPPORTED;
3726#endif
3727}
3728
3729
3730/**
3731 * Suspends hardware virtualization extensions using the native OS API.
3732 *
3733 * This is called prior to entering raw-mode context.
3734 *
3735 * @returns @c true if suspended, @c false if not.
3736 */
3737SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
3738{
3739#ifdef RT_OS_DARWIN
3740 return supdrvOSSuspendVTxOnCpu();
3741#else
3742 return false;
3743#endif
3744}
3745
3746
3747/**
3748 * Resumes hardware virtualization extensions using the native OS API.
3749 *
3750 * This is called after to entering raw-mode context.
3751 *
3752 * @param fSuspended The return value of SUPR0SuspendVTxOnCpu.
3753 */
3754SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
3755{
3756#ifdef RT_OS_DARWIN
3757 supdrvOSResumeVTxOnCpu(fSuspended);
3758#else
3759 Assert(!fSuspended);
3760#endif
3761}
3762
3763
3764/**
3765 * Queries the AMD-V and VT-x capabilities of the calling CPU.
3766 *
3767 * @returns VBox status code.
3768 * @retval VERR_VMX_NO_VMX
3769 * @retval VERR_VMX_MSR_ALL_VMXON_DISABLED
3770 * @retval VERR_VMX_MSR_VMXON_DISABLED
3771 * @retval VERR_VMX_MSR_LOCKING_FAILED
3772 * @retval VERR_SVM_NO_SVM
3773 * @retval VERR_SVM_DISABLED
3774 * @retval VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
3775 * (centaur) CPU.
3776 *
3777 * @param pSession The session handle.
3778 * @param pfCaps Where to store the capabilities.
3779 */
3780SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
3781{
3782 int rc = VERR_UNSUPPORTED_CPU;
3783 bool fIsSmxModeAmbiguous = false;
3784 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
3785
3786 /*
3787 * Input validation.
3788 */
3789 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3790 AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
3791
3792 *pfCaps = 0;
3793 /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
3794 RTThreadPreemptDisable(&PreemptState);
3795 if (ASMHasCpuId())
3796 {
3797 uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
3798 uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
3799
3800 ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
3801 ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
3802
3803 if ( ASMIsValidStdRange(uMaxId)
3804 && ( ASMIsIntelCpuEx( uVendorEBX, uVendorECX, uVendorEDX)
3805 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
3806 )
3807 {
3808 if ( (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
3809 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3810 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3811 )
3812 {
3813 /** @todo Unify code with hmR0InitIntelCpu(). */
3814 uint64_t u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3815 bool const fMaybeSmxMode = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
3816 bool fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3817 bool fSmxVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3818 bool fVmxAllowed = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3819
3820 /* Check if the LOCK bit is set but excludes the required VMXON bit. */
3821 if (fMsrLocked)
3822 {
3823 if (fVmxAllowed && fSmxVmxAllowed)
3824 rc = VINF_SUCCESS;
3825 else if (!fVmxAllowed && !fSmxVmxAllowed)
3826 rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
3827 else if (!fMaybeSmxMode)
3828 {
3829 if (fVmxAllowed)
3830 rc = VINF_SUCCESS;
3831 else
3832 rc = VERR_VMX_MSR_VMXON_DISABLED;
3833 }
3834 else
3835 {
3836 /*
3837 * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
3838 * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
3839 * See @bugref{6873}.
3840 */
3841 Assert(fMaybeSmxMode == true);
3842 fIsSmxModeAmbiguous = true;
3843 rc = VINF_SUCCESS;
3844 }
3845 }
3846 else
3847 {
3848 /*
3849 * MSR is not yet locked; we can change it ourselves here.
3850 * Once the lock bit is set, this MSR can no longer be modified.
3851 *
3852 * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
3853 * accurately. See @bugref{6873}.
3854 */
3855 u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
3856 | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
3857 | MSR_IA32_FEATURE_CONTROL_VMXON;
3858 ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
3859
3860 /* Verify. */
3861 u64FeatMsr = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
3862 fMsrLocked = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
3863 fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
3864 fVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
3865 if (fSmxVmxAllowed && fVmxAllowed)
3866 rc = VINF_SUCCESS;
3867 else
3868 rc = VERR_VMX_MSR_LOCKING_FAILED;
3869 }
3870
3871 if (rc == VINF_SUCCESS)
3872 {
3873 VMXCAPABILITY vtCaps;
3874
3875 *pfCaps |= SUPVTCAPS_VT_X;
3876
3877 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
3878 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
3879 {
3880 vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
3881 if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
3882 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3883 }
3884 }
3885 }
3886 else
3887 rc = VERR_VMX_NO_VMX;
3888 }
3889 else if ( ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
3890 && ASMIsValidStdRange(uMaxId))
3891 {
3892 uint32_t fExtFeaturesEcx, uExtMaxId;
3893 ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
3894 ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
3895 if ( ASMIsValidExtRange(uExtMaxId)
3896 && uExtMaxId >= 0x8000000a
3897 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
3898 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
3899 && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
3900 )
3901 {
3902 /* Check if SVM is disabled */
3903 uint64_t u64FeatMsr = ASMRdMsr(MSR_K8_VM_CR);
3904 if (!(u64FeatMsr & MSR_K8_VM_CR_SVM_DISABLE))
3905 {
3906 uint32_t fSvmFeatures;
3907 *pfCaps |= SUPVTCAPS_AMD_V;
3908
3909 /* Query AMD-V features. */
3910 ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
3911 if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
3912 *pfCaps |= SUPVTCAPS_NESTED_PAGING;
3913
3914 rc = VINF_SUCCESS;
3915 }
3916 else
3917 rc = VERR_SVM_DISABLED;
3918 }
3919 else
3920 rc = VERR_SVM_NO_SVM;
3921 }
3922 }
3923
3924 RTThreadPreemptRestore(&PreemptState);
3925 if (fIsSmxModeAmbiguous)
3926 SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
3927 return rc;
3928}
3929
3930
3931/**
3932 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
3933 * updating.
3934 *
3935 * @param pGip Pointer to the GIP.
3936 * @param pGipCpu The per CPU structure for this CPU.
3937 * @param u64NanoTS The current time.
3938 */
3939static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
3940{
3941 pGipCpu->u64TSC = SUPReadTsc() - pGipCpu->u32UpdateIntervalTSC;
3942 pGipCpu->u64NanoTS = u64NanoTS;
3943}
3944
3945
3946/**
3947 * Set the current TSC and NanoTS value for the CPU.
3948 *
3949 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
3950 * @param pvUser1 Pointer to the ring-0 GIP mapping.
3951 * @param pvUser2 Pointer to the variable holding the current time.
3952 */
3953static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3954{
3955 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
3956 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3957
3958 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
3959 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
3960
3961 NOREF(pvUser2);
3962 NOREF(idCpu);
3963}
3964
3965
3966/**
3967 * Maps the GIP into userspace and/or get the physical address of the GIP.
3968 *
3969 * @returns IPRT status code.
3970 * @param pSession Session to which the GIP mapping should belong.
3971 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
3972 * @param pHCPhysGip Where to store the physical address. (optional)
3973 *
3974 * @remark There is no reference counting on the mapping, so one call to this function
3975 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
3976 * and remove the session as a GIP user.
3977 */
3978SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
3979{
3980 int rc;
3981 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
3982 RTR3PTR pGipR3 = NIL_RTR3PTR;
3983 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3984 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
3985
3986 /*
3987 * Validate
3988 */
3989 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3990 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
3991 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
3992
3993#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3994 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
3995#else
3996 RTSemFastMutexRequest(pDevExt->mtxGip);
3997#endif
3998 if (pDevExt->pGip)
3999 {
4000 /*
4001 * Map it?
4002 */
4003 rc = VINF_SUCCESS;
4004 if (ppGipR3)
4005 {
4006 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4007 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
4008 RTMEM_PROT_READ, RTR0ProcHandleSelf());
4009 if (RT_SUCCESS(rc))
4010 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
4011 }
4012
4013 /*
4014 * Get physical address.
4015 */
4016 if (pHCPhysGip && RT_SUCCESS(rc))
4017 HCPhys = pDevExt->HCPhysGip;
4018
4019 /*
4020 * Reference globally.
4021 */
4022 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
4023 {
4024 pSession->fGipReferenced = 1;
4025 pDevExt->cGipUsers++;
4026 if (pDevExt->cGipUsers == 1)
4027 {
4028 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
4029 uint64_t u64NanoTS;
4030 uint32_t u32SystemResolution;
4031 unsigned i;
4032
4033 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
4034
4035 /*
4036 * Try bump up the system timer resolution.
4037 * The more interrupts the better...
4038 */
4039 /** @todo On Windows, RTTimerRequestSystemGranularity() always succeeds, so
4040 * whats the point of the remaining calls? */
4041 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
4042 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
4043 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
4044 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
4045 )
4046 {
4047 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
4048 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
4049 }
4050
4051 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
4052 {
4053 for (i = 0; i < pGipR0->cCpus; i++)
4054 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
4055 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
4056 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
4057 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
4058 }
4059
4060 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
4061 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
4062 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
4063 || RTMpGetOnlineCount() == 1)
4064 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
4065 else
4066 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
4067
4068#ifndef DO_NOT_START_GIP
4069 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
4070#endif
4071 rc = VINF_SUCCESS;
4072 }
4073 }
4074 }
4075 else
4076 {
4077 rc = VERR_GENERAL_FAILURE;
4078 Log(("SUPR0GipMap: GIP is not available!\n"));
4079 }
4080#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4081 RTSemMutexRelease(pDevExt->mtxGip);
4082#else
4083 RTSemFastMutexRelease(pDevExt->mtxGip);
4084#endif
4085
4086 /*
4087 * Write returns.
4088 */
4089 if (pHCPhysGip)
4090 *pHCPhysGip = HCPhys;
4091 if (ppGipR3)
4092 *ppGipR3 = pGipR3;
4093
4094#ifdef DEBUG_DARWIN_GIP
4095 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4096#else
4097 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
4098#endif
4099 return rc;
4100}
4101
4102
4103/**
4104 * Unmaps any user mapping of the GIP and terminates all GIP access
4105 * from this session.
4106 *
4107 * @returns IPRT status code.
4108 * @param pSession Session to which the GIP mapping should belong.
4109 */
4110SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
4111{
4112 int rc = VINF_SUCCESS;
4113 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
4114#ifdef DEBUG_DARWIN_GIP
4115 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
4116 pSession,
4117 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
4118 pSession->GipMapObjR3));
4119#else
4120 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
4121#endif
4122 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4123
4124#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4125 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4126#else
4127 RTSemFastMutexRequest(pDevExt->mtxGip);
4128#endif
4129
4130 /*
4131 * Unmap anything?
4132 */
4133 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
4134 {
4135 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
4136 AssertRC(rc);
4137 if (RT_SUCCESS(rc))
4138 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
4139 }
4140
4141 /*
4142 * Dereference global GIP.
4143 */
4144 if (pSession->fGipReferenced && !rc)
4145 {
4146 pSession->fGipReferenced = 0;
4147 if ( pDevExt->cGipUsers > 0
4148 && !--pDevExt->cGipUsers)
4149 {
4150 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
4151#ifndef DO_NOT_START_GIP
4152 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
4153#endif
4154
4155 if (pDevExt->u32SystemTimerGranularityGrant)
4156 {
4157 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
4158 AssertRC(rc2);
4159 pDevExt->u32SystemTimerGranularityGrant = 0;
4160 }
4161 }
4162 }
4163
4164#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4165 RTSemMutexRelease(pDevExt->mtxGip);
4166#else
4167 RTSemFastMutexRelease(pDevExt->mtxGip);
4168#endif
4169
4170 return rc;
4171}
4172
4173
4174/**
4175 * Gets the GIP pointer.
4176 *
4177 * @returns Pointer to the GIP or NULL.
4178 */
4179SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
4180{
4181 return g_pSUPGlobalInfoPage;
4182}
4183
4184
4185/**
4186 * Register a component factory with the support driver.
4187 *
4188 * This is currently restricted to kernel sessions only.
4189 *
4190 * @returns VBox status code.
4191 * @retval VINF_SUCCESS on success.
4192 * @retval VERR_NO_MEMORY if we're out of memory.
4193 * @retval VERR_ALREADY_EXISTS if the factory has already been registered.
4194 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4195 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4196 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4197 *
4198 * @param pSession The SUPDRV session (must be a ring-0 session).
4199 * @param pFactory Pointer to the component factory registration structure.
4200 *
4201 * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
4202 */
4203SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4204{
4205 PSUPDRVFACTORYREG pNewReg;
4206 const char *psz;
4207 int rc;
4208
4209 /*
4210 * Validate parameters.
4211 */
4212 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4213 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4214 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4215 AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
4216 psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
4217 AssertReturn(psz, VERR_INVALID_PARAMETER);
4218
4219 /*
4220 * Allocate and initialize a new registration structure.
4221 */
4222 pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
4223 if (pNewReg)
4224 {
4225 pNewReg->pNext = NULL;
4226 pNewReg->pFactory = pFactory;
4227 pNewReg->pSession = pSession;
4228 pNewReg->cchName = psz - &pFactory->szName[0];
4229
4230 /*
4231 * Add it to the tail of the list after checking for prior registration.
4232 */
4233 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4234 if (RT_SUCCESS(rc))
4235 {
4236 PSUPDRVFACTORYREG pPrev = NULL;
4237 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4238 while (pCur && pCur->pFactory != pFactory)
4239 {
4240 pPrev = pCur;
4241 pCur = pCur->pNext;
4242 }
4243 if (!pCur)
4244 {
4245 if (pPrev)
4246 pPrev->pNext = pNewReg;
4247 else
4248 pSession->pDevExt->pComponentFactoryHead = pNewReg;
4249 rc = VINF_SUCCESS;
4250 }
4251 else
4252 rc = VERR_ALREADY_EXISTS;
4253
4254 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4255 }
4256
4257 if (RT_FAILURE(rc))
4258 RTMemFree(pNewReg);
4259 }
4260 else
4261 rc = VERR_NO_MEMORY;
4262 return rc;
4263}
4264
4265
4266/**
4267 * Deregister a component factory.
4268 *
4269 * @returns VBox status code.
4270 * @retval VINF_SUCCESS on success.
4271 * @retval VERR_NOT_FOUND if the factory wasn't registered.
4272 * @retval VERR_ACCESS_DENIED if it isn't a kernel session.
4273 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4274 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4275 *
4276 * @param pSession The SUPDRV session (must be a ring-0 session).
4277 * @param pFactory Pointer to the component factory registration structure
4278 * previously passed SUPR0ComponentRegisterFactory().
4279 *
4280 * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
4281 */
4282SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
4283{
4284 int rc;
4285
4286 /*
4287 * Validate parameters.
4288 */
4289 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4290 AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
4291 AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
4292
4293 /*
4294 * Take the lock and look for the registration record.
4295 */
4296 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4297 if (RT_SUCCESS(rc))
4298 {
4299 PSUPDRVFACTORYREG pPrev = NULL;
4300 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4301 while (pCur && pCur->pFactory != pFactory)
4302 {
4303 pPrev = pCur;
4304 pCur = pCur->pNext;
4305 }
4306 if (pCur)
4307 {
4308 if (!pPrev)
4309 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
4310 else
4311 pPrev->pNext = pCur->pNext;
4312
4313 pCur->pNext = NULL;
4314 pCur->pFactory = NULL;
4315 pCur->pSession = NULL;
4316 rc = VINF_SUCCESS;
4317 }
4318 else
4319 rc = VERR_NOT_FOUND;
4320
4321 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4322
4323 RTMemFree(pCur);
4324 }
4325 return rc;
4326}
4327
4328
4329/**
4330 * Queries a component factory.
4331 *
4332 * @returns VBox status code.
4333 * @retval VERR_INVALID_PARAMETER on invalid parameter.
4334 * @retval VERR_INVALID_POINTER on invalid pointer parameter.
4335 * @retval VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
4336 * @retval VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
4337 *
4338 * @param pSession The SUPDRV session.
4339 * @param pszName The name of the component factory.
4340 * @param pszInterfaceUuid The UUID of the factory interface (stringified).
4341 * @param ppvFactoryIf Where to store the factory interface.
4342 */
4343SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
4344{
4345 const char *pszEnd;
4346 size_t cchName;
4347 int rc;
4348
4349 /*
4350 * Validate parameters.
4351 */
4352 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4353
4354 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
4355 pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
4356 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4357 cchName = pszEnd - pszName;
4358
4359 AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
4360 pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
4361 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
4362
4363 AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
4364 *ppvFactoryIf = NULL;
4365
4366 /*
4367 * Take the lock and try all factories by this name.
4368 */
4369 rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
4370 if (RT_SUCCESS(rc))
4371 {
4372 PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
4373 rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
4374 while (pCur)
4375 {
4376 if ( pCur->cchName == cchName
4377 && !memcmp(pCur->pFactory->szName, pszName, cchName))
4378 {
4379 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
4380 if (pvFactory)
4381 {
4382 *ppvFactoryIf = pvFactory;
4383 rc = VINF_SUCCESS;
4384 break;
4385 }
4386 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
4387 }
4388
4389 /* next */
4390 pCur = pCur->pNext;
4391 }
4392
4393 RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
4394 }
4395 return rc;
4396}
4397
4398
4399/**
4400 * Adds a memory object to the session.
4401 *
4402 * @returns IPRT status code.
4403 * @param pMem Memory tracking structure containing the
4404 * information to track.
4405 * @param pSession The session.
4406 */
4407static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
4408{
4409 PSUPDRVBUNDLE pBundle;
4410
4411 /*
4412 * Find free entry and record the allocation.
4413 */
4414 RTSpinlockAcquire(pSession->Spinlock);
4415 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4416 {
4417 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
4418 {
4419 unsigned i;
4420 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4421 {
4422 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
4423 {
4424 pBundle->cUsed++;
4425 pBundle->aMem[i] = *pMem;
4426 RTSpinlockRelease(pSession->Spinlock);
4427 return VINF_SUCCESS;
4428 }
4429 }
4430 AssertFailed(); /* !!this can't be happening!!! */
4431 }
4432 }
4433 RTSpinlockRelease(pSession->Spinlock);
4434
4435 /*
4436 * Need to allocate a new bundle.
4437 * Insert into the last entry in the bundle.
4438 */
4439 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
4440 if (!pBundle)
4441 return VERR_NO_MEMORY;
4442
4443 /* take last entry. */
4444 pBundle->cUsed++;
4445 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
4446
4447 /* insert into list. */
4448 RTSpinlockAcquire(pSession->Spinlock);
4449 pBundle->pNext = pSession->Bundle.pNext;
4450 pSession->Bundle.pNext = pBundle;
4451 RTSpinlockRelease(pSession->Spinlock);
4452
4453 return VINF_SUCCESS;
4454}
4455
4456
4457/**
4458 * Releases a memory object referenced by pointer and type.
4459 *
4460 * @returns IPRT status code.
4461 * @param pSession Session data.
4462 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
4463 * @param eType Memory type.
4464 */
4465static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
4466{
4467 PSUPDRVBUNDLE pBundle;
4468
4469 /*
4470 * Validate input.
4471 */
4472 if (!uPtr)
4473 {
4474 Log(("Illegal address %p\n", (void *)uPtr));
4475 return VERR_INVALID_PARAMETER;
4476 }
4477
4478 /*
4479 * Search for the address.
4480 */
4481 RTSpinlockAcquire(pSession->Spinlock);
4482 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
4483 {
4484 if (pBundle->cUsed > 0)
4485 {
4486 unsigned i;
4487 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
4488 {
4489 if ( pBundle->aMem[i].eType == eType
4490 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
4491 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
4492 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
4493 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
4494 )
4495 {
4496 /* Make a copy of it and release it outside the spinlock. */
4497 SUPDRVMEMREF Mem = pBundle->aMem[i];
4498 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
4499 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
4500 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
4501 RTSpinlockRelease(pSession->Spinlock);
4502
4503 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
4504 {
4505 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
4506 AssertRC(rc); /** @todo figure out how to handle this. */
4507 }
4508 if (Mem.MemObj != NIL_RTR0MEMOBJ)
4509 {
4510 int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
4511 AssertRC(rc); /** @todo figure out how to handle this. */
4512 }
4513 return VINF_SUCCESS;
4514 }
4515 }
4516 }
4517 }
4518 RTSpinlockRelease(pSession->Spinlock);
4519 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
4520 return VERR_INVALID_PARAMETER;
4521}
4522
4523
4524/**
4525 * Opens an image. If it's the first time it's opened the call must upload
4526 * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
4527 *
4528 * This is the 1st step of the loading.
4529 *
4530 * @returns IPRT status code.
4531 * @param pDevExt Device globals.
4532 * @param pSession Session data.
4533 * @param pReq The open request.
4534 */
4535static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
4536{
4537 int rc;
4538 PSUPDRVLDRIMAGE pImage;
4539 void *pv;
4540 size_t cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
4541 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
4542
4543 /*
4544 * Check if we got an instance of the image already.
4545 */
4546 supdrvLdrLock(pDevExt);
4547 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
4548 {
4549 if ( pImage->szName[cchName] == '\0'
4550 && !memcmp(pImage->szName, pReq->u.In.szName, cchName))
4551 {
4552 if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
4553 {
4554 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
4555 pImage->cUsage++;
4556 pReq->u.Out.pvImageBase = pImage->pvImage;
4557 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
4558 pReq->u.Out.fNativeLoader = pImage->fNative;
4559 supdrvLdrAddUsage(pSession, pImage);
4560 supdrvLdrUnlock(pDevExt);
4561 return VINF_SUCCESS;
4562 }
4563 supdrvLdrUnlock(pDevExt);
4564 Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
4565 return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
4566 }
4567 }
4568 /* (not found - add it!) */
4569
4570 /*
4571 * Allocate memory.
4572 */
4573 Assert(cchName < sizeof(pImage->szName));
4574 pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
4575 if (!pv)
4576 {
4577 supdrvLdrUnlock(pDevExt);
4578 Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
4579 return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
4580 }
4581
4582 /*
4583 * Setup and link in the LDR stuff.
4584 */
4585 pImage = (PSUPDRVLDRIMAGE)pv;
4586 pImage->pvImage = NULL;
4587 pImage->pvImageAlloc = NULL;
4588 pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
4589 pImage->cbImageBits = pReq->u.In.cbImageBits;
4590 pImage->cSymbols = 0;
4591 pImage->paSymbols = NULL;
4592 pImage->pachStrTab = NULL;
4593 pImage->cbStrTab = 0;
4594 pImage->pfnModuleInit = NULL;
4595 pImage->pfnModuleTerm = NULL;
4596 pImage->pfnServiceReqHandler = NULL;
4597 pImage->uState = SUP_IOCTL_LDR_OPEN;
4598 pImage->cUsage = 1;
4599 pImage->pDevExt = pDevExt;
4600 memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
4601
4602 /*
4603 * Try load it using the native loader, if that isn't supported, fall back
4604 * on the older method.
4605 */
4606 pImage->fNative = true;
4607 rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
4608 if (rc == VERR_NOT_SUPPORTED)
4609 {
4610 pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
4611 pImage->pvImage = RT_ALIGN_P(pImage->pvImageAlloc, 32);
4612 pImage->fNative = false;
4613 rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
4614 }
4615 if (RT_FAILURE(rc))
4616 {
4617 supdrvLdrUnlock(pDevExt);
4618 RTMemFree(pImage);
4619 Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
4620 return rc;
4621 }
4622 Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
4623
4624 /*
4625 * Link it.
4626 */
4627 pImage->pNext = pDevExt->pLdrImages;
4628 pDevExt->pLdrImages = pImage;
4629
4630 supdrvLdrAddUsage(pSession, pImage);
4631
4632 pReq->u.Out.pvImageBase = pImage->pvImage;
4633 pReq->u.Out.fNeedsLoading = true;
4634 pReq->u.Out.fNativeLoader = pImage->fNative;
4635 supdrvOSLdrNotifyOpened(pDevExt, pImage);
4636
4637 supdrvLdrUnlock(pDevExt);
4638 return VINF_SUCCESS;
4639}
4640
4641
4642/**
4643 * Worker that validates a pointer to an image entrypoint.
4644 *
4645 * @returns IPRT status code.
4646 * @param pDevExt The device globals.
4647 * @param pImage The loader image.
4648 * @param pv The pointer into the image.
4649 * @param fMayBeNull Whether it may be NULL.
4650 * @param pszWhat What is this entrypoint? (for logging)
4651 * @param pbImageBits The image bits prepared by ring-3.
4652 *
4653 * @remarks Will leave the lock on failure.
4654 */
4655static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
4656 bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
4657{
4658 if (!fMayBeNull || pv)
4659 {
4660 if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
4661 {
4662 supdrvLdrUnlock(pDevExt);
4663 Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
4664 return VERR_INVALID_PARAMETER;
4665 }
4666
4667 if (pImage->fNative)
4668 {
4669 int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
4670 if (RT_FAILURE(rc))
4671 {
4672 supdrvLdrUnlock(pDevExt);
4673 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
4674 return rc;
4675 }
4676 }
4677 }
4678 return VINF_SUCCESS;
4679}
4680
4681
4682/**
4683 * Loads the image bits.
4684 *
4685 * This is the 2nd step of the loading.
4686 *
4687 * @returns IPRT status code.
4688 * @param pDevExt Device globals.
4689 * @param pSession Session data.
4690 * @param pReq The request.
4691 */
4692static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
4693{
4694 PSUPDRVLDRUSAGE pUsage;
4695 PSUPDRVLDRIMAGE pImage;
4696 int rc;
4697 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
4698
4699 /*
4700 * Find the ldr image.
4701 */
4702 supdrvLdrLock(pDevExt);
4703 pUsage = pSession->pLdrUsage;
4704 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4705 pUsage = pUsage->pNext;
4706 if (!pUsage)
4707 {
4708 supdrvLdrUnlock(pDevExt);
4709 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
4710 return VERR_INVALID_HANDLE;
4711 }
4712 pImage = pUsage->pImage;
4713
4714 /*
4715 * Validate input.
4716 */
4717 if ( pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
4718 || pImage->cbImageBits != pReq->u.In.cbImageBits)
4719 {
4720 supdrvLdrUnlock(pDevExt);
4721 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
4722 pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
4723 return VERR_INVALID_HANDLE;
4724 }
4725
4726 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
4727 {
4728 unsigned uState = pImage->uState;
4729 supdrvLdrUnlock(pDevExt);
4730 if (uState != SUP_IOCTL_LDR_LOAD)
4731 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
4732 return VERR_ALREADY_LOADED;
4733 }
4734
4735 switch (pReq->u.In.eEPType)
4736 {
4737 case SUPLDRLOADEP_NOTHING:
4738 break;
4739
4740 case SUPLDRLOADEP_VMMR0:
4741 rc = supdrvLdrValidatePointer( pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0, false, pReq->u.In.abImage, "pvVMMR0");
4742 if (RT_SUCCESS(rc))
4743 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt, false, pReq->u.In.abImage, "pvVMMR0EntryInt");
4744 if (RT_SUCCESS(rc))
4745 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
4746 if (RT_SUCCESS(rc))
4747 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx, false, pReq->u.In.abImage, "pvVMMR0EntryEx");
4748 if (RT_FAILURE(rc))
4749 return rc;
4750 break;
4751
4752 case SUPLDRLOADEP_SERVICE:
4753 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
4754 if (RT_FAILURE(rc))
4755 return rc;
4756 if ( pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
4757 || pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
4758 || pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
4759 {
4760 supdrvLdrUnlock(pDevExt);
4761 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
4762 pImage->pvImage, pReq->u.In.cbImageWithTabs,
4763 pReq->u.In.EP.Service.apvReserved[0],
4764 pReq->u.In.EP.Service.apvReserved[1],
4765 pReq->u.In.EP.Service.apvReserved[2]));
4766 return VERR_INVALID_PARAMETER;
4767 }
4768 break;
4769
4770 default:
4771 supdrvLdrUnlock(pDevExt);
4772 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
4773 return VERR_INVALID_PARAMETER;
4774 }
4775
4776 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
4777 if (RT_FAILURE(rc))
4778 return rc;
4779 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
4780 if (RT_FAILURE(rc))
4781 return rc;
4782
4783 /*
4784 * Allocate and copy the tables.
4785 * (No need to do try/except as this is a buffered request.)
4786 */
4787 pImage->cbStrTab = pReq->u.In.cbStrTab;
4788 if (pImage->cbStrTab)
4789 {
4790 pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
4791 if (pImage->pachStrTab)
4792 memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
4793 else
4794 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
4795 }
4796
4797 pImage->cSymbols = pReq->u.In.cSymbols;
4798 if (RT_SUCCESS(rc) && pImage->cSymbols)
4799 {
4800 size_t cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
4801 pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
4802 if (pImage->paSymbols)
4803 memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
4804 else
4805 rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
4806 }
4807
4808 /*
4809 * Copy the bits / complete native loading.
4810 */
4811 if (RT_SUCCESS(rc))
4812 {
4813 pImage->uState = SUP_IOCTL_LDR_LOAD;
4814 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
4815 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
4816
4817 if (pImage->fNative)
4818 rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
4819 else
4820 {
4821 memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
4822 Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
4823 }
4824 }
4825
4826 /*
4827 * Update any entry points.
4828 */
4829 if (RT_SUCCESS(rc))
4830 {
4831 switch (pReq->u.In.eEPType)
4832 {
4833 default:
4834 case SUPLDRLOADEP_NOTHING:
4835 rc = VINF_SUCCESS;
4836 break;
4837 case SUPLDRLOADEP_VMMR0:
4838 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
4839 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
4840 break;
4841 case SUPLDRLOADEP_SERVICE:
4842 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
4843 rc = VINF_SUCCESS;
4844 break;
4845 }
4846 }
4847
4848 /*
4849 * On success call the module initialization.
4850 */
4851 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
4852 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
4853 {
4854 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
4855 pDevExt->pLdrInitImage = pImage;
4856 pDevExt->hLdrInitThread = RTThreadNativeSelf();
4857 rc = pImage->pfnModuleInit(pImage);
4858 pDevExt->pLdrInitImage = NULL;
4859 pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
4860 if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
4861 supdrvLdrUnsetVMMR0EPs(pDevExt);
4862 }
4863 SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
4864
4865 if (RT_FAILURE(rc))
4866 {
4867 /* Inform the tracing component in case ModuleInit registered TPs. */
4868 supdrvTracerModuleUnloading(pDevExt, pImage);
4869
4870 pImage->uState = SUP_IOCTL_LDR_OPEN;
4871 pImage->pfnModuleInit = NULL;
4872 pImage->pfnModuleTerm = NULL;
4873 pImage->pfnServiceReqHandler= NULL;
4874 pImage->cbStrTab = 0;
4875 RTMemFree(pImage->pachStrTab);
4876 pImage->pachStrTab = NULL;
4877 RTMemFree(pImage->paSymbols);
4878 pImage->paSymbols = NULL;
4879 pImage->cSymbols = 0;
4880 }
4881
4882 supdrvLdrUnlock(pDevExt);
4883 return rc;
4884}
4885
4886
4887/**
4888 * Frees a previously loaded (prep'ed) image.
4889 *
4890 * @returns IPRT status code.
4891 * @param pDevExt Device globals.
4892 * @param pSession Session data.
4893 * @param pReq The request.
4894 */
4895static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
4896{
4897 int rc;
4898 PSUPDRVLDRUSAGE pUsagePrev;
4899 PSUPDRVLDRUSAGE pUsage;
4900 PSUPDRVLDRIMAGE pImage;
4901 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
4902
4903 /*
4904 * Find the ldr image.
4905 */
4906 supdrvLdrLock(pDevExt);
4907 pUsagePrev = NULL;
4908 pUsage = pSession->pLdrUsage;
4909 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
4910 {
4911 pUsagePrev = pUsage;
4912 pUsage = pUsage->pNext;
4913 }
4914 if (!pUsage)
4915 {
4916 supdrvLdrUnlock(pDevExt);
4917 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
4918 return VERR_INVALID_HANDLE;
4919 }
4920
4921 /*
4922 * Check if we can remove anything.
4923 */
4924 rc = VINF_SUCCESS;
4925 pImage = pUsage->pImage;
4926 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
4927 {
4928 /*
4929 * Check if there are any objects with destructors in the image, if
4930 * so leave it for the session cleanup routine so we get a chance to
4931 * clean things up in the right order and not leave them all dangling.
4932 */
4933 RTSpinlockAcquire(pDevExt->Spinlock);
4934 if (pImage->cUsage <= 1)
4935 {
4936 PSUPDRVOBJ pObj;
4937 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
4938 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4939 {
4940 rc = VERR_DANGLING_OBJECTS;
4941 break;
4942 }
4943 }
4944 else
4945 {
4946 PSUPDRVUSAGE pGenUsage;
4947 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
4948 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
4949 {
4950 rc = VERR_DANGLING_OBJECTS;
4951 break;
4952 }
4953 }
4954 RTSpinlockRelease(pDevExt->Spinlock);
4955 if (rc == VINF_SUCCESS)
4956 {
4957 /* unlink it */
4958 if (pUsagePrev)
4959 pUsagePrev->pNext = pUsage->pNext;
4960 else
4961 pSession->pLdrUsage = pUsage->pNext;
4962
4963 /* free it */
4964 pUsage->pImage = NULL;
4965 pUsage->pNext = NULL;
4966 RTMemFree(pUsage);
4967
4968 /*
4969 * Dereference the image.
4970 */
4971 if (pImage->cUsage <= 1)
4972 supdrvLdrFree(pDevExt, pImage);
4973 else
4974 pImage->cUsage--;
4975 }
4976 else
4977 {
4978 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
4979 rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
4980 }
4981 }
4982 else
4983 {
4984 /*
4985 * Dereference both image and usage.
4986 */
4987 pImage->cUsage--;
4988 pUsage->cUsage--;
4989 }
4990
4991 supdrvLdrUnlock(pDevExt);
4992 return rc;
4993}
4994
4995
4996/**
4997 * Gets the address of a symbol in an open image.
4998 *
4999 * @returns IPRT status code.
5000 * @param pDevExt Device globals.
5001 * @param pSession Session data.
5002 * @param pReq The request buffer.
5003 */
5004static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
5005{
5006 PSUPDRVLDRIMAGE pImage;
5007 PSUPDRVLDRUSAGE pUsage;
5008 uint32_t i;
5009 PSUPLDRSYM paSyms;
5010 const char *pchStrings;
5011 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
5012 void *pvSymbol = NULL;
5013 int rc = VERR_GENERAL_FAILURE;
5014 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
5015
5016 /*
5017 * Find the ldr image.
5018 */
5019 supdrvLdrLock(pDevExt);
5020 pUsage = pSession->pLdrUsage;
5021 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
5022 pUsage = pUsage->pNext;
5023 if (!pUsage)
5024 {
5025 supdrvLdrUnlock(pDevExt);
5026 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
5027 return VERR_INVALID_HANDLE;
5028 }
5029 pImage = pUsage->pImage;
5030 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
5031 {
5032 unsigned uState = pImage->uState;
5033 supdrvLdrUnlock(pDevExt);
5034 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
5035 return VERR_ALREADY_LOADED;
5036 }
5037
5038 /*
5039 * Search the symbol strings.
5040 *
5041 * Note! The int32_t is for native loading on solaris where the data
5042 * and text segments are in very different places.
5043 */
5044 pchStrings = pImage->pachStrTab;
5045 paSyms = pImage->paSymbols;
5046 for (i = 0; i < pImage->cSymbols; i++)
5047 {
5048 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5049 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
5050 {
5051 pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
5052 rc = VINF_SUCCESS;
5053 break;
5054 }
5055 }
5056 supdrvLdrUnlock(pDevExt);
5057 pReq->u.Out.pvSymbol = pvSymbol;
5058 return rc;
5059}
5060
5061
5062/**
5063 * Gets the address of a symbol in an open image or the support driver.
5064 *
5065 * @returns VINF_SUCCESS on success.
5066 * @returns
5067 * @param pDevExt Device globals.
5068 * @param pSession Session data.
5069 * @param pReq The request buffer.
5070 */
5071static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
5072{
5073 int rc = VINF_SUCCESS;
5074 const char *pszSymbol = pReq->u.In.pszSymbol;
5075 const char *pszModule = pReq->u.In.pszModule;
5076 size_t cbSymbol;
5077 char const *pszEnd;
5078 uint32_t i;
5079
5080 /*
5081 * Input validation.
5082 */
5083 AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
5084 pszEnd = RTStrEnd(pszSymbol, 512);
5085 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5086 cbSymbol = pszEnd - pszSymbol + 1;
5087
5088 if (pszModule)
5089 {
5090 AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
5091 pszEnd = RTStrEnd(pszModule, 64);
5092 AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
5093 }
5094 Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
5095
5096
5097 if ( !pszModule
5098 || !strcmp(pszModule, "SupDrv"))
5099 {
5100 /*
5101 * Search the support driver export table.
5102 */
5103 for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
5104 if (!strcmp(g_aFunctions[i].szName, pszSymbol))
5105 {
5106 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
5107 break;
5108 }
5109 }
5110 else
5111 {
5112 /*
5113 * Find the loader image.
5114 */
5115 PSUPDRVLDRIMAGE pImage;
5116
5117 supdrvLdrLock(pDevExt);
5118
5119 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
5120 if (!strcmp(pImage->szName, pszModule))
5121 break;
5122 if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
5123 {
5124 /*
5125 * Search the symbol strings.
5126 */
5127 const char *pchStrings = pImage->pachStrTab;
5128 PCSUPLDRSYM paSyms = pImage->paSymbols;
5129 for (i = 0; i < pImage->cSymbols; i++)
5130 {
5131 if ( paSyms[i].offName + cbSymbol <= pImage->cbStrTab
5132 && !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
5133 {
5134 /*
5135 * Found it! Calc the symbol address and add a reference to the module.
5136 */
5137 pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
5138 rc = supdrvLdrAddUsage(pSession, pImage);
5139 break;
5140 }
5141 }
5142 }
5143 else
5144 rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
5145
5146 supdrvLdrUnlock(pDevExt);
5147 }
5148 return rc;
5149}
5150
5151
5152/**
5153 * Updates the VMMR0 entry point pointers.
5154 *
5155 * @returns IPRT status code.
5156 * @param pDevExt Device globals.
5157 * @param pSession Session data.
5158 * @param pVMMR0 VMMR0 image handle.
5159 * @param pvVMMR0EntryInt VMMR0EntryInt address.
5160 * @param pvVMMR0EntryFast VMMR0EntryFast address.
5161 * @param pvVMMR0EntryEx VMMR0EntryEx address.
5162 * @remark Caller must own the loader mutex.
5163 */
5164static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
5165{
5166 int rc = VINF_SUCCESS;
5167 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
5168
5169
5170 /*
5171 * Check if not yet set.
5172 */
5173 if (!pDevExt->pvVMMR0)
5174 {
5175 pDevExt->pvVMMR0 = pvVMMR0;
5176 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
5177 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
5178 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
5179 }
5180 else
5181 {
5182 /*
5183 * Return failure or success depending on whether the values match or not.
5184 */
5185 if ( pDevExt->pvVMMR0 != pvVMMR0
5186 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
5187 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
5188 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
5189 {
5190 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
5191 rc = VERR_INVALID_PARAMETER;
5192 }
5193 }
5194 return rc;
5195}
5196
5197
5198/**
5199 * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
5200 *
5201 * @param pDevExt Device globals.
5202 */
5203static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
5204{
5205 pDevExt->pvVMMR0 = NULL;
5206 pDevExt->pfnVMMR0EntryInt = NULL;
5207 pDevExt->pfnVMMR0EntryFast = NULL;
5208 pDevExt->pfnVMMR0EntryEx = NULL;
5209}
5210
5211
5212/**
5213 * Adds a usage reference in the specified session of an image.
5214 *
5215 * Called while owning the loader semaphore.
5216 *
5217 * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
5218 * @param pSession Session in question.
5219 * @param pImage Image which the session is using.
5220 */
5221static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
5222{
5223 PSUPDRVLDRUSAGE pUsage;
5224 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
5225
5226 /*
5227 * Referenced it already?
5228 */
5229 pUsage = pSession->pLdrUsage;
5230 while (pUsage)
5231 {
5232 if (pUsage->pImage == pImage)
5233 {
5234 pUsage->cUsage++;
5235 return VINF_SUCCESS;
5236 }
5237 pUsage = pUsage->pNext;
5238 }
5239
5240 /*
5241 * Allocate new usage record.
5242 */
5243 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
5244 AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
5245 pUsage->cUsage = 1;
5246 pUsage->pImage = pImage;
5247 pUsage->pNext = pSession->pLdrUsage;
5248 pSession->pLdrUsage = pUsage;
5249 return VINF_SUCCESS;
5250}
5251
5252
5253/**
5254 * Frees a load image.
5255 *
5256 * @param pDevExt Pointer to device extension.
5257 * @param pImage Pointer to the image we're gonna free.
5258 * This image must exit!
5259 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
5260 */
5261static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
5262{
5263 PSUPDRVLDRIMAGE pImagePrev;
5264 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
5265
5266 /* find it - arg. should've used doubly linked list. */
5267 Assert(pDevExt->pLdrImages);
5268 pImagePrev = NULL;
5269 if (pDevExt->pLdrImages != pImage)
5270 {
5271 pImagePrev = pDevExt->pLdrImages;
5272 while (pImagePrev->pNext != pImage)
5273 pImagePrev = pImagePrev->pNext;
5274 Assert(pImagePrev->pNext == pImage);
5275 }
5276
5277 /* unlink */
5278 if (pImagePrev)
5279 pImagePrev->pNext = pImage->pNext;
5280 else
5281 pDevExt->pLdrImages = pImage->pNext;
5282
5283 /* check if this is VMMR0.r0 unset its entry point pointers. */
5284 if (pDevExt->pvVMMR0 == pImage->pvImage)
5285 supdrvLdrUnsetVMMR0EPs(pDevExt);
5286
5287 /* check for objects with destructors in this image. (Shouldn't happen.) */
5288 if (pDevExt->pObjs)
5289 {
5290 unsigned cObjs = 0;
5291 PSUPDRVOBJ pObj;
5292 RTSpinlockAcquire(pDevExt->Spinlock);
5293 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
5294 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
5295 {
5296 pObj->pfnDestructor = NULL;
5297 cObjs++;
5298 }
5299 RTSpinlockRelease(pDevExt->Spinlock);
5300 if (cObjs)
5301 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
5302 }
5303
5304 /* call termination function if fully loaded. */
5305 if ( pImage->pfnModuleTerm
5306 && pImage->uState == SUP_IOCTL_LDR_LOAD)
5307 {
5308 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
5309 pImage->pfnModuleTerm(pImage);
5310 }
5311
5312 /* Inform the tracing component. */
5313 supdrvTracerModuleUnloading(pDevExt, pImage);
5314
5315 /* do native unload if appropriate. */
5316 if (pImage->fNative)
5317 supdrvOSLdrUnload(pDevExt, pImage);
5318
5319 /* free the image */
5320 pImage->cUsage = 0;
5321 pImage->pDevExt = NULL;
5322 pImage->pNext = NULL;
5323 pImage->uState = SUP_IOCTL_LDR_FREE;
5324 RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
5325 pImage->pvImageAlloc = NULL;
5326 RTMemFree(pImage->pachStrTab);
5327 pImage->pachStrTab = NULL;
5328 RTMemFree(pImage->paSymbols);
5329 pImage->paSymbols = NULL;
5330 RTMemFree(pImage);
5331}
5332
5333
5334/**
5335 * Acquires the loader lock.
5336 *
5337 * @returns IPRT status code.
5338 * @param pDevExt The device extension.
5339 */
5340DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
5341{
5342#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5343 int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
5344#else
5345 int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
5346#endif
5347 AssertRC(rc);
5348 return rc;
5349}
5350
5351
5352/**
5353 * Releases the loader lock.
5354 *
5355 * @returns IPRT status code.
5356 * @param pDevExt The device extension.
5357 */
5358DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
5359{
5360#ifdef SUPDRV_USE_MUTEX_FOR_LDR
5361 return RTSemMutexRelease(pDevExt->mtxLdr);
5362#else
5363 return RTSemFastMutexRelease(pDevExt->mtxLdr);
5364#endif
5365}
5366
5367
5368/**
5369 * Implements the service call request.
5370 *
5371 * @returns VBox status code.
5372 * @param pDevExt The device extension.
5373 * @param pSession The calling session.
5374 * @param pReq The request packet, valid.
5375 */
5376static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
5377{
5378#if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
5379 int rc;
5380
5381 /*
5382 * Find the module first in the module referenced by the calling session.
5383 */
5384 rc = supdrvLdrLock(pDevExt);
5385 if (RT_SUCCESS(rc))
5386 {
5387 PFNSUPR0SERVICEREQHANDLER pfnServiceReqHandler = NULL;
5388 PSUPDRVLDRUSAGE pUsage;
5389
5390 for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
5391 if ( pUsage->pImage->pfnServiceReqHandler
5392 && !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
5393 {
5394 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
5395 break;
5396 }
5397 supdrvLdrUnlock(pDevExt);
5398
5399 if (pfnServiceReqHandler)
5400 {
5401 /*
5402 * Call it.
5403 */
5404 if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
5405 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
5406 else
5407 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
5408 }
5409 else
5410 rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
5411 }
5412
5413 /* log it */
5414 if ( RT_FAILURE(rc)
5415 && rc != VERR_INTERRUPTED
5416 && rc != VERR_TIMEOUT)
5417 Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5418 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5419 else
5420 Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
5421 rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
5422 return rc;
5423#else /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5424 return VERR_NOT_IMPLEMENTED;
5425#endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
5426}
5427
5428
5429/**
5430 * Implements the logger settings request.
5431 *
5432 * @returns VBox status code.
5433 * @param pDevExt The device extension.
5434 * @param pSession The caller's session.
5435 * @param pReq The request.
5436 */
5437static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
5438{
5439 const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
5440 const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
5441 const char *pszDest = &pReq->u.In.szStrings[pReq->u.In.offDestination];
5442 PRTLOGGER pLogger = NULL;
5443 int rc;
5444
5445 /*
5446 * Some further validation.
5447 */
5448 switch (pReq->u.In.fWhat)
5449 {
5450 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5451 case SUPLOGGERSETTINGS_WHAT_CREATE:
5452 break;
5453
5454 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5455 if (*pszGroup || *pszFlags || *pszDest)
5456 return VERR_INVALID_PARAMETER;
5457 if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
5458 return VERR_ACCESS_DENIED;
5459 break;
5460
5461 default:
5462 return VERR_INTERNAL_ERROR;
5463 }
5464
5465 /*
5466 * Get the logger.
5467 */
5468 switch (pReq->u.In.fWhich)
5469 {
5470 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5471 pLogger = RTLogGetDefaultInstance();
5472 break;
5473
5474 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5475 pLogger = RTLogRelDefaultInstance();
5476 break;
5477
5478 default:
5479 return VERR_INTERNAL_ERROR;
5480 }
5481
5482 /*
5483 * Do the job.
5484 */
5485 switch (pReq->u.In.fWhat)
5486 {
5487 case SUPLOGGERSETTINGS_WHAT_SETTINGS:
5488 if (pLogger)
5489 {
5490 rc = RTLogFlags(pLogger, pszFlags);
5491 if (RT_SUCCESS(rc))
5492 rc = RTLogGroupSettings(pLogger, pszGroup);
5493 NOREF(pszDest);
5494 }
5495 else
5496 rc = VERR_NOT_FOUND;
5497 break;
5498
5499 case SUPLOGGERSETTINGS_WHAT_CREATE:
5500 {
5501 if (pLogger)
5502 rc = VERR_ALREADY_EXISTS;
5503 else
5504 {
5505 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
5506
5507 rc = RTLogCreate(&pLogger,
5508 0 /* fFlags */,
5509 pszGroup,
5510 pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
5511 ? "VBOX_LOG"
5512 : "VBOX_RELEASE_LOG",
5513 RT_ELEMENTS(s_apszGroups),
5514 s_apszGroups,
5515 RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
5516 NULL);
5517 if (RT_SUCCESS(rc))
5518 {
5519 rc = RTLogFlags(pLogger, pszFlags);
5520 NOREF(pszDest);
5521 if (RT_SUCCESS(rc))
5522 {
5523 switch (pReq->u.In.fWhich)
5524 {
5525 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5526 pLogger = RTLogSetDefaultInstance(pLogger);
5527 break;
5528 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5529 pLogger = RTLogRelSetDefaultInstance(pLogger);
5530 break;
5531 }
5532 }
5533 RTLogDestroy(pLogger);
5534 }
5535 }
5536 break;
5537 }
5538
5539 case SUPLOGGERSETTINGS_WHAT_DESTROY:
5540 switch (pReq->u.In.fWhich)
5541 {
5542 case SUPLOGGERSETTINGS_WHICH_DEBUG:
5543 pLogger = RTLogSetDefaultInstance(NULL);
5544 break;
5545 case SUPLOGGERSETTINGS_WHICH_RELEASE:
5546 pLogger = RTLogRelSetDefaultInstance(NULL);
5547 break;
5548 }
5549 rc = RTLogDestroy(pLogger);
5550 break;
5551
5552 default:
5553 {
5554 rc = VERR_INTERNAL_ERROR;
5555 break;
5556 }
5557 }
5558
5559 return rc;
5560}
5561
5562
5563/**
5564 * Implements the MSR prober operations.
5565 *
5566 * @returns VBox status code.
5567 * @param pDevExt The device extension.
5568 * @param pReq The request.
5569 */
5570static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
5571{
5572#ifdef SUPDRV_WITH_MSR_PROBER
5573 RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
5574 int rc;
5575
5576 switch (pReq->u.In.enmOp)
5577 {
5578 case SUPMSRPROBEROP_READ:
5579 {
5580 uint64_t uValue;
5581 rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
5582 if (RT_SUCCESS(rc))
5583 {
5584 pReq->u.Out.uResults.Read.uValue = uValue;
5585 pReq->u.Out.uResults.Read.fGp = false;
5586 }
5587 else if (rc == VERR_ACCESS_DENIED)
5588 {
5589 pReq->u.Out.uResults.Read.uValue = 0;
5590 pReq->u.Out.uResults.Read.fGp = true;
5591 rc = VINF_SUCCESS;
5592 }
5593 break;
5594 }
5595
5596 case SUPMSRPROBEROP_WRITE:
5597 rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
5598 if (RT_SUCCESS(rc))
5599 pReq->u.Out.uResults.Write.fGp = false;
5600 else if (rc == VERR_ACCESS_DENIED)
5601 {
5602 pReq->u.Out.uResults.Write.fGp = true;
5603 rc = VINF_SUCCESS;
5604 }
5605 break;
5606
5607 case SUPMSRPROBEROP_MODIFY:
5608 case SUPMSRPROBEROP_MODIFY_FASTER:
5609 rc = supdrvOSMsrProberModify(idCpu, pReq);
5610 break;
5611
5612 default:
5613 return VERR_INVALID_FUNCTION;
5614 }
5615 return rc;
5616#else
5617 return VERR_NOT_IMPLEMENTED;
5618#endif
5619}
5620
5621
5622/**
5623 * Returns whether the host CPU sports an invariant TSC or not.
5624 *
5625 * @returns true if invariant TSC is supported, false otherwise.
5626 */
5627static bool supdrvIsInvariantTsc(void)
5628{
5629 static bool s_fQueried = false;
5630 static bool s_fIsInvariantTsc = false;
5631 if (!s_fQueried)
5632 {
5633 uint32_t uEax, uEbx, uEcx, uEdx;
5634 ASMCpuId(0x80000000, &uEax, &uEbx, &uEcx, &uEdx);
5635 if (uEax >= 0x80000007)
5636 {
5637 ASMCpuId(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
5638 if (uEdx & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
5639 s_fIsInvariantTsc = true;
5640 }
5641 s_fQueried = true;
5642 }
5643
5644 return s_fIsInvariantTsc;
5645}
5646
5647
5648#ifdef SUPDRV_USE_TSC_DELTA_THREAD
5649/**
5650 * Switches the TSC-delta measurement thread into the butchered state.
5651 *
5652 * @returns VBox status code.
5653 * @param pDevExt Pointer to the device instance data.
5654 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
5655 * @param pszFailed An error message to log.
5656 * @param rcFailed The error code to exit the thread with.
5657 */
5658static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
5659{
5660 if (!fSpinlockHeld)
5661 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5662
5663 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Butchered;
5664 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5665 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
5666 return rcFailed;
5667}
5668
5669
5670/**
5671 * The TSC-delta measurement thread.
5672 *
5673 * @returns VBox status code.
5674 * @param hThread The thread handle.
5675 * @param pvUser Opaque pointer to the device instance data.
5676 */
5677static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
5678{
5679 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
5680 static uint32_t cTimesMeasured = 0;
5681 uint32_t cConsecutiveTimeouts = 0;
5682 int rc = VERR_INTERNAL_ERROR_2;
5683 for (;;)
5684 {
5685 /*
5686 * Switch on the current state.
5687 */
5688 SUPDRVTSCDELTASTATE enmState;
5689 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5690 enmState = pDevExt->enmTscDeltaState;
5691 switch (enmState)
5692 {
5693 case kSupDrvTscDeltaState_Creating:
5694 {
5695 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5696 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5697 if (RT_FAILURE(rc))
5698 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5699 /* fall thru */
5700 }
5701
5702 case kSupDrvTscDeltaState_Listening:
5703 {
5704 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5705
5706 /* Simple adaptive timeout. */
5707 if (cConsecutiveTimeouts++ == 10)
5708 {
5709 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
5710 pDevExt->cMsTscDeltaTimeout = 10;
5711 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
5712 pDevExt->cMsTscDeltaTimeout = 100;
5713 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
5714 pDevExt->cMsTscDeltaTimeout = 500;
5715 cConsecutiveTimeouts = 0;
5716 }
5717 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
5718 if ( RT_FAILURE(rc)
5719 && rc != VERR_TIMEOUT)
5720 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
5721 break;
5722 }
5723
5724 case kSupDrvTscDeltaState_WaitAndMeasure:
5725 {
5726 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Measuring;
5727 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
5728 if (RT_FAILURE(rc))
5729 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
5730 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5731 pDevExt->cMsTscDeltaTimeout = 1;
5732 RTThreadSleep(10);
5733 /* fall thru */
5734 }
5735
5736 case kSupDrvTscDeltaState_Measuring:
5737 {
5738 cConsecutiveTimeouts = 0;
5739 if (!cTimesMeasured++)
5740 {
5741 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
5742 RTCpuSetCopy(&pDevExt->TscDeltaObtainedCpuSet, &pDevExt->pGip->OnlineCpuSet);
5743 }
5744 else
5745 {
5746 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5747 unsigned iCpu;
5748
5749 if (cTimesMeasured == UINT32_MAX)
5750 cTimesMeasured = 1;
5751
5752 /* Measure TSC-deltas only for the CPUs that are in the set. */
5753 rc = VINF_SUCCESS;
5754 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
5755 {
5756 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
5757 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
5758 && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu))
5759 {
5760 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
5761 RTCpuSetDel(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
5762 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
5763 RTCpuSetAdd(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->idCpu);
5764 }
5765 }
5766 }
5767 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5768 if (pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
5769 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Listening;
5770 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5771 pDevExt->rcTscDelta = rc;
5772 break;
5773 }
5774
5775 case kSupDrvTscDeltaState_Terminating:
5776 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5777 return VINF_SUCCESS;
5778
5779 case kSupDrvTscDeltaState_Butchered:
5780 default:
5781 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
5782 }
5783 }
5784
5785 return rc;
5786}
5787
5788
5789/**
5790 * Waits for the TSC-delta measurement thread to respond to a state change.
5791 *
5792 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
5793 * other error code on internal error.
5794 *
5795 * @param pThis Pointer to the grant service instance data.
5796 * @param enmCurState The current state.
5797 * @param enmNewState The new state we're waiting for it to enter.
5798 */
5799static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTASTATE enmCurState, SUPDRVTSCDELTASTATE enmNewState)
5800{
5801 /*
5802 * Wait a short while for the expected state transition.
5803 */
5804 int rc;
5805 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
5806 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5807 if (pDevExt->enmTscDeltaState == enmNewState)
5808 {
5809 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5810 rc = VINF_SUCCESS;
5811 }
5812 else if (pDevExt->enmTscDeltaState == enmCurState)
5813 {
5814 /*
5815 * Wait longer if the state has not yet transitioned to the one we want.
5816 */
5817 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5818 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
5819 if ( RT_SUCCESS(rc)
5820 || rc == VERR_TIMEOUT)
5821 {
5822 /*
5823 * Check the state whether we've succeeded.
5824 */
5825 SUPDRVTSCDELTASTATE enmState;
5826 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5827 enmState = pDevExt->enmTscDeltaState;
5828 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5829 if (enmState == enmNewState)
5830 rc = VINF_SUCCESS;
5831 else if (enmState == enmCurState)
5832 {
5833 rc = VERR_TIMEOUT;
5834 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
5835 enmNewState));
5836 }
5837 else
5838 {
5839 rc = VERR_INTERNAL_ERROR;
5840 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
5841 enmState, enmNewState));
5842 }
5843 }
5844 else
5845 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
5846 }
5847 else
5848 {
5849 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5850 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
5851 rc = VERR_INTERNAL_ERROR;
5852 }
5853
5854 return rc;
5855}
5856
5857
5858/**
5859 * Terminates the TSC-delta measurement thread.
5860 *
5861 * @param pDevExt Pointer to the device instance data.
5862 */
5863static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
5864{
5865 int rc;
5866 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
5867 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Terminating;
5868 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
5869 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5870 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
5871 if (RT_FAILURE(rc))
5872 {
5873 /* Signal a few more times before giving up. */
5874 int cTriesLeft = 5;
5875 while (--cTriesLeft > 0)
5876 {
5877 RTThreadUserSignal(pDevExt->hTscDeltaThread);
5878 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
5879 if (rc != VERR_TIMEOUT)
5880 break;
5881 }
5882 }
5883}
5884
5885
5886/**
5887 * Initializes and spawns the TSC-delta measurement thread.
5888 *
5889 * A thread is required for servicing re-measurement requests from events like
5890 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
5891 * under all contexts on all OSs.
5892 *
5893 * @returns VBox status code.
5894 * @param pDevExt Pointer to the device instance data.
5895 *
5896 * @remarks Must only be called -after- initializing GIP and setting up MP
5897 * notifications!
5898 */
5899static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
5900{
5901 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pDevExt->pGip));
5902
5903 int rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
5904 if (RT_SUCCESS(rc))
5905 {
5906 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
5907 if (RT_SUCCESS(rc))
5908 {
5909 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_Creating;
5910 pDevExt->cMsTscDeltaTimeout = 1;
5911 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
5912 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
5913 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
5914 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
5915 if (RT_SUCCESS(rc))
5916 {
5917 rc = supdrvTscDeltaThreadWait(pDevExt, kSupDrvTscDeltaState_Creating, kSupDrvTscDeltaState_Listening);
5918 if (RT_SUCCESS(rc))
5919 {
5920 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5921 return rc;
5922 }
5923
5924 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
5925 supdrvTscDeltaThreadTerminate(pDevExt);
5926 }
5927 else
5928 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
5929 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5930 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5931 }
5932 else
5933 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
5934 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5935 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5936 }
5937 else
5938 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
5939
5940 return rc;
5941}
5942
5943
5944/**
5945 * Terminates the TSC-delta measurement thread and cleanup.
5946 *
5947 * @param pDevExt Pointer to the device instance data.
5948 */
5949static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
5950{
5951 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
5952 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5953 {
5954 supdrvTscDeltaThreadTerminate(pDevExt);
5955 }
5956
5957 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
5958 {
5959 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
5960 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
5961 }
5962
5963 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
5964 {
5965 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
5966 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
5967 }
5968
5969 pDevExt->rcTscDelta = VERR_NOT_AVAILABLE;
5970}
5971
5972
5973/**
5974 * Waits for TSC-delta measurements to be completed for all online CPUs.
5975 *
5976 * @returns VBox status code.
5977 * @param pDevExt Pointer to the device instance data.
5978 */
5979static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
5980{
5981 int cTriesLeft = 5;
5982 int cMsTotalWait;
5983 int cMsWaited = 0;
5984 int cMsWaitGranularity = 1;
5985
5986 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5987 AssertReturn(pGip, VERR_INVALID_POINTER);
5988
5989 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 2, 150);
5990 while (cTriesLeft-- > 0)
5991 {
5992 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
5993 return VINF_SUCCESS;
5994 RTThreadSleep(cMsWaitGranularity);
5995 cMsWaited += cMsWaitGranularity;
5996 if (cMsWaited >= cMsTotalWait)
5997 break;
5998 }
5999
6000 return VERR_TIMEOUT;
6001}
6002#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
6003
6004
6005/**
6006 * Measures the TSC frequency of the system.
6007 *
6008 * Uses a busy-wait method for the async. case as it is intended to help push
6009 * the CPU frequency up, while for the invariant cases using a sleeping method.
6010 *
6011 * The TSC frequency can vary on systems which are not reported as invariant.
6012 * On such systems the object of this function is to find out what the nominal,
6013 * maximum TSC frequency under 'normal' CPU operation.
6014 *
6015 * @returns VBox status code.
6016 * @param pDevExt Pointer to the device instance.
6017 *
6018 * @remarks Must be called only -after- measuring the TSC deltas.
6019 */
6020static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
6021{
6022 int cTriesLeft = 4;
6023 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6024
6025 /* Assert order. */
6026 AssertReturn(pGip, VERR_INVALID_PARAMETER);
6027 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
6028
6029 while (cTriesLeft-- > 0)
6030 {
6031 RTCCUINTREG uFlags;
6032 uint64_t u64NanoTsBefore;
6033 uint64_t u64NanoTsAfter;
6034 uint64_t u64TscBefore;
6035 uint64_t u64TscAfter;
6036 uint8_t idApicBefore;
6037 uint8_t idApicAfter;
6038
6039 /*
6040 * Synchronize with the host OS clock tick before reading the TSC.
6041 * Especially important on Windows where the granularity is terrible.
6042 */
6043 u64NanoTsBefore = RTTimeSystemNanoTS();
6044 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
6045 ASMNopPause();
6046
6047 uFlags = ASMIntDisableFlags();
6048 idApicBefore = ASMGetApicId();
6049 u64TscBefore = ASMReadTSC();
6050 u64NanoTsBefore = RTTimeSystemNanoTS();
6051 ASMSetFlags(uFlags);
6052
6053 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6054 {
6055 /*
6056 * Sleep-wait since the TSC frequency is constant, it eases host load.
6057 * Shorter interval produces more variance in the frequency (esp. Windows).
6058 */
6059 RTThreadSleep(200);
6060 u64NanoTsAfter = RTTimeSystemNanoTS();
6061 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
6062 ASMNopPause();
6063 u64NanoTsAfter = RTTimeSystemNanoTS();
6064 }
6065 else
6066 {
6067 /* Busy-wait keeping the frequency up and measure. */
6068 for (;;)
6069 {
6070 u64NanoTsAfter = RTTimeSystemNanoTS();
6071 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
6072 ASMNopPause();
6073 else
6074 break;
6075 }
6076 }
6077
6078 uFlags = ASMIntDisableFlags();
6079 idApicAfter = ASMGetApicId();
6080 u64TscAfter = ASMReadTSC();
6081 ASMSetFlags(uFlags);
6082
6083 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6084 {
6085 int rc;
6086 bool fAppliedBefore;
6087 bool fAppliedAfter;
6088 rc = SUPTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
6089 rc = SUPTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
6090
6091 if ( !fAppliedBefore
6092 || !fAppliedAfter)
6093 {
6094#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6095 /*
6096 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
6097 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
6098 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
6099 * proceed. This should be triggered just once if we're rather unlucky.
6100 */
6101 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6102 if (rc == VERR_TIMEOUT)
6103 {
6104 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
6105 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6106 }
6107#else
6108 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
6109 idApicBefore, idApicAfter, cTriesLeft);
6110#endif
6111 continue;
6112 }
6113 }
6114
6115 /*
6116 * Update GIP.
6117 */
6118 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
6119 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6120 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6121 return VINF_SUCCESS;
6122 }
6123
6124 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
6125}
6126
6127
6128/**
6129 * Timer callback function for TSC frequency refinement in invariant GIP mode.
6130 *
6131 * @param pTimer The timer.
6132 * @param pvUser Opaque pointer to the GIP.
6133 * @param iTick The timer tick.
6134 */
6135static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6136{
6137 uint8_t idApic;
6138 uint64_t u64DeltaNanoTS;
6139 uint64_t u64DeltaTsc;
6140 uint64_t u64NanoTS;
6141 uint64_t u64Tsc;
6142 RTCCUINTREG uFlags;
6143 bool fDeltaApplied = false;
6144 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser;
6145
6146 /* Paranoia. */
6147 Assert(pGip);
6148 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
6149
6150 u64NanoTS = RTTimeSystemNanoTS();
6151 while (RTTimeSystemNanoTS() == u64NanoTS)
6152 ASMNopPause();
6153 uFlags = ASMIntDisableFlags();
6154 idApic = ASMGetApicId();
6155 u64Tsc = ASMReadTSC();
6156 u64NanoTS = RTTimeSystemNanoTS();
6157 ASMSetFlags(uFlags);
6158 SUPTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
6159 u64DeltaNanoTS = u64NanoTS - g_u64NanoTSAnchor;
6160 u64DeltaTsc = u64Tsc - g_u64TscAnchor;
6161
6162 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6163 && !fDeltaApplied)
6164 {
6165 SUPR0Printf("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
6166 GIP_TSC_REFINE_INTERVAL);
6167 return;
6168 }
6169
6170 /* Calculate the TSC frequency. */
6171 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
6172 && u64DeltaNanoTS < UINT32_MAX)
6173 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, u64DeltaNanoTS);
6174 else
6175 {
6176 /* Try not to lose precision, the larger the interval the more likely we overflow. */
6177 if ( u64DeltaTsc < UINT64_MAX / RT_NS_100MS
6178 && u64DeltaNanoTS / 10 < UINT32_MAX)
6179 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_100MS, u64DeltaNanoTS / 10);
6180 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_10MS
6181 && u64DeltaNanoTS / 100 < UINT32_MAX)
6182 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_10MS, u64DeltaNanoTS / 100);
6183 else if ( u64DeltaTsc < UINT64_MAX / RT_NS_1MS
6184 && u64DeltaNanoTS / 1000 < UINT32_MAX)
6185 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1MS, u64DeltaNanoTS / 1000);
6186 else /* Screw it. */
6187 pGip->u64CpuHz = u64DeltaTsc / (u64DeltaNanoTS / RT_NS_1SEC_64);
6188 }
6189
6190 /* Update rest of GIP. */
6191 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
6192 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
6193}
6194
6195
6196/**
6197 * Starts the TSC-frequency refinement phase asynchronously.
6198 *
6199 * @param pDevExt Pointer to the device instance data.
6200 */
6201static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
6202{
6203 uint64_t u64NanoTS;
6204 RTCCUINTREG uFlags;
6205 uint8_t idApic;
6206 int rc;
6207 bool fDeltaApplied = false;
6208 PSUPGLOBALINFOPAGE pGip;
6209
6210 /* Validate. */
6211 Assert(pDevExt);
6212 Assert(pDevExt->pGip);
6213
6214 pGip = pDevExt->pGip;
6215 u64NanoTS = RTTimeSystemNanoTS();
6216 while (RTTimeSystemNanoTS() == u64NanoTS)
6217 ASMNopPause();
6218 uFlags = ASMIntDisableFlags();
6219 idApic = ASMGetApicId();
6220 g_u64TscAnchor = ASMReadTSC();
6221 g_u64NanoTSAnchor = RTTimeSystemNanoTS();
6222 ASMSetFlags(uFlags);
6223 SUPTscDeltaApply(pGip, &g_u64TscAnchor, idApic, &fDeltaApplied);
6224
6225#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6226 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
6227 && !fDeltaApplied)
6228 {
6229 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
6230 if (rc == VERR_TIMEOUT)
6231 {
6232 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
6233 return;
6234 }
6235 }
6236#endif
6237
6238 rc = RTTimerCreateEx(&g_pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pGip);
6239 if (RT_SUCCESS(rc))
6240 {
6241 /*
6242 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
6243 * interval as small as possible while gaining the most consistent and accurate frequency
6244 * (compared to what the host OS might have measured).
6245 *
6246 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
6247 * same TSC frequency whenever possible so we need to keep the interval short.
6248 */
6249 rc = RTTimerStart(g_pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
6250 AssertRC(rc);
6251 }
6252 else
6253 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
6254}
6255
6256
6257/**
6258 * Creates the GIP.
6259 *
6260 * @returns VBox status code.
6261 * @param pDevExt Instance data. GIP stuff may be updated.
6262 */
6263static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
6264{
6265 PSUPGLOBALINFOPAGE pGip;
6266 RTHCPHYS HCPhysGip;
6267 uint32_t u32SystemResolution;
6268 uint32_t u32Interval;
6269 uint32_t u32MinInterval;
6270 uint32_t uMod;
6271 unsigned cCpus;
6272 int rc;
6273
6274 LogFlow(("supdrvGipCreate:\n"));
6275
6276 /* Assert order. */
6277 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
6278 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
6279 Assert(!pDevExt->pGipTimer);
6280
6281 /*
6282 * Check the CPU count.
6283 */
6284 cCpus = RTMpGetArraySize();
6285 if ( cCpus > RTCPUSET_MAX_CPUS
6286 || cCpus > 256 /* ApicId is used for the mappings */)
6287 {
6288 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
6289 return VERR_TOO_MANY_CPUS;
6290 }
6291
6292 /*
6293 * Allocate a contiguous set of pages with a default kernel mapping.
6294 */
6295 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
6296 if (RT_FAILURE(rc))
6297 {
6298 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
6299 return rc;
6300 }
6301 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
6302 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
6303
6304 /*
6305 * Find a reasonable update interval and initialize the structure.
6306 */
6307 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
6308 * See @bugref{6710}. */
6309 u32MinInterval = RT_NS_10MS;
6310 u32SystemResolution = RTTimerGetSystemGranularity();
6311 u32Interval = u32MinInterval;
6312 uMod = u32MinInterval % u32SystemResolution;
6313 if (uMod)
6314 u32Interval += u32SystemResolution - uMod;
6315
6316 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
6317
6318 if (RT_UNLIKELY( g_fOsTscDeltasInSync
6319 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
6320 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
6321 {
6322 /* Basically invariant Windows boxes, should never be detected as async. */
6323 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
6324 return VERR_INTERNAL_ERROR_2;
6325 }
6326
6327#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6328 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6329 {
6330 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
6331 rc = supdrvTscDeltaThreadInit(pDevExt);
6332 }
6333#endif
6334 if (RT_SUCCESS(rc))
6335 {
6336 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
6337 if (RT_SUCCESS(rc))
6338 {
6339 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
6340 if (RT_SUCCESS(rc))
6341 {
6342 uint16_t iCpu;
6343#ifndef SUPDRV_USE_TSC_DELTA_THREAD
6344 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6345 {
6346 /*
6347 * Measure the TSC deltas now that we have MP notifications.
6348 */
6349 int cTries = 5;
6350 do
6351 {
6352 rc = supdrvMeasureTscDeltas(pDevExt, NULL /* pidxMaster */);
6353 if (rc != VERR_TRY_AGAIN)
6354 break;
6355 } while (--cTries > 0);
6356 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6357 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
6358 }
6359 else
6360 {
6361 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
6362 Assert(!pGip->aCPUs[iCpu].i64TSCDelta);
6363 }
6364#endif
6365 if (RT_SUCCESS(rc))
6366 {
6367 rc = supdrvGipMeasureTscFreq(pDevExt);
6368 if (RT_SUCCESS(rc))
6369 {
6370 /*
6371 * Create the timer.
6372 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
6373 */
6374 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
6375 {
6376 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
6377 pDevExt);
6378 if (rc == VERR_NOT_SUPPORTED)
6379 {
6380 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
6381 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
6382 }
6383 }
6384 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
6385 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */, supdrvGipSyncTimer, pDevExt);
6386 if (RT_SUCCESS(rc))
6387 {
6388 /*
6389 * We're good.
6390 */
6391 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
6392 g_pSUPGlobalInfoPage = pGip;
6393 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
6394 supdrvRefineTscFreq(pDevExt);
6395 return VINF_SUCCESS;
6396 }
6397 else
6398 {
6399 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
6400 Assert(!pDevExt->pGipTimer);
6401 }
6402 }
6403 else
6404 OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
6405 }
6406 else
6407 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureTscDeltas failed. rc=%Rrc\n", rc));
6408 }
6409 else
6410 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
6411 }
6412 else
6413 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
6414 }
6415 else
6416 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
6417
6418 supdrvGipDestroy(pDevExt);
6419 return rc;
6420}
6421
6422
6423/**
6424 * Terminates the GIP.
6425 *
6426 * @param pDevExt Instance data. GIP stuff may be updated.
6427 */
6428static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
6429{
6430 int rc;
6431#ifdef DEBUG_DARWIN_GIP
6432 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
6433 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
6434 pDevExt->pGipTimer, pDevExt->GipMemObj));
6435#endif
6436
6437 /*
6438 * Stop receiving MP notifications before tearing anything else down.
6439 */
6440 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
6441
6442#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6443 /*
6444 * Terminate the TSC-delta measurement thread and resources.
6445 */
6446 supdrvTscDeltaTerm(pDevExt);
6447#endif
6448
6449 /*
6450 * Destroy the TSC-refinement one-shot timer.
6451 */
6452 if (g_pTscRefineTimer)
6453 {
6454 RTTimerDestroy(g_pTscRefineTimer);
6455 g_pTscRefineTimer = NULL;
6456 }
6457
6458 /*
6459 * Invalid the GIP data.
6460 */
6461 if (pDevExt->pGip)
6462 {
6463 supdrvGipTerm(pDevExt->pGip);
6464 pDevExt->pGip = NULL;
6465 }
6466 g_pSUPGlobalInfoPage = NULL;
6467
6468 /*
6469 * Destroy the timer and free the GIP memory object.
6470 */
6471 if (pDevExt->pGipTimer)
6472 {
6473 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
6474 pDevExt->pGipTimer = NULL;
6475 }
6476
6477 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
6478 {
6479 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
6480 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
6481 }
6482
6483 /*
6484 * Finally, make sure we've release the system timer resolution request
6485 * if one actually succeeded and is still pending.
6486 */
6487 if (pDevExt->u32SystemTimerGranularityGrant)
6488 {
6489 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
6490 pDevExt->u32SystemTimerGranularityGrant = 0;
6491 }
6492}
6493
6494
6495/**
6496 * Timer callback function sync GIP mode.
6497 * @param pTimer The timer.
6498 * @param pvUser Opaque pointer to the device extension.
6499 * @param iTick The timer tick.
6500 */
6501static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6502{
6503 RTCCUINTREG uFlags;
6504 uint64_t u64TSC;
6505 uint64_t u64NanoTS;
6506 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6507 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6508
6509 /*
6510 * Synchronize with the host OS clock tick before reading the TSC.
6511 * Especially important on Windows where the granularity is terrible.
6512 */
6513 u64NanoTS = RTTimeSystemNanoTS();
6514 while (u64NanoTS == RTTimeSystemNanoTS())
6515 ASMNopPause();
6516
6517 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6518 u64TSC = ASMReadTSC();
6519 u64NanoTS = RTTimeSystemNanoTS();
6520
6521 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6522 {
6523 /*
6524 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
6525 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
6526 * affected a bit until we get proper TSC deltas than implementing options like
6527 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
6528 *
6529 * The likely hood of this happening is really low. On Windows, Linux timers
6530 * fire on the CPU they were registered/started on. Darwin, Solaris need verification.
6531 */
6532 Assert(!ASMIntAreEnabled());
6533 SUPTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
6534 }
6535
6536 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
6537
6538 ASMSetFlags(uFlags);
6539}
6540
6541
6542/**
6543 * Timer callback function for async GIP mode.
6544 * @param pTimer The timer.
6545 * @param pvUser Opaque pointer to the device extension.
6546 * @param iTick The timer tick.
6547 */
6548static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
6549{
6550 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
6551 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6552 RTCPUID idCpu = RTMpCpuId();
6553 uint64_t u64TSC = ASMReadTSC();
6554 uint64_t NanoTS = RTTimeSystemNanoTS();
6555
6556 /** @todo reset the transaction number and whatnot when iTick == 1. */
6557 if (pDevExt->idGipMaster == idCpu)
6558 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
6559 else
6560 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
6561
6562 ASMSetFlags(fOldFlags);
6563}
6564
6565
6566/**
6567 * Finds our (@a idCpu) entry, or allocates a new one if not found.
6568 *
6569 * @returns Index of the CPU in the cache set.
6570 * @param pGip The GIP.
6571 * @param idCpu The CPU ID.
6572 */
6573static uint32_t supdrvGipCpuIndexFromCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
6574{
6575 uint32_t i, cTries;
6576
6577 /*
6578 * ASSUMES that CPU IDs are constant.
6579 */
6580 for (i = 0; i < pGip->cCpus; i++)
6581 if (pGip->aCPUs[i].idCpu == idCpu)
6582 return i;
6583
6584 cTries = 0;
6585 do
6586 {
6587 for (i = 0; i < pGip->cCpus; i++)
6588 {
6589 bool fRc;
6590 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
6591 if (fRc)
6592 return i;
6593 }
6594 } while (cTries++ < 32);
6595 AssertReleaseFailed();
6596 return i - 1;
6597}
6598
6599
6600/**
6601 * The calling CPU should be accounted as online, update GIP accordingly.
6602 *
6603 * This is used by supdrvGipMpEvent as well as the supdrvGipCreate.
6604 *
6605 * @param pDevExt The device extension.
6606 * @param idCpu The CPU ID.
6607 */
6608static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6609{
6610 int iCpuSet = 0;
6611 uint16_t idApic = UINT16_MAX;
6612 uint32_t i = 0;
6613 uint64_t u64NanoTS = 0;
6614 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6615
6616 AssertPtrReturnVoid(pGip);
6617 AssertRelease(idCpu == RTMpCpuId());
6618 Assert(pGip->cPossibleCpus == RTMpGetCount());
6619
6620 /*
6621 * Do this behind a spinlock with interrupts disabled as this can fire
6622 * on all CPUs simultaneously, see @bugref{6110}.
6623 */
6624 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6625
6626 /*
6627 * Update the globals.
6628 */
6629 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
6630 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
6631 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6632 if (iCpuSet >= 0)
6633 {
6634 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6635 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
6636 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
6637 }
6638
6639 /*
6640 * Update the entry.
6641 */
6642 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
6643 i = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
6644 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
6645 idApic = ASMGetApicId();
6646 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
6647 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
6648 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
6649
6650 /*
6651 * Update the APIC ID and CPU set index mappings.
6652 */
6653 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
6654 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
6655
6656 /* Update the Mp online/offline counter. */
6657 ASMAtomicIncU32(&g_cMpOnOffEvents);
6658
6659#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6660 /*
6661 * Add this CPU to the set of CPUs that require their TSC delta to be measured.
6662 *
6663 * We cannot poke the TSC-delta measurement thread from this context (on all OSs), so we only
6664 * update the state and it'll get serviced when the thread's listening interval times out.
6665 */
6666 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6667 {
6668 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
6669 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
6670 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
6671 {
6672 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, idCpu);
6673 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
6674 }
6675 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
6676 }
6677#endif
6678
6679 /* commit it */
6680 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
6681
6682 RTSpinlockRelease(pDevExt->hGipSpinlock);
6683}
6684
6685
6686/**
6687 * The CPU should be accounted as offline, update the GIP accordingly.
6688 *
6689 * This is used by supdrvGipMpEvent.
6690 *
6691 * @param pDevExt The device extension.
6692 * @param idCpu The CPU ID.
6693 */
6694static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
6695{
6696 int iCpuSet;
6697 unsigned i;
6698
6699 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6700
6701 AssertPtrReturnVoid(pGip);
6702 RTSpinlockAcquire(pDevExt->hGipSpinlock);
6703
6704 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
6705 AssertReturnVoid(iCpuSet >= 0);
6706
6707 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
6708 AssertReturnVoid(i < pGip->cCpus);
6709 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
6710
6711 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
6712 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
6713
6714 /* Update the Mp online/offline counter. */
6715 ASMAtomicIncU32(&g_cMpOnOffEvents);
6716
6717 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
6718 if (ASMAtomicReadU32(&g_idTscDeltaInitiator) == idCpu)
6719 {
6720 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6721 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
6722 }
6723
6724 /* Reset the TSC delta, we will recalculate it lazily. */
6725 if (GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
6726 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
6727
6728#ifdef SUPDRV_USE_TSC_DELTA_THREAD
6729 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
6730 if (supdrvIsInvariantTsc())
6731 RTCpuSetDel(&pDevExt->TscDeltaObtainedCpuSet, idCpu);
6732#endif
6733
6734 /* commit it */
6735 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
6736
6737 RTSpinlockRelease(pDevExt->hGipSpinlock);
6738}
6739
6740
6741/**
6742 * Multiprocessor event notification callback.
6743 *
6744 * This is used to make sure that the GIP master gets passed on to
6745 * another CPU. It also updates the associated CPU data.
6746 *
6747 * @param enmEvent The event.
6748 * @param idCpu The cpu it applies to.
6749 * @param pvUser Pointer to the device extension.
6750 *
6751 * @remarks This function -must- fire on the newly online'd CPU for the
6752 * RTMPEVENT_ONLINE case and can fire on any CPU for the
6753 * RTMPEVENT_OFFLINE case.
6754 */
6755static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
6756{
6757 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
6758 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
6759
6760 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6761
6762 /*
6763 * Update the GIP CPU data.
6764 */
6765 if (pGip)
6766 {
6767 switch (enmEvent)
6768 {
6769 case RTMPEVENT_ONLINE:
6770 AssertRelease(idCpu == RTMpCpuId());
6771 supdrvGipMpEventOnline(pDevExt, idCpu);
6772 break;
6773 case RTMPEVENT_OFFLINE:
6774 supdrvGipMpEventOffline(pDevExt, idCpu);
6775 break;
6776 }
6777 }
6778
6779 /*
6780 * Make sure there is a master GIP.
6781 */
6782 if (enmEvent == RTMPEVENT_OFFLINE)
6783 {
6784 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
6785 if (idGipMaster == idCpu)
6786 {
6787 /*
6788 * Find a new GIP master.
6789 */
6790 bool fIgnored;
6791 unsigned i;
6792 int64_t iTSCDelta;
6793 uint32_t idxNewGipMaster;
6794 RTCPUID idNewGipMaster = NIL_RTCPUID;
6795 RTCPUSET OnlineCpus;
6796 RTMpGetOnlineSet(&OnlineCpus);
6797
6798 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
6799 {
6800 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
6801 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
6802 && idCurCpu != idGipMaster)
6803 {
6804 idNewGipMaster = idCurCpu;
6805 break;
6806 }
6807 }
6808
6809 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
6810 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
6811 NOREF(fIgnored);
6812
6813 /*
6814 * Adjust all the TSC deltas against the new GIP master.
6815 */
6816 if (pGip)
6817 {
6818 idxNewGipMaster = supdrvGipCpuIndexFromCpuId(pGip, idNewGipMaster);
6819 iTSCDelta = pGip->aCPUs[idxNewGipMaster].i64TSCDelta;
6820 Assert(iTSCDelta != INT64_MAX);
6821 for (i = 0; i < pGip->cCpus; i++)
6822 {
6823 PSUPGIPCPU pGipCpu = &pGip->aCPUs[i];
6824 int64_t iWorkerDelta = pGipCpu->i64TSCDelta;
6825 if (iWorkerDelta != INT64_MAX)
6826 iWorkerDelta -= iTSCDelta;
6827 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, iWorkerDelta);
6828 }
6829 Assert(pGip->aCPUs[idxNewGipMaster].i64TSCDelta == 0);
6830 }
6831 }
6832 }
6833}
6834
6835
6836/**
6837 * Callback used by supdrvMeasureTscDeltas() to read the TSC on two CPUs and
6838 * compute the delta between them.
6839 *
6840 * @param idCpu The CPU we are current scheduled on.
6841 * @param pvUser1 Opaque pointer to the GIP.
6842 * @param pvUser2 Opaque pointer to the worker Cpu Id.
6843 *
6844 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
6845 * read the TSC at exactly the same time on both the master and the worker
6846 * CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
6847 * pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
6848 * try to minimize the measurement error by computing the minimum read time
6849 * of the compare statement in the worker by taking TSC measurements across
6850 * it.
6851 *
6852 * We ignore the first few runs of the loop in order to prime the cache.
6853 * Also, be careful about using 'pause' instruction in critical busy-wait
6854 * loops in this code - it can cause undesired behaviour with
6855 * hyperthreading.
6856 *
6857 * It must be noted that the computed minimum read time is mostly to
6858 * eliminate huge deltas when the worker is too early and doesn't by itself
6859 * help produce more accurate deltas. We allow two times the computed
6860 * minimum as an arbibtrary acceptable threshold. Therefore, it is still
6861 * possible to get negative deltas where there are none when the worker is
6862 * earlier. As long as these occasional negative deltas are lower than the
6863 * time it takes to exit guest-context and the OS to reschedule EMT on a
6864 * different CPU we won't expose a TSC that jumped backwards. It is because
6865 * of the existence of the negative deltas we don't recompute the delta with
6866 * the master and worker interchanged to eliminate the remaining measurement
6867 * error.
6868 */
6869static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
6870{
6871 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
6872 uint32_t *pidWorker = (uint32_t *)pvUser2;
6873 RTCPUID idMaster = ASMAtomicUoReadU32(&g_idTscDeltaInitiator);
6874 unsigned idxMaster = supdrvGipCpuIndexFromCpuId(pGip, idMaster);
6875 unsigned idxWorker = supdrvGipCpuIndexFromCpuId(pGip, *pidWorker);
6876 PSUPGIPCPU pGipCpuMaster = &pGip->aCPUs[idxMaster];
6877 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
6878 int cTriesLeft = 12;
6879
6880 if ( idCpu != idMaster
6881 && idCpu != *pidWorker)
6882 return;
6883
6884 /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
6885 with a timeout to avoid deadlocking the entire system. */
6886 if (!RTMpOnAllIsConcurrentSafe())
6887 {
6888 /** @todo This was introduced for Windows, but since Windows doesn't use this
6889 * code path any longer (as DPC timeouts BSOD regardless of interrupts,
6890 * see @bugref{6710} comment 81), eventually phase it out. */
6891 uint64_t uTscNow;
6892 uint64_t uTscStart;
6893 uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */
6894
6895 ASMSerializeInstruction();
6896 uTscStart = ASMReadTSC();
6897 if (idCpu == idMaster)
6898 {
6899 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
6900 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
6901 {
6902 ASMSerializeInstruction();
6903 uTscNow = ASMReadTSC();
6904 if (uTscNow - uTscStart > cWaitTicks)
6905 {
6906 /* Set the worker delta to indicate failure, not the master. */
6907 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6908 return;
6909 }
6910
6911 ASMNopPause();
6912 }
6913 }
6914 else
6915 {
6916 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
6917 {
6918 ASMSerializeInstruction();
6919 uTscNow = ASMReadTSC();
6920 if (uTscNow - uTscStart > cWaitTicks)
6921 {
6922 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
6923 return;
6924 }
6925
6926 ASMNopPause();
6927 }
6928 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
6929 }
6930 }
6931
6932 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
6933 while (cTriesLeft-- > 0)
6934 {
6935 unsigned i;
6936 uint64_t uMinCmpReadTime = UINT64_MAX;
6937 for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
6938 {
6939 if (idCpu == idMaster)
6940 {
6941 /*
6942 * The master.
6943 */
6944 RTCCUINTREG uFlags;
6945 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6946 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
6947
6948 /* Disable interrupts only in the master for as short a period
6949 as possible, thanks again to Windows. See @bugref{6710} comment #73. */
6950 uFlags = ASMIntDisableFlags();
6951
6952 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
6953 ;
6954
6955 do
6956 {
6957 ASMSerializeInstruction();
6958 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
6959 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6960
6961 ASMSetFlags(uFlags);
6962
6963 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
6964 ;
6965
6966 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
6967 {
6968 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
6969 {
6970 int64_t iDelta = pGipCpuWorker->u64TSCSample - pGipCpuMaster->u64TSCSample;
6971 if (iDelta < pGipCpuWorker->i64TSCDelta)
6972 pGipCpuWorker->i64TSCDelta = iDelta;
6973 }
6974 }
6975
6976 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
6977 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
6978 }
6979 else
6980 {
6981 /*
6982 * The worker.
6983 */
6984 uint64_t uTscWorker;
6985 uint64_t uTscWorkerFlushed;
6986 uint64_t uCmpReadTime;
6987
6988 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
6989 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
6990 ;
6991 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
6992 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
6993
6994 /*
6995 * Keep reading the TSC until we notice that the master has read his. Reading
6996 * the TSC -after- the master has updated the memory is way too late. We thus
6997 * compensate by trying to measure how long it took for the worker to notice
6998 * the memory flushed from the master.
6999 */
7000 do
7001 {
7002 ASMSerializeInstruction();
7003 uTscWorker = ASMReadTSC();
7004 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
7005 ASMSerializeInstruction();
7006 uTscWorkerFlushed = ASMReadTSC();
7007
7008 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
7009 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
7010 {
7011 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
7012 if (uCmpReadTime < (uMinCmpReadTime << 1))
7013 {
7014 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
7015 if (uCmpReadTime < uMinCmpReadTime)
7016 uMinCmpReadTime = uCmpReadTime;
7017 }
7018 else
7019 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
7020 }
7021 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
7022 {
7023 if (uCmpReadTime < uMinCmpReadTime)
7024 uMinCmpReadTime = uCmpReadTime;
7025 }
7026
7027 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
7028 while (ASMAtomicReadU32(&g_pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
7029 ASMNopPause();
7030 }
7031 }
7032
7033 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
7034 break;
7035 }
7036}
7037
7038
7039/**
7040 * Clears all TSCs on the per-CPUs GIP struct. as well as the delta
7041 * synchronization variable. Optionally also clears the deltas on the per-CPU
7042 * GIP struct. as well.
7043 *
7044 * @param pGip Pointer to the GIP.
7045 * @param fClearDeltas Whether the deltas are also to be cleared.
7046 */
7047DECLINLINE(void) supdrvClearTscSamples(PSUPGLOBALINFOPAGE pGip, bool fClearDeltas)
7048{
7049 unsigned iCpu;
7050 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7051 {
7052 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7053 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
7054 if (fClearDeltas)
7055 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
7056 }
7057 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7058}
7059
7060
7061/**
7062 * Measures the TSC delta between the master GIP CPU and one specified worker
7063 * CPU.
7064 *
7065 * @returns VBox status code.
7066 * @param pDevExt Pointer to the device instance data.
7067 * @param idxWorker The index of the worker CPU from the GIP's array of
7068 * CPUs.
7069 *
7070 * @remarks This can be called with preemption disabled!
7071 */
7072static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
7073{
7074 int rc;
7075 PSUPGLOBALINFOPAGE pGip;
7076 PSUPGIPCPU pGipCpuWorker;
7077 RTCPUID idMaster;
7078
7079 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7080 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7081
7082 pGip = pDevExt->pGip;
7083 idMaster = pDevExt->idGipMaster;
7084 pGipCpuWorker = &pGip->aCPUs[idxWorker];
7085
7086 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7087
7088 if (pGipCpuWorker->idCpu == idMaster)
7089 {
7090 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, 0);
7091 return VINF_SUCCESS;
7092 }
7093
7094 /* Set the master TSC as the initiator. */
7095 while (ASMAtomicCmpXchgU32(&g_idTscDeltaInitiator, idMaster, NIL_RTCPUID) == false)
7096 {
7097 /*
7098 * Sleep here rather than spin as there is a parallel measurement
7099 * being executed and that can take a good while to be done.
7100 */
7101 RTThreadSleep(1);
7102 }
7103
7104 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7105 {
7106 /* Fire TSC-read workers on all CPUs but only synchronize between master and one worker to ease memory contention. */
7107 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
7108 ASMAtomicWriteU32(&g_pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
7109 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, pGip, &pGipCpuWorker->idCpu);
7110 if (RT_SUCCESS(rc))
7111 {
7112 if (RT_UNLIKELY(pGipCpuWorker->i64TSCDelta == INT64_MAX))
7113 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
7114 }
7115 }
7116 else
7117 rc = VERR_CPU_OFFLINE;
7118
7119 ASMAtomicWriteU32(&g_idTscDeltaInitiator, NIL_RTCPUID);
7120 return rc;
7121}
7122
7123
7124/**
7125 * Measures the TSC deltas between CPUs.
7126 *
7127 * @param pDevExt Pointer to the device instance data.
7128 * @param pidxMaster Where to store the index of the chosen master TSC if we
7129 * managed to determine the TSC deltas successfully.
7130 * Optional, can be NULL.
7131 *
7132 * @returns VBox status code.
7133 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
7134 * idCpu, GIP's online CPU set which are populated in
7135 * supdrvGipInitOnCpu().
7136 */
7137static int supdrvMeasureTscDeltas(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
7138{
7139 PSUPGIPCPU pGipCpuMaster;
7140 unsigned iCpu;
7141 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7142 uint32_t idxMaster = UINT32_MAX;
7143 int rc = VINF_SUCCESS;
7144 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&g_cMpOnOffEvents);
7145 uint32_t cOnlineCpus = pGip->cOnlineCpus;
7146
7147 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7148
7149 /*
7150 * Pick the first CPU online as the master TSC and make it the new GIP master based
7151 * on the APIC ID.
7152 *
7153 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
7154 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
7155 * master as this point since the sync/async timer isn't created yet.
7156 */
7157 supdrvClearTscSamples(pGip, true /* fClearDeltas */);
7158 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
7159 {
7160 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
7161 if (idxCpu != UINT16_MAX)
7162 {
7163 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
7164 if (RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpu->idCpu))
7165 {
7166 idxMaster = idxCpu;
7167 pGipCpu->i64TSCDelta = 0;
7168 break;
7169 }
7170 }
7171 }
7172 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
7173 pGipCpuMaster = &pGip->aCPUs[idxMaster];
7174 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7175
7176 AssertReturn(cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
7177 if (pGip->cOnlineCpus <= 1)
7178 {
7179 if (pidxMaster)
7180 *pidxMaster = idxMaster;
7181 return VINF_SUCCESS;
7182 }
7183
7184 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7185 {
7186 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7187 if ( iCpu != idxMaster
7188 && RTCpuSetIsMember(&pGip->OnlineCpuSet, pGipCpuWorker->idCpu))
7189 {
7190 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7191 if (RT_FAILURE(rc))
7192 {
7193 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
7194 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
7195 break;
7196 }
7197
7198 if (ASMAtomicReadU32(&g_cMpOnOffEvents) != cMpOnOffEvents)
7199 {
7200 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retrying...\n");
7201 rc = VERR_TRY_AGAIN;
7202 break;
7203 }
7204 }
7205 }
7206
7207 if ( RT_SUCCESS(rc)
7208 && !pGipCpuMaster->i64TSCDelta
7209 && pidxMaster)
7210 {
7211 *pidxMaster = idxMaster;
7212 }
7213 return rc;
7214}
7215
7216
7217/**
7218 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
7219 *
7220 * @param idCpu Ignored.
7221 * @param pvUser1 Where to put the TSC.
7222 * @param pvUser2 Ignored.
7223 */
7224static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7225{
7226 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
7227}
7228
7229
7230/**
7231 * Determine if Async GIP mode is required because of TSC drift.
7232 *
7233 * When using the default/normal timer code it is essential that the time stamp counter
7234 * (TSC) runs never backwards, that is, a read operation to the counter should return
7235 * a bigger value than any previous read operation. This is guaranteed by the latest
7236 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
7237 * case we have to choose the asynchronous timer mode.
7238 *
7239 * @param poffMin Pointer to the determined difference between different
7240 * cores (optional, can be NULL).
7241 * @return false if the time stamp counters appear to be synchronized, true otherwise.
7242 */
7243static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
7244{
7245 /*
7246 * Just iterate all the cpus 8 times and make sure that the TSC is
7247 * ever increasing. We don't bother taking TSC rollover into account.
7248 */
7249 int iEndCpu = RTMpGetArraySize();
7250 int iCpu;
7251 int cLoops = 8;
7252 bool fAsync = false;
7253 int rc = VINF_SUCCESS;
7254 uint64_t offMax = 0;
7255 uint64_t offMin = ~(uint64_t)0;
7256 uint64_t PrevTsc = ASMReadTSC();
7257
7258 while (cLoops-- > 0)
7259 {
7260 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
7261 {
7262 uint64_t CurTsc;
7263 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
7264 if (RT_SUCCESS(rc))
7265 {
7266 if (CurTsc <= PrevTsc)
7267 {
7268 fAsync = true;
7269 offMin = offMax = PrevTsc - CurTsc;
7270 Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
7271 iCpu, cLoops, CurTsc, PrevTsc));
7272 break;
7273 }
7274
7275 /* Gather statistics (except the first time). */
7276 if (iCpu != 0 || cLoops != 7)
7277 {
7278 uint64_t off = CurTsc - PrevTsc;
7279 if (off < offMin)
7280 offMin = off;
7281 if (off > offMax)
7282 offMax = off;
7283 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
7284 }
7285
7286 /* Next */
7287 PrevTsc = CurTsc;
7288 }
7289 else if (rc == VERR_NOT_SUPPORTED)
7290 break;
7291 else
7292 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
7293 }
7294
7295 /* broke out of the loop. */
7296 if (iCpu < iEndCpu)
7297 break;
7298 }
7299
7300 if (poffMin)
7301 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
7302 Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
7303 fAsync, iEndCpu, rc, offMin, offMax));
7304#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
7305 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
7306#endif
7307 return fAsync;
7308}
7309
7310
7311/**
7312 * Determine the GIP TSC mode.
7313 *
7314 * @returns The most suitable TSC mode.
7315 * @param pDevExt Pointer to the device instance data.
7316 */
7317static SUPGIPMODE supdrvGipDetermineTscMode(PSUPDRVDEVEXT pDevExt)
7318{
7319 /* Trust CPUs that declare their TSC to be invariant. */
7320 if (supdrvIsInvariantTsc())
7321 return SUPGIPMODE_INVARIANT_TSC;
7322
7323 /*
7324 * Without invariant CPU ID bit - On SMP we're faced with two problems:
7325 * (1) There might be a skew between the CPU, so that cpu0
7326 * returns a TSC that is slightly different from cpu1.
7327 * (2) Power management (and other things) may cause the TSC
7328 * to run at a non-constant speed, and cause the speed
7329 * to be different on the cpus. This will result in (1).
7330 *
7331 * So, on SMP systems we'll have to select the ASYNC update method
7332 * if there are symptoms of these problems.
7333 */
7334 if (RTMpGetCount() > 1)
7335 {
7336 uint32_t uEAX, uEBX, uECX, uEDX;
7337 uint64_t u64DiffCoresIgnored;
7338
7339 /* Permit the user and/or the OS specific bits to force async mode. */
7340 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
7341 return SUPGIPMODE_ASYNC_TSC;
7342
7343 /* Try check for current differences between the cpus. */
7344 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
7345 return SUPGIPMODE_ASYNC_TSC;
7346
7347 /*
7348 * If the CPU supports power management and is an AMD one we
7349 * won't trust it unless it has the TscInvariant bit is set.
7350 */
7351 /** @todo this is now redundant. remove later. */
7352 /* Check for "AuthenticAMD" */
7353 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
7354 if ( uEAX >= 1
7355 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
7356 {
7357 /* Check for APM support and that TscInvariant is cleared. */
7358 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
7359 if (uEAX >= 0x80000007)
7360 {
7361 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
7362 if ( !(uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR) /* TscInvariant */
7363 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
7364 return SUPGIPMODE_ASYNC_TSC;
7365 }
7366 }
7367 }
7368 return SUPGIPMODE_SYNC_TSC;
7369}
7370
7371
7372/**
7373 * Initializes per-CPU GIP information.
7374 *
7375 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7376 * @param pCpu Pointer to which GIP CPU to initalize.
7377 * @param u64NanoTS The current nanosecond timestamp.
7378 */
7379static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
7380{
7381 pCpu->u32TransactionId = 2;
7382 pCpu->u64NanoTS = u64NanoTS;
7383 pCpu->u64TSC = ASMReadTSC();
7384 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
7385 pCpu->i64TSCDelta = g_fOsTscDeltasInSync ? 0 : INT64_MAX;
7386
7387 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
7388 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
7389 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
7390 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
7391
7392 /*
7393 * We don't know the following values until we've executed updates.
7394 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
7395 * the 2nd timer callout.
7396 */
7397 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
7398 pCpu->u32UpdateIntervalTSC
7399 = pCpu->au32TSCHistory[0]
7400 = pCpu->au32TSCHistory[1]
7401 = pCpu->au32TSCHistory[2]
7402 = pCpu->au32TSCHistory[3]
7403 = pCpu->au32TSCHistory[4]
7404 = pCpu->au32TSCHistory[5]
7405 = pCpu->au32TSCHistory[6]
7406 = pCpu->au32TSCHistory[7]
7407 = (uint32_t)(_4G / pGip->u32UpdateHz);
7408}
7409
7410
7411/**
7412 * Initializes the GIP data.
7413 *
7414 * @param pDevExt Pointer to the device instance data.
7415 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7416 * @param HCPhys The physical address of the GIP.
7417 * @param u64NanoTS The current nanosecond timestamp.
7418 * @param uUpdateHz The update frequency.
7419 * @param uUpdateIntervalNS The update interval in nanoseconds.
7420 * @param cCpus The CPU count.
7421 */
7422static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
7423 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
7424{
7425 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
7426 unsigned i;
7427#ifdef DEBUG_DARWIN_GIP
7428 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7429#else
7430 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
7431#endif
7432
7433 /*
7434 * Record whether the host OS has already normalized inter-CPU deltas for the hardware TSC.
7435 * We only bother with TSC-deltas only on invariant CPUs for now.
7436 */
7437 g_fOsTscDeltasInSync = supdrvIsInvariantTsc() && supdrvOSAreTscDeltasInSync();
7438
7439 /*
7440 * Initialize the structure.
7441 */
7442 memset(pGip, 0, cbGip);
7443 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
7444 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
7445 pGip->u32Mode = supdrvGipDetermineTscMode(pDevExt);
7446 pGip->cCpus = (uint16_t)cCpus;
7447 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
7448 pGip->u32UpdateHz = uUpdateHz;
7449 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
7450 RTCpuSetEmpty(&pGip->OnlineCpuSet);
7451 RTCpuSetEmpty(&pGip->PresentCpuSet);
7452 RTMpGetSet(&pGip->PossibleCpuSet);
7453 pGip->cOnlineCpus = RTMpGetOnlineCount();
7454 pGip->cPresentCpus = RTMpGetPresentCount();
7455 pGip->cPossibleCpus = RTMpGetCount();
7456 pGip->idCpuMax = RTMpGetMaxCpuId();
7457 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
7458 pGip->aiCpuFromApicId[i] = UINT16_MAX;
7459 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
7460 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
7461
7462 for (i = 0; i < cCpus; i++)
7463 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS);
7464
7465 /*
7466 * Link it to the device extension.
7467 */
7468 pDevExt->pGip = pGip;
7469 pDevExt->HCPhysGip = HCPhys;
7470 pDevExt->cGipUsers = 0;
7471
7472 /*
7473 * Allocate the TSC delta sync. struct. on a separate cache line.
7474 */
7475 g_pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
7476 g_pTscDeltaSync = RT_ALIGN_PT(g_pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
7477 Assert(RT_ALIGN_PT(g_pTscDeltaSync, 64, PSUPTSCDELTASYNC) == g_pTscDeltaSync);
7478}
7479
7480
7481/**
7482 * On CPU initialization callback for RTMpOnAll.
7483 *
7484 * @param idCpu The CPU ID.
7485 * @param pvUser1 The device extension.
7486 * @param pvUser2 The GIP.
7487 */
7488static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
7489{
7490 /* This is good enough, even though it will update some of the globals a
7491 bit to much. */
7492 supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
7493}
7494
7495
7496/**
7497 * Invalidates the GIP data upon termination.
7498 *
7499 * @param pGip Pointer to the read-write kernel mapping of the GIP.
7500 */
7501static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
7502{
7503 unsigned i;
7504 pGip->u32Magic = 0;
7505 for (i = 0; i < pGip->cCpus; i++)
7506 {
7507 pGip->aCPUs[i].u64NanoTS = 0;
7508 pGip->aCPUs[i].u64TSC = 0;
7509 pGip->aCPUs[i].iTSCHistoryHead = 0;
7510 pGip->aCPUs[i].u64TSCSample = 0;
7511 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
7512 }
7513
7514 if (g_pvTscDeltaSync)
7515 {
7516 RTMemFree(g_pvTscDeltaSync);
7517 g_pTscDeltaSync = NULL;
7518 g_pvTscDeltaSync = NULL;
7519 }
7520}
7521
7522
7523/**
7524 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
7525 * updates all the per cpu data except the transaction id.
7526 *
7527 * @param pDevExt The device extension.
7528 * @param pGipCpu Pointer to the per cpu data.
7529 * @param u64NanoTS The current time stamp.
7530 * @param u64TSC The current TSC.
7531 * @param iTick The current timer tick.
7532 *
7533 * @remarks Can be called with interrupts disabled!
7534 */
7535static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
7536{
7537 uint64_t u64TSCDelta;
7538 uint32_t u32UpdateIntervalTSC;
7539 uint32_t u32UpdateIntervalTSCSlack;
7540 unsigned iTSCHistoryHead;
7541 uint64_t u64CpuHz;
7542 uint32_t u32TransactionId;
7543
7544 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7545 AssertPtrReturnVoid(pGip);
7546
7547 /* Delta between this and the previous update. */
7548 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
7549
7550 /*
7551 * Update the NanoTS.
7552 */
7553 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
7554
7555 /*
7556 * Calc TSC delta.
7557 */
7558 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
7559 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
7560
7561 /* We don't need to keep realculating the frequency when it's invariant. */
7562 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
7563 return;
7564
7565 if (u64TSCDelta >> 32)
7566 {
7567 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
7568 pGipCpu->cErrors++;
7569 }
7570
7571 /*
7572 * On the 2nd and 3rd callout, reset the history with the current TSC
7573 * interval since the values entered by supdrvGipInit are totally off.
7574 * The interval on the 1st callout completely unreliable, the 2nd is a bit
7575 * better, while the 3rd should be most reliable.
7576 */
7577 u32TransactionId = pGipCpu->u32TransactionId;
7578 if (RT_UNLIKELY( ( u32TransactionId == 5
7579 || u32TransactionId == 7)
7580 && ( iTick == 2
7581 || iTick == 3) ))
7582 {
7583 unsigned i;
7584 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
7585 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
7586 }
7587
7588 /*
7589 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
7590 * Wait until we have at least one full history since the above history reset. The
7591 * assumption is that the majority of the previous history values will be tolerable.
7592 * See @bugref{6710} comment #67.
7593 */
7594 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
7595 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7596 {
7597 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
7598 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
7599 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
7600 {
7601 uint32_t u32;
7602 u32 = pGipCpu->au32TSCHistory[0];
7603 u32 += pGipCpu->au32TSCHistory[1];
7604 u32 += pGipCpu->au32TSCHistory[2];
7605 u32 += pGipCpu->au32TSCHistory[3];
7606 u32 >>= 2;
7607 u64TSCDelta = pGipCpu->au32TSCHistory[4];
7608 u64TSCDelta += pGipCpu->au32TSCHistory[5];
7609 u64TSCDelta += pGipCpu->au32TSCHistory[6];
7610 u64TSCDelta += pGipCpu->au32TSCHistory[7];
7611 u64TSCDelta >>= 2;
7612 u64TSCDelta += u32;
7613 u64TSCDelta >>= 1;
7614 }
7615 }
7616
7617
7618 /*
7619 * TSC History.
7620 */
7621 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
7622 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
7623 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
7624 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
7625
7626 /*
7627 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
7628 *
7629 * On Windows, we have an occasional (but recurring) sour value that messed up
7630 * the history but taking only 1 interval reduces the precision overall.
7631 * However, this problem existed before the invariant mode was introduced.
7632 */
7633 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
7634 || pGip->u32UpdateHz >= 1000)
7635 {
7636 uint32_t u32;
7637 u32 = pGipCpu->au32TSCHistory[0];
7638 u32 += pGipCpu->au32TSCHistory[1];
7639 u32 += pGipCpu->au32TSCHistory[2];
7640 u32 += pGipCpu->au32TSCHistory[3];
7641 u32 >>= 2;
7642 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
7643 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
7644 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
7645 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
7646 u32UpdateIntervalTSC >>= 2;
7647 u32UpdateIntervalTSC += u32;
7648 u32UpdateIntervalTSC >>= 1;
7649
7650 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
7651 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
7652 }
7653 else if (pGip->u32UpdateHz >= 90)
7654 {
7655 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7656 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
7657 u32UpdateIntervalTSC >>= 1;
7658
7659 /* value chosen on a 2GHz thinkpad running windows */
7660 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
7661 }
7662 else
7663 {
7664 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
7665
7666 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
7667 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
7668 }
7669 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
7670
7671 /*
7672 * CpuHz.
7673 */
7674 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
7675 u64CpuHz /= pGip->u32UpdateIntervalNS;
7676 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
7677}
7678
7679
7680/**
7681 * Updates the GIP.
7682 *
7683 * @param pDevExt The device extension.
7684 * @param u64NanoTS The current nanosecond timesamp.
7685 * @param u64TSC The current TSC timesamp.
7686 * @param idCpu The CPU ID.
7687 * @param iTick The current timer tick.
7688 *
7689 * @remarks Can be called with interrupts disabled!
7690 */
7691static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
7692{
7693 /*
7694 * Determine the relevant CPU data.
7695 */
7696 PSUPGIPCPU pGipCpu;
7697 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7698 AssertPtrReturnVoid(pGip);
7699
7700 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
7701 pGipCpu = &pGip->aCPUs[0];
7702 else
7703 {
7704 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
7705 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
7706 return;
7707 pGipCpu = &pGip->aCPUs[iCpu];
7708 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
7709 return;
7710 }
7711
7712 /*
7713 * Start update transaction.
7714 */
7715 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7716 {
7717 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
7718 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7719 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7720 pGipCpu->cErrors++;
7721 return;
7722 }
7723
7724 /*
7725 * Recalc the update frequency every 0x800th time.
7726 */
7727 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
7728 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
7729 {
7730 if (pGip->u64NanoTSLastUpdateHz)
7731 {
7732#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
7733 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
7734 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
7735 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
7736 {
7737 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
7738 * calculation on non-invariant hosts if it changes the history decision
7739 * taken in supdrvGipDoUpdateCpu(). */
7740 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
7741 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
7742 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
7743 }
7744#endif
7745 }
7746 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
7747 }
7748
7749 /*
7750 * Update the data.
7751 */
7752 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7753
7754 /*
7755 * Complete transaction.
7756 */
7757 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7758}
7759
7760
7761/**
7762 * Updates the per cpu GIP data for the calling cpu.
7763 *
7764 * @param pDevExt The device extension.
7765 * @param u64NanoTS The current nanosecond timesamp.
7766 * @param u64TSC The current TSC timesamp.
7767 * @param idCpu The CPU ID.
7768 * @param idApic The APIC id for the CPU index.
7769 * @param iTick The current timer tick.
7770 *
7771 * @remarks Can be called with interrupts disabled!
7772 */
7773static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
7774 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
7775{
7776 uint32_t iCpu;
7777 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
7778
7779 /*
7780 * Avoid a potential race when a CPU online notification doesn't fire on
7781 * the onlined CPU but the tick creeps in before the event notification is
7782 * run.
7783 */
7784 if (RT_UNLIKELY(iTick == 1))
7785 {
7786 iCpu = supdrvGipCpuIndexFromCpuId(pGip, idCpu);
7787 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
7788 supdrvGipMpEventOnline(pDevExt, idCpu);
7789 }
7790
7791 iCpu = pGip->aiCpuFromApicId[idApic];
7792 if (RT_LIKELY(iCpu < pGip->cCpus))
7793 {
7794 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
7795 if (pGipCpu->idCpu == idCpu)
7796 {
7797 /*
7798 * Start update transaction.
7799 */
7800 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
7801 {
7802 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
7803 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7804 pGipCpu->cErrors++;
7805 return;
7806 }
7807
7808 /*
7809 * Update the data.
7810 */
7811 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
7812
7813 /*
7814 * Complete transaction.
7815 */
7816 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
7817 }
7818 }
7819}
7820
7821
7822/**
7823 * Resume built-in keyboard on MacBook Air and Pro hosts.
7824 * If there is no built-in keyboard device, return success anyway.
7825 *
7826 * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
7827 */
7828static int supdrvIOCtl_ResumeSuspendedKbds(void)
7829{
7830#if defined(RT_OS_DARWIN)
7831 return supdrvDarwinResumeSuspendedKbds();
7832#else
7833 return VERR_NOT_IMPLEMENTED;
7834#endif
7835}
7836
7837
7838/**
7839 * Service a TSC-delta measurement request.
7840 *
7841 * @returns VBox status code.
7842 * @param pDevExt Pointer to the device instance data.
7843 * @param pReq Pointer to the TSC-delta measurement request.
7844 */
7845static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPTSCDELTAMEASURE pReq)
7846{
7847 PSUPGLOBALINFOPAGE pGip;
7848 RTCPUID idCpuWorker;
7849 int rc = VERR_CPU_NOT_FOUND;
7850 int16_t cTries;
7851 RTMSINTERVAL cMsWaitRetry;
7852 uint16_t iCpu;
7853
7854 /*
7855 * Validate.
7856 */
7857 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7858 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7859 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7860 idCpuWorker = pReq->u.In.idCpu;
7861 if (idCpuWorker == NIL_RTCPUID)
7862 return VERR_INVALID_CPU_ID;
7863
7864 if (!GIP_ARE_TSC_DELTAS_APPLICABLE(pGip))
7865 return VINF_SUCCESS;
7866
7867 cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
7868 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
7869 pGip = pDevExt->pGip;
7870 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
7871 {
7872 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
7873 if (pGipCpuWorker->idCpu == idCpuWorker)
7874 {
7875 if ( pGipCpuWorker->i64TSCDelta != INT64_MAX
7876 && !pReq->u.In.fForce)
7877 return VINF_SUCCESS;
7878
7879#ifdef SUPDRV_USE_TSC_DELTA_THREAD
7880 if (pReq->u.In.fAsync)
7881 {
7882 /** @todo Async. doesn't implement options like retries, waiting. We'll need
7883 * to pass those options to the thread somehow and implement it in the
7884 * thread. Check if anyone uses/needs fAsync before implementing this. */
7885 RTCpuSetAdd(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->idCpu);
7886 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
7887 if ( pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Listening
7888 || pDevExt->enmTscDeltaState == kSupDrvTscDeltaState_Measuring)
7889 {
7890 pDevExt->enmTscDeltaState = kSupDrvTscDeltaState_WaitAndMeasure;
7891 }
7892 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
7893 RTThreadUserSignal(pDevExt->hTscDeltaThread);
7894 return VINF_SUCCESS;
7895 }
7896#endif
7897
7898 while (cTries-- > 0)
7899 {
7900 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7901 if (RT_SUCCESS(rc))
7902 {
7903 Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
7904 break;
7905 }
7906
7907 if (cMsWaitRetry)
7908 RTThreadSleep(cMsWaitRetry);
7909 }
7910
7911 break;
7912 }
7913 }
7914 return rc;
7915}
7916
7917
7918/**
7919 * Reads the TSC and TSC-delta atomically, applies the TSC delta.
7920 *
7921 * @returns VBox status code.
7922 * @param pDevExt Pointer to the device instance data.
7923 * @param pReq Pointer to the TSC-read request.
7924 */
7925static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPTSCREAD pReq)
7926{
7927 uint64_t uTsc;
7928 uint16_t idApic;
7929 int16_t cTries;
7930 PSUPGLOBALINFOPAGE pGip;
7931 int rc;
7932
7933 /*
7934 * Validate.
7935 */
7936 AssertReturn(pDevExt, VERR_INVALID_PARAMETER);
7937 AssertReturn(pReq, VERR_INVALID_PARAMETER);
7938 AssertReturn(pDevExt->pGip, VERR_INVALID_PARAMETER);
7939 pGip = pDevExt->pGip;
7940
7941 cTries = 4;
7942 while (cTries-- > 0)
7943 {
7944 rc = SUPGetTsc(&uTsc, &idApic);
7945 if (RT_SUCCESS(rc))
7946 {
7947 pReq->u.Out.u64AdjustedTsc = uTsc;
7948 pReq->u.Out.idApic = idApic;
7949 return VINF_SUCCESS;
7950 }
7951 else
7952 {
7953 /* If we failed to have a TSC-delta, measurement the TSC-delta and retry. */
7954 int rc2;
7955 uint16_t iCpu;
7956 AssertMsgReturn(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId),
7957 ("idApic=%u ArraySize=%u\n", idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)), VERR_INVALID_CPU_INDEX);
7958 iCpu = pGip->aiCpuFromApicId[idApic];
7959 AssertMsgReturn(iCpu < pGip->cCpus, ("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus), VERR_INVALID_CPU_INDEX);
7960
7961 Assert(GIP_ARE_TSC_DELTAS_APPLICABLE(pGip));
7962 rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
7963 if (RT_SUCCESS(rc2))
7964 AssertReturn(pGip->aCPUs[iCpu].i64TSCDelta != INT64_MAX, VERR_INTERNAL_ERROR_2);
7965 }
7966 }
7967
7968 return rc;
7969}
7970
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette