VirtualBox

Changeset 54327 in vbox for trunk/src/VBox/HostDrivers


Ignore:
Timestamp:
Feb 20, 2015 1:35:30 PM (10 years ago)
Author:
vboxsync
Message:

SUPDrv: Split out the GIP related code into SUPDrvGip.cpp.

Location:
trunk/src/VBox/HostDrivers/Support
Files:
7 edited
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/HostDrivers/Support/Makefile.kmk

    r54308 r54327  
    55
    66#
    7 # Copyright (C) 2006-2012 Oracle Corporation
     7# Copyright (C) 2006-2015 Oracle Corporation
    88#
    99# This file is part of VirtualBox Open Source Edition (OSE), as
     
    5555 ifdef VBOX_WITH_VBOXDRV
    5656  LIBRARIES       += SUPR0IdcClient
    57   SYSMODS.freebsd += vboxdrv
    5857  SYSMODS.os2     += VBoxDrv
    5958 endif
     
    519518        os2/SUPDrv-os2.cpp \
    520519        SUPDrv.c \
     520        SUPDrvGip.c \
    521521        SUPDrvSem.c \
    522522        SUPLibAll.cpp
    523523
    524524endif # os2
    525 ifeq ($(KBUILD_TARGET),freebsd)
    526 
    527 #
    528 # vboxdrv.ko - The FreeBSD Kernel Module.
    529 #
    530 vboxdrv_TEMPLATE      = VBOXR0DRV
    531 vboxdrv_DEFS          = IN_RT_R0 IN_SUP_R0 SUPDRV_WITH_RELEASE_LOGGER VBOX_SVN_REV=$(VBOX_SVN_REV)
    532 vboxdrv_INCS         := $(PATH_SUB_CURRENT)
    533 vboxdrv_LIBS          = $(PATH_STAGE_LIB)/RuntimeR0Drv$(VBOX_SUFF_LIB)
    534 vboxdrv_SOURCES      := \
    535         $(KBUILD_TARGET)/SUPDrv-$(KBUILD_TARGET).c \
    536         $(PATH_SUB_CURRENT)/$(KBUILD_TARGET)/SUPDrv-$(KBUILD_TARGET).def \
    537         SUPDrv.c \
    538         SUPDrvSem.c \
    539         SUPLibAll.cpp
    540 ## @todo the SUPDrv-freebsd.def is most probably gonna break it and require build system hacking...
    541 
    542 #
    543 # Targets for installing the freebsd sources.
    544 #
    545 vboxdrv-mod_INST    = bin/src/vboxdrv/
    546 vboxdrv-mod_SOURCES = \
    547         $(subst $(DQUOTE),,$(FILES_VBOXDRV_NOBIN)) \
    548         $(vboxdrv-mod_0_OUTDIR)/Makefile
    549 vboxdrv-mod_CLEAN   = \
    550         $(vboxdrv-mod_0_OUTDIR)/Makefile
    551 
    552 $$(vboxdrv-mod_0_OUTDIR)/Makefile: \
    553                 $(PATH_SUB_CURRENT)/freebsd/Makefile \
    554                 $$(if $$(eq $$(Support/freebsd/Makefile_VBOX_HARDENED),$$(VBOX_WITH_HARDENING)),,FORCE) \
    555                 | $$(dir $$@)
    556         $(call MSG_TOOL,Creating,,$@)
    557         $(QUIET)$(RM) -f -- $@
    558  ifndef VBOX_WITH_HARDENING
    559         $(QUIET)$(SED) -e "s;-DVBOX_WITH_HARDENING;;g" --output $@ $<
    560  else
    561         $(QUIET)$(CP) -f $< $@
    562  endif
    563 
    564 endif # freebsd
    565525
    566526
     
    568528# New VBoxDrv target. TODO: Convert all the above to use this!
    569529#
    570 if1of ($(KBUILD_TARGET), darwin linux solaris win)
     530if1of ($(KBUILD_TARGET), darwin freebsd linux solaris win)
    571531 ifdef VBOX_WITH_VBOXDRV
    572532  SYSMODS += VBoxDrv
     
    657617        SUPDrv.d \
    658618        SUPDrv.c \
     619        SUPDrvGip.cpp \
    659620        SUPDrvSem.c \
    660621        SUPDrvTracer.cpp \
     
    715676
    716677
    717 if1of ($(KBUILD_TARGET), linux)
     678if1of ($(KBUILD_TARGET), linux freebsd)
    718679 #
    719680 # Targets for installing the linux sources.
     
    724685        $(vboxdrv-mod_0_OUTDIR)/Makefile
    725686 vboxdrv-mod_EXEC_SOURCES  = \
    726         $(subst $(DQUOTE),,$(FILES_VBOXDRV_BIN)) \
    727         $(PATH_ROOT)/src/VBox/HostDrivers/linux/do_Module.symvers
     687        $(subst $(DQUOTE),,$(FILES_VBOXDRV_BIN))
     688 vboxdrv-mod_EXEC_SOURCES.linux = \
     689        $(PATH_ROOT)/src/VBox/HostDrivers/$(KBUILD_TARGET)/do_Module.symvers
    728690 vboxdrv-mod_CLEAN   = \
    729691        $(vboxdrv-mod_0_OUTDIR)/Makefile \
     
    733695 includedep $(PATH_TARGET)/vboxdrv-mod-1.dep
    734696 $$(vboxdrv-mod_0_OUTDIR)/Makefile: \
    735                 $(PATH_SUB_CURRENT)/linux/Makefile \
    736                 $$(if $$(eq $$(Support/linux/Makefile_VBOX_HARDENED),$$(VBOX_WITH_HARDENING)),,FORCE) \
     697                $(PATH_SUB_CURRENT)/$(KBUILD_TARGET)/Makefile \
     698                $$(if $$(eq $$(Support/$(KBUILD_TARGET)/Makefile_VBOX_HARDENED),$$(VBOX_WITH_HARDENING)),,FORCE) \
    737699                | $$(dir $$@)
    738700        $(call MSG_TOOL,Creating,,$@)
     
    742704        $(QUIET)$(CP) -f $< $@
    743705 endif
    744         %$(QUIET2)$(APPEND) -t '$(PATH_TARGET)/vboxdrv-mod-1.dep' 'Support/linux/Makefile_VBOX_HARDENED=$(VBOX_WITH_HARDENING)'
    745 endif # real linux
     706        %$(QUIET2)$(APPEND) -t '$(PATH_TARGET)/vboxdrv-mod-1.dep' \
     707                'Support/$(KBUILD_TARGET)/Makefile_VBOX_HARDENED=$(VBOX_WITH_HARDENING)'
     708endif # linux freebsd
    746709
    747710
  • trunk/src/VBox/HostDrivers/Support/SUPDrv.c

    r54325 r54327  
    9191*   Defined Constants And Macros                                               *
    9292*******************************************************************************/
    93 /** The frequency by which we recalculate the u32UpdateHz and
    94  * u32UpdateIntervalNS GIP members. The value must be a power of 2.
    95  *
    96  * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
    97  */
    98 #define GIP_UPDATEHZ_RECALC_FREQ            0x800
    99 
    100 /** A reserved TSC value used for synchronization as well as measurement of
    101  *  TSC deltas. */
    102 #define GIP_TSC_DELTA_RSVD                  UINT64_MAX
    103 /** The number of TSC delta measurement loops in total (includes primer and
    104  *  read-time loops). */
    105 #define GIP_TSC_DELTA_LOOPS                 96
    106 /** The number of cache primer loops. */
    107 #define GIP_TSC_DELTA_PRIMER_LOOPS          4
    108 /** The number of loops until we keep computing the minimum read time. */
    109 #define GIP_TSC_DELTA_READ_TIME_LOOPS       24
    110 /** Stop measurement of TSC delta. */
    111 #define GIP_TSC_DELTA_SYNC_STOP             0
    112 /** Start measurement of TSC delta. */
    113 #define GIP_TSC_DELTA_SYNC_START            1
    114 /** Worker thread is ready for reading the TSC. */
    115 #define GIP_TSC_DELTA_SYNC_WORKER_READY     2
    116 /** Worker thread is done updating TSC delta info. */
    117 #define GIP_TSC_DELTA_SYNC_WORKER_DONE      3
    118 /** When IPRT isn't concurrent safe: Master is ready and will wait for worker
    119  *  with a timeout. */
    120 #define GIP_TSC_DELTA_SYNC_PRESTART_MASTER  4
    121 /** When IPRT isn't concurrent safe: Worker is ready after waiting for
    122  *  master with a timeout. */
    123 #define GIP_TSC_DELTA_SYNC_PRESTART_WORKER  5
    124 /** The TSC-refinement interval in seconds. */
    125 #define GIP_TSC_REFINE_INTERVAL             5
    126 /** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
    127 #define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO    32
    128 /** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
    129 #define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO        448
    130 /** The TSC delta value for the initial GIP master - 0 in regular builds.
    131  * To test the delta code this can be set to a non-zero value.  */
    132 #if 1
    133 # define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
    134 #else
    135 # define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
    136 #endif
    137 
    138 AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
    139 AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
    140 
    14193/** @def VBOX_SVN_REV
    14294 * The makefile should define this if it can. */
     
    14597#endif
    14698
    147 #if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
    148 # define DO_NOT_START_GIP
    149 #endif
    15099
    151100/*******************************************************************************
     
    171120static int                  supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
    172121static int                  supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
    173 static int                  supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq);
    174 static int                  supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq);
    175 static int                  supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
    176 static void                 supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
    177 static DECLCALLBACK(void)   supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
    178 static DECLCALLBACK(void)   supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
    179 static DECLCALLBACK(void)   supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
    180 static void                 supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
    181                                           unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
    182 static DECLCALLBACK(void)   supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
    183 static void                 supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
    184 static void                 supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
    185 static void                 supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
    186                                                   RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
    187 static void                 supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
    188 static int                  supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
    189 static int                  supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
    190122static int                  supdrvIOCtl_ResumeSuspendedKbds(void);
    191123
     
    194126*   Global Variables                                                           *
    195127*******************************************************************************/
    196 DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
    197 
    198 
    199128/**
    200129 * Array of the R0 SUP API.
     
    40023931
    40033932/**
    4004  * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
    4005  * updating.
    4006  *
    4007  * @param   pGip             Pointer to the GIP.
    4008  * @param   pGipCpu          The per CPU structure for this CPU.
    4009  * @param   u64NanoTS        The current time.
    4010  */
    4011 static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
    4012 {
    4013     /*
    4014      * Here we don't really care about applying the TSC delta. The re-initialization of this
    4015      * value is not relevant especially while (re)starting the GIP as the first few ones will
    4016      * be ignored anyway, see supdrvGipDoUpdateCpu().
    4017      */
    4018     pGipCpu->u64TSC    = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
    4019     pGipCpu->u64NanoTS = u64NanoTS;
    4020 }
    4021 
    4022 
    4023 /**
    4024  * Set the current TSC and NanoTS value for the CPU.
    4025  *
    4026  * @param   idCpu            The CPU ID. Unused - we have to use the APIC ID.
    4027  * @param   pvUser1          Pointer to the ring-0 GIP mapping.
    4028  * @param   pvUser2          Pointer to the variable holding the current time.
    4029  */
    4030 static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    4031 {
    4032     PSUPGLOBALINFOPAGE  pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    4033     unsigned            iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
    4034 
    4035     if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
    4036         supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
    4037 
    4038     NOREF(pvUser2);
    4039     NOREF(idCpu);
    4040 }
    4041 
    4042 
    4043 /**
    4044  * State structure for supdrvGipDetectGetGipCpuCallback.
    4045  */
    4046 typedef struct SUPDRVGIPDETECTGETCPU
    4047 {
    4048     /** Bitmap of APIC IDs that have been seen (initialized to zero).
    4049      *  Used to detect duplicate APIC IDs (paranoia). */
    4050     uint8_t volatile    bmApicId[256 / 8];
    4051     /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
    4052      *  initially). The callback clears the methods not detected. */
    4053     uint32_t volatile   fSupported;
    4054     /** The first callback detecting any kind of range issues (initialized to
    4055      * NIL_RTCPUID). */
    4056     RTCPUID volatile    idCpuProblem;
    4057 } SUPDRVGIPDETECTGETCPU;
    4058 /** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
    4059 typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
    4060 
    4061 
    4062 /**
    4063  * Checks for alternative ways of getting the CPU ID.
    4064  *
    4065  * This also checks the APIC ID, CPU ID and CPU set index values against the
    4066  * GIP tables.
    4067  *
    4068  * @param   idCpu            The CPU ID. Unused - we have to use the APIC ID.
    4069  * @param   pvUser1          Pointer to the state structure.
    4070  * @param   pvUser2          Pointer to the GIP.
    4071  */
    4072 static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    4073 {
    4074     PSUPDRVGIPDETECTGETCPU  pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
    4075     PSUPGLOBALINFOPAGE      pGip   = (PSUPGLOBALINFOPAGE)pvUser2;
    4076     uint32_t                fSupported = 0;
    4077     uint16_t                idApic;
    4078     int                     iCpuSet;
    4079 
    4080     AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
    4081 
    4082     /*
    4083      * Check that the CPU ID and CPU set index are interchangeable.
    4084      */
    4085     iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    4086     if ((RTCPUID)iCpuSet == idCpu)
    4087     {
    4088         AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
    4089         if (   iCpuSet >= 0
    4090             && iCpuSet < RTCPUSET_MAX_CPUS
    4091             && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
    4092         {
    4093             /*
    4094              * Check whether the IDTR.LIMIT contains a CPU number.
    4095              */
    4096 #ifdef RT_ARCH_X86
    4097             uint16_t const  cbIdt = sizeof(X86DESC64SYSTEM) * 256;
    4098 #else
    4099             uint16_t const  cbIdt = sizeof(X86DESCGATE)     * 256;
    4100 #endif
    4101             RTIDTR          Idtr;
    4102             ASMGetIDTR(&Idtr);
    4103             if (Idtr.cbIdt >= cbIdt)
    4104             {
    4105                 uint32_t uTmp = Idtr.cbIdt - cbIdt;
    4106                 uTmp &= RTCPUSET_MAX_CPUS - 1;
    4107                 if (uTmp == idCpu)
    4108                 {
    4109                     RTIDTR Idtr2;
    4110                     ASMGetIDTR(&Idtr2);
    4111                     if (Idtr2.cbIdt == Idtr.cbIdt)
    4112                         fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
    4113                 }
    4114             }
    4115 
    4116             /*
    4117              * Check whether RDTSCP is an option.
    4118              */
    4119             if (ASMHasCpuId())
    4120             {
    4121                 if (   ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
    4122                     && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
    4123                 {
    4124                     uint32_t uAux;
    4125                     ASMReadTscWithAux(&uAux);
    4126                     if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
    4127                     {
    4128                         ASMNopPause();
    4129                         ASMReadTscWithAux(&uAux);
    4130                         if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
    4131                             fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
    4132                     }
    4133                 }
    4134             }
    4135         }
    4136     }
    4137 
    4138     /*
    4139      * Check that the APIC ID is unique.
    4140      */
    4141     idApic = ASMGetApicId();
    4142     if (RT_LIKELY(   idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
    4143                   && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
    4144         fSupported |= SUPGIPGETCPU_APIC_ID;
    4145     else
    4146     {
    4147         AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
    4148         ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
    4149         LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
    4150                 idCpu, iCpuSet, idApic));
    4151     }
    4152 
    4153     /*
    4154      * Check that the iCpuSet is within the expected range.
    4155      */
    4156     if (RT_UNLIKELY(   iCpuSet < 0
    4157                     || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
    4158                     || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
    4159     {
    4160         ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
    4161         LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
    4162                 idCpu, iCpuSet, idApic));
    4163     }
    4164     else
    4165     {
    4166         RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
    4167         if (RT_UNLIKELY(idCpu2 != idCpu))
    4168         {
    4169             ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
    4170             LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
    4171                     idCpu, iCpuSet, idApic, idCpu2));
    4172         }
    4173     }
    4174 
    4175     /*
    4176      * Update the supported feature mask before we return.
    4177      */
    4178     ASMAtomicAndU32(&pState->fSupported, fSupported);
    4179 
    4180     NOREF(pvUser2);
    4181 }
    4182 
    4183 
    4184 /**
    4185  * Increase the timer frequency on hosts where this is possible (NT).
    4186  *
    4187  * The idea is that more interrupts are better for us... Also, it's better that
    4188  * we increase the timer frequency, because we might end up getting inaccurate
    4189  * callbacks if someone else does it.
    4190  *
    4191  * @param   pDevExt   Sets u32SystemTimerGranularityGrant if increased.
    4192  */
    4193 static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
    4194 {
    4195     if (pDevExt->u32SystemTimerGranularityGrant == 0)
    4196     {
    4197         uint32_t u32SystemResolution;
    4198         if (   RT_SUCCESS_NP(RTTimerRequestSystemGranularity(  976563 /* 1024 HZ */, &u32SystemResolution))
    4199             || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
    4200             || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /*  512 HZ */, &u32SystemResolution))
    4201             || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /*  500 HZ */, &u32SystemResolution))
    4202            )
    4203         {
    4204             Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
    4205             pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
    4206         }
    4207     }
    4208 }
    4209 
    4210 
    4211 /**
    4212  * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
    4213  *
    4214  * @param   pDevExt     Clears u32SystemTimerGranularityGrant.
    4215  */
    4216 static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
    4217 {
    4218     if (pDevExt->u32SystemTimerGranularityGrant)
    4219     {
    4220         int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
    4221         AssertRC(rc2);
    4222         pDevExt->u32SystemTimerGranularityGrant = 0;
    4223     }
    4224 }
    4225 
    4226 
    4227 /**
    4228  * Maps the GIP into userspace and/or get the physical address of the GIP.
    4229  *
    4230  * @returns IPRT status code.
    4231  * @param   pSession        Session to which the GIP mapping should belong.
    4232  * @param   ppGipR3         Where to store the address of the ring-3 mapping. (optional)
    4233  * @param   pHCPhysGip      Where to store the physical address. (optional)
    4234  *
    4235  * @remark  There is no reference counting on the mapping, so one call to this function
    4236  *          counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
    4237  *          and remove the session as a GIP user.
    4238  */
    4239 SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
    4240 {
    4241     int             rc;
    4242     PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    4243     RTR3PTR         pGipR3  = NIL_RTR3PTR;
    4244     RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    4245     LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
    4246 
    4247     /*
    4248      * Validate
    4249      */
    4250     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    4251     AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    4252     AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
    4253 
    4254 #ifdef SUPDRV_USE_MUTEX_FOR_GIP
    4255     RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
    4256 #else
    4257     RTSemFastMutexRequest(pDevExt->mtxGip);
    4258 #endif
    4259     if (pDevExt->pGip)
    4260     {
    4261         /*
    4262          * Map it?
    4263          */
    4264         rc = VINF_SUCCESS;
    4265         if (ppGipR3)
    4266         {
    4267             if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
    4268                 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
    4269                                        RTMEM_PROT_READ, RTR0ProcHandleSelf());
    4270             if (RT_SUCCESS(rc))
    4271                 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
    4272         }
    4273 
    4274         /*
    4275          * Get physical address.
    4276          */
    4277         if (pHCPhysGip && RT_SUCCESS(rc))
    4278             HCPhys = pDevExt->HCPhysGip;
    4279 
    4280         /*
    4281          * Reference globally.
    4282          */
    4283         if (!pSession->fGipReferenced && RT_SUCCESS(rc))
    4284         {
    4285             pSession->fGipReferenced = 1;
    4286             pDevExt->cGipUsers++;
    4287             if (pDevExt->cGipUsers == 1)
    4288             {
    4289                 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
    4290                 uint64_t u64NanoTS;
    4291 
    4292                 /*
    4293                  * GIP starts/resumes updating again.  On windows we bump the
    4294                  * host timer frequency to make sure we don't get stuck in guest
    4295                  * mode and to get better timer (and possibly clock) accuracy.
    4296                  */
    4297                 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
    4298 
    4299                 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
    4300 
    4301                 /*
    4302                  * document me
    4303                  */
    4304                 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
    4305                 {
    4306                     unsigned i;
    4307                     for (i = 0; i < pGipR0->cCpus; i++)
    4308                         ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
    4309                                             (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
    4310                                             & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
    4311                     ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
    4312                 }
    4313 
    4314                 /*
    4315                  * document me
    4316                  */
    4317                 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
    4318                 if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
    4319                     || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
    4320                     || RTMpGetOnlineCount() == 1)
    4321                     supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
    4322                 else
    4323                     RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
    4324 
    4325                 /*
    4326                  * Detect alternative ways to figure the CPU ID in ring-3 and
    4327                  * raw-mode context.  Check the sanity of the APIC IDs, CPU IDs,
    4328                  * and CPU set indexes while we're at it.
    4329                  */
    4330                 if (RT_SUCCESS(rc))
    4331                 {
    4332                     SUPDRVGIPDETECTGETCPU DetectState;
    4333                     RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
    4334                     DetectState.fSupported   = UINT32_MAX;
    4335                     DetectState.idCpuProblem = NIL_RTCPUID;
    4336                     rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
    4337                     if (DetectState.idCpuProblem == NIL_RTCPUID)
    4338                     {
    4339                         if (   DetectState.fSupported != UINT32_MAX
    4340                             && DetectState.fSupported != 0)
    4341                         {
    4342                             if (pGipR0->fGetGipCpu != DetectState.fSupported)
    4343                             {
    4344                                 pGipR0->fGetGipCpu = DetectState.fSupported;
    4345                                 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
    4346                             }
    4347                         }
    4348                         else
    4349                         {
    4350                             LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
    4351                                     DetectState.fSupported));
    4352                             rc = VERR_UNSUPPORTED_CPU;
    4353                         }
    4354                     }
    4355                     else
    4356                     {
    4357                         LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
    4358                                 DetectState.idCpuProblem, DetectState.idCpuProblem));
    4359                         rc = VERR_INVALID_CPU_ID;
    4360                     }
    4361                 }
    4362 
    4363                 /*
    4364                  * Start the GIP timer if all is well..
    4365                  */
    4366                 if (RT_SUCCESS(rc))
    4367                 {
    4368 #ifndef DO_NOT_START_GIP
    4369                     rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
    4370 #endif
    4371                     rc = VINF_SUCCESS;
    4372                 }
    4373 
    4374                 /*
    4375                  * Bail out on error.
    4376                  */
    4377                 if (RT_FAILURE(rc))
    4378                 {
    4379                     LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
    4380                     pDevExt->cGipUsers = 0;
    4381                     pSession->fGipReferenced = 0;
    4382                     if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    4383                     {
    4384                         int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
    4385                         if (RT_SUCCESS(rc2))
    4386                             pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    4387                     }
    4388                     HCPhys = NIL_RTHCPHYS;
    4389                     pGipR3 = NIL_RTR3PTR;
    4390                 }
    4391             }
    4392         }
    4393     }
    4394     else
    4395     {
    4396         rc = VERR_GENERAL_FAILURE;
    4397         Log(("SUPR0GipMap: GIP is not available!\n"));
    4398     }
    4399 #ifdef SUPDRV_USE_MUTEX_FOR_GIP
    4400     RTSemMutexRelease(pDevExt->mtxGip);
    4401 #else
    4402     RTSemFastMutexRelease(pDevExt->mtxGip);
    4403 #endif
    4404 
    4405     /*
    4406      * Write returns.
    4407      */
    4408     if (pHCPhysGip)
    4409         *pHCPhysGip = HCPhys;
    4410     if (ppGipR3)
    4411         *ppGipR3 = pGipR3;
    4412 
    4413 #ifdef DEBUG_DARWIN_GIP
    4414     OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
    4415 #else
    4416     LogFlow((   "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
    4417 #endif
    4418     return rc;
    4419 }
    4420 
    4421 
    4422 /**
    4423  * Unmaps any user mapping of the GIP and terminates all GIP access
    4424  * from this session.
    4425  *
    4426  * @returns IPRT status code.
    4427  * @param   pSession        Session to which the GIP mapping should belong.
    4428  */
    4429 SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
    4430 {
    4431     int                     rc = VINF_SUCCESS;
    4432     PSUPDRVDEVEXT           pDevExt = pSession->pDevExt;
    4433 #ifdef DEBUG_DARWIN_GIP
    4434     OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
    4435                 pSession,
    4436                 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
    4437                 pSession->GipMapObjR3));
    4438 #else
    4439     LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
    4440 #endif
    4441     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    4442 
    4443 #ifdef SUPDRV_USE_MUTEX_FOR_GIP
    4444     RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
    4445 #else
    4446     RTSemFastMutexRequest(pDevExt->mtxGip);
    4447 #endif
    4448 
    4449     /*
    4450      * Unmap anything?
    4451      */
    4452     if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    4453     {
    4454         rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
    4455         AssertRC(rc);
    4456         if (RT_SUCCESS(rc))
    4457             pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    4458     }
    4459 
    4460     /*
    4461      * Dereference global GIP.
    4462      */
    4463     if (pSession->fGipReferenced && !rc)
    4464     {
    4465         pSession->fGipReferenced = 0;
    4466         if (    pDevExt->cGipUsers > 0
    4467             &&  !--pDevExt->cGipUsers)
    4468         {
    4469             LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
    4470 #ifndef DO_NOT_START_GIP
    4471             rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
    4472 #endif
    4473             supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
    4474         }
    4475     }
    4476 
    4477 #ifdef SUPDRV_USE_MUTEX_FOR_GIP
    4478     RTSemMutexRelease(pDevExt->mtxGip);
    4479 #else
    4480     RTSemFastMutexRelease(pDevExt->mtxGip);
    4481 #endif
    4482 
    4483     return rc;
    4484 }
    4485 
    4486 
    4487 /**
    4488  * Gets the GIP pointer.
    4489  *
    4490  * @returns Pointer to the GIP or NULL.
    4491  */
    4492 SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
    4493 {
    4494     return g_pSUPGlobalInfoPage;
    4495 }
    4496 
    4497 
    4498 /**
    44993933 * Register a component factory with the support driver.
    45003934 *
     
    59775411}
    59785412
    5979 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    5980 
    5981 /**
    5982  * Switches the TSC-delta measurement thread into the butchered state.
    5983  *
    5984  * @returns VBox status code.
    5985  * @param pDevExt           Pointer to the device instance data.
    5986  * @param fSpinlockHeld     Whether the TSC-delta spinlock is held or not.
    5987  * @param pszFailed         An error message to log.
    5988  * @param rcFailed          The error code to exit the thread with.
    5989  */
    5990 static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
    5991 {
    5992     if (!fSpinlockHeld)
    5993         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    5994 
    5995     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
    5996     RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    5997     OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
    5998     return rcFailed;
    5999 }
    6000 
    6001 
    6002 /**
    6003  * The TSC-delta measurement thread.
    6004  *
    6005  * @returns VBox status code.
    6006  * @param hThread   The thread handle.
    6007  * @param pvUser    Opaque pointer to the device instance data.
    6008  */
    6009 static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
    6010 {
    6011     PSUPDRVDEVEXT     pDevExt = (PSUPDRVDEVEXT)pvUser;
    6012     bool              fInitialMeasurement = true;
    6013     uint32_t          cConsecutiveTimeouts = 0;
    6014     int               rc = VERR_INTERNAL_ERROR_2;
    6015     for (;;)
    6016     {
    6017         /*
    6018          * Switch on the current state.
    6019          */
    6020         SUPDRVTSCDELTATHREADSTATE enmState;
    6021         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    6022         enmState = pDevExt->enmTscDeltaThreadState;
    6023         switch (enmState)
    6024         {
    6025             case kTscDeltaThreadState_Creating:
    6026             {
    6027                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
    6028                 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
    6029                 if (RT_FAILURE(rc))
    6030                     return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
    6031                 /* fall thru */
    6032             }
    6033 
    6034             case kTscDeltaThreadState_Listening:
    6035             {
    6036                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6037 
    6038                 /* Simple adaptive timeout. */
    6039                 if (cConsecutiveTimeouts++ == 10)
    6040                 {
    6041                     if (pDevExt->cMsTscDeltaTimeout == 1)           /* 10 ms */
    6042                         pDevExt->cMsTscDeltaTimeout = 10;
    6043                     else if (pDevExt->cMsTscDeltaTimeout == 10)     /* +100 ms */
    6044                         pDevExt->cMsTscDeltaTimeout = 100;
    6045                     else if (pDevExt->cMsTscDeltaTimeout == 100)    /* +1000 ms */
    6046                         pDevExt->cMsTscDeltaTimeout = 500;
    6047                     cConsecutiveTimeouts = 0;
    6048                 }
    6049                 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
    6050                 if (   RT_FAILURE(rc)
    6051                     && rc != VERR_TIMEOUT)
    6052                     return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
    6053                 RTThreadUserReset(pDevExt->hTscDeltaThread);
    6054                 break;
    6055             }
    6056 
    6057             case kTscDeltaThreadState_WaitAndMeasure:
    6058             {
    6059                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
    6060                 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
    6061                 if (RT_FAILURE(rc))
    6062                     return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
    6063                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6064                 pDevExt->cMsTscDeltaTimeout = 1;
    6065                 RTThreadSleep(10);
    6066                 /* fall thru */
    6067             }
    6068 
    6069             case kTscDeltaThreadState_Measuring:
    6070             {
    6071                 cConsecutiveTimeouts = 0;
    6072                 if (fInitialMeasurement)
    6073                 {
    6074                     int cTries = 8;
    6075                     int cMsWaitPerTry = 10;
    6076                     fInitialMeasurement = false;
    6077                     do
    6078                     {
    6079                         rc = supdrvMeasureInitialTscDeltas(pDevExt);
    6080                         if (   RT_SUCCESS(rc)
    6081                             || (   RT_FAILURE(rc)
    6082                                 && rc != VERR_TRY_AGAIN
    6083                                 && rc != VERR_CPU_OFFLINE))
    6084                         {
    6085                             break;
    6086                         }
    6087                         RTThreadSleep(cMsWaitPerTry);
    6088                     } while (cTries-- > 0);
    6089                 }
    6090                 else
    6091                 {
    6092                     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    6093                     unsigned iCpu;
    6094 
    6095                     /* Measure TSC-deltas only for the CPUs that are in the set. */
    6096                     rc = VINF_SUCCESS;
    6097                     for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    6098                     {
    6099                         PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
    6100                         if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
    6101                             && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
    6102                         {
    6103                             rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
    6104                         }
    6105                     }
    6106                 }
    6107                 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    6108                 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
    6109                     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
    6110                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6111                 Assert(rc != VERR_NOT_AVAILABLE);   /* VERR_NOT_AVAILABLE is used as the initial value. */
    6112                 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
    6113                 break;
    6114             }
    6115 
    6116             case kTscDeltaThreadState_Terminating:
    6117                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
    6118                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6119                 return VINF_SUCCESS;
    6120 
    6121             case kTscDeltaThreadState_Butchered:
    6122             default:
    6123                 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
    6124         }
    6125     }
    6126 
    6127     return rc;
    6128 }
    6129 
    6130 
    6131 /**
    6132  * Waits for the TSC-delta measurement thread to respond to a state change.
    6133  *
    6134  * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
    6135  *          other error code on internal error.
    6136  *
    6137  * @param   pThis           Pointer to the grant service instance data.
    6138  * @param   enmCurState     The current state.
    6139  * @param   enmNewState     The new state we're waiting for it to enter.
    6140  */
    6141 static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
    6142                                     SUPDRVTSCDELTATHREADSTATE enmNewState)
    6143 {
    6144     /*
    6145      * Wait a short while for the expected state transition.
    6146      */
    6147     int rc;
    6148     RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
    6149     RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    6150     if (pDevExt->enmTscDeltaThreadState == enmNewState)
    6151     {
    6152         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6153         rc = VINF_SUCCESS;
    6154     }
    6155     else if (pDevExt->enmTscDeltaThreadState == enmCurState)
    6156     {
    6157         /*
    6158          * Wait longer if the state has not yet transitioned to the one we want.
    6159          */
    6160         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6161         rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
    6162         if (   RT_SUCCESS(rc)
    6163             || rc == VERR_TIMEOUT)
    6164         {
    6165             /*
    6166              * Check the state whether we've succeeded.
    6167              */
    6168             SUPDRVTSCDELTATHREADSTATE enmState;
    6169             RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    6170             enmState = pDevExt->enmTscDeltaThreadState;
    6171             RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6172             if (enmState == enmNewState)
    6173                 rc = VINF_SUCCESS;
    6174             else if (enmState == enmCurState)
    6175             {
    6176                 rc = VERR_TIMEOUT;
    6177                 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
    6178                             enmNewState));
    6179             }
    6180             else
    6181             {
    6182                 rc = VERR_INTERNAL_ERROR;
    6183                 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
    6184                             enmState, enmNewState));
    6185             }
    6186         }
    6187         else
    6188             OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
    6189     }
    6190     else
    6191     {
    6192         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6193         OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
    6194         rc = VERR_INTERNAL_ERROR;
    6195     }
    6196 
    6197     return rc;
    6198 }
    6199 
    6200 
    6201 /**
    6202  * Terminates the TSC-delta measurement thread.
    6203  *
    6204  * @param   pDevExt   Pointer to the device instance data.
    6205  */
    6206 static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
    6207 {
    6208     int rc;
    6209     RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    6210     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
    6211     RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6212     RTThreadUserSignal(pDevExt->hTscDeltaThread);
    6213     rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
    6214     if (RT_FAILURE(rc))
    6215     {
    6216         /* Signal a few more times before giving up. */
    6217         int cTriesLeft = 5;
    6218         while (--cTriesLeft > 0)
    6219         {
    6220             RTThreadUserSignal(pDevExt->hTscDeltaThread);
    6221             rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
    6222             if (rc != VERR_TIMEOUT)
    6223                 break;
    6224         }
    6225     }
    6226 }
    6227 
    6228 
    6229 /**
    6230  * Initializes and spawns the TSC-delta measurement thread.
    6231  *
    6232  * A thread is required for servicing re-measurement requests from events like
    6233  * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
    6234  * under all contexts on all OSs.
    6235  *
    6236  * @returns VBox status code.
    6237  * @param   pDevExt           Pointer to the device instance data.
    6238  *
    6239  * @remarks Must only be called -after- initializing GIP and setting up MP
    6240  *          notifications!
    6241  */
    6242 static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
    6243 {
    6244     int rc;
    6245     Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    6246     rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
    6247     if (RT_SUCCESS(rc))
    6248     {
    6249         rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
    6250         if (RT_SUCCESS(rc))
    6251         {
    6252             pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
    6253             pDevExt->cMsTscDeltaTimeout = 1;
    6254             rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
    6255                                 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
    6256             if (RT_SUCCESS(rc))
    6257             {
    6258                 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
    6259                 if (RT_SUCCESS(rc))
    6260                 {
    6261                     ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
    6262                     return rc;
    6263                 }
    6264 
    6265                 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
    6266                 supdrvTscDeltaThreadTerminate(pDevExt);
    6267             }
    6268             else
    6269                 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
    6270             RTSemEventDestroy(pDevExt->hTscDeltaEvent);
    6271             pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
    6272         }
    6273         else
    6274             OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
    6275         RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
    6276         pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
    6277     }
    6278     else
    6279         OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
    6280 
    6281     return rc;
    6282 }
    6283 
    6284 
    6285 /**
    6286  * Terminates the TSC-delta measurement thread and cleanup.
    6287  *
    6288  * @param   pDevExt         Pointer to the device instance data.
    6289  */
    6290 static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
    6291 {
    6292     if (   pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
    6293         && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
    6294     {
    6295         supdrvTscDeltaThreadTerminate(pDevExt);
    6296     }
    6297 
    6298     if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
    6299     {
    6300         RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
    6301         pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
    6302     }
    6303 
    6304     if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
    6305     {
    6306         RTSemEventDestroy(pDevExt->hTscDeltaEvent);
    6307         pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
    6308     }
    6309 
    6310     ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
    6311 }
    6312 
    6313 
    6314 /**
    6315  * Waits for TSC-delta measurements to be completed for all online CPUs.
    6316  *
    6317  * @returns VBox status code.
    6318  * @param   pDevExt         Pointer to the device instance data.
    6319  */
    6320 static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
    6321 {
    6322     int cTriesLeft = 5;
    6323     int cMsTotalWait;
    6324     int cMsWaited = 0;
    6325     int cMsWaitGranularity = 1;
    6326 
    6327     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    6328     AssertReturn(pGip, VERR_INVALID_POINTER);
    6329 
    6330     if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
    6331         return VERR_THREAD_NOT_WAITABLE;
    6332 
    6333     cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
    6334     while (cTriesLeft-- > 0)
    6335     {
    6336         if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
    6337             return VINF_SUCCESS;
    6338         RTThreadSleep(cMsWaitGranularity);
    6339         cMsWaited += cMsWaitGranularity;
    6340         if (cMsWaited >= cMsTotalWait)
    6341             break;
    6342     }
    6343 
    6344     return VERR_TIMEOUT;
    6345 }
    6346 
    6347 #endif /* SUPDRV_USE_TSC_DELTA_THREAD */
    6348 
    6349 /**
    6350  * Applies the TSC delta to the supplied raw TSC value.
    6351  *
    6352  * @returns VBox status code. (Ignored by all users, just FYI.)
    6353  * @param   pGip            Pointer to the GIP.
    6354  * @param   puTsc           Pointer to a valid TSC value before the TSC delta has been applied.
    6355  * @param   idApic          The APIC ID of the CPU @c puTsc corresponds to.
    6356  * @param   fDeltaApplied   Where to store whether the TSC delta was succesfully
    6357  *                          applied or not (optional, can be NULL).
    6358  *
    6359  * @remarks Maybe called with interrupts disabled in ring-0!
    6360  *
    6361  * @note    Don't you dare change the delta calculation.  If you really do, make
    6362  *          sure you update all places where it's used (IPRT, SUPLibAll.cpp,
    6363  *          SUPDrv.c, supdrvGipMpEvent, and more).
    6364  */
    6365 DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
    6366 {
    6367     int rc;
    6368 
    6369     /*
    6370      * Validate input.
    6371      */
    6372     AssertPtr(puTsc);
    6373     AssertPtr(pGip);
    6374     Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    6375 
    6376     /*
    6377      * Carefully convert the idApic into a GIPCPU entry.
    6378      */
    6379     if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
    6380     {
    6381         uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
    6382         if (RT_LIKELY(iCpu < pGip->cCpus))
    6383         {
    6384             PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
    6385 
    6386             /*
    6387              * Apply the delta if valid.
    6388              */
    6389             if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
    6390             {
    6391                 *puTsc -= pGipCpu->i64TSCDelta;
    6392                 if (pfDeltaApplied)
    6393                     *pfDeltaApplied = true;
    6394                 return VINF_SUCCESS;
    6395             }
    6396 
    6397             rc = VINF_SUCCESS;
    6398         }
    6399         else
    6400         {
    6401             AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
    6402             rc = VERR_INVALID_CPU_INDEX;
    6403         }
    6404     }
    6405     else
    6406     {
    6407         AssertMsgFailed(("idApic=%u\n", idApic));
    6408         rc = VERR_INVALID_CPU_ID;
    6409     }
    6410     if (pfDeltaApplied)
    6411         *pfDeltaApplied = false;
    6412     return rc;
    6413 }
    6414 
    6415 
    6416 /**
    6417  * Measures the TSC frequency of the system.
    6418  *
    6419  * Uses a busy-wait method for the async. case as it is intended to help push
    6420  * the CPU frequency up, while for the invariant cases using a sleeping method.
    6421  *
    6422  * The TSC frequency can vary on systems which are not reported as invariant.
    6423  * On such systems the object of this function is to find out what the nominal,
    6424  * maximum TSC frequency under 'normal' CPU operation.
    6425  *
    6426  * @returns VBox status code.
    6427  * @param   pDevExt        Pointer to the device instance.
    6428  *
    6429  * @remarks Must be called only -after- measuring the TSC deltas.
    6430  */
    6431 static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
    6432 {
    6433     int cTriesLeft = 4;
    6434     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    6435 
    6436     /* Assert order. */
    6437     AssertReturn(pGip, VERR_INVALID_PARAMETER);
    6438     AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
    6439 
    6440     while (cTriesLeft-- > 0)
    6441     {
    6442         RTCCUINTREG uFlags;
    6443         uint64_t    u64NanoTsBefore;
    6444         uint64_t    u64NanoTsAfter;
    6445         uint64_t    u64TscBefore;
    6446         uint64_t    u64TscAfter;
    6447         uint8_t     idApicBefore;
    6448         uint8_t     idApicAfter;
    6449 
    6450         /*
    6451          * Synchronize with the host OS clock tick before reading the TSC.
    6452          * Especially important on older Windows version where the granularity is terrible.
    6453          */
    6454         u64NanoTsBefore = RTTimeSystemNanoTS();
    6455         while (RTTimeSystemNanoTS() == u64NanoTsBefore)
    6456             ASMNopPause();
    6457 
    6458         uFlags          = ASMIntDisableFlags();
    6459         idApicBefore    = ASMGetApicId();
    6460         u64TscBefore    = ASMReadTSC();
    6461         u64NanoTsBefore = RTTimeSystemNanoTS();
    6462         ASMSetFlags(uFlags);
    6463 
    6464         if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
    6465         {
    6466             /*
    6467              * Sleep-wait since the TSC frequency is constant, it eases host load.
    6468              * Shorter interval produces more variance in the frequency (esp. Windows).
    6469              */
    6470             RTThreadSleep(200);
    6471             u64NanoTsAfter = RTTimeSystemNanoTS();
    6472             while (RTTimeSystemNanoTS() == u64NanoTsAfter)
    6473                 ASMNopPause();
    6474             u64NanoTsAfter = RTTimeSystemNanoTS();
    6475         }
    6476         else
    6477         {
    6478             /* Busy-wait keeping the frequency up and measure. */
    6479             for (;;)
    6480             {
    6481                 u64NanoTsAfter = RTTimeSystemNanoTS();
    6482                 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
    6483                     ASMNopPause();
    6484                 else
    6485                     break;
    6486             }
    6487         }
    6488 
    6489         uFlags      = ASMIntDisableFlags();
    6490         idApicAfter = ASMGetApicId();
    6491         u64TscAfter = ASMReadTSC();
    6492         ASMSetFlags(uFlags);
    6493 
    6494         if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    6495         {
    6496             int rc;
    6497             bool fAppliedBefore;
    6498             bool fAppliedAfter;
    6499             rc = supdrvTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore);   AssertRCReturn(rc, rc);
    6500             rc = supdrvTscDeltaApply(pGip, &u64TscAfter,  idApicAfter,  &fAppliedAfter);    AssertRCReturn(rc, rc);
    6501 
    6502             if (   !fAppliedBefore
    6503                 || !fAppliedAfter)
    6504             {
    6505 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    6506                 /*
    6507                  * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
    6508                  * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
    6509                  * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
    6510                  * proceed. This should be triggered just once if we're rather unlucky.
    6511                  */
    6512                 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
    6513                 if (rc == VERR_TIMEOUT)
    6514                 {
    6515                     SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
    6516                     return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
    6517                 }
    6518 #else
    6519                 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
    6520                             idApicBefore, idApicAfter, cTriesLeft);
    6521 #endif
    6522                 continue;
    6523             }
    6524         }
    6525 
    6526         /*
    6527          * Update GIP.
    6528          */
    6529         pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
    6530         if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    6531             pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
    6532         return VINF_SUCCESS;
    6533     }
    6534 
    6535     return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
    6536 }
    6537 
    6538 
    6539 /**
    6540  * Timer callback function for TSC frequency refinement in invariant GIP mode.
    6541  *
    6542  * @param   pTimer      The timer.
    6543  * @param   pvUser      Opaque pointer to the device instance data.
    6544  * @param   iTick       The timer tick.
    6545  */
    6546 static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
    6547 {
    6548     PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
    6549     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    6550     bool               fDeltaApplied = false;
    6551     uint8_t            idApic;
    6552     uint64_t           u64DeltaNanoTS;
    6553     uint64_t           u64DeltaTsc;
    6554     uint64_t           u64NanoTS;
    6555     uint64_t           u64Tsc;
    6556     RTCCUINTREG        uFlags;
    6557 
    6558     /* Paranoia. */
    6559     Assert(pGip);
    6560     Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
    6561 
    6562 #if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
    6563     u64NanoTS = RTTimeSystemNanoTS();
    6564     while (RTTimeSystemNanoTS() == u64NanoTS)
    6565         ASMNopPause();
    6566 #endif
    6567     uFlags    = ASMIntDisableFlags();
    6568     idApic    = ASMGetApicId();
    6569     u64Tsc    = ASMReadTSC();
    6570     u64NanoTS = RTTimeSystemNanoTS();
    6571     ASMSetFlags(uFlags);
    6572     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    6573         supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
    6574     u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
    6575     u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
    6576 
    6577     if (RT_UNLIKELY(   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
    6578                     && !fDeltaApplied))
    6579     {
    6580         Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
    6581                     GIP_TSC_REFINE_INTERVAL));
    6582         return;
    6583     }
    6584 
    6585     /* Calculate the TSC frequency. */
    6586     if (   u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
    6587         && u64DeltaNanoTS < UINT32_MAX)
    6588         pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
    6589     else
    6590     {
    6591         RTUINT128U CpuHz, Tmp, Divisor;
    6592         CpuHz.s.Lo = CpuHz.s.Hi = 0;
    6593         RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
    6594         RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
    6595         pGip->u64CpuHz = CpuHz.s.Lo;
    6596     }
    6597 
    6598     /* Update rest of GIP. */
    6599     Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
    6600     pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
    6601 }
    6602 
    6603 
    6604 /**
    6605  * Starts the TSC-frequency refinement phase asynchronously.
    6606  *
    6607  * @param   pDevExt        Pointer to the device instance data.
    6608  */
    6609 static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
    6610 {
    6611     uint64_t            u64NanoTS;
    6612     RTCCUINTREG         uFlags;
    6613     uint8_t             idApic;
    6614     int                 rc;
    6615     PSUPGLOBALINFOPAGE  pGip;
    6616 
    6617     /* Validate. */
    6618     Assert(pDevExt);
    6619     Assert(pDevExt->pGip);
    6620     pGip = pDevExt->pGip;
    6621 
    6622 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    6623     /*
    6624      * If the TSC-delta thread is created, wait until it's done calculating
    6625      * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
    6626      */
    6627     if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
    6628         && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
    6629     {
    6630         rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
    6631         if (rc == VERR_TIMEOUT)
    6632         {
    6633             SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
    6634             return;
    6635         }
    6636     }
    6637 #endif
    6638 
    6639     /*
    6640      * Record the TSC and NanoTS as the starting anchor point for refinement of the
    6641      * TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
    6642      * reading of the TSC and the NanoTS as close as possible.
    6643      */
    6644     u64NanoTS = RTTimeSystemNanoTS();
    6645     while (RTTimeSystemNanoTS() == u64NanoTS)
    6646         ASMNopPause();
    6647     uFlags                   = ASMIntDisableFlags();
    6648     idApic                   = ASMGetApicId();
    6649     pDevExt->u64TscAnchor    = ASMReadTSC();
    6650     pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
    6651     ASMSetFlags(uFlags);
    6652     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    6653         supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
    6654 
    6655     rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
    6656     if (RT_SUCCESS(rc))
    6657     {
    6658         /*
    6659          * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
    6660          * interval as small as possible while gaining the most consistent and accurate frequency
    6661          * (compared to what the host OS might have measured).
    6662          *
    6663          * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
    6664          * same TSC frequency whenever possible so we need to keep the interval short.
    6665          */
    6666         rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
    6667         AssertRC(rc);
    6668     }
    6669     else
    6670         OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
    6671 }
    6672 
    6673 
    6674 /**
    6675  * Creates the GIP.
    6676  *
    6677  * @returns VBox status code.
    6678  * @param   pDevExt     Instance data. GIP stuff may be updated.
    6679  */
    6680 static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
    6681 {
    6682     PSUPGLOBALINFOPAGE  pGip;
    6683     RTHCPHYS            HCPhysGip;
    6684     uint32_t            u32SystemResolution;
    6685     uint32_t            u32Interval;
    6686     uint32_t            u32MinInterval;
    6687     uint32_t            uMod;
    6688     unsigned            cCpus;
    6689     int                 rc;
    6690 
    6691     LogFlow(("supdrvGipCreate:\n"));
    6692 
    6693     /* Assert order. */
    6694     Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
    6695     Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
    6696     Assert(!pDevExt->pGipTimer);
    6697 
    6698     /*
    6699      * Check the CPU count.
    6700      */
    6701     cCpus = RTMpGetArraySize();
    6702     if (   cCpus > RTCPUSET_MAX_CPUS
    6703         || cCpus > 256 /* ApicId is used for the mappings */)
    6704     {
    6705         SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
    6706         return VERR_TOO_MANY_CPUS;
    6707     }
    6708 
    6709     /*
    6710      * Allocate a contiguous set of pages with a default kernel mapping.
    6711      */
    6712     rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
    6713     if (RT_FAILURE(rc))
    6714     {
    6715         OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
    6716         return rc;
    6717     }
    6718     pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
    6719     HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
    6720 
    6721     /*
    6722      * Allocate the TSC-delta sync struct on a separate cache line.
    6723      */
    6724     pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
    6725     pDevExt->pTscDeltaSync  = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
    6726     Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
    6727 
    6728     /*
    6729      * Find a reasonable update interval and initialize the structure.
    6730      */
    6731     supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
    6732     /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
    6733      *        See @bugref{6710}. */
    6734     u32MinInterval      = RT_NS_10MS;
    6735     u32SystemResolution = RTTimerGetSystemGranularity();
    6736     u32Interval         = u32MinInterval;
    6737     uMod                = u32MinInterval % u32SystemResolution;
    6738     if (uMod)
    6739         u32Interval += u32SystemResolution - uMod;
    6740 
    6741     supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
    6742 
    6743     if (RT_UNLIKELY(   pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
    6744                     && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
    6745                     && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
    6746     {
    6747         /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
    6748         OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
    6749         return VERR_INTERNAL_ERROR_2;
    6750     }
    6751 
    6752     RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
    6753     RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
    6754 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    6755     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    6756     {
    6757         /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
    6758         rc = supdrvTscDeltaThreadInit(pDevExt);
    6759     }
    6760 #endif
    6761     if (RT_SUCCESS(rc))
    6762     {
    6763         rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
    6764         if (RT_SUCCESS(rc))
    6765         {
    6766             rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
    6767             if (RT_SUCCESS(rc))
    6768             {
    6769 #ifndef SUPDRV_USE_TSC_DELTA_THREAD
    6770                 uint16_t iCpu;
    6771                 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    6772                 {
    6773                     /*
    6774                      * Measure the TSC deltas now that we have MP notifications.
    6775                      */
    6776                     int cTries = 5;
    6777                     do
    6778                     {
    6779                         rc = supdrvMeasureInitialTscDeltas(pDevExt);
    6780                         if (   rc != VERR_TRY_AGAIN
    6781                             && rc != VERR_CPU_OFFLINE)
    6782                             break;
    6783                     } while (--cTries > 0);
    6784                     for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    6785                         Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
    6786                 }
    6787                 else
    6788                 {
    6789                     for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    6790                         AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
    6791                 }
    6792 #endif
    6793                 if (RT_SUCCESS(rc))
    6794                 {
    6795                     rc = supdrvGipMeasureTscFreq(pDevExt);
    6796                     if (RT_SUCCESS(rc))
    6797                     {
    6798                         /*
    6799                          * Create the timer.
    6800                          * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
    6801                          */
    6802                         if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
    6803                         {
    6804                             rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
    6805                                                  pDevExt);
    6806                             if (rc == VERR_NOT_SUPPORTED)
    6807                             {
    6808                                 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
    6809                                 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
    6810                             }
    6811                         }
    6812                         if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    6813                             rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
    6814                                                  supdrvGipSyncAndInvariantTimer, pDevExt);
    6815                         if (RT_SUCCESS(rc))
    6816                         {
    6817                             /*
    6818                              * We're good.
    6819                              */
    6820                             Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
    6821                             supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
    6822 
    6823                             g_pSUPGlobalInfoPage = pGip;
    6824                             if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
    6825                                 supdrvRefineTscFreq(pDevExt);
    6826                             return VINF_SUCCESS;
    6827                         }
    6828 
    6829                         OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
    6830                         Assert(!pDevExt->pGipTimer);
    6831                     }
    6832                     else
    6833                         OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
    6834                 }
    6835                 else
    6836                     OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
    6837             }
    6838             else
    6839                 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
    6840         }
    6841         else
    6842             OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
    6843     }
    6844     else
    6845         OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
    6846 
    6847     supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
    6848     return rc;
    6849 }
    6850 
    6851 
    6852 /**
    6853  * Terminates the GIP.
    6854  *
    6855  * @param   pDevExt     Instance data. GIP stuff may be updated.
    6856  */
    6857 static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
    6858 {
    6859     int rc;
    6860 #ifdef DEBUG_DARWIN_GIP
    6861     OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
    6862                 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
    6863                 pDevExt->pGipTimer, pDevExt->GipMemObj));
    6864 #endif
    6865 
    6866     /*
    6867      * Stop receiving MP notifications before tearing anything else down.
    6868      */
    6869     RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
    6870 
    6871 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    6872     /*
    6873      * Terminate the TSC-delta measurement thread and resources.
    6874      */
    6875     supdrvTscDeltaTerm(pDevExt);
    6876 #endif
    6877 
    6878     /*
    6879      * Destroy the TSC-refinement one-shot timer.
    6880      */
    6881     if (pDevExt->pTscRefineTimer)
    6882     {
    6883         RTTimerDestroy(pDevExt->pTscRefineTimer);
    6884         pDevExt->pTscRefineTimer = NULL;
    6885     }
    6886 
    6887     if (pDevExt->pvTscDeltaSync)
    6888     {
    6889         RTMemFree(pDevExt->pvTscDeltaSync);
    6890         pDevExt->pTscDeltaSync  = NULL;
    6891         pDevExt->pvTscDeltaSync = NULL;
    6892     }
    6893 
    6894     /*
    6895      * Invalid the GIP data.
    6896      */
    6897     if (pDevExt->pGip)
    6898     {
    6899         supdrvGipTerm(pDevExt->pGip);
    6900         pDevExt->pGip = NULL;
    6901     }
    6902     g_pSUPGlobalInfoPage = NULL;
    6903 
    6904     /*
    6905      * Destroy the timer and free the GIP memory object.
    6906      */
    6907     if (pDevExt->pGipTimer)
    6908     {
    6909         rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
    6910         pDevExt->pGipTimer = NULL;
    6911     }
    6912 
    6913     if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    6914     {
    6915         rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
    6916         pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    6917     }
    6918 
    6919     /*
    6920      * Finally, make sure we've release the system timer resolution request
    6921      * if one actually succeeded and is still pending.
    6922      */
    6923     supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
    6924 }
    6925 
    6926 
    6927 /**
    6928  * Timer callback function for the sync and invariant GIP modes.
    6929  *
    6930  * @param   pTimer      The timer.
    6931  * @param   pvUser      Opaque pointer to the device extension.
    6932  * @param   iTick       The timer tick.
    6933  */
    6934 static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
    6935 {
    6936     RTCCUINTREG        uFlags;
    6937     uint64_t           u64TSC;
    6938     uint64_t           u64NanoTS;
    6939     PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
    6940     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    6941 
    6942     uFlags    = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    6943     u64TSC    = ASMReadTSC();
    6944     u64NanoTS = RTTimeSystemNanoTS();
    6945 
    6946     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    6947     {
    6948         /*
    6949          * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
    6950          * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
    6951          * affected a bit until we get proper TSC deltas than implementing options like
    6952          * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
    6953          *
    6954          * The likely hood of this happening is really low. On Windows, Linux, and Solaris
    6955          * timers fire on the CPU they were registered/started on.  Darwin timers doesn't
    6956          * necessarily (they are high priority threads waiting).
    6957          */
    6958         Assert(!ASMIntAreEnabled());
    6959         supdrvTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
    6960     }
    6961 
    6962     supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
    6963 
    6964     ASMSetFlags(uFlags);
    6965 
    6966 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    6967     if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
    6968         && !RTCpuSetIsEmpty(&pDevExt->TscDeltaCpuSet))
    6969     {
    6970         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    6971         if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
    6972             || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
    6973             pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
    6974         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    6975         /** @todo Do the actual poking using -- RTThreadUserSignal() */
    6976     }
    6977 #endif
    6978 }
    6979 
    6980 
    6981 /**
    6982  * Timer callback function for async GIP mode.
    6983  * @param   pTimer      The timer.
    6984  * @param   pvUser      Opaque pointer to the device extension.
    6985  * @param   iTick       The timer tick.
    6986  */
    6987 static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
    6988 {
    6989     RTCCUINTREG     fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    6990     PSUPDRVDEVEXT   pDevExt   = (PSUPDRVDEVEXT)pvUser;
    6991     RTCPUID         idCpu     = RTMpCpuId();
    6992     uint64_t        u64TSC    = ASMReadTSC();
    6993     uint64_t        NanoTS    = RTTimeSystemNanoTS();
    6994 
    6995     /** @todo reset the transaction number and whatnot when iTick == 1. */
    6996     if (pDevExt->idGipMaster == idCpu)
    6997         supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
    6998     else
    6999         supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
    7000 
    7001     ASMSetFlags(fOldFlags);
    7002 }
    7003 
    7004 
    7005 /**
    7006  * Finds our (@a idCpu) entry, or allocates a new one if not found.
    7007  *
    7008  * @returns Index of the CPU in the cache set.
    7009  * @param   pGip                The GIP.
    7010  * @param   idCpu               The CPU ID.
    7011  */
    7012 static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
    7013 {
    7014     uint32_t i, cTries;
    7015 
    7016     /*
    7017      * ASSUMES that CPU IDs are constant.
    7018      */
    7019     for (i = 0; i < pGip->cCpus; i++)
    7020         if (pGip->aCPUs[i].idCpu == idCpu)
    7021             return i;
    7022 
    7023     cTries = 0;
    7024     do
    7025     {
    7026         for (i = 0; i < pGip->cCpus; i++)
    7027         {
    7028             bool fRc;
    7029             ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
    7030             if (fRc)
    7031                 return i;
    7032         }
    7033     } while (cTries++ < 32);
    7034     AssertReleaseFailed();
    7035     return i - 1;
    7036 }
    7037 
    7038 
    7039 /**
    7040  * Finds the GIP CPU index corresponding to @a idCpu.
    7041  *
    7042  * @returns GIP CPU array index, UINT32_MAX if not found.
    7043  * @param   pGip                The GIP.
    7044  * @param   idCpu               The CPU ID.
    7045  */
    7046 static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
    7047 {
    7048     uint32_t i;
    7049     for (i = 0; i < pGip->cCpus; i++)
    7050         if (pGip->aCPUs[i].idCpu == idCpu)
    7051             return i;
    7052     return UINT32_MAX;
    7053 }
    7054 
    7055 
    7056 /**
    7057  * The calling CPU should be accounted as online, update GIP accordingly.
    7058  *
    7059  * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
    7060  *
    7061  * @param   pDevExt             The device extension.
    7062  * @param   idCpu               The CPU ID.
    7063  */
    7064 static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
    7065 {
    7066     int         iCpuSet = 0;
    7067     uint16_t    idApic = UINT16_MAX;
    7068     uint32_t    i = 0;
    7069     uint64_t    u64NanoTS = 0;
    7070     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    7071 
    7072     AssertPtrReturnVoid(pGip);
    7073     AssertRelease(idCpu == RTMpCpuId());
    7074     Assert(pGip->cPossibleCpus == RTMpGetCount());
    7075 
    7076     /*
    7077      * Do this behind a spinlock with interrupts disabled as this can fire
    7078      * on all CPUs simultaneously, see @bugref{6110}.
    7079      */
    7080     RTSpinlockAcquire(pDevExt->hGipSpinlock);
    7081 
    7082     /*
    7083      * Update the globals.
    7084      */
    7085     ASMAtomicWriteU16(&pGip->cPresentCpus,  RTMpGetPresentCount());
    7086     ASMAtomicWriteU16(&pGip->cOnlineCpus,   RTMpGetOnlineCount());
    7087     iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    7088     if (iCpuSet >= 0)
    7089     {
    7090         Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    7091         RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
    7092         RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    7093     }
    7094 
    7095     /*
    7096      * Update the entry.
    7097      */
    7098     u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    7099     i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
    7100     supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
    7101     idApic = ASMGetApicId();
    7102     ASMAtomicWriteU16(&pGip->aCPUs[i].idApic,  idApic);
    7103     ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    7104     ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu,  idCpu);
    7105 
    7106     /*
    7107      * Update the APIC ID and CPU set index mappings.
    7108      */
    7109     ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic],     i);
    7110     ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
    7111 
    7112     /* Update the Mp online/offline counter. */
    7113     ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
    7114 
    7115     /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
    7116     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    7117     {
    7118         RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
    7119 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    7120         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    7121         if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
    7122             || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
    7123         {
    7124             pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
    7125         }
    7126         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    7127 #endif
    7128     }
    7129 
    7130     /* commit it */
    7131     ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
    7132 
    7133     RTSpinlockRelease(pDevExt->hGipSpinlock);
    7134 }
    7135 
    7136 
    7137 /**
    7138  * The CPU should be accounted as offline, update the GIP accordingly.
    7139  *
    7140  * This is used by supdrvGipMpEvent.
    7141  *
    7142  * @param   pDevExt             The device extension.
    7143  * @param   idCpu               The CPU ID.
    7144  */
    7145 static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
    7146 {
    7147     int         iCpuSet;
    7148     unsigned    i;
    7149 
    7150     PSUPGLOBALINFOPAGE pGip   = pDevExt->pGip;
    7151 
    7152     AssertPtrReturnVoid(pGip);
    7153     RTSpinlockAcquire(pDevExt->hGipSpinlock);
    7154 
    7155     iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    7156     AssertReturnVoid(iCpuSet >= 0);
    7157 
    7158     i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    7159     AssertReturnVoid(i < pGip->cCpus);
    7160     AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
    7161 
    7162     Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    7163     RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
    7164 
    7165     /* Update the Mp online/offline counter. */
    7166     ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
    7167 
    7168     /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
    7169     if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
    7170     {
    7171         ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
    7172         ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
    7173     }
    7174 
    7175     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    7176     {
    7177         /* Reset the TSC delta, we will recalculate it lazily. */
    7178         ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
    7179         /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
    7180         RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
    7181     }
    7182 
    7183     /* commit it */
    7184     ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
    7185 
    7186     RTSpinlockRelease(pDevExt->hGipSpinlock);
    7187 }
    7188 
    7189 
    7190 /**
    7191  * Multiprocessor event notification callback.
    7192  *
    7193  * This is used to make sure that the GIP master gets passed on to
    7194  * another CPU.  It also updates the associated CPU data.
    7195  *
    7196  * @param   enmEvent    The event.
    7197  * @param   idCpu       The cpu it applies to.
    7198  * @param   pvUser      Pointer to the device extension.
    7199  *
    7200  * @remarks This function -must- fire on the newly online'd CPU for the
    7201  *          RTMPEVENT_ONLINE case and can fire on any CPU for the
    7202  *          RTMPEVENT_OFFLINE case.
    7203  */
    7204 static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
    7205 {
    7206     PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
    7207     PSUPGLOBALINFOPAGE  pGip    = pDevExt->pGip;
    7208 
    7209     AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    7210 
    7211     /*
    7212      * Update the GIP CPU data.
    7213      */
    7214     if (pGip)
    7215     {
    7216         switch (enmEvent)
    7217         {
    7218             case RTMPEVENT_ONLINE:
    7219                 AssertRelease(idCpu == RTMpCpuId());
    7220                 supdrvGipMpEventOnline(pDevExt, idCpu);
    7221                 break;
    7222             case RTMPEVENT_OFFLINE:
    7223                 supdrvGipMpEventOffline(pDevExt, idCpu);
    7224                 break;
    7225         }
    7226     }
    7227 
    7228     /*
    7229      * Make sure there is a master GIP.
    7230      */
    7231     if (enmEvent == RTMPEVENT_OFFLINE)
    7232     {
    7233         RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
    7234         if (idGipMaster == idCpu)
    7235         {
    7236             /*
    7237              * The GIP master is going offline, find a new one.
    7238              */
    7239             bool        fIgnored;
    7240             unsigned    i;
    7241             RTCPUID     idNewGipMaster = NIL_RTCPUID;
    7242             RTCPUSET    OnlineCpus;
    7243             RTMpGetOnlineSet(&OnlineCpus);
    7244 
    7245             for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
    7246                 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
    7247                 {
    7248                     RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
    7249                     if (idCurCpu != idGipMaster)
    7250                     {
    7251                         idNewGipMaster = idCurCpu;
    7252                         break;
    7253                     }
    7254                 }
    7255 
    7256             Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
    7257             ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
    7258             NOREF(fIgnored);
    7259         }
    7260     }
    7261 }
    7262 
    7263 
    7264 /*
    7265  * Select the TSC delta measurement algorithm.
          *
          * Exactly one of GIP_TSC_DELTA_METHOD_1 and GIP_TSC_DELTA_METHOD_2 gets
          * defined.  With the condition below hard-wired to 1, method 1 is the one
          * selected; change it to 0 to select method 2 instead.
    7266  */
    7267 #if 1
    7268 # define GIP_TSC_DELTA_METHOD_1
    7269 #else
    7270 # define GIP_TSC_DELTA_METHOD_2
    7271 #endif
    7272 
    7273 
    7274 #ifdef GIP_TSC_DELTA_METHOD_2
    7275 
    7276 /**
    7277  * TSC delta measurement algorithm \#2 result entry.
          *
          * One entry is recorded per sample taken by
          * supdrvTscDeltaMethod2CollectData().
    7278  */
    7279 typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
    7280 {
    7281     uint32_t    iSeqMine;       /**< Result of incrementing our own sequence number just before reading the TSC. */
    7282     uint32_t    iSeqOther;      /**< The other CPU's sequence number as read just before reading the TSC. */
    7283     uint64_t    uTsc;           /**< The TSC value read for this sample. */
    7284 } SUPDRVTSCDELTAMETHOD2ENTRY;
    7285 
    7286 /**
    7287  * TSC delta measurement algorithm \#2 data.
          *
          * One instance is allocated for the master and one for the worker, see
          * supdrvTscDeltaMethod2Init().
    7288  */
    7289 typedef struct SUPDRVTSCDELTAMETHOD2
    7290 {
    7291     /** Padding to make sure the iCurSeqNo is in its own cache line.
    7292      * ASSUMES cacheline sizes <= 128 bytes. */
    7293     uint32_t                    au32CacheLinePaddingBefore[128 / sizeof(uint32_t)];
    7294     /** The current sequence number of the CPU owning this data. */
    7295     uint32_t volatile           iCurSeqNo;
    7296     /** Padding to make sure the iCurSeqNo is in its own cache line.
    7297      * ASSUMES cacheline sizes <= 128 bytes. */
    7298     uint32_t                    au32CacheLinePaddingAfter[128 / sizeof(uint32_t) - 1];
    7299     /** Result table, one entry per collected sample. */
    7300     SUPDRVTSCDELTAMETHOD2ENTRY  aResults[96];
    7301 } SUPDRVTSCDELTAMETHOD2;
    7302 /** Pointer to the data for TSC delta measurement algorithm \#2. */
    7303 typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
    7304 
    7305 #endif /* GIP_TSC_DELTA_METHOD_2 */
    7306 
    7307 /**
    7308  * Argument package/state passed by supdrvMeasureTscDeltaOne to the RTMpOn
    7309  * callback worker.
          *
          * The members inside the GIP_TSC_DELTA_METHOD_2 section only exist when the
          * experimental measurement method \#2 is selected.
    7310  */
    7311 typedef struct SUPDRVGIPTSCDELTARGS
    7312 {
    7313     PSUPDRVDEVEXT           pDevExt;        /**< The device extension. */
    7314     PSUPGIPCPU              pWorker;        /**< Presumably the GIP CPU entry of the worker CPU - confirm with the caller. */
    7315     PSUPGIPCPU              pMaster;        /**< Presumably the GIP CPU entry of the master CPU - confirm with the caller. */
    7316     RTCPUID                 idMaster;       /**< CPU ID of the master; compared with the executing CPU to tell master from worker. */
    7317 #ifdef GIP_TSC_DELTA_METHOD_2
    7318     PSUPDRVTSCDELTAMETHOD2  pMasterData;    /**< Master sample buffer, allocated by supdrvTscDeltaMethod2Init. */
    7319     PSUPDRVTSCDELTAMETHOD2  pWorkerData;    /**< Worker sample buffer, allocated by supdrvTscDeltaMethod2Init. */
    7320     uint32_t                cHits;          /**< Hit counter; NOTE(review): its use is not visible in this part of the file. */
    7321     /*uint32_t                cOffByOne;*/
    7322     uint32_t                iAttempt;       /**< 1-base outer loop counter. */
    7323     bool                    fLagMaster;     /**< Whether the master should lag; set by supdrvTscDeltaMethod2Looped. */
    7324     bool                    fLagWorker;     /**< Whether the worker should lag; set by supdrvTscDeltaMethod2Looped. */
    7325 #endif
    7326 } SUPDRVGIPTSCDELTARGS;
    7327 typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
    7328 
    7329 
    7330 #ifdef GIP_TSC_DELTA_METHOD_2
    7331 /*
    7332  * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
          *
          * The loop count constants are undefined first and then redefined with the
          * values used by method \#2, replacing any earlier definitions.
    7333  */
    7334 # undef  GIP_TSC_DELTA_LOOPS
    7335 # undef  GIP_TSC_DELTA_READ_TIME_LOOPS
    7336 # undef  GIP_TSC_DELTA_PRIMER_LOOPS
    7337 # define GIP_TSC_DELTA_LOOPS             17
    7338 # define GIP_TSC_DELTA_PRIMER_LOOPS      1
    7339 # define GIP_TSC_DELTA_READ_TIME_LOOPS   GIP_TSC_DELTA_PRIMER_LOOPS /* no read-time-loops necessary */
    7340 
    7341 
    7342 static int supdrvTscDeltaMethod2Init(PSUPDRVGIPTSCDELTARGS pArgs)
    7343 {
    7344     uint32_t const fFlags = /*RTMEMALLOCEX_FLAGS_ANY_CTX |*/ RTMEMALLOCEX_FLAGS_ZEROED;
    7345     int rc = RTMemAllocEx(sizeof(*pArgs->pMasterData), 0, fFlags, (void **)&pArgs->pWorkerData);
    7346     if (RT_SUCCESS(rc))
    7347         rc = RTMemAllocEx(sizeof(*pArgs->pMasterData), 0, fFlags, (void **)&pArgs->pMasterData);
    7348     return rc;
    7349 }
    7350 
    7351 
    7352 static void supdrvTscDeltaMethod2Term(PSUPDRVGIPTSCDELTARGS pArgs)
    7353 {
    7354     RTMemFreeEx(pArgs->pMasterData, sizeof(*pArgs->pMasterData));
    7355     RTMemFreeEx(pArgs->pWorkerData, sizeof(*pArgs->pWorkerData));
    7356     /*SUPR0Printf("cHits=%d cOffByOne=%d m=%d w=%d\n", pArgs->cHits, pArgs->cOffByOne, pArgs->pMaster->idApic, pArgs->pWorker->idApic);*/
    7357 }
    7358 
    7359 
    7360 static void supdrvTscDeltaMethod2Looped(PSUPDRVGIPTSCDELTARGS pArgs, RTCPUID idCpu, unsigned iLoop)
    7361 {
    7362     if (pArgs->idMaster == idCpu)
    7363     {
    7364         if (iLoop < GIP_TSC_DELTA_PRIMER_LOOPS)
    7365         {
    7366             if (iLoop == 0)
    7367                 pArgs->iAttempt++;
    7368 
    7369             /* Lag during the priming to be nice to everyone.. */
    7370             pArgs->fLagMaster = true;
    7371             pArgs->fLagWorker = true;
    7372         }
    7373         else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4)
    7374         {
    7375             /* 25 % of the body without lagging. */
    7376             pArgs->fLagMaster = false;
    7377             pArgs->fLagWorker = false;
    7378         }
    7379         else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4 * 2)
    7380         {
    7381             /* 25 % of the body with both lagging. */
    7382             pArgs->fLagMaster = true;
    7383             pArgs->fLagWorker = true;
    7384         }
    7385         else
    7386         {
    7387             /* 50% of the body with alternating lag. */
    7388             pArgs->fLagMaster = (iLoop & 1) == 0;
    7389             pArgs->fLagWorker = (iLoop & 1) == 1;
    7390         }
    7391     }
    7392 }
    7393 
    7394 
    7395 /**
    7396  * The core function of the 2nd TSC delta measurement algorithm.
    7397  *
    7398  * The idea here is that we have the two CPUs execute the exact same code
    7399  * collecting a largish set of TSC samples.  The code has one data dependency on
    7400  * the other CPU, whose purpose is to synchronize the execution as well as to
    7401  * help cross-reference the two sets of TSC samples (the sequence numbers).
    7402  *
    7403  * The @a fLag parameter is used to modify the execution a tiny bit on one or
    7404  * both of the CPUs.  When @a fLag differs between the CPUs, it is thought that
    7405  * it will help with making the CPUs enter lock step execution occasionally.
    7406  *
          * @param   pMyData         The data of the calling CPU.  Its sequence number
          *                          is reset and the whole result table is filled.
          * @param   piOtherSeqNo    Pointer to the sequence number of the other CPU.
          * @param   fLag            Whether to execute a pause after each sample.
    7407  */
    7408 static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
    7409 {
    7410     SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    7411     uint32_t                    cLeft  = RT_ELEMENTS(pMyData->aResults);
    7412 
    7413     ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0); /* Every collection run starts counting from zero. */
    7414     ASMSerializeInstruction();
    7415     while (cLeft-- > 0)
    7416     {
    7417         uint64_t uTsc;
    7418         uint32_t iSeqMine  = ASMAtomicIncU32(&pMyData->iCurSeqNo); /* First of two increments per sample. */
    7419         uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
    7420         ASMCompilerBarrier();
    7421         ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
    7422         uTsc = ASMReadTSC();
    7423         ASMAtomicIncU32(&pMyData->iCurSeqNo); /* Second increment, done right after the TSC read. */
    7424         ASMCompilerBarrier();
    7425         ASMSerializeInstruction();
    7426         pEntry->iSeqMine  = iSeqMine; /* The entry is only stored after the timing critical part above. */
    7427         pEntry->iSeqOther = iSeqOther;
    7428         pEntry->uTsc      = uTsc;
    7429         pEntry++;
    7430         ASMSerializeInstruction();
    7431         if (fLag)
    7432             ASMNopPause();
    7433     }
    7434 }
    7435 
    7436 
    7437 static void supdrvTscDeltaMethod2ProcessDataSet(PSUPDRVGIPTSCDELTARGS pArgs, PSUPDRVTSCDELTAMETHOD2 pMyData,
    7438                                                 bool fIsMaster, uint32_t cResults,
    7439                                                 PSUPDRVTSCDELTAMETHOD2 pOtherData, int64_t iMasterTscDelta,
    7440                                                 int64_t volatile *piWorkerTscDelta)
    7441 {
    7442     uint32_t cHits      = 0;
    7443 #if 0
    7444     uint32_t cOffByOne  = 0;
    7445 #endif
    7446     uint32_t idxResult  = 0;
    7447     int64_t  iBestDelta = *piWorkerTscDelta;
    7448 
    7449     if (cResults > RT_ELEMENTS(pMyData->aResults))
    7450         cResults = RT_ELEMENTS(pMyData->aResults);
    7451 
    7452     for (idxResult = 0; idxResult < cResults; idxResult++)
    7453     {
    7454         uint32_t idxOther = pMyData->aResults[idxResult].iSeqOther;
    7455         if (idxOther & 1)
    7456         {
    7457             idxOther >>= 1;
    7458             if (idxOther < RT_ELEMENTS(pOtherData->aResults))
    7459             {
    7460                 if (pOtherData->aResults[idxOther].iSeqOther == pMyData->aResults[idxResult].iSeqMine)
    7461                 {
    7462                     int64_t iDelta;
    7463                     if (fIsMaster)
    7464                         iDelta = pOtherData->aResults[idxOther].uTsc
    7465                                - (pMyData->aResults[idxResult].uTsc - iMasterTscDelta);
    7466                     else
    7467                         iDelta = (pOtherData->aResults[idxResult].uTsc - iMasterTscDelta)
    7468                                - pMyData->aResults[idxOther].uTsc;
    7469                     if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
    7470                         ? iDelta < iBestDelta
    7471                         : iDelta > iBestDelta || iBestDelta == INT64_MAX)
    7472                         iBestDelta = iDelta;
    7473                     cHits++;
    7474                 }
    7475             }
    7476         }
    7477 #if 0  /* Can be used to detect battles between threads on the same core. Decided to change the master instead.  */
    7478         else
    7479         {
    7480             idxOther >>= 1;
    7481             if (   idxOther < RT_ELEMENTS(pOtherData->aResults)
    7482                 && pOtherData->aResults[idxOther].iSeqOther == pMyData->aResults[idxResult].iSeqMine)
    7483                 cOffByOne++;
    7484         }
    7485 #endif
    7486     }
    7487 
    7488     if (cHits > 0)
    7489         *piWorkerTscDelta = iBestDelta;
    7490     pArgs->cHits     += cHits;
    7491 #if 0
    7492     pArgs->cOffByOne += cOffByOne;
    7493 #endif
    7494 }
    7495 
    7496 /**
          * Processes the data collected by both CPUs in one round of the 2nd TSC
          * delta method.
          *
          * The master data set is processed first using all its entries, then the
          * worker data set using half the worker's current sequence number as the
          * entry count.  Both passes update pArgs->pWorker->i64TSCDelta.
          *
          * @param   pArgs       The argument package.
          * @param   fFinalLoop  Whether this is the last loop of an attempt.  Not
          *                      used by the code below.
          */
    7497 static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, bool fFinalLoop)
    7498 {
    7499     supdrvTscDeltaMethod2ProcessDataSet(pArgs,
    7500                                         pArgs->pMasterData,
    7501                                         true /*fIsMaster*/,
    7502                                         RT_ELEMENTS(pArgs->pMasterData->aResults),
    7503                                         pArgs->pWorkerData,
    7504                                         pArgs->pMaster->i64TSCDelta,
    7505                                         &pArgs->pWorker->i64TSCDelta);
    7506
    7507     supdrvTscDeltaMethod2ProcessDataSet(pArgs,
    7508                                         pArgs->pWorkerData,
    7509                                         false /*fIsMaster*/,
    7510                                         ASMAtomicReadU32(&pArgs->pWorkerData->iCurSeqNo) >> 1,
    7511                                         pArgs->pMasterData,
    7512                                         pArgs->pMaster->i64TSCDelta,
    7513                                         &pArgs->pWorker->i64TSCDelta);
    7514 }
    7515 
    7516 #endif /* GIP_TSC_DELTA_METHOD_2 */
    7517 
    7518 
    7519 /**
    7520  * Callback fired on all CPUs by supdrvMeasureTscDeltaOne() (via RTMpOnAll) to
    7521  * read the TSC on two CPUs and compute the delta between them.
    7522  *
    7523  * @param   idCpu       The CPU we are currently scheduled on.
    7524  * @param   pvUser1     Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
    7525  * @param   pvUser2     Unused.
    7526  *
    7527  * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
    7528  *          read the TSC at exactly the same time on both the master and the
    7529  *          worker CPUs. Due to DMA, bus arbitration, cache locality,
    7530  *          contention, SMI, pipelining etc. there is no guaranteed way of
    7531  *          doing this on x86 CPUs.
    7532  *
    7533  *          GIP_TSC_DELTA_METHOD_1:
    7534  *          We ignore the first few runs of the loop in order to prime the
    7535  *          cache. Also, we need to be careful about using 'pause' instruction
    7536  *          in critical busy-wait loops in this code - it can cause undesired
    7537  *          behaviour with hyperthreading.
    7538  *
    7539  *          We try to minimize the measurement error by computing the minimum
    7540  *          read time of the compare statement in the worker by taking TSC
    7541  *          measurements across it.
    7542  *
    7543  *          It must be noted that the computed minimum read time is mostly to
    7544  *          eliminate huge deltas when the worker is too early and doesn't by
    7545  *          itself help produce more accurate deltas. We allow two times the
    7546  *          computed minimum as an arbitrary acceptable threshold. Therefore,
    7547  *          it is still possible to get negative deltas where there are none
    7548  *          when the worker is earlier. As long as these occasional negative
    7549  *          deltas are lower than the time it takes to exit guest-context and
    7550  *          the OS to reschedule EMT on a different CPU we won't expose a TSC
    7551  *          that jumped backwards. It is because of the existence of the
    7552  *          negative deltas we don't recompute the delta with the master and
    7553  *          worker interchanged to eliminate the remaining measurement error.
    7554  *
    7555  *          @todo document working of GIP_TSC_DELTA_METHOD_2.
    7556  */
    7557 static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    7558 {
    7559     PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)pvUser1;
    7560     PSUPDRVDEVEXT      pDevExt          = pArgs->pDevExt;
    7561     PSUPGLOBALINFOPAGE pGip             = pDevExt->pGip;
    7562     PSUPGIPCPU         pGipCpuWorker    = pArgs->pWorker;
    7563     PSUPGIPCPU         pGipCpuMaster    = pArgs->pMaster;
    7564     RTCPUID            idMaster         = pArgs->idMaster;
    7565     int                cTriesLeft;
    7566
    7567     /* A bit of paranoia first. */
    7568     if (!pGipCpuMaster || !pGipCpuWorker)
    7569         return;
    7570
    7571     /* If the CPU isn't part of the measurement, return immediately. */
    7572     if (   idCpu != idMaster
    7573         && idCpu != pGipCpuWorker->idCpu)
    7574         return;
    7575
    7576     /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
    7577        with a timeout to avoid deadlocking the entire system. */
    7578     if (!RTMpOnAllIsConcurrentSafe())
    7579     {
    7580         /** @todo This was introduced for Windows, but since Windows doesn't use this
    7581          *        code path any longer (as DPC timeouts BSOD regardless of interrupts,
    7582          *        see @bugref{6710} comment 81), eventually phase it out. */
    7583         uint64_t       uTscNow;
    7584         uint64_t       uTscStart;
    7585         uint64_t const cWaitTicks = 130000;  /* Arbitrary value, can be tweaked later. */
    7586
    7587         ASMSerializeInstruction();
    7588         uTscStart = ASMReadTSC();
    7589         if (idCpu == idMaster)
    7590         {
                 /* The master announces itself and waits for the worker to answer. */
    7591             ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
    7592             while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
    7593             {
    7594                 ASMSerializeInstruction();
    7595                 uTscNow = ASMReadTSC();
    7596                 if (uTscNow - uTscStart > cWaitTicks)
    7597                 {
    7598                     /* Set the worker delta to indicate failure, not the master. */
    7599                     ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
    7600                     return;
    7601                 }
    7602
    7603                 ASMNopPause();
    7604             }
    7605         }
    7606         else
    7607         {
                 /* The worker waits for the master's announcement and then answers it. */
    7608             while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
    7609             {
    7610                 ASMSerializeInstruction();
    7611                 uTscNow = ASMReadTSC();
    7612                 if (uTscNow - uTscStart > cWaitTicks)
    7613                 {
    7614                     ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
    7615                     return;
    7616                 }
    7617
    7618                 ASMNopPause();
    7619             }
    7620             ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
    7621         }
    7622     }
    7623
    7624     /*
    7625      * The measurement proper: up to 12 attempts of GIP_TSC_DELTA_LOOPS rounds each, stopping once a worker delta is set.
    7626      */
    7627     Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    7628     cTriesLeft = 12;
    7629     while (cTriesLeft-- > 0)
    7630     {
    7631         unsigned i;
    7632         uint64_t uMinCmpReadTime = UINT64_MAX;
    7633         for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
    7634         {
    7635 #ifdef GIP_TSC_DELTA_METHOD_2
    7636             supdrvTscDeltaMethod2Looped(pArgs, idCpu, i);
    7637 #endif
    7638             if (idCpu == idMaster)
    7639             {
    7640                 /*
    7641                  * The master.
    7642                  */
    7643                 RTCCUINTREG uFlags;
    7644                 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
    7645                           ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
    7646                            pGipCpuMaster->u64TSCSample, idMaster, pGipCpuWorker->idCpu, pDevExt->idGipMaster));
    7647                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
    7648
    7649                 /* Disable interrupts only in the master for as short a period
    7650                    as possible, thanks again to Windows. See @bugref{6710} comment #73. */
    7651                 uFlags = ASMIntDisableFlags();
    7652
                     /* Spin until the sync variable leaves the START state (the worker
                        writes WORKER_READY once it has seen START). */
    7653                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
    7654                 { /* nothing */ }
    7655
    7656 #ifdef GIP_TSC_DELTA_METHOD_1
    7657                 do
    7658                 {
    7659                     ASMSerializeInstruction();
    7660                     ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
    7661                 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
    7662
    7663 #elif defined(GIP_TSC_DELTA_METHOD_2)
    7664                 supdrvTscDeltaMethod2CollectData(pArgs->pMasterData, &pArgs->pWorkerData->iCurSeqNo, pArgs->fLagMaster);
    7665 #else
    7666 # error "tsc delta method not selected"
    7667 #endif
    7668
    7669                 /* Sync up with worker. */
    7670                 ASMSetFlags(uFlags);
    7671
    7672                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
    7673                 { /* nothing */ }
    7674
    7675                 /* Process the data. */
    7676                 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
    7677                 {
    7678 #ifdef GIP_TSC_DELTA_METHOD_1
    7679                     if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
    7680                     {
    7681                         int64_t iDelta = pGipCpuWorker->u64TSCSample
    7682                                        - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
    7683                         if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
    7684                             ? iDelta < pGipCpuWorker->i64TSCDelta
    7685                             : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
    7686                             pGipCpuWorker->i64TSCDelta = iDelta;
    7687                     }
    7688 #elif defined(GIP_TSC_DELTA_METHOD_2)
                         /* NOTE(review): this check looks redundant given the stricter
                            enclosing condition, assuming GIP_TSC_DELTA_READ_TIME_LOOPS
                            is not negative - confirm. */
    7689                     if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
    7690                         supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, i == GIP_TSC_DELTA_LOOPS - 1);
    7691 #else
    7692 # error "tsc delta method not selected"
    7693 #endif
    7694                 }
    7695
    7696                 /* Reset our TSC sample and tell the worker to move on. */
    7697                 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
    7698                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
    7699             }
    7700             else
    7701             {
    7702                 /*
    7703                  * The worker.
    7704                  */
    7705                 uint64_t uTscWorker;
    7706                 uint64_t uTscWorkerFlushed;
    7707                 uint64_t uCmpReadTime;
    7708
    7709                 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
    7710                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
    7711                 { /* nothing */ }
    7712                 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
    7713                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
    7714
    7715 #ifdef GIP_TSC_DELTA_METHOD_1
    7716                 /*
    7717                  * Keep reading the TSC until we notice that the master has read his. Reading
    7718                  * the TSC -after- the master has updated the memory is way too late. We thus
    7719                  * compensate by trying to measure how long it took for the worker to notice
    7720                  * the memory flushed from the master.
    7721                  */
    7722                 do
    7723                 {
    7724                     ASMSerializeInstruction();
    7725                     uTscWorker = ASMReadTSC();
    7726                 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
    7727                 ASMSerializeInstruction();
    7728                 uTscWorkerFlushed = ASMReadTSC();
    7729
    7730                 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
    7731                 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
    7732                 {
    7733                     /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
    7734                     if (uCmpReadTime < (uMinCmpReadTime << 1))
    7735                     {
    7736                         ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
    7737                         if (uCmpReadTime < uMinCmpReadTime)
    7738                             uMinCmpReadTime = uCmpReadTime;
    7739                     }
    7740                     else
    7741                         ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
    7742                 }
    7743                 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
    7744                 {
                         /* Read-time loops: only establish the minimum compare/read time. */
    7745                     if (uCmpReadTime < uMinCmpReadTime)
    7746                         uMinCmpReadTime = uCmpReadTime;
    7747                 }
    7748
    7749 #elif defined(GIP_TSC_DELTA_METHOD_2)
    7750                 supdrvTscDeltaMethod2CollectData(pArgs->pWorkerData, &pArgs->pMasterData->iCurSeqNo, pArgs->fLagWorker);
    7751 #else
    7752 # error "tsc delta method not selected"
    7753 #endif
    7754
    7755                 /* Tell master we're done collecting our data. */
    7756                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
    7757
    7758                 /* Wait for the master to process the data. */
    7759                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
    7760                     ASMNopPause();
    7761             }
    7762         }
    7763
    7764         /*
    7765          * We must reset the worker TSC sample value in case it gets picked as a
    7766          * GIP master later on (it's trashed above, naturally).
    7767          */
    7768         if (idCpu == idMaster)
    7769             ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
    7770
    7771         /*
    7772          * Success? If so, stop trying.
    7773          */
    7774         if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
    7775         {
                 /* Each side moves its own CPU from the pending set to the obtained set. */
    7776             if (idCpu == idMaster)
    7777             {
    7778                 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuMaster->iCpuSet);
    7779                 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuMaster->iCpuSet);
    7780             }
    7781             else
    7782             {
    7783                 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
    7784                 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
    7785             }
    7786             break;
    7787         }
    7788     }
    7789 }
    7790 
    7791 
    7792 /**
    7793  * Resets the TSC delta measurement state.
    7794  *
    7795  * Every per-CPU TSC sample in the GIP is set to GIP_TSC_DELTA_RSVD, optionally
    7796  * every per-CPU delta is set to INT64_MAX, and finally the delta
          * synchronization variable of the device extension is put in the stop state.
    7797  *
    7798  * @param   pDevExt         Pointer to the device instance data.
    7799  * @param   fClearDeltas    Whether the deltas are also to be cleared.
    7800  */
    7801 DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
    7802 {
    7803     PSUPGLOBALINFOPAGE const pGip = pDevExt->pGip;
    7804     unsigned                 idx  = 0;
    7805     while (idx < pGip->cCpus)
    7806     {
    7807         ASMAtomicWriteU64(&pGip->aCPUs[idx].u64TSCSample, GIP_TSC_DELTA_RSVD);
    7808         if (fClearDeltas)
    7809             ASMAtomicWriteS64(&pGip->aCPUs[idx].i64TSCDelta, INT64_MAX);
    7810         idx++;
    7811     }
    7812     ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
    7813 }
    7814 
    7815 
    7816 /**
    7817  * Measures the TSC delta between the master GIP CPU and one specified worker
    7818  * CPU.
    7819  *
    7820  * @returns VBox status code.
          * @retval  VINF_SUCCESS on success, or right away if the worker is the GIP
          *          master itself.
          * @retval  VERR_INVALID_PARAMETER if there is no GIP.
          * @retval  VERR_INVALID_CPU_ID if the GIP master isn't found in the GIP.
          * @retval  VERR_CPU_OFFLINE if the worker isn't in the GIP online CPU set.
          * @retval  VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if the callbacks ran but
          *          left the worker delta at INT64_MAX.
    7821  * @param   pDevExt         Pointer to the device instance data.
    7822  * @param   idxWorker       The index of the worker CPU from the GIP's array of
    7823  *                          CPUs.
    7824  *
    7825  * @remarks This must be called with preemption enabled!
    7826  */
    7827 static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
    7828 {
    7829     int                 rc;
    7830     PSUPGLOBALINFOPAGE  pGip          = pDevExt->pGip;
    7831     RTCPUID             idMaster      = pDevExt->idGipMaster;
    7832     PSUPGIPCPU          pGipCpuWorker = &pGip->aCPUs[idxWorker];
    7833     PSUPGIPCPU          pGipCpuMaster;
    7834     uint32_t            iGipCpuMaster;
    7835
    7836     /* Validate input a bit. */
    7837     AssertReturn(pGip, VERR_INVALID_PARAMETER);
    7838     Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    7839     Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    7840
    7841     /*
    7842      * Don't attempt measuring the delta for the GIP master.
    7843      */
    7844     if (pGipCpuWorker->idCpu == idMaster)
    7845     {
    7846         if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
    7847             ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
    7848         return VINF_SUCCESS;
    7849     }
    7850
    7851     /*
    7852      * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
    7853      * try pick a different master.  (This fudge only works with multi core systems.)
    7854      * ASSUMES related threads have adjacent APIC IDs.  ASSUMES two threads per core.
    7855      */
    7856     iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
    7857     AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
    7858     pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
    7859     if (   (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
    7860         && ASMHasCpuId()
    7861         && ASMIsValidStdRange(ASMCpuId_EAX(0))
    7862         && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
    7863         && pGip->cOnlineCpus > 2)
    7864     {
             /* Take the first other online CPU that already has a delta of its own. */
    7865         uint32_t i;
    7866         for (i = 0; i < pGip->cCpus; i++)
    7867             if (   i != iGipCpuMaster
    7868                 && i != idxWorker
    7869                 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
    7870                 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
    7871                 && pGip->aCPUs[i].idCpu  != NIL_RTCPUID
    7872                 && pGip->aCPUs[i].idCpu  != idMaster              /* paranoia starts here... */
    7873                 && pGip->aCPUs[i].idCpu  != pGipCpuWorker->idCpu
    7874                 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
    7875                 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic)
    7876             {
    7877                 iGipCpuMaster = i;
    7878                 pGipCpuMaster = &pGip->aCPUs[i];
    7879                 idMaster = pGipCpuMaster->idCpu;
    7880                 break;
    7881             }
    7882     }
    7883
    7884     /*
    7885      * Set the master TSC as the initiator.  This serializes delta measurements.
    7886      */
    7887     while (!ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID))
    7888     {
    7889         /*
    7890          * Sleep here rather than spin as there is a parallel measurement
    7891          * being executed and that can take a good while to be done.
    7892          */
    7893         RTThreadSleep(1);
    7894     }
    7895
    7896     if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
    7897     {
    7898         /*
    7899          * Initialize data package for the RTMpOnAll callback.
    7900          */
    7901         SUPDRVGIPTSCDELTARGS Args;
    7902         RT_ZERO(Args);
    7903         Args.pWorker  = pGipCpuWorker;
    7904         Args.pMaster  = pGipCpuMaster;
    7905         Args.idMaster = idMaster;
    7906         Args.pDevExt  = pDevExt;
    7907 #ifdef GIP_TSC_DELTA_METHOD_1
    7908         rc = VINF_SUCCESS;
    7909 #elif defined(GIP_TSC_DELTA_METHOD_2)
    7910         rc = supdrvTscDeltaMethod2Init(&Args);
    7911 #else
    7912 # error "huh?"
    7913 #endif
    7914         if (RT_SUCCESS(rc))
    7915         {
    7916             /*
    7917              * Fire TSC-read workers on all CPUs but only synchronize between master
    7918              * and one worker to ease memory contention.
    7919              */
    7920             ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
    7921             ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
    7922
    7923             rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, &Args, NULL);
    7924             if (RT_SUCCESS(rc))
    7925             {
    7926                 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
    7927                 {
    7928                     /*
    7929                      * Work the TSC delta applicability rating.  It starts
    7930                      * optimistic in supdrvGipInit, we downgrade it here.
    7931                      */
    7932                     SUPGIPUSETSCDELTA enmRating;
    7933                     if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
    7934                         || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
    7935                         enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
    7936                     else if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
    7937                              || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
    7938                         enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
    7939                     else
    7940                         enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
                         /* The global rating is only ever raised here, never lowered. */
    7941                     if (pGip->enmUseTscDelta < enmRating)
    7942                     {
    7943                         AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
    7944                         ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
    7945                     }
    7946                 }
    7947                 else
    7948                     rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
    7949             }
    7950         }
    7951
    7952 #ifdef GIP_TSC_DELTA_METHOD_2
             /* NOTE(review): this is also reached when supdrvTscDeltaMethod2Init()
                failed; presumably the term function copes with whatever state init
                leaves behind (Args was zeroed above) - confirm. */
    7953         supdrvTscDeltaMethod2Term(&Args);
    7954 #endif
    7955     }
    7956     else
    7957         rc = VERR_CPU_OFFLINE;
    7958
         /* Release the initiator slot so that the next measurement can proceed. */
    7959     ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
    7960     return rc;
    7961 }
    7962 
    7963 
    7964 /**
    7965  * Performs the initial measurements of the TSC deltas between CPUs.
    7966  *
    7967  * This is called by supdrvGipCreate or triggered by it if threaded.
    7968  *
          * All samples and deltas are cleared first.  The first online CPU in APIC ID
          * order is then made the GIP master and given the initial master delta value,
          * after which every other CPU in the pending TSC delta set is measured against
          * it.  The measuring stops at the first failure and that status is returned.
          *
    7969  * @returns VBox status code.
          * @retval  VERR_CPU_NOT_FOUND if no online CPU was found for the master role.
          * @retval  VERR_TRY_AGAIN if a CPU online/offline event was noticed during the
          *          measurements.
    7970  * @param   pDevExt     Pointer to the device instance data.
    7971  *
    7972  * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
    7973  *          idCpu, GIP's online CPU set which are populated in
    7974  *          supdrvGipInitOnCpu().
    7975  */
    7976 static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
    7977 {
    7978     PSUPGIPCPU pGipCpuMaster;
    7979     unsigned   iCpu;
    7980     unsigned   iOddEven;
    7981     PSUPGLOBALINFOPAGE pGip   = pDevExt->pGip;
    7982     uint32_t   idxMaster      = UINT32_MAX;
    7983     int        rc             = VINF_SUCCESS;
    7984     uint32_t   cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
    7985
    7986     Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    7987
    7988     /*
    7989      * Pick the first CPU online as the master TSC and make it the new GIP master based
    7990      * on the APIC ID.
    7991      *
    7992      * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
    7993      * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
    7994      * master as this point since the sync/async timer isn't created yet.
    7995      */
    7996     supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
    7997     for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
    7998     {
    7999         uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
    8000         if (idxCpu != UINT16_MAX)
    8001         {
    8002             PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
    8003             if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
    8004             {
    8005                 idxMaster = idxCpu;
    8006                 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
    8007                 break;
    8008             }
    8009         }
    8010     }
    8011     AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
    8012     pGipCpuMaster = &pGip->aCPUs[idxMaster];
    8013     ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
    8014
    8015     /*
    8016      * If there is only a single CPU online we have nothing to do.
    8017      */
    8018     if (pGip->cOnlineCpus <= 1)
    8019     {
    8020         AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
    8021         return VINF_SUCCESS;
    8022     }
    8023
    8024     /*
    8025      * Loop thru the GIP CPU array and get deltas for each CPU (except the
    8026      * master).   We do the CPUs with the even numbered APIC IDs first so that
    8027      * we've got alternative master CPUs to pick from on hyper-threaded systems.
          *
          * The outer loop also checks the status: the breaks below only leave the
          * inner loop, and a second pass must not overwrite a failure status.
    8028      */
    8029     for (iOddEven = 0; iOddEven < 2 && RT_SUCCESS(rc); iOddEven++)
    8030     {
    8031         for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    8032         {
    8033             PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
    8034             if (   iCpu != idxMaster
    8035                 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
    8036                 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
    8037             {
    8038                 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
    8039                 if (RT_FAILURE(rc))
    8040                 {
                         /* One argument per conversion specifier (five of each). */
    8041                     SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
    8042                                 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster);
    8043                     break;
    8044                 }
    8045
                     /* The set of online CPUs changed under us, so the results so far
                        cannot be trusted. */
    8046                 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
    8047                 {
    8048                     SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
    8049                     rc = VERR_TRY_AGAIN;
    8050                     break;
    8051                 }
    8052             }
    8053         }
    8054     }
    8055
    8056     return rc;
    8057 }
    8058 
    8059 
    8060 /**
    8061  * Callback used by supdrvDetermineAsyncTsc to read the TSC on a CPU.
          *
          * supdrvDetermineAsyncTsc hands this function to RTMpOnSpecific.  The TSC is
          * read with ASMReadTSC and stored through pvUser1 with a 64-bit atomic write.
    8062  *
    8063  * @param   idCpu       Ignored.
    8064  * @param   pvUser1     Where to put the TSC (used as a uint64_t volatile pointer).
    8065  * @param   pvUser2     Ignored.
    8066  */
    8067 static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    8068 {
    8069     ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
    8070 }
    8071 
    8072 
    8073 /**
    8074  * Determine if Async GIP mode is required because of TSC drift.
    8075  *
    8076  * When using the default/normal timer code it is essential that the time stamp counter
    8077  * (TSC) runs never backwards, that is, a read operation to the counter should return
    8078  * a bigger value than any previous read operation. This is guaranteed by the latest
    8079  * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
    8080  * case we have to choose the asynchronous timer mode.
    8081  *
          * The check reads the TSC for each CPU set index in turn (via RTMpOnSpecific),
          * for up to 8 rounds, and stops at the first reading that is not strictly
          * greater than the previous one.  A VERR_NOT_SUPPORTED status from
          * RTMpOnSpecific ends the check early without flagging async mode.  Any other
          * failure is asserted to be VERR_CPU_NOT_FOUND or VERR_CPU_OFFLINE and that
          * set index is simply skipped.
          *
    8082  * @param   poffMin     Pointer to the determined difference between different
    8083  *                      cores (optional, can be NULL).  Receives the smallest
          *                      forward step seen, or PrevTsc - CurTsc when a
          *                      non-increasing reading was found; it stays at
          *                      ~(uint64_t)0 if no statistics were gathered.
    8084  * @return  false if the time stamp counters appear to be synchronized, true otherwise.
    8085  */
    8086 static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
    8087 {
    8088     /*
    8089      * Just iterate all the cpus 8 times and make sure that the TSC is
    8090      * ever increasing. We don't bother taking TSC rollover into account.
    8091      */
    8092     int         iEndCpu = RTMpGetArraySize();
    8093     int         iCpu;
    8094     int         cLoops = 8;
    8095     bool        fAsync = false;
    8096     int         rc = VINF_SUCCESS;
    8097     uint64_t    offMax = 0;
    8098     uint64_t    offMin = ~(uint64_t)0;
    8099     uint64_t    PrevTsc = ASMReadTSC();
    8100 
    8101     while (cLoops-- > 0)
    8102     {
    8103         for (iCpu = 0; iCpu < iEndCpu; iCpu++)
    8104         {
    8105             uint64_t CurTsc;
    8106             rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
    8107             if (RT_SUCCESS(rc))
    8108             {
                         /* A reading that did not advance means async mode; both offsets
                            are set to the size of the backward step. */
    8109                 if (CurTsc <= PrevTsc)
    8110                 {
    8111                     fAsync = true;
    8112                     offMin = offMax = PrevTsc - CurTsc;
    8113                     Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
    8114                          iCpu, cLoops, CurTsc, PrevTsc));
    8115                     break;
    8116                 }
    8117 
    8118                 /* Gather statistics (except the first time). */
                         /* cLoops was already post-decremented by the while condition, so
                            it is 7 during the first round. */
    8119                 if (iCpu != 0 || cLoops != 7)
    8120                 {
    8121                     uint64_t off = CurTsc - PrevTsc;
    8122                     if (off < offMin)
    8123                         offMin = off;
    8124                     if (off > offMax)
    8125                         offMax = off;
    8126                     Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
    8127                 }
    8128 
    8129                 /* Next */
    8130                 PrevTsc = CurTsc;
    8131             }
    8132             else if (rc == VERR_NOT_SUPPORTED)
    8133                 break;
    8134             else
    8135                 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
    8136         }
    8137 
    8138         /* broke out of the loop. */
    8139         if (iCpu < iEndCpu)
    8140             break;
    8141     }
    8142 
    8143     if (poffMin)
    8144         *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    8145     Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
    8146          fAsync, iEndCpu, rc, offMin, offMax));
             /* NOTE(review): the (long) casts below drop the upper bits of the 64-bit
                offsets wherever long is narrower than 64 bits - confirm that is acceptable. */
    8147 #if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    8148     OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
    8149 #endif
    8150     return fAsync;
    8151 }
    8152 
    8153 
    8154 /**
    8155  * supdrvGipInit() worker that determines the GIP TSC mode.
    8156  *
          * Decision order as implemented below:
          *   -# Single CPU: invariant mode if CPUID advertises an invariant TSC,
          *      otherwise sync mode.
          *   -# supdrvOSGetForcedAsyncTscMode() returns true: async mode.
          *   -# supdrvDetermineAsyncTsc() sees a non-increasing TSC: async mode.
          *   -# CPUID advertises an invariant TSC: invariant mode.
          *   -# AMD CPU with any of the 0x3e bits set in CPUID 0x80000007 EDX: async mode.
          *   -# Otherwise: sync mode.
          *
    8157  * @returns The most suitable TSC mode.
    8158  * @param   pDevExt     Pointer to the device instance data.
    8159  */
    8160 static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
    8161 {
    8162     uint64_t u64DiffCoresIgnored;
    8163     uint32_t uEAX, uEBX, uECX, uEDX;
    8164 
    8165     /*
    8166      * Establish whether the CPU advertises TSC as invariant, we need that in
    8167      * a couple of places below.
    8168      */
    8169     bool fInvariantTsc = false;
    8170     if (ASMHasCpuId())
    8171     {
    8172         uEAX = ASMCpuId_EAX(0x80000000);
    8173         if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
    8174         {
    8175             uEDX = ASMCpuId_EDX(0x80000007);
    8176             if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
    8177                 fInvariantTsc = true;
    8178         }
    8179     }
    8180 
    8181     /*
    8182      * On single CPU systems, we don't need to consider ASYNC mode.
    8183      */
    8184     if (RTMpGetCount() <= 1)
    8185         return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
    8186 
    8187     /*
    8188      * Allow the user and/or OS specific bits to force async mode.
    8189      */
    8190     if (supdrvOSGetForcedAsyncTscMode(pDevExt))
    8191         return SUPGIPMODE_ASYNC_TSC;
    8192 
    8193 
    8194 #if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
    8195     /*
    8196      * Use invariant mode if the CPU says TSC is invariant.
    8197      */
    8198     if (fInvariantTsc)
    8199         return SUPGIPMODE_INVARIANT_TSC;
    8200 #endif
    8201 
    8202     /*
    8203      * TSC is not invariant and we're on SMP, this presents two problems:
    8204      *
    8205      *      (1) There might be a skew between the CPU, so that cpu0
    8206      *          returns a TSC that is slightly different from cpu1.
    8207      *          This skew may be due to (2), bad TSC initialization
    8208      *          or slightly different TSC rates.
    8209      *
    8210      *      (2) Power management (and other things) may cause the TSC
    8211      *          to run at a non-constant speed, and cause the speed
    8212      *          to be different on the cpus. This will result in (1).
    8213      *
    8214      * If any of the above is detected, we will have to use ASYNC mode.
                 *
                 * Note: while the '#if 0' block above stays disabled, an invariant TSC
                 * can still reach this point; it is handled right after check (1).
    8215      */
    8216 
    8217     /* (1). Try check for current differences between the cpus. */
    8218     if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
    8219         return SUPGIPMODE_ASYNC_TSC;
    8220 
    8221 #if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
    8222     if (fInvariantTsc)
    8223         return SUPGIPMODE_INVARIANT_TSC;
    8224 #endif
    8225 
    8226     /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
    8227     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    8228     if (   ASMIsValidStdRange(uEAX)
    8229         && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
    8230     {
    8231         /* Check for APM support. */
    8232         uEAX = ASMCpuId_EAX(0x80000000);
    8233         if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
    8234         {
    8235             uEDX = ASMCpuId_EDX(0x80000007);
    8236             if (uEDX & 0x3e)  /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
    8237                 return SUPGIPMODE_ASYNC_TSC;
    8238         }
    8239     }
    8240 
    8241     return SUPGIPMODE_SYNC_TSC;
    8242 }
    8243 
    8244 
    8245 /**
    8246  * Initializes per-CPU GIP information.
    8247  *
          * Puts the entry into a "not yet assigned" state: invalid CPU state,
          * NIL_RTCPUID, CPU set index -1 and APIC id UINT16_MAX.  The frequency and
          * TSC interval history are seeded with placeholder values.
          *
    8248  * @param   pDevExt     Pointer to the device instance data (not referenced by
          *                      the current body).
    8249  * @param   pGip        Pointer to the GIP.
    8250  * @param   pCpu        Pointer to which GIP CPU to initialize.
    8251  * @param   u64NanoTS   The current nanosecond timestamp.
    8252  */
    8253 static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
    8254 {
    8255     /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
    8256        which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
             /* The transaction id starts out even; supdrvGipUpdate and
                supdrvGipUpdatePerCpu make it odd for the duration of an update. */
    8257     pCpu->u32TransactionId   = 2;
    8258     pCpu->u64NanoTS          = u64NanoTS;
    8259     pCpu->u64TSC             = ASMReadTSC();
    8260     pCpu->u64TSCSample       = GIP_TSC_DELTA_RSVD;
             /* INT64_MAX is used as the initial delta when enmUseTscDelta is above
                SUPGIPUSETSCDELTA_ZERO_CLAIMED, otherwise the delta starts at zero. */
    8261     pCpu->i64TSCDelta        = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
    8262 
    8263     ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
    8264     ASMAtomicWriteSize(&pCpu->idCpu,    NIL_RTCPUID);
    8265     ASMAtomicWriteS16(&pCpu->iCpuSet,   -1);
    8266     ASMAtomicWriteU16(&pCpu->idApic,    UINT16_MAX);
    8267 
    8268     /*
    8269      * We don't know the following values until we've executed updates.
    8270      * So, we'll just pretend it's a 4 GHz CPU and adjust the history on
    8271      * the 2nd timer callout.
    8272      */
    8273     pCpu->u64CpuHz          = _4G + 1; /* tstGIP-2 depends on this. */
             /* One chained assignment seeds the update interval and all eight
                history slots with the same value. */
    8274     pCpu->u32UpdateIntervalTSC
    8275         = pCpu->au32TSCHistory[0]
    8276         = pCpu->au32TSCHistory[1]
    8277         = pCpu->au32TSCHistory[2]
    8278         = pCpu->au32TSCHistory[3]
    8279         = pCpu->au32TSCHistory[4]
    8280         = pCpu->au32TSCHistory[5]
    8281         = pCpu->au32TSCHistory[6]
    8282         = pCpu->au32TSCHistory[7]
    8283         = (uint32_t)(_4G / pGip->u32UpdateHz);
    8284 }
    8285 
    8286 
    8287 /**
    8288  * Initializes the GIP data.
    8289  *
          * Zeroes the whole page-aligned GIP, fills in the header fields, marks all
          * APIC-id and CPU-set-index lookup entries as unused (UINT16_MAX),
          * initializes every per-CPU entry via supdrvGipInitCpu() and finally links
          * the GIP to the device extension.
          *
    8290  * @param   pDevExt             Pointer to the device instance data.
    8291  * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
    8292  * @param   HCPhys              The physical address of the GIP.
    8293  * @param   u64NanoTS           The current nanosecond timestamp.
    8294  * @param   uUpdateHz           The update frequency.
    8295  * @param   uUpdateIntervalNS   The update interval in nanoseconds.
    8296  * @param   cCpus               The CPU count.
    8297  */
    8298 static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
    8299                           uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
    8300 {
             /* Size of the header plus cCpus per-CPU entries, rounded up to whole pages. */
    8301     size_t const    cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    8302     unsigned        i;
    8303 #ifdef DEBUG_DARWIN_GIP
    8304     OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
    8305 #else
    8306     LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
    8307 #endif
    8308 
    8309     /*
    8310      * Initialize the structure.
    8311      */
    8312     memset(pGip, 0, cbGip);
    8313 
    8314     pGip->u32Magic                = SUPGLOBALINFOPAGE_MAGIC;
    8315     pGip->u32Version              = SUPGLOBALINFOPAGE_VERSION;
    8316     pGip->u32Mode                 = supdrvGipInitDetermineTscMode(pDevExt);
             /* TSC deltas are only used in invariant mode (the sync mode alternative is
                commented out); every other mode gets SUPGIPUSETSCDELTA_NOT_APPLICABLE. */
    8317     if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
    8318         /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
    8319         pGip->enmUseTscDelta      = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
    8320                                   ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
    8321     else
    8322         pGip->enmUseTscDelta      = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
    8323     pGip->cCpus                   = (uint16_t)cCpus;
    8324     pGip->cPages                  = (uint16_t)(cbGip / PAGE_SIZE);
    8325     pGip->u32UpdateHz             = uUpdateHz;
    8326     pGip->u32UpdateIntervalNS     = uUpdateIntervalNS;
    8327     pGip->fGetGipCpu              = SUPGIPGETCPU_APIC_ID;
             /* The online and present sets start out empty here; only the possible
                set and the three counters are taken from RTMp at this point. */
    8328     RTCpuSetEmpty(&pGip->OnlineCpuSet);
    8329     RTCpuSetEmpty(&pGip->PresentCpuSet);
    8330     RTMpGetSet(&pGip->PossibleCpuSet);
    8331     pGip->cOnlineCpus             = RTMpGetOnlineCount();
    8332     pGip->cPresentCpus            = RTMpGetPresentCount();
    8333     pGip->cPossibleCpus           = RTMpGetCount();
    8334     pGip->idCpuMax                = RTMpGetMaxCpuId();
             /* UINT16_MAX marks a lookup table slot as having no GIP CPU entry. */
    8335     for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
    8336         pGip->aiCpuFromApicId[i]    = UINT16_MAX;
    8337     for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
    8338         pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
    8339     for (i = 0; i < cCpus; i++)
    8340         supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
    8341 
    8342     /*
    8343      * Link it to the device extension.
    8344      */
    8345     pDevExt->pGip      = pGip;
    8346     pDevExt->HCPhysGip = HCPhys;
    8347     pDevExt->cGipUsers = 0;
    8348 }
    8349 
    8350 
    8351 /**
    8352  * On CPU initialization callback for RTMpOnAll.
    8353  *
          * Simply forwards to supdrvGipMpEventOnline() for the given CPU.
          *
    8354  * @param   idCpu               The CPU ID.
    8355  * @param   pvUser1             The device extension.
    8356  * @param   pvUser2             The GIP (not referenced by the current body).
    8357  */
    8358 static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    8359 {
    8360     /* This is good enough, even though it will update some of the globals a
    8361        bit too much. */
    8362     supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
    8363 }
    8364 
    8365 
    8366 /**
    8367  * Invalidates the GIP data upon termination.
    8368  *
          * Clears the magic and, for each of the pGip->cCpus entries, zeroes the
          * timestamps, the TSC sample and the history head, and sets the TSC delta
          * to INT64_MAX.
          *
    8369  * @param   pGip        Pointer to the read-write kernel mapping of the GIP.
    8370  */
    8371 static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
    8372 {
    8373     unsigned i;
             /* Clear the magic first, before the per-CPU data is wiped. */
    8374     pGip->u32Magic = 0;
    8375     for (i = 0; i < pGip->cCpus; i++)
    8376     {
    8377         pGip->aCPUs[i].u64NanoTS = 0;
    8378         pGip->aCPUs[i].u64TSC = 0;
    8379         pGip->aCPUs[i].iTSCHistoryHead = 0;
    8380         pGip->aCPUs[i].u64TSCSample = 0;
    8381         pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
    8382     }
    8383 }
    8384 
    8385 
    8386 /**
    8387  * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
    8388  * updates all the per cpu data except the transaction id.
    8389  *
          * Always stores the previous update interval, the new nanosecond timestamp
          * and the new TSC.  In invariant TSC mode it returns right after that;
          * otherwise it also maintains the TSC interval history and recomputes
          * u32UpdateIntervalTSC and u64CpuHz.
          *
    8390  * @param   pDevExt         The device extension.
    8391  * @param   pGipCpu         Pointer to the per cpu data.
    8392  * @param   u64NanoTS       The current time stamp.
    8393  * @param   u64TSC          The current TSC.
    8394  * @param   iTick           The current timer tick.
    8395  *
    8396  * @remarks Can be called with interrupts disabled!
    8397  */
    8398 static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
    8399 {
    8400     uint64_t    u64TSCDelta;
    8401     uint32_t    u32UpdateIntervalTSC;
    8402     uint32_t    u32UpdateIntervalTSCSlack;
    8403     unsigned    iTSCHistoryHead;
    8404     uint64_t    u64CpuHz;
    8405     uint32_t    u32TransactionId;
    8406 
    8407     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    8408     AssertPtrReturnVoid(pGip);
    8409 
    8410     /* Delta between this and the previous update. */
    8411     ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
    8412 
    8413     /*
    8414      * Update the NanoTS.
    8415      */
    8416     ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
    8417 
    8418     /*
    8419      * Calc TSC delta.
    8420      */
    8421     u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    8422     ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
    8423 
    8424     /* We don't need to keep recalculating the frequency when it's invariant. */
    8425     if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
    8426         return;
    8427 
             /* A delta that does not fit into 32 bits is replaced by the last known
                update interval and counted as an error. */
    8428     if (u64TSCDelta >> 32)
    8429     {
    8430         u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
    8431         pGipCpu->cErrors++;
    8432     }
    8433 
    8434     /*
    8435      * On the 2nd and 3rd callout, reset the history with the current TSC
    8436      * interval since the values entered by supdrvGipInit are totally off.
    8437      * The interval on the 1st callout is completely unreliable, the 2nd is a bit
    8438      * better, while the 3rd should be most reliable.
    8439      */
    8440     u32TransactionId = pGipCpu->u32TransactionId;
    8441     if (RT_UNLIKELY(   (   u32TransactionId == 5
    8442                         || u32TransactionId == 7)
    8443                     && (   iTick == 2
    8444                         || iTick == 3) ))
    8445     {
    8446         unsigned i;
    8447         for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
    8448             ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
    8449     }
    8450 
    8451     /*
    8452      * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
    8453      * Wait until we have at least one full history since the above history reset. The
    8454      * assumption is that the majority of the previous history values will be tolerable.
    8455      * See @bugref{6710} comment #67.
                 *
                 * When the previous interval is outside the threshold, the measured TSC
                 * delta is replaced by the average of the eight history entries.
    8456      */
    8457     if (   u32TransactionId > 23 /* 7 + (8 * 2) */
    8458         && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    8459     {
    8460         uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
    8461         if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
    8462             || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
    8463         {
                     /* Average the two halves of the history separately (sum of four,
                        shifted right by 2), then average the two results. */
    8464             uint32_t u32;
    8465             u32  = pGipCpu->au32TSCHistory[0];
    8466             u32 += pGipCpu->au32TSCHistory[1];
    8467             u32 += pGipCpu->au32TSCHistory[2];
    8468             u32 += pGipCpu->au32TSCHistory[3];
    8469             u32 >>= 2;
    8470             u64TSCDelta  = pGipCpu->au32TSCHistory[4];
    8471             u64TSCDelta += pGipCpu->au32TSCHistory[5];
    8472             u64TSCDelta += pGipCpu->au32TSCHistory[6];
    8473             u64TSCDelta += pGipCpu->au32TSCHistory[7];
    8474             u64TSCDelta >>= 2;
    8475             u64TSCDelta += u32;
    8476             u64TSCDelta >>= 1;
    8477         }
    8478     }
    8479 
    8480     /*
    8481      * TSC History.
                 *
                 * The history is a ring of 8 entries; the head is advanced first and the
                 * new delta is then stored at the new head position.
    8482      */
    8483     Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
    8484     iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
    8485     ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    8486     ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
    8487 
    8488     /*
    8489      * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
    8490      *
    8491      * On Windows, we have an occasional (but recurring) sour value that messed up
    8492      * the history but taking only 1 interval reduces the precision overall.
    8493      * However, this problem existed before the invariant mode was introduced.
                 *
                 * NOTE(review): this function returns early above when u32Mode is
                 * SUPGIPMODE_INVARIANT_TSC, so only the u32UpdateHz test can be true in
                 * the first condition below.
    8494      */
    8495     if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
    8496         || pGip->u32UpdateHz >= 1000)
    8497     {
    8498         uint32_t u32;
    8499         u32  = pGipCpu->au32TSCHistory[0];
    8500         u32 += pGipCpu->au32TSCHistory[1];
    8501         u32 += pGipCpu->au32TSCHistory[2];
    8502         u32 += pGipCpu->au32TSCHistory[3];
    8503         u32 >>= 2;
    8504         u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
    8505         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
    8506         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
    8507         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
    8508         u32UpdateIntervalTSC >>= 2;
    8509         u32UpdateIntervalTSC += u32;
    8510         u32UpdateIntervalTSC >>= 1;
    8511 
    8512         /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
    8513         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    8514     }
    8515     else if (pGip->u32UpdateHz >= 90)
    8516     {
                 /* Average of the current delta and the previous history entry. */
    8517         u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
    8518         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
    8519         u32UpdateIntervalTSC >>= 1;
    8520 
    8521         /* value chosen on a 2GHz thinkpad running windows */
    8522         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    8523     }
    8524     else
    8525     {
    8526         u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
    8527 
    8528         /* This value hasn't been checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
    8529         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    8530     }
             /* The published interval includes the slack; the CpuHz calculation below
                uses the interval without it. */
    8531     ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
    8532 
    8533     /*
    8534      * CpuHz.
    8535      */
    8536     u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
    8537     u64CpuHz /= pGip->u32UpdateIntervalNS;
    8538     ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
    8539 }
    8540 
    8541 
    8542 /**
    8543  * Updates the GIP.
    8544  *
          * In every mode except SUPGIPMODE_ASYNC_TSC only the first per-CPU entry
          * (aCPUs[0]) is updated.  In async mode the entry is looked up through the
          * calling CPU's APIC id and the update is silently dropped if no entry is
          * found or the entry belongs to a different CPU id.
          *
          * The per-CPU transaction id is incremented before and after the update, so
          * it is odd while an update is in progress.
          *
    8545  * @param   pDevExt         The device extension.
    8546  * @param   u64NanoTS       The current nanosecond timestamp.
    8547  * @param   u64TSC          The current TSC timestamp.
    8548  * @param   idCpu           The CPU ID.
    8549  * @param   iTick           The current timer tick.
    8550  *
    8551  * @remarks Can be called with interrupts disabled!
    8552  */
    8553 static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
    8554 {
    8555     /*
    8556      * Determine the relevant CPU data.
    8557      */
    8558     PSUPGIPCPU pGipCpu;
    8559     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    8560     AssertPtrReturnVoid(pGip);
    8561 
    8562     if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    8563         pGipCpu = &pGip->aCPUs[0];
    8564     else
    8565     {
    8566         unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
    8567         if (RT_UNLIKELY(iCpu >= pGip->cCpus))
    8568             return;
    8569         pGipCpu = &pGip->aCPUs[iCpu];
    8570         if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
    8571             return;
    8572     }
    8573 
    8574     /*
    8575      * Start update transaction.
    8576      */
    8577     if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    8578     {
    8579         /* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
    8580         AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                 /* Increment once more before bailing out. */
    8581         ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    8582         pGipCpu->cErrors++;
    8583         return;
    8584     }
    8585 
    8586     /*
    8587      * Recalc the update frequency every 0x800th time.
    8588      */
    8589     if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC   /* cuz we're not recalculating the frequency on invariants hosts. */
    8590         && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    8591     {
                 /* u64NanoTSLastUpdateHz is zero until the first pass through here, so
                    the first pass only records the timestamp. */
    8592         if (pGip->u64NanoTSLastUpdateHz)
    8593         {
    8594 #ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
    8595             uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
    8596             uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
                     /* Only values in the 30..2000 Hz range are accepted. */
    8597             if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
    8598             {
    8599                 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
    8600                  *        calculation on non-invariant hosts if it changes the history decision
    8601                  *        taken in supdrvGipDoUpdateCpu(). */
    8602                 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
    8603                 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
    8604                 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
    8605             }
    8606 #endif
    8607         }
                 /* OR-ing in bit 0 keeps the stored timestamp non-zero, so the check
                    above passes on the next recalculation. */
    8608         ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    8609     }
    8610 
    8611     /*
    8612      * Update the data.
    8613      */
    8614     supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
    8615 
    8616     /*
    8617      * Complete transaction.
    8618      */
    8619     ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    8620 }
    8621 
    8622 
    8623 /**
    8624  * Updates the per cpu GIP data for the calling cpu.
    8625  *
          * The per-CPU entry is looked up through idApic.  Nothing is updated when
          * no entry exists for that APIC id or when the entry's CPU id differs from
          * idCpu.
          *
    8626  * @param   pDevExt         The device extension.
    8627  * @param   u64NanoTS       The current nanosecond timestamp.
    8628  * @param   u64TSC          The current TSC timestamp.
    8629  * @param   idCpu           The CPU ID.
    8630  * @param   idApic          The APIC id for the CPU index.
    8631  * @param   iTick           The current timer tick.
    8632  *
    8633  * @remarks Can be called with interrupts disabled!
    8634  */
    8635 static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
    8636                                   RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
    8637 {
    8638     uint32_t iCpu;
             /* NOTE(review): unlike supdrvGipUpdate(), pGip is not checked for NULL
                here - confirm callers guarantee the GIP is set up. */
    8639     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    8640 
    8641     /*
    8642      * Avoid a potential race when a CPU online notification doesn't fire on
    8643      * the onlined CPU but the tick creeps in before the event notification is
    8644      * run.
    8645      */
    8646     if (RT_UNLIKELY(iTick == 1))
    8647     {
    8648         iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
    8649         if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
    8650             supdrvGipMpEventOnline(pDevExt, idCpu);
    8651     }
    8652 
    8653     iCpu = pGip->aiCpuFromApicId[idApic];
    8654     if (RT_LIKELY(iCpu < pGip->cCpus))
    8655     {
    8656         PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
    8657         if (pGipCpu->idCpu == idCpu)
    8658         {
    8659             /*
    8660              * Start update transaction.
                         *
                         * The id must be odd after this increment; an even value is
                         * treated as an error and the update is skipped.
    8661              */
    8662             if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    8663             {
    8664                 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
    8665                 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    8666                 pGipCpu->cErrors++;
    8667                 return;
    8668             }
    8669 
    8670             /*
    8671              * Update the data.
    8672              */
    8673             supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
    8674 
    8675             /*
    8676              * Complete transaction.
    8677              */
    8678             ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    8679         }
    8680     }
    8681 }
    8682 
    86835413
    86845414/**
     
    86975427}
    86985428
    8699 
    8700 /**
    8701  * Service a TSC-delta measurement request.
    8702  *
    8703  * @returns VBox status code.
          *          - VERR_WRONG_ORDER if the session has no ring-3 GIP mapping.
          *          - VERR_INTERNAL_ERROR_2 if the device extension has no GIP.
          *          - VERR_INVALID_CPU_ID if the requested CPU id is NIL_RTCPUID.
          *          - VINF_SUCCESS if TSC deltas are not in use, if a delta is already
          *            known and fForce is clear, if an async request was queued, or
          *            if the measurement succeeded.
          *          - VERR_CPU_NOT_FOUND if no GIP CPU entry matches the CPU id.
          *          - VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY if the TSC-delta thread
          *            is still busy (SUPDRV_USE_TSC_DELTA_THREAD builds only).
          *          - Otherwise the last status of supdrvMeasureTscDeltaOne().
    8704  * @param   pDevExt         Pointer to the device instance data.
    8705  * @param   pSession        The support driver session.
    8706  * @param   pReq            Pointer to the TSC-delta measurement request.
    8707  */
    8708 static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
    8709 {
    8710     PSUPGLOBALINFOPAGE pGip;
    8711     RTCPUID            idCpuWorker;
    8712     int                rc;
    8713     int16_t            cTries;
    8714     RTMSINTERVAL       cMsWaitRetry;
    8715     uint16_t           iCpu;
    8716 
    8717     /*
    8718      * Validate.
    8719      */
    8720     AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
    8721     if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
    8722         return VERR_WRONG_ORDER;
    8723     pGip = pDevExt->pGip;
    8724     AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
    8725 
    8726     idCpuWorker = pReq->u.In.idCpu;
    8727     if (idCpuWorker == NIL_RTCPUID)
    8728         return VERR_INVALID_CPU_ID;
             /* RT_MAX makes these lower bounds: at least 10 tries and a retry wait of
                at least 5, whatever the request asks for. */
    8729     cTries       = RT_MAX(pReq->u.In.cRetries + 1, 10);
    8730     cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
    8731 
    8732     /*
    8733      * The request is a noop if the TSC delta isn't being used.
    8734      */
             /* NOTE(review): pGip was already fetched and checked above; this second
                assignment looks redundant. */
    8735     pGip = pDevExt->pGip;
    8736     if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    8737         return VINF_SUCCESS;
    8738 
             /* Linear search of the GIP CPU array for the entry with the requested
                CPU id; rc stays VERR_CPU_NOT_FOUND if none matches. */
    8739     rc = VERR_CPU_NOT_FOUND;
    8740     for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    8741     {
    8742         PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
    8743         if (pGipCpuWorker->idCpu == idCpuWorker)
    8744         {
                     /* INT64_MAX is the "no delta yet" value; anything else is reused
                        unless the caller forces a new measurement. */
    8745             if (   pGipCpuWorker->i64TSCDelta != INT64_MAX
    8746                 && !pReq->u.In.fForce)
    8747                 return VINF_SUCCESS;
    8748 
    8749 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    8750             if (pReq->u.In.fAsync)
    8751             {
    8752                 /** @todo Async. doesn't implement options like retries, waiting. We'll need
    8753                  *        to pass those options to the thread somehow and implement it in the
    8754                  *        thread. Check if anyone uses/needs fAsync before implementing this. */
    8755                 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    8756                 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
    8757                 if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
    8758                     || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
    8759                 {
    8760                     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
    8761                 }
    8762                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    8763                 RTThreadUserSignal(pDevExt->hTscDeltaThread);
    8764                 return VINF_SUCCESS;
    8765             }
    8766 
    8767             /*
    8768              * If a TSC-delta measurement request is already being serviced by the thread,
    8769              * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
                         *
                         * NOTE(review): cMsWaitRetry was clamped to at least 5 above, so the
                         * '!cMsWaitRetry' and 'if (cMsWaitRetry)' tests look redundant.  Also,
                         * when the thread is not busy the loop has no early exit and just
                         * re-reads the state until cTries runs out - confirm this is intended.
    8770              */
    8771             while (cTries-- > 0)
    8772             {
    8773                 SUPDRVTSCDELTATHREADSTATE enmState;
    8774                 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    8775                 enmState = pDevExt->enmTscDeltaThreadState;
    8776                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    8777 
    8778                 if (   enmState == kTscDeltaThreadState_Measuring
    8779                     || enmState == kTscDeltaThreadState_WaitAndMeasure)
    8780                 {
    8781                     if (   !cTries
    8782                         || !cMsWaitRetry)
    8783                         return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
    8784                     if (cMsWaitRetry)
    8785                         RTThreadSleep(cMsWaitRetry);
    8786                 }
    8787             }
                     /* The wait loop above used up cTries; reset it for the measurement loop. */
    8788             cTries = RT_MAX(pReq->u.In.cRetries + 1, 10);
    8789 #endif
    8790 
                     /* Measure, retrying with a sleep in between until it succeeds or the
                        tries are used up; rc keeps the status of the last attempt. */
    8791             while (cTries-- > 0)
    8792             {
    8793                 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
    8794                 if (RT_SUCCESS(rc))
    8795                 {
    8796                     Assert(pGipCpuWorker->i64TSCDelta != INT64_MAX);
    8797                     break;
    8798                 }
    8799 
    8800                 if (cMsWaitRetry)
    8801                     RTThreadSleep(cMsWaitRetry);
    8802             }
    8803 
    8804             break;
    8805         }
    8806     }
    8807     return rc;
    8808 }
    8809 
    8810 
    8811 /**
    8812  * Reads TSC with delta applied.
    8813  *
    8814  * Will try to resolve delta value INT64_MAX before applying it.  This is the
    8815  * main purpose of this function, to handle the case where the delta needs to be
    8816  * determined.
    8817  *
    8818  * @returns VBox status code.  VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED (raw TSC returned) if the delta is still INT64_MAX after a few attempts.
    8819  * @param   pDevExt         Pointer to the device instance data.
    8820  * @param   pSession        The support driver session.
    8821  * @param   pReq            Pointer to the TSC-read request.
    8822  */
    8823 static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
    8824 {
    8825     PSUPGLOBALINFOPAGE pGip;
    8826     int rc;
    8827 
    8828     /*
    8829      * Validate.  We require the client to have mapped GIP (no asserting on
    8830      * ring-3 preconditions).
    8831      */
    8832     AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
    8833     if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
    8834         return VERR_WRONG_ORDER;
    8835     pGip = pDevExt->pGip;
    8836     AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
    8837 
    8838     /*
    8839      * We're usually here because we need to apply delta, but we shouldn't be
    8840      * upset if the GIP is some different mode.
    8841      */
    8842     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    8843     {
    8844         uint32_t cTries = 0;
    8845         for (;;)
    8846         {
    8847             /*
    8848              * Start by gathering the data, using CLI for disabling preemption
    8849              * while we do that.
    8850              */
    8851             RTCCUINTREG uFlags  = ASMIntDisableFlags();
    8852             int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
    8853             int         iGipCpu;
    8854             if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
    8855                           && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
    8856             {
    8857                 int64_t i64Delta   = pGip->aCPUs[iGipCpu].i64TSCDelta;
    8858                 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
    8859                 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
    8860                 ASMSetFlags(uFlags);
    8861 
    8862                 /*
    8863                  * If we're lucky we've got a delta, but no predictions here
    8864                  * as this I/O control is normally only used when the TSC delta
    8865                  * is set to INT64_MAX.
    8866                  */
    8867                 if (i64Delta != INT64_MAX)
    8868                 {
    8869                     pReq->u.Out.u64AdjustedTsc -= i64Delta;
    8870                     rc = VINF_SUCCESS;
    8871                     break;
    8872                 }
    8873 
    8874                 /* Give up after a few times; the attempt must be counted here or the loop never terminates. */
    8875                 if (++cTries >= 4)
    8876                 {
    8877                     rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
    8878                     break;
    8879                 }
    8880 
    8881                 /* Need to measure the delta and try again. */
    8882                 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
    8883                 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
    8884             }
    8885             else
    8886             {
    8887                 /* This really shouldn't happen. */
    8888                 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
    8889                 pReq->u.Out.idApic = ASMGetApicId();
    8890                 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
    8891                 ASMSetFlags(uFlags);
    8892                 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
    8893                 break;
    8894             }
    8895         }
    8896     }
    8897     else
    8898     {
    8899         /*
    8900          * No delta to apply. Easy. Deal with preemption the lazy way.
    8901          */
    8902         RTCCUINTREG uFlags  = ASMIntDisableFlags();
    8903         int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
    8904         int         iGipCpu;
    8905         if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
    8906                       && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
    8907             pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
    8908         else
    8909             pReq->u.Out.idApic = ASMGetApicId();
    8910         pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
    8911         ASMSetFlags(uFlags);
    8912         rc = VINF_SUCCESS;
    8913     }
    8914 
    8915     return rc;
    8916 }
    8917 
  • trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp

    r54323 r54327  
    11/* $Id$ */
    22/** @file
    3  * VBoxDrv - The VirtualBox Support Driver - Common code.
     3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
    44 */
    55
     
    6161#include <VBox/log.h>
    6262#include <VBox/err.h>
    63 #include <VBox/vmm/hm_svm.h>
    64 #include <VBox/vmm/hm_vmx.h>
    6563
    6664#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
    6765# include "dtrace/SUPDrv.h"
    6866#else
    69 # define VBOXDRV_SESSION_CREATE(pvSession, fUser) do { } while (0)
    70 # define VBOXDRV_SESSION_CLOSE(pvSession) do { } while (0)
    71 # define VBOXDRV_IOCTL_ENTRY(pvSession, uIOCtl, pvReqHdr) do { } while (0)
    72 # define VBOXDRV_IOCTL_RETURN(pvSession, uIOCtl, pvReqHdr, rcRet, rcReq) do { } while (0)
    73 #endif
    74 
    75 /*
    76  * Logging assignments:
    77  *      Log     - useful stuff, like failures.
    78  *      LogFlow - program flow, except the really noisy bits.
    79  *      Log2    - Cleanup.
    80  *      Log3    - Loader flow noise.
    81  *      Log4    - Call VMMR0 flow noise.
    82  *      Log5    - Native yet-to-be-defined noise.
    83  *      Log6    - Native ioctl flow noise.
    84  *
    85  * Logging requires BUILD_TYPE=debug and possibly changes to the logger
    86  * instantiation in log-vbox.c(pp).
    87  */
     67/* ... */
     68#endif
    8869
    8970
     
    152133*   Internal Functions                                                         *
    153134*******************************************************************************/
    154 static DECLCALLBACK(int)    supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser);
    155 static DECLCALLBACK(void)   supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser);
    156 static int                  supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
    157 static int                  supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
    158 static int                  supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
    159 static int                  supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
    160 static int                  supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
    161 static int                  supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt);
    162 static int                  supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
    163 static int                  supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq);
    164 static int                  supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt,void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
    165 static void                 supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt);
    166 static int                  supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
    167 static void                 supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
    168 DECLINLINE(int)             supdrvLdrLock(PSUPDRVDEVEXT pDevExt);
    169 DECLINLINE(int)             supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt);
    170 static int                  supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq);
    171 static int                  supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq);
    172 static int                  supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq);
    173 static int                  supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq);
    174 static int                  supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq);
    175 static int                  supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
    176 static void                 supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
    177135static DECLCALLBACK(void)   supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
    178136static DECLCALLBACK(void)   supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
     
    188146static int                  supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
    189147static int                  supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
    190 static int                  supdrvIOCtl_ResumeSuspendedKbds(void);
    191148
    192149
     
    196153DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
    197154
    198 
    199 /**
    200  * Array of the R0 SUP API.
    201  *
    202  * While making changes to these exports, make sure to update the IOC
    203  * minor version (SUPDRV_IOC_VERSION).
    204  */
    205 static SUPFUNC g_aFunctions[] =
    206 {
    207 /* SED: START */
    208     /* name                                     function */
    209         /* Entries with absolute addresses determined at runtime, fixup
    210            code makes ugly ASSUMPTIONS about the order here: */
    211     { "SUPR0AbsIs64bit",                        (void *)0 },
    212     { "SUPR0Abs64bitKernelCS",                  (void *)0 },
    213     { "SUPR0Abs64bitKernelSS",                  (void *)0 },
    214     { "SUPR0Abs64bitKernelDS",                  (void *)0 },
    215     { "SUPR0AbsKernelCS",                       (void *)0 },
    216     { "SUPR0AbsKernelSS",                       (void *)0 },
    217     { "SUPR0AbsKernelDS",                       (void *)0 },
    218     { "SUPR0AbsKernelES",                       (void *)0 },
    219     { "SUPR0AbsKernelFS",                       (void *)0 },
    220     { "SUPR0AbsKernelGS",                       (void *)0 },
    221         /* Normal function pointers: */
    222     { "g_pSUPGlobalInfoPage",                   (void *)&g_pSUPGlobalInfoPage },            /* SED: DATA */
    223     { "SUPGetGIP",                              (void *)SUPGetGIP },
    224     { "SUPReadTscWithDelta",                    (void *)SUPReadTscWithDelta },
    225     { "SUPGetTscDeltaSlow",                     (void *)SUPGetTscDeltaSlow },
    226     { "SUPGetCpuHzFromGipForAsyncMode",         (void *)SUPGetCpuHzFromGipForAsyncMode },
    227     { "SUPR0ComponentDeregisterFactory",        (void *)SUPR0ComponentDeregisterFactory },
    228     { "SUPR0ComponentQueryFactory",             (void *)SUPR0ComponentQueryFactory },
    229     { "SUPR0ComponentRegisterFactory",          (void *)SUPR0ComponentRegisterFactory },
    230     { "SUPR0ContAlloc",                         (void *)SUPR0ContAlloc },
    231     { "SUPR0ContFree",                          (void *)SUPR0ContFree },
    232     { "SUPR0EnableVTx",                         (void *)SUPR0EnableVTx },
    233     { "SUPR0SuspendVTxOnCpu",                   (void *)SUPR0SuspendVTxOnCpu },
    234     { "SUPR0ResumeVTxOnCpu",                    (void *)SUPR0ResumeVTxOnCpu },
    235     { "SUPR0GetKernelFeatures",                 (void *)SUPR0GetKernelFeatures },
    236     { "SUPR0GetPagingMode",                     (void *)SUPR0GetPagingMode },
    237     { "SUPR0GetSvmUsability",                   (void *)SUPR0GetSvmUsability },
    238     { "SUPR0GetVmxUsability",                   (void *)SUPR0GetVmxUsability },
    239     { "SUPR0LockMem",                           (void *)SUPR0LockMem },
    240     { "SUPR0LowAlloc",                          (void *)SUPR0LowAlloc },
    241     { "SUPR0LowFree",                           (void *)SUPR0LowFree },
    242     { "SUPR0MemAlloc",                          (void *)SUPR0MemAlloc },
    243     { "SUPR0MemFree",                           (void *)SUPR0MemFree },
    244     { "SUPR0MemGetPhys",                        (void *)SUPR0MemGetPhys },
    245     { "SUPR0ObjAddRef",                         (void *)SUPR0ObjAddRef },
    246     { "SUPR0ObjAddRefEx",                       (void *)SUPR0ObjAddRefEx },
    247     { "SUPR0ObjRegister",                       (void *)SUPR0ObjRegister },
    248     { "SUPR0ObjRelease",                        (void *)SUPR0ObjRelease },
    249     { "SUPR0ObjVerifyAccess",                   (void *)SUPR0ObjVerifyAccess },
    250     { "SUPR0PageAllocEx",                       (void *)SUPR0PageAllocEx },
    251     { "SUPR0PageFree",                          (void *)SUPR0PageFree },
    252     { "SUPR0Printf",                            (void *)SUPR0Printf },
    253     { "SUPR0TracerDeregisterDrv",               (void *)SUPR0TracerDeregisterDrv },
    254     { "SUPR0TracerDeregisterImpl",              (void *)SUPR0TracerDeregisterImpl },
    255     { "SUPR0TracerFireProbe",                   (void *)SUPR0TracerFireProbe },
    256     { "SUPR0TracerRegisterDrv",                 (void *)SUPR0TracerRegisterDrv },
    257     { "SUPR0TracerRegisterImpl",                (void *)SUPR0TracerRegisterImpl },
    258     { "SUPR0TracerRegisterModule",              (void *)SUPR0TracerRegisterModule },
    259     { "SUPR0TracerUmodProbeFire",               (void *)SUPR0TracerUmodProbeFire },
    260     { "SUPR0UnlockMem",                         (void *)SUPR0UnlockMem },
    261     { "SUPSemEventClose",                       (void *)SUPSemEventClose },
    262     { "SUPSemEventCreate",                      (void *)SUPSemEventCreate },
    263     { "SUPSemEventGetResolution",               (void *)SUPSemEventGetResolution },
    264     { "SUPSemEventMultiClose",                  (void *)SUPSemEventMultiClose },
    265     { "SUPSemEventMultiCreate",                 (void *)SUPSemEventMultiCreate },
    266     { "SUPSemEventMultiGetResolution",          (void *)SUPSemEventMultiGetResolution },
    267     { "SUPSemEventMultiReset",                  (void *)SUPSemEventMultiReset },
    268     { "SUPSemEventMultiSignal",                 (void *)SUPSemEventMultiSignal },
    269     { "SUPSemEventMultiWait",                   (void *)SUPSemEventMultiWait },
    270     { "SUPSemEventMultiWaitNoResume",           (void *)SUPSemEventMultiWaitNoResume },
    271     { "SUPSemEventMultiWaitNsAbsIntr",          (void *)SUPSemEventMultiWaitNsAbsIntr },
    272     { "SUPSemEventMultiWaitNsRelIntr",          (void *)SUPSemEventMultiWaitNsRelIntr },
    273     { "SUPSemEventSignal",                      (void *)SUPSemEventSignal },
    274     { "SUPSemEventWait",                        (void *)SUPSemEventWait },
    275     { "SUPSemEventWaitNoResume",                (void *)SUPSemEventWaitNoResume },
    276     { "SUPSemEventWaitNsAbsIntr",               (void *)SUPSemEventWaitNsAbsIntr },
    277     { "SUPSemEventWaitNsRelIntr",               (void *)SUPSemEventWaitNsRelIntr },
    278 
    279     { "RTAssertAreQuiet",                       (void *)RTAssertAreQuiet },
    280     { "RTAssertMayPanic",                       (void *)RTAssertMayPanic },
    281     { "RTAssertMsg1",                           (void *)RTAssertMsg1 },
    282     { "RTAssertMsg2AddV",                       (void *)RTAssertMsg2AddV },
    283     { "RTAssertMsg2V",                          (void *)RTAssertMsg2V },
    284     { "RTAssertSetMayPanic",                    (void *)RTAssertSetMayPanic },
    285     { "RTAssertSetQuiet",                       (void *)RTAssertSetQuiet },
    286     { "RTCrc32",                                (void *)RTCrc32 },
    287     { "RTCrc32Finish",                          (void *)RTCrc32Finish },
    288     { "RTCrc32Process",                         (void *)RTCrc32Process },
    289     { "RTCrc32Start",                           (void *)RTCrc32Start },
    290     { "RTErrConvertFromErrno",                  (void *)RTErrConvertFromErrno },
    291     { "RTErrConvertToErrno",                    (void *)RTErrConvertToErrno },
    292     { "RTHandleTableAllocWithCtx",              (void *)RTHandleTableAllocWithCtx },
    293     { "RTHandleTableCreate",                    (void *)RTHandleTableCreate },
    294     { "RTHandleTableCreateEx",                  (void *)RTHandleTableCreateEx },
    295     { "RTHandleTableDestroy",                   (void *)RTHandleTableDestroy },
    296     { "RTHandleTableFreeWithCtx",               (void *)RTHandleTableFreeWithCtx },
    297     { "RTHandleTableLookupWithCtx",             (void *)RTHandleTableLookupWithCtx },
    298     { "RTLogDefaultInstance",                   (void *)RTLogDefaultInstance },
    299     { "RTLogGetDefaultInstance",                (void *)RTLogGetDefaultInstance },
    300     { "RTLogLoggerExV",                         (void *)RTLogLoggerExV },
    301     { "RTLogPrintfV",                           (void *)RTLogPrintfV },
    302     { "RTLogRelDefaultInstance",                (void *)RTLogRelDefaultInstance },
    303     { "RTLogSetDefaultInstanceThread",          (void *)RTLogSetDefaultInstanceThread },
    304     { "RTMemAllocExTag",                        (void *)RTMemAllocExTag },
    305     { "RTMemAllocTag",                          (void *)RTMemAllocTag },
    306     { "RTMemAllocVarTag",                       (void *)RTMemAllocVarTag },
    307     { "RTMemAllocZTag",                         (void *)RTMemAllocZTag },
    308     { "RTMemAllocZVarTag",                      (void *)RTMemAllocZVarTag },
    309     { "RTMemDupExTag",                          (void *)RTMemDupExTag },
    310     { "RTMemDupTag",                            (void *)RTMemDupTag },
    311     { "RTMemFree",                              (void *)RTMemFree },
    312     { "RTMemFreeEx",                            (void *)RTMemFreeEx },
    313     { "RTMemReallocTag",                        (void *)RTMemReallocTag },
    314     { "RTMpCpuId",                              (void *)RTMpCpuId },
    315     { "RTMpCpuIdFromSetIndex",                  (void *)RTMpCpuIdFromSetIndex },
    316     { "RTMpCpuIdToSetIndex",                    (void *)RTMpCpuIdToSetIndex },
    317     { "RTMpGetArraySize",                       (void *)RTMpGetArraySize },
    318     { "RTMpGetCount",                           (void *)RTMpGetCount },
    319     { "RTMpGetMaxCpuId",                        (void *)RTMpGetMaxCpuId },
    320     { "RTMpGetOnlineCount",                     (void *)RTMpGetOnlineCount },
    321     { "RTMpGetOnlineSet",                       (void *)RTMpGetOnlineSet },
    322     { "RTMpGetSet",                             (void *)RTMpGetSet },
    323     { "RTMpIsCpuOnline",                        (void *)RTMpIsCpuOnline },
    324     { "RTMpIsCpuPossible",                      (void *)RTMpIsCpuPossible },
    325     { "RTMpIsCpuWorkPending",                   (void *)RTMpIsCpuWorkPending },
    326     { "RTMpNotificationDeregister",             (void *)RTMpNotificationDeregister },
    327     { "RTMpNotificationRegister",               (void *)RTMpNotificationRegister },
    328     { "RTMpOnAll",                              (void *)RTMpOnAll },
    329     { "RTMpOnOthers",                           (void *)RTMpOnOthers },
    330     { "RTMpOnSpecific",                         (void *)RTMpOnSpecific },
    331     { "RTMpPokeCpu",                            (void *)RTMpPokeCpu },
    332     { "RTNetIPv4AddDataChecksum",               (void *)RTNetIPv4AddDataChecksum },
    333     { "RTNetIPv4AddTCPChecksum",                (void *)RTNetIPv4AddTCPChecksum },
    334     { "RTNetIPv4AddUDPChecksum",                (void *)RTNetIPv4AddUDPChecksum },
    335     { "RTNetIPv4FinalizeChecksum",              (void *)RTNetIPv4FinalizeChecksum },
    336     { "RTNetIPv4HdrChecksum",                   (void *)RTNetIPv4HdrChecksum },
    337     { "RTNetIPv4IsDHCPValid",                   (void *)RTNetIPv4IsDHCPValid },
    338     { "RTNetIPv4IsHdrValid",                    (void *)RTNetIPv4IsHdrValid },
    339     { "RTNetIPv4IsTCPSizeValid",                (void *)RTNetIPv4IsTCPSizeValid },
    340     { "RTNetIPv4IsTCPValid",                    (void *)RTNetIPv4IsTCPValid },
    341     { "RTNetIPv4IsUDPSizeValid",                (void *)RTNetIPv4IsUDPSizeValid },
    342     { "RTNetIPv4IsUDPValid",                    (void *)RTNetIPv4IsUDPValid },
    343     { "RTNetIPv4PseudoChecksum",                (void *)RTNetIPv4PseudoChecksum },
    344     { "RTNetIPv4PseudoChecksumBits",            (void *)RTNetIPv4PseudoChecksumBits },
    345     { "RTNetIPv4TCPChecksum",                   (void *)RTNetIPv4TCPChecksum },
    346     { "RTNetIPv4UDPChecksum",                   (void *)RTNetIPv4UDPChecksum },
    347     { "RTNetIPv6PseudoChecksum",                (void *)RTNetIPv6PseudoChecksum },
    348     { "RTNetIPv6PseudoChecksumBits",            (void *)RTNetIPv6PseudoChecksumBits },
    349     { "RTNetIPv6PseudoChecksumEx",              (void *)RTNetIPv6PseudoChecksumEx },
    350     { "RTNetTCPChecksum",                       (void *)RTNetTCPChecksum },
    351     { "RTNetUDPChecksum",                       (void *)RTNetUDPChecksum },
    352     { "RTPowerNotificationDeregister",          (void *)RTPowerNotificationDeregister },
    353     { "RTPowerNotificationRegister",            (void *)RTPowerNotificationRegister },
    354     { "RTProcSelf",                             (void *)RTProcSelf },
    355     { "RTR0AssertPanicSystem",                  (void *)RTR0AssertPanicSystem },
    356     { "RTR0MemAreKrnlAndUsrDifferent",          (void *)RTR0MemAreKrnlAndUsrDifferent },
    357     { "RTR0MemKernelIsValidAddr",               (void *)RTR0MemKernelIsValidAddr },
    358     { "RTR0MemKernelCopyFrom",                  (void *)RTR0MemKernelCopyFrom },
    359     { "RTR0MemKernelCopyTo",                    (void *)RTR0MemKernelCopyTo },
    360     { "RTR0MemObjAddress",                      (void *)RTR0MemObjAddress },
    361     { "RTR0MemObjAddressR3",                    (void *)RTR0MemObjAddressR3 },
    362     { "RTR0MemObjAllocContTag",                 (void *)RTR0MemObjAllocContTag },
    363     { "RTR0MemObjAllocLowTag",                  (void *)RTR0MemObjAllocLowTag },
    364     { "RTR0MemObjAllocPageTag",                 (void *)RTR0MemObjAllocPageTag },
    365     { "RTR0MemObjAllocPhysExTag",               (void *)RTR0MemObjAllocPhysExTag },
    366     { "RTR0MemObjAllocPhysNCTag",               (void *)RTR0MemObjAllocPhysNCTag },
    367     { "RTR0MemObjAllocPhysTag",                 (void *)RTR0MemObjAllocPhysTag },
    368     { "RTR0MemObjEnterPhysTag",                 (void *)RTR0MemObjEnterPhysTag },
    369     { "RTR0MemObjFree",                         (void *)RTR0MemObjFree },
    370     { "RTR0MemObjGetPagePhysAddr",              (void *)RTR0MemObjGetPagePhysAddr },
    371     { "RTR0MemObjIsMapping",                    (void *)RTR0MemObjIsMapping },
    372     { "RTR0MemObjLockUserTag",                  (void *)RTR0MemObjLockUserTag },
    373     { "RTR0MemObjMapKernelExTag",               (void *)RTR0MemObjMapKernelExTag },
    374     { "RTR0MemObjMapKernelTag",                 (void *)RTR0MemObjMapKernelTag },
    375     { "RTR0MemObjMapUserTag",                   (void *)RTR0MemObjMapUserTag },
    376     { "RTR0MemObjProtect",                      (void *)RTR0MemObjProtect },
    377     { "RTR0MemObjSize",                         (void *)RTR0MemObjSize },
    378     { "RTR0MemUserCopyFrom",                    (void *)RTR0MemUserCopyFrom },
    379     { "RTR0MemUserCopyTo",                      (void *)RTR0MemUserCopyTo },
    380     { "RTR0MemUserIsValidAddr",                 (void *)RTR0MemUserIsValidAddr },
    381     { "RTR0ProcHandleSelf",                     (void *)RTR0ProcHandleSelf },
    382     { "RTSemEventCreate",                       (void *)RTSemEventCreate },
    383     { "RTSemEventDestroy",                      (void *)RTSemEventDestroy },
    384     { "RTSemEventGetResolution",                (void *)RTSemEventGetResolution },
    385     { "RTSemEventMultiCreate",                  (void *)RTSemEventMultiCreate },
    386     { "RTSemEventMultiDestroy",                 (void *)RTSemEventMultiDestroy },
    387     { "RTSemEventMultiGetResolution",           (void *)RTSemEventMultiGetResolution },
    388     { "RTSemEventMultiReset",                   (void *)RTSemEventMultiReset },
    389     { "RTSemEventMultiSignal",                  (void *)RTSemEventMultiSignal },
    390     { "RTSemEventMultiWait",                    (void *)RTSemEventMultiWait },
    391     { "RTSemEventMultiWaitEx",                  (void *)RTSemEventMultiWaitEx },
    392     { "RTSemEventMultiWaitExDebug",             (void *)RTSemEventMultiWaitExDebug },
    393     { "RTSemEventMultiWaitNoResume",            (void *)RTSemEventMultiWaitNoResume },
    394     { "RTSemEventSignal",                       (void *)RTSemEventSignal },
    395     { "RTSemEventWait",                         (void *)RTSemEventWait },
    396     { "RTSemEventWaitEx",                       (void *)RTSemEventWaitEx },
    397     { "RTSemEventWaitExDebug",                  (void *)RTSemEventWaitExDebug },
    398     { "RTSemEventWaitNoResume",                 (void *)RTSemEventWaitNoResume },
    399     { "RTSemFastMutexCreate",                   (void *)RTSemFastMutexCreate },
    400     { "RTSemFastMutexDestroy",                  (void *)RTSemFastMutexDestroy },
    401     { "RTSemFastMutexRelease",                  (void *)RTSemFastMutexRelease },
    402     { "RTSemFastMutexRequest",                  (void *)RTSemFastMutexRequest },
    403     { "RTSemMutexCreate",                       (void *)RTSemMutexCreate },
    404     { "RTSemMutexDestroy",                      (void *)RTSemMutexDestroy },
    405     { "RTSemMutexRelease",                      (void *)RTSemMutexRelease },
    406     { "RTSemMutexRequest",                      (void *)RTSemMutexRequest },
    407     { "RTSemMutexRequestDebug",                 (void *)RTSemMutexRequestDebug },
    408     { "RTSemMutexRequestNoResume",              (void *)RTSemMutexRequestNoResume },
    409     { "RTSemMutexRequestNoResumeDebug",         (void *)RTSemMutexRequestNoResumeDebug },
    410     { "RTSpinlockAcquire",                      (void *)RTSpinlockAcquire },
    411     { "RTSpinlockCreate",                       (void *)RTSpinlockCreate },
    412     { "RTSpinlockDestroy",                      (void *)RTSpinlockDestroy },
    413     { "RTSpinlockRelease",                      (void *)RTSpinlockRelease },
    414     { "RTStrCopy",                              (void *)RTStrCopy },
    415     { "RTStrDupTag",                            (void *)RTStrDupTag },
    416     { "RTStrFormat",                            (void *)RTStrFormat },
    417     { "RTStrFormatNumber",                      (void *)RTStrFormatNumber },
    418     { "RTStrFormatTypeDeregister",              (void *)RTStrFormatTypeDeregister },
    419     { "RTStrFormatTypeRegister",                (void *)RTStrFormatTypeRegister },
    420     { "RTStrFormatTypeSetUser",                 (void *)RTStrFormatTypeSetUser },
    421     { "RTStrFormatV",                           (void *)RTStrFormatV },
    422     { "RTStrFree",                              (void *)RTStrFree },
    423     { "RTStrNCmp",                              (void *)RTStrNCmp },
    424     { "RTStrPrintf",                            (void *)RTStrPrintf },
    425     { "RTStrPrintfEx",                          (void *)RTStrPrintfEx },
    426     { "RTStrPrintfExV",                         (void *)RTStrPrintfExV },
    427     { "RTStrPrintfV",                           (void *)RTStrPrintfV },
    428     { "RTThreadCreate",                         (void *)RTThreadCreate },
    429     { "RTThreadCtxHooksAreRegistered",          (void *)RTThreadCtxHooksAreRegistered },
    430     { "RTThreadCtxHooksCreate",                 (void *)RTThreadCtxHooksCreate },
    431     { "RTThreadCtxHooksDeregister",             (void *)RTThreadCtxHooksDeregister },
    432     { "RTThreadCtxHooksRegister",               (void *)RTThreadCtxHooksRegister },
    433     { "RTThreadCtxHooksRelease",                (void *)RTThreadCtxHooksRelease },
    434     { "RTThreadCtxHooksRetain",                 (void *)RTThreadCtxHooksRetain },
    435     { "RTThreadGetName",                        (void *)RTThreadGetName },
    436     { "RTThreadGetNative",                      (void *)RTThreadGetNative },
    437     { "RTThreadGetType",                        (void *)RTThreadGetType },
    438     { "RTThreadIsInInterrupt",                  (void *)RTThreadIsInInterrupt },
    439     { "RTThreadNativeSelf",                     (void *)RTThreadNativeSelf },
    440     { "RTThreadPreemptDisable",                 (void *)RTThreadPreemptDisable },
    441     { "RTThreadPreemptIsEnabled",               (void *)RTThreadPreemptIsEnabled },
    442     { "RTThreadPreemptIsPending",               (void *)RTThreadPreemptIsPending },
    443     { "RTThreadPreemptIsPendingTrusty",         (void *)RTThreadPreemptIsPendingTrusty },
    444     { "RTThreadPreemptIsPossible",              (void *)RTThreadPreemptIsPossible },
    445     { "RTThreadPreemptRestore",                 (void *)RTThreadPreemptRestore },
    446     { "RTThreadSelf",                           (void *)RTThreadSelf },
    447     { "RTThreadSelfName",                       (void *)RTThreadSelfName },
    448     { "RTThreadSleep",                          (void *)RTThreadSleep },
    449     { "RTThreadUserReset",                      (void *)RTThreadUserReset },
    450     { "RTThreadUserSignal",                     (void *)RTThreadUserSignal },
    451     { "RTThreadUserWait",                       (void *)RTThreadUserWait },
    452     { "RTThreadUserWaitNoResume",               (void *)RTThreadUserWaitNoResume },
    453     { "RTThreadWait",                           (void *)RTThreadWait },
    454     { "RTThreadWaitNoResume",                   (void *)RTThreadWaitNoResume },
    455     { "RTThreadYield",                          (void *)RTThreadYield },
    456     { "RTTimeMilliTS",                          (void *)RTTimeMilliTS },
    457     { "RTTimeNanoTS",                           (void *)RTTimeNanoTS },
    458     { "RTTimeNow",                              (void *)RTTimeNow },
    459     { "RTTimerCanDoHighResolution",             (void *)RTTimerCanDoHighResolution },
    460     { "RTTimerChangeInterval",                  (void *)RTTimerChangeInterval },
    461     { "RTTimerCreate",                          (void *)RTTimerCreate },
    462     { "RTTimerCreateEx",                        (void *)RTTimerCreateEx },
    463     { "RTTimerDestroy",                         (void *)RTTimerDestroy },
    464     { "RTTimerGetSystemGranularity",            (void *)RTTimerGetSystemGranularity },
    465     { "RTTimerReleaseSystemGranularity",        (void *)RTTimerReleaseSystemGranularity },
    466     { "RTTimerRequestSystemGranularity",        (void *)RTTimerRequestSystemGranularity },
    467     { "RTTimerStart",                           (void *)RTTimerStart },
    468     { "RTTimerStop",                            (void *)RTTimerStop },
    469     { "RTTimeSystemMilliTS",                    (void *)RTTimeSystemMilliTS },
    470     { "RTTimeSystemNanoTS",                     (void *)RTTimeSystemNanoTS },
    471     { "RTUuidCompare",                          (void *)RTUuidCompare },
    472     { "RTUuidCompareStr",                       (void *)RTUuidCompareStr },
    473     { "RTUuidFromStr",                          (void *)RTUuidFromStr },
    474 /* SED: END */
    475 };
    476 
    477 #if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
    478 /**
    479  * Drag in the rest of IPRT since we share it with the
    480  * rest of the kernel modules on darwin.
         *
         * The guard above also enables this table on Solaris and FreeBSD.  The table
         * is NULL terminated; the comment above each entry names the module the
         * function is referenced for.
    481  */
    482 PFNRT g_apfnVBoxDrvIPRTDeps[] =
    483 {
    484     /* VBoxNetAdp */
    485     (PFNRT)RTRandBytes,
    486     /* VBoxUSB */
    487     (PFNRT)RTPathStripFilename,
    488     NULL
    489 };
    490 #endif  /* RT_OS_DARWIN || RT_OS_SOLARIS || RT_OS_FREEBSD */
    491 
    492 
    493 /**
    494  * Initializes the device extension structure.
    495  *
    496  * @returns IPRT status code.  On failure everything created by this function
         *          has been destroyed again by the unwinding code at the end.
    497  * @param   pDevExt     The device extension to initialize.
    498  * @param   cbSession   The size of the session structure.  The size of
    499  *                      SUPDRVSESSION may be smaller when SUPDRV_AGNOSTIC is
    500  *                      defined because we're skipping the OS specific members
    501  *                      then.
    502  */
    503 int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt, size_t cbSession)
    504 {
    505     int rc;
    506 
    507 #ifdef SUPDRV_WITH_RELEASE_LOGGER
    508     /*
    509      * Create the release log.
    510      */
    511     static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
    512     PRTLOGGER pRelLogger;
    513     rc = RTLogCreate(&pRelLogger, 0 /* fFlags */, "all",
    514                      "VBOX_RELEASE_LOG", RT_ELEMENTS(s_apszGroups), s_apszGroups, RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER, NULL);
    515     if (RT_SUCCESS(rc))
    516         RTLogRelSetDefaultInstance(pRelLogger);
    517     /** @todo Add native hook for getting logger config parameters and setting
    518      *        them. On linux we should use the module parameter stuff... */
    519 #endif
    520 
    521     /*
    522      * Initialize it.
    523      */
    524     memset(pDevExt, 0, sizeof(*pDevExt)); /* Does not wipe OS specific tail section of the structure. */
    525     pDevExt->Spinlock = NIL_RTSPINLOCK;
    526     pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
    527     pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
    528     pDevExt->idTscDeltaInitiator = NIL_RTCPUID;
    529     rc = RTSpinlockCreate(&pDevExt->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvDevExt");
    530     if (RT_SUCCESS(rc))
    531         rc = RTSpinlockCreate(&pDevExt->hGipSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvGip");
    532     if (RT_SUCCESS(rc))
    533         rc = RTSpinlockCreate(&pDevExt->hSessionHashTabSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "SUPDrvSession");
    534 
    535     if (RT_SUCCESS(rc))
    536 #ifdef SUPDRV_USE_MUTEX_FOR_LDR
    537         rc = RTSemMutexCreate(&pDevExt->mtxLdr);
    538 #else
    539         rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
    540 #endif
    541     if (RT_SUCCESS(rc))
    542     {
    543         rc = RTSemFastMutexCreate(&pDevExt->mtxComponentFactory);
    544         if (RT_SUCCESS(rc))
    545         {
                    /* Must be keyed on the same switch as the mtxGip destruction code. */
    546 #ifdef SUPDRV_USE_MUTEX_FOR_GIP
    547             rc = RTSemMutexCreate(&pDevExt->mtxGip);
    548 #else
    549             rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
    550 #endif
    551             if (RT_SUCCESS(rc))
    552             {
    553                 rc = supdrvGipCreate(pDevExt);
    554                 if (RT_SUCCESS(rc))
    555                 {
    556                     rc = supdrvTracerInit(pDevExt);
    557                     if (RT_SUCCESS(rc))
    558                     {
    559                         pDevExt->pLdrInitImage  = NULL;
    560                         pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
    561                         pDevExt->u32Cookie      = BIRD;  /** @todo make this random? */
    562                         pDevExt->cbSession      = (uint32_t)cbSession;
    563 
    564                         /*
    565                          * Fixup the absolute symbols.
    566                          *
    567                          * Because of the table indexing assumptions we'll have a little #ifdef orgy
    568                          * here rather than distributing this to OS specific files. At least for now.
    569                          */
    570 #ifdef RT_OS_DARWIN
    571 # if ARCH_BITS == 32
    572                         if (SUPR0GetPagingMode() >= SUPPAGINGMODE_AMD64)
    573                         {
    574                             g_aFunctions[0].pfn = (void *)1;                    /* SUPR0AbsIs64bit */
    575                             g_aFunctions[1].pfn = (void *)0x80;                 /* SUPR0Abs64bitKernelCS - KERNEL64_CS, seg.h */
    576                             g_aFunctions[2].pfn = (void *)0x88;                 /* SUPR0Abs64bitKernelSS - KERNEL64_SS, seg.h */
    577                             g_aFunctions[3].pfn = (void *)0x88;                 /* SUPR0Abs64bitKernelDS - KERNEL64_SS, seg.h */
    578                         }
    579                         else
    580                             g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[3].pfn = (void *)0;
    581                         g_aFunctions[4].pfn = (void *)0x08;                     /* SUPR0AbsKernelCS - KERNEL_CS, seg.h */
    582                         g_aFunctions[5].pfn = (void *)0x10;                     /* SUPR0AbsKernelSS - KERNEL_DS, seg.h */
    583                         g_aFunctions[6].pfn = (void *)0x10;                     /* SUPR0AbsKernelDS - KERNEL_DS, seg.h */
    584                         g_aFunctions[7].pfn = (void *)0x10;                     /* SUPR0AbsKernelES - KERNEL_DS, seg.h */
    585                         g_aFunctions[8].pfn = (void *)0x10;                     /* SUPR0AbsKernelFS - KERNEL_DS, seg.h */
    586                         g_aFunctions[9].pfn = (void *)0x48;                     /* SUPR0AbsKernelGS - CPU_DATA_GS, seg.h */
    587 # else /* 64-bit darwin: */
    588                         g_aFunctions[0].pfn = (void *)1;                        /* SUPR0AbsIs64bit */
    589                         g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS();    /* SUPR0Abs64bitKernelCS */
    590                         g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS();    /* SUPR0Abs64bitKernelSS */
    591                         g_aFunctions[3].pfn = (void *)0;                        /* SUPR0Abs64bitKernelDS */
    592                         g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS();    /* SUPR0AbsKernelCS */
    593                         g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS();    /* SUPR0AbsKernelSS */
    594                         g_aFunctions[6].pfn = (void *)0;                        /* SUPR0AbsKernelDS */
    595                         g_aFunctions[7].pfn = (void *)0;                        /* SUPR0AbsKernelES */
    596                         g_aFunctions[8].pfn = (void *)0;                        /* SUPR0AbsKernelFS */
    597                         g_aFunctions[9].pfn = (void *)0;                        /* SUPR0AbsKernelGS */
    598 
    599 # endif
    600 #else  /* !RT_OS_DARWIN */
    601 # if ARCH_BITS == 64
    602                         g_aFunctions[0].pfn = (void *)1;                        /* SUPR0AbsIs64bit */
    603                         g_aFunctions[1].pfn = (void *)(uintptr_t)ASMGetCS();    /* SUPR0Abs64bitKernelCS */
    604                         g_aFunctions[2].pfn = (void *)(uintptr_t)ASMGetSS();    /* SUPR0Abs64bitKernelSS */
    605                         g_aFunctions[3].pfn = (void *)(uintptr_t)ASMGetDS();    /* SUPR0Abs64bitKernelDS */
    606 # else
    607                         g_aFunctions[0].pfn = g_aFunctions[1].pfn = g_aFunctions[2].pfn = g_aFunctions[3].pfn = (void *)0;
    608 # endif
    609                         g_aFunctions[4].pfn = (void *)(uintptr_t)ASMGetCS();    /* SUPR0AbsKernelCS */
    610                         g_aFunctions[5].pfn = (void *)(uintptr_t)ASMGetSS();    /* SUPR0AbsKernelSS */
    611                         g_aFunctions[6].pfn = (void *)(uintptr_t)ASMGetDS();    /* SUPR0AbsKernelDS */
    612                         g_aFunctions[7].pfn = (void *)(uintptr_t)ASMGetES();    /* SUPR0AbsKernelES */
    613                         g_aFunctions[8].pfn = (void *)(uintptr_t)ASMGetFS();    /* SUPR0AbsKernelFS */
    614                         g_aFunctions[9].pfn = (void *)(uintptr_t)ASMGetGS();    /* SUPR0AbsKernelGS */
    615 #endif /* !RT_OS_DARWIN */
    616                         return VINF_SUCCESS;
    617                     }
    618 
    619                     supdrvGipDestroy(pDevExt);
    620                 }
    621 
    622 #ifdef SUPDRV_USE_MUTEX_FOR_GIP
    623                 RTSemMutexDestroy(pDevExt->mtxGip);
    624                 pDevExt->mtxGip = NIL_RTSEMMUTEX;
    625 #else
    626                 RTSemFastMutexDestroy(pDevExt->mtxGip);
    627                 pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
    628 #endif
    629             }
    630             RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
    631             pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
    632         }
    633 #ifdef SUPDRV_USE_MUTEX_FOR_LDR
    634         RTSemMutexDestroy(pDevExt->mtxLdr);
    635         pDevExt->mtxLdr = NIL_RTSEMMUTEX;
    636 #else
    637         RTSemFastMutexDestroy(pDevExt->mtxLdr);
    638         pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
    639 #endif
    640     }
    641 
            /* Failure: tear down the spinlocks and the loggers as well. */
    642     RTSpinlockDestroy(pDevExt->Spinlock);
    643     pDevExt->Spinlock = NIL_RTSPINLOCK;
    644     RTSpinlockDestroy(pDevExt->hGipSpinlock);
    645     pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
    646     RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
    647     pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
    648 
    649 #ifdef SUPDRV_WITH_RELEASE_LOGGER
    650     RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
    651     RTLogDestroy(RTLogSetDefaultInstance(NULL));
    652 #endif
    653 
    654     return rc;
    655 }
    656 
    657 
    658 /**
    659  * Delete the device extension (e.g. cleanup members).
         *
         * Destroys the mutexes and spinlocks, frees whatever is left on the object
         * list and the free usage record list, destroys the GIP and its spinlock,
         * terminates the tracer and, when SUPDRV_WITH_RELEASE_LOGGER is defined,
         * destroys the default release and debug loggers.
    660  *
    661  * @param   pDevExt     The device extension to delete.
    662  */
    663 void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
    664 {
    665     PSUPDRVOBJ          pObj;
    666     PSUPDRVUSAGE        pUsage;
    667 
    668     /*
    669      * Kill mutexes and spinlocks.
         *
         * The #ifdefs select the destroy call and NIL value matching the
         * semaphore kind chosen for each mutex.
    670      */
    671 #ifdef SUPDRV_USE_MUTEX_FOR_GIP
    672     RTSemMutexDestroy(pDevExt->mtxGip);
    673     pDevExt->mtxGip = NIL_RTSEMMUTEX;
    674 #else
    675     RTSemFastMutexDestroy(pDevExt->mtxGip);
    676     pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
    677 #endif
    678 #ifdef SUPDRV_USE_MUTEX_FOR_LDR
    679     RTSemMutexDestroy(pDevExt->mtxLdr);
    680     pDevExt->mtxLdr = NIL_RTSEMMUTEX;
    681 #else
    682     RTSemFastMutexDestroy(pDevExt->mtxLdr);
    683     pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
    684 #endif
    685     RTSpinlockDestroy(pDevExt->Spinlock);
    686     pDevExt->Spinlock = NIL_RTSPINLOCK;
    687     RTSemFastMutexDestroy(pDevExt->mtxComponentFactory);
    688     pDevExt->mtxComponentFactory = NIL_RTSEMFASTMUTEX;
    689     RTSpinlockDestroy(pDevExt->hSessionHashTabSpinlock);
    690     pDevExt->hSessionHashTabSpinlock = NIL_RTSPINLOCK;
    691 
    692     /*
    693      * Free lists.
    694      */
    695     /* objects. */
    696     pObj = pDevExt->pObjs;
    697     Assert(!pObj);                      /* (can trigger on forced unloads) */
    698     pDevExt->pObjs = NULL;
            /* Detach the list first, then free node by node (fetch pNext before freeing). */
    699     while (pObj)
    700     {
    701         void *pvFree = pObj;
    702         pObj = pObj->pNext;
    703         RTMemFree(pvFree);
    704     }
    705 
    706     /* usage records. */
    707     pUsage = pDevExt->pUsageFree;
    708     pDevExt->pUsageFree = NULL;
    709     while (pUsage)
    710     {
    711         void *pvFree = pUsage;
    712         pUsage = pUsage->pNext;
    713         RTMemFree(pvFree);
    714     }
    715 
    716     /* kill the GIP. */
    717     supdrvGipDestroy(pDevExt);
    718     RTSpinlockDestroy(pDevExt->hGipSpinlock);
    719     pDevExt->hGipSpinlock = NIL_RTSPINLOCK;
    720 
    721     supdrvTracerTerm(pDevExt);
    722 
    723 #ifdef SUPDRV_WITH_RELEASE_LOGGER
    724     /* destroy the loggers. */
    725     RTLogDestroy(RTLogRelSetDefaultInstance(NULL));
    726     RTLogDestroy(RTLogSetDefaultInstance(NULL));
    727 #endif
    728 }
    729 
    730 
    731 /**
    732  * Create session.
         *
         * Allocates a zeroed session structure of pDevExt->cbSession bytes, creates its
         * spinlock and handle table and fills in the non-zero members.  The new
         * session starts out with a reference count of 1.
    733  *
    734  * @returns IPRT status code.  VERR_INVALID_PARAMETER if the device extension
         *          is not valid, VERR_NO_MEMORY if the allocation fails, otherwise the
         *          status of the failing spinlock / handle table creation.
    735  * @param   pDevExt         Device extension.
    736  * @param   fUser           Flag indicating whether this is a user or kernel
    737  *                          session.
    738  * @param   fUnrestricted   Unrestricted access (system) or restricted access
    739  *                          (user)?
    740  * @param   ppSession       Where to store the pointer to the session data.
         *                          Set to NULL if the allocation or a later
         *                          initialization step fails.
    741  */
    742 int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, bool fUser, bool fUnrestricted, PSUPDRVSESSION *ppSession)
    743 {
    744     int             rc;
    745     PSUPDRVSESSION  pSession;
    746 
    747     if (!SUP_IS_DEVEXT_VALID(pDevExt))
    748         return VERR_INVALID_PARAMETER;
    749 
    750     /*
    751      * Allocate memory for the session data.
    752      */
    753     pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(pDevExt->cbSession);
    754     if (pSession)
    755     {
    756         /* Initialize session data. */
    757         rc = RTSpinlockCreate(&pSession->Spinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "SUPDrvSession");
    758         if (RT_SUCCESS(rc))
    759         {
    760             rc = RTHandleTableCreateEx(&pSession->hHandleTable,
    761                                        RTHANDLETABLE_FLAGS_LOCKED_IRQ_SAFE | RTHANDLETABLE_FLAGS_CONTEXT,
    762                                        1 /*uBase*/, 32768 /*cMax*/, supdrvSessionObjHandleRetain, pSession);
    763             if (RT_SUCCESS(rc))
    764             {
    765                 Assert(pSession->Spinlock != NIL_RTSPINLOCK);
                        /* Members left commented out below rely on the zeroing done by RTMemAllocZ. */
    766                 pSession->pDevExt           = pDevExt;
    767                 pSession->u32Cookie         = BIRD_INV;
    768                 pSession->fUnrestricted     = fUnrestricted;
    769                 /*pSession->fInHashTable      = false; */
    770                 pSession->cRefs             = 1;
    771                 /*pSession->pCommonNextHash   = NULL;
    772                 pSession->ppOsSessionPtr    = NULL; */
    773                 if (fUser)
    774                 {
    775                     pSession->Process       = RTProcSelf();
    776                     pSession->R0Process     = RTR0ProcHandleSelf();
    777                 }
    778                 else
    779                 {
    780                     pSession->Process       = NIL_RTPROCESS;
    781                     pSession->R0Process     = NIL_RTR0PROCESS;
    782                 }
    783                 /*pSession->pLdrUsage         = NULL;
    784                 pSession->pVM               = NULL;
    785                 pSession->pUsage            = NULL;
    786                 pSession->pGip              = NULL;
    787                 pSession->fGipReferenced    = false;
    788                 pSession->Bundle.cUsed      = 0; */
    789                 pSession->Uid               = NIL_RTUID;
    790                 pSession->Gid               = NIL_RTGID;
    791                 /*pSession->uTracerData       = 0;*/
    792                 pSession->hTracerCaller     = NIL_RTNATIVETHREAD;
    793                 RTListInit(&pSession->TpProviders);
    794                 /*pSession->cTpProviders      = 0;*/
    795                 /*pSession->cTpProbesFiring   = 0;*/
    796                 RTListInit(&pSession->TpUmods);
    797                 /*RT_ZERO(pSession->apTpLookupTable);*/
    798 
    799                 VBOXDRV_SESSION_CREATE(pSession, fUser);
    800                 LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
    801                 return VINF_SUCCESS;
    802             }
    803 
    804             RTSpinlockDestroy(pSession->Spinlock);
    805         }
                /* Reached for both spinlock and handle table creation failures. */
    806         RTMemFree(pSession);
    807         *ppSession = NULL;
    808         Log(("Failed to create spinlock or handle table, rc=%d!\n", rc));
    809     }
    810     else
    811         rc = VERR_NO_MEMORY;
    812 
    813     return rc;
    814 }
    815 
    816 
    817 /**
    818  * Cleans up the session in the context of the process to which it belongs, the
    819  * caller will free the session and the session spinlock.
    820  *
    821  * This should normally occur when the session is closed or as the process
    822  * exits.  Careful reference counting in the OS specific code makes sure that
    823  * there cannot be any races between process/handle cleanup callbacks and
    824  * threads doing I/O control calls.
         *
         * In order, this removes the per-session logger, destroys the handle table,
         * drops the object references, runs the tracer cleanup, frees the memory
         * bundles, deregisters component factories, releases loaded images and
         * unmaps the GIP.
    825  *
    826  * @param   pDevExt     The device extension.
    827  * @param   pSession    Session data.
    828  */
    829 static void supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
    830 {
    831     int                 rc;
    832     PSUPDRVBUNDLE       pBundle;
    833     LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));
    834 
    835     Assert(!pSession->fInHashTable);
    836     Assert(!pSession->ppOsSessionPtr);
            /* The message takes four arguments: session and current values for both the handle and the PID. */
    837     AssertReleaseMsg(pSession->R0Process == RTR0ProcHandleSelf() || pSession->R0Process == NIL_RTR0PROCESS,
    838                      ("R0Process=%p cur=%p; Process=%u curpid=%u\n", pSession->R0Process, RTR0ProcHandleSelf(), pSession->Process, RTProcSelf()));
    839 
    840     /*
    841      * Remove logger instances related to this session.
    842      */
    843     RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);
    844 
    845     /*
    846      * Destroy the handle table.
    847      */
    848     rc = RTHandleTableDestroy(pSession->hHandleTable, supdrvSessionObjHandleDelete, pSession);
    849     AssertRC(rc);
    850     pSession->hHandleTable = NIL_RTHANDLETABLE;
    851 
    852     /*
    853      * Release object references made in this session.
    854      * In theory there should be no one racing us in this session.
    855      */
    856     Log2(("release objects - start\n"));
    857     if (pSession->pUsage)
    858     {
    859         PSUPDRVUSAGE    pUsage;
    860         RTSpinlockAcquire(pDevExt->Spinlock);
    861 
                /* The spinlock is held at the top of each iteration and dropped before any freeing / destructor call. */
    862         while ((pUsage = pSession->pUsage) != NULL)
    863         {
    864             PSUPDRVOBJ  pObj = pUsage->pObj;
    865             pSession->pUsage = pUsage->pNext;
    866 
    867             AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
    868             if (pUsage->cUsage < pObj->cUsage)
    869             {
    870                 pObj->cUsage -= pUsage->cUsage;
    871                 RTSpinlockRelease(pDevExt->Spinlock);
    872             }
    873             else
    874             {
    875                 /* Destroy the object and free the record. */
    876                 if (pDevExt->pObjs == pObj)
    877                     pDevExt->pObjs = pObj->pNext;
    878                 else
    879                 {
    880                     PSUPDRVOBJ pObjPrev;
    881                     for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
    882                         if (pObjPrev->pNext == pObj)
    883                         {
    884                             pObjPrev->pNext = pObj->pNext;
    885                             break;
    886                         }
    887                     Assert(pObjPrev);
    888                 }
    889                 RTSpinlockRelease(pDevExt->Spinlock);
    890 
    891                 Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
    892                      pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
    893                 if (pObj->pfnDestructor)
    894                     pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
    895                 RTMemFree(pObj);
    896             }
    897 
    898             /* free it and continue. */
    899             RTMemFree(pUsage);
    900 
    901             RTSpinlockAcquire(pDevExt->Spinlock);
    902         }
    903 
    904         RTSpinlockRelease(pDevExt->Spinlock);
    905         AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during desturction!\n"));
    906     }
    907     Log2(("release objects - done\n"));
    908 
    909     /*
    910      * Do tracer cleanups related to this session.
    911      */
    912     Log2(("release tracer stuff - start\n"));
    913     supdrvTracerCleanupSession(pDevExt, pSession);
    914     Log2(("release tracer stuff - end\n"));
    915 
    916     /*
    917      * Release memory allocated in the session.
    918      *
    919      * We do not serialize this as we assume that the application will
    920      * not allocate memory while closing the file handle object.
    921      */
    922     Log2(("freeing memory:\n"));
    923     pBundle = &pSession->Bundle;
    924     while (pBundle)
    925     {
    926         PSUPDRVBUNDLE   pToFree;
    927         unsigned        i;
    928 
    929         /*
    930          * Check and unlock all entries in the bundle.
    931          */
    932         for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
    933         {
    934             if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
    935             {
    936                 Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
    937                       (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
    938                 if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
    939                 {
    940                     rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
    941                     AssertRC(rc); /** @todo figure out how to handle this. */
    942                     pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
    943                 }
    944                 rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, true /* fFreeMappings */);
    945                 AssertRC(rc); /** @todo figure out how to handle this. */
    946                 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
    947                 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
    948             }
    949         }
    950 
    951         /*
    952          * Advance and free previous bundle.
             * The first bundle is embedded in the session structure and is not freed here.
    953          */
    954         pToFree = pBundle;
    955         pBundle = pBundle->pNext;
    956 
    957         pToFree->pNext = NULL;
    958         pToFree->cUsed = 0;
    959         if (pToFree != &pSession->Bundle)
    960             RTMemFree(pToFree);
    961     }
    962     Log2(("freeing memory - done\n"));
    963 
    964     /*
    965      * Deregister component factories.
    966      */
    967     RTSemFastMutexRequest(pDevExt->mtxComponentFactory);
    968     Log2(("deregistering component factories:\n"));
    969     if (pDevExt->pComponentFactoryHead)
    970     {
    971         PSUPDRVFACTORYREG pPrev = NULL;
    972         PSUPDRVFACTORYREG pCur = pDevExt->pComponentFactoryHead;
    973         while (pCur)
    974         {
    975             if (pCur->pSession == pSession)
    976             {
    977                 /* unlink it */
    978                 PSUPDRVFACTORYREG pNext = pCur->pNext;
    979                 if (pPrev)
    980                     pPrev->pNext = pNext;
    981                 else
    982                     pDevExt->pComponentFactoryHead = pNext;
    983 
    984                 /* free it */
    985                 pCur->pNext = NULL;
    986                 pCur->pSession = NULL;
    987                 pCur->pFactory = NULL;
    988                 RTMemFree(pCur);
    989 
    990                 /* next */
    991                 pCur = pNext;
    992             }
    993             else
    994             {
    995                 /* next */
    996                 pPrev = pCur;
    997                 pCur = pCur->pNext;
    998             }
    999         }
    1000     }
    1001     RTSemFastMutexRelease(pDevExt->mtxComponentFactory);
    1002     Log2(("deregistering component factories - done\n"));
    1003 
    1004     /*
    1005      * Loaded images needs to be dereferenced and possibly freed up.
    1006      */
    1007     supdrvLdrLock(pDevExt);
    1008     Log2(("freeing images:\n"));
    1009     if (pSession->pLdrUsage)
    1010     {
    1011         PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
    1012         pSession->pLdrUsage = NULL;
    1013         while (pUsage)
    1014         {
    1015             void           *pvFree = pUsage;
    1016             PSUPDRVLDRIMAGE pImage = pUsage->pImage;
                     /* Drop this session's share of the usage count; free the image if nothing else holds it. */
    1017             if (pImage->cUsage > pUsage->cUsage)
    1018                 pImage->cUsage -= pUsage->cUsage;
    1019             else
    1020                 supdrvLdrFree(pDevExt, pImage);
    1021             pUsage->pImage = NULL;
    1022             pUsage = pUsage->pNext;
    1023             RTMemFree(pvFree);
    1024         }
    1025     }
    1026     supdrvLdrUnlock(pDevExt);
    1027     Log2(("freeing images - done\n"));
    1028 
    1029     /*
    1030      * Unmap the GIP.
    1031      */
    1032     Log2(("umapping GIP:\n"));
    1033     if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    1034     {
    1035         SUPR0GipUnmap(pSession);
    1036         pSession->fGipReferenced = 0;
    1037     }
    1038     Log2(("umapping GIP - done\n"));
    1039 }
    1040 
    1041 
    1042 /**
    1043  * Common code for freeing a session when the reference count reaches zero.
          *
          * Runs the common cleanup (supdrvCleanupSession) followed by the OS specific
          * one (supdrvOSCleanupSession), then destroys the session spinlock and frees
          * the session structure itself.
    1044  *
    1045  * @param   pDevExt     Device extension.
    1046  * @param   pSession    Session data.
    1047  *                      This data will be freed by this routine.
    1048  */
    1049 static void supdrvDestroySession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
    1050 {
             /* NOTE(review): VBOXDRV_SESSION_CLOSE is defined elsewhere; presumably the counterpart of VBOXDRV_SESSION_CREATE - confirm. */
    1051     VBOXDRV_SESSION_CLOSE(pSession);
    1052 
    1053     /*
    1054      * Cleanup the session first.
    1055      */
    1056     supdrvCleanupSession(pDevExt, pSession);
    1057     supdrvOSCleanupSession(pDevExt, pSession);
    1058 
    1059     /*
    1060      * Free the rest of the session stuff.
          * The members are invalidated before the memory is released.
    1061      */
    1062     RTSpinlockDestroy(pSession->Spinlock);
    1063     pSession->Spinlock = NIL_RTSPINLOCK;
    1064     pSession->pDevExt = NULL;
    1065     RTMemFree(pSession);
    1066     LogFlow(("supdrvDestroySession: returns\n"));
    1067 }
    1068 
    1069 
    1070 /**
    1071  * Inserts the session into the global hash table.
    1072  *
    1073  * @retval  VINF_SUCCESS on success.
    1074  * @retval  VERR_WRONG_ORDER if the session was already inserted (asserted).
    1075  * @retval  VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
    1076  *          session (asserted).
    1077  * @retval  VERR_DUPLICATE if there is already another session for that pid
          *          with a different ring-0 process handle.
          * @retval  VERR_RESOURCE_IN_USE if there is already another session for that
          *          pid with the same ring-0 process handle.
    1078  *
    1079  * @param   pDevExt         The device extension.
    1080  * @param   pSession        The session.
    1081  * @param   ppOsSessionPtr  Pointer to the OS session pointer, if any is
    1082  *                          available and used.  This will be set to point to
    1083  *                          the session while under the protection of the session
    1084  *                          hash table spinlock.  It will also be kept in
    1085  *                          PSUPDRVSESSION::ppOsSessionPtr for lookup and
    1086  *                          cleanup use.
    1087  * @param   pvUser          Argument for supdrvOSSessionHashTabInserted.
    1088  */
    1089 int VBOXCALL supdrvSessionHashTabInsert(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVSESSION *ppOsSessionPtr,
    1090                                         void *pvUser)
    1091 {
    1092     PSUPDRVSESSION  pCur;
    1093     unsigned        iHash;
    1094 
    1095     /*
    1096      * Validate input.
    1097      */
    1098     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    1099     AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
    1100 
    1101     /*
    1102      * Calculate the hash table index and acquire the spinlock.
    1103      */
    1104     iHash = SUPDRV_SESSION_HASH(pSession->Process);
    1105 
    1106     RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
    1107 
    1108     /*
    1109      * If there are collisions, we need to carefully check if we got a
    1110      * duplicate.  There can only be one open session per process.
    1111      */
    1112     pCur = pDevExt->apSessionHashTab[iHash];
    1113     if (pCur)
    1114     {
                 /* Walk the bucket chain looking for an entry with the same process ID. */
    1115         while (pCur && pCur->Process != pSession->Process)
    1116             pCur = pCur->pCommonNextHash;
    1117 
    1118         if (pCur)
    1119         {
                     /* Match found: drop the lock and work out which error applies. */
    1120             RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
    1121             if (pCur == pSession)
    1122             {
    1123                 Assert(pSession->fInHashTable);
    1124                 AssertFailed();
    1125                 return VERR_WRONG_ORDER;
    1126             }
    1127             Assert(!pSession->fInHashTable);
    1128             if (pCur->R0Process == pSession->R0Process)
    1129                 return VERR_RESOURCE_IN_USE;
    1130             return VERR_DUPLICATE;
    1131         }
    1132     }
    1133     Assert(!pSession->fInHashTable);
    1134     Assert(!pSession->ppOsSessionPtr);
    1135 
    1136     /*
    1137      * Insert it, doing a callout to the OS specific code in case it has
    1138      * anything it wishes to do while we're holding the spinlock.
          * The session is linked in at the head of its bucket chain.
    1139      */
    1140     pSession->pCommonNextHash = pDevExt->apSessionHashTab[iHash];
    1141     pDevExt->apSessionHashTab[iHash] = pSession;
    1142     pSession->fInHashTable    = true;
    1143     ASMAtomicIncS32(&pDevExt->cSessions);
    1144 
    1145     pSession->ppOsSessionPtr = ppOsSessionPtr;
    1146     if (ppOsSessionPtr)
    1147         ASMAtomicWritePtr(ppOsSessionPtr, pSession);
    1148 
    1149     supdrvOSSessionHashTabInserted(pDevExt, pSession, pvUser);
    1150 
    1151     /*
    1152      * Retain a reference for the pointer in the session table.
    1153      */
    1154     ASMAtomicIncU32(&pSession->cRefs);
    1155 
    1156     RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
    1157     return VINF_SUCCESS;
    1158 }
    1159 
    1160 
    1161 /**
    1162  * Removes the session from the global hash table.
    1163  *
    1164  * @retval  VINF_SUCCESS on success.
    1165  * @retval  VERR_NOT_FOUND if the session was already removed (asserted).
    1166  * @retval  VERR_INVALID_PARAMETER if the session handle is invalid or a ring-0
    1167  *          session (asserted).
    1168  *
    1169  * @param   pDevExt     The device extension.
    1170  * @param   pSession    The session. The caller is expected to have a reference
    1171  *                      to this so it won't croak on us when we release the hash
    1172  *                      table reference.
    1173  * @param   pvUser      OS specific context value for the
    1174  *                      supdrvOSSessionHashTabInserted callback.
    1175  */
    1176 int VBOXCALL supdrvSessionHashTabRemove(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
    1177 {
    1178     PSUPDRVSESSION  pCur;
    1179     unsigned        iHash;
    1180     int32_t         cRefs;
    1181 
    1182     /*
    1183      * Validate input.
    1184      */
    1185     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    1186     AssertReturn(pSession->R0Process != NIL_RTR0PROCESS, VERR_INVALID_PARAMETER);
    1187 
    1188     /*
    1189      * Calculate the hash table index and acquire the spinlock.
    1190      */
    1191     iHash = SUPDRV_SESSION_HASH(pSession->Process);
    1192 
    1193     RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
    1194 
    1195     /*
    1196      * Unlink it.
    1197      */
    1198     pCur = pDevExt->apSessionHashTab[iHash];
    1199     if (pCur == pSession)
    1200         pDevExt->apSessionHashTab[iHash] = pSession->pCommonNextHash;
    1201     else
    1202     {
    1203         PSUPDRVSESSION pPrev = pCur;
    1204         while (pCur && pCur != pSession)
    1205         {
    1206             pPrev = pCur;
    1207             pCur  = pCur->pCommonNextHash;
    1208         }
    1209         if (pCur)
    1210             pPrev->pCommonNextHash = pCur->pCommonNextHash;
    1211         else
    1212         {
    1213             Assert(!pSession->fInHashTable);
    1214             RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
    1215             return VERR_NOT_FOUND;
    1216         }
    1217     }
    1218 
    1219     pSession->pCommonNextHash = NULL;
    1220     pSession->fInHashTable    = false;
    1221 
    1222     ASMAtomicDecS32(&pDevExt->cSessions);
    1223 
    1224     /*
    1225      * Clear OS specific session pointer if available and do the OS callback.
    1226      */
    1227     if (pSession->ppOsSessionPtr)
    1228     {
    1229         ASMAtomicCmpXchgPtr(pSession->ppOsSessionPtr, NULL, pSession);
    1230         pSession->ppOsSessionPtr = NULL;
    1231     }
    1232 
    1233     supdrvOSSessionHashTabRemoved(pDevExt, pSession, pvUser);
    1234 
    1235     RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
    1236 
    1237     /*
    1238      * Drop the reference the hash table had to the session.  This shouldn't
    1239      * be the last reference!
    1240      */
    1241     cRefs = ASMAtomicDecU32(&pSession->cRefs);
    1242     Assert(cRefs > 0 && cRefs < _1M);
    1243     if (cRefs == 0)
    1244         supdrvDestroySession(pDevExt, pSession);
    1245 
    1246     return VINF_SUCCESS;
    1247 }
    1248 
    1249 
    1250 /**
    1251  * Looks up the session for the current process in the global hash table or in
    1252  * OS specific pointer.
    1253  *
    1254  * @returns Pointer to the session with a reference that the caller must
    1255  *          release.  If no valid session was found, NULL is returned.
    1256  *
    1257  * @param   pDevExt         The device extension.
    1258  * @param   Process         The process ID.
    1259  * @param   R0Process       The ring-0 process handle.
    1260  * @param   ppOsSessionPtr  The OS session pointer if available.  If not NULL,
    1261  *                          this is used instead of the hash table.  For
    1262  *                          additional safety it must then be equal to the
    1263  *                          SUPDRVSESSION::ppOsSessionPtr member.
    1264  *                          This can be NULL even if the OS has a session
    1265  *                          pointer.
    1266  */
    1267 PSUPDRVSESSION VBOXCALL supdrvSessionHashTabLookup(PSUPDRVDEVEXT pDevExt, RTPROCESS Process, RTR0PROCESS R0Process,
    1268                                                    PSUPDRVSESSION *ppOsSessionPtr)
    1269 {
    1270     PSUPDRVSESSION  pCur;
    1271     unsigned        iHash;
    1272 
    1273     /*
    1274      * Validate input.
    1275      */
    1276     AssertReturn(R0Process != NIL_RTR0PROCESS, NULL);
    1277 
    1278     /*
    1279      * Calculate the hash table index and acquire the spinlock.
    1280      */
    1281     iHash = SUPDRV_SESSION_HASH(Process);
    1282 
    1283     RTSpinlockAcquire(pDevExt->hSessionHashTabSpinlock);
    1284 
    1285     /*
    1286      * If an OS session pointer is provided, always use it.
    1287      */
    1288     if (ppOsSessionPtr)
    1289     {
    1290         pCur = *ppOsSessionPtr;
    1291         if (   pCur
    1292             && (   pCur->ppOsSessionPtr != ppOsSessionPtr
    1293                 || pCur->Process        != Process
    1294                 || pCur->R0Process      != R0Process) )
    1295             pCur = NULL;
    1296     }
    1297     else
    1298     {
    1299         /*
    1300          * Otherwise, do the hash table lookup.
    1301          */
    1302         pCur = pDevExt->apSessionHashTab[iHash];
    1303         while (   pCur
    1304                && (   pCur->Process   != Process
    1305                    || pCur->R0Process != R0Process) )
    1306             pCur = pCur->pCommonNextHash;
    1307     }
    1308 
    1309     /*
    1310      * Retain the session.
    1311      */
    1312     if (pCur)
    1313     {
    1314         uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
    1315         NOREF(cRefs);
    1316         Assert(cRefs > 1 && cRefs < _1M);
    1317     }
    1318 
    1319     RTSpinlockRelease(pDevExt->hSessionHashTabSpinlock);
    1320 
    1321     return pCur;
    1322 }
    1323 
    1324 
    1325 /**
    1326  * Retain a session to make sure it doesn't go away while it is in use.
    1327  *
    1328  * @returns New reference count on success, UINT32_MAX on failure.
    1329  * @param   pSession    Session data.
    1330  */
    1331 uint32_t VBOXCALL supdrvSessionRetain(PSUPDRVSESSION pSession)
    1332 {
    1333     uint32_t cRefs;
    1334     AssertPtrReturn(pSession, UINT32_MAX);
    1335     AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
    1336 
    1337     cRefs = ASMAtomicIncU32(&pSession->cRefs);
    1338     AssertMsg(cRefs > 1 && cRefs < _1M, ("%#x %p\n", cRefs, pSession));
    1339     return cRefs;
    1340 }
    1341 
    1342 
    1343 /**
    1344  * Releases a given session.
    1345  *
    1346  * @returns New reference count on success (0 if closed), UINT32_MAX on failure.
    1347  * @param   pSession    Session data.
    1348  */
    1349 uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession)
    1350 {
    1351     uint32_t cRefs;
    1352     AssertPtrReturn(pSession, UINT32_MAX);
    1353     AssertReturn(SUP_IS_SESSION_VALID(pSession), UINT32_MAX);
    1354 
    1355     cRefs = ASMAtomicDecU32(&pSession->cRefs);
    1356     AssertMsg(cRefs < _1M, ("%#x %p\n", cRefs, pSession));
    1357     if (cRefs == 0)
    1358         supdrvDestroySession(pSession->pDevExt, pSession);
    1359     return cRefs;
    1360 }
    1361 
    1362 
    1363 /**
    1364  * RTHandleTableDestroy callback used by supdrvCleanupSession.
    1365  *
    1366  * @returns IPRT status code, see SUPR0ObjAddRef.
    1367  * @param   hHandleTable    The handle table handle. Ignored.
    1368  * @param   pvObj           The object pointer.
    1369  * @param   pvCtx           Context, the handle type. Ignored.
    1370  * @param   pvUser          Session pointer.
    1371  */
    1372 static DECLCALLBACK(int) supdrvSessionObjHandleRetain(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser)
    1373 {
    1374     NOREF(pvCtx);
    1375     NOREF(hHandleTable);
    1376     return SUPR0ObjAddRefEx(pvObj, (PSUPDRVSESSION)pvUser, true /*fNoBlocking*/);
    1377 }
    1378 
    1379 
    1380 /**
    1381  * RTHandleTableDestroy callback used by supdrvCleanupSession.
    1382  *
    1383  * @param   hHandleTable    The handle table handle. Ignored.
    1384  * @param   h               The handle value. Ignored.
    1385  * @param   pvObj           The object pointer.
    1386  * @param   pvCtx           Context, the handle type. Ignored.
    1387  * @param   pvUser          Session pointer.
    1388  */
    1389 static DECLCALLBACK(void) supdrvSessionObjHandleDelete(RTHANDLETABLE hHandleTable, uint32_t h, void *pvObj, void *pvCtx, void *pvUser)
    1390 {
    1391     NOREF(pvCtx);
    1392     NOREF(h);
    1393     NOREF(hHandleTable);
    1394     SUPR0ObjRelease(pvObj, (PSUPDRVSESSION)pvUser);
    1395 }
    1396 
    1397 
    1398 /**
    1399  * Fast path I/O Control worker.
    1400  *
    1401  * @returns VBox status code that should be passed down to ring-3 unchanged.
    1402  * @param   uIOCtl      Function number.
    1403  * @param   idCpu       VMCPU id.
    1404  * @param   pDevExt     Device extention.
    1405  * @param   pSession    Session data.
    1406  */
    1407 int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, VMCPUID idCpu, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
    1408 {
    1409     /*
    1410      * We check the two prereqs after doing this only to allow the compiler to optimize things better.
    1411      */
    1412     if (RT_LIKELY(   RT_VALID_PTR(pSession)
    1413                   && pSession->pVM
    1414                   && pDevExt->pfnVMMR0EntryFast))
    1415     {
    1416         switch (uIOCtl)
    1417         {
    1418             case SUP_IOCTL_FAST_DO_RAW_RUN:
    1419                 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_RAW_RUN);
    1420                 break;
    1421             case SUP_IOCTL_FAST_DO_HM_RUN:
    1422                 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_HM_RUN);
    1423                 break;
    1424             case SUP_IOCTL_FAST_DO_NOP:
    1425                 pDevExt->pfnVMMR0EntryFast(pSession->pVM, idCpu, SUP_VMMR0_DO_NOP);
    1426                 break;
    1427             default:
    1428                 return VERR_INTERNAL_ERROR;
    1429         }
    1430         return VINF_SUCCESS;
    1431     }
    1432     return VERR_INTERNAL_ERROR;
    1433 }
    1434 
    1435 
    /**
     * Helper for supdrvIOCtl used to validate module names passed to SUP_IOCTL_LDR_OPEN.
     *
     * Rejects the name if it contains any character from a fixed set of
     * punctuation characters.  The scan is hand-rolled rather than using strpbrk
     * because, per the original note, strpbrk is not on the RedHat kABI white
     * list (see http://www.kerneldrivers.org/RHEL5).
     *
     * @returns  true if fine, false if not.
     * @param    pszName        The module name to check.
     */
    static bool supdrvIsLdrModuleNameValid(const char *pszName)
    {
        static const char s_szInvalidChars[] = ";:()[]{}/\\|&*%#@!~`\"'";
        const char       *pszCur;

        for (pszCur = pszName; *pszCur != '\0'; pszCur++)
        {
            /* The table's terminator is scanned too, but can never equal a non-NUL character. */
            unsigned iInv;
            for (iInv = 0; iInv < RT_ELEMENTS(s_szInvalidChars); iInv++)
                if (s_szInvalidChars[iInv] == *pszCur)
                    return false;
        }
        return true;
    }
    1459 
    1460 
    1461 
    1462 /**
    1463  * I/O Control inner worker (tracing reasons).
    1464  *
    1465  * @returns IPRT status code.
    1466  * @retval  VERR_INVALID_PARAMETER if the request is invalid.
    1467  *
    1468  * @param   uIOCtl      Function number.
    1469  * @param   pDevExt     Device extension.
    1470  * @param   pSession    Session data.
    1471  * @param   pReqHdr     The request header.
    1472  */
    1473 static int supdrvIOCtlInnerUnrestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
    1474 {
    1475     /*
    1476      * Validation macros
    1477      */
    1478 #define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
    1479     do { \
    1480         if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
    1481         { \
    1482             OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
    1483                         (long)pReqHdr->cbIn, (long)(cbInExpect), (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
    1484             return pReqHdr->rc = VERR_INVALID_PARAMETER; \
    1485         } \
    1486     } while (0)
    1487 
    1488 #define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)
    1489 
    1490 #define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
    1491     do { \
    1492         if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
    1493         { \
    1494             OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
    1495                         (long)pReqHdr->cbIn, (long)(cbInExpect))); \
    1496             return pReqHdr->rc = VERR_INVALID_PARAMETER; \
    1497         } \
    1498     } while (0)
    1499 
    1500 #define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
    1501     do { \
    1502         if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
    1503         { \
    1504             OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
    1505                         (long)pReqHdr->cbOut, (long)(cbOutExpect))); \
    1506             return pReqHdr->rc = VERR_INVALID_PARAMETER; \
    1507         } \
    1508     } while (0)
    1509 
    1510 #define REQ_CHECK_EXPR(Name, expr) \
    1511     do { \
    1512         if (RT_UNLIKELY(!(expr))) \
    1513         { \
    1514             OSDBGPRINT(( #Name ": %s\n", #expr)); \
    1515             return pReqHdr->rc = VERR_INVALID_PARAMETER; \
    1516         } \
    1517     } while (0)
    1518 
    1519 #define REQ_CHECK_EXPR_FMT(expr, fmt) \
    1520     do { \
    1521         if (RT_UNLIKELY(!(expr))) \
    1522         { \
    1523             OSDBGPRINT( fmt ); \
    1524             return pReqHdr->rc = VERR_INVALID_PARAMETER; \
    1525         } \
    1526     } while (0)
    1527 
    1528     /*
    1529      * The switch.
    1530      */
    1531     switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
    1532     {
    1533         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
    1534         {
    1535             PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
    1536             REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
    1537             if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
    1538             {
    1539                 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
    1540                 pReq->Hdr.rc = VERR_INVALID_MAGIC;
    1541                 return 0;
    1542             }
    1543 
    1544 #if 0
    1545             /*
    1546              * Call out to the OS specific code and let it do permission checks on the
    1547              * client process.
    1548              */
    1549             if (!supdrvOSValidateClientProcess(pDevExt, pSession))
    1550             {
    1551                 pReq->u.Out.u32Cookie         = 0xffffffff;
    1552                 pReq->u.Out.u32SessionCookie  = 0xffffffff;
    1553                 pReq->u.Out.u32SessionVersion = 0xffffffff;
    1554                 pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
    1555                 pReq->u.Out.pSession          = NULL;
    1556                 pReq->u.Out.cFunctions        = 0;
    1557                 pReq->Hdr.rc = VERR_PERMISSION_DENIED;
    1558                 return 0;
    1559             }
    1560 #endif
    1561 
    1562             /*
    1563              * Match the version.
    1564              * The current logic is very simple, match the major interface version.
    1565              */
    1566             if (    pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION
    1567                 ||  (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000))
    1568             {
    1569                 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
    1570                             pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
    1571                 pReq->u.Out.u32Cookie         = 0xffffffff;
    1572                 pReq->u.Out.u32SessionCookie  = 0xffffffff;
    1573                 pReq->u.Out.u32SessionVersion = 0xffffffff;
    1574                 pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
    1575                 pReq->u.Out.pSession          = NULL;
    1576                 pReq->u.Out.cFunctions        = 0;
    1577                 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
    1578                 return 0;
    1579             }
    1580 
    1581             /*
    1582              * Fill in return data and be gone.
    1583              * N.B. The first one to change SUPDRV_IOC_VERSION shall makes sure that
    1584              *      u32SessionVersion <= u32ReqVersion!
    1585              */
    1586             /** @todo Somehow validate the client and negotiate a secure cookie... */
    1587             pReq->u.Out.u32Cookie         = pDevExt->u32Cookie;
    1588             pReq->u.Out.u32SessionCookie  = pSession->u32Cookie;
    1589             pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
    1590             pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
    1591             pReq->u.Out.pSession          = pSession;
    1592             pReq->u.Out.cFunctions        = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
    1593             pReq->Hdr.rc = VINF_SUCCESS;
    1594             return 0;
    1595         }
    1596 
    1597         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
    1598         {
    1599             /* validate */
    1600             PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
    1601             REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));
    1602 
    1603             /* execute */
    1604             pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
    1605             memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
    1606             pReq->Hdr.rc = VINF_SUCCESS;
    1607             return 0;
    1608         }
    1609 
    1610         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
    1611         {
    1612             /* validate */
    1613             PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
    1614             REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
    1615             REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
    1616             REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
    1617             REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);
    1618 
    1619             /* execute */
    1620             pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
    1621             if (RT_FAILURE(pReq->Hdr.rc))
    1622                 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
    1623             return 0;
    1624         }
    1625 
    1626         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
    1627         {
    1628             /* validate */
    1629             PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
    1630             REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);
    1631 
    1632             /* execute */
    1633             pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
    1634             return 0;
    1635         }
    1636 
    1637         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
    1638         {
    1639             /* validate */
    1640             PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
    1641             REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);
    1642 
    1643             /* execute */
    1644             pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
    1645             if (RT_FAILURE(pReq->Hdr.rc))
    1646                 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
    1647             return 0;
    1648         }
    1649 
    1650         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
    1651         {
    1652             /* validate */
    1653             PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
    1654             REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);
    1655 
    1656             /* execute */
    1657             pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
    1658             return 0;
    1659         }
    1660 
    1661         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
    1662         {
    1663             /* validate */
    1664             PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
    1665             REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
    1666             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs > 0);
    1667             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageWithTabs < 16*_1M);
    1668             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
    1669             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits > 0);
    1670             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImageBits < pReq->u.In.cbImageWithTabs);
    1671             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
    1672             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
    1673             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, supdrvIsLdrModuleNameValid(pReq->u.In.szName));
    1674             REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, RTStrEnd(pReq->u.In.szFilename, sizeof(pReq->u.In.szFilename)));
    1675 
    1676             /* execute */
    1677             pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
    1678             return 0;
    1679         }
    1680 
    1681         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
    1682         {
    1683             /* validate */
    1684             PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
    1685             REQ_CHECK_EXPR(Name, pReq->Hdr.cbIn >= sizeof(*pReq));
    1686             REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImageWithTabs), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
    1687             REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
    1688             REQ_CHECK_EXPR_FMT(     !pReq->u.In.cSymbols
    1689                                ||   (   pReq->u.In.offSymbols < pReq->u.In.cbImageWithTabs
    1690                                      && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImageWithTabs),
    1691                                ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offSymbols,
    1692                                 (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImageWithTabs));
    1693             REQ_CHECK_EXPR_FMT(     !pReq->u.In.cbStrTab
    1694                                ||   (   pReq->u.In.offStrTab < pReq->u.In.cbImageWithTabs
    1695                                      && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs
    1696                                      && pReq->u.In.cbStrTab <= pReq->u.In.cbImageWithTabs),
    1697                                ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImageWithTabs=%#lx\n", (long)pReq->u.In.offStrTab,
    1698                                 (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImageWithTabs));
    1699 
    1700             if (pReq->u.In.cSymbols)
    1701             {
    1702                 uint32_t i;
    1703                 PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.abImage[pReq->u.In.offSymbols];
    1704                 for (i = 0; i < pReq->u.In.cSymbols; i++)
    1705                 {
    1706                     REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImageWithTabs,
    1707                                        ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImageWithTabs));
    1708                     REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
    1709                                        ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
    1710                     REQ_CHECK_EXPR_FMT(RTStrEnd((char const *)&pReq->u.In.abImage[pReq->u.In.offStrTab + paSyms[i].offName],
    1711                                                 pReq->u.In.cbStrTab - paSyms[i].offName),
    1712                                        ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImageWithTabs));
    1713                 }
    1714             }
    1715 
    1716             /* execute */
    1717             pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
    1718             return 0;
    1719         }
    1720 
    1721         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
    1722         {
    1723             /* validate */
    1724             PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
    1725             REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);
    1726 
    1727             /* execute */
    1728             pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
    1729             return 0;
    1730         }
    1731 
    1732         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOCK_DOWN):
    1733         {
    1734             /* validate */
    1735             REQ_CHECK_SIZES(SUP_IOCTL_LDR_LOCK_DOWN);
    1736 
    1737             /* execute */
    1738             pReqHdr->rc = supdrvIOCtl_LdrLockDown(pDevExt);
    1739             return 0;
    1740         }
    1741 
    1742         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
    1743         {
    1744             /* validate */
    1745             PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
    1746             REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
    1747             REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, RTStrEnd(pReq->u.In.szSymbol, sizeof(pReq->u.In.szSymbol)));
    1748 
    1749             /* execute */
    1750             pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
    1751             return 0;
    1752         }
    1753 
    1754         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
    1755         {
    1756             /* validate */
    1757             PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
    1758             Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    1759                   pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    1760 
    1761             if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
    1762             {
    1763                 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));
    1764 
    1765                 /* execute */
    1766                 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
    1767                     pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg, pSession);
    1768                 else
    1769                     pReq->Hdr.rc = VERR_WRONG_ORDER;
    1770             }
    1771             else
    1772             {
    1773                 PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
    1774                 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
    1775                                    ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
    1776                 REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
    1777                 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));
    1778 
    1779                 /* execute */
    1780                 if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
    1781                     pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
    1782                 else
    1783                     pReq->Hdr.rc = VERR_WRONG_ORDER;
    1784             }
    1785 
    1786             if (    RT_FAILURE(pReq->Hdr.rc)
    1787                 &&  pReq->Hdr.rc != VERR_INTERRUPTED
    1788                 &&  pReq->Hdr.rc != VERR_TIMEOUT)
    1789                 Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    1790                      pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    1791             else
    1792                 Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    1793                       pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    1794             return 0;
    1795         }
    1796 
    1797         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0_BIG):
    1798         {
    1799             /* validate */
    1800             PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
    1801             PSUPVMMR0REQHDR pVMMReq;
    1802             Log4(("SUP_IOCTL_CALL_VMMR0_BIG: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    1803                   pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    1804 
    1805             pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
    1806             REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR)),
    1807                                ("SUP_IOCTL_CALL_VMMR0_BIG: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_BIG_SIZE(sizeof(SUPVMMR0REQHDR))));
    1808             REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0_BIG, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
    1809             REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0_BIG, SUP_IOCTL_CALL_VMMR0_BIG_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_BIG_SIZE_OUT(pVMMReq->cbReq));
    1810 
    1811             /* execute */
    1812             if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
    1813                 pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.idCpu, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg, pSession);
    1814             else
    1815                 pReq->Hdr.rc = VERR_WRONG_ORDER;
    1816 
    1817             if (    RT_FAILURE(pReq->Hdr.rc)
    1818                 &&  pReq->Hdr.rc != VERR_INTERRUPTED
    1819                 &&  pReq->Hdr.rc != VERR_TIMEOUT)
    1820                 Log(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    1821                      pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    1822             else
    1823                 Log4(("SUP_IOCTL_CALL_VMMR0_BIG: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    1824                       pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    1825             return 0;
    1826         }
    1827 
    1828         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
    1829         {
    1830             /* validate */
    1831             PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
    1832             REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);
    1833 
    1834             /* execute */
    1835             pReq->Hdr.rc = VINF_SUCCESS;
    1836             pReq->u.Out.enmMode = SUPR0GetPagingMode();
    1837             return 0;
    1838         }
    1839 
    1840         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
    1841         {
    1842             /* validate */
    1843             PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
    1844             REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
    1845             REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));
    1846 
    1847             /* execute */
    1848             pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
    1849             if (RT_FAILURE(pReq->Hdr.rc))
    1850                 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
    1851             return 0;
    1852         }
    1853 
    1854         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
    1855         {
    1856             /* validate */
    1857             PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
    1858             REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);
    1859 
    1860             /* execute */
    1861             pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
    1862             return 0;
    1863         }
    1864 
    1865         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
    1866         {
    1867             /* validate */
    1868             PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
    1869             REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);
    1870 
    1871             /* execute */
    1872             pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
    1873             if (RT_SUCCESS(pReq->Hdr.rc))
    1874                 pReq->u.Out.pGipR0 = pDevExt->pGip;
    1875             return 0;
    1876         }
    1877 
    1878         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
    1879         {
    1880             /* validate */
    1881             PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
    1882             REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);
    1883 
    1884             /* execute */
    1885             pReq->Hdr.rc = SUPR0GipUnmap(pSession);
    1886             return 0;
    1887         }
    1888 
    1889         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
    1890         {
    1891             /* validate */
    1892             PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
    1893             REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
    1894             REQ_CHECK_EXPR_FMT(     !pReq->u.In.pVMR0
    1895                                ||   (   VALID_PTR(pReq->u.In.pVMR0)
    1896                                      && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
    1897                                ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
    1898             /* execute */
    1899             pSession->pVM = pReq->u.In.pVMR0;
    1900             pReq->Hdr.rc = VINF_SUCCESS;
    1901             return 0;
    1902         }
    1903 
    1904         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC_EX):
    1905         {
    1906             /* validate */
    1907             PSUPPAGEALLOCEX pReq = (PSUPPAGEALLOCEX)pReqHdr;
    1908             REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC_EX, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN);
    1909             REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC_EX, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_EX_SIZE_OUT(pReq->u.In.cPages));
    1910             REQ_CHECK_EXPR_FMT(pReq->u.In.fKernelMapping || pReq->u.In.fUserMapping,
    1911                                ("SUP_IOCTL_PAGE_ALLOC_EX: No mapping requested!\n"));
    1912             REQ_CHECK_EXPR_FMT(pReq->u.In.fUserMapping,
    1913                                ("SUP_IOCTL_PAGE_ALLOC_EX: Must have user mapping!\n"));
    1914             REQ_CHECK_EXPR_FMT(!pReq->u.In.fReserved0 && !pReq->u.In.fReserved1,
    1915                                ("SUP_IOCTL_PAGE_ALLOC_EX: fReserved0=%d fReserved1=%d\n", pReq->u.In.fReserved0, pReq->u.In.fReserved1));
    1916 
    1917             /* execute */
    1918             pReq->Hdr.rc = SUPR0PageAllocEx(pSession, pReq->u.In.cPages, 0 /* fFlags */,
    1919                                             pReq->u.In.fUserMapping   ? &pReq->u.Out.pvR3 : NULL,
    1920                                             pReq->u.In.fKernelMapping ? &pReq->u.Out.pvR0 : NULL,
    1921                                             &pReq->u.Out.aPages[0]);
    1922             if (RT_FAILURE(pReq->Hdr.rc))
    1923                 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
    1924             return 0;
    1925         }
    1926 
    1927         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_MAP_KERNEL):
    1928         {
    1929             /* validate */
    1930             PSUPPAGEMAPKERNEL pReq = (PSUPPAGEMAPKERNEL)pReqHdr;
    1931             REQ_CHECK_SIZES(SUP_IOCTL_PAGE_MAP_KERNEL);
    1932             REQ_CHECK_EXPR_FMT(!pReq->u.In.fFlags, ("SUP_IOCTL_PAGE_MAP_KERNEL: fFlags=%#x! MBZ\n", pReq->u.In.fFlags));
    1933             REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_MAP_KERNEL: offSub=%#x\n", pReq->u.In.offSub));
    1934             REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
    1935                                ("SUP_IOCTL_PAGE_MAP_KERNEL: cbSub=%#x\n", pReq->u.In.cbSub));
    1936 
    1937             /* execute */
    1938             pReq->Hdr.rc = SUPR0PageMapKernel(pSession, pReq->u.In.pvR3, pReq->u.In.offSub, pReq->u.In.cbSub,
    1939                                               pReq->u.In.fFlags, &pReq->u.Out.pvR0);
    1940             if (RT_FAILURE(pReq->Hdr.rc))
    1941                 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
    1942             return 0;
    1943         }
    1944 
    1945         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_PROTECT):
    1946         {
    1947             /* validate */
    1948             PSUPPAGEPROTECT pReq = (PSUPPAGEPROTECT)pReqHdr;
    1949             REQ_CHECK_SIZES(SUP_IOCTL_PAGE_PROTECT);
    1950             REQ_CHECK_EXPR_FMT(!(pReq->u.In.fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)),
    1951                                ("SUP_IOCTL_PAGE_PROTECT: fProt=%#x!\n", pReq->u.In.fProt));
    1952             REQ_CHECK_EXPR_FMT(!(pReq->u.In.offSub & PAGE_OFFSET_MASK), ("SUP_IOCTL_PAGE_PROTECT: offSub=%#x\n", pReq->u.In.offSub));
    1953             REQ_CHECK_EXPR_FMT(pReq->u.In.cbSub && !(pReq->u.In.cbSub & PAGE_OFFSET_MASK),
    1954                                ("SUP_IOCTL_PAGE_PROTECT: cbSub=%#x\n", pReq->u.In.cbSub));
    1955 
    1956             /* execute */
    1957             pReq->Hdr.rc = SUPR0PageProtect(pSession, pReq->u.In.pvR3, pReq->u.In.pvR0, pReq->u.In.offSub, pReq->u.In.cbSub, pReq->u.In.fProt);
    1958             return 0;
    1959         }
    1960 
    1961         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
    1962         {
    1963             /* validate */
    1964             PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
    1965             REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);
    1966 
    1967             /* execute */
    1968             pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
    1969             return 0;
    1970         }
    1971 
    1972         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_SERVICE(0)):
    1973         {
    1974             /* validate */
    1975             PSUPCALLSERVICE pReq = (PSUPCALLSERVICE)pReqHdr;
    1976             Log4(("SUP_IOCTL_CALL_SERVICE: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    1977                   pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    1978 
    1979             if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
    1980                 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(0), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(0));
    1981             else
    1982             {
    1983                 PSUPR0SERVICEREQHDR pSrvReq = (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0];
    1984                 REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR)),
    1985                                    ("SUP_IOCTL_CALL_SERVICE: cbIn=%#x < %#lx\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_SERVICE_SIZE(sizeof(SUPR0SERVICEREQHDR))));
    1986                 REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, pSrvReq->u32Magic == SUPR0SERVICEREQHDR_MAGIC);
    1987                 REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_SERVICE, SUP_IOCTL_CALL_SERVICE_SIZE_IN(pSrvReq->cbReq), SUP_IOCTL_CALL_SERVICE_SIZE_OUT(pSrvReq->cbReq));
    1988             }
    1989             REQ_CHECK_EXPR(SUP_IOCTL_CALL_SERVICE, RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)));
    1990 
    1991             /* execute */
    1992             pReq->Hdr.rc = supdrvIOCtl_CallServiceModule(pDevExt, pSession, pReq);
    1993             return 0;
    1994         }
    1995 
    1996         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOGGER_SETTINGS(0)):
    1997         {
    1998             /* validate */
    1999             PSUPLOGGERSETTINGS pReq = (PSUPLOGGERSETTINGS)pReqHdr;
    2000             size_t cbStrTab;
    2001             REQ_CHECK_SIZE_OUT(SUP_IOCTL_LOGGER_SETTINGS, SUP_IOCTL_LOGGER_SETTINGS_SIZE_OUT);
    2002             REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->Hdr.cbIn >= SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(1));
    2003             cbStrTab = pReq->Hdr.cbIn - SUP_IOCTL_LOGGER_SETTINGS_SIZE_IN(0);
    2004             REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offGroups      < cbStrTab);
    2005             REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offFlags       < cbStrTab);
    2006             REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.offDestination < cbStrTab);
    2007             REQ_CHECK_EXPR_FMT(pReq->u.In.szStrings[cbStrTab - 1] == '\0',
    2008                                ("SUP_IOCTL_LOGGER_SETTINGS: cbIn=%#x cbStrTab=%#zx LastChar=%d\n",
    2009                                 pReq->Hdr.cbIn, cbStrTab, pReq->u.In.szStrings[cbStrTab - 1]));
    2010             REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhich <= SUPLOGGERSETTINGS_WHICH_RELEASE);
    2011             REQ_CHECK_EXPR(SUP_IOCTL_LOGGER_SETTINGS, pReq->u.In.fWhat  <= SUPLOGGERSETTINGS_WHAT_DESTROY);
    2012 
    2013             /* execute */
    2014             pReq->Hdr.rc = supdrvIOCtl_LoggerSettings(pDevExt, pSession, pReq);
    2015             return 0;
    2016         }
    2017 
    2018         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP2):
    2019         {
    2020             /* validate */
    2021             PSUPSEMOP2 pReq = (PSUPSEMOP2)pReqHdr;
    2022             REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP2, SUP_IOCTL_SEM_OP2_SIZE_IN, SUP_IOCTL_SEM_OP2_SIZE_OUT);
    2023             REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP2, pReq->u.In.uReserved == 0);
    2024 
    2025             /* execute */
    2026             switch (pReq->u.In.uType)
    2027             {
    2028                 case SUP_SEM_TYPE_EVENT:
    2029                 {
    2030                     SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
    2031                     switch (pReq->u.In.uOp)
    2032                     {
    2033                         case SUPSEMOP2_WAIT_MS_REL:
    2034                             pReq->Hdr.rc = SUPSemEventWaitNoResume(pSession, hEvent, pReq->u.In.uArg.cRelMsTimeout);
    2035                             break;
    2036                         case SUPSEMOP2_WAIT_NS_ABS:
    2037                             pReq->Hdr.rc = SUPSemEventWaitNsAbsIntr(pSession, hEvent, pReq->u.In.uArg.uAbsNsTimeout);
    2038                             break;
    2039                         case SUPSEMOP2_WAIT_NS_REL:
    2040                             pReq->Hdr.rc = SUPSemEventWaitNsRelIntr(pSession, hEvent, pReq->u.In.uArg.cRelNsTimeout);
    2041                             break;
    2042                         case SUPSEMOP2_SIGNAL:
    2043                             pReq->Hdr.rc = SUPSemEventSignal(pSession, hEvent);
    2044                             break;
    2045                         case SUPSEMOP2_CLOSE:
    2046                             pReq->Hdr.rc = SUPSemEventClose(pSession, hEvent);
    2047                             break;
    2048                         case SUPSEMOP2_RESET:
    2049                         default:
    2050                             pReq->Hdr.rc = VERR_INVALID_FUNCTION;
    2051                             break;
    2052                     }
    2053                     break;
    2054                 }
    2055 
    2056                 case SUP_SEM_TYPE_EVENT_MULTI:
    2057                 {
    2058                     SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
    2059                     switch (pReq->u.In.uOp)
    2060                     {
    2061                         case SUPSEMOP2_WAIT_MS_REL:
    2062                             pReq->Hdr.rc = SUPSemEventMultiWaitNoResume(pSession, hEventMulti, pReq->u.In.uArg.cRelMsTimeout);
    2063                             break;
    2064                         case SUPSEMOP2_WAIT_NS_ABS:
    2065                             pReq->Hdr.rc = SUPSemEventMultiWaitNsAbsIntr(pSession, hEventMulti, pReq->u.In.uArg.uAbsNsTimeout);
    2066                             break;
    2067                         case SUPSEMOP2_WAIT_NS_REL:
    2068                             pReq->Hdr.rc = SUPSemEventMultiWaitNsRelIntr(pSession, hEventMulti, pReq->u.In.uArg.cRelNsTimeout);
    2069                             break;
    2070                         case SUPSEMOP2_SIGNAL:
    2071                             pReq->Hdr.rc = SUPSemEventMultiSignal(pSession, hEventMulti);
    2072                             break;
    2073                         case SUPSEMOP2_CLOSE:
    2074                             pReq->Hdr.rc = SUPSemEventMultiClose(pSession, hEventMulti);
    2075                             break;
    2076                         case SUPSEMOP2_RESET:
    2077                             pReq->Hdr.rc = SUPSemEventMultiReset(pSession, hEventMulti);
    2078                             break;
    2079                         default:
    2080                             pReq->Hdr.rc = VERR_INVALID_FUNCTION;
    2081                             break;
    2082                     }
    2083                     break;
    2084                 }
    2085 
    2086                 default:
    2087                     pReq->Hdr.rc = VERR_INVALID_PARAMETER;
    2088                     break;
    2089             }
    2090             return 0;
    2091         }
    2092 
    2093         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SEM_OP3):
    2094         {
    2095             /* validate */
    2096             PSUPSEMOP3 pReq = (PSUPSEMOP3)pReqHdr;
    2097             REQ_CHECK_SIZES_EX(SUP_IOCTL_SEM_OP3, SUP_IOCTL_SEM_OP3_SIZE_IN, SUP_IOCTL_SEM_OP3_SIZE_OUT);
    2098             REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, pReq->u.In.u32Reserved == 0 && pReq->u.In.u64Reserved == 0);
    2099 
    2100             /* execute */
    2101             switch (pReq->u.In.uType)
    2102             {
    2103                 case SUP_SEM_TYPE_EVENT:
    2104                 {
    2105                     SUPSEMEVENT hEvent = (SUPSEMEVENT)(uintptr_t)pReq->u.In.hSem;
    2106                     switch (pReq->u.In.uOp)
    2107                     {
    2108                         case SUPSEMOP3_CREATE:
    2109                             REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
    2110                             pReq->Hdr.rc = SUPSemEventCreate(pSession, &hEvent);
    2111                             pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEvent;
    2112                             break;
    2113                         case SUPSEMOP3_GET_RESOLUTION:
    2114                             REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEvent == NIL_SUPSEMEVENT);
    2115                             pReq->Hdr.rc = VINF_SUCCESS;
    2116                             pReq->Hdr.cbOut = sizeof(*pReq);
    2117                             pReq->u.Out.cNsResolution = SUPSemEventGetResolution(pSession);
    2118                             break;
    2119                         default:
    2120                             pReq->Hdr.rc = VERR_INVALID_FUNCTION;
    2121                             break;
    2122                     }
    2123                     break;
    2124                 }
    2125 
    2126                 case SUP_SEM_TYPE_EVENT_MULTI:
    2127                 {
    2128                     SUPSEMEVENTMULTI hEventMulti = (SUPSEMEVENTMULTI)(uintptr_t)pReq->u.In.hSem;
    2129                     switch (pReq->u.In.uOp)
    2130                     {
    2131                         case SUPSEMOP3_CREATE:
    2132                             REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
    2133                             pReq->Hdr.rc = SUPSemEventMultiCreate(pSession, &hEventMulti);
    2134                             pReq->u.Out.hSem = (uint32_t)(uintptr_t)hEventMulti;
    2135                             break;
    2136                         case SUPSEMOP3_GET_RESOLUTION:
    2137                             REQ_CHECK_EXPR(SUP_IOCTL_SEM_OP3, hEventMulti == NIL_SUPSEMEVENTMULTI);
    2138                             pReq->Hdr.rc = VINF_SUCCESS;
    2139                             pReq->u.Out.cNsResolution = SUPSemEventMultiGetResolution(pSession);
    2140                             break;
    2141                         default:
    2142                             pReq->Hdr.rc = VERR_INVALID_FUNCTION;
    2143                             break;
    2144                     }
    2145                     break;
    2146                 }
    2147 
    2148                 default:
    2149                     pReq->Hdr.rc = VERR_INVALID_PARAMETER;
    2150                     break;
    2151             }
    2152             return 0;
    2153         }
    2154 
    2155         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
    2156         {
    2157             /* validate */
    2158             PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
    2159             REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
    2160 
    2161             /* execute */
    2162             pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
    2163             if (RT_FAILURE(pReq->Hdr.rc))
    2164                 pReq->Hdr.cbOut = sizeof(pReq->Hdr);
    2165             return 0;
    2166         }
    2167 
    2168         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_OPEN):
    2169         {
    2170             /* validate */
    2171             PSUPTRACEROPEN pReq = (PSUPTRACEROPEN)pReqHdr;
    2172             REQ_CHECK_SIZES(SUP_IOCTL_TRACER_OPEN);
    2173 
    2174             /* execute */
    2175             pReq->Hdr.rc = supdrvIOCtl_TracerOpen(pDevExt, pSession, pReq->u.In.uCookie, pReq->u.In.uArg);
    2176             return 0;
    2177         }
    2178 
    2179         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_CLOSE):
    2180         {
    2181             /* validate */
    2182             REQ_CHECK_SIZES(SUP_IOCTL_TRACER_CLOSE);
    2183 
    2184             /* execute */
    2185             pReqHdr->rc = supdrvIOCtl_TracerClose(pDevExt, pSession);
    2186             return 0;
    2187         }
    2188 
    2189         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_IOCTL):
    2190         {
    2191             /* validate */
    2192             PSUPTRACERIOCTL pReq = (PSUPTRACERIOCTL)pReqHdr;
    2193             REQ_CHECK_SIZES(SUP_IOCTL_TRACER_IOCTL);
    2194 
    2195             /* execute */
    2196             pReqHdr->rc = supdrvIOCtl_TracerIOCtl(pDevExt, pSession, pReq->u.In.uCmd, pReq->u.In.uArg, &pReq->u.Out.iRetVal);
    2197             return 0;
    2198         }
    2199 
    2200         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_REG):
    2201         {
    2202             /* validate */
    2203             PSUPTRACERUMODREG pReq = (PSUPTRACERUMODREG)pReqHdr;
    2204             REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_REG);
    2205             if (!RTStrEnd(pReq->u.In.szName, sizeof(pReq->u.In.szName)))
    2206                 return VERR_INVALID_PARAMETER;
    2207 
    2208             /* execute */
    2209             pReqHdr->rc = supdrvIOCtl_TracerUmodRegister(pDevExt, pSession,
    2210                                                          pReq->u.In.R3PtrVtgHdr, pReq->u.In.uVtgHdrAddr,
    2211                                                          pReq->u.In.R3PtrStrTab, pReq->u.In.cbStrTab,
    2212                                                          pReq->u.In.szName, pReq->u.In.fFlags);
    2213             return 0;
    2214         }
    2215 
    2216         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_DEREG):
    2217         {
    2218             /* validate */
    2219             PSUPTRACERUMODDEREG pReq = (PSUPTRACERUMODDEREG)pReqHdr;
    2220             REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_DEREG);
    2221 
    2222             /* execute */
    2223             pReqHdr->rc = supdrvIOCtl_TracerUmodDeregister(pDevExt, pSession, pReq->u.In.pVtgHdr);
    2224             return 0;
    2225         }
    2226 
    2227         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE):
    2228         {
    2229             /* validate */
    2230             PSUPTRACERUMODFIREPROBE pReq = (PSUPTRACERUMODFIREPROBE)pReqHdr;
    2231             REQ_CHECK_SIZES(SUP_IOCTL_TRACER_UMOD_FIRE_PROBE);
    2232 
    2233             supdrvIOCtl_TracerUmodProbeFire(pDevExt, pSession, &pReq->u.In);
    2234             pReqHdr->rc = VINF_SUCCESS;
    2235             return 0;
    2236         }
    2237 
    2238         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_MSR_PROBER):
    2239         {
    2240             /* validate */
    2241             PSUPMSRPROBER pReq = (PSUPMSRPROBER)pReqHdr;
    2242             REQ_CHECK_SIZES(SUP_IOCTL_MSR_PROBER);
    2243             REQ_CHECK_EXPR(SUP_IOCTL_MSR_PROBER,
    2244                            pReq->u.In.enmOp > SUPMSRPROBEROP_INVALID && pReq->u.In.enmOp < SUPMSRPROBEROP_END);
    2245 
    2246             pReqHdr->rc = supdrvIOCtl_MsrProber(pDevExt, pReq);
    2247             return 0;
    2248         }
    2249 
    2250         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_RESUME_SUSPENDED_KBDS):
    2251         {
    2252             /* validate */
    2253             REQ_CHECK_SIZES(SUP_IOCTL_RESUME_SUSPENDED_KBDS);
    2254 
    2255             pReqHdr->rc = supdrvIOCtl_ResumeSuspendedKbds();
    2256             return 0;
    2257         }
    2258 
    2259         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_DELTA_MEASURE):
    2260         {
    2261             /* validate */
    2262             PSUPTSCDELTAMEASURE pReq = (PSUPTSCDELTAMEASURE)pReqHdr;
    2263             REQ_CHECK_SIZES(SUP_IOCTL_TSC_DELTA_MEASURE);
    2264 
    2265             pReqHdr->rc = supdrvIOCtl_TscDeltaMeasure(pDevExt, pSession, pReq);
    2266             return 0;
    2267         }
    2268 
    2269         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_TSC_READ):
    2270         {
    2271             /* validate */
    2272             PSUPTSCREAD pReq = (PSUPTSCREAD)pReqHdr;
    2273             REQ_CHECK_SIZES(SUP_IOCTL_TSC_READ);
    2274 
    2275             pReqHdr->rc = supdrvIOCtl_TscRead(pDevExt, pSession, pReq);
    2276             return 0;
    2277         }
    2278 
    2279         default:
    2280             Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
    2281             break;
    2282     }
    2283     return VERR_GENERAL_FAILURE;
    2284 }
    2285 
    2286 
    2287 /**
    2288  * I/O Control inner worker for the restricted operations (only SUP_IOCTL_COOKIE and SUP_IOCTL_VT_CAPS are handled here).
    2289  *
    2290  * @returns IPRT status code: 0 when the request was dispatched (the operation status is then stored in the request header's rc member), VERR_GENERAL_FAILURE for an unknown IOCtl.
    2291  * @retval  VERR_INVALID_PARAMETER if the request is invalid (presumably returned by REQ_CHECK_SIZES, which is defined outside this view - TODO confirm).
    2292  *
    2293  * @param   uIOCtl      Function number; passed through SUP_CTL_CODE_NO_SIZE() before being compared.
    2294  * @param   pDevExt     Device extension (supplies the driver cookie returned by SUP_IOCTL_COOKIE).
    2295  * @param   pSession    Session data (supplies the session cookie; also passed to SUPR0QueryVTCaps).
    2296  * @param   pReqHdr     The request header; cast to the request structure matching the IOCtl.
    2297  */
    2298 static int supdrvIOCtlInnerRestricted(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
    2299 {
    2300     /*
    2301      * The switch (both the incoming code and the case labels go through SUP_CTL_CODE_NO_SIZE).
    2302      */
    2303     switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
    2304     {
    2305         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
    2306         {
    2307             PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
    2308             REQ_CHECK_SIZES(SUP_IOCTL_COOKIE); /* NOTE(review): macro not visible here; presumably validates cbIn/cbOut and returns early on mismatch - confirm. */
    2309             if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic))) /* non-zero: magic string differs */
    2310             {
    2311                 OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
    2312                 pReq->Hdr.rc = VERR_INVALID_MAGIC;
    2313                 return 0; /* the failure is reported through Hdr.rc, not the return value */
    2314             }
    2315 
    2316             /*
    2317              * Match the version.
    2318              * The current logic is very simple, match the major interface version.
    2319              */
    2320             if (    pReq->u.In.u32MinVersion > SUPDRV_IOC_VERSION /* caller's minimum is newer than this driver's interface */
    2321                 ||  (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRV_IOC_VERSION & 0xffff0000)) /* upper 16 bits differ */
    2322             {
    2323                 OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
    2324                             pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRV_IOC_VERSION));
    2325                 pReq->u.Out.u32Cookie         = 0xffffffff; /* all-ones placeholder on mismatch */
    2326                 pReq->u.Out.u32SessionCookie  = 0xffffffff;
    2327                 pReq->u.Out.u32SessionVersion = 0xffffffff;
    2328                 pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION; /* the driver's own version is still reported */
    2329                 pReq->u.Out.pSession          = NULL;
    2330                 pReq->u.Out.cFunctions        = 0;
    2331                 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
    2332                 return 0;
    2333             }
    2334 
    2335             /*
    2336              * Fill in return data and be gone.
    2337              * N.B. The first one to change SUPDRV_IOC_VERSION shall make sure that
    2338              *      u32SessionVersion <= u32ReqVersion!
    2339              */
    2340             /** @todo Somehow validate the client and negotiate a secure cookie... */
    2341             pReq->u.Out.u32Cookie         = pDevExt->u32Cookie;
    2342             pReq->u.Out.u32SessionCookie  = pSession->u32Cookie;
    2343             pReq->u.Out.u32SessionVersion = SUPDRV_IOC_VERSION;
    2344             pReq->u.Out.u32DriverVersion  = SUPDRV_IOC_VERSION;
    2345             pReq->u.Out.pSession          = pSession;
    2346             pReq->u.Out.cFunctions        = 0;
    2347             pReq->Hdr.rc = VINF_SUCCESS;
    2348             return 0;
    2349         }
    2350 
    2351         case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_VT_CAPS):
    2352         {
    2353             /* validate */
    2354             PSUPVTCAPS pReq = (PSUPVTCAPS)pReqHdr;
    2355             REQ_CHECK_SIZES(SUP_IOCTL_VT_CAPS);
    2356 
    2357             /* execute */
    2358             pReq->Hdr.rc = SUPR0QueryVTCaps(pSession, &pReq->u.Out.Caps);
    2359             if (RT_FAILURE(pReq->Hdr.rc))
    2360                 pReq->Hdr.cbOut = sizeof(pReq->Hdr); /* on failure the output size is cut down to the header alone */
    2361             return 0;
    2362         }
    2363 
    2364         default:
    2365             Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
    2366             break;
    2367     }
    2368     return VERR_GENERAL_FAILURE; /* only the default case breaks out of the switch to get here */
    2369 }
    2370 
    2371 
    2372 /**
    2373  * I/O Control worker.
    2374  *
    2375  * @returns IPRT status code.
    2376  * @retval  VERR_INVALID_PARAMETER if the request is invalid.
    2377  *
    2378  * @param   uIOCtl      Function number.
    2379  * @param   pDevExt     Device extension.
    2380  * @param   pSession    Session data.
    2381  * @param   pReqHdr     The request header.
    2382  */
    2383 int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr, size_t cbReq)
    2384 {
    2385     int rc;
    2386     VBOXDRV_IOCTL_ENTRY(pSession, uIOCtl, pReqHdr);
    2387 
    2388     /*
    2389      * Validate the request.
    2390      */
    2391     if (RT_UNLIKELY(cbReq < sizeof(*pReqHdr)))
    2392     {
    2393         OSDBGPRINT(("vboxdrv: Bad ioctl request size; cbReq=%#lx\n", (long)cbReq));
    2394         VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
    2395         return VERR_INVALID_PARAMETER;
    2396     }
    2397     if (RT_UNLIKELY(   (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
    2398                     || pReqHdr->cbIn < sizeof(*pReqHdr)
    2399                     || pReqHdr->cbIn > cbReq
    2400                     || pReqHdr->cbOut < sizeof(*pReqHdr)
    2401                     || pReqHdr->cbOut > cbReq))
    2402     {
    2403         OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
    2404                     (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
    2405         VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
    2406         return VERR_INVALID_PARAMETER;
    2407     }
    2408     if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
    2409     {
    2410         OSDBGPRINT(("vboxdrv: Invalid pSession value %p (ioctl=%p)\n", pSession, (void *)uIOCtl));
    2411         VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
    2412         return VERR_INVALID_PARAMETER;
    2413     }
    2414     if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
    2415     {
    2416         if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
    2417         {
    2418             OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
    2419             VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
    2420             return VERR_INVALID_PARAMETER;
    2421         }
    2422     }
    2423     else if (RT_UNLIKELY(    pReqHdr->u32Cookie != pDevExt->u32Cookie
    2424                          ||  pReqHdr->u32SessionCookie != pSession->u32Cookie))
    2425     {
    2426         OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
    2427         VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, VERR_INVALID_PARAMETER, VINF_SUCCESS);
    2428         return VERR_INVALID_PARAMETER;
    2429     }
    2430 
    2431     /*
    2432      * Hand it to an inner function to avoid lots of unnecessary return tracepoints.
    2433      */
    2434     if (pSession->fUnrestricted)
    2435         rc = supdrvIOCtlInnerUnrestricted(uIOCtl, pDevExt, pSession, pReqHdr);
    2436     else
    2437         rc = supdrvIOCtlInnerRestricted(uIOCtl, pDevExt, pSession, pReqHdr);
    2438 
    2439     VBOXDRV_IOCTL_RETURN(pSession, uIOCtl, pReqHdr, pReqHdr->rc, rc);
    2440     return rc;
    2441 }
    2442 
    2443 
    2444 /**
    2445  * Inter-Driver Communication (IDC) worker.
    2446  *
    2447  * @returns VBox status code.
    2448  * @retval  VINF_SUCCESS on success.
    2449  * @retval  VERR_INVALID_PARAMETER if the request is invalid.
    2450  * @retval  VERR_NOT_SUPPORTED if the request isn't supported.
    2451  *
    2452  * @param   uReq        The request (function) code.
    2453  * @param   pDevExt     Device extention.
    2454  * @param   pSession    Session data.
    2455  * @param   pReqHdr     The request header.
    2456  */
    2457 int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
    2458 {
    2459     /*
    2460      * The OS specific code has already validated the pSession
    2461      * pointer, and the request size being greater or equal to
    2462      * size of the header.
    2463      *
    2464      * So, just check that pSession is a kernel context session.
    2465      */
    2466     if (RT_UNLIKELY(    pSession
    2467                     &&  pSession->R0Process != NIL_RTR0PROCESS))
    2468         return VERR_INVALID_PARAMETER;
    2469 
    2470 /*
    2471  * Validation macro.
    2472  */
    2473 #define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
    2474     do { \
    2475         if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
    2476         { \
    2477             OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
    2478                         (long)pReqHdr->cb, (long)(cbExpect))); \
    2479             return pReqHdr->rc = VERR_INVALID_PARAMETER; \
    2480         } \
    2481     } while (0)
    2482 
    2483     switch (uReq)
    2484     {
    2485         case SUPDRV_IDC_REQ_CONNECT:
    2486         {
    2487             PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
    2488             REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));
    2489 
    2490             /*
    2491              * Validate the cookie and other input.
    2492              */
    2493             if (pReq->Hdr.pSession != NULL)
    2494             {
    2495                 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Hdr.pSession=%p expected NULL!\n", pReq->Hdr.pSession));
    2496                 return pReqHdr->rc = VERR_INVALID_PARAMETER;
    2497             }
    2498             if (pReq->u.In.u32MagicCookie != SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE)
    2499             {
    2500                 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: u32MagicCookie=%#x expected %#x!\n",
    2501                             (unsigned)pReq->u.In.u32MagicCookie, (unsigned)SUPDRVIDCREQ_CONNECT_MAGIC_COOKIE));
    2502                 return pReqHdr->rc = VERR_INVALID_PARAMETER;
    2503             }
    2504             if (    pReq->u.In.uMinVersion > pReq->u.In.uReqVersion
    2505                 ||  (pReq->u.In.uMinVersion & UINT32_C(0xffff0000)) != (pReq->u.In.uReqVersion & UINT32_C(0xffff0000)))
    2506             {
    2507                 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: uMinVersion=%#x uMaxVersion=%#x doesn't match!\n",
    2508                             pReq->u.In.uMinVersion, pReq->u.In.uReqVersion));
    2509                 return pReqHdr->rc = VERR_INVALID_PARAMETER;
    2510             }
    2511             if (pSession != NULL)
    2512             {
    2513                 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: pSession=%p expected NULL!\n", pSession));
    2514                 return pReqHdr->rc = VERR_INVALID_PARAMETER;
    2515             }
    2516 
    2517             /*
    2518              * Match the version.
    2519              * The current logic is very simple, match the major interface version.
    2520              */
    2521             if (    pReq->u.In.uMinVersion > SUPDRV_IDC_VERSION
    2522                 ||  (pReq->u.In.uMinVersion & 0xffff0000) != (SUPDRV_IDC_VERSION & 0xffff0000))
    2523             {
    2524                 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
    2525                             pReq->u.In.uReqVersion, pReq->u.In.uMinVersion, (unsigned)SUPDRV_IDC_VERSION));
    2526                 pReq->u.Out.pSession        = NULL;
    2527                 pReq->u.Out.uSessionVersion = 0xffffffff;
    2528                 pReq->u.Out.uDriverVersion  = SUPDRV_IDC_VERSION;
    2529                 pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
    2530                 pReq->Hdr.rc = VERR_VERSION_MISMATCH;
    2531                 return VINF_SUCCESS;
    2532             }
    2533 
    2534             pReq->u.Out.pSession        = NULL;
    2535             pReq->u.Out.uSessionVersion = SUPDRV_IDC_VERSION;
    2536             pReq->u.Out.uDriverVersion  = SUPDRV_IDC_VERSION;
    2537             pReq->u.Out.uDriverRevision = VBOX_SVN_REV;
    2538 
    2539             pReq->Hdr.rc = supdrvCreateSession(pDevExt, false /* fUser */, true /*fUnrestricted*/, &pSession);
    2540             if (RT_FAILURE(pReq->Hdr.rc))
    2541             {
    2542                 OSDBGPRINT(("SUPDRV_IDC_REQ_CONNECT: failed to create session, rc=%d\n", pReq->Hdr.rc));
    2543                 return VINF_SUCCESS;
    2544             }
    2545 
    2546             pReq->u.Out.pSession = pSession;
    2547             pReq->Hdr.pSession = pSession;
    2548 
    2549             return VINF_SUCCESS;
    2550         }
    2551 
    2552         case SUPDRV_IDC_REQ_DISCONNECT:
    2553         {
    2554             REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));
    2555 
    2556             supdrvSessionRelease(pSession);
    2557             return pReqHdr->rc = VINF_SUCCESS;
    2558         }
    2559 
    2560         case SUPDRV_IDC_REQ_GET_SYMBOL:
    2561         {
    2562             PSUPDRVIDCREQGETSYM pReq = (PSUPDRVIDCREQGETSYM)pReqHdr;
    2563             REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));
    2564 
    2565             pReq->Hdr.rc = supdrvIDC_LdrGetSymbol(pDevExt, pSession, pReq);
    2566             return VINF_SUCCESS;
    2567         }
    2568 
    2569         case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
    2570         {
    2571             PSUPDRVIDCREQCOMPREGFACTORY pReq = (PSUPDRVIDCREQCOMPREGFACTORY)pReqHdr;
    2572             REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));
    2573 
    2574             pReq->Hdr.rc = SUPR0ComponentRegisterFactory(pSession, pReq->u.In.pFactory);
    2575             return VINF_SUCCESS;
    2576         }
    2577 
    2578         case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
    2579         {
    2580             PSUPDRVIDCREQCOMPDEREGFACTORY pReq = (PSUPDRVIDCREQCOMPDEREGFACTORY)pReqHdr;
    2581             REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));
    2582 
    2583             pReq->Hdr.rc = SUPR0ComponentDeregisterFactory(pSession, pReq->u.In.pFactory);
    2584             return VINF_SUCCESS;
    2585         }
    2586 
    2587         default:
    2588             Log(("Unknown IDC %#lx\n", (long)uReq));
    2589             break;
    2590     }
    2591 
    2592 #undef REQ_CHECK_IDC_SIZE
    2593     return VERR_NOT_SUPPORTED;
    2594 }
    2595 
    2596 
    2597 /**
    2598  * Register a object for reference counting.
    2599  * The object is registered with one reference in the specified session.
    2600  *
    2601  * @returns Unique identifier on success (pointer).
    2602  *          All future reference must use this identifier.
    2603  * @returns NULL on failure.
    2604  * @param   pfnDestructor   The destructore function which will be called when the reference count reaches 0.
    2605  * @param   pvUser1         The first user argument.
    2606  * @param   pvUser2         The second user argument.
    2607  */
    2608 SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
    2609 {
    2610     PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    2611     PSUPDRVOBJ      pObj;
    2612     PSUPDRVUSAGE    pUsage;
    2613 
    2614     /*
    2615      * Validate the input.
    2616      */
    2617     AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
    2618     AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
    2619     AssertPtrReturn(pfnDestructor, NULL);
    2620 
    2621     /*
    2622      * Allocate and initialize the object.
    2623      */
    2624     pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
    2625     if (!pObj)
    2626         return NULL;
    2627     pObj->u32Magic      = SUPDRVOBJ_MAGIC;
    2628     pObj->enmType       = enmType;
    2629     pObj->pNext         = NULL;
    2630     pObj->cUsage        = 1;
    2631     pObj->pfnDestructor = pfnDestructor;
    2632     pObj->pvUser1       = pvUser1;
    2633     pObj->pvUser2       = pvUser2;
    2634     pObj->CreatorUid    = pSession->Uid;
    2635     pObj->CreatorGid    = pSession->Gid;
    2636     pObj->CreatorProcess= pSession->Process;
    2637     supdrvOSObjInitCreator(pObj, pSession);
    2638 
    2639     /*
    2640      * Allocate the usage record.
    2641      * (We keep freed usage records around to simplify SUPR0ObjAddRefEx().)
    2642      */
    2643     RTSpinlockAcquire(pDevExt->Spinlock);
    2644 
    2645     pUsage = pDevExt->pUsageFree;
    2646     if (pUsage)
    2647         pDevExt->pUsageFree = pUsage->pNext;
    2648     else
    2649     {
    2650         RTSpinlockRelease(pDevExt->Spinlock);
    2651         pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
    2652         if (!pUsage)
    2653         {
    2654             RTMemFree(pObj);
    2655             return NULL;
    2656         }
    2657         RTSpinlockAcquire(pDevExt->Spinlock);
    2658     }
    2659 
    2660     /*
    2661      * Insert the object and create the session usage record.
    2662      */
    2663     /* The object. */
    2664     pObj->pNext         = pDevExt->pObjs;
    2665     pDevExt->pObjs      = pObj;
    2666 
    2667     /* The session record. */
    2668     pUsage->cUsage      = 1;
    2669     pUsage->pObj        = pObj;
    2670     pUsage->pNext       = pSession->pUsage;
    2671     /* Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext)); */
    2672     pSession->pUsage    = pUsage;
    2673 
    2674     RTSpinlockRelease(pDevExt->Spinlock);
    2675 
    2676     Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
    2677     return pObj;
    2678 }
    2679 
    2680 
    2681 /**
    2682  * Increment the reference counter for the object associating the reference
    2683  * with the specified session.
    2684  *
    2685  * @returns IPRT status code.
    2686  * @param   pvObj           The identifier returned by SUPR0ObjRegister().
    2687  * @param   pSession        The session which is referencing the object.
    2688  *
    2689  * @remarks The caller should not own any spinlocks and must carefully protect
    2690  *          itself against potential race with the destructor so freed memory
    2691  *          isn't accessed here.
    2692  */
    2693 SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
    2694 {
    2695     return SUPR0ObjAddRefEx(pvObj, pSession, false /* fNoBlocking */);
    2696 }
    2697 
    2698 
    2699 /**
    2700  * Increment the reference counter for the object associating the reference
    2701  * with the specified session.
    2702  *
    2703  * @returns IPRT status code.
    2704  * @retval  VERR_TRY_AGAIN if fNoBlocking was set and a new usage record
    2705  *          couldn't be allocated. (If you see this you're not doing the right
    2706  *          thing and it won't ever work reliably.)
    2707  *
    2708  * @param   pvObj           The identifier returned by SUPR0ObjRegister().
    2709  * @param   pSession        The session which is referencing the object.
    2710  * @param   fNoBlocking     Set if it's not OK to block. Never try to make the
    2711  *                          first reference to an object in a session with this
    2712  *                          argument set.
    2713  *
    2714  * @remarks The caller should not own any spinlocks and must carefully protect
    2715  *          itself against potential race with the destructor so freed memory
    2716  *          isn't accessed here.
    2717  */
    2718 SUPR0DECL(int) SUPR0ObjAddRefEx(void *pvObj, PSUPDRVSESSION pSession, bool fNoBlocking)
    2719 {
    2720     PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    2721     PSUPDRVOBJ      pObj        = (PSUPDRVOBJ)pvObj;
    2722     int             rc          = VINF_SUCCESS;
    2723     PSUPDRVUSAGE    pUsagePre;
    2724     PSUPDRVUSAGE    pUsage;
    2725 
    2726     /*
    2727      * Validate the input.
    2728      * Be ready for the destruction race (someone might be stuck in the
    2729      * destructor waiting a lock we own).
    2730      */
    2731     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    2732     AssertPtrReturn(pObj, VERR_INVALID_POINTER);
    2733     AssertMsgReturn(pObj->u32Magic == SUPDRVOBJ_MAGIC || pObj->u32Magic == SUPDRVOBJ_MAGIC_DEAD,
    2734                     ("Invalid pvObj=%p magic=%#x (expected %#x or %#x)\n", pvObj, pObj->u32Magic, SUPDRVOBJ_MAGIC, SUPDRVOBJ_MAGIC_DEAD),
    2735                     VERR_INVALID_PARAMETER);
    2736 
    2737     RTSpinlockAcquire(pDevExt->Spinlock);
    2738 
    2739     if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
    2740     {
    2741         RTSpinlockRelease(pDevExt->Spinlock);
    2742 
    2743         AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
    2744         return VERR_WRONG_ORDER;
    2745     }
    2746 
    2747     /*
    2748      * Preallocate the usage record if we can.
    2749      */
    2750     pUsagePre = pDevExt->pUsageFree;
    2751     if (pUsagePre)
    2752         pDevExt->pUsageFree = pUsagePre->pNext;
    2753     else if (!fNoBlocking)
    2754     {
    2755         RTSpinlockRelease(pDevExt->Spinlock);
    2756         pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
    2757         if (!pUsagePre)
    2758             return VERR_NO_MEMORY;
    2759 
    2760         RTSpinlockAcquire(pDevExt->Spinlock);
    2761         if (RT_UNLIKELY(pObj->u32Magic != SUPDRVOBJ_MAGIC))
    2762         {
    2763             RTSpinlockRelease(pDevExt->Spinlock);
    2764 
    2765             AssertMsgFailed(("pvObj=%p magic=%#x\n", pvObj, pObj->u32Magic));
    2766             return VERR_WRONG_ORDER;
    2767         }
    2768     }
    2769 
    2770     /*
    2771      * Reference the object.
    2772      */
    2773     pObj->cUsage++;
    2774 
    2775     /*
    2776      * Look for the session record.
    2777      */
    2778     for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
    2779     {
    2780         /*Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
    2781         if (pUsage->pObj == pObj)
    2782             break;
    2783     }
    2784     if (pUsage)
    2785         pUsage->cUsage++;
    2786     else if (pUsagePre)
    2787     {
    2788         /* create a new session record. */
    2789         pUsagePre->cUsage   = 1;
    2790         pUsagePre->pObj     = pObj;
    2791         pUsagePre->pNext    = pSession->pUsage;
    2792         pSession->pUsage    = pUsagePre;
    2793         /*Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));*/
    2794 
    2795         pUsagePre = NULL;
    2796     }
    2797     else
    2798     {
    2799         pObj->cUsage--;
    2800         rc = VERR_TRY_AGAIN;
    2801     }
    2802 
    2803     /*
    2804      * Put any unused usage record into the free list..
    2805      */
    2806     if (pUsagePre)
    2807     {
    2808         pUsagePre->pNext = pDevExt->pUsageFree;
    2809         pDevExt->pUsageFree = pUsagePre;
    2810     }
    2811 
    2812     RTSpinlockRelease(pDevExt->Spinlock);
    2813 
    2814     return rc;
    2815 }
    2816 
    2817 
    2818 /**
    2819  * Decrement / destroy a reference counter record for an object.
    2820  *
    2821  * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
    2822  *
    2823  * @returns IPRT status code.
    2824  * @retval  VINF_SUCCESS if not destroyed.
    2825  * @retval  VINF_OBJECT_DESTROYED if it's destroyed by this release call.
    2826  * @retval  VERR_INVALID_PARAMETER if the object isn't valid. Will assert in
    2827  *          string builds.
    2828  *
    2829  * @param   pvObj           The identifier returned by SUPR0ObjRegister().
    2830  * @param   pSession        The session which is referencing the object.
    2831  */
    2832 SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
    2833 {
    2834     PSUPDRVDEVEXT       pDevExt     = pSession->pDevExt;
    2835     PSUPDRVOBJ          pObj        = (PSUPDRVOBJ)pvObj;
    2836     int                 rc          = VERR_INVALID_PARAMETER;
    2837     PSUPDRVUSAGE        pUsage;
    2838     PSUPDRVUSAGE        pUsagePrev;
    2839 
    2840     /*
    2841      * Validate the input.
    2842      */
    2843     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    2844     AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
    2845                     ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
    2846                     VERR_INVALID_PARAMETER);
    2847 
    2848     /*
    2849      * Acquire the spinlock and look for the usage record.
    2850      */
    2851     RTSpinlockAcquire(pDevExt->Spinlock);
    2852 
    2853     for (pUsagePrev = NULL, pUsage = pSession->pUsage;
    2854          pUsage;
    2855          pUsagePrev = pUsage, pUsage = pUsage->pNext)
    2856     {
    2857         /*Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));*/
    2858         if (pUsage->pObj == pObj)
    2859         {
    2860             rc = VINF_SUCCESS;
    2861             AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
    2862             if (pUsage->cUsage > 1)
    2863             {
    2864                 pObj->cUsage--;
    2865                 pUsage->cUsage--;
    2866             }
    2867             else
    2868             {
    2869                 /*
    2870                  * Free the session record.
    2871                  */
    2872                 if (pUsagePrev)
    2873                     pUsagePrev->pNext = pUsage->pNext;
    2874                 else
    2875                     pSession->pUsage = pUsage->pNext;
    2876                 pUsage->pNext = pDevExt->pUsageFree;
    2877                 pDevExt->pUsageFree = pUsage;
    2878 
    2879                 /* What about the object? */
    2880                 if (pObj->cUsage > 1)
    2881                     pObj->cUsage--;
    2882                 else
    2883                 {
    2884                     /*
    2885                      * Object is to be destroyed, unlink it.
    2886                      */
    2887                     pObj->u32Magic = SUPDRVOBJ_MAGIC_DEAD;
    2888                     rc = VINF_OBJECT_DESTROYED;
    2889                     if (pDevExt->pObjs == pObj)
    2890                         pDevExt->pObjs = pObj->pNext;
    2891                     else
    2892                     {
    2893                         PSUPDRVOBJ pObjPrev;
    2894                         for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
    2895                             if (pObjPrev->pNext == pObj)
    2896                             {
    2897                                 pObjPrev->pNext = pObj->pNext;
    2898                                 break;
    2899                             }
    2900                         Assert(pObjPrev);
    2901                     }
    2902                 }
    2903             }
    2904             break;
    2905         }
    2906     }
    2907 
    2908     RTSpinlockRelease(pDevExt->Spinlock);
    2909 
    2910     /*
    2911      * Call the destructor and free the object if required.
    2912      */
    2913     if (rc == VINF_OBJECT_DESTROYED)
    2914     {
    2915         Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
    2916              pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
    2917         if (pObj->pfnDestructor)
    2918             pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
    2919         RTMemFree(pObj);
    2920     }
    2921 
    2922     AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
    2923     return rc;
    2924 }
    2925 
    2926 
    2927 /**
    2928  * Verifies that the current process can access the specified object.
    2929  *
    2930  * @returns The following IPRT status code:
    2931  * @retval  VINF_SUCCESS if access was granted.
    2932  * @retval  VERR_PERMISSION_DENIED if denied access.
    2933  * @retval  VERR_INVALID_PARAMETER if invalid parameter.
    2934  *
    2935  * @param   pvObj           The identifier returned by SUPR0ObjRegister().
    2936  * @param   pSession        The session which wishes to access the object.
    2937  * @param   pszObjName      Object string name. This is optional and depends on the object type.
    2938  *
    2939  * @remark  The caller is responsible for making sure the object isn't removed while
    2940  *          we're inside this function. If uncertain about this, just call AddRef before calling us.
    2941  */
    2942 SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
    2943 {
    2944     PSUPDRVOBJ  pObj = (PSUPDRVOBJ)pvObj;
    2945     int         rc;
    2946 
    2947     /*
    2948      * Validate the input.
    2949      */
    2950     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    2951     AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
    2952                     ("Invalid pvObj=%p magic=%#x (exepcted %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
    2953                     VERR_INVALID_PARAMETER);
    2954 
    2955     /*
    2956      * Check access. (returns true if a decision has been made.)
    2957      */
    2958     rc = VERR_INTERNAL_ERROR;
    2959     if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
    2960         return rc;
    2961 
    2962     /*
    2963      * Default policy is to allow the user to access his own
    2964      * stuff but nothing else.
    2965      */
    2966     if (pObj->CreatorUid == pSession->Uid)
    2967         return VINF_SUCCESS;
    2968     return VERR_PERMISSION_DENIED;
    2969 }
    2970 
    2971 
    2972 /**
    2973  * Lock pages.
    2974  *
    2975  * @returns IPRT status code.
    2976  * @param   pSession    Session to which the locked memory should be associated.
    2977  * @param   pvR3        Start of the memory range to lock.
    2978  *                      This must be page aligned.
    2979  * @param   cPages      Number of pages to lock.
    2980  * @param   paPages     Where to put the physical addresses of locked memory.
    2981  */
    2982 SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
    2983 {
    2984     int             rc;
    2985     SUPDRVMEMREF    Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
    2986     const size_t    cb = (size_t)cPages << PAGE_SHIFT;
    2987     LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));
    2988 
    2989     /*
    2990      * Verify input.
    2991      */
    2992     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    2993     AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    2994     if (    RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
    2995         ||  !pvR3)
    2996     {
    2997         Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
    2998         return VERR_INVALID_PARAMETER;
    2999     }
    3000 
    3001     /*
    3002      * Let IPRT do the job.
    3003      */
    3004     Mem.eType = MEMREF_TYPE_LOCKED;
    3005     rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
    3006     if (RT_SUCCESS(rc))
    3007     {
    3008         uint32_t iPage = cPages;
    3009         AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
    3010         AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));
    3011 
    3012         while (iPage-- > 0)
    3013         {
    3014             paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
    3015             if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
    3016             {
    3017                 AssertMsgFailed(("iPage=%d\n", iPage));
    3018                 rc = VERR_INTERNAL_ERROR;
    3019                 break;
    3020             }
    3021         }
    3022         if (RT_SUCCESS(rc))
    3023             rc = supdrvMemAdd(&Mem, pSession);
    3024         if (RT_FAILURE(rc))
    3025         {
    3026             int rc2 = RTR0MemObjFree(Mem.MemObj, false);
    3027             AssertRC(rc2);
    3028         }
    3029     }
    3030 
    3031     return rc;
    3032 }
    3033 
    3034 
    3035 /**
    3036  * Unlocks the memory pointed to by pv.
    3037  *
    3038  * @returns IPRT status code.
    3039  * @param   pSession    Session to which the memory was locked.
    3040  * @param   pvR3        Memory to unlock.
    3041  */
    3042 SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
    3043 {
    3044     LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
    3045     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3046     return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
    3047 }
    3048 
    3049 
    3050 /**
    3051  * Allocates a chunk of page aligned memory with contiguous and fixed physical
    3052  * backing.
    3053  *
    3054  * @returns IPRT status code.
    3055  * @param   pSession    Session data.
    3056  * @param   cPages      Number of pages to allocate.
    3057  * @param   ppvR0       Where to put the address of Ring-0 mapping the allocated memory.
    3058  * @param   ppvR3       Where to put the address of Ring-3 mapping the allocated memory.
    3059  * @param   pHCPhys     Where to put the physical address of allocated memory.
    3060  */
    3061 SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
    3062 {
    3063     int             rc;
    3064     SUPDRVMEMREF    Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
    3065     LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
    3066 
    3067     /*
    3068      * Validate input.
    3069      */
    3070     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3071     if (!ppvR3 || !ppvR0 || !pHCPhys)
    3072     {
    3073         Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
    3074              pSession, ppvR0, ppvR3, pHCPhys));
    3075         return VERR_INVALID_PARAMETER;
    3076 
    3077     }
    3078     if (cPages < 1 || cPages >= 256)
    3079     {
    3080         Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
    3081         return VERR_PAGE_COUNT_OUT_OF_RANGE;
    3082     }
    3083 
    3084     /*
    3085      * Let IPRT do the job.
    3086      */
    3087     rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
    3088     if (RT_SUCCESS(rc))
    3089     {
    3090         int rc2;
    3091         rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
    3092                                RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
    3093         if (RT_SUCCESS(rc))
    3094         {
    3095             Mem.eType = MEMREF_TYPE_CONT;
    3096             rc = supdrvMemAdd(&Mem, pSession);
    3097             if (!rc)
    3098             {
    3099                 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
    3100                 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
    3101                 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
    3102                 return 0;
    3103             }
    3104 
    3105             rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
    3106             AssertRC(rc2);
    3107         }
    3108         rc2 = RTR0MemObjFree(Mem.MemObj, false);
    3109         AssertRC(rc2);
    3110     }
    3111 
    3112     return rc;
    3113 }
    3114 
    3115 
    3116 /**
    3117  * Frees memory allocated using SUPR0ContAlloc().
    3118  *
    3119  * @returns IPRT status code.
    3120  * @param   pSession    The session to which the memory was allocated.
    3121  * @param   uPtr        Pointer to the memory (ring-3 or ring-0).
    3122  */
    3123 SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
    3124 {
    3125     LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
    3126     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3127     return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
    3128 }
    3129 
    3130 
    3131 /**
    3132  * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
    3133  *
    3134  * The memory isn't zeroed.
    3135  *
    3136  * @returns IPRT status code.
    3137  * @param   pSession    Session data.
    3138  * @param   cPages      Number of pages to allocate.
    3139  * @param   ppvR0       Where to put the address of Ring-0 mapping of the allocated memory.
    3140  * @param   ppvR3       Where to put the address of Ring-3 mapping of the allocated memory.
    3141  * @param   paPages     Where to put the physical addresses of allocated memory.
    3142  */
    3143 SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
    3144 {
    3145     unsigned        iPage;
    3146     int             rc;
    3147     SUPDRVMEMREF    Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
    3148     LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
    3149 
    3150     /*
    3151      * Validate input.
    3152      */
    3153     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3154     if (!ppvR3 || !ppvR0 || !paPages)
    3155     {
    3156         Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
    3157              pSession, ppvR3, ppvR0, paPages));
    3158         return VERR_INVALID_PARAMETER;
    3159 
    3160     }
    3161     if (cPages < 1 || cPages >= 256)
    3162     {
    3163         Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256.\n", cPages));
    3164         return VERR_PAGE_COUNT_OUT_OF_RANGE;
    3165     }
    3166 
    3167     /*
    3168      * Let IPRT do the work.
    3169      */
    3170     rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
    3171     if (RT_SUCCESS(rc))
    3172     {
    3173         int rc2;
    3174         rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
    3175                                RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
    3176         if (RT_SUCCESS(rc))
    3177         {
    3178             Mem.eType = MEMREF_TYPE_LOW;
    3179             rc = supdrvMemAdd(&Mem, pSession);
    3180             if (!rc)
    3181             {
    3182                 for (iPage = 0; iPage < cPages; iPage++)
    3183                 {
    3184                     paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
    3185                     AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%RHp\n", paPages[iPage]));
    3186                 }
    3187                 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
    3188                 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
    3189                 return 0;
    3190             }
    3191 
    3192             rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
    3193             AssertRC(rc2);
    3194         }
    3195 
    3196         rc2 = RTR0MemObjFree(Mem.MemObj, false);
    3197         AssertRC(rc2);
    3198     }
    3199 
    3200     return rc;
    3201 }
    3202 
    3203 
    3204 /**
    3205  * Frees memory allocated using SUPR0LowAlloc().
    3206  *
    3207  * @returns IPRT status code.
    3208  * @param   pSession    The session to which the memory was allocated.
    3209  * @param   uPtr        Pointer to the memory (ring-3 or ring-0).
    3210  */
    3211 SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
    3212 {
    3213     LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
    3214     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3215     return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
    3216 }
    3217 
    3218 
    3219 
    3220 /**
    3221  * Allocates a chunk of memory with both R0 and R3 mappings.
    3222  * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
    3223  *
    3224  * @returns IPRT status code.
    3225  * @param   pSession    The session to associated the allocation with.
    3226  * @param   cb          Number of bytes to allocate.
    3227  * @param   ppvR0       Where to store the address of the Ring-0 mapping.
    3228  * @param   ppvR3       Where to store the address of the Ring-3 mapping.
    3229  */
    3230 SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
    3231 {
    3232     int             rc;
    3233     SUPDRVMEMREF    Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
    3234     LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
    3235 
    3236     /*
    3237      * Validate input.
    3238      */
    3239     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3240     AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
    3241     AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
    3242     if (cb < 1 || cb >= _4M)
    3243     {
    3244         Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
    3245         return VERR_INVALID_PARAMETER;
    3246     }
    3247 
    3248     /*
    3249      * Let IPRT do the work.
    3250      */
    3251     rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
    3252     if (RT_SUCCESS(rc))
    3253     {
    3254         int rc2;
    3255         rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
    3256                                RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
    3257         if (RT_SUCCESS(rc))
    3258         {
    3259             Mem.eType = MEMREF_TYPE_MEM;
    3260             rc = supdrvMemAdd(&Mem, pSession);
    3261             if (!rc)
    3262             {
    3263                 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
    3264                 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
    3265                 return VINF_SUCCESS;
    3266             }
    3267 
    3268             rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
    3269             AssertRC(rc2);
    3270         }
    3271 
    3272         rc2 = RTR0MemObjFree(Mem.MemObj, false);
    3273         AssertRC(rc2);
    3274     }
    3275 
    3276     return rc;
    3277 }
    3278 
    3279 
    3280 /**
    3281  * Get the physical addresses of memory allocated using SUPR0MemAlloc().
    3282  *
    3283  * @returns IPRT status code.
    3284  * @param   pSession        The session to which the memory was allocated.
    3285  * @param   uPtr            The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
    3286  * @param   paPages         Where to store the physical addresses.
    3287  */
    3288 SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
    3289 {
    3290     PSUPDRVBUNDLE pBundle;
    3291     LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
    3292 
    3293     /*
    3294      * Validate input.
    3295      */
    3296     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3297     AssertPtrReturn(paPages, VERR_INVALID_POINTER);
    3298     AssertReturn(uPtr, VERR_INVALID_PARAMETER);
    3299 
    3300     /*
    3301      * Search for the address.
    3302      */
    3303     RTSpinlockAcquire(pSession->Spinlock);
    3304     for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    3305     {
    3306         if (pBundle->cUsed > 0)
    3307         {
    3308             unsigned i;
    3309             for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
    3310             {
    3311                 if (    pBundle->aMem[i].eType == MEMREF_TYPE_MEM
    3312                     &&  pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
    3313                     &&  (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
    3314                          || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
    3315                              && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
    3316                         )
    3317                    )
    3318                 {
    3319                     const size_t cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
    3320                     size_t iPage;
    3321                     for (iPage = 0; iPage < cPages; iPage++)
    3322                     {
    3323                         paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
    3324                         paPages[iPage].uReserved = 0;
    3325                     }
    3326                     RTSpinlockRelease(pSession->Spinlock);
    3327                     return VINF_SUCCESS;
    3328                 }
    3329             }
    3330         }
    3331     }
    3332     RTSpinlockRelease(pSession->Spinlock);
    3333     Log(("Failed to find %p!!!\n", (void *)uPtr));
    3334     return VERR_INVALID_PARAMETER;
    3335 }
    3336 
    3337 
    3338 /**
    3339  * Free memory allocated by SUPR0MemAlloc().
    3340  *
    3341  * @returns IPRT status code.
    3342  * @param   pSession        The session owning the allocation.
    3343  * @param   uPtr            The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
    3344  */
    3345 SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
    3346 {
    3347     LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
    3348     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3349     return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
    3350 }
    3351 
    3352 
    3353 /**
    3354  * Allocates a chunk of memory with a kernel or/and a user mode mapping.
    3355  *
    3356  * The memory is fixed and it's possible to query the physical addresses using
    3357  * SUPR0MemGetPhys().
    3358  *
    3359  * @returns IPRT status code.
    3360  * @param   pSession    The session to associated the allocation with.
    3361  * @param   cPages      The number of pages to allocate.
    3362  * @param   fFlags      Flags, reserved for the future. Must be zero.
    3363  * @param   ppvR3       Where to store the address of the Ring-3 mapping.
    3364  *                      NULL if no ring-3 mapping.
    3365  * @param   ppvR3       Where to store the address of the Ring-0 mapping.
    3366  *                      NULL if no ring-0 mapping.
    3367  * @param   paPages     Where to store the addresses of the pages. Optional.
    3368  */
    3369 SUPR0DECL(int) SUPR0PageAllocEx(PSUPDRVSESSION pSession, uint32_t cPages, uint32_t fFlags, PRTR3PTR ppvR3, PRTR0PTR ppvR0, PRTHCPHYS paPages)
    3370 {
    3371     int             rc;
    3372     SUPDRVMEMREF    Mem = { NIL_RTR0MEMOBJ, NIL_RTR0MEMOBJ, MEMREF_TYPE_UNUSED };
    3373     LogFlow(("SUPR0PageAlloc: pSession=%p cb=%d ppvR3=%p\n", pSession, cPages, ppvR3));
    3374 
    3375     /*
    3376      * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
    3377      */
    3378     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3379     AssertPtrNullReturn(ppvR3, VERR_INVALID_POINTER);
    3380     AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
    3381     AssertReturn(ppvR3 || ppvR0, VERR_INVALID_PARAMETER);
    3382     AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
    3383     if (cPages < 1 || cPages > VBOX_MAX_ALLOC_PAGE_COUNT)
    3384     {
    3385         Log(("SUPR0PageAlloc: Illegal request cb=%u; must be greater than 0 and smaller than %uMB (VBOX_MAX_ALLOC_PAGE_COUNT pages).\n", cPages, VBOX_MAX_ALLOC_PAGE_COUNT * (_1M / _4K)));
    3386         return VERR_PAGE_COUNT_OUT_OF_RANGE;
    3387     }
    3388 
    3389     /*
    3390      * Let IPRT do the work.
    3391      */
    3392     if (ppvR0)
    3393         rc = RTR0MemObjAllocPage(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, true /* fExecutable */);
    3394     else
    3395         rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
    3396     if (RT_SUCCESS(rc))
    3397     {
    3398         int rc2;
    3399         if (ppvR3)
    3400             rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
    3401                                    RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
    3402         else
    3403             Mem.MapObjR3 = NIL_RTR0MEMOBJ;
    3404         if (RT_SUCCESS(rc))
    3405         {
    3406             Mem.eType = MEMREF_TYPE_PAGE;
    3407             rc = supdrvMemAdd(&Mem, pSession);
    3408             if (!rc)
    3409             {
    3410                 if (ppvR3)
    3411                     *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
    3412                 if (ppvR0)
    3413                     *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
    3414                 if (paPages)
    3415                 {
    3416                     uint32_t iPage = cPages;
    3417                     while (iPage-- > 0)
    3418                     {
    3419                         paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
    3420                         Assert(paPages[iPage] != NIL_RTHCPHYS);
    3421                     }
    3422                 }
    3423                 return VINF_SUCCESS;
    3424             }
    3425 
    3426             rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
    3427             AssertRC(rc2);
    3428         }
    3429 
    3430         rc2 = RTR0MemObjFree(Mem.MemObj, false);
    3431         AssertRC(rc2);
    3432     }
    3433     return rc;
    3434 }
    3435 
    3436 
    3437 /**
    3438  * Maps a chunk of memory previously allocated by SUPR0PageAllocEx into kernel
    3439  * space.
    3440  *
    3441  * @returns IPRT status code.
    3442  * @param   pSession    The session to associated the allocation with.
    3443  * @param   pvR3        The ring-3 address returned by SUPR0PageAllocEx.
    3444  * @param   offSub      Where to start mapping. Must be page aligned.
    3445  * @param   cbSub       How much to map. Must be page aligned.
    3446  * @param   fFlags      Flags, MBZ.
    3447  * @param   ppvR0       Where to return the address of the ring-0 mapping on
    3448  *                      success.
    3449  */
    3450 SUPR0DECL(int) SUPR0PageMapKernel(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t offSub, uint32_t cbSub,
    3451                                   uint32_t fFlags, PRTR0PTR ppvR0)
    3452 {
    3453     int             rc;
    3454     PSUPDRVBUNDLE   pBundle;
    3455     RTR0MEMOBJ      hMemObj = NIL_RTR0MEMOBJ;
    3456     LogFlow(("SUPR0PageMapKernel: pSession=%p pvR3=%p offSub=%#x cbSub=%#x\n", pSession, pvR3, offSub, cbSub));
    3457 
    3458     /*
    3459      * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
    3460      */
    3461     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3462     AssertPtrNullReturn(ppvR0, VERR_INVALID_POINTER);
    3463     AssertReturn(!fFlags, VERR_INVALID_PARAMETER);
    3464     AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    3465     AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    3466     AssertReturn(cbSub, VERR_INVALID_PARAMETER);
    3467 
    3468     /*
    3469      * Find the memory object.
    3470      */
    3471     RTSpinlockAcquire(pSession->Spinlock);
    3472     for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    3473     {
    3474         if (pBundle->cUsed > 0)
    3475         {
    3476             unsigned i;
    3477             for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
    3478             {
    3479                 if (    (   pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
    3480                          && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
    3481                          && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
    3482                          && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
    3483                     ||  (   pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED
    3484                          && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
    3485                          && pBundle->aMem[i].MapObjR3 == NIL_RTR0MEMOBJ
    3486                          && RTR0MemObjAddressR3(pBundle->aMem[i].MemObj) == pvR3))
    3487                 {
    3488                     hMemObj = pBundle->aMem[i].MemObj;
    3489                     break;
    3490                 }
    3491             }
    3492         }
    3493     }
    3494     RTSpinlockRelease(pSession->Spinlock);
    3495 
    3496     rc = VERR_INVALID_PARAMETER;
    3497     if (hMemObj != NIL_RTR0MEMOBJ)
    3498     {
    3499         /*
    3500          * Do some further input validations before calling IPRT.
    3501          * (Cleanup is done indirectly by telling RTR0MemObjFree to include mappings.)
    3502          */
    3503         size_t cbMemObj = RTR0MemObjSize(hMemObj);
    3504         if (    offSub < cbMemObj
    3505             &&  cbSub <= cbMemObj
    3506             &&  offSub + cbSub <= cbMemObj)
    3507         {
    3508             RTR0MEMOBJ hMapObj;
    3509             rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0,
    3510                                        RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
    3511             if (RT_SUCCESS(rc))
    3512                 *ppvR0 = RTR0MemObjAddress(hMapObj);
    3513         }
    3514         else
    3515             SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
    3516 
    3517     }
    3518     return rc;
    3519 }
    3520 
    3521 
    3522 /**
    3523  * Changes the page level protection of one or more pages previously allocated
    3524  * by SUPR0PageAllocEx.
    3525  *
    3526  * @returns IPRT status code.
    3527  * @param   pSession    The session to associated the allocation with.
    3528  * @param   pvR3        The ring-3 address returned by SUPR0PageAllocEx.
    3529  *                      NIL_RTR3PTR if the ring-3 mapping should be unaffected.
    3530  * @param   pvR0        The ring-0 address returned by SUPR0PageAllocEx.
    3531  *                      NIL_RTR0PTR if the ring-0 mapping should be unaffected.
    3532  * @param   offSub      Where to start changing. Must be page aligned.
    3533  * @param   cbSub       How much to change. Must be page aligned.
    3534  * @param   fProt       The new page level protection, see RTMEM_PROT_*.
    3535  */
    3536 SUPR0DECL(int) SUPR0PageProtect(PSUPDRVSESSION pSession, RTR3PTR pvR3, RTR0PTR pvR0, uint32_t offSub, uint32_t cbSub, uint32_t fProt)
    3537 {
    3538     int             rc;
    3539     PSUPDRVBUNDLE   pBundle;
    3540     RTR0MEMOBJ      hMemObjR0 = NIL_RTR0MEMOBJ;
    3541     RTR0MEMOBJ      hMemObjR3 = NIL_RTR0MEMOBJ;
    3542     LogFlow(("SUPR0PageProtect: pSession=%p pvR3=%p pvR0=%p offSub=%#x cbSub=%#x fProt-%#x\n", pSession, pvR3, pvR0, offSub, cbSub, fProt));
    3543 
    3544     /*
    3545      * Validate input. The allowed allocation size must be at least equal to the maximum guest VRAM size.
    3546      */
    3547     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3548     AssertReturn(!(fProt & ~(RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_NONE)), VERR_INVALID_PARAMETER);
    3549     AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    3550     AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
    3551     AssertReturn(cbSub, VERR_INVALID_PARAMETER);
    3552 
    3553     /*
    3554      * Find the memory object.
    3555      */
    3556     RTSpinlockAcquire(pSession->Spinlock);
    3557     for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    3558     {
    3559         if (pBundle->cUsed > 0)
    3560         {
    3561             unsigned i;
    3562             for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
    3563             {
    3564                 if (   pBundle->aMem[i].eType == MEMREF_TYPE_PAGE
    3565                     && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
    3566                     && (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
    3567                         || pvR3 == NIL_RTR3PTR)
    3568                     && (   pvR0 == NIL_RTR0PTR
    3569                         || RTR0MemObjAddress(pBundle->aMem[i].MemObj) == pvR0)
    3570                     && (   pvR3 == NIL_RTR3PTR
    3571                         || RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3))
    3572                 {
    3573                     if (pvR0 != NIL_RTR0PTR)
    3574                         hMemObjR0 = pBundle->aMem[i].MemObj;
    3575                     if (pvR3 != NIL_RTR3PTR)
    3576                         hMemObjR3 = pBundle->aMem[i].MapObjR3;
    3577                     break;
    3578                 }
    3579             }
    3580         }
    3581     }
    3582     RTSpinlockRelease(pSession->Spinlock);
    3583 
    3584     rc = VERR_INVALID_PARAMETER;
    3585     if (    hMemObjR0 != NIL_RTR0MEMOBJ
    3586         ||  hMemObjR3 != NIL_RTR0MEMOBJ)
    3587     {
    3588         /*
    3589          * Do some further input validations before calling IPRT.
    3590          */
    3591         size_t cbMemObj = hMemObjR0 != NIL_RTR0PTR ? RTR0MemObjSize(hMemObjR0) : RTR0MemObjSize(hMemObjR3);
    3592         if (    offSub < cbMemObj
    3593             &&  cbSub <= cbMemObj
    3594             &&  offSub + cbSub <= cbMemObj)
    3595         {
    3596             rc = VINF_SUCCESS;
    3597             if (hMemObjR3 != NIL_RTR0PTR)
    3598                 rc = RTR0MemObjProtect(hMemObjR3, offSub, cbSub, fProt);
    3599             if (hMemObjR0 != NIL_RTR0PTR && RT_SUCCESS(rc))
    3600                 rc = RTR0MemObjProtect(hMemObjR0, offSub, cbSub, fProt);
    3601         }
    3602         else
    3603             SUPR0Printf("SUPR0PageMapKernel: cbMemObj=%#x offSub=%#x cbSub=%#x\n", cbMemObj, offSub, cbSub);
    3604 
    3605     }
    3606     return rc;
    3607 
    3608 }
    3609 
    3610 
    3611 /**
    3612  * Free memory allocated by SUPR0PageAlloc() and SUPR0PageAllocEx().
    3613  *
    3614  * @returns IPRT status code.
    3615  * @param   pSession        The session owning the allocation.
    3616  * @param   pvR3             The Ring-3 address returned by SUPR0PageAlloc() or
    3617  *                           SUPR0PageAllocEx().
    3618  */
    3619 SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
    3620 {
    3621     LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
    3622     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3623     return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_PAGE);
    3624 }
    3625 
    3626 
    3627 /**
    3628  * Gets the paging mode of the current CPU.
    3629  *
    3630  * @returns Paging mode, SUPPAGEINGMODE_INVALID on error.
    3631  */
    3632 SUPR0DECL(SUPPAGINGMODE) SUPR0GetPagingMode(void)
    3633 {
    3634     SUPPAGINGMODE enmMode;
    3635 
    3636     RTR0UINTREG cr0 = ASMGetCR0();
    3637     if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
    3638         enmMode = SUPPAGINGMODE_INVALID;
    3639     else
    3640     {
    3641         RTR0UINTREG cr4 = ASMGetCR4();
    3642         uint32_t fNXEPlusLMA = 0;
    3643         if (cr4 & X86_CR4_PAE)
    3644         {
    3645             uint32_t fExtFeatures = ASMCpuId_EDX(0x80000001);
    3646             if (fExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
    3647             {
    3648                 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
    3649                 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_NX)        && (efer & MSR_K6_EFER_NXE))
    3650                     fNXEPlusLMA |= RT_BIT(0);
    3651                 if ((fExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
    3652                     fNXEPlusLMA |= RT_BIT(1);
    3653             }
    3654         }
    3655 
    3656         switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
    3657         {
    3658             case 0:
    3659                 enmMode = SUPPAGINGMODE_32_BIT;
    3660                 break;
    3661 
    3662             case X86_CR4_PGE:
    3663                 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
    3664                 break;
    3665 
    3666             case X86_CR4_PAE:
    3667                 enmMode = SUPPAGINGMODE_PAE;
    3668                 break;
    3669 
    3670             case X86_CR4_PAE | RT_BIT(0):
    3671                 enmMode = SUPPAGINGMODE_PAE_NX;
    3672                 break;
    3673 
    3674             case X86_CR4_PAE | X86_CR4_PGE:
    3675                 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
    3676                 break;
    3677 
    3678             case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
    3679                 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
    3680                 break;
    3681 
    3682             case RT_BIT(1) | X86_CR4_PAE:
    3683                 enmMode = SUPPAGINGMODE_AMD64;
    3684                 break;
    3685 
    3686             case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
    3687                 enmMode = SUPPAGINGMODE_AMD64_NX;
    3688                 break;
    3689 
    3690             case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
    3691                 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
    3692                 break;
    3693 
    3694             case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
    3695                 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
    3696                 break;
    3697 
    3698             default:
    3699                 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
    3700                 enmMode = SUPPAGINGMODE_INVALID;
    3701                 break;
    3702         }
    3703     }
    3704     return enmMode;
    3705 }
    3706 
    3707 
    3708 /**
    3709  * Enables or disabled hardware virtualization extensions using native OS APIs.
    3710  *
    3711  * @returns VBox status code.
    3712  * @retval  VINF_SUCCESS on success.
    3713  * @retval  VERR_NOT_SUPPORTED if not supported by the native OS.
    3714  *
    3715  * @param   fEnable         Whether to enable or disable.
    3716  */
    3717 SUPR0DECL(int) SUPR0EnableVTx(bool fEnable)
    3718 {
    3719 #ifdef RT_OS_DARWIN
    3720     return supdrvOSEnableVTx(fEnable);
    3721 #else
    3722     return VERR_NOT_SUPPORTED;
    3723 #endif
    3724 }
    3725 
    3726 
    3727 /**
    3728  * Suspends hardware virtualization extensions using the native OS API.
    3729  *
    3730  * This is called prior to entering raw-mode context.
    3731  *
    3732  * @returns @c true if suspended, @c false if not.
    3733  */
    3734 SUPR0DECL(bool) SUPR0SuspendVTxOnCpu(void)
    3735 {
    3736 #ifdef RT_OS_DARWIN
    3737     return supdrvOSSuspendVTxOnCpu();
    3738 #else
    3739     return false;
    3740 #endif
    3741 }
    3742 
    3743 
    3744 /**
    3745  * Resumes hardware virtualization extensions using the native OS API.
    3746  *
    3747  * This is called after to entering raw-mode context.
    3748  *
    3749  * @param   fSuspended      The return value of SUPR0SuspendVTxOnCpu.
    3750  */
    3751 SUPR0DECL(void) SUPR0ResumeVTxOnCpu(bool fSuspended)
    3752 {
    3753 #ifdef RT_OS_DARWIN
    3754     supdrvOSResumeVTxOnCpu(fSuspended);
    3755 #else
    3756     Assert(!fSuspended);
    3757 #endif
    3758 }
    3759 
    3760 
    3761 /**
    3762  * Checks if Intel VT-x feature is usable on this CPU.
    3763  *
    3764  * @returns VBox status code.
    3765  * @param   fIsSmxModeAmbiguous   Where to write whether the SMX mode causes
    3766  *                                ambiguity that makes us unsure whether we
    3767  *                                really can use VT-x or not.
    3768  *
    3769  * @remarks Must be called with preemption disabled.
    3770  */
    3771 SUPR0DECL(int) SUPR0GetVmxUsability(bool *pfIsSmxModeAmbiguous)
    3772 {
    3773     uint64_t   u64FeatMsr;
    3774     bool       fMaybeSmxMode;
    3775     bool       fMsrLocked;
    3776     bool       fSmxVmxAllowed;
    3777     bool       fVmxAllowed;
    3778     bool       fIsSmxModeAmbiguous;
    3779     int        rc;
    3780 
    3781     Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    3782 
    3783     u64FeatMsr          = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
    3784     fMaybeSmxMode       = RT_BOOL(ASMGetCR4() & X86_CR4_SMXE);
    3785     fMsrLocked          = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
    3786     fSmxVmxAllowed      = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
    3787     fVmxAllowed         = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
    3788     fIsSmxModeAmbiguous = false;
    3789     rc                  = VERR_INTERNAL_ERROR_5;
    3790 
    3791     /* Check if the LOCK bit is set but excludes the required VMXON bit. */
    3792     if (fMsrLocked)
    3793     {
    3794         if (fVmxAllowed && fSmxVmxAllowed)
    3795             rc = VINF_SUCCESS;
    3796         else if (!fVmxAllowed && !fSmxVmxAllowed)
    3797             rc = VERR_VMX_MSR_ALL_VMXON_DISABLED;
    3798         else if (!fMaybeSmxMode)
    3799         {
    3800             if (fVmxAllowed)
    3801                 rc = VINF_SUCCESS;
    3802             else
    3803                 rc = VERR_VMX_MSR_VMXON_DISABLED;
    3804         }
    3805         else
    3806         {
    3807             /*
    3808              * CR4.SMXE is set but this doesn't mean the CPU is necessarily in SMX mode. We shall assume
    3809              * that it is -not- and that it is a stupid BIOS/OS setting CR4.SMXE for no good reason.
    3810              * See @bugref{6873}.
    3811              */
    3812             Assert(fMaybeSmxMode == true);
    3813             fIsSmxModeAmbiguous = true;
    3814             rc = VINF_SUCCESS;
    3815         }
    3816     }
    3817     else
    3818     {
    3819         /*
    3820          * MSR is not yet locked; we can change it ourselves here.
    3821          * Once the lock bit is set, this MSR can no longer be modified.
    3822          *
    3823          * Set both the VMXON and SMX_VMXON bits as we can't determine SMX mode
    3824          * accurately. See @bugref{6873}.
    3825          */
    3826         u64FeatMsr |= MSR_IA32_FEATURE_CONTROL_LOCK
    3827                     | MSR_IA32_FEATURE_CONTROL_SMX_VMXON
    3828                     | MSR_IA32_FEATURE_CONTROL_VMXON;
    3829         ASMWrMsr(MSR_IA32_FEATURE_CONTROL, u64FeatMsr);
    3830 
    3831         /* Verify. */
    3832         u64FeatMsr     = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
    3833         fMsrLocked     = RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_LOCK);
    3834         fSmxVmxAllowed = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_SMX_VMXON);
    3835         fVmxAllowed    = fMsrLocked && RT_BOOL(u64FeatMsr & MSR_IA32_FEATURE_CONTROL_VMXON);
    3836         if (fSmxVmxAllowed && fVmxAllowed)
    3837             rc = VINF_SUCCESS;
    3838         else
    3839             rc = VERR_VMX_MSR_LOCKING_FAILED;
    3840     }
    3841 
    3842     if (pfIsSmxModeAmbiguous)
    3843         *pfIsSmxModeAmbiguous = fIsSmxModeAmbiguous;
    3844 
    3845     return rc;
    3846 }
    3847 
    3848 
    3849 /**
    3850  * Checks if AMD-V SVM feature is usable on this CPU.
    3851  *
    3852  * @returns VBox status code.
    3853  * @param   fInitSvm    If usable, try to initialize SVM on this CPU.
    3854  *
    3855  * @remarks Must be called with preemption disabled.
    3856  */
    3857 SUPR0DECL(int) SUPR0GetSvmUsability(bool fInitSvm)
    3858 {
    3859     int      rc;
    3860     uint64_t fVmCr;
    3861     uint64_t fEfer;
    3862 
    3863     Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    3864     fVmCr = ASMRdMsr(MSR_K8_VM_CR);
    3865     if (!(fVmCr & MSR_K8_VM_CR_SVM_DISABLE))
    3866     {
    3867         rc = VINF_SUCCESS;
    3868         if (fInitSvm)
    3869         {
    3870             /* Turn on SVM in the EFER MSR. */
    3871             fEfer = ASMRdMsr(MSR_K6_EFER);
    3872             if (fEfer & MSR_K6_EFER_SVME)
    3873                 rc = VERR_SVM_IN_USE;
    3874             else
    3875             {
    3876                 ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME);
    3877 
    3878                 /* Paranoia. */
    3879                 fEfer = ASMRdMsr(MSR_K6_EFER);
    3880                 if (fEfer & MSR_K6_EFER_SVME)
    3881                 {
    3882                     /* Restore previous value. */
    3883                     ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME);
    3884                 }
    3885                 else
    3886                     rc = VERR_SVM_ILLEGAL_EFER_MSR;
    3887             }
    3888         }
    3889     }
    3890     else
    3891         rc = VERR_SVM_DISABLED;
    3892     return rc;
    3893 }
    3894 
    3895 
    3896 /**
    3897  * Queries the AMD-V and VT-x capabilities of the calling CPU.
    3898  *
    3899  * @returns VBox status code.
    3900  * @retval  VERR_VMX_NO_VMX
    3901  * @retval  VERR_VMX_MSR_ALL_VMXON_DISABLED
    3902  * @retval  VERR_VMX_MSR_VMXON_DISABLED
    3903  * @retval  VERR_VMX_MSR_LOCKING_FAILED
    3904  * @retval  VERR_SVM_NO_SVM
    3905  * @retval  VERR_SVM_DISABLED
    3906  * @retval  VERR_UNSUPPORTED_CPU if not identifiable as an AMD, Intel or VIA
    3907  *          (centaur) CPU.
    3908  *
    3909  * @param   pSession        The session handle.
    3910  * @param   pfCaps          Where to store the capabilities.
    3911  */
    3912 SUPR0DECL(int) SUPR0QueryVTCaps(PSUPDRVSESSION pSession, uint32_t *pfCaps)
    3913 {
    3914     int  rc = VERR_UNSUPPORTED_CPU;
    3915     bool fIsSmxModeAmbiguous = false;
    3916     RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    3917 
    3918     /*
    3919      * Input validation.
    3920      */
    3921     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    3922     AssertPtrReturn(pfCaps, VERR_INVALID_POINTER);
    3923 
    3924     *pfCaps = 0;
    3925     /* We may modify MSRs and re-read them, disable preemption so we make sure we don't migrate CPUs. */
    3926     RTThreadPreemptDisable(&PreemptState);
    3927     if (ASMHasCpuId())
    3928     {
    3929         uint32_t fFeaturesECX, fFeaturesEDX, uDummy;
    3930         uint32_t uMaxId, uVendorEBX, uVendorECX, uVendorEDX;
    3931 
    3932         ASMCpuId(0, &uMaxId, &uVendorEBX, &uVendorECX, &uVendorEDX);
    3933         ASMCpuId(1, &uDummy, &uDummy, &fFeaturesECX, &fFeaturesEDX);
    3934 
    3935         if (   ASMIsValidStdRange(uMaxId)
    3936             && (   ASMIsIntelCpuEx(     uVendorEBX, uVendorECX, uVendorEDX)
    3937                 || ASMIsViaCentaurCpuEx(uVendorEBX, uVendorECX, uVendorEDX) )
    3938            )
    3939         {
    3940             if (    (fFeaturesECX & X86_CPUID_FEATURE_ECX_VMX)
    3941                  && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_MSR)
    3942                  && (fFeaturesEDX & X86_CPUID_FEATURE_EDX_FXSR)
    3943                )
    3944             {
    3945                 rc = SUPR0GetVmxUsability(&fIsSmxModeAmbiguous);
    3946                 if (rc == VINF_SUCCESS)
    3947                 {
    3948                     VMXCAPABILITY vtCaps;
    3949 
    3950                     *pfCaps |= SUPVTCAPS_VT_X;
    3951 
    3952                     vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS);
    3953                     if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
    3954                     {
    3955                         vtCaps.u = ASMRdMsr(MSR_IA32_VMX_PROCBASED_CTLS2);
    3956                         if (vtCaps.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_EPT)
    3957                             *pfCaps |= SUPVTCAPS_NESTED_PAGING;
    3958                     }
    3959                 }
    3960             }
    3961             else
    3962                 rc = VERR_VMX_NO_VMX;
    3963         }
    3964         else if (   ASMIsAmdCpuEx(uVendorEBX, uVendorECX, uVendorEDX)
    3965                  && ASMIsValidStdRange(uMaxId))
    3966         {
    3967             uint32_t fExtFeaturesEcx, uExtMaxId;
    3968             ASMCpuId(0x80000000, &uExtMaxId, &uDummy, &uDummy, &uDummy);
    3969             ASMCpuId(0x80000001, &uDummy, &uDummy, &fExtFeaturesEcx, &uDummy);
    3970 
    3971             /* Check if SVM is available. */
    3972             if (   ASMIsValidExtRange(uExtMaxId)
    3973                 && uExtMaxId >= 0x8000000a
    3974                 && (fExtFeaturesEcx & X86_CPUID_AMD_FEATURE_ECX_SVM)
    3975                 && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_MSR)
    3976                 && (fFeaturesEDX    & X86_CPUID_FEATURE_EDX_FXSR)
    3977                )
    3978             {
    3979                 rc = SUPR0GetSvmUsability(false /* fInitSvm */);
    3980                 if (RT_SUCCESS(rc))
    3981                 {
    3982                     uint32_t fSvmFeatures;
    3983                     *pfCaps |= SUPVTCAPS_AMD_V;
    3984 
    3985                     /* Query AMD-V features. */
    3986                     ASMCpuId(0x8000000a, &uDummy, &uDummy, &uDummy, &fSvmFeatures);
    3987                     if (fSvmFeatures & AMD_CPUID_SVM_FEATURE_EDX_NESTED_PAGING)
    3988                         *pfCaps |= SUPVTCAPS_NESTED_PAGING;
    3989                 }
    3990             }
    3991             else
    3992                 rc = VERR_SVM_NO_SVM;
    3993         }
    3994     }
    3995 
    3996     RTThreadPreemptRestore(&PreemptState);
    3997     if (fIsSmxModeAmbiguous)
    3998         SUPR0Printf(("WARNING! CR4 hints SMX mode but your CPU is too secretive. Proceeding anyway... We wish you good luck!\n"));
    3999     return rc;
    4000 }
    4001155
    4002156
     
    4495649}
    4496650
    4497 
    4498 /**
    4499  * Register a component factory with the support driver.
    4500  *
    4501  * This is currently restricted to kernel sessions only.
    4502  *
    4503  * @returns VBox status code.
    4504  * @retval  VINF_SUCCESS on success.
    4505  * @retval  VERR_NO_MEMORY if we're out of memory.
    4506  * @retval  VERR_ALREADY_EXISTS if the factory has already been registered.
    4507  * @retval  VERR_ACCESS_DENIED if it isn't a kernel session.
    4508  * @retval  VERR_INVALID_PARAMETER on invalid parameter.
    4509  * @retval  VERR_INVALID_POINTER on invalid pointer parameter.
    4510  *
    4511  * @param   pSession        The SUPDRV session (must be a ring-0 session).
    4512  * @param   pFactory        Pointer to the component factory registration structure.
    4513  *
    4514  * @remarks This interface is also available via SUPR0IdcComponentRegisterFactory.
    4515  */
    4516 SUPR0DECL(int) SUPR0ComponentRegisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
    4517 {
    4518     PSUPDRVFACTORYREG pNewReg;
    4519     const char *psz;
    4520     int rc;
    4521 
    4522     /*
    4523      * Validate parameters.
    4524      */
    4525     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    4526     AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
    4527     AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
    4528     AssertPtrReturn(pFactory->pfnQueryFactoryInterface, VERR_INVALID_POINTER);
    4529     psz = RTStrEnd(pFactory->szName, sizeof(pFactory->szName));
    4530     AssertReturn(psz, VERR_INVALID_PARAMETER);
    4531 
    4532     /*
    4533      * Allocate and initialize a new registration structure.
    4534      */
    4535     pNewReg = (PSUPDRVFACTORYREG)RTMemAlloc(sizeof(SUPDRVFACTORYREG));
    4536     if (pNewReg)
    4537     {
    4538         pNewReg->pNext = NULL;
    4539         pNewReg->pFactory = pFactory;
    4540         pNewReg->pSession = pSession;
    4541         pNewReg->cchName = psz - &pFactory->szName[0];
    4542 
    4543         /*
    4544          * Add it to the tail of the list after checking for prior registration.
    4545          */
    4546         rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
    4547         if (RT_SUCCESS(rc))
    4548         {
    4549             PSUPDRVFACTORYREG pPrev = NULL;
    4550             PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
    4551             while (pCur && pCur->pFactory != pFactory)
    4552             {
    4553                 pPrev = pCur;
    4554                 pCur = pCur->pNext;
    4555             }
    4556             if (!pCur)
    4557             {
    4558                 if (pPrev)
    4559                     pPrev->pNext = pNewReg;
    4560                 else
    4561                     pSession->pDevExt->pComponentFactoryHead = pNewReg;
    4562                 rc = VINF_SUCCESS;
    4563             }
    4564             else
    4565                 rc = VERR_ALREADY_EXISTS;
    4566 
    4567             RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
    4568         }
    4569 
    4570         if (RT_FAILURE(rc))
    4571             RTMemFree(pNewReg);
    4572     }
    4573     else
    4574         rc = VERR_NO_MEMORY;
    4575     return rc;
    4576 }
    4577 
    4578 
    4579 /**
    4580  * Deregister a component factory.
    4581  *
    4582  * @returns VBox status code.
    4583  * @retval  VINF_SUCCESS on success.
    4584  * @retval  VERR_NOT_FOUND if the factory wasn't registered.
    4585  * @retval  VERR_ACCESS_DENIED if it isn't a kernel session.
    4586  * @retval  VERR_INVALID_PARAMETER on invalid parameter.
    4587  * @retval  VERR_INVALID_POINTER on invalid pointer parameter.
    4588  *
    4589  * @param   pSession        The SUPDRV session (must be a ring-0 session).
    4590  * @param   pFactory        Pointer to the component factory registration structure
    4591  *                          previously passed SUPR0ComponentRegisterFactory().
    4592  *
    4593  * @remarks This interface is also available via SUPR0IdcComponentDeregisterFactory.
    4594  */
    4595 SUPR0DECL(int) SUPR0ComponentDeregisterFactory(PSUPDRVSESSION pSession, PCSUPDRVFACTORY pFactory)
    4596 {
    4597     int rc;
    4598 
    4599     /*
    4600      * Validate parameters.
    4601      */
    4602     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    4603     AssertReturn(pSession->R0Process == NIL_RTR0PROCESS, VERR_ACCESS_DENIED);
    4604     AssertPtrReturn(pFactory, VERR_INVALID_POINTER);
    4605 
    4606     /*
    4607      * Take the lock and look for the registration record.
    4608      */
    4609     rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
    4610     if (RT_SUCCESS(rc))
    4611     {
    4612         PSUPDRVFACTORYREG pPrev = NULL;
    4613         PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
    4614         while (pCur && pCur->pFactory != pFactory)
    4615         {
    4616             pPrev = pCur;
    4617             pCur = pCur->pNext;
    4618         }
    4619         if (pCur)
    4620         {
    4621             if (!pPrev)
    4622                 pSession->pDevExt->pComponentFactoryHead = pCur->pNext;
    4623             else
    4624                 pPrev->pNext = pCur->pNext;
    4625 
    4626             pCur->pNext = NULL;
    4627             pCur->pFactory = NULL;
    4628             pCur->pSession = NULL;
    4629             rc = VINF_SUCCESS;
    4630         }
    4631         else
    4632             rc = VERR_NOT_FOUND;
    4633 
    4634         RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
    4635 
    4636         RTMemFree(pCur);
    4637     }
    4638     return rc;
    4639 }
    4640 
    4641 
    4642 /**
    4643  * Queries a component factory.
    4644  *
    4645  * @returns VBox status code.
    4646  * @retval  VERR_INVALID_PARAMETER on invalid parameter.
    4647  * @retval  VERR_INVALID_POINTER on invalid pointer parameter.
    4648  * @retval  VERR_SUPDRV_COMPONENT_NOT_FOUND if the component factory wasn't found.
    4649  * @retval  VERR_SUPDRV_INTERFACE_NOT_SUPPORTED if the interface wasn't supported.
    4650  *
    4651  * @param   pSession            The SUPDRV session.
    4652  * @param   pszName             The name of the component factory.
    4653  * @param   pszInterfaceUuid    The UUID of the factory interface (stringified).
    4654  * @param   ppvFactoryIf        Where to store the factory interface.
    4655  */
    4656 SUPR0DECL(int) SUPR0ComponentQueryFactory(PSUPDRVSESSION pSession, const char *pszName, const char *pszInterfaceUuid, void **ppvFactoryIf)
    4657 {
    4658     const char *pszEnd;
    4659     size_t cchName;
    4660     int rc;
    4661 
    4662     /*
    4663      * Validate parameters.
    4664      */
    4665     AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    4666 
    4667     AssertPtrReturn(pszName, VERR_INVALID_POINTER);
    4668     pszEnd = RTStrEnd(pszName, RT_SIZEOFMEMB(SUPDRVFACTORY, szName));
    4669     AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
    4670     cchName = pszEnd - pszName;
    4671 
    4672     AssertPtrReturn(pszInterfaceUuid, VERR_INVALID_POINTER);
    4673     pszEnd = RTStrEnd(pszInterfaceUuid, RTUUID_STR_LENGTH);
    4674     AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
    4675 
    4676     AssertPtrReturn(ppvFactoryIf, VERR_INVALID_POINTER);
    4677     *ppvFactoryIf = NULL;
    4678 
    4679     /*
    4680      * Take the lock and try all factories by this name.
    4681      */
    4682     rc = RTSemFastMutexRequest(pSession->pDevExt->mtxComponentFactory);
    4683     if (RT_SUCCESS(rc))
    4684     {
    4685         PSUPDRVFACTORYREG pCur = pSession->pDevExt->pComponentFactoryHead;
    4686         rc = VERR_SUPDRV_COMPONENT_NOT_FOUND;
    4687         while (pCur)
    4688         {
    4689             if (    pCur->cchName == cchName
    4690                 &&  !memcmp(pCur->pFactory->szName, pszName, cchName))
    4691             {
    4692                 void *pvFactory = pCur->pFactory->pfnQueryFactoryInterface(pCur->pFactory, pSession, pszInterfaceUuid);
    4693                 if (pvFactory)
    4694                 {
    4695                     *ppvFactoryIf = pvFactory;
    4696                     rc = VINF_SUCCESS;
    4697                     break;
    4698                 }
    4699                 rc = VERR_SUPDRV_INTERFACE_NOT_SUPPORTED;
    4700             }
    4701 
    4702             /* next */
    4703             pCur = pCur->pNext;
    4704         }
    4705 
    4706         RTSemFastMutexRelease(pSession->pDevExt->mtxComponentFactory);
    4707     }
    4708     return rc;
    4709 }
    4710 
    4711 
    4712 /**
    4713  * Adds a memory object to the session.
    4714  *
    4715  * @returns IPRT status code.
    4716  * @param   pMem        Memory tracking structure containing the
    4717  *                      information to track.
    4718  * @param   pSession    The session.
    4719  */
    4720 static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
    4721 {
    4722     PSUPDRVBUNDLE pBundle;
    4723 
    4724     /*
    4725      * Find free entry and record the allocation.
    4726      */
    4727     RTSpinlockAcquire(pSession->Spinlock);
    4728     for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    4729     {
    4730         if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
    4731         {
    4732             unsigned i;
    4733             for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
    4734             {
    4735                 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
    4736                 {
    4737                     pBundle->cUsed++;
    4738                     pBundle->aMem[i] = *pMem;
    4739                     RTSpinlockRelease(pSession->Spinlock);
    4740                     return VINF_SUCCESS;
    4741                 }
    4742             }
    4743             AssertFailed();             /* !!this can't be happening!!! */
    4744         }
    4745     }
    4746     RTSpinlockRelease(pSession->Spinlock);
    4747 
    4748     /*
    4749      * Need to allocate a new bundle.
    4750      * Insert into the last entry in the bundle.
    4751      */
    4752     pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
    4753     if (!pBundle)
    4754         return VERR_NO_MEMORY;
    4755 
    4756     /* take last entry. */
    4757     pBundle->cUsed++;
    4758     pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
    4759 
    4760     /* insert into list. */
    4761     RTSpinlockAcquire(pSession->Spinlock);
    4762     pBundle->pNext = pSession->Bundle.pNext;
    4763     pSession->Bundle.pNext = pBundle;
    4764     RTSpinlockRelease(pSession->Spinlock);
    4765 
    4766     return VINF_SUCCESS;
    4767 }
    4768 
    4769 
    4770 /**
    4771  * Releases a memory object referenced by pointer and type.
    4772  *
    4773  * @returns IPRT status code.
    4774  * @param   pSession    Session data.
    4775  * @param   uPtr        Pointer to memory. This is matched against both the R0 and R3 addresses.
    4776  * @param   eType       Memory type.
    4777  */
    4778 static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
    4779 {
    4780     PSUPDRVBUNDLE pBundle;
    4781 
    4782     /*
    4783      * Validate input.
    4784      */
    4785     if (!uPtr)
    4786     {
    4787         Log(("Illegal address %p\n", (void *)uPtr));
    4788         return VERR_INVALID_PARAMETER;
    4789     }
    4790 
    4791     /*
    4792      * Search for the address.
    4793      */
    4794     RTSpinlockAcquire(pSession->Spinlock);
    4795     for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
    4796     {
    4797         if (pBundle->cUsed > 0)
    4798         {
    4799             unsigned i;
    4800             for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
    4801             {
    4802                 if (    pBundle->aMem[i].eType == eType
    4803                     &&  pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
    4804                     &&  (   (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
    4805                          || (   pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
    4806                              && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
    4807                    )
    4808                 {
    4809                     /* Make a copy of it and release it outside the spinlock. */
    4810                     SUPDRVMEMREF Mem = pBundle->aMem[i];
    4811                     pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
    4812                     pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
    4813                     pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
    4814                     RTSpinlockRelease(pSession->Spinlock);
    4815 
    4816                     if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
    4817                     {
    4818                         int rc = RTR0MemObjFree(Mem.MapObjR3, false);
    4819                         AssertRC(rc); /** @todo figure out how to handle this. */
    4820                     }
    4821                     if (Mem.MemObj != NIL_RTR0MEMOBJ)
    4822                     {
    4823                         int rc = RTR0MemObjFree(Mem.MemObj, true /* fFreeMappings */);
    4824                         AssertRC(rc); /** @todo figure out how to handle this. */
    4825                     }
    4826                     return VINF_SUCCESS;
    4827                 }
    4828             }
    4829         }
    4830     }
    4831     RTSpinlockRelease(pSession->Spinlock);
    4832     Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
    4833     return VERR_INVALID_PARAMETER;
    4834 }
    4835 
    4836 
    4837 /**
    4838  * Opens an image. If it's the first time it's opened the call must upload
    4839  * the bits using the supdrvIOCtl_LdrLoad() / SUPDRV_IOCTL_LDR_LOAD function.
    4840  *
    4841  * This is the 1st step of the loading.
    4842  *
    4843  * @returns IPRT status code.
    4844  * @param   pDevExt     Device globals.
    4845  * @param   pSession    Session data.
    4846  * @param   pReq        The open request.
    4847  */
    4848 static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
    4849 {
    4850     int             rc;
    4851     PSUPDRVLDRIMAGE pImage;
    4852     void           *pv;
    4853     size_t          cchName = strlen(pReq->u.In.szName); /* (caller checked < 32). */
    4854     LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImageWithTabs=%d\n", pReq->u.In.szName, pReq->u.In.cbImageWithTabs));
    4855 
    4856     /*
    4857      * Check if we got an instance of the image already.
    4858      */
    4859     supdrvLdrLock(pDevExt);
    4860     for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
    4861     {
    4862         if (    pImage->szName[cchName] == '\0'
    4863             &&  !memcmp(pImage->szName, pReq->u.In.szName, cchName))
    4864         {
    4865             if (RT_LIKELY(pImage->cUsage < UINT32_MAX / 2U))
    4866             {
    4867                 /** @todo check cbImageBits and cbImageWithTabs here, if they differs that indicates that the images are different. */
    4868                 pImage->cUsage++;
    4869                 pReq->u.Out.pvImageBase   = pImage->pvImage;
    4870                 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
    4871                 pReq->u.Out.fNativeLoader = pImage->fNative;
    4872                 supdrvLdrAddUsage(pSession, pImage);
    4873                 supdrvLdrUnlock(pDevExt);
    4874                 return VINF_SUCCESS;
    4875             }
    4876             supdrvLdrUnlock(pDevExt);
    4877             Log(("supdrvIOCtl_LdrOpen: To many existing references to '%s'!\n", pReq->u.In.szName));
    4878             return VERR_INTERNAL_ERROR_3; /** @todo add VERR_TOO_MANY_REFERENCES */
    4879         }
    4880     }
    4881     /* (not found - add it!) */
    4882 
    4883     /* If the loader interface is locked down, make userland fail early */
    4884     if (pDevExt->fLdrLockedDown)
    4885     {
    4886         supdrvLdrUnlock(pDevExt);
    4887         Log(("supdrvIOCtl_LdrOpen: Not adding '%s' to image list, loader interface is locked down!\n", pReq->u.In.szName));
    4888         return VERR_PERMISSION_DENIED;
    4889     }
    4890 
    4891     /*
    4892      * Allocate memory.
    4893      */
    4894     Assert(cchName < sizeof(pImage->szName));
    4895     pv = RTMemAlloc(sizeof(SUPDRVLDRIMAGE));
    4896     if (!pv)
    4897     {
    4898         supdrvLdrUnlock(pDevExt);
    4899         Log(("supdrvIOCtl_LdrOpen: RTMemAlloc() failed\n"));
    4900         return /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_2;
    4901     }
    4902 
    4903     /*
    4904      * Setup and link in the LDR stuff.
    4905      */
    4906     pImage = (PSUPDRVLDRIMAGE)pv;
    4907     pImage->pvImage         = NULL;
    4908     pImage->pvImageAlloc    = NULL;
    4909     pImage->cbImageWithTabs = pReq->u.In.cbImageWithTabs;
    4910     pImage->cbImageBits     = pReq->u.In.cbImageBits;
    4911     pImage->cSymbols        = 0;
    4912     pImage->paSymbols       = NULL;
    4913     pImage->pachStrTab      = NULL;
    4914     pImage->cbStrTab        = 0;
    4915     pImage->pfnModuleInit   = NULL;
    4916     pImage->pfnModuleTerm   = NULL;
    4917     pImage->pfnServiceReqHandler = NULL;
    4918     pImage->uState          = SUP_IOCTL_LDR_OPEN;
    4919     pImage->cUsage          = 1;
    4920     pImage->pDevExt         = pDevExt;
    4921     memcpy(pImage->szName, pReq->u.In.szName, cchName + 1);
    4922 
    4923     /*
    4924      * Try load it using the native loader, if that isn't supported, fall back
    4925      * on the older method.
    4926      */
    4927     pImage->fNative         = true;
    4928     rc = supdrvOSLdrOpen(pDevExt, pImage, pReq->u.In.szFilename);
    4929     if (rc == VERR_NOT_SUPPORTED)
    4930     {
    4931         pImage->pvImageAlloc = RTMemExecAlloc(pImage->cbImageBits + 31);
    4932         pImage->pvImage     = RT_ALIGN_P(pImage->pvImageAlloc, 32);
    4933         pImage->fNative     = false;
    4934         rc = pImage->pvImageAlloc ? VINF_SUCCESS : VERR_NO_EXEC_MEMORY;
    4935     }
    4936     if (RT_FAILURE(rc))
    4937     {
    4938         supdrvLdrUnlock(pDevExt);
    4939         RTMemFree(pImage);
    4940         Log(("supdrvIOCtl_LdrOpen(%s): failed - %Rrc\n", pReq->u.In.szName, rc));
    4941         return rc;
    4942     }
    4943     Assert(VALID_PTR(pImage->pvImage) || RT_FAILURE(rc));
    4944 
    4945     /*
    4946      * Link it.
    4947      */
    4948     pImage->pNext           = pDevExt->pLdrImages;
    4949     pDevExt->pLdrImages     = pImage;
    4950 
    4951     supdrvLdrAddUsage(pSession, pImage);
    4952 
    4953     pReq->u.Out.pvImageBase   = pImage->pvImage;
    4954     pReq->u.Out.fNeedsLoading = true;
    4955     pReq->u.Out.fNativeLoader = pImage->fNative;
    4956     supdrvOSLdrNotifyOpened(pDevExt, pImage);
    4957 
    4958     supdrvLdrUnlock(pDevExt);
    4959     return VINF_SUCCESS;
    4960 }
    4961 
    4962 
    4963 /**
    4964  * Worker that validates a pointer to an image entrypoint.
    4965  *
    4966  * @returns IPRT status code.
    4967  * @param   pDevExt     The device globals.
    4968  * @param   pImage      The loader image.
    4969  * @param   pv          The pointer into the image.
    4970  * @param   fMayBeNull  Whether it may be NULL.
    4971  * @param   pszWhat     What is this entrypoint? (for logging)
    4972  * @param   pbImageBits The image bits prepared by ring-3.
    4973  *
    4974  * @remarks Will leave the lock on failure.
    4975  */
    4976 static int supdrvLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
    4977                                     bool fMayBeNull, const uint8_t *pbImageBits, const char *pszWhat)
    4978 {
    4979     if (!fMayBeNull || pv)
    4980     {
    4981         if ((uintptr_t)pv - (uintptr_t)pImage->pvImage >= pImage->cbImageBits)
    4982         {
    4983             supdrvLdrUnlock(pDevExt);
    4984             Log(("Out of range (%p LB %#x): %s=%p\n", pImage->pvImage, pImage->cbImageBits, pszWhat, pv));
    4985             return VERR_INVALID_PARAMETER;
    4986         }
    4987 
    4988         if (pImage->fNative)
    4989         {
    4990             int rc = supdrvOSLdrValidatePointer(pDevExt, pImage, pv, pbImageBits);
    4991             if (RT_FAILURE(rc))
    4992             {
    4993                 supdrvLdrUnlock(pDevExt);
    4994                 Log(("Bad entry point address: %s=%p (rc=%Rrc)\n", pszWhat, pv, rc));
    4995                 return rc;
    4996             }
    4997         }
    4998     }
    4999     return VINF_SUCCESS;
    5000 }
    5001 
    5002 
    5003 /**
    5004  * Loads the image bits.
    5005  *
    5006  * This is the 2nd step of the loading.
    5007  *
    5008  * @returns IPRT status code.
    5009  * @param   pDevExt     Device globals.
    5010  * @param   pSession    Session data.
    5011  * @param   pReq        The request.
    5012  */
    5013 static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
    5014 {
    5015     PSUPDRVLDRUSAGE pUsage;
    5016     PSUPDRVLDRIMAGE pImage;
    5017     int             rc;
    5018     LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImageWithBits=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImageWithTabs));
    5019 
    5020     /*
    5021      * Find the ldr image.
    5022      */
    5023     supdrvLdrLock(pDevExt);
    5024     pUsage = pSession->pLdrUsage;
    5025     while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
    5026         pUsage = pUsage->pNext;
    5027     if (!pUsage)
    5028     {
    5029         supdrvLdrUnlock(pDevExt);
    5030         Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
    5031         return VERR_INVALID_HANDLE;
    5032     }
    5033     pImage = pUsage->pImage;
    5034 
    5035     /*
    5036      * Validate input.
    5037      */
    5038     if (   pImage->cbImageWithTabs != pReq->u.In.cbImageWithTabs
    5039         || pImage->cbImageBits     != pReq->u.In.cbImageBits)
    5040     {
    5041         supdrvLdrUnlock(pDevExt);
    5042         Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load) or %d != %d\n",
    5043              pImage->cbImageWithTabs, pReq->u.In.cbImageWithTabs, pImage->cbImageBits, pReq->u.In.cbImageBits));
    5044         return VERR_INVALID_HANDLE;
    5045     }
    5046 
    5047     if (pImage->uState != SUP_IOCTL_LDR_OPEN)
    5048     {
    5049         unsigned uState = pImage->uState;
    5050         supdrvLdrUnlock(pDevExt);
    5051         if (uState != SUP_IOCTL_LDR_LOAD)
    5052             AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
    5053         return VERR_ALREADY_LOADED;
    5054     }
    5055 
    5056     /* If the loader interface is locked down, don't load new images */
    5057     if (pDevExt->fLdrLockedDown)
    5058     {
    5059         supdrvLdrUnlock(pDevExt);
    5060         Log(("SUP_IOCTL_LDR_LOAD: Not loading '%s' image bits, loader interface is locked down!\n", pImage->szName));
    5061         return VERR_PERMISSION_DENIED;
    5062     }
    5063 
    5064     switch (pReq->u.In.eEPType)
    5065     {
    5066         case SUPLDRLOADEP_NOTHING:
    5067             break;
    5068 
    5069         case SUPLDRLOADEP_VMMR0:
    5070             rc = supdrvLdrValidatePointer(    pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0,          false, pReq->u.In.abImage, "pvVMMR0");
    5071             if (RT_SUCCESS(rc))
    5072                 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,  false, pReq->u.In.abImage, "pvVMMR0EntryInt");
    5073             if (RT_SUCCESS(rc))
    5074                 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, false, pReq->u.In.abImage, "pvVMMR0EntryFast");
    5075             if (RT_SUCCESS(rc))
    5076                 rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx,   false, pReq->u.In.abImage, "pvVMMR0EntryEx");
    5077             if (RT_FAILURE(rc))
    5078                 return rc;
    5079             break;
    5080 
    5081         case SUPLDRLOADEP_SERVICE:
    5082             rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.EP.Service.pfnServiceReq, false, pReq->u.In.abImage, "pfnServiceReq");
    5083             if (RT_FAILURE(rc))
    5084                 return rc;
    5085             if (    pReq->u.In.EP.Service.apvReserved[0] != NIL_RTR0PTR
    5086                 ||  pReq->u.In.EP.Service.apvReserved[1] != NIL_RTR0PTR
    5087                 ||  pReq->u.In.EP.Service.apvReserved[2] != NIL_RTR0PTR)
    5088             {
    5089                 supdrvLdrUnlock(pDevExt);
    5090                 Log(("Out of range (%p LB %#x): apvReserved={%p,%p,%p} MBZ!\n",
    5091                      pImage->pvImage, pReq->u.In.cbImageWithTabs,
    5092                      pReq->u.In.EP.Service.apvReserved[0],
    5093                      pReq->u.In.EP.Service.apvReserved[1],
    5094                      pReq->u.In.EP.Service.apvReserved[2]));
    5095                 return VERR_INVALID_PARAMETER;
    5096             }
    5097             break;
    5098 
    5099         default:
    5100             supdrvLdrUnlock(pDevExt);
    5101             Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
    5102             return VERR_INVALID_PARAMETER;
    5103     }
    5104 
    5105     rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleInit, true, pReq->u.In.abImage, "pfnModuleInit");
    5106     if (RT_FAILURE(rc))
    5107         return rc;
    5108     rc = supdrvLdrValidatePointer(pDevExt, pImage, pReq->u.In.pfnModuleTerm, true, pReq->u.In.abImage, "pfnModuleTerm");
    5109     if (RT_FAILURE(rc))
    5110         return rc;
    5111 
    5112     /*
    5113      * Allocate and copy the tables.
    5114      * (No need to do try/except as this is a buffered request.)
    5115      */
    5116     pImage->cbStrTab = pReq->u.In.cbStrTab;
    5117     if (pImage->cbStrTab)
    5118     {
    5119         pImage->pachStrTab = (char *)RTMemAlloc(pImage->cbStrTab);
    5120         if (pImage->pachStrTab)
    5121             memcpy(pImage->pachStrTab, &pReq->u.In.abImage[pReq->u.In.offStrTab], pImage->cbStrTab);
    5122         else
    5123             rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_3;
    5124     }
    5125 
    5126     pImage->cSymbols = pReq->u.In.cSymbols;
    5127     if (RT_SUCCESS(rc) && pImage->cSymbols)
    5128     {
    5129         size_t  cbSymbols = pImage->cSymbols * sizeof(SUPLDRSYM);
    5130         pImage->paSymbols = (PSUPLDRSYM)RTMemAlloc(cbSymbols);
    5131         if (pImage->paSymbols)
    5132             memcpy(pImage->paSymbols, &pReq->u.In.abImage[pReq->u.In.offSymbols], cbSymbols);
    5133         else
    5134             rc = /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_4;
    5135     }
    5136 
    5137     /*
    5138      * Copy the bits / complete native loading.
    5139      */
    5140     if (RT_SUCCESS(rc))
    5141     {
    5142         pImage->uState = SUP_IOCTL_LDR_LOAD;
    5143         pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
    5144         pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
    5145 
    5146         if (pImage->fNative)
    5147             rc = supdrvOSLdrLoad(pDevExt, pImage, pReq->u.In.abImage, pReq);
    5148         else
    5149         {
    5150             memcpy(pImage->pvImage, &pReq->u.In.abImage[0], pImage->cbImageBits);
    5151             Log(("vboxdrv: Loaded '%s' at %p\n", pImage->szName, pImage->pvImage));
    5152         }
    5153     }
    5154 
    5155     /*
    5156      * Update any entry points.
    5157      */
    5158     if (RT_SUCCESS(rc))
    5159     {
    5160         switch (pReq->u.In.eEPType)
    5161         {
    5162             default:
    5163             case SUPLDRLOADEP_NOTHING:
    5164                 rc = VINF_SUCCESS;
    5165                 break;
    5166             case SUPLDRLOADEP_VMMR0:
    5167                 rc = supdrvLdrSetVMMR0EPs(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
    5168                                           pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
    5169                 break;
    5170             case SUPLDRLOADEP_SERVICE:
    5171                 pImage->pfnServiceReqHandler = pReq->u.In.EP.Service.pfnServiceReq;
    5172                 rc = VINF_SUCCESS;
    5173                 break;
    5174         }
    5175     }
    5176 
    5177     /*
    5178      * On success call the module initialization.
    5179      */
    5180     LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
    5181     if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
    5182     {
    5183         Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
    5184         pDevExt->pLdrInitImage  = pImage;
    5185         pDevExt->hLdrInitThread = RTThreadNativeSelf();
    5186         rc = pImage->pfnModuleInit(pImage);
    5187         pDevExt->pLdrInitImage  = NULL;
    5188         pDevExt->hLdrInitThread = NIL_RTNATIVETHREAD;
    5189         if (RT_FAILURE(rc) && pDevExt->pvVMMR0 == pImage->pvImage)
    5190             supdrvLdrUnsetVMMR0EPs(pDevExt);
    5191     }
    5192     SUPR0Printf("vboxdrv: %p %s\n", pImage->pvImage, pImage->szName);
    5193 
    5194     if (RT_FAILURE(rc))
    5195     {
    5196         /* Inform the tracing component in case ModuleInit registered TPs. */
    5197         supdrvTracerModuleUnloading(pDevExt, pImage);
    5198 
    5199         pImage->uState              = SUP_IOCTL_LDR_OPEN;
    5200         pImage->pfnModuleInit       = NULL;
    5201         pImage->pfnModuleTerm       = NULL;
    5202         pImage->pfnServiceReqHandler= NULL;
    5203         pImage->cbStrTab            = 0;
    5204         RTMemFree(pImage->pachStrTab);
    5205         pImage->pachStrTab          = NULL;
    5206         RTMemFree(pImage->paSymbols);
    5207         pImage->paSymbols           = NULL;
    5208         pImage->cSymbols            = 0;
    5209     }
    5210 
    5211     supdrvLdrUnlock(pDevExt);
    5212     return rc;
    5213 }
    5214 
    5215 
    5216 /**
    5217  * Frees a previously loaded (prep'ed) image.
    5218  *
    5219  * @returns IPRT status code.
    5220  * @param   pDevExt     Device globals.
    5221  * @param   pSession    Session data.
    5222  * @param   pReq        The request.
    5223  */
    5224 static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
    5225 {
    5226     int             rc;
    5227     PSUPDRVLDRUSAGE pUsagePrev;
    5228     PSUPDRVLDRUSAGE pUsage;
    5229     PSUPDRVLDRIMAGE pImage;
    5230     LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
    5231 
    5232     /*
    5233      * Find the ldr image.
    5234      */
    5235     supdrvLdrLock(pDevExt);
    5236     pUsagePrev = NULL;
    5237     pUsage = pSession->pLdrUsage;
    5238     while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
    5239     {
    5240         pUsagePrev = pUsage;
    5241         pUsage = pUsage->pNext;
    5242     }
    5243     if (!pUsage)
    5244     {
    5245         supdrvLdrUnlock(pDevExt);
    5246         Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
    5247         return VERR_INVALID_HANDLE;
    5248     }
    5249 
    5250     /*
    5251      * Check if we can remove anything.
    5252      */
    5253     rc = VINF_SUCCESS;
    5254     pImage = pUsage->pImage;
    5255     if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
    5256     {
    5257         /*
    5258          * Check if there are any objects with destructors in the image, if
    5259          * so leave it for the session cleanup routine so we get a chance to
    5260          * clean things up in the right order and not leave them all dangling.
    5261          */
    5262         RTSpinlockAcquire(pDevExt->Spinlock);
    5263         if (pImage->cUsage <= 1)
    5264         {
    5265             PSUPDRVOBJ pObj;
    5266             for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
    5267                 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
    5268                 {
    5269                     rc = VERR_DANGLING_OBJECTS;
    5270                     break;
    5271                 }
    5272         }
    5273         else
    5274         {
    5275             PSUPDRVUSAGE pGenUsage;
    5276             for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
    5277                 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
    5278                 {
    5279                     rc = VERR_DANGLING_OBJECTS;
    5280                     break;
    5281                 }
    5282         }
    5283         RTSpinlockRelease(pDevExt->Spinlock);
    5284         if (rc == VINF_SUCCESS)
    5285         {
    5286             /* unlink it */
    5287             if (pUsagePrev)
    5288                 pUsagePrev->pNext = pUsage->pNext;
    5289             else
    5290                 pSession->pLdrUsage = pUsage->pNext;
    5291 
    5292             /* free it */
    5293             pUsage->pImage = NULL;
    5294             pUsage->pNext = NULL;
    5295             RTMemFree(pUsage);
    5296 
    5297             /*
    5298              * Dereference the image.
    5299              */
    5300             if (pImage->cUsage <= 1)
    5301                 supdrvLdrFree(pDevExt, pImage);
    5302             else
    5303                 pImage->cUsage--;
    5304         }
    5305         else
    5306         {
    5307             Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
    5308             rc = VINF_SUCCESS; /** @todo BRANCH-2.1: remove this after branching. */
    5309         }
    5310     }
    5311     else
    5312     {
    5313         /*
    5314          * Dereference both image and usage.
    5315          */
    5316         pImage->cUsage--;
    5317         pUsage->cUsage--;
    5318     }
    5319 
    5320     supdrvLdrUnlock(pDevExt);
    5321     return rc;
    5322 }
    5323 
    5324 
    5325 /**
    5326  * Lock down the image loader interface.
    5327  *
    5328  * @returns IPRT status code.
    5329  * @param   pDevExt     Device globals.
    5330  */
    5331 static int supdrvIOCtl_LdrLockDown(PSUPDRVDEVEXT pDevExt)
    5332 {
    5333     LogFlow(("supdrvIOCtl_LdrLockDown:\n"));
    5334 
    5335     supdrvLdrLock(pDevExt);
    5336     if (!pDevExt->fLdrLockedDown)
    5337     {
    5338         pDevExt->fLdrLockedDown = true;
    5339         Log(("supdrvIOCtl_LdrLockDown: Image loader interface locked down\n"));
    5340     }
    5341     supdrvLdrUnlock(pDevExt);
    5342 
    5343     return VINF_SUCCESS;
    5344 }
    5345 
    5346 
    5347 /**
    5348  * Gets the address of a symbol in an open image.
    5349  *
    5350  * @returns IPRT status code.
    5351  * @param   pDevExt     Device globals.
    5352  * @param   pSession    Session data.
    5353  * @param   pReq        The request buffer.
    5354  */
    5355 static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
    5356 {
    5357     PSUPDRVLDRIMAGE pImage;
    5358     PSUPDRVLDRUSAGE pUsage;
    5359     uint32_t        i;
    5360     PSUPLDRSYM      paSyms;
    5361     const char     *pchStrings;
    5362     const size_t    cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
    5363     void           *pvSymbol = NULL;
    5364     int             rc = VERR_GENERAL_FAILURE;
    5365     Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
    5366 
    5367     /*
    5368      * Find the ldr image.
    5369      */
    5370     supdrvLdrLock(pDevExt);
    5371     pUsage = pSession->pLdrUsage;
    5372     while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
    5373         pUsage = pUsage->pNext;
    5374     if (!pUsage)
    5375     {
    5376         supdrvLdrUnlock(pDevExt);
    5377         Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
    5378         return VERR_INVALID_HANDLE;
    5379     }
    5380     pImage = pUsage->pImage;
    5381     if (pImage->uState != SUP_IOCTL_LDR_LOAD)
    5382     {
    5383         unsigned uState = pImage->uState;
    5384         supdrvLdrUnlock(pDevExt);
    5385         Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
    5386         return VERR_ALREADY_LOADED;
    5387     }
    5388 
    5389     /*
    5390      * Search the symbol strings.
    5391      *
    5392      * Note! The int32_t is for native loading on solaris where the data
    5393      *       and text segments are in very different places.
    5394      */
    5395     pchStrings = pImage->pachStrTab;
    5396     paSyms     = pImage->paSymbols;
    5397     for (i = 0; i < pImage->cSymbols; i++)
    5398     {
    5399         if (    paSyms[i].offName + cbSymbol <= pImage->cbStrTab
    5400             &&  !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
    5401         {
    5402             pvSymbol = (uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol;
    5403             rc = VINF_SUCCESS;
    5404             break;
    5405         }
    5406     }
    5407     supdrvLdrUnlock(pDevExt);
    5408     pReq->u.Out.pvSymbol = pvSymbol;
    5409     return rc;
    5410 }
    5411 
    5412 
    5413 /**
    5414  * Gets the address of a symbol in an open image or the support driver.
    5415  *
    5416  * @returns VINF_SUCCESS on success.
    5417  * @returns
    5418  * @param   pDevExt     Device globals.
    5419  * @param   pSession    Session data.
    5420  * @param   pReq        The request buffer.
    5421  */
    5422 static int supdrvIDC_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQGETSYM pReq)
    5423 {
    5424     int             rc = VINF_SUCCESS;
    5425     const char     *pszSymbol = pReq->u.In.pszSymbol;
    5426     const char     *pszModule = pReq->u.In.pszModule;
    5427     size_t          cbSymbol;
    5428     char const     *pszEnd;
    5429     uint32_t        i;
    5430 
    5431     /*
    5432      * Input validation.
    5433      */
    5434     AssertPtrReturn(pszSymbol, VERR_INVALID_POINTER);
    5435     pszEnd = RTStrEnd(pszSymbol, 512);
    5436     AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
    5437     cbSymbol = pszEnd - pszSymbol + 1;
    5438 
    5439     if (pszModule)
    5440     {
    5441         AssertPtrReturn(pszModule, VERR_INVALID_POINTER);
    5442         pszEnd = RTStrEnd(pszModule, 64);
    5443         AssertReturn(pszEnd, VERR_INVALID_PARAMETER);
    5444     }
    5445     Log3(("supdrvIDC_LdrGetSymbol: pszModule=%p:{%s} pszSymbol=%p:{%s}\n", pszModule, pszModule, pszSymbol, pszSymbol));
    5446 
    5447 
    5448     if (    !pszModule
    5449         ||  !strcmp(pszModule, "SupDrv"))
    5450     {
    5451         /*
    5452          * Search the support driver export table.
    5453          */
    5454         for (i = 0; i < RT_ELEMENTS(g_aFunctions); i++)
    5455             if (!strcmp(g_aFunctions[i].szName, pszSymbol))
    5456             {
    5457                 pReq->u.Out.pfnSymbol = g_aFunctions[i].pfn;
    5458                 break;
    5459             }
    5460     }
    5461     else
    5462     {
    5463         /*
    5464          * Find the loader image.
    5465          */
    5466         PSUPDRVLDRIMAGE pImage;
    5467 
    5468         supdrvLdrLock(pDevExt);
    5469 
    5470         for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
    5471             if (!strcmp(pImage->szName, pszModule))
    5472                 break;
    5473         if (pImage && pImage->uState == SUP_IOCTL_LDR_LOAD)
    5474         {
    5475             /*
    5476              * Search the symbol strings.
    5477              */
    5478             const char *pchStrings = pImage->pachStrTab;
    5479             PCSUPLDRSYM paSyms     = pImage->paSymbols;
    5480             for (i = 0; i < pImage->cSymbols; i++)
    5481             {
    5482                 if (    paSyms[i].offName + cbSymbol <= pImage->cbStrTab
    5483                     &&  !memcmp(pchStrings + paSyms[i].offName, pszSymbol, cbSymbol))
    5484                 {
    5485                     /*
    5486                      * Found it! Calc the symbol address and add a reference to the module.
    5487                      */
    5488                     pReq->u.Out.pfnSymbol = (PFNRT)((uint8_t *)pImage->pvImage + (int32_t)paSyms[i].offSymbol);
    5489                     rc = supdrvLdrAddUsage(pSession, pImage);
    5490                     break;
    5491                 }
    5492             }
    5493         }
    5494         else
    5495             rc = pImage ? VERR_WRONG_ORDER : VERR_MODULE_NOT_FOUND;
    5496 
    5497         supdrvLdrUnlock(pDevExt);
    5498     }
    5499     return rc;
    5500 }
    5501 
    5502 
    5503 /**
    5504  * Updates the VMMR0 entry point pointers.
    5505  *
    5506  * @returns IPRT status code.
    5507  * @param   pDevExt             Device globals.
    5508  * @param   pSession            Session data.
    5509  * @param   pVMMR0              VMMR0 image handle.
    5510  * @param   pvVMMR0EntryInt     VMMR0EntryInt address.
    5511  * @param   pvVMMR0EntryFast    VMMR0EntryFast address.
    5512  * @param   pvVMMR0EntryEx      VMMR0EntryEx address.
    5513  * @remark  Caller must own the loader mutex.
    5514  */
    5515 static int supdrvLdrSetVMMR0EPs(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
    5516 {
    5517     int rc = VINF_SUCCESS;
    5518     LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
    5519 
    5520 
    5521     /*
    5522      * Check if not yet set.
    5523      */
    5524     if (!pDevExt->pvVMMR0)
    5525     {
    5526         pDevExt->pvVMMR0            = pvVMMR0;
    5527         pDevExt->pfnVMMR0EntryInt   = pvVMMR0EntryInt;
    5528         pDevExt->pfnVMMR0EntryFast  = pvVMMR0EntryFast;
    5529         pDevExt->pfnVMMR0EntryEx    = pvVMMR0EntryEx;
    5530     }
    5531     else
    5532     {
    5533         /*
    5534          * Return failure or success depending on whether the values match or not.
    5535          */
    5536         if (    pDevExt->pvVMMR0 != pvVMMR0
    5537             ||  (void *)pDevExt->pfnVMMR0EntryInt   != pvVMMR0EntryInt
    5538             ||  (void *)pDevExt->pfnVMMR0EntryFast  != pvVMMR0EntryFast
    5539             ||  (void *)pDevExt->pfnVMMR0EntryEx    != pvVMMR0EntryEx)
    5540         {
    5541             AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
    5542             rc = VERR_INVALID_PARAMETER;
    5543         }
    5544     }
    5545     return rc;
    5546 }
    5547 
    5548 
    5549 /**
    5550  * Unsets the VMMR0 entry point installed by supdrvLdrSetR0EP.
    5551  *
    5552  * @param   pDevExt     Device globals.
    5553  */
    5554 static void supdrvLdrUnsetVMMR0EPs(PSUPDRVDEVEXT pDevExt)
    5555 {
    5556     pDevExt->pvVMMR0            = NULL;
    5557     pDevExt->pfnVMMR0EntryInt   = NULL;
    5558     pDevExt->pfnVMMR0EntryFast  = NULL;
    5559     pDevExt->pfnVMMR0EntryEx    = NULL;
    5560 }
    5561 
    5562 
    5563 /**
    5564  * Adds a usage reference in the specified session of an image.
    5565  *
    5566  * Called while owning the loader semaphore.
    5567  *
    5568  * @returns VINF_SUCCESS on success and VERR_NO_MEMORY on failure.
    5569  * @param   pSession    Session in question.
    5570  * @param   pImage      Image which the session is using.
    5571  */
    5572 static int supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
    5573 {
    5574     PSUPDRVLDRUSAGE pUsage;
    5575     LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
    5576 
    5577     /*
    5578      * Referenced it already?
    5579      */
    5580     pUsage = pSession->pLdrUsage;
    5581     while (pUsage)
    5582     {
    5583         if (pUsage->pImage == pImage)
    5584         {
    5585             pUsage->cUsage++;
    5586             return VINF_SUCCESS;
    5587         }
    5588         pUsage = pUsage->pNext;
    5589     }
    5590 
    5591     /*
    5592      * Allocate new usage record.
    5593      */
    5594     pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
    5595     AssertReturn(pUsage, /*VERR_NO_MEMORY*/ VERR_INTERNAL_ERROR_5);
    5596     pUsage->cUsage = 1;
    5597     pUsage->pImage = pImage;
    5598     pUsage->pNext  = pSession->pLdrUsage;
    5599     pSession->pLdrUsage = pUsage;
    5600     return VINF_SUCCESS;
    5601 }
    5602 
    5603 
    5604 /**
    5605  * Frees a load image.
    5606  *
    5607  * @param   pDevExt     Pointer to device extension.
    5608  * @param   pImage      Pointer to the image we're gonna free.
    5609  *                      This image must exit!
    5610  * @remark  The caller MUST own SUPDRVDEVEXT::mtxLdr!
    5611  */
    5612 static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
    5613 {
    5614     PSUPDRVLDRIMAGE pImagePrev;
    5615     LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
    5616 
    5617     /*
    5618      * Warn if we're releasing images while the image loader interface is
    5619      * locked down -- we won't be able to reload them!
    5620      */
    5621     if (pDevExt->fLdrLockedDown)
    5622         Log(("supdrvLdrFree: Warning: unloading '%s' image, while loader interface is locked down!\n", pImage->szName));
    5623 
    5624     /* find it - arg. should've used doubly linked list. */
    5625     Assert(pDevExt->pLdrImages);
    5626     pImagePrev = NULL;
    5627     if (pDevExt->pLdrImages != pImage)
    5628     {
    5629         pImagePrev = pDevExt->pLdrImages;
    5630         while (pImagePrev->pNext != pImage)
    5631             pImagePrev = pImagePrev->pNext;
    5632         Assert(pImagePrev->pNext == pImage);
    5633     }
    5634 
    5635     /* unlink */
    5636     if (pImagePrev)
    5637         pImagePrev->pNext = pImage->pNext;
    5638     else
    5639         pDevExt->pLdrImages = pImage->pNext;
    5640 
    5641     /* check if this is VMMR0.r0 unset its entry point pointers. */
    5642     if (pDevExt->pvVMMR0 == pImage->pvImage)
    5643         supdrvLdrUnsetVMMR0EPs(pDevExt);
    5644 
    5645     /* check for objects with destructors in this image. (Shouldn't happen.) */
    5646     if (pDevExt->pObjs)
    5647     {
    5648         unsigned        cObjs = 0;
    5649         PSUPDRVOBJ      pObj;
    5650         RTSpinlockAcquire(pDevExt->Spinlock);
    5651         for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
    5652             if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImageBits))
    5653             {
    5654                 pObj->pfnDestructor = NULL;
    5655                 cObjs++;
    5656             }
    5657         RTSpinlockRelease(pDevExt->Spinlock);
    5658         if (cObjs)
    5659             OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
    5660     }
    5661 
    5662     /* call termination function if fully loaded. */
    5663     if (    pImage->pfnModuleTerm
    5664         &&  pImage->uState == SUP_IOCTL_LDR_LOAD)
    5665     {
    5666         LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
    5667         pImage->pfnModuleTerm(pImage);
    5668     }
    5669 
    5670     /* Inform the tracing component. */
    5671     supdrvTracerModuleUnloading(pDevExt, pImage);
    5672 
    5673     /* do native unload if appropriate. */
    5674     if (pImage->fNative)
    5675         supdrvOSLdrUnload(pDevExt, pImage);
    5676 
    5677     /* free the image */
    5678     pImage->cUsage  = 0;
    5679     pImage->pDevExt = NULL;
    5680     pImage->pNext   = NULL;
    5681     pImage->uState  = SUP_IOCTL_LDR_FREE;
    5682     RTMemExecFree(pImage->pvImageAlloc, pImage->cbImageBits + 31);
    5683     pImage->pvImageAlloc = NULL;
    5684     RTMemFree(pImage->pachStrTab);
    5685     pImage->pachStrTab = NULL;
    5686     RTMemFree(pImage->paSymbols);
    5687     pImage->paSymbols = NULL;
    5688     RTMemFree(pImage);
    5689 }
    5690 
    5691 
    5692 /**
    5693  * Acquires the loader lock.
    5694  *
    5695  * @returns IPRT status code.
    5696  * @param   pDevExt         The device extension.
    5697  */
    5698 DECLINLINE(int) supdrvLdrLock(PSUPDRVDEVEXT pDevExt)
    5699 {
    5700 #ifdef SUPDRV_USE_MUTEX_FOR_LDR
    5701     int rc = RTSemMutexRequest(pDevExt->mtxLdr, RT_INDEFINITE_WAIT);
    5702 #else
    5703     int rc = RTSemFastMutexRequest(pDevExt->mtxLdr);
    5704 #endif
    5705     AssertRC(rc);
    5706     return rc;
    5707 }
    5708 
    5709 
    5710 /**
    5711  * Releases the loader lock.
    5712  *
    5713  * @returns IPRT status code.
    5714  * @param   pDevExt         The device extension.
    5715  */
    5716 DECLINLINE(int) supdrvLdrUnlock(PSUPDRVDEVEXT pDevExt)
    5717 {
    5718 #ifdef SUPDRV_USE_MUTEX_FOR_LDR
    5719     return RTSemMutexRelease(pDevExt->mtxLdr);
    5720 #else
    5721     return RTSemFastMutexRelease(pDevExt->mtxLdr);
    5722 #endif
    5723 }
    5724 
    5725 
    5726 /**
    5727  * Implements the service call request.
    5728  *
    5729  * @returns VBox status code.
    5730  * @param   pDevExt         The device extension.
    5731  * @param   pSession        The calling session.
    5732  * @param   pReq            The request packet, valid.
    5733  */
    5734 static int supdrvIOCtl_CallServiceModule(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPCALLSERVICE pReq)
    5735 {
    5736 #if !defined(RT_OS_WINDOWS) || defined(RT_ARCH_AMD64) || defined(DEBUG)
    5737     int rc;
    5738 
    5739     /*
    5740      * Find the module first in the module referenced by the calling session.
    5741      */
    5742     rc = supdrvLdrLock(pDevExt);
    5743     if (RT_SUCCESS(rc))
    5744     {
    5745         PFNSUPR0SERVICEREQHANDLER   pfnServiceReqHandler = NULL;
    5746         PSUPDRVLDRUSAGE             pUsage;
    5747 
    5748         for (pUsage = pSession->pLdrUsage; pUsage; pUsage = pUsage->pNext)
    5749             if (    pUsage->pImage->pfnServiceReqHandler
    5750                 &&  !strcmp(pUsage->pImage->szName, pReq->u.In.szName))
    5751             {
    5752                 pfnServiceReqHandler = pUsage->pImage->pfnServiceReqHandler;
    5753                 break;
    5754             }
    5755         supdrvLdrUnlock(pDevExt);
    5756 
    5757         if (pfnServiceReqHandler)
    5758         {
    5759             /*
    5760              * Call it.
    5761              */
    5762             if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_SERVICE_SIZE(0))
    5763                 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, NULL);
    5764             else
    5765                 rc = pfnServiceReqHandler(pSession, pReq->u.In.uOperation, pReq->u.In.u64Arg, (PSUPR0SERVICEREQHDR)&pReq->abReqPkt[0]);
    5766         }
    5767         else
    5768             rc = VERR_SUPDRV_SERVICE_NOT_FOUND;
    5769     }
    5770 
    5771     /* log it */
    5772     if (    RT_FAILURE(rc)
    5773         &&  rc != VERR_INTERRUPTED
    5774         &&  rc != VERR_TIMEOUT)
    5775         Log(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    5776              rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    5777     else
    5778         Log4(("SUP_IOCTL_CALL_SERVICE: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
    5779               rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
    5780     return rc;
    5781 #else  /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
    5782     return VERR_NOT_IMPLEMENTED;
    5783 #endif /* RT_OS_WINDOWS && !RT_ARCH_AMD64 && !DEBUG */
    5784 }
    5785 
    5786 
    5787 /**
    5788  * Implements the logger settings request.
    5789  *
    5790  * @returns VBox status code.
    5791  * @param   pDevExt     The device extension.
    5792  * @param   pSession    The caller's session.
    5793  * @param   pReq        The request.
    5794  */
    5795 static int supdrvIOCtl_LoggerSettings(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLOGGERSETTINGS pReq)
    5796 {
    5797     const char *pszGroup = &pReq->u.In.szStrings[pReq->u.In.offGroups];
    5798     const char *pszFlags = &pReq->u.In.szStrings[pReq->u.In.offFlags];
    5799     const char *pszDest  = &pReq->u.In.szStrings[pReq->u.In.offDestination];
    5800     PRTLOGGER   pLogger  = NULL;
    5801     int         rc;
    5802 
    5803     /*
    5804      * Some further validation.
    5805      */
    5806     switch (pReq->u.In.fWhat)
    5807     {
    5808         case SUPLOGGERSETTINGS_WHAT_SETTINGS:
    5809         case SUPLOGGERSETTINGS_WHAT_CREATE:
    5810             break;
    5811 
    5812         case SUPLOGGERSETTINGS_WHAT_DESTROY:
    5813             if (*pszGroup || *pszFlags || *pszDest)
    5814                 return VERR_INVALID_PARAMETER;
    5815             if (pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_RELEASE)
    5816                 return VERR_ACCESS_DENIED;
    5817             break;
    5818 
    5819         default:
    5820             return VERR_INTERNAL_ERROR;
    5821     }
    5822 
    5823     /*
    5824      * Get the logger.
    5825      */
    5826     switch (pReq->u.In.fWhich)
    5827     {
    5828         case SUPLOGGERSETTINGS_WHICH_DEBUG:
    5829             pLogger = RTLogGetDefaultInstance();
    5830             break;
    5831 
    5832         case SUPLOGGERSETTINGS_WHICH_RELEASE:
    5833             pLogger = RTLogRelDefaultInstance();
    5834             break;
    5835 
    5836         default:
    5837             return VERR_INTERNAL_ERROR;
    5838     }
    5839 
    5840     /*
    5841      * Do the job.
    5842      */
    5843     switch (pReq->u.In.fWhat)
    5844     {
    5845         case SUPLOGGERSETTINGS_WHAT_SETTINGS:
    5846             if (pLogger)
    5847             {
    5848                 rc = RTLogFlags(pLogger, pszFlags);
    5849                 if (RT_SUCCESS(rc))
    5850                     rc = RTLogGroupSettings(pLogger, pszGroup);
    5851                 NOREF(pszDest);
    5852             }
    5853             else
    5854                 rc = VERR_NOT_FOUND;
    5855             break;
    5856 
    5857         case SUPLOGGERSETTINGS_WHAT_CREATE:
    5858         {
    5859             if (pLogger)
    5860                 rc = VERR_ALREADY_EXISTS;
    5861             else
    5862             {
    5863                 static const char * const s_apszGroups[] = VBOX_LOGGROUP_NAMES;
    5864 
    5865                 rc = RTLogCreate(&pLogger,
    5866                                  0 /* fFlags */,
    5867                                  pszGroup,
    5868                                  pReq->u.In.fWhich == SUPLOGGERSETTINGS_WHICH_DEBUG
    5869                                  ? "VBOX_LOG"
    5870                                  : "VBOX_RELEASE_LOG",
    5871                                  RT_ELEMENTS(s_apszGroups),
    5872                                  s_apszGroups,
    5873                                  RTLOGDEST_STDOUT | RTLOGDEST_DEBUGGER,
    5874                                  NULL);
    5875                 if (RT_SUCCESS(rc))
    5876                 {
    5877                     rc = RTLogFlags(pLogger, pszFlags);
    5878                     NOREF(pszDest);
    5879                     if (RT_SUCCESS(rc))
    5880                     {
    5881                         switch (pReq->u.In.fWhich)
    5882                         {
    5883                             case SUPLOGGERSETTINGS_WHICH_DEBUG:
    5884                                 pLogger = RTLogSetDefaultInstance(pLogger);
    5885                                 break;
    5886                             case SUPLOGGERSETTINGS_WHICH_RELEASE:
    5887                                 pLogger = RTLogRelSetDefaultInstance(pLogger);
    5888                                 break;
    5889                         }
    5890                     }
    5891                     RTLogDestroy(pLogger);
    5892                 }
    5893             }
    5894             break;
    5895         }
    5896 
    5897         case SUPLOGGERSETTINGS_WHAT_DESTROY:
    5898             switch (pReq->u.In.fWhich)
    5899             {
    5900                 case SUPLOGGERSETTINGS_WHICH_DEBUG:
    5901                     pLogger = RTLogSetDefaultInstance(NULL);
    5902                     break;
    5903                 case SUPLOGGERSETTINGS_WHICH_RELEASE:
    5904                     pLogger = RTLogRelSetDefaultInstance(NULL);
    5905                     break;
    5906             }
    5907             rc = RTLogDestroy(pLogger);
    5908             break;
    5909 
    5910         default:
    5911         {
    5912             rc = VERR_INTERNAL_ERROR;
    5913             break;
    5914         }
    5915     }
    5916 
    5917     return rc;
    5918 }
    5919 
    5920 
    5921 /**
    5922  * Implements the MSR prober operations.
    5923  *
    5924  * @returns VBox status code.
    5925  * @param   pDevExt     The device extension.
    5926  * @param   pReq        The request.
    5927  */
    5928 static int supdrvIOCtl_MsrProber(PSUPDRVDEVEXT pDevExt, PSUPMSRPROBER pReq)
    5929 {
    5930 #ifdef SUPDRV_WITH_MSR_PROBER
    5931     RTCPUID const idCpu = pReq->u.In.idCpu == UINT32_MAX ? NIL_RTCPUID : pReq->u.In.idCpu;
    5932     int rc;
    5933 
    5934     switch (pReq->u.In.enmOp)
    5935     {
    5936         case SUPMSRPROBEROP_READ:
    5937         {
    5938             uint64_t uValue;
    5939             rc = supdrvOSMsrProberRead(pReq->u.In.uMsr, idCpu, &uValue);
    5940             if (RT_SUCCESS(rc))
    5941             {
    5942                 pReq->u.Out.uResults.Read.uValue = uValue;
    5943                 pReq->u.Out.uResults.Read.fGp    = false;
    5944             }
    5945             else if (rc == VERR_ACCESS_DENIED)
    5946             {
    5947                 pReq->u.Out.uResults.Read.uValue = 0;
    5948                 pReq->u.Out.uResults.Read.fGp    = true;
    5949                 rc  = VINF_SUCCESS;
    5950             }
    5951             break;
    5952         }
    5953 
    5954         case SUPMSRPROBEROP_WRITE:
    5955             rc = supdrvOSMsrProberWrite(pReq->u.In.uMsr, idCpu, pReq->u.In.uArgs.Write.uToWrite);
    5956             if (RT_SUCCESS(rc))
    5957                 pReq->u.Out.uResults.Write.fGp   = false;
    5958             else if (rc == VERR_ACCESS_DENIED)
    5959             {
    5960                 pReq->u.Out.uResults.Write.fGp   = true;
    5961                 rc  = VINF_SUCCESS;
    5962             }
    5963             break;
    5964 
    5965         case SUPMSRPROBEROP_MODIFY:
    5966         case SUPMSRPROBEROP_MODIFY_FASTER:
    5967             rc = supdrvOSMsrProberModify(idCpu, pReq);
    5968             break;
    5969 
    5970         default:
    5971             return VERR_INVALID_FUNCTION;
    5972     }
    5973     return rc;
    5974 #else
    5975     return VERR_NOT_IMPLEMENTED;
    5976 #endif
    5977 }
    5978 
    5979651#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    5980652
     
    6099771                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
    6100772                        if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
    6101                             && RTCpuSetIsMember(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
     773                            && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
    6102774                        {
    6103775                            rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
     
    66781350 * @param   pDevExt     Instance data. GIP stuff may be updated.
    66791351 */
    6680 static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
     1352int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
    66811353{
    66821354    PSUPGLOBALINFOPAGE  pGip;
     
    68551527 * @param   pDevExt     Instance data. GIP stuff may be updated.
    68561528 */
    6857 static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
     1529void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
    68581530{
    68591531    int rc;
     
    75262198 *
    75272199 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
    7528  *     read the TSC at exactly the same time on both the master and the worker
    7529  *     CPUs. Due to DMA, bus arbitration, cache locality, contention, SMI,
    7530  *     pipelining etc. there is no guaranteed way of doing this on x86 CPUs. We
    7531  *     try to minimize the measurement error by computing the minimum read time
    7532  *     of the compare statement in the worker by taking TSC measurements across
    7533  *     it.
    7534  *
    7535  *     We ignore the first few runs of the loop in order to prime the cache.
    7536  *     Also, be careful about using 'pause' instruction in critical busy-wait
    7537  *     loops in this code - it can cause undesired behaviour with
    7538  *     hyperthreading.
    7539  *
    7540  *     It must be noted that the computed minimum read time is mostly to
    7541  *     eliminate huge deltas when the worker is too early and doesn't by itself
    7542  *     help produce more accurate deltas. We allow two times the computed
    7543  *     minimum as an arbitrary acceptable threshold. Therefore, it is still
    7544  *     possible to get negative deltas where there are none when the worker is
    7545  *     earlier. As long as these occasional negative deltas are lower than the
    7546  *     time it takes to exit guest-context and the OS to reschedule EMT on a
    7547  *     different CPU we won't expose a TSC that jumped backwards. It is because
    7548  *     of the existence of the negative deltas we don't recompute the delta with
    7549  *     the master and worker interchanged to eliminate the remaining measurement
    7550  *     error.
     2200 *          read the TSC at exactly the same time on both the master and the
     2201 *          worker CPUs. Due to DMA, bus arbitration, cache locality,
     2202 *          contention, SMI, pipelining etc. there is no guaranteed way of
     2203 *          doing this on x86 CPUs.
     2204 *
     2205 *          GIP_TSC_DELTA_METHOD_1:
     2206 *          We ignore the first few runs of the loop in order to prime the
     2207 *          cache. Also, we need to be careful about using 'pause' instruction
     2208 *          in critical busy-wait loops in this code - it can cause undesired
     2209 *          behaviour with hyperthreading.
     2210 *
     2211 *          We try to minimize the measurement error by computing the minimum
     2212 *          read time of the compare statement in the worker by taking TSC
     2213 *          measurements across it.
     2214 *
     2215 *          It must be noted that the computed minimum read time is mostly to
     2216 *          eliminate huge deltas when the worker is too early and doesn't by
     2217 *          itself help produce more accurate deltas. We allow two times the
     2218 *          computed minimum as an arbitrary acceptable threshold. Therefore,
     2219 *          it is still possible to get negative deltas where there are none
     2220 *          when the worker is earlier. As long as these occasional negative
     2221 *          deltas are lower than the time it takes to exit guest-context and
     2222 *          the OS to reschedule EMT on a different CPU we won't expose a TSC
     2223 *          that jumped backwards. It is because of the existence of the
     2224 *          negative deltas we don't recompute the delta with the master and
     2225 *          worker interchanged to eliminate the remaining measurement error.
     2226 *
     2227 *          For GIP_TSC_DELTA_METHOD_2, see supdrvTscDeltaMethod2CollectData.
    75512228 */
    75522229static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
     
    75542231    PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)pvUser1;
    75552232    PSUPDRVDEVEXT      pDevExt          = pArgs->pDevExt;
    7556     PSUPGLOBALINFOPAGE pGip             = pDevExt->pGip;
    75572233    PSUPGIPCPU         pGipCpuWorker    = pArgs->pWorker;
    75582234    PSUPGIPCPU         pGipCpuMaster    = pArgs->pMaster;
     
    78182494 *                          CPUs.
    78192495 *
    7820  * @remarks This can be called with preemption disabled!
     2496 * @remarks This must be called with preemption enabled!
    78212497 */
    78222498static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
     
    78322508    AssertReturn(pGip, VERR_INVALID_PARAMETER);
    78332509    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
     2510    Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    78342511
    78352512    /*
     
    78852562         * being executed and that can take a good while to be done.
    78862563         */
    7887         RTThreadSleep(1); /** @todo r=bird: This won't work with preemption disabled, not on real OSes anyway. */
     2564        RTThreadSleep(1);
    78882565    }
    78892566
     
    86773354
    86783355/**
    8679  * Resume built-in keyboard on MacBook Air and Pro hosts.
    8680  * If there is no built-in keyboard device, return success anyway.
    8681  *
    8682  * @returns 0 on Mac OS X platform, VERR_NOT_IMPLEMENTED on the other ones.
    8683  */
    8684 static int supdrvIOCtl_ResumeSuspendedKbds(void)
    8685 {
    8686 #if defined(RT_OS_DARWIN)
    8687     return supdrvDarwinResumeSuspendedKbds();
    8688 #else
    8689     return VERR_NOT_IMPLEMENTED;
    8690 #endif
    8691 }
    8692 
    8693 
    8694 /**
    86953356 * Service a TSC-delta measurement request.
    86963357 *
     
    87003361 * @param   pReq            Pointer to the TSC-delta measurement request.
    87013362 */
    8702 static int supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
     3363int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
    87033364{
    87043365    PSUPGLOBALINFOPAGE pGip;
     
    88153476 * @param   pReq            Pointer to the TSC-read request.
    88163477 */
    8817 static int supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
     3478int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
    88183479{
    88193480    PSUPGLOBALINFOPAGE pGip;
  • trunk/src/VBox/HostDrivers/Support/SUPDrvInternal.h

    r54315 r54327  
    942942uint32_t VBOXCALL supdrvSessionRelease(PSUPDRVSESSION pSession);
    943943
     944/* SUPDrvGip.cpp */
     945int  VBOXCALL   supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
     946void VBOXCALL   supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
     947int  VBOXCALL   supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq);
     948int  VBOXCALL   supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq);
     949
     950/* SUPDrvTracer.cpp */
    944951int  VBOXCALL   supdrvTracerInit(PSUPDRVDEVEXT pDevExt);
    945952void VBOXCALL   supdrvTracerTerm(PSUPDRVDEVEXT pDevExt);
  • trunk/src/VBox/HostDrivers/Support/freebsd/Makefile

    r54224 r54327  
    3737SRCS = \
    3838        SUPDrv.c \
     39        SUPDrvGip.c \
    3940        SUPDrvSem.c \
    4041        SUPDrvTracer.c \
  • trunk/src/VBox/HostDrivers/Support/freebsd/files_vboxdrv

    r54224 r54327  
    8787    ${PATH_ROOT}/src/VBox/HostDrivers/Support/freebsd/SUPDrv-freebsd.c=>freebsd/SUPDrv-freebsd.c \
    8888    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrv.c=>SUPDrv.c \
     89    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrvGip.cpp=>SUPDrvGip.c \
    8990    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrvSem.c=>SUPDrvSem.c \
    9091    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrvTracer.cpp=>SUPDrvTracer.c \
  • trunk/src/VBox/HostDrivers/Support/linux/Makefile

    r54322 r54327  
    7373        linux/SUPDrv-linux.o \
    7474        SUPDrv.o \
     75        SUPDrvGip.o \
    7576        SUPDrvSem.o \
    7677        SUPDrvTracer.o \
  • trunk/src/VBox/HostDrivers/Support/linux/files_vboxdrv

    r54224 r54327  
    8181    ${PATH_ROOT}/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c=>linux/SUPDrv-linux.c \
    8282    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrv.c=>SUPDrv.c \
     83    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrvGip.cpp=>SUPDrvGip.c \
    8384    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrvSem.c=>SUPDrvSem.c \
    8485    ${PATH_ROOT}/src/VBox/HostDrivers/Support/SUPDrvTracer.cpp=>SUPDrvTracer.c \
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette