VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp@ 92119

Last change on this file since 92119 was 91958, checked in by vboxsync, 3 years ago

VMM/NEM/win: No mapping limits when in simplified memory management mode. bugref:10122

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 150.8 KB
Line 
1/* $Id: NEMR3Native-win.cpp 91958 2021-10-21 14:34:49Z vboxsync $ */
2/** @file
3 * NEM - Native execution manager, native ring-3 Windows backend.
4 *
5 * Log group 2: Exit logging.
6 * Log group 3: Log context on exit.
7 * Log group 5: Ring-3 memory management
8 * Log group 6: Ring-0 memory management
9 * Log group 12: API intercepts.
10 */
11
12/*
13 * Copyright (C) 2018-2020 Oracle Corporation
14 *
15 * This file is part of VirtualBox Open Source Edition (OSE), as
16 * available from http://www.virtualbox.org. This file is free software;
17 * you can redistribute it and/or modify it under the terms of the GNU
18 * General Public License (GPL) as published by the Free Software
19 * Foundation, in version 2 as it comes in the "COPYING" file of the
20 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
21 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
22 */
23
24
25/*********************************************************************************************************************************
26* Header Files *
27*********************************************************************************************************************************/
28#define LOG_GROUP LOG_GROUP_NEM
29#define VMCPU_INCL_CPUM_GST_CTX
30#include <iprt/nt/nt-and-windows.h>
31#include <iprt/nt/hyperv.h>
32#include <iprt/nt/vid.h>
33#include <WinHvPlatform.h>
34
35#ifndef _WIN32_WINNT_WIN10
36# error "Missing _WIN32_WINNT_WIN10"
37#endif
38#ifndef _WIN32_WINNT_WIN10_RS1 /* Missing define, causing trouble for us. */
39# define _WIN32_WINNT_WIN10_RS1 (_WIN32_WINNT_WIN10 + 1)
40#endif
41#include <sysinfoapi.h>
42#include <debugapi.h>
43#include <errhandlingapi.h>
44#include <fileapi.h>
45#include <winerror.h> /* no api header for this. */
46
47#include <VBox/vmm/nem.h>
48#include <VBox/vmm/iem.h>
49#include <VBox/vmm/em.h>
50#include <VBox/vmm/apic.h>
51#include <VBox/vmm/pdm.h>
52#include <VBox/vmm/dbgftrace.h>
53#include "NEMInternal.h"
54#include <VBox/vmm/vmcc.h>
55
56#include <iprt/ldr.h>
57#include <iprt/path.h>
58#include <iprt/string.h>
59#include <iprt/system.h>
60#include <iprt/utf16.h>
61
62
63/*********************************************************************************************************************************
64* Defined Constants And Macros *
65*********************************************************************************************************************************/
66#ifdef LOG_ENABLED
67# define NEM_WIN_INTERCEPT_NT_IO_CTLS
68#endif
69
70/** VID I/O control detection: Fake partition handle input. */
71#define NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE ((HANDLE)(uintptr_t)38479125)
72/** VID I/O control detection: Fake partition ID return. */
73#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID UINT64_C(0xfa1e000042424242)
74/** VID I/O control detection: The property we get via VidGetPartitionProperty. */
75#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE HvPartitionPropertyProcessorVendor
76/** VID I/O control detection: Fake property value return. */
77#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE UINT64_C(0xf00dface01020304)
78/** VID I/O control detection: Fake CPU index input. */
79#define NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX UINT32_C(42)
80/** VID I/O control detection: Fake timeout input. */
81#define NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT UINT32_C(0x00080286)
82
83
84/*********************************************************************************************************************************
85* Global Variables *
86*********************************************************************************************************************************/
87/** @name APIs imported from WinHvPlatform.dll
88 * @{ */
89static decltype(WHvGetCapability) * g_pfnWHvGetCapability;
90static decltype(WHvCreatePartition) * g_pfnWHvCreatePartition;
91static decltype(WHvSetupPartition) * g_pfnWHvSetupPartition;
92static decltype(WHvDeletePartition) * g_pfnWHvDeletePartition;
93static decltype(WHvGetPartitionProperty) * g_pfnWHvGetPartitionProperty;
94static decltype(WHvSetPartitionProperty) * g_pfnWHvSetPartitionProperty;
95static decltype(WHvMapGpaRange) * g_pfnWHvMapGpaRange;
96static decltype(WHvUnmapGpaRange) * g_pfnWHvUnmapGpaRange;
97static decltype(WHvTranslateGva) * g_pfnWHvTranslateGva;
98#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
99static decltype(WHvCreateVirtualProcessor) * g_pfnWHvCreateVirtualProcessor;
100static decltype(WHvDeleteVirtualProcessor) * g_pfnWHvDeleteVirtualProcessor;
101static decltype(WHvRunVirtualProcessor) * g_pfnWHvRunVirtualProcessor;
102static decltype(WHvCancelRunVirtualProcessor) * g_pfnWHvCancelRunVirtualProcessor;
103static decltype(WHvGetVirtualProcessorRegisters) * g_pfnWHvGetVirtualProcessorRegisters;
104static decltype(WHvSetVirtualProcessorRegisters) * g_pfnWHvSetVirtualProcessorRegisters;
105#endif
106/** @} */
107
108/** @name APIs imported from Vid.dll
109 * @{ */
110static decltype(VidGetHvPartitionId) *g_pfnVidGetHvPartitionId;
111static decltype(VidGetPartitionProperty) *g_pfnVidGetPartitionProperty;
112static decltype(VidStartVirtualProcessor) *g_pfnVidStartVirtualProcessor;
113static decltype(VidStopVirtualProcessor) *g_pfnVidStopVirtualProcessor;
114static decltype(VidMessageSlotMap) *g_pfnVidMessageSlotMap;
115static decltype(VidMessageSlotHandleAndGetNext) *g_pfnVidMessageSlotHandleAndGetNext;
116#ifdef LOG_ENABLED
117static decltype(VidGetVirtualProcessorState) *g_pfnVidGetVirtualProcessorState;
118static decltype(VidSetVirtualProcessorState) *g_pfnVidSetVirtualProcessorState;
119static decltype(VidGetVirtualProcessorRunningStatus) *g_pfnVidGetVirtualProcessorRunningStatus;
120#endif
121/** @} */
122
123/** The Windows build number. */
124static uint32_t g_uBuildNo = 17134;
125
126
127
128/**
129 * Import instructions.
130 */
131static const struct
132{
133 uint8_t idxDll; /**< 0 for WinHvPlatform.dll, 1 for vid.dll. */
134 bool fOptional; /**< Set if import is optional. */
135 PFNRT *ppfn; /**< The function pointer variable. */
136 const char *pszName; /**< The function name. */
137} g_aImports[] =
138{
139#define NEM_WIN_IMPORT(a_idxDll, a_fOptional, a_Name) { (a_idxDll), (a_fOptional), (PFNRT *)&RT_CONCAT(g_pfn,a_Name), #a_Name }
140 NEM_WIN_IMPORT(0, false, WHvGetCapability),
141 NEM_WIN_IMPORT(0, false, WHvCreatePartition),
142 NEM_WIN_IMPORT(0, false, WHvSetupPartition),
143 NEM_WIN_IMPORT(0, false, WHvDeletePartition),
144 NEM_WIN_IMPORT(0, false, WHvGetPartitionProperty),
145 NEM_WIN_IMPORT(0, false, WHvSetPartitionProperty),
146 NEM_WIN_IMPORT(0, false, WHvMapGpaRange),
147 NEM_WIN_IMPORT(0, false, WHvUnmapGpaRange),
148 NEM_WIN_IMPORT(0, false, WHvTranslateGva),
149#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
150 NEM_WIN_IMPORT(0, false, WHvCreateVirtualProcessor),
151 NEM_WIN_IMPORT(0, false, WHvDeleteVirtualProcessor),
152 NEM_WIN_IMPORT(0, false, WHvRunVirtualProcessor),
153 NEM_WIN_IMPORT(0, false, WHvCancelRunVirtualProcessor),
154 NEM_WIN_IMPORT(0, false, WHvGetVirtualProcessorRegisters),
155 NEM_WIN_IMPORT(0, false, WHvSetVirtualProcessorRegisters),
156#endif
157 NEM_WIN_IMPORT(1, false, VidGetHvPartitionId),
158 NEM_WIN_IMPORT(1, false, VidGetPartitionProperty),
159 NEM_WIN_IMPORT(1, false, VidMessageSlotMap),
160 NEM_WIN_IMPORT(1, false, VidMessageSlotHandleAndGetNext),
161 NEM_WIN_IMPORT(1, false, VidStartVirtualProcessor),
162 NEM_WIN_IMPORT(1, false, VidStopVirtualProcessor),
163#ifdef LOG_ENABLED
164 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorState),
165 NEM_WIN_IMPORT(1, false, VidSetVirtualProcessorState),
166 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorRunningStatus),
167#endif
168#undef NEM_WIN_IMPORT
169};
170
171
172/** The real NtDeviceIoControlFile API in NTDLL. */
173static decltype(NtDeviceIoControlFile) *g_pfnNtDeviceIoControlFile;
174/** Pointer to the NtDeviceIoControlFile import table entry. */
175static decltype(NtDeviceIoControlFile) **g_ppfnVidNtDeviceIoControlFile;
176#if defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) || defined(LOG_ENABLED)
177/** Info about the VidGetHvPartitionId I/O control interface. */
178static NEMWINIOCTL g_IoCtlGetHvPartitionId;
179/** Info about the VidGetPartitionProperty I/O control interface. */
180static NEMWINIOCTL g_IoCtlGetPartitionProperty;
181#endif
182#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(LOG_ENABLED)
183/** Info about the VidStartVirtualProcessor I/O control interface. */
184static NEMWINIOCTL g_IoCtlStartVirtualProcessor;
185/** Info about the VidStopVirtualProcessor I/O control interface. */
186static NEMWINIOCTL g_IoCtlStopVirtualProcessor;
187/** Info about the VidMessageSlotHandleAndGetNext I/O control interface. */
188static NEMWINIOCTL g_IoCtlMessageSlotHandleAndGetNext;
189#endif
190#ifdef LOG_ENABLED
191/** Info about the VidMessageSlotMap I/O control interface - for logging. */
192static NEMWINIOCTL g_IoCtlMessageSlotMap;
193/** Info about the VidGetVirtualProcessorState I/O control interface - for logging. */
194static NEMWINIOCTL g_IoCtlGetVirtualProcessorState;
195/** Info about the VidSetVirtualProcessorState I/O control interface - for logging. */
196static NEMWINIOCTL g_IoCtlSetVirtualProcessorState;
197/** Pointer to what nemR3WinIoctlDetector_ForLogging should fill in. */
198static NEMWINIOCTL *g_pIoCtlDetectForLogging;
199#endif
200
201#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
202/** Mapping slot for CPU #0.
203 * @{ */
204static VID_MESSAGE_MAPPING_HEADER *g_pMsgSlotMapping = NULL;
205static const HV_MESSAGE_HEADER *g_pHvMsgHdr;
206static const HV_X64_INTERCEPT_MESSAGE_HEADER *g_pX64MsgHdr;
207/** @} */
208#endif
209
210
211/*
212 * Let the preprocessor alias the APIs to import variables for better autocompletion.
213 */
214#ifndef IN_SLICKEDIT
215# define WHvGetCapability g_pfnWHvGetCapability
216# define WHvCreatePartition g_pfnWHvCreatePartition
217# define WHvSetupPartition g_pfnWHvSetupPartition
218# define WHvDeletePartition g_pfnWHvDeletePartition
219# define WHvGetPartitionProperty g_pfnWHvGetPartitionProperty
220# define WHvSetPartitionProperty g_pfnWHvSetPartitionProperty
221# define WHvMapGpaRange g_pfnWHvMapGpaRange
222# define WHvUnmapGpaRange g_pfnWHvUnmapGpaRange
223# define WHvTranslateGva g_pfnWHvTranslateGva
224# define WHvCreateVirtualProcessor g_pfnWHvCreateVirtualProcessor
225# define WHvDeleteVirtualProcessor g_pfnWHvDeleteVirtualProcessor
226# define WHvRunVirtualProcessor g_pfnWHvRunVirtualProcessor
227# define WHvGetRunExitContextSize g_pfnWHvGetRunExitContextSize
228# define WHvCancelRunVirtualProcessor g_pfnWHvCancelRunVirtualProcessor
229# define WHvGetVirtualProcessorRegisters g_pfnWHvGetVirtualProcessorRegisters
230# define WHvSetVirtualProcessorRegisters g_pfnWHvSetVirtualProcessorRegisters
231
232# define VidMessageSlotHandleAndGetNext g_pfnVidMessageSlotHandleAndGetNext
233# define VidStartVirtualProcessor g_pfnVidStartVirtualProcessor
234# define VidStopVirtualProcessor g_pfnVidStopVirtualProcessor
235
236#endif
237
238/** WHV_MEMORY_ACCESS_TYPE names */
239static const char * const g_apszWHvMemAccesstypes[4] = { "read", "write", "exec", "!undefined!" };
240
241
242/*********************************************************************************************************************************
243* Internal Functions *
244*********************************************************************************************************************************/
245DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv);
246DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv);
247
248/*
249 * Instantiate the code we share with ring-0.
250 */
251#ifdef NEM_WIN_USE_OUR_OWN_RUN_API
252# define NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
253#else
254# undef NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
255#endif
256#include "../VMMAll/NEMAllNativeTemplate-win.cpp.h"
257
258
259
260#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
261/**
262 * Wrapper that logs the call from VID.DLL.
263 *
264 * This is very handy for figuring out why an API call fails.
265 */
266static NTSTATUS WINAPI
267nemR3WinLogWrapper_NtDeviceIoControlFile(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
268 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
269 PVOID pvOutput, ULONG cbOutput)
270{
271
272 char szFunction[32];
273 const char *pszFunction;
274 if (uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction)
275 pszFunction = "VidMessageSlotHandleAndGetNext";
276 else if (uFunction == g_IoCtlStartVirtualProcessor.uFunction)
277 pszFunction = "VidStartVirtualProcessor";
278 else if (uFunction == g_IoCtlStopVirtualProcessor.uFunction)
279 pszFunction = "VidStopVirtualProcessor";
280 else if (uFunction == g_IoCtlMessageSlotMap.uFunction)
281 pszFunction = "VidMessageSlotMap";
282 else if (uFunction == g_IoCtlGetVirtualProcessorState.uFunction)
283 pszFunction = "VidGetVirtualProcessorState";
284 else if (uFunction == g_IoCtlSetVirtualProcessorState.uFunction)
285 pszFunction = "VidSetVirtualProcessorState";
286 else
287 {
288 RTStrPrintf(szFunction, sizeof(szFunction), "%#x", uFunction);
289 pszFunction = szFunction;
290 }
291
292 if (cbInput > 0 && pvInput)
293 Log12(("VID!NtDeviceIoControlFile: %s/input: %.*Rhxs\n", pszFunction, RT_MIN(cbInput, 32), pvInput));
294 NTSTATUS rcNt = g_pfnNtDeviceIoControlFile(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, uFunction,
295 pvInput, cbInput, pvOutput, cbOutput);
296 if (!hEvt && !pfnApcCallback && !pvApcCtx)
297 Log12(("VID!NtDeviceIoControlFile: hFile=%#zx pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
298 hFile, pIos, pIos->Status, pIos->Information, pszFunction, pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));
299 else
300 Log12(("VID!NtDeviceIoControlFile: hFile=%#zx hEvt=%#zx Apc=%p/%p pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
301 hFile, hEvt, RT_CB_LOG_CAST(pfnApcCallback), pvApcCtx, pIos, pIos->Status, pIos->Information, pszFunction,
302 pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));
303 if (cbOutput > 0 && pvOutput)
304 {
305 Log12(("VID!NtDeviceIoControlFile: %s/output: %.*Rhxs\n", pszFunction, RT_MIN(cbOutput, 32), pvOutput));
306 if (uFunction == 0x2210cc && g_pMsgSlotMapping == NULL && cbOutput >= sizeof(void *))
307 {
308 g_pMsgSlotMapping = *(VID_MESSAGE_MAPPING_HEADER **)pvOutput;
309 g_pHvMsgHdr = (const HV_MESSAGE_HEADER *)(g_pMsgSlotMapping + 1);
310 g_pX64MsgHdr = (const HV_X64_INTERCEPT_MESSAGE_HEADER *)(g_pHvMsgHdr + 1);
311 Log12(("VID!NtDeviceIoControlFile: Message slot mapping: %p\n", g_pMsgSlotMapping));
312 }
313 }
314 if ( g_pMsgSlotMapping
315 && ( uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction
316 || uFunction == g_IoCtlStopVirtualProcessor.uFunction
317 || uFunction == g_IoCtlMessageSlotMap.uFunction
318 ))
319 Log12(("VID!NtDeviceIoControlFile: enmVidMsgType=%#x cb=%#x msg=%#x payload=%u cs:rip=%04x:%08RX64 (%s)\n",
320 g_pMsgSlotMapping->enmVidMsgType, g_pMsgSlotMapping->cbMessage,
321 g_pHvMsgHdr->MessageType, g_pHvMsgHdr->PayloadSize,
322 g_pX64MsgHdr->CsSegment.Selector, g_pX64MsgHdr->Rip, pszFunction));
323
324 return rcNt;
325}
326#endif /* NEM_WIN_INTERCEPT_NT_IO_CTLS */
327
328
329/**
330 * Patches the call table of VID.DLL so we can intercept NtDeviceIoControlFile.
331 *
332 * This is for used to figure out the I/O control codes and in logging builds
333 * for logging API calls that WinHvPlatform.dll does.
334 *
335 * @returns VBox status code.
336 * @param hLdrModVid The VID module handle.
337 * @param pErrInfo Where to return additional error information.
338 */
339static int nemR3WinInitVidIntercepts(RTLDRMOD hLdrModVid, PRTERRINFO pErrInfo)
340{
341 /*
342 * Locate the real API.
343 */
344 g_pfnNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) *)RTLdrGetSystemSymbol("NTDLL.DLL", "NtDeviceIoControlFile");
345 AssertReturn(g_pfnNtDeviceIoControlFile != NULL,
346 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to resolve NtDeviceIoControlFile from NTDLL.DLL"));
347
348 /*
349 * Locate the PE header and get what we need from it.
350 */
351 uint8_t const *pbImage = (uint8_t const *)RTLdrGetNativeHandle(hLdrModVid);
352 IMAGE_DOS_HEADER const *pMzHdr = (IMAGE_DOS_HEADER const *)pbImage;
353 AssertReturn(pMzHdr->e_magic == IMAGE_DOS_SIGNATURE,
354 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL mapping doesn't start with MZ signature: %#x", pMzHdr->e_magic));
355 IMAGE_NT_HEADERS const *pNtHdrs = (IMAGE_NT_HEADERS const *)&pbImage[pMzHdr->e_lfanew];
356 AssertReturn(pNtHdrs->Signature == IMAGE_NT_SIGNATURE,
357 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL has invalid PE signaturre: %#x @%#x",
358 pNtHdrs->Signature, pMzHdr->e_lfanew));
359
360 uint32_t const cbImage = pNtHdrs->OptionalHeader.SizeOfImage;
361 IMAGE_DATA_DIRECTORY const ImportDir = pNtHdrs->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
362
363 /*
364 * Walk the import descriptor table looking for NTDLL.DLL.
365 */
366 AssertReturn( ImportDir.Size > 0
367 && ImportDir.Size < cbImage,
368 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory size: %#x", ImportDir.Size));
369 AssertReturn( ImportDir.VirtualAddress > 0
370 && ImportDir.VirtualAddress <= cbImage - ImportDir.Size,
371 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory RVA: %#x", ImportDir.VirtualAddress));
372
373 for (PIMAGE_IMPORT_DESCRIPTOR pImps = (PIMAGE_IMPORT_DESCRIPTOR)&pbImage[ImportDir.VirtualAddress];
374 pImps->Name != 0 && pImps->FirstThunk != 0;
375 pImps++)
376 {
377 AssertReturn(pImps->Name < cbImage,
378 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory entry name: %#x", pImps->Name));
379 const char *pszModName = (const char *)&pbImage[pImps->Name];
380 if (RTStrICmpAscii(pszModName, "ntdll.dll"))
381 continue;
382 AssertReturn(pImps->FirstThunk < cbImage,
383 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
384 AssertReturn(pImps->OriginalFirstThunk < cbImage,
385 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
386
387 /*
388 * Walk the thunks table(s) looking for NtDeviceIoControlFile.
389 */
390 uintptr_t *puFirstThunk = (uintptr_t *)&pbImage[pImps->FirstThunk]; /* update this. */
391 if ( pImps->OriginalFirstThunk != 0
392 && pImps->OriginalFirstThunk != pImps->FirstThunk)
393 {
394 uintptr_t const *puOrgThunk = (uintptr_t const *)&pbImage[pImps->OriginalFirstThunk]; /* read from this. */
395 uintptr_t cLeft = (cbImage - (RT_MAX(pImps->FirstThunk, pImps->OriginalFirstThunk)))
396 / sizeof(*puFirstThunk);
397 while (cLeft-- > 0 && *puOrgThunk != 0)
398 {
399 if (!(*puOrgThunk & IMAGE_ORDINAL_FLAG64)) /* ASSUMES 64-bit */
400 {
401 AssertReturn(*puOrgThunk > 0 && *puOrgThunk < cbImage,
402 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad thunk entry: %#x", *puOrgThunk));
403
404 const char *pszSymbol = (const char *)&pbImage[*puOrgThunk + 2];
405 if (strcmp(pszSymbol, "NtDeviceIoControlFile") == 0)
406 g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)puFirstThunk;
407 }
408
409 puOrgThunk++;
410 puFirstThunk++;
411 }
412 }
413 else
414 {
415 /* No original thunk table, so scan the resolved symbols for a match
416 with the NtDeviceIoControlFile address. */
417 uintptr_t const uNeedle = (uintptr_t)g_pfnNtDeviceIoControlFile;
418 uintptr_t cLeft = (cbImage - pImps->FirstThunk) / sizeof(*puFirstThunk);
419 while (cLeft-- > 0 && *puFirstThunk != 0)
420 {
421 if (*puFirstThunk == uNeedle)
422 g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)puFirstThunk;
423 puFirstThunk++;
424 }
425 }
426 }
427
428 if (g_ppfnVidNtDeviceIoControlFile != NULL)
429 {
430 /* Make the thunk writable we can freely modify it. */
431 DWORD fOldProt = PAGE_READONLY;
432 VirtualProtect((void *)(uintptr_t)g_ppfnVidNtDeviceIoControlFile, sizeof(uintptr_t), PAGE_EXECUTE_READWRITE, &fOldProt);
433
434#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
435 *g_ppfnVidNtDeviceIoControlFile = nemR3WinLogWrapper_NtDeviceIoControlFile;
436#endif
437 return VINF_SUCCESS;
438 }
439 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to patch NtDeviceIoControlFile import in VID.DLL!");
440}
441
442
443/**
444 * Worker for nemR3NativeInit that probes and load the native API.
445 *
446 * @returns VBox status code.
447 * @param fForced Whether the HMForced flag is set and we should
448 * fail if we cannot initialize.
449 * @param pErrInfo Where to always return error info.
450 */
451static int nemR3WinInitProbeAndLoad(bool fForced, PRTERRINFO pErrInfo)
452{
453 /*
454 * Check that the DLL files we need are present, but without loading them.
455 * We'd like to avoid loading them unnecessarily.
456 */
457 WCHAR wszPath[MAX_PATH + 64];
458 UINT cwcPath = GetSystemDirectoryW(wszPath, MAX_PATH);
459 if (cwcPath >= MAX_PATH || cwcPath < 2)
460 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "GetSystemDirectoryW failed (%#x / %u)", cwcPath, GetLastError());
461
462 if (wszPath[cwcPath - 1] != '\\' || wszPath[cwcPath - 1] != '/')
463 wszPath[cwcPath++] = '\\';
464 RTUtf16CopyAscii(&wszPath[cwcPath], RT_ELEMENTS(wszPath) - cwcPath, "WinHvPlatform.dll");
465 if (GetFileAttributesW(wszPath) == INVALID_FILE_ATTRIBUTES)
466 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "The native API dll was not found (%ls)", wszPath);
467
468 /*
469 * Check that we're in a VM and that the hypervisor identifies itself as Hyper-V.
470 */
471 if (!ASMHasCpuId())
472 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID support");
473 if (!ASMIsValidStdRange(ASMCpuId_EAX(0)))
474 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID leaf #1");
475 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_HVP))
476 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Not in a hypervisor partition (HVP=0)");
477
478 uint32_t cMaxHyperLeaf = 0;
479 uint32_t uEbx = 0;
480 uint32_t uEcx = 0;
481 uint32_t uEdx = 0;
482 ASMCpuIdExSlow(0x40000000, 0, 0, 0, &cMaxHyperLeaf, &uEbx, &uEcx, &uEdx);
483 if (!ASMIsValidHypervisorRange(cMaxHyperLeaf))
484 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Invalid hypervisor CPUID range (%#x %#x %#x %#x)",
485 cMaxHyperLeaf, uEbx, uEcx, uEdx);
486 if ( uEbx != UINT32_C(0x7263694d) /* Micr */
487 || uEcx != UINT32_C(0x666f736f) /* osof */
488 || uEdx != UINT32_C(0x76482074) /* t Hv */)
489 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
490 "Not Hyper-V CPUID signature: %#x %#x %#x (expected %#x %#x %#x)",
491 uEbx, uEcx, uEdx, UINT32_C(0x7263694d), UINT32_C(0x666f736f), UINT32_C(0x76482074));
492 if (cMaxHyperLeaf < UINT32_C(0x40000005))
493 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Too narrow hypervisor CPUID range (%#x)", cMaxHyperLeaf);
494
495 /** @todo would be great if we could recognize a root partition from the
496 * CPUID info, but I currently don't dare do that. */
497
498 /*
499 * Now try load the DLLs and resolve the APIs.
500 */
501 static const char * const s_apszDllNames[2] = { "WinHvPlatform.dll", "vid.dll" };
502 RTLDRMOD ahMods[2] = { NIL_RTLDRMOD, NIL_RTLDRMOD };
503 int rc = VINF_SUCCESS;
504 for (unsigned i = 0; i < RT_ELEMENTS(s_apszDllNames); i++)
505 {
506 int rc2 = RTLdrLoadSystem(s_apszDllNames[i], true /*fNoUnload*/, &ahMods[i]);
507 if (RT_FAILURE(rc2))
508 {
509 if (!RTErrInfoIsSet(pErrInfo))
510 RTErrInfoSetF(pErrInfo, rc2, "Failed to load API DLL: %s: %Rrc", s_apszDllNames[i], rc2);
511 else
512 RTErrInfoAddF(pErrInfo, rc2, "; %s: %Rrc", s_apszDllNames[i], rc2);
513 ahMods[i] = NIL_RTLDRMOD;
514 rc = VERR_NEM_INIT_FAILED;
515 }
516 }
517 if (RT_SUCCESS(rc))
518 rc = nemR3WinInitVidIntercepts(ahMods[1], pErrInfo);
519 if (RT_SUCCESS(rc))
520 {
521 for (unsigned i = 0; i < RT_ELEMENTS(g_aImports); i++)
522 {
523 int rc2 = RTLdrGetSymbol(ahMods[g_aImports[i].idxDll], g_aImports[i].pszName, (void **)g_aImports[i].ppfn);
524 if (RT_FAILURE(rc2))
525 {
526 *g_aImports[i].ppfn = NULL;
527
528 LogRel(("NEM: %s: Failed to import %s!%s: %Rrc",
529 g_aImports[i].fOptional ? "info" : fForced ? "fatal" : "error",
530 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName, rc2));
531 if (!g_aImports[i].fOptional)
532 {
533 if (RTErrInfoIsSet(pErrInfo))
534 RTErrInfoAddF(pErrInfo, rc2, ", %s!%s",
535 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
536 else
537 rc = RTErrInfoSetF(pErrInfo, rc2, "Failed to import: %s!%s",
538 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
539 Assert(RT_FAILURE(rc));
540 }
541 }
542 }
543 if (RT_SUCCESS(rc))
544 {
545 Assert(!RTErrInfoIsSet(pErrInfo));
546 }
547 }
548
549 for (unsigned i = 0; i < RT_ELEMENTS(ahMods); i++)
550 RTLdrClose(ahMods[i]);
551 return rc;
552}
553
554
555/**
556 * Wrapper for different WHvGetCapability signatures.
557 */
558DECLINLINE(HRESULT) WHvGetCapabilityWrapper(WHV_CAPABILITY_CODE enmCap, WHV_CAPABILITY *pOutput, uint32_t cbOutput)
559{
560 return g_pfnWHvGetCapability(enmCap, pOutput, cbOutput, NULL);
561}
562
563
564/**
565 * Worker for nemR3NativeInit that gets the hypervisor capabilities.
566 *
567 * @returns VBox status code.
568 * @param pVM The cross context VM structure.
569 * @param pErrInfo Where to always return error info.
570 */
571static int nemR3WinInitCheckCapabilities(PVM pVM, PRTERRINFO pErrInfo)
572{
573#define NEM_LOG_REL_CAP_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %-38s= " a_szFmt "\n", a_szField, a_Value))
574#define NEM_LOG_REL_CAP_SUB_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %36s: " a_szFmt "\n", a_szField, a_Value))
575#define NEM_LOG_REL_CAP_SUB(a_szField, a_Value) NEM_LOG_REL_CAP_SUB_EX(a_szField, "%d", a_Value)
576
577 /*
578 * Is the hypervisor present with the desired capability?
579 *
580 * In build 17083 this translates into:
581 * - CPUID[0x00000001].HVP is set
582 * - CPUID[0x40000000] == "Microsoft Hv"
583 * - CPUID[0x40000001].eax == "Hv#1"
584 * - CPUID[0x40000003].ebx[12] is set.
585 * - VidGetExoPartitionProperty(INVALID_HANDLE_VALUE, 0x60000, &Ignored) returns
586 * a non-zero value.
587 */
588 /**
589 * @todo Someone at Microsoft please explain weird API design:
590 * 1. Pointless CapabilityCode duplication int the output;
591 * 2. No output size.
592 */
593 WHV_CAPABILITY Caps;
594 RT_ZERO(Caps);
595 SetLastError(0);
596 HRESULT hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeHypervisorPresent, &Caps, sizeof(Caps));
597 DWORD rcWin = GetLastError();
598 if (FAILED(hrc))
599 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
600 "WHvGetCapability/WHvCapabilityCodeHypervisorPresent failed: %Rhrc (Last=%#x/%u)",
601 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
602 if (!Caps.HypervisorPresent)
603 {
604 if (!RTPathExists(RTPATH_NT_PASSTHRU_PREFIX "Device\\VidExo"))
605 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
606 "WHvCapabilityCodeHypervisorPresent is FALSE! Make sure you have enabled the 'Windows Hypervisor Platform' feature.");
607 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "WHvCapabilityCodeHypervisorPresent is FALSE! (%u)", rcWin);
608 }
609 LogRel(("NEM: WHvCapabilityCodeHypervisorPresent is TRUE, so this might work...\n"));
610
611
612 /*
613 * Check what extended VM exits are supported.
614 */
615 RT_ZERO(Caps);
616 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExtendedVmExits, &Caps, sizeof(Caps));
617 if (FAILED(hrc))
618 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
619 "WHvGetCapability/WHvCapabilityCodeExtendedVmExits failed: %Rhrc (Last=%#x/%u)",
620 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
621 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeExtendedVmExits", "%'#018RX64", Caps.ExtendedVmExits.AsUINT64);
622 pVM->nem.s.fExtendedMsrExit = RT_BOOL(Caps.ExtendedVmExits.X64MsrExit);
623 pVM->nem.s.fExtendedCpuIdExit = RT_BOOL(Caps.ExtendedVmExits.X64CpuidExit);
624 pVM->nem.s.fExtendedXcptExit = RT_BOOL(Caps.ExtendedVmExits.ExceptionExit);
625 NEM_LOG_REL_CAP_SUB("fExtendedMsrExit", pVM->nem.s.fExtendedMsrExit);
626 NEM_LOG_REL_CAP_SUB("fExtendedCpuIdExit", pVM->nem.s.fExtendedCpuIdExit);
627 NEM_LOG_REL_CAP_SUB("fExtendedXcptExit", pVM->nem.s.fExtendedXcptExit);
628 if (Caps.ExtendedVmExits.AsUINT64 & ~(uint64_t)7)
629 LogRel(("NEM: Warning! Unknown VM exit definitions: %#RX64\n", Caps.ExtendedVmExits.AsUINT64));
630 /** @todo RECHECK: WHV_EXTENDED_VM_EXITS typedef. */
631
632 /*
633 * Check features in case they end up defining any.
634 */
635 RT_ZERO(Caps);
636 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeFeatures, &Caps, sizeof(Caps));
637 if (FAILED(hrc))
638 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
639 "WHvGetCapability/WHvCapabilityCodeFeatures failed: %Rhrc (Last=%#x/%u)",
640 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
641 if (Caps.Features.AsUINT64 & ~(uint64_t)0)
642 LogRel(("NEM: Warning! Unknown feature definitions: %#RX64\n", Caps.Features.AsUINT64));
643 /** @todo RECHECK: WHV_CAPABILITY_FEATURES typedef. */
644
645 /*
646 * Check supported exception exit bitmap bits.
647 * We don't currently require this, so we just log failure.
648 */
649 RT_ZERO(Caps);
650 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExceptionExitBitmap, &Caps, sizeof(Caps));
651 if (SUCCEEDED(hrc))
652 LogRel(("NEM: Supported exception exit bitmap: %#RX64\n", Caps.ExceptionExitBitmap));
653 else
654 LogRel(("NEM: Warning! WHvGetCapability/WHvCapabilityCodeExceptionExitBitmap failed: %Rhrc (Last=%#x/%u)",
655 hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
656
657 /*
658 * Check that the CPU vendor is supported.
659 */
660 RT_ZERO(Caps);
661 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorVendor, &Caps, sizeof(Caps));
662 if (FAILED(hrc))
663 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
664 "WHvGetCapability/WHvCapabilityCodeProcessorVendor failed: %Rhrc (Last=%#x/%u)",
665 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
666 switch (Caps.ProcessorVendor)
667 {
668 /** @todo RECHECK: WHV_PROCESSOR_VENDOR typedef. */
669 case WHvProcessorVendorIntel:
670 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - Intel", Caps.ProcessorVendor);
671 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_INTEL;
672 break;
673 case WHvProcessorVendorAmd:
674 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - AMD", Caps.ProcessorVendor);
675 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_AMD;
676 break;
677 default:
678 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d", Caps.ProcessorVendor);
679 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unknown processor vendor: %d", Caps.ProcessorVendor);
680 }
681
682 /*
683 * CPU features, guessing these are virtual CPU features?
684 */
685 RT_ZERO(Caps);
686 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorFeatures, &Caps, sizeof(Caps));
687 if (FAILED(hrc))
688 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
689 "WHvGetCapability/WHvCapabilityCodeProcessorFeatures failed: %Rhrc (Last=%#x/%u)",
690 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
691 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorFeatures", "%'#018RX64", Caps.ProcessorFeatures.AsUINT64);
692#define NEM_LOG_REL_CPU_FEATURE(a_Field) NEM_LOG_REL_CAP_SUB(#a_Field, Caps.ProcessorFeatures.a_Field)
693 NEM_LOG_REL_CPU_FEATURE(Sse3Support);
694 NEM_LOG_REL_CPU_FEATURE(LahfSahfSupport);
695 NEM_LOG_REL_CPU_FEATURE(Ssse3Support);
696 NEM_LOG_REL_CPU_FEATURE(Sse4_1Support);
697 NEM_LOG_REL_CPU_FEATURE(Sse4_2Support);
698 NEM_LOG_REL_CPU_FEATURE(Sse4aSupport);
699 NEM_LOG_REL_CPU_FEATURE(XopSupport);
700 NEM_LOG_REL_CPU_FEATURE(PopCntSupport);
701 NEM_LOG_REL_CPU_FEATURE(Cmpxchg16bSupport);
702 NEM_LOG_REL_CPU_FEATURE(Altmovcr8Support);
703 NEM_LOG_REL_CPU_FEATURE(LzcntSupport);
704 NEM_LOG_REL_CPU_FEATURE(MisAlignSseSupport);
705 NEM_LOG_REL_CPU_FEATURE(MmxExtSupport);
706 NEM_LOG_REL_CPU_FEATURE(Amd3DNowSupport);
707 NEM_LOG_REL_CPU_FEATURE(ExtendedAmd3DNowSupport);
708 NEM_LOG_REL_CPU_FEATURE(Page1GbSupport);
709 NEM_LOG_REL_CPU_FEATURE(AesSupport);
710 NEM_LOG_REL_CPU_FEATURE(PclmulqdqSupport);
711 NEM_LOG_REL_CPU_FEATURE(PcidSupport);
712 NEM_LOG_REL_CPU_FEATURE(Fma4Support);
713 NEM_LOG_REL_CPU_FEATURE(F16CSupport);
714 NEM_LOG_REL_CPU_FEATURE(RdRandSupport);
715 NEM_LOG_REL_CPU_FEATURE(RdWrFsGsSupport);
716 NEM_LOG_REL_CPU_FEATURE(SmepSupport);
717 NEM_LOG_REL_CPU_FEATURE(EnhancedFastStringSupport);
718 NEM_LOG_REL_CPU_FEATURE(Bmi1Support);
719 NEM_LOG_REL_CPU_FEATURE(Bmi2Support);
720 /* two reserved bits here, see below */
721 NEM_LOG_REL_CPU_FEATURE(MovbeSupport);
722 NEM_LOG_REL_CPU_FEATURE(Npiep1Support);
723 NEM_LOG_REL_CPU_FEATURE(DepX87FPUSaveSupport);
724 NEM_LOG_REL_CPU_FEATURE(RdSeedSupport);
725 NEM_LOG_REL_CPU_FEATURE(AdxSupport);
726 NEM_LOG_REL_CPU_FEATURE(IntelPrefetchSupport);
727 NEM_LOG_REL_CPU_FEATURE(SmapSupport);
728 NEM_LOG_REL_CPU_FEATURE(HleSupport);
729 NEM_LOG_REL_CPU_FEATURE(RtmSupport);
730 NEM_LOG_REL_CPU_FEATURE(RdtscpSupport);
731 NEM_LOG_REL_CPU_FEATURE(ClflushoptSupport);
732 NEM_LOG_REL_CPU_FEATURE(ClwbSupport);
733 NEM_LOG_REL_CPU_FEATURE(ShaSupport);
734 NEM_LOG_REL_CPU_FEATURE(X87PointersSavedSupport);
735#undef NEM_LOG_REL_CPU_FEATURE
736 if (Caps.ProcessorFeatures.AsUINT64 & (~(RT_BIT_64(43) - 1) | RT_BIT_64(27) | RT_BIT_64(28)))
737 LogRel(("NEM: Warning! Unknown CPU features: %#RX64\n", Caps.ProcessorFeatures.AsUINT64));
738 pVM->nem.s.uCpuFeatures.u64 = Caps.ProcessorFeatures.AsUINT64;
739 /** @todo RECHECK: WHV_PROCESSOR_FEATURES typedef. */
740
741 /*
742 * The cache line flush size.
743 */
744 RT_ZERO(Caps);
745 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorClFlushSize, &Caps, sizeof(Caps));
746 if (FAILED(hrc))
747 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
748 "WHvGetCapability/WHvCapabilityCodeProcessorClFlushSize failed: %Rhrc (Last=%#x/%u)",
749 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
750 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorClFlushSize", "2^%u", Caps.ProcessorClFlushSize);
751 if (Caps.ProcessorClFlushSize < 8 && Caps.ProcessorClFlushSize > 9)
752 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unsupported cache line flush size: %u", Caps.ProcessorClFlushSize);
753 pVM->nem.s.cCacheLineFlushShift = Caps.ProcessorClFlushSize;
754
755 /*
756 * See if they've added more properties that we're not aware of.
757 */
758 /** @todo RECHECK: WHV_CAPABILITY_CODE typedef. */
759 if (!IsDebuggerPresent()) /* Too noisy when in debugger, so skip. */
760 {
761 static const struct
762 {
763 uint32_t iMin, iMax; } s_aUnknowns[] =
764 {
765 { 0x0004, 0x000f },
766 { 0x1003, 0x100f },
767 { 0x2000, 0x200f },
768 { 0x3000, 0x300f },
769 { 0x4000, 0x400f },
770 };
771 for (uint32_t j = 0; j < RT_ELEMENTS(s_aUnknowns); j++)
772 for (uint32_t i = s_aUnknowns[j].iMin; i <= s_aUnknowns[j].iMax; i++)
773 {
774 RT_ZERO(Caps);
775 hrc = WHvGetCapabilityWrapper((WHV_CAPABILITY_CODE)i, &Caps, sizeof(Caps));
776 if (SUCCEEDED(hrc))
777 LogRel(("NEM: Warning! Unknown capability %#x returning: %.*Rhxs\n", i, sizeof(Caps), &Caps));
778 }
779 }
780
781 /*
782 * For proper operation, we require CPUID exits.
783 */
784 if (!pVM->nem.s.fExtendedCpuIdExit)
785 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended CPUID exit support");
786 if (!pVM->nem.s.fExtendedMsrExit)
787 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended MSR exit support");
788 if (!pVM->nem.s.fExtendedXcptExit)
789 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended exception exit support");
790
791#undef NEM_LOG_REL_CAP_EX
792#undef NEM_LOG_REL_CAP_SUB_EX
793#undef NEM_LOG_REL_CAP_SUB
794 return VINF_SUCCESS;
795}
796
797#if defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) || defined(LOG_ENABLED)
798
799/**
800 * Used to fill in g_IoCtlGetHvPartitionId.
801 */
802static NTSTATUS WINAPI
803nemR3WinIoctlDetector_GetHvPartitionId(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
804 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
805 PVOID pvOutput, ULONG cbOutput)
806{
807 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
808 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
809 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
810 AssertLogRelMsgReturn(cbInput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
811 RT_NOREF(pvInput);
812
813 AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9);
814 AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_ID), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
815 *(HV_PARTITION_ID *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID;
816
817 g_IoCtlGetHvPartitionId.cbInput = cbInput;
818 g_IoCtlGetHvPartitionId.cbOutput = cbOutput;
819 g_IoCtlGetHvPartitionId.uFunction = uFunction;
820
821 return STATUS_SUCCESS;
822}
823
824
825/**
826 * Used to fill in g_IoCtlGetHvPartitionId.
827 */
828static NTSTATUS WINAPI
829nemR3WinIoctlDetector_GetPartitionProperty(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
830 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
831 PVOID pvOutput, ULONG cbOutput)
832{
833 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
834 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
835 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
836 AssertLogRelMsgReturn(cbInput == sizeof(VID_PARTITION_PROPERTY_CODE), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
837 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
838 AssertLogRelMsgReturn(*(VID_PARTITION_PROPERTY_CODE *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE,
839 ("*pvInput=%#x, expected %#x\n", *(HV_PARTITION_PROPERTY_CODE *)pvInput,
840 NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE), STATUS_INVALID_PARAMETER_9);
841 AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9);
842 AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_PROPERTY), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
843 *(HV_PARTITION_PROPERTY *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE;
844
845 g_IoCtlGetPartitionProperty.cbInput = cbInput;
846 g_IoCtlGetPartitionProperty.cbOutput = cbOutput;
847 g_IoCtlGetPartitionProperty.uFunction = uFunction;
848
849 return STATUS_SUCCESS;
850}
851
852#endif /* defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) || defined(LOG_ENABLED) */
853#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(LOG_ENABLED)
854
855/**
856 * Used to fill in g_IoCtlStartVirtualProcessor.
857 */
858static NTSTATUS WINAPI
859nemR3WinIoctlDetector_StartVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
860 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
861 PVOID pvOutput, ULONG cbOutput)
862{
863 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
864 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
865 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
866 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
867 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
868 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
869 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
870 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
871 RT_NOREF(pvOutput);
872
873 g_IoCtlStartVirtualProcessor.cbInput = cbInput;
874 g_IoCtlStartVirtualProcessor.cbOutput = cbOutput;
875 g_IoCtlStartVirtualProcessor.uFunction = uFunction;
876
877 return STATUS_SUCCESS;
878}
879
880
881/**
882 * Used to fill in g_IoCtlStartVirtualProcessor.
883 */
884static NTSTATUS WINAPI
885nemR3WinIoctlDetector_StopVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
886 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
887 PVOID pvOutput, ULONG cbOutput)
888{
889 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
890 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
891 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
892 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
893 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
894 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
895 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
896 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
897 RT_NOREF(pvOutput);
898
899 g_IoCtlStopVirtualProcessor.cbInput = cbInput;
900 g_IoCtlStopVirtualProcessor.cbOutput = cbOutput;
901 g_IoCtlStopVirtualProcessor.uFunction = uFunction;
902
903 return STATUS_SUCCESS;
904}
905
906
907/**
908 * Used to fill in g_IoCtlMessageSlotHandleAndGetNext
909 */
910static NTSTATUS WINAPI
911nemR3WinIoctlDetector_MessageSlotHandleAndGetNext(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
912 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
913 PVOID pvOutput, ULONG cbOutput)
914{
915 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
916 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
917 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
918
919 if (g_uBuildNo >= 17758)
920 {
921 /* No timeout since about build 17758, it's now always an infinite wait. So, a somewhat compatible change. */
922 AssertLogRelMsgReturn(cbInput == RT_UOFFSETOF(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT, cMillies),
923 ("cbInput=%#x\n", cbInput),
924 STATUS_INVALID_PARAMETER_8);
925 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
926 PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput;
927 AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX
928 && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE,
929 ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies),
930 STATUS_INVALID_PARAMETER_9);
931 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
932 }
933 else
934 {
935 AssertLogRelMsgReturn(cbInput == sizeof(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT), ("cbInput=%#x\n", cbInput),
936 STATUS_INVALID_PARAMETER_8);
937 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
938 PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput;
939 AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX
940 && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE
941 && pVidIn->cMillies == NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT,
942 ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies),
943 STATUS_INVALID_PARAMETER_9);
944 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
945 RT_NOREF(pvOutput);
946 }
947
948 g_IoCtlMessageSlotHandleAndGetNext.cbInput = cbInput;
949 g_IoCtlMessageSlotHandleAndGetNext.cbOutput = cbOutput;
950 g_IoCtlMessageSlotHandleAndGetNext.uFunction = uFunction;
951
952 return STATUS_SUCCESS;
953}
954
955#endif /* defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(LOG_ENABLED) */
956
957#ifdef LOG_ENABLED
958/**
959 * Used to fill in what g_pIoCtlDetectForLogging points to.
960 */
961static NTSTATUS WINAPI nemR3WinIoctlDetector_ForLogging(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
962 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
963 PVOID pvOutput, ULONG cbOutput)
964{
965 RT_NOREF(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, pvInput, pvOutput);
966
967 g_pIoCtlDetectForLogging->cbInput = cbInput;
968 g_pIoCtlDetectForLogging->cbOutput = cbOutput;
969 g_pIoCtlDetectForLogging->uFunction = uFunction;
970
971 return STATUS_SUCCESS;
972}
973#endif
974
975
976/**
977 * Worker for nemR3NativeInit that detect I/O control function numbers for VID.
978 *
979 * We use the function numbers directly in ring-0 and to name functions when
980 * logging NtDeviceIoControlFile calls.
981 *
982 * @note We could alternatively do this by disassembling the respective
983 * functions, but hooking NtDeviceIoControlFile and making fake calls
984 * more easily provides the desired information.
985 *
986 * @returns VBox status code.
987 * @param pVM The cross context VM structure. Will set I/O
988 * control info members.
989 * @param pErrInfo Where to always return error info.
990 */
991static int nemR3WinInitDiscoverIoControlProperties(PVM pVM, PRTERRINFO pErrInfo)
992{
993 RT_NOREF(pVM, pErrInfo);
994
995 /*
996 * Probe the I/O control information for select VID APIs so we can use
997 * them directly from ring-0 and better log them.
998 *
999 */
1000#if defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) || defined(LOG_ENABLED)
1001 decltype(NtDeviceIoControlFile) * const pfnOrg = *g_ppfnVidNtDeviceIoControlFile;
1002
1003 /* VidGetHvPartitionId - must work due to our memory management. */
1004 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
1005 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetHvPartitionId;
1006 BOOL fRet = g_pfnVidGetHvPartitionId(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &idHvPartition);
1007 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1008 AssertReturn(fRet && idHvPartition == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID && g_IoCtlGetHvPartitionId.uFunction != 0,
1009 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
1010 "Problem figuring out VidGetHvPartitionId: fRet=%u idHvPartition=%#x dwErr=%u",
1011 fRet, idHvPartition, GetLastError()) );
1012 LogRel(("NEM: VidGetHvPartitionId -> fun:%#x in:%#x out:%#x\n",
1013 g_IoCtlGetHvPartitionId.uFunction, g_IoCtlGetHvPartitionId.cbInput, g_IoCtlGetHvPartitionId.cbOutput));
1014
1015 /* VidGetPartitionProperty - must work as it's fallback for VidGetHvPartitionId. */
1016 HV_PARTITION_PROPERTY uPropValue = ~NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE;
1017 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetPartitionProperty;
1018 fRet = g_pfnVidGetPartitionProperty(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE,
1019 &uPropValue);
1020 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1021 AssertReturn( fRet
1022 && uPropValue == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE
1023 && g_IoCtlGetHvPartitionId.uFunction != 0,
1024 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
1025 "Problem figuring out VidGetPartitionProperty: fRet=%u uPropValue=%#x dwErr=%u",
1026 fRet, uPropValue, GetLastError()) );
1027 LogRel(("NEM: VidGetPartitionProperty -> fun:%#x in:%#x out:%#x\n",
1028 g_IoCtlGetPartitionProperty.uFunction, g_IoCtlGetPartitionProperty.cbInput, g_IoCtlGetPartitionProperty.cbOutput));
1029
1030#endif
1031 int rcRet = VINF_SUCCESS;
1032#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(LOG_ENABLED)
1033
1034 /* VidStartVirtualProcessor */
1035 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StartVirtualProcessor;
1036 fRet = g_pfnVidStartVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1037 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1038 AssertStmt(fRet && g_IoCtlStartVirtualProcessor.uFunction != 0,
1039 rcRet = RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1040 "Problem figuring out VidStartVirtualProcessor: fRet=%u dwErr=%u",
1041 fRet, GetLastError()) );
1042 LogRel(("NEM: VidStartVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStartVirtualProcessor.uFunction,
1043 g_IoCtlStartVirtualProcessor.cbInput, g_IoCtlStartVirtualProcessor.cbOutput));
1044
1045 /* VidStopVirtualProcessor */
1046 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StopVirtualProcessor;
1047 fRet = g_pfnVidStopVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1048 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1049 AssertStmt(fRet && g_IoCtlStopVirtualProcessor.uFunction != 0,
1050 rcRet = RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1051 "Problem figuring out VidStopVirtualProcessor: fRet=%u dwErr=%u",
1052 fRet, GetLastError()) );
1053 LogRel(("NEM: VidStopVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStopVirtualProcessor.uFunction,
1054 g_IoCtlStopVirtualProcessor.cbInput, g_IoCtlStopVirtualProcessor.cbOutput));
1055
1056 /* VidMessageSlotHandleAndGetNext */
1057 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_MessageSlotHandleAndGetNext;
1058 fRet = g_pfnVidMessageSlotHandleAndGetNext(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE,
1059 NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, VID_MSHAGN_F_HANDLE_MESSAGE,
1060 NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT);
1061 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1062 AssertStmt(fRet && g_IoCtlMessageSlotHandleAndGetNext.uFunction != 0,
1063 rcRet = RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1064 "Problem figuring out VidMessageSlotHandleAndGetNext: fRet=%u dwErr=%u",
1065 fRet, GetLastError()) );
1066 LogRel(("NEM: VidMessageSlotHandleAndGetNext -> fun:%#x in:%#x out:%#x\n",
1067 g_IoCtlMessageSlotHandleAndGetNext.uFunction, g_IoCtlMessageSlotHandleAndGetNext.cbInput,
1068 g_IoCtlMessageSlotHandleAndGetNext.cbOutput));
1069
1070#endif /* defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(LOG_ENABLED) */
1071#ifdef LOG_ENABLED
1072 /* The following are only for logging: */
1073 union
1074 {
1075 VID_MAPPED_MESSAGE_SLOT MapSlot;
1076 HV_REGISTER_NAME Name;
1077 HV_REGISTER_VALUE Value;
1078 } uBuf;
1079
1080 /* VidMessageSlotMap */
1081 g_pIoCtlDetectForLogging = &g_IoCtlMessageSlotMap;
1082 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1083 fRet = g_pfnVidMessageSlotMap(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &uBuf.MapSlot, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1084 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1085 Assert(fRet);
1086 LogRel(("NEM: VidMessageSlotMap -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1087 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1088
1089 /* VidGetVirtualProcessorState */
1090 uBuf.Name = HvRegisterExplicitSuspend;
1091 g_pIoCtlDetectForLogging = &g_IoCtlGetVirtualProcessorState;
1092 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1093 fRet = g_pfnVidGetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1094 &uBuf.Name, 1, &uBuf.Value);
1095 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1096 Assert(fRet);
1097 LogRel(("NEM: VidGetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1098 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1099
1100 /* VidSetVirtualProcessorState */
1101 uBuf.Name = HvRegisterExplicitSuspend;
1102 g_pIoCtlDetectForLogging = &g_IoCtlSetVirtualProcessorState;
1103 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1104 fRet = g_pfnVidSetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1105 &uBuf.Name, 1, &uBuf.Value);
1106 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1107 Assert(fRet);
1108 LogRel(("NEM: VidSetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1109 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1110
1111 g_pIoCtlDetectForLogging = NULL;
1112#endif
1113
1114 /* Done. */
1115#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES
1116 pVM->nem.s.IoCtlGetHvPartitionId = g_IoCtlGetHvPartitionId;
1117 pVM->nem.s.IoCtlGetPartitionProperty = g_IoCtlGetPartitionProperty;
1118#endif
1119#ifdef NEM_WIN_WITH_RING0_RUNLOOP
1120 pVM->nem.s.IoCtlStartVirtualProcessor = g_IoCtlStartVirtualProcessor;
1121 pVM->nem.s.IoCtlStopVirtualProcessor = g_IoCtlStopVirtualProcessor;
1122 pVM->nem.s.IoCtlMessageSlotHandleAndGetNext = g_IoCtlMessageSlotHandleAndGetNext;
1123#endif
1124 return rcRet;
1125}
1126
1127
1128/**
1129 * Creates and sets up a Hyper-V (exo) partition.
1130 *
1131 * @returns VBox status code.
1132 * @param pVM The cross context VM structure.
1133 * @param pErrInfo Where to always return error info.
1134 */
1135static int nemR3WinInitCreatePartition(PVM pVM, PRTERRINFO pErrInfo)
1136{
1137 AssertReturn(!pVM->nem.s.hPartition, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1138 AssertReturn(!pVM->nem.s.hPartitionDevice, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1139
1140 /*
1141 * Create the partition.
1142 */
1143 WHV_PARTITION_HANDLE hPartition;
1144 HRESULT hrc = WHvCreatePartition(&hPartition);
1145 if (FAILED(hrc))
1146 return RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED, "WHvCreatePartition failed with %Rhrc (Last=%#x/%u)",
1147 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1148
1149 int rc;
1150
1151 /*
1152 * Set partition properties, most importantly the CPU count.
1153 */
1154 /**
1155 * @todo Someone at Microsoft please explain another weird API:
1156 * - Why this API doesn't take the WHV_PARTITION_PROPERTY_CODE value as an
1157 * argument rather than as part of the struct. That is so weird if you've
1158 * used any other NT or windows API, including WHvGetCapability().
1159 * - Why use PVOID when WHV_PARTITION_PROPERTY is what's expected. We
1160 * technically only need 9 bytes for setting/getting
1161 * WHVPartitionPropertyCodeProcessorClFlushSize, but the API insists on 16. */
1162 WHV_PARTITION_PROPERTY Property;
1163 RT_ZERO(Property);
1164 Property.ProcessorCount = pVM->cCpus;
1165 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorCount, &Property, sizeof(Property));
1166 if (SUCCEEDED(hrc))
1167 {
1168 RT_ZERO(Property);
1169 Property.ExtendedVmExits.X64CpuidExit = pVM->nem.s.fExtendedCpuIdExit; /** @todo Register fixed results and restrict cpuid exits */
1170 Property.ExtendedVmExits.X64MsrExit = pVM->nem.s.fExtendedMsrExit;
1171 Property.ExtendedVmExits.ExceptionExit = pVM->nem.s.fExtendedXcptExit;
1172 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExtendedVmExits, &Property, sizeof(Property));
1173 if (SUCCEEDED(hrc))
1174 {
1175 /*
1176 * We'll continue setup in nemR3NativeInitAfterCPUM.
1177 */
1178 pVM->nem.s.fCreatedEmts = false;
1179 pVM->nem.s.hPartition = hPartition;
1180 LogRel(("NEM: Created partition %p.\n", hPartition));
1181 return VINF_SUCCESS;
1182 }
1183
1184 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1185 "Failed setting WHvPartitionPropertyCodeExtendedVmExits to %'#RX64: %Rhrc",
1186 Property.ExtendedVmExits.AsUINT64, hrc);
1187 }
1188 else
1189 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1190 "Failed setting WHvPartitionPropertyCodeProcessorCount to %u: %Rhrc (Last=%#x/%u)",
1191 pVM->cCpus, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1192 WHvDeletePartition(hPartition);
1193
1194 Assert(!pVM->nem.s.hPartitionDevice);
1195 Assert(!pVM->nem.s.hPartition);
1196 return rc;
1197}
1198
1199
1200/**
1201 * Makes sure APIC and firmware will not allow X2APIC mode.
1202 *
1203 * This is rather ugly.
1204 *
1205 * @returns VBox status code
1206 * @param pVM The cross context VM structure.
1207 */
1208static int nemR3WinDisableX2Apic(PVM pVM)
1209{
1210 /*
1211 * First make sure the 'Mode' config value of the APIC isn't set to X2APIC.
1212 * This defaults to APIC, so no need to change unless it's X2APIC.
1213 */
1214 PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/apic/0/Config");
1215 if (pCfg)
1216 {
1217 uint8_t bMode = 0;
1218 int rc = CFGMR3QueryU8(pCfg, "Mode", &bMode);
1219 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1220 if (RT_SUCCESS(rc) && bMode == PDMAPICMODE_X2APIC)
1221 {
1222 LogRel(("NEM: Adjusting APIC configuration from X2APIC to APIC max mode. X2APIC is not supported by the WinHvPlatform API!\n"));
1223 LogRel(("NEM: Disable Hyper-V if you need X2APIC for your guests!\n"));
1224 rc = CFGMR3RemoveValue(pCfg, "Mode");
1225 rc = CFGMR3InsertInteger(pCfg, "Mode", PDMAPICMODE_APIC);
1226 AssertLogRelRCReturn(rc, rc);
1227 }
1228 }
1229
1230 /*
1231 * Now the firmwares.
1232 * These also defaults to APIC and only needs adjusting if configured to X2APIC (2).
1233 */
1234 static const char * const s_apszFirmwareConfigs[] =
1235 {
1236 "/Devices/efi/0/Config",
1237 "/Devices/pcbios/0/Config",
1238 };
1239 for (unsigned i = 0; i < RT_ELEMENTS(s_apszFirmwareConfigs); i++)
1240 {
1241 pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/APIC/0/Config");
1242 if (pCfg)
1243 {
1244 uint8_t bMode = 0;
1245 int rc = CFGMR3QueryU8(pCfg, "APIC", &bMode);
1246 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1247 if (RT_SUCCESS(rc) && bMode == 2)
1248 {
1249 LogRel(("NEM: Adjusting %s/Mode from 2 (X2APIC) to 1 (APIC).\n", s_apszFirmwareConfigs[i]));
1250 rc = CFGMR3RemoveValue(pCfg, "APIC");
1251 rc = CFGMR3InsertInteger(pCfg, "APIC", 1);
1252 AssertLogRelRCReturn(rc, rc);
1253 }
1254 }
1255 }
1256
1257 return VINF_SUCCESS;
1258}
1259
1260
1261/**
1262 * Try initialize the native API.
1263 *
1264 * This may only do part of the job, more can be done in
1265 * nemR3NativeInitAfterCPUM() and nemR3NativeInitCompleted().
1266 *
1267 * @returns VBox status code.
1268 * @param pVM The cross context VM structure.
1269 * @param fFallback Whether we're in fallback mode or use-NEM mode. In
1270 * the latter we'll fail if we cannot initialize.
1271 * @param fForced Whether the HMForced flag is set and we should
1272 * fail if we cannot initialize.
1273 */
1274int nemR3NativeInit(PVM pVM, bool fFallback, bool fForced)
1275{
1276 g_uBuildNo = RTSystemGetNtBuildNo();
1277
1278 /*
1279 * Some state init.
1280 */
1281 pVM->nem.s.fA20Enabled = true;
1282#if 0
1283 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1284 {
1285 PNEMCPU pNemCpu = &pVM->apCpusR3[idCpu]->nem.s;
1286 }
1287#endif
1288
1289#ifndef NEM_WIN_USE_HYPERCALLS_FOR_PAGES
1290 /** Some guess working here. */
1291 pVM->nem.s.cMaxMappedPages = 4000;
1292 if (g_uBuildNo >= 22000)
1293 pVM->nem.s.cMaxMappedPages = _64K; /* seems it can do lots more even */
1294#endif
1295
1296 /*
1297 * Error state.
1298 * The error message will be non-empty on failure and 'rc' will be set too.
1299 */
1300 RTERRINFOSTATIC ErrInfo;
1301 PRTERRINFO pErrInfo = RTErrInfoInitStatic(&ErrInfo);
1302 int rc = nemR3WinInitProbeAndLoad(fForced, pErrInfo);
1303 if (RT_SUCCESS(rc))
1304 {
1305 /*
1306 * Check the capabilties of the hypervisor, starting with whether it's present.
1307 */
1308 rc = nemR3WinInitCheckCapabilities(pVM, pErrInfo);
1309 if (RT_SUCCESS(rc))
1310 {
1311 /*
1312 * Discover the VID I/O control function numbers we need.
1313 */
1314 rc = nemR3WinInitDiscoverIoControlProperties(pVM, pErrInfo);
1315 if (rc == VERR_NEM_RING3_ONLY)
1316 {
1317 if (pVM->nem.s.fUseRing0Runloop)
1318 {
1319 LogRel(("NEM: Disabling UseRing0Runloop.\n"));
1320 pVM->nem.s.fUseRing0Runloop = false;
1321 }
1322 rc = VINF_SUCCESS;
1323 }
1324 if (RT_SUCCESS(rc))
1325 {
1326 /*
1327 * Check out our ring-0 capabilities.
1328 */
1329 rc = SUPR3CallVMMR0Ex(VMCC_GET_VMR0_FOR_CALL(pVM), 0 /*idCpu*/, VMMR0_DO_NEM_INIT_VM, 0, NULL);
1330 if (RT_SUCCESS(rc))
1331 {
1332 /*
1333 * Create and initialize a partition.
1334 */
1335 rc = nemR3WinInitCreatePartition(pVM, pErrInfo);
1336 if (RT_SUCCESS(rc))
1337 {
1338 /*
1339 * Set ourselves as the execution engine and make config adjustments.
1340 */
1341 VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_NATIVE_API);
1342 Log(("NEM: Marked active!\n"));
1343 nemR3WinDisableX2Apic(pVM);
1344#if !defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) && defined(VBOX_WITH_PGM_NEM_MODE)
1345 PGMR3EnableNemMode(pVM);
1346#endif
1347
1348 /*
1349 * Register release statistics
1350 */
1351 STAMR3Register(pVM, (void *)&pVM->nem.s.cMappedPages, STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1352 "/NEM/PagesCurrentlyMapped", STAMUNIT_PAGES, "Number guest pages currently mapped by the VM");
1353 STAMR3Register(pVM, (void *)&pVM->nem.s.StatMapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1354 "/NEM/PagesMapCalls", STAMUNIT_PAGES, "Calls to WHvMapGpaRange/HvCallMapGpaPages");
1355 STAMR3Register(pVM, (void *)&pVM->nem.s.StatMapPageFailed, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1356 "/NEM/PagesMapFails", STAMUNIT_PAGES, "Calls to WHvMapGpaRange/HvCallMapGpaPages that failed");
1357 STAMR3Register(pVM, (void *)&pVM->nem.s.StatUnmapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1358 "/NEM/PagesUnmapCalls", STAMUNIT_PAGES, "Calls to WHvUnmapGpaRange/HvCallUnmapGpaPages");
1359 STAMR3Register(pVM, (void *)&pVM->nem.s.StatUnmapPageFailed, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1360 "/NEM/PagesUnmapFails", STAMUNIT_PAGES, "Calls to WHvUnmapGpaRange/HvCallUnmapGpaPages that failed");
1361#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES
1362 STAMR3Register(pVM, (void *)&pVM->nem.s.StatRemapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1363 "/NEM/PagesRemapCalls", STAMUNIT_PAGES, "Calls to HvCallMapGpaPages for changing page protection");
1364 STAMR3Register(pVM, (void *)&pVM->nem.s.StatRemapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1365 "/NEM/PagesRemapFails", STAMUNIT_PAGES, "Calls to HvCallMapGpaPages for changing page protection failed");
1366#elif !defined(VBOX_WITH_PGM_NEM_MODE)
1367 STAMR3Register(pVM, (void *)&pVM->nem.s.StatUnmapAllPages, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1368 "/NEM/PagesUnmapAll", STAMUNIT_PAGES, "Times we had to unmap all the pages");
1369#endif
1370
1371 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1372 {
1373 PNEMCPU pNemCpu = &pVM->apCpusR3[idCpu]->nem.s;
1374 STAMR3RegisterF(pVM, &pNemCpu->StatExitPortIo, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of port I/O exits", "/NEM/CPU%u/ExitPortIo", idCpu);
1375 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemUnmapped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unmapped memory exits", "/NEM/CPU%u/ExitMemUnmapped", idCpu);
1376 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemIntercept, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of intercepted memory exits", "/NEM/CPU%u/ExitMemIntercept", idCpu);
1377 STAMR3RegisterF(pVM, &pNemCpu->StatExitHalt, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of HLT exits", "/NEM/CPU%u/ExitHalt", idCpu);
1378 STAMR3RegisterF(pVM, &pNemCpu->StatExitInterruptWindow, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of interrupt window exits", "/NEM/CPU%u/ExitInterruptWindow", idCpu);
1379 STAMR3RegisterF(pVM, &pNemCpu->StatExitCpuId, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of CPUID exits", "/NEM/CPU%u/ExitCpuId", idCpu);
1380 STAMR3RegisterF(pVM, &pNemCpu->StatExitMsr, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of MSR access exits", "/NEM/CPU%u/ExitMsr", idCpu);
1381 STAMR3RegisterF(pVM, &pNemCpu->StatExitException, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of exception exits", "/NEM/CPU%u/ExitException", idCpu);
1382 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionBp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #BP exits", "/NEM/CPU%u/ExitExceptionBp", idCpu);
1383 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionDb, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #DB exits", "/NEM/CPU%u/ExitExceptionDb", idCpu);
1384 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionGp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #GP exits", "/NEM/CPU%u/ExitExceptionGp", idCpu);
1385 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionGpMesa, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #GP exits from mesa driver", "/NEM/CPU%u/ExitExceptionGpMesa", idCpu);
1386 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #UD exits", "/NEM/CPU%u/ExitExceptionUd", idCpu);
1387 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUdHandled, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of handled #UD exits", "/NEM/CPU%u/ExitExceptionUdHandled", idCpu);
1388 STAMR3RegisterF(pVM, &pNemCpu->StatExitUnrecoverable, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unrecoverable exits", "/NEM/CPU%u/ExitUnrecoverable", idCpu);
1389 STAMR3RegisterF(pVM, &pNemCpu->StatGetMsgTimeout, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of get message timeouts/alerts", "/NEM/CPU%u/GetMsgTimeout", idCpu);
1390 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuSuccess, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of successful CPU stops", "/NEM/CPU%u/StopCpuSuccess", idCpu);
1391 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPending, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stops", "/NEM/CPU%u/StopCpuPending", idCpu);
1392 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingAlerts,STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stop alerts", "/NEM/CPU%u/StopCpuPendingAlerts", idCpu);
1393 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingOdd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of odd pending CPU stops (see code)", "/NEM/CPU%u/StopCpuPendingOdd", idCpu);
1394 STAMR3RegisterF(pVM, &pNemCpu->StatCancelChangedState, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel changed state", "/NEM/CPU%u/CancelChangedState", idCpu);
1395 STAMR3RegisterF(pVM, &pNemCpu->StatCancelAlertedThread, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel alerted EMT", "/NEM/CPU%u/CancelAlertedEMT", idCpu);
1396 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPre, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pre execution FF breaks", "/NEM/CPU%u/BreakOnFFPre", idCpu);
1397 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPost, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of post execution FF breaks", "/NEM/CPU%u/BreakOnFFPost", idCpu);
1398 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnCancel, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel execution breaks", "/NEM/CPU%u/BreakOnCancel", idCpu);
1399 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnStatus, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of status code breaks", "/NEM/CPU%u/BreakOnStatus", idCpu);
1400 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnDemand, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of on-demand state imports", "/NEM/CPU%u/ImportOnDemand", idCpu);
1401 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturn, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of state imports on loop return", "/NEM/CPU%u/ImportOnReturn", idCpu);
1402 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturnSkipped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of skipped state imports on loop return", "/NEM/CPU%u/ImportOnReturnSkipped", idCpu);
1403 STAMR3RegisterF(pVM, &pNemCpu->StatQueryCpuTick, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of TSC queries", "/NEM/CPU%u/QueryCpuTick", idCpu);
1404 }
1405
1406 PUVM pUVM = pVM->pUVM;
1407 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesAvailable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1408 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Free pages available to the hypervisor",
1409 "/NEM/R0Stats/cPagesAvailable");
1410 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesInUse, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1411 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Pages in use by hypervisor",
1412 "/NEM/R0Stats/cPagesInUse");
1413
1414 }
1415 }
1416 else
1417 rc = RTErrInfoSetF(pErrInfo, rc, "VMMR0_DO_NEM_INIT_VM failed: %Rrc", rc);
1418 }
1419 }
1420 }
1421
1422 /*
1423 * We only fail if in forced mode, otherwise just log the complaint and return.
1424 */
1425 Assert(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API || RTErrInfoIsSet(pErrInfo));
1426 if ( (fForced || !fFallback)
1427 && pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NATIVE_API)
1428 return VMSetError(pVM, RT_SUCCESS_NP(rc) ? VERR_NEM_NOT_AVAILABLE : rc, RT_SRC_POS, "%s", pErrInfo->pszMsg);
1429
1430 if (RTErrInfoIsSet(pErrInfo))
1431 LogRel(("NEM: Not available: %s\n", pErrInfo->pszMsg));
1432 return VINF_SUCCESS;
1433}
1434
1435
1436/**
1437 * This is called after CPUMR3Init is done.
1438 *
1439 * @returns VBox status code.
1440 * @param pVM The VM handle..
1441 */
1442int nemR3NativeInitAfterCPUM(PVM pVM)
1443{
1444 /*
1445 * Validate sanity.
1446 */
1447 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1448 AssertReturn(hPartition != NULL, VERR_WRONG_ORDER);
1449 AssertReturn(!pVM->nem.s.hPartitionDevice, VERR_WRONG_ORDER);
1450 AssertReturn(!pVM->nem.s.fCreatedEmts, VERR_WRONG_ORDER);
1451 AssertReturn(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API, VERR_WRONG_ORDER);
1452
1453 /*
1454 * Continue setting up the partition now that we've got most of the CPUID feature stuff.
1455 */
1456 WHV_PARTITION_PROPERTY Property;
1457 HRESULT hrc;
1458
1459#if 0
1460 /* Not sure if we really need to set the vendor.
1461 Update: Apparently we don't. WHvPartitionPropertyCodeProcessorVendor was removed in 17110. */
1462 RT_ZERO(Property);
1463 Property.ProcessorVendor = pVM->nem.s.enmCpuVendor == CPUMCPUVENDOR_AMD ? WHvProcessorVendorAmd
1464 : WHvProcessorVendorIntel;
1465 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorVendor, &Property, sizeof(Property));
1466 if (FAILED(hrc))
1467 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1468 "Failed to set WHvPartitionPropertyCodeProcessorVendor to %u: %Rhrc (Last=%#x/%u)",
1469 Property.ProcessorVendor, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1470#endif
1471
1472 /* Not sure if we really need to set the cache line flush size. */
1473 RT_ZERO(Property);
1474 Property.ProcessorClFlushSize = pVM->nem.s.cCacheLineFlushShift;
1475 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorClFlushSize, &Property, sizeof(Property));
1476 if (FAILED(hrc))
1477 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1478 "Failed to set WHvPartitionPropertyCodeProcessorClFlushSize to %u: %Rhrc (Last=%#x/%u)",
1479 pVM->nem.s.cCacheLineFlushShift, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1480
1481 /* Intercept #DB, #BP and #UD exceptions. */
1482 RT_ZERO(Property);
1483 Property.ExceptionExitBitmap = RT_BIT_64(WHvX64ExceptionTypeDebugTrapOrFault)
1484 | RT_BIT_64(WHvX64ExceptionTypeBreakpointTrap)
1485 | RT_BIT_64(WHvX64ExceptionTypeInvalidOpcodeFault);
1486
1487 /* Intercept #GP to workaround the buggy mesa vmwgfx driver. */
1488 PVMCPU pVCpu = pVM->apCpusR3[0]; /** @todo In theory per vCPU, in practice same for all. */
1489 if (pVCpu->nem.s.fTrapXcptGpForLovelyMesaDrv)
1490 Property.ExceptionExitBitmap |= RT_BIT_64(WHvX64ExceptionTypeGeneralProtectionFault);
1491
1492 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExceptionExitBitmap, &Property, sizeof(Property));
1493 if (FAILED(hrc))
1494 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1495 "Failed to set WHvPartitionPropertyCodeExceptionExitBitmap to %#RX64: %Rhrc (Last=%#x/%u)",
1496 Property.ExceptionExitBitmap, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1497
1498
1499 /*
1500 * Sync CPU features with CPUM.
1501 */
1502 /** @todo sync CPU features with CPUM. */
1503
1504 /* Set the partition property. */
1505 RT_ZERO(Property);
1506 Property.ProcessorFeatures.AsUINT64 = pVM->nem.s.uCpuFeatures.u64;
1507 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorFeatures, &Property, sizeof(Property));
1508 if (FAILED(hrc))
1509 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1510 "Failed to set WHvPartitionPropertyCodeProcessorFeatures to %'#RX64: %Rhrc (Last=%#x/%u)",
1511 pVM->nem.s.uCpuFeatures.u64, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1512
1513 /*
1514 * Set up the partition.
1515 *
1516 * Seems like this is where the partition is actually instantiated and we get
1517 * a handle to it.
1518 */
1519 hrc = WHvSetupPartition(hPartition);
1520 if (FAILED(hrc))
1521 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1522 "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)",
1523 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1524
1525 /* Get the handle (could also fish this out via VID.DLL NtDeviceIoControlFile intercepting). */
1526 HANDLE hPartitionDevice;
1527 __try
1528 {
1529 hPartitionDevice = ((HANDLE *)hPartition)[1];
1530 }
1531 __except(EXCEPTION_EXECUTE_HANDLER)
1532 {
1533 hrc = GetExceptionCode();
1534 hPartitionDevice = NULL;
1535 }
1536 if ( hPartitionDevice == NULL
1537 || hPartitionDevice == (HANDLE)(intptr_t)-1)
1538 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1539 "Failed to get device handle for partition %p: %Rhrc", hPartition, hrc);
1540
1541 /* Test the handle. */
1542 HV_PARTITION_PROPERTY uValue;
1543 if (!g_pfnVidGetPartitionProperty(hPartitionDevice, HvPartitionPropertyProcessorVendor, &uValue))
1544 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1545 "Failed to get device handle and/or partition ID for %p (hPartitionDevice=%p, Last=%#x/%u)",
1546 hPartition, hPartitionDevice, RTNtLastStatusValue(), RTNtLastErrorValue());
1547 LogRel(("NEM: HvPartitionPropertyProcessorVendor=%#llx (%lld)\n", uValue, uValue));
1548
1549 /*
1550 * Get the partition ID so we can keep managing our memory the way we've
1551 * been doing for the last 12+ years.
1552 *
1553 * The WHvMapGpaRange/WHvUnmapGpaRange interface is very ill-fitting and
1554 * very inflexible compared to what we need. Fortunately, the hypervisor
1555 * have a much better interface which we are able to use from ring-0.
1556 * Not pretty, but necessary for the time being.
1557 */
1558 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
1559 if (!g_pfnVidGetHvPartitionId(hPartitionDevice, &idHvPartition))
1560 {
1561 if (RTNtLastErrorValue() != ERROR_INVALID_FUNCTION) /* Will try get it later in VMMR0_DO_NEM_INIT_VM_PART_2. */
1562 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1563 "Failed to get device handle and/or partition ID for %p (hPartitionDevice=%p, Last=%#x/%u)",
1564 hPartition, hPartitionDevice, RTNtLastStatusValue(), RTNtLastErrorValue());
1565 LogRel(("NEM: VidGetHvPartitionId failed with ERROR_NOT_SUPPORTED, will try again later from ring-0...\n"));
1566 idHvPartition = HV_PARTITION_ID_INVALID;
1567 }
1568 pVM->nem.s.hPartitionDevice = hPartitionDevice;
1569 pVM->nem.s.idHvPartition = idHvPartition;
1570
1571 /*
1572 * Setup the EMTs.
1573 */
1574 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1575 {
1576 pVCpu = pVM->apCpusR3[idCpu];
1577
1578 pVCpu->nem.s.hNativeThreadHandle = (RTR3PTR)RTThreadGetNativeHandle(VMR3GetThreadHandle(pVCpu->pUVCpu));
1579 Assert((HANDLE)pVCpu->nem.s.hNativeThreadHandle != INVALID_HANDLE_VALUE);
1580
1581#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
1582# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1583 if (!pVM->nem.s.fUseRing0Runloop)
1584# endif
1585 {
1586 hrc = WHvCreateVirtualProcessor(hPartition, idCpu, 0 /*fFlags*/);
1587 if (FAILED(hrc))
1588 {
1589 NTSTATUS const rcNtLast = RTNtLastStatusValue();
1590 DWORD const dwErrLast = RTNtLastErrorValue();
1591 while (idCpu-- > 0)
1592 {
1593 HRESULT hrc2 = WHvDeleteVirtualProcessor(hPartition, idCpu);
1594 AssertLogRelMsg(SUCCEEDED(hrc2), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1595 hPartition, idCpu, hrc2, RTNtLastStatusValue(),
1596 RTNtLastErrorValue()));
1597 }
1598 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1599 "Call to WHvCreateVirtualProcessor failed: %Rhrc (Last=%#x/%u)", hrc, rcNtLast, dwErrLast);
1600 }
1601 }
1602# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1603 else
1604# endif
1605#endif /* !NEM_WIN_USE_OUR_OWN_RUN_API */
1606#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_OUR_OWN_RUN_API)
1607 {
1608 VID_MAPPED_MESSAGE_SLOT MappedMsgSlot = { NULL, UINT32_MAX, UINT32_MAX };
1609 if (g_pfnVidMessageSlotMap(hPartitionDevice, &MappedMsgSlot, idCpu))
1610 {
1611 AssertLogRelMsg(MappedMsgSlot.iCpu == idCpu && MappedMsgSlot.uParentAdvisory == UINT32_MAX,
1612 ("%#x %#x (iCpu=%#x)\n", MappedMsgSlot.iCpu, MappedMsgSlot.uParentAdvisory, idCpu));
1613 pVCpu->nem.s.pvMsgSlotMapping = MappedMsgSlot.pMsgBlock;
1614 }
1615 else
1616 {
1617 NTSTATUS const rcNtLast = RTNtLastStatusValue();
1618 DWORD const dwErrLast = RTNtLastErrorValue();
1619 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1620 "Call to VidMessageSlotMap failed: Last=%#x/%u", rcNtLast, dwErrLast);
1621 }
1622 }
1623#endif
1624 }
1625 pVM->nem.s.fCreatedEmts = true;
1626
1627 /*
1628 * Do some more ring-0 initialization now that we've got the partition handle.
1629 */
1630 int rc = VMMR3CallR0Emt(pVM, pVM->apCpusR3[0], VMMR0_DO_NEM_INIT_VM_PART_2, 0, NULL);
1631 if (RT_SUCCESS(rc))
1632 {
1633 LogRel(("NEM: Successfully set up partition (device handle %p, partition ID %#llx)\n",
1634 hPartitionDevice, pVM->nem.s.idHvPartition));
1635
1636#if 1
1637 VMMR3CallR0Emt(pVM, pVM->apCpusR3[0], VMMR0_DO_NEM_UPDATE_STATISTICS, 0, NULL);
1638 LogRel(("NEM: Memory balance: %#RX64 out of %#RX64 pages in use\n",
1639 pVM->nem.s.R0Stats.cPagesInUse, pVM->nem.s.R0Stats.cPagesAvailable));
1640#endif
1641
1642 /*
1643 * Register statistics on shared pages.
1644 */
1645 /** @todo HvCallMapStatsPage */
1646
1647 /*
1648 * Adjust features.
1649 * Note! We've already disabled X2APIC via CFGM during the first init call.
1650 */
1651
1652#if 0 && defined(DEBUG_bird)
1653 /*
1654 * Poke and probe a little.
1655 */
1656 PVMCPU pVCpu = pVM->apCpusR3[0];
1657 uint32_t aRegNames[1024];
1658 HV_REGISTER_VALUE aRegValues[1024];
1659 uint32_t aPropCodes[128];
1660 uint64_t aPropValues[128];
1661 for (int iOuter = 0; iOuter < 5; iOuter++)
1662 {
1663 LogRel(("\niOuter %d\n", iOuter));
1664# if 1
1665 /* registers */
1666 uint32_t iRegValue = 0;
1667 uint32_t cRegChanges = 0;
1668 for (uint32_t iReg = 0; iReg < 0x001101ff; iReg++)
1669 {
1670 if (iOuter != 0 && aRegNames[iRegValue] > iReg)
1671 continue;
1672 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1673 pVCpu->nem.s.Hypercall.Experiment.uItem = iReg;
1674 int rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 0, NULL);
1675 AssertLogRelRCBreak(rc2);
1676 if (pVCpu->nem.s.Hypercall.Experiment.fSuccess)
1677 {
1678 LogRel(("Register %#010x = %#18RX64, %#18RX64\n", iReg,
1679 pVCpu->nem.s.Hypercall.Experiment.uLoValue, pVCpu->nem.s.Hypercall.Experiment.uHiValue));
1680 if (iReg == HvX64RegisterTsc)
1681 {
1682 uint64_t uTsc = ASMReadTSC();
1683 LogRel(("TSC = %#18RX64; Delta %#18RX64 or %#18RX64\n",
1684 uTsc, pVCpu->nem.s.Hypercall.Experiment.uLoValue - uTsc, uTsc - pVCpu->nem.s.Hypercall.Experiment.uLoValue));
1685 }
1686
1687 if (iOuter == 0)
1688 aRegNames[iRegValue] = iReg;
1689 else if( aRegValues[iRegValue].Reg128.Low64 != pVCpu->nem.s.Hypercall.Experiment.uLoValue
1690 || aRegValues[iRegValue].Reg128.High64 != pVCpu->nem.s.Hypercall.Experiment.uHiValue)
1691 {
1692 LogRel(("Changed from %#18RX64, %#18RX64 !!\n",
1693 aRegValues[iRegValue].Reg128.Low64, aRegValues[iRegValue].Reg128.High64));
1694 LogRel(("Delta %#18RX64, %#18RX64 !!\n",
1695 pVCpu->nem.s.Hypercall.Experiment.uLoValue - aRegValues[iRegValue].Reg128.Low64,
1696 pVCpu->nem.s.Hypercall.Experiment.uHiValue - aRegValues[iRegValue].Reg128.High64));
1697 cRegChanges++;
1698 }
1699 aRegValues[iRegValue].Reg128.Low64 = pVCpu->nem.s.Hypercall.Experiment.uLoValue;
1700 aRegValues[iRegValue].Reg128.High64 = pVCpu->nem.s.Hypercall.Experiment.uHiValue;
1701 iRegValue++;
1702 AssertBreak(iRegValue < RT_ELEMENTS(aRegValues));
1703 }
1704 }
1705 LogRel(("Found %u registers, %u changed\n", iRegValue, cRegChanges));
1706# endif
1707# if 1
1708 /* partition properties */
1709 uint32_t iPropValue = 0;
1710 uint32_t cPropChanges = 0;
1711 for (uint32_t iProp = 0; iProp < 0xc11ff; iProp++)
1712 {
1713 if (iProp == HvPartitionPropertyDebugChannelId /* hangs host */)
1714 continue;
1715 if (iOuter != 0 && aPropCodes[iPropValue] > iProp)
1716 continue;
1717 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1718 pVCpu->nem.s.Hypercall.Experiment.uItem = iProp;
1719 int rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 1, NULL);
1720 AssertLogRelRCBreak(rc2);
1721 if (pVCpu->nem.s.Hypercall.Experiment.fSuccess)
1722 {
1723 LogRel(("Property %#010x = %#18RX64\n", iProp, pVCpu->nem.s.Hypercall.Experiment.uLoValue));
1724 if (iOuter == 0)
1725 aPropCodes[iPropValue] = iProp;
1726 else if (aPropValues[iPropValue] != pVCpu->nem.s.Hypercall.Experiment.uLoValue)
1727 {
1728 LogRel(("Changed from %#18RX64, delta %#18RX64!!\n",
1729 aPropValues[iPropValue], pVCpu->nem.s.Hypercall.Experiment.uLoValue - aPropValues[iPropValue]));
1730 cRegChanges++;
1731 }
1732 aPropValues[iPropValue] = pVCpu->nem.s.Hypercall.Experiment.uLoValue;
1733 iPropValue++;
1734 AssertBreak(iPropValue < RT_ELEMENTS(aPropValues));
1735 }
1736 }
1737 LogRel(("Found %u properties, %u changed\n", iPropValue, cPropChanges));
1738# endif
1739
1740 /* Modify the TSC register value and see what changes. */
1741 if (iOuter != 0)
1742 {
1743 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1744 pVCpu->nem.s.Hypercall.Experiment.uItem = HvX64RegisterTsc;
1745 pVCpu->nem.s.Hypercall.Experiment.uHiValue = UINT64_C(0x00000fffffffffff) >> iOuter;
1746 pVCpu->nem.s.Hypercall.Experiment.uLoValue = UINT64_C(0x0011100000000000) << iOuter;
1747 VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 2, NULL);
1748 LogRel(("Setting HvX64RegisterTsc -> %RTbool (%#RX64)\n", pVCpu->nem.s.Hypercall.Experiment.fSuccess, pVCpu->nem.s.Hypercall.Experiment.uStatus));
1749 }
1750
1751 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1752 pVCpu->nem.s.Hypercall.Experiment.uItem = HvX64RegisterTsc;
1753 VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 0, NULL);
1754 LogRel(("HvX64RegisterTsc = %#RX64, %#RX64\n", pVCpu->nem.s.Hypercall.Experiment.uLoValue, pVCpu->nem.s.Hypercall.Experiment.uHiValue));
1755 }
1756
1757#endif
1758 return VINF_SUCCESS;
1759 }
1760 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, "Call to NEMR0InitVMPart2 failed: %Rrc", rc);
1761}
1762
1763
1764int nemR3NativeInitCompleted(PVM pVM, VMINITCOMPLETED enmWhat)
1765{
1766 //BOOL fRet = SetThreadPriority(GetCurrentThread(), 0);
1767 //AssertLogRel(fRet);
1768
1769 NOREF(pVM); NOREF(enmWhat);
1770 return VINF_SUCCESS;
1771}
1772
1773
1774int nemR3NativeTerm(PVM pVM)
1775{
1776 /*
1777 * Delete the partition.
1778 */
1779 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1780 pVM->nem.s.hPartition = NULL;
1781 pVM->nem.s.hPartitionDevice = NULL;
1782 if (hPartition != NULL)
1783 {
1784 VMCPUID idCpu = pVM->nem.s.fCreatedEmts ? pVM->cCpus : 0;
1785 LogRel(("NEM: Destroying partition %p with its %u VCpus...\n", hPartition, idCpu));
1786 while (idCpu-- > 0)
1787 {
1788 PVMCPU pVCpu = pVM->apCpusR3[idCpu];
1789 pVCpu->nem.s.pvMsgSlotMapping = NULL;
1790#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
1791# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1792 if (!pVM->nem.s.fUseRing0Runloop)
1793# endif
1794 {
1795 HRESULT hrc = WHvDeleteVirtualProcessor(hPartition, idCpu);
1796 AssertLogRelMsg(SUCCEEDED(hrc), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1797 hPartition, idCpu, hrc, RTNtLastStatusValue(),
1798 RTNtLastErrorValue()));
1799 }
1800#endif
1801 }
1802 WHvDeletePartition(hPartition);
1803 }
1804 pVM->nem.s.fCreatedEmts = false;
1805 return VINF_SUCCESS;
1806}
1807
1808
1809/**
1810 * VM reset notification.
1811 *
1812 * @param pVM The cross context VM structure.
1813 */
1814void nemR3NativeReset(PVM pVM)
1815{
1816 /* Unfix the A20 gate. */
1817 pVM->nem.s.fA20Fixed = false;
1818}
1819
1820
1821/**
1822 * Reset CPU due to INIT IPI or hot (un)plugging.
1823 *
1824 * @param pVCpu The cross context virtual CPU structure of the CPU being
1825 * reset.
1826 * @param fInitIpi Whether this is the INIT IPI or hot (un)plugging case.
1827 */
1828void nemR3NativeResetCpu(PVMCPU pVCpu, bool fInitIpi)
1829{
1830 /* Lock the A20 gate if INIT IPI, make sure it's enabled. */
1831 if (fInitIpi && pVCpu->idCpu > 0)
1832 {
1833 PVM pVM = pVCpu->CTX_SUFF(pVM);
1834 if (!pVM->nem.s.fA20Enabled)
1835 nemR3NativeNotifySetA20(pVCpu, true);
1836 pVM->nem.s.fA20Enabled = true;
1837 pVM->nem.s.fA20Fixed = true;
1838 }
1839}
1840
1841
1842VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
1843{
1844#ifdef NEM_WIN_WITH_RING0_RUNLOOP
1845 if (pVM->nem.s.fUseRing0Runloop)
1846 {
1847 for (;;)
1848 {
1849 VBOXSTRICTRC rcStrict = VMMR3CallR0EmtFast(pVM, pVCpu, VMMR0_DO_NEM_RUN);
1850 if (RT_SUCCESS(rcStrict))
1851 {
1852 /*
1853 * We deal with VINF_NEM_FLUSH_TLB here, since we're running the risk of
1854 * getting these while we already got another RC (I/O ports).
1855 */
1856 /* Status codes: */
1857 VBOXSTRICTRC rcPending = pVCpu->nem.s.rcPending;
1858 pVCpu->nem.s.rcPending = VINF_SUCCESS;
1859 if (rcStrict == VINF_NEM_FLUSH_TLB || rcPending == VINF_NEM_FLUSH_TLB)
1860 {
1861 LogFlow(("nemR3NativeRunGC: calling PGMFlushTLB...\n"));
1862 int rc = PGMFlushTLB(pVCpu, CPUMGetGuestCR3(pVCpu), true /*fGlobal*/, false /*fPdpesMapped*/);
1863 AssertRCReturn(rc, rc);
1864 if (rcStrict == VINF_NEM_FLUSH_TLB)
1865 {
1866 if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK | VM_FF_HP_R0_PRE_HM_MASK)
1867 && !VMCPU_FF_IS_ANY_SET(pVCpu, (VMCPU_FF_HIGH_PRIORITY_POST_MASK | VMCPU_FF_HP_R0_PRE_HM_MASK)
1868 & ~VMCPU_FF_RESUME_GUEST_MASK))
1869 {
1870 VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK);
1871 continue;
1872 }
1873 rcStrict = VINF_SUCCESS;
1874 }
1875 }
1876 else
1877 AssertMsg(rcPending == VINF_SUCCESS, ("rcPending=%Rrc\n", VBOXSTRICTRC_VAL(rcPending) ));
1878 }
1879 LogFlow(("nemR3NativeRunGC: returns %Rrc\n", VBOXSTRICTRC_VAL(rcStrict) ));
1880 return rcStrict;
1881 }
1882 }
1883#endif
1884 return nemHCWinRunGC(pVM, pVCpu);
1885}
1886
1887
1888bool nemR3NativeCanExecuteGuest(PVM pVM, PVMCPU pVCpu)
1889{
1890 NOREF(pVM); NOREF(pVCpu);
1891 return true;
1892}
1893
1894
1895bool nemR3NativeSetSingleInstruction(PVM pVM, PVMCPU pVCpu, bool fEnable)
1896{
1897 NOREF(pVM); NOREF(pVCpu); NOREF(fEnable);
1898 return false;
1899}
1900
1901
1902/**
1903 * Forced flag notification call from VMEmt.h.
1904 *
1905 * This is only called when pVCpu is in the VMCPUSTATE_STARTED_EXEC_NEM state.
1906 *
1907 * @param pVM The cross context VM structure.
1908 * @param pVCpu The cross context virtual CPU structure of the CPU
1909 * to be notified.
1910 * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_XXX.
1911 */
1912void nemR3NativeNotifyFF(PVM pVM, PVMCPU pVCpu, uint32_t fFlags)
1913{
1914#ifdef NEM_WIN_USE_OUR_OWN_RUN_API
1915 nemHCWinCancelRunVirtualProcessor(pVM, pVCpu);
1916#else
1917# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1918 if (pVM->nem.s.fUseRing0Runloop)
1919 nemHCWinCancelRunVirtualProcessor(pVM, pVCpu);
1920 else
1921# endif
1922 {
1923 Log8(("nemR3NativeNotifyFF: canceling %u\n", pVCpu->idCpu));
1924 HRESULT hrc = WHvCancelRunVirtualProcessor(pVM->nem.s.hPartition, pVCpu->idCpu, 0);
1925 AssertMsg(SUCCEEDED(hrc), ("WHvCancelRunVirtualProcessor -> hrc=%Rhrc\n", hrc));
1926 RT_NOREF_PV(hrc);
1927 }
1928#endif
1929 RT_NOREF_PV(fFlags);
1930}
1931
1932
1933DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv)
1934{
1935 PGMPAGEMAPLOCK Lock;
1936 int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhys, ppv, &Lock);
1937 if (RT_SUCCESS(rc))
1938 PGMPhysReleasePageMappingLock(pVM, &Lock);
1939 return rc;
1940}
1941
1942
1943DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv)
1944{
1945 PGMPAGEMAPLOCK Lock;
1946 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhys, ppv, &Lock);
1947 if (RT_SUCCESS(rc))
1948 PGMPhysReleasePageMappingLock(pVM, &Lock);
1949 return rc;
1950}
1951
1952
1953VMMR3_INT_DECL(int) NEMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvR3)
1954{
1955 Log5(("NEMR3NotifyPhysRamRegister: %RGp LB %RGp, pvR3=%p\n", GCPhys, cb, pvR3));
1956#if !defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) && defined(VBOX_WITH_PGM_NEM_MODE)
1957 if (pvR3)
1958 {
1959 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvR3, GCPhys, cb,
1960 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
1961 if (SUCCEEDED(hrc))
1962 { /* likely */ }
1963 else
1964 {
1965 LogRel(("NEMR3NotifyPhysRamRegister: GCPhys=%RGp LB %RGp pvR3=%p hrc=%Rhrc (%#x) Last=%#x/%u\n",
1966 GCPhys, cb, pvR3, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1967 return VERR_NEM_MAP_PAGES_FAILED;
1968 }
1969 }
1970#else
1971 RT_NOREF(pVM, GCPhys, cb, pvR3);
1972#endif
1973 return VINF_SUCCESS;
1974}
1975
1976
1977VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
1978 void *pvRam, void *pvMmio2, uint8_t *pu2State)
1979{
1980 Log5(("NEMR3NotifyPhysMmioExMapEarly: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p (%d)\n",
1981 GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, *pu2State));
1982
1983#if !defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) && defined(VBOX_WITH_PGM_NEM_MODE)
1984 /*
1985 * Unmap the RAM we're replacing.
1986 */
1987 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
1988 {
1989 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, cb);
1990 if (SUCCEEDED(hrc))
1991 { /* likely */ }
1992 else if (pvMmio2)
1993 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u (ignored)\n",
1994 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1995 else
1996 {
1997 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u\n",
1998 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1999 return VERR_NEM_UNMAP_PAGES_FAILED;
2000 }
2001 }
2002
2003 /*
2004 * Map MMIO2 if any.
2005 */
2006 if (pvMmio2)
2007 {
2008 Assert(fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2);
2009 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvMmio2, GCPhys, cb,
2010 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
2011 if (SUCCEEDED(hrc))
2012 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
2013 else
2014 {
2015 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x pvMmio2=%p: Map -> hrc=%Rhrc (%#x) Last=%#x/%u\n",
2016 GCPhys, cb, fFlags, pvMmio2, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2017 return VERR_NEM_MAP_PAGES_FAILED;
2018 }
2019 }
2020 else
2021 {
2022 Assert(!(fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2));
2023 *pu2State = NEM_WIN_PAGE_STATE_UNMAPPED;
2024 }
2025
2026#else
2027 RT_NOREF(pVM, GCPhys, cb, pvRam, pvMmio2);
2028 *pu2State = (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE) ? UINT8_MAX : NEM_WIN_PAGE_STATE_UNMAPPED;
2029#endif
2030 return VINF_SUCCESS;
2031}
2032
2033
2034VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
2035 void *pvRam, void *pvMmio2)
2036{
2037 RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2);
2038 return VINF_SUCCESS;
2039}
2040
2041
2042VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvRam,
2043 void *pvMmio2, uint8_t *pu2State)
2044{
2045 Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p\n",
2046 GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State));
2047
2048 int rc = VINF_SUCCESS;
2049#if !defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) && defined(VBOX_WITH_PGM_NEM_MODE)
2050 /*
2051 * Unmap the MMIO2 pages.
2052 */
2053 /** @todo If we implement aliasing (MMIO2 page aliased into MMIO range),
2054 * we may have more stuff to unmap even in case of pure MMIO... */
2055 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2)
2056 {
2057 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, cb);
2058 if (FAILED(hrc))
2059 {
2060 LogRel2(("NEMR3NotifyPhysMmioExUnmap: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u (ignored)\n",
2061 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2062 rc = VERR_NEM_UNMAP_PAGES_FAILED;
2063 }
2064 }
2065
2066 /*
2067 * Restore the RAM we replaced.
2068 */
2069 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
2070 {
2071 AssertPtr(pvRam);
2072 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvRam, GCPhys, cb,
2073 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
2074 if (SUCCEEDED(hrc))
2075 { /* likely */ }
2076 else
2077 {
2078 LogRel(("NEMR3NotifyPhysMmioExUnmap: GCPhys=%RGp LB %RGp pvMmio2=%p hrc=%Rhrc (%#x) Last=%#x/%u\n",
2079 GCPhys, cb, pvMmio2, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2080 rc = VERR_NEM_MAP_PAGES_FAILED;
2081 }
2082 if (pu2State)
2083 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
2084 }
2085 /* Mark the pages as unmapped if relevant. */
2086 else if (pu2State)
2087 *pu2State = NEM_WIN_PAGE_STATE_UNMAPPED;
2088
2089 RT_NOREF(pvMmio2);
2090#else
2091 RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State);
2092 if (pu2State)
2093 *pu2State = UINT8_MAX;
2094#endif
2095 return rc;
2096}
2097
2098
2099VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages, uint32_t fFlags,
2100 uint8_t *pu2State)
2101{
2102 Log5(("nemR3NativeNotifyPhysRomRegisterEarly: %RGp LB %RGp pvPages=%p fFlags=%#x\n", GCPhys, cb, pvPages, fFlags));
2103 *pu2State = UINT8_MAX;
2104
2105#if 0 /* Let's not do this after all. We'll protection change notifications for each page and if not we'll map them lazily. */
2106 RTGCPHYS const cPages = cb >> X86_PAGE_SHIFT;
2107 for (RTGCPHYS iPage = 0; iPage < cPages; iPage++, GCPhys += X86_PAGE_SIZE)
2108 {
2109 const void *pvPage;
2110 int rc = nemR3NativeGCPhys2R3PtrReadOnly(pVM, GCPhys, &pvPage);
2111 if (RT_SUCCESS(rc))
2112 {
2113 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, (void *)pvPage, GCPhys, X86_PAGE_SIZE,
2114 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute);
2115 if (SUCCEEDED(hrc))
2116 { /* likely */ }
2117 else
2118 {
2119 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
2120 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2121 return VERR_NEM_INIT_FAILED;
2122 }
2123 }
2124 else
2125 {
2126 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc));
2127 return rc;
2128 }
2129 }
2130 RT_NOREF_PV(fFlags);
2131#else
2132 RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
2133#endif
2134 return VINF_SUCCESS;
2135}
2136
2137
2138VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
2139 uint32_t fFlags, uint8_t *pu2State)
2140{
2141 Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p\n",
2142 GCPhys, cb, pvPages, fFlags, pu2State));
2143 *pu2State = UINT8_MAX;
2144
2145#if !defined(NEM_WIN_USE_HYPERCALLS_FOR_PAGES) && defined(VBOX_WITH_PGM_NEM_MODE)
2146 /*
2147 * (Re-)map readonly.
2148 */
2149 AssertPtrReturn(pvPages, VERR_INVALID_POINTER);
2150 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvPages, GCPhys, cb, WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute);
2151 if (SUCCEEDED(hrc))
2152 *pu2State = NEM_WIN_PAGE_STATE_READABLE;
2153 else
2154 {
2155 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp LB %RGp pvPages=%p fFlags=%#x hrc=%Rhrc (%#x) Last=%#x/%u\n",
2156 GCPhys, cb, pvPages, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2157 return VERR_NEM_MAP_PAGES_FAILED;
2158 }
2159 RT_NOREF(fFlags);
2160#else
2161 RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
2162#endif
2163 return VINF_SUCCESS;
2164}
2165
2166
2167/**
2168 * @callback_method_impl{FNPGMPHYSNEMCHECKPAGE}
2169 */
2170static DECLCALLBACK(int) nemR3WinUnsetForA20CheckerCallback(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys,
2171 PPGMPHYSNEMPAGEINFO pInfo, void *pvUser)
2172{
2173 /* We'll just unmap the memory. */
2174 if (pInfo->u2NemState > NEM_WIN_PAGE_STATE_UNMAPPED)
2175 {
2176#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES
2177 int rc = nemHCWinHypercallUnmapPage(pVM, pVCpu, GCPhys);
2178 AssertRC(rc);
2179 if (RT_SUCCESS(rc))
2180#else
2181 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, X86_PAGE_SIZE);
2182 if (SUCCEEDED(hrc))
2183#endif
2184 {
2185 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPage);
2186 uint32_t cMappedPages = ASMAtomicDecU32(&pVM->nem.s.cMappedPages); NOREF(cMappedPages);
2187 Log5(("NEM GPA unmapped/A20: %RGp (was %s, cMappedPages=%u)\n", GCPhys, g_apszPageStates[pInfo->u2NemState], cMappedPages));
2188 pInfo->u2NemState = NEM_WIN_PAGE_STATE_UNMAPPED;
2189 }
2190 else
2191 {
2192 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPageFailed);
2193#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES
2194 LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc));
2195 return rc;
2196#else
2197 LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
2198 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2199 return VERR_INTERNAL_ERROR_2;
2200#endif
2201 }
2202 }
2203 RT_NOREF(pVCpu, pvUser);
2204 return VINF_SUCCESS;
2205}
2206
2207
2208/**
2209 * Unmaps a page from Hyper-V for the purpose of emulating A20 gate behavior.
2210 *
2211 * @returns The PGMPhysNemQueryPageInfo result.
2212 * @param pVM The cross context VM structure.
2213 * @param pVCpu The cross context virtual CPU structure.
2214 * @param GCPhys The page to unmap.
2215 */
2216static int nemR3WinUnmapPageForA20Gate(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
2217{
2218 PGMPHYSNEMPAGEINFO Info;
2219 return PGMPhysNemPageInfoChecker(pVM, pVCpu, GCPhys, false /*fMakeWritable*/, &Info,
2220 nemR3WinUnsetForA20CheckerCallback, NULL);
2221}
2222
2223
2224/**
2225 * Called when the A20 state changes.
2226 *
2227 * Hyper-V doesn't seem to offer a simple way of implementing the A20 line
2228 * features of PCs. So, we do a very minimal emulation of the HMA to make DOS
2229 * happy.
2230 *
2231 * @param pVCpu The CPU the A20 state changed on.
2232 * @param fEnabled Whether it was enabled (true) or disabled.
2233 */
2234void nemR3NativeNotifySetA20(PVMCPU pVCpu, bool fEnabled)
2235{
2236 Log(("nemR3NativeNotifySetA20: fEnabled=%RTbool\n", fEnabled));
2237 PVM pVM = pVCpu->CTX_SUFF(pVM);
2238 if (!pVM->nem.s.fA20Fixed)
2239 {
2240 pVM->nem.s.fA20Enabled = fEnabled;
2241 for (RTGCPHYS GCPhys = _1M; GCPhys < _1M + _64K; GCPhys += X86_PAGE_SIZE)
2242 nemR3WinUnmapPageForA20Gate(pVM, pVCpu, GCPhys);
2243 }
2244}
2245
2246
2247/** @page pg_nem_win NEM/win - Native Execution Manager, Windows.
2248 *
2249 * On Windows the Hyper-V root partition (dom0 in Xen terminology) does not have
2250 * nested VT-x or AMD-V capabilities. Early on raw-mode worked inside it, but
2251 * for a while now we've been getting \#GPs when trying to modify CR4 in the
2252 * world switcher. So, when Hyper-V is active on Windows we have little choice
2253 * but to use Hyper-V to run our VMs.
2254 *
2255 *
2256 * @section sub_nem_win_whv The WinHvPlatform API
2257 *
2258 * Since Windows 10 build 17083 there is a documented API for managing Hyper-V
2259 * VMs: header file WinHvPlatform.h and implementation in WinHvPlatform.dll.
2260 * This interface is a wrapper around the undocumented Virtualization
2261 * Infrastructure Driver (VID) API - VID.DLL and VID.SYS. The wrapper is
2262 * written in C++, namespaced, early versions (at least) were using standard C++
2263 * container templates in several places.
2264 *
2265 * When creating a VM using WHvCreatePartition, it will only create the
2266 * WinHvPlatform structures for it, to which you get an abstract pointer. The
2267 * VID API that actually creates the partition is first engaged when you call
2268 * WHvSetupPartition after first setting a lot of properties using
2269 * WHvSetPartitionProperty. Since the VID API is just a very thin wrapper
2270 * around CreateFile and NtDeviceIoControlFile, it returns an actual HANDLE for
2271 * the partition to WinHvPlatform. We fish this HANDLE out of the WinHvPlatform
2272 * partition structures because we need to talk directly to VID for reasons
2273 * we'll get to in a bit. (Btw. we could also intercept the CreateFileW or
2274 * NtDeviceIoControlFile calls from VID.DLL to get the HANDLE should fishing in
2275 * the partition structures become difficult.)
2276 *
2277 * The WinHvPlatform API requires us to both set the number of guest CPUs before
2278 * setting up the partition and call WHvCreateVirtualProcessor for each of them.
2279 * The CPU creation function boils down to a VidMessageSlotMap call that sets up
2280 * and maps a message buffer into ring-3 for async communication with hyper-V
2281 * and/or the VID.SYS thread actually running the CPU thru
2282 * WinHvRunVpDispatchLoop(). When for instance a VMEXIT is encountered, hyper-V
2283 * sends a message that the WHvRunVirtualProcessor API retrieves (and later
2284 * acknowledges) via VidMessageSlotHandleAndGetNext. Since or about build
2285 * 17757 a register page is also mapped into user space when creating the
2286 * virtual CPU. It should be noted that WHvDeleteVirtualProcessor doesn't do
2287 * much as there seems to be no partner function VidMessagesSlotMap that
2288 * reverses what it did.
2289 *
2290 * Memory is managed thru calls to WHvMapGpaRange and WHvUnmapGpaRange (GPA does
2291 * not mean grade point average here, but rather guest physical address space),
2292 * which correspond to VidCreateVaGpaRangeSpecifyUserVa and VidDestroyGpaRange
2293 * respectively. As 'UserVa' indicates, the functions work on user process
2294 * memory. The mappings are also subject to quota restrictions, so the number
2295 * of ranges is limited and probably their total size as well. Obviously
2296 * VID.SYS keeps track of the ranges, but so does WinHvPlatform, which means
2297 * there is a bit of overhead involved and quota restrictions make sense.
2298 *
2299 * Running guest code is done through the WHvRunVirtualProcessor function. It
2300 * asynchronously starts or resumes hyper-V CPU execution and then waits for an
2301 * VMEXIT message. Hyper-V / VID.SYS will return information about the message
2302 * in the message buffer mapping, and WHvRunVirtualProcessor will convert that
2303 * into its own WHV_RUN_VP_EXIT_CONTEXT format.
2304 *
2305 * Other threads can interrupt the execution by using WHvCancelVirtualProcessor,
2306 * which since or about build 17757 uses VidMessageSlotHandleAndGetNext to do
2307 * the work (earlier builds would open the waiting thread, do a dummy
2308 * QueueUserAPC on it, and let it upon return use VidStopVirtualProcessor to
2309 * do the actual stopping). While there is certainly a race between cancelation
2310 * and the CPU causing a natural VMEXIT, it is not known whether this still
2311 * causes extra work on subsequent WHvRunVirtualProcessor calls (it did in
2312 * 17134 and earlier).
2313 *
2314 * Registers are retrieved and set via WHvGetVirtualProcessorRegisters and
2315 * WHvSetVirtualProcessorRegisters. In addition, several VMEXITs include
2316 * essential register state in the exit context information, potentially making
2317 * it possible to emulate the instruction causing the exit without involving
2318 * WHvGetVirtualProcessorRegisters.
2319 *
2320 *
2321 * @subsection subsec_nem_win_whv_cons Issues & Feedback
2322 *
2323 * Here are some observations (mostly against build 17101):
2324 *
2325 * - The VMEXIT performance is dismal (build 17134).
2326 *
2327 * Our proof of concept implementation with a kernel runloop (i.e. not using
2328 * WHvRunVirtualProcessor and friends, but calling VID.SYS fast I/O control
2329 * entry point directly) delivers 9-10% of the port I/O performance and only
2330 * 6-7% of the MMIO performance that we have with our own hypervisor.
2331 *
2332 * When using the official WinHvPlatform API, the numbers are 3% for port I/O
2333 * and 5% for MMIO.
2334 *
2335 * While the tests we've done are using tight loops only doing port I/O
2336 * and MMIO, the problem is clearly visible when running regular guest OSes.
2337 * Anything that hammers the VGA device would be suffering, for example:
2338 *
2339 * - Windows 2000 boot screen animation overloads us with MMIO exits
2340 * and won't even boot because all the time is spent in interrupt
2341 * handlers and redrawing the screen.
2342 *
2343 * - DSL 4.4 and its bootmenu logo is slower than molasses in january.
2344 *
2345 * We have not found a workaround for this yet.
2346 *
2347 * Something that might improve the issue a little is to detect blocks with
2348 * excessive MMIO and port I/O exits and emulate instructions to cover
2349 * multiple exits before letting Hyper-V have a go at the guest execution
2350 * again. This will only improve the situation under some circumstances,
2351 * since emulating instructions without recompilation can be expensive, so
2352 * there will only be real gains if the exiting instructions are tightly
2353 * packed.
2354 *
2355 * Update: Security fixes during the summer of 2018 caused the performance to
2356 * drop even more.
2357 *
2358 * Update [build 17757]: Some performance improvements here, but they don't
2359 * yet make up for what was lost this summer.
2360 *
2361 *
2362 * - We need a way to directly modify the TSC offset (or bias if you like).
2363 *
2364 * The current approach of setting the WHvX64RegisterTsc register one by one
2365 * on each virtual CPU in sequence will introduce random inaccuracies,
2366 * especially if the thread doing the job is rescheduled at a bad time.
2367 *
2368 *
2369 * - Unable to access WHvX64RegisterMsrMtrrCap (build 17134).
2370 *
2371 *
2372 * - On AMD Ryzen grub/debian 9.0 ends up with an unrecoverable exception
2373 * when IA32_MTRR_PHYSMASK0 is written.
2374 *
2375 *
2376 * - The IA32_APIC_BASE register does not work right:
2377 *
2378 * - Attempts by the guest to clear bit 11 (EN) are ignored, both the
2379 * guest and the VMM reads back the old value.
2380 *
2381 * - Attempts to modify the base address (bits NN:12) seems to be ignored
2382 * in the same way.
2383 *
2384 * - The VMM can modify both the base address as well as the EN and
2385 * BSP bits, however this is useless if we cannot intercept the WRMSR.
2386 *
2387 * - Attempts by the guest to set the EXTD bit (X2APIC) result in \#GP(0),
2388 * while the VMM ends up with ERROR_HV_INVALID_PARAMETER. Seems
2389 * there is no way to support X2APIC.
2390 *
2391 *
2392 * - Not sure if this is a thing, but WHvCancelVirtualProcessor seems to cause
2393 * a lot more spurious WHvRunVirtualProcessor returns than what we get
2394 * with the replacement code. By spurious returns we mean that the
2395 * subsequent call to WHvRunVirtualProcessor would return immediately.
2396 *
2397 * Update [build 17757]: New cancelation code might have addressed this, but
2398 * haven't had time to test it yet.
2399 *
2400 *
2401 * - There is no API for modifying protection of a page within a GPA range.
2402 *
2403 * From what we can tell, the only way to modify the protection (like readonly
2404 * -> writable, or vice versa) is to first unmap the range and then remap it
2405 * with the new protection.
2406 *
2407 * We are for instance doing this quite a bit in order to track dirty VRAM
2408 * pages. VRAM pages start out as readonly; when the guest writes to a page
2409 * we take an exit, note down which page it is, make it writable and restart
2410 * the instruction. After refreshing the display, we reset all the writable
2411 * pages to readonly again, bulk fashion.
2412 *
2413 * Now to work around this issue, we do page sized GPA ranges. In addition to
2414 * adding a lot of tracking overhead to WinHvPlatform and VID.SYS, this also
2415 * causes us to exceed our quota before we've even mapped a default sized
2416 * (128MB) VRAM page-by-page. So, to work around this quota issue we have to
2417 * lazily map pages and actively restrict the number of mappings.
2418 *
2419 * Our best workaround thus far is bypassing WinHvPlatform and VID entirely
2420 * when it comes to guest memory management and instead use the underlying
2421 * hypercalls (HvCallMapGpaPages, HvCallUnmapGpaPages) to do it ourselves.
2422 * (This also maps a whole lot better into our own guest page management
2423 * infrastructure.)
2424 *
2425 * Update [build 17757]: Introduces a KVM like dirty logging API which could
2426 * help tracking dirty VGA pages, while being useless for shadow ROM and
2427 * devices trying to catch the guest updating descriptors and such.
2428 *
2429 *
2430 * - Observed problems doing WHvUnmapGpaRange immediately followed by
2431 * WHvMapGpaRange.
2432 *
2433 * As mentioned above, we've been forced to use this sequence when modifying
2434 * page protection. However, when transitioning from readonly to writable,
2435 * we've ended up looping forever with the same write to readonly memory
2436 * VMEXIT. We're wondering if this issue might be related to the lazy mapping
2437 * logic in WinHvPlatform.
2438 *
2439 * Workaround: Insert a WHvRunVirtualProcessor call and make sure to get a GPA
2440 * unmapped exit between the two calls. Not entirely great performance wise
2441 * (or the sanity of our code).
2442 *
2443 *
2444 * - Implementing A20 gate behavior is tedious, whereas correctly emulating the
2445 * A20M# pin (present on 486 and later) is near impossible for SMP setups
2446 * (e.g. possibility of two CPUs with different A20 status).
2447 *
2448 * Workaround: Only do A20 on CPU 0, restricting the emulation to HMA. We
2449 * unmap all pages related to HMA (0x100000..0x10ffff) when the A20 state
2450 * changes, lazily syncing the right pages back when accessed.
2451 *
2452 *
2453 * - WHVRunVirtualProcessor wastes time converting VID/Hyper-V messages to its
2454 * own format (WHV_RUN_VP_EXIT_CONTEXT).
2455 *
2456 * We understand this might be because Microsoft wishes to remain free to
2457 * modify the VID/Hyper-V messages, but it's still rather silly and does slow
2458 * things down a little. We'd much rather just process the messages directly.
2459 *
2460 *
2461 * - WHVRunVirtualProcessor would've benefited from using a callback interface:
2462 *
2463 * - The potential size changes of the exit context structure wouldn't be
2464 * an issue, since the function could manage that itself.
2465 *
2466 * - State handling could probably be simplified (like cancelation).
2467 *
2468 *
2469 * - WHvGetVirtualProcessorRegisters and WHvSetVirtualProcessorRegisters
2470 * internally converts register names, probably using temporary heap buffers.
2471 *
2472 * From the looks of things, they are converting from WHV_REGISTER_NAME to
2473 * HV_REGISTER_NAME from the "Virtual Processor Register Names" section in
2474 * the "Hypervisor Top-Level Functional Specification" document. This feels
2475 * like an awful waste of time.
2476 *
2477 * We simply cannot understand why HV_REGISTER_NAME isn't used directly here,
2478 * or at least the same values, making any conversion redundant. Restricting
2479 * access to certain registers could easily be implemented by scanning the
2480 * inputs.
2481 *
2482 * To avoid the heap + conversion overhead, we're currently using the
2483 * HvCallGetVpRegisters and HvCallSetVpRegisters calls directly, at least for
2484 * the ring-0 code.
2485 *
2486 * Update [build 17757]: Register translation has been very cleverly
2487 * optimized and made table driven (2 top level tables, 4 + 1 leaf tables).
2488 * Register information consists of the 32-bit HV register name, register page
2489 * offset, and flags (giving valid offset, size and more). Register
2490 * getting/settings seems to be done by hoping that the register page provides
2491 * it all, and falling back on the VidSetVirtualProcessorState if one or more
2492 * registers are not available there.
2493 *
2494 * Note! We have currently not updated our ring-0 code to take the register
2495 * page into account, so it's suffering a little compared to the ring-3 code
2496 * that now uses the official APIs for registers.
2497 *
2498 *
2499 * - The YMM and XCR0 registers are not yet named (17083). This probably
2500 * wouldn't be a problem if HV_REGISTER_NAME was used, see previous point.
2501 *
2502 * Update [build 17757]: XCR0 is added. YMM register values seems to be put
2503 * into a yet undocumented XsaveState interface. Approach is a little bulky,
2504 * but saves number of enums and dispenses with register translation. Also,
2505 * the underlying Vid setter API duplicates the input buffer on the heap,
2506 * adding a 16 byte header.
2507 *
2508 *
2509 * - Why does VID.SYS only query/set 32 registers at a time thru the
2510 * HvCallGetVpRegisters and HvCallSetVpRegisters hypercalls?
2511 *
2512 * We've no trouble getting/setting all the registers defined by
2513 * WHV_REGISTER_NAME in one hypercall (around 80). Some kind of stack
2514 * buffering or similar?
2515 *
2516 *
2517 * - To handle the VMMCALL / VMCALL instructions, it seems we need to intercept
2518 * \#UD exceptions and inspect the opcodes. A dedicated exit for hypercalls
2519 * would be more efficient, esp. for guests using \#UD for other purposes..
2520 *
2521 *
2522 * - Wrong instruction length in the VpContext with unmapped GPA memory exit
2523 * contexts on 17115/AMD.
2524 *
2525 * One byte "PUSH CS" was reported as 2 bytes, while a two byte
2526 * "MOV [EBX],EAX" was reported with a 1 byte instruction length. Problem
2527 * naturally present in untranslated hyper-v messages.
2528 *
2529 *
2530 * - The I/O port exit context information seems to be missing the address size
2531 * information needed for correct string I/O emulation.
2532 *
2533 * VT-x provides this information in bits 7:9 in the instruction information
2534 * field on newer CPUs. AMD-V in bits 7:9 in the EXITINFO1 field in the VMCB.
2535 *
2536 * We can probably work around this by scanning the instruction bytes for
2537 * address size prefixes. Haven't investigated it any further yet.
2538 *
2539 *
2540 * - Querying WHvCapabilityCodeExceptionExitBitmap returns zero even when
2541 * intercepts demonstrably works (17134).
2542 *
2543 *
2544 * - Querying HvPartitionPropertyDebugChannelId via HvCallGetPartitionProperty
2545 * (hypercall) hangs the host (17134).
2546 *
2547 * - CommonUtilities::GuidToString needs a 'static' before the hex digit array,
2548 * looks pointless to re-init a stack copy of it for each call (novice mistake).
2549 *
2550 *
2551 * Old concerns that have been addressed:
2552 *
2553 * - The WHvCancelVirtualProcessor API schedules a dummy usermode APC callback
2554 * in order to cancel any current or future alertable wait in VID.SYS during
2555 * the VidMessageSlotHandleAndGetNext call.
2556 *
2557 * IIRC this will make the kernel schedule the specified callback thru
2558 * NTDLL!KiUserApcDispatcher by modifying the thread context and quite
2559 * possibly the userland thread stack. When the APC callback returns to
2560 * KiUserApcDispatcher, it will call NtContinue to restore the old thread
2561 * context and resume execution from there. This naturally adds up to some
2562 * CPU cycles, ring transitions aren't for free, especially after Spectre &
2563 * Meltdown mitigations.
2564 *
2565 * Using an NtAlertThread call could do the same without the thread context
2566 * modifications and the extra kernel call.
2567 *
2568 * Update: All concerns have been addressed in or about build 17757.
2569 *
2570 * The WHvCancelVirtualProcessor API is now implemented using a new
2571 * VidMessageSlotHandleAndGetNext() flag (4). Codepath is slightly longer
2572 * than NtAlertThread, but has the added benefit that spurious wakeups can be
2573 * more easily reduced.
2574 *
2575 *
2576 * - When WHvRunVirtualProcessor returns without a message, or on a terse
2577 * VID message like HLT, it will make a kernel call to get some registers.
2578 * This is potentially inefficient if the caller decides he needs more
2579 * register state.
2580 *
2581 * It would be better to just return what's available and let the caller fetch
2582 * what is missing from his point of view in a single kernel call.
2583 *
2584 * Update: All concerns have been addressed in or about build 17757. Selected
2585 * registers are now available via shared memory and thus HLT should (not
2586 * verified) no longer require a system call to compose the exit context data.
2587 *
2588 *
2589 * - The WHvRunVirtualProcessor implementation does lazy GPA range mappings when
2590 * an unmapped GPA message is received from hyper-V.
2591 *
2592 * Since MMIO is currently realized as unmapped GPA, this will slow down all
2593 * MMIO accesses a tiny little bit as WHvRunVirtualProcessor looks up the
2594 * guest physical address to check if it is a pending lazy mapping.
2595 *
2596 * The lazy mapping feature makes no sense to us. We as API user have all the
2597 * information and can do lazy mapping ourselves if we want/have to (see next
2598 * point).
2599 *
2600 * Update: All concerns have been addressed in or about build 17757.
2601 *
2602 *
2603 * - The WHvGetCapability function has a weird design:
2604 * - The CapabilityCode parameter is pointlessly duplicated in the output
2605 * structure (WHV_CAPABILITY).
2606 *
2607 * - API takes void pointer, but everyone will probably be using
2608 * WHV_CAPABILITY due to WHV_CAPABILITY::CapabilityCode making it
2609 * impractical to use anything else.
2610 *
2611 * - No output size.
2612 *
2613 * - See GetFileAttributesEx, GetFileInformationByHandleEx,
2614 * FindFirstFileEx, and others for typical pattern for generic
2615 * information getters.
2616 *
2617 * Update: All concerns have been addressed in build 17110.
2618 *
2619 *
2620 * - The WHvGetPartitionProperty function uses the same weird design as
2621 * WHvGetCapability, see above.
2622 *
2623 * Update: All concerns have been addressed in build 17110.
2624 *
2625 *
2626 * - The WHvSetPartitionProperty function has a totally weird design too:
2627 * - In contrast to its partner WHvGetPartitionProperty, the property code
2628 * is not a separate input parameter here but part of the input
2629 * structure.
2630 *
2631 * - The input structure is a void pointer rather than a pointer to
2632 * WHV_PARTITION_PROPERTY which everyone probably will be using because
2633 * of the WHV_PARTITION_PROPERTY::PropertyCode field.
2634 *
2635 * - Really, why use PVOID for the input when the function isn't accepting
2636 * minimal sizes. E.g. WHVPartitionPropertyCodeProcessorClFlushSize only
2637 * requires a 9 byte input, but the function insists on 16 bytes (17083).
2638 *
2639 * - See GetFileAttributesEx, SetFileInformationByHandle, FindFirstFileEx,
2640 * and others for typical pattern for generic information setters and
2641 * getters.
2642 *
2643 * Update: All concerns have been addressed in build 17110.
2644 *
2645 *
2646 *
2647 * @section sec_nem_win_impl Our implementation.
2648 *
2649 * We set out with the goal of wanting to run as much as possible in ring-0,
2650 * reasoning that this would give us the best performance.
2651 *
2652 * This goal was approached gradually, starting out with a pure WinHvPlatform
2653 * implementation, gradually replacing parts: register access, guest memory
2654 * handling, running virtual processors. Then finally moving it all into
2655 * ring-0, while keeping most of it configurable so that we could make
2656 * comparisons (see NEMInternal.h and nemR3NativeRunGC()).
2657 *
2658 *
2659 * @subsection subsect_nem_win_impl_ioctl VID.SYS I/O control calls
2660 *
2661 * To run things in ring-0 we need to talk directly to VID.SYS thru its I/O
2662 * control interface. Looking at changes between like build 17083 and 17101 (if
2663 * memory serves) a set of the VID I/O control numbers shifted a little, which
2664 * means we need to determine them dynamically. We currently do this by hooking
2665 * the NtDeviceIoControlFile API call from VID.DLL and snooping up the
2666 * parameters when making dummy calls to relevant APIs. (We could also
2667 * disassemble the relevant APIs and try fish out the information from that, but
2668 * this is way simpler.)
2669 *
2670 * Issuing I/O control calls from ring-0 is facing a small challenge with
2671 * respect to direct buffering. When using direct buffering the device will
2672 * typically check that the buffer is actually in the user address space range
2673 * and reject kernel addresses. Fortunately, we've got the cross context VM
2674 * structure that is mapped into both kernel and user space, it's also locked
2675 * and safe to access from kernel space. So, we place the I/O control buffers
2676 * in the per-CPU part of it (NEMCPU::uIoCtlBuf) and give the driver the user
2677 * address if direct access buffering or kernel address if not.
2678 *
2679 * The I/O control calls are 'abstracted' in the support driver, see
2680 * SUPR0IoCtlSetupForHandle(), SUPR0IoCtlPerform() and SUPR0IoCtlCleanup().
2681 *
2682 *
2683 * @subsection subsect_nem_win_impl_cpumctx CPUMCTX
2684 *
2685 * Since the CPU state needs to live in Hyper-V when executing, we probably
2686 * should not transfer more than necessary when handling VMEXITs. To help us
2687 * manage this CPUMCTX got a new field CPUMCTX::fExtrn to indicate which
2688 * part of the state is currently externalized (== in Hyper-V).
2689 *
2690 *
2691 * @subsection sec_nem_win_benchmarks Benchmarks.
2692 *
2693 * @subsubsection subsect_nem_win_benchmarks_bs2t1 17134/2018-06-22: Bootsector2-test1
2694 *
2695 * This is ValidationKit/bootsectors/bootsector2-test1.asm as of 2018-06-22
2696 * (internal r123172) running the release build of VirtualBox from the same
2697 * source, though with exit optimizations disabled. Host is AMD Threadripper 1950X
2698 * running an up to date 64-bit Windows 10 build 17134.
2699 *
2700 * The base line column is using the official WinHv API for everything but physical
2701 * memory mapping. The 2nd column is the default NEM/win configuration where we
2702 * put the main execution loop in ring-0, using hypercalls when we can and VID for
2703 * managing execution. The 3rd column is regular VirtualBox using AMD-V directly,
2704 * hyper-V is disabled, main execution loop in ring-0.
2705 *
2706 * @verbatim
2707TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V
2708 32-bit paged protected mode, CPUID : 108 874 ins/sec 113% / 123 602 1198% / 1 305 113
2709 32-bit pae protected mode, CPUID : 106 722 ins/sec 115% / 122 740 1232% / 1 315 201
2710 64-bit long mode, CPUID : 106 798 ins/sec 114% / 122 111 1198% / 1 280 404
2711 16-bit unpaged protected mode, CPUID : 106 835 ins/sec 114% / 121 994 1216% / 1 299 665
2712 32-bit unpaged protected mode, CPUID : 105 257 ins/sec 115% / 121 772 1235% / 1 300 860
2713 real mode, CPUID : 104 507 ins/sec 116% / 121 800 1228% / 1 283 848
2714CPUID EAX=1 : PASSED
2715 32-bit paged protected mode, RDTSC : 99 581 834 ins/sec 100% / 100 323 307 93% / 93 473 299
2716 32-bit pae protected mode, RDTSC : 99 620 585 ins/sec 100% / 99 960 952 84% / 83 968 839
2717 64-bit long mode, RDTSC : 100 540 009 ins/sec 100% / 100 946 372 93% / 93 652 826
2718 16-bit unpaged protected mode, RDTSC : 99 688 473 ins/sec 100% / 100 097 751 76% / 76 281 287
2719 32-bit unpaged protected mode, RDTSC : 98 385 857 ins/sec 102% / 100 510 404 94% / 93 379 536
2720 real mode, RDTSC : 100 087 967 ins/sec 101% / 101 386 138 93% / 93 234 999
2721RDTSC : PASSED
2722 32-bit paged protected mode, Read CR4 : 2 156 102 ins/sec 98% / 2 121 967 17114% / 369 009 009
2723 32-bit pae protected mode, Read CR4 : 2 163 820 ins/sec 98% / 2 133 804 17469% / 377 999 261
2724 64-bit long mode, Read CR4 : 2 164 822 ins/sec 98% / 2 128 698 18875% / 408 619 313
2725 16-bit unpaged protected mode, Read CR4 : 2 162 367 ins/sec 100% / 2 168 508 17132% / 370 477 568
2726 32-bit unpaged protected mode, Read CR4 : 2 163 189 ins/sec 100% / 2 169 808 16768% / 362 734 679
2727 real mode, Read CR4 : 2 162 436 ins/sec 100% / 2 164 914 15551% / 336 288 998
2728Read CR4 : PASSED
2729 real mode, 32-bit IN : 104 649 ins/sec 118% / 123 513 1028% / 1 075 831
2730 real mode, 32-bit OUT : 107 102 ins/sec 115% / 123 660 982% / 1 052 259
2731 real mode, 32-bit IN-to-ring-3 : 105 697 ins/sec 98% / 104 471 201% / 213 216
2732 real mode, 32-bit OUT-to-ring-3 : 105 830 ins/sec 98% / 104 598 198% / 210 495
2733 16-bit unpaged protected mode, 32-bit IN : 104 855 ins/sec 117% / 123 174 1029% / 1 079 591
2734 16-bit unpaged protected mode, 32-bit OUT : 107 529 ins/sec 115% / 124 250 992% / 1 067 053
2735 16-bit unpaged protected mode, 32-bit IN-to-ring-3 : 106 337 ins/sec 103% / 109 565 196% / 209 367
2736 16-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 107 558 ins/sec 100% / 108 237 191% / 206 387
2737 32-bit unpaged protected mode, 32-bit IN : 106 351 ins/sec 116% / 123 584 1016% / 1 081 325
2738 32-bit unpaged protected mode, 32-bit OUT : 106 424 ins/sec 116% / 124 252 995% / 1 059 408
2739 32-bit unpaged protected mode, 32-bit IN-to-ring-3 : 104 035 ins/sec 101% / 105 305 202% / 210 750
2740 32-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 103 831 ins/sec 102% / 106 919 205% / 213 198
2741 32-bit paged protected mode, 32-bit IN : 103 356 ins/sec 119% / 123 870 1041% / 1 076 463
2742 32-bit paged protected mode, 32-bit OUT : 107 177 ins/sec 115% / 124 302 998% / 1 069 655
2743 32-bit paged protected mode, 32-bit IN-to-ring-3 : 104 491 ins/sec 100% / 104 744 200% / 209 264
2744 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 106 603 ins/sec 97% / 103 849 197% / 210 219
2745 32-bit pae protected mode, 32-bit IN : 105 923 ins/sec 115% / 122 759 1041% / 1 103 261
2746 32-bit pae protected mode, 32-bit OUT : 107 083 ins/sec 117% / 126 057 1024% / 1 096 667
2747 32-bit pae protected mode, 32-bit IN-to-ring-3 : 106 114 ins/sec 97% / 103 496 199% / 211 312
2748 32-bit pae protected mode, 32-bit OUT-to-ring-3 : 105 675 ins/sec 96% / 102 096 198% / 209 890
2749 64-bit long mode, 32-bit IN : 105 800 ins/sec 113% / 120 006 1013% / 1 072 116
2750 64-bit long mode, 32-bit OUT : 105 635 ins/sec 113% / 120 375 997% / 1 053 655
2751 64-bit long mode, 32-bit IN-to-ring-3 : 105 274 ins/sec 95% / 100 763 197% / 208 026
2752 64-bit long mode, 32-bit OUT-to-ring-3 : 106 262 ins/sec 94% / 100 749 196% / 209 288
2753NOP I/O Port Access : PASSED
2754 32-bit paged protected mode, 32-bit read : 57 687 ins/sec 119% / 69 136 1197% / 690 548
2755 32-bit paged protected mode, 32-bit write : 57 957 ins/sec 118% / 68 935 1183% / 685 930
2756 32-bit paged protected mode, 32-bit read-to-ring-3 : 57 958 ins/sec 95% / 55 432 276% / 160 505
2757 32-bit paged protected mode, 32-bit write-to-ring-3 : 57 922 ins/sec 100% / 58 340 304% / 176 464
2758 32-bit pae protected mode, 32-bit read : 57 478 ins/sec 119% / 68 453 1141% / 656 159
2759 32-bit pae protected mode, 32-bit write : 57 226 ins/sec 118% / 68 097 1157% / 662 504
2760 32-bit pae protected mode, 32-bit read-to-ring-3 : 57 582 ins/sec 94% / 54 651 268% / 154 867
2761 32-bit pae protected mode, 32-bit write-to-ring-3 : 57 697 ins/sec 100% / 57 750 299% / 173 030
2762 64-bit long mode, 32-bit read : 57 128 ins/sec 118% / 67 779 1071% / 611 949
2763 64-bit long mode, 32-bit write : 57 127 ins/sec 118% / 67 632 1084% / 619 395
2764 64-bit long mode, 32-bit read-to-ring-3 : 57 181 ins/sec 94% / 54 123 265% / 151 937
2765 64-bit long mode, 32-bit write-to-ring-3 : 57 297 ins/sec 99% / 57 286 294% / 168 694
2766 16-bit unpaged protected mode, 32-bit read : 58 827 ins/sec 118% / 69 545 1185% / 697 602
2767 16-bit unpaged protected mode, 32-bit write : 58 678 ins/sec 118% / 69 442 1183% / 694 387
2768 16-bit unpaged protected mode, 32-bit read-to-ring-3 : 57 841 ins/sec 96% / 55 730 275% / 159 163
2769 16-bit unpaged protected mode, 32-bit write-to-ring-3 : 57 855 ins/sec 101% / 58 834 304% / 176 169
2770 32-bit unpaged protected mode, 32-bit read : 58 063 ins/sec 120% / 69 690 1233% / 716 444
2771 32-bit unpaged protected mode, 32-bit write : 57 936 ins/sec 120% / 69 633 1199% / 694 753
2772 32-bit unpaged protected mode, 32-bit read-to-ring-3 : 58 451 ins/sec 96% / 56 183 273% / 159 972
2773 32-bit unpaged protected mode, 32-bit write-to-ring-3 : 58 962 ins/sec 99% / 58 955 298% / 175 936
2774 real mode, 32-bit read : 58 571 ins/sec 118% / 69 478 1160% / 679 917
2775 real mode, 32-bit write : 58 418 ins/sec 118% / 69 320 1185% / 692 513
2776 real mode, 32-bit read-to-ring-3 : 58 072 ins/sec 96% / 55 751 274% / 159 145
2777 real mode, 32-bit write-to-ring-3 : 57 870 ins/sec 101% / 58 755 307% / 178 042
2778NOP MMIO Access : PASSED
2779SUCCESS
2780 * @endverbatim
2781 *
2782 * What we see here is:
2783 *
2784 * - The WinHv API approach is 10 to 12 times slower for exits we can
2785 * handle directly in ring-0 in the VBox AMD-V code.
2786 *
2787 * - The WinHv API approach is 2 to 3 times slower for exits we have to
2788 * go to ring-3 to handle with the VBox AMD-V code.
2789 *
2790 * - By using hypercalls and VID.SYS from ring-0 we gain between
2791 * 13% and 20% over the WinHv API on exits handled in ring-0.
2792 *
2793 * - For exits requiring ring-3 handling, we are between 6% slower and 3% faster
2794 * than the WinHv API.
2795 *
2796 *
2797 * As a side note, it looks like Hyper-V doesn't let the guest read CR4 but
2798 * triggers exits all the time. This isn't all that important these days since
2799 * OSes like Linux cache the CR4 value specifically to avoid these kinds of exits.
2800 *
2801 *
2802 * @subsubsection subsect_nem_win_benchmarks_bs2t1u1 17134/2018-10-02: Bootsector2-test1
2803 *
2804 * Update on 17134. While expectantly testing a couple of newer builds (17758,
2805 * 17763) hoping for some increases in performance, the numbers turned out
2806 * altogether worse than the June test run. So, we went back to the 1803
2807 * (17134) installation, made sure it was fully up to date (as per 2018-10-02)
2808 * and re-tested.
2809 *
2810 * The numbers had somehow turned significantly worse over the last 3-4 months,
2811 * dropping around 70% for the WinHv API test, more for Hypercalls + VID.
2812 *
2813 * @verbatim
2814TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V *
2815 32-bit paged protected mode, CPUID : 33 270 ins/sec 33 154
2816 real mode, CPUID : 33 534 ins/sec 32 711
2817 [snip]
2818 32-bit paged protected mode, RDTSC : 102 216 011 ins/sec 98 225 419
2819 real mode, RDTSC : 102 492 243 ins/sec 98 225 419
2820 [snip]
2821 32-bit paged protected mode, Read CR4 : 2 096 165 ins/sec 2 123 815
2822 real mode, Read CR4 : 2 081 047 ins/sec 2 075 151
2823 [snip]
2824 32-bit paged protected mode, 32-bit IN : 32 739 ins/sec 33 655
2825 32-bit paged protected mode, 32-bit OUT : 32 702 ins/sec 33 777
2826 32-bit paged protected mode, 32-bit IN-to-ring-3 : 32 579 ins/sec 29 985
2827 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 32 750 ins/sec 29 757
2828 [snip]
2829 32-bit paged protected mode, 32-bit read : 20 042 ins/sec 21 489
2830 32-bit paged protected mode, 32-bit write : 20 036 ins/sec 21 493
2831 32-bit paged protected mode, 32-bit read-to-ring-3 : 19 985 ins/sec 19 143
2832 32-bit paged protected mode, 32-bit write-to-ring-3 : 19 972 ins/sec 19 595
2833
2834 * @endverbatim
2835 *
2836 * Suspects are security updates and/or microcode updates installed since then.
2837 * Given that the RDTSC and CR4 numbers are reasonably unchanged, it seems that
2838 * the Hyper-V core loop (in hvax64.exe) isn't affected. Our ring-0 runloop
2839 * is equally affected as the ring-3 based runloop, so it cannot be ring
2840 * switching as such (unless the ring-0 loop is borked and we didn't notice yet).
2841 *
2842 * The issue is probably in the thread / process switching area, could be
2843 * something special for hyper-V interrupt delivery or worker thread switching.
2844 *
2845 * Really wish this thread ping-pong going on in VID.SYS could be eliminated!
2846 *
2847 *
2848 * @subsubsection subsect_nem_win_benchmarks_bs2t1u2 17763: Bootsector2-test1
2849 *
2850 * Some preliminary numbers for build 17763 on the 3.4 GHz AMD 1950X, the second
2851 * column will improve once we get time to have a look at the register page.
2852 *
2853 * There is a 50% performance loss here compared to the June numbers with
2854 * build 17134. The RDTSC numbers hint that it isn't in the Hyper-V core
2855 * (hvax64.exe), but something on the NT side.
2856 *
2857 * Clearing bit 20 in nt!KiSpeculationFeatures speeds things up (i.e. changing
2858 * the dword from 0x00300065 to 0x00200065 in windbg). This is checked by
2859 * nt!KePrepareToDispatchVirtualProcessor, making it a no-op if the flag is
2860 * clear. winhvr!WinHvpVpDispatchLoop calls that function before making
2861 * hypercall 0xc2, which presumably does the heavy VCpu lifting in hvax64.exe.
2862 *
2863 * @verbatim
2864TESTING... WinHv API Hypercalls + VID clr(bit-20) + WinHv API
2865 32-bit paged protected mode, CPUID : 54 145 ins/sec 51 436 130 076
2866 real mode, CPUID : 54 178 ins/sec 51 713 130 449
2867 [snip]
2868 32-bit paged protected mode, RDTSC : 98 927 639 ins/sec 100 254 552 100 549 882
2869 real mode, RDTSC : 99 601 206 ins/sec 100 886 699 100 470 957
2870 [snip]
2871 32-bit paged protected mode, 32-bit IN : 54 621 ins/sec 51 524 128 294
2872 32-bit paged protected mode, 32-bit OUT : 54 870 ins/sec 51 671 129 397
2873 32-bit paged protected mode, 32-bit IN-to-ring-3 : 54 624 ins/sec 43 964 127 874
2874 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 54 803 ins/sec 44 087 129 443
2875 [snip]
2876 32-bit paged protected mode, 32-bit read : 28 230 ins/sec 34 042 48 113
2877 32-bit paged protected mode, 32-bit write : 27 962 ins/sec 34 050 48 069
2878 32-bit paged protected mode, 32-bit read-to-ring-3 : 27 841 ins/sec 28 397 48 146
2879 32-bit paged protected mode, 32-bit write-to-ring-3 : 27 896 ins/sec 29 455 47 970
2880 * @endverbatim
2881 *
2882 *
2883 * @subsubsection subsect_nem_win_benchmarks_w2k 17134/2018-06-22: Windows 2000 Boot & Shutdown
2884 *
2885 * Timing the startup and automatic shutdown of a Windows 2000 SP4 guest serves
2886 * as a real world benchmark and example of why exit performance is important. When
2887 * Windows 2000 boots up it is doing a lot of VGA redrawing of the boot animation,
2888 * which is very costly. Not having installed guest additions leaves it in a VGA
2889 * mode after the bootup sequence is done, keeping up the screen access expenses,
2890 * though the graphics driver is more economical than the bootvid code.
2891 *
2892 * The VM was configured to automatically logon. A startup script was installed
2893 * to perform the automatic shutting down and powering off the VM (thru
2894 * vts_shutdown.exe -f -p). An offline snapshot of the VM was taken and restored
2895 * before each test run. The test run time is calculated from the monotonic
2896 * VBox.log timestamps, starting with the state change to 'RUNNING' and stopping
2897 * at 'POWERING_OFF'.
2898 *
2899 * The host OS and VirtualBox build is the same as for the bootsector2-test1
2900 * scenario.
2901 *
2902 * Results:
2903 *
2904 * - WinHv API for all but physical page mappings:
2905 * 32 min 12.19 seconds
2906 *
2907 * - The default NEM/win configuration where we put the main execution loop
2908 * in ring-0, using hypercalls when we can and VID for managing execution:
2909 * 3 min 23.18 seconds
2910 *
2911 * - Regular VirtualBox using AMD-V directly, hyper-V is disabled, main
2912 * execution loop in ring-0:
2913 * 58.09 seconds
2914 *
2915 * - WinHv API with exit history based optimizations:
2916 * 58.66 seconds
2917 *
2918 * - Hypercall + VID.SYS with exit history based optimizations:
2919 * 58.94 seconds
2920 *
2921 * With a well above average machine needing over half an hour for booting a
2922 * nearly 20 year old guest kind of says it all. The 13%-20% exit performance
2923 * increase we get by using hypercalls and VID.SYS directly pays off a lot here.
2924 * The 3m23s is almost acceptable in comparison to the half an hour.
2925 *
2926 * The similarity between the last three results strongly hints at Windows 2000
2927 * doing a lot of waiting during boot and shutdown and isn't the best testcase
2928 * once a basic performance level is reached.
2929 *
2930 *
2931 * @subsubsection subsection_iem_win_benchmarks_deb9_nat Debian 9 NAT performance
2932 *
2933 * This benchmark is about network performance over NAT from a 64-bit Debian 9
2934 * VM with a single CPU. For network performance measurements, we use our own
2935 * NetPerf tool (ValidationKit/utils/network/NetPerf.cpp) to measure latency
2936 * and throughput.
2937 *
2938 * The setups, builds and configurations are as in the previous benchmarks
2939 * (release r123172 on 1950X running 64-bit W10/17134 (2018-06-xx)). Please note
2940 * that the exit optimizations haven't yet been tuned with NetPerf in mind.
2941 *
2942 * The NAT network setup was selected here since it's the default one and the
2943 * slowest one. There is quite a bit of IPC with worker threads and packet
2944 * processing involved.
2945 *
2946 * Latency test is first up. This is a classic back and forth between the two
2947 * NetPerf instances, where the key measurement is the roundtrip latency. The
2948 * values here are the lowest result over 3-6 runs.
2949 *
2950 * Against host system:
2951 * - 152 258 ns/roundtrip - 100% - regular VirtualBox SVM
2952 * - 271 059 ns/roundtrip - 178% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
2953 * - 280 149 ns/roundtrip - 184% - Hypercalls + VID.SYS in ring-0
2954 * - 317 735 ns/roundtrip - 209% - Win HV API with exit optimizations.
2955 * - 342 440 ns/roundtrip - 225% - Win HV API
2956 *
2957 * Against a remote Windows 10 system over a 10Gbps link:
2958 * - 243 969 ns/roundtrip - 100% - regular VirtualBox SVM
2959 * - 384 427 ns/roundtrip - 158% - Win HV API with exit optimizations.
2960 * - 402 411 ns/roundtrip - 165% - Hypercalls + VID.SYS in ring-0
2961 * - 406 313 ns/roundtrip - 167% - Win HV API
2962 * - 413 160 ns/roundtrip - 169% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
2963 *
2964 * What we see here is:
2965 *
2966 * - Consistent and significant latency increase using Hyper-V compared
2967 * to directly harnessing AMD-V ourselves.
2968 *
2969 * - When talking to the host, it's clear that the hypercalls + VID.SYS
2970 * in ring-0 method pays off.
2971 *
2972 * - When talking to a different host, the numbers are closer and it
2973 * is no longer clear which Hyper-V execution method is better.
2974 *
2975 *
2976 * Throughput benchmarks are performed by one side pushing data full throttle
2977 * for 10 seconds (minus 1 second at each end of the test), then reversing
2978 * the roles and measuring it in the other direction. The tests ran 3-5 times
2979 * and below are the highest and lowest results in each direction.
2980 *
2981 * Receiving from host system:
2982 * - Regular VirtualBox SVM:
2983 * Max: 96 907 549 bytes/s - 100%
2984 * Min: 86 912 095 bytes/s - 100%
2985 * - Hypercalls + VID.SYS in ring-0:
2986 * Max: 84 036 544 bytes/s - 87%
2987 * Min: 64 978 112 bytes/s - 75%
2988 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2989 * Max: 77 760 699 bytes/s - 80%
2990 * Min: 72 677 171 bytes/s - 84%
2991 * - Win HV API with exit optimizations:
2992 * Max: 64 465 905 bytes/s - 67%
2993 * Min: 62 286 369 bytes/s - 72%
2994 * - Win HV API:
2995 * Max: 62 466 631 bytes/s - 64%
2996 * Min: 61 362 782 bytes/s - 70%
2997 *
2998 * Sending to the host system:
2999 * - Regular VirtualBox SVM:
3000 * Max: 87 728 652 bytes/s - 100%
3001 * Min: 86 923 198 bytes/s - 100%
3002 * - Hypercalls + VID.SYS in ring-0:
3003 * Max: 84 280 749 bytes/s - 96%
3004 * Min: 78 369 842 bytes/s - 90%
3005 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
3006 * Max: 84 119 932 bytes/s - 96%
3007 * Min: 77 396 811 bytes/s - 89%
3008 * - Win HV API:
3009 * Max: 81 714 377 bytes/s - 93%
3010 * Min: 78 697 419 bytes/s - 91%
3011 * - Win HV API with exit optimizations:
3012 * Max: 80 502 488 bytes/s - 91%
3013 * Min: 71 164 978 bytes/s - 82%
3014 *
3015 * Receiving from a remote Windows 10 system over a 10Gbps link:
3016 * - Hypercalls + VID.SYS in ring-0:
3017 * Max: 115 346 922 bytes/s - 136%
3018 * Min: 112 912 035 bytes/s - 137%
3019 * - Regular VirtualBox SVM:
3020 * Max: 84 517 504 bytes/s - 100%
3021 * Min: 82 597 049 bytes/s - 100%
3022 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
3023 * Max: 77 736 251 bytes/s - 92%
3024 * Min: 73 813 784 bytes/s - 89%
3025 * - Win HV API with exit optimizations:
3026 * Max: 63 035 587 bytes/s - 75%
3027 * Min: 57 538 380 bytes/s - 70%
3028 * - Win HV API:
3029 * Max: 62 279 185 bytes/s - 74%
3030 * Min: 56 813 866 bytes/s - 69%
3031 *
3032 * Sending to a remote Windows 10 system over a 10Gbps link:
3033 * - Win HV API with exit optimizations:
3034 * Max: 116 502 357 bytes/s - 103%
3035 * Min: 49 046 550 bytes/s - 59%
3036 * - Regular VirtualBox SVM:
3037 * Max: 113 030 991 bytes/s - 100%
3038 * Min: 83 059 511 bytes/s - 100%
3039 * - Hypercalls + VID.SYS in ring-0:
3040 * Max: 106 435 031 bytes/s - 94%
3041 * Min: 47 253 510 bytes/s - 57%
3042 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
3043 * Max: 94 842 287 bytes/s - 84%
3044 * Min: 68 362 172 bytes/s - 82%
3045 * - Win HV API:
3046 * Max: 65 165 225 bytes/s - 58%
3047 * Min: 47 246 573 bytes/s - 57%
3048 *
3049 * What we see here is:
3050 *
3051 * - Again consistent numbers when talking to the host. Showing that the
3052 * ring-0 approach is preferable to the ring-3 one.
3053 *
3054 * - Again when talking to a remote host, things get more difficult to
3055 * make sense of. The spread is larger and direct AMD-V gets beaten by
3056 * a different Hyper-V approach in each direction.
3057 *
3058 * - However, if we treat the first entry (remote host) as weird spikes, the
3059 * other entries are consistently worse compared to direct AMD-V. For the
3060 * send case we get really bad results for WinHV.
3061 *
3062 */
3063
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette