VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp@ 73269

Last change on this file since 73269 was 73182, checked in by vboxsync, 6 years ago

NEM/win: Init the A20 gate state correctly. Issue with EFI since it doesn't ever disable A20. bugref:9044

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 128.7 KB
Line 
1/* $Id: NEMR3Native-win.cpp 73182 2018-07-17 14:47:55Z vboxsync $ */
2/** @file
3 * NEM - Native execution manager, native ring-3 Windows backend.
4 *
5 * Log group 2: Exit logging.
6 * Log group 3: Log context on exit.
7 * Log group 5: Ring-3 memory management
8 * Log group 6: Ring-0 memory management
9 * Log group 12: API intercepts.
10 */
11
12/*
13 * Copyright (C) 2018 Oracle Corporation
14 *
15 * This file is part of VirtualBox Open Source Edition (OSE), as
16 * available from http://www.virtualbox.org. This file is free software;
17 * you can redistribute it and/or modify it under the terms of the GNU
18 * General Public License (GPL) as published by the Free Software
19 * Foundation, in version 2 as it comes in the "COPYING" file of the
20 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
21 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
22 */
23
24
25/*********************************************************************************************************************************
26* Header Files *
27*********************************************************************************************************************************/
28#define LOG_GROUP LOG_GROUP_NEM
29#define VMCPU_INCL_CPUM_GST_CTX
30#include <iprt/nt/nt-and-windows.h>
31#include <iprt/nt/hyperv.h>
32#include <iprt/nt/vid.h>
33#include <WinHvPlatform.h>
34
35#ifndef _WIN32_WINNT_WIN10
36# error "Missing _WIN32_WINNT_WIN10"
37#endif
38#ifndef _WIN32_WINNT_WIN10_RS1 /* Missing define, causing trouble for us. */
39# define _WIN32_WINNT_WIN10_RS1 (_WIN32_WINNT_WIN10 + 1)
40#endif
41#include <sysinfoapi.h>
42#include <debugapi.h>
43#include <errhandlingapi.h>
44#include <fileapi.h>
45#include <winerror.h> /* no api header for this. */
46
47#include <VBox/vmm/nem.h>
48#include <VBox/vmm/iem.h>
49#include <VBox/vmm/em.h>
50#include <VBox/vmm/apic.h>
51#include <VBox/vmm/pdm.h>
52#include <VBox/vmm/dbgftrace.h>
53#include "NEMInternal.h"
54#include <VBox/vmm/vm.h>
55
56#include <iprt/ldr.h>
57#include <iprt/path.h>
58#include <iprt/string.h>
59#include <iprt/system.h>
60
61
62/*********************************************************************************************************************************
63* Defined Constants And Macros *
64*********************************************************************************************************************************/
65#ifdef LOG_ENABLED
66# define NEM_WIN_INTERCEPT_NT_IO_CTLS
67#endif
68
69/** VID I/O control detection: Fake partition handle input. */
70#define NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE ((HANDLE)(uintptr_t)38479125)
71/** VID I/O control detection: Fake partition ID return. */
72#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID UINT64_C(0xfa1e000042424242)
73/** VID I/O control detection: Fake CPU index input. */
74#define NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX UINT32_C(42)
75/** VID I/O control detection: Fake timeout input. */
76#define NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT UINT32_C(0x00080286)
77
78
79/*********************************************************************************************************************************
80* Global Variables *
81*********************************************************************************************************************************/
82/** @name APIs imported from WinHvPlatform.dll
83 * @{ */
84static decltype(WHvGetCapability) * g_pfnWHvGetCapability;
85static decltype(WHvCreatePartition) * g_pfnWHvCreatePartition;
86static decltype(WHvSetupPartition) * g_pfnWHvSetupPartition;
87static decltype(WHvDeletePartition) * g_pfnWHvDeletePartition;
88static decltype(WHvGetPartitionProperty) * g_pfnWHvGetPartitionProperty;
89static decltype(WHvSetPartitionProperty) * g_pfnWHvSetPartitionProperty;
90static decltype(WHvMapGpaRange) * g_pfnWHvMapGpaRange;
91static decltype(WHvUnmapGpaRange) * g_pfnWHvUnmapGpaRange;
92static decltype(WHvTranslateGva) * g_pfnWHvTranslateGva;
93#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
94static decltype(WHvCreateVirtualProcessor) * g_pfnWHvCreateVirtualProcessor;
95static decltype(WHvDeleteVirtualProcessor) * g_pfnWHvDeleteVirtualProcessor;
96static decltype(WHvRunVirtualProcessor) * g_pfnWHvRunVirtualProcessor;
97static decltype(WHvCancelRunVirtualProcessor) * g_pfnWHvCancelRunVirtualProcessor;
98static decltype(WHvGetVirtualProcessorRegisters) * g_pfnWHvGetVirtualProcessorRegisters;
99static decltype(WHvSetVirtualProcessorRegisters) * g_pfnWHvSetVirtualProcessorRegisters;
100#endif
101/** @} */
102
103/** @name APIs imported from Vid.dll
104 * @{ */
105static decltype(VidGetHvPartitionId) *g_pfnVidGetHvPartitionId;
106static decltype(VidStartVirtualProcessor) *g_pfnVidStartVirtualProcessor;
107static decltype(VidStopVirtualProcessor) *g_pfnVidStopVirtualProcessor;
108static decltype(VidMessageSlotMap) *g_pfnVidMessageSlotMap;
109static decltype(VidMessageSlotHandleAndGetNext) *g_pfnVidMessageSlotHandleAndGetNext;
110#ifdef LOG_ENABLED
111static decltype(VidGetVirtualProcessorState) *g_pfnVidGetVirtualProcessorState;
112static decltype(VidSetVirtualProcessorState) *g_pfnVidSetVirtualProcessorState;
113static decltype(VidGetVirtualProcessorRunningStatus) *g_pfnVidGetVirtualProcessorRunningStatus;
114#endif
115/** @} */
116
/** The Windows build number (statically initialised to 17134). */
static uint32_t g_uBuildNo = 17134;
119
120
121
122/**
123 * Import instructions.
124 */
125static const struct
126{
127 uint8_t idxDll; /**< 0 for WinHvPlatform.dll, 1 for vid.dll. */
128 bool fOptional; /**< Set if import is optional. */
129 PFNRT *ppfn; /**< The function pointer variable. */
130 const char *pszName; /**< The function name. */
131} g_aImports[] =
132{
133#define NEM_WIN_IMPORT(a_idxDll, a_fOptional, a_Name) { (a_idxDll), (a_fOptional), (PFNRT *)&RT_CONCAT(g_pfn,a_Name), #a_Name }
134 NEM_WIN_IMPORT(0, false, WHvGetCapability),
135 NEM_WIN_IMPORT(0, false, WHvCreatePartition),
136 NEM_WIN_IMPORT(0, false, WHvSetupPartition),
137 NEM_WIN_IMPORT(0, false, WHvDeletePartition),
138 NEM_WIN_IMPORT(0, false, WHvGetPartitionProperty),
139 NEM_WIN_IMPORT(0, false, WHvSetPartitionProperty),
140 NEM_WIN_IMPORT(0, false, WHvMapGpaRange),
141 NEM_WIN_IMPORT(0, false, WHvUnmapGpaRange),
142 NEM_WIN_IMPORT(0, false, WHvTranslateGva),
143#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
144 NEM_WIN_IMPORT(0, false, WHvCreateVirtualProcessor),
145 NEM_WIN_IMPORT(0, false, WHvDeleteVirtualProcessor),
146 NEM_WIN_IMPORT(0, false, WHvRunVirtualProcessor),
147 NEM_WIN_IMPORT(0, false, WHvCancelRunVirtualProcessor),
148 NEM_WIN_IMPORT(0, false, WHvGetVirtualProcessorRegisters),
149 NEM_WIN_IMPORT(0, false, WHvSetVirtualProcessorRegisters),
150#endif
151 NEM_WIN_IMPORT(1, false, VidGetHvPartitionId),
152 NEM_WIN_IMPORT(1, false, VidMessageSlotMap),
153 NEM_WIN_IMPORT(1, false, VidMessageSlotHandleAndGetNext),
154 NEM_WIN_IMPORT(1, false, VidStartVirtualProcessor),
155 NEM_WIN_IMPORT(1, false, VidStopVirtualProcessor),
156#ifdef LOG_ENABLED
157 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorState),
158 NEM_WIN_IMPORT(1, false, VidSetVirtualProcessorState),
159 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorRunningStatus),
160#endif
161#undef NEM_WIN_IMPORT
162};
163
164
165/** The real NtDeviceIoControlFile API in NTDLL. */
166static decltype(NtDeviceIoControlFile) *g_pfnNtDeviceIoControlFile;
167/** Pointer to the NtDeviceIoControlFile import table entry. */
168static decltype(NtDeviceIoControlFile) **g_ppfnVidNtDeviceIoControlFile;
169/** Info about the VidGetHvPartitionId I/O control interface. */
170static NEMWINIOCTL g_IoCtlGetHvPartitionId;
171/** Info about the VidStartVirtualProcessor I/O control interface. */
172static NEMWINIOCTL g_IoCtlStartVirtualProcessor;
173/** Info about the VidStopVirtualProcessor I/O control interface. */
174static NEMWINIOCTL g_IoCtlStopVirtualProcessor;
175/** Info about the VidMessageSlotHandleAndGetNext I/O control interface. */
176static NEMWINIOCTL g_IoCtlMessageSlotHandleAndGetNext;
177#ifdef LOG_ENABLED
178/** Info about the VidMessageSlotMap I/O control interface - for logging. */
179static NEMWINIOCTL g_IoCtlMessageSlotMap;
180/* Info about the VidGetVirtualProcessorState I/O control interface - for logging. */
181static NEMWINIOCTL g_IoCtlGetVirtualProcessorState;
182/* Info about the VidSetVirtualProcessorState I/O control interface - for logging. */
183static NEMWINIOCTL g_IoCtlSetVirtualProcessorState;
184/** Pointer to what nemR3WinIoctlDetector_ForLogging should fill in. */
185static NEMWINIOCTL *g_pIoCtlDetectForLogging;
186#endif
187
#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
/** @name Mapping slot for CPU #0.
 *
 * Captured by the NtDeviceIoControlFile logging wrapper; only present when
 * NEM_WIN_INTERCEPT_NT_IO_CTLS is defined.
 * @{ */
/** The message slot mapping header, NULL until captured. */
static VID_MESSAGE_MAPPING_HEADER            *g_pMsgSlotMapping = NULL;
/** The Hyper-V message header located right after the mapping header. */
static const HV_MESSAGE_HEADER               *g_pHvMsgHdr;
/** The x64 intercept message header located right after the Hyper-V message header. */
static const HV_X64_INTERCEPT_MESSAGE_HEADER *g_pX64MsgHdr;
/** @} */
#endif
196
197
/*
 * Let the preprocessor alias the API names to the import variables, so the
 * code can be written using the official names (better autocompletion).
 * Skipped when IN_SLICKEDIT is defined.
 *
 * NOTE(review): WHvGetRunExitContextSize is aliased here although no
 * g_pfnWHvGetRunExitContextSize variable is declared with the other import
 * variables in this file -- confirm whether the alias is still needed.
 */
#ifndef IN_SLICKEDIT
# define WHvGetCapability                   g_pfnWHvGetCapability
# define WHvCreatePartition                 g_pfnWHvCreatePartition
# define WHvSetupPartition                  g_pfnWHvSetupPartition
# define WHvDeletePartition                 g_pfnWHvDeletePartition
# define WHvGetPartitionProperty            g_pfnWHvGetPartitionProperty
# define WHvSetPartitionProperty            g_pfnWHvSetPartitionProperty
# define WHvMapGpaRange                     g_pfnWHvMapGpaRange
# define WHvUnmapGpaRange                   g_pfnWHvUnmapGpaRange
# define WHvTranslateGva                    g_pfnWHvTranslateGva
# define WHvCreateVirtualProcessor          g_pfnWHvCreateVirtualProcessor
# define WHvDeleteVirtualProcessor          g_pfnWHvDeleteVirtualProcessor
# define WHvRunVirtualProcessor             g_pfnWHvRunVirtualProcessor
# define WHvGetRunExitContextSize           g_pfnWHvGetRunExitContextSize
# define WHvCancelRunVirtualProcessor       g_pfnWHvCancelRunVirtualProcessor
# define WHvGetVirtualProcessorRegisters    g_pfnWHvGetVirtualProcessorRegisters
# define WHvSetVirtualProcessorRegisters    g_pfnWHvSetVirtualProcessorRegisters

# define VidMessageSlotHandleAndGetNext     g_pfnVidMessageSlotHandleAndGetNext
# define VidStartVirtualProcessor           g_pfnVidStartVirtualProcessor
# define VidStopVirtualProcessor            g_pfnVidStopVirtualProcessor

#endif
224
/** WHV_MEMORY_ACCESS_TYPE names.
 * Presumably indexed by the access type value, with the last entry acting as
 * a catch-all -- confirm at the use sites. */
static const char * const g_apszWHvMemAccesstypes[4] =
{
    "read",
    "write",
    "exec",
    "!undefined!"
};
227
228
229/*********************************************************************************************************************************
230* Internal Functions *
231*********************************************************************************************************************************/
232
233/*
234 * Instantiate the code we share with ring-0.
235 */
236#ifdef NEM_WIN_USE_OUR_OWN_RUN_API
237# define NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
238#else
239# undef NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
240#endif
241#include "../VMMAll/NEMAllNativeTemplate-win.cpp.h"
242
243
244
#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
/**
 * Logging interceptor for the NtDeviceIoControlFile calls made by VID.DLL.
 *
 * The request is passed on unmodified to the real NTDLL API; the input bytes,
 * the call parameters with the resulting status, and the output bytes are
 * written to log group 12.  This is very handy for figuring out why an API
 * call fails.
 *
 * As a side effect the first suitable 0x2210cc request is used to capture the
 * message slot mapping pointer, so the current message header can be logged
 * for selected requests afterwards.
 *
 * @returns The NT status code returned by the real NtDeviceIoControlFile.
 * @param   hFile           The file (device) handle.
 * @param   hEvt            Optional event handle.
 * @param   pfnApcCallback  Optional APC callback.
 * @param   pvApcCtx        Optional APC context.
 * @param   pIos            The I/O status block (dereferenced for logging).
 * @param   uFunction       The I/O control code.
 * @param   pvInput         The input buffer.
 * @param   cbInput         The size of the input buffer.
 * @param   pvOutput        The output buffer.
 * @param   cbOutput        The size of the output buffer.
 */
static NTSTATUS WINAPI
nemR3WinLogWrapper_NtDeviceIoControlFile(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
                                         PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
                                         PVOID pvOutput, ULONG cbOutput)
{
    /*
     * Translate the I/O control code into something readable.  The table is
     * searched in order and the first match wins; unknown codes are formatted
     * as a hex number.
     */
    static const struct
    {
        NEMWINIOCTL const  *pIoCtl;
        const char         *pszName;
    } s_aKnownIoCtls[] =
    {
        { &g_IoCtlMessageSlotHandleAndGetNext, "VidMessageSlotHandleAndGetNext" },
        { &g_IoCtlStartVirtualProcessor,       "VidStartVirtualProcessor" },
        { &g_IoCtlStopVirtualProcessor,        "VidStopVirtualProcessor" },
        { &g_IoCtlMessageSlotMap,              "VidMessageSlotMap" },
        { &g_IoCtlGetVirtualProcessorState,    "VidGetVirtualProcessorState" },
        { &g_IoCtlSetVirtualProcessorState,    "VidSetVirtualProcessorState" },
    };
    char        szUnknown[32];
    const char *pszName = NULL;
    for (unsigned idx = 0; idx < RT_ELEMENTS(s_aKnownIoCtls); idx++)
        if (uFunction == s_aKnownIoCtls[idx].pIoCtl->uFunction)
        {
            pszName = s_aKnownIoCtls[idx].pszName;
            break;
        }
    if (!pszName)
    {
        RTStrPrintf(szUnknown, sizeof(szUnknown), "%#x", uFunction);
        pszName = szUnknown;
    }

    /*
     * Log the (first 32 bytes of the) input, do the real call and log the outcome.
     */
    if (cbInput > 0 && pvInput)
        Log12(("VID!NtDeviceIoControlFile: %s/input: %.*Rhxs\n", pszName, RT_MIN(cbInput, 32), pvInput));

    NTSTATUS const rcNt = g_pfnNtDeviceIoControlFile(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, uFunction,
                                                     pvInput, cbInput, pvOutput, cbOutput);

    if (hEvt || pfnApcCallback || pvApcCtx)
        Log12(("VID!NtDeviceIoControlFile: hFile=%#zx hEvt=%#zx Apc=%p/%p pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
               hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, pIos->Status, pIos->Information, pszName,
               pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));
    else /* Shorter form when no event and no APC is involved. */
        Log12(("VID!NtDeviceIoControlFile: hFile=%#zx pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
               hFile, pIos, pIos->Status, pIos->Information, pszName, pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));

    /*
     * Log the (first 32 bytes of the) output and capture the message slot
     * mapping the first time a big enough 0x2210cc response is seen.
     */
    if (cbOutput > 0 && pvOutput)
    {
        Log12(("VID!NtDeviceIoControlFile: %s/output: %.*Rhxs\n", pszName, RT_MIN(cbOutput, 32), pvOutput));
        if (   uFunction == 0x2210cc
            && g_pMsgSlotMapping == NULL
            && cbOutput >= sizeof(void *))
        {
            /* The output starts with the mapping address; the message headers follow the mapping header. */
            g_pMsgSlotMapping = *(VID_MESSAGE_MAPPING_HEADER **)pvOutput;
            g_pHvMsgHdr       = (const HV_MESSAGE_HEADER *)(g_pMsgSlotMapping + 1);
            g_pX64MsgHdr      = (const HV_X64_INTERCEPT_MESSAGE_HEADER *)(g_pHvMsgHdr + 1);
            Log12(("VID!NtDeviceIoControlFile: Message slot mapping: %p\n", g_pMsgSlotMapping));
        }
    }

    /*
     * Once the mapping is known, log the current message for the requests
     * dealing with the message slot.
     */
    bool const fMsgRelated = uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction
                          || uFunction == g_IoCtlStopVirtualProcessor.uFunction
                          || uFunction == g_IoCtlMessageSlotMap.uFunction;
    if (g_pMsgSlotMapping && fMsgRelated)
        Log12(("VID!NtDeviceIoControlFile: enmVidMsgType=%#x cb=%#x msg=%#x payload=%u cs:rip=%04x:%08RX64 (%s)\n",
               g_pMsgSlotMapping->enmVidMsgType, g_pMsgSlotMapping->cbMessage,
               g_pHvMsgHdr->MessageType, g_pHvMsgHdr->PayloadSize,
               g_pX64MsgHdr->CsSegment.Selector, g_pX64MsgHdr->Rip, pszName));

    return rcNt;
}
#endif /* NEM_WIN_INTERCEPT_NT_IO_CTLS */
312
313
314/**
315 * Patches the call table of VID.DLL so we can intercept NtDeviceIoControlFile.
316 *
317 * This is for used to figure out the I/O control codes and in logging builds
318 * for logging API calls that WinHvPlatform.dll does.
319 *
320 * @returns VBox status code.
321 * @param hLdrModVid The VID module handle.
322 * @param pErrInfo Where to return additional error information.
323 */
324static int nemR3WinInitVidIntercepts(RTLDRMOD hLdrModVid, PRTERRINFO pErrInfo)
325{
326 /*
327 * Locate the real API.
328 */
329 g_pfnNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) *)RTLdrGetSystemSymbol("NTDLL.DLL", "NtDeviceIoControlFile");
330 AssertReturn(g_pfnNtDeviceIoControlFile != NULL,
331 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to resolve NtDeviceIoControlFile from NTDLL.DLL"));
332
333 /*
334 * Locate the PE header and get what we need from it.
335 */
336 uint8_t const *pbImage = (uint8_t const *)RTLdrGetNativeHandle(hLdrModVid);
337 IMAGE_DOS_HEADER const *pMzHdr = (IMAGE_DOS_HEADER const *)pbImage;
338 AssertReturn(pMzHdr->e_magic == IMAGE_DOS_SIGNATURE,
339 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL mapping doesn't start with MZ signature: %#x", pMzHdr->e_magic));
340 IMAGE_NT_HEADERS const *pNtHdrs = (IMAGE_NT_HEADERS const *)&pbImage[pMzHdr->e_lfanew];
341 AssertReturn(pNtHdrs->Signature == IMAGE_NT_SIGNATURE,
342 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL has invalid PE signaturre: %#x @%#x",
343 pNtHdrs->Signature, pMzHdr->e_lfanew));
344
345 uint32_t const cbImage = pNtHdrs->OptionalHeader.SizeOfImage;
346 IMAGE_DATA_DIRECTORY const ImportDir = pNtHdrs->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
347
348 /*
349 * Walk the import descriptor table looking for NTDLL.DLL.
350 */
351 AssertReturn( ImportDir.Size > 0
352 && ImportDir.Size < cbImage,
353 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory size: %#x", ImportDir.Size));
354 AssertReturn( ImportDir.VirtualAddress > 0
355 && ImportDir.VirtualAddress <= cbImage - ImportDir.Size,
356 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory RVA: %#x", ImportDir.VirtualAddress));
357
358 for (PIMAGE_IMPORT_DESCRIPTOR pImps = (PIMAGE_IMPORT_DESCRIPTOR)&pbImage[ImportDir.VirtualAddress];
359 pImps->Name != 0 && pImps->FirstThunk != 0;
360 pImps++)
361 {
362 AssertReturn(pImps->Name < cbImage,
363 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory entry name: %#x", pImps->Name));
364 const char *pszModName = (const char *)&pbImage[pImps->Name];
365 if (RTStrICmpAscii(pszModName, "ntdll.dll"))
366 continue;
367 AssertReturn(pImps->FirstThunk < cbImage,
368 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
369 AssertReturn(pImps->OriginalFirstThunk < cbImage,
370 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
371
372 /*
373 * Walk the thunks table(s) looking for NtDeviceIoControlFile.
374 */
375 PIMAGE_THUNK_DATA pFirstThunk = (PIMAGE_THUNK_DATA)&pbImage[pImps->FirstThunk]; /* update this. */
376 PIMAGE_THUNK_DATA pThunk = pImps->OriginalFirstThunk == 0 /* read from this. */
377 ? (PIMAGE_THUNK_DATA)&pbImage[pImps->FirstThunk]
378 : (PIMAGE_THUNK_DATA)&pbImage[pImps->OriginalFirstThunk];
379 while (pThunk->u1.Ordinal != 0)
380 {
381 if (!(pThunk->u1.Ordinal & IMAGE_ORDINAL_FLAG32))
382 {
383 AssertReturn(pThunk->u1.Ordinal > 0 && pThunk->u1.Ordinal < cbImage,
384 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
385
386 const char *pszSymbol = (const char *)&pbImage[(uintptr_t)pThunk->u1.AddressOfData + 2];
387 if (strcmp(pszSymbol, "NtDeviceIoControlFile") == 0)
388 {
389 DWORD fOldProt = PAGE_READONLY;
390 VirtualProtect(&pFirstThunk->u1.Function, sizeof(uintptr_t), PAGE_EXECUTE_READWRITE, &fOldProt);
391 g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)&pFirstThunk->u1.Function;
392 /* Don't restore the protection here, so we modify the NtDeviceIoControlFile pointer later. */
393 }
394 }
395
396 pThunk++;
397 pFirstThunk++;
398 }
399 }
400
401 if (*g_ppfnVidNtDeviceIoControlFile)
402 {
403#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
404 *g_ppfnVidNtDeviceIoControlFile = nemR3WinLogWrapper_NtDeviceIoControlFile;
405#endif
406 return VINF_SUCCESS;
407 }
408 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to patch NtDeviceIoControlFile import in VID.DLL!");
409}
410
411
412/**
413 * Worker for nemR3NativeInit that probes and load the native API.
414 *
415 * @returns VBox status code.
416 * @param fForced Whether the HMForced flag is set and we should
417 * fail if we cannot initialize.
418 * @param pErrInfo Where to always return error info.
419 */
420static int nemR3WinInitProbeAndLoad(bool fForced, PRTERRINFO pErrInfo)
421{
422 /*
423 * Check that the DLL files we need are present, but without loading them.
424 * We'd like to avoid loading them unnecessarily.
425 */
426 WCHAR wszPath[MAX_PATH + 64];
427 UINT cwcPath = GetSystemDirectoryW(wszPath, MAX_PATH);
428 if (cwcPath >= MAX_PATH || cwcPath < 2)
429 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "GetSystemDirectoryW failed (%#x / %u)", cwcPath, GetLastError());
430
431 if (wszPath[cwcPath - 1] != '\\' || wszPath[cwcPath - 1] != '/')
432 wszPath[cwcPath++] = '\\';
433 RTUtf16CopyAscii(&wszPath[cwcPath], RT_ELEMENTS(wszPath) - cwcPath, "WinHvPlatform.dll");
434 if (GetFileAttributesW(wszPath) == INVALID_FILE_ATTRIBUTES)
435 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "The native API dll was not found (%ls)", wszPath);
436
437 /*
438 * Check that we're in a VM and that the hypervisor identifies itself as Hyper-V.
439 */
440 if (!ASMHasCpuId())
441 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID support");
442 if (!ASMIsValidStdRange(ASMCpuId_EAX(0)))
443 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID leaf #1");
444 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_HVP))
445 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Not in a hypervisor partition (HVP=0)");
446
447 uint32_t cMaxHyperLeaf = 0;
448 uint32_t uEbx = 0;
449 uint32_t uEcx = 0;
450 uint32_t uEdx = 0;
451 ASMCpuIdExSlow(0x40000000, 0, 0, 0, &cMaxHyperLeaf, &uEbx, &uEcx, &uEdx);
452 if (!ASMIsValidHypervisorRange(cMaxHyperLeaf))
453 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Invalid hypervisor CPUID range (%#x %#x %#x %#x)",
454 cMaxHyperLeaf, uEbx, uEcx, uEdx);
455 if ( uEbx != UINT32_C(0x7263694d) /* Micr */
456 || uEcx != UINT32_C(0x666f736f) /* osof */
457 || uEdx != UINT32_C(0x76482074) /* t Hv */)
458 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
459 "Not Hyper-V CPUID signature: %#x %#x %#x (expected %#x %#x %#x)",
460 uEbx, uEcx, uEdx, UINT32_C(0x7263694d), UINT32_C(0x666f736f), UINT32_C(0x76482074));
461 if (cMaxHyperLeaf < UINT32_C(0x40000005))
462 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Too narrow hypervisor CPUID range (%#x)", cMaxHyperLeaf);
463
464 /** @todo would be great if we could recognize a root partition from the
465 * CPUID info, but I currently don't dare do that. */
466
467 /*
468 * Now try load the DLLs and resolve the APIs.
469 */
470 static const char * const s_apszDllNames[2] = { "WinHvPlatform.dll", "vid.dll" };
471 RTLDRMOD ahMods[2] = { NIL_RTLDRMOD, NIL_RTLDRMOD };
472 int rc = VINF_SUCCESS;
473 for (unsigned i = 0; i < RT_ELEMENTS(s_apszDllNames); i++)
474 {
475 int rc2 = RTLdrLoadSystem(s_apszDllNames[i], true /*fNoUnload*/, &ahMods[i]);
476 if (RT_FAILURE(rc2))
477 {
478 if (!RTErrInfoIsSet(pErrInfo))
479 RTErrInfoSetF(pErrInfo, rc2, "Failed to load API DLL: %s: %Rrc", s_apszDllNames[i], rc2);
480 else
481 RTErrInfoAddF(pErrInfo, rc2, "; %s: %Rrc", s_apszDllNames[i], rc2);
482 ahMods[i] = NIL_RTLDRMOD;
483 rc = VERR_NEM_INIT_FAILED;
484 }
485 }
486 if (RT_SUCCESS(rc))
487 rc = nemR3WinInitVidIntercepts(ahMods[1], pErrInfo);
488 if (RT_SUCCESS(rc))
489 {
490 for (unsigned i = 0; i < RT_ELEMENTS(g_aImports); i++)
491 {
492 int rc2 = RTLdrGetSymbol(ahMods[g_aImports[i].idxDll], g_aImports[i].pszName, (void **)g_aImports[i].ppfn);
493 if (RT_FAILURE(rc2))
494 {
495 *g_aImports[i].ppfn = NULL;
496
497 LogRel(("NEM: %s: Failed to import %s!%s: %Rrc",
498 g_aImports[i].fOptional ? "info" : fForced ? "fatal" : "error",
499 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName, rc2));
500 if (!g_aImports[i].fOptional)
501 {
502 if (RTErrInfoIsSet(pErrInfo))
503 RTErrInfoAddF(pErrInfo, rc2, ", %s!%s",
504 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
505 else
506 rc = RTErrInfoSetF(pErrInfo, rc2, "Failed to import: %s!%s",
507 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
508 Assert(RT_FAILURE(rc));
509 }
510 }
511 }
512 if (RT_SUCCESS(rc))
513 {
514 Assert(!RTErrInfoIsSet(pErrInfo));
515 }
516 }
517
518 for (unsigned i = 0; i < RT_ELEMENTS(ahMods); i++)
519 RTLdrClose(ahMods[i]);
520 return rc;
521}
522
523
524/**
525 * Wrapper for different WHvGetCapability signatures.
526 */
527DECLINLINE(HRESULT) WHvGetCapabilityWrapper(WHV_CAPABILITY_CODE enmCap, WHV_CAPABILITY *pOutput, uint32_t cbOutput)
528{
529 return g_pfnWHvGetCapability(enmCap, pOutput, cbOutput, NULL);
530}
531
532
533/**
534 * Worker for nemR3NativeInit that gets the hypervisor capabilities.
535 *
536 * @returns VBox status code.
537 * @param pVM The cross context VM structure.
538 * @param pErrInfo Where to always return error info.
539 */
540static int nemR3WinInitCheckCapabilities(PVM pVM, PRTERRINFO pErrInfo)
541{
542#define NEM_LOG_REL_CAP_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %-38s= " a_szFmt "\n", a_szField, a_Value))
543#define NEM_LOG_REL_CAP_SUB_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %36s: " a_szFmt "\n", a_szField, a_Value))
544#define NEM_LOG_REL_CAP_SUB(a_szField, a_Value) NEM_LOG_REL_CAP_SUB_EX(a_szField, "%d", a_Value)
545
546 /*
547 * Is the hypervisor present with the desired capability?
548 *
549 * In build 17083 this translates into:
550 * - CPUID[0x00000001].HVP is set
551 * - CPUID[0x40000000] == "Microsoft Hv"
552 * - CPUID[0x40000001].eax == "Hv#1"
553 * - CPUID[0x40000003].ebx[12] is set.
554 * - VidGetExoPartitionProperty(INVALID_HANDLE_VALUE, 0x60000, &Ignored) returns
555 * a non-zero value.
556 */
557 /**
558 * @todo Someone at Microsoft please explain weird API design:
559 * 1. Pointless CapabilityCode duplication int the output;
560 * 2. No output size.
561 */
562 WHV_CAPABILITY Caps;
563 RT_ZERO(Caps);
564 SetLastError(0);
565 HRESULT hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeHypervisorPresent, &Caps, sizeof(Caps));
566 DWORD rcWin = GetLastError();
567 if (FAILED(hrc))
568 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
569 "WHvGetCapability/WHvCapabilityCodeHypervisorPresent failed: %Rhrc (Last=%#x/%u)",
570 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
571 if (!Caps.HypervisorPresent)
572 {
573 if (!RTPathExists(RTPATH_NT_PASSTHRU_PREFIX "Device\\VidExo"))
574 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
575 "WHvCapabilityCodeHypervisorPresent is FALSE! Make sure you have enabled the 'Windows Hypervisor Platform' feature.");
576 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "WHvCapabilityCodeHypervisorPresent is FALSE! (%u)", rcWin);
577 }
578 LogRel(("NEM: WHvCapabilityCodeHypervisorPresent is TRUE, so this might work...\n"));
579
580
581 /*
582 * Check what extended VM exits are supported.
583 */
584 RT_ZERO(Caps);
585 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExtendedVmExits, &Caps, sizeof(Caps));
586 if (FAILED(hrc))
587 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
588 "WHvGetCapability/WHvCapabilityCodeExtendedVmExits failed: %Rhrc (Last=%#x/%u)",
589 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
590 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeExtendedVmExits", "%'#018RX64", Caps.ExtendedVmExits.AsUINT64);
591 pVM->nem.s.fExtendedMsrExit = RT_BOOL(Caps.ExtendedVmExits.X64MsrExit);
592 pVM->nem.s.fExtendedCpuIdExit = RT_BOOL(Caps.ExtendedVmExits.X64CpuidExit);
593 pVM->nem.s.fExtendedXcptExit = RT_BOOL(Caps.ExtendedVmExits.ExceptionExit);
594 NEM_LOG_REL_CAP_SUB("fExtendedMsrExit", pVM->nem.s.fExtendedMsrExit);
595 NEM_LOG_REL_CAP_SUB("fExtendedCpuIdExit", pVM->nem.s.fExtendedCpuIdExit);
596 NEM_LOG_REL_CAP_SUB("fExtendedXcptExit", pVM->nem.s.fExtendedXcptExit);
597 if (Caps.ExtendedVmExits.AsUINT64 & ~(uint64_t)7)
598 LogRel(("NEM: Warning! Unknown VM exit definitions: %#RX64\n", Caps.ExtendedVmExits.AsUINT64));
599 /** @todo RECHECK: WHV_EXTENDED_VM_EXITS typedef. */
600
601 /*
602 * Check features in case they end up defining any.
603 */
604 RT_ZERO(Caps);
605 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeFeatures, &Caps, sizeof(Caps));
606 if (FAILED(hrc))
607 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
608 "WHvGetCapability/WHvCapabilityCodeFeatures failed: %Rhrc (Last=%#x/%u)",
609 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
610 if (Caps.Features.AsUINT64 & ~(uint64_t)0)
611 LogRel(("NEM: Warning! Unknown feature definitions: %#RX64\n", Caps.Features.AsUINT64));
612 /** @todo RECHECK: WHV_CAPABILITY_FEATURES typedef. */
613
614 /*
615 * Check supported exception exit bitmap bits.
616 * We don't currently require this, so we just log failure.
617 */
618 RT_ZERO(Caps);
619 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExceptionExitBitmap, &Caps, sizeof(Caps));
620 if (SUCCEEDED(hrc))
621 LogRel(("NEM: Supported exception exit bitmap: %#RX64\n", Caps.ExceptionExitBitmap));
622 else
623 LogRel(("NEM: Warning! WHvGetCapability/WHvCapabilityCodeExceptionExitBitmap failed: %Rhrc (Last=%#x/%u)",
624 hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
625
626 /*
627 * Check that the CPU vendor is supported.
628 */
629 RT_ZERO(Caps);
630 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorVendor, &Caps, sizeof(Caps));
631 if (FAILED(hrc))
632 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
633 "WHvGetCapability/WHvCapabilityCodeProcessorVendor failed: %Rhrc (Last=%#x/%u)",
634 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
635 switch (Caps.ProcessorVendor)
636 {
637 /** @todo RECHECK: WHV_PROCESSOR_VENDOR typedef. */
638 case WHvProcessorVendorIntel:
639 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - Intel", Caps.ProcessorVendor);
640 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_INTEL;
641 break;
642 case WHvProcessorVendorAmd:
643 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - AMD", Caps.ProcessorVendor);
644 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_AMD;
645 break;
646 default:
647 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d", Caps.ProcessorVendor);
648 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unknown processor vendor: %d", Caps.ProcessorVendor);
649 }
650
651 /*
652 * CPU features, guessing these are virtual CPU features?
653 */
654 RT_ZERO(Caps);
655 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorFeatures, &Caps, sizeof(Caps));
656 if (FAILED(hrc))
657 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
658 "WHvGetCapability/WHvCapabilityCodeProcessorFeatures failed: %Rhrc (Last=%#x/%u)",
659 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
660 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorFeatures", "%'#018RX64", Caps.ProcessorFeatures.AsUINT64);
661#define NEM_LOG_REL_CPU_FEATURE(a_Field) NEM_LOG_REL_CAP_SUB(#a_Field, Caps.ProcessorFeatures.a_Field)
662 NEM_LOG_REL_CPU_FEATURE(Sse3Support);
663 NEM_LOG_REL_CPU_FEATURE(LahfSahfSupport);
664 NEM_LOG_REL_CPU_FEATURE(Ssse3Support);
665 NEM_LOG_REL_CPU_FEATURE(Sse4_1Support);
666 NEM_LOG_REL_CPU_FEATURE(Sse4_2Support);
667 NEM_LOG_REL_CPU_FEATURE(Sse4aSupport);
668 NEM_LOG_REL_CPU_FEATURE(XopSupport);
669 NEM_LOG_REL_CPU_FEATURE(PopCntSupport);
670 NEM_LOG_REL_CPU_FEATURE(Cmpxchg16bSupport);
671 NEM_LOG_REL_CPU_FEATURE(Altmovcr8Support);
672 NEM_LOG_REL_CPU_FEATURE(LzcntSupport);
673 NEM_LOG_REL_CPU_FEATURE(MisAlignSseSupport);
674 NEM_LOG_REL_CPU_FEATURE(MmxExtSupport);
675 NEM_LOG_REL_CPU_FEATURE(Amd3DNowSupport);
676 NEM_LOG_REL_CPU_FEATURE(ExtendedAmd3DNowSupport);
677 NEM_LOG_REL_CPU_FEATURE(Page1GbSupport);
678 NEM_LOG_REL_CPU_FEATURE(AesSupport);
679 NEM_LOG_REL_CPU_FEATURE(PclmulqdqSupport);
680 NEM_LOG_REL_CPU_FEATURE(PcidSupport);
681 NEM_LOG_REL_CPU_FEATURE(Fma4Support);
682 NEM_LOG_REL_CPU_FEATURE(F16CSupport);
683 NEM_LOG_REL_CPU_FEATURE(RdRandSupport);
684 NEM_LOG_REL_CPU_FEATURE(RdWrFsGsSupport);
685 NEM_LOG_REL_CPU_FEATURE(SmepSupport);
686 NEM_LOG_REL_CPU_FEATURE(EnhancedFastStringSupport);
687 NEM_LOG_REL_CPU_FEATURE(Bmi1Support);
688 NEM_LOG_REL_CPU_FEATURE(Bmi2Support);
689 /* two reserved bits here, see below */
690 NEM_LOG_REL_CPU_FEATURE(MovbeSupport);
691 NEM_LOG_REL_CPU_FEATURE(Npiep1Support);
692 NEM_LOG_REL_CPU_FEATURE(DepX87FPUSaveSupport);
693 NEM_LOG_REL_CPU_FEATURE(RdSeedSupport);
694 NEM_LOG_REL_CPU_FEATURE(AdxSupport);
695 NEM_LOG_REL_CPU_FEATURE(IntelPrefetchSupport);
696 NEM_LOG_REL_CPU_FEATURE(SmapSupport);
697 NEM_LOG_REL_CPU_FEATURE(HleSupport);
698 NEM_LOG_REL_CPU_FEATURE(RtmSupport);
699 NEM_LOG_REL_CPU_FEATURE(RdtscpSupport);
700 NEM_LOG_REL_CPU_FEATURE(ClflushoptSupport);
701 NEM_LOG_REL_CPU_FEATURE(ClwbSupport);
702 NEM_LOG_REL_CPU_FEATURE(ShaSupport);
703 NEM_LOG_REL_CPU_FEATURE(X87PointersSavedSupport);
704#undef NEM_LOG_REL_CPU_FEATURE
705 if (Caps.ProcessorFeatures.AsUINT64 & (~(RT_BIT_64(43) - 1) | RT_BIT_64(27) | RT_BIT_64(28)))
706 LogRel(("NEM: Warning! Unknown CPU features: %#RX64\n", Caps.ProcessorFeatures.AsUINT64));
707 pVM->nem.s.uCpuFeatures.u64 = Caps.ProcessorFeatures.AsUINT64;
708 /** @todo RECHECK: WHV_PROCESSOR_FEATURES typedef. */
709
710 /*
711 * The cache line flush size.
712 */
713 RT_ZERO(Caps);
714 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorClFlushSize, &Caps, sizeof(Caps));
715 if (FAILED(hrc))
716 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
717 "WHvGetCapability/WHvCapabilityCodeProcessorClFlushSize failed: %Rhrc (Last=%#x/%u)",
718 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
719 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorClFlushSize", "2^%u", Caps.ProcessorClFlushSize);
720 if (Caps.ProcessorClFlushSize < 8 && Caps.ProcessorClFlushSize > 9)
721 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unsupported cache line flush size: %u", Caps.ProcessorClFlushSize);
722 pVM->nem.s.cCacheLineFlushShift = Caps.ProcessorClFlushSize;
723
724 /*
725 * See if they've added more properties that we're not aware of.
726 */
727 /** @todo RECHECK: WHV_CAPABILITY_CODE typedef. */
728 if (!IsDebuggerPresent()) /* Too noisy when in debugger, so skip. */
729 {
730 static const struct
731 {
732 uint32_t iMin, iMax; } s_aUnknowns[] =
733 {
734 { 0x0004, 0x000f },
735 { 0x1003, 0x100f },
736 { 0x2000, 0x200f },
737 { 0x3000, 0x300f },
738 { 0x4000, 0x400f },
739 };
740 for (uint32_t j = 0; j < RT_ELEMENTS(s_aUnknowns); j++)
741 for (uint32_t i = s_aUnknowns[j].iMin; i <= s_aUnknowns[j].iMax; i++)
742 {
743 RT_ZERO(Caps);
744 hrc = WHvGetCapabilityWrapper((WHV_CAPABILITY_CODE)i, &Caps, sizeof(Caps));
745 if (SUCCEEDED(hrc))
746 LogRel(("NEM: Warning! Unknown capability %#x returning: %.*Rhxs\n", i, sizeof(Caps), &Caps));
747 }
748 }
749
750 /*
751 * For proper operation, we require CPUID exits.
752 */
753 if (!pVM->nem.s.fExtendedCpuIdExit)
754 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended CPUID exit support");
755 if (!pVM->nem.s.fExtendedMsrExit)
756 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended MSR exit support");
757 if (!pVM->nem.s.fExtendedXcptExit)
758 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended exception exit support");
759
760#undef NEM_LOG_REL_CAP_EX
761#undef NEM_LOG_REL_CAP_SUB_EX
762#undef NEM_LOG_REL_CAP_SUB
763 return VINF_SUCCESS;
764}
765
766
767/**
768 * Used to fill in g_IoCtlGetHvPartitionId.
769 */
770static NTSTATUS WINAPI
771nemR3WinIoctlDetector_GetHvPartitionId(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
772 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
773 PVOID pvOutput, ULONG cbOutput)
774{
775 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
776 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
777 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
778 AssertLogRelMsgReturn(cbInput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
779 RT_NOREF(pvInput);
780
781 AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9);
782 AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_ID), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
783 *(HV_PARTITION_ID *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID;
784
785 g_IoCtlGetHvPartitionId.cbInput = cbInput;
786 g_IoCtlGetHvPartitionId.cbOutput = cbOutput;
787 g_IoCtlGetHvPartitionId.uFunction = uFunction;
788
789 return STATUS_SUCCESS;
790}
791
792
793/**
794 * Used to fill in g_IoCtlStartVirtualProcessor.
795 */
796static NTSTATUS WINAPI
797nemR3WinIoctlDetector_StartVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
798 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
799 PVOID pvOutput, ULONG cbOutput)
800{
801 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
802 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
803 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
804 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
805 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
806 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
807 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
808 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
809 RT_NOREF(pvOutput);
810
811 g_IoCtlStartVirtualProcessor.cbInput = cbInput;
812 g_IoCtlStartVirtualProcessor.cbOutput = cbOutput;
813 g_IoCtlStartVirtualProcessor.uFunction = uFunction;
814
815 return STATUS_SUCCESS;
816}
817
818
819/**
820 * Used to fill in g_IoCtlStartVirtualProcessor.
821 */
822static NTSTATUS WINAPI
823nemR3WinIoctlDetector_StopVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
824 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
825 PVOID pvOutput, ULONG cbOutput)
826{
827 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
828 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
829 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
830 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
831 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
832 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
833 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
834 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
835 RT_NOREF(pvOutput);
836
837 g_IoCtlStopVirtualProcessor.cbInput = cbInput;
838 g_IoCtlStopVirtualProcessor.cbOutput = cbOutput;
839 g_IoCtlStopVirtualProcessor.uFunction = uFunction;
840
841 return STATUS_SUCCESS;
842}
843
844
845/**
846 * Used to fill in g_IoCtlMessageSlotHandleAndGetNext
847 */
848static NTSTATUS WINAPI
849nemR3WinIoctlDetector_MessageSlotHandleAndGetNext(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
850 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
851 PVOID pvOutput, ULONG cbOutput)
852{
853 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
854 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
855 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
856
857 AssertLogRelMsgReturn(cbInput == sizeof(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT), ("cbInput=%#x\n", cbInput),
858 STATUS_INVALID_PARAMETER_8);
859 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
860 PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput;
861 AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX
862 && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE
863 && pVidIn->cMillies == NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT,
864 ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies),
865 STATUS_INVALID_PARAMETER_9);
866 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
867 RT_NOREF(pvOutput);
868
869 g_IoCtlMessageSlotHandleAndGetNext.cbInput = cbInput;
870 g_IoCtlMessageSlotHandleAndGetNext.cbOutput = cbOutput;
871 g_IoCtlMessageSlotHandleAndGetNext.uFunction = uFunction;
872
873 return STATUS_SUCCESS;
874}
875
876
#ifdef LOG_ENABLED
/**
 * Used to fill in what g_pIoCtlDetectForLogging points to.
 *
 * Unlike the other detectors this one performs no validation at all; it only
 * records the function number and the buffer sizes of whatever request it is
 * handed and reports success.
 *
 * @returns STATUS_SUCCESS, always.
 * @param   hFile           Ignored.
 * @param   hEvt            Ignored.
 * @param   pfnApcCallback  Ignored.
 * @param   pvApcCtx        Ignored.
 * @param   pIos            Ignored.
 * @param   uFunction       The I/O control function number to record.
 * @param   pvInput         Ignored.
 * @param   cbInput         The input size to record.
 * @param   pvOutput        Ignored.
 * @param   cbOutput        The output size to record.
 */
static NTSTATUS WINAPI nemR3WinIoctlDetector_ForLogging(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
                                                        PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
                                                        PVOID pvOutput, ULONG cbOutput)
{
    /* None of the handle, APC, status block or buffer arguments are looked at. */
    RT_NOREF(hFile); RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
    RT_NOREF(pIos); RT_NOREF(pvInput); RT_NOREF(pvOutput);

    /* Record the request properties in the currently selected info structure. */
    g_pIoCtlDetectForLogging->uFunction = uFunction;
    g_pIoCtlDetectForLogging->cbInput   = cbInput;
    g_pIoCtlDetectForLogging->cbOutput  = cbOutput;

    return STATUS_SUCCESS;
}
#endif
894
895
896/**
897 * Worker for nemR3NativeInit that detect I/O control function numbers for VID.
898 *
899 * We use the function numbers directly in ring-0 and to name functions when
900 * logging NtDeviceIoControlFile calls.
901 *
902 * @note We could alternatively do this by disassembling the respective
903 * functions, but hooking NtDeviceIoControlFile and making fake calls
904 * more easily provides the desired information.
905 *
906 * @returns VBox status code.
907 * @param pVM The cross context VM structure. Will set I/O
908 * control info members.
909 * @param pErrInfo Where to always return error info.
910 */
911static int nemR3WinInitDiscoverIoControlProperties(PVM pVM, PRTERRINFO pErrInfo)
912{
913 /*
914 * Probe the I/O control information for select VID APIs so we can use
915 * them directly from ring-0 and better log them.
916 *
917 */
918 decltype(NtDeviceIoControlFile) * const pfnOrg = *g_ppfnVidNtDeviceIoControlFile;
919
920 /* VidGetHvPartitionId */
921 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetHvPartitionId;
922 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
923 BOOL fRet = g_pfnVidGetHvPartitionId(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &idHvPartition);
924 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
925 AssertReturn(fRet && idHvPartition == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID && g_IoCtlGetHvPartitionId.uFunction != 0,
926 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
927 "Problem figuring out VidGetHvPartitionId: fRet=%u idHvPartition=%#x dwErr=%u",
928 fRet, idHvPartition, GetLastError()) );
929 LogRel(("NEM: VidGetHvPartitionId -> fun:%#x in:%#x out:%#x\n",
930 g_IoCtlGetHvPartitionId.uFunction, g_IoCtlGetHvPartitionId.cbInput, g_IoCtlGetHvPartitionId.cbOutput));
931
932 /* VidStartVirtualProcessor */
933 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StartVirtualProcessor;
934 fRet = g_pfnVidStartVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
935 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
936 AssertReturn(fRet && g_IoCtlStartVirtualProcessor.uFunction != 0,
937 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
938 "Problem figuring out VidStartVirtualProcessor: fRet=%u dwErr=%u",
939 fRet, GetLastError()) );
940 LogRel(("NEM: VidStartVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStartVirtualProcessor.uFunction,
941 g_IoCtlStartVirtualProcessor.cbInput, g_IoCtlStartVirtualProcessor.cbOutput));
942
943 /* VidStopVirtualProcessor */
944 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StopVirtualProcessor;
945 fRet = g_pfnVidStopVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
946 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
947 AssertReturn(fRet && g_IoCtlStopVirtualProcessor.uFunction != 0,
948 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
949 "Problem figuring out VidStopVirtualProcessor: fRet=%u dwErr=%u",
950 fRet, GetLastError()) );
951 LogRel(("NEM: VidStopVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStopVirtualProcessor.uFunction,
952 g_IoCtlStopVirtualProcessor.cbInput, g_IoCtlStopVirtualProcessor.cbOutput));
953
954 /* VidMessageSlotHandleAndGetNext */
955 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_MessageSlotHandleAndGetNext;
956 fRet = g_pfnVidMessageSlotHandleAndGetNext(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE,
957 NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, VID_MSHAGN_F_HANDLE_MESSAGE,
958 NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT);
959 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
960 AssertReturn(fRet && g_IoCtlMessageSlotHandleAndGetNext.uFunction != 0,
961 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
962 "Problem figuring out VidMessageSlotHandleAndGetNext: fRet=%u dwErr=%u",
963 fRet, GetLastError()) );
964 LogRel(("NEM: VidMessageSlotHandleAndGetNext -> fun:%#x in:%#x out:%#x\n",
965 g_IoCtlMessageSlotHandleAndGetNext.uFunction, g_IoCtlMessageSlotHandleAndGetNext.cbInput,
966 g_IoCtlMessageSlotHandleAndGetNext.cbOutput));
967
968#ifdef LOG_ENABLED
969 /* The following are only for logging: */
970 union
971 {
972 VID_MAPPED_MESSAGE_SLOT MapSlot;
973 HV_REGISTER_NAME Name;
974 HV_REGISTER_VALUE Value;
975 } uBuf;
976
977 /* VidMessageSlotMap */
978 g_pIoCtlDetectForLogging = &g_IoCtlMessageSlotMap;
979 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
980 fRet = g_pfnVidMessageSlotMap(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &uBuf.MapSlot, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
981 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
982 Assert(fRet);
983 LogRel(("NEM: VidMessageSlotMap -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
984 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
985
986 /* VidGetVirtualProcessorState */
987 uBuf.Name = HvRegisterExplicitSuspend;
988 g_pIoCtlDetectForLogging = &g_IoCtlGetVirtualProcessorState;
989 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
990 fRet = g_pfnVidGetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
991 &uBuf.Name, 1, &uBuf.Value);
992 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
993 Assert(fRet);
994 LogRel(("NEM: VidGetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
995 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
996
997 /* VidSetVirtualProcessorState */
998 uBuf.Name = HvRegisterExplicitSuspend;
999 g_pIoCtlDetectForLogging = &g_IoCtlSetVirtualProcessorState;
1000 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1001 fRet = g_pfnVidSetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1002 &uBuf.Name, 1, &uBuf.Value);
1003 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1004 Assert(fRet);
1005 LogRel(("NEM: VidSetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1006 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1007
1008 g_pIoCtlDetectForLogging = NULL;
1009#endif
1010
1011 /* Done. */
1012 pVM->nem.s.IoCtlGetHvPartitionId = g_IoCtlGetHvPartitionId;
1013 pVM->nem.s.IoCtlStartVirtualProcessor = g_IoCtlStartVirtualProcessor;
1014 pVM->nem.s.IoCtlStopVirtualProcessor = g_IoCtlStopVirtualProcessor;
1015 pVM->nem.s.IoCtlMessageSlotHandleAndGetNext = g_IoCtlMessageSlotHandleAndGetNext;
1016 return VINF_SUCCESS;
1017}
1018
1019
1020/**
1021 * Creates and sets up a Hyper-V (exo) partition.
1022 *
1023 * @returns VBox status code.
1024 * @param pVM The cross context VM structure.
1025 * @param pErrInfo Where to always return error info.
1026 */
1027static int nemR3WinInitCreatePartition(PVM pVM, PRTERRINFO pErrInfo)
1028{
1029 AssertReturn(!pVM->nem.s.hPartition, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1030 AssertReturn(!pVM->nem.s.hPartitionDevice, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1031
1032 /*
1033 * Create the partition.
1034 */
1035 WHV_PARTITION_HANDLE hPartition;
1036 HRESULT hrc = WHvCreatePartition(&hPartition);
1037 if (FAILED(hrc))
1038 return RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED, "WHvCreatePartition failed with %Rhrc (Last=%#x/%u)",
1039 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1040
1041 int rc;
1042
1043 /*
1044 * Set partition properties, most importantly the CPU count.
1045 */
1046 /**
1047 * @todo Someone at Microsoft please explain another weird API:
1048 * - Why this API doesn't take the WHV_PARTITION_PROPERTY_CODE value as an
1049 * argument rather than as part of the struct. That is so weird if you've
1050 * used any other NT or windows API, including WHvGetCapability().
1051 * - Why use PVOID when WHV_PARTITION_PROPERTY is what's expected. We
1052 * technically only need 9 bytes for setting/getting
1053 * WHVPartitionPropertyCodeProcessorClFlushSize, but the API insists on 16. */
1054 WHV_PARTITION_PROPERTY Property;
1055 RT_ZERO(Property);
1056 Property.ProcessorCount = pVM->cCpus;
1057 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorCount, &Property, sizeof(Property));
1058 if (SUCCEEDED(hrc))
1059 {
1060 RT_ZERO(Property);
1061 Property.ExtendedVmExits.X64CpuidExit = pVM->nem.s.fExtendedCpuIdExit; /** @todo Register fixed results and restrict cpuid exits */
1062 Property.ExtendedVmExits.X64MsrExit = pVM->nem.s.fExtendedMsrExit;
1063 Property.ExtendedVmExits.ExceptionExit = pVM->nem.s.fExtendedXcptExit;
1064 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExtendedVmExits, &Property, sizeof(Property));
1065 if (SUCCEEDED(hrc))
1066 {
1067 /*
1068 * We'll continue setup in nemR3NativeInitAfterCPUM.
1069 */
1070 pVM->nem.s.fCreatedEmts = false;
1071 pVM->nem.s.hPartition = hPartition;
1072 LogRel(("NEM: Created partition %p.\n", hPartition));
1073 return VINF_SUCCESS;
1074 }
1075
1076 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1077 "Failed setting WHvPartitionPropertyCodeExtendedVmExits to %'#RX64: %Rhrc",
1078 Property.ExtendedVmExits.AsUINT64, hrc);
1079 }
1080 else
1081 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1082 "Failed setting WHvPartitionPropertyCodeProcessorCount to %u: %Rhrc (Last=%#x/%u)",
1083 pVM->cCpus, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1084 WHvDeletePartition(hPartition);
1085
1086 Assert(!pVM->nem.s.hPartitionDevice);
1087 Assert(!pVM->nem.s.hPartition);
1088 return rc;
1089}
1090
1091
1092/**
1093 * Makes sure APIC and firmware will not allow X2APIC mode.
1094 *
1095 * This is rather ugly.
1096 *
1097 * @returns VBox status code
1098 * @param pVM The cross context VM structure.
1099 */
1100static int nemR3WinDisableX2Apic(PVM pVM)
1101{
1102 /*
1103 * First make sure the 'Mode' config value of the APIC isn't set to X2APIC.
1104 * This defaults to APIC, so no need to change unless it's X2APIC.
1105 */
1106 PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/apic/0/Config");
1107 if (pCfg)
1108 {
1109 uint8_t bMode = 0;
1110 int rc = CFGMR3QueryU8(pCfg, "Mode", &bMode);
1111 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1112 if (RT_SUCCESS(rc) && bMode == PDMAPICMODE_X2APIC)
1113 {
1114 LogRel(("NEM: Adjusting APIC configuration from X2APIC to APIC max mode. X2APIC is not supported by the WinHvPlatform API!\n"));
1115 LogRel(("NEM: Disable Hyper-V if you need X2APIC for your guests!\n"));
1116 rc = CFGMR3RemoveValue(pCfg, "Mode");
1117 rc = CFGMR3InsertInteger(pCfg, "Mode", PDMAPICMODE_APIC);
1118 AssertLogRelRCReturn(rc, rc);
1119 }
1120 }
1121
1122 /*
1123 * Now the firmwares.
1124 * These also defaults to APIC and only needs adjusting if configured to X2APIC (2).
1125 */
1126 static const char * const s_apszFirmwareConfigs[] =
1127 {
1128 "/Devices/efi/0/Config",
1129 "/Devices/pcbios/0/Config",
1130 };
1131 for (unsigned i = 0; i < RT_ELEMENTS(s_apszFirmwareConfigs); i++)
1132 {
1133 pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/APIC/0/Config");
1134 if (pCfg)
1135 {
1136 uint8_t bMode = 0;
1137 int rc = CFGMR3QueryU8(pCfg, "APIC", &bMode);
1138 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1139 if (RT_SUCCESS(rc) && bMode == 2)
1140 {
1141 LogRel(("NEM: Adjusting %s/Mode from 2 (X2APIC) to 1 (APIC).\n", s_apszFirmwareConfigs[i]));
1142 rc = CFGMR3RemoveValue(pCfg, "APIC");
1143 rc = CFGMR3InsertInteger(pCfg, "APIC", 1);
1144 AssertLogRelRCReturn(rc, rc);
1145 }
1146 }
1147 }
1148
1149 return VINF_SUCCESS;
1150}
1151
1152
1153/**
1154 * Try initialize the native API.
1155 *
1156 * This may only do part of the job, more can be done in
1157 * nemR3NativeInitAfterCPUM() and nemR3NativeInitCompleted().
1158 *
1159 * @returns VBox status code.
1160 * @param pVM The cross context VM structure.
1161 * @param fFallback Whether we're in fallback mode or use-NEM mode. In
1162 * the latter we'll fail if we cannot initialize.
1163 * @param fForced Whether the HMForced flag is set and we should
1164 * fail if we cannot initialize.
1165 */
1166int nemR3NativeInit(PVM pVM, bool fFallback, bool fForced)
1167{
1168 g_uBuildNo = RTSystemGetNtBuildNo();
1169
1170 /*
1171 * Some state init.
1172 */
1173 pVM->nem.s.fA20Enabled = true;
1174 for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
1175 {
1176 PNEMCPU pNemCpu = &pVM->aCpus[iCpu].nem.s;
1177 pNemCpu->uPendingApicBase = UINT64_MAX;
1178 }
1179
1180 /*
1181 * Error state.
1182 * The error message will be non-empty on failure and 'rc' will be set too.
1183 */
1184 RTERRINFOSTATIC ErrInfo;
1185 PRTERRINFO pErrInfo = RTErrInfoInitStatic(&ErrInfo);
1186 int rc = nemR3WinInitProbeAndLoad(fForced, pErrInfo);
1187 if (RT_SUCCESS(rc))
1188 {
1189 /*
1190 * Check the capabilties of the hypervisor, starting with whether it's present.
1191 */
1192 rc = nemR3WinInitCheckCapabilities(pVM, pErrInfo);
1193 if (RT_SUCCESS(rc))
1194 {
1195 /*
1196 * Discover the VID I/O control function numbers we need.
1197 */
1198 rc = nemR3WinInitDiscoverIoControlProperties(pVM, pErrInfo);
1199 if (RT_SUCCESS(rc))
1200 {
1201 /*
1202 * Check out our ring-0 capabilities.
1203 */
1204 rc = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_NEM_INIT_VM, 0, NULL);
1205 if (RT_SUCCESS(rc))
1206 {
1207 /*
1208 * Create and initialize a partition.
1209 */
1210 rc = nemR3WinInitCreatePartition(pVM, pErrInfo);
1211 if (RT_SUCCESS(rc))
1212 {
1213 VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_NATIVE_API);
1214 Log(("NEM: Marked active!\n"));
1215 nemR3WinDisableX2Apic(pVM);
1216
1217 /* Register release statistics */
1218 for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
1219 {
1220 PNEMCPU pNemCpu = &pVM->aCpus[iCpu].nem.s;
1221 STAMR3RegisterF(pVM, &pNemCpu->StatExitPortIo, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of port I/O exits", "/NEM/CPU%u/ExitPortIo", iCpu);
1222 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemUnmapped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unmapped memory exits", "/NEM/CPU%u/ExitMemUnmapped", iCpu);
1223 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemIntercept, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of intercepted memory exits", "/NEM/CPU%u/ExitMemIntercept", iCpu);
1224 STAMR3RegisterF(pVM, &pNemCpu->StatExitHalt, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of HLT exits", "/NEM/CPU%u/ExitHalt", iCpu);
1225 STAMR3RegisterF(pVM, &pNemCpu->StatExitInterruptWindow, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of HLT exits", "/NEM/CPU%u/ExitInterruptWindow", iCpu);
1226 STAMR3RegisterF(pVM, &pNemCpu->StatExitCpuId, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of CPUID exits", "/NEM/CPU%u/ExitCpuId", iCpu);
1227 STAMR3RegisterF(pVM, &pNemCpu->StatExitMsr, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of MSR access exits", "/NEM/CPU%u/ExitMsr", iCpu);
1228 STAMR3RegisterF(pVM, &pNemCpu->StatExitException, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of exception exits", "/NEM/CPU%u/ExitException", iCpu);
1229 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionBp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #BP exits", "/NEM/CPU%u/ExitExceptionBp", iCpu);
1230 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionDb, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #DB exits", "/NEM/CPU%u/ExitExceptionDb", iCpu);
1231 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #UD exits", "/NEM/CPU%u/ExitExceptionUd", iCpu);
1232 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUdHandled, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of handled #UD exits", "/NEM/CPU%u/ExitExceptionUdHandled", iCpu);
1233 STAMR3RegisterF(pVM, &pNemCpu->StatExitUnrecoverable, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unrecoverable exits", "/NEM/CPU%u/ExitUnrecoverable", iCpu);
1234 STAMR3RegisterF(pVM, &pNemCpu->StatGetMsgTimeout, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of get message timeouts/alerts", "/NEM/CPU%u/GetMsgTimeout", iCpu);
1235 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuSuccess, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of successful CPU stops", "/NEM/CPU%u/StopCpuSuccess", iCpu);
1236 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPending, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stops", "/NEM/CPU%u/StopCpuPending", iCpu);
1237 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingAlerts,STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stop alerts", "/NEM/CPU%u/StopCpuPendingAlerts", iCpu);
1238 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingOdd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of odd pending CPU stops (see code)", "/NEM/CPU%u/StopCpuPendingOdd", iCpu);
1239 STAMR3RegisterF(pVM, &pNemCpu->StatCancelChangedState, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel changed state", "/NEM/CPU%u/CancelChangedState", iCpu);
1240 STAMR3RegisterF(pVM, &pNemCpu->StatCancelAlertedThread, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel alerted EMT", "/NEM/CPU%u/CancelAlertedEMT", iCpu);
1241 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPre, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pre execution FF breaks", "/NEM/CPU%u/BreakOnFFPre", iCpu);
1242 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPost, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of post execution FF breaks", "/NEM/CPU%u/BreakOnFFPost", iCpu);
1243 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnCancel, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel execution breaks", "/NEM/CPU%u/BreakOnCancel", iCpu);
1244 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnStatus, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of status code breaks", "/NEM/CPU%u/BreakOnStatus", iCpu);
1245 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnDemand, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of on-demand state imports", "/NEM/CPU%u/ImportOnDemand", iCpu);
1246 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturn, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of state imports on loop return", "/NEM/CPU%u/ImportOnReturn", iCpu);
1247 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturnSkipped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of skipped state imports on loop return", "/NEM/CPU%u/ImportOnReturnSkipped", iCpu);
1248 STAMR3RegisterF(pVM, &pNemCpu->StatQueryCpuTick, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of TSC queries", "/NEM/CPU%u/QueryCpuTick", iCpu);
1249 }
1250
1251 PUVM pUVM = pVM->pUVM;
1252 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesAvailable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1253 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Free pages available to the hypervisor",
1254 "/NEM/R0Stats/cPagesAvailable");
1255 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesInUse, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1256 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Pages in use by hypervisor",
1257 "/NEM/R0Stats/cPagesInUse");
1258 }
1259 }
1260 }
1261 }
1262 }
1263
1264 /*
1265 * We only fail if in forced mode, otherwise just log the complaint and return.
1266 */
1267 Assert(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API || RTErrInfoIsSet(pErrInfo));
1268 if ( (fForced || !fFallback)
1269 && pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NATIVE_API)
1270 return VMSetError(pVM, RT_SUCCESS_NP(rc) ? VERR_NEM_NOT_AVAILABLE : rc, RT_SRC_POS, "%s", pErrInfo->pszMsg);
1271
1272 if (RTErrInfoIsSet(pErrInfo))
1273 LogRel(("NEM: Not available: %s\n", pErrInfo->pszMsg));
1274 return VINF_SUCCESS;
1275}
1276
1277
1278/**
1279 * This is called after CPUMR3Init is done.
1280 *
1281 * @returns VBox status code.
1282 * @param pVM The VM handle..
1283 */
1284int nemR3NativeInitAfterCPUM(PVM pVM)
1285{
1286 /*
1287 * Validate sanity.
1288 */
1289 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1290 AssertReturn(hPartition != NULL, VERR_WRONG_ORDER);
1291 AssertReturn(!pVM->nem.s.hPartitionDevice, VERR_WRONG_ORDER);
1292 AssertReturn(!pVM->nem.s.fCreatedEmts, VERR_WRONG_ORDER);
1293 AssertReturn(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API, VERR_WRONG_ORDER);
1294
1295 /*
1296 * Continue setting up the partition now that we've got most of the CPUID feature stuff.
1297 */
1298 WHV_PARTITION_PROPERTY Property;
1299 HRESULT hrc;
1300
1301#if 0
1302 /* Not sure if we really need to set the vendor.
1303 Update: Apparently we don't. WHvPartitionPropertyCodeProcessorVendor was removed in 17110. */
1304 RT_ZERO(Property);
1305 Property.ProcessorVendor = pVM->nem.s.enmCpuVendor == CPUMCPUVENDOR_AMD ? WHvProcessorVendorAmd
1306 : WHvProcessorVendorIntel;
1307 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorVendor, &Property, sizeof(Property));
1308 if (FAILED(hrc))
1309 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1310 "Failed to set WHvPartitionPropertyCodeProcessorVendor to %u: %Rhrc (Last=%#x/%u)",
1311 Property.ProcessorVendor, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1312#endif
1313
1314 /* Not sure if we really need to set the cache line flush size. */
1315 RT_ZERO(Property);
1316 Property.ProcessorClFlushSize = pVM->nem.s.cCacheLineFlushShift;
1317 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorClFlushSize, &Property, sizeof(Property));
1318 if (FAILED(hrc))
1319 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1320 "Failed to set WHvPartitionPropertyCodeProcessorClFlushSize to %u: %Rhrc (Last=%#x/%u)",
1321 pVM->nem.s.cCacheLineFlushShift, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1322
1323 /* Intercept #DB, #BP and #UD exceptions. */
1324 RT_ZERO(Property);
1325 Property.ExceptionExitBitmap = RT_BIT_64(WHvX64ExceptionTypeDebugTrapOrFault)
1326 | RT_BIT_64(WHvX64ExceptionTypeBreakpointTrap)
1327 | RT_BIT_64(WHvX64ExceptionTypeInvalidOpcodeFault);
1328 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExceptionExitBitmap, &Property, sizeof(Property));
1329 if (FAILED(hrc))
1330 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1331 "Failed to set WHvPartitionPropertyCodeExceptionExitBitmap to %#RX64: %Rhrc (Last=%#x/%u)",
1332 Property.ExceptionExitBitmap, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1333
1334
1335 /*
1336 * Sync CPU features with CPUM.
1337 */
1338 /** @todo sync CPU features with CPUM. */
1339
1340 /* Set the partition property. */
1341 RT_ZERO(Property);
1342 Property.ProcessorFeatures.AsUINT64 = pVM->nem.s.uCpuFeatures.u64;
1343 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorFeatures, &Property, sizeof(Property));
1344 if (FAILED(hrc))
1345 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1346 "Failed to set WHvPartitionPropertyCodeProcessorFeatures to %'#RX64: %Rhrc (Last=%#x/%u)",
1347 pVM->nem.s.uCpuFeatures.u64, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1348
1349 /*
1350 * Set up the partition and create EMTs.
1351 *
1352 * Seems like this is where the partition is actually instantiated and we get
1353 * a handle to it.
1354 */
1355 hrc = WHvSetupPartition(hPartition);
1356 if (FAILED(hrc))
1357 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1358 "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)",
1359 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1360
1361 /* Get the handle. */
1362 HANDLE hPartitionDevice;
1363 __try
1364 {
1365 hPartitionDevice = ((HANDLE *)hPartition)[1];
1366 }
1367 __except(EXCEPTION_EXECUTE_HANDLER)
1368 {
1369 hrc = GetExceptionCode();
1370 hPartitionDevice = NULL;
1371 }
1372 if ( hPartitionDevice == NULL
1373 || hPartitionDevice == (HANDLE)(intptr_t)-1)
1374 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1375 "Failed to get device handle for partition %p: %Rhrc", hPartition, hrc);
1376
1377 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
1378 if (!g_pfnVidGetHvPartitionId(hPartitionDevice, &idHvPartition))
1379 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1380 "Failed to get device handle and/or partition ID for %p (hPartitionDevice=%p, Last=%#x/%u)",
1381 hPartition, hPartitionDevice, RTNtLastStatusValue(), RTNtLastErrorValue());
1382 pVM->nem.s.hPartitionDevice = hPartitionDevice;
1383 pVM->nem.s.idHvPartition = idHvPartition;
1384
1385 /*
1386 * Setup the EMTs.
1387 */
1388 VMCPUID iCpu;
1389 for (iCpu = 0; iCpu < pVM->cCpus; iCpu++)
1390 {
1391 PVMCPU pVCpu = &pVM->aCpus[iCpu];
1392
1393 pVCpu->nem.s.hNativeThreadHandle = (RTR3PTR)RTThreadGetNativeHandle(VMR3GetThreadHandle(pVCpu->pUVCpu));
1394 Assert((HANDLE)pVCpu->nem.s.hNativeThreadHandle != INVALID_HANDLE_VALUE);
1395
1396#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
1397# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1398 if (!pVM->nem.s.fUseRing0Runloop)
1399# endif
1400 {
1401 hrc = WHvCreateVirtualProcessor(hPartition, iCpu, 0 /*fFlags*/);
1402 if (FAILED(hrc))
1403 {
1404 NTSTATUS const rcNtLast = RTNtLastStatusValue();
1405 DWORD const dwErrLast = RTNtLastErrorValue();
1406 while (iCpu-- > 0)
1407 {
1408 HRESULT hrc2 = WHvDeleteVirtualProcessor(hPartition, iCpu);
1409 AssertLogRelMsg(SUCCEEDED(hrc2), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1410 hPartition, iCpu, hrc2, RTNtLastStatusValue(),
1411 RTNtLastErrorValue()));
1412 }
1413 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1414 "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)", hrc, rcNtLast, dwErrLast);
1415 }
1416 }
1417# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1418 else
1419# endif
1420#endif /* !NEM_WIN_USE_OUR_OWN_RUN_API */
1421#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_OUR_OWN_RUN_API)
1422 {
1423 VID_MAPPED_MESSAGE_SLOT MappedMsgSlot = { NULL, UINT32_MAX, UINT32_MAX };
1424 if (g_pfnVidMessageSlotMap(hPartitionDevice, &MappedMsgSlot, iCpu))
1425 {
1426 AssertLogRelMsg(MappedMsgSlot.iCpu == iCpu && MappedMsgSlot.uParentAdvisory == UINT32_MAX,
1427 ("%#x %#x (iCpu=%#x)\n", MappedMsgSlot.iCpu, MappedMsgSlot.uParentAdvisory, iCpu));
1428 pVCpu->nem.s.pvMsgSlotMapping = MappedMsgSlot.pMsgBlock;
1429 }
1430 else
1431 {
1432 NTSTATUS const rcNtLast = RTNtLastStatusValue();
1433 DWORD const dwErrLast = RTNtLastErrorValue();
1434 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1435 "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)", hrc, rcNtLast, dwErrLast);
1436 }
1437 }
1438#endif
1439 }
1440 pVM->nem.s.fCreatedEmts = true;
1441
1442 /*
1443 * Do some more ring-0 initialization now that we've got the partition handle.
1444 */
1445 int rc = VMMR3CallR0Emt(pVM, &pVM->aCpus[0], VMMR0_DO_NEM_INIT_VM_PART_2, 0, NULL);
1446 if (RT_SUCCESS(rc))
1447 {
1448 LogRel(("NEM: Successfully set up partition (device handle %p, partition ID %#llx)\n", hPartitionDevice, idHvPartition));
1449
1450#if 1
1451 VMMR3CallR0Emt(pVM, &pVM->aCpus[0], VMMR0_DO_NEM_UPDATE_STATISTICS, 0, NULL);
1452 LogRel(("NEM: Memory balance: %#RX64 out of %#RX64 pages in use\n",
1453 pVM->nem.s.R0Stats.cPagesInUse, pVM->nem.s.R0Stats.cPagesAvailable));
1454#endif
1455
1456 /*
1457 * Register statistics on shared pages.
1458 */
1459 /** @todo HvCallMapStatsPage */
1460
1461 /*
1462 * Adjust features.
1463 * Note! We've already disabled X2APIC via CFGM during the first init call.
1464 */
1465
1466#if 0 && defined(DEBUG_bird)
1467 /*
1468 * Poke and probe a little.
1469 */
1470 PVMCPU pVCpu = &pVM->aCpus[0];
1471 uint32_t aRegNames[1024];
1472 HV_REGISTER_VALUE aRegValues[1024];
1473 uint32_t aPropCodes[128];
1474 uint64_t aPropValues[128];
1475 for (int iOuter = 0; iOuter < 5; iOuter++)
1476 {
1477 LogRel(("\niOuter %d\n", iOuter));
1478# if 1
1479 /* registers */
1480 uint32_t iRegValue = 0;
1481 uint32_t cRegChanges = 0;
1482 for (uint32_t iReg = 0; iReg < 0x001101ff; iReg++)
1483 {
1484 if (iOuter != 0 && aRegNames[iRegValue] > iReg)
1485 continue;
1486 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1487 pVCpu->nem.s.Hypercall.Experiment.uItem = iReg;
1488 int rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 0, NULL);
1489 AssertLogRelRCBreak(rc2);
1490 if (pVCpu->nem.s.Hypercall.Experiment.fSuccess)
1491 {
1492 LogRel(("Register %#010x = %#18RX64, %#18RX64\n", iReg,
1493 pVCpu->nem.s.Hypercall.Experiment.uLoValue, pVCpu->nem.s.Hypercall.Experiment.uHiValue));
1494 if (iReg == HvX64RegisterTsc)
1495 {
1496 uint64_t uTsc = ASMReadTSC();
1497 LogRel(("TSC = %#18RX64; Delta %#18RX64 or %#18RX64\n",
1498 uTsc, pVCpu->nem.s.Hypercall.Experiment.uLoValue - uTsc, uTsc - pVCpu->nem.s.Hypercall.Experiment.uLoValue));
1499 }
1500
1501 if (iOuter == 0)
1502 aRegNames[iRegValue] = iReg;
1503 else if( aRegValues[iRegValue].Reg128.Low64 != pVCpu->nem.s.Hypercall.Experiment.uLoValue
1504 || aRegValues[iRegValue].Reg128.High64 != pVCpu->nem.s.Hypercall.Experiment.uHiValue)
1505 {
1506 LogRel(("Changed from %#18RX64, %#18RX64 !!\n",
1507 aRegValues[iRegValue].Reg128.Low64, aRegValues[iRegValue].Reg128.High64));
1508 LogRel(("Delta %#18RX64, %#18RX64 !!\n",
1509 pVCpu->nem.s.Hypercall.Experiment.uLoValue - aRegValues[iRegValue].Reg128.Low64,
1510 pVCpu->nem.s.Hypercall.Experiment.uHiValue - aRegValues[iRegValue].Reg128.High64));
1511 cRegChanges++;
1512 }
1513 aRegValues[iRegValue].Reg128.Low64 = pVCpu->nem.s.Hypercall.Experiment.uLoValue;
1514 aRegValues[iRegValue].Reg128.High64 = pVCpu->nem.s.Hypercall.Experiment.uHiValue;
1515 iRegValue++;
1516 AssertBreak(iRegValue < RT_ELEMENTS(aRegValues));
1517 }
1518 }
1519 LogRel(("Found %u registers, %u changed\n", iRegValue, cRegChanges));
1520# endif
1521# if 1
1522 /* partition properties */
1523 uint32_t iPropValue = 0;
1524 uint32_t cPropChanges = 0;
1525 for (uint32_t iProp = 0; iProp < 0xc11ff; iProp++)
1526 {
1527 if (iProp == HvPartitionPropertyDebugChannelId /* hangs host */)
1528 continue;
1529 if (iOuter != 0 && aPropCodes[iPropValue] > iProp)
1530 continue;
1531 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1532 pVCpu->nem.s.Hypercall.Experiment.uItem = iProp;
1533 int rc2 = VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 1, NULL);
1534 AssertLogRelRCBreak(rc2);
1535 if (pVCpu->nem.s.Hypercall.Experiment.fSuccess)
1536 {
1537 LogRel(("Property %#010x = %#18RX64\n", iProp, pVCpu->nem.s.Hypercall.Experiment.uLoValue));
1538 if (iOuter == 0)
1539 aPropCodes[iPropValue] = iProp;
1540 else if (aPropValues[iPropValue] != pVCpu->nem.s.Hypercall.Experiment.uLoValue)
1541 {
1542 LogRel(("Changed from %#18RX64, delta %#18RX64!!\n",
1543 aPropValues[iPropValue], pVCpu->nem.s.Hypercall.Experiment.uLoValue - aPropValues[iPropValue]));
1544 cRegChanges++;
1545 }
1546 aPropValues[iPropValue] = pVCpu->nem.s.Hypercall.Experiment.uLoValue;
1547 iPropValue++;
1548 AssertBreak(iPropValue < RT_ELEMENTS(aPropValues));
1549 }
1550 }
1551 LogRel(("Found %u properties, %u changed\n", iPropValue, cPropChanges));
1552# endif
1553
1554 /* Modify the TSC register value and see what changes. */
1555 if (iOuter != 0)
1556 {
1557 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1558 pVCpu->nem.s.Hypercall.Experiment.uItem = HvX64RegisterTsc;
1559 pVCpu->nem.s.Hypercall.Experiment.uHiValue = UINT64_C(0x00000fffffffffff) >> iOuter;
1560 pVCpu->nem.s.Hypercall.Experiment.uLoValue = UINT64_C(0x0011100000000000) << iOuter;
1561 VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 2, NULL);
1562 LogRel(("Setting HvX64RegisterTsc -> %RTbool (%#RX64)\n", pVCpu->nem.s.Hypercall.Experiment.fSuccess, pVCpu->nem.s.Hypercall.Experiment.uStatus));
1563 }
1564
1565 RT_ZERO(pVCpu->nem.s.Hypercall.Experiment);
1566 pVCpu->nem.s.Hypercall.Experiment.uItem = HvX64RegisterTsc;
1567 VMMR3CallR0Emt(pVM, pVCpu, VMMR0_DO_NEM_EXPERIMENT, 0, NULL);
1568 LogRel(("HvX64RegisterTsc = %#RX64, %#RX64\n", pVCpu->nem.s.Hypercall.Experiment.uLoValue, pVCpu->nem.s.Hypercall.Experiment.uHiValue));
1569 }
1570
1571#endif
1572 return VINF_SUCCESS;
1573 }
1574 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, "Call to NEMR0InitVMPart2 failed: %Rrc", rc);
1575}
1576
1577
1578int nemR3NativeInitCompleted(PVM pVM, VMINITCOMPLETED enmWhat)
1579{
1580 //BOOL fRet = SetThreadPriority(GetCurrentThread(), 0);
1581 //AssertLogRel(fRet);
1582
1583 NOREF(pVM); NOREF(enmWhat);
1584 return VINF_SUCCESS;
1585}
1586
1587
1588int nemR3NativeTerm(PVM pVM)
1589{
1590 /*
1591 * Delete the partition.
1592 */
1593 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1594 pVM->nem.s.hPartition = NULL;
1595 pVM->nem.s.hPartitionDevice = NULL;
1596 if (hPartition != NULL)
1597 {
1598 VMCPUID iCpu = pVM->nem.s.fCreatedEmts ? pVM->cCpus : 0;
1599 LogRel(("NEM: Destroying partition %p with its %u VCpus...\n", hPartition, iCpu));
1600 while (iCpu-- > 0)
1601 {
1602 pVM->aCpus[iCpu].nem.s.pvMsgSlotMapping = NULL;
1603#ifndef NEM_WIN_USE_OUR_OWN_RUN_API
1604# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1605 if (!pVM->nem.s.fUseRing0Runloop)
1606# endif
1607 {
1608 HRESULT hrc = WHvDeleteVirtualProcessor(hPartition, iCpu);
1609 AssertLogRelMsg(SUCCEEDED(hrc), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1610 hPartition, iCpu, hrc, RTNtLastStatusValue(),
1611 RTNtLastErrorValue()));
1612 }
1613#endif
1614 }
1615 WHvDeletePartition(hPartition);
1616 }
1617 pVM->nem.s.fCreatedEmts = false;
1618 return VINF_SUCCESS;
1619}
1620
1621
1622/**
1623 * VM reset notification.
1624 *
1625 * @param pVM The cross context VM structure.
1626 */
1627void nemR3NativeReset(PVM pVM)
1628{
1629 /* Unfix the A20 gate. */
1630 pVM->nem.s.fA20Fixed = false;
1631}
1632
1633
1634/**
1635 * Reset CPU due to INIT IPI or hot (un)plugging.
1636 *
1637 * @param pVCpu The cross context virtual CPU structure of the CPU being
1638 * reset.
1639 * @param fInitIpi Whether this is the INIT IPI or hot (un)plugging case.
1640 */
1641void nemR3NativeResetCpu(PVMCPU pVCpu, bool fInitIpi)
1642{
1643 /* Lock the A20 gate if INIT IPI, make sure it's enabled. */
1644 if (fInitIpi && pVCpu->idCpu > 0)
1645 {
1646 PVM pVM = pVCpu->CTX_SUFF(pVM);
1647 if (!pVM->nem.s.fA20Enabled)
1648 nemR3NativeNotifySetA20(pVCpu, true);
1649 pVM->nem.s.fA20Enabled = true;
1650 pVM->nem.s.fA20Fixed = true;
1651 }
1652}
1653
1654
1655VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
1656{
1657#ifdef NEM_WIN_WITH_RING0_RUNLOOP
1658 if (pVM->nem.s.fUseRing0Runloop)
1659 {
1660 for (;;)
1661 {
1662 VBOXSTRICTRC rcStrict = VMMR3CallR0EmtFast(pVM, pVCpu, VMMR0_DO_NEM_RUN);
1663 if (RT_SUCCESS(rcStrict))
1664 {
1665 /*
1666 * We deal with VINF_NEM_CHANGE_PGM_MODE, VINF_NEM_FLUSH_TLB and
1667 * VINF_NEM_UPDATE_APIC_BASE here, since we're running the risk of
1668 * getting these while we already got another RC (I/O ports).
1669 *
1670 * The APIC base update and a PGM update can happen at the same time, so
1671 * we don't depend on the status code for that and always checks it first.
1672 */
1673 /* APIC base: */
1674 if (pVCpu->nem.s.uPendingApicBase != UINT64_MAX)
1675 {
1676 LogFlow(("nemR3NativeRunGC: calling APICSetBaseMsr(,%RX64)...\n", pVCpu->nem.s.uPendingApicBase));
1677 VBOXSTRICTRC rc2 = APICSetBaseMsr(pVCpu, pVCpu->nem.s.uPendingApicBase);
1678 AssertLogRelMsg(rc2 == VINF_SUCCESS, ("rc2=%Rrc [%#RX64]\n", VBOXSTRICTRC_VAL(rc2), pVCpu->nem.s.uPendingApicBase));
1679 pVCpu->nem.s.uPendingApicBase = UINT64_MAX;
1680 }
1681
1682 /* Status codes: */
1683 VBOXSTRICTRC rcPending = pVCpu->nem.s.rcPending;
1684 pVCpu->nem.s.rcPending = VINF_SUCCESS;
1685 if ( rcStrict == VINF_NEM_CHANGE_PGM_MODE
1686 || rcStrict == VINF_PGM_CHANGE_MODE
1687 || rcPending == VINF_NEM_CHANGE_PGM_MODE )
1688 {
1689 LogFlow(("nemR3NativeRunGC: calling PGMChangeMode...\n"));
1690 int rc = PGMChangeMode(pVCpu, CPUMGetGuestCR0(pVCpu), CPUMGetGuestCR4(pVCpu), CPUMGetGuestEFER(pVCpu));
1691 AssertRCReturn(rc, rc);
1692 if ( rcStrict == VINF_NEM_CHANGE_PGM_MODE
1693 || rcStrict == VINF_PGM_CHANGE_MODE
1694 || rcStrict == VINF_NEM_FLUSH_TLB)
1695 {
1696 if ( !VM_FF_IS_PENDING(pVM, VM_FF_HIGH_PRIORITY_POST_MASK | VM_FF_HP_R0_PRE_HM_MASK)
1697 && !VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_HIGH_PRIORITY_POST_MASK | VMCPU_FF_HP_R0_PRE_HM_MASK)
1698 & ~VMCPU_FF_RESUME_GUEST_MASK))
1699 {
1700 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_RESUME_GUEST_MASK);
1701 continue;
1702 }
1703 rcStrict = VINF_SUCCESS;
1704 }
1705 }
1706 else if (rcStrict == VINF_NEM_FLUSH_TLB || rcPending == VINF_NEM_FLUSH_TLB)
1707 {
1708 LogFlow(("nemR3NativeRunGC: calling PGMFlushTLB...\n"));
1709 int rc = PGMFlushTLB(pVCpu, CPUMGetGuestCR3(pVCpu), true);
1710 AssertRCReturn(rc, rc);
1711 if (rcStrict == VINF_NEM_FLUSH_TLB || rcStrict == VINF_NEM_CHANGE_PGM_MODE)
1712 {
1713 if ( !VM_FF_IS_PENDING(pVM, VM_FF_HIGH_PRIORITY_POST_MASK | VM_FF_HP_R0_PRE_HM_MASK)
1714 && !VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_HIGH_PRIORITY_POST_MASK | VMCPU_FF_HP_R0_PRE_HM_MASK)
1715 & ~VMCPU_FF_RESUME_GUEST_MASK))
1716 {
1717 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_RESUME_GUEST_MASK);
1718 continue;
1719 }
1720 rcStrict = VINF_SUCCESS;
1721 }
1722 }
1723 else if (rcStrict == VINF_NEM_UPDATE_APIC_BASE || rcPending == VERR_NEM_UPDATE_APIC_BASE)
1724 continue;
1725 else
1726 AssertMsg(rcPending == VINF_SUCCESS, ("rcPending=%Rrc\n", VBOXSTRICTRC_VAL(rcPending) ));
1727 }
1728 LogFlow(("nemR3NativeRunGC: returns %Rrc\n", VBOXSTRICTRC_VAL(rcStrict) ));
1729 return rcStrict;
1730 }
1731 }
1732#endif
1733 return nemHCWinRunGC(pVM, pVCpu, NULL /*pGVM*/, NULL /*pGVCpu*/);
1734}
1735
1736
1737bool nemR3NativeCanExecuteGuest(PVM pVM, PVMCPU pVCpu)
1738{
1739 NOREF(pVM); NOREF(pVCpu);
1740 return true;
1741}
1742
1743
1744bool nemR3NativeSetSingleInstruction(PVM pVM, PVMCPU pVCpu, bool fEnable)
1745{
1746 NOREF(pVM); NOREF(pVCpu); NOREF(fEnable);
1747 return false;
1748}
1749
1750
1751/**
1752 * Forced flag notification call from VMEmt.h.
1753 *
1754 * This is only called when pVCpu is in the VMCPUSTATE_STARTED_EXEC_NEM state.
1755 *
1756 * @param pVM The cross context VM structure.
1757 * @param pVCpu The cross context virtual CPU structure of the CPU
1758 * to be notified.
1759 * @param fFlags Notification flags, VMNOTIFYFF_FLAGS_XXX.
1760 */
1761void nemR3NativeNotifyFF(PVM pVM, PVMCPU pVCpu, uint32_t fFlags)
1762{
1763#ifdef NEM_WIN_USE_OUR_OWN_RUN_API
1764 nemHCWinCancelRunVirtualProcessor(pVM, pVCpu);
1765#else
1766# ifdef NEM_WIN_WITH_RING0_RUNLOOP
1767 if (pVM->nem.s.fUseRing0Runloop)
1768 nemHCWinCancelRunVirtualProcessor(pVM, pVCpu);
1769 else
1770# endif
1771 {
1772 Log8(("nemR3NativeNotifyFF: canceling %u\n", pVCpu->idCpu));
1773 HRESULT hrc = WHvCancelRunVirtualProcessor(pVM->nem.s.hPartition, pVCpu->idCpu, 0);
1774 AssertMsg(SUCCEEDED(hrc), ("WHvCancelRunVirtualProcessor -> hrc=%Rhrc\n", hrc));
1775 RT_NOREF_PV(hrc);
1776 }
1777#endif
1778 RT_NOREF_PV(fFlags);
1779}
1780
1781
1782DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv)
1783{
1784 PGMPAGEMAPLOCK Lock;
1785 int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhys, ppv, &Lock);
1786 if (RT_SUCCESS(rc))
1787 PGMPhysReleasePageMappingLock(pVM, &Lock);
1788 return rc;
1789}
1790
1791
1792DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv)
1793{
1794 PGMPAGEMAPLOCK Lock;
1795 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhys, ppv, &Lock);
1796 if (RT_SUCCESS(rc))
1797 PGMPhysReleasePageMappingLock(pVM, &Lock);
1798 return rc;
1799}
1800
1801
1802int nemR3NativeNotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb)
1803{
1804 Log5(("nemR3NativeNotifyPhysRamRegister: %RGp LB %RGp\n", GCPhys, cb));
1805 NOREF(pVM); NOREF(GCPhys); NOREF(cb);
1806 return VINF_SUCCESS;
1807}
1808
1809
1810int nemR3NativeNotifyPhysMmioExMap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvMmio2)
1811{
1812 Log5(("nemR3NativeNotifyPhysMmioExMap: %RGp LB %RGp fFlags=%#x pvMmio2=%p\n", GCPhys, cb, fFlags, pvMmio2));
1813 NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags); NOREF(pvMmio2);
1814 return VINF_SUCCESS;
1815}
1816
1817
1818int nemR3NativeNotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags)
1819{
1820 Log5(("nemR3NativeNotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x\n", GCPhys, cb, fFlags));
1821 NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags);
1822 return VINF_SUCCESS;
1823}
1824
1825
1826/**
1827 * Called early during ROM registration, right after the pages have been
1828 * allocated and the RAM range updated.
1829 *
1830 * This will be succeeded by a number of NEMHCNotifyPhysPageProtChanged() calls
1831 * and finally a NEMR3NotifyPhysRomRegisterEarly().
1832 *
1833 * @returns VBox status code
1834 * @param pVM The cross context VM structure.
1835 * @param GCPhys The ROM address (page aligned).
1836 * @param cb The size (page aligned).
1837 * @param fFlags NEM_NOTIFY_PHYS_ROM_F_XXX.
1838 */
1839int nemR3NativeNotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags)
1840{
1841 Log5(("nemR3NativeNotifyPhysRomRegisterEarly: %RGp LB %RGp fFlags=%#x\n", GCPhys, cb, fFlags));
1842#if 0 /* Let's not do this after all. We'll protection change notifications for each page and if not we'll map them lazily. */
1843 RTGCPHYS const cPages = cb >> X86_PAGE_SHIFT;
1844 for (RTGCPHYS iPage = 0; iPage < cPages; iPage++, GCPhys += X86_PAGE_SIZE)
1845 {
1846 const void *pvPage;
1847 int rc = nemR3NativeGCPhys2R3PtrReadOnly(pVM, GCPhys, &pvPage);
1848 if (RT_SUCCESS(rc))
1849 {
1850 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, (void *)pvPage, GCPhys, X86_PAGE_SIZE,
1851 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute);
1852 if (SUCCEEDED(hrc))
1853 { /* likely */ }
1854 else
1855 {
1856 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
1857 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1858 return VERR_NEM_INIT_FAILED;
1859 }
1860 }
1861 else
1862 {
1863 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc));
1864 return rc;
1865 }
1866 }
1867#else
1868 NOREF(pVM); NOREF(GCPhys); NOREF(cb);
1869#endif
1870 RT_NOREF_PV(fFlags);
1871 return VINF_SUCCESS;
1872}
1873
1874
1875/**
1876 * Called after the ROM range has been fully completed.
1877 *
1878 * This will be preceeded by a NEMR3NotifyPhysRomRegisterEarly() call as well a
1879 * number of NEMHCNotifyPhysPageProtChanged calls.
1880 *
1881 * @returns VBox status code
1882 * @param pVM The cross context VM structure.
1883 * @param GCPhys The ROM address (page aligned).
1884 * @param cb The size (page aligned).
1885 * @param fFlags NEM_NOTIFY_PHYS_ROM_F_XXX.
1886 */
1887int nemR3NativeNotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags)
1888{
1889 Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp fFlags=%#x\n", GCPhys, cb, fFlags));
1890 NOREF(pVM); NOREF(GCPhys); NOREF(cb); NOREF(fFlags);
1891 return VINF_SUCCESS;
1892}
1893
1894
1895/**
1896 * @callback_method_impl{FNPGMPHYSNEMCHECKPAGE}
1897 */
1898static DECLCALLBACK(int) nemR3WinUnsetForA20CheckerCallback(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys,
1899 PPGMPHYSNEMPAGEINFO pInfo, void *pvUser)
1900{
1901 /* We'll just unmap the memory. */
1902 if (pInfo->u2NemState > NEM_WIN_PAGE_STATE_UNMAPPED)
1903 {
1904#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES
1905 int rc = nemHCWinHypercallUnmapPage(pVM, pVCpu, GCPhys);
1906 AssertRC(rc);
1907 if (RT_SUCCESS(rc))
1908#else
1909 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, X86_PAGE_SIZE);
1910 if (SUCCEEDED(hrc))
1911#endif
1912 {
1913 uint32_t cMappedPages = ASMAtomicDecU32(&pVM->nem.s.cMappedPages); NOREF(cMappedPages);
1914 Log5(("NEM GPA unmapped/A20: %RGp (was %s, cMappedPages=%u)\n", GCPhys, g_apszPageStates[pInfo->u2NemState], cMappedPages));
1915 pInfo->u2NemState = NEM_WIN_PAGE_STATE_UNMAPPED;
1916 }
1917 else
1918 {
1919#ifdef NEM_WIN_USE_HYPERCALLS_FOR_PAGES
1920 LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc));
1921 return rc;
1922#else
1923 LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
1924 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1925 return VERR_INTERNAL_ERROR_2;
1926#endif
1927 }
1928 }
1929 RT_NOREF(pVCpu, pvUser);
1930 return VINF_SUCCESS;
1931}
1932
1933
1934/**
1935 * Unmaps a page from Hyper-V for the purpose of emulating A20 gate behavior.
1936 *
1937 * @returns The PGMPhysNemQueryPageInfo result.
1938 * @param pVM The cross context VM structure.
1939 * @param pVCpu The cross context virtual CPU structure.
1940 * @param GCPhys The page to unmap.
1941 */
1942static int nemR3WinUnmapPageForA20Gate(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
1943{
1944 PGMPHYSNEMPAGEINFO Info;
1945 return PGMPhysNemPageInfoChecker(pVM, pVCpu, GCPhys, false /*fMakeWritable*/, &Info,
1946 nemR3WinUnsetForA20CheckerCallback, NULL);
1947}
1948
1949
1950/**
1951 * Called when the A20 state changes.
1952 *
1953 * Hyper-V doesn't seem to offer a simple way of implementing the A20 line
1954 * features of PCs. So, we do a very minimal emulation of the HMA to make DOS
1955 * happy.
1956 *
1957 * @param pVCpu The CPU the A20 state changed on.
1958 * @param fEnabled Whether it was enabled (true) or disabled.
1959 */
1960void nemR3NativeNotifySetA20(PVMCPU pVCpu, bool fEnabled)
1961{
1962 Log(("nemR3NativeNotifySetA20: fEnabled=%RTbool\n", fEnabled));
1963 PVM pVM = pVCpu->CTX_SUFF(pVM);
1964 if (!pVM->nem.s.fA20Fixed)
1965 {
1966 pVM->nem.s.fA20Enabled = fEnabled;
1967 for (RTGCPHYS GCPhys = _1M; GCPhys < _1M + _64K; GCPhys += X86_PAGE_SIZE)
1968 nemR3WinUnmapPageForA20Gate(pVM, pVCpu, GCPhys);
1969 }
1970}
1971
1972
1973/** @page pg_nem_win NEM/win - Native Execution Manager, Windows.
1974 *
1975 * On Windows the Hyper-V root partition (dom0 in Xen terminology) does not have
1976 * nested VT-x or AMD-V capabilities. For a while raw-mode worked inside it,
1977 * but for a while now we've been getting \#GP when trying to modify CR4 in the
1978 * world switcher. So, when Hyper-V is active on Windows we have little choice
1979 * but to use Hyper-V to run our VMs.
1980 *
1981 *
1982 * @section sub_nem_win_whv The WinHvPlatform API
1983 *
1984 * Since Windows 10 build 17083 there is a documented API for managing Hyper-V
1985 * VMs, header file WinHvPlatform.h and implementation in WinHvPlatform.dll.
1986 * This interface is a wrapper around the undocumented Virtualization
1987 * Infrastructure Driver (VID) API - VID.DLL and VID.SYS. The wrapper is
1988 * written in C++, namespaced, and early versions (at least) were using standard C++
1989 * container templates in several places.
1990 *
1991 * When creating a VM using WHvCreatePartition, it will only create the
1992 * WinHvPlatform structures for it, to which you get an abstract pointer. The
1993 * VID API that actually creates the partition is first engaged when you call
1994 * WHvSetupPartition after first setting a lot of properties using
1995 * WHvSetPartitionProperty. Since the VID API is just a very thin wrapper
1996 * around CreateFile and NtDeviceIoControlFile, it returns an actual HANDLE for
1997 * the partition to WinHvPlatform. We fish this HANDLE out of the WinHvPlatform
1998 * partition structures because we need to talk directly to VID for reasons
1999 * we'll get to in a bit. (Btw. we could also intercept the CreateFileW or
2000 * NtDeviceIoControlFile calls from VID.DLL to get the HANDLE should fishing in
2001 * the partition structures become difficult.)
2002 *
2003 * The WinHvPlatform API requires us to both set the number of guest CPUs before
2004 * setting up the partition and call WHvCreateVirtualProcessor for each of them.
2005 * The CPU creation function boils down to a VidMessageSlotMap call that sets up
2006 * and maps a message buffer into ring-3 for async communication with hyper-V
2007 * and/or the VID.SYS thread actually running the CPU thru
2008 * WinHvRunVpDispatchLoop(). When for instance a VMEXIT is encountered, hyper-V
2009 * sends a message that the WHvRunVirtualProcessor API retrieves (and later
2010 * acknowledges) via VidMessageSlotHandleAndGetNext. It should be noted that
2011 * WHvDeleteVirtualProcessor doesn't do much as there seems to be no partner
2012 * function to VidMessageSlotMap that reverses what it did.
2013 *
2014 * Memory is managed thru calls to WHvMapGpaRange and WHvUnmapGpaRange (GPA does
2015 * not mean grade point average here, but rather guest physical address space),
2016 * which correspond to VidCreateVaGpaRangeSpecifyUserVa and VidDestroyGpaRange
2017 * respectively. As 'UserVa' indicates, the functions work on user process
2018 * memory. The mappings are also subject to quota restrictions, so the number
2019 * of ranges is limited and probably their total size as well. Obviously
2020 * VID.SYS keeps track of the ranges, but so does WinHvPlatform, which means
2021 * there is a bit of overhead involved and quota restrictions make sense. For
2022 * some reason though, regions are lazily mapped on VMEXIT/memory by
2023 * WHvRunVirtualProcessor.
2024 *
2025 * Running guest code is done thru the WHvRunVirtualProcessor function. It
2026 * asynchronously starts or resumes hyper-V CPU execution and then waits for an
2027 * VMEXIT message. Hyper-V / VID.SYS will return information about the message
2028 * in the message buffer mapping, and WHvRunVirtualProcessor will convert that
2029 * into its own WHV_RUN_VP_EXIT_CONTEXT format.
2030 *
2031 * Other threads can interrupt the execution by using WHvCancelRunVirtualProcessor,
2032 * in which case the thread in WHvRunVirtualProcessor is woken up via a dummy
2033 * QueueUserAPC and will call VidStopVirtualProcessor to asynchronously end
2034 * execution. The stop CPU call may not immediately succeed if the CPU encountered
2035 * a VMEXIT before the stop was processed, in which case the VMEXIT needs to be
2036 * processed first, and the pending stop will be processed in a subsequent call
2037 * to WHvRunVirtualProcessor.
2038 *
2039 * Registers are retrieved and set via WHvGetVirtualProcessorRegisters and
2040 * WHvSetVirtualProcessorRegisters. In addition, several VMEXITs include
2041 * essential register state in the exit context information, potentially making
2042 * it possible to emulate the instruction causing the exit without involving
2043 * WHvGetVirtualProcessorRegisters.
2044 *
2045 *
2046 * @subsection subsec_nem_win_whv_cons Issues & Feedback
2047 *
2048 * Here are some observations (mostly against build 17101):
2049 *
2050 * - The VMEXIT performance is dismal (build 17134).
2051 *
2052 * Our proof of concept implementation with a kernel runloop (i.e. not using
2053 * WHvRunVirtualProcessor and friends, but calling VID.SYS fast I/O control
2054 * entry point directly) delivers 9-10% of the port I/O performance and only
2055 * 6-7% of the MMIO performance that we have with our own hypervisor.
2056 *
2057 * When using the official WinHvPlatform API, the numbers are 3% for port I/O
2058 * and 5% for MMIO.
2059 *
2060 * While the tests we've done are using tight loops only doing port I/O
2061 * and MMIO, the problem is clearly visible when running regular guest OSes.
2062 * Anything that hammers the VGA device would be suffering, for example:
2063 *
2064 * - Windows 2000 boot screen animation overloads us with MMIO exits
2065 * and won't even boot because all the time is spent in interrupt
2066 * handlers and redrawing the screen.
2067 *
2068 * - DSL 4.4 and its bootmenu logo is slower than molasses in January.
2069 *
2070 * We have not found a workaround for this yet.
2071 *
2072 * Something that might improve the issue a little is to detect blocks with
2073 * excessive MMIO and port I/O exits and emulate instructions to cover
2074 * multiple exits before letting Hyper-V have a go at the guest execution
2075 * again. This will only improve the situation under some circumstances,
2076 * since emulating instructions without recompilation can be expensive, so
2077 * there will only be real gains if the exiting instructions are tightly
2078 * packed.
2079 *
2080 *
2081 * - We need a way to directly modify the TSC offset (or bias if you like).
2082 *
2083 * The current approach of setting the WHvX64RegisterTsc register one by one
2084 * on each virtual CPU in sequence will introduce random inaccuracies,
2085 * especially if the thread doing the job is rescheduled at a bad time.
2086 *
2087 *
2088 * - Unable to access WHvX64RegisterMsrMtrrCap (build 17134).
2089 *
2090 *
2091 * - On AMD Ryzen grub/debian 9.0 ends up with an unrecoverable exception
2092 * when IA32_MTRR_PHYSMASK0 is written.
2093 *
2094 *
2095 * - The IA32_APIC_BASE register does not work right:
2096 *
2097 * - Attempts by the guest to clear bit 11 (EN) are ignored, both the
2098 * guest and the VMM reads back the old value.
2099 *
2100 * - Attempts to modify the base address (bits NN:12) seem to be ignored
2101 * in the same way.
2102 *
2103 * - The VMM can modify both the base address as well as the EN and
2104 * BSP bits, however this is useless if we cannot intercept the WRMSR.
2105 *
2106 * - Attempts by the guest to set the EXTD bit (X2APIC) result in \#GP(0),
2107 * while the VMM ends up with ERROR_HV_INVALID_PARAMETER. Seems
2108 * there is no way to support X2APIC.
2109 *
2110 *
2111 * - The WHvCancelRunVirtualProcessor API schedules a dummy usermode APC callback
2112 * in order to cancel any current or future alertable wait in VID.SYS during
2113 * the VidMessageSlotHandleAndGetNext call.
2114 *
2115 * IIRC this will make the kernel schedule the specified callback thru
2116 * NTDLL!KiUserApcDispatcher by modifying the thread context and quite
2117 * possibly the userland thread stack. When the APC callback returns to
2118 * KiUserApcDispatcher, it will call NtContinue to restore the old thread
2119 * context and resume execution from there. This naturally adds up to some
2120 * CPU cycles, ring transitions aren't for free, especially after Spectre &
2121 * Meltdown mitigations.
2122 *
2123 * Using an NtAlertThread call could do the same without the thread context
2124 * modifications and the extra kernel call.
2125 *
2126 *
2127 * - Not sure if this is a thing, but WHvCancelRunVirtualProcessor seems to
2128 * cause a lot more spurious WHvRunVirtualProcessor returns than what we get
2129 * with the replacement code. By spurious returns we mean that the
2130 * subsequent call to WHvRunVirtualProcessor would return immediately.
2131 *
2132 *
2133 * - When WHvRunVirtualProcessor returns without a message, or on a terse
2134 * VID message like HLT, it will make a kernel call to get some registers.
2135 * This is potentially inefficient if the caller decides he needs more
2136 * register state.
2137 *
2138 * It would be better to just return what's available and let the caller fetch
2139 * what is missing from his point of view in a single kernel call.
2140 *
2141 *
2142 * - The WHvRunVirtualProcessor implementation does lazy GPA range mappings when
2143 * an unmapped GPA message is received from hyper-V.
2144 *
2145 * Since MMIO is currently realized as unmapped GPA, this will slow down all
2146 * MMIO accesses a tiny little bit as WHvRunVirtualProcessor looks up the
2147 * guest physical address to check if it is a pending lazy mapping.
2148 *
2149 * The lazy mapping feature makes no sense to us. We as API user have all the
2150 * information and can do lazy mapping ourselves if we want/have to (see next
2151 * point).
2152 *
2153 *
2154 * - There is no API for modifying protection of a page within a GPA range.
2155 *
2156 * From what we can tell, the only way to modify the protection (like readonly
2157 * -> writable, or vice versa) is to first unmap the range and then remap it
2158 * with the new protection.
2159 *
2160 * We are for instance doing this quite a bit in order to track dirty VRAM
2161 * pages. VRAM pages start out as readonly; when the guest writes to a page
2162 * we take an exit, note down which page it is, make it writable and restart
2163 * the instruction. After refreshing the display, we reset all the writable
2164 * pages to readonly again, bulk fashion.
2165 *
2166 * Now to work around this issue, we do page sized GPA ranges. In addition to
2167 * adding a lot of tracking overhead to WinHvPlatform and VID.SYS, this also
2168 * causes us to exceed our quota before we've even mapped a default sized
2169 * (128MB) VRAM page-by-page. So, to work around this quota issue we have to
2170 * lazily map pages and actively restrict the number of mappings.
2171 *
2172 * Our best workaround thus far is bypassing WinHvPlatform and VID entirely
2173 * when it comes to guest memory management and instead use the underlying
2174 * hypercalls (HvCallMapGpaPages, HvCallUnmapGpaPages) to do it ourselves.
2175 * (This also maps a whole lot better into our own guest page management
2176 * infrastructure.)
2177 *
2178 *
2179 * - Observed problems doing WHvUnmapGpaRange immediately followed by
2180 * WHvMapGpaRange.
2181 *
2182 * As mentioned above, we've been forced to use this sequence when modifying
2183 * page protection. However, when transitioning from readonly to writable,
2184 * we've ended up looping forever with the same write to readonly memory
2185 * VMEXIT. We're wondering if this issue might be related to the lazy mapping
2186 * logic in WinHvPlatform.
2187 *
2188 * Workaround: Insert a WHvRunVirtualProcessor call and make sure to get a GPA
2189 * unmapped exit between the two calls. Not entirely great performance wise
2190 * (or the sanity of our code).
2191 *
2192 *
2193 * - Implementing A20 gate behavior is tedious, whereas correctly emulating the
2194 * A20M# pin (present on 486 and later) is near impossible for SMP setups
2195 * (e.g. possibility of two CPUs with different A20 status).
2196 *
2197 * Workaround: Only do A20 on CPU 0, restricting the emulation to HMA. We
2198 * unmap all pages related to HMA (0x100000..0x10ffff) when the A20 state
2199 * changes, lazily syncing the right pages back when accessed.
2200 *
2201 *
2202 * - WHvRunVirtualProcessor wastes time converting VID/Hyper-V messages to its
2203 * own format (WHV_RUN_VP_EXIT_CONTEXT).
2204 *
2205 * We understand this might be because Microsoft wishes to remain free to
2206 * modify the VID/Hyper-V messages, but it's still rather silly and does slow
2207 * things down a little. We'd much rather just process the messages directly.
2208 *
2209 *
2210 * - WHvRunVirtualProcessor would've benefited from using a callback interface:
2211 *
2212 * - The potential size changes of the exit context structure wouldn't be
2213 * an issue, since the function could manage that itself.
2214 *
2215 * - State handling could probably be simplified (like cancelation).
2216 *
2217 *
2218 * - WHvGetVirtualProcessorRegisters and WHvSetVirtualProcessorRegisters
2219 * internally converts register names, probably using temporary heap buffers.
2220 *
2221 * From the looks of things, they are converting from WHV_REGISTER_NAME to
2222 * HV_REGISTER_NAME found in the "Virtual Processor Register Names" section in
2223 * the "Hypervisor Top-Level Functional Specification" document. This feels
2224 * like an awful waste of time.
2225 *
2226 * We simply cannot understand why HV_REGISTER_NAME isn't used directly here,
2227 * or at least the same values, making any conversion redundant. Restricting
2228 * access to certain registers could easily be implemented by scanning the
2229 * inputs.
2230 *
2231 * To avoid the heap + conversion overhead, we're currently using the
2232 * HvCallGetVpRegisters and HvCallSetVpRegisters calls directly.
2233 *
2234 *
2235 * - The YMM and XCR0 registers are not yet named (17083). This probably
2236 * wouldn't be a problem if HV_REGISTER_NAME was used, see previous point.
2237 *
2238 *
2239 * - Why does VID.SYS only query/set 32 registers at the time thru the
2240 * HvCallGetVpRegisters and HvCallSetVpRegisters hypercalls?
2241 *
2242 * We've no trouble getting/setting all the registers defined by
2243 * WHV_REGISTER_NAME in one hypercall (around 80). Some kind of stack
2244 * buffering or similar?
2245 *
2246 *
2247 * - To handle the VMMCALL / VMCALL instructions, it seems we need to intercept
2248 * \#UD exceptions and inspect the opcodes. A dedicated exit for hypercalls
2249 * would be more efficient, esp. for guests using \#UD for other purposes..
2250 *
2251 *
2252 * - Wrong instruction length in the VpContext with unmapped GPA memory exit
2253 * contexts on 17115/AMD.
2254 *
2255 * One byte "PUSH CS" was reported as 2 bytes, while a two byte
2256 * "MOV [EBX],EAX" was reported with a 1 byte instruction length. Problem
2257 * naturally present in untranslated hyper-v messages.
2258 *
2259 *
2260 * - The I/O port exit context information seems to be missing the address size
2261 * information needed for correct string I/O emulation.
2262 *
2263 * VT-x provides this information in bits 7:9 in the instruction information
2264 * field on newer CPUs. AMD-V in bits 7:9 in the EXITINFO1 field in the VMCB.
2265 *
2266 * We can probably work around this by scanning the instruction bytes for
2267 * address size prefixes. Haven't investigated it any further yet.
2268 *
2269 *
2270 * - Querying WHvCapabilityCodeExceptionExitBitmap returns zero even when
2271 * intercepts demonstrably work (17134).
2272 *
2273 *
2274 * - Querying HvPartitionPropertyDebugChannelId via HvCallGetPartitionProperty
2275 * (hypercall) hangs the host (17134).
2276 *
2277 *
2278 * - The WHvGetCapability function has a weird design:
2279 * - The CapabilityCode parameter is pointlessly duplicated in the output
2280 * structure (WHV_CAPABILITY).
2281 *
2282 * - API takes void pointer, but everyone will probably be using
2283 * WHV_CAPABILITY due to WHV_CAPABILITY::CapabilityCode making it
2284 * impractical to use anything else.
2285 *
2286 * - No output size.
2287 *
2288 * - See GetFileAttributesEx, GetFileInformationByHandleEx,
2289 * FindFirstFileEx, and others for typical pattern for generic
2290 * information getters.
2291 *
2292 * Update: All concerns have been addressed in build 17110.
2293 *
2294 *
2295 * - The WHvGetPartitionProperty function uses the same weird design as
2296 * WHvGetCapability, see above.
2297 *
2298 * Update: All concerns have been addressed in build 17110.
2299 *
2300 *
2301 * - The WHvSetPartitionProperty function has a totally weird design too:
2302 * - In contrast to its partner WHvGetPartitionProperty, the property code
2303 * is not a separate input parameter here but part of the input
2304 * structure.
2305 *
2306 * - The input structure is a void pointer rather than a pointer to
2307 * WHV_PARTITION_PROPERTY which everyone probably will be using because
2308 * of the WHV_PARTITION_PROPERTY::PropertyCode field.
2309 *
2310 * - Really, why use PVOID for the input when the function isn't accepting
2311 * minimal sizes. E.g. WHVPartitionPropertyCodeProcessorClFlushSize only
2312 * requires a 9 byte input, but the function insists on 16 bytes (17083).
2313 *
2314 * - See GetFileAttributesEx, SetFileInformationByHandle, FindFirstFileEx,
2315 * and others for typical pattern for generic information setters and
2316 * getters.
2317 *
2318 * Update: All concerns have been addressed in build 17110.
2319 *
2320 *
2321 *
2322 * @section sec_nem_win_impl Our implementation.
2323 *
2324 * We set out with the goal of wanting to run as much as possible in ring-0,
2325 * reasoning that this would give us the best performance.
2326 *
2327 * This goal was approached gradually, starting out with a pure WinHvPlatform
2328 * implementation, gradually replacing parts: register access, guest memory
2329 * handling, running virtual processors. Then finally moving it all into
2330 * ring-0, while keeping most of it configurable so that we could make
2331 * comparisons (see NEMInternal.h and nemR3NativeRunGC()).
2332 *
2333 *
2334 * @subsection subsect_nem_win_impl_ioctl VID.SYS I/O control calls
2335 *
2336 * To run things in ring-0 we need to talk directly to VID.SYS thru its I/O
2337 * control interface. Looking at changes between like build 17083 and 17101 (if
2338 * memory serves) a set of the VID I/O control numbers shifted a little, which
2339 * means we need to determine them dynamically. We currently do this by hooking
2340 * the NtDeviceIoControlFile API call from VID.DLL and snooping up the
2341 * parameters when making dummy calls to relevant APIs. (We could also
2342 * disassemble the relevant APIs and try fish out the information from that, but
2343 * this is way simpler.)
2344 *
2345 * Issuing I/O control calls from ring-0 is facing a small challenge with
2346 * respect to direct buffering. When using direct buffering the device will
2347 * typically check that the buffer is actually in the user address space range
2348 * and reject kernel addresses. Fortunately, we've got the cross context VM
2349 * structure that is mapped into both kernel and user space, it's also locked
2350 * and safe to access from kernel space. So, we place the I/O control buffers
2351 * in the per-CPU part of it (NEMCPU::uIoCtlBuf) and give the driver the user
2352 * address if direct access buffering or kernel address if not.
2353 *
2354 * The I/O control calls are 'abstracted' in the support driver, see
2355 * SUPR0IoCtlSetupForHandle(), SUPR0IoCtlPerform() and SUPR0IoCtlCleanup().
2356 *
2357 *
2358 * @subsection subsect_nem_win_impl_cpumctx CPUMCTX
2359 *
2360 * Since the CPU state needs to live in Hyper-V when executing, we probably
2361 * should not transfer more than necessary when handling VMEXITs. To help us
2362 * manage this, CPUMCTX got a new field CPUMCTX::fExtrn to indicate which
2363 * part of the state is currently externalized (== in Hyper-V).
2364 *
2365 *
2366 * @subsection sec_nem_win_benchmarks Benchmarks.
2367 *
2368 * @subsubsection subsect_nem_win_benchmarks_bs2t1 Bootsector2-test1
2369 *
2370 * This is ValidationKit/bootsectors/bootsector2-test1.asm as of 2018-06-22
2371 * (internal r123172) running the release build of VirtualBox from the same
2372 * source, though with exit optimizations disabled. Host is AMD Threadripper 1950X
2373 * running an up to date 64-bit Windows 10 build 17134.
2374 *
2375 * The base line column is using the official WinHv API for everything but physical
2376 * memory mapping. The 2nd column is the default NEM/win configuration where we
2377 * put the main execution loop in ring-0, using hypercalls when we can and VID for
2378 * managing execution. The 3rd column is regular VirtualBox using AMD-V directly,
2379 * hyper-V is disabled, main execution loop in ring-0.
2380 *
2381 * @verbatim
2382TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V
2383 32-bit paged protected mode, CPUID : 108 874 ins/sec 113% / 123 602 1198% / 1 305 113
2384 32-bit pae protected mode, CPUID : 106 722 ins/sec 115% / 122 740 1232% / 1 315 201
2385 64-bit long mode, CPUID : 106 798 ins/sec 114% / 122 111 1198% / 1 280 404
2386 16-bit unpaged protected mode, CPUID : 106 835 ins/sec 114% / 121 994 1216% / 1 299 665
2387 32-bit unpaged protected mode, CPUID : 105 257 ins/sec 115% / 121 772 1235% / 1 300 860
2388 real mode, CPUID : 104 507 ins/sec 116% / 121 800 1228% / 1 283 848
2389CPUID EAX=1 : PASSED
2390 32-bit paged protected mode, RDTSC : 99 581 834 ins/sec 100% / 100 323 307 93% / 93 473 299
2391 32-bit pae protected mode, RDTSC : 99 620 585 ins/sec 100% / 99 960 952 84% / 83 968 839
2392 64-bit long mode, RDTSC : 100 540 009 ins/sec 100% / 100 946 372 93% / 93 652 826
2393 16-bit unpaged protected mode, RDTSC : 99 688 473 ins/sec 100% / 100 097 751 76% / 76 281 287
2394 32-bit unpaged protected mode, RDTSC : 98 385 857 ins/sec 102% / 100 510 404 94% / 93 379 536
2395 real mode, RDTSC : 100 087 967 ins/sec 101% / 101 386 138 93% / 93 234 999
2396RDTSC : PASSED
2397 32-bit paged protected mode, Read CR4 : 2 156 102 ins/sec 98% / 2 121 967 17114% / 369 009 009
2398 32-bit pae protected mode, Read CR4 : 2 163 820 ins/sec 98% / 2 133 804 17469% / 377 999 261
2399 64-bit long mode, Read CR4 : 2 164 822 ins/sec 98% / 2 128 698 18875% / 408 619 313
2400 16-bit unpaged protected mode, Read CR4 : 2 162 367 ins/sec 100% / 2 168 508 17132% / 370 477 568
2401 32-bit unpaged protected mode, Read CR4 : 2 163 189 ins/sec 100% / 2 169 808 16768% / 362 734 679
2402 real mode, Read CR4 : 2 162 436 ins/sec 100% / 2 164 914 15551% / 336 288 998
2403Read CR4 : PASSED
2404 real mode, 32-bit IN : 104 649 ins/sec 118% / 123 513 1028% / 1 075 831
2405 real mode, 32-bit OUT : 107 102 ins/sec 115% / 123 660 982% / 1 052 259
2406 real mode, 32-bit IN-to-ring-3 : 105 697 ins/sec 98% / 104 471 201% / 213 216
2407 real mode, 32-bit OUT-to-ring-3 : 105 830 ins/sec 98% / 104 598 198% / 210 495
2408 16-bit unpaged protected mode, 32-bit IN : 104 855 ins/sec 117% / 123 174 1029% / 1 079 591
2409 16-bit unpaged protected mode, 32-bit OUT : 107 529 ins/sec 115% / 124 250 992% / 1 067 053
2410 16-bit unpaged protected mode, 32-bit IN-to-ring-3 : 106 337 ins/sec 103% / 109 565 196% / 209 367
2411 16-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 107 558 ins/sec 100% / 108 237 191% / 206 387
2412 32-bit unpaged protected mode, 32-bit IN : 106 351 ins/sec 116% / 123 584 1016% / 1 081 325
2413 32-bit unpaged protected mode, 32-bit OUT : 106 424 ins/sec 116% / 124 252 995% / 1 059 408
2414 32-bit unpaged protected mode, 32-bit IN-to-ring-3 : 104 035 ins/sec 101% / 105 305 202% / 210 750
2415 32-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 103 831 ins/sec 102% / 106 919 205% / 213 198
2416 32-bit paged protected mode, 32-bit IN : 103 356 ins/sec 119% / 123 870 1041% / 1 076 463
2417 32-bit paged protected mode, 32-bit OUT : 107 177 ins/sec 115% / 124 302 998% / 1 069 655
2418 32-bit paged protected mode, 32-bit IN-to-ring-3 : 104 491 ins/sec 100% / 104 744 200% / 209 264
2419 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 106 603 ins/sec 97% / 103 849 197% / 210 219
2420 32-bit pae protected mode, 32-bit IN : 105 923 ins/sec 115% / 122 759 1041% / 1 103 261
2421 32-bit pae protected mode, 32-bit OUT : 107 083 ins/sec 117% / 126 057 1024% / 1 096 667
2422 32-bit pae protected mode, 32-bit IN-to-ring-3 : 106 114 ins/sec 97% / 103 496 199% / 211 312
2423 32-bit pae protected mode, 32-bit OUT-to-ring-3 : 105 675 ins/sec 96% / 102 096 198% / 209 890
2424 64-bit long mode, 32-bit IN : 105 800 ins/sec 113% / 120 006 1013% / 1 072 116
2425 64-bit long mode, 32-bit OUT : 105 635 ins/sec 113% / 120 375 997% / 1 053 655
2426 64-bit long mode, 32-bit IN-to-ring-3 : 105 274 ins/sec 95% / 100 763 197% / 208 026
2427 64-bit long mode, 32-bit OUT-to-ring-3 : 106 262 ins/sec 94% / 100 749 196% / 209 288
2428NOP I/O Port Access : PASSED
2429 32-bit paged protected mode, 32-bit read : 57 687 ins/sec 119% / 69 136 1197% / 690 548
2430 32-bit paged protected mode, 32-bit write : 57 957 ins/sec 118% / 68 935 1183% / 685 930
2431 32-bit paged protected mode, 32-bit read-to-ring-3 : 57 958 ins/sec 95% / 55 432 276% / 160 505
2432 32-bit paged protected mode, 32-bit write-to-ring-3 : 57 922 ins/sec 100% / 58 340 304% / 176 464
2433 32-bit pae protected mode, 32-bit read : 57 478 ins/sec 119% / 68 453 1141% / 656 159
2434 32-bit pae protected mode, 32-bit write : 57 226 ins/sec 118% / 68 097 1157% / 662 504
2435 32-bit pae protected mode, 32-bit read-to-ring-3 : 57 582 ins/sec 94% / 54 651 268% / 154 867
2436 32-bit pae protected mode, 32-bit write-to-ring-3 : 57 697 ins/sec 100% / 57 750 299% / 173 030
2437 64-bit long mode, 32-bit read : 57 128 ins/sec 118% / 67 779 1071% / 611 949
2438 64-bit long mode, 32-bit write : 57 127 ins/sec 118% / 67 632 1084% / 619 395
2439 64-bit long mode, 32-bit read-to-ring-3 : 57 181 ins/sec 94% / 54 123 265% / 151 937
2440 64-bit long mode, 32-bit write-to-ring-3 : 57 297 ins/sec 99% / 57 286 294% / 168 694
2441 16-bit unpaged protected mode, 32-bit read : 58 827 ins/sec 118% / 69 545 1185% / 697 602
2442 16-bit unpaged protected mode, 32-bit write : 58 678 ins/sec 118% / 69 442 1183% / 694 387
2443 16-bit unpaged protected mode, 32-bit read-to-ring-3 : 57 841 ins/sec 96% / 55 730 275% / 159 163
2444 16-bit unpaged protected mode, 32-bit write-to-ring-3 : 57 855 ins/sec 101% / 58 834 304% / 176 169
2445 32-bit unpaged protected mode, 32-bit read : 58 063 ins/sec 120% / 69 690 1233% / 716 444
2446 32-bit unpaged protected mode, 32-bit write : 57 936 ins/sec 120% / 69 633 1199% / 694 753
2447 32-bit unpaged protected mode, 32-bit read-to-ring-3 : 58 451 ins/sec 96% / 56 183 273% / 159 972
2448 32-bit unpaged protected mode, 32-bit write-to-ring-3 : 58 962 ins/sec 99% / 58 955 298% / 175 936
2449 real mode, 32-bit read : 58 571 ins/sec 118% / 69 478 1160% / 679 917
2450 real mode, 32-bit write : 58 418 ins/sec 118% / 69 320 1185% / 692 513
2451 real mode, 32-bit read-to-ring-3 : 58 072 ins/sec 96% / 55 751 274% / 159 145
2452 real mode, 32-bit write-to-ring-3 : 57 870 ins/sec 101% / 58 755 307% / 178 042
2453NOP MMIO Access : PASSED
2454SUCCESS
2455 * @endverbatim
2456 *
2457 * What we see here is:
2458 *
2459 * - The WinHv API approach is 10 to 12 times slower for exits we can
2460 * handle directly in ring-0 in the VBox AMD-V code.
2461 *
2462 * - The WinHv API approach is 2 to 3 times slower for exits we have to
2463 * go to ring-3 to handle with the VBox AMD-V code.
2464 *
2465 * - By using hypercalls and VID.SYS from ring-0 we gain between
2466 * 13% and 20% over the WinHv API on exits handled in ring-0.
2467 *
2468 * - Exits requiring ring-3 handling are between 6% slower and 3% faster
2469 * than the WinHv API.
2470 *
2471 *
2472 * As a side note, it looks like Hyper-V doesn't let the guest read CR4 but
2473 * triggers exits all the time. This isn't all that important these days since
2474 * OSes like Linux cache the CR4 value specifically to avoid these kinds of exits.
2475 *
2476 *
2477 * @subsubsection subsect_nem_win_benchmarks_w2k Windows 2000 Boot & Shutdown
2478 *
2479 * Timing the startup and automatic shutdown of a Windows 2000 SP4 guest serves
2480 * as a real world benchmark and example of why exit performance is important. When
2481 * Windows 2000 boots up it is doing a lot of VGA redrawing of the boot animation,
2482 * which is very costly. Not having installed guest additions leaves it in a VGA
2483 * mode after the bootup sequence is done, keeping up the screen access expenses,
2484 * though the graphics driver is more economical than the bootvid code.
2485 *
2486 * The VM was configured to automatically logon. A startup script was installed
2487 * to perform the automatic shutting down and powering off the VM (thru
2488 * vts_shutdown.exe -f -p). An offline snapshot of the VM was taken and restored
2489 * before each test run. The test run time is calculated from the monotonic
2490 * VBox.log timestamps, starting with the state change to 'RUNNING' and stopping
2491 * at 'POWERING_OFF'.
2492 *
2493 * The host OS and VirtualBox build is the same as for the bootsector2-test1
2494 * scenario.
2495 *
2496 * Results:
2497 *
2498 * - WinHv API for all but physical page mappings:
2499 * 32 min 12.19 seconds
2500 *
2501 * - The default NEM/win configuration where we put the main execution loop
2502 * in ring-0, using hypercalls when we can and VID for managing execution:
2503 * 3 min 23.18 seconds
2504 *
2505 * - Regular VirtualBox using AMD-V directly, hyper-V is disabled, main
2506 * execution loop in ring-0:
2507 * 58.09 seconds
2508 *
2509 * - WinHv API with exit history based optimizations:
2510 * 58.66 seconds
2511 *
2512 * - Hypercall + VID.SYS with exit history based optimizations:
2513 * 58.94 seconds
2514 *
2515 * With a well above average machine needing over half an hour for booting a
2516 * nearly 20 year old guest kind of says it all. The 13%-20% exit performance
2517 * increase we get by using hypercalls and VID.SYS directly pays off a lot here.
2518 * The 3m23s is almost acceptable in comparison to the half an hour.
2519 *
2520 * The similarity between the last three results strongly hints at Windows 2000
2521 * doing a lot of waiting during boot and shutdown and isn't the best testcase
2522 * once a basic performance level is reached.
2523 *
2524 *
2525 * @subsubsection subsection_iem_win_benchmarks_deb9_nat Debian 9 NAT performance
2526 *
2527 * This benchmark is about network performance over NAT from a 64-bit Debian 9
2528 * VM with a single CPU. For network performance measurements, we use our own
2529 * NetPerf tool (ValidationKit/utils/network/NetPerf.cpp) to measure latency
2530 * and throughput.
2531 *
2532 * The setups, builds and configurations are as in the previous benchmarks
2533 * (release r123172 on 1950X running 64-bit W10/17134). Please note that the
2534 * exit optimizations haven't yet been tuned with NetPerf in mind.
2535 *
2536 * The NAT network setup was selected here since it's the default one and the
2537 * slowest one. There is quite a bit of IPC with worker threads and packet
2538 * processing involved.
2539 *
2540 * Latency test is first up. This is a classic back and forth between the two
2541 * NetPerf instances, where the key measurement is the roundtrip latency. The
2542 * values here are the lowest result over 3-6 runs.
2543 *
2544 * Against host system:
2545 * - 152 258 ns/roundtrip - 100% - regular VirtualBox SVM
2546 * - 271 059 ns/roundtrip - 178% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
2547 * - 280 149 ns/roundtrip - 184% - Hypercalls + VID.SYS in ring-0
2548 * - 317 735 ns/roundtrip - 209% - Win HV API with exit optimizations.
2549 * - 342 440 ns/roundtrip - 225% - Win HV API
2550 *
2551 * Against a remote Windows 10 system over a 10Gbps link:
2552 * - 243 969 ns/roundtrip - 100% - regular VirtualBox SVM
2553 * - 384 427 ns/roundtrip - 158% - Win HV API with exit optimizations.
2554 * - 402 411 ns/roundtrip - 165% - Hypercalls + VID.SYS in ring-0
2555 * - 406 313 ns/roundtrip - 167% - Win HV API
2556 * - 413 160 ns/roundtrip - 169% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
2557 *
2558 * What we see here is:
2559 *
2560 * - Consistent and significant latency increase using Hyper-V compared
2561 * to directly harnessing AMD-V ourselves.
2562 *
2563 * - When talking to the host, it's clear that the hypercalls + VID.SYS
2564 * in ring-0 method pays off.
2565 *
2566 * - When talking to a different host, the numbers are closer and it
2567 * is no longer clear which Hyper-V execution method is better.
2568 *
2569 *
2570 * Throughput benchmarks are performed by one side pushing data full throttle
2571 * for 10 seconds (minus a 1 second at each end of the test), then reversing
2572 * the roles and measuring it in the other direction. The tests ran 3-5 times
2573 * and below are the highest and lowest results in each direction.
2574 *
2575 * Receiving from host system:
2576 * - Regular VirtualBox SVM:
2577 * Max: 96 907 549 bytes/s - 100%
2578 * Min: 86 912 095 bytes/s - 100%
2579 * - Hypercalls + VID.SYS in ring-0:
2580 * Max: 84 036 544 bytes/s - 87%
2581 * Min: 64 978 112 bytes/s - 75%
2582 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2583 * Max: 77 760 699 bytes/s - 80%
2584 * Min: 72 677 171 bytes/s - 84%
2585 * - Win HV API with exit optimizations:
2586 * Max: 64 465 905 bytes/s - 67%
2587 * Min: 62 286 369 bytes/s - 72%
2588 * - Win HV API:
2589 * Max: 62 466 631 bytes/s - 64%
2590 * Min: 61 362 782 bytes/s - 70%
2591 *
2592 * Sending to the host system:
2593 * - Regular VirtualBox SVM:
2594 * Max: 87 728 652 bytes/s - 100%
2595 * Min: 86 923 198 bytes/s - 100%
2596 * - Hypercalls + VID.SYS in ring-0:
2597 * Max: 84 280 749 bytes/s - 96%
2598 * Min: 78 369 842 bytes/s - 90%
2599 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2600 * Max: 84 119 932 bytes/s - 96%
2601 * Min: 77 396 811 bytes/s - 89%
2602 * - Win HV API:
2603 * Max: 81 714 377 bytes/s - 93%
2604 * Min: 78 697 419 bytes/s - 91%
2605 * - Win HV API with exit optimizations:
2606 * Max: 80 502 488 bytes/s - 91%
2607 * Min: 71 164 978 bytes/s - 82%
2608 *
2609 * Receiving from a remote Windows 10 system over a 10Gbps link:
2610 * - Hypercalls + VID.SYS in ring-0:
2611 * Max: 115 346 922 bytes/s - 136%
2612 * Min: 112 912 035 bytes/s - 137%
2613 * - Regular VirtualBox SVM:
2614 * Max: 84 517 504 bytes/s - 100%
2615 * Min: 82 597 049 bytes/s - 100%
2616 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2617 * Max: 77 736 251 bytes/s - 92%
2618 * Min: 73 813 784 bytes/s - 89%
2619 * - Win HV API with exit optimizations:
2620 * Max: 63 035 587 bytes/s - 75%
2621 * Min: 57 538 380 bytes/s - 70%
2622 * - Win HV API:
2623 * Max: 62 279 185 bytes/s - 74%
2624 * Min: 56 813 866 bytes/s - 69%
2625 *
2626 * Sending to a remote Windows 10 system over a 10Gbps link:
2627 * - Win HV API with exit optimizations:
2628 * Max: 116 502 357 bytes/s - 103%
2629 * Min: 49 046 550 bytes/s - 59%
2630 * - Regular VirtualBox SVM:
2631 * Max: 113 030 991 bytes/s - 100%
2632 * Min: 83 059 511 bytes/s - 100%
2633 * - Hypercalls + VID.SYS in ring-0:
2634 * Max: 106 435 031 bytes/s - 94%
2635 * Min: 47 253 510 bytes/s - 57%
2636 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2637 * Max: 94 842 287 bytes/s - 84%
2638 * Min: 68 362 172 bytes/s - 82%
2639 * - Win HV API:
2640 * Max: 65 165 225 bytes/s - 58%
2641 * Min: 47 246 573 bytes/s - 57%
2642 *
2643 * What we see here is:
2644 *
2645 * - Again consistent numbers when talking to the host. Showing that the
2646 * ring-0 approach is preferable to the ring-3 one.
2647 *
2648 * - Again when talking to a remote host, things get more difficult to
2649 * make sense of. The spread is larger and direct AMD-V gets beaten by
2650 * a different Hyper-V approach in each direction.
2651 *
2652 * - However, if we treat the first entry (remote host) as weird spikes, the
2653 * other entries are consistently worse compared to direct AMD-V. For the
2654 * send case we get really bad results for WinHV.
2655 *
2656 */
2657
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette