VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMR0A.asm @ 87336

Last change on this file: r87336, checked in by vboxsync, 4 years ago

VMM/HMR0A.asm: Restore non-volatile registers skipping loading the filler value.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 44.2 KB
1; $Id: HMR0A.asm 87336 2021-01-21 01:20:00Z vboxsync $
2;; @file
3; HM - Ring-0 VMX, SVM world-switch and helper routines.
4;
5
6;
7; Copyright (C) 2006-2020 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18;*********************************************************************************************************************************
19;* Header Files *
20;*********************************************************************************************************************************
21%define RT_ASM_WITH_SEH64
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "VBox/vmm/hm_vmx.mac"
25%include "VBox/vmm/cpum.mac"
26%include "VBox/vmm/vm.mac"
27%include "iprt/x86.mac"
28%include "HMInternal.mac"
29
30%ifndef RT_ARCH_AMD64
31 %error AMD64 only.
32%endif
33
34
35;*********************************************************************************************************************************
36;* Defined Constants And Macros *
37;*********************************************************************************************************************************
38;; The offset of the XMM registers in X86FXSTATE.
39; Use define because I'm too lazy to convert the struct.
40%define XMM_OFF_IN_X86FXSTATE 160
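; (For reference: in the FXSAVE/XSAVE legacy area the XMM registers start at
; byte offset 160 (0a0h), 16 bytes apiece, so XMM0..XMM15 occupy bytes 160..415;
; the define above simply hard-codes that offset.)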
41
42;; Spectre filler for 64-bit mode.
43; Chosen to be an invalid address (also with 5-level paging).
44%define SPECTRE_FILLER 0x02204204207fffff
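; (Illustrative check of the value above: with 4-level paging a canonical
; address needs bits 63:48 to be copies of bit 47, and with 5-level paging
; bits 63:57 must be copies of bit 56.  In 0x02204204207fffff bits 63:48 are
; 0220h rather than 0 or 0ffffh, and bit 56 is 0 while bit 57 is 1, so the
; value is non-canonical either way and any dereference of it will fault.)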
45
46;;
47; Determines whether we skip restoring the GDTR, IDTR and TR across VMX non-root operation.
48;
49%define VMX_SKIP_GDTR
50%define VMX_SKIP_TR
51%define VBOX_SKIP_RESTORE_SEG
52%ifdef RT_OS_DARWIN
53 ; Load the NULL selector into DS, ES, FS and GS on 64-bit darwin so we don't
54 ; risk loading a stale LDT value or something invalid.
55 %define HM_64_BIT_USE_NULL_SEL
56 ; Darwin (Mavericks) uses the IDTR limit to store the CPU ID, so we always need to restore it.
57 ; See @bugref{6875}.
58%else
59 %define VMX_SKIP_IDTR
60%endif
61
62
63;; @def CALLEE_PRESERVED_REGISTER_COUNT
64; Number of registers pushed by PUSH_CALLEE_PRESERVED_REGISTERS
65%ifdef ASM_CALL64_GCC
66 %define CALLEE_PRESERVED_REGISTER_COUNT 5
67%else
68 %define CALLEE_PRESERVED_REGISTER_COUNT 7
69%endif
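; (The registers in question are rbx and r12-r15 under the SysV/GCC ABI; the
; Microsoft x64 convention additionally treats rsi and rdi as callee-saved,
; hence the two extra pushes in PUSH_CALLEE_PRESERVED_REGISTERS below.)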
70
71;; @def PUSH_CALLEE_PRESERVED_REGISTERS
72; Macro for pushing all GPRs we must preserve for the caller.
73%macro PUSH_CALLEE_PRESERVED_REGISTERS 0
74 push r15
75 SEH64_PUSH_GREG r15
76 %assign cbFrame cbFrame + 8
77 %assign frm_saved_r15 -cbFrame
78
79 push r14
80 SEH64_PUSH_GREG r14
81 %assign cbFrame cbFrame + 8
82 %assign frm_saved_r14 -cbFrame
83
84 push r13
85 SEH64_PUSH_GREG r13
86 %assign cbFrame cbFrame + 8
87 %assign frm_saved_r13 -cbFrame
88
89 push r12
90 SEH64_PUSH_GREG r12
91 %assign cbFrame cbFrame + 8
92 %assign frm_saved_r12 -cbFrame
93
94 push rbx
95 SEH64_PUSH_GREG rbx
96 %assign cbFrame cbFrame + 8
97 %assign frm_saved_rbx -cbFrame
98
99 %ifdef ASM_CALL64_MSC
100 push rsi
101 SEH64_PUSH_GREG rsi
102 %assign cbFrame cbFrame + 8
103 %assign frm_saved_rsi -cbFrame
104
105 push rdi
106 SEH64_PUSH_GREG rdi
107 %assign cbFrame cbFrame + 8
108 %assign frm_saved_rdi -cbFrame
109 %endif
110%endmacro
111
112;; @def POP_CALLEE_PRESERVED_REGISTERS
113; Counterpart to PUSH_CALLEE_PRESERVED_REGISTERS for use in the epilogue.
114%macro POP_CALLEE_PRESERVED_REGISTERS 0
115 %ifdef ASM_CALL64_MSC
116 pop rdi
117 %assign cbFrame cbFrame - 8
118 %undef frm_saved_rdi
119
120 pop rsi
121 %assign cbFrame cbFrame - 8
122 %undef frm_saved_rsi
123 %endif
124 pop rbx
125 %assign cbFrame cbFrame - 8
126 %undef frm_saved_rbx
127
128 pop r12
129 %assign cbFrame cbFrame - 8
130 %undef frm_saved_r12
131
132 pop r13
133 %assign cbFrame cbFrame - 8
134 %undef frm_saved_r13
135
136 pop r14
137 %assign cbFrame cbFrame - 8
138 %undef frm_saved_r14
139
140 pop r15
141 %assign cbFrame cbFrame - 8
142 %undef frm_saved_r15
143%endmacro
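; Typical pairing (an illustrative sketch only; it mirrors how VMXR0StartVM64
; further down uses these macros, the pop side being reached via
; RESTORE_STATE_VM64):
;
;       %assign cbFrame 0
;       PUSH_CALLEE_PRESERVED_REGISTERS     ; prologue: pushes and records the frm_saved_* offsets
;       ...
;       POP_CALLEE_PRESERVED_REGISTERS      ; epilogue: pops in reverse order and undefines frm_saved_*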
144
145;; @def PUSH_RELEVANT_SEGMENT_REGISTERS
146; Macro saving all segment registers on the stack.
147; @param 1 Full width register name.
148; @param 2 16-bit register name for \a 1.
149
150;; @def POP_RELEVANT_SEGMENT_REGISTERS
151; Macro restoring all segment registers on the stack.
152; @param 1 Full width register name.
153; @param 2 16-bit register name for \a 1.
154%ifdef VBOX_SKIP_RESTORE_SEG
155 %macro PUSH_RELEVANT_SEGMENT_REGISTERS 2
156 %endmacro
157
158 %macro POP_RELEVANT_SEGMENT_REGISTERS 2
159 %endmacro
160%else ; !VBOX_SKIP_RESTORE_SEG
161 ; Trashes rax, rdx & rcx.
162 %macro PUSH_RELEVANT_SEGMENT_REGISTERS 2
163 %ifndef HM_64_BIT_USE_NULL_SEL
164 mov %2, es
165 push %1
166 mov %2, ds
167 push %1
168 %endif
169
170 ; Special case for FS: Windows and Linux either don't use it or restore it when leaving kernel mode;
171 ; Solaris, OTOH, doesn't, so we must save it.
172 mov ecx, MSR_K8_FS_BASE
173 rdmsr
174 push rdx
175 push rax
176 %ifndef HM_64_BIT_USE_NULL_SEL
177 push fs
178 %endif
179
180 ; Special case for GS; OSes typically use swapgs to reset the hidden base register for GS on entry into the kernel.
181 ; The same happens on exit.
182 mov ecx, MSR_K8_GS_BASE
183 rdmsr
184 push rdx
185 push rax
186 %ifndef HM_64_BIT_USE_NULL_SEL
187 push gs
188 %endif
189 %endmacro
190
191 ; Trashes rax, rdx & rcx.
192 %macro POP_RELEVANT_SEGMENT_REGISTERS 2
193 ; Note: do not step through this code with a debugger!
194 %ifndef HM_64_BIT_USE_NULL_SEL
195 xor eax, eax
196 mov ds, ax
197 mov es, ax
198 mov fs, ax
199 mov gs, ax
200 %endif
201
202 %ifndef HM_64_BIT_USE_NULL_SEL
203 pop gs
204 %endif
205 pop rax
206 pop rdx
207 mov ecx, MSR_K8_GS_BASE
208 wrmsr
209
210 %ifndef HM_64_BIT_USE_NULL_SEL
211 pop fs
212 %endif
213 pop rax
214 pop rdx
215 mov ecx, MSR_K8_FS_BASE
216 wrmsr
217 ; Now it's safe to step again
218
219 %ifndef HM_64_BIT_USE_NULL_SEL
220 pop %1
221 mov ds, %2
222 pop %1
223 mov es, %2
224 %endif
225 %endmacro
226%endif ; VBOX_SKIP_RESTORE_SEG
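; Usage example (this is how VMXR0StartVM64 below invokes them; note that with
; VBOX_SKIP_RESTORE_SEG defined above both macros currently expand to nothing):
;
;       PUSH_RELEVANT_SEGMENT_REGISTERS xAX, ax
;       ...
;       POP_RELEVANT_SEGMENT_REGISTERS  xAX, ax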
227
228
229;;
230; Creates an indirect branch prediction barrier on CPUs that need and support it.
231; @clobbers eax, edx, ecx
232; @param 1 How to address CPUMCTX.
233; @param 2 Which flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT)
234%macro INDIRECT_BRANCH_PREDICTION_BARRIER_OLD 2
235 test byte [%1 + CPUMCTX.fWorldSwitcher], %2
236 jz %%no_indirect_branch_barrier
237 mov ecx, MSR_IA32_PRED_CMD
238 mov eax, MSR_IA32_PRED_CMD_F_IBPB
239 xor edx, edx
240 wrmsr
241%%no_indirect_branch_barrier:
242%endmacro
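; (What the barrier boils down to, as a sketch: if the requested CPUMCTX_WSF_IBPB_*
; flag is set in CPUMCTX.fWorldSwitcher, write MSR_IA32_PRED_CMD with the IBPB
; bit (MSR_IA32_PRED_CMD_F_IBPB) in eax and edx=0, which asks the CPU to
; discard previously learned indirect branch predictions.  RESTORE_STATE_VM64
; below invokes this variant as INDIRECT_BRANCH_PREDICTION_BARRIER_OLD xDI,
; CPUMCTX_WSF_IBPB_EXIT.)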
243
244;;
245; Creates an indirect branch prediction barrier on CPUs that need and support it.
246; @clobbers eax, edx, ecx
247; @param 1 How to address VMCPU.
248; @param 2 Which flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT)
249%macro INDIRECT_BRANCH_PREDICTION_BARRIER 2
250 test byte [%1 + VMCPU.cpum.GstCtx + CPUMCTX.fWorldSwitcher], %2
251 jz %%no_indirect_branch_barrier
252 mov ecx, MSR_IA32_PRED_CMD
253 mov eax, MSR_IA32_PRED_CMD_F_IBPB
254 xor edx, edx
255 wrmsr
256%%no_indirect_branch_barrier:
257%endmacro
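; (Same barrier as above, only addressed via VMCPU; SVMR0VMRun below uses it as
; INDIRECT_BRANCH_PREDICTION_BARRIER rsi, CPUMCTX_WSF_IBPB_ENTRY before VMRUN
; and as INDIRECT_BRANCH_PREDICTION_BARRIER rax, CPUMCTX_WSF_IBPB_EXIT after it.)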
258
259;;
260; Creates an indirect branch prediction and L1D barrier on CPUs that need and support it.
261; @clobbers eax, edx, ecx
262; @param 1 How to address CPUMCTX.
263; @param 2 Which IBPB flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT)
264; @param 3 Which FLUSH flag to test for (CPUMCTX_WSF_L1D_ENTRY)
265; @param 4 Which MDS flag to test for (CPUMCTX_WSF_MDS_ENTRY)
266%macro INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER 4
267 ; Only one test+jmp for CPUs where all of these mitigations are disabled.
268 test byte [%1 + CPUMCTX.fWorldSwitcher], (%2 | %3 | %4)
269 jz %%no_barrier_needed
270
271 ; The eax:edx value is the same for both.
272 AssertCompile(MSR_IA32_PRED_CMD_F_IBPB == MSR_IA32_FLUSH_CMD_F_L1D)
273 mov eax, MSR_IA32_PRED_CMD_F_IBPB
274 xor edx, edx
275
276 ; Indirect branch barrier.
277 test byte [%1 + CPUMCTX.fWorldSwitcher], %2
278 jz %%no_indirect_branch_barrier
279 mov ecx, MSR_IA32_PRED_CMD
280 wrmsr
281%%no_indirect_branch_barrier:
282
283 ; Level 1 data cache flush.
284 test byte [%1 + CPUMCTX.fWorldSwitcher], %3
285 jz %%no_cache_flush_barrier
286 mov ecx, MSR_IA32_FLUSH_CMD
287 wrmsr
288 jmp %%no_mds_buffer_flushing ; MDS flushing is included in L1D_FLUSH
289%%no_cache_flush_barrier:
290
291 ; MDS buffer flushing.
292 test byte [%1 + CPUMCTX.fWorldSwitcher], %4
293 jz %%no_mds_buffer_flushing
294 sub xSP, xCB ; make room on the stack for the DS selector
295 mov [xSP], ds
296 verw [xSP]
297 add xSP, xCB
298%%no_mds_buffer_flushing:
299
300%%no_barrier_needed:
301%endmacro
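; Usage example (this is the invocation VMXR0StartVM64 makes right before
; VMLAUNCH/VMRESUME, once rsi points at the guest CPUMCTX):
;
;       INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY, CPUMCTX_WSF_MDS_ENTRY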
302
303
304;*********************************************************************************************************************************
305;* External Symbols *
306;*********************************************************************************************************************************
307%ifdef VBOX_WITH_KERNEL_USING_XMM
308extern NAME(CPUMIsGuestFPUStateActive)
309%endif
310
311
312BEGINCODE
313
314
315;;
316; Restores host-state fields.
317;
318; @returns VBox status code
319; @param f32RestoreHost x86: [ebp + 08h] msc: ecx gcc: edi RestoreHost flags.
320; @param pRestoreHost x86: [ebp + 0ch] msc: rdx gcc: rsi Pointer to the RestoreHost struct.
321;
322ALIGNCODE(16)
323BEGINPROC VMXRestoreHostState
324%ifndef ASM_CALL64_GCC
325 ; Use GCC's input registers since we'll be needing both rcx and rdx further
326 ; down with the wrmsr instruction. Use the R10 and R11 registers for saving
327 ; RDI and RSI since MSC preserves the latter two registers.
328 mov r10, rdi
329 mov r11, rsi
330 mov rdi, rcx
331 mov rsi, rdx
332%endif
333 SEH64_END_PROLOGUE
334
335 test edi, VMX_RESTORE_HOST_GDTR
336 jz .test_idtr
337 lgdt [rsi + VMXRESTOREHOST.HostGdtr]
338
339.test_idtr:
340 test edi, VMX_RESTORE_HOST_IDTR
341 jz .test_ds
342 lidt [rsi + VMXRESTOREHOST.HostIdtr]
343
344.test_ds:
345 test edi, VMX_RESTORE_HOST_SEL_DS
346 jz .test_es
347 mov ax, [rsi + VMXRESTOREHOST.uHostSelDS]
348 mov ds, eax
349
350.test_es:
351 test edi, VMX_RESTORE_HOST_SEL_ES
352 jz .test_tr
353 mov ax, [rsi + VMXRESTOREHOST.uHostSelES]
354 mov es, eax
355
356.test_tr:
357 test edi, VMX_RESTORE_HOST_SEL_TR
358 jz .test_fs
359 ; When restoring the TR, we must first clear the busy flag or we'll end up faulting.
360 mov dx, [rsi + VMXRESTOREHOST.uHostSelTR]
361 mov ax, dx
362 and eax, X86_SEL_MASK_OFF_RPL ; mask away TI and RPL bits leaving only the descriptor offset
363 test edi, VMX_RESTORE_HOST_GDT_READ_ONLY | VMX_RESTORE_HOST_GDT_NEED_WRITABLE
364 jnz .gdt_readonly
365 add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt.
366 and dword [rax + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
367 ltr dx
368 jmp short .test_fs
369.gdt_readonly:
370 test edi, VMX_RESTORE_HOST_GDT_NEED_WRITABLE
371 jnz .gdt_readonly_need_writable
372 mov rcx, cr0
373 mov r9, rcx
374 add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt.
375 and rcx, ~X86_CR0_WP
376 mov cr0, rcx
377 and dword [rax + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
378 ltr dx
379 mov cr0, r9
380 jmp short .test_fs
381.gdt_readonly_need_writable:
382 add rax, qword [rsi + VMXRESTOREHOST.HostGdtrRw + 2] ; xAX <- descriptor offset + GDTR.pGdtRw
383 and dword [rax + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
384 lgdt [rsi + VMXRESTOREHOST.HostGdtrRw]
385 ltr dx
386 lgdt [rsi + VMXRESTOREHOST.HostGdtr] ; load the original GDT
387
388.test_fs:
389 ;
390 ; When restoring the selector values for FS and GS, we'll temporarily trash
391 ; the base address (at least the high 32-bit bits, but quite possibly the
392 ; whole base address), the wrmsr will restore it correctly. (VT-x actually
393 ; restores the base correctly when leaving guest mode, but not the selector
394 ; value, so there is little problem with interrupts being enabled prior to
395 ; this restore job.)
396 ; We'll disable ints once for both FS and GS as that's probably faster.
397 ;
398 test edi, VMX_RESTORE_HOST_SEL_FS | VMX_RESTORE_HOST_SEL_GS
399 jz .restore_success
400 pushfq
401 cli ; (see above)
402
403 test edi, VMX_RESTORE_HOST_SEL_FS
404 jz .test_gs
405 mov ax, word [rsi + VMXRESTOREHOST.uHostSelFS]
406 mov fs, eax
407 mov eax, dword [rsi + VMXRESTOREHOST.uHostFSBase] ; uHostFSBase - Lo
408 mov edx, dword [rsi + VMXRESTOREHOST.uHostFSBase + 4h] ; uHostFSBase - Hi
409 mov ecx, MSR_K8_FS_BASE
410 wrmsr
411
412.test_gs:
413 test edi, VMX_RESTORE_HOST_SEL_GS
414 jz .restore_flags
415 mov ax, word [rsi + VMXRESTOREHOST.uHostSelGS]
416 mov gs, eax
417 mov eax, dword [rsi + VMXRESTOREHOST.uHostGSBase] ; uHostGSBase - Lo
418 mov edx, dword [rsi + VMXRESTOREHOST.uHostGSBase + 4h] ; uHostGSBase - Hi
419 mov ecx, MSR_K8_GS_BASE
420 wrmsr
421
422.restore_flags:
423 popfq
424
425.restore_success:
426 mov eax, VINF_SUCCESS
427%ifndef ASM_CALL64_GCC
428 ; Restore RDI and RSI on MSC.
429 mov rdi, r10
430 mov rsi, r11
431%endif
432 ret
433ENDPROC VMXRestoreHostState
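; Illustrative call sequence (a sketch only; the real caller is C code in the
; ring-0 VMX code, and g_MyRestoreHost below is a hypothetical VMXRESTOREHOST
; instance, not something defined in this file):
;
;       mov     ecx, VMX_RESTORE_HOST_SEL_FS | VMX_RESTORE_HOST_SEL_GS  ; f32RestoreHost (msc convention)
;       lea     rdx, [rel g_MyRestoreHost]                              ; pRestoreHost   (msc convention)
;       call    NAME(VMXRestoreHostState)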
434
435
436;;
437; Dispatches an NMI to the host.
438;
439ALIGNCODE(16)
440BEGINPROC VMXDispatchHostNmi
441 ; NMI is always vector 2. The IDT[2] IRQ handler cannot be anything else. See Intel spec. 6.3.1 "External Interrupts".
442 SEH64_END_PROLOGUE
443 int 2
444 ret
445ENDPROC VMXDispatchHostNmi
446
447
448%ifdef VBOX_WITH_KERNEL_USING_XMM
449
450;;
451; Wrapper around vmx.pfnStartVM that preserves host XMM registers and
452; loads the guest ones when necessary.
453;
454; @cproto DECLASM(int) hmR0VMXStartVMWrapXMM(RTHCUINT fResume, PCPUMCTX pCtx, void *pvUnused, PVM pVM,
455; PVMCPU pVCpu, PFNHMVMXSTARTVM pfnStartVM);
456;
457; @returns eax
458;
459; @param fResumeVM msc:rcx
460; @param pCtx msc:rdx
461; @param pvUnused msc:r8
462; @param pVM msc:r9
463; @param pVCpu msc:[rbp+30h] The cross context virtual CPU structure of the calling EMT.
464; @param pfnStartVM msc:[rbp+38h]
465;
466; @remarks This is essentially the same code as hmR0SVMRunWrapXMM, only the parameters differ a little bit.
467;
468; @remarks Drivers shouldn't use AVX registers without saving+loading:
469; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
470; However, the compiler docs have a different idea:
471; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
472; We'll go with the former for now.
473;
474; ASSUMING 64-bit and Windows for now.
475;
476ALIGNCODE(16)
477BEGINPROC hmR0VMXStartVMWrapXMM
478 SEH64_END_PROLOGUE
479 push xBP
480 mov xBP, xSP
481 sub xSP, 0b0h + 040h ; Don't bother optimizing the frame size.
482
483 ; Spill input parameters.
484 mov [xBP + 010h], rcx ; fResumeVM
485 mov [xBP + 018h], rdx ; pCtx
486 mov [xBP + 020h], r8 ; pvUnused
487 mov [xBP + 028h], r9 ; pVM
488
489 ; Ask CPUM whether we've started using the FPU yet.
490 mov rcx, [xBP + 30h] ; pVCpu
491 call NAME(CPUMIsGuestFPUStateActive)
492 test al, al
493 jnz .guest_fpu_state_active
494
495 ; No need to mess with XMM registers; just call the start routine and return.
496 mov r11, [xBP + 38h] ; pfnStartVM
497 mov r10, [xBP + 30h] ; pVCpu
498 mov [xSP + 020h], r10
499 mov rcx, [xBP + 010h] ; fResumeVM
500 mov rdx, [xBP + 018h] ; pCtx
501 mov r8, [xBP + 020h] ; pvUnused
502 mov r9, [xBP + 028h] ; pVM
503 call r11
504
505 leave
506 ret
507
508ALIGNCODE(8)
509.guest_fpu_state_active:
510 ; Save the non-volatile host XMM registers.
511 movdqa [rsp + 040h + 000h], xmm6
512 movdqa [rsp + 040h + 010h], xmm7
513 movdqa [rsp + 040h + 020h], xmm8
514 movdqa [rsp + 040h + 030h], xmm9
515 movdqa [rsp + 040h + 040h], xmm10
516 movdqa [rsp + 040h + 050h], xmm11
517 movdqa [rsp + 040h + 060h], xmm12
518 movdqa [rsp + 040h + 070h], xmm13
519 movdqa [rsp + 040h + 080h], xmm14
520 movdqa [rsp + 040h + 090h], xmm15
521 stmxcsr [rsp + 040h + 0a0h]
522
523 mov r10, [xBP + 018h] ; pCtx
524 mov eax, [r10 + CPUMCTX.fXStateMask]
525 test eax, eax
526 jz .guest_fpu_state_manually
527
528 ;
529 ; Using XSAVE to load the guest XMM, YMM and ZMM registers.
530 ;
531 and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
532 xor edx, edx
533 mov r10, [r10 + CPUMCTX.pXStateR0]
534 xrstor [r10]
535
536 ; Make the call (same as in the other case).
537 mov r11, [xBP + 38h] ; pfnStartVM
538 mov r10, [xBP + 30h] ; pVCpu
539 mov [xSP + 020h], r10
540 mov rcx, [xBP + 010h] ; fResumeVM
541 mov rdx, [xBP + 018h] ; pCtx
542 mov r8, [xBP + 020h] ; pvUnused
543 mov r9, [xBP + 028h] ; pVM
544 call r11
545
546 mov r11d, eax ; save return value (xsave below uses eax)
547
548 ; Save the guest XMM registers.
549 mov r10, [xBP + 018h] ; pCtx
550 mov eax, [r10 + CPUMCTX.fXStateMask]
551 and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
552 xor edx, edx
553 mov r10, [r10 + CPUMCTX.pXStateR0]
554 xsave [r10]
555
556 mov eax, r11d ; restore return value
557
558.restore_non_volatile_host_xmm_regs:
559 ; Load the non-volatile host XMM registers.
560 movdqa xmm6, [rsp + 040h + 000h]
561 movdqa xmm7, [rsp + 040h + 010h]
562 movdqa xmm8, [rsp + 040h + 020h]
563 movdqa xmm9, [rsp + 040h + 030h]
564 movdqa xmm10, [rsp + 040h + 040h]
565 movdqa xmm11, [rsp + 040h + 050h]
566 movdqa xmm12, [rsp + 040h + 060h]
567 movdqa xmm13, [rsp + 040h + 070h]
568 movdqa xmm14, [rsp + 040h + 080h]
569 movdqa xmm15, [rsp + 040h + 090h]
570 ldmxcsr [rsp + 040h + 0a0h]
571 leave
572 ret
573
574 ;
575 ; No XSAVE, load and save the guest XMM registers manually.
576 ;
577.guest_fpu_state_manually:
578 ; Load the full guest XMM register state.
579 mov r10, [r10 + CPUMCTX.pXStateR0]
580 movdqa xmm0, [r10 + XMM_OFF_IN_X86FXSTATE + 000h]
581 movdqa xmm1, [r10 + XMM_OFF_IN_X86FXSTATE + 010h]
582 movdqa xmm2, [r10 + XMM_OFF_IN_X86FXSTATE + 020h]
583 movdqa xmm3, [r10 + XMM_OFF_IN_X86FXSTATE + 030h]
584 movdqa xmm4, [r10 + XMM_OFF_IN_X86FXSTATE + 040h]
585 movdqa xmm5, [r10 + XMM_OFF_IN_X86FXSTATE + 050h]
586 movdqa xmm6, [r10 + XMM_OFF_IN_X86FXSTATE + 060h]
587 movdqa xmm7, [r10 + XMM_OFF_IN_X86FXSTATE + 070h]
588 movdqa xmm8, [r10 + XMM_OFF_IN_X86FXSTATE + 080h]
589 movdqa xmm9, [r10 + XMM_OFF_IN_X86FXSTATE + 090h]
590 movdqa xmm10, [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h]
591 movdqa xmm11, [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h]
592 movdqa xmm12, [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h]
593 movdqa xmm13, [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h]
594 movdqa xmm14, [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h]
595 movdqa xmm15, [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h]
596 ldmxcsr [r10 + X86FXSTATE.MXCSR]
597
598 ; Make the call (same as in the other case).
599 mov r11, [xBP + 38h] ; pfnStartVM
600 mov r10, [xBP + 30h] ; pVCpu
601 mov [xSP + 020h], r10
602 mov rcx, [xBP + 010h] ; fResumeVM
603 mov rdx, [xBP + 018h] ; pCtx
604 mov r8, [xBP + 020h] ; pvUnused
605 mov r9, [xBP + 028h] ; pVM
606 call r11
607
608 ; Save the guest XMM registers.
609 mov r10, [xBP + 018h] ; pCtx
610 mov r10, [r10 + CPUMCTX.pXStateR0]
611 stmxcsr [r10 + X86FXSTATE.MXCSR]
612 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 000h], xmm0
613 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 010h], xmm1
614 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 020h], xmm2
615 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 030h], xmm3
616 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 040h], xmm4
617 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 050h], xmm5
618 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 060h], xmm6
619 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 070h], xmm7
620 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 080h], xmm8
621 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 090h], xmm9
622 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10
623 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11
624 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12
625 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13
626 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14
627 movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15
628 jmp .restore_non_volatile_host_xmm_regs
629ENDPROC hmR0VMXStartVMWrapXMM
630
631;;
632; Wrapper around svm.pfnVMRun that preserves host XMM registers and
633; loads the guest ones when necessary.
634;
635; @cproto DECLASM(int) hmR0SVMRunWrapXMM(PVM pVM, PVMCPU pVCpu, RTHCPHYS HCPhysVmcb, PFNHMSVMVMRUN pfnVMRun);
636;
637; @returns eax
638;
639; @param pVM msc:rcx
640; @param pVCpu msc:rdx The cross context virtual CPU structure of the calling EMT.
641; @param HCPhysVmcb msc:r8
642; @param pfnVMRun msc:r9
643;
644; @remarks This is essentially the same code as hmR0VMXStartVMWrapXMM, only the parameters differ a little bit.
645;
646; @remarks Drivers shouldn't use AVX registers without saving+loading:
647; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
648; However, the compiler docs have a different idea:
649; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
650; We'll go with the former for now.
651;
652; ASSUMING 64-bit and Windows for now.
653ALIGNCODE(64)
654BEGINPROC hmR0SVMRunWrapXMM
655 SEH64_END_PROLOGUE
656 push xBP
657 mov xBP, xSP
658 sub xSP, 0b0h + 040h ; don't bother optimizing the frame size
659
660%ifndef ASM_CALL64_MSC
661 %error "MSC only"
662%endif
663 ; Spill input parameters.
664 mov [xBP + 010h], rcx ; pVM
665 mov [xBP + 018h], rdx ; pVCpu
666 mov [xBP + 020h], r8 ; HCPhysVmcb
667 mov [xBP + 028h], r9 ; pfnVMRun
668
669 ; Ask CPUM whether we've started using the FPU yet.
670;; @todo implement this in assembly, it's just checking a couple of things. Or have the C code do it.
671 mov rcx, rdx ; pVCpu
672 call NAME(CPUMIsGuestFPUStateActive)
673 test al, al
674
675 mov rcx, [xBP + 010h] ; pVM
676 mov rdx, [xBP + 018h] ; pVCpu
677 mov r8, [xBP + 020h] ; HCPhysVmcb
678 mov r9, [xBP + 028h] ; pfnVMRun
679
680 jnz .guest_fpu_state_active
681
682 ; No need to mess with XMM registers; just call the start routine and return.
683 call r9
684
685 leave
686 ret
687
688ALIGNCODE(8)
689.guest_fpu_state_active:
690 ; Save the non-volatile host XMM registers.
691;; @todo change to rbp relative addressing as that saves a byte per instruction!
692 movdqa [rsp + 040h + 000h], xmm6
693 movdqa [rsp + 040h + 010h], xmm7
694 movdqa [rsp + 040h + 020h], xmm8
695 movdqa [rsp + 040h + 030h], xmm9
696 movdqa [rsp + 040h + 040h], xmm10
697 movdqa [rsp + 040h + 050h], xmm11
698 movdqa [rsp + 040h + 060h], xmm12
699 movdqa [rsp + 040h + 070h], xmm13
700 movdqa [rsp + 040h + 080h], xmm14
701 movdqa [rsp + 040h + 090h], xmm15
702 stmxcsr [rsp + 040h + 0a0h]
703
704 mov r11, rdx ; r11 = pVCpu (rdx may get trashed)
705 mov eax, [rdx + VMCPU.cpum.GstCtx + CPUMCTX.fXStateMask]
706 test eax, eax
707 jz .guest_fpu_state_manually
708
709 ;
710 ; Using XSAVE.
711 ;
712 and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
713 xor edx, edx
714 mov r10, [r11 + VMCPU.cpum.GstCtx + CPUMCTX.pXStateR0]
715 xrstor [r10]
716
717 ; Make the call (same as in the other case).
718 mov rdx, r11 ; restore pVCpu to rdx
719 call r9
720
721 mov r10d, eax ; save return value (xsave below uses eax)
722
723 ; Save the guest XMM registers.
724 mov rcx, [xBP + 018h] ; pVCpu
725 mov eax, [rcx + VMCPU.cpum.GstCtx + CPUMCTX.fXStateMask]
726 and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
727 mov rcx, [rcx + VMCPU.cpum.GstCtx + CPUMCTX.pXStateR0]
728 xor edx, edx
729 xsave [rcx]
730
731 mov eax, r10d ; restore return value
732
733.restore_non_volatile_host_xmm_regs:
734 ; Load the non-volatile host XMM registers.
735;; @todo change to rbp relative addressing as that saves a byte per instruction!
736 movdqa xmm6, [rsp + 040h + 000h]
737 movdqa xmm7, [rsp + 040h + 010h]
738 movdqa xmm8, [rsp + 040h + 020h]
739 movdqa xmm9, [rsp + 040h + 030h]
740 movdqa xmm10, [rsp + 040h + 040h]
741 movdqa xmm11, [rsp + 040h + 050h]
742 movdqa xmm12, [rsp + 040h + 060h]
743 movdqa xmm13, [rsp + 040h + 070h]
744 movdqa xmm14, [rsp + 040h + 080h]
745 movdqa xmm15, [rsp + 040h + 090h]
746 ldmxcsr [rsp + 040h + 0a0h]
747 leave
748 ret
749
750 ;
751 ; No XSAVE, load and save the guest XMM registers manually.
752 ;
753ALIGNCODE(8)
754.guest_fpu_state_manually:
755 ; Load the full guest XMM register state.
756 mov rdx, [r11 + VMCPU.cpum.GstCtx + CPUMCTX.pXStateR0]
757 movdqa xmm0, [rdx + XMM_OFF_IN_X86FXSTATE + 000h]
758 movdqa xmm1, [rdx + XMM_OFF_IN_X86FXSTATE + 010h]
759 movdqa xmm2, [rdx + XMM_OFF_IN_X86FXSTATE + 020h]
760 movdqa xmm3, [rdx + XMM_OFF_IN_X86FXSTATE + 030h]
761 movdqa xmm4, [rdx + XMM_OFF_IN_X86FXSTATE + 040h]
762 movdqa xmm5, [rdx + XMM_OFF_IN_X86FXSTATE + 050h]
763 movdqa xmm6, [rdx + XMM_OFF_IN_X86FXSTATE + 060h]
764 movdqa xmm7, [rdx + XMM_OFF_IN_X86FXSTATE + 070h]
765 movdqa xmm8, [rdx + XMM_OFF_IN_X86FXSTATE + 080h]
766 movdqa xmm9, [rdx + XMM_OFF_IN_X86FXSTATE + 090h]
767 movdqa xmm10, [rdx + XMM_OFF_IN_X86FXSTATE + 0a0h]
768 movdqa xmm11, [rdx + XMM_OFF_IN_X86FXSTATE + 0b0h]
769 movdqa xmm12, [rdx + XMM_OFF_IN_X86FXSTATE + 0c0h]
770 movdqa xmm13, [rdx + XMM_OFF_IN_X86FXSTATE + 0d0h]
771 movdqa xmm14, [rdx + XMM_OFF_IN_X86FXSTATE + 0e0h]
772 movdqa xmm15, [rdx + XMM_OFF_IN_X86FXSTATE + 0f0h]
773 ldmxcsr [rdx + X86FXSTATE.MXCSR]
774
775 ; Make the call (same as in the other case).
776 mov rdx, r11 ; restore pVCpu to rdx
777 call r9
778
779 ; Save the guest XMM registers.
780 mov rdx, [xBP + 018h] ; pVCpu
781 mov rdx, [rdx + VMCPU.cpum.GstCtx + CPUMCTX.pXStateR0]
782 stmxcsr [rdx + X86FXSTATE.MXCSR]
783 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 000h], xmm0
784 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 010h], xmm1
785 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 020h], xmm2
786 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 030h], xmm3
787 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 040h], xmm4
788 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 050h], xmm5
789 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 060h], xmm6
790 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 070h], xmm7
791 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 080h], xmm8
792 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 090h], xmm9
793 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10
794 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11
795 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12
796 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13
797 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14
798 movdqa [rdx + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15
799 jmp .restore_non_volatile_host_xmm_regs
800ENDPROC hmR0SVMRunWrapXMM
801
802%endif ; VBOX_WITH_KERNEL_USING_XMM
803
804
805;; @def RESTORE_STATE_VM64
806; Macro restoring essential host state and updating guest state
807; for 64-bit host, 64-bit guest for VT-x.
808;
809%macro RESTORE_STATE_VM64 0
810 ; Restore base and limit of the IDTR & GDTR.
811 %ifndef VMX_SKIP_IDTR
812 lidt [xSP]
813 add xSP, xCB * 2
814 %endif
815 %ifndef VMX_SKIP_GDTR
816 lgdt [xSP]
817 add xSP, xCB * 2
818 %endif
819
820 push xDI
821 %ifndef VMX_SKIP_TR
822 mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR)
823 %else
824 mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR)
825 %endif
826
827 mov qword [xDI + CPUMCTX.eax], rax
828 mov rax, SPECTRE_FILLER
829 mov qword [xDI + CPUMCTX.ebx], rbx
830 mov rbx, rax
831 mov qword [xDI + CPUMCTX.ecx], rcx
832 mov rcx, rax
833 mov qword [xDI + CPUMCTX.edx], rdx
834 mov rdx, rax
835 mov qword [xDI + CPUMCTX.esi], rsi
836 mov rsi, rax
837 mov qword [xDI + CPUMCTX.ebp], rbp
838 mov rbp, rax
839 mov qword [xDI + CPUMCTX.r8], r8
840 mov r8, rax
841 mov qword [xDI + CPUMCTX.r9], r9
842 mov r9, rax
843 mov qword [xDI + CPUMCTX.r10], r10
844 mov r10, rax
845 mov qword [xDI + CPUMCTX.r11], r11
846 mov r11, rax
847 mov qword [xDI + CPUMCTX.r12], r12
848 mov r12, rax
849 mov qword [xDI + CPUMCTX.r13], r13
850 mov r13, rax
851 mov qword [xDI + CPUMCTX.r14], r14
852 mov r14, rax
853 mov qword [xDI + CPUMCTX.r15], r15
854 mov r15, rax
855 mov rax, cr2
856 mov qword [xDI + CPUMCTX.cr2], rax
857
858 pop xAX ; The guest rdi we pushed above
859 mov qword [xDI + CPUMCTX.edi], rax
860
861 ; Fight spectre.
862 INDIRECT_BRANCH_PREDICTION_BARRIER_OLD xDI, CPUMCTX_WSF_IBPB_EXIT
863
864 %ifndef VMX_SKIP_TR
865 ; Restore TSS selector; must mark it as not busy before using ltr!
866 ; ASSUME that this is supposed to be 'BUSY' (saves 20-30 ticks on the T42p).
867 ; @todo get rid of sgdt
868 pop xBX ; Saved TR
869 sub xSP, xCB * 2
870 sgdt [xSP]
871 mov xAX, xBX
872 and eax, X86_SEL_MASK_OFF_RPL ; mask away TI and RPL bits leaving only the descriptor offset
873 add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset
874 and dword [xAX + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
875 ltr bx
876 add xSP, xCB * 2
877 %endif
878
879 pop xAX ; Saved LDTR
880 cmp eax, 0
881 je %%skip_ldt_write64
882 lldt ax
883
884%%skip_ldt_write64:
885 pop xSI ; pCtx (needed in rsi by the macros below)
886
887 ; Restore segment registers.
888 POP_RELEVANT_SEGMENT_REGISTERS xAX, ax
889
890 ; Restore the host XCR0 if necessary.
891 pop xCX
892 test ecx, ecx
893 jnz %%xcr0_after_skip
894 pop xAX
895 pop xDX
896 xsetbv ; ecx is already zero.
897%%xcr0_after_skip:
898
899 ; Restore general purpose registers.
900 POP_CALLEE_PRESERVED_REGISTERS
901%endmacro
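; (The macro above is expanded at .vmlaunch64_done and at the two VM-start
; failure paths in VMXR0StartVM64 below.  It assumes the stack still holds, top
; to bottom, the optional IDTR/GDTR images, the saved TR and LDTR, pCtx, the
; XCR0 marker and host XCR0 words, and finally the registers pushed by
; PUSH_CALLEE_PRESERVED_REGISTERS, the optional parts depending on the
; VMX_SKIP_* defines at the top of the file.)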
902
903
904;;
905; Prepares for and executes VMLAUNCH/VMRESUME (64-bit guest mode).
906;
907; @returns VBox status code
908; @param fResume msc:rcx, gcc:rdi Whether to use vmlaunch/vmresume.
909; @param pCtx msc:rdx, gcc:rsi Pointer to the guest-CPU context.
910; @param pvUnused msc:r8, gcc:rdx Unused argument.
911; @param pVM msc:r9, gcc:rcx The cross context VM structure.
912; @param pVCpu msc:[ebp+30], gcc:r8 The cross context virtual CPU structure of the calling EMT.
913;
914ALIGNCODE(16)
915BEGINPROC VMXR0StartVM64
916 push xBP
917 mov xBP, xSP
918
919 pushf
920 cli
921
922 ; Save all general purpose host registers.
923%assign cbFrame 0
924 PUSH_CALLEE_PRESERVED_REGISTERS
925 SEH64_END_PROLOGUE
926
927 ; First we have to save some final CPU context registers.
928 lea r10, [.vmlaunch64_done wrt rip]
929 mov rax, VMX_VMCS_HOST_RIP ; return address (too difficult to continue after VMLAUNCH?)
930 vmwrite rax, r10
931 ; Note: ASSUMES success!
932
933 ;
934 ; Unify the input parameter registers.
935 ;
936%ifdef ASM_CALL64_GCC
937 ; fResume already in rdi
938 ; pCtx already in rsi
939 mov rbx, rdx ; pvUnused
940%else
941 mov rdi, rcx ; fResume
942 mov rsi, rdx ; pCtx
943 mov rbx, r8 ; pvUnused
944%endif
945
946 ;
947 ; Save the host XCR0 and load the guest one if necessary.
948 ; Note! Trashes rdx and rcx.
949 ;
950%ifdef ASM_CALL64_MSC
951 mov rax, [xBP + 30h] ; pVCpu
952%else
953 mov rax, r8 ; pVCpu
954%endif
955 test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
956 jz .xcr0_before_skip
957
958 xor ecx, ecx
959 xgetbv ; save the host one on the stack
960 push xDX
961 push xAX
962
963 mov eax, [xSI + CPUMCTX.aXcr] ; load the guest one
964 mov edx, [xSI + CPUMCTX.aXcr + 4]
965 xor ecx, ecx ; paranoia
966 xsetbv
967
968 push 0 ; indicate that we must restore XCR0 (popped into ecx, thus 0)
969 jmp .xcr0_before_done
970
971.xcr0_before_skip:
972 push 3fh ; indicate that we need not
973.xcr0_before_done:
974
975 ;
976 ; Save segment registers.
977 ; Note! Trashes rdx & rcx, so we moved it here (amd64 case).
978 ;
979 PUSH_RELEVANT_SEGMENT_REGISTERS xAX, ax
980
981 ; Save the pCtx pointer.
982 push xSI
983
984 ; Save host LDTR.
985 xor eax, eax
986 sldt ax
987 push xAX
988
989%ifndef VMX_SKIP_TR
990 ; The host TR limit is reset to 0x67; save & restore it manually.
991 str eax
992 push xAX
993%endif
994
995%ifndef VMX_SKIP_GDTR
996 ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly!
997 sub xSP, xCB * 2
998 sgdt [xSP]
999%endif
1000%ifndef VMX_SKIP_IDTR
1001 sub xSP, xCB * 2
1002 sidt [xSP]
1003%endif
1004
1005 ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction).
1006 mov rbx, qword [xSI + CPUMCTX.cr2]
1007 mov rdx, cr2
1008 cmp rbx, rdx
1009 je .skip_cr2_write
1010 mov cr2, rbx
1011
1012.skip_cr2_write:
1013 mov eax, VMX_VMCS_HOST_RSP
1014 vmwrite xAX, xSP
1015 ; Note: ASSUMES success!
1016 ; Don't mess with ESP anymore!!!
1017
1018 ; Fight spectre and similar.
1019 INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY, CPUMCTX_WSF_MDS_ENTRY
1020
1021 ; Load guest general purpose registers.
1022 mov rax, qword [xSI + CPUMCTX.eax]
1023 mov rbx, qword [xSI + CPUMCTX.ebx]
1024 mov rcx, qword [xSI + CPUMCTX.ecx]
1025 mov rdx, qword [xSI + CPUMCTX.edx]
1026 mov rbp, qword [xSI + CPUMCTX.ebp]
1027 mov r8, qword [xSI + CPUMCTX.r8]
1028 mov r9, qword [xSI + CPUMCTX.r9]
1029 mov r10, qword [xSI + CPUMCTX.r10]
1030 mov r11, qword [xSI + CPUMCTX.r11]
1031 mov r12, qword [xSI + CPUMCTX.r12]
1032 mov r13, qword [xSI + CPUMCTX.r13]
1033 mov r14, qword [xSI + CPUMCTX.r14]
1034 mov r15, qword [xSI + CPUMCTX.r15]
1035
1036 ; Resume or start VM?
1037 cmp xDI, 0 ; fResume
1038
1039 ; Load guest rdi & rsi.
1040 mov rdi, qword [xSI + CPUMCTX.edi]
1041 mov rsi, qword [xSI + CPUMCTX.esi]
1042
1043 je .vmlaunch64_launch
1044
1045 vmresume
1046 jc near .vmxstart64_invalid_vmcs_ptr
1047 jz near .vmxstart64_start_failed
1048 jmp .vmlaunch64_done ; here if vmresume detected a failure
1049
1050.vmlaunch64_launch:
1051 vmlaunch
1052 jc near .vmxstart64_invalid_vmcs_ptr
1053 jz near .vmxstart64_start_failed
1054 jmp .vmlaunch64_done ; here if vmlaunch detected a failure
1055
1056ALIGNCODE(16)
1057.vmlaunch64_done:
1058 RESTORE_STATE_VM64
1059 mov eax, VINF_SUCCESS
1060
1061.vmstart64_end:
1062 popf
1063 pop xBP
1064 ret
1065
1066.vmxstart64_invalid_vmcs_ptr:
1067 RESTORE_STATE_VM64
1068 mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM
1069 jmp .vmstart64_end
1070
1071.vmxstart64_start_failed:
1072 RESTORE_STATE_VM64
1073 mov eax, VERR_VMX_UNABLE_TO_START_VM
1074 jmp .vmstart64_end
1075ENDPROC VMXR0StartVM64
1076
1077
1078;;
1079; Clears the MDS buffers using VERW.
1080ALIGNCODE(16)
1081BEGINPROC hmR0MdsClear
1082 SEH64_END_PROLOGUE
1083 sub xSP, xCB
1084 mov [xSP], ds
1085 verw [xSP]
1086 add xSP, xCB
1087 ret
1088ENDPROC hmR0MdsClear
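; (Why this works, briefly: on CPUs with the MD_CLEAR microcode update, VERW
; executed with a valid writable data segment selector as its memory operand
; also overwrites the microarchitectural fill/store/load-port buffers; writing
; DS to the stack above merely provides such a selector operand.)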
1089
1090
1091;;
1092; Prepares for and executes VMRUN (32-bit and 64-bit guests).
1093;
1094; @returns VBox status code
1095; @param pVM msc:rcx,gcc:rdi The cross context VM structure (unused).
1096; @param pVCpu msc:rdx,gcc:rsi The cross context virtual CPU structure of the calling EMT.
1097; @param HCPhysVmcb msc:r8, gcc:rdx Physical address of guest VMCB.
1098;
1099ALIGNCODE(64)
1100BEGINPROC SVMR0VMRun
1101 push rbp
1102 SEH64_PUSH_xBP
1103 mov rbp, rsp
1104 SEH64_SET_FRAME_xBP 0
1105 pushf
1106 sub rsp, 30h - 8h ; The frame is 30h bytes, but the rbp-08h entry is the above pushf.
1107 SEH64_ALLOCATE_STACK 30h ; And we have CALLEE_PRESERVED_REGISTER_COUNT following it.
1108
1109%define frm_fRFlags -08h
1110%define frm_uHostXcr0 -18h ; 128-bit
1111%define frm_fNoRestoreXcr0 -20h ; Non-zero if we should skip XCR0 restoring.
1112%define frm_pVCpu -28h ; Where we stash pVCpu for use after the vmrun.
1113%define frm_HCPhysVmcbHost -30h ; Where we stash HCPhysVmcbHost for the vmload after vmrun.
1114%assign cbFrame 30h
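; Resulting frame layout (an illustrative sketch, offsets relative to rbp):
;       rbp+08h  return address
;       rbp+00h  saved rbp
;       rbp-08h  saved RFLAGS (frm_fRFlags, from the pushf above)
;       rbp-18h  host XCR0, 16 bytes (frm_uHostXcr0)
;       rbp-20h  frm_fNoRestoreXcr0
;       rbp-28h  frm_pVCpu
;       rbp-30h  frm_HCPhysVmcbHost
;       below    the CALLEE_PRESERVED_REGISTER_COUNT registers pushed next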
1115
1116 ; Manual save and restore:
1117 ; - General purpose registers except RIP, RSP, RAX
1118 ;
1119 ; Trashed:
1120 ; - CR2 (we don't care)
1121 ; - LDTR (reset to 0)
1122 ; - DRx (presumably not changed at all)
1123 ; - DR7 (reset to 0x400)
1124
1125 ; Save all general purpose host registers.
1126 PUSH_CALLEE_PRESERVED_REGISTERS
1127 SEH64_END_PROLOGUE
1128%if cbFrame != (30h + 8 * CALLEE_PRESERVED_REGISTER_COUNT)
1129 %error Bad cbFrame value
1130%endif
1131
1132 ; Shuffle parameter registers so that r8=HCPhysVmcb and rsi=pVCpu. (rdx & rcx will soon be trashed.)
1133%ifdef ASM_CALL64_GCC
1134 mov r8, rdx ; Put HCPhysVmcb in r8 like on MSC as rdx is trashed below.
1135%else
1136 mov rsi, rdx ; Put pVCpu in rsi like on GCC as rdx is trashed below.
1137 ;mov rdi, rcx ; Put pVM in rdi like on GCC as rcx is trashed below.
1138%endif
1139
1140 ; Save the host XCR0 and load the guest one if necessary.
1141 mov ecx, 3fh ; indicate that we need not restore XCR0 (in case we jump)
1142 test byte [rsi + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
1143 jz .xcr0_before_skip
1144
1145 xor ecx, ecx
1146 xgetbv ; save the host XCR0 on the stack
1147 mov [rbp + frm_uHostXcr0 + 8], rdx
1148 mov [rbp + frm_uHostXcr0 ], rax
1149
1150 mov eax, [rsi + VMCPU.cpum.GstCtx + CPUMCTX.aXcr] ; load the guest XCR0
1151 mov edx, [rsi + VMCPU.cpum.GstCtx + CPUMCTX.aXcr + 4]
1152 xor ecx, ecx ; paranoia; Also, indicates that we must restore XCR0 (moved into ecx, thus 0).
1153 xsetbv
1154
1155.xcr0_before_skip:
1156 mov [rbp + frm_fNoRestoreXcr0], rcx
1157
1158 ; Save pVCpu pointer for simplifying saving of the GPRs afterwards.
1159 mov qword [rbp + frm_pVCpu], rsi
1160
1161 ; Save host fs, gs, sysenter msr etc.
1162 mov rax, [rsi + VMCPU.hm + HMCPU.u + HMCPUSVM.HCPhysVmcbHost]
1163 mov qword [rbp + frm_HCPhysVmcbHost], rax ; save for the vmload after vmrun
1164 vmsave
1165
1166 ; Fight spectre (trashes rax, rdx and rcx).
1167 INDIRECT_BRANCH_PREDICTION_BARRIER rsi, CPUMCTX_WSF_IBPB_ENTRY
1168
1169 ; Setup rax for VMLOAD.
1170 mov rax, r8 ; HCPhysVmcb (64 bits physical address; take low dword only)
1171
1172 ; Load guest general purpose registers (rax is loaded from the VMCB by VMRUN).
1173 mov rbx, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.ebx]
1174 mov rcx, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.ecx]
1175 mov rdx, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.edx]
1176 mov rdi, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.edi]
1177 mov rbp, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.ebp]
1178 mov r8, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r8]
1179 mov r9, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r9]
1180 mov r10, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r10]
1181 mov r11, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r11]
1182 mov r12, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r12]
1183 mov r13, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r13]
1184 mov r14, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r14]
1185 mov r15, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.r15]
1186 mov rsi, qword [rsi + VMCPU.cpum.GstCtx + CPUMCTX.esi]
1187
1188 ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch.
1189 clgi
1190 sti
1191
1192 ; Load guest FS, GS, Sysenter MSRs etc.
1193 vmload
1194
1195 ; Run the VM.
1196 vmrun
1197
1198 ; Save guest fs, gs, sysenter msr etc.
1199 vmsave
1200
1201 ; Load host fs, gs, sysenter msr etc.
1202 mov rax, [rsp + cbFrame + frm_HCPhysVmcbHost] ; load HCPhysVmcbHost (rbp is not operational yet, thus rsp)
1203 vmload
1204
1205 ; Set the global interrupt flag again, but execute cli to make sure IF=0.
1206 cli
1207 stgi
1208
1209 ; Pop pVCpu (saved above) and save the guest GPRs (sans RSP and RAX).
1210 mov rax, [rsp + cbFrame + frm_pVCpu] ; (rbp still not operational)
1211
1212 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.ebp], rbp
1213 lea rbp, [rsp + cbFrame]
1214 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.ecx], rcx
1215 mov rcx, SPECTRE_FILLER
1216 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.edx], rdx
1217 mov rdx, rcx
1218 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r8], r8
1219 mov r8, rcx
1220 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r9], r9
1221 mov r9, rcx
1222 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r10], r10
1223 mov r10, rcx
1224 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r11], r11
1225 mov r11, rcx
1226 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.edi], rdi
1227%ifdef ASM_CALL64_MSC
1228 mov rdi, [rbp + frm_saved_rdi]
1229%else
1230 mov rdi, rcx
1231%endif
1232 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.esi], rsi
1233%ifdef ASM_CALL64_MSC
1234 mov rsi, [rbp + frm_saved_rsi]
1235%else
1236 mov rsi, rcx
1237%endif
1238 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.ebx], rbx
1239 mov rbx, [rbp + frm_saved_rbx]
1240 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r12], r12
1241 mov r12, [rbp + frm_saved_r12]
1242 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r13], r13
1243 mov r13, [rbp + frm_saved_r13]
1244 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r14], r14
1245 mov r14, [rbp + frm_saved_r14]
1246 mov qword [rax + VMCPU.cpum.GstCtx + CPUMCTX.r15], r15
1247 mov r15, [rbp + frm_saved_r15]
1248
1249 ; Fight spectre. Note! Trashes rax, rdx and rcx!
1250 INDIRECT_BRANCH_PREDICTION_BARRIER rax, CPUMCTX_WSF_IBPB_EXIT
1251
1252 ; Restore the host xcr0 if necessary.
1253 mov rcx, [rbp + frm_fNoRestoreXcr0]
1254 test ecx, ecx
1255 jnz .xcr0_after_skip
1256 mov rdx, [rbp + frm_uHostXcr0 + 8]
1257 mov rax, [rbp + frm_uHostXcr0]
1258 xsetbv ; ecx is already zero
1259.xcr0_after_skip:
1260nop
1261; POP_CALLEE_PRESERVED_REGISTERS
1262;%if cbFrame != 30h
1263; %error Bad cbFrame value
1264;%endif
1265
1266 add rsp, cbFrame - 8h
1267 mov eax, VINF_SUCCESS
1268 popf
1269 leave
1270 ret
1271%undef frm_uHostXcr0
1272%undef frm_fNoRestoreXcr0
1273%undef frm_pVCpu
1274%undef frm_HCPhysVmcbHost
1275%undef cbFrame
1276ENDPROC SVMR0VMRun
1277