VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/FTM.cpp@ 38883

Last change on this file since 38883 was 38838, checked in by vboxsync, 13 years ago

VMM,++: Try fix the async reset, suspend and power-off problems in PDM wrt conflicting VMM requests. Split them into priority requests and normal requests. The priority requests can safely be processed when PDM is doing async state change waits, the normal ones cannot. (The problem I bumped into was a unmap-chunk request from PGM being processed during PDMR3Reset, causing a recursive VMMR3EmtRendezvous deadlock.)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 41.5 KB
Line 
1/* $Id: FTM.cpp 38838 2011-09-23 11:21:55Z vboxsync $ */
2/** @file
3 * FTM - Fault Tolerance Manager
4 */
5
6/*
7 * Copyright (C) 2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_FTM
23#include "FTMInternal.h"
24#include <VBox/vmm/vm.h>
25#include <VBox/vmm/vmm.h>
26#include <VBox/err.h>
27#include <VBox/param.h>
28#include <VBox/vmm/ssm.h>
29#include <VBox/log.h>
30#include <VBox/vmm/pgm.h>
31#include <VBox/vmm/pdm.h>
32
33#include <iprt/assert.h>
34#include <iprt/thread.h>
35#include <iprt/string.h>
36#include <iprt/mem.h>
37#include <iprt/tcp.h>
38#include <iprt/socket.h>
39#include <iprt/semaphore.h>
40#include <iprt/asm.h>
41
42#include "internal/vm.h"
43#include "internal/em.h"
44#include "internal/pgm.h"
45
46/*******************************************************************************
47 * Structures and Typedefs *
48 *******************************************************************************/
49
/**
 * TCP stream header for saved state (SSM) data.
 *
 * ftmR3TcpOpWrite puts one of these in front of every chunk of state data it
 * sends and ftmR3TcpOpRead parses them on the receiving side.  This extra
 * framing layer fixes the problem of figuring out when the SSM stream ends.
 */
typedef struct FTMTCPHDR
{
    /** Magic value (FTMTCPHDR_MAGIC). */
    uint32_t    u32Magic;
    /** The size of the data block following this header.
     * 0 indicates the end of the stream, while UINT32_MAX indicates
     * cancellation. */
    uint32_t    cb;
} FTMTCPHDR;
/** Magic value for FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */
#define FTMTCPHDR_MAGIC             UINT32_C(0x19471205)
/** The max block size; ftmR3TcpOpRead rejects data blocks larger than this. */
#define FTMTCPHDR_MAX_SIZE          UINT32_C(0x00fffff8)
69
/**
 * TCP stream header for guest memory updates.
 *
 * ftmR3SyncDirtyPage sends one of these in front of every range of dirty
 * pages and ftmR3SyncMem parses them on the receiving side.
 */
typedef struct FTMTCPHDRMEM
{
    /** Magic value; the senders in this file store FTMTCPHDR_MAGIC here. */
    uint32_t    u32Magic;
    /** Size (uncompressed) of the pages following the header. */
    uint32_t    cbPageRange;
    /** GC physical address of the page(s) to sync. */
    RTGCPHYS    GCPhys;
    /** The size of the data block following this header.
     * Currently always equal to cbPageRange since pages are not compressed
     * yet.  0 indicates the end of the memory sync. */
    uint32_t    cb;
} FTMTCPHDRMEM;
89
90/*******************************************************************************
91* Global Variables *
92*******************************************************************************/
/** Greeting the standby node sends right after a connection is established;
 * the master compares what it receives against this string before it sends
 * the password. */
static const char g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n";

/* Forward declaration: FTMR3Term references the callback before its definition. */
static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser);
96
97/**
98 * Initializes the FTM.
99 *
100 * @returns VBox status code.
101 * @param pVM The VM to operate on.
102 */
103VMMR3DECL(int) FTMR3Init(PVM pVM)
104{
105 /*
106 * Assert alignment and sizes.
107 */
108 AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding));
109 AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t));
110
111 /** @todo saved state for master nodes! */
112 pVM->ftm.s.pszAddress = NULL;
113 pVM->ftm.s.pszPassword = NULL;
114 pVM->fFaultTolerantMaster = false;
115 pVM->ftm.s.fIsStandbyNode = false;
116 pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER;
117 pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT;
118 pVM->ftm.s.hSocket = NIL_RTSOCKET;
119
120 /*
121 * Initialize the PGM critical section.
122 */
123 int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM");
124 AssertRCReturn(rc, rc);
125
126 /*
127 * Register statistics.
128 */
129 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received.");
130 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received.");
131 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
132 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent.");
133 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaVM, STAMTYPE_COUNTER, "/FT/Sync/DeltaVM", STAMUNIT_OCCURENCES, "Number of delta vm syncs.");
134 STAM_REL_REG(pVM, &pVM->ftm.s.StatFullSync, STAMTYPE_COUNTER, "/FT/Sync/Full", STAMUNIT_OCCURENCES, "Number of full vm syncs.");
135 STAM_REL_REG(pVM, &pVM->ftm.s.StatDeltaMem, STAMTYPE_COUNTER, "/FT/Sync/DeltaMem", STAMUNIT_OCCURENCES, "Number of delta mem syncs.");
136 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointStorage, STAMTYPE_COUNTER, "/FT/Checkpoint/Storage", STAMUNIT_OCCURENCES, "Number of storage checkpoints.");
137 STAM_REL_REG(pVM, &pVM->ftm.s.StatCheckpointNetwork, STAMTYPE_COUNTER, "/FT/Checkpoint/Network", STAMUNIT_OCCURENCES, "Number of network checkpoints.");
138#ifdef VBOX_WITH_STATISTICS
139 STAM_REG(pVM, &pVM->ftm.s.StatCheckpoint, STAMTYPE_PROFILE, "/FT/Checkpoint", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
140 STAM_REG(pVM, &pVM->ftm.s.StatCheckpointPause, STAMTYPE_PROFILE, "/FT/Checkpoint/Pause", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
141 STAM_REG(pVM, &pVM->ftm.s.StatCheckpointResume, STAMTYPE_PROFILE, "/FT/Checkpoint/Resume", STAMUNIT_TICKS_PER_CALL, "Profiling of FTMR3SetCheckpoint.");
142 STAM_REG(pVM, &pVM->ftm.s.StatSentMemRAM, STAMTYPE_COUNTER, "/FT/Sent/Mem/RAM", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
143 STAM_REG(pVM, &pVM->ftm.s.StatSentMemMMIO2, STAMTYPE_COUNTER, "/FT/Sent/Mem/MMIO2", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
144 STAM_REG(pVM, &pVM->ftm.s.StatSentMemShwROM, STAMTYPE_COUNTER, "/FT/Sent/Mem/ShwROM", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
145 STAM_REG(pVM, &pVM->ftm.s.StatSentStateWrite, STAMTYPE_COUNTER, "/FT/Sent/State/Writes", STAMUNIT_BYTES, "The nr of write calls.");
146#endif
147 return VINF_SUCCESS;
148}
149
150/**
151 * Terminates the FTM.
152 *
153 * Termination means cleaning up and freeing all resources,
154 * the VM itself is at this point powered off or suspended.
155 *
156 * @returns VBox status code.
157 * @param pVM The VM to operate on.
158 */
159VMMR3DECL(int) FTMR3Term(PVM pVM)
160{
161 if (pVM->ftm.s.hShutdownEvent != NIL_RTSEMEVENT)
162 {
163 RTSemEventDestroy(pVM->ftm.s.hShutdownEvent);
164 pVM->ftm.s.hShutdownEvent = NIL_RTSEMEVENT;
165 }
166 if (pVM->ftm.s.hSocket != NIL_RTSOCKET)
167 {
168 RTTcpClientClose(pVM->ftm.s.hSocket);
169 pVM->ftm.s.hSocket = NIL_RTSOCKET;
170 }
171 if (pVM->ftm.s.standby.hServer)
172 {
173 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
174 pVM->ftm.s.standby.hServer = NULL;
175 }
176 if (pVM->ftm.s.pszAddress)
177 RTMemFree(pVM->ftm.s.pszAddress);
178 if (pVM->ftm.s.pszPassword)
179 RTMemFree(pVM->ftm.s.pszPassword);
180
181 /* Remove all pending memory updates. */
182 if (pVM->ftm.s.standby.pPhysPageTree)
183 {
184 RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, NULL);
185 pVM->ftm.s.standby.pPhysPageTree = NULL;
186 }
187
188 pVM->ftm.s.pszAddress = NULL;
189 pVM->ftm.s.pszPassword = NULL;
190
191 PDMR3CritSectDelete(&pVM->ftm.s.CritSect);
192 return VINF_SUCCESS;
193}
194
195
196static int ftmR3TcpWriteACK(PVM pVM)
197{
198 int rc = RTTcpWrite(pVM->ftm.s.hSocket, "ACK\n", sizeof("ACK\n") - 1);
199 if (RT_FAILURE(rc))
200 {
201 LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc));
202 }
203 return rc;
204}
205
206
207static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL)
208{
209 char szMsg[256];
210 size_t cch;
211 if (pszMsgText && *pszMsgText)
212 {
213 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText);
214 for (size_t off = 6; off + 1 < cch; off++)
215 if (szMsg[off] == '\n')
216 szMsg[off] = '\r';
217 }
218 else
219 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2);
220 int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch);
221 if (RT_FAILURE(rc))
222 LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc));
223 return rc;
224}
225
226/**
227 * Reads a string from the socket.
228 *
229 * @returns VBox status code.
230 *
231 * @param pState The teleporter state structure.
232 * @param pszBuf The output buffer.
233 * @param cchBuf The size of the output buffer.
234 *
235 */
236static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf)
237{
238 char *pszStart = pszBuf;
239 RTSOCKET Sock = pVM->ftm.s.hSocket;
240
241 AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR);
242 *pszBuf = '\0';
243
244 /* dead simple approach. */
245 for (;;)
246 {
247 char ch;
248 int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
249 if (RT_FAILURE(rc))
250 {
251 LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart));
252 return rc;
253 }
254 if ( ch == '\n'
255 || ch == '\0')
256 return VINF_SUCCESS;
257 if (cchBuf <= 1)
258 {
259 LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart));
260 return VERR_BUFFER_OVERFLOW;
261 }
262 *pszBuf++ = ch;
263 *pszBuf = '\0';
264 cchBuf--;
265 }
266}
267
268/**
269 * Reads an ACK or NACK.
270 *
271 * @returns VBox status code.
272 * @param pVM The VM to operate on.
273 * @param pszWhich Which ACK is this this?
274 * @param pszNAckMsg Optional NACK message.
275 */
276static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
277{
278 char szMsg[256];
279 int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
280 if (RT_FAILURE(rc))
281 return rc;
282
283 if (!strcmp(szMsg, "ACK"))
284 return VINF_SUCCESS;
285
286 if (!strncmp(szMsg, "NACK=", sizeof("NACK=") - 1))
287 {
288 char *pszMsgText = strchr(szMsg, ';');
289 if (pszMsgText)
290 *pszMsgText++ = '\0';
291
292 int32_t vrc2;
293 rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
294 if (rc == VINF_SUCCESS)
295 {
296 /*
297 * Well formed NACK, transform it into an error.
298 */
299 if (pszNAckMsg)
300 {
301 LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
302 return VERR_INTERNAL_ERROR;
303 }
304
305 if (pszMsgText)
306 {
307 pszMsgText = RTStrStrip(pszMsgText);
308 for (size_t off = 0; pszMsgText[off]; off++)
309 if (pszMsgText[off] == '\r')
310 pszMsgText[off] = '\n';
311
312 LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
313 }
314 return VERR_INTERNAL_ERROR_2;
315 }
316
317 if (pszMsgText)
318 pszMsgText[-1] = ';';
319 }
320 return VERR_INTERNAL_ERROR_3;
321}
322
323/**
324 * Submitts a command to the destination and waits for the ACK.
325 *
326 * @returns VBox status code.
327 *
328 * @param pVM The VM to operate on.
329 * @param pszCommand The command.
330 * @param fWaitForAck Whether to wait for the ACK.
331 */
332static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
333{
334 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), "\n", sizeof("\n") - 1);
335 if (RT_FAILURE(rc))
336 return rc;
337 if (!fWaitForAck)
338 return VINF_SUCCESS;
339 return ftmR3TcpReadACK(pVM, pszCommand);
340}
341
342/**
343 * @copydoc SSMSTRMOPS::pfnWrite
344 */
345static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
346{
347 PVM pVM = (PVM)pvUser;
348
349 AssertReturn(cbToWrite > 0, VINF_SUCCESS);
350 AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
351 AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
352
353 STAM_COUNTER_INC(&pVM->ftm.s.StatSentStateWrite);
354 for (;;)
355 {
356 FTMTCPHDR Hdr;
357 Hdr.u32Magic = FTMTCPHDR_MAGIC;
358 Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
359 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
360 if (RT_FAILURE(rc))
361 {
362 LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
363 return rc;
364 }
365 pVM->ftm.s.StatSentState.c += Hdr.cb + sizeof(Hdr);
366 pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
367 if (Hdr.cb == cbToWrite)
368 return VINF_SUCCESS;
369
370 /* advance */
371 cbToWrite -= Hdr.cb;
372 pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
373 }
374}
375
376
377/**
378 * Selects and poll for close condition.
379 *
380 * We can use a relatively high poll timeout here since it's only used to get
381 * us out of error paths. In the normal cause of events, we'll get a
382 * end-of-stream header.
383 *
384 * @returns VBox status code.
385 *
386 * @param pState The teleporter state data.
387 */
388static int ftmR3TcpReadSelect(PVM pVM)
389{
390 int rc;
391 do
392 {
393 rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000);
394 if (RT_FAILURE(rc) && rc != VERR_TIMEOUT)
395 {
396 pVM->ftm.s.syncstate.fIOError = true;
397 LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc));
398 break;
399 }
400 if (pVM->ftm.s.syncstate.fStopReading)
401 {
402 rc = VERR_EOF;
403 break;
404 }
405 } while (rc == VERR_TIMEOUT);
406 return rc;
407}
408
409
/**
 * @copydoc SSMSTRMOPS::pfnRead
 *
 * Reads state data that the sender framed with FTMTCPHDR block headers.  The
 * number of payload bytes left in the current block is tracked in
 * syncstate.cbReadBlock across calls.  Only valid on the standby node (asserts
 * that the VM is not the fault tolerant master).
 *
 * When @a pcbRead is given the function returns after the first successful
 * data read and reports the byte count there; otherwise it loops until
 * @a cbToRead bytes have been consumed.  The @a offStream argument is not used.
 */
static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead)
{
    PVM pVM = (PVM)pvUser;
    AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);

    for (;;)
    {
        int rc;

        /*
         * Check for various conditions and may have been signalled.
         * These flags are sticky until the sync state is reset.
         */
        if (pVM->ftm.s.syncstate.fEndOfStream)
            return VERR_EOF;
        if (pVM->ftm.s.syncstate.fStopReading)
            return VERR_EOF;
        if (pVM->ftm.s.syncstate.fIOError)
            return VERR_IO_GEN_FAILURE;

        /*
         * If there is no more data in the current block, read the next
         * block header.
         */
        if (!pVM->ftm.s.syncstate.cbReadBlock)
        {
            rc = ftmR3TcpReadSelect(pVM);
            if (RT_FAILURE(rc))
                return rc;
            FTMTCPHDR Hdr;
            rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
            if (RT_FAILURE(rc))
            {
                pVM->ftm.s.syncstate.fIOError = true;
                LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc));
                return rc;
            }
            pVM->ftm.s.StatReceivedState.c += sizeof(Hdr);

            /* Anything that is not a regular data block header ends up in here. */
            if (RT_UNLIKELY(   Hdr.u32Magic != FTMTCPHDR_MAGIC
                            || Hdr.cb > FTMTCPHDR_MAX_SIZE
                            || Hdr.cb == 0))
            {
                /* Valid magic with cb == 0 is the end-of-stream marker and
                   cb == UINT32_MAX the cancellation marker. */
                if (    Hdr.u32Magic == FTMTCPHDR_MAGIC
                    &&  (   Hdr.cb == 0
                         || Hdr.cb == UINT32_MAX)
                   )
                {
                    pVM->ftm.s.syncstate.fEndOfStream = true;
                    pVM->ftm.s.syncstate.cbReadBlock  = 0;
                    return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF;
                }
                /* Bad magic or oversized block: treat the stream as broken. */
                pVM->ftm.s.syncstate.fIOError = true;
                LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb));
                return VERR_IO_GEN_FAILURE;
            }

            pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb;
            /* Re-check the stop flag, it may have been set while we were reading the header. */
            if (pVM->ftm.s.syncstate.fStopReading)
                return VERR_EOF;
        }

        /*
         * Read more data.
         */
        rc = ftmR3TcpReadSelect(pVM);
        if (RT_FAILURE(rc))
            return rc;

        /* Never read beyond the end of the current block. */
        uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead);
        rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead);
        if (RT_FAILURE(rc))
        {
            pVM->ftm.s.syncstate.fIOError = true;
            LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb));
            return rc;
        }
        if (pcbRead)
        {
            /* Partial read mode: account for what we actually got and return. */
            cb = (uint32_t)*pcbRead;
            pVM->ftm.s.StatReceivedState.c   += cb;
            pVM->ftm.s.syncstate.uOffStream  += cb;
            pVM->ftm.s.syncstate.cbReadBlock -= cb;
            return VINF_SUCCESS;
        }
        /* Full read mode: the accounting assumes RTTcpRead delivered all cb
           bytes when no pcbRead is passed - NOTE(review): confirm against the
           RTTcpRead contract. */
        pVM->ftm.s.StatReceivedState.c   += cb;
        pVM->ftm.s.syncstate.uOffStream  += cb;
        pVM->ftm.s.syncstate.cbReadBlock -= cb;
        if (cbToRead == cb)
            return VINF_SUCCESS;

        /* Advance to the next block. */
        cbToRead -= cb;
        pvBuf = (uint8_t *)pvBuf + cb;
    }
}
508
509
510/**
511 * @copydoc SSMSTRMOPS::pfnSeek
512 */
513static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
514{
515 return VERR_NOT_SUPPORTED;
516}
517
518
519/**
520 * @copydoc SSMSTRMOPS::pfnTell
521 */
522static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser)
523{
524 PVM pVM = (PVM)pvUser;
525 return pVM->ftm.s.syncstate.uOffStream;
526}
527
528
529/**
530 * @copydoc SSMSTRMOPS::pfnSize
531 */
532static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb)
533{
534 return VERR_NOT_SUPPORTED;
535}
536
537
538/**
539 * @copydoc SSMSTRMOPS::pfnIsOk
540 */
541static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser)
542{
543 PVM pVM = (PVM)pvUser;
544
545 if (pVM->fFaultTolerantMaster)
546 {
547 /* Poll for incoming NACKs and errors from the other side */
548 int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0);
549 if (rc != VERR_TIMEOUT)
550 {
551 if (RT_SUCCESS(rc))
552 {
553 LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n"));
554 rc = VERR_SSM_CANCELLED;
555 }
556 else
557 LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc));
558 return rc;
559 }
560 }
561
562 return VINF_SUCCESS;
563}
564
565
566/**
567 * @copydoc SSMSTRMOPS::pfnClose
568 */
569static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCanceled)
570{
571 PVM pVM = (PVM)pvUser;
572
573 if (pVM->fFaultTolerantMaster)
574 {
575 FTMTCPHDR EofHdr;
576 EofHdr.u32Magic = FTMTCPHDR_MAGIC;
577 EofHdr.cb = fCanceled ? UINT32_MAX : 0;
578 int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr));
579 if (RT_FAILURE(rc))
580 {
581 LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc));
582 return rc;
583 }
584 }
585 else
586 {
587 ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true);
588 }
589
590 return VINF_SUCCESS;
591}
592
593
/**
 * Method table for a TCP based stream.
 *
 * Passed to VMR3SaveFT by the master and to VMR3LoadFromStreamFT by the
 * standby so the saved state travels over the FTM socket.
 */
static SSMSTRMOPS const g_ftmR3TcpOps =
{
    SSMSTRMOPS_VERSION,         /* version marker at the start of the table */
    ftmR3TcpOpWrite,            /* pfnWrite */
    ftmR3TcpOpRead,             /* pfnRead */
    ftmR3TcpOpSeek,             /* pfnSeek - not supported */
    ftmR3TcpOpTell,             /* pfnTell */
    ftmR3TcpOpSize,             /* pfnSize - not supported */
    ftmR3TcpOpIsOk,             /* pfnIsOk */
    ftmR3TcpOpClose,            /* pfnClose */
    SSMSTRMOPS_VERSION          /* version marker at the end of the table */
};
609
610
611/**
612 * VMR3ReqCallWait callback
613 *
614 * @param pVM The VM handle.
615 *
616 */
617static DECLCALLBACK(void) ftmR3WriteProtectMemory(PVM pVM)
618{
619 int rc = PGMR3PhysWriteProtectRAM(pVM);
620 AssertRC(rc);
621}
622
623
624/**
625 * Sync the VM state
626 *
627 * @returns VBox status code.
628 * @param pVM The VM handle.
629 */
630static int ftmR3PerformFullSync(PVM pVM)
631{
632 bool fSuspended = false;
633
634 int rc = VMR3Suspend(pVM);
635 AssertRCReturn(rc, rc);
636
637 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatFullSync);
638
639 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
640
641 /* Reset the sync state. */
642 pVM->ftm.s.syncstate.uOffStream = 0;
643 pVM->ftm.s.syncstate.cbReadBlock = 0;
644 pVM->ftm.s.syncstate.fStopReading = false;
645 pVM->ftm.s.syncstate.fIOError = false;
646 pVM->ftm.s.syncstate.fEndOfStream = false;
647
648 rc = ftmR3TcpSubmitCommand(pVM, "full-sync");
649 AssertRC(rc);
650
651 pVM->ftm.s.fDeltaLoadSaveActive = false;
652 rc = VMR3SaveFT(pVM, &g_ftmR3TcpOps, pVM, &fSuspended, false /* fSkipStateChanges */);
653 AssertRC(rc);
654
655 rc = ftmR3TcpReadACK(pVM, "full-sync-complete");
656 AssertRC(rc);
657
658 RTSocketRelease(pVM->ftm.s.hSocket);
659
660 /* Write protect all memory. */
661 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
662 AssertRCReturn(rc, rc);
663
664 rc = VMR3Resume(pVM);
665 AssertRC(rc);
666
667 return rc;
668}
669
670
671/**
672 * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages
673 *
674 * @param pVM VM Handle.
675 * @param GCPhys GC physical address
676 * @param pRange HC virtual address of the page(s)
677 * @param cbRange Size of the dirty range in bytes.
678 * @param pvUser User argument
679 */
680static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser)
681{
682 FTMTCPHDRMEM Hdr;
683 Hdr.u32Magic = FTMTCPHDR_MAGIC;
684 Hdr.GCPhys = GCPhys;
685 Hdr.cbPageRange = cbRange;
686 Hdr.cb = cbRange;
687 /** @todo compress page(s). */
688 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb);
689 if (RT_FAILURE(rc))
690 {
691 LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb));
692 return rc;
693 }
694 pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr);
695
696#ifdef VBOX_WITH_STATISTICS
697 switch (PGMPhysGetPageType(pVM, GCPhys))
698 {
699 case PGMPAGETYPE_RAM:
700 pVM->ftm.s.StatSentMemRAM.c += Hdr.cb + sizeof(Hdr);
701 break;
702
703 case PGMPAGETYPE_MMIO2:
704 pVM->ftm.s.StatSentMemMMIO2.c += Hdr.cb + sizeof(Hdr);
705 break;
706
707 case PGMPAGETYPE_ROM_SHADOW:
708 pVM->ftm.s.StatSentMemShwROM.c += Hdr.cb + sizeof(Hdr);
709 break;
710
711 case PGMPAGETYPE_MMIO2_ALIAS_MMIO:
712 AssertFailed();
713 break;
714
715 default:
716 AssertFailed();
717 break;
718 }
719#endif
720
721 return (pVM->ftm.s.fCheckpointingActive) ? VERR_INTERRUPTED : VINF_SUCCESS;
722}
723
/**
 * Thread function which starts syncing process for this master VM
 *
 * The thread first retries connecting to the standby node once a second until
 * it gets through or is told to quit.  After the greeting and password
 * exchange it performs one full sync and then pushes the dirty guest memory
 * to the standby every uInterval milliseconds until the shutdown event is
 * signalled.
 *
 * @param   Thread      The thread id.
 * @param   pvUser      The VM handle (PVM).
 * @return  VBox status code (ignored); VINF_SUCCESS on all the early exits.
 *
 */
static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD Thread, void *pvUser)
{
    int rc  = VINF_SUCCESS;
    PVM pVM = (PVM)pvUser;

    for (;;)
    {
        /*
         * Try connect to the standby machine.
         */
        Log(("ftmR3MasterThread: client connect to %s %d\n", pVM->ftm.s.pszAddress, pVM->ftm.s.uPort));
        rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket);
        if (RT_SUCCESS(rc))
        {
            Log(("ftmR3MasterThread: CONNECTED\n"));

            /* Disable Nagle. */
            rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/);
            AssertRC(rc);

            /* Read and check the welcome message.  The buffer is zeroed first
               so that the received bytes form a terminated string for strcmp. */
            char szLine[RT_MAX(128, sizeof(g_szWelcome))];
            RT_ZERO(szLine);
            rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL);
            if (    RT_SUCCESS(rc)
                &&  !strcmp(szLine, g_szWelcome))
            {
                /* password (sent as is, without any terminator) */
                if (pVM->ftm.s.pszPassword)
                    rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword));

                if (RT_SUCCESS(rc))
                {
                    /* ACK */
                    rc = ftmR3TcpReadACK(pVM, "password", "Invalid password");
                    if (RT_SUCCESS(rc))
                    {
                        /** todo: verify VM config. */
                        break;
                    }
                }
            }
            /* Failed, so don't bother anymore. */
            return VINF_SUCCESS;
        }
        /* Could not connect; wait a second (or for the shutdown event) and retry. */
        rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, 1000 /* 1 second */);
        if (rc != VERR_TIMEOUT)
            return VINF_SUCCESS;    /* told to quit */
    }

    /* Successfully initialized the connection to the standby node.
     * Start the sync process.
     */

    /* First sync all memory and write protect everything so
     * we can send changed pages later on.
     */

    /* Note: the status of the full sync is overwritten by the wait below. */
    rc = ftmR3PerformFullSync(pVM);

    for (;;)
    {
        /* The wait doubles as the sync interval timer. */
        rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval);
        if (rc != VERR_TIMEOUT)
            break;    /* told to quit */

        /* Skip the memory sync while a checkpoint is in progress. */
        if (!pVM->ftm.s.fCheckpointingActive)
        {
            rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
            AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));

            rc = ftmR3TcpSubmitCommand(pVM, "mem-sync");
            AssertRC(rc);

            /* sync the changed memory with the standby node. */
            /* Write protect all memory. */
            if (!pVM->ftm.s.fCheckpointingActive)
            {
                rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3WriteProtectMemory, 1, pVM);
                AssertRC(rc);
            }

            /* Enumerate all dirty pages and send them to the standby VM. */
            if (!pVM->ftm.s.fCheckpointingActive)
            {
                rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */);
                Assert(rc == VINF_SUCCESS || rc == VERR_INTERRUPTED);
            }

            /* Send last memory header to signal the end.  This is done even
               when the steps above were skipped, as the standby has already
               been told that a mem-sync is coming. */
            FTMTCPHDRMEM Hdr;
            Hdr.u32Magic    = FTMTCPHDR_MAGIC;
            Hdr.GCPhys      = 0;
            Hdr.cbPageRange = 0;
            Hdr.cb          = 0;
            rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr));
            if (RT_FAILURE(rc))
                LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb));

            rc = ftmR3TcpReadACK(pVM, "mem-sync-complete");
            AssertRC(rc);

            PDMCritSectLeave(&pVM->ftm.s.CritSect);
        }
    }
    return rc;
}
839
840/**
841 * Syncs memory from the master VM
842 *
843 * @returns VBox status code.
844 * @param pVM VM Handle.
845 */
846static int ftmR3SyncMem(PVM pVM)
847{
848 while (true)
849 {
850 FTMTCPHDRMEM Hdr;
851 RTGCPHYS GCPhys;
852
853 /* Read memory header. */
854 int rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
855 if (RT_FAILURE(rc))
856 {
857 Log(("RTTcpRead failed with %Rrc\n", rc));
858 break;
859 }
860 pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr);
861
862 if (Hdr.cb == 0)
863 break; /* end of sync. */
864
865 Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */
866 GCPhys = Hdr.GCPhys;
867
868 /* Must be a multiple of PAGE_SIZE. */
869 Assert((Hdr.cbPageRange & 0xfff) == 0);
870
871 while (Hdr.cbPageRange)
872 {
873 PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)RTAvlGCPhysGet(&pVM->ftm.s.standby.pPhysPageTree, GCPhys);
874 if (!pNode)
875 {
876 /* Allocate memory for the node and page. */
877 pNode = (PFTMPHYSPAGETREENODE)RTMemAllocZ(sizeof(*pNode) + PAGE_SIZE);
878 AssertBreak(pNode);
879
880 /* Insert the node into the tree. */
881 pNode->Core.Key = GCPhys;
882 pNode->pPage = (void *)(pNode + 1);
883 bool fRet = RTAvlGCPhysInsert(&pVM->ftm.s.standby.pPhysPageTree, &pNode->Core);
884 Assert(fRet);
885 }
886
887 /* Fetch the page. */
888 rc = RTTcpRead(pVM->ftm.s.hSocket, pNode->pPage, PAGE_SIZE, NULL);
889 if (RT_FAILURE(rc))
890 {
891 Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc));
892 break;
893 }
894 pVM->ftm.s.StatReceivedMem.c += PAGE_SIZE;
895 Hdr.cbPageRange -= PAGE_SIZE;
896 GCPhys += PAGE_SIZE;
897 }
898 }
899 return VINF_SUCCESS;
900}
901
902
903/**
904 * Callback handler for RTAvlGCPhysDestroy
905 *
906 * @returns 0 to continue, otherwise stop
907 * @param pBaseNode Node to destroy
908 * @param pvUser User parameter
909 */
910static DECLCALLBACK(int) ftmR3PageTreeDestroyCallback(PAVLGCPHYSNODECORE pBaseNode, void *pvUser)
911{
912 PVM pVM = (PVM)pvUser;
913 PFTMPHYSPAGETREENODE pNode = (PFTMPHYSPAGETREENODE)pBaseNode;
914
915 if (pVM) /* NULL when the VM is destroyed. */
916 {
917 /* Update the guest memory of the standby VM. */
918 int rc = PGMR3PhysWriteExternal(pVM, pNode->Core.Key, pNode->pPage, PAGE_SIZE, "FTMemSync");
919 AssertRC(rc);
920 }
921 RTMemFree(pNode);
922 return 0;
923}
924
925/**
926 * Thread function which monitors the health of the master VM
927 *
928 * @param Thread The thread id.
929 * @param pvUser Not used
930 * @return VINF_SUCCESS (ignored).
931 *
932 */
933static DECLCALLBACK(int) ftmR3StandbyThread(RTTHREAD Thread, void *pvUser)
934{
935 PVM pVM = (PVM)pvUser;
936
937 for (;;)
938 {
939 uint64_t u64TimeNow;
940
941 int rc = RTSemEventWait(pVM->ftm.s.hShutdownEvent, pVM->ftm.s.uInterval);
942 if (rc != VERR_TIMEOUT)
943 break; /* told to quit */
944
945 if (pVM->ftm.s.standby.u64LastHeartbeat)
946 {
947 u64TimeNow = RTTimeMilliTS();
948
949 if (u64TimeNow > pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 4)
950 {
951 /* Timeout; prepare to fallover. */
952 LogRel(("FTSync: TIMEOUT (%RX64 vs %RX64 ms): activate standby VM!\n", u64TimeNow, pVM->ftm.s.standby.u64LastHeartbeat + pVM->ftm.s.uInterval * 2));
953
954 pVM->ftm.s.fActivateStandby = true;
955 /** todo: prevent split-brain. */
956 break;
957 }
958 }
959 }
960
961 return VINF_SUCCESS;
962}
963
964
965/**
966 * Listen for incoming traffic destined for the standby VM.
967 *
968 * @copydoc FNRTTCPSERVE
969 *
970 * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP.
971 */
972static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET Sock, void *pvUser)
973{
974 PVM pVM = (PVM)pvUser;
975
976 pVM->ftm.s.hSocket = Sock;
977
978 /*
979 * Disable Nagle.
980 */
981 int rc = RTTcpSetSendCoalescing(Sock, false /*fEnable*/);
982 AssertRC(rc);
983
984 /* Send the welcome message to the master node. */
985 rc = RTTcpWrite(Sock, g_szWelcome, sizeof(g_szWelcome) - 1);
986 if (RT_FAILURE(rc))
987 {
988 LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc));
989 return VINF_SUCCESS;
990 }
991
992 /*
993 * Password.
994 */
995 const char *pszPassword = pVM->ftm.s.pszPassword;
996 if (pszPassword)
997 {
998 unsigned off = 0;
999 while (pszPassword[off])
1000 {
1001 char ch;
1002 rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
1003 if ( RT_FAILURE(rc)
1004 || pszPassword[off] != ch)
1005 {
1006 if (RT_FAILURE(rc))
1007 LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc));
1008 else
1009 LogRel(("FTSync: Invalid password (off=%u)\n", off));
1010 ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE);
1011 return VINF_SUCCESS;
1012 }
1013 off++;
1014 }
1015 }
1016 rc = ftmR3TcpWriteACK(pVM);
1017 if (RT_FAILURE(rc))
1018 return VINF_SUCCESS;
1019
1020 /** todo: verify VM config. */
1021
1022 /*
1023 * Stop the server.
1024 *
1025 * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior
1026 * to it we must not return that value!
1027 */
1028 RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1029
1030 /*
1031 * Command processing loop.
1032 */
1033 bool fDone = false;
1034 for (;;)
1035 {
1036 bool fFullSync = false;
1037 char szCmd[128];
1038
1039 rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd));
1040 if (RT_FAILURE(rc))
1041 break;
1042
1043 pVM->ftm.s.standby.u64LastHeartbeat = RTTimeMilliTS();
1044 if (!strcmp(szCmd, "mem-sync"))
1045 {
1046 rc = ftmR3TcpWriteACK(pVM);
1047 AssertRC(rc);
1048 if (RT_FAILURE(rc))
1049 continue;
1050
1051 rc = ftmR3SyncMem(pVM);
1052 AssertRC(rc);
1053
1054 rc = ftmR3TcpWriteACK(pVM);
1055 AssertRC(rc);
1056 }
1057 else
1058 if ( !strcmp(szCmd, "checkpoint")
1059 || !strcmp(szCmd, "full-sync")
1060 || (fFullSync = true)) /* intended assignment */
1061 {
1062 rc = ftmR3TcpWriteACK(pVM);
1063 AssertRC(rc);
1064 if (RT_FAILURE(rc))
1065 continue;
1066
1067 /* Flush all pending memory updates. */
1068 if (pVM->ftm.s.standby.pPhysPageTree)
1069 {
1070 RTAvlGCPhysDestroy(&pVM->ftm.s.standby.pPhysPageTree, ftmR3PageTreeDestroyCallback, pVM);
1071 pVM->ftm.s.standby.pPhysPageTree = NULL;
1072 }
1073
1074 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
1075
1076 /* Reset the sync state. */
1077 pVM->ftm.s.syncstate.uOffStream = 0;
1078 pVM->ftm.s.syncstate.cbReadBlock = 0;
1079 pVM->ftm.s.syncstate.fStopReading = false;
1080 pVM->ftm.s.syncstate.fIOError = false;
1081 pVM->ftm.s.syncstate.fEndOfStream = false;
1082
1083 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
1084 rc = VMR3LoadFromStreamFT(pVM, &g_ftmR3TcpOps, pVM);
1085 pVM->ftm.s.fDeltaLoadSaveActive = false;
1086 RTSocketRelease(pVM->ftm.s.hSocket);
1087 AssertRC(rc);
1088 if (RT_FAILURE(rc))
1089 {
1090 LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc));
1091 ftmR3TcpWriteNACK(pVM, rc);
1092 continue;
1093 }
1094
1095 /* The EOS might not have been read, make sure it is. */
1096 pVM->ftm.s.syncstate.fStopReading = false;
1097 size_t cbRead;
1098 rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead);
1099 if (rc != VERR_EOF)
1100 {
1101 LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc));
1102 ftmR3TcpWriteNACK(pVM, rc);
1103 continue;
1104 }
1105
1106 rc = ftmR3TcpWriteACK(pVM);
1107 AssertRC(rc);
1108 }
1109 }
1110 LogFlowFunc(("returns mRc=%Rrc\n", rc));
1111 return VERR_TCP_SERVER_STOP;
1112}
1113
1114/**
1115 * Powers on the fault tolerant virtual machine.
1116 *
1117 * @returns VBox status code.
1118 *
1119 * @param pVM The VM to operate on.
1120 * @param fMaster FT master or standby
1121 * @param uInterval FT sync interval
1122 * @param pszAddress Standby VM address
1123 * @param uPort Standby VM port
1124 * @param pszPassword FT password (NULL for none)
1125 *
1126 * @thread Any thread.
1127 * @vmstate Created
1128 * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby)
1129 */
1130VMMR3DECL(int) FTMR3PowerOn(PVM pVM, bool fMaster, unsigned uInterval, const char *pszAddress, unsigned uPort, const char *pszPassword)
1131{
1132 int rc = VINF_SUCCESS;
1133
1134 VMSTATE enmVMState = VMR3GetState(pVM);
1135 AssertMsgReturn(enmVMState == VMSTATE_CREATED,
1136 ("%s\n", VMR3GetStateName(enmVMState)),
1137 VERR_INTERNAL_ERROR_4);
1138 AssertReturn(pszAddress, VERR_INVALID_PARAMETER);
1139
1140 if (pVM->ftm.s.uInterval)
1141 pVM->ftm.s.uInterval = uInterval;
1142 else
1143 pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */
1144
1145 pVM->ftm.s.uPort = uPort;
1146 pVM->ftm.s.pszAddress = RTStrDup(pszAddress);
1147 if (pszPassword)
1148 pVM->ftm.s.pszPassword = RTStrDup(pszPassword);
1149
1150 rc = RTSemEventCreate(&pVM->ftm.s.hShutdownEvent);
1151 if (RT_FAILURE(rc))
1152 return rc;
1153
1154 if (fMaster)
1155 {
1156 rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM,
1157 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmMaster");
1158 if (RT_FAILURE(rc))
1159 return rc;
1160
1161 pVM->fFaultTolerantMaster = true;
1162 if (PGMIsUsingLargePages(pVM))
1163 {
1164 /* Must disable large page usage as 2 MB pages are too big to write monitor. */
1165 LogRel(("FTSync: disabling large page usage.\n"));
1166 PGMSetLargePageUsage(pVM, false);
1167 }
1168 /** @todo might need to disable page fusion as well */
1169
1170 return VMR3PowerOn(pVM);
1171 }
1172 else
1173 {
1174 /* standby */
1175 rc = RTThreadCreate(NULL, ftmR3StandbyThread, pVM,
1176 0, RTTHREADTYPE_DEFAULT, 0, "ftmStandby");
1177 if (RT_FAILURE(rc))
1178 return rc;
1179
1180 rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer);
1181 if (RT_FAILURE(rc))
1182 return rc;
1183 pVM->ftm.s.fIsStandbyNode = true;
1184
1185 rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM);
1186 /** @todo deal with the exit code to check if we should activate this standby VM. */
1187 if (pVM->ftm.s.fActivateStandby)
1188 {
1189 /** @todo fallover. */
1190 }
1191
1192 if (pVM->ftm.s.standby.hServer)
1193 {
1194 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
1195 pVM->ftm.s.standby.hServer = NULL;
1196 }
1197 if (rc == VERR_TCP_SERVER_SHUTDOWN)
1198 rc = VINF_SUCCESS; /* ignore this error; the standby process was cancelled. */
1199 }
1200 return rc;
1201}
1202
1203/**
1204 * Powers off the fault tolerant virtual machine (standby).
1205 *
1206 * @returns VBox status code.
1207 *
1208 * @param pVM The VM to operate on.
1209 */
1210VMMR3DECL(int) FTMR3CancelStandby(PVM pVM)
1211{
1212 AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED);
1213 Assert(pVM->ftm.s.standby.hServer);
1214
1215 return RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1216}
1217
1218/**
1219 * Rendezvous callback used by FTMR3SetCheckpoint
1220 * Sync state + changed memory with the standby node.
1221 *
1222 * This is only called on one of the EMTs while the other ones are waiting for
1223 * it to complete this function.
1224 *
1225 * @returns VINF_SUCCESS (VBox strict status code).
1226 * @param pVM The VM handle.
1227 * @param pVCpu The VMCPU for the EMT we're being called on. Unused.
1228 * @param pvUser User parameter
1229 */
1230static DECLCALLBACK(VBOXSTRICTRC) ftmR3SetCheckpointRendezvous(PVM pVM, PVMCPU pVCpu, void *pvUser)
1231{
1232 int rc = VINF_SUCCESS;
1233 bool fSuspended = false;
1234
1235 /* We don't call VMR3Suspend here to avoid the overhead of state changes and notifications. This
1236 * is only a short suspend.
1237 */
1238 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointPause, a);
1239 PDMR3Suspend(pVM);
1240
1241 /* Hack alert: as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
1242 EMR3NotifySuspend(pVM);
1243 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointPause, a);
1244
1245 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatDeltaVM);
1246
1247 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
1248
1249 /* Reset the sync state. */
1250 pVM->ftm.s.syncstate.uOffStream = 0;
1251 pVM->ftm.s.syncstate.cbReadBlock = 0;
1252 pVM->ftm.s.syncstate.fStopReading = false;
1253 pVM->ftm.s.syncstate.fIOError = false;
1254 pVM->ftm.s.syncstate.fEndOfStream = false;
1255
1256 rc = ftmR3TcpSubmitCommand(pVM, "checkpoint");
1257 AssertRC(rc);
1258
1259 pVM->ftm.s.fDeltaLoadSaveActive = true;
1260 rc = VMR3SaveFT(pVM, &g_ftmR3TcpOps, pVM, &fSuspended, true /* fSkipStateChanges */);
1261 pVM->ftm.s.fDeltaLoadSaveActive = false;
1262 AssertRC(rc);
1263
1264 rc = ftmR3TcpReadACK(pVM, "checkpoint-complete");
1265 AssertRC(rc);
1266
1267 RTSocketRelease(pVM->ftm.s.hSocket);
1268
1269 /* Write protect all memory. */
1270 rc = PGMR3PhysWriteProtectRAM(pVM);
1271 AssertRC(rc);
1272
1273 /* We don't call VMR3Resume here to avoid the overhead of state changes and notifications. This
1274 * is only a short suspend.
1275 */
1276 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpointResume, b);
1277 PGMR3ResetNoMorePhysWritesFlag(pVM);
1278 PDMR3Resume(pVM);
1279
1280 /* Hack alert as EM is responsible for dealing with the suspend state. We must do this here ourselves, but only for this EMT.*/
1281 EMR3NotifyResume(pVM);
1282 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpointResume, b);
1283
1284 return rc;
1285}
1286
1287/**
1288 * Performs a full sync to the standby node
1289 *
1290 * @returns VBox status code.
1291 *
1292 * @param pVM The VM to operate on.
1293 * @param enmCheckpoint Checkpoint type
1294 */
1295VMMR3DECL(int) FTMR3SetCheckpoint(PVM pVM, FTMCHECKPOINTTYPE enmCheckpoint)
1296{
1297 int rc;
1298
1299 if (!pVM->fFaultTolerantMaster)
1300 return VINF_SUCCESS;
1301
1302 switch (enmCheckpoint)
1303 {
1304 case FTMCHECKPOINTTYPE_NETWORK:
1305 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointNetwork);
1306 break;
1307
1308 case FTMCHECKPOINTTYPE_STORAGE:
1309 STAM_REL_COUNTER_INC(&pVM->ftm.s.StatCheckpointStorage);
1310 break;
1311
1312 default:
1313 break;
1314 }
1315 pVM->ftm.s.fCheckpointingActive = true;
1316 if (VM_IS_EMT(pVM))
1317 {
1318 PVMCPU pVCpu = VMMGetCpu(pVM);
1319
1320 /* We must take special care here as the memory sync is competing with us and requires a responsive EMT. */
1321 while ((rc = PDMCritSectTryEnter(&pVM->ftm.s.CritSect)) == VERR_SEM_BUSY)
1322 {
1323 if (VM_FF_ISPENDING(pVM, VM_FF_EMT_RENDEZVOUS))
1324 {
1325 rc = VMMR3EmtRendezvousFF(pVM, pVCpu);
1326 AssertRC(rc);
1327 }
1328
1329 if (VM_FF_ISPENDING(pVM, VM_FF_REQUEST))
1330 {
1331 rc = VMR3ReqProcessU(pVM->pUVM, VMCPUID_ANY, true /*fPriorityOnly*/);
1332 AssertRC(rc);
1333 }
1334 }
1335 }
1336 else
1337 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
1338
1339 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
1340
1341 STAM_PROFILE_START(&pVM->ftm.s.StatCheckpoint, a);
1342
1343 rc = VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, ftmR3SetCheckpointRendezvous, NULL);
1344
1345 STAM_PROFILE_STOP(&pVM->ftm.s.StatCheckpoint, a);
1346
1347 PDMCritSectLeave(&pVM->ftm.s.CritSect);
1348 pVM->ftm.s.fCheckpointingActive = false;
1349
1350 return rc;
1351}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette