VirtualBox

source: vbox/trunk/src/VBox/VMM/FTM.cpp@ 32095

Last change on this file since 32095 was 32092, checked in by vboxsync, 14 years ago

Empty password

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 31.6 KB
Line 
1/* $Id: FTM.cpp 32092 2010-08-30 13:02:57Z vboxsync $ */
2/** @file
3 * FTM - Fault Tolerance Manager
4 */
5
6/*
7 * Copyright (C) 2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_FTM
23#include "FTMInternal.h"
24#include <VBox/vm.h>
25#include <VBox/vmm.h>
26#include <VBox/err.h>
27#include <VBox/param.h>
28#include <VBox/ssm.h>
29#include <VBox/log.h>
30#include <VBox/pgm.h>
31
32#include <iprt/assert.h>
33#include <iprt/thread.h>
34#include <iprt/string.h>
35#include <iprt/mem.h>
36#include <iprt/tcp.h>
37#include <iprt/socket.h>
38#include <iprt/semaphore.h>
39#include <iprt/asm.h>
40
41/*******************************************************************************
42 * Structures and Typedefs *
43 *******************************************************************************/
44
45/**
46 * TCP stream header for saved-state (SSM) data.
47 *
48 * This is an extra layer for fixing the problem with figuring out when the SSM
49 * stream ends: every chunk of state data is preceded by one of these headers.
50 */
51typedef struct FTMTCPHDR
52{
53 /** Magic value, FTMTCPHDR_MAGIC. */
54 uint32_t u32Magic;
55 /** The size of the data block following this header.
56 * 0 indicates the end of the stream, while UINT32_MAX indicates
57 * cancellation. Regular data blocks are at most FTMTCPHDR_MAX_SIZE bytes. */
58 uint32_t cb;
59} FTMTCPHDR;
60/** Magic value for FTMTCPHDR::u32Magic. (Egberto Gismonti Amin) */
61#define FTMTCPHDR_MAGIC UINT32_C(0x19471205)
62/** The max block size, i.e. the largest FTMTCPHDR::cb accepted for a data block. */
63#define FTMTCPHDR_MAX_SIZE UINT32_C(0x00fffff8)
64
65/**
66 * TCP header for a range of guest memory pages.
67 *
68 * Written by the master in front of each dirty page range of a "mem-sync";
69 * a header with cb set to 0 marks the end of the memory sync.
70 */
71typedef struct FTMTCPHDRMEM
72{
73 /** Magic value (the senders in this file store FTMTCPHDR_MAGIC here). */
74 uint32_t u32Magic;
75 /** Size (Uncompressed) of the pages following the header. */
76 uint32_t cbPageRange;
77 /** GC Physical address of the page(s) to sync. */
78 RTGCPHYS GCPhys;
79 /** The size of the data block following this header.
80 * 0 indicates the end of the memory sync. The senders currently set it
81 * equal to cbPageRange as page compression is not implemented yet. */
82 uint32_t cb;
83} FTMTCPHDRMEM;
84
85/*******************************************************************************
86* Global Variables *
87*******************************************************************************/
88static const char g_szWelcome[] = "VirtualBox-Fault-Tolerance-Sync-1.0\n"; /**< Greeting the standby node writes on connect; the master compares what it reads against it. */
89
90/**
91 * Initializes the FTM.
92 *
93 * @returns VBox status code.
94 * @param pVM The VM to operate on.
95 */
96VMMR3DECL(int) FTMR3Init(PVM pVM)
97{
98 /*
99 * Assert alignment and sizes.
100 */
101 AssertCompile(sizeof(pVM->ftm.s) <= sizeof(pVM->ftm.padding));
102 AssertCompileMemberAlignment(FTM, CritSect, sizeof(uintptr_t));
103
104 /** @todo saved state for master nodes! */
105 pVM->ftm.s.pszAddress = NULL;
106 pVM->ftm.s.pszPassword = NULL;
107 pVM->fFaultTolerantMaster = false;
108 pVM->ftm.s.fIsStandbyNode = false;
109 pVM->ftm.s.standby.hServer = NIL_RTTCPSERVER;
110 pVM->ftm.s.master.hShutdownEvent = NIL_RTSEMEVENT;
111 pVM->ftm.s.hSocket = NIL_RTSOCKET;
112
113 /*
114 * Initialize the PGM critical section.
115 */
116 int rc = PDMR3CritSectInit(pVM, &pVM->ftm.s.CritSect, RT_SRC_POS, "FTM");
117 AssertRCReturn(rc, rc);
118
119 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedMem, STAMTYPE_COUNTER, "/FT/Received/Mem", STAMUNIT_BYTES, "The amount of memory pages that was received.");
120 STAM_REL_REG(pVM, &pVM->ftm.s.StatReceivedState, STAMTYPE_COUNTER, "/FT/Received/State", STAMUNIT_BYTES, "The amount of state information that was received.");
121 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentMem, STAMTYPE_COUNTER, "/FT/Sent/Mem", STAMUNIT_BYTES, "The amount of memory pages that was sent.");
122 STAM_REL_REG(pVM, &pVM->ftm.s.StatSentState, STAMTYPE_COUNTER, "/FT/Sent/State", STAMUNIT_BYTES, "The amount of state information that was sent.");
123
124 return VINF_SUCCESS;
125}
126
127/**
128 * Terminates the FTM.
129 *
130 * Termination means cleaning up and freeing all resources,
131 * the VM itself is at this point powered off or suspended.
132 *
133 * @returns VBox status code.
134 * @param pVM The VM to operate on.
135 */
136VMMR3DECL(int) FTMR3Term(PVM pVM)
137{
138 if (pVM->ftm.s.master.hShutdownEvent != NIL_RTSEMEVENT)
139 {
140 RTSemEventDestroy(pVM->ftm.s.master.hShutdownEvent);
141 pVM->ftm.s.master.hShutdownEvent = NIL_RTSEMEVENT;
142 }
143 if (pVM->ftm.s.hSocket != NIL_RTSOCKET)
144 {
145 RTTcpClientClose(pVM->ftm.s.hSocket);
146 pVM->ftm.s.hSocket = NIL_RTSOCKET;
147 }
148 if (pVM->ftm.s.standby.hServer)
149 {
150 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
151 pVM->ftm.s.standby.hServer = NULL;
152 }
153 if (pVM->ftm.s.pszAddress)
154 RTMemFree(pVM->ftm.s.pszAddress);
155 if (pVM->ftm.s.pszPassword)
156 RTMemFree(pVM->ftm.s.pszPassword);
157
158 pVM->ftm.s.pszAddress = NULL;
159 pVM->ftm.s.pszPassword = NULL;
160
161 PDMR3CritSectDelete(&pVM->ftm.s.CritSect);
162 return VINF_SUCCESS;
163}
164
165
166static int ftmR3TcpWriteACK(PVM pVM)
167{
168 int rc = RTTcpWrite(pVM->ftm.s.hSocket, "ACK\n", sizeof("ACK\n") - 1);
169 if (RT_FAILURE(rc))
170 {
171 LogRel(("FTSync: RTTcpWrite(,ACK,) -> %Rrc\n", rc));
172 }
173 return rc;
174}
175
176
177static int ftmR3TcpWriteNACK(PVM pVM, int32_t rc2, const char *pszMsgText = NULL)
178{
179 char szMsg[256];
180 size_t cch;
181 if (pszMsgText && *pszMsgText)
182 {
183 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d;%s\n", rc2, pszMsgText);
184 for (size_t off = 6; off + 1 < cch; off++)
185 if (szMsg[off] == '\n')
186 szMsg[off] = '\r';
187 }
188 else
189 cch = RTStrPrintf(szMsg, sizeof(szMsg), "NACK=%d\n", rc2);
190 int rc = RTTcpWrite(pVM->ftm.s.hSocket, szMsg, cch);
191 if (RT_FAILURE(rc))
192 LogRel(("FTSync: RTTcpWrite(,%s,%zu) -> %Rrc\n", szMsg, cch, rc));
193 return rc;
194}
195
196/**
197 * Reads a string from the socket.
198 *
199 * @returns VBox status code.
200 *
201 * @param pState The teleporter state structure.
202 * @param pszBuf The output buffer.
203 * @param cchBuf The size of the output buffer.
204 *
205 */
206static int ftmR3TcpReadLine(PVM pVM, char *pszBuf, size_t cchBuf)
207{
208 char *pszStart = pszBuf;
209 RTSOCKET Sock = pVM->ftm.s.hSocket;
210
211 AssertReturn(cchBuf > 1, VERR_INTERNAL_ERROR);
212 *pszBuf = '\0';
213
214 /* dead simple approach. */
215 for (;;)
216 {
217 char ch;
218 int rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
219 if (RT_FAILURE(rc))
220 {
221 LogRel(("FTSync: RTTcpRead -> %Rrc while reading string ('%s')\n", rc, pszStart));
222 return rc;
223 }
224 if ( ch == '\n'
225 || ch == '\0')
226 return VINF_SUCCESS;
227 if (cchBuf <= 1)
228 {
229 LogRel(("FTSync: String buffer overflow: '%s'\n", pszStart));
230 return VERR_BUFFER_OVERFLOW;
231 }
232 *pszBuf++ = ch;
233 *pszBuf = '\0';
234 cchBuf--;
235 }
236}
237
238/**
239 * Reads an ACK or NACK.
240 *
241 * @returns VBox status code.
242 * @param pVM The VM to operate on.
243 * @param pszWhich Which ACK is this this?
244 * @param pszNAckMsg Optional NACK message.
245 */
246static int ftmR3TcpReadACK(PVM pVM, const char *pszWhich, const char *pszNAckMsg = NULL)
247{
248 char szMsg[256];
249 int rc = ftmR3TcpReadLine(pVM, szMsg, sizeof(szMsg));
250 if (RT_FAILURE(rc))
251 return rc;
252
253 if (!strcmp(szMsg, "ACK"))
254 return VINF_SUCCESS;
255
256 if (!strncmp(szMsg, "NACK=", sizeof("NACK=") - 1))
257 {
258 char *pszMsgText = strchr(szMsg, ';');
259 if (pszMsgText)
260 *pszMsgText++ = '\0';
261
262 int32_t vrc2;
263 rc = RTStrToInt32Full(&szMsg[sizeof("NACK=") - 1], 10, &vrc2);
264 if (rc == VINF_SUCCESS)
265 {
266 /*
267 * Well formed NACK, transform it into an error.
268 */
269 if (pszNAckMsg)
270 {
271 LogRel(("FTSync: %s: NACK=%Rrc (%d)\n", pszWhich, vrc2, vrc2));
272 return VERR_INTERNAL_ERROR;
273 }
274
275 if (pszMsgText)
276 {
277 pszMsgText = RTStrStrip(pszMsgText);
278 for (size_t off = 0; pszMsgText[off]; off++)
279 if (pszMsgText[off] == '\r')
280 pszMsgText[off] = '\n';
281
282 LogRel(("FTSync: %s: NACK=%Rrc (%d) - '%s'\n", pszWhich, vrc2, vrc2, pszMsgText));
283 }
284 return VERR_INTERNAL_ERROR_2;
285 }
286
287 if (pszMsgText)
288 pszMsgText[-1] = ';';
289 }
290 return VERR_INTERNAL_ERROR_3;
291}
292
293/**
294 * Submitts a command to the destination and waits for the ACK.
295 *
296 * @returns VBox status code.
297 *
298 * @param pVM The VM to operate on.
299 * @param pszCommand The command.
300 * @param fWaitForAck Whether to wait for the ACK.
301 */
302static int ftmR3TcpSubmitCommand(PVM pVM, const char *pszCommand, bool fWaitForAck = true)
303{
304 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, pszCommand, strlen(pszCommand), "\n", sizeof("\n") - 1);
305 if (RT_FAILURE(rc))
306 return rc;
307 if (!fWaitForAck)
308 return VINF_SUCCESS;
309 return ftmR3TcpReadACK(pVM, pszCommand);
310}
311
312/**
313 * @copydoc SSMSTRMOPS::pfnWrite
314 */
315static DECLCALLBACK(int) ftmR3TcpOpWrite(void *pvUser, uint64_t offStream, const void *pvBuf, size_t cbToWrite)
316{
317 PVM pVM = (PVM)pvUser;
318
319 AssertReturn(cbToWrite > 0, VINF_SUCCESS);
320 AssertReturn(cbToWrite < UINT32_MAX, VERR_OUT_OF_RANGE);
321 AssertReturn(pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
322
323 for (;;)
324 {
325 FTMTCPHDR Hdr;
326 Hdr.u32Magic = FTMTCPHDR_MAGIC;
327 Hdr.cb = RT_MIN((uint32_t)cbToWrite, FTMTCPHDR_MAX_SIZE);
328 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pvBuf, (size_t)Hdr.cb);
329 if (RT_FAILURE(rc))
330 {
331 LogRel(("FTSync/TCP: Write error: %Rrc (cb=%#x)\n", rc, Hdr.cb));
332 return rc;
333 }
334 pVM->ftm.s.syncstate.uOffStream += Hdr.cb;
335 if (Hdr.cb == cbToWrite)
336 return VINF_SUCCESS;
337
338 /* advance */
339 cbToWrite -= Hdr.cb;
340 pvBuf = (uint8_t const *)pvBuf + Hdr.cb;
341 }
342}
343
344
345/**
346 * Selects and poll for close condition.
347 *
348 * We can use a relatively high poll timeout here since it's only used to get
349 * us out of error paths. In the normal cause of events, we'll get a
350 * end-of-stream header.
351 *
352 * @returns VBox status code.
353 *
354 * @param pState The teleporter state data.
355 */
356static int ftmR3TcpReadSelect(PVM pVM)
357{
358 int rc;
359 do
360 {
361 rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 1000);
362 if (RT_FAILURE(rc) && rc != VERR_TIMEOUT)
363 {
364 pVM->ftm.s.syncstate.fIOError = true;
365 LogRel(("FTSync/TCP: Header select error: %Rrc\n", rc));
366 break;
367 }
368 if (pVM->ftm.s.syncstate.fStopReading)
369 {
370 rc = VERR_EOF;
371 break;
372 }
373 } while (rc == VERR_TIMEOUT);
374 return rc;
375}
376
377
378/**
379 * @copydoc SSMSTRMOPS::pfnRead
380 */
381static DECLCALLBACK(int) ftmR3TcpOpRead(void *pvUser, uint64_t offStream, void *pvBuf, size_t cbToRead, size_t *pcbRead)
382{
383 PVM pVM = (PVM)pvUser;
384 AssertReturn(!pVM->fFaultTolerantMaster, VERR_INVALID_HANDLE);
385
386 for (;;)
387 {
388 int rc;
389
390 /*
391 * Check for various conditions and may have been signalled.
392 */
393 if (pVM->ftm.s.syncstate.fEndOfStream)
394 return VERR_EOF;
395 if (pVM->ftm.s.syncstate.fStopReading)
396 return VERR_EOF;
397 if (pVM->ftm.s.syncstate.fIOError)
398 return VERR_IO_GEN_FAILURE;
399
400 /*
401 * If there is no more data in the current block, read the next
402 * block header.
403 */
404 if (!pVM->ftm.s.syncstate.cbReadBlock)
405 {
406 rc = ftmR3TcpReadSelect(pVM);
407 if (RT_FAILURE(rc))
408 return rc;
409 FTMTCPHDR Hdr;
410 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
411 if (RT_FAILURE(rc))
412 {
413 pVM->ftm.s.syncstate.fIOError = true;
414 LogRel(("FTSync/TCP: Header read error: %Rrc\n", rc));
415 return rc;
416 }
417
418 if (RT_UNLIKELY( Hdr.u32Magic != FTMTCPHDR_MAGIC
419 || Hdr.cb > FTMTCPHDR_MAX_SIZE
420 || Hdr.cb == 0))
421 {
422 if ( Hdr.u32Magic == FTMTCPHDR_MAGIC
423 && ( Hdr.cb == 0
424 || Hdr.cb == UINT32_MAX)
425 )
426 {
427 pVM->ftm.s.syncstate.fEndOfStream = true;
428 pVM->ftm.s.syncstate.cbReadBlock = 0;
429 return Hdr.cb ? VERR_SSM_CANCELLED : VERR_EOF;
430 }
431 pVM->ftm.s.syncstate.fIOError = true;
432 LogRel(("FTSync/TCP: Invalid block: u32Magic=%#x cb=%#x\n", Hdr.u32Magic, Hdr.cb));
433 return VERR_IO_GEN_FAILURE;
434 }
435
436 pVM->ftm.s.syncstate.cbReadBlock = Hdr.cb;
437 if (pVM->ftm.s.syncstate.fStopReading)
438 return VERR_EOF;
439 }
440
441 /*
442 * Read more data.
443 */
444 rc = ftmR3TcpReadSelect(pVM);
445 if (RT_FAILURE(rc))
446 return rc;
447 uint32_t cb = (uint32_t)RT_MIN(pVM->ftm.s.syncstate.cbReadBlock, cbToRead);
448 rc = RTTcpRead(pVM->ftm.s.hSocket, pvBuf, cb, pcbRead);
449 if (RT_FAILURE(rc))
450 {
451 pVM->ftm.s.syncstate.fIOError = true;
452 LogRel(("FTSync/TCP: Data read error: %Rrc (cb=%#x)\n", rc, cb));
453 return rc;
454 }
455 if (pcbRead)
456 {
457 cb = (uint32_t)*pcbRead;
458 pVM->ftm.s.syncstate.uOffStream += cb;
459 pVM->ftm.s.syncstate.cbReadBlock -= cb;
460 return VINF_SUCCESS;
461 }
462 pVM->ftm.s.syncstate.uOffStream += cb;
463 pVM->ftm.s.syncstate.cbReadBlock -= cb;
464 if (cbToRead == cb)
465 return VINF_SUCCESS;
466
467 /* Advance to the next block. */
468 cbToRead -= cb;
469 pvBuf = (uint8_t *)pvBuf + cb;
470 }
471}
472
473
474/**
475 * @copydoc SSMSTRMOPS::pfnSeek
476 */
477static DECLCALLBACK(int) ftmR3TcpOpSeek(void *pvUser, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
478{
479 return VERR_NOT_SUPPORTED;
480}
481
482
483/**
484 * @copydoc SSMSTRMOPS::pfnTell
485 */
486static DECLCALLBACK(uint64_t) ftmR3TcpOpTell(void *pvUser)
487{
488 PVM pVM = (PVM)pvUser;
489 return pVM->ftm.s.syncstate.uOffStream;
490}
491
492
493/**
494 * @copydoc SSMSTRMOPS::pfnSize
495 */
496static DECLCALLBACK(int) ftmR3TcpOpSize(void *pvUser, uint64_t *pcb)
497{
498 return VERR_NOT_SUPPORTED;
499}
500
501
502/**
503 * @copydoc SSMSTRMOPS::pfnIsOk
504 */
505static DECLCALLBACK(int) ftmR3TcpOpIsOk(void *pvUser)
506{
507 PVM pVM = (PVM)pvUser;
508
509 if (pVM->fFaultTolerantMaster)
510 {
511 /* Poll for incoming NACKs and errors from the other side */
512 int rc = RTTcpSelectOne(pVM->ftm.s.hSocket, 0);
513 if (rc != VERR_TIMEOUT)
514 {
515 if (RT_SUCCESS(rc))
516 {
517 LogRel(("FTSync/TCP: Incoming data detect by IsOk, assuming it is a cancellation NACK.\n"));
518 rc = VERR_SSM_CANCELLED;
519 }
520 else
521 LogRel(("FTSync/TCP: RTTcpSelectOne -> %Rrc (IsOk).\n", rc));
522 return rc;
523 }
524 }
525
526 return VINF_SUCCESS;
527}
528
529
530/**
531 * @copydoc SSMSTRMOPS::pfnClose
532 */
533static DECLCALLBACK(int) ftmR3TcpOpClose(void *pvUser, bool fCanceled)
534{
535 PVM pVM = (PVM)pvUser;
536
537 if (pVM->fFaultTolerantMaster)
538 {
539 FTMTCPHDR EofHdr;
540 EofHdr.u32Magic = FTMTCPHDR_MAGIC;
541 EofHdr.cb = fCanceled ? UINT32_MAX : 0;
542 int rc = RTTcpWrite(pVM->ftm.s.hSocket, &EofHdr, sizeof(EofHdr));
543 if (RT_FAILURE(rc))
544 {
545 LogRel(("FTSync/TCP: EOF Header write error: %Rrc\n", rc));
546 return rc;
547 }
548 }
549 else
550 {
551 ASMAtomicWriteBool(&pVM->ftm.s.syncstate.fStopReading, true);
552 }
553
554 return VINF_SUCCESS;
555}
556
557
558/**
559 * Method table for a TCP based stream; passed to VMR3Save (master) and VMR3LoadFromStream (standby) with the VM handle as user argument.
560 */
561static SSMSTRMOPS const g_ftmR3TcpOps =
562{
563 SSMSTRMOPS_VERSION, /* structure version (start marker) */
564 ftmR3TcpOpWrite, /* pfnWrite */
565 ftmR3TcpOpRead, /* pfnRead */
566 ftmR3TcpOpSeek, /* pfnSeek - not supported */
567 ftmR3TcpOpTell, /* pfnTell */
568 ftmR3TcpOpSize, /* pfnSize - not supported */
569 ftmR3TcpOpIsOk, /* pfnIsOk */
570 ftmR3TcpOpClose, /* pfnClose */
571 SSMSTRMOPS_VERSION /* structure version again (end marker) */
572};
573
574/**
575 * Sync the VM state partially or fully
576 *
577 * @returns VBox status code.
578 * @param pVM The VM handle.
579 * @param enmState Which state to sync
580 */
581static DECLCALLBACK(void) ftmR3PerformSync(PVM pVM, FTMSYNCSTATE enmState)
582{
583 int rc;
584 bool fFullSync = false;
585
586 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
587 {
588 rc = VMR3Suspend(pVM);
589 AssertReturnVoid(RT_SUCCESS(rc));
590 }
591
592 switch (enmState)
593 {
594 case FTMSYNCSTATE_FULL:
595 fFullSync = true;
596 /* no break */
597 case FTMSYNCSTATE_DELTA_VM:
598 {
599 bool fSuspended = false;
600
601 rc = ftmR3TcpSubmitCommand(pVM, (fFullSync) ? "full-sync" : "checkpoint");
602 AssertRC(rc);
603
604 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
605 rc = VMR3Save(pVM, NULL /* pszFilename */, &g_ftmR3TcpOps, pVM, true /* fContinueAfterwards */, NULL, NULL, &fSuspended);
606 pVM->ftm.s.fDeltaLoadSaveActive = false;
607 AssertRC(rc);
608
609 rc = ftmR3TcpReadACK(pVM, (fFullSync) ? "full-sync-complete" : "checkpoint-complete");
610 AssertRC(rc);
611 break;
612 }
613
614 case FTMSYNCSTATE_DELTA_MEMORY:
615 /* Nothing to do as we sync the memory in an async thread; no need to block EMT. */
616 break;
617 }
618 /* Write protect all memory. */
619 rc = PGMR3PhysWriteProtectRAM(pVM);
620 AssertRC(rc);
621
622 if (enmState != FTMSYNCSTATE_DELTA_MEMORY)
623 {
624 rc = VMR3Resume(pVM);
625 AssertRC(rc);
626 }
627}
628
629/**
630 * PGMR3PhysEnumDirtyFTPages callback for syncing dirty physical pages
631 *
632 * @param pVM VM Handle.
633 * @param GCPhys GC physical address
634 * @param pRange HC virtual address of the page(s)
635 * @param cbRange Size of the dirty range in bytes.
636 * @param pvUser User argument
637 */
638static DECLCALLBACK(int) ftmR3SyncDirtyPage(PVM pVM, RTGCPHYS GCPhys, uint8_t *pRange, unsigned cbRange, void *pvUser)
639{
640 FTMTCPHDRMEM Hdr;
641 Hdr.u32Magic = FTMTCPHDR_MAGIC;
642 Hdr.GCPhys = GCPhys;
643 Hdr.cbPageRange = cbRange;
644 Hdr.cb = cbRange;
645 /** @todo compress page(s). */
646 int rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 2, &Hdr, sizeof(Hdr), pRange, (size_t)Hdr.cb);
647 if (RT_FAILURE(rc))
648 {
649 LogRel(("FTSync/TCP: Write error (ftmR3SyncDirtyPage): %Rrc (cb=%#x)\n", rc, Hdr.cb));
650 return rc;
651 }
652 pVM->ftm.s.StatSentMem.c += Hdr.cb + sizeof(Hdr);
653 return VINF_SUCCESS;
654}
655
656/**
657 * Thread function which starts syncing process for this master VM
658 *
659 * @param Thread The thread id.
660 * @param pvUser Not used
661 * @return VINF_SUCCESS (ignored).
662 *
663 */
664static DECLCALLBACK(int) ftmR3MasterThread(RTTHREAD Thread, void *pvUser)
665{
666 int rc = VINF_SUCCESS;
667 PVM pVM = (PVM)pvUser;
668
669 for (;;)
670 {
671 /*
672 * Try connect to the standby machine.
673 */
674 Log(("ftmR3MasterThread: client connect to %s %d\n", pVM->ftm.s.pszAddress, pVM->ftm.s.uPort));
675 rc = RTTcpClientConnect(pVM->ftm.s.pszAddress, pVM->ftm.s.uPort, &pVM->ftm.s.hSocket);
676 if (RT_SUCCESS(rc))
677 {
678 Log(("ftmR3MasterThread: CONNECTED\n"));
679
680 /* Disable Nagle. */
681 rc = RTTcpSetSendCoalescing(pVM->ftm.s.hSocket, false /*fEnable*/);
682 AssertRC(rc);
683
684 /* Read and check the welcome message. */
685 char szLine[RT_MAX(128, sizeof(g_szWelcome))];
686 RT_ZERO(szLine);
687 rc = RTTcpRead(pVM->ftm.s.hSocket, szLine, sizeof(g_szWelcome) - 1, NULL);
688 if ( RT_SUCCESS(rc)
689 && !strcmp(szLine, g_szWelcome))
690 {
691 /* password */
692 if (pVM->ftm.s.pszPassword)
693 rc = RTTcpWrite(pVM->ftm.s.hSocket, pVM->ftm.s.pszPassword, strlen(pVM->ftm.s.pszPassword));
694
695 if (RT_SUCCESS(rc))
696 {
697 /* ACK */
698 rc = ftmR3TcpReadACK(pVM, "password", "Invalid password");
699 if (RT_SUCCESS(rc))
700 {
701 /** todo: verify VM config. */
702 break;
703 }
704 }
705 }
706 /* Failed, so don't bother anymore. */
707 return VINF_SUCCESS;
708 }
709 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, 1000 /* 1 second */);
710 if (rc != VERR_TIMEOUT)
711 return VINF_SUCCESS; /* told to quit */
712 }
713
714 /* Successfully initialized the connection to the standby node.
715 * Start the sync process.
716 */
717
718 /* First sync all memory and write protect everything so
719 * we can send changed pages later on.
720 */
721
722 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3PerformSync, 2, pVM, FTMSYNCSTATE_FULL);
723 AssertRC(rc);
724
725 for (;;)
726 {
727 rc = RTSemEventWait(pVM->ftm.s.master.hShutdownEvent, pVM->ftm.s.uInterval);
728 if (rc != VERR_TIMEOUT)
729 break; /* told to quit */
730
731 if (!pVM->ftm.s.fCheckpointingActive)
732 {
733 rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
734 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
735
736 rc = ftmR3TcpSubmitCommand(pVM, "mem-sync");
737 AssertRC(rc);
738
739 /* sync the changed memory with the standby node. */
740 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3PerformSync, 2, pVM, FTMSYNCSTATE_DELTA_MEMORY);
741 AssertRC(rc);
742
743 /* Enumerate all dirty pages and send them to the standby VM. */
744 rc = PGMR3PhysEnumDirtyFTPages(pVM, ftmR3SyncDirtyPage, NULL /* pvUser */);
745 AssertRC(rc);
746
747 /* Send last memory header to signal the end. */
748 FTMTCPHDRMEM Hdr;
749 Hdr.u32Magic = FTMTCPHDR_MAGIC;
750 Hdr.GCPhys = 0;
751 Hdr.cbPageRange = 0;
752 Hdr.cb = 0;
753 rc = RTTcpSgWriteL(pVM->ftm.s.hSocket, 1, &Hdr, sizeof(Hdr));
754 if (RT_FAILURE(rc))
755 LogRel(("FTSync/TCP: Write error (ftmR3MasterThread): %Rrc (cb=%#x)\n", rc, Hdr.cb));
756
757 rc = ftmR3TcpReadACK(pVM, "mem-sync-complete");
758 AssertRC(rc);
759
760 PDMCritSectLeave(&pVM->ftm.s.CritSect);
761 }
762 }
763 return rc;
764}
765
766/**
767 * Listen for incoming traffic destined for the standby VM.
768 *
769 * @copydoc FNRTTCPSERVE
770 *
771 * @returns VINF_SUCCESS or VERR_TCP_SERVER_STOP.
772 */
773static DECLCALLBACK(int) ftmR3StandbyServeConnection(RTSOCKET Sock, void *pvUser)
774{
775 PVM pVM = (PVM)pvUser;
776
777 pVM->ftm.s.hSocket = Sock;
778
779 /*
780 * Disable Nagle.
781 */
782 int rc = RTTcpSetSendCoalescing(Sock, false /*fEnable*/);
783 AssertRC(rc);
784
785 /* Send the welcome message to the master node. */
786 rc = RTTcpWrite(Sock, g_szWelcome, sizeof(g_szWelcome) - 1);
787 if (RT_FAILURE(rc))
788 {
789 LogRel(("Teleporter: Failed to write welcome message: %Rrc\n", rc));
790 return VINF_SUCCESS;
791 }
792
793 /*
794 * Password.
795 */
796 const char *pszPassword = pVM->ftm.s.pszPassword;
797 if (pszPassword)
798 {
799 unsigned off = 0;
800 while (pszPassword[off])
801 {
802 char ch;
803 rc = RTTcpRead(Sock, &ch, sizeof(ch), NULL);
804 if ( RT_FAILURE(rc)
805 || pszPassword[off] != ch)
806 {
807 if (RT_FAILURE(rc))
808 LogRel(("FTSync: Password read failure (off=%u): %Rrc\n", off, rc));
809 else
810 LogRel(("FTSync: Invalid password (off=%u)\n", off));
811 ftmR3TcpWriteNACK(pVM, VERR_AUTHENTICATION_FAILURE);
812 return VINF_SUCCESS;
813 }
814 off++;
815 }
816 }
817 rc = ftmR3TcpWriteACK(pVM);
818 if (RT_FAILURE(rc))
819 return VINF_SUCCESS;
820
821 /** todo: verify VM config. */
822
823 /*
824 * Stop the server.
825 *
826 * Note! After this point we must return VERR_TCP_SERVER_STOP, while prior
827 * to it we must not return that value!
828 */
829 RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
830
831 /*
832 * Command processing loop.
833 */
834 bool fDone = false;
835 for (;;)
836 {
837 bool fFullSync = false;
838 char szCmd[128];
839
840 rc = ftmR3TcpReadLine(pVM, szCmd, sizeof(szCmd));
841 AssertRC(rc);
842 if (RT_FAILURE(rc))
843 break;
844
845 if (!strcmp(szCmd, "mem-sync"))
846 {
847 rc = ftmR3TcpWriteACK(pVM);
848 AssertRC(rc);
849 if (RT_FAILURE(rc))
850 continue;
851
852 while (true)
853 {
854 FTMTCPHDRMEM Hdr;
855 void *pPage;
856
857 /* Read memory header. */
858 rc = RTTcpRead(pVM->ftm.s.hSocket, &Hdr, sizeof(Hdr), NULL);
859 if (RT_FAILURE(rc))
860 {
861 Log(("RTTcpRead failed with %Rrc\n", rc));
862 break;
863 }
864 pVM->ftm.s.StatReceivedMem.c += sizeof(Hdr);
865
866 if (Hdr.cb == 0)
867 break; /* end of sync. */
868
869 Assert(Hdr.cb == Hdr.cbPageRange); /** @todo uncompress */
870
871 /* Allocate memory to hold the page(s). */
872 pPage = RTMemAlloc(Hdr.cbPageRange);
873 AssertBreak(pPage);
874
875 /* Fetch the page(s). */
876 rc = RTTcpRead(pVM->ftm.s.hSocket, pPage, Hdr.cb, NULL);
877 if (RT_FAILURE(rc))
878 {
879 Log(("RTTcpRead page data (%d bytes) failed with %Rrc\n", Hdr.cb, rc));
880 break;
881 }
882 pVM->ftm.s.StatReceivedMem.c += Hdr.cb;
883
884 /* Update the guest memory of the standby VM. */
885 rc = PGMPhysWrite(pVM, Hdr.GCPhys, pPage, Hdr.cbPageRange);
886 AssertRC(rc);
887
888 RTMemFree(pPage);
889 }
890
891 rc = ftmR3TcpWriteACK(pVM);
892 AssertRC(rc);
893 }
894 else
895 if ( !strcmp(szCmd, "checkpoint")
896 || (fFullSync = true) /* intended assignment */
897 || !strcmp(szCmd, "full-sync"))
898 {
899 rc = ftmR3TcpWriteACK(pVM);
900 AssertRC(rc);
901 if (RT_FAILURE(rc))
902 continue;
903
904 RTSocketRetain(pVM->ftm.s.hSocket); /* For concurrent access by I/O thread and EMT. */
905 pVM->ftm.s.syncstate.uOffStream = 0;
906
907 pVM->ftm.s.fDeltaLoadSaveActive = (fFullSync == false);
908 rc = VMR3LoadFromStream(pVM, &g_ftmR3TcpOps, pVM, NULL, NULL);
909 pVM->ftm.s.fDeltaLoadSaveActive = false;
910 RTSocketRelease(pVM->ftm.s.hSocket);
911 AssertRC(rc);
912 if (RT_FAILURE(rc))
913 {
914 LogRel(("FTSync: VMR3LoadFromStream -> %Rrc\n", rc));
915 ftmR3TcpWriteNACK(pVM, rc);
916 continue;
917 }
918
919 /* The EOS might not have been read, make sure it is. */
920 pVM->ftm.s.syncstate.fStopReading = false;
921 size_t cbRead;
922 rc = ftmR3TcpOpRead(pVM, pVM->ftm.s.syncstate.uOffStream, szCmd, 1, &cbRead);
923 if (rc != VERR_EOF)
924 {
925 LogRel(("FTSync: Draining teleporterTcpOpRead -> %Rrc\n", rc));
926 ftmR3TcpWriteNACK(pVM, rc);
927 continue;
928 }
929
930 rc = ftmR3TcpWriteACK(pVM);
931 AssertRC(rc);
932 }
933 }
934 LogFlowFunc(("returns mRc=%Rrc\n", rc));
935 return VERR_TCP_SERVER_STOP;
936}
937
938/**
939 * Powers on the fault tolerant virtual machine.
940 *
941 * @returns VBox status code.
942 *
943 * @param pVM The VM to operate on.
944 * @param fMaster FT master or standby
945 * @param uInterval FT sync interval
946 * @param pszAddress Standby VM address
947 * @param uPort Standby VM port
948 * @param pszPassword FT password (NULL for none)
949 *
950 * @thread Any thread.
951 * @vmstate Created
952 * @vmstateto PoweringOn+Running (master), PoweringOn+Running_FT (standby)
953 */
954VMMR3DECL(int) FTMR3PowerOn(PVM pVM, bool fMaster, unsigned uInterval, const char *pszAddress, unsigned uPort, const char *pszPassword)
955{
956 int rc = VINF_SUCCESS;
957
958 VMSTATE enmVMState = VMR3GetState(pVM);
959 AssertMsgReturn(enmVMState == VMSTATE_CREATED,
960 ("%s\n", VMR3GetStateName(enmVMState)),
961 VERR_INTERNAL_ERROR_4);
962 AssertReturn(pszAddress, VERR_INVALID_PARAMETER);
963
964 if (pVM->ftm.s.uInterval)
965 pVM->ftm.s.uInterval = uInterval;
966 else
967 pVM->ftm.s.uInterval = 50; /* standard sync interval of 50ms */
968
969 pVM->ftm.s.uPort = uPort;
970 pVM->ftm.s.pszAddress = RTStrDup(pszAddress);
971 if (pszPassword)
972 pVM->ftm.s.pszPassword = RTStrDup(pszPassword);
973 if (fMaster)
974 {
975 rc = RTSemEventCreate(&pVM->ftm.s.master.hShutdownEvent);
976 if (RT_FAILURE(rc))
977 return rc;
978
979 rc = RTThreadCreate(NULL, ftmR3MasterThread, pVM,
980 0, RTTHREADTYPE_IO /* higher than normal priority */, 0, "ftmMaster");
981 if (RT_FAILURE(rc))
982 return rc;
983
984 pVM->fFaultTolerantMaster = true;
985 if (PGMIsUsingLargePages(pVM))
986 {
987 /* Must disable large page usage as 2 MB pages are too big to write monitor. */
988 LogRel(("FTSync: disabling large page usage.\n"));
989 PGMSetLargePageUsage(pVM, false);
990 }
991 /** @todo might need to disable page fusion as well */
992
993 return VMR3PowerOn(pVM);
994 }
995 else
996 {
997 /* standby */
998 rc = RTTcpServerCreateEx(pszAddress, uPort, &pVM->ftm.s.standby.hServer);
999 if (RT_FAILURE(rc))
1000 return rc;
1001 pVM->ftm.s.fIsStandbyNode = true;
1002
1003 rc = RTTcpServerListen(pVM->ftm.s.standby.hServer, ftmR3StandbyServeConnection, pVM);
1004 /** @todo deal with the exit code to check if we should activate this standby VM. */
1005
1006 if (pVM->ftm.s.standby.hServer)
1007 {
1008 RTTcpServerDestroy(pVM->ftm.s.standby.hServer);
1009 pVM->ftm.s.standby.hServer = NULL;
1010 }
1011 if (rc == VERR_TCP_SERVER_SHUTDOWN)
1012 rc = VINF_SUCCESS; /* ignore this error; the standby process was cancelled. */
1013 }
1014 return rc;
1015}
1016
1017/**
1018 * Powers off the fault tolerant virtual machine (standby).
1019 *
1020 * @returns VBox status code.
1021 *
1022 * @param pVM The VM to operate on.
1023 */
1024VMMR3DECL(int) FTMR3CancelStandby(PVM pVM)
1025{
1026 AssertReturn(!pVM->fFaultTolerantMaster, VERR_NOT_SUPPORTED);
1027 Assert(pVM->ftm.s.standby.hServer);
1028
1029 return RTTcpServerShutdown(pVM->ftm.s.standby.hServer);
1030}
1031
1032
1033/**
1034 * Performs a full sync to the standby node
1035 *
1036 * @returns VBox status code.
1037 *
1038 * @param pVM The VM to operate on.
1039 */
1040VMMR3DECL(int) FTMR3SyncState(PVM pVM)
1041{
1042 if (!pVM->fFaultTolerantMaster)
1043 return VINF_SUCCESS;
1044
1045 pVM->ftm.s.fCheckpointingActive = true;
1046 int rc = PDMCritSectEnter(&pVM->ftm.s.CritSect, VERR_SEM_BUSY);
1047 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc));
1048
1049 /* Reset the sync state. */
1050 pVM->ftm.s.syncstate.uOffStream = 0;
1051 pVM->ftm.s.syncstate.cbReadBlock = 0;
1052 pVM->ftm.s.syncstate.fStopReading = false;
1053 pVM->ftm.s.syncstate.fIOError = false;
1054 pVM->ftm.s.syncstate.fEndOfStream = false;
1055
1056 /* Sync state + changed memory with the standby node. */
1057 rc = VMR3ReqCallWait(pVM, VMCPUID_ANY, (PFNRT)ftmR3PerformSync, 2, pVM, FTMSYNCSTATE_DELTA_VM);
1058 AssertRC(rc);
1059
1060 PDMCritSectLeave(&pVM->ftm.s.CritSect);
1061 pVM->ftm.s.fCheckpointingActive = false;
1062
1063 return VERR_NOT_IMPLEMENTED;
1064}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette