VirtualBox

source: vbox/trunk/src/VBox/Devices/Storage/DrvVD.cpp@ 64683

Last change on this file since 64683 was 64671, checked in by vboxsync, 8 years ago

DrvVD: Add statistics about how many attempts were made to query a direct buffer pointer from the device/driver above and how many succeeded

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 198.4 KB
Line 
1/* $Id: DrvVD.cpp 64671 2016-11-15 12:53:00Z vboxsync $ */
2/** @file
3 * DrvVD - Generic VBox disk media driver.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_DRV_VD
23#include <VBox/vd.h>
24#include <VBox/vmm/pdmdrv.h>
25#include <VBox/vmm/pdmstorageifs.h>
26#include <VBox/vmm/pdmasynccompletion.h>
27#include <VBox/vmm/pdmblkcache.h>
28#include <VBox/vmm/ssm.h>
29#include <iprt/asm.h>
30#include <iprt/alloc.h>
31#include <iprt/assert.h>
32#include <iprt/uuid.h>
33#include <iprt/file.h>
34#include <iprt/string.h>
35#include <iprt/tcp.h>
36#include <iprt/semaphore.h>
37#include <iprt/sg.h>
38#include <iprt/poll.h>
39#include <iprt/pipe.h>
40#include <iprt/system.h>
41#include <iprt/memsafer.h>
42#include <iprt/memcache.h>
43#include <iprt/list.h>
44
45#ifdef VBOX_WITH_INIP
46/* All lwip header files are not C++ safe. So hack around this. */
47RT_C_DECLS_BEGIN
48#include <lwip/opt.h>
49#include <lwip/inet.h>
50#include <lwip/tcp.h>
51#include <lwip/sockets.h>
52# if LWIP_IPV6
53# include <lwip/inet6.h>
54# endif
55RT_C_DECLS_END
56#endif /* VBOX_WITH_INIP */
57
58#include "HBDMgmt.h"
59#include "IOBufMgmt.h"
60
61#include "VBoxDD.h"
62
63#ifdef VBOX_WITH_INIP
64/* Small hack to get at lwIP initialized status */
65extern bool DevINIPConfigured(void);
66#endif /* VBOX_WITH_INIP */
67
68
69/** @def VBOX_PERIODIC_FLUSH
70 * Enable support for periodically flushing the VDI to disk. This may prove
71 * useful for those nasty problems with the ultra-slow host filesystems.
72 * If this is enabled, it can be configured via the CFGM key
73 * "VBoxInternal/Devices/piix3ide/0/LUN#<x>/Config/FlushInterval". @verbatim<x>@endverbatim
74 * must be replaced with the correct LUN number of the disk that should
75 * do the periodic flushes. The value of the key is the number of bytes
76 * written between flushes. A value of 0 (the default) denotes no flushes. */
77#define VBOX_PERIODIC_FLUSH
78
79/** @def VBOX_IGNORE_FLUSH
80 * Enable support for ignoring VDI flush requests. This can be useful for
81 * filesystems that show bad guest IDE write performance (especially with
82 * Windows guests). NOTE that this does not disable the flushes caused by
83 * the periodic flush cache feature above.
84 * If this feature is enabled, it can be configured via the CFGM key
85 * "VBoxInternal/Devices/piix3ide/0/LUN#<x>/Config/IgnoreFlush". @verbatim<x>@endverbatim
86 * must be replaced with the correct LUN number of the disk that should
87 * ignore flush requests. The value of the key is a boolean. The default
88 * is to ignore flushes, i.e. true. */
89#define VBOX_IGNORE_FLUSH
90
91
92/*********************************************************************************************************************************
93* Defined types, constants and macros *
94*********************************************************************************************************************************/
95
/**
 * Converts a pointer to VBOXDISK::IMedia to a PVBOXDISK.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument (conditional, pointer arithmetic, ...) is converted as a whole
 * instead of only its first operand.
 */
#define PDMIMEDIA_2_VBOXDISK(pInterface) \
    ( (PVBOXDISK)((uintptr_t)(pInterface) - RT_OFFSETOF(VBOXDISK, IMedia)) )
99
100/** Saved state version of an I/O request. */
101#define DRVVD_IOREQ_SAVED_STATE_VERSION UINT32_C(1)
102/** Maximum number of request errors in the release log before muting. */
103#define DRVVD_MAX_LOG_REL_ERRORS 100
104
105/** Forward declaration for the disk container. */
106typedef struct VBOXDISK *PVBOXDISK;
107
108/**
109 * VBox disk container, image information, private part.
110 */
111
112typedef struct VBOXIMAGE
113{
114 /** Pointer to next image. */
115 struct VBOXIMAGE *pNext;
116 /** Pointer to list of VD interfaces. Per-image. */
117 PVDINTERFACE pVDIfsImage;
118 /** Configuration information interface. */
119 VDINTERFACECONFIG VDIfConfig;
120 /** TCP network stack interface. */
121 VDINTERFACETCPNET VDIfTcpNet;
122 /** I/O interface. */
123 VDINTERFACEIO VDIfIo;
124} VBOXIMAGE, *PVBOXIMAGE;
125
126/**
127 * Storage backend data.
128 */
129typedef struct DRVVDSTORAGEBACKEND
130{
131 /** PDM async completion end point. */
132 PPDMASYNCCOMPLETIONENDPOINT pEndpoint;
133 /** The template. */
134 PPDMASYNCCOMPLETIONTEMPLATE pTemplate;
135 /** Event semaphore for synchronous operations. */
136 RTSEMEVENT EventSem;
137 /** Flag whether a synchronous operation is currently pending. */
138 volatile bool fSyncIoPending;
139 /** Return code of the last completed request. */
140 int rcReqLast;
141 /** Callback routine */
142 PFNVDCOMPLETED pfnCompleted;
143} DRVVDSTORAGEBACKEND, *PDRVVDSTORAGEBACKEND;
144
/**
 * VD I/O request state.
 *
 * The numeric values are spelled out; they match what implicit enumeration
 * starting at VDIOREQSTATE_INVALID = 0 yields.
 */
typedef enum VDIOREQSTATE
{
    /** Invalid. */
    VDIOREQSTATE_INVALID    = 0,
    /** The request is not in use and resides on the free list. */
    VDIOREQSTATE_FREE       = 1,
    /** The request was just allocated and is not active. */
    VDIOREQSTATE_ALLOCATED  = 2,
    /** The request was allocated and is in use. */
    VDIOREQSTATE_ACTIVE     = 3,
    /** The request was suspended and is not actively processed. */
    VDIOREQSTATE_SUSPENDED  = 4,
    /** The request is in the last step of completion and syncs memory. */
    VDIOREQSTATE_COMPLETING = 5,
    /** The request completed. */
    VDIOREQSTATE_COMPLETED  = 6,
    /** The request was aborted but wasn't returned as complete from the
     * storage layer below us. */
    VDIOREQSTATE_CANCELED   = 7,
    /** 32bit hack. */
    VDIOREQSTATE_32BIT_HACK = 0x7fffffff
} VDIOREQSTATE;
170
/**
 * VD I/O Request.
 *
 * Fixed-size request header followed by a variable sized, allocator specific
 * area (abAlloc).  The anonymous union holds the data for the request type
 * given by enmType.
 */
typedef struct PDMMEDIAEXIOREQINT
{
    /** List node for the list of allocated requests. */
    RTLISTNODE NdAllocatedList;
    /** List node for requests waiting for I/O memory or on the redo list. */
    RTLISTNODE NdLstWait;
    /** I/O request type. */
    PDMMEDIAEXIOREQTYPE enmType;
    /** Request state. */
    volatile VDIOREQSTATE enmState;
    /** I/O request ID. */
    PDMMEDIAEXIOREQID uIoReqId;
    /** Pointer to the disk container. */
    PVBOXDISK pDisk;
    /** Flags. */
    uint32_t fFlags;
    /** Timestamp when the request was submitted. */
    uint64_t tsSubmit;
    /** Type dependent data. */
    union
    {
        /** Read/Write request specific data. */
        struct
        {
            /** Start offset of the request. */
            uint64_t offStart;
            /** Size of the request. */
            size_t cbReq;
            /** Size left for this request. */
            size_t cbReqLeft;
            /** Size of the allocated I/O buffer. */
            size_t cbIoBuf;
            /** Pointer to the S/G buffer. */
            PRTSGBUF pSgBuf;
            /** Flag whether the pointer is a direct buffer or
             * was allocated by us. */
            bool fDirectBuf;
            /** Buffer management data, which member is valid depends on the
             * fDirectBuf flag. */
            union
            {
                /** Direct buffer. */
                struct
                {
                    /** Segment for the data buffer. */
                    RTSGSEG Seg;
                    /** S/G buffer structure. */
                    RTSGBUF SgBuf;
                } Direct;
                /** I/O buffer descriptor. */
                IOBUFDESC IoBuf;
            };
        } ReadWrite;
        /** Discard specific data. */
        struct
        {
            /** Pointer to array of ranges to discard. */
            PRTRANGE paRanges;
            /** Number of ranges to discard. */
            unsigned cRanges;
        } Discard;
    };
    /** Allocator specific memory - variable size.  Declared with one element,
     * the real size is decided at allocation time. */
    uint8_t abAlloc[1];
} PDMMEDIAEXIOREQINT;
/** Pointer to a VD I/O request. */
typedef PDMMEDIAEXIOREQINT *PPDMMEDIAEXIOREQINT;
240
/**
 * Structure for holding a list of allocated requests.
 *
 * VBOXDISK keeps an array of these (aIoReqAllocBins), each bin having its own
 * mutex.
 */
typedef struct VDLSTIOREQALLOC
{
    /** Mutex protecting the list of allocated requests in this bin. */
    RTSEMFASTMUTEX hMtxLstIoReqAlloc;
    /** List anchor. */
    RTLISTANCHOR LstIoReqAlloc;
} VDLSTIOREQALLOC;
/** Pointer to a list of allocated requests. */
typedef VDLSTIOREQALLOC *PVDLSTIOREQALLOC;
252
253/** Number of bins for allocated requests. */
254#define DRVVD_VDIOREQ_ALLOC_BINS 8
255
/**
 * VBox disk container media main structure, private part.
 *
 * @implements  PDMIMEDIA
 * @implements  PDMIMEDIAEX
 * @implements  PDMIMOUNT
 * @implements  VDINTERFACEERROR
 * @implements  VDINTERFACETCPNET
 * @implements  VDINTERFACEASYNCIO
 * @implements  VDINTERFACECONFIG
 */
typedef struct VBOXDISK
{
    /** The VBox disk container. */
    PVBOXHDD pDisk;
    /** The media interface. */
    PDMIMEDIA IMedia;
    /** Media port. */
    PPDMIMEDIAPORT pDrvMediaPort;
    /** Pointer to the driver instance. */
    PPDMDRVINS pDrvIns;
    /** Flag whether suspend has changed image open mode to read only. */
    bool fTempReadOnly;
    /** Flag whether to use the runtime (true) or startup error facility;
     * evaluated by drvvdErrorCallback(). */
    bool fErrorUseRuntime;
    /** Pointer to list of VD interfaces. Per-disk. */
    PVDINTERFACE pVDIfsDisk;
    /** Error interface. */
    VDINTERFACEERROR VDIfError;
    /** Thread synchronization interface. */
    VDINTERFACETHREADSYNC VDIfThreadSync;

    /** Flag whether opened disk supports async I/O operations. */
    bool fAsyncIOSupported;
    /** Pointer to the list of data we need to keep per image. */
    PVBOXIMAGE pImages;
    /** Flag whether the media should allow concurrent open for writing. */
    bool fShareable;
    /** Flag whether a merge operation has been set up. */
    bool fMergePending;
    /** Synchronization to prevent destruction before merge finishes. */
    RTSEMFASTMUTEX MergeCompleteMutex;
    /** Synchronization between merge and other image accesses; taken by the
     * drvvdThreadStart* and released by the drvvdThreadFinish* callbacks. */
    RTSEMRW MergeLock;
    /** Source image index for merging. */
    unsigned uMergeSource;
    /** Target image index for merging. */
    unsigned uMergeTarget;

    /** Flag whether boot acceleration is enabled. */
    bool fBootAccelEnabled;
    /** Flag whether boot acceleration is currently active. */
    bool fBootAccelActive;
    /** Size of the disk, used for read truncation. */
    uint64_t cbDisk;
    /** Size of the configured buffer. */
    size_t cbBootAccelBuffer;
    /** Start offset for which the buffer holds data. */
    uint64_t offDisk;
    /** Number of valid bytes in the buffer. */
    size_t cbDataValid;
    /** The disk buffer. */
    uint8_t *pbData;
    /** Bandwidth group the disk is assigned to, NULL if none. */
    char *pszBwGroup;
    /** Flag whether async I/O using the host cache is enabled. */
    bool fAsyncIoWithHostCache;

    /** I/O interface for a cache image. */
    VDINTERFACEIO VDIfIoCache;
    /** Interface list for the cache image. */
    PVDINTERFACE pVDIfsCache;

    /** The block cache handle if configured. */
    PPDMBLKCACHE pBlkCache;
    /** Host block device manager, created on demand (NIL_HBDMGR until then). */
    HBDMGR hHbdMgr;

    /** Drive type. */
    PDMMEDIATYPE enmType;
    /** Locked indicator. */
    bool fLocked;
    /** Mountable indicator. */
    bool fMountable;
    /** Visible to the BIOS. */
    bool fBiosVisible;
    /** Flag whether this medium should be presented as non rotational. */
    bool fNonRotational;
#ifdef VBOX_PERIODIC_FLUSH
    /** HACK: Configuration value for number of bytes written after which to flush. */
    uint32_t cbFlushInterval;
    /** HACK: Current count for the number of bytes written since the last flush. */
    uint32_t cbDataWritten;
#endif /* VBOX_PERIODIC_FLUSH */
#ifdef VBOX_IGNORE_FLUSH
    /** HACK: Disable flushes for this drive. */
    bool fIgnoreFlush;
    /** Disable async flushes for this drive. */
    bool fIgnoreFlushAsync;
#endif /* VBOX_IGNORE_FLUSH */
    /** Our mountable interface. */
    PDMIMOUNT IMount;
    /** Pointer to the mount notify interface above us. */
    PPDMIMOUNTNOTIFY pDrvMountNotify;
    /** Uuid of the drive. */
    RTUUID Uuid;
    /** BIOS PCHS Geometry. */
    PDMMEDIAGEOMETRY PCHSGeometry;
    /** BIOS LCHS Geometry. */
    PDMMEDIAGEOMETRY LCHSGeometry;

    /** @name Cryptographic support
     * @{ */
    /** Pointer to the CFGM node containing the config of the crypto filter
     * if enabled. */
    PCFGMNODE pCfgCrypto;
    /** Config interface for the encryption filter. */
    VDINTERFACECONFIG VDIfCfg;
    /** Crypto interface for the encryption filter. */
    VDINTERFACECRYPTO VDIfCrypto;
    /** The secret key interface used to retrieve keys. */
    PPDMISECKEY pIfSecKey;
    /** The secret key helper interface used to notify about missing keys. */
    PPDMISECKEYHLP pIfSecKeyHlp;
    /** @} */

    /** @name IMEDIAEX interface support specific members.
     * @{ */
    /** Pointer to the IMEDIAEXPORT interface above us. */
    PPDMIMEDIAEXPORT pDrvMediaExPort;
    /** Our extended media interface. */
    PDMIMEDIAEX IMediaEx;
    /** Memory cache for the I/O requests. */
    RTMEMCACHE hIoReqCache;
    /** I/O buffer manager. */
    IOBUFMGR hIoBufMgr;
    /** Active request counter. */
    volatile uint32_t cIoReqsActive;
    /** Bins for allocated requests. */
    VDLSTIOREQALLOC aIoReqAllocBins[DRVVD_VDIOREQ_ALLOC_BINS];
    /** List of requests waiting for I/O memory to become available -
     * PDMMEDIAEXIOREQINT::NdLstWait. */
    RTLISTANCHOR LstIoReqIoBufWait;
    /** Critical section protecting the list of requests waiting for I/O memory. */
    RTCRITSECT CritSectIoReqsIoBufWait;
    /** Number of requests waiting for a I/O buffer. */
    volatile uint32_t cIoReqsWaiting;
    /** Flag whether we have to resubmit requests on resume because the
     * VM was suspended due to a recoverable I/O error.
     */
    volatile bool fRedo;
    /** List of requests we have to redo. */
    RTLISTANCHOR LstIoReqRedo;
    /** Critical section protecting the list of waiting requests (going by the
     * name this guards LstIoReqRedo - TODO confirm against the users). */
    RTCRITSECT CritSectIoReqRedo;
    /** Number of errors logged so far. */
    unsigned cErrors;
    /** @} */

    /** @name Statistics.
     * @{ */
    /** How many attempts were made to query a direct buffer pointer from the
     * device/driver above. */
    STAMCOUNTER StatQueryBufAttempts;
    /** How many attempts to query a direct buffer pointer succeeded. */
    STAMCOUNTER StatQueryBufSuccess;
    /** @} */
} VBOXDISK;
423
424
425/*********************************************************************************************************************************
426* Internal Functions *
427*********************************************************************************************************************************/
428
429static DECLCALLBACK(void) drvvdMediaExIoReqComplete(void *pvUser1, void *pvUser2, int rcReq);
430static void drvvdPowerOffOrDestructOrUnmount(PPDMDRVINS pDrvIns);
431DECLINLINE(void) drvvdMediaExIoReqBufFree(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq);
432static int drvvdMediaExIoReqCompleteWorker(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, int rcReq, bool fUpNotify);
433static int drvvdMediaExIoReqReadWriteProcess(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, bool fUpNotify);
434
435/**
436 * Internal: allocate new image descriptor and put it in the list
437 */
438static PVBOXIMAGE drvvdNewImage(PVBOXDISK pThis)
439{
440 AssertPtr(pThis);
441 PVBOXIMAGE pImage = (PVBOXIMAGE)RTMemAllocZ(sizeof(VBOXIMAGE));
442 if (pImage)
443 {
444 pImage->pVDIfsImage = NULL;
445 PVBOXIMAGE *pp = &pThis->pImages;
446 while (*pp != NULL)
447 pp = &(*pp)->pNext;
448 *pp = pImage;
449 pImage->pNext = NULL;
450 }
451
452 return pImage;
453}
454
455/**
456 * Internal: free the list of images descriptors.
457 */
458static void drvvdFreeImages(PVBOXDISK pThis)
459{
460 while (pThis->pImages != NULL)
461 {
462 PVBOXIMAGE p = pThis->pImages;
463 pThis->pImages = pThis->pImages->pNext;
464 RTMemFree(p);
465 }
466}
467
468
469/**
470 * Make the image temporarily read-only.
471 *
472 * @returns VBox status code.
473 * @param pThis The driver instance data.
474 */
475static int drvvdSetReadonly(PVBOXDISK pThis)
476{
477 int rc = VINF_SUCCESS;
478 if ( pThis->pDisk
479 && !VDIsReadOnly(pThis->pDisk))
480 {
481 unsigned uOpenFlags;
482 rc = VDGetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, &uOpenFlags);
483 AssertRC(rc);
484 uOpenFlags |= VD_OPEN_FLAGS_READONLY;
485 rc = VDSetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, uOpenFlags);
486 AssertRC(rc);
487 pThis->fTempReadOnly = true;
488 }
489 return rc;
490}
491
492
493/**
494 * Undo the temporary read-only status of the image.
495 *
496 * @returns VBox status code.
497 * @param pThis The driver instance data.
498 */
499static int drvvdSetWritable(PVBOXDISK pThis)
500{
501 int rc = VINF_SUCCESS;
502 if (pThis->fTempReadOnly)
503 {
504 unsigned uOpenFlags;
505 rc = VDGetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, &uOpenFlags);
506 AssertRC(rc);
507 uOpenFlags &= ~VD_OPEN_FLAGS_READONLY;
508 rc = VDSetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, uOpenFlags);
509 if (RT_SUCCESS(rc))
510 pThis->fTempReadOnly = false;
511 else
512 AssertRC(rc);
513 }
514 return rc;
515}
516
517
518/*********************************************************************************************************************************
519* Error reporting callback *
520*********************************************************************************************************************************/
521
522static DECLCALLBACK(void) drvvdErrorCallback(void *pvUser, int rc, RT_SRC_POS_DECL,
523 const char *pszFormat, va_list va)
524{
525 PPDMDRVINS pDrvIns = (PPDMDRVINS)pvUser;
526 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
527 if (pThis->fErrorUseRuntime)
528 /* We must not pass VMSETRTERR_FLAGS_FATAL as it could lead to a
529 * deadlock: We are probably executed in a thread context != EMT
530 * and the EM thread would wait until every thread is suspended
531 * but we would wait for the EM thread ... */
532
533 PDMDrvHlpVMSetRuntimeErrorV(pDrvIns, /* fFlags=*/ 0, "DrvVD", pszFormat, va);
534 else
535 PDMDrvHlpVMSetErrorV(pDrvIns, rc, RT_SRC_POS_ARGS, pszFormat, va);
536}
537
538
539/*********************************************************************************************************************************
540* VD Async I/O interface implementation *
541*********************************************************************************************************************************/
542
543#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION
544
545static DECLCALLBACK(void) drvvdAsyncTaskCompleted(PPDMDRVINS pDrvIns, void *pvTemplateUser, void *pvUser, int rcReq)
546{
547 RT_NOREF(pDrvIns);
548 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pvTemplateUser;
549
550 LogFlowFunc(("pDrvIns=%#p pvTemplateUser=%#p pvUser=%#p rcReq=%d\n",
551 pDrvIns, pvTemplateUser, pvUser, rcReq));
552
553 if (pStorageBackend->fSyncIoPending)
554 {
555 Assert(!pvUser);
556 pStorageBackend->rcReqLast = rcReq;
557 ASMAtomicWriteBool(&pStorageBackend->fSyncIoPending, false);
558 RTSemEventSignal(pStorageBackend->EventSem);
559 }
560 else
561 {
562 int rc;
563
564 AssertPtr(pvUser);
565
566 AssertPtr(pStorageBackend->pfnCompleted);
567 rc = pStorageBackend->pfnCompleted(pvUser, rcReq);
568 AssertRC(rc);
569 }
570}
571
572static DECLCALLBACK(int) drvvdAsyncIOOpen(void *pvUser, const char *pszLocation,
573 uint32_t fOpen,
574 PFNVDCOMPLETED pfnCompleted,
575 void **ppStorage)
576{
577 PVBOXDISK pThis = (PVBOXDISK)pvUser;
578 PDRVVDSTORAGEBACKEND pStorageBackend = NULL;
579 int rc = VINF_SUCCESS;
580
581 /*
582 * Check whether the backend wants to open a block device and try to prepare it
583 * if we didn't claim it yet.
584 *
585 * We only create a block device manager on demand to not waste any resources.
586 */
587 if (HBDMgrIsBlockDevice(pszLocation))
588 {
589 if (pThis->hHbdMgr == NIL_HBDMGR)
590 rc = HBDMgrCreate(&pThis->hHbdMgr);
591
592 if ( RT_SUCCESS(rc)
593 && !HBDMgrIsBlockDeviceClaimed(pThis->hHbdMgr, pszLocation))
594 rc = HBDMgrClaimBlockDevice(pThis->hHbdMgr, pszLocation);
595
596 if (RT_FAILURE(rc))
597 return rc;
598 }
599
600 pStorageBackend = (PDRVVDSTORAGEBACKEND)RTMemAllocZ(sizeof(DRVVDSTORAGEBACKEND));
601 if (pStorageBackend)
602 {
603 pStorageBackend->fSyncIoPending = false;
604 pStorageBackend->rcReqLast = VINF_SUCCESS;
605 pStorageBackend->pfnCompleted = pfnCompleted;
606
607 rc = RTSemEventCreate(&pStorageBackend->EventSem);
608 if (RT_SUCCESS(rc))
609 {
610 rc = PDMDrvHlpAsyncCompletionTemplateCreate(pThis->pDrvIns, &pStorageBackend->pTemplate,
611 drvvdAsyncTaskCompleted, pStorageBackend, "AsyncTaskCompleted");
612 if (RT_SUCCESS(rc))
613 {
614 uint32_t fFlags = (fOpen & RTFILE_O_ACCESS_MASK) == RTFILE_O_READ
615 ? PDMACEP_FILE_FLAGS_READ_ONLY
616 : 0;
617 if (pThis->fShareable)
618 {
619 Assert((fOpen & RTFILE_O_DENY_MASK) == RTFILE_O_DENY_NONE);
620
621 fFlags |= PDMACEP_FILE_FLAGS_DONT_LOCK;
622 }
623 if (pThis->fAsyncIoWithHostCache)
624 fFlags |= PDMACEP_FILE_FLAGS_HOST_CACHE_ENABLED;
625
626 rc = PDMR3AsyncCompletionEpCreateForFile(&pStorageBackend->pEndpoint,
627 pszLocation, fFlags,
628 pStorageBackend->pTemplate);
629
630 if (RT_SUCCESS(rc))
631 {
632 if (pThis->pszBwGroup)
633 rc = PDMR3AsyncCompletionEpSetBwMgr(pStorageBackend->pEndpoint, pThis->pszBwGroup);
634
635 if (RT_SUCCESS(rc))
636 {
637 LogFlow(("drvvdAsyncIOOpen: Successfully opened '%s'; fOpen=%#x pStorage=%p\n",
638 pszLocation, fOpen, pStorageBackend));
639 *ppStorage = pStorageBackend;
640 return VINF_SUCCESS;
641 }
642
643 PDMR3AsyncCompletionEpClose(pStorageBackend->pEndpoint);
644 }
645
646 PDMR3AsyncCompletionTemplateDestroy(pStorageBackend->pTemplate);
647 }
648 RTSemEventDestroy(pStorageBackend->EventSem);
649 }
650 RTMemFree(pStorageBackend);
651 }
652 else
653 rc = VERR_NO_MEMORY;
654
655 return rc;
656}
657
658static DECLCALLBACK(int) drvvdAsyncIOClose(void *pvUser, void *pStorage)
659{
660 RT_NOREF(pvUser);
661 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
662
663 /*
664 * We don't unclaim any block devices on purpose here because they
665 * might get reopened shortly (switching to readonly during suspend)
666 *
667 * Block devices will get unclaimed during destruction of the driver.
668 */
669
670 PDMR3AsyncCompletionEpClose(pStorageBackend->pEndpoint);
671 PDMR3AsyncCompletionTemplateDestroy(pStorageBackend->pTemplate);
672 RTSemEventDestroy(pStorageBackend->EventSem);
673 RTMemFree(pStorageBackend);
674 return VINF_SUCCESS;;
675}
676
677static DECLCALLBACK(int) drvvdAsyncIOReadSync(void *pvUser, void *pStorage, uint64_t uOffset,
678 void *pvBuf, size_t cbRead, size_t *pcbRead)
679{
680 RT_NOREF(pvUser);
681 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
682 RTSGSEG DataSeg;
683 PPDMASYNCCOMPLETIONTASK pTask;
684
685 bool fOld = ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, true);
686 Assert(!fOld); NOREF(fOld);
687 DataSeg.cbSeg = cbRead;
688 DataSeg.pvSeg = pvBuf;
689
690 int rc = PDMR3AsyncCompletionEpRead(pStorageBackend->pEndpoint, uOffset, &DataSeg, 1, cbRead, NULL, &pTask);
691 if (RT_FAILURE(rc))
692 return rc;
693
694 if (rc == VINF_AIO_TASK_PENDING)
695 {
696 /* Wait */
697 rc = RTSemEventWait(pStorageBackend->EventSem, RT_INDEFINITE_WAIT);
698 AssertRC(rc);
699 }
700 else
701 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, false);
702
703 if (pcbRead)
704 *pcbRead = cbRead;
705
706 return pStorageBackend->rcReqLast;
707}
708
709static DECLCALLBACK(int) drvvdAsyncIOWriteSync(void *pvUser, void *pStorage, uint64_t uOffset,
710 const void *pvBuf, size_t cbWrite, size_t *pcbWritten)
711{
712 RT_NOREF(pvUser);
713 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
714 RTSGSEG DataSeg;
715 PPDMASYNCCOMPLETIONTASK pTask;
716
717 bool fOld = ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, true);
718 Assert(!fOld); NOREF(fOld);
719 DataSeg.cbSeg = cbWrite;
720 DataSeg.pvSeg = (void *)pvBuf;
721
722 int rc = PDMR3AsyncCompletionEpWrite(pStorageBackend->pEndpoint, uOffset, &DataSeg, 1, cbWrite, NULL, &pTask);
723 if (RT_FAILURE(rc))
724 return rc;
725
726 if (rc == VINF_AIO_TASK_PENDING)
727 {
728 /* Wait */
729 rc = RTSemEventWait(pStorageBackend->EventSem, RT_INDEFINITE_WAIT);
730 AssertRC(rc);
731 }
732 else
733 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, false);
734
735 if (pcbWritten)
736 *pcbWritten = cbWrite;
737
738 return pStorageBackend->rcReqLast;
739}
740
741static DECLCALLBACK(int) drvvdAsyncIOFlushSync(void *pvUser, void *pStorage)
742{
743 RT_NOREF(pvUser);
744 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
745 PPDMASYNCCOMPLETIONTASK pTask;
746
747 LogFlowFunc(("pvUser=%#p pStorage=%#p\n", pvUser, pStorage));
748
749 bool fOld = ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, true);
750 Assert(!fOld); NOREF(fOld);
751
752 int rc = PDMR3AsyncCompletionEpFlush(pStorageBackend->pEndpoint, NULL, &pTask);
753 if (RT_FAILURE(rc))
754 return rc;
755
756 if (rc == VINF_AIO_TASK_PENDING)
757 {
758 /* Wait */
759 LogFlowFunc(("Waiting for flush to complete\n"));
760 rc = RTSemEventWait(pStorageBackend->EventSem, RT_INDEFINITE_WAIT);
761 AssertRC(rc);
762 }
763 else
764 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, false);
765
766 return pStorageBackend->rcReqLast;
767}
768
769static DECLCALLBACK(int) drvvdAsyncIOReadAsync(void *pvUser, void *pStorage, uint64_t uOffset,
770 PCRTSGSEG paSegments, size_t cSegments,
771 size_t cbRead, void *pvCompletion,
772 void **ppTask)
773{
774 RT_NOREF(pvUser);
775 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
776
777 int rc = PDMR3AsyncCompletionEpRead(pStorageBackend->pEndpoint, uOffset, paSegments, (unsigned)cSegments, cbRead,
778 pvCompletion, (PPPDMASYNCCOMPLETIONTASK)ppTask);
779 if (rc == VINF_AIO_TASK_PENDING)
780 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
781
782 return rc;
783}
784
785static DECLCALLBACK(int) drvvdAsyncIOWriteAsync(void *pvUser, void *pStorage, uint64_t uOffset,
786 PCRTSGSEG paSegments, size_t cSegments,
787 size_t cbWrite, void *pvCompletion,
788 void **ppTask)
789{
790 RT_NOREF(pvUser);
791 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
792
793 int rc = PDMR3AsyncCompletionEpWrite(pStorageBackend->pEndpoint, uOffset, paSegments, (unsigned)cSegments, cbWrite,
794 pvCompletion, (PPPDMASYNCCOMPLETIONTASK)ppTask);
795 if (rc == VINF_AIO_TASK_PENDING)
796 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
797
798 return rc;
799}
800
801static DECLCALLBACK(int) drvvdAsyncIOFlushAsync(void *pvUser, void *pStorage,
802 void *pvCompletion, void **ppTask)
803{
804 RT_NOREF(pvUser);
805 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
806
807 int rc = PDMR3AsyncCompletionEpFlush(pStorageBackend->pEndpoint, pvCompletion,
808 (PPPDMASYNCCOMPLETIONTASK)ppTask);
809 if (rc == VINF_AIO_TASK_PENDING)
810 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
811
812 return rc;
813}
814
815static DECLCALLBACK(int) drvvdAsyncIOGetSize(void *pvUser, void *pStorage, uint64_t *pcbSize)
816{
817 RT_NOREF(pvUser);
818 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
819
820 return PDMR3AsyncCompletionEpGetSize(pStorageBackend->pEndpoint, pcbSize);
821}
822
823static DECLCALLBACK(int) drvvdAsyncIOSetSize(void *pvUser, void *pStorage, uint64_t cbSize)
824{
825 RT_NOREF(pvUser);
826 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
827
828 return PDMR3AsyncCompletionEpSetSize(pStorageBackend->pEndpoint, cbSize);
829}
830
831static DECLCALLBACK(int) drvvdAsyncIOSetAllocationSize(void *pvUser, void *pvStorage, uint64_t cbSize, uint32_t fFlags)
832{
833 RT_NOREF(pvUser, pvStorage, cbSize, fFlags);
834 return VERR_NOT_SUPPORTED;
835}
836
837#endif /* VBOX_WITH_PDM_ASYNC_COMPLETION */
838
839
840/*********************************************************************************************************************************
841* VD Thread Synchronization interface implementation *
842*********************************************************************************************************************************/
843
844static DECLCALLBACK(int) drvvdThreadStartRead(void *pvUser)
845{
846 PVBOXDISK pThis = (PVBOXDISK)pvUser;
847
848 return RTSemRWRequestRead(pThis->MergeLock, RT_INDEFINITE_WAIT);
849}
850
851static DECLCALLBACK(int) drvvdThreadFinishRead(void *pvUser)
852{
853 PVBOXDISK pThis = (PVBOXDISK)pvUser;
854
855 return RTSemRWReleaseRead(pThis->MergeLock);
856}
857
858static DECLCALLBACK(int) drvvdThreadStartWrite(void *pvUser)
859{
860 PVBOXDISK pThis = (PVBOXDISK)pvUser;
861
862 return RTSemRWRequestWrite(pThis->MergeLock, RT_INDEFINITE_WAIT);
863}
864
865static DECLCALLBACK(int) drvvdThreadFinishWrite(void *pvUser)
866{
867 PVBOXDISK pThis = (PVBOXDISK)pvUser;
868
869 return RTSemRWReleaseWrite(pThis->MergeLock);
870}
871
872
873/*********************************************************************************************************************************
874* VD Configuration interface implementation *
875*********************************************************************************************************************************/
876
877static DECLCALLBACK(bool) drvvdCfgAreKeysValid(void *pvUser, const char *pszzValid)
878{
879 return CFGMR3AreValuesValid((PCFGMNODE)pvUser, pszzValid);
880}
881
882static DECLCALLBACK(int) drvvdCfgQuerySize(void *pvUser, const char *pszName, size_t *pcb)
883{
884 return CFGMR3QuerySize((PCFGMNODE)pvUser, pszName, pcb);
885}
886
887static DECLCALLBACK(int) drvvdCfgQuery(void *pvUser, const char *pszName, char *pszString, size_t cchString)
888{
889 return CFGMR3QueryString((PCFGMNODE)pvUser, pszName, pszString, cchString);
890}
891
892static DECLCALLBACK(int) drvvdCfgQueryBytes(void *pvUser, const char *pszName, void *ppvData, size_t cbData)
893{
894 return CFGMR3QueryBytes((PCFGMNODE)pvUser, pszName, ppvData, cbData);
895}
896
897
898/*******************************************************************************
899* VD Crypto interface implementation for the encryption support *
900*******************************************************************************/
901
902static DECLCALLBACK(int) drvvdCryptoKeyRetain(void *pvUser, const char *pszId, const uint8_t **ppbKey, size_t *pcbKey)
903{
904 PVBOXDISK pThis = (PVBOXDISK)pvUser;
905 int rc = VINF_SUCCESS;
906
907 AssertPtr(pThis->pIfSecKey);
908 if (pThis->pIfSecKey)
909 rc = pThis->pIfSecKey->pfnKeyRetain(pThis->pIfSecKey, pszId, ppbKey, pcbKey);
910 else
911 rc = VERR_NOT_SUPPORTED;
912
913 return rc;
914}
915
916static DECLCALLBACK(int) drvvdCryptoKeyRelease(void *pvUser, const char *pszId)
917{
918 PVBOXDISK pThis = (PVBOXDISK)pvUser;
919 int rc = VINF_SUCCESS;
920
921 AssertPtr(pThis->pIfSecKey);
922 if (pThis->pIfSecKey)
923 rc = pThis->pIfSecKey->pfnKeyRelease(pThis->pIfSecKey, pszId);
924 else
925 rc = VERR_NOT_SUPPORTED;
926
927 return rc;
928}
929
930static DECLCALLBACK(int) drvvdCryptoKeyStorePasswordRetain(void *pvUser, const char *pszId, const char **ppszPassword)
931{
932 PVBOXDISK pThis = (PVBOXDISK)pvUser;
933 int rc = VINF_SUCCESS;
934
935 AssertPtr(pThis->pIfSecKey);
936 if (pThis->pIfSecKey)
937 rc = pThis->pIfSecKey->pfnPasswordRetain(pThis->pIfSecKey, pszId, ppszPassword);
938 else
939 rc = VERR_NOT_SUPPORTED;
940
941 return rc;
942}
943
944static DECLCALLBACK(int) drvvdCryptoKeyStorePasswordRelease(void *pvUser, const char *pszId)
945{
946 PVBOXDISK pThis = (PVBOXDISK)pvUser;
947 int rc = VINF_SUCCESS;
948
949 AssertPtr(pThis->pIfSecKey);
950 if (pThis->pIfSecKey)
951 rc = pThis->pIfSecKey->pfnPasswordRelease(pThis->pIfSecKey, pszId);
952 else
953 rc = VERR_NOT_SUPPORTED;
954
955 return rc;
956}
957
958#ifdef VBOX_WITH_INIP
959
960
961/*********************************************************************************************************************************
962* VD TCP network stack interface implementation - INIP case *
963*********************************************************************************************************************************/
964
/**
 * Socket address union with a generic, an IPv4 and (if LWIP_IPV6 is set) an
 * IPv6 view; used as the output buffer of lwip_getsockname/lwip_getpeername.
 *
 * vvl: this structure duplicates the meaning of sockaddr,
 * perhaps it'd be better to get rid of it.
 */
typedef union INIPSOCKADDRUNION
{
    /** Generic view, used to check the address family. */
    struct sockaddr Addr;
    /** IPv4 view. */
    struct sockaddr_in Ipv4;
#if LWIP_IPV6
    /** IPv6 view. */
    struct sockaddr_in6 Ipv6;
#endif
} INIPSOCKADDRUNION;
977
/**
 * INIP socket data.
 */
typedef struct INIPSOCKET
{
    /** The lwIP socket descriptor, INT32_MAX while not connected. */
    int hSock;
} INIPSOCKET, *PINIPSOCKET;

/* Forward declaration, drvvdINIPSgWrite calls it before its definition. */
static DECLCALLBACK(int) drvvdINIPFlush(VDSOCKET Sock);
984
985/** @interface_method_impl{VDINTERFACETCPNET,pfnSocketCreate} */
986static DECLCALLBACK(int) drvvdINIPSocketCreate(uint32_t fFlags, PVDSOCKET pSock)
987{
988 PINIPSOCKET pSocketInt = NULL;
989
990 /*
991 * The extended select method is not supported because it is impossible to wakeup
992 * the thread.
993 */
994 if (fFlags & VD_INTERFACETCPNET_CONNECT_EXTENDED_SELECT)
995 return VERR_NOT_SUPPORTED;
996
997 pSocketInt = (PINIPSOCKET)RTMemAllocZ(sizeof(INIPSOCKET));
998 if (pSocketInt)
999 {
1000 pSocketInt->hSock = INT32_MAX;
1001 *pSock = (VDSOCKET)pSocketInt;
1002 return VINF_SUCCESS;
1003 }
1004
1005 return VERR_NO_MEMORY;
1006}
1007
1008/** @interface_method_impl{VDINTERFACETCPNET,pfnSocketCreate} */
1009static DECLCALLBACK(int) drvvdINIPSocketDestroy(VDSOCKET Sock)
1010{
1011 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1012
1013 RTMemFree(pSocketInt);
1014 return VINF_SUCCESS;
1015}
1016
/**
 * @interface_method_impl{VDINTERFACETCPNET,pfnClientConnect}
 *
 * Connects the INIP socket through lwIP. Only numeric addresses are accepted,
 * the timeout is ignored, and any failure to create or connect the socket is
 * reported as VERR_NET_CONNECTION_REFUSED.
 */
static DECLCALLBACK(int) drvvdINIPClientConnect(VDSOCKET Sock, const char *pszAddress, uint32_t uPort,
                                                RTMSINTERVAL cMillies)
{
    int rc = VINF_SUCCESS;
    PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
    int iInetFamily = PF_INET;
    struct in_addr ip;
#if LWIP_IPV6
    ip6_addr_t ip6;
    RT_ZERO(ip6);
#endif

    NOREF(cMillies); /* LwIP doesn't support connect timeout. */
    RT_ZERO(ip); /* Shut up MSC. */

    /* Check whether lwIP is set up in this VM instance. */
    if (!DevINIPConfigured())
    {
        LogRelFunc(("no IP stack\n"));
        return VERR_NET_HOST_UNREACHABLE;
    }
    /* Resolve hostname. As there is no standard resolver for lwIP yet,
     * just accept numeric IP addresses for now. */
#if LWIP_IPV6
    /* Try IPv6 first; the dangling 'else' below binds to the IPv4 check. */
    if (inet6_aton(pszAddress, &ip6))
        iInetFamily = PF_INET6;
    else /* concatenation with if */
#endif
    if (!lwip_inet_aton(pszAddress, &ip))
    {
        LogRelFunc(("cannot resolve IP %s\n", pszAddress));
        return VERR_NET_HOST_UNREACHABLE;
    }
    /* Create socket and connect. */
    int iSock = lwip_socket(iInetFamily, SOCK_STREAM, 0);
    if (iSock != -1)
    {
        struct sockaddr *pSockAddr = NULL;
        struct sockaddr_in InAddr = {0};
#if LWIP_IPV6
        struct sockaddr_in6 In6Addr = {0};
#endif
        /* Fill in the address structure matching the detected family. */
        if (iInetFamily == PF_INET)
        {
            InAddr.sin_family = AF_INET;
            InAddr.sin_port = htons(uPort);
            InAddr.sin_addr = ip;
            InAddr.sin_len = sizeof(InAddr);
            pSockAddr = (struct sockaddr *)&InAddr;
        }
#if LWIP_IPV6
        else
        {
            In6Addr.sin6_family = AF_INET6;
            In6Addr.sin6_port = htons(uPort);
            memcpy(&In6Addr.sin6_addr, &ip6, sizeof(ip6));
            In6Addr.sin6_len = sizeof(In6Addr);
            pSockAddr = (struct sockaddr *)&In6Addr;
        }
#endif
        /* lwip_connect returns 0 on success; only then is the descriptor stored. */
        if (   pSockAddr
            && !lwip_connect(iSock, pSockAddr, pSockAddr->sa_len))
        {
            pSocketInt->hSock = iSock;
            return VINF_SUCCESS;
        }
        rc = VERR_NET_CONNECTION_REFUSED; /** @todo real solution needed */
        lwip_close(iSock);
    }
    else
        rc = VERR_NET_CONNECTION_REFUSED; /** @todo real solution needed */
    return rc;
}
1091
1092/** @interface_method_impl{VDINTERFACETCPNET,pfnClientClose} */
1093static DECLCALLBACK(int) drvvdINIPClientClose(VDSOCKET Sock)
1094{
1095 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1096
1097 lwip_close(pSocketInt->hSock);
1098 pSocketInt->hSock = INT32_MAX;
1099 return VINF_SUCCESS; /** @todo real solution needed */
1100}
1101
1102/** @interface_method_impl{VDINTERFACETCPNET,pfnIsClientConnected} */
1103static DECLCALLBACK(bool) drvvdINIPIsClientConnected(VDSOCKET Sock)
1104{
1105 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1106
1107 return pSocketInt->hSock != INT32_MAX;
1108}
1109
1110/** @interface_method_impl{VDINTERFACETCPNET,pfnSelectOne} */
1111static DECLCALLBACK(int) drvvdINIPSelectOne(VDSOCKET Sock, RTMSINTERVAL cMillies)
1112{
1113 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1114 fd_set fdsetR;
1115 FD_ZERO(&fdsetR);
1116 FD_SET((uintptr_t)pSocketInt->hSock, &fdsetR);
1117 fd_set fdsetE = fdsetR;
1118
1119 int rc;
1120 if (cMillies == RT_INDEFINITE_WAIT)
1121 rc = lwip_select(pSocketInt->hSock + 1, &fdsetR, NULL, &fdsetE, NULL);
1122 else
1123 {
1124 struct timeval timeout;
1125 timeout.tv_sec = cMillies / 1000;
1126 timeout.tv_usec = (cMillies % 1000) * 1000;
1127 rc = lwip_select(pSocketInt->hSock + 1, &fdsetR, NULL, &fdsetE, &timeout);
1128 }
1129 if (rc > 0)
1130 return VINF_SUCCESS;
1131 if (rc == 0)
1132 return VERR_TIMEOUT;
1133 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution needed */
1134}
1135
1136/** @interface_method_impl{VDINTERFACETCPNET,pfnRead} */
1137static DECLCALLBACK(int) drvvdINIPRead(VDSOCKET Sock, void *pvBuffer, size_t cbBuffer, size_t *pcbRead)
1138{
1139 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1140
1141 /* Do params checking */
1142 if (!pvBuffer || !cbBuffer)
1143 {
1144 AssertMsgFailed(("Invalid params\n"));
1145 return VERR_INVALID_PARAMETER;
1146 }
1147
1148 /*
1149 * Read loop.
1150 * If pcbRead is NULL we have to fill the entire buffer!
1151 */
1152 size_t cbRead = 0;
1153 size_t cbToRead = cbBuffer;
1154 for (;;)
1155 {
1156 /** @todo this clipping here is just in case (the send function
1157 * needed it, so I added it here, too). Didn't investigate if this
1158 * really has issues. Better be safe than sorry. */
1159 ssize_t cbBytesRead = lwip_recv(pSocketInt->hSock, (char *)pvBuffer + cbRead,
1160 RT_MIN(cbToRead, 32768), 0);
1161 if (cbBytesRead < 0)
1162 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution */
1163 if (cbBytesRead == 0 && errno) /** @todo r=bird: lwip_recv will not touch errno on Windows. This may apply to other hosts as well */
1164 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution */
1165 if (pcbRead)
1166 {
1167 /* return partial data */
1168 *pcbRead = cbBytesRead;
1169 break;
1170 }
1171
1172 /* read more? */
1173 cbRead += cbBytesRead;
1174 if (cbRead == cbBuffer)
1175 break;
1176
1177 /* next */
1178 cbToRead = cbBuffer - cbRead;
1179 }
1180
1181 return VINF_SUCCESS;
1182}
1183
1184/** @interface_method_impl{VDINTERFACETCPNET,pfnWrite} */
1185static DECLCALLBACK(int) drvvdINIPWrite(VDSOCKET Sock, const void *pvBuffer, size_t cbBuffer)
1186{
1187 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1188
1189 do
1190 {
1191 /** @todo lwip send only supports up to 65535 bytes in a single
1192 * send (stupid limitation buried in the code), so make sure we
1193 * don't get any wraparounds. This should be moved to DevINIP
1194 * stack interface once that's implemented. */
1195 ssize_t cbWritten = lwip_send(pSocketInt->hSock, (void *)pvBuffer,
1196 RT_MIN(cbBuffer, 32768), 0);
1197 if (cbWritten < 0)
1198 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution needed */
1199 AssertMsg(cbBuffer >= (size_t)cbWritten, ("Wrote more than we requested!!! cbWritten=%d cbBuffer=%d\n",
1200 cbWritten, cbBuffer));
1201 cbBuffer -= cbWritten;
1202 pvBuffer = (const char *)pvBuffer + cbWritten;
1203 } while (cbBuffer);
1204
1205 return VINF_SUCCESS;
1206}
1207
1208/** @interface_method_impl{VDINTERFACETCPNET,pfnSgWrite} */
1209static DECLCALLBACK(int) drvvdINIPSgWrite(VDSOCKET Sock, PCRTSGBUF pSgBuf)
1210{
1211 int rc = VINF_SUCCESS;
1212
1213 /* This is an extremely crude emulation, however it's good enough
1214 * for our iSCSI code. INIP has no sendmsg(). */
1215 for (unsigned i = 0; i < pSgBuf->cSegs; i++)
1216 {
1217 rc = drvvdINIPWrite(Sock, pSgBuf->paSegs[i].pvSeg,
1218 pSgBuf->paSegs[i].cbSeg);
1219 if (RT_FAILURE(rc))
1220 break;
1221 }
1222 if (RT_SUCCESS(rc))
1223 drvvdINIPFlush(Sock);
1224
1225 return rc;
1226}
1227
1228/** @interface_method_impl{VDINTERFACETCPNET,pfnFlush} */
1229static DECLCALLBACK(int) drvvdINIPFlush(VDSOCKET Sock)
1230{
1231 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1232
1233 int fFlag = 1;
1234 lwip_setsockopt(pSocketInt->hSock, IPPROTO_TCP, TCP_NODELAY,
1235 (const char *)&fFlag, sizeof(fFlag));
1236 fFlag = 0;
1237 lwip_setsockopt(pSocketInt->hSock, IPPROTO_TCP, TCP_NODELAY,
1238 (const char *)&fFlag, sizeof(fFlag));
1239 return VINF_SUCCESS;
1240}
1241
1242/** @interface_method_impl{VDINTERFACETCPNET,pfnSetSendCoalescing} */
1243static DECLCALLBACK(int) drvvdINIPSetSendCoalescing(VDSOCKET Sock, bool fEnable)
1244{
1245 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1246
1247 int fFlag = fEnable ? 0 : 1;
1248 lwip_setsockopt(pSocketInt->hSock, IPPROTO_TCP, TCP_NODELAY,
1249 (const char *)&fFlag, sizeof(fFlag));
1250 return VINF_SUCCESS;
1251}
1252
1253/** @interface_method_impl{VDINTERFACETCPNET,pfnGetLocalAddress} */
1254static DECLCALLBACK(int) drvvdINIPGetLocalAddress(VDSOCKET Sock, PRTNETADDR pAddr)
1255{
1256 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1257 INIPSOCKADDRUNION u;
1258 socklen_t cbAddr = sizeof(u);
1259 RT_ZERO(u);
1260 if (!lwip_getsockname(pSocketInt->hSock, &u.Addr, &cbAddr))
1261 {
1262 /*
1263 * Convert the address.
1264 */
1265 if ( cbAddr == sizeof(struct sockaddr_in)
1266 && u.Addr.sa_family == AF_INET)
1267 {
1268 RT_ZERO(*pAddr);
1269 pAddr->enmType = RTNETADDRTYPE_IPV4;
1270 pAddr->uPort = RT_N2H_U16(u.Ipv4.sin_port);
1271 pAddr->uAddr.IPv4.u = u.Ipv4.sin_addr.s_addr;
1272 }
1273#if LWIP_IPV6
1274 else if ( cbAddr == sizeof(struct sockaddr_in6)
1275 && u.Addr.sa_family == AF_INET6)
1276 {
1277 RT_ZERO(*pAddr);
1278 pAddr->enmType = RTNETADDRTYPE_IPV6;
1279 pAddr->uPort = RT_N2H_U16(u.Ipv6.sin6_port);
1280 memcpy(&pAddr->uAddr.IPv6, &u.Ipv6.sin6_addr, sizeof(RTNETADDRIPV6));
1281 }
1282#endif
1283 else
1284 return VERR_NET_ADDRESS_FAMILY_NOT_SUPPORTED;
1285 return VINF_SUCCESS;
1286 }
1287 return VERR_NET_OPERATION_NOT_SUPPORTED;
1288}
1289
1290/** @interface_method_impl{VDINTERFACETCPNET,pfnGetPeerAddress} */
1291static DECLCALLBACK(int) drvvdINIPGetPeerAddress(VDSOCKET Sock, PRTNETADDR pAddr)
1292{
1293 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
1294 INIPSOCKADDRUNION u;
1295 socklen_t cbAddr = sizeof(u);
1296 RT_ZERO(u);
1297 if (!lwip_getpeername(pSocketInt->hSock, &u.Addr, &cbAddr))
1298 {
1299 /*
1300 * Convert the address.
1301 */
1302 if ( cbAddr == sizeof(struct sockaddr_in)
1303 && u.Addr.sa_family == AF_INET)
1304 {
1305 RT_ZERO(*pAddr);
1306 pAddr->enmType = RTNETADDRTYPE_IPV4;
1307 pAddr->uPort = RT_N2H_U16(u.Ipv4.sin_port);
1308 pAddr->uAddr.IPv4.u = u.Ipv4.sin_addr.s_addr;
1309 }
1310#if LWIP_IPV6
1311 else if ( cbAddr == sizeof(struct sockaddr_in6)
1312 && u.Addr.sa_family == AF_INET6)
1313 {
1314 RT_ZERO(*pAddr);
1315 pAddr->enmType = RTNETADDRTYPE_IPV6;
1316 pAddr->uPort = RT_N2H_U16(u.Ipv6.sin6_port);
1317 memcpy(&pAddr->uAddr.IPv6, &u.Ipv6.sin6_addr, sizeof(RTNETADDRIPV6));
1318 }
1319#endif
1320 else
1321 return VERR_NET_ADDRESS_FAMILY_NOT_SUPPORTED;
1322 return VINF_SUCCESS;
1323 }
1324 return VERR_NET_OPERATION_NOT_SUPPORTED;
1325}
1326
1327/** @interface_method_impl{VDINTERFACETCPNET,pfnSelectOneEx} */
1328static DECLCALLBACK(int) drvvdINIPSelectOneEx(VDSOCKET Sock, uint32_t fEvents, uint32_t *pfEvents, RTMSINTERVAL cMillies)
1329{
1330 RT_NOREF(Sock, fEvents, pfEvents, cMillies);
1331 AssertMsgFailed(("Not supported!\n"));
1332 return VERR_NOT_SUPPORTED;
1333}
1334
1335/** @interface_method_impl{VDINTERFACETCPNET,pfnPoke} */
1336static DECLCALLBACK(int) drvvdINIPPoke(VDSOCKET Sock)
1337{
1338 RT_NOREF(Sock);
1339 AssertMsgFailed(("Not supported!\n"));
1340 return VERR_NOT_SUPPORTED;
1341}
1342
1343#endif /* VBOX_WITH_INIP */
1344
1345
1346/*********************************************************************************************************************************
1347* VD TCP network stack interface implementation - Host TCP case *
1348*********************************************************************************************************************************/
1349
/**
 * Socket data for the host TCP case.
 */
typedef struct VDSOCKETINT
{
    /** IPRT socket handle, NIL_RTSOCKET while not connected. */
    RTSOCKET hSocket;
    /** Pollset with the wakeup pipe and socket, NIL_RTPOLLSET if the socket
     * was created without the extended select flag. */
    RTPOLLSET hPollSet;
    /** Pipe endpoint - read (in the pollset). */
    RTPIPE hPipeR;
    /** Pipe endpoint - write. */
    RTPIPE hPipeW;
    /** Flag whether the thread was woken up (set by drvvdTcpPoke). */
    volatile bool fWokenUp;
    /** Flag whether the thread is waiting in the select call. */
    volatile bool fWaiting;
    /** Old event mask, i.e. the one last set up for the socket in the pollset. */
    uint32_t fEventsOld;
} VDSOCKETINT, *PVDSOCKETINT;
1370
/** Pollset id of the socket (handle id used with RTPollSetAddSocket/RTPollSetRemove). */
#define VDSOCKET_POLL_ID_SOCKET 0
/** Pollset id of the pipe (handle id used with RTPollSetAddPipe/RTPollSetRemove). */
#define VDSOCKET_POLL_ID_PIPE 1
1375
1376/** @interface_method_impl{VDINTERFACETCPNET,pfnSocketCreate} */
1377static DECLCALLBACK(int) drvvdTcpSocketCreate(uint32_t fFlags, PVDSOCKET phVdSock)
1378{
1379 int rc = VINF_SUCCESS;
1380 int rc2 = VINF_SUCCESS;
1381 PVDSOCKETINT pSockInt = NULL;
1382
1383 pSockInt = (PVDSOCKETINT)RTMemAllocZ(sizeof(VDSOCKETINT));
1384 if (!pSockInt)
1385 return VERR_NO_MEMORY;
1386
1387 pSockInt->hSocket = NIL_RTSOCKET;
1388 pSockInt->hPollSet = NIL_RTPOLLSET;
1389 pSockInt->hPipeR = NIL_RTPIPE;
1390 pSockInt->hPipeW = NIL_RTPIPE;
1391 pSockInt->fWokenUp = false;
1392 pSockInt->fWaiting = false;
1393
1394 if (fFlags & VD_INTERFACETCPNET_CONNECT_EXTENDED_SELECT)
1395 {
1396 /* Init pipe and pollset. */
1397 rc = RTPipeCreate(&pSockInt->hPipeR, &pSockInt->hPipeW, 0);
1398 if (RT_SUCCESS(rc))
1399 {
1400 rc = RTPollSetCreate(&pSockInt->hPollSet);
1401 if (RT_SUCCESS(rc))
1402 {
1403 rc = RTPollSetAddPipe(pSockInt->hPollSet, pSockInt->hPipeR,
1404 RTPOLL_EVT_READ, VDSOCKET_POLL_ID_PIPE);
1405 if (RT_SUCCESS(rc))
1406 {
1407 *phVdSock = pSockInt;
1408 return VINF_SUCCESS;
1409 }
1410
1411 RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_PIPE);
1412 rc2 = RTPollSetDestroy(pSockInt->hPollSet);
1413 AssertRC(rc2);
1414 }
1415
1416 rc2 = RTPipeClose(pSockInt->hPipeR);
1417 AssertRC(rc2);
1418 rc2 = RTPipeClose(pSockInt->hPipeW);
1419 AssertRC(rc2);
1420 }
1421 }
1422 else
1423 {
1424 *phVdSock = pSockInt;
1425 return VINF_SUCCESS;
1426 }
1427
1428 RTMemFree(pSockInt);
1429
1430 return rc;
1431}
1432
1433/** @interface_method_impl{VDINTERFACETCPNET,pfnSocketDestroy} */
1434static DECLCALLBACK(int) drvvdTcpSocketDestroy(VDSOCKET hVdSock)
1435{
1436 int rc = VINF_SUCCESS;
1437 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1438
1439 /* Destroy the pipe and pollset if necessary. */
1440 if (pSockInt->hPollSet != NIL_RTPOLLSET)
1441 {
1442 if (pSockInt->hSocket != NIL_RTSOCKET)
1443 {
1444 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET);
1445 Assert(RT_SUCCESS(rc) || rc == VERR_POLL_HANDLE_ID_NOT_FOUND);
1446 }
1447 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_PIPE);
1448 AssertRC(rc);
1449 rc = RTPollSetDestroy(pSockInt->hPollSet);
1450 AssertRC(rc);
1451 rc = RTPipeClose(pSockInt->hPipeR);
1452 AssertRC(rc);
1453 rc = RTPipeClose(pSockInt->hPipeW);
1454 AssertRC(rc);
1455 }
1456
1457 if (pSockInt->hSocket != NIL_RTSOCKET)
1458 rc = RTTcpClientCloseEx(pSockInt->hSocket, false /*fGracefulShutdown*/);
1459
1460 RTMemFree(pSockInt);
1461
1462 return rc;
1463}
1464
1465/** @interface_method_impl{VDINTERFACETCPNET,pfnClientConnect} */
1466static DECLCALLBACK(int) drvvdTcpClientConnect(VDSOCKET hVdSock, const char *pszAddress, uint32_t uPort,
1467 RTMSINTERVAL cMillies)
1468{
1469 int rc = VINF_SUCCESS;
1470 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1471
1472 rc = RTTcpClientConnectEx(pszAddress, uPort, &pSockInt->hSocket, cMillies, NULL);
1473 if (RT_SUCCESS(rc))
1474 {
1475 /* Add to the pollset if required. */
1476 if (pSockInt->hPollSet != NIL_RTPOLLSET)
1477 {
1478 pSockInt->fEventsOld = RTPOLL_EVT_READ | RTPOLL_EVT_WRITE | RTPOLL_EVT_ERROR;
1479
1480 rc = RTPollSetAddSocket(pSockInt->hPollSet, pSockInt->hSocket,
1481 pSockInt->fEventsOld, VDSOCKET_POLL_ID_SOCKET);
1482 }
1483
1484 if (RT_SUCCESS(rc))
1485 return VINF_SUCCESS;
1486
1487 rc = RTTcpClientCloseEx(pSockInt->hSocket, false /*fGracefulShutdown*/);
1488 }
1489
1490 return rc;
1491}
1492
1493/** @interface_method_impl{VDINTERFACETCPNET,pfnClientClose} */
1494static DECLCALLBACK(int) drvvdTcpClientClose(VDSOCKET hVdSock)
1495{
1496 int rc = VINF_SUCCESS;
1497 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1498
1499 if (pSockInt->hPollSet != NIL_RTPOLLSET)
1500 {
1501 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET);
1502 AssertRC(rc);
1503 }
1504
1505 rc = RTTcpClientCloseEx(pSockInt->hSocket, false /*fGracefulShutdown*/);
1506 pSockInt->hSocket = NIL_RTSOCKET;
1507
1508 return rc;
1509}
1510
1511/** @interface_method_impl{VDINTERFACETCPNET,pfnIsClientConnected} */
1512static DECLCALLBACK(bool) drvvdTcpIsClientConnected(VDSOCKET hVdSock)
1513{
1514 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1515
1516 return pSockInt->hSocket != NIL_RTSOCKET;
1517}
1518
1519/** @interface_method_impl{VDINTERFACETCPNET,pfnSelectOne} */
1520static DECLCALLBACK(int) drvvdTcpSelectOne(VDSOCKET hVdSock, RTMSINTERVAL cMillies)
1521{
1522 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1523
1524 return RTTcpSelectOne(pSockInt->hSocket, cMillies);
1525}
1526
1527/** @interface_method_impl{VDINTERFACETCPNET,pfnRead} */
1528static DECLCALLBACK(int) drvvdTcpRead(VDSOCKET hVdSock, void *pvBuffer, size_t cbBuffer, size_t *pcbRead)
1529{
1530 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1531
1532 return RTTcpRead(pSockInt->hSocket, pvBuffer, cbBuffer, pcbRead);
1533}
1534
1535/** @interface_method_impl{VDINTERFACETCPNET,pfnWrite} */
1536static DECLCALLBACK(int) drvvdTcpWrite(VDSOCKET hVdSock, const void *pvBuffer, size_t cbBuffer)
1537{
1538 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1539
1540 return RTTcpWrite(pSockInt->hSocket, pvBuffer, cbBuffer);
1541}
1542
1543/** @interface_method_impl{VDINTERFACETCPNET,pfnSgWrite} */
1544static DECLCALLBACK(int) drvvdTcpSgWrite(VDSOCKET hVdSock, PCRTSGBUF pSgBuf)
1545{
1546 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1547
1548 return RTTcpSgWrite(pSockInt->hSocket, pSgBuf);
1549}
1550
1551/** @interface_method_impl{VDINTERFACETCPNET,pfnReadNB} */
1552static DECLCALLBACK(int) drvvdTcpReadNB(VDSOCKET hVdSock, void *pvBuffer, size_t cbBuffer, size_t *pcbRead)
1553{
1554 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1555
1556 return RTTcpReadNB(pSockInt->hSocket, pvBuffer, cbBuffer, pcbRead);
1557}
1558
1559/** @interface_method_impl{VDINTERFACETCPNET,pfnWriteNB} */
1560static DECLCALLBACK(int) drvvdTcpWriteNB(VDSOCKET hVdSock, const void *pvBuffer, size_t cbBuffer, size_t *pcbWritten)
1561{
1562 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1563
1564 return RTTcpWriteNB(pSockInt->hSocket, pvBuffer, cbBuffer, pcbWritten);
1565}
1566
1567/** @interface_method_impl{VDINTERFACETCPNET,pfnSgWriteNB} */
1568static DECLCALLBACK(int) drvvdTcpSgWriteNB(VDSOCKET hVdSock, PRTSGBUF pSgBuf, size_t *pcbWritten)
1569{
1570 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1571
1572 return RTTcpSgWriteNB(pSockInt->hSocket, pSgBuf, pcbWritten);
1573}
1574
1575/** @interface_method_impl{VDINTERFACETCPNET,pfnFlush} */
1576static DECLCALLBACK(int) drvvdTcpFlush(VDSOCKET hVdSock)
1577{
1578 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1579
1580 return RTTcpFlush(pSockInt->hSocket);
1581}
1582
1583/** @interface_method_impl{VDINTERFACETCPNET,pfnSetSendCoalescing} */
1584static DECLCALLBACK(int) drvvdTcpSetSendCoalescing(VDSOCKET hVdSock, bool fEnable)
1585{
1586 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1587
1588 return RTTcpSetSendCoalescing(pSockInt->hSocket, fEnable);
1589}
1590
1591/** @interface_method_impl{VDINTERFACETCPNET,pfnGetLocalAddress} */
1592static DECLCALLBACK(int) drvvdTcpGetLocalAddress(VDSOCKET hVdSock, PRTNETADDR pAddr)
1593{
1594 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1595
1596 return RTTcpGetLocalAddress(pSockInt->hSocket, pAddr);
1597}
1598
1599/** @interface_method_impl{VDINTERFACETCPNET,pfnGetPeerAddress} */
1600static DECLCALLBACK(int) drvvdTcpGetPeerAddress(VDSOCKET hVdSock, PRTNETADDR pAddr)
1601{
1602 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1603
1604 return RTTcpGetPeerAddress(pSockInt->hSocket, pAddr);
1605}
1606
/**
 * Extended select implemented with a single RTPoll() on the pollset holding
 * the socket and the wakeup pipe.
 *
 * @returns VBox status code; VERR_INTERRUPTED if woken up through the pipe or
 *          if a wakeup was already pending on entry.
 * @param   hVdSock     The VD socket.
 * @param   fEvents     VD_INTERFACETCPNET_EVT_XXX mask to wait for.
 * @param   pfEvents    Where to store the VD_INTERFACETCPNET_EVT_XXX events
 *                      which occurred (zeroed on entry).
 * @param   cMillies    Timeout passed on to RTPoll().
 */
static DECLCALLBACK(int) drvvdTcpSelectOneExPoll(VDSOCKET hVdSock, uint32_t fEvents,
                                                 uint32_t *pfEvents, RTMSINTERVAL cMillies)
{
    int rc = VINF_SUCCESS;
    uint32_t id = 0;
    uint32_t fEventsRecv = 0;
    PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;

    *pfEvents = 0;

    /* Reprogram the pollset only if the requested mask changed and a socket is connected. */
    if (   pSockInt->fEventsOld != fEvents
        && pSockInt->hSocket != NIL_RTSOCKET)
    {
        uint32_t fPollEvents = 0;

        if (fEvents & VD_INTERFACETCPNET_EVT_READ)
            fPollEvents |= RTPOLL_EVT_READ;
        if (fEvents & VD_INTERFACETCPNET_EVT_WRITE)
            fPollEvents |= RTPOLL_EVT_WRITE;
        if (fEvents & VD_INTERFACETCPNET_EVT_ERROR)
            fPollEvents |= RTPOLL_EVT_ERROR;

        rc = RTPollSetEventsChange(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET, fPollEvents);
        if (RT_FAILURE(rc))
            return rc;

        pSockInt->fEventsOld = fEvents;
    }

    /*
     * Announce that we are waiting before checking for a pending wakeup;
     * drvvdTcpPoke sets fWokenUp first and writes to the pipe only if it
     * sees fWaiting set.
     */
    ASMAtomicXchgBool(&pSockInt->fWaiting, true);
    if (ASMAtomicXchgBool(&pSockInt->fWokenUp, false))
    {
        ASMAtomicXchgBool(&pSockInt->fWaiting, false);
        return VERR_INTERRUPTED;
    }

    rc = RTPoll(pSockInt->hPollSet, cMillies, &fEventsRecv, &id);
    Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);

    ASMAtomicXchgBool(&pSockInt->fWaiting, false);

    if (RT_SUCCESS(rc))
    {
        if (id == VDSOCKET_POLL_ID_SOCKET)
        {
            /* Socket event: translate the RTPOLL flags to the VD interface flags. */
            fEventsRecv &= RTPOLL_EVT_VALID_MASK;

            if (fEventsRecv & RTPOLL_EVT_READ)
                *pfEvents |= VD_INTERFACETCPNET_EVT_READ;
            if (fEventsRecv & RTPOLL_EVT_WRITE)
                *pfEvents |= VD_INTERFACETCPNET_EVT_WRITE;
            if (fEventsRecv & RTPOLL_EVT_ERROR)
                *pfEvents |= VD_INTERFACETCPNET_EVT_ERROR;
        }
        else
        {
            size_t cbRead = 0;
            uint8_t abBuf[10];
            Assert(id == VDSOCKET_POLL_ID_PIPE);
            Assert((fEventsRecv & RTPOLL_EVT_VALID_MASK) == RTPOLL_EVT_READ);

            /* We got interrupted, drain the pipe. */
            rc = RTPipeRead(pSockInt->hPipeR, abBuf, sizeof(abBuf), &cbRead);
            AssertRC(rc);

            ASMAtomicXchgBool(&pSockInt->fWokenUp, false);

            rc = VERR_INTERRUPTED;
        }
    }

    return rc;
}
1680
1681/** @interface_method_impl{VDINTERFACETCPNET,pfnSelectOneEx} */
1682static DECLCALLBACK(int) drvvdTcpSelectOneExNoPoll(VDSOCKET hVdSock, uint32_t fEvents, uint32_t *pfEvents, RTMSINTERVAL cMillies)
1683{
1684 RT_NOREF(cMillies); /** @todo timeouts */
1685 int rc = VINF_SUCCESS;
1686 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1687
1688 *pfEvents = 0;
1689
1690 ASMAtomicXchgBool(&pSockInt->fWaiting, true);
1691 if (ASMAtomicXchgBool(&pSockInt->fWokenUp, false))
1692 {
1693 ASMAtomicXchgBool(&pSockInt->fWaiting, false);
1694 return VERR_INTERRUPTED;
1695 }
1696
1697 if ( pSockInt->hSocket == NIL_RTSOCKET
1698 || !fEvents)
1699 {
1700 /*
1701 * Only the pipe is configured or the caller doesn't wait for a socket event,
1702 * wait until there is something to read from the pipe.
1703 */
1704 size_t cbRead = 0;
1705 char ch = 0;
1706 rc = RTPipeReadBlocking(pSockInt->hPipeR, &ch, 1, &cbRead);
1707 if (RT_SUCCESS(rc))
1708 {
1709 Assert(cbRead == 1);
1710 rc = VERR_INTERRUPTED;
1711 ASMAtomicXchgBool(&pSockInt->fWokenUp, false);
1712 }
1713 }
1714 else
1715 {
1716 uint32_t fSelectEvents = 0;
1717
1718 if (fEvents & VD_INTERFACETCPNET_EVT_READ)
1719 fSelectEvents |= RTSOCKET_EVT_READ;
1720 if (fEvents & VD_INTERFACETCPNET_EVT_WRITE)
1721 fSelectEvents |= RTSOCKET_EVT_WRITE;
1722 if (fEvents & VD_INTERFACETCPNET_EVT_ERROR)
1723 fSelectEvents |= RTSOCKET_EVT_ERROR;
1724
1725 if (fEvents & VD_INTERFACETCPNET_HINT_INTERRUPT)
1726 {
1727 uint32_t fEventsRecv = 0;
1728
1729 /* Make sure the socket is not in the pollset. */
1730 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET);
1731 Assert(RT_SUCCESS(rc) || rc == VERR_POLL_HANDLE_ID_NOT_FOUND);
1732
1733 for (;;)
1734 {
1735 uint32_t id = 0;
1736 rc = RTPoll(pSockInt->hPollSet, 5, &fEvents, &id);
1737 if (rc == VERR_TIMEOUT)
1738 {
1739 /* Check the socket. */
1740 rc = RTTcpSelectOneEx(pSockInt->hSocket, fSelectEvents, &fEventsRecv, 0);
1741 if (RT_SUCCESS(rc))
1742 {
1743 if (fEventsRecv & RTSOCKET_EVT_READ)
1744 *pfEvents |= VD_INTERFACETCPNET_EVT_READ;
1745 if (fEventsRecv & RTSOCKET_EVT_WRITE)
1746 *pfEvents |= VD_INTERFACETCPNET_EVT_WRITE;
1747 if (fEventsRecv & RTSOCKET_EVT_ERROR)
1748 *pfEvents |= VD_INTERFACETCPNET_EVT_ERROR;
1749 break; /* Quit */
1750 }
1751 else if (rc != VERR_TIMEOUT)
1752 break;
1753 }
1754 else if (RT_SUCCESS(rc))
1755 {
1756 size_t cbRead = 0;
1757 uint8_t abBuf[10];
1758 Assert(id == VDSOCKET_POLL_ID_PIPE);
1759 Assert((fEventsRecv & RTPOLL_EVT_VALID_MASK) == RTPOLL_EVT_READ);
1760
1761 /* We got interrupted, drain the pipe. */
1762 rc = RTPipeRead(pSockInt->hPipeR, abBuf, sizeof(abBuf), &cbRead);
1763 AssertRC(rc);
1764
1765 ASMAtomicXchgBool(&pSockInt->fWokenUp, false);
1766
1767 rc = VERR_INTERRUPTED;
1768 break;
1769 }
1770 else
1771 break;
1772 }
1773 }
1774 else /* The caller waits for a socket event. */
1775 {
1776 uint32_t fEventsRecv = 0;
1777
1778 /* Loop until we got woken up or a socket event occurred. */
1779 for (;;)
1780 {
1781 /** @todo find an adaptive wait algorithm based on the
1782 * number of wakeups in the past. */
1783 rc = RTTcpSelectOneEx(pSockInt->hSocket, fSelectEvents, &fEventsRecv, 5);
1784 if (rc == VERR_TIMEOUT)
1785 {
1786 /* Check if there is an event pending. */
1787 size_t cbRead = 0;
1788 char ch = 0;
1789 rc = RTPipeRead(pSockInt->hPipeR, &ch, 1, &cbRead);
1790 if (RT_SUCCESS(rc) && rc != VINF_TRY_AGAIN)
1791 {
1792 Assert(cbRead == 1);
1793 rc = VERR_INTERRUPTED;
1794 ASMAtomicXchgBool(&pSockInt->fWokenUp, false);
1795 break; /* Quit */
1796 }
1797 else
1798 Assert(rc == VINF_TRY_AGAIN);
1799 }
1800 else if (RT_SUCCESS(rc))
1801 {
1802 if (fEventsRecv & RTSOCKET_EVT_READ)
1803 *pfEvents |= VD_INTERFACETCPNET_EVT_READ;
1804 if (fEventsRecv & RTSOCKET_EVT_WRITE)
1805 *pfEvents |= VD_INTERFACETCPNET_EVT_WRITE;
1806 if (fEventsRecv & RTSOCKET_EVT_ERROR)
1807 *pfEvents |= VD_INTERFACETCPNET_EVT_ERROR;
1808 break; /* Quit */
1809 }
1810 else
1811 break;
1812 }
1813 }
1814 }
1815
1816 ASMAtomicXchgBool(&pSockInt->fWaiting, false);
1817
1818 return rc;
1819}
1820
1821/** @interface_method_impl{VDINTERFACETCPNET,pfnPoke} */
1822static DECLCALLBACK(int) drvvdTcpPoke(VDSOCKET hVdSock)
1823{
1824 int rc = VINF_SUCCESS;
1825 size_t cbWritten = 0;
1826 PVDSOCKETINT pSockInt = (PVDSOCKETINT)hVdSock;
1827
1828 ASMAtomicXchgBool(&pSockInt->fWokenUp, true);
1829
1830 if (ASMAtomicReadBool(&pSockInt->fWaiting))
1831 {
1832 rc = RTPipeWrite(pSockInt->hPipeW, "", 1, &cbWritten);
1833 Assert(RT_SUCCESS(rc) || cbWritten == 0);
1834 }
1835
1836 return VINF_SUCCESS;
1837}
1838
1839/**
1840 * Checks the prerequisites for encrypted I/O.
1841 *
1842 * @returns VBox status code.
1843 * @param pThis The VD driver instance data.
1844 * @param fSetError Flag whether to set a runtime error.
1845 */
1846static int drvvdKeyCheckPrereqs(PVBOXDISK pThis, bool fSetError)
1847{
1848 if ( pThis->pCfgCrypto
1849 && !pThis->pIfSecKey)
1850 {
1851 AssertPtr(pThis->pIfSecKeyHlp);
1852 pThis->pIfSecKeyHlp->pfnKeyMissingNotify(pThis->pIfSecKeyHlp);
1853
1854 if (fSetError)
1855 {
1856 int rc = PDMDrvHlpVMSetRuntimeError(pThis->pDrvIns, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "DrvVD_DEKMISSING",
1857 N_("VD: The DEK for this disk is missing"));
1858 AssertRC(rc);
1859 }
1860 return VERR_VD_DEK_MISSING;
1861 }
1862
1863 return VINF_SUCCESS;
1864}
1865
1866
1867/*********************************************************************************************************************************
1868* Media interface methods *
1869*********************************************************************************************************************************/
1870
1871/** @interface_method_impl{PDMIMEDIA,pfnRead} */
1872static DECLCALLBACK(int) drvvdRead(PPDMIMEDIA pInterface,
1873 uint64_t off, void *pvBuf, size_t cbRead)
1874{
1875 int rc = VINF_SUCCESS;
1876
1877 LogFlowFunc(("off=%#llx pvBuf=%p cbRead=%d\n", off, pvBuf, cbRead));
1878 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1879
1880 /*
1881 * Check the state.
1882 */
1883 if (!pThis->pDisk)
1884 {
1885 AssertMsgFailed(("Invalid state! Not mounted!\n"));
1886 return VERR_PDM_MEDIA_NOT_MOUNTED;
1887 }
1888
1889 rc = drvvdKeyCheckPrereqs(pThis, true /* fSetError */);
1890 if (RT_FAILURE(rc))
1891 return rc;
1892
1893 if (!pThis->fBootAccelActive)
1894 rc = VDRead(pThis->pDisk, off, pvBuf, cbRead);
1895 else
1896 {
1897 /* Can we serve the request from the buffer? */
1898 if ( off >= pThis->offDisk
1899 && off - pThis->offDisk < pThis->cbDataValid)
1900 {
1901 size_t cbToCopy = RT_MIN(cbRead, pThis->offDisk + pThis->cbDataValid - off);
1902
1903 memcpy(pvBuf, pThis->pbData + (off - pThis->offDisk), cbToCopy);
1904 cbRead -= cbToCopy;
1905 off += cbToCopy;
1906 pvBuf = (char *)pvBuf + cbToCopy;
1907 }
1908
1909 if ( cbRead > 0
1910 && cbRead < pThis->cbBootAccelBuffer)
1911 {
1912 /* Increase request to the buffer size and read. */
1913 pThis->cbDataValid = RT_MIN(pThis->cbDisk - off, pThis->cbBootAccelBuffer);
1914 pThis->offDisk = off;
1915 rc = VDRead(pThis->pDisk, off, pThis->pbData, pThis->cbDataValid);
1916 if (RT_FAILURE(rc))
1917 pThis->cbDataValid = 0;
1918 else
1919 memcpy(pvBuf, pThis->pbData, cbRead);
1920 }
1921 else if (cbRead >= pThis->cbBootAccelBuffer)
1922 {
1923 pThis->fBootAccelActive = false; /* Deactiviate */
1924 }
1925 }
1926
1927 if (RT_SUCCESS(rc))
1928 Log2(("%s: off=%#llx pvBuf=%p cbRead=%d\n%.*Rhxd\n", __FUNCTION__,
1929 off, pvBuf, cbRead, cbRead, pvBuf));
1930 LogFlowFunc(("returns %Rrc\n", rc));
1931 return rc;
1932}
1933
1934/** @interface_method_impl{PDMIMEDIA,pfnRead} */
1935static DECLCALLBACK(int) drvvdReadPcBios(PPDMIMEDIA pInterface,
1936 uint64_t off, void *pvBuf, size_t cbRead)
1937{
1938 int rc = VINF_SUCCESS;
1939
1940 LogFlowFunc(("off=%#llx pvBuf=%p cbRead=%d\n", off, pvBuf, cbRead));
1941 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1942
1943 /*
1944 * Check the state.
1945 */
1946 if (!pThis->pDisk)
1947 {
1948 AssertMsgFailed(("Invalid state! Not mounted!\n"));
1949 return VERR_PDM_MEDIA_NOT_MOUNTED;
1950 }
1951
1952 if ( pThis->pCfgCrypto
1953 && !pThis->pIfSecKey)
1954 return VERR_VD_DEK_MISSING;
1955
1956 if (!pThis->fBootAccelActive)
1957 rc = VDRead(pThis->pDisk, off, pvBuf, cbRead);
1958 else
1959 {
1960 /* Can we serve the request from the buffer? */
1961 if ( off >= pThis->offDisk
1962 && off - pThis->offDisk < pThis->cbDataValid)
1963 {
1964 size_t cbToCopy = RT_MIN(cbRead, pThis->offDisk + pThis->cbDataValid - off);
1965
1966 memcpy(pvBuf, pThis->pbData + (off - pThis->offDisk), cbToCopy);
1967 cbRead -= cbToCopy;
1968 off += cbToCopy;
1969 pvBuf = (char *)pvBuf + cbToCopy;
1970 }
1971
1972 if ( cbRead > 0
1973 && cbRead < pThis->cbBootAccelBuffer)
1974 {
1975 /* Increase request to the buffer size and read. */
1976 pThis->cbDataValid = RT_MIN(pThis->cbDisk - off, pThis->cbBootAccelBuffer);
1977 pThis->offDisk = off;
1978 rc = VDRead(pThis->pDisk, off, pThis->pbData, pThis->cbDataValid);
1979 if (RT_FAILURE(rc))
1980 pThis->cbDataValid = 0;
1981 else
1982 memcpy(pvBuf, pThis->pbData, cbRead);
1983 }
1984 else if (cbRead >= pThis->cbBootAccelBuffer)
1985 {
1986 pThis->fBootAccelActive = false; /* Deactiviate */
1987 }
1988 }
1989
1990 if (RT_SUCCESS(rc))
1991 Log2(("%s: off=%#llx pvBuf=%p cbRead=%d\n%.*Rhxd\n", __FUNCTION__,
1992 off, pvBuf, cbRead, cbRead, pvBuf));
1993 LogFlowFunc(("returns %Rrc\n", rc));
1994 return rc;
1995}
1996
1997
1998/** @interface_method_impl{PDMIMEDIA,pfnWrite} */
1999static DECLCALLBACK(int) drvvdWrite(PPDMIMEDIA pInterface,
2000 uint64_t off, const void *pvBuf,
2001 size_t cbWrite)
2002{
2003 LogFlowFunc(("off=%#llx pvBuf=%p cbWrite=%d\n", off, pvBuf, cbWrite));
2004 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2005 Log2(("%s: off=%#llx pvBuf=%p cbWrite=%d\n%.*Rhxd\n", __FUNCTION__,
2006 off, pvBuf, cbWrite, cbWrite, pvBuf));
2007
2008 /*
2009 * Check the state.
2010 */
2011 if (!pThis->pDisk)
2012 {
2013 AssertMsgFailed(("Invalid state! Not mounted!\n"));
2014 return VERR_PDM_MEDIA_NOT_MOUNTED;
2015 }
2016
2017 /* Set an FTM checkpoint as this operation changes the state permanently. */
2018 PDMDrvHlpFTSetCheckpoint(pThis->pDrvIns, FTMCHECKPOINTTYPE_STORAGE);
2019
2020 int rc = drvvdKeyCheckPrereqs(pThis, true /* fSetError */);
2021 if (RT_FAILURE(rc))
2022 return rc;
2023
2024 /* Invalidate any buffer if boot acceleration is enabled. */
2025 if (pThis->fBootAccelActive)
2026 {
2027 pThis->cbDataValid = 0;
2028 pThis->offDisk = 0;
2029 }
2030
2031 rc = VDWrite(pThis->pDisk, off, pvBuf, cbWrite);
2032#ifdef VBOX_PERIODIC_FLUSH
2033 if (pThis->cbFlushInterval)
2034 {
2035 pThis->cbDataWritten += (uint32_t)cbWrite;
2036 if (pThis->cbDataWritten > pThis->cbFlushInterval)
2037 {
2038 pThis->cbDataWritten = 0;
2039 VDFlush(pThis->pDisk);
2040 }
2041 }
2042#endif /* VBOX_PERIODIC_FLUSH */
2043
2044 LogFlowFunc(("returns %Rrc\n", rc));
2045 return rc;
2046}
2047
2048/** @interface_method_impl{PDMIMEDIA,pfnFlush} */
2049static DECLCALLBACK(int) drvvdFlush(PPDMIMEDIA pInterface)
2050{
2051 LogFlowFunc(("\n"));
2052 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2053
2054 /*
2055 * Check the state.
2056 */
2057 if (!pThis->pDisk)
2058 {
2059 AssertMsgFailed(("Invalid state! Not mounted!\n"));
2060 return VERR_PDM_MEDIA_NOT_MOUNTED;
2061 }
2062
2063#ifdef VBOX_IGNORE_FLUSH
2064 if (pThis->fIgnoreFlush)
2065 return VINF_SUCCESS;
2066#endif /* VBOX_IGNORE_FLUSH */
2067
2068 int rc = VDFlush(pThis->pDisk);
2069 LogFlowFunc(("returns %Rrc\n", rc));
2070 return rc;
2071}
2072
2073/** @interface_method_impl{PDMIMEDIA,pfnMerge} */
2074static DECLCALLBACK(int) drvvdMerge(PPDMIMEDIA pInterface,
2075 PFNSIMPLEPROGRESS pfnProgress,
2076 void *pvUser)
2077{
2078 LogFlowFunc(("\n"));
2079 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2080 int rc = VINF_SUCCESS;
2081
2082 /*
2083 * Check the state.
2084 */
2085 if (!pThis->pDisk)
2086 {
2087 AssertMsgFailed(("Invalid state! Not mounted!\n"));
2088 return VERR_PDM_MEDIA_NOT_MOUNTED;
2089 }
2090
2091 /* Note: There is an unavoidable race between destruction and another
2092 * thread invoking this function. This is handled safely and gracefully by
2093 * atomically invalidating the lock handle in drvvdDestruct. */
2094 int rc2 = RTSemFastMutexRequest(pThis->MergeCompleteMutex);
2095 AssertRC(rc2);
2096 if (RT_SUCCESS(rc2) && pThis->fMergePending)
2097 {
2098 /* Take shortcut: PFNSIMPLEPROGRESS is exactly the same type as
2099 * PFNVDPROGRESS, so there's no need for a conversion function. */
2100 /** @todo maybe introduce a conversion which limits update frequency. */
2101 PVDINTERFACE pVDIfsOperation = NULL;
2102 VDINTERFACEPROGRESS VDIfProgress;
2103 VDIfProgress.pfnProgress = pfnProgress;
2104 rc2 = VDInterfaceAdd(&VDIfProgress.Core, "DrvVD_VDIProgress", VDINTERFACETYPE_PROGRESS,
2105 pvUser, sizeof(VDINTERFACEPROGRESS), &pVDIfsOperation);
2106 AssertRC(rc2);
2107 pThis->fMergePending = false;
2108 rc = VDMerge(pThis->pDisk, pThis->uMergeSource,
2109 pThis->uMergeTarget, pVDIfsOperation);
2110 }
2111 rc2 = RTSemFastMutexRelease(pThis->MergeCompleteMutex);
2112 AssertRC(rc2);
2113 LogFlowFunc(("returns %Rrc\n", rc));
2114 return rc;
2115}
2116
2117/** @interface_method_impl{PDMIMEDIA,pfnSetSecKeyIf} */
2118static DECLCALLBACK(int) drvvdSetSecKeyIf(PPDMIMEDIA pInterface, PPDMISECKEY pIfSecKey, PPDMISECKEYHLP pIfSecKeyHlp)
2119{
2120 LogFlowFunc(("\n"));
2121 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2122 int rc = VINF_SUCCESS;
2123
2124 if (pThis->pCfgCrypto)
2125 {
2126 PVDINTERFACE pVDIfFilter = NULL;
2127
2128 pThis->pIfSecKeyHlp = pIfSecKeyHlp;
2129
2130 if ( pThis->pIfSecKey
2131 && !pIfSecKey)
2132 {
2133 /* Unload the crypto filter first to make sure it doesn't access the keys anymore. */
2134 rc = VDFilterRemove(pThis->pDisk, VD_FILTER_FLAGS_DEFAULT);
2135 AssertRC(rc);
2136
2137 pThis->pIfSecKey = NULL;
2138 }
2139
2140 if ( pIfSecKey
2141 && RT_SUCCESS(rc))
2142 {
2143 pThis->pIfSecKey = pIfSecKey;
2144
2145 rc = VDInterfaceAdd(&pThis->VDIfCfg.Core, "DrvVD_Config", VDINTERFACETYPE_CONFIG,
2146 pThis->pCfgCrypto, sizeof(VDINTERFACECONFIG), &pVDIfFilter);
2147 AssertRC(rc);
2148
2149 rc = VDInterfaceAdd(&pThis->VDIfCrypto.Core, "DrvVD_Crypto", VDINTERFACETYPE_CRYPTO,
2150 pThis, sizeof(VDINTERFACECRYPTO), &pVDIfFilter);
2151 AssertRC(rc);
2152
2153 /* Load the crypt filter plugin. */
2154 rc = VDFilterAdd(pThis->pDisk, "CRYPT", VD_FILTER_FLAGS_DEFAULT, pVDIfFilter);
2155 if (RT_FAILURE(rc))
2156 pThis->pIfSecKey = NULL;
2157 }
2158 }
2159 else
2160 rc = VERR_NOT_SUPPORTED;
2161
2162 LogFlowFunc(("returns %Rrc\n", rc));
2163 return rc;
2164}
2165
2166/** @interface_method_impl{PDMIMEDIA,pfnGetSize} */
2167static DECLCALLBACK(uint64_t) drvvdGetSize(PPDMIMEDIA pInterface)
2168{
2169 LogFlowFunc(("\n"));
2170 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2171
2172 /*
2173 * Check the state.
2174 */
2175 if (!pThis->pDisk)
2176 return 0;
2177
2178 uint64_t cb = VDGetSize(pThis->pDisk, VD_LAST_IMAGE);
2179 LogFlowFunc(("returns %#llx (%llu)\n", cb, cb));
2180 return cb;
2181}
2182
2183/** @interface_method_impl{PDMIMEDIA,pfnGetSectorSize} */
2184static DECLCALLBACK(uint32_t) drvvdGetSectorSize(PPDMIMEDIA pInterface)
2185{
2186 LogFlowFunc(("\n"));
2187 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2188
2189 /*
2190 * Check the state.
2191 */
2192 if (!pThis->pDisk)
2193 return 0;
2194
2195 uint32_t cb = VDGetSectorSize(pThis->pDisk, VD_LAST_IMAGE);
2196 LogFlowFunc(("returns %u\n", cb));
2197 return cb;
2198}
2199
2200/** @interface_method_impl{PDMIMEDIA,pfnIsReadOnly} */
2201static DECLCALLBACK(bool) drvvdIsReadOnly(PPDMIMEDIA pInterface)
2202{
2203 LogFlowFunc(("\n"));
2204 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2205
2206 /*
2207 * Check the state.
2208 */
2209 if (!pThis->pDisk)
2210 return false;
2211
2212 bool f = VDIsReadOnly(pThis->pDisk);
2213 LogFlowFunc(("returns %d\n", f));
2214 return f;
2215}
2216
2217/** @interface_method_impl{PDMIMEDIA,pfnIsNonRotational} */
2218static DECLCALLBACK(bool) drvvdIsNonRotational(PPDMIMEDIA pInterface)
2219{
2220 LogFlowFunc(("\n"));
2221 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2222
2223 return pThis->fNonRotational;
2224}
2225
2226/** @interface_method_impl{PDMIMEDIA,pfnBiosGetPCHSGeometry} */
2227static DECLCALLBACK(int) drvvdBiosGetPCHSGeometry(PPDMIMEDIA pInterface,
2228 PPDMMEDIAGEOMETRY pPCHSGeometry)
2229{
2230 LogFlowFunc(("\n"));
2231 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2232 VDGEOMETRY geo;
2233
2234 /*
2235 * Check the state.
2236 */
2237 if (!pThis->pDisk)
2238 return VERR_PDM_MEDIA_NOT_MOUNTED;
2239
2240 /*
2241 * Use configured/cached values if present.
2242 */
2243 if ( pThis->PCHSGeometry.cCylinders > 0
2244 && pThis->PCHSGeometry.cHeads > 0
2245 && pThis->PCHSGeometry.cSectors > 0)
2246 {
2247 *pPCHSGeometry = pThis->PCHSGeometry;
2248 LogFlow(("%s: returns VINF_SUCCESS {%d,%d,%d}\n", __FUNCTION__, pThis->PCHSGeometry.cCylinders, pThis->PCHSGeometry.cHeads, pThis->PCHSGeometry.cSectors));
2249 return VINF_SUCCESS;
2250 }
2251
2252 int rc = VDGetPCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
2253 if (RT_SUCCESS(rc))
2254 {
2255 pPCHSGeometry->cCylinders = geo.cCylinders;
2256 pPCHSGeometry->cHeads = geo.cHeads;
2257 pPCHSGeometry->cSectors = geo.cSectors;
2258 pThis->PCHSGeometry = *pPCHSGeometry;
2259 }
2260 else
2261 {
2262 LogFunc(("geometry not available.\n"));
2263 rc = VERR_PDM_GEOMETRY_NOT_SET;
2264 }
2265 LogFlowFunc(("returns %Rrc (CHS=%d/%d/%d)\n",
2266 rc, pPCHSGeometry->cCylinders, pPCHSGeometry->cHeads, pPCHSGeometry->cSectors));
2267 return rc;
2268}
2269
2270/** @interface_method_impl{PDMIMEDIA,pfnBiosSetPCHSGeometry} */
2271static DECLCALLBACK(int) drvvdBiosSetPCHSGeometry(PPDMIMEDIA pInterface,
2272 PCPDMMEDIAGEOMETRY pPCHSGeometry)
2273{
2274 LogFlowFunc(("CHS=%d/%d/%d\n",
2275 pPCHSGeometry->cCylinders, pPCHSGeometry->cHeads, pPCHSGeometry->cSectors));
2276 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2277 VDGEOMETRY geo;
2278
2279 /*
2280 * Check the state.
2281 */
2282 if (!pThis->pDisk)
2283 {
2284 AssertMsgFailed(("Invalid state! Not mounted!\n"));
2285 return VERR_PDM_MEDIA_NOT_MOUNTED;
2286 }
2287
2288 geo.cCylinders = pPCHSGeometry->cCylinders;
2289 geo.cHeads = pPCHSGeometry->cHeads;
2290 geo.cSectors = pPCHSGeometry->cSectors;
2291 int rc = VDSetPCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
2292 if (rc == VERR_VD_GEOMETRY_NOT_SET)
2293 rc = VERR_PDM_GEOMETRY_NOT_SET;
2294 if (RT_SUCCESS(rc))
2295 pThis->PCHSGeometry = *pPCHSGeometry;
2296 LogFlowFunc(("returns %Rrc\n", rc));
2297 return rc;
2298}
2299
2300/** @interface_method_impl{PDMIMEDIA,pfnBiosGetLCHSGeometry} */
2301static DECLCALLBACK(int) drvvdBiosGetLCHSGeometry(PPDMIMEDIA pInterface,
2302 PPDMMEDIAGEOMETRY pLCHSGeometry)
2303{
2304 LogFlowFunc(("\n"));
2305 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2306 VDGEOMETRY geo;
2307
2308 /*
2309 * Check the state.
2310 */
2311 if (!pThis->pDisk)
2312 return VERR_PDM_MEDIA_NOT_MOUNTED;
2313
2314 /*
2315 * Use configured/cached values if present.
2316 */
2317 if ( pThis->LCHSGeometry.cCylinders > 0
2318 && pThis->LCHSGeometry.cHeads > 0
2319 && pThis->LCHSGeometry.cSectors > 0)
2320 {
2321 *pLCHSGeometry = pThis->LCHSGeometry;
2322 LogFlow(("%s: returns VINF_SUCCESS {%d,%d,%d}\n", __FUNCTION__, pThis->LCHSGeometry.cCylinders, pThis->LCHSGeometry.cHeads, pThis->LCHSGeometry.cSectors));
2323 return VINF_SUCCESS;
2324 }
2325
2326 int rc = VDGetLCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
2327 if (RT_SUCCESS(rc))
2328 {
2329 pLCHSGeometry->cCylinders = geo.cCylinders;
2330 pLCHSGeometry->cHeads = geo.cHeads;
2331 pLCHSGeometry->cSectors = geo.cSectors;
2332 pThis->LCHSGeometry = *pLCHSGeometry;
2333 }
2334 else
2335 {
2336 LogFunc(("geometry not available.\n"));
2337 rc = VERR_PDM_GEOMETRY_NOT_SET;
2338 }
2339 LogFlowFunc(("returns %Rrc (CHS=%d/%d/%d)\n",
2340 rc, pLCHSGeometry->cCylinders, pLCHSGeometry->cHeads, pLCHSGeometry->cSectors));
2341 return rc;
2342}
2343
2344/** @interface_method_impl{PDMIMEDIA,pfnBiosSetLCHSGeometry} */
2345static DECLCALLBACK(int) drvvdBiosSetLCHSGeometry(PPDMIMEDIA pInterface,
2346 PCPDMMEDIAGEOMETRY pLCHSGeometry)
2347{
2348 LogFlowFunc(("CHS=%d/%d/%d\n",
2349 pLCHSGeometry->cCylinders, pLCHSGeometry->cHeads, pLCHSGeometry->cSectors));
2350 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2351 VDGEOMETRY geo;
2352
2353 /*
2354 * Check the state.
2355 */
2356 if (!pThis->pDisk)
2357 {
2358 AssertMsgFailed(("Invalid state! Not mounted!\n"));
2359 return VERR_PDM_MEDIA_NOT_MOUNTED;
2360 }
2361
2362 geo.cCylinders = pLCHSGeometry->cCylinders;
2363 geo.cHeads = pLCHSGeometry->cHeads;
2364 geo.cSectors = pLCHSGeometry->cSectors;
2365 int rc = VDSetLCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
2366 if (rc == VERR_VD_GEOMETRY_NOT_SET)
2367 rc = VERR_PDM_GEOMETRY_NOT_SET;
2368 if (RT_SUCCESS(rc))
2369 pThis->LCHSGeometry = *pLCHSGeometry;
2370 LogFlowFunc(("returns %Rrc\n", rc));
2371 return rc;
2372}
2373
2374/** @interface_method_impl{PDMIMEDIA,pfnBiosIsVisible} */
2375static DECLCALLBACK(bool) drvvdBiosIsVisible(PPDMIMEDIA pInterface)
2376{
2377 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2378 LogFlow(("drvvdBiosIsVisible: returns %d\n", pThis->fBiosVisible));
2379 return pThis->fBiosVisible;
2380}
2381
2382/** @interface_method_impl{PDMIMEDIA,pfnGetType} */
2383static DECLCALLBACK(PDMMEDIATYPE) drvvdGetType(PPDMIMEDIA pInterface)
2384{
2385 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2386 LogFlow(("drvvdBiosIsVisible: returns %d\n", pThis->fBiosVisible));
2387 return pThis->enmType;
2388}
2389
2390/** @interface_method_impl{PDMIMEDIA,pfnGetUuid} */
2391static DECLCALLBACK(int) drvvdGetUuid(PPDMIMEDIA pInterface, PRTUUID pUuid)
2392{
2393 LogFlowFunc(("\n"));
2394 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2395
2396 /*
2397 * Copy the uuid.
2398 */
2399 *pUuid = pThis->Uuid;
2400 LogFlowFunc(("returns {%RTuuid}\n", pUuid));
2401 return VINF_SUCCESS;
2402}
2403
2404static DECLCALLBACK(int) drvvdDiscard(PPDMIMEDIA pInterface, PCRTRANGE paRanges, unsigned cRanges)
2405{
2406 LogFlowFunc(("\n"));
2407 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
2408
2409 int rc = VDDiscardRanges(pThis->pDisk, paRanges, cRanges);
2410 LogFlowFunc(("returns %Rrc\n", rc));
2411 return rc;
2412}
2413
2414/* -=-=-=-=- IMount -=-=-=-=- */
2415
2416/** @interface_method_impl{PDMIMOUNT,pfnUnmount} */
2417static DECLCALLBACK(int) drvvdUnmount(PPDMIMOUNT pInterface, bool fForce, bool fEject)
2418{
2419 RT_NOREF(fEject);
2420 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMount);
2421
2422 /*
2423 * Validate state.
2424 */
2425 if (!pThis->pDisk)
2426 {
2427 Log(("drvvdUnmount: Not mounted\n"));
2428 return VERR_PDM_MEDIA_NOT_MOUNTED;
2429 }
2430 if (pThis->fLocked && !fForce)
2431 {
2432 Log(("drvvdUnmount: Locked\n"));
2433 return VERR_PDM_MEDIA_LOCKED;
2434 }
2435
2436 /* Media is no longer locked even if it was previously. */
2437 pThis->fLocked = false;
2438 drvvdPowerOffOrDestructOrUnmount(pThis->pDrvIns);
2439
2440 /*
2441 * Notify driver/device above us.
2442 */
2443 if (pThis->pDrvMountNotify)
2444 pThis->pDrvMountNotify->pfnUnmountNotify(pThis->pDrvMountNotify);
2445 Log(("drvblockUnmount: success\n"));
2446 return VINF_SUCCESS;
2447}
2448
2449
2450/** @interface_method_impl{PDMIMOUNT,pfnIsMounted} */
2451static DECLCALLBACK(bool) drvvdIsMounted(PPDMIMOUNT pInterface)
2452{
2453 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMount);
2454 return pThis->pDisk != NULL;
2455}
2456
2457/** @interface_method_impl{PDMIMOUNT,pfnLock} */
2458static DECLCALLBACK(int) drvvdLock(PPDMIMOUNT pInterface)
2459{
2460 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMount);
2461 Log(("drvblockLock: %d -> %d\n", pThis->fLocked, true));
2462 pThis->fLocked = true;
2463 return VINF_SUCCESS;
2464}
2465
2466/** @interface_method_impl{PDMIMOUNT,pfnUnlock} */
2467static DECLCALLBACK(int) drvvdUnlock(PPDMIMOUNT pInterface)
2468{
2469 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMount);
2470 Log(("drvblockUnlock: %d -> %d\n", pThis->fLocked, false));
2471 pThis->fLocked = false;
2472 return VINF_SUCCESS;
2473}
2474
2475/** @interface_method_impl{PDMIMOUNT,pfnIsLocked} */
2476static DECLCALLBACK(bool) drvvdIsLocked(PPDMIMOUNT pInterface)
2477{
2478 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMount);
2479 return pThis->fLocked;
2480}
2481
2482
2483static DECLCALLBACK(void) drvvdBlkCacheReqComplete(void *pvUser1, void *pvUser2, int rcReq)
2484{
2485 PVBOXDISK pThis = (PVBOXDISK)pvUser1;
2486
2487 AssertPtr(pThis->pBlkCache);
2488 PDMR3BlkCacheIoXferComplete(pThis->pBlkCache, (PPDMBLKCACHEIOXFER)pvUser2, rcReq);
2489}
2490
2491
2492/** @copydoc FNPDMBLKCACHEXFERCOMPLETEDRV */
2493static DECLCALLBACK(void) drvvdBlkCacheXferCompleteIoReq(PPDMDRVINS pDrvIns, void *pvUser, int rc)
2494{
2495 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
2496
2497 drvvdMediaExIoReqCompleteWorker(pThis, (PPDMMEDIAEXIOREQINT)pvUser, rc, true /* fUpNotify */);
2498}
2499
2500/** @copydoc FNPDMBLKCACHEXFERENQUEUEDRV */
2501static DECLCALLBACK(int) drvvdBlkCacheXferEnqueue(PPDMDRVINS pDrvIns,
2502 PDMBLKCACHEXFERDIR enmXferDir,
2503 uint64_t off, size_t cbXfer,
2504 PCRTSGBUF pSgBuf, PPDMBLKCACHEIOXFER hIoXfer)
2505{
2506 int rc = VINF_SUCCESS;
2507 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
2508
2509 Assert (!pThis->pCfgCrypto);
2510
2511 switch (enmXferDir)
2512 {
2513 case PDMBLKCACHEXFERDIR_READ:
2514 rc = VDAsyncRead(pThis->pDisk, off, cbXfer, pSgBuf, drvvdBlkCacheReqComplete,
2515 pThis, hIoXfer);
2516 break;
2517 case PDMBLKCACHEXFERDIR_WRITE:
2518 rc = VDAsyncWrite(pThis->pDisk, off, cbXfer, pSgBuf, drvvdBlkCacheReqComplete,
2519 pThis, hIoXfer);
2520 break;
2521 case PDMBLKCACHEXFERDIR_FLUSH:
2522 rc = VDAsyncFlush(pThis->pDisk, drvvdBlkCacheReqComplete, pThis, hIoXfer);
2523 break;
2524 default:
2525 AssertMsgFailed(("Invalid transfer type %d\n", enmXferDir));
2526 rc = VERR_INVALID_PARAMETER;
2527 }
2528
2529 if (rc == VINF_VD_ASYNC_IO_FINISHED)
2530 PDMR3BlkCacheIoXferComplete(pThis->pBlkCache, hIoXfer, VINF_SUCCESS);
2531 else if (RT_FAILURE(rc) && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2532 PDMR3BlkCacheIoXferComplete(pThis->pBlkCache, hIoXfer, rc);
2533
2534 return VINF_SUCCESS;
2535}
2536
2537/** @copydoc FNPDMBLKCACHEXFERENQUEUEDISCARDDRV */
2538static DECLCALLBACK(int) drvvdBlkCacheXferEnqueueDiscard(PPDMDRVINS pDrvIns, PCRTRANGE paRanges,
2539 unsigned cRanges, PPDMBLKCACHEIOXFER hIoXfer)
2540{
2541 int rc = VINF_SUCCESS;
2542 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
2543
2544 rc = VDAsyncDiscardRanges(pThis->pDisk, paRanges, cRanges,
2545 drvvdBlkCacheReqComplete, pThis, hIoXfer);
2546
2547 if (rc == VINF_VD_ASYNC_IO_FINISHED)
2548 PDMR3BlkCacheIoXferComplete(pThis->pBlkCache, hIoXfer, VINF_SUCCESS);
2549 else if (RT_FAILURE(rc) && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2550 PDMR3BlkCacheIoXferComplete(pThis->pBlkCache, hIoXfer, rc);
2551
2552 return VINF_SUCCESS;
2553}
2554
2555
2556/*********************************************************************************************************************************
2557* Extended media interface methods *
2558*********************************************************************************************************************************/
2559
2560static void drvvdMediaExIoReqWarningDiskFull(PPDMDRVINS pDrvIns)
2561{
2562 int rc;
2563 LogRel(("VD#%u: Host disk full\n", pDrvIns->iInstance));
2564 rc = PDMDrvHlpVMSetRuntimeError(pDrvIns, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "DrvVD_DISKFULL",
2565 N_("Host system reported disk full. VM execution is suspended. You can resume after freeing some space"));
2566 AssertRC(rc);
2567}
2568
2569static void drvvdMediaExIoReqWarningFileTooBig(PPDMDRVINS pDrvIns)
2570{
2571 int rc;
2572 LogRel(("VD#%u: File too big\n", pDrvIns->iInstance));
2573 rc = PDMDrvHlpVMSetRuntimeError(pDrvIns, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "DrvVD_FILETOOBIG",
2574 N_("Host system reported that the file size limit of the host file system has been exceeded. VM execution is suspended. You need to move your virtual hard disk to a filesystem which allows bigger files"));
2575 AssertRC(rc);
2576}
2577
2578static void drvvdMediaExIoReqWarningISCSI(PPDMDRVINS pDrvIns)
2579{
2580 int rc;
2581 LogRel(("VD#%u: iSCSI target unavailable\n", pDrvIns->iInstance));
2582 rc = PDMDrvHlpVMSetRuntimeError(pDrvIns, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "DrvVD_ISCSIDOWN",
2583 N_("The iSCSI target has stopped responding. VM execution is suspended. You can resume when it is available again"));
2584 AssertRC(rc);
2585}
2586
2587static void drvvdMediaExIoReqWarningDekMissing(PPDMDRVINS pDrvIns)
2588{
2589 LogRel(("VD#%u: DEK is missing\n", pDrvIns->iInstance));
2590 int rc = PDMDrvHlpVMSetRuntimeError(pDrvIns, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "DrvVD_DEKMISSING",
2591 N_("VD: The DEK for this disk is missing"));
2592 AssertRC(rc);
2593}
2594
2595/**
2596 * Checks whether a given status code indicates a recoverable error
2597 * suspending the VM if it is.
2598 *
2599 * @returns Flag indicating whether the status code is a recoverable error
2600 * (full disk, broken network connection).
2601 * @param pThis VBox disk container instance data.
2602 * @param rc Status code to check.
2603 */
2604bool drvvdMediaExIoReqIsRedoSetWarning(PVBOXDISK pThis, int rc)
2605{
2606 if (rc == VERR_DISK_FULL)
2607 {
2608 if (ASMAtomicCmpXchgBool(&pThis->fRedo, true, false))
2609 drvvdMediaExIoReqWarningDiskFull(pThis->pDrvIns);
2610 return true;
2611 }
2612 if (rc == VERR_FILE_TOO_BIG)
2613 {
2614 if (ASMAtomicCmpXchgBool(&pThis->fRedo, true, false))
2615 drvvdMediaExIoReqWarningFileTooBig(pThis->pDrvIns);
2616 return true;
2617 }
2618 if (rc == VERR_BROKEN_PIPE || rc == VERR_NET_CONNECTION_REFUSED)
2619 {
2620 /* iSCSI connection abort (first error) or failure to reestablish
2621 * connection (second error). Pause VM. On resume we'll retry. */
2622 if (ASMAtomicCmpXchgBool(&pThis->fRedo, true, false))
2623 drvvdMediaExIoReqWarningISCSI(pThis->pDrvIns);
2624 return true;
2625 }
2626 if (rc == VERR_VD_DEK_MISSING)
2627 {
2628 /* Error message already set. */
2629 if (ASMAtomicCmpXchgBool(&pThis->fRedo, true, false))
2630 drvvdMediaExIoReqWarningDekMissing(pThis->pDrvIns);
2631 return true;
2632 }
2633
2634 return false;
2635}
2636
2637/**
2638 * Syncs the memory buffers between the I/O request allocator and the internal buffer.
2639 *
2640 * @returns VBox status code.
2641 * @param pThis VBox disk container instance data.
2642 * @param pIoReq I/O request to sync.
2643 * @param fToIoBuf Flag indicating the sync direction.
2644 * true to copy data from the allocators buffer to our internal buffer.
2645 * false for the other direction.
2646 */
2647DECLINLINE(int) drvvdMediaExIoReqBufSync(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, bool fToIoBuf)
2648{
2649 int rc = VINF_SUCCESS;
2650
2651 Assert(pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_WRITE);
2652 Assert(pIoReq->ReadWrite.cbIoBuf > 0);
2653
2654 if (!pIoReq->ReadWrite.fDirectBuf)
2655 {
2656 /* Make sure the buffer is reset. */
2657 RTSgBufReset(&pIoReq->ReadWrite.IoBuf.SgBuf);
2658
2659 size_t const offSrc = pIoReq->ReadWrite.cbReq - pIoReq->ReadWrite.cbReqLeft;
2660 Assert((uint32_t)offSrc == offSrc);
2661 if (fToIoBuf)
2662 rc = pThis->pDrvMediaExPort->pfnIoReqCopyToBuf(pThis->pDrvMediaExPort, pIoReq, &pIoReq->abAlloc[0], (uint32_t)offSrc,
2663 &pIoReq->ReadWrite.IoBuf.SgBuf,
2664 RT_MIN(pIoReq->ReadWrite.cbIoBuf, pIoReq->ReadWrite.cbReqLeft));
2665 else
2666 rc = pThis->pDrvMediaExPort->pfnIoReqCopyFromBuf(pThis->pDrvMediaExPort, pIoReq, &pIoReq->abAlloc[0], (uint32_t)offSrc,
2667 &pIoReq->ReadWrite.IoBuf.SgBuf,
2668 (uint32_t)RT_MIN(pIoReq->ReadWrite.cbIoBuf, pIoReq->ReadWrite.cbReqLeft));
2669
2670 RTSgBufReset(&pIoReq->ReadWrite.IoBuf.SgBuf);
2671 }
2672 return rc;
2673}
2674
2675/**
2676 * Hashes the I/O request ID to an index for the allocated I/O request bin.
2677 */
2678DECLINLINE(unsigned) drvvdMediaExIoReqIdHash(PDMMEDIAEXIOREQID uIoReqId)
2679{
2680 return uIoReqId % DRVVD_VDIOREQ_ALLOC_BINS; /** @todo Find something better? */
2681}
2682
2683/**
2684 * Inserts the given I/O request in to the list of allocated I/O requests.
2685 *
2686 * @returns VBox status code.
2687 * @param pThis VBox disk container instance data.
2688 * @param pIoReq I/O request to insert.
2689 */
2690static int drvvdMediaExIoReqInsert(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq)
2691{
2692 int rc = VINF_SUCCESS;
2693 unsigned idxBin = drvvdMediaExIoReqIdHash(pIoReq->uIoReqId);
2694
2695 rc = RTSemFastMutexRequest(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
2696 if (RT_SUCCESS(rc))
2697 {
2698 /* Search for conflicting I/O request ID. */
2699 PPDMMEDIAEXIOREQINT pIt;
2700 RTListForEach(&pThis->aIoReqAllocBins[idxBin].LstIoReqAlloc, pIt, PDMMEDIAEXIOREQINT, NdAllocatedList)
2701 {
2702 if (RT_UNLIKELY(pIt->uIoReqId == pIoReq->uIoReqId))
2703 {
2704 rc = VERR_PDM_MEDIAEX_IOREQID_CONFLICT;
2705 break;
2706 }
2707 }
2708 if (RT_SUCCESS(rc))
2709 RTListAppend(&pThis->aIoReqAllocBins[idxBin].LstIoReqAlloc, &pIoReq->NdAllocatedList);
2710 RTSemFastMutexRelease(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
2711 }
2712
2713 return rc;
2714}
2715
2716/**
2717 * Removes the given I/O request from the list of allocated I/O requests.
2718 *
2719 * @returns VBox status code.
2720 * @param pThis VBox disk container instance data.
2721 * @param pIoReq I/O request to insert.
2722 */
2723static int drvvdMediaExIoReqRemove(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq)
2724{
2725 int rc = VINF_SUCCESS;
2726 unsigned idxBin = drvvdMediaExIoReqIdHash(pIoReq->uIoReqId);
2727
2728 rc = RTSemFastMutexRequest(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
2729 if (RT_SUCCESS(rc))
2730 {
2731 RTListNodeRemove(&pIoReq->NdAllocatedList);
2732 RTSemFastMutexRelease(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
2733 }
2734
2735 return rc;
2736}
2737
2738/**
2739 * Retires a given I/O request marking it as complete and notiyfing the
2740 * device/driver above about the completion if requested.
2741 *
2742 * @returns VBox status code.
2743 * @param pThis VBox disk container instance data.
2744 * @param pIoReq I/O request to complete.
2745 * @param rcReq The status code the request completed with.
2746 * @param fUpNotify Flag whether to notify the driver/device above us about the completion.
2747 */
2748static void drvvdMediaExIoReqRetire(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, int rcReq, bool fUpNotify)
2749{
2750 LogFlowFunc(("pThis=%#p pIoReq=%#p rcReq=%Rrc fUpNotify=%RTbool\n",
2751 pThis, pIoReq, rcReq, fUpNotify));
2752
2753 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_COMPLETING, VDIOREQSTATE_ACTIVE);
2754 if (fXchg)
2755 ASMAtomicDecU32(&pThis->cIoReqsActive);
2756 else
2757 {
2758 Assert(pIoReq->enmState == VDIOREQSTATE_CANCELED);
2759 rcReq = VERR_PDM_MEDIAEX_IOREQ_CANCELED;
2760 }
2761
2762 ASMAtomicXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_COMPLETED);
2763 drvvdMediaExIoReqBufFree(pThis, pIoReq);
2764
2765 /*
2766 * Leave a release log entry if the request was active for more than 25 seconds
2767 * (30 seconds is the timeout of the guest).
2768 */
2769 uint64_t tsNow = RTTimeMilliTS();
2770 if (tsNow - pIoReq->tsSubmit >= 25 * 1000)
2771 {
2772 const char *pcszReq = NULL;
2773
2774 switch (pIoReq->enmType)
2775 {
2776 case PDMMEDIAEXIOREQTYPE_READ:
2777 pcszReq = "Read";
2778 break;
2779 case PDMMEDIAEXIOREQTYPE_WRITE:
2780 pcszReq = "Write";
2781 break;
2782 case PDMMEDIAEXIOREQTYPE_FLUSH:
2783 pcszReq = "Flush";
2784 break;
2785 case PDMMEDIAEXIOREQTYPE_DISCARD:
2786 pcszReq = "Discard";
2787 break;
2788 default:
2789 pcszReq = "<Invalid>";
2790 }
2791
2792 LogRel(("VD#%u: %s request was active for %llu seconds\n",
2793 pThis->pDrvIns->iInstance, pcszReq, (tsNow - pIoReq->tsSubmit) / 1000));
2794 }
2795
2796 if (RT_FAILURE(rcReq))
2797 {
2798 /* Log the error. */
2799 if (pThis->cErrors++ < DRVVD_MAX_LOG_REL_ERRORS)
2800 {
2801 if (rcReq == VERR_PDM_MEDIAEX_IOREQ_CANCELED)
2802 {
2803 if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_FLUSH)
2804 LogRel(("VD#%u: Aborted flush returned rc=%Rrc\n",
2805 pThis->pDrvIns->iInstance, rcReq));
2806 else if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_DISCARD)
2807 LogRel(("VD#%u: Aborted discard returned rc=%Rrc\n",
2808 pThis->pDrvIns->iInstance, rcReq));
2809 else
2810 LogRel(("VD#%u: Aborted %s (%u bytes left) returned rc=%Rrc\n",
2811 pThis->pDrvIns->iInstance,
2812 pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ
2813 ? "read"
2814 : "write",
2815 pIoReq->ReadWrite.cbReqLeft, rcReq));
2816 }
2817 else
2818 {
2819 if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_FLUSH)
2820 LogRel(("VD#%u: Flush returned rc=%Rrc\n",
2821 pThis->pDrvIns->iInstance, rcReq));
2822 else if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_DISCARD)
2823 LogRel(("VD#%u: Discard returned rc=%Rrc\n",
2824 pThis->pDrvIns->iInstance, rcReq));
2825 else
2826 LogRel(("VD#%u: %s (%u bytes left) returned rc=%Rrc\n",
2827 pThis->pDrvIns->iInstance,
2828 pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ
2829 ? "Read"
2830 : "Write",
2831 pIoReq->ReadWrite.cbReqLeft, rcReq));
2832 }
2833 }
2834 }
2835
2836 if (fUpNotify)
2837 {
2838 int rc = pThis->pDrvMediaExPort->pfnIoReqCompleteNotify(pThis->pDrvMediaExPort,
2839 pIoReq, &pIoReq->abAlloc[0], rcReq);
2840 AssertRC(rc);
2841 }
2842
2843 LogFlowFunc(("returns\n"));
2844}
2845
2846/**
2847 * I/O request completion worker.
2848 *
2849 * @returns VBox status code.
2850 * @param pThis VBox disk container instance data.
2851 * @param pIoReq I/O request to complete.
2852 * @param rcReq The status code the request completed with.
2853 * @param fUpNotify Flag whether to notify the driver/device above us about the completion.
2854 */
2855static int drvvdMediaExIoReqCompleteWorker(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, int rcReq, bool fUpNotify)
2856{
2857 LogFlowFunc(("pThis=%#p pIoReq=%#p rcReq=%Rrc fUpNotify=%RTbool\n",
2858 pThis, pIoReq, rcReq, fUpNotify));
2859
2860 /*
2861 * For a read we need to sync the memory before continuing to process
2862 * the request further.
2863 */
2864 if ( RT_SUCCESS(rcReq)
2865 && pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ)
2866 rcReq = drvvdMediaExIoReqBufSync(pThis, pIoReq, false /* fToIoBuf */);
2867
2868 /*
2869 * When the request owner instructs us to handle recoverable errors like full disks
2870 * do it. Mark the request as suspended, notify the owner and put the request on the
2871 * redo list.
2872 */
2873 if ( RT_FAILURE(rcReq)
2874 && (pIoReq->fFlags & PDMIMEDIAEX_F_SUSPEND_ON_RECOVERABLE_ERR)
2875 && drvvdMediaExIoReqIsRedoSetWarning(pThis, rcReq))
2876 {
2877 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_SUSPENDED, VDIOREQSTATE_ACTIVE);
2878 if (fXchg)
2879 {
2880 /* Put on redo list and adjust active request counter. */
2881 RTCritSectEnter(&pThis->CritSectIoReqRedo);
2882 RTListAppend(&pThis->LstIoReqRedo, &pIoReq->NdLstWait);
2883 RTCritSectLeave(&pThis->CritSectIoReqRedo);
2884 ASMAtomicDecU32(&pThis->cIoReqsActive);
2885 pThis->pDrvMediaExPort->pfnIoReqStateChanged(pThis->pDrvMediaExPort, pIoReq, &pIoReq->abAlloc[0],
2886 PDMMEDIAEXIOREQSTATE_SUSPENDED);
2887 rcReq = VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS;
2888 }
2889 else
2890 {
2891 /* Request was canceled inbetween, so don't care and notify the owner about the completed request. */
2892 Assert(pIoReq->enmState == VDIOREQSTATE_CANCELED);
2893 drvvdMediaExIoReqRetire(pThis, pIoReq, rcReq, fUpNotify);
2894 }
2895 }
2896 else
2897 {
2898 /* Adjust the remaining amount to transfer. */
2899 Assert(pIoReq->ReadWrite.cbIoBuf > 0);
2900
2901 size_t cbReqIo = RT_MIN(pIoReq->ReadWrite.cbReqLeft, pIoReq->ReadWrite.cbIoBuf);
2902 pIoReq->ReadWrite.offStart += cbReqIo;
2903 pIoReq->ReadWrite.cbReqLeft -= cbReqIo;
2904
2905 if ( RT_FAILURE(rcReq)
2906 || !pIoReq->ReadWrite.cbReqLeft
2907 || ( pIoReq->enmType != PDMMEDIAEXIOREQTYPE_READ
2908 && pIoReq->enmType != PDMMEDIAEXIOREQTYPE_WRITE))
2909 drvvdMediaExIoReqRetire(pThis, pIoReq, rcReq, fUpNotify);
2910 else
2911 drvvdMediaExIoReqReadWriteProcess(pThis, pIoReq, fUpNotify);
2912 }
2913
2914 LogFlowFunc(("returns %Rrc\n", rcReq));
2915 return rcReq;
2916}
2917
2918
2919/**
2920 * Allocates a memory buffer suitable for I/O for the given request.
2921 *
2922 * @returns VBox status code.
2923 * @retval VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS if there is no I/O memory available to allocate and
2924 * the request was placed on a waiting list.
2925 * @param pThis VBox disk container instance data.
2926 * @param pIoReq I/O request to allocate memory for.
2927 * @param cb Size of the buffer.
2928 */
2929DECLINLINE(int) drvvdMediaExIoReqBufAlloc(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, size_t cb)
2930{
2931 int rc = VERR_NOT_SUPPORTED;
2932 LogFlowFunc(("pThis=%#p pIoReq=%#p cb=%zu\n", pThis, pIoReq, cb));
2933
2934 if ( cb == _4K
2935 && pThis->pDrvMediaExPort->pfnIoReqQueryBuf)
2936 {
2937 /* Try to get a direct pointer to the buffer first. */
2938 void *pvBuf = NULL;
2939 size_t cbBuf = 0;
2940
2941 STAM_COUNTER_INC(&pThis->StatQueryBufAttempts);
2942 rc = pThis->pDrvMediaExPort->pfnIoReqQueryBuf(pThis->pDrvMediaExPort, pIoReq, &pIoReq->abAlloc[0],
2943 &pvBuf, &cbBuf);
2944 if (RT_SUCCESS(rc))
2945 {
2946 STAM_COUNTER_INC(&pThis->StatQueryBufSuccess);
2947 pIoReq->ReadWrite.cbIoBuf = cbBuf;
2948 pIoReq->ReadWrite.fDirectBuf = true;
2949 pIoReq->ReadWrite.Direct.Seg.pvSeg = pvBuf;
2950 pIoReq->ReadWrite.Direct.Seg.cbSeg = cbBuf;
2951 RTSgBufInit(&pIoReq->ReadWrite.Direct.SgBuf, &pIoReq->ReadWrite.Direct.Seg, 1);
2952 pIoReq->ReadWrite.pSgBuf = &pIoReq->ReadWrite.Direct.SgBuf;
2953 }
2954 }
2955
2956 if (RT_FAILURE(rc))
2957 {
2958 rc = IOBUFMgrAllocBuf(pThis->hIoBufMgr, &pIoReq->ReadWrite.IoBuf, cb, &pIoReq->ReadWrite.cbIoBuf);
2959 if (rc == VERR_NO_MEMORY)
2960 {
2961 LogFlowFunc(("Could not allocate memory for request, deferring\n"));
2962 RTCritSectEnter(&pThis->CritSectIoReqsIoBufWait);
2963 RTListAppend(&pThis->LstIoReqIoBufWait, &pIoReq->NdLstWait);
2964 RTCritSectLeave(&pThis->CritSectIoReqsIoBufWait);
2965 ASMAtomicIncU32(&pThis->cIoReqsWaiting);
2966 rc = VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS;
2967 }
2968 else
2969 {
2970 LogFlowFunc(("Allocated %zu bytes of memory\n", pIoReq->ReadWrite.cbIoBuf));
2971 Assert(pIoReq->ReadWrite.cbIoBuf > 0);
2972 pIoReq->ReadWrite.fDirectBuf = false;
2973 pIoReq->ReadWrite.pSgBuf = &pIoReq->ReadWrite.IoBuf.SgBuf;
2974 }
2975 }
2976
2977 LogFlowFunc(("returns %Rrc\n", rc));
2978 return rc;
2979}
2980
2981/**
2982 * Wrapper around the various ways to read from the underlying medium (cache, async vs. sync).
2983 *
2984 * @returns VBox status code.
2985 * @param pThis VBox disk container instance data.
2986 * @param pIoReq I/O request to process.
2987 * @param cbReqIo Transfer size.
2988 * @param pcbReqIo Where to store the amount of transferred data.
2989 */
2990static int drvvdMediaExIoReqReadWrapper(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, size_t cbReqIo, size_t *pcbReqIo)
2991{
2992 int rc = VINF_SUCCESS;
2993
2994 LogFlowFunc(("pThis=%#p pIoReq=%#p cbReqIo=%zu pcbReqIo=%#p\n", pThis, pIoReq, cbReqIo, pcbReqIo));
2995
2996 Assert(cbReqIo > 0);
2997
2998 if ( pThis->fAsyncIOSupported
2999 && !(pIoReq->fFlags & PDMIMEDIAEX_F_SYNC))
3000 {
3001 if (pThis->pBlkCache)
3002 {
3003 rc = PDMR3BlkCacheRead(pThis->pBlkCache, pIoReq->ReadWrite.offStart,
3004 pIoReq->ReadWrite.pSgBuf, cbReqIo, pIoReq);
3005 if (rc == VINF_SUCCESS)
3006 rc = VINF_VD_ASYNC_IO_FINISHED;
3007 else if (rc == VINF_AIO_TASK_PENDING)
3008 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
3009 }
3010 else
3011 rc = VDAsyncRead(pThis->pDisk, pIoReq->ReadWrite.offStart, cbReqIo, pIoReq->ReadWrite.pSgBuf,
3012 drvvdMediaExIoReqComplete, pThis, pIoReq);
3013 }
3014 else
3015 {
3016 void *pvBuf = RTSgBufGetNextSegment(pIoReq->ReadWrite.pSgBuf, &cbReqIo);
3017
3018 Assert(cbReqIo > 0 && VALID_PTR(pvBuf));
3019 rc = VDRead(pThis->pDisk, pIoReq->ReadWrite.offStart, pvBuf, cbReqIo);
3020 if (RT_SUCCESS(rc))
3021 rc = VINF_VD_ASYNC_IO_FINISHED;
3022 }
3023
3024 *pcbReqIo = cbReqIo;
3025
3026 LogFlowFunc(("returns %Rrc *pcbReqIo=%zu\n", rc, *pcbReqIo));
3027 return rc;
3028}
3029
3030/**
3031 * Wrapper around the various ways to write to the underlying medium (cache, async vs. sync).
3032 *
3033 * @returns VBox status code.
3034 * @param pThis VBox disk container instance data.
3035 * @param pIoReq I/O request to process.
3036 * @param cbReqIo Transfer size.
3037 * @param pcbReqIo Where to store the amount of transferred data.
3038 */
3039static int drvvdMediaExIoReqWriteWrapper(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, size_t cbReqIo, size_t *pcbReqIo)
3040{
3041 int rc = VINF_SUCCESS;
3042
3043 Assert(cbReqIo > 0);
3044
3045 LogFlowFunc(("pThis=%#p pIoReq=%#p cbReqIo=%zu pcbReqIo=%#p\n", pThis, pIoReq, cbReqIo, pcbReqIo));
3046
3047 if ( pThis->fAsyncIOSupported
3048 && !(pIoReq->fFlags & PDMIMEDIAEX_F_SYNC))
3049 {
3050 if (pThis->pBlkCache)
3051 {
3052 rc = PDMR3BlkCacheWrite(pThis->pBlkCache, pIoReq->ReadWrite.offStart,
3053 pIoReq->ReadWrite.pSgBuf, cbReqIo, pIoReq);
3054 if (rc == VINF_SUCCESS)
3055 rc = VINF_VD_ASYNC_IO_FINISHED;
3056 else if (rc == VINF_AIO_TASK_PENDING)
3057 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
3058 }
3059 else
3060 rc = VDAsyncWrite(pThis->pDisk, pIoReq->ReadWrite.offStart, cbReqIo, pIoReq->ReadWrite.pSgBuf,
3061 drvvdMediaExIoReqComplete, pThis, pIoReq);
3062 }
3063 else
3064 {
3065 void *pvBuf = RTSgBufGetNextSegment(pIoReq->ReadWrite.pSgBuf, &cbReqIo);
3066
3067 Assert(cbReqIo > 0 && VALID_PTR(pvBuf));
3068 rc = VDWrite(pThis->pDisk, pIoReq->ReadWrite.offStart, pvBuf, cbReqIo);
3069 if (RT_SUCCESS(rc))
3070 rc = VINF_VD_ASYNC_IO_FINISHED;
3071 }
3072
3073 *pcbReqIo = cbReqIo;
3074
3075 LogFlowFunc(("returns %Rrc *pcbReqIo=%zu\n", rc, *pcbReqIo));
3076 return rc;
3077}
3078
3079/**
3080 * Wrapper around the various ways to flush all data to the underlying medium (cache, async vs. sync).
3081 *
3082 * @returns VBox status code.
3083 * @param pThis VBox disk container instance data.
3084 * @param pIoReq I/O request to process.
3085 */
3086static int drvvdMediaExIoReqFlushWrapper(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq)
3087{
3088 int rc = VINF_SUCCESS;
3089
3090 LogFlowFunc(("pThis=%#p pIoReq=%#p\n", pThis, pIoReq));
3091
3092 if ( pThis->fAsyncIOSupported
3093 && !(pIoReq->fFlags & PDMIMEDIAEX_F_SYNC))
3094 {
3095 if (pThis->pBlkCache)
3096 {
3097 rc = PDMR3BlkCacheFlush(pThis->pBlkCache, pIoReq);
3098 if (rc == VINF_SUCCESS)
3099 rc = VINF_VD_ASYNC_IO_FINISHED;
3100 else if (rc == VINF_AIO_TASK_PENDING)
3101 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
3102 }
3103 else
3104 rc = VDAsyncFlush(pThis->pDisk, drvvdMediaExIoReqComplete, pThis, pIoReq);
3105 }
3106 else
3107 {
3108 rc = VDFlush(pThis->pDisk);
3109 if (RT_SUCCESS(rc))
3110 rc = VINF_VD_ASYNC_IO_FINISHED;
3111 }
3112
3113 LogFlowFunc(("returns %Rrc\n", rc));
3114 return rc;
3115}
3116
3117/**
3118 * Wrapper around the various ways to discard data blocks on the underlying medium (cache, async vs. sync).
3119 *
3120 * @returns VBox status code.
3121 * @param pThis VBox disk container instance data.
3122 * @param pIoReq I/O request to process.
3123 */
3124static int drvvdMediaExIoReqDiscardWrapper(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq)
3125{
3126 int rc = VINF_SUCCESS;
3127
3128 LogFlowFunc(("pThis=%#p pIoReq=%#p\n", pThis, pIoReq));
3129
3130 if ( pThis->fAsyncIOSupported
3131 && !(pIoReq->fFlags & PDMIMEDIAEX_F_SYNC))
3132 {
3133 if (pThis->pBlkCache)
3134 {
3135 rc = PDMR3BlkCacheDiscard(pThis->pBlkCache, pIoReq->Discard.paRanges, pIoReq->Discard.cRanges, pIoReq);
3136 if (rc == VINF_SUCCESS)
3137 rc = VINF_VD_ASYNC_IO_FINISHED;
3138 else if (rc == VINF_AIO_TASK_PENDING)
3139 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
3140 }
3141 else
3142 rc = VDAsyncDiscardRanges(pThis->pDisk, pIoReq->Discard.paRanges, pIoReq->Discard.cRanges,
3143 drvvdMediaExIoReqComplete, pThis, pIoReq);
3144 }
3145 else
3146 {
3147 rc = VDDiscardRanges(pThis->pDisk, pIoReq->Discard.paRanges, pIoReq->Discard.cRanges);
3148 if (RT_SUCCESS(rc))
3149 rc = VINF_VD_ASYNC_IO_FINISHED;
3150 }
3151
3152 LogFlowFunc(("returns %Rrc\n", rc));
3153 return rc;
3154}
3155
3156/**
3157 * Processes a read/write request.
3158 *
3159 * @returns VBox status code.
3160 * @param pThis VBox disk container instance data.
3161 * @param pIoReq I/O request to process.
3162 * @param fUpNotify Flag whether to notify the driver/device above us about the completion.
3163 */
3164static int drvvdMediaExIoReqReadWriteProcess(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq, bool fUpNotify)
3165{
3166 int rc = VINF_SUCCESS;
3167
3168 LogFlowFunc(("pThis=%#p pIoReq=%#p fUpNotify=%RTbool\n", pThis, pIoReq, fUpNotify));
3169
3170 Assert(pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_WRITE);
3171
3172 rc = drvvdKeyCheckPrereqs(pThis, false /* fSetError */);
3173
3174 while ( pIoReq->ReadWrite.cbReqLeft
3175 && rc == VINF_SUCCESS)
3176 {
3177 Assert(pIoReq->ReadWrite.cbIoBuf > 0);
3178
3179 size_t cbReqIo = RT_MIN(pIoReq->ReadWrite.cbReqLeft, pIoReq->ReadWrite.cbIoBuf);
3180
3181 if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ)
3182 rc = drvvdMediaExIoReqReadWrapper(pThis, pIoReq, cbReqIo, &cbReqIo);
3183 else
3184 {
3185 /* Sync memory buffer from the request initiator. */
3186 rc = drvvdMediaExIoReqBufSync(pThis, pIoReq, true /* fToIoBuf */);
3187 if (RT_SUCCESS(rc))
3188 rc = drvvdMediaExIoReqWriteWrapper(pThis, pIoReq, cbReqIo, &cbReqIo);
3189 }
3190
3191 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
3192 rc = VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS;
3193 else if (rc == VINF_VD_ASYNC_IO_FINISHED)
3194 {
3195 if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ)
3196 rc = drvvdMediaExIoReqBufSync(pThis, pIoReq, false /* fToIoBuf */);
3197 else
3198 rc = VINF_SUCCESS;
3199 pIoReq->ReadWrite.offStart += cbReqIo;
3200 pIoReq->ReadWrite.cbReqLeft -= cbReqIo;
3201 }
3202 }
3203
3204 if (rc != VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS)
3205 {
3206 Assert(!pIoReq->ReadWrite.cbReqLeft || RT_FAILURE(rc));
3207 rc = drvvdMediaExIoReqCompleteWorker(pThis, pIoReq, rc, fUpNotify);
3208 }
3209
3210 LogFlowFunc(("returns %Rrc\n", rc));
3211 return rc;
3212}
3213
3214
3215/**
3216 * Frees a I/O memory buffer allocated previously.
3217 *
3218 * @returns nothing.
3219 * @param pThis VBox disk container instance data.
3220 * @param pIoReq I/O request for which to free memory.
3221 */
3222DECLINLINE(void) drvvdMediaExIoReqBufFree(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq)
3223{
3224 LogFlowFunc(("pThis=%#p pIoReq=%#p{.cbIoBuf=%zu}\n", pThis, pIoReq, pIoReq->ReadWrite.cbIoBuf));
3225
3226 if ( ( pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ
3227 || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_WRITE)
3228 && !pIoReq->ReadWrite.fDirectBuf)
3229 {
3230 IOBUFMgrFreeBuf(&pIoReq->ReadWrite.IoBuf);
3231
3232 uint32_t cIoReqsWaiting = ASMAtomicXchgU32(&pThis->cIoReqsWaiting, 0);
3233 if (cIoReqsWaiting > 0)
3234 {
3235 /* Try to process as many requests as possible. */
3236 RTCritSectEnter(&pThis->CritSectIoReqsIoBufWait);
3237 PPDMMEDIAEXIOREQINT pIoReqCur, pIoReqNext;
3238
3239 RTListForEachSafe(&pThis->LstIoReqIoBufWait, pIoReqCur, pIoReqNext, PDMMEDIAEXIOREQINT, NdLstWait)
3240 {
3241 LogFlowFunc(("Found I/O request %#p on waiting list, trying to allocate buffer of size %zu bytes\n",
3242 pIoReqCur, pIoReqCur->ReadWrite.cbReq));
3243
3244 /* Allocate a suitable I/O buffer for this request. */
3245 int rc = IOBUFMgrAllocBuf(pThis->hIoBufMgr, &pIoReqCur->ReadWrite.IoBuf, pIoReqCur->ReadWrite.cbReq,
3246 &pIoReqCur->ReadWrite.cbIoBuf);
3247 if (rc == VINF_SUCCESS)
3248 {
3249 Assert(pIoReqCur->ReadWrite.cbIoBuf > 0);
3250
3251 cIoReqsWaiting--;
3252 RTListNodeRemove(&pIoReqCur->NdLstWait);
3253
3254 pIoReqCur->ReadWrite.fDirectBuf = false;
3255 pIoReqCur->ReadWrite.pSgBuf = &pIoReqCur->ReadWrite.IoBuf.SgBuf;
3256
3257 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReqCur->enmState, VDIOREQSTATE_ACTIVE, VDIOREQSTATE_ALLOCATED);
3258 if (RT_UNLIKELY(!fXchg))
3259 {
3260 /* Must have been canceled inbetween. */
3261 Assert(pIoReqCur->enmState == VDIOREQSTATE_CANCELED);
3262 drvvdMediaExIoReqCompleteWorker(pThis, pIoReqCur, VERR_PDM_MEDIAEX_IOREQ_CANCELED, true /* fUpNotify */);
3263 }
3264 ASMAtomicIncU32(&pThis->cIoReqsActive);
3265 rc = drvvdMediaExIoReqReadWriteProcess(pThis, pIoReqCur, true /* fUpNotify */);
3266 }
3267 else
3268 {
3269 Assert(rc == VERR_NO_MEMORY);
3270 break;
3271 }
3272 }
3273 RTCritSectLeave(&pThis->CritSectIoReqsIoBufWait);
3274
3275 ASMAtomicAddU32(&pThis->cIoReqsWaiting, cIoReqsWaiting);
3276 }
3277 }
3278
3279 LogFlowFunc(("returns\n"));
3280}
3281
3282
3283/**
3284 * Returns whether the VM is in a running state.
3285 *
3286 * @returns Flag indicating whether the VM is currently in a running state.
3287 * @param pThis VBox disk container instance data.
3288 */
3289DECLINLINE(bool) drvvdMediaExIoReqIsVmRunning(PVBOXDISK pThis)
3290{
3291 VMSTATE enmVmState = PDMDrvHlpVMState(pThis->pDrvIns);
3292 if ( enmVmState == VMSTATE_RESUMING
3293 || enmVmState == VMSTATE_RUNNING
3294 || enmVmState == VMSTATE_RUNNING_LS
3295 || enmVmState == VMSTATE_RUNNING_FT
3296 || enmVmState == VMSTATE_RESETTING
3297 || enmVmState == VMSTATE_RESETTING_LS
3298 || enmVmState == VMSTATE_SOFT_RESETTING
3299 || enmVmState == VMSTATE_SOFT_RESETTING_LS
3300 || enmVmState == VMSTATE_SUSPENDING
3301 || enmVmState == VMSTATE_SUSPENDING_LS
3302 || enmVmState == VMSTATE_SUSPENDING_EXT_LS)
3303 return true;
3304
3305 return false;
3306}
3307
3308/**
3309 * @copydoc FNVDASYNCTRANSFERCOMPLETE
3310 */
3311static DECLCALLBACK(void) drvvdMediaExIoReqComplete(void *pvUser1, void *pvUser2, int rcReq)
3312{
3313 PVBOXDISK pThis = (PVBOXDISK)pvUser1;
3314 PPDMMEDIAEXIOREQINT pIoReq = (PPDMMEDIAEXIOREQINT)pvUser2;
3315
3316 drvvdMediaExIoReqCompleteWorker(pThis, pIoReq, rcReq, true /* fUpNotify */);
3317}
3318
3319/**
3320 * Tries to cancel the given I/O request returning the result.
3321 *
3322 * @returns Flag whether the request was successfully canceled or whether it
3323 * already complete inbetween.
3324 * @param pThis VBox disk container instance data.
3325 * @param pIoReq The I/O request to cancel.
3326 */
3327static bool drvvdMediaExIoReqCancel(PVBOXDISK pThis, PPDMMEDIAEXIOREQINT pIoReq)
3328{
3329 bool fXchg = true;
3330 VDIOREQSTATE enmStateOld = (VDIOREQSTATE)ASMAtomicReadU32((volatile uint32_t *)&pIoReq->enmState);
3331
3332 /*
3333 * We might have to try canceling the request multiple times if it transitioned from
3334 * ALLOCATED to ACTIVE or to SUSPENDED between reading the state and trying to change it.
3335 */
3336 while ( ( enmStateOld == VDIOREQSTATE_ALLOCATED
3337 || enmStateOld == VDIOREQSTATE_ACTIVE
3338 || enmStateOld == VDIOREQSTATE_SUSPENDED)
3339 && !fXchg)
3340 {
3341 fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_CANCELED, enmStateOld);
3342 if (!fXchg)
3343 enmStateOld = (VDIOREQSTATE)ASMAtomicReadU32((volatile uint32_t *)&pIoReq->enmState);
3344 }
3345
3346 if (fXchg)
3347 ASMAtomicDecU32(&pThis->cIoReqsActive);
3348
3349 return fXchg;
3350}
3351
3352/**
3353 * @interface_method_impl{PDMIMEDIAEX,pfnQueryFeatures}
3354 */
3355static DECLCALLBACK(int) drvvdQueryFeatures(PPDMIMEDIAEX pInterface, uint32_t *pfFeatures)
3356{
3357 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3358
3359 AssertPtrReturn(pfFeatures, VERR_INVALID_POINTER);
3360
3361 uint32_t fFeatures = 0;
3362 if (pThis->fAsyncIOSupported)
3363 fFeatures |= PDMIMEDIAEX_FEATURE_F_ASYNC;
3364 if (pThis->IMedia.pfnDiscard)
3365 fFeatures |= PDMIMEDIAEX_FEATURE_F_DISCARD;
3366
3367 *pfFeatures = fFeatures;
3368
3369 return VINF_SUCCESS;
3370}
3371
3372
3373/**
3374 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqAllocSizeSet}
3375 */
3376static DECLCALLBACK(int) drvvdIoReqAllocSizeSet(PPDMIMEDIAEX pInterface, size_t cbIoReqAlloc)
3377{
3378 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3379 if (RT_UNLIKELY(pThis->hIoReqCache != NIL_RTMEMCACHE))
3380 return VERR_INVALID_STATE;
3381
3382 return RTMemCacheCreate(&pThis->hIoReqCache, sizeof(PDMMEDIAEXIOREQINT) + cbIoReqAlloc, 0, UINT32_MAX,
3383 NULL, NULL, NULL, 0);
3384}
3385
3386/**
3387 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqAlloc}
3388 */
3389static DECLCALLBACK(int) drvvdIoReqAlloc(PPDMIMEDIAEX pInterface, PPDMMEDIAEXIOREQ phIoReq, void **ppvIoReqAlloc,
3390 PDMMEDIAEXIOREQID uIoReqId, uint32_t fFlags)
3391{
3392 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3393
3394 AssertReturn(!(fFlags & ~PDMIMEDIAEX_F_VALID), VERR_INVALID_PARAMETER);
3395
3396 PPDMMEDIAEXIOREQINT pIoReq = (PPDMMEDIAEXIOREQINT)RTMemCacheAlloc(pThis->hIoReqCache);
3397
3398 if (RT_UNLIKELY(!pIoReq))
3399 return VERR_NO_MEMORY;
3400
3401 pIoReq->uIoReqId = uIoReqId;
3402 pIoReq->fFlags = fFlags;
3403 pIoReq->pDisk = pThis;
3404 pIoReq->enmState = VDIOREQSTATE_ALLOCATED;
3405 pIoReq->enmType = PDMMEDIAEXIOREQTYPE_INVALID;
3406
3407 int rc = drvvdMediaExIoReqInsert(pThis, pIoReq);
3408 if (RT_SUCCESS(rc))
3409 {
3410 *phIoReq = pIoReq;
3411 *ppvIoReqAlloc = &pIoReq->abAlloc[0];
3412 }
3413 else
3414 RTMemCacheFree(pThis->hIoReqCache, pIoReq);
3415
3416 return rc;
3417}
3418
3419/**
3420 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqFree}
3421 */
3422static DECLCALLBACK(int) drvvdIoReqFree(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq)
3423{
3424 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3425 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3426
3427 if ( pIoReq->enmState != VDIOREQSTATE_COMPLETED
3428 && pIoReq->enmState != VDIOREQSTATE_ALLOCATED)
3429 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3430
3431 /* Remove from allocated list. */
3432 int rc = drvvdMediaExIoReqRemove(pThis, pIoReq);
3433 if (RT_FAILURE(rc))
3434 return rc;
3435
3436 /* Free any associated I/O memory. */
3437 drvvdMediaExIoReqBufFree(pThis, pIoReq);
3438
3439 /* For discard request discard the range array. */
3440 if ( pIoReq->enmType == PDMMEDIAEXIOREQTYPE_DISCARD
3441 && pIoReq->Discard.paRanges)
3442 {
3443 RTMemFree(pIoReq->Discard.paRanges);
3444 pIoReq->Discard.paRanges = NULL;
3445 }
3446
3447 pIoReq->enmState = VDIOREQSTATE_FREE;
3448 RTMemCacheFree(pThis->hIoReqCache, pIoReq);
3449 return VINF_SUCCESS;
3450}
3451
3452/**
3453 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqQueryResidual}
3454 */
3455static DECLCALLBACK(int) drvvdIoReqQueryResidual(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq, size_t *pcbResidual)
3456{
3457 RT_NOREF1(pInterface);
3458
3459 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3460
3461 if (pIoReq->enmState != VDIOREQSTATE_COMPLETED)
3462 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3463
3464 if ( pIoReq->enmType != PDMMEDIAEXIOREQTYPE_READ
3465 && pIoReq->enmType != PDMMEDIAEXIOREQTYPE_WRITE
3466 && pIoReq->enmType != PDMMEDIAEXIOREQTYPE_FLUSH)
3467 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3468
3469 *pcbResidual = 0; /* No data left to transfer always. */
3470 return VINF_SUCCESS;
3471}
3472
3473/**
3474 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqQueryXferSize}
3475 */
3476static DECLCALLBACK(int) drvvdIoReqQueryXferSize(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq, size_t *pcbXfer)
3477{
3478 int rc = VINF_SUCCESS;
3479 RT_NOREF1(pInterface);
3480
3481 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3482
3483 if (pIoReq->enmState != VDIOREQSTATE_COMPLETED)
3484 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3485
3486 if ( pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ
3487 || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_WRITE)
3488 *pcbXfer = pIoReq->ReadWrite.cbReq;
3489 else if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_FLUSH)
3490 *pcbXfer = 0;
3491 else
3492 rc = VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3493
3494 return rc;
3495}
3496
3497/**
3498 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqCancelAll}
3499 */
3500static DECLCALLBACK(int) drvvdIoReqCancelAll(PPDMIMEDIAEX pInterface)
3501{
3502 int rc = VINF_SUCCESS;
3503 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3504
3505 for (unsigned idxBin = 0; idxBin < RT_ELEMENTS(pThis->aIoReqAllocBins); idxBin++)
3506 {
3507 rc = RTSemFastMutexRequest(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
3508 if (RT_SUCCESS(rc))
3509 {
3510 /* Search for I/O request with ID. */
3511 PPDMMEDIAEXIOREQINT pIt;
3512
3513 RTListForEach(&pThis->aIoReqAllocBins[idxBin].LstIoReqAlloc, pIt, PDMMEDIAEXIOREQINT, NdAllocatedList)
3514 {
3515 drvvdMediaExIoReqCancel(pThis, pIt);
3516 }
3517 RTSemFastMutexRelease(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
3518 }
3519 }
3520
3521 return rc;
3522}
3523
3524/**
3525 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqCancel}
3526 */
3527static DECLCALLBACK(int) drvvdIoReqCancel(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQID uIoReqId)
3528{
3529 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3530 unsigned idxBin = drvvdMediaExIoReqIdHash(uIoReqId);
3531
3532 int rc = RTSemFastMutexRequest(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
3533 if (RT_SUCCESS(rc))
3534 {
3535 /* Search for I/O request with ID. */
3536 PPDMMEDIAEXIOREQINT pIt;
3537 rc = VERR_PDM_MEDIAEX_IOREQID_NOT_FOUND;
3538
3539 RTListForEach(&pThis->aIoReqAllocBins[idxBin].LstIoReqAlloc, pIt, PDMMEDIAEXIOREQINT, NdAllocatedList)
3540 {
3541 if (pIt->uIoReqId == uIoReqId)
3542 {
3543 if (drvvdMediaExIoReqCancel(pThis, pIt))
3544 rc = VINF_SUCCESS;
3545
3546 break;
3547 }
3548 }
3549 RTSemFastMutexRelease(pThis->aIoReqAllocBins[idxBin].hMtxLstIoReqAlloc);
3550 }
3551
3552 return rc;
3553}
3554
3555/**
3556 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqRead}
3557 */
3558static DECLCALLBACK(int) drvvdIoReqRead(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq, uint64_t off, size_t cbRead)
3559{
3560 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3561 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3562 VDIOREQSTATE enmState = (VDIOREQSTATE)ASMAtomicReadU32((volatile uint32_t *)&pIoReq->enmState);
3563
3564 if (RT_UNLIKELY(enmState == VDIOREQSTATE_CANCELED))
3565 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3566
3567 if (RT_UNLIKELY(enmState != VDIOREQSTATE_ALLOCATED))
3568 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3569
3570 pIoReq->enmType = PDMMEDIAEXIOREQTYPE_READ;
3571 pIoReq->tsSubmit = RTTimeMilliTS();
3572 pIoReq->ReadWrite.offStart = off;
3573 pIoReq->ReadWrite.cbReq = cbRead;
3574 pIoReq->ReadWrite.cbReqLeft = cbRead;
3575 /* Allocate a suitable I/O buffer for this request. */
3576 int rc = drvvdMediaExIoReqBufAlloc(pThis, pIoReq, cbRead);
3577 if (rc == VINF_SUCCESS)
3578 {
3579 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_ACTIVE, VDIOREQSTATE_ALLOCATED);
3580 if (RT_UNLIKELY(!fXchg))
3581 {
3582 /* Must have been canceled inbetween. */
3583 Assert(pIoReq->enmState == VDIOREQSTATE_CANCELED);
3584 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3585 }
3586 ASMAtomicIncU32(&pThis->cIoReqsActive);
3587
3588 rc = drvvdMediaExIoReqReadWriteProcess(pThis, pIoReq, false /* fUpNotify */);
3589 }
3590
3591 return rc;
3592}
3593
3594/**
3595 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqWrite}
3596 */
3597static DECLCALLBACK(int) drvvdIoReqWrite(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq, uint64_t off, size_t cbWrite)
3598{
3599 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3600 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3601 VDIOREQSTATE enmState = (VDIOREQSTATE)ASMAtomicReadU32((volatile uint32_t *)&pIoReq->enmState);
3602
3603 if (RT_UNLIKELY(enmState == VDIOREQSTATE_CANCELED))
3604 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3605
3606 if (RT_UNLIKELY(enmState != VDIOREQSTATE_ALLOCATED))
3607 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3608
3609 pIoReq->enmType = PDMMEDIAEXIOREQTYPE_WRITE;
3610 pIoReq->tsSubmit = RTTimeMilliTS();
3611 pIoReq->ReadWrite.offStart = off;
3612 pIoReq->ReadWrite.cbReq = cbWrite;
3613 pIoReq->ReadWrite.cbReqLeft = cbWrite;
3614 /* Allocate a suitable I/O buffer for this request. */
3615 int rc = drvvdMediaExIoReqBufAlloc(pThis, pIoReq, cbWrite);
3616 if (rc == VINF_SUCCESS)
3617 {
3618 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_ACTIVE, VDIOREQSTATE_ALLOCATED);
3619 if (RT_UNLIKELY(!fXchg))
3620 {
3621 /* Must have been canceled inbetween. */
3622 Assert(pIoReq->enmState == VDIOREQSTATE_CANCELED);
3623 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3624 }
3625 ASMAtomicIncU32(&pThis->cIoReqsActive);
3626
3627 rc = drvvdMediaExIoReqReadWriteProcess(pThis, pIoReq, false /* fUpNotify */);
3628 }
3629
3630 return rc;
3631}
3632
3633/**
3634 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqFlush}
3635 */
3636static DECLCALLBACK(int) drvvdIoReqFlush(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq)
3637{
3638 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3639 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3640 VDIOREQSTATE enmState = (VDIOREQSTATE)ASMAtomicReadU32((volatile uint32_t *)&pIoReq->enmState);
3641
3642 if (RT_UNLIKELY(enmState == VDIOREQSTATE_CANCELED))
3643 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3644
3645 if (RT_UNLIKELY(enmState != VDIOREQSTATE_ALLOCATED))
3646 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3647
3648 pIoReq->enmType = PDMMEDIAEXIOREQTYPE_FLUSH;
3649 pIoReq->tsSubmit = RTTimeMilliTS();
3650 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_ACTIVE, VDIOREQSTATE_ALLOCATED);
3651 if (RT_UNLIKELY(!fXchg))
3652 {
3653 /* Must have been canceled inbetween. */
3654 Assert(pIoReq->enmState == VDIOREQSTATE_CANCELED);
3655 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3656 }
3657
3658 ASMAtomicIncU32(&pThis->cIoReqsActive);
3659 int rc = drvvdMediaExIoReqFlushWrapper(pThis, pIoReq);
3660 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
3661 rc = VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS;
3662 else if (rc == VINF_VD_ASYNC_IO_FINISHED)
3663 rc = VINF_SUCCESS;
3664
3665 if (rc != VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS)
3666 rc = drvvdMediaExIoReqCompleteWorker(pThis, pIoReq, rc, false /* fUpNotify */);
3667
3668 return rc;
3669}
3670
3671/**
3672 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqDiscard}
3673 */
3674static DECLCALLBACK(int) drvvdIoReqDiscard(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq, unsigned cRangesMax)
3675{
3676 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3677 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3678 VDIOREQSTATE enmState = (VDIOREQSTATE)ASMAtomicReadU32((volatile uint32_t *)&pIoReq->enmState);
3679
3680 if (RT_UNLIKELY(enmState == VDIOREQSTATE_CANCELED))
3681 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3682
3683 if (RT_UNLIKELY(enmState != VDIOREQSTATE_ALLOCATED))
3684 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3685
3686 /* Copy the ranges over now, this can be optimized in the future. */
3687 pIoReq->Discard.paRanges = (PRTRANGE)RTMemAllocZ(cRangesMax * sizeof(RTRANGE));
3688 if (RT_UNLIKELY(!pIoReq->Discard.paRanges))
3689 return VERR_NO_MEMORY;
3690
3691 int rc = pThis->pDrvMediaExPort->pfnIoReqQueryDiscardRanges(pThis->pDrvMediaExPort, pIoReq, &pIoReq->abAlloc[0],
3692 0, cRangesMax, pIoReq->Discard.paRanges,
3693 &pIoReq->Discard.cRanges);
3694 if (RT_SUCCESS(rc))
3695 {
3696 pIoReq->enmType = PDMMEDIAEXIOREQTYPE_DISCARD;
3697 pIoReq->tsSubmit = RTTimeMilliTS();
3698 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_ACTIVE, VDIOREQSTATE_ALLOCATED);
3699 if (RT_UNLIKELY(!fXchg))
3700 {
3701 /* Must have been canceled inbetween. */
3702 Assert(pIoReq->enmState == VDIOREQSTATE_CANCELED);
3703 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3704 }
3705
3706 ASMAtomicIncU32(&pThis->cIoReqsActive);
3707 rc = drvvdMediaExIoReqDiscardWrapper(pThis, pIoReq);
3708 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
3709 rc = VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS;
3710 else if (rc == VINF_VD_ASYNC_IO_FINISHED)
3711 rc = VINF_SUCCESS;
3712
3713 if (rc != VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS)
3714 rc = drvvdMediaExIoReqCompleteWorker(pThis, pIoReq, rc, false /* fUpNotify */);
3715 }
3716
3717 return rc;
3718}
3719
3720/**
3721 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqSendScsiCmd}
3722 */
3723static DECLCALLBACK(int) drvvdIoReqSendScsiCmd(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq, uint32_t uLun,
3724 const uint8_t *pbCdb, size_t cbCdb, PDMMEDIAEXIOREQSCSITXDIR enmTxDir,
3725 size_t cbBuf, uint8_t *pabSense, size_t cbSense, uint8_t *pu8ScsiSts,
3726 uint32_t cTimeoutMillies)
3727{
3728 RT_NOREF10(pInterface, uLun, pbCdb, cbCdb, enmTxDir, cbBuf, pabSense, cbSense, pu8ScsiSts, cTimeoutMillies);
3729 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3730 VDIOREQSTATE enmState = (VDIOREQSTATE)ASMAtomicReadU32((volatile uint32_t *)&pIoReq->enmState);
3731
3732 if (RT_UNLIKELY(enmState == VDIOREQSTATE_CANCELED))
3733 return VERR_PDM_MEDIAEX_IOREQ_CANCELED;
3734
3735 if (RT_UNLIKELY(enmState != VDIOREQSTATE_ALLOCATED))
3736 return VERR_PDM_MEDIAEX_IOREQ_INVALID_STATE;
3737
3738 return VERR_NOT_SUPPORTED;
3739}
3740
3741/**
3742 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqGetActiveCount}
3743 */
3744static DECLCALLBACK(uint32_t) drvvdIoReqGetActiveCount(PPDMIMEDIAEX pInterface)
3745{
3746 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3747 return ASMAtomicReadU32(&pThis->cIoReqsActive);
3748}
3749
3750/**
3751 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqGetSuspendedCount}
3752 */
3753static DECLCALLBACK(uint32_t) drvvdIoReqGetSuspendedCount(PPDMIMEDIAEX pInterface)
3754{
3755 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3756
3757 AssertReturn(!drvvdMediaExIoReqIsVmRunning(pThis), 0);
3758
3759 uint32_t cIoReqSuspended = 0;
3760 PPDMMEDIAEXIOREQINT pIoReq;
3761 RTCritSectEnter(&pThis->CritSectIoReqRedo);
3762 RTListForEach(&pThis->LstIoReqRedo, pIoReq, PDMMEDIAEXIOREQINT, NdLstWait)
3763 {
3764 cIoReqSuspended++;
3765 }
3766 RTCritSectLeave(&pThis->CritSectIoReqRedo);
3767
3768 return cIoReqSuspended;
3769}
3770
3771/**
3772 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqQuerySuspendedStart}
3773 */
3774static DECLCALLBACK(int) drvvdIoReqQuerySuspendedStart(PPDMIMEDIAEX pInterface, PPDMMEDIAEXIOREQ phIoReq,
3775 void **ppvIoReqAlloc)
3776{
3777 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3778
3779 AssertReturn(!drvvdMediaExIoReqIsVmRunning(pThis), VERR_INVALID_STATE);
3780 AssertReturn(!RTListIsEmpty(&pThis->LstIoReqRedo), VERR_NOT_FOUND);
3781
3782 RTCritSectEnter(&pThis->CritSectIoReqRedo);
3783 PPDMMEDIAEXIOREQINT pIoReq = RTListGetFirst(&pThis->LstIoReqRedo, PDMMEDIAEXIOREQINT, NdLstWait);
3784 *phIoReq = pIoReq;
3785 *ppvIoReqAlloc = &pIoReq->abAlloc[0];
3786 RTCritSectLeave(&pThis->CritSectIoReqRedo);
3787
3788 return VINF_SUCCESS;
3789}
3790
3791/**
3792 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqQuerySuspendedNext}
3793 */
3794static DECLCALLBACK(int) drvvdIoReqQuerySuspendedNext(PPDMIMEDIAEX pInterface, PDMMEDIAEXIOREQ hIoReq,
3795 PPDMMEDIAEXIOREQ phIoReqNext, void **ppvIoReqAllocNext)
3796{
3797 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3798 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3799
3800 AssertReturn(!drvvdMediaExIoReqIsVmRunning(pThis), VERR_INVALID_STATE);
3801 AssertPtrReturn(pIoReq, VERR_INVALID_HANDLE);
3802 AssertReturn(!RTListNodeIsLast(&pThis->LstIoReqRedo, &pIoReq->NdLstWait), VERR_NOT_FOUND);
3803
3804 RTCritSectEnter(&pThis->CritSectIoReqRedo);
3805 PPDMMEDIAEXIOREQINT pIoReqNext = RTListNodeGetNext(&pIoReq->NdLstWait, PDMMEDIAEXIOREQINT, NdLstWait);
3806 *phIoReqNext = pIoReqNext;
3807 *ppvIoReqAllocNext = &pIoReqNext->abAlloc[0];
3808 RTCritSectLeave(&pThis->CritSectIoReqRedo);
3809
3810 return VINF_SUCCESS;
3811}
3812
3813/**
3814 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqSuspendedSave}
3815 */
3816static DECLCALLBACK(int) drvvdIoReqSuspendedSave(PPDMIMEDIAEX pInterface, PSSMHANDLE pSSM, PDMMEDIAEXIOREQ hIoReq)
3817{
3818 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3819 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3820
3821 AssertReturn(!drvvdMediaExIoReqIsVmRunning(pThis), VERR_INVALID_STATE);
3822 AssertPtrReturn(pIoReq, VERR_INVALID_HANDLE);
3823 AssertReturn(pIoReq->enmState == VDIOREQSTATE_SUSPENDED, VERR_INVALID_STATE);
3824
3825 SSMR3PutU32(pSSM, DRVVD_IOREQ_SAVED_STATE_VERSION);
3826 SSMR3PutU32(pSSM, (uint32_t)pIoReq->enmType);
3827 SSMR3PutU32(pSSM, pIoReq->uIoReqId);
3828 SSMR3PutU32(pSSM, pIoReq->fFlags);
3829 if ( pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ
3830 || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_WRITE)
3831 {
3832 SSMR3PutU64(pSSM, pIoReq->ReadWrite.offStart);
3833 SSMR3PutU64(pSSM, pIoReq->ReadWrite.cbReq);
3834 SSMR3PutU64(pSSM, pIoReq->ReadWrite.cbReqLeft);
3835 }
3836 else if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_DISCARD)
3837 {
3838 SSMR3PutU32(pSSM, pIoReq->Discard.cRanges);
3839 for (unsigned i = 0; i < pIoReq->Discard.cRanges; i++)
3840 {
3841 SSMR3PutU64(pSSM, pIoReq->Discard.paRanges[i].offStart);
3842 SSMR3PutU64(pSSM, pIoReq->Discard.paRanges[i].cbRange);
3843 }
3844 }
3845
3846 return SSMR3PutU32(pSSM, UINT32_MAX); /* sanity/terminator */
3847}
3848
3849/**
3850 * @interface_method_impl{PDMIMEDIAEX,pfnIoReqSuspendedLoad}
3851 */
3852static DECLCALLBACK(int) drvvdIoReqSuspendedLoad(PPDMIMEDIAEX pInterface, PSSMHANDLE pSSM, PDMMEDIAEXIOREQ hIoReq)
3853{
3854 PVBOXDISK pThis = RT_FROM_MEMBER(pInterface, VBOXDISK, IMediaEx);
3855 PPDMMEDIAEXIOREQINT pIoReq = hIoReq;
3856
3857 AssertReturn(!drvvdMediaExIoReqIsVmRunning(pThis), VERR_INVALID_STATE);
3858 AssertPtrReturn(pIoReq, VERR_INVALID_HANDLE);
3859 AssertReturn(pIoReq->enmState == VDIOREQSTATE_ALLOCATED, VERR_INVALID_STATE);
3860
3861 uint32_t u32;
3862 uint64_t u64;
3863 int rc = VINF_SUCCESS;
3864 bool fPlaceOnRedoList = true;
3865
3866 SSMR3GetU32(pSSM, &u32);
3867 if (u32 <= DRVVD_IOREQ_SAVED_STATE_VERSION)
3868 {
3869 SSMR3GetU32(pSSM, &u32);
3870 AssertReturn( u32 == PDMMEDIAEXIOREQTYPE_WRITE
3871 || u32 == PDMMEDIAEXIOREQTYPE_READ
3872 || u32 == PDMMEDIAEXIOREQTYPE_DISCARD
3873 || u32 == PDMMEDIAEXIOREQTYPE_FLUSH,
3874 VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3875 pIoReq->enmType = (PDMMEDIAEXIOREQTYPE)u32;
3876
3877 SSMR3GetU32(pSSM, &u32);
3878 AssertReturn(u32 == pIoReq->uIoReqId, VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3879
3880 SSMR3GetU32(pSSM, &u32);
3881 AssertReturn(u32 == pIoReq->fFlags, VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3882
3883 if ( pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ
3884 || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_WRITE)
3885 {
3886 SSMR3GetU64(pSSM, &pIoReq->ReadWrite.offStart);
3887 SSMR3GetU64(pSSM, &u64);
3888 pIoReq->ReadWrite.cbReq = (size_t)u64;
3889 SSMR3GetU64(pSSM, &u64);
3890 pIoReq->ReadWrite.cbReqLeft = (size_t)u64;
3891
3892 /*
3893 * Try to allocate enough I/O buffer, if this fails for some reason put it onto the
3894 * waiting list instead of the redo list.
3895 */
3896 pIoReq->ReadWrite.cbIoBuf = 0;
3897 rc = IOBUFMgrAllocBuf(pThis->hIoBufMgr, &pIoReq->ReadWrite.IoBuf, pIoReq->ReadWrite.cbReqLeft,
3898 &pIoReq->ReadWrite.cbIoBuf);
3899 if (rc == VERR_NO_MEMORY)
3900 {
3901 pIoReq->enmState = VDIOREQSTATE_ALLOCATED;
3902 ASMAtomicIncU32(&pThis->cIoReqsWaiting);
3903 RTListAppend(&pThis->LstIoReqIoBufWait, &pIoReq->NdLstWait);
3904 fPlaceOnRedoList = false;
3905 rc = VINF_SUCCESS;
3906 }
3907 else
3908 {
3909 pIoReq->ReadWrite.fDirectBuf = false;
3910 pIoReq->ReadWrite.pSgBuf = &pIoReq->ReadWrite.IoBuf.SgBuf;
3911 }
3912 }
3913 else if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_DISCARD)
3914 {
3915 rc = SSMR3GetU32(pSSM, &pIoReq->Discard.cRanges);
3916 if (RT_SUCCESS(rc))
3917 {
3918 pIoReq->Discard.paRanges = (PRTRANGE)RTMemAllocZ(pIoReq->Discard.cRanges * sizeof(RTRANGE));
3919 if (RT_LIKELY(pIoReq->Discard.paRanges))
3920 {
3921 for (unsigned i = 0; i < pIoReq->Discard.cRanges; i++)
3922 {
3923 SSMR3GetU64(pSSM, &pIoReq->Discard.paRanges[i].offStart);
3924 SSMR3GetU64(pSSM, &u64);
3925 pIoReq->Discard.paRanges[i].cbRange = (size_t)u64;
3926 }
3927 }
3928 else
3929 rc = VERR_NO_MEMORY;
3930 }
3931 }
3932
3933 if (RT_SUCCESS(rc))
3934 rc = SSMR3GetU32(pSSM, &u32); /* sanity/terminator */
3935 if (RT_SUCCESS(rc))
3936 AssertReturn(u32 == UINT32_MAX, VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3937 if ( RT_SUCCESS(rc)
3938 && fPlaceOnRedoList)
3939 {
3940 /* Mark as suspended */
3941 pIoReq->enmState = VDIOREQSTATE_SUSPENDED;
3942
3943 /* Link into suspended list so it gets kicked off again when we resume. */
3944 RTCritSectEnter(&pThis->CritSectIoReqRedo);
3945 RTListAppend(&pThis->LstIoReqRedo, &pIoReq->NdLstWait);
3946 RTCritSectLeave(&pThis->CritSectIoReqRedo);
3947 }
3948 }
3949
3950 return rc;
3951}
3952
3953/**
3954 * Loads all configured plugins.
3955 *
3956 * @returns VBox status code.
3957 * @param pCfg CFGM node holding plugin list.
3958 */
3959static int drvvdLoadPlugins(PCFGMNODE pCfg)
3960{
3961 PCFGMNODE pCfgPlugins = CFGMR3GetChild(pCfg, "Plugins");
3962
3963 if (pCfgPlugins)
3964 {
3965 PCFGMNODE pPluginCur = CFGMR3GetFirstChild(pCfgPlugins);
3966 while (pPluginCur)
3967 {
3968 int rc = VINF_SUCCESS;
3969 char *pszPluginFilename = NULL;
3970 rc = CFGMR3QueryStringAlloc(pPluginCur, "Path", &pszPluginFilename);
3971 if (RT_SUCCESS(rc))
3972 rc = VDPluginLoadFromFilename(pszPluginFilename);
3973
3974 if (RT_FAILURE(rc))
3975 LogRel(("VD: Failed to load plugin '%s' with %Rrc, continuing\n", pszPluginFilename, rc));
3976
3977 pPluginCur = CFGMR3GetNextChild(pPluginCur);
3978 }
3979 }
3980
3981 return VINF_SUCCESS;
3982}
3983
3984
3985/**
3986 * Sets up the disk filter chain.
3987 *
3988 * @returns VBox status code.
3989 * @param pThis The disk instance.
3990 * @param pCfg CFGM node holding the filter parameters.
3991 */
3992static int drvvdSetupFilters(PVBOXDISK pThis, PCFGMNODE pCfg)
3993{
3994 int rc = VINF_SUCCESS;
3995 PCFGMNODE pCfgFilter = CFGMR3GetChild(pCfg, "Filters");
3996
3997 if (pCfgFilter)
3998 {
3999 PCFGMNODE pCfgFilterConfig = CFGMR3GetChild(pCfgFilter, "VDConfig");
4000 char *pszFilterName = NULL;
4001 VDINTERFACECONFIG VDIfConfig;
4002 PVDINTERFACE pVDIfsFilter = NULL;
4003
4004 rc = CFGMR3QueryStringAlloc(pCfgFilter, "FilterName", &pszFilterName);
4005 if (RT_SUCCESS(rc))
4006 {
4007 VDIfConfig.pfnAreKeysValid = drvvdCfgAreKeysValid;
4008 VDIfConfig.pfnQuerySize = drvvdCfgQuerySize;
4009 VDIfConfig.pfnQuery = drvvdCfgQuery;
4010 VDIfConfig.pfnQueryBytes = drvvdCfgQueryBytes;
4011 rc = VDInterfaceAdd(&VDIfConfig.Core, "DrvVD_Config", VDINTERFACETYPE_CONFIG,
4012 pCfgFilterConfig, sizeof(VDINTERFACECONFIG), &pVDIfsFilter);
4013 AssertRC(rc);
4014
4015 rc = VDFilterAdd(pThis->pDisk, pszFilterName, VD_FILTER_FLAGS_DEFAULT, pVDIfsFilter);
4016
4017 MMR3HeapFree(pszFilterName);
4018 }
4019 }
4020
4021 return rc;
4022}
4023
4024
4025/**
4026 * Translates a PDMMEDIATYPE value into a string.
4027 *
4028 * @returns Read only string.
4029 * @param enmType The type value.
4030 */
4031static const char *drvvdGetTypeName(PDMMEDIATYPE enmType)
4032{
4033 switch (enmType)
4034 {
4035 case PDMMEDIATYPE_ERROR: return "ERROR";
4036 case PDMMEDIATYPE_FLOPPY_360: return "FLOPPY_360";
4037 case PDMMEDIATYPE_FLOPPY_720: return "FLOPPY_720";
4038 case PDMMEDIATYPE_FLOPPY_1_20: return "FLOPPY_1_20";
4039 case PDMMEDIATYPE_FLOPPY_1_44: return "FLOPPY_1_44";
4040 case PDMMEDIATYPE_FLOPPY_2_88: return "FLOPPY_2_88";
4041 case PDMMEDIATYPE_FLOPPY_FAKE_15_6: return "FLOPPY_FAKE_15_6";
4042 case PDMMEDIATYPE_FLOPPY_FAKE_63_5: return "FLOPPY_FAKE_63_5";
4043 case PDMMEDIATYPE_CDROM: return "CDROM";
4044 case PDMMEDIATYPE_DVD: return "DVD";
4045 case PDMMEDIATYPE_HARD_DISK: return "HARD_DISK";
4046 default: return "Unknown";
4047 }
4048}
4049
4050/**
4051 * Returns the appropriate PDMMEDIATYPE for t he given string.
4052 *
4053 * @returns PDMMEDIATYPE
4054 * @param pszType The string representation of the media type.
4055 */
4056static PDMMEDIATYPE drvvdGetMediaTypeFromString(const char *pszType)
4057{
4058 PDMMEDIATYPE enmType = PDMMEDIATYPE_ERROR;
4059
4060 if (!strcmp(pszType, "HardDisk"))
4061 enmType = PDMMEDIATYPE_HARD_DISK;
4062 else if (!strcmp(pszType, "DVD"))
4063 enmType = PDMMEDIATYPE_DVD;
4064 else if (!strcmp(pszType, "CDROM"))
4065 enmType = PDMMEDIATYPE_CDROM;
4066 else if (!strcmp(pszType, "Floppy 2.88"))
4067 enmType = PDMMEDIATYPE_FLOPPY_2_88;
4068 else if (!strcmp(pszType, "Floppy 1.44"))
4069 enmType = PDMMEDIATYPE_FLOPPY_1_44;
4070 else if (!strcmp(pszType, "Floppy 1.20"))
4071 enmType = PDMMEDIATYPE_FLOPPY_1_20;
4072 else if (!strcmp(pszType, "Floppy 720"))
4073 enmType = PDMMEDIATYPE_FLOPPY_720;
4074 else if (!strcmp(pszType, "Floppy 360"))
4075 enmType = PDMMEDIATYPE_FLOPPY_360;
4076 else if (!strcmp(pszType, "Floppy 15.6"))
4077 enmType = PDMMEDIATYPE_FLOPPY_FAKE_15_6;
4078 else if (!strcmp(pszType, "Floppy 63.5"))
4079 enmType = PDMMEDIATYPE_FLOPPY_FAKE_63_5;
4080
4081 return enmType;
4082}
4083
4084/**
4085 * Converts PDMMEDIATYPE to the appropriate VDTYPE.
4086 *
4087 * @returns The VDTYPE.
4088 * @param enmType The PDMMEDIATYPE to convert from.
4089 */
4090static VDTYPE drvvdGetVDFromMediaType(PDMMEDIATYPE enmType)
4091{
4092 if (PDMMEDIATYPE_IS_FLOPPY(enmType))
4093 return VDTYPE_FLOPPY;
4094 else if (enmType == PDMMEDIATYPE_DVD || enmType == PDMMEDIATYPE_CDROM)
4095 return VDTYPE_DVD;
4096 else if (enmType == PDMMEDIATYPE_HARD_DISK)
4097 return VDTYPE_HDD;
4098
4099 AssertMsgFailed(("Invalid media type %d{%s} given!\n", enmType, drvvdGetTypeName(enmType)));
4100 return VDTYPE_HDD;
4101}
4102
4103
4104/*********************************************************************************************************************************
4105* Base interface methods *
4106*********************************************************************************************************************************/
4107
4108/**
4109 * @interface_method_impl{PDMIBASE,pfnQueryInterface}
4110 */
4111static DECLCALLBACK(void *) drvvdQueryInterface(PPDMIBASE pInterface, const char *pszIID)
4112{
4113 PPDMDRVINS pDrvIns = PDMIBASE_2_PDMDRV(pInterface);
4114 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4115
4116 PDMIBASE_RETURN_INTERFACE(pszIID, PDMIBASE, &pDrvIns->IBase);
4117 PDMIBASE_RETURN_INTERFACE(pszIID, PDMIMEDIA, &pThis->IMedia);
4118 PDMIBASE_RETURN_INTERFACE(pszIID, PDMIMOUNT, pThis->fMountable ? &pThis->IMount : NULL);
4119 PDMIBASE_RETURN_INTERFACE(pszIID, PDMIMEDIAEX, pThis->pDrvMediaExPort ? &pThis->IMediaEx : NULL);
4120 return NULL;
4121}
4122
4123
4124/*********************************************************************************************************************************
4125* Saved state notification methods *
4126*********************************************************************************************************************************/
4127
4128/**
4129 * Load done callback for re-opening the image writable during teleportation.
4130 *
4131 * This is called both for successful and failed load runs, we only care about
4132 * successful ones.
4133 *
4134 * @returns VBox status code.
4135 * @param pDrvIns The driver instance.
4136 * @param pSSM The saved state handle.
4137 */
4138static DECLCALLBACK(int) drvvdLoadDone(PPDMDRVINS pDrvIns, PSSMHANDLE pSSM)
4139{
4140 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4141 Assert(!pThis->fErrorUseRuntime);
4142
4143 /* Drop out if we don't have any work to do or if it's a failed load. */
4144 if ( !pThis->fTempReadOnly
4145 || RT_FAILURE(SSMR3HandleGetStatus(pSSM)))
4146 return VINF_SUCCESS;
4147
4148 int rc = drvvdSetWritable(pThis);
4149 if (RT_FAILURE(rc)) /** @todo does the bugger set any errors? */
4150 return SSMR3SetLoadError(pSSM, rc, RT_SRC_POS,
4151 N_("Failed to write lock the images"));
4152 return VINF_SUCCESS;
4153}
4154
4155
4156/*********************************************************************************************************************************
4157* Driver methods *
4158*********************************************************************************************************************************/
4159
4160/**
4161 * Worker for the power off or destruct callback.
4162 *
4163 * @returns nothing.
4164 * @param pDrvIns The driver instance.
4165 */
4166static void drvvdPowerOffOrDestructOrUnmount(PPDMDRVINS pDrvIns)
4167{
4168 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4169 LogFlowFunc(("\n"));
4170
4171 RTSEMFASTMUTEX mutex;
4172 ASMAtomicXchgHandle(&pThis->MergeCompleteMutex, NIL_RTSEMFASTMUTEX, &mutex);
4173 if (mutex != NIL_RTSEMFASTMUTEX)
4174 {
4175 /* Request the semaphore to wait until a potentially running merge
4176 * operation has been finished. */
4177 int rc = RTSemFastMutexRequest(mutex);
4178 AssertRC(rc);
4179 pThis->fMergePending = false;
4180 rc = RTSemFastMutexRelease(mutex);
4181 AssertRC(rc);
4182 rc = RTSemFastMutexDestroy(mutex);
4183 AssertRC(rc);
4184 }
4185
4186 if (RT_VALID_PTR(pThis->pBlkCache))
4187 {
4188 PDMR3BlkCacheRelease(pThis->pBlkCache);
4189 pThis->pBlkCache = NULL;
4190 }
4191
4192 if (RT_VALID_PTR(pThis->pDisk))
4193 {
4194 VDDestroy(pThis->pDisk);
4195 pThis->pDisk = NULL;
4196 }
4197 drvvdFreeImages(pThis);
4198}
4199
4200/**
4201 * @copydoc FNPDMDRVPOWEROFF
4202 */
4203static DECLCALLBACK(void) drvvdPowerOff(PPDMDRVINS pDrvIns)
4204{
4205 PDMDRV_CHECK_VERSIONS_RETURN_VOID(pDrvIns);
4206 drvvdPowerOffOrDestructOrUnmount(pDrvIns);
4207}
4208
4209/**
4210 * @callback_method_impl{FNPDMDRVRESUME}
4211 *
4212 * VM resume notification that we use to undo what the temporary read-only image
4213 * mode set by drvvdSuspend.
4214 *
4215 * Also switch to runtime error mode if we're resuming after a state load
4216 * without having been powered on first.
4217 *
4218 * @todo The VMSetError vs VMSetRuntimeError mess must be fixed elsewhere,
4219 * we're making assumptions about Main behavior here!
4220 */
4221static DECLCALLBACK(void) drvvdResume(PPDMDRVINS pDrvIns)
4222{
4223 LogFlowFunc(("\n"));
4224 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4225
4226 drvvdSetWritable(pThis);
4227 pThis->fErrorUseRuntime = true;
4228
4229 if (pThis->pBlkCache)
4230 {
4231 int rc = PDMR3BlkCacheResume(pThis->pBlkCache);
4232 AssertRC(rc);
4233 }
4234
4235 if (pThis->pDrvMediaExPort)
4236 {
4237 /* Kick of any request we have to redo. */
4238 PPDMMEDIAEXIOREQINT pIoReq, pIoReqNext;
4239 RTCritSectEnter(&pThis->CritSectIoReqRedo);
4240 RTListForEachSafe(&pThis->LstIoReqRedo, pIoReq, pIoReqNext, PDMMEDIAEXIOREQINT, NdLstWait)
4241 {
4242 int rc = VINF_SUCCESS;
4243 bool fXchg = ASMAtomicCmpXchgU32((volatile uint32_t *)&pIoReq->enmState, VDIOREQSTATE_ACTIVE, VDIOREQSTATE_SUSPENDED);
4244
4245 RTListNodeRemove(&pIoReq->NdLstWait);
4246 ASMAtomicIncU32(&pThis->cIoReqsActive);
4247
4248 if (fXchg)
4249 {
4250 pThis->pDrvMediaExPort->pfnIoReqStateChanged(pThis->pDrvMediaExPort, pIoReq, &pIoReq->abAlloc[0],
4251 PDMMEDIAEXIOREQSTATE_ACTIVE);
4252 if ( pIoReq->enmType == PDMMEDIAEXIOREQTYPE_READ
4253 || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_WRITE)
4254 rc = drvvdMediaExIoReqReadWriteProcess(pThis, pIoReq, true /* fUpNotify */);
4255 else if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_FLUSH)
4256 {
4257 rc = drvvdMediaExIoReqFlushWrapper(pThis, pIoReq);
4258 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
4259 rc = VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS;
4260 else if (rc == VINF_VD_ASYNC_IO_FINISHED)
4261 rc = VINF_SUCCESS;
4262 }
4263 else if (pIoReq->enmType == PDMMEDIAEXIOREQTYPE_DISCARD)
4264 {
4265 rc = drvvdMediaExIoReqDiscardWrapper(pThis, pIoReq);
4266 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
4267 rc = VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS;
4268 else if (rc == VINF_VD_ASYNC_IO_FINISHED)
4269 rc = VINF_SUCCESS;
4270 }
4271 else
4272 AssertMsgFailed(("Invalid request type %u\n", pIoReq->enmType));
4273
4274 /* The read write process will call the completion callback on its own. */
4275 if ( rc != VINF_PDM_MEDIAEX_IOREQ_IN_PROGRESS
4276 && ( pIoReq->enmType == PDMMEDIAEXIOREQTYPE_DISCARD
4277 || pIoReq->enmType == PDMMEDIAEXIOREQTYPE_FLUSH))
4278 {
4279 Assert( ( pIoReq->enmType != PDMMEDIAEXIOREQTYPE_WRITE
4280 && pIoReq->enmType != PDMMEDIAEXIOREQTYPE_READ)
4281 || !pIoReq->ReadWrite.cbReqLeft
4282 || RT_FAILURE(rc));
4283 drvvdMediaExIoReqCompleteWorker(pThis, pIoReq, rc, true /* fUpNotify */);
4284 }
4285
4286 }
4287 else
4288 {
4289 /* Request was canceled inbetween, so don't care and notify the owner about the completed request. */
4290 Assert(pIoReq->enmState == VDIOREQSTATE_CANCELED);
4291 drvvdMediaExIoReqCompleteWorker(pThis, pIoReq, VERR_PDM_MEDIAEX_IOREQ_CANCELED, true /* fUpNotify */);
4292 }
4293 }
4294 Assert(RTListIsEmpty(&pThis->LstIoReqRedo));
4295 RTCritSectLeave(&pThis->CritSectIoReqRedo);
4296 }
4297}
4298
4299/**
4300 * @callback_method_impl{FNPDMDRVSUSPEND}
4301 *
4302 * When the VM is being suspended, temporarily change to read-only image mode.
4303 *
4304 * This is important for several reasons:
4305 * -# It makes sure that there are no pending writes to the image. Most
4306 * backends implements this by closing and reopening the image in read-only
4307 * mode.
4308 * -# It allows Main to read the images during snapshotting without having
4309 * to account for concurrent writes.
4310 * -# This is essential for making teleportation targets sharing images work
4311 * right. Both with regards to caching and with regards to file sharing
4312 * locks (RTFILE_O_DENY_*). (See also drvvdLoadDone.)
4313 */
4314static DECLCALLBACK(void) drvvdSuspend(PPDMDRVINS pDrvIns)
4315{
4316 LogFlowFunc(("\n"));
4317 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4318
4319 if (pThis->pBlkCache)
4320 {
4321 int rc = PDMR3BlkCacheSuspend(pThis->pBlkCache);
4322 AssertRC(rc);
4323 }
4324
4325 drvvdSetReadonly(pThis);
4326}
4327
4328/**
4329 * @callback_method_impl{FNPDMDRVPOWERON}
4330 */
4331static DECLCALLBACK(void) drvvdPowerOn(PPDMDRVINS pDrvIns)
4332{
4333 LogFlowFunc(("\n"));
4334 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4335 drvvdSetWritable(pThis);
4336 pThis->fErrorUseRuntime = true;
4337}
4338
4339/**
4340 * @callback_method_impl{FNPDMDRVRESET}
4341 */
4342static DECLCALLBACK(void) drvvdReset(PPDMDRVINS pDrvIns)
4343{
4344 LogFlowFunc(("\n"));
4345 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4346
4347 if (pThis->pBlkCache)
4348 {
4349 int rc = PDMR3BlkCacheClear(pThis->pBlkCache);
4350 AssertRC(rc);
4351 }
4352
4353 if (pThis->fBootAccelEnabled)
4354 {
4355 pThis->fBootAccelActive = true;
4356 pThis->cbDataValid = 0;
4357 pThis->offDisk = 0;
4358 }
4359}
4360
4361/**
4362 * @callback_method_impl{FNPDMDRVDESTRUCT}
4363 */
4364static DECLCALLBACK(void) drvvdDestruct(PPDMDRVINS pDrvIns)
4365{
4366 PDMDRV_CHECK_VERSIONS_RETURN_VOID(pDrvIns);
4367 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4368 LogFlowFunc(("\n"));
4369
4370 /*
4371 * Make sure the block cache and disks are closed when this driver is
4372 * destroyed. This method will get called without calling the power off
4373 * callback first when we reconfigure the driver chain after a snapshot.
4374 */
4375 drvvdPowerOffOrDestructOrUnmount(pDrvIns);
4376 if (pThis->MergeLock != NIL_RTSEMRW)
4377 {
4378 int rc = RTSemRWDestroy(pThis->MergeLock);
4379 AssertRC(rc);
4380 pThis->MergeLock = NIL_RTSEMRW;
4381 }
4382 if (pThis->pbData)
4383 {
4384 RTMemFree(pThis->pbData);
4385 pThis->pbData = NULL;
4386 }
4387 if (pThis->pszBwGroup)
4388 {
4389 MMR3HeapFree(pThis->pszBwGroup);
4390 pThis->pszBwGroup = NULL;
4391 }
4392 if (pThis->hHbdMgr != NIL_HBDMGR)
4393 HBDMgrDestroy(pThis->hHbdMgr);
4394 if (pThis->hIoReqCache != NIL_RTMEMCACHE)
4395 RTMemCacheDestroy(pThis->hIoReqCache);
4396 if (pThis->hIoBufMgr != NIL_IOBUFMGR)
4397 IOBUFMgrDestroy(pThis->hIoBufMgr);
4398 if (RTCritSectIsInitialized(&pThis->CritSectIoReqsIoBufWait))
4399 RTCritSectDelete(&pThis->CritSectIoReqsIoBufWait);
4400 if (RTCritSectIsInitialized(&pThis->CritSectIoReqRedo))
4401 RTCritSectDelete(&pThis->CritSectIoReqRedo);
4402 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aIoReqAllocBins); i++)
4403 if (pThis->aIoReqAllocBins[i].hMtxLstIoReqAlloc != NIL_RTSEMFASTMUTEX)
4404 RTSemFastMutexDestroy(pThis->aIoReqAllocBins[i].hMtxLstIoReqAlloc);
4405
4406 PDMDrvHlpSTAMDeregister(pDrvIns, &pThis->StatQueryBufAttempts);
4407 PDMDrvHlpSTAMDeregister(pDrvIns, &pThis->StatQueryBufSuccess);
4408}
4409
4410/**
4411 * @callback_method_impl{FNPDMDRVCONSTRUCT,
4412 * Construct a VBox disk media driver instance.}
4413 */
4414static DECLCALLBACK(int) drvvdConstruct(PPDMDRVINS pDrvIns, PCFGMNODE pCfg, uint32_t fFlags)
4415{
4416 RT_NOREF(fFlags);
4417 LogFlowFunc(("\n"));
4418 PDMDRV_CHECK_VERSIONS_RETURN(pDrvIns);
4419 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
4420 int rc = VINF_SUCCESS;
4421 char *pszName = NULL; /* The path of the disk image file. */
4422 char *pszFormat = NULL; /* The format backed to use for this image. */
4423 char *pszCachePath = NULL; /* The path to the cache image. */
4424 char *pszCacheFormat = NULL; /* The format backend to use for the cache image. */
4425 bool fReadOnly = false; /* True if the media is read-only. */
4426 bool fMaybeReadOnly = false; /* True if the media may or may not be read-only. */
4427 bool fHonorZeroWrites = false; /* True if zero blocks should be written. */
4428
4429 /*
4430 * Init the static parts.
4431 */
4432 pDrvIns->IBase.pfnQueryInterface = drvvdQueryInterface;
4433 pThis->pDrvIns = pDrvIns;
4434 pThis->fTempReadOnly = false;
4435 pThis->pDisk = NULL;
4436 pThis->fAsyncIOSupported = false;
4437 pThis->fShareable = false;
4438 pThis->fMergePending = false;
4439 pThis->MergeCompleteMutex = NIL_RTSEMFASTMUTEX;
4440 pThis->MergeLock = NIL_RTSEMRW;
4441 pThis->uMergeSource = VD_LAST_IMAGE;
4442 pThis->uMergeTarget = VD_LAST_IMAGE;
4443 pThis->pCfgCrypto = NULL;
4444 pThis->pIfSecKey = NULL;
4445 pThis->hIoReqCache = NIL_RTMEMCACHE;
4446 pThis->hIoBufMgr = NIL_IOBUFMGR;
4447
4448 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aIoReqAllocBins); i++)
4449 pThis->aIoReqAllocBins[i].hMtxLstIoReqAlloc = NIL_RTSEMFASTMUTEX;
4450
4451 /* IMedia */
4452 pThis->IMedia.pfnRead = drvvdRead;
4453 pThis->IMedia.pfnReadPcBios = drvvdReadPcBios;
4454 pThis->IMedia.pfnWrite = drvvdWrite;
4455 pThis->IMedia.pfnFlush = drvvdFlush;
4456 pThis->IMedia.pfnMerge = drvvdMerge;
4457 pThis->IMedia.pfnSetSecKeyIf = drvvdSetSecKeyIf;
4458 pThis->IMedia.pfnGetSize = drvvdGetSize;
4459 pThis->IMedia.pfnGetSectorSize = drvvdGetSectorSize;
4460 pThis->IMedia.pfnIsReadOnly = drvvdIsReadOnly;
4461 pThis->IMedia.pfnIsNonRotational = drvvdIsNonRotational;
4462 pThis->IMedia.pfnBiosGetPCHSGeometry = drvvdBiosGetPCHSGeometry;
4463 pThis->IMedia.pfnBiosSetPCHSGeometry = drvvdBiosSetPCHSGeometry;
4464 pThis->IMedia.pfnBiosGetLCHSGeometry = drvvdBiosGetLCHSGeometry;
4465 pThis->IMedia.pfnBiosSetLCHSGeometry = drvvdBiosSetLCHSGeometry;
4466 pThis->IMedia.pfnBiosIsVisible = drvvdBiosIsVisible;
4467 pThis->IMedia.pfnGetType = drvvdGetType;
4468 pThis->IMedia.pfnGetUuid = drvvdGetUuid;
4469 pThis->IMedia.pfnDiscard = drvvdDiscard;
4470 pThis->IMedia.pfnSendCmd = NULL;
4471
4472 /* IMount */
4473 pThis->IMount.pfnUnmount = drvvdUnmount;
4474 pThis->IMount.pfnIsMounted = drvvdIsMounted;
4475 pThis->IMount.pfnLock = drvvdLock;
4476 pThis->IMount.pfnUnlock = drvvdUnlock;
4477 pThis->IMount.pfnIsLocked = drvvdIsLocked;
4478
4479 /* IMediaEx */
4480 pThis->IMediaEx.pfnQueryFeatures = drvvdQueryFeatures;
4481 pThis->IMediaEx.pfnIoReqAllocSizeSet = drvvdIoReqAllocSizeSet;
4482 pThis->IMediaEx.pfnIoReqAlloc = drvvdIoReqAlloc;
4483 pThis->IMediaEx.pfnIoReqFree = drvvdIoReqFree;
4484 pThis->IMediaEx.pfnIoReqQueryResidual = drvvdIoReqQueryResidual;
4485 pThis->IMediaEx.pfnIoReqQueryXferSize = drvvdIoReqQueryXferSize;
4486 pThis->IMediaEx.pfnIoReqCancelAll = drvvdIoReqCancelAll;
4487 pThis->IMediaEx.pfnIoReqCancel = drvvdIoReqCancel;
4488 pThis->IMediaEx.pfnIoReqRead = drvvdIoReqRead;
4489 pThis->IMediaEx.pfnIoReqWrite = drvvdIoReqWrite;
4490 pThis->IMediaEx.pfnIoReqFlush = drvvdIoReqFlush;
4491 pThis->IMediaEx.pfnIoReqDiscard = drvvdIoReqDiscard;
4492 pThis->IMediaEx.pfnIoReqSendScsiCmd = drvvdIoReqSendScsiCmd;
4493 pThis->IMediaEx.pfnIoReqGetActiveCount = drvvdIoReqGetActiveCount;
4494 pThis->IMediaEx.pfnIoReqGetSuspendedCount = drvvdIoReqGetSuspendedCount;
4495 pThis->IMediaEx.pfnIoReqQuerySuspendedStart = drvvdIoReqQuerySuspendedStart;
4496 pThis->IMediaEx.pfnIoReqQuerySuspendedNext = drvvdIoReqQuerySuspendedNext;
4497 pThis->IMediaEx.pfnIoReqSuspendedSave = drvvdIoReqSuspendedSave;
4498 pThis->IMediaEx.pfnIoReqSuspendedLoad = drvvdIoReqSuspendedLoad;
4499
4500 /* Initialize supported VD interfaces. */
4501 pThis->pVDIfsDisk = NULL;
4502
4503 pThis->VDIfError.pfnError = drvvdErrorCallback;
4504 pThis->VDIfError.pfnMessage = NULL;
4505 rc = VDInterfaceAdd(&pThis->VDIfError.Core, "DrvVD_VDIError", VDINTERFACETYPE_ERROR,
4506 pDrvIns, sizeof(VDINTERFACEERROR), &pThis->pVDIfsDisk);
4507 AssertRC(rc);
4508
4509 /* List of images is empty now. */
4510 pThis->pImages = NULL;
4511
4512 pThis->pDrvMediaPort = PDMIBASE_QUERY_INTERFACE(pDrvIns->pUpBase, PDMIMEDIAPORT);
4513 if (!pThis->pDrvMediaPort)
4514 return PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_MISSING_INTERFACE_ABOVE,
4515 N_("No media port interface above"));
4516
4517 pThis->pDrvMountNotify = PDMIBASE_QUERY_INTERFACE(pDrvIns->pUpBase, PDMIMOUNTNOTIFY);
4518
4519 /*
4520 * Try to attach the optional extended media interface port above and initialize associated
4521 * structures if available.
4522 */
4523 pThis->pDrvMediaExPort = PDMIBASE_QUERY_INTERFACE(pDrvIns->pUpBase, PDMIMEDIAEXPORT);
4524 if (pThis->pDrvMediaExPort)
4525 {
4526 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aIoReqAllocBins); i++)
4527 {
4528 rc = RTSemFastMutexCreate(&pThis->aIoReqAllocBins[i].hMtxLstIoReqAlloc);
4529 if (RT_FAILURE(rc))
4530 break;
4531 RTListInit(&pThis->aIoReqAllocBins[i].LstIoReqAlloc);
4532 }
4533
4534 if (RT_SUCCESS(rc))
4535 rc = RTCritSectInit(&pThis->CritSectIoReqsIoBufWait);
4536
4537 if (RT_SUCCESS(rc))
4538 rc = RTCritSectInit(&pThis->CritSectIoReqRedo);
4539
4540 if (RT_FAILURE(rc))
4541 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Creating Mutex failed"));
4542
4543 RTListInit(&pThis->LstIoReqIoBufWait);
4544 RTListInit(&pThis->LstIoReqRedo);
4545 }
4546
4547 /* Before we access any VD API load all given plugins. */
4548 rc = drvvdLoadPlugins(pCfg);
4549 if (RT_FAILURE(rc))
4550 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Loading VD plugins failed"));
4551
4552 /*
4553 * Validate configuration and find all parent images.
4554 * It's sort of up side down from the image dependency tree.
4555 */
4556 bool fHostIP = false;
4557 bool fUseNewIo = false;
4558 bool fUseBlockCache = false;
4559 bool fDiscard = false;
4560 bool fInformAboutZeroBlocks = false;
4561 bool fSkipConsistencyChecks = false;
4562 bool fEmptyDrive = false;
4563 unsigned iLevel = 0;
4564 PCFGMNODE pCurNode = pCfg;
4565 uint32_t cbIoBufMax = 0;
4566
4567 for (;;)
4568 {
4569 bool fValid;
4570
4571 if (pCurNode == pCfg)
4572 {
4573 /* Toplevel configuration additionally contains the global image
4574 * open flags. Some might be converted to per-image flags later. */
4575 fValid = CFGMR3AreValuesValid(pCurNode,
4576 "Format\0Path\0"
4577 "ReadOnly\0MaybeReadOnly\0TempReadOnly\0Shareable\0HonorZeroWrites\0"
4578 "HostIPStack\0UseNewIo\0BootAcceleration\0BootAccelerationBuffer\0"
4579 "SetupMerge\0MergeSource\0MergeTarget\0BwGroup\0Type\0BlockCache\0"
4580 "CachePath\0CacheFormat\0Discard\0InformAboutZeroBlocks\0"
4581 "SkipConsistencyChecks\0"
4582 "Locked\0BIOSVisible\0Cylinders\0Heads\0Sectors\0Mountable\0"
4583 "EmptyDrive\0IoBufMax\0NonRotationalMedium\0"
4584#if defined(VBOX_PERIODIC_FLUSH) || defined(VBOX_IGNORE_FLUSH)
4585 "FlushInterval\0IgnoreFlush\0IgnoreFlushAsync\0"
4586#endif /* !(VBOX_PERIODIC_FLUSH || VBOX_IGNORE_FLUSH) */
4587 );
4588 }
4589 else
4590 {
4591 /* All other image configurations only contain image name and
4592 * the format information. */
4593 fValid = CFGMR3AreValuesValid(pCurNode, "Format\0Path\0"
4594 "MergeSource\0MergeTarget\0");
4595 }
4596 if (!fValid)
4597 {
4598 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_DRVINS_UNKNOWN_CFG_VALUES,
4599 RT_SRC_POS, N_("DrvVD: Configuration error: keys incorrect at level %d"), iLevel);
4600 break;
4601 }
4602
4603 if (pCurNode == pCfg)
4604 {
4605 rc = CFGMR3QueryBoolDef(pCurNode, "HostIPStack", &fHostIP, true);
4606 if (RT_FAILURE(rc))
4607 {
4608 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4609 N_("DrvVD: Configuration error: Querying \"HostIPStack\" as boolean failed"));
4610 break;
4611 }
4612
4613 rc = CFGMR3QueryBoolDef(pCurNode, "HonorZeroWrites", &fHonorZeroWrites, false);
4614 if (RT_FAILURE(rc))
4615 {
4616 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4617 N_("DrvVD: Configuration error: Querying \"HonorZeroWrites\" as boolean failed"));
4618 break;
4619 }
4620
4621 rc = CFGMR3QueryBoolDef(pCurNode, "ReadOnly", &fReadOnly, false);
4622 if (RT_FAILURE(rc))
4623 {
4624 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4625 N_("DrvVD: Configuration error: Querying \"ReadOnly\" as boolean failed"));
4626 break;
4627 }
4628
4629 rc = CFGMR3QueryBoolDef(pCurNode, "MaybeReadOnly", &fMaybeReadOnly, false);
4630 if (RT_FAILURE(rc))
4631 {
4632 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4633 N_("DrvVD: Configuration error: Querying \"MaybeReadOnly\" as boolean failed"));
4634 break;
4635 }
4636
4637 rc = CFGMR3QueryBoolDef(pCurNode, "TempReadOnly", &pThis->fTempReadOnly, false);
4638 if (RT_FAILURE(rc))
4639 {
4640 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4641 N_("DrvVD: Configuration error: Querying \"TempReadOnly\" as boolean failed"));
4642 break;
4643 }
4644 if (fReadOnly && pThis->fTempReadOnly)
4645 {
4646 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
4647 N_("DrvVD: Configuration error: Both \"ReadOnly\" and \"TempReadOnly\" are set"));
4648 break;
4649 }
4650
4651 rc = CFGMR3QueryBoolDef(pCurNode, "Shareable", &pThis->fShareable, false);
4652 if (RT_FAILURE(rc))
4653 {
4654 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4655 N_("DrvVD: Configuration error: Querying \"Shareable\" as boolean failed"));
4656 break;
4657 }
4658
4659 rc = CFGMR3QueryBoolDef(pCurNode, "UseNewIo", &fUseNewIo, false);
4660 if (RT_FAILURE(rc))
4661 {
4662 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4663 N_("DrvVD: Configuration error: Querying \"UseNewIo\" as boolean failed"));
4664 break;
4665 }
4666 rc = CFGMR3QueryBoolDef(pCurNode, "SetupMerge", &pThis->fMergePending, false);
4667 if (RT_FAILURE(rc))
4668 {
4669 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4670 N_("DrvVD: Configuration error: Querying \"SetupMerge\" as boolean failed"));
4671 break;
4672 }
4673 if (fReadOnly && pThis->fMergePending)
4674 {
4675 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
4676 N_("DrvVD: Configuration error: Both \"ReadOnly\" and \"MergePending\" are set"));
4677 break;
4678 }
4679 rc = CFGMR3QueryBoolDef(pCurNode, "BootAcceleration", &pThis->fBootAccelEnabled, false);
4680 if (RT_FAILURE(rc))
4681 {
4682 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4683 N_("DrvVD: Configuration error: Querying \"BootAcceleration\" as boolean failed"));
4684 break;
4685 }
4686 rc = CFGMR3QueryU32Def(pCurNode, "BootAccelerationBuffer", (uint32_t *)&pThis->cbBootAccelBuffer, 16 * _1K);
4687 if (RT_FAILURE(rc))
4688 {
4689 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4690 N_("DrvVD: Configuration error: Querying \"BootAccelerationBuffer\" as integer failed"));
4691 break;
4692 }
4693 rc = CFGMR3QueryBoolDef(pCurNode, "BlockCache", &fUseBlockCache, false);
4694 if (RT_FAILURE(rc))
4695 {
4696 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4697 N_("DrvVD: Configuration error: Querying \"BlockCache\" as boolean failed"));
4698 break;
4699 }
4700 rc = CFGMR3QueryStringAlloc(pCurNode, "BwGroup", &pThis->pszBwGroup);
4701 if (RT_FAILURE(rc) && rc != VERR_CFGM_VALUE_NOT_FOUND)
4702 {
4703 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4704 N_("DrvVD: Configuration error: Querying \"BwGroup\" as string failed"));
4705 break;
4706 }
4707 else
4708 rc = VINF_SUCCESS;
4709 rc = CFGMR3QueryBoolDef(pCurNode, "Discard", &fDiscard, false);
4710 if (RT_FAILURE(rc))
4711 {
4712 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4713 N_("DrvVD: Configuration error: Querying \"Discard\" as boolean failed"));
4714 break;
4715 }
4716 if (fReadOnly && fDiscard)
4717 {
4718 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
4719 N_("DrvVD: Configuration error: Both \"ReadOnly\" and \"Discard\" are set"));
4720 break;
4721 }
4722 rc = CFGMR3QueryBoolDef(pCurNode, "InformAboutZeroBlocks", &fInformAboutZeroBlocks, false);
4723 if (RT_FAILURE(rc))
4724 {
4725 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4726 N_("DrvVD: Configuration error: Querying \"InformAboutZeroBlocks\" as boolean failed"));
4727 break;
4728 }
4729 rc = CFGMR3QueryBoolDef(pCurNode, "SkipConsistencyChecks", &fSkipConsistencyChecks, true);
4730 if (RT_FAILURE(rc))
4731 {
4732 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4733 N_("DrvVD: Configuration error: Querying \"SKipConsistencyChecks\" as boolean failed"));
4734 break;
4735 }
4736
4737 char *psz = NULL;
4738 rc = CFGMR3QueryStringAlloc(pCfg, "Type", &psz);
4739 if (RT_FAILURE(rc))
4740 return PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_BLOCK_NO_TYPE, N_("Failed to obtain the sub type"));
4741 pThis->enmType = drvvdGetMediaTypeFromString(psz);
4742 if (pThis->enmType == PDMMEDIATYPE_ERROR)
4743 {
4744 PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_BLOCK_UNKNOWN_TYPE, RT_SRC_POS,
4745 N_("Unknown type \"%s\""), psz);
4746 MMR3HeapFree(psz);
4747 return VERR_PDM_BLOCK_UNKNOWN_TYPE;
4748 }
4749 MMR3HeapFree(psz); psz = NULL;
4750
4751 rc = CFGMR3QueryStringAlloc(pCurNode, "CachePath", &pszCachePath);
4752 if (RT_FAILURE(rc) && rc != VERR_CFGM_VALUE_NOT_FOUND)
4753 {
4754 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4755 N_("DrvVD: Configuration error: Querying \"CachePath\" as string failed"));
4756 break;
4757 }
4758 else
4759 rc = VINF_SUCCESS;
4760
4761 if (pszCachePath)
4762 {
4763 rc = CFGMR3QueryStringAlloc(pCurNode, "CacheFormat", &pszCacheFormat);
4764 if (RT_FAILURE(rc))
4765 {
4766 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4767 N_("DrvVD: Configuration error: Querying \"CacheFormat\" as string failed"));
4768 break;
4769 }
4770 }
4771
4772 /* Mountable */
4773 rc = CFGMR3QueryBoolDef(pCfg, "Mountable", &pThis->fMountable, false);
4774 if (RT_FAILURE(rc))
4775 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"Mountable\" from the config"));
4776
4777 /* Locked */
4778 rc = CFGMR3QueryBoolDef(pCfg, "Locked", &pThis->fLocked, false);
4779 if (RT_FAILURE(rc))
4780 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"Locked\" from the config"));
4781
4782 /* BIOS visible */
4783 rc = CFGMR3QueryBoolDef(pCfg, "BIOSVisible", &pThis->fBiosVisible, true);
4784 if (RT_FAILURE(rc))
4785 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"BIOSVisible\" from the config"));
4786
4787 /* Cylinders */
4788 rc = CFGMR3QueryU32Def(pCfg, "Cylinders", &pThis->LCHSGeometry.cCylinders, 0);
4789 if (RT_FAILURE(rc))
4790 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"Cylinders\" from the config"));
4791
4792 /* Heads */
4793 rc = CFGMR3QueryU32Def(pCfg, "Heads", &pThis->LCHSGeometry.cHeads, 0);
4794 if (RT_FAILURE(rc))
4795 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"Heads\" from the config"));
4796
4797 /* Sectors */
4798 rc = CFGMR3QueryU32Def(pCfg, "Sectors", &pThis->LCHSGeometry.cSectors, 0);
4799 if (RT_FAILURE(rc))
4800 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"Sectors\" from the config"));
4801
4802 /* Uuid */
4803 rc = CFGMR3QueryStringAlloc(pCfg, "Uuid", &psz);
4804 if (rc == VERR_CFGM_VALUE_NOT_FOUND)
4805 RTUuidClear(&pThis->Uuid);
4806 else if (RT_SUCCESS(rc))
4807 {
4808 rc = RTUuidFromStr(&pThis->Uuid, psz);
4809 if (RT_FAILURE(rc))
4810 {
4811 PDMDrvHlpVMSetError(pDrvIns, rc, RT_SRC_POS, N_("Uuid from string failed on \"%s\""), psz);
4812 MMR3HeapFree(psz);
4813 return rc;
4814 }
4815 MMR3HeapFree(psz); psz = NULL;
4816 }
4817 else
4818 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"Uuid\" from the config"));
4819
4820#ifdef VBOX_PERIODIC_FLUSH
4821 rc = CFGMR3QueryU32Def(pCfg, "FlushInterval", &pThis->cbFlushInterval, 0);
4822 if (RT_FAILURE(rc))
4823 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"FlushInterval\" from the config"));
4824#endif /* VBOX_PERIODIC_FLUSH */
4825
4826#ifdef VBOX_IGNORE_FLUSH
4827 rc = CFGMR3QueryBoolDef(pCfg, "IgnoreFlush", &pThis->fIgnoreFlush, true);
4828 if (RT_FAILURE(rc))
4829 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"IgnoreFlush\" from the config"));
4830
4831 if (pThis->fIgnoreFlush)
4832 LogRel(("DrvVD: Flushes will be ignored\n"));
4833 else
4834 LogRel(("DrvVD: Flushes will be passed to the disk\n"));
4835
4836 rc = CFGMR3QueryBoolDef(pCfg, "IgnoreFlushAsync", &pThis->fIgnoreFlushAsync, false);
4837 if (RT_FAILURE(rc))
4838 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"IgnoreFlushAsync\" from the config"));
4839
4840 if (pThis->fIgnoreFlushAsync)
4841 LogRel(("DrvVD: Async flushes will be ignored\n"));
4842 else
4843 LogRel(("DrvVD: Async flushes will be passed to the disk\n"));
4844#endif /* VBOX_IGNORE_FLUSH */
4845
4846 rc = CFGMR3QueryBoolDef(pCurNode, "EmptyDrive", &fEmptyDrive, false);
4847 if (RT_FAILURE(rc))
4848 {
4849 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4850 N_("DrvVD: Configuration error: Querying \"EmptyDrive\" as boolean failed"));
4851 break;
4852 }
4853
4854 rc = CFGMR3QueryU32Def(pCfg, "IoBufMax", &cbIoBufMax, 5 * _1M);
4855 if (RT_FAILURE(rc))
4856 return PDMDRV_SET_ERROR(pDrvIns, rc, N_("Failed to query \"IoBufMax\" from the config"));
4857
4858 rc = CFGMR3QueryBoolDef(pCfg, "NonRotationalMedium", &pThis->fNonRotational, false);
4859 if (RT_FAILURE(rc))
4860 return PDMDRV_SET_ERROR(pDrvIns, rc,
4861 N_("DrvVD configuration error: Querying \"NonRotationalMedium\" as boolean failed"));
4862 }
4863
4864 PCFGMNODE pParent = CFGMR3GetChild(pCurNode, "Parent");
4865 if (!pParent)
4866 break;
4867 pCurNode = pParent;
4868 iLevel++;
4869 }
4870
4871 if (pThis->pDrvMediaExPort)
4872 rc = IOBUFMgrCreate(&pThis->hIoBufMgr, cbIoBufMax, pThis->pCfgCrypto ? IOBUFMGR_F_REQUIRE_NOT_PAGABLE : IOBUFMGR_F_DEFAULT);
4873
4874 if ( !fEmptyDrive
4875 && RT_SUCCESS(rc))
4876 {
4877 /*
4878 * Create the image container and the necessary interfaces.
4879 */
4880 if (RT_SUCCESS(rc))
4881 {
4882 /*
4883 * The image has a bandwidth group but the host cache is enabled.
4884 * Use the async I/O framework but tell it to enable the host cache.
4885 */
4886 if (!fUseNewIo && pThis->pszBwGroup)
4887 {
4888 pThis->fAsyncIoWithHostCache = true;
4889 fUseNewIo = true;
4890 }
4891
4892 /** @todo quick hack to work around problems in the async I/O
4893 * implementation (rw semaphore thread ownership problem)
4894 * while a merge is running. Remove once this is fixed. */
4895 if (pThis->fMergePending)
4896 fUseNewIo = false;
4897
4898 if (RT_SUCCESS(rc) && pThis->fMergePending)
4899 {
4900 rc = RTSemFastMutexCreate(&pThis->MergeCompleteMutex);
4901 if (RT_SUCCESS(rc))
4902 rc = RTSemRWCreate(&pThis->MergeLock);
4903 if (RT_SUCCESS(rc))
4904 {
4905 pThis->VDIfThreadSync.pfnStartRead = drvvdThreadStartRead;
4906 pThis->VDIfThreadSync.pfnFinishRead = drvvdThreadFinishRead;
4907 pThis->VDIfThreadSync.pfnStartWrite = drvvdThreadStartWrite;
4908 pThis->VDIfThreadSync.pfnFinishWrite = drvvdThreadFinishWrite;
4909
4910 rc = VDInterfaceAdd(&pThis->VDIfThreadSync.Core, "DrvVD_ThreadSync", VDINTERFACETYPE_THREADSYNC,
4911 pThis, sizeof(VDINTERFACETHREADSYNC), &pThis->pVDIfsDisk);
4912 }
4913 else
4914 {
4915 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4916 N_("DrvVD: Failed to create semaphores for \"MergePending\""));
4917 }
4918 }
4919
4920 if (RT_SUCCESS(rc))
4921 {
4922 rc = VDCreate(pThis->pVDIfsDisk, drvvdGetVDFromMediaType(pThis->enmType), &pThis->pDisk);
4923 /* Error message is already set correctly. */
4924 }
4925 }
4926
4927 if (pThis->pDrvMediaExPort && fUseNewIo)
4928 pThis->fAsyncIOSupported = true;
4929
4930 uint64_t tsStart = RTTimeNanoTS();
4931
4932 unsigned iImageIdx = 0;
4933 while (pCurNode && RT_SUCCESS(rc))
4934 {
4935 /* Allocate per-image data. */
4936 PVBOXIMAGE pImage = drvvdNewImage(pThis);
4937 if (!pImage)
4938 {
4939 rc = VERR_NO_MEMORY;
4940 break;
4941 }
4942
4943 /*
4944 * Read the image configuration.
4945 */
4946 rc = CFGMR3QueryStringAlloc(pCurNode, "Path", &pszName);
4947 if (RT_FAILURE(rc))
4948 {
4949 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4950 N_("DrvVD: Configuration error: Querying \"Path\" as string failed"));
4951 break;
4952 }
4953
4954 rc = CFGMR3QueryStringAlloc(pCurNode, "Format", &pszFormat);
4955 if (RT_FAILURE(rc))
4956 {
4957 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4958 N_("DrvVD: Configuration error: Querying \"Format\" as string failed"));
4959 break;
4960 }
4961
4962 bool fMergeSource;
4963 rc = CFGMR3QueryBoolDef(pCurNode, "MergeSource", &fMergeSource, false);
4964 if (RT_FAILURE(rc))
4965 {
4966 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4967 N_("DrvVD: Configuration error: Querying \"MergeSource\" as boolean failed"));
4968 break;
4969 }
4970 if (fMergeSource)
4971 {
4972 if (pThis->uMergeSource == VD_LAST_IMAGE)
4973 pThis->uMergeSource = iImageIdx;
4974 else
4975 {
4976 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
4977 N_("DrvVD: Configuration error: Multiple \"MergeSource\" occurrences"));
4978 break;
4979 }
4980 }
4981
4982 bool fMergeTarget;
4983 rc = CFGMR3QueryBoolDef(pCurNode, "MergeTarget", &fMergeTarget, false);
4984 if (RT_FAILURE(rc))
4985 {
4986 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
4987 N_("DrvVD: Configuration error: Querying \"MergeTarget\" as boolean failed"));
4988 break;
4989 }
4990 if (fMergeTarget)
4991 {
4992 if (pThis->uMergeTarget == VD_LAST_IMAGE)
4993 pThis->uMergeTarget = iImageIdx;
4994 else
4995 {
4996 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
4997 N_("DrvVD: Configuration error: Multiple \"MergeTarget\" occurrences"));
4998 break;
4999 }
5000 }
5001
5002 PCFGMNODE pCfgVDConfig = CFGMR3GetChild(pCurNode, "VDConfig");
5003 pImage->VDIfConfig.pfnAreKeysValid = drvvdCfgAreKeysValid;
5004 pImage->VDIfConfig.pfnQuerySize = drvvdCfgQuerySize;
5005 pImage->VDIfConfig.pfnQuery = drvvdCfgQuery;
5006 pImage->VDIfConfig.pfnQueryBytes = NULL;
5007 rc = VDInterfaceAdd(&pImage->VDIfConfig.Core, "DrvVD_Config", VDINTERFACETYPE_CONFIG,
5008 pCfgVDConfig, sizeof(VDINTERFACECONFIG), &pImage->pVDIfsImage);
5009 AssertRC(rc);
5010
5011 /* Check VDConfig for encryption config. */
5012 if (pCfgVDConfig)
5013 pThis->pCfgCrypto = CFGMR3GetChild(pCfgVDConfig, "CRYPT");
5014
5015 if (pThis->pCfgCrypto)
5016 {
5017 /* Setup VDConfig interface for disk encryption support. */
5018 pThis->VDIfCfg.pfnAreKeysValid = drvvdCfgAreKeysValid;
5019 pThis->VDIfCfg.pfnQuerySize = drvvdCfgQuerySize;
5020 pThis->VDIfCfg.pfnQuery = drvvdCfgQuery;
5021 pThis->VDIfCfg.pfnQueryBytes = NULL;
5022
5023 pThis->VDIfCrypto.pfnKeyRetain = drvvdCryptoKeyRetain;
5024 pThis->VDIfCrypto.pfnKeyRelease = drvvdCryptoKeyRelease;
5025 pThis->VDIfCrypto.pfnKeyStorePasswordRetain = drvvdCryptoKeyStorePasswordRetain;
5026 pThis->VDIfCrypto.pfnKeyStorePasswordRelease = drvvdCryptoKeyStorePasswordRelease;
5027 }
5028
5029 /* Unconditionally insert the TCPNET interface, don't bother to check
5030 * if an image really needs it. Will be ignored. Since the TCPNET
5031 * interface is per image we could make this more flexible in the
5032 * future if we want to. */
5033 /* Construct TCPNET callback table depending on the config. This is
5034 * done unconditionally, as uninterested backends will ignore it. */
5035 if (fHostIP)
5036 {
5037 pImage->VDIfTcpNet.pfnSocketCreate = drvvdTcpSocketCreate;
5038 pImage->VDIfTcpNet.pfnSocketDestroy = drvvdTcpSocketDestroy;
5039 pImage->VDIfTcpNet.pfnClientConnect = drvvdTcpClientConnect;
5040 pImage->VDIfTcpNet.pfnIsClientConnected = drvvdTcpIsClientConnected;
5041 pImage->VDIfTcpNet.pfnClientClose = drvvdTcpClientClose;
5042 pImage->VDIfTcpNet.pfnSelectOne = drvvdTcpSelectOne;
5043 pImage->VDIfTcpNet.pfnRead = drvvdTcpRead;
5044 pImage->VDIfTcpNet.pfnWrite = drvvdTcpWrite;
5045 pImage->VDIfTcpNet.pfnSgWrite = drvvdTcpSgWrite;
5046 pImage->VDIfTcpNet.pfnReadNB = drvvdTcpReadNB;
5047 pImage->VDIfTcpNet.pfnWriteNB = drvvdTcpWriteNB;
5048 pImage->VDIfTcpNet.pfnSgWriteNB = drvvdTcpSgWriteNB;
5049 pImage->VDIfTcpNet.pfnFlush = drvvdTcpFlush;
5050 pImage->VDIfTcpNet.pfnSetSendCoalescing = drvvdTcpSetSendCoalescing;
5051 pImage->VDIfTcpNet.pfnGetLocalAddress = drvvdTcpGetLocalAddress;
5052 pImage->VDIfTcpNet.pfnGetPeerAddress = drvvdTcpGetPeerAddress;
5053
5054 /*
5055 * There is a 15ms delay between receiving the data and marking the socket
5056 * as readable on Windows XP which hurts async I/O performance of
5057 * TCP backends badly. Provide a different select method without
5058 * using poll on XP.
5059 * This is only used on XP because it is not as efficient as the one using poll
5060 * and all other Windows versions are working fine.
5061 */
5062 char szOS[64];
5063 memset(szOS, 0, sizeof(szOS));
5064 rc = RTSystemQueryOSInfo(RTSYSOSINFO_PRODUCT, &szOS[0], sizeof(szOS));
5065
5066 if (RT_SUCCESS(rc) && !strncmp(szOS, "Windows XP", 10))
5067 {
5068 LogRel(("VD: Detected Windows XP, disabled poll based waiting for TCP\n"));
5069 pImage->VDIfTcpNet.pfnSelectOneEx = drvvdTcpSelectOneExNoPoll;
5070 }
5071 else
5072 pImage->VDIfTcpNet.pfnSelectOneEx = drvvdTcpSelectOneExPoll;
5073
5074 pImage->VDIfTcpNet.pfnPoke = drvvdTcpPoke;
5075 }
5076 else
5077 {
5078#ifndef VBOX_WITH_INIP
5079 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_DRVINS_UNKNOWN_CFG_VALUES,
5080 RT_SRC_POS, N_("DrvVD: Configuration error: TCP over Internal Networking not compiled in"));
5081#else /* VBOX_WITH_INIP */
5082 pImage->VDIfTcpNet.pfnSocketCreate = drvvdINIPSocketCreate;
5083 pImage->VDIfTcpNet.pfnSocketDestroy = drvvdINIPSocketDestroy;
5084 pImage->VDIfTcpNet.pfnClientConnect = drvvdINIPClientConnect;
5085 pImage->VDIfTcpNet.pfnClientClose = drvvdINIPClientClose;
5086 pImage->VDIfTcpNet.pfnIsClientConnected = drvvdINIPIsClientConnected;
5087 pImage->VDIfTcpNet.pfnSelectOne = drvvdINIPSelectOne;
5088 pImage->VDIfTcpNet.pfnRead = drvvdINIPRead;
5089 pImage->VDIfTcpNet.pfnWrite = drvvdINIPWrite;
5090 pImage->VDIfTcpNet.pfnSgWrite = drvvdINIPSgWrite;
5091 pImage->VDIfTcpNet.pfnFlush = drvvdINIPFlush;
5092 pImage->VDIfTcpNet.pfnSetSendCoalescing = drvvdINIPSetSendCoalescing;
5093 pImage->VDIfTcpNet.pfnGetLocalAddress = drvvdINIPGetLocalAddress;
5094 pImage->VDIfTcpNet.pfnGetPeerAddress = drvvdINIPGetPeerAddress;
5095 pImage->VDIfTcpNet.pfnSelectOneEx = drvvdINIPSelectOneEx;
5096 pImage->VDIfTcpNet.pfnPoke = drvvdINIPPoke;
5097#endif /* VBOX_WITH_INIP */
5098 }
5099 rc = VDInterfaceAdd(&pImage->VDIfTcpNet.Core, "DrvVD_TCPNET",
5100 VDINTERFACETYPE_TCPNET, NULL,
5101 sizeof(VDINTERFACETCPNET), &pImage->pVDIfsImage);
5102 AssertRC(rc);
5103
5104 /* Insert the custom I/O interface only if we're told to use new IO.
5105 * Since the I/O interface is per image we could make this more
5106 * flexible in the future if we want to. */
5107 if (fUseNewIo)
5108 {
5109#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION
5110 pImage->VDIfIo.pfnOpen = drvvdAsyncIOOpen;
5111 pImage->VDIfIo.pfnClose = drvvdAsyncIOClose;
5112 pImage->VDIfIo.pfnGetSize = drvvdAsyncIOGetSize;
5113 pImage->VDIfIo.pfnSetSize = drvvdAsyncIOSetSize;
5114 pImage->VDIfIo.pfnSetAllocationSize = drvvdAsyncIOSetAllocationSize;
5115 pImage->VDIfIo.pfnReadSync = drvvdAsyncIOReadSync;
5116 pImage->VDIfIo.pfnWriteSync = drvvdAsyncIOWriteSync;
5117 pImage->VDIfIo.pfnFlushSync = drvvdAsyncIOFlushSync;
5118 pImage->VDIfIo.pfnReadAsync = drvvdAsyncIOReadAsync;
5119 pImage->VDIfIo.pfnWriteAsync = drvvdAsyncIOWriteAsync;
5120 pImage->VDIfIo.pfnFlushAsync = drvvdAsyncIOFlushAsync;
5121#else /* !VBOX_WITH_PDM_ASYNC_COMPLETION */
5122 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_DRVINS_UNKNOWN_CFG_VALUES,
5123 RT_SRC_POS, N_("DrvVD: Configuration error: Async Completion Framework not compiled in"));
5124#endif /* !VBOX_WITH_PDM_ASYNC_COMPLETION */
5125 if (RT_SUCCESS(rc))
5126 rc = VDInterfaceAdd(&pImage->VDIfIo.Core, "DrvVD_IO", VDINTERFACETYPE_IO,
5127 pThis, sizeof(VDINTERFACEIO), &pImage->pVDIfsImage);
5128 AssertRC(rc);
5129 }
5130
5131 /*
5132 * Open the image.
5133 */
5134 unsigned uOpenFlags;
5135 if (fReadOnly || pThis->fTempReadOnly || iLevel != 0)
5136 uOpenFlags = VD_OPEN_FLAGS_READONLY;
5137 else
5138 uOpenFlags = VD_OPEN_FLAGS_NORMAL;
5139 if (fHonorZeroWrites)
5140 uOpenFlags |= VD_OPEN_FLAGS_HONOR_ZEROES;
5141 if (pThis->fAsyncIOSupported)
5142 uOpenFlags |= VD_OPEN_FLAGS_ASYNC_IO;
5143 if (pThis->fShareable)
5144 uOpenFlags |= VD_OPEN_FLAGS_SHAREABLE;
5145 if (fDiscard && iLevel == 0)
5146 uOpenFlags |= VD_OPEN_FLAGS_DISCARD;
5147 if (fInformAboutZeroBlocks)
5148 uOpenFlags |= VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS;
5149 if ( (uOpenFlags & VD_OPEN_FLAGS_READONLY)
5150 && fSkipConsistencyChecks)
5151 uOpenFlags |= VD_OPEN_FLAGS_SKIP_CONSISTENCY_CHECKS;
5152
5153 /* Try to open backend in async I/O mode first. */
5154 rc = VDOpen(pThis->pDisk, pszFormat, pszName, uOpenFlags, pImage->pVDIfsImage);
5155 if (rc == VERR_NOT_SUPPORTED)
5156 {
5157 pThis->fAsyncIOSupported = false;
5158 uOpenFlags &= ~VD_OPEN_FLAGS_ASYNC_IO;
5159 rc = VDOpen(pThis->pDisk, pszFormat, pszName, uOpenFlags, pImage->pVDIfsImage);
5160 }
5161
5162 if (rc == VERR_VD_DISCARD_NOT_SUPPORTED)
5163 {
5164 fDiscard = false;
5165 uOpenFlags &= ~VD_OPEN_FLAGS_DISCARD;
5166 rc = VDOpen(pThis->pDisk, pszFormat, pszName, uOpenFlags, pImage->pVDIfsImage);
5167 }
5168
5169 if (!fDiscard)
5170 {
5171 pThis->IMedia.pfnDiscard = NULL;
5172 pThis->IMediaEx.pfnIoReqDiscard = NULL;
5173 }
5174
5175 if (RT_SUCCESS(rc))
5176 {
5177 LogFunc(("%d - Opened '%s' in %s mode\n",
5178 iLevel, pszName,
5179 VDIsReadOnly(pThis->pDisk) ? "read-only" : "read-write"));
5180 if ( VDIsReadOnly(pThis->pDisk)
5181 && !fReadOnly
5182 && !fMaybeReadOnly
5183 && !pThis->fTempReadOnly
5184 && iLevel == 0)
5185 {
5186 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_VD_IMAGE_READ_ONLY, RT_SRC_POS,
5187 N_("Failed to open image '%s' for writing due to wrong permissions"),
5188 pszName);
5189 break;
5190 }
5191 }
5192 else
5193 {
5194 rc = PDMDrvHlpVMSetError(pDrvIns, rc, RT_SRC_POS,
5195 N_("Failed to open image '%s' in %s mode"), pszName,
5196 (uOpenFlags & VD_OPEN_FLAGS_READONLY) ? "read-only" : "read-write");
5197 break;
5198 }
5199
5200 MMR3HeapFree(pszName);
5201 pszName = NULL;
5202 MMR3HeapFree(pszFormat);
5203 pszFormat = NULL;
5204
5205 /* next */
5206 iLevel--;
5207 iImageIdx++;
5208 pCurNode = CFGMR3GetParent(pCurNode);
5209 }
5210
5211 LogRel(("VD: Opening the disk took %lld ns\n", RTTimeNanoTS() - tsStart));
5212
5213 /* Open the cache image if set. */
5214 if ( RT_SUCCESS(rc)
5215 && RT_VALID_PTR(pszCachePath))
5216 {
5217 /* Insert the custom I/O interface only if we're told to use new IO.
5218 * Since the I/O interface is per image we could make this more
5219 * flexible in the future if we want to. */
5220 if (fUseNewIo)
5221 {
5222#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION
5223 pThis->VDIfIoCache.pfnOpen = drvvdAsyncIOOpen;
5224 pThis->VDIfIoCache.pfnClose = drvvdAsyncIOClose;
5225 pThis->VDIfIoCache.pfnGetSize = drvvdAsyncIOGetSize;
5226 pThis->VDIfIoCache.pfnSetSize = drvvdAsyncIOSetSize;
5227 pThis->VDIfIoCache.pfnReadSync = drvvdAsyncIOReadSync;
5228 pThis->VDIfIoCache.pfnWriteSync = drvvdAsyncIOWriteSync;
5229 pThis->VDIfIoCache.pfnFlushSync = drvvdAsyncIOFlushSync;
5230 pThis->VDIfIoCache.pfnReadAsync = drvvdAsyncIOReadAsync;
5231 pThis->VDIfIoCache.pfnWriteAsync = drvvdAsyncIOWriteAsync;
5232 pThis->VDIfIoCache.pfnFlushAsync = drvvdAsyncIOFlushAsync;
5233#else /* !VBOX_WITH_PDM_ASYNC_COMPLETION */
5234 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_DRVINS_UNKNOWN_CFG_VALUES,
5235 RT_SRC_POS, N_("DrvVD: Configuration error: Async Completion Framework not compiled in"));
5236#endif /* !VBOX_WITH_PDM_ASYNC_COMPLETION */
5237 if (RT_SUCCESS(rc))
5238 rc = VDInterfaceAdd(&pThis->VDIfIoCache.Core, "DrvVD_IO", VDINTERFACETYPE_IO,
5239 pThis, sizeof(VDINTERFACEIO), &pThis->pVDIfsCache);
5240 AssertRC(rc);
5241 }
5242
5243 rc = VDCacheOpen(pThis->pDisk, pszCacheFormat, pszCachePath, VD_OPEN_FLAGS_NORMAL, pThis->pVDIfsCache);
5244 if (RT_FAILURE(rc))
5245 rc = PDMDRV_SET_ERROR(pDrvIns, rc, N_("DrvVD: Could not open cache image"));
5246 }
5247
5248 if (RT_VALID_PTR(pszCachePath))
5249 MMR3HeapFree(pszCachePath);
5250 if (RT_VALID_PTR(pszCacheFormat))
5251 MMR3HeapFree(pszCacheFormat);
5252
5253 if ( RT_SUCCESS(rc)
5254 && pThis->fMergePending
5255 && ( pThis->uMergeSource == VD_LAST_IMAGE
5256 || pThis->uMergeTarget == VD_LAST_IMAGE))
5257 {
5258 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
5259 N_("DrvVD: Configuration error: Inconsistent image merge data"));
5260 }
5261
5262 /* Create the block cache if enabled. */
5263 if ( fUseBlockCache
5264 && !pThis->fShareable
5265 && !fDiscard
5266 && !pThis->pCfgCrypto /* Disk encryption disables the block cache for security reasons */
5267 && RT_SUCCESS(rc))
5268 {
5269 /*
5270 * We need a unique ID for the block cache (to identify the owner of data
5271 * blocks in a saved state). UUIDs are not really suitable because
5272 * there are image formats which don't support them. Furthermore it is
5273 * possible that a new diff image was attached after a saved state
5274 * which changes the UUID.
5275 * However the device "name + device instance + LUN" triple the disk is
5276 * attached to is always constant for saved states.
5277 */
5278 char *pszId = NULL;
5279 uint32_t iInstance, iLUN;
5280 const char *pcszController;
5281
5282 rc = pThis->pDrvMediaPort->pfnQueryDeviceLocation(pThis->pDrvMediaPort, &pcszController,
5283 &iInstance, &iLUN);
5284 if (RT_FAILURE(rc))
5285 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
5286 N_("DrvVD: Configuration error: Could not query device data"));
5287 else
5288 {
5289 int cbStr = RTStrAPrintf(&pszId, "%s-%d-%d", pcszController, iInstance, iLUN);
5290
5291 if (cbStr > 0)
5292 {
5293 rc = PDMDrvHlpBlkCacheRetain(pDrvIns, &pThis->pBlkCache,
5294 drvvdBlkCacheXferCompleteIoReq,
5295 drvvdBlkCacheXferEnqueue,
5296 drvvdBlkCacheXferEnqueueDiscard,
5297 pszId);
5298 if (rc == VERR_NOT_SUPPORTED)
5299 {
5300 LogRel(("VD: Block cache is not supported\n"));
5301 rc = VINF_SUCCESS;
5302 }
5303 else
5304 AssertRC(rc);
5305
5306 RTStrFree(pszId);
5307 }
5308 else
5309 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
5310 N_("DrvVD: Out of memory when creating block cache"));
5311 }
5312 }
5313
5314 if (RT_SUCCESS(rc))
5315 rc = drvvdSetupFilters(pThis, pCfg);
5316
5317 /*
5318 * Register a load-done callback so we can undo TempReadOnly config before
5319 * we get to drvvdResume. Automatically deregistered upon destruction.
5320 */
5321 if (RT_SUCCESS(rc))
5322 rc = PDMDrvHlpSSMRegisterEx(pDrvIns, 0 /* version */, 0 /* cbGuess */,
5323 NULL /*pfnLivePrep*/, NULL /*pfnLiveExec*/, NULL /*pfnLiveVote*/,
5324 NULL /*pfnSavePrep*/, NULL /*pfnSaveExec*/, NULL /*pfnSaveDone*/,
5325 NULL /*pfnDonePrep*/, NULL /*pfnLoadExec*/, drvvdLoadDone);
5326
5327 /* Setup the boot acceleration stuff if enabled. */
5328 if (RT_SUCCESS(rc) && pThis->fBootAccelEnabled)
5329 {
5330 pThis->cbDisk = VDGetSize(pThis->pDisk, VD_LAST_IMAGE);
5331 Assert(pThis->cbDisk > 0);
5332 pThis->pbData = (uint8_t *)RTMemAllocZ(pThis->cbBootAccelBuffer);
5333 if (pThis->pbData)
5334 {
5335 pThis->fBootAccelActive = true;
5336 pThis->offDisk = 0;
5337 pThis->cbDataValid = 0;
5338 LogRel(("VD: Boot acceleration enabled\n"));
5339 }
5340 else
5341 LogRel(("VD: Boot acceleration, out of memory, disabled\n"));
5342 }
5343
5344 if ( RTUuidIsNull(&pThis->Uuid)
5345 && pThis->enmType == PDMMEDIATYPE_HARD_DISK)
5346 VDGetUuid(pThis->pDisk, 0, &pThis->Uuid);
5347
5348 /*
5349 * Automatically upgrade the floppy drive if the specified one is too
5350 * small to represent the whole boot time image. (We cannot do this later
5351 * since the BIOS (and others) gets the info via CMOS.)
5352 *
5353 * This trick should make 2.88 images as well as the fake 15.6 and 63.5 MB
5354 * images despite the hardcoded default 1.44 drive.
5355 */
5356 if ( PDMMEDIATYPE_IS_FLOPPY(pThis->enmType)
5357 && pThis->pDisk)
5358 {
5359 uint64_t const cbFloppyImg = VDGetSize(pThis->pDisk, VD_LAST_IMAGE);
5360 PDMMEDIATYPE const enmCfgType = pThis->enmType;
5361 switch (enmCfgType)
5362 {
5363 default:
5364 AssertFailed();
5365 case PDMMEDIATYPE_FLOPPY_360:
5366 if (cbFloppyImg > 40 * 2 * 9 * 512)
5367 pThis->enmType = PDMMEDIATYPE_FLOPPY_720;
5368 /* fall thru */
5369 case PDMMEDIATYPE_FLOPPY_720:
5370 if (cbFloppyImg > 80 * 2 * 14 * 512)
5371 pThis->enmType = PDMMEDIATYPE_FLOPPY_1_20;
5372 /* fall thru */
5373 case PDMMEDIATYPE_FLOPPY_1_20:
5374 if (cbFloppyImg > 80 * 2 * 20 * 512)
5375 pThis->enmType = PDMMEDIATYPE_FLOPPY_1_44;
5376 /* fall thru */
5377 case PDMMEDIATYPE_FLOPPY_1_44:
5378 if (cbFloppyImg > 80 * 2 * 24 * 512)
5379 pThis->enmType = PDMMEDIATYPE_FLOPPY_2_88;
5380 /* fall thru */
5381 case PDMMEDIATYPE_FLOPPY_2_88:
5382 if (cbFloppyImg > 80 * 2 * 48 * 512)
5383 pThis->enmType = PDMMEDIATYPE_FLOPPY_FAKE_15_6;
5384 /* fall thru */
5385 case PDMMEDIATYPE_FLOPPY_FAKE_15_6:
5386 if (cbFloppyImg > 255 * 2 * 63 * 512)
5387 pThis->enmType = PDMMEDIATYPE_FLOPPY_FAKE_63_5;
5388 case PDMMEDIATYPE_FLOPPY_FAKE_63_5:
5389 if (cbFloppyImg > 255 * 2 * 255 * 512)
5390 LogRel(("Warning: Floppy image is larger that 63.5 MB! (%llu bytes)\n", cbFloppyImg));
5391 break;
5392 }
5393 if (pThis->enmType != enmCfgType)
5394 LogRel(("DrvVD: Automatically upgraded floppy drive from %s to %s to better support the %u byte image\n",
5395 drvvdGetTypeName(enmCfgType), drvvdGetTypeName(pThis->enmType), cbFloppyImg));
5396 }
5397 } /* !fEmptyDrive */
5398
5399 PDMDrvHlpSTAMRegCounterEx(pDrvIns, &pThis->StatQueryBufAttempts, "QueryBufAttempts",
5400 STAMUNIT_COUNT, "Number of attempts to query a direct buffer.");
5401 PDMDrvHlpSTAMRegCounterEx(pDrvIns, &pThis->StatQueryBufSuccess, "QueryBufSuccess",
5402 STAMUNIT_COUNT, "Number of succeeded attempts to query a direct buffer.");
5403
5404 if (RT_FAILURE(rc))
5405 {
5406 if (RT_VALID_PTR(pszName))
5407 MMR3HeapFree(pszName);
5408 if (RT_VALID_PTR(pszFormat))
5409 MMR3HeapFree(pszFormat);
5410 /* drvvdDestruct does the rest. */
5411 }
5412
5413 LogFlowFunc(("returns %Rrc\n", rc));
5414 return rc;
5415}
5416
5417/**
5418 * VBox disk container media driver registration record.
5419 */
5420const PDMDRVREG g_DrvVD =
5421{
5422 /* u32Version */
5423 PDM_DRVREG_VERSION,
5424 /* szName */
5425 "VD",
5426 /* szRCMod */
5427 "",
5428 /* szR0Mod */
5429 "",
5430 /* pszDescription */
5431 "Generic VBox disk media driver.",
5432 /* fFlags */
5433 PDM_DRVREG_FLAGS_HOST_BITS_DEFAULT,
5434 /* fClass. */
5435 PDM_DRVREG_CLASS_MEDIA,
5436 /* cMaxInstances */
5437 ~0U,
5438 /* cbInstance */
5439 sizeof(VBOXDISK),
5440 /* pfnConstruct */
5441 drvvdConstruct,
5442 /* pfnDestruct */
5443 drvvdDestruct,
5444 /* pfnRelocate */
5445 NULL,
5446 /* pfnIOCtl */
5447 NULL,
5448 /* pfnPowerOn */
5449 drvvdPowerOn,
5450 /* pfnReset */
5451 drvvdReset,
5452 /* pfnSuspend */
5453 drvvdSuspend,
5454 /* pfnResume */
5455 drvvdResume,
5456 /* pfnAttach */
5457 NULL,
5458 /* pfnDetach */
5459 NULL,
5460 /* pfnPowerOff */
5461 drvvdPowerOff,
5462 /* pfnSoftReset */
5463 NULL,
5464 /* u32EndVersion */
5465 PDM_DRVREG_VERSION
5466};
5467
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette