VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@ 77770

Last change on this file since 77770 was 77770, checked in by vboxsync, 6 years ago

linux/vboxsf: Redid the code that synchronizes writes with mmappings (page cache). Probably way faster to copy over the data written than kick out page mappings. It certainly works better with older kernels. bugref:9172

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 103.5 KB
1/* $Id: regops.c 77770 2019-03-18 19:36:48Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31
32/*********************************************************************************************************************************
33* Header Files *
34*********************************************************************************************************************************/
35#include "vfsmod.h"
36#include <linux/uio.h>
37#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
38# include <linux/aio.h> /* struct kiocb before 4.1 */
39#endif
40#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/buffer_head.h>
42#endif
43#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
44 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
45# include <linux/writeback.h>
46#endif
47#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
48 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
49# include <linux/splice.h>
50#endif
51#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
52# include <linux/swap.h> /* for mark_page_accessed */
53#endif
54#include <iprt/err.h>
55
56#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
57# define SEEK_END 2
58#endif
59
60#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
61# define iter_is_iovec(a_pIter) ( !((a_pIter)->type & (ITER_KVEC | ITER_BVEC)) )
62#endif
63
64#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)
65# define vm_fault_t int
66#endif
67
68#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 20)
69# define pgoff_t unsigned long
70#endif
71
72
73/*********************************************************************************************************************************
74* Structures and Typedefs *
75*********************************************************************************************************************************/
76#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
77/** Used by vbsf_iter_lock_pages() to keep the first page of the next segment. */
78struct vbsf_iter_stash {
79 struct page *pPage;
80 size_t off;
81 size_t cb;
82# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
83 size_t offFromEnd;
84 struct iov_iter Copy;
85# endif
86};
87#endif /* >= 3.16.0 */
88/** Initializer for struct vbsf_iter_stash. */
89#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
90# define VBSF_ITER_STASH_INITIALIZER { NULL, 0 }
91#else
92# define VBSF_ITER_STASH_INITIALIZER { NULL, 0, ~(size_t)0 }
93#endif
94
95
96
97/**
98 * Called when an inode is released to unlink all handles that might possibly
99 * still be associated with it.
100 *
101 * @param pInodeInfo The inode which handles to drop.
102 */
103void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
104{
105 struct vbsf_handle *pCur, *pNext;
106 unsigned long fSavedFlags;
107 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
108 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
109
110 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
111 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
112 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
113 pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
114 RTListNodeRemove(&pCur->Entry);
115 }
116
117 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
118}
119
120
121/**
122 * Locates a handle that matches all the flags in @a fFlags.
123 *
124 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
125 * release it. NULL if no suitable handle was found.
126 * @param pInodeInfo The inode info to search.
127 * @param fFlagsSet The flags that must be set.
128 * @param fFlagsClear The flags that must be clear.
129 */
130struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
131{
132 struct vbsf_handle *pCur;
133 unsigned long fSavedFlags;
134 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
135
136 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
137 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
138 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
139 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
140 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
141 if (cRefs > 1) {
142 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
143 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
144 return pCur;
145 }
146 /* Oops, already being closed (safe as it's only ever increased here). */
147 ASMAtomicDecU32(&pCur->cRefs);
148 }
149 }
150
151 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
152 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
153 return NULL;
154}
155
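/*
 * Editor's note - illustrative sketch, not part of the original file: the
 * intended pairing of vbsf_handle_find() with vbsf_handle_release().  The
 * release helper and the VBSF_HANDLE_F_READ flag are assumed to be declared
 * in vfsmod.h; the wrapper function below is hypothetical.
 */
#if 0 /* example only, never built */
static int vbsf_example_read_via_any_handle(struct vbsf_inode_info *pInodeInfo, struct vbsf_super_info *sf_g)
{
    /* Grab any open handle that allows reading and isn't being closed: */
    struct vbsf_handle *pHandle = vbsf_handle_find(pInodeInfo, VBSF_HANDLE_F_READ, 0 /*fFlagsClear*/);
    if (!pHandle)
        return -EBADF;

    /* ... issue host requests using pHandle->hHost ... */

    /* Drop the reference taken by vbsf_handle_find(): */
    vbsf_handle_release(pHandle, sf_g, "vbsf_example_read_via_any_handle");
    return 0;
}
#endif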
156
157/**
158 * Slow worker for vbsf_handle_release() that does the freeing.
159 *
160 * @returns 0 (ref count).
161 * @param pHandle The handle to release.
162 * @param sf_g The info structure for the shared folder associated
163 * with the handle.
164 * @param pszCaller The caller name (for logging failures).
165 */
166uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
167{
168 int rc;
169 unsigned long fSavedFlags;
170
171 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
172
173 /*
174 * Remove from the list.
175 */
176 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
177
178 AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
179 Assert(pHandle->pInodeInfo);
180 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
181
182 if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
183 pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
184 RTListNodeRemove(&pHandle->Entry);
185 }
186
187 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
188
189 /*
190 * Actually destroy it.
191 */
192 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
193 if (RT_FAILURE(rc))
194 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
195 pHandle->hHost = SHFL_HANDLE_NIL;
196 pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
197 kfree(pHandle);
198 return 0;
199}
200
201
202/**
203 * Appends a handle to a handle list.
204 *
205 * @param pInodeInfo The inode to add it to.
206 * @param pHandle The handle to add.
207 */
208void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
209{
210#ifdef VBOX_STRICT
211 struct vbsf_handle *pCur;
212#endif
213 unsigned long fSavedFlags;
214
215 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
216 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
217 ("%p %#x\n", pHandle, pHandle->fFlags));
218 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
219
220 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
221
222 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
223 ("%p %#x\n", pHandle, pHandle->fFlags));
224#ifdef VBOX_STRICT
225 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
226 Assert(pCur != pHandle);
227 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
228 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
229 }
230 pHandle->pInodeInfo = pInodeInfo;
231#endif
232
233 pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
234 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
235
236 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
237}
238
239
240#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
241 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
242
243/*
244 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
245 */
246
247static void vbsf_free_pipebuf(struct page *kpage)
248{
249 kunmap(kpage);
250 __free_pages(kpage, 0);
251}
252
253static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
254{
255 return 0;
256}
257
258static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
259{
260}
261
262static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
263{
264}
265
266static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
267{
268 return 0;
269}
270
271static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
272{
273 vbsf_free_pipebuf(pipe_buf->page);
274}
275
276static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
277{
278 return 0;
279}
280
281static struct pipe_buf_operations vbsf_pipe_buf_ops = {
282 .can_merge = 0,
283 .map = vbsf_pipe_buf_map,
284 .unmap = vbsf_pipe_buf_unmap,
285 .confirm = vbsf_pipe_buf_confirm,
286 .release = vbsf_pipe_buf_release,
287 .steal = vbsf_pipe_buf_steal,
288 .get = vbsf_pipe_buf_get,
289};
290
291static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
292 void *buf, uint32_t *nread, uint64_t pos)
293{
294 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
295 if (RT_FAILURE(rc)) {
296 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
297 rc));
298 return -EPROTO;
299 }
300 return 0;
301}
302
303# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
304# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
305
306ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
307{
308 size_t bytes_remaining = len;
309 loff_t orig_offset = *poffset;
310 loff_t offset = orig_offset;
311 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
312 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
313 struct vbsf_reg_info *sf_r = in->private_data;
314 ssize_t retval;
315 struct page *kpage = 0;
316 size_t nsent = 0;
317
318/** @todo rig up a FsPerf test for this code */
319 TRACE();
320 if (!S_ISREG(inode->i_mode)) {
321 LogFunc(("read from non regular file %d\n", inode->i_mode));
322 return -EINVAL;
323 }
324 if (!len) {
325 return 0;
326 }
327
328 LOCK_PIPE(pipe);
329
330 uint32_t req_size = 0;
331 while (bytes_remaining > 0) {
332 kpage = alloc_page(GFP_KERNEL);
333 if (unlikely(kpage == NULL)) {
334 UNLOCK_PIPE(pipe);
335 return -ENOMEM;
336 }
337 req_size = 0;
338 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
339 uint32_t chunk = 0;
340 void *kbuf = kmap(kpage);
341 while (chunk < req_size) {
342 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
343 if (retval < 0)
344 goto err;
345 if (nread == 0)
346 break;
347 chunk += nread;
348 offset += nread;
349 nread = req_size - chunk;
350 }
351 if (!pipe->readers) {
352 send_sig(SIGPIPE, current, 0);
353 retval = -EPIPE;
354 goto err;
355 }
356 if (pipe->nrbufs < PIPE_BUFFERS) {
357 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
358 pipebuf->page = kpage;
359 pipebuf->ops = &vbsf_pipe_buf_ops;
360 pipebuf->len = req_size;
361 pipebuf->offset = 0;
362 pipebuf->private = 0;
363 pipebuf->flags = 0;
364 pipe->nrbufs++;
365 nsent += req_size;
366 bytes_remaining -= req_size;
367 if (signal_pending(current))
368 break;
369 } else { /* pipe full */
370
371 if (flags & SPLICE_F_NONBLOCK) {
372 retval = -EAGAIN;
373 goto err;
374 }
375 vbsf_free_pipebuf(kpage);
376 break;
377 }
378 }
379 UNLOCK_PIPE(pipe);
380 if (!nsent && signal_pending(current))
381 return -ERESTARTSYS;
382 *poffset += nsent;
383 return offset - orig_offset;
384
385 err:
386 UNLOCK_PIPE(pipe);
387 vbsf_free_pipebuf(kpage);
388 return retval;
389}
390
391#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
392
393/**
394 * Helper for deciding whether we should do a read via the page cache or not.
395 *
396 * By default we will only use the page cache if there is a writable memory
397 * mapping of the file with a chance that it may have modified any of the pages
398 * already.
399 */
400DECLINLINE(bool) vbsf_should_use_cached_read(struct file *file, struct address_space *mapping, struct vbsf_super_info *sf_g)
401{
402 return mapping
403 && mapping->nrpages > 0
404 && mapping_writably_mapped(mapping)
405 && !(file->f_flags & O_DIRECT)
406 && 1 /** @todo make this behaviour configurable at mount time (sf_g) */;
407}
408
409/** Wrapper around put_page / page_cache_release. */
410DECLINLINE(void) vbsf_put_page(struct page *pPage)
411{
412#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
413 put_page(pPage);
414#else
415 page_cache_release(pPage);
416#endif
417}
418
419
420/** Wrapper around get_page / page_cache_get. */
421DECLINLINE(void) vbsf_get_page(struct page *pPage)
422{
423#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
424 get_page(pPage);
425#else
426 page_cache_get(pPage);
427#endif
428}
429
430
431/** Companion to vbsf_lock_user_pages(). */
432DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack)
433{
434 /* We don't mark kernel pages dirty: */
435 if (fLockPgHack)
436 fSetDirty = false;
437
438 while (cPages-- > 0)
439 {
440 struct page *pPage = papPages[cPages];
441 if (fSetDirty && !PageReserved(pPage))
442 SetPageDirty(pPage);
443 vbsf_put_page(pPage);
444 }
445}
446
447
448/**
449 * Worker for vbsf_lock_user_pages_failed_check_kernel() and
450 * vbsf_iter_lock_pages().
451 */
452static int vbsf_lock_kernel_pages(uint8_t *pbStart, bool fWrite, size_t cPages, struct page **papPages)
453{
454 uintptr_t const uPtrFrom = (uintptr_t)pbStart;
455 uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1;
456 uint8_t *pbPage = (uint8_t *)uPtrLast;
457 size_t iPage = cPages;
458
459 /*
460 * Touch the pages first (paranoia^2).
461 */
462 if (fWrite) {
463 uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom;
464 while (iPage-- > 0) {
465 *pbProbe = *pbProbe;
466 pbProbe += PAGE_SIZE;
467 }
468 } else {
469 uint8_t const *pbProbe = (uint8_t const *)uPtrFrom;
470 while (iPage-- > 0) {
471 ASMProbeReadByte(pbProbe);
472 pbProbe += PAGE_SIZE;
473 }
474 }
475
476 /*
477 * Get the pages.
478 * Note! Fixes here probably apply to rtR0MemObjNativeLockKernel as well.
479 */
480 iPage = cPages;
481 if ( uPtrFrom >= (unsigned long)__va(0)
482 && uPtrLast < (unsigned long)high_memory) {
483 /* The physical page mapping area: */
484 while (iPage-- > 0) {
485 struct page *pPage = papPages[iPage] = virt_to_page(pbPage);
486 vbsf_get_page(pPage);
487 pbPage -= PAGE_SIZE;
488 }
489 } else {
490 /* This is vmalloc or some such thing, so go thru page tables: */
491 while (iPage-- > 0) {
492 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
493 if (pPage) {
494 papPages[iPage] = pPage;
495 vbsf_get_page(pPage);
496 pbPage -= PAGE_SIZE;
497 } else {
498 while (++iPage < cPages) {
499 pPage = papPages[iPage];
500 vbsf_put_page(pPage);
501 }
502 return -EFAULT;
503 }
504 }
505 }
506 return 0;
507}
508
509
510/**
511 * Catches kernel_read() and kernel_write() calls and works around them.
512 *
513 * The file_operations::read and file_operations::write callbacks supposedly
514 * hand us the user buffers to read into and write out of. To allow the kernel
515 * to read and write without allocating buffers in userland, kernel_read()
516 * and kernel_write() increase the user space address limit before calling us
517 * so that copyin/copyout won't reject it. Our problem is that get_user_pages()
518 * works on the userspace address space structures and will not be fooled by an
519 * increased addr_limit.
520 *
521 * This code tries to detect this situation and fake get_user_lock() for the
522 * kernel buffer.
523 */
524static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
525 struct page **papPages, bool *pfLockPgHack)
526{
527 /*
528 * Check that this is valid user memory that is actually in the kernel range.
529 */
530#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
531 if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
532 && uPtrFrom >= USER_DS.seg)
533#else
534 if ( access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
535 && uPtrFrom >= USER_DS.seg)
536#endif
537 {
538 int rc = vbsf_lock_kernel_pages((uint8_t *)uPtrFrom, fWrite, cPages, papPages);
539 if (rc == 0) {
540 *pfLockPgHack = true;
541 return 0;
542 }
543 }
544
545 return rcFailed;
546}
547
548
549/** Wrapper around get_user_pages. */
550DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
551{
552# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
553 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
554 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
555# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
556 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
557# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
558 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
559# else
560 struct task_struct *pTask = current;
561 size_t cPagesLocked;
562 down_read(&pTask->mm->mmap_sem);
563 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
564 up_read(&pTask->mm->mmap_sem);
565# endif
566 *pfLockPgHack = false;
567 if (cPagesLocked == cPages)
568 return 0;
569
570 /*
571 * It failed.
572 */
573 if (cPagesLocked < 0)
574 return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
575
576 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
577
578 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
579 return -EFAULT;
580}
581
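/*
 * Editor's note - minimal usage sketch, not part of the original file: the
 * lock/use/unlock pairing expected for vbsf_lock_user_pages() and
 * vbsf_unlock_user_pages().  Sizes are hard-coded for brevity; see
 * vbsf_reg_read_locking() below for the real pattern.
 */
#if 0 /* example only, never built */
static int vbsf_example_lock_two_pages(uintptr_t uPtrUser)
{
    struct page *apPages[2];
    bool         fLockPgHack;
    int rc = vbsf_lock_user_pages(uPtrUser, 2 /*cPages*/, true /*fWrite*/, apPages, &fLockPgHack);
    if (rc == 0) {
        /* ... hand page_to_phys(apPages[0]), page_to_phys(apPages[1]) to the host ... */
        vbsf_unlock_user_pages(apPages, 2, true /*fSetDirty*/, fLockPgHack);
    }
    return rc;
}
#endif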
582
583/**
584 * Read function used when accessing files that are memory mapped.
585 *
586 * We read from the page cache here to present a coherent picture of
587 * the file content.
588 */
589static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
590{
591#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
592 struct iovec iov = { .iov_base = buf, .iov_len = size };
593 struct iov_iter iter;
594 struct kiocb kiocb;
595 ssize_t cbRet;
596
597 init_sync_kiocb(&kiocb, file);
598 kiocb.ki_pos = *off;
599 iov_iter_init(&iter, READ, &iov, 1, size);
600
601 cbRet = generic_file_read_iter(&kiocb, &iter);
602
603 *off = kiocb.ki_pos;
604 return cbRet;
605
606#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
607 struct iovec iov = { .iov_base = buf, .iov_len = size };
608 struct kiocb kiocb;
609 ssize_t cbRet;
610
611 init_sync_kiocb(&kiocb, file);
612 kiocb.ki_pos = *off;
613
614 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
615 if (cbRet == -EIOCBQUEUED)
616 cbRet = wait_on_sync_kiocb(&kiocb);
617
618 *off = kiocb.ki_pos;
619 return cbRet;
620
621#else /* 2.6.18 or earlier: */
622 return generic_file_read(file, buf, size, off);
623#endif
624}
625
626
627/**
628 * Fallback case of vbsf_reg_read() that locks the user buffers and lets the host
629 * write directly to them.
630 */
631static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
632 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
633{
634 /*
635 * Lock pages and execute the read, taking care not to pass the host
636 * more than it can handle in one go or more than we care to allocate
637 * page arrays for. The latter limit is set at just short of 32KB due
638 * to how the physical heap works.
639 */
640 struct page *apPagesStack[16];
641 struct page **papPages = &apPagesStack[0];
642 struct page **papPagesFree = NULL;
643 VBOXSFREADPGLSTREQ *pReq;
644 loff_t offFile = *off;
645 ssize_t cbRet = -ENOMEM;
646 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
647 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
648 bool fLockPgHack;
649
650 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
651 while (!pReq && cMaxPages > 4) {
652 cMaxPages /= 2;
653 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
654 }
655 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
656 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
657 if (pReq && papPages) {
658 cbRet = 0;
659 for (;;) {
660 /*
661 * Figure out how much to process now and lock the user pages.
662 */
663 int rc;
664 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
665 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
666 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
667 if (cPages <= cMaxPages)
668 cbChunk = size;
669 else {
670 cPages = cMaxPages;
671 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
672 }
673
674 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack);
675 if (rc == 0) {
676 size_t iPage = cPages;
677 while (iPage-- > 0)
678 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
679 } else {
680 cbRet = rc;
681 break;
682 }
683
684 /*
685 * Issue the request and unlock the pages.
686 */
687 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
688
689 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack);
690
691 if (RT_SUCCESS(rc)) {
692 /*
693 * Success, advance position and buffer.
694 */
695 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
696 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
697 cbRet += cbActual;
698 offFile += cbActual;
699 buf = (uint8_t *)buf + cbActual;
700 size -= cbActual;
701
702 /*
703 * Are we done already? If so commit the new file offset.
704 */
705 if (!size || cbActual < cbChunk) {
706 *off = offFile;
707 break;
708 }
709 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
710 /*
711 * The host probably doesn't have enough heap to handle the
712 * request, reduce the page count and retry.
713 */
714 cMaxPages /= 4;
715 Assert(cMaxPages > 0);
716 } else {
717 /*
718 * If we've successfully read stuff, return it rather than
719 * the error. (Not sure if this is such a great idea...)
720 */
721 if (cbRet > 0)
722 *off = offFile;
723 else
724 cbRet = -EPROTO;
725 break;
726 }
727 }
728 }
729 if (papPagesFree)
730 kfree(papPages);
731 if (pReq)
732 VbglR0PhysHeapFree(pReq);
733 return cbRet;
734}
735
736
737/**
738 * Read from a regular file.
739 *
740 * @param file the file
741 * @param buf the buffer
742 * @param size length of the buffer
743 * @param off offset within the file (in/out).
744 * @returns the number of read bytes on success, Linux error code otherwise
745 */
746static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
747{
748 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
749 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
750 struct vbsf_reg_info *sf_r = file->private_data;
751 struct address_space *mapping = inode->i_mapping;
752
753 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
754
755 if (!S_ISREG(inode->i_mode)) {
756 LogFunc(("read from non regular file %d\n", inode->i_mode));
757 return -EINVAL;
758 }
759
760 /** @todo XXX Check read permission according to inode->i_mode! */
761
762 if (!size)
763 return 0;
764
765 /*
766 * If there is a mapping and O_DIRECT isn't in effect, we must at a
767 * minimum heed dirty pages in the mapping and read from them. For simplicity
768 * though, we just do page cache reading when there are writable
769 * mappings around with any kind of pages loaded.
770 */
771 if (vbsf_should_use_cached_read(file, mapping, sf_g))
772 return vbsf_reg_read_mapped(file, buf, size, off);
773
774 /*
775 * For small requests, try to use an embedded buffer provided we get a heap block
776 * that does not cross page boundaries (see host code).
777 */
778 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
779 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
780 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
781 if (pReq) {
782 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
783 ssize_t cbRet;
784 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
785 if (RT_SUCCESS(vrc)) {
786 cbRet = pReq->Parms.cb32Read.u.value32;
787 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
788 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
789 *off += cbRet;
790 else
791 cbRet = -EFAULT;
792 } else
793 cbRet = -EPROTO;
794 VbglR0PhysHeapFree(pReq);
795 return cbRet;
796 }
797 VbglR0PhysHeapFree(pReq);
798 }
799 }
800
801#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
802 /*
803 * For medium sized requests try use a bounce buffer.
804 */
805 if (size <= _64K /** @todo make this configurable? */) {
806 void *pvBounce = kmalloc(size, GFP_KERNEL);
807 if (pvBounce) {
808 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
809 if (pReq) {
810 ssize_t cbRet;
811 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
812 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
813 if (RT_SUCCESS(vrc)) {
814 cbRet = pReq->Parms.cb32Read.u.value32;
815 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
816 if (copy_to_user(buf, pvBounce, cbRet) == 0)
817 *off += cbRet;
818 else
819 cbRet = -EFAULT;
820 } else
821 cbRet = -EPROTO;
822 VbglR0PhysHeapFree(pReq);
823 kfree(pvBounce);
824 return cbRet;
825 }
826 kfree(pvBounce);
827 }
828 }
829#endif
830
831 return vbsf_reg_read_locking(file, buf, size, off, sf_g, sf_r);
832}
833
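/*
 * Editor's note (summary, not part of the original file): vbsf_reg_read()
 * thus picks between three strategies: (1) read via the page cache when
 * writable mappings may hold newer data, (2) use a single embedded request
 * buffer for small reads, and (3) fall back to vbsf_reg_read_locking(),
 * which pins the user pages and lets the host transfer straight into them.
 */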
834
835/**
836 * Helper that synchronizes the page cache content with something we just wrote
837 * to the host.
838 */
839void vbsf_reg_write_sync_page_cache(struct address_space *mapping, loff_t offFile, uint32_t cbRange,
840 uint8_t const *pbSrcBuf, struct page **papSrcPages, uint32_t offSrcPage)
841{
842 if (mapping && mapping->nrpages > 0) {
843 /*
844 * Work the pages in the write range.
845 */
846 while (cbRange > 0) {
847 /*
848 * Look up the page at offFile. We're fine if there aren't
849 * any there. We skip it if it's dirty or is being written
850 * back, at least for now.
851 */
852 size_t const offDstPage = offFile & PAGE_OFFSET_MASK;
853 size_t const cbToCopy = RT_MIN(PAGE_SIZE - offDstPage, cbRange);
854 pgoff_t const idxPage = offFile >> PAGE_SHIFT;
855 struct page *pDstPage = find_lock_page(mapping, idxPage);
856 if (pDstPage) {
857 if ( pDstPage->mapping == mapping /* ignore if re-purposed (paranoia) */
858 && pDstPage->index == idxPage
859 && !PageDirty(pDstPage) /* ignore if dirty */
860 && !PageWriteback(pDstPage) /* ignore if being written back */ ) {
861 /*
862 * Map the page and do the copying.
863 */
864 uint8_t *pbDst = (uint8_t *)kmap(pDstPage);
865 if (pbSrcBuf)
866 memcpy(&pbDst[offDstPage], pbSrcBuf, cbToCopy);
867 else {
868 uint32_t const cbSrc0 = PAGE_SIZE - offSrcPage;
869 uint8_t const *pbSrc = (uint8_t const *)kmap(papSrcPages[0]);
870 memcpy(&pbDst[offDstPage], &pbSrc[offSrcPage], RT_MIN(cbToCopy, cbSrc0));
871 kunmap(papSrcPages[0]);
872 if (cbToCopy > cbSrc0) {
873 pbSrc = (uint8_t const *)kmap(papSrcPages[1]);
874 memcpy(&pbDst[offDstPage + cbSrc0], pbSrc, cbToCopy - cbSrc0);
875 kunmap(papSrcPages[1]);
876 }
877 }
878 kunmap(pDstPage);
879 flush_dcache_page(pDstPage);
880 if (cbToCopy == PAGE_SIZE)
881 SetPageUptodate(pDstPage);
882# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
883 mark_page_accessed(pDstPage);
884# endif
885 } else
886 SFLOGFLOW(("vbsf_reg_write_sync_page_cache: Skipping page %p: mapping=%p (vs %p) writeback=%d offset=%#lx (vs%#lx)\n",
887 pDstPage, pDstPage->mapping, mapping, PageWriteback(pDstPage), pDstPage->index, idxPage));
888 unlock_page(pDstPage);
889 vbsf_put_page(pDstPage);
890 }
891
892 /*
893 * Advance.
894 */
895 cbRange -= cbToCopy;
896 offFile += cbToCopy;
897 if (pbSrcBuf)
898 pbSrcBuf += cbToCopy;
899 else
900 {
901 offSrcPage += cbToCopy;
902 if (offSrcPage >= PAGE_SIZE) {
903 offSrcPage &= PAGE_OFFSET_MASK;
904 papSrcPages++;
905 }
906 }
907 }
908 }
909}
910
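/*
 * Editor's note, not part of the original file: the alternative to the
 * copy-over done above would be to evict the affected range from the page
 * cache (e.g. via invalidate_inode_pages2_range()) and let it be re-read
 * from the host on the next access.  According to the r77770 commit message
 * this copying approach is expected to be faster and to work better with
 * older kernels, which is why it is used here.
 */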
911
912/**
913 * Fallback case of vbsf_reg_write() that locks the user buffers and lets the host
914 * read directly from them.
915 */
916static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
917 struct inode *inode, struct vbsf_inode_info *sf_i,
918 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
919{
920 /*
921 * Lock pages and execute the write, taking care not to pass the host
922 * more than it can handle in one go or more than we care to allocate
923 * page arrays for. The latter limit is set at just short of 32KB due
924 * to how the physical heap works.
925 */
926 struct page *apPagesStack[16];
927 struct page **papPages = &apPagesStack[0];
928 struct page **papPagesFree = NULL;
929 VBOXSFWRITEPGLSTREQ *pReq;
930 ssize_t cbRet = -ENOMEM;
931 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
932 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
933 bool fLockPgHack;
934
935 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
936 while (!pReq && cMaxPages > 4) {
937 cMaxPages /= 2;
938 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
939 }
940 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
941 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
942 if (pReq && papPages) {
943 cbRet = 0;
944 for (;;) {
945 /*
946 * Figure out how much to process now and lock the user pages.
947 */
948 int rc;
949 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
950 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
951 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
952 if (cPages <= cMaxPages)
953 cbChunk = size;
954 else {
955 cPages = cMaxPages;
956 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
957 }
958
959 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack);
960 if (rc == 0) {
961 size_t iPage = cPages;
962 while (iPage-- > 0)
963 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
964 } else {
965 cbRet = rc;
966 break;
967 }
968
969 /*
970 * Issue the request and unlock the pages.
971 */
972 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
973 if (RT_SUCCESS(rc)) {
974 /*
975 * Success, advance position and buffer.
976 */
977 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
978 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
979
980 vbsf_reg_write_sync_page_cache(inode->i_mapping, offFile, cbActual, NULL /*pbKrnlBuf*/,
981 papPages, (uintptr_t)buf & PAGE_OFFSET_MASK);
982 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
983
984 cbRet += cbActual;
985 offFile += cbActual;
986 buf = (uint8_t *)buf + cbActual;
987 size -= cbActual;
988 if (offFile > i_size_read(inode))
989 i_size_write(inode, offFile);
990 sf_i->force_restat = 1; /* mtime (and size) may have changed */
991
992 /*
993 * Are we done already? If so commit the new file offset.
994 */
995 if (!size || cbActual < cbChunk) {
996 *off = offFile;
997 break;
998 }
999 } else {
1000 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
1001 if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
1002 /*
1003 * The host probably doesn't have enough heap to handle the
1004 * request, reduce the page count and retry.
1005 */
1006 cMaxPages /= 4;
1007 Assert(cMaxPages > 0);
1008 } else {
1009 /*
1010 * If we've successfully written stuff, return it rather than
1011 * the error. (Not sure if this is such a great idea...)
1012 */
1013 if (cbRet > 0)
1014 *off = offFile;
1015 else
1016 cbRet = -EPROTO;
1017 break;
1018 }
1019 }
1020 }
1021 }
1022 if (papPagesFree)
1023 kfree(papPages);
1024 if (pReq)
1025 VbglR0PhysHeapFree(pReq);
1026 return cbRet;
1027}
1028
1029
1030/**
1031 * Write to a regular file.
1032 *
1033 * @param file the file
1034 * @param buf the buffer
1035 * @param size length of the buffer
1036 * @param off offset within the file
1037 * @returns the number of written bytes on success, Linux error code otherwise
1038 */
1039static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
1040{
1041 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
1042 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1043 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1044 struct vbsf_reg_info *sf_r = file->private_data;
1045 struct address_space *mapping = inode->i_mapping;
1046 loff_t pos;
1047
1048 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
1049 BUG_ON(!sf_i);
1050 BUG_ON(!sf_g);
1051 BUG_ON(!sf_r);
1052 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1053
1054 pos = *off;
1055 /** @todo This should be handled by the host, it returning the new file
1056 * offset when appending. We may have an outdated i_size value here! */
1057 if (file->f_flags & O_APPEND)
1058 pos = i_size_read(inode);
1059
1060 /** @todo XXX Check write permission according to inode->i_mode! */
1061
1062 if (!size) {
1063 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
1064 *off = pos;
1065 return 0;
1066 }
1067
1068 /*
1069 * If there are active writable mappings, coordinate with any
1070 * pending writes via those.
1071 */
1072 if ( mapping
1073 && mapping->nrpages > 0
1074 && mapping_writably_mapped(mapping)) {
1075#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1076 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
1077 if (err)
1078 return err;
1079#else
1080 /** @todo ... */
1081#endif
1082 }
1083
1084 /*
1085 * For small requests, try to use an embedded buffer provided we get a heap block
1086 * that does not cross page boundaries (see host code).
1087 */
1088 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1089 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
1090 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1091 if ( pReq
1092 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1093 ssize_t cbRet;
1094 if (copy_from_user(pReq->abData, buf, size) == 0) {
1095 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1096 pos, (uint32_t)size);
1097 if (RT_SUCCESS(vrc)) {
1098 cbRet = pReq->Parms.cb32Write.u.value32;
1099 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1100 vbsf_reg_write_sync_page_cache(mapping, pos, (uint32_t)cbRet, pReq->abData,
1101 NULL /*papSrcPages*/, 0 /*offSrcPage0*/);
1102 pos += cbRet;
1103 *off = pos;
1104 if (pos > i_size_read(inode))
1105 i_size_write(inode, pos);
1106 } else
1107 cbRet = -EPROTO;
1108 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1109 } else
1110 cbRet = -EFAULT;
1111
1112 VbglR0PhysHeapFree(pReq);
1113 return cbRet;
1114 }
1115 if (pReq)
1116 VbglR0PhysHeapFree(pReq);
1117 }
1118
1119#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
1120 /*
1121 * For medium sized requests try use a bounce buffer.
1122 */
1123 if (size <= _64K /** @todo make this configurable? */) {
1124 void *pvBounce = kmalloc(size, GFP_KERNEL);
1125 if (pvBounce) {
1126 if (copy_from_user(pvBounce, buf, size) == 0) {
1127 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1128 if (pReq) {
1129 ssize_t cbRet;
1130 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->handle, pos,
1131 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
1132 if (RT_SUCCESS(vrc)) {
1133 cbRet = pReq->Parms.cb32Write.u.value32;
1134 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1135 vbsf_reg_write_sync_page_cache(mapping, pos, (uint32_t)cbRet, (uint8_t const *)pvBounce,
1136 NULL /*papSrcPages*/, 0 /*offSrcPage0*/);
1137 pos += cbRet;
1138 *off = pos;
1139 if (pos > i_size_read(inode))
1140 i_size_write(inode, pos);
1141 } else
1142 cbRet = -EPROTO;
1143 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1144 VbglR0PhysHeapFree(pReq);
1145 kfree(pvBounce);
1146 return cbRet;
1147 }
1148 kfree(pvBounce);
1149 } else {
1150 kfree(pvBounce);
1151 return -EFAULT;
1152 }
1153 }
1154 }
1155#endif
1156
1157 return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
1158}
1159
1160#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
1161/*
1162 * Hide missing uio.h functionality in older kernels.
1163 */
1164
1165static size_t copy_from_iter(uint8_t *pbDst, size_t cbToCopy, struct iov_iter *pSrcIter)
1166{
1167 size_t const cbTotal = cbToCopy;
1168 Assert(iov_iter_count(pSrcIter) >= cbToCopy);
1169 if (pSrcIter->type & ITER_BVEC) {
1170 while (cbToCopy > 0) {
1171 size_t const offPage = (uintptr_t)pbDst & PAGE_OFFSET_MASK;
1172 size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
1173 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbDst);
1174 size_t cbCopied = copy_page_from_iter(pPage, offPage, cbThisCopy, pSrcIter);
1175 AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
1176 pbDst += cbCopied;
1177 cbToCopy -= cbCopied;
1178 if (cbCopied != cbToCopy)
1179 break;
1180 }
1181 } else {
1182 while (cbToCopy > 0) {
1183 size_t cbThisCopy = iov_iter_single_seg_count(pSrcIter);
1184 if (cbThisCopy > 0) {
1185 if (cbThisCopy > cbToCopy)
1186 cbThisCopy = cbToCopy;
1187 if (pSrcIter->type & ITER_KVEC)
1188 memcpy(pbDst, (void *)pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy);
1189 else if (copy_from_user(pbDst, pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy) != 0)
1190 break;
1191 pbDst += cbThisCopy;
1192 cbToCopy -= cbThisCopy;
1193 }
1194 iov_iter_advance(pSrcIter, cbThisCopy);
1195 }
1196 }
1197 return cbTotal - cbToCopy;
1198}
1199
1200static size_t copy_to_iter(uint8_t const *pbSrc, size_t cbToCopy, struct iov_iter *pDstIter)
1201{
1202 size_t const cbTotal = cbToCopy;
1203 Assert(iov_iter_count(pDstIter) >= cbToCopy);
1204 if (pDstIter->type & ITER_BVEC) {
1205 while (cbToCopy > 0) {
1206 size_t const offPage = (uintptr_t)pbSrc & PAGE_OFFSET_MASK;
1207 size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
1208 struct page *pPage = rtR0MemObjLinuxVirtToPage((void *)pbSrc);
1209 size_t cbCopied = copy_page_to_iter(pPage, offPage, cbThisCopy, pDstIter);
1210 AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
1211 pbSrc += cbCopied;
1212 cbToCopy -= cbCopied;
1213 if (cbCopied != cbToCopy)
1214 break;
1215 }
1216 } else {
1217 while (cbToCopy > 0) {
1218 size_t cbThisCopy = iov_iter_single_seg_count(pDstIter);
1219 if (cbThisCopy > 0) {
1220 if (cbThisCopy > cbToCopy)
1221 cbThisCopy = cbToCopy;
1222 if (pDstIter->type & ITER_KVEC)
1223 memcpy((void *)pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy);
1224 else if (copy_to_user(pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy) != 0) {
1225 break;
1226 }
1227 pbSrc += cbThisCopy;
1228 cbToCopy -= cbThisCopy;
1229 }
1230 iov_iter_advance(pDstIter, cbThisCopy);
1231 }
1232 }
1233 return cbTotal - cbToCopy;
1234}
1235
1236#endif /* 3.16.0 >= linux < 3.18.0 */
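/*
 * Editor's note, not part of the original file: the two helpers above mimic
 * the copy_from_iter()/copy_to_iter() primitives that the mainline kernel
 * only gained around 3.18, which is why the emulation is limited to the
 * 3.16.x and 3.17.x kernels.
 */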
1237#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1238
1239/**
1240 * Companion to vbsf_iter_lock_pages().
1241 */
1242DECLINLINE(void) vbsf_iter_unlock_pages(struct iov_iter *iter, struct page **papPages, size_t cPages, bool fSetDirty)
1243{
1244 /* We don't mark kernel pages dirty: */
1245 if (iter->type & ITER_KVEC)
1246 fSetDirty = false;
1247
1248 while (cPages-- > 0)
1249 {
1250 struct page *pPage = papPages[cPages];
1251 if (fSetDirty && !PageReserved(pPage))
1252 SetPageDirty(pPage);
1253 vbsf_put_page(pPage);
1254 }
1255}
1256
1257
1258/**
1259 * Locks up to @a cMaxPages from the I/O vector iterator, advancing the
1260 * iterator.
1261 *
1262 * @returns 0 on success, negative errno value on failure.
1263 * @param iter The iterator to lock pages from.
1264 * @param fWrite Whether to write (true) or read (false) lock the pages.
1265 * @param pStash Where we stash peek results.
1266 * @param cMaxPages The maximum number of pages to get.
1267 * @param papPages Where to return the locked pages.
1268 * @param pcPages Where to return the number of pages.
1269 * @param poffPage0 Where to return the offset into the first page.
1270 * @param pcbChunk Where to return the number of bytes covered.
1271 */
1272static int vbsf_iter_lock_pages(struct iov_iter *iter, bool fWrite, struct vbsf_iter_stash *pStash, size_t cMaxPages,
1273 struct page **papPages, size_t *pcPages, size_t *poffPage0, size_t *pcbChunk)
1274{
1275 size_t cbChunk = 0;
1276 size_t cPages = 0;
1277 size_t offPage0 = 0;
1278 int rc = 0;
1279
1280 Assert(iov_iter_count(iter) + pStash->cb > 0);
1281 if (!(iter->type & ITER_KVEC)) {
1282 /*
1283 * Do we have a stashed page?
1284 */
1285 if (pStash->pPage) {
1286 papPages[0] = pStash->pPage;
1287 offPage0 = pStash->off;
1288 cbChunk = pStash->cb;
1289 cPages = 1;
1290 pStash->pPage = NULL;
1291 pStash->off = 0;
1292 pStash->cb = 0;
1293 if ( offPage0 + cbChunk < PAGE_SIZE
1294 || iov_iter_count(iter) == 0) {
1295 *poffPage0 = offPage0;
1296 *pcbChunk = cbChunk;
1297 *pcPages = cPages;
1298 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx (stashed)\n",
1299 rc, cPages, offPage0, cbChunk));
1300 return 0;
1301 }
1302 cMaxPages -= 1;
1303 SFLOG3(("vbsf_iter_lock_pages: Picked up stashed page: %#zx LB %#zx\n", offPage0, cbChunk));
1304 } else {
1305# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1306 /*
1307 * Copy out our starting point to assist rewinding.
1308 */
1309 pStash->offFromEnd = iov_iter_count(iter);
1310 pStash->Copy = *iter;
1311# endif
1312 }
1313
1314 /*
1315 * Get pages segment by segment.
1316 */
1317 do {
1318 /*
1319 * Make a special case of the first time thru here, since that's
1320 * the most typical scenario.
1321 */
1322 ssize_t cbSegRet;
1323 if (cPages == 0) {
1324# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
1325 while (!iov_iter_single_seg_count(iter)) /* Old code didn't skip empty segments which caused EFAULTs. */
1326 iov_iter_advance(iter, 0);
1327# endif
1328 cbSegRet = iov_iter_get_pages(iter, papPages, iov_iter_count(iter), cMaxPages, &offPage0);
1329 if (cbSegRet > 0) {
1330 iov_iter_advance(iter, cbSegRet);
1331 cbChunk = (size_t)cbSegRet;
1332 cPages = RT_ALIGN_Z(offPage0 + cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1333 cMaxPages -= cPages;
1334 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages -> %#zx @ %#zx; %#zx pages [first]\n", cbSegRet, offPage0, cPages));
1335 if ( cMaxPages == 0
1336 || ((offPage0 + (size_t)cbSegRet) & PAGE_OFFSET_MASK))
1337 break;
1338 } else {
1339 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1340 rc = (int)cbSegRet;
1341 break;
1342 }
1343 } else {
1344 /*
1345 * Probe first page of new segment to check that we've got a zero offset and
1346 * can continue on the current chunk. Stash the page if the offset isn't zero.
1347 */
1348 size_t offPgProbe;
1349 size_t cbSeg = iov_iter_single_seg_count(iter);
1350 while (!cbSeg) {
1351 iov_iter_advance(iter, 0);
1352 cbSeg = iov_iter_single_seg_count(iter);
1353 }
1354 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), 1, &offPgProbe);
1355 if (cbSegRet > 0) {
1356 iov_iter_advance(iter, cbSegRet); /** @todo maybe not do this if we stash the page? */
1357 Assert(offPgProbe + cbSegRet <= PAGE_SIZE);
1358 if (offPgProbe == 0) {
1359 cbChunk += cbSegRet;
1360 cPages += 1;
1361 cMaxPages -= 1;
1362 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx\n", cbSegRet, offPgProbe));
1363 if ( cMaxPages == 0
1364 || cbSegRet != PAGE_SIZE)
1365 break;
1366
1367 /*
1368 * Get the rest of the segment (if anything remaining).
1369 */
1370 cbSeg -= cbSegRet;
1371 if (cbSeg > 0) {
1372 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), cMaxPages, &offPgProbe);
1373 if (cbSegRet > 0) {
1374 size_t const cPgRet = RT_ALIGN_Z((size_t)cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1375 Assert(offPgProbe == 0);
1376 iov_iter_advance(iter, cbSegRet);
1377 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages() -> %#zx; %#zx pages\n", cbSegRet, cPgRet));
1378 cPages += cPgRet;
1379 cMaxPages -= cPgRet;
1380 cbChunk += cbSegRet;
1381 if ( cMaxPages == 0
1382 || ((size_t)cbSegRet & PAGE_OFFSET_MASK))
1383 break;
1384 } else {
1385 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1386 rc = (int)cbSegRet;
1387 break;
1388 }
1389 }
1390 } else {
1391 /* The segment didn't start at a page boundary, so stash it for
1392 the next round: */
1393 SFLOGFLOW(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx; stashed\n", cbSegRet, offPgProbe));
1394 Assert(papPages[cPages]);
1395 pStash->pPage = papPages[cPages];
1396 pStash->off = offPgProbe;
1397 pStash->cb = cbSegRet;
1398 break;
1399 }
1400 } else {
1401 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1402 rc = (int)cbSegRet;
1403 break;
1404 }
1405 }
1406 Assert(cMaxPages > 0);
1407 } while (iov_iter_count(iter) > 0);
1408
1409 } else {
1410 /*
1411 * The silly iov_iter_get_pages_alloc() function doesn't handle KVECs,
1412 * so everyone needs to do that by themselves.
1413 *
1414 * Note! Fixes here may apply to rtR0MemObjNativeLockKernel()
1415 * and vbsf_lock_user_pages_failed_check_kernel() as well.
1416 */
1417# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1418 pStash->offFromEnd = iov_iter_count(iter);
1419 pStash->Copy = *iter;
1420# endif
1421 do {
1422 uint8_t *pbBuf;
1423 size_t offStart;
1424 size_t cPgSeg;
1425
1426 size_t cbSeg = iov_iter_single_seg_count(iter);
1427 while (!cbSeg) {
1428 iov_iter_advance(iter, 0);
1429 cbSeg = iov_iter_single_seg_count(iter);
1430 }
1431
1432# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
1433 pbBuf = iter->kvec->iov_base + iter->iov_offset;
1434# else
1435 pbBuf = iter->iov->iov_base + iter->iov_offset;
1436# endif
1437 offStart = (uintptr_t)pbBuf & PAGE_OFFSET_MASK;
1438 if (!cPages)
1439 offPage0 = offStart;
1440 else if (offStart)
1441 break;
1442
1443 cPgSeg = RT_ALIGN_Z(cbSeg, PAGE_SIZE) >> PAGE_SHIFT;
1444 if (cPgSeg > cMaxPages) {
1445 cPgSeg = cMaxPages;
1446 cbSeg = (cPgSeg << PAGE_SHIFT) - offStart;
1447 }
1448
1449 rc = vbsf_lock_kernel_pages(pbBuf, fWrite, cPgSeg, &papPages[cPages]);
1450 if (rc == 0) {
1451 iov_iter_advance(iter, cbSeg);
1452 cbChunk += cbSeg;
1453 cPages += cPgSeg;
1454 cMaxPages -= cPgSeg;
1455 if ( cMaxPages == 0
1456 || ((offStart + cbSeg) & PAGE_OFFSET_MASK) != 0)
1457 break;
1458 } else
1459 break;
1460 } while (iov_iter_count(iter) > 0);
1461 }
1462
1463 /*
1464 * Clean up if we failed; set return values.
1465 */
1466 if (rc == 0) {
1467 /* likely */
1468 } else {
1469 if (cPages > 0)
1470 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1471 offPage0 = cbChunk = cPages = 0;
1472 }
1473 *poffPage0 = offPage0;
1474 *pcbChunk = cbChunk;
1475 *pcPages = cPages;
1476 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx\n", rc, cPages, offPage0, cbChunk));
1477 return rc;
1478}
1479
1480
1481/**
1482 * Rewinds the I/O vector.
1483 */
1484static bool vbsf_iter_rewind(struct iov_iter *iter, struct vbsf_iter_stash *pStash, size_t cbToRewind, size_t cbChunk)
1485{
1486 size_t cbExtra;
1487 if (!pStash->pPage) {
1488 cbExtra = 0;
1489 } else {
1490 cbExtra = pStash->cb;
1491 vbsf_put_page(pStash->pPage);
1492 pStash->pPage = NULL;
1493 pStash->cb = 0;
1494 pStash->off = 0;
1495 }
1496
1497# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
1498 iov_iter_revert(iter, cbToRewind + cbExtra);
1499 return true;
1500# else
1501 /** @todo impl this */
1502 return false;
1503# endif
1504}
1505
1506
1507/**
1508 * Cleans up the page locking stash.
1509 */
1510DECLINLINE(void) vbsf_iter_cleanup_stash(struct iov_iter *iter, struct vbsf_iter_stash *pStash)
1511{
1512 if (pStash->pPage)
1513 vbsf_iter_rewind(iter, pStash, 0, 0);
1514}
1515
1516
1517/**
1518 * Calculates the longest span of pages we could transfer to the host in a
1519 * single request.
1520 *
1521 * @returns Page count, non-zero.
1522 * @param iter The I/O vector iterator to inspect.
1523 */
1524static size_t vbsf_iter_max_span_of_pages(struct iov_iter *iter)
1525{
1526 size_t cPages;
1527 if (iter_is_iovec(iter) || (iter->type & ITER_KVEC)) {
1528 const struct iovec *pCurIov = iter->iov;
1529 size_t cLeft = iter->nr_segs;
1530 size_t cPagesSpan = 0;
1531
1532 /* iovec and kvec are identical, except for the __user tagging of iov_base. */
1533 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, struct kvec, iov_base);
1534 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, struct kvec, iov_len);
1535 AssertCompile(sizeof(struct iovec) == sizeof(struct kvec));
1536
1537 cPages = 1;
1538 AssertReturn(cLeft > 0, cPages);
1539
1540 /* Special case: segment offset. */
1541 if (iter->iov_offset > 0) {
1542 if (iter->iov_offset < pCurIov->iov_len) {
1543 size_t const cbSegLeft = pCurIov->iov_len - iter->iov_offset;
1544 size_t const offPage0 = ((uintptr_t)pCurIov->iov_base + iter->iov_offset) & PAGE_OFFSET_MASK;
1545 cPages = cPagesSpan = RT_ALIGN_Z(offPage0 + cbSegLeft, PAGE_SIZE) >> PAGE_SHIFT;
1546 if ((offPage0 + cbSegLeft) & PAGE_OFFSET_MASK)
1547 cPagesSpan = 0;
1548 }
1549 SFLOGFLOW(("vbsf_iter: seg[0]= %p LB %#zx\n", pCurIov->iov_base, pCurIov->iov_len));
1550 pCurIov++;
1551 cLeft--;
1552 }
1553
1554 /* Full segments. */
1555 while (cLeft-- > 0) {
1556 if (pCurIov->iov_len > 0) {
1557 size_t const offPage0 = (uintptr_t)pCurIov->iov_base & PAGE_OFFSET_MASK;
1558 if (offPage0 == 0) {
1559 if (!(pCurIov->iov_len & PAGE_OFFSET_MASK)) {
1560 cPagesSpan += pCurIov->iov_len >> PAGE_SHIFT;
1561 } else {
1562 cPagesSpan += RT_ALIGN_Z(pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1563 if (cPagesSpan > cPages)
1564 cPages = cPagesSpan;
1565 cPagesSpan = 0;
1566 }
1567 } else {
1568 if (cPagesSpan > cPages)
1569 cPages = cPagesSpan;
1570 if (!((offPage0 + pCurIov->iov_len) & PAGE_OFFSET_MASK)) {
1571 cPagesSpan = pCurIov->iov_len >> PAGE_SHIFT;
1572 } else {
1573 cPagesSpan += RT_ALIGN_Z(offPage0 + pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1574 if (cPagesSpan > cPages)
1575 cPages = cPagesSpan;
1576 cPagesSpan = 0;
1577 }
1578 }
1579 }
1580 SFLOGFLOW(("vbsf_iter: seg[%u]= %p LB %#zx\n", iter->nr_segs - cLeft, pCurIov->iov_base, pCurIov->iov_len));
1581 pCurIov++;
1582 }
1583 if (cPagesSpan > cPages)
1584 cPages = cPagesSpan;
1585 } else {
1586 /* Won't bother with accurate counts for the next two types, just make
1587 some rough estimates (do pipes have segments?): */
1588 size_t cSegs = iter->type & ITER_BVEC ? RT_MAX(1, iter->nr_segs) : 1;
1589 cPages = (iov_iter_count(iter) + (PAGE_SIZE * 2 - 2) * cSegs) >> PAGE_SHIFT;
1590 }
1591 SFLOGFLOW(("vbsf_iter_max_span_of_pages: returns %#zx\n", cPages));
1592 return cPages;
1593}
1594
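/*
 * Editor's note - worked example, not part of the original file, assuming a
 * 4 KiB PAGE_SIZE and iov_offset == 0: for an iovec with two segments
 * { base=0x10000, len=0x3000 } and { base=0x20800, len=0x800 }, the first
 * segment starts and ends on page boundaries and contributes a span of 3
 * pages, while the second starts mid-page and therefore cannot extend that
 * span; vbsf_iter_max_span_of_pages() returns 3, i.e. at most 3 pages worth
 * of this iterator can be packed into a single host request.
 */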
1595
1596/**
1597 * Worker for vbsf_reg_read_iter() that deals with larger reads using page
1598 * locking.
1599 */
1600static ssize_t vbsf_reg_read_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToRead,
1601 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
1602{
1603 /*
1604 * Estimate how many pages we may possibly submit in a single request so
1605 * that we can allocate matching request buffer and page array.
1606 */
1607 struct page *apPagesStack[16];
1608 struct page **papPages = &apPagesStack[0];
1609 struct page **papPagesFree = NULL;
1610 VBOXSFREADPGLSTREQ *pReq;
1611 ssize_t cbRet = 0;
1612 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1613 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1614
1615 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1616 while (!pReq && cMaxPages > 4) {
1617 cMaxPages /= 2;
1618 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1619 }
1620 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1621 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1622 if (pReq && papPages) {
1623
1624 /*
1625 * The read loop.
1626 */
1627 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1628 do {
1629 /*
1630 * Grab as many pages as we can. This means that if adjacent
1631 * segments both start and end at a page boundary, we can
1632 * do them both in the same transfer from the host.
1633 */
1634 size_t cPages = 0;
1635 size_t cbChunk = 0;
1636 size_t offPage0 = 0;
1637 int rc = vbsf_iter_lock_pages(iter, true /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1638 if (rc == 0) {
1639 size_t iPage = cPages;
1640 while (iPage-- > 0)
1641 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1642 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1643 AssertStmt(cbChunk <= cbToRead, cbChunk = cbToRead);
1644 } else {
1645 cbRet = rc;
1646 break;
1647 }
1648
1649 /*
1650 * Issue the request and unlock the pages.
1651 */
1652 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, cbChunk, cPages);
1653 SFLOGFLOW(("vbsf_reg_read_iter_locking: VbglR0SfHostReqReadPgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x\n",
1654 rc, pReq->Parms.cb32Read.u.value32, cbChunk, cbToRead, cPages, offPage0));
1655
1656 vbsf_iter_unlock_pages(iter, papPages, cPages, true /*fSetDirty*/);
1657
1658 if (RT_SUCCESS(rc)) {
1659 /*
1660 * Success, advance position and buffer.
1661 */
1662 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
1663 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1664 cbRet += cbActual;
1665 kio->ki_pos += cbActual;
1666 cbToRead -= cbActual;
1667
1668 /*
1669 * Are we done already?
1670 */
1671 if (!cbToRead)
1672 break;
1673 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1674 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1675 iov_iter_truncate(iter, 0);
1676 break;
1677 }
1678 } else {
1679 /*
1680 * Try rewind the iter structure.
1681 */
1682 bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1683 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1684 /*
1685 * The host probably doesn't have enough heap to handle the
1686 * request, reduce the page count and retry.
1687 */
1688 cMaxPages /= 4;
1689 Assert(cMaxPages > 0);
1690 } else {
1691 /*
1692 * If we've successfully read stuff, return it rather than
1693 * the error. (Not sure if this is such a great idea...)
1694 */
1695 if (cbRet <= 0)
1696 cbRet = -EPROTO;
1697 break;
1698 }
1699 }
1700 } while (cbToRead > 0);
1701
1702 vbsf_iter_cleanup_stash(iter, &Stash);
1703 }
1704 else
1705 cbRet = -ENOMEM;
1706 if (papPagesFree)
1707 kfree(papPages);
1708 if (pReq)
1709 VbglR0PhysHeapFree(pReq);
1710 SFLOGFLOW(("vbsf_reg_read_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1711 return cbRet;
1712}
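/*
 * Note on request sizing (illustration only; the 64-page figure below is an
 * assumed cMaxIoPages value, not something defined here): with an iterator
 * spanning at least that much, the worker above first asks the physical heap
 * for a page-list request with 64 entries and, if that fails, retries at 32,
 * 16, 8 and finally 4 entries before giving up with -ENOMEM.  A
 * VERR_NO_MEMORY reply from the host later quarters the per-request page
 * count (64 -> 16 -> 4) and retries, reusing the already allocated request
 * buffer with fewer pages per round trip.
 */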
1713
1714
1715/**
1716 * Read into I/O vector iterator.
1717 *
1718 * @returns Number of bytes read on success, negative errno on error.
1719 * @param kio The kernel I/O control block (or something like that).
1720 * @param iter The I/O vector iterator describing the buffer.
1721 */
1722static ssize_t vbsf_reg_read_iter(struct kiocb *kio, struct iov_iter *iter)
1723{
1724 size_t cbToRead = iov_iter_count(iter);
1725 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1726 struct address_space *mapping = inode->i_mapping;
1727
1728 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1729 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1730
1731 SFLOGFLOW(("vbsf_reg_read_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1732 inode, kio->ki_filp, cbToRead, kio->ki_pos, iter->type));
1733 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1734
1735 /*
1736 * Do we have anything at all to do here?
1737 */
1738 if (!cbToRead)
1739 return 0;
1740
1741 /*
1742 * If there is a mapping and O_DIRECT isn't in effect, we must heed
1743 * dirty pages in the mapping and read from them. For simplicity
1744 * though, we just do page cache reading when there are writable
1745 * mappings around with any kind of pages loaded.
1746 */
1747 if (vbsf_should_use_cached_read(kio->ki_filp, mapping, sf_g))
1748 return generic_file_read_iter(kio, iter);
1749
1750 /*
1751 * For now we reject async I/O requests.
1752 */
1753 if (!is_sync_kiocb(kio)) {
1754 SFLOGFLOW(("vbsf_reg_read_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1755 return -EOPNOTSUPP;
1756 }
1757
1758 /*
1759 * For small requests, try to use an embedded buffer, provided we get a heap block
1760 * that does not cross page boundaries (see host code).
1761 */
1762 if (cbToRead <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
1763 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + cbToRead;
1764 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1765 if (pReq) {
1766 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1767 ssize_t cbRet;
1768 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, (uint32_t)cbToRead);
1769 if (RT_SUCCESS(vrc)) {
1770 cbRet = pReq->Parms.cb32Read.u.value32;
1771 AssertStmt(cbRet <= (ssize_t)cbToRead, cbRet = cbToRead);
1772 if (copy_to_iter(pReq->abData, cbRet, iter) == cbRet) {
1773 kio->ki_pos += cbRet;
1774 if (cbRet < cbToRead)
1775 iov_iter_truncate(iter, 0);
1776 } else
1777 cbRet = -EFAULT;
1778 } else
1779 cbRet = -EPROTO;
1780 VbglR0PhysHeapFree(pReq);
1781 SFLOGFLOW(("vbsf_reg_read_iter: returns %#zx (%zd)\n", cbRet, cbRet));
1782 return cbRet;
1783 }
1784 VbglR0PhysHeapFree(pReq);
1785 }
1786 }
1787
1788 /*
1789 * Otherwise do the page locking thing.
1790 */
1791 return vbsf_reg_read_iter_locking(kio, iter, cbToRead, sf_g, sf_r);
1792}
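/*
 * Sizing note for the embedded-buffer fast path above (and its twin in
 * vbsf_reg_write_iter below): assuming the usual 4 KiB page, the cut-off is
 * 3 KiB minus the fixed part of the embedded request, so the whole request
 * is guaranteed to fit inside a single page provided the allocator returns a
 * block that does not straddle a page boundary -- which is exactly what the
 * (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq check verifies
 * before the request is used.
 */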
1793
1794
1795/**
1796 * Worker for vbsf_reg_write_iter() that deals with larger writes using page
1797 * locking.
1798 */
1799static ssize_t vbsf_reg_write_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToWrite, loff_t offFile,
1800 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
1801 struct inode *inode, struct vbsf_inode_info *sf_i, struct address_space *mapping)
1802{
1803 /*
1804 * Estimate how many pages we may possibly submit in a single request so
1805 * that we can allocate a matching request buffer and page array.
1806 */
1807 struct page *apPagesStack[16];
1808 struct page **papPages = &apPagesStack[0];
1809 struct page **papPagesFree = NULL;
1810 VBOXSFWRITEPGLSTREQ *pReq;
1811 ssize_t cbRet = 0;
1812 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1813 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1814
1815 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1816 while (!pReq && cMaxPages > 4) {
1817 cMaxPages /= 2;
1818 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1819 }
1820 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1821 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1822 if (pReq && papPages) {
1823
1824 /*
1825 * The write loop.
1826 */
1827 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1828 do {
1829 /*
1830 * Grab as many pages as we can. This means that if adjacent
1831 * segments both start and end at a page boundary, we can
1832 * do them both in the same transfer from the host.
1833 */
1834 size_t cPages = 0;
1835 size_t cbChunk = 0;
1836 size_t offPage0 = 0;
1837 int rc = vbsf_iter_lock_pages(iter, false /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1838 if (rc == 0) {
1839 size_t iPage = cPages;
1840 while (iPage-- > 0)
1841 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1842 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1843 AssertStmt(cbChunk <= cbToWrite, cbChunk = cbToWrite);
1844 } else {
1845 cbRet = rc;
1846 break;
1847 }
1848
1849 /*
1850 * Issue the request and unlock the pages.
1851 */
1852 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
1853 SFLOGFLOW(("vbsf_reg_write_iter_locking: VbglR0SfHostReqWritePgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x)\n",
1854 rc, pReq->Parms.cb32Write.u.value32, cbChunk, cbToWrite, cPages, offPage0));
1855 if (RT_SUCCESS(rc)) {
1856 /*
1857 * Success, advance position and buffer.
1858 */
1859 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
1860 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1861
1862 vbsf_reg_write_sync_page_cache(mapping, offFile, cbActual, NULL /*pbSrcBuf*/, papPages, offPage0);
1863 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1864
1865 cbRet += cbActual;
1866 offFile += cbActual;
1867 kio->ki_pos = offFile;
1868 cbToWrite -= cbActual;
1869 if (offFile > i_size_read(inode))
1870 i_size_write(inode, offFile);
1871 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1872
1873 /*
1874 * Are we done already?
1875 */
1876 if (!cbToWrite)
1877 break;
1878 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1879 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1880 iov_iter_truncate(iter, 0);
1881 break;
1882 }
1883 } else {
1884 /*
1885 * Try to rewind the iter structure.
1886 */
1887 bool fRewindOkay;
1888 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1889 fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1890 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1891 /*
1892 * The host probably doesn't have enough heap to handle the
1893 * request, reduce the page count and retry.
1894 */
1895 cMaxPages /= 4;
1896 Assert(cMaxPages > 0);
1897 } else {
1898 /*
1899 * If we've successfully written stuff, return it rather than
1900 * the error. (Not sure if this is such a great idea...)
1901 */
1902 if (cbRet <= 0)
1903 cbRet = -EPROTO;
1904 break;
1905 }
1906 }
1907 } while (cbToWrite > 0);
1908
1909 vbsf_iter_cleanup_stash(iter, &Stash);
1910 }
1911 else
1912 cbRet = -ENOMEM;
1913 if (papPagesFree)
1914 kfree(papPages);
1915 if (pReq)
1916 VbglR0PhysHeapFree(pReq);
1917 SFLOGFLOW(("vbsf_reg_write_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1918 return cbRet;
1919}
1920
1921
1922
1923/**
1924 * Write from I/O vector iterator.
1925 *
1926 * @returns Number of bytes written on success, negative errno on error.
1927 * @param kio The kernel I/O control block (or something like that).
1928 * @param iter The I/O vector iterator describing the buffer.
1929 */
1930static ssize_t vbsf_reg_write_iter(struct kiocb *kio, struct iov_iter *iter)
1931{
1932 size_t cbToWrite = iov_iter_count(iter);
1933 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1934 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1935 struct address_space *mapping = inode->i_mapping;
1936
1937 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1938 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1939 loff_t offFile = kio->ki_pos;
1940
1941 SFLOGFLOW(("vbsf_reg_write_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1942 inode, kio->ki_filp, cbToWrite, offFile, iter->type));
1943 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1944
1945 /*
1946 * Enforce APPEND flag.
1947 */
1948 /** @todo This should be handled by the host, it returning the new file
1949 * offset when appending. We may have an outdated i_size value here! */
1950#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1951 if (kio->ki_flags & IOCB_APPEND)
1952#else
1953 if (kio->ki_filp->f_flags & O_APPEND)
1954#endif
1955 kio->ki_pos = offFile = i_size_read(inode);
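    /* Illustration of the @todo above, with made-up numbers: if the cached
       i_size is a stale 100 bytes while the host has meanwhile grown the file
       to 150 bytes, a 64-byte append is positioned at offset 100 and
       overwrites host bytes 100..149 rather than landing at the real end of
       the file. */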
1956
1957 /*
1958 * Do we have anything at all to do here?
1959 */
1960 if (!cbToWrite)
1961 return 0;
1962
1963 /*
1964 * For now we reject async I/O requests.
1965 */
1966 if (!is_sync_kiocb(kio)) {
1967 SFLOGFLOW(("vbsf_reg_write_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1968 return -EOPNOTSUPP;
1969 }
1970
1971 /*
1972 * If there are active writable mappings, coordinate with any
1973 * pending writes via those.
1974 */
1975 if ( mapping
1976 && mapping->nrpages > 0
1977 && mapping_writably_mapped(mapping)) {
1978#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1979 int err = filemap_fdatawait_range(mapping, offFile, offFile + cbToWrite - 1);
1980 if (err)
1981 return err;
1982#else
1983 /** @todo ... */
1984#endif
1985 }
1986
1987 /*
1988 * For small requests, try to use an embedded buffer, provided we get a heap block
1989 * that does not cross page boundaries (see host code).
1990 */
1991 if (cbToWrite <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1992 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + cbToWrite;
1993 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1994 if (pReq) {
1995 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1996 ssize_t cbRet;
1997 if (copy_from_iter(pReq->abData, cbToWrite, iter) == cbToWrite) {
1998 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1999 offFile, (uint32_t)cbToWrite);
2000 if (RT_SUCCESS(vrc)) {
2001 cbRet = pReq->Parms.cb32Write.u.value32;
2002 AssertStmt(cbRet <= (ssize_t)cbToWrite, cbRet = cbToWrite);
2003 vbsf_reg_write_sync_page_cache(mapping, offFile, (uint32_t)cbRet, pReq->abData,
2004 NULL /*papSrcPages*/, 0 /*offSrcPage0*/);
2005 kio->ki_pos = offFile += cbRet;
2006 if (offFile > i_size_read(inode))
2007 i_size_write(inode, offFile);
2008# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
2009 if ((size_t)cbRet < cbToWrite)
2010 iov_iter_revert(iter, cbToWrite - cbRet);
2011# endif
2012 } else
2013 cbRet = -EPROTO;
2014 sf_i->force_restat = 1; /* mtime (and size) may have changed */
2015 } else
2016 cbRet = -EFAULT;
2017 VbglR0PhysHeapFree(pReq);
2018 SFLOGFLOW(("vbsf_reg_write_iter: returns %#zx (%zd)\n", cbRet, cbRet));
2019 return cbRet;
2020 }
2021 VbglR0PhysHeapFree(pReq);
2022 }
2023 }
2024
2025 /*
2026 * Otherwise do the page locking thing.
2027 */
2028 return vbsf_reg_write_iter_locking(kio, iter, cbToWrite, offFile, sf_g, sf_r, inode, sf_i, mapping);
2029}
2030
2031#endif /* >= 3.16.0 */
2032
2033/**
2034 * Used by vbsf_reg_open() and vbsf_inode_atomic_open() to convert Linux open flags into shared folders create flags.
2035 *
2036 * @returns shared folders create flags.
2037 * @param fLnxOpen The linux O_XXX flags to convert.
2038 * @param pfHandle Pointer to vbsf_handle::fFlags.
2039 * @param pszCaller Caller, for logging purposes.
2040 */
2041uint32_t vbsf_linux_oflags_to_vbox(unsigned fLnxOpen, uint32_t *pfHandle, const char *pszCaller)
2042{
2043 uint32_t fVBoxFlags = SHFL_CF_ACCESS_DENYNONE;
2044
2045 /*
2046 * Disposition.
2047 */
2048 if (fLnxOpen & O_CREAT) {
2049 Log(("%s: O_CREAT set\n", pszCaller));
2050 fVBoxFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
2051 if (fLnxOpen & O_EXCL) {
2052 Log(("%s: O_EXCL set\n", pszCaller));
2053 fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_EXISTS;
2054 } else if (fLnxOpen & O_TRUNC) {
2055 Log(("%s: O_TRUNC set\n", pszCaller));
2056 fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
2057 } else
2058 fVBoxFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
2059 } else {
2060 fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
2061 if (fLnxOpen & O_TRUNC) {
2062 Log(("%s: O_TRUNC set\n", pszCaller));
2063 fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
2064 }
2065 }
2066
2067 /*
2068 * Access.
2069 */
2070 switch (fLnxOpen & O_ACCMODE) {
2071 case O_RDONLY:
2072 fVBoxFlags |= SHFL_CF_ACCESS_READ;
2073 *pfHandle |= VBSF_HANDLE_F_READ;
2074 break;
2075
2076 case O_WRONLY:
2077 fVBoxFlags |= SHFL_CF_ACCESS_WRITE;
2078 *pfHandle |= VBSF_HANDLE_F_WRITE;
2079 break;
2080
2081 case O_RDWR:
2082 fVBoxFlags |= SHFL_CF_ACCESS_READWRITE;
2083 *pfHandle |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
2084 break;
2085
2086 default:
2087 BUG();
2088 }
2089
2090 if (fLnxOpen & O_APPEND) {
2091 Log(("%s: O_APPEND set\n", pszCaller));
2092 fVBoxFlags |= SHFL_CF_ACCESS_APPEND;
2093 *pfHandle |= VBSF_HANDLE_F_APPEND;
2094 }
2095
2096 /*
2097 * Only directories?
2098 */
2099 if (fLnxOpen & O_DIRECTORY) {
2100 Log(("%s: O_DIRECTORY set\n", pszCaller));
2101 fVBoxFlags |= SHFL_CF_DIRECTORY;
2102 }
2103
2104 return fVBoxFlags;
2105}
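/*
 * Worked example of the conversion above (illustrative sketch only, never
 * compiled; the local variables are hypothetical, and the resulting flag
 * values follow directly from the code above):
 */
#if 0
{
    uint32_t fHandleFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
    uint32_t fCreateFlags = vbsf_linux_oflags_to_vbox(O_CREAT | O_WRONLY | O_APPEND, &fHandleFlags, __FUNCTION__);
    /* fCreateFlags == SHFL_CF_ACCESS_DENYNONE | SHFL_CF_ACT_CREATE_IF_NEW | SHFL_CF_ACT_OPEN_IF_EXISTS
     *               | SHFL_CF_ACCESS_WRITE    | SHFL_CF_ACCESS_APPEND
     * and fHandleFlags has additionally picked up VBSF_HANDLE_F_WRITE | VBSF_HANDLE_F_APPEND. */
}
#endif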
2106
2107
2108/**
2109 * Open a regular file.
2110 *
2111 * @param inode the inode
2112 * @param file the file
2113 * @returns 0 on success, Linux error code otherwise
2114 */
2115static int vbsf_reg_open(struct inode *inode, struct file *file)
2116{
2117 int rc, rc_linux = 0;
2118 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2119 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2120 struct vbsf_reg_info *sf_r;
2121 struct dentry *dentry = VBSF_GET_F_DENTRY(file);
2122 VBOXSFCREATEREQ *pReq;
2123
2124 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
2125 BUG_ON(!sf_g);
2126 BUG_ON(!sf_i);
2127
2128 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
2129 if (!sf_r) {
2130 LogRelFunc(("could not allocate reg info\n"));
2131 return -ENOMEM;
2132 }
2133
2134 RTListInit(&sf_r->Handle.Entry);
2135 sf_r->Handle.cRefs = 1;
2136 sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
2137 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
2138
2139 /* Already open? */
2140 if (sf_i->handle != SHFL_HANDLE_NIL) {
2141 /*
2142 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
2143 * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
2144 * about the access flags (SHFL_CF_ACCESS_*).
2145 */
2146 sf_i->force_restat = 1;
2147 sf_r->Handle.hHost = sf_i->handle;
2148 sf_i->handle = SHFL_HANDLE_NIL;
2149 file->private_data = sf_r;
2150
2151 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
2152 vbsf_handle_append(sf_i, &sf_r->Handle);
2153 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
2154 return 0;
2155 }
2156
2157 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
2158 if (!pReq) {
2159 kfree(sf_r);
2160 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
2161 return -ENOMEM;
2162 }
2163 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
2164 RT_ZERO(pReq->CreateParms);
2165 pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
2166
2167 /* We check the value of pReq->CreateParms.Handle afterwards to
2168 * find out if the call succeeded or failed, as the API does not seem
2169 * to cleanly distinguish error and informational messages.
2170 *
2171 * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
2172 * to make the shared folders host service use our fMode parameter */
2173
2174 /* We ignore O_EXCL, as the Linux kernel seems to call create
2175 beforehand itself, so O_EXCL should always fail. */
2176 pReq->CreateParms.CreateFlags = vbsf_linux_oflags_to_vbox(file->f_flags & ~O_EXCL, &sf_r->Handle.fFlags, __FUNCTION__);
2177 pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
2178 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
2179 sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
2180 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
2181 if (RT_FAILURE(rc)) {
2182 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
2183 kfree(sf_r);
2184 VbglR0PhysHeapFree(pReq);
2185 return -RTErrConvertToErrno(rc);
2186 }
2187
2188 if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
2189 vbsf_dentry_chain_increase_ttl(dentry);
2190 rc_linux = 0;
2191 } else {
2192 switch (pReq->CreateParms.Result) {
2193 case SHFL_PATH_NOT_FOUND:
2194 rc_linux = -ENOENT;
2195 break;
2196 case SHFL_FILE_NOT_FOUND:
2197 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
2198 rc_linux = -ENOENT;
2199 break;
2200 case SHFL_FILE_EXISTS:
2201 vbsf_dentry_chain_increase_ttl(dentry);
2202 rc_linux = -EEXIST;
2203 break;
2204 default:
2205 vbsf_dentry_chain_increase_parent_ttl(dentry);
2206 rc_linux = 0;
2207 break;
2208 }
2209 }
2210
2211 sf_i->force_restat = 1; /** @todo Why?!? */
2212 sf_r->Handle.hHost = pReq->CreateParms.Handle;
2213 file->private_data = sf_r;
2214 vbsf_handle_append(sf_i, &sf_r->Handle);
2215 VbglR0PhysHeapFree(pReq);
2216 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
2217 return rc_linux;
2218}
2219
2220
2221/**
2222 * Close a regular file.
2223 *
2224 * @param inode the inode
2225 * @param file the file
2226 * @returns 0 on success, Linux error code otherwise
2227 */
2228static int vbsf_reg_release(struct inode *inode, struct file *file)
2229{
2230 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2231 struct vbsf_reg_info *sf_r = file->private_data;
2232
2233 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
2234 if (sf_r) {
2235 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2236 Assert(sf_g);
2237
2238#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
2239 /* See the smbfs source (file.c). mmap in particular can cause data to be
2240 * written to the file after it is closed, which we can't cope with. We
2241 * copy and paste the body of filemap_write_and_wait() here as it was not
2242 * defined before 2.6.6 and not exported until quite a bit later. */
2243 /* filemap_write_and_wait(inode->i_mapping); */
2244 if (inode->i_mapping->nrpages
2245 && filemap_fdatawrite(inode->i_mapping) != -EIO)
2246 filemap_fdatawait(inode->i_mapping);
2247#endif
2248
2249 /* Release sf_r, closing the handle if we're the last user. */
2250 file->private_data = NULL;
2251 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
2252
2253 sf_i->handle = SHFL_HANDLE_NIL;
2254 }
2255 return 0;
2256}
2257
2258/**
2259 * Wrapper around generic/default seek function that ensures that we've got
2260 * the up-to-date file size when doing anything relative to EOF.
2261 *
2262 * The issue is that the host may extend the file while we weren't looking and
2263 * if the caller wishes to append data, it may end up overwriting existing data
2264 * if we operate with a stale size. So, we always retrieve the file size on EOF
2265 * relative seeks.
2266 */
2267static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
2268{
2269 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
2270
2271 switch (whence) {
2272#ifdef SEEK_HOLE
2273 case SEEK_HOLE:
2274 case SEEK_DATA:
2275#endif
2276 case SEEK_END: {
2277 struct vbsf_reg_info *sf_r = file->private_data;
2278 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost,
2279 true /*fForce*/, false /*fInodeLocked*/);
2280 if (rc == 0)
2281 break;
2282 return rc;
2283 }
2284 }
2285
2286#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
2287 return generic_file_llseek(file, off, whence);
2288#else
2289 return default_llseek(file, off, whence);
2290#endif
2291}
2292
2293/**
2294 * Flush region of file - chiefly mmap/msync.
2295 *
2296 * We cannot use the noop_fsync / simple_sync_file here as that means
2297 * msync(,,MS_SYNC) will return before the data hits the host, thereby
2298 * causing coherency issues with O_DIRECT access to the same file as
2299 * well as any host interaction with the file.
2300 */
2301#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
2302static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2303{
2304# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2305 return __generic_file_fsync(file, start, end, datasync);
2306# else
2307 return generic_file_fsync(file, start, end, datasync);
2308# endif
2309}
2310#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
2311static int vbsf_reg_fsync(struct file *file, int datasync)
2312{
2313 return generic_file_fsync(file, datasync);
2314}
2315#else /* < 2.6.35 */
2316static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
2317{
2318# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
2319 return simple_fsync(file, dentry, datasync);
2320# else
2321 int rc;
2322 struct inode *inode = dentry->d_inode;
2323 AssertReturn(inode, -EINVAL);
2324
2325 /** @todo What about file_fsync()? (<= 2.5.11) */
2326
2327# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2328 rc = sync_mapping_buffers(inode->i_mapping);
2329 if ( rc == 0
2330 && (inode->i_state & I_DIRTY)
2331 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
2332 ) {
2333 struct writeback_control wbc = {
2334 .sync_mode = WB_SYNC_ALL,
2335 .nr_to_write = 0
2336 };
2337 rc = sync_inode(inode, &wbc);
2338 }
2339# else /* < 2.5.12 */
2340 rc = fsync_inode_buffers(inode);
2341# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2342 rc |= fsync_inode_data_buffers(inode);
2343# endif
2344 /** @todo probably need to do more here... */
2345# endif /* < 2.5.12 */
2346 return rc;
2347# endif
2348}
2349#endif /* < 2.6.35 */
2350
2351
2352#ifdef SFLOG_ENABLED
2353/*
2354 * This is just for logging page faults and such.
2355 */
2356
2357/** Pointer to the ops generic_file_mmap returns the first time it's called. */
2358static struct vm_operations_struct const *g_pGenericFileVmOps = NULL;
2359/** Merge of g_LoggingVmOpsTemplate and g_pGenericFileVmOps. */
2360static struct vm_operations_struct g_LoggingVmOps;
2361
2362
2363/* Generic page fault callback: */
2364# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
2365static vm_fault_t vbsf_vmlog_fault(struct vm_fault *vmf)
2366{
2367 vm_fault_t rc;
2368 SFLOGFLOW(("vbsf_vmlog_fault: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
2369 rc = g_pGenericFileVmOps->fault(vmf);
2370 SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
2371 return rc;
2372}
2373# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2374static int vbsf_vmlog_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2375{
2376 int rc;
2377# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2378 SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->address));
2379# else
2380 SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
2381# endif
2382 rc = g_pGenericFileVmOps->fault(vma, vmf);
2383 SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
2384 return rc;
2385}
2386# endif
2387
2388
2389/* Special/generic page fault handler: */
2390# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 26)
2391# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 1)
2392static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
2393{
2394 struct page *page;
2395 SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p type=%p:{%#x}\n", vma, address, type, type ? *type : 0));
2396 page = g_pGenericFileVmOps->nopage(vma, address, type);
2397 SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
2398 return page;
2399}
2400# else
2401static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int write_access_or_unused)
2402{
2403 struct page *page;
2404 SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p wau=%d\n", vma, address, write_access_or_unused));
2405 page = g_pGenericFileVmOps->nopage(vma, address, write_access_or_unused);
2406 SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
2407 return page;
2408}
2409# endif /* < 2.6.26 */
2410
2411
2412/* Special page fault callback for making something writable: */
2413# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
2414static vm_fault_t vbsf_vmlog_page_mkwrite(struct vm_fault *vmf)
2415{
2416 vm_fault_t rc;
2417# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2418 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
2419# else
2420 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->virtual_address));
2421# endif
2422 rc = g_pGenericFileVmOps->page_mkwrite(vmf);
2423 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2424 return rc;
2425}
2426# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
2427static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2428{
2429 int rc;
2430 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
2431 rc = g_pGenericFileVmOps->page_mkwrite(vma, vmf);
2432 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2433 return rc;
2434}
2435# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
2436static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2437{
2438 int rc;
2439 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p page=%p\n", vma, page));
2440 rc = g_pGenericFileVmOps->page_mkwrite(vma, page);
2441 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2442 return rc;
2443}
2444# endif
2445
2446
2447/* Special page fault callback for mapping pages: */
2448# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2449static void vbsf_vmlog_map_pages(struct vm_fault *vmf, pgoff_t start, pgoff_t end)
2450{
2451 SFLOGFLOW(("vbsf_vmlog_map_pages: vmf=%p (flags=%#x addr=%p) start=%p end=%p\n", vmf, vmf->flags, vmf->address, start, end));
2452 g_pGenericFileVmOps->map_pages(vmf, start, end);
2453 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2454}
2455# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
2456static void vbsf_vmlog_map_pages(struct fault_env *fenv, pgoff_t start, pgoff_t end)
2457{
2458 SFLOGFLOW(("vbsf_vmlog_map_pages: fenv=%p (flags=%#x addr=%p) start=%p end=%p\n", fenv, fenv->flags, fenv->address, start, end));
2459 g_pGenericFileVmOps->map_pages(fenv, start, end);
2460 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2461}
2462# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
2463static void vbsf_vmlog_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
2464{
2465 SFLOGFLOW(("vbsf_vmlog_map_pages: vma=%p vmf=%p (flags=%#x addr=%p)\n", vma, vmf, vmf->flags, vmf->virtual_address));
2466 g_pGenericFileVmOps->map_pages(vma, vmf);
2467 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2468}
2469# endif
2470
2471
2472/** Overload template. */
2473static struct vm_operations_struct const g_LoggingVmOpsTemplate = {
2474# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2475 .fault = vbsf_vmlog_fault,
2476# endif
2477# if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 25)
2478 .nopage = vbsf_vmlog_nopage,
2479# endif
2480# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
2481 .page_mkwrite = vbsf_vmlog_page_mkwrite,
2482# endif
2483# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
2484 .map_pages = vbsf_vmlog_map_pages,
2485# endif
2486};
2487
2488/** file_operations::mmap wrapper for logging purposes. */
2489extern int vbsf_reg_mmap(struct file *file, struct vm_area_struct *vma)
2490{
2491 int rc;
2492 SFLOGFLOW(("vbsf_reg_mmap: file=%p vma=%p\n", file, vma));
2493 rc = generic_file_mmap(file, vma);
2494 if (rc == 0) {
2495 /* Merge the ops and template the first time thru (there's a race here). */
2496 if (g_pGenericFileVmOps == NULL) {
2497 uintptr_t const *puSrc1 = (uintptr_t *)vma->vm_ops;
2498 uintptr_t const *puSrc2 = (uintptr_t *)&g_LoggingVmOpsTemplate;
2499 uintptr_t volatile *puDst = (uintptr_t *)&g_LoggingVmOps;
2500 size_t cbLeft = sizeof(g_LoggingVmOps) / sizeof(*puDst);
2501 while (cbLeft-- > 0) {
2502 *puDst = *puSrc2 && *puSrc1 ? *puSrc2 : *puSrc1;
2503 puSrc1++;
2504 puSrc2++;
2505 puDst++;
2506 }
2507 g_pGenericFileVmOps = vma->vm_ops;
2508 vma->vm_ops = &g_LoggingVmOps;
2509 } else if (g_pGenericFileVmOps == vma->vm_ops)
2510 vma->vm_ops = &g_LoggingVmOps;
2511 else
2512 SFLOGFLOW(("vbsf_reg_mmap: Warning: vm_ops=%p, expected %p!\n", vma->vm_ops, g_pGenericFileVmOps));
2513 }
2514 SFLOGFLOW(("vbsf_reg_mmap: returns %d\n", rc));
2515 return rc;
2516}
2517
2518#endif /* SFLOG_ENABLED */
2519
2520
2521/**
2522 * File operations for regular files.
2523 */
2524struct file_operations vbsf_reg_fops = {
2525 .open = vbsf_reg_open,
2526 .read = vbsf_reg_read,
2527 .write = vbsf_reg_write,
2528#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2529 .read_iter = vbsf_reg_read_iter,
2530 .write_iter = vbsf_reg_write_iter,
2531#endif
2532 .release = vbsf_reg_release,
2533#ifdef SFLOG_ENABLED
2534 .mmap = vbsf_reg_mmap,
2535#else
2536 .mmap = generic_file_mmap,
2537#endif
2538#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2539# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
2540/** @todo This code is known to cause caching of data which should not be
2541 * cached. Investigate. */
2542# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2543 .splice_read = vbsf_splice_read,
2544# else
2545 .sendfile = generic_file_sendfile,
2546# endif
2547 .aio_read = generic_file_aio_read,
2548 .aio_write = generic_file_aio_write,
2549# endif
2550#endif
2551 .llseek = vbsf_reg_llseek,
2552 .fsync = vbsf_reg_fsync,
2553};
2554
2555struct inode_operations vbsf_reg_iops = {
2556#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 18)
2557 .getattr = vbsf_inode_getattr,
2558#else
2559 .revalidate = vbsf_inode_revalidate,
2560#endif
2561 .setattr = vbsf_inode_setattr,
2562};
2563
2564
2565#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2566
2567/**
2568 * Used to read the content of a page into the page cache.
2569 *
2570 * Needed for mmap and reads+writes when the file is mmapped in a
2571 * shared+writeable fashion.
2572 */
2573static int vbsf_readpage(struct file *file, struct page *page)
2574{
2575 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
2576 int err;
2577
2578 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
2579 Assert(PageLocked(page));
2580
2581 if (PageUptodate(page)) {
2582 unlock_page(page);
2583 return 0;
2584 }
2585
2586 if (!is_bad_inode(inode)) {
2587 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2588 if (pReq) {
2589 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2590 struct vbsf_reg_info *sf_r = file->private_data;
2591 uint32_t cbRead;
2592 int vrc;
2593
2594 pReq->PgLst.offFirstPage = 0;
2595 pReq->PgLst.aPages[0] = page_to_phys(page);
2596 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
2597 pReq,
2598 sf_r->Handle.hHost,
2599 (uint64_t)page->index << PAGE_SHIFT,
2600 PAGE_SIZE,
2601 1 /*cPages*/);
2602
2603 cbRead = pReq->Parms.cb32Read.u.value32;
2604 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
2605 VbglR0PhysHeapFree(pReq);
2606
2607 if (RT_SUCCESS(vrc)) {
2608 if (cbRead == PAGE_SIZE) {
2609 /* likely */
2610 } else {
2611 uint8_t *pbMapped = (uint8_t *)kmap(page);
2612 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
2613 kunmap(page);
2614 /** @todo truncate the inode file size? */
2615 }
2616
2617 flush_dcache_page(page);
2618 SetPageUptodate(page);
2619 unlock_page(page);
2620 return 0;
2621 }
2622 err = -RTErrConvertToErrno(vrc);
2623 } else
2624 err = -ENOMEM;
2625 } else
2626 err = -EIO;
2627 SetPageError(page);
2628 unlock_page(page);
2629 return err;
2630}
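/*
 * Short-read note for the function above (illustrative numbers, assuming
 * 4 KiB pages): reading page index 1 of a 5 KiB file gets 1 KiB back from
 * the host, and the remaining 3 KiB of the page are zero-filled before the
 * page is marked up to date, so the page cache never exposes stale data
 * beyond the end of the file.
 */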
2631
2632
2633/**
2634 * Used to write out the content of a dirty page cache page to the host file.
2635 *
2636 * Needed for mmap and writes when the file is mmapped in a shared+writeable
2637 * fashion.
2638 */
2639static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
2640{
2641 struct address_space *mapping = page->mapping;
2642 struct inode *inode = mapping->host;
2643 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2644 struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
2645 int err;
2646
2647 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
2648 inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0));
2649
2650 if (pHandle) {
2651 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2652 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2653 if (pReq) {
2654 uint64_t const cbFile = i_size_read(inode);
2655 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
2656 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
2657 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
2658 int vrc;
2659
2660 pReq->PgLst.offFirstPage = 0;
2661 pReq->PgLst.aPages[0] = page_to_phys(page);
2662 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
2663 pReq,
2664 pHandle->hHost,
2665 offInFile,
2666 cbToWrite,
2667 1 /*cPages*/);
2668 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
2669 ("%#x vs %#x\n", pReq->Parms.cb32Write.u.value32, cbToWrite),
2670 vrc = VERR_WRITE_ERROR);
2671 VbglR0PhysHeapFree(pReq);
2672
2673 if (RT_SUCCESS(vrc)) {
2674 /* Update the inode if we've extended the file. */
2675 /** @todo is this necessary given the cbToWrite calc above? */
2676 uint64_t const offEndOfWrite = offInFile + cbToWrite;
2677 if ( offEndOfWrite > cbFile
2678 && offEndOfWrite > i_size_read(inode))
2679 i_size_write(inode, offEndOfWrite);
2680
2681 if (PageError(page))
2682 ClearPageError(page);
2683
2684 err = 0;
2685 } else {
2686 ClearPageUptodate(page);
2687 err = -EPROTO;
2688 }
2689 } else
2690 err = -ENOMEM;
2691 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
2692 } else {
2693 static uint64_t volatile s_cCalls = 0;
2694 if (s_cCalls++ < 16)
2695 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
2696 err = -EPROTO;
2697 }
2698 unlock_page(page);
2699 return err;
2700}
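/*
 * Partial-page note for the function above (illustrative numbers, assuming
 * 4 KiB pages): for a file of 0x2345 bytes the last data page is index 2
 * (0x2345 >> PAGE_SHIFT), so writing back page 2 sends only 0x345 bytes to
 * the host, while pages 0 and 1 are written back in full.  This keeps the
 * host file from being padded out to a page multiple.
 */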
2701
2702# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2703/**
2704 * Called when writing thru the page cache (which we shouldn't be doing).
2705 */
2706int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
2707 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
2708{
2709 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
2710 * the page cache for any writes AFAIK. We could just as well use
2711 * simple_write_begin & simple_write_end here if we think we really
2712 * need to have non-NULL function pointers in the table... */
2713 static uint64_t volatile s_cCalls = 0;
2714 if (s_cCalls++ < 16) {
2715 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2716 (unsigned long long)pos, len, flags);
2717 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2718 (unsigned long long)pos, len, flags);
2719# ifdef WARN_ON
2720 WARN_ON(1);
2721# endif
2722 }
2723 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
2724}
2725# endif /* KERNEL_VERSION >= 2.6.24 */
2726
2727# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2728/**
2729 * This is needed to make open accept O_DIRECT as well as dealing with direct
2730 * I/O requests if we don't intercept them earlier.
2731 */
2732# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
2733static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2734# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
2735static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2736# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2737static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2738# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
2739static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2740# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
2741static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2742# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
2743static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2744# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
2745static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2746# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
2747static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
2748# else
2749 static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
2750# endif
2751{
2752 TRACE();
2753 return -EINVAL;
2754}
2755# endif
2756
2757/**
2758 * Address space (for the page cache) operations for regular files.
2759 *
2760 * @todo the FsPerf touch/flush (mmap) test fails on 4.4.0 (ubuntu 16.04 lts).
2761 */
2762struct address_space_operations vbsf_reg_aops = {
2763 .readpage = vbsf_readpage,
2764 .writepage = vbsf_writepage,
2765 /** @todo Need .writepages if we want msync performance... */
2766# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2767 .set_page_dirty = __set_page_dirty_buffers,
2768# endif
2769# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2770 .write_begin = vbsf_write_begin,
2771 .write_end = simple_write_end,
2772# else
2773 .prepare_write = simple_prepare_write,
2774 .commit_write = simple_commit_write,
2775# endif
2776# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2777 .direct_IO = vbsf_direct_IO,
2778# endif
2779};
2780
2781#endif /* LINUX_VERSION_CODE >= 2.6.0 */
2782