VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@ 77741

Last change on this file since 77741 was 77741, checked in by vboxsync, 6 years ago

linux/vboxsf: Fixed nls conversion issue (don't try convert the zero terminator, at least not on 2.6.8). Hacked around missing invalidate_mapping_pages symbol in kernels before 2.6.21.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.7 KB
1/* $Id: regops.c 77741 2019-03-17 03:42:04Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31
32/*********************************************************************************************************************************
33* Header Files *
34*********************************************************************************************************************************/
35#include "vfsmod.h"
36#include <linux/uio.h>
37#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
38# include <linux/aio.h> /* struct kiocb before 4.1 */
39#endif
40#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/buffer_head.h>
42#endif
43#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
44 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
45# include <linux/writeback.h>
46#endif
47#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
48 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
49# include <linux/splice.h>
50#endif
51#include <iprt/err.h>
52
53#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
54# define SEEK_END 2
55#endif
56
57#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
58# define iter_is_iovec(a_pIter) ( !((a_pIter)->type & (ITER_KVEC | ITER_BVEC)) )
59#endif
60
61#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)
62# define vm_fault_t int
63#endif
64
65
66/*********************************************************************************************************************************
67* Structures and Typedefs *
68*********************************************************************************************************************************/
69#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
70/** Used by vbsf_iter_lock_pages() to keep the first page of the next segment. */
71struct vbsf_iter_stash {
72 struct page *pPage;
73 size_t off;
74 size_t cb;
75# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
76 size_t offFromEnd;
77 struct iov_iter Copy;
78# endif
79};
80#endif /* >= 3.16.0 */
81/** Initializer for struct vbsf_iter_stash. */
82#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
83# define VBSF_ITER_STASH_INITIALIZER { NULL, 0 }
84#else
85# define VBSF_ITER_STASH_INITIALIZER { NULL, 0, ~(size_t)0 }
86#endif
87
88
89
90/**
91 * Called when an inode is released to unlink all handles that might possibly
92 * still be associated with it.
93 *
94 * @param pInodeInfo The inode which handles to drop.
95 */
96void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
97{
98 struct vbsf_handle *pCur, *pNext;
99 unsigned long fSavedFlags;
100 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
101 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
102
103 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
104 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
105 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
106 pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
107 RTListNodeRemove(&pCur->Entry);
108 }
109
110 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
111}
112
113
114/**
115 * Locates a handle that matches all the flags in @a fFlags.
116 *
117 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
118 * release it. NULL if no suitable handle was found.
119 * @param pInodeInfo The inode info to search.
120 * @param fFlagsSet The flags that must be set.
121 * @param fFlagsClear The flags that must be clear.
122 */
123struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
124{
125 struct vbsf_handle *pCur;
126 unsigned long fSavedFlags;
127 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
128
129 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
130 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
131 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
132 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
133 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
134 if (cRefs > 1) {
135 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
136 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
137 return pCur;
138 }
139 /* Oops, already being closed (safe as it's only ever increased here). */
140 ASMAtomicDecU32(&pCur->cRefs);
141 }
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
146 return NULL;
147}
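/*
 * Usage sketch (hypothetical caller): roughly how the retain done by
 * vbsf_handle_find() is expected to be paired with a release.  The flag value
 * and the vbsf_handle_release() signature are assumed from vfsmod.h and from
 * vbsf_handle_release_slow() above.
 *
 *     struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, 0);
 *     if (pHandle) {
 *         ... issue host requests against pHandle->hHost ...
 *         vbsf_handle_release(pHandle, sf_g, "example-caller");
 *     }
 */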
148
149
150/**
151 * Slow worker for vbsf_handle_release() that does the freeing.
152 *
153 * @returns 0 (ref count).
154 * @param pHandle The handle to release.
155 * @param sf_g The info structure for the shared folder associated
156 * with the handle.
157 * @param pszCaller The caller name (for logging failures).
158 */
159uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
160{
161 int rc;
162 unsigned long fSavedFlags;
163
164 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
165
166 /*
167 * Remove from the list.
168 */
169 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
170
171 AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
172 Assert(pHandle->pInodeInfo);
173 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
174
175 if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
176 pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
177 RTListNodeRemove(&pHandle->Entry);
178 }
179
180 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
181
182 /*
183 * Actually destroy it.
184 */
185 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
186 if (RT_FAILURE(rc))
187 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
188 pHandle->hHost = SHFL_HANDLE_NIL;
189 pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
190 kfree(pHandle);
191 return 0;
192}
193
194
195/**
196 * Appends a handle to a handle list.
197 *
198 * @param pInodeInfo The inode to add it to.
199 * @param pHandle The handle to add.
200 */
201void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
202{
203#ifdef VBOX_STRICT
204 struct vbsf_handle *pCur;
205#endif
206 unsigned long fSavedFlags;
207
208 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
209 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
210 ("%p %#x\n", pHandle, pHandle->fFlags));
211 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
212
213 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
214
215 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
216 ("%p %#x\n", pHandle, pHandle->fFlags));
217#ifdef VBOX_STRICT
218 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
219 Assert(pCur != pHandle);
220 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
221 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
222 }
223 pHandle->pInodeInfo = pInodeInfo;
224#endif
225
226 pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
227 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
228
229 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
230}
231
232
233#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
234 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
235
236/*
237 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
238 */
239
240static void vbsf_free_pipebuf(struct page *kpage)
241{
242 kunmap(kpage);
243 __free_pages(kpage, 0);
244}
245
246static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
247{
248 return 0;
249}
250
251static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
252{
253}
254
255static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
256{
257}
258
259static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
260{
261 return 0;
262}
263
264static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
265{
266 vbsf_free_pipebuf(pipe_buf->page);
267}
268
269static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
270{
271 return 0;
272}
273
274static struct pipe_buf_operations vbsf_pipe_buf_ops = {
275 .can_merge = 0,
276 .map = vbsf_pipe_buf_map,
277 .unmap = vbsf_pipe_buf_unmap,
278 .confirm = vbsf_pipe_buf_confirm,
279 .release = vbsf_pipe_buf_release,
280 .steal = vbsf_pipe_buf_steal,
281 .get = vbsf_pipe_buf_get,
282};
283
284static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
285 void *buf, uint32_t *nread, uint64_t pos)
286{
287 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
288 if (RT_FAILURE(rc)) {
289 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
290 rc));
291 return -EPROTO;
292 }
293 return 0;
294}
295
296# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
297# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
298
299ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
300{
301 size_t bytes_remaining = len;
302 loff_t orig_offset = *poffset;
303 loff_t offset = orig_offset;
304 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
305 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
306 struct vbsf_reg_info *sf_r = in->private_data;
307 ssize_t retval;
308 struct page *kpage = 0;
309 size_t nsent = 0;
310
311/** @todo rig up a FsPerf test for this code */
312 TRACE();
313 if (!S_ISREG(inode->i_mode)) {
314 LogFunc(("read from non regular file %d\n", inode->i_mode));
315 return -EINVAL;
316 }
317 if (!len) {
318 return 0;
319 }
320
321 LOCK_PIPE(pipe);
322
323 uint32_t req_size = 0;
324 while (bytes_remaining > 0) {
325 kpage = alloc_page(GFP_KERNEL);
326 if (unlikely(kpage == NULL)) {
327 UNLOCK_PIPE(pipe);
328 return -ENOMEM;
329 }
330 req_size = 0;
331 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
332 uint32_t chunk = 0;
333 void *kbuf = kmap(kpage);
334 while (chunk < req_size) {
335 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
336 if (retval < 0)
337 goto err;
338 if (nread == 0)
339 break;
340 chunk += nread;
341 offset += nread;
342 nread = req_size - chunk;
343 }
344 if (!pipe->readers) {
345 send_sig(SIGPIPE, current, 0);
346 retval = -EPIPE;
347 goto err;
348 }
349 if (pipe->nrbufs < PIPE_BUFFERS) {
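 /* Note: pipe->bufs is a PIPE_BUFFERS-sized (power-of-two) ring, so
 (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1) below is the index
 of the next free slot. */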
350 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
351 pipebuf->page = kpage;
352 pipebuf->ops = &vbsf_pipe_buf_ops;
353 pipebuf->len = req_size;
354 pipebuf->offset = 0;
355 pipebuf->private = 0;
356 pipebuf->flags = 0;
357 pipe->nrbufs++;
358 nsent += req_size;
359 bytes_remaining -= req_size;
360 if (signal_pending(current))
361 break;
362 } else { /* pipe full */
363
364 if (flags & SPLICE_F_NONBLOCK) {
365 retval = -EAGAIN;
366 goto err;
367 }
368 vbsf_free_pipebuf(kpage);
369 break;
370 }
371 }
372 UNLOCK_PIPE(pipe);
373 if (!nsent && signal_pending(current))
374 return -ERESTARTSYS;
375 *poffset += nsent;
376 return offset - orig_offset;
377
378 err:
379 UNLOCK_PIPE(pipe);
380 vbsf_free_pipebuf(kpage);
381 return retval;
382}
383
384#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
385
386/**
387 * Helper for deciding whether we should do a read via the page cache or not.
388 *
389 * By default we will only use the page cache if there is a writable memory
390 * mapping of the file with a chance that it may have modified any of the pages
391 * already.
392 */
393DECLINLINE(bool) vbsf_should_use_cached_read(struct file *file, struct address_space *mapping, struct vbsf_super_info *sf_g)
394{
395 return mapping
396 && mapping->nrpages > 0
397 && mapping_writably_mapped(mapping)
398 && !(file->f_flags & O_DIRECT)
399 && 1 /** @todo make this behaviour configurable at mount time (sf_g) */;
400}
401
402/** Wrapper around put_page / page_cache_release. */
403DECLINLINE(void) vbsf_put_page(struct page *pPage)
404{
405#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
406 put_page(pPage);
407#else
408 page_cache_release(pPage);
409#endif
410}
411
412
413/** Wrapper around get_page / page_cache_get. */
414DECLINLINE(void) vbsf_get_page(struct page *pPage)
415{
416#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
417 get_page(pPage);
418#else
419 page_cache_get(pPage);
420#endif
421}
422
423
424/** Companion to vbsf_lock_user_pages(). */
425DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack)
426{
427 /* We don't mark kernel pages dirty: */
428 if (fLockPgHack)
429 fSetDirty = false;
430
431 while (cPages-- > 0)
432 {
433 struct page *pPage = papPages[cPages];
434 if (fSetDirty && !PageReserved(pPage))
435 SetPageDirty(pPage);
436 vbsf_put_page(pPage);
437 }
438}
439
440
441/**
442 * Worker for vbsf_lock_user_pages_failed_check_kernel() and
443 * vbsf_iter_lock_pages().
444 */
445static int vbsf_lock_kernel_pages(uint8_t *pbStart, bool fWrite, size_t cPages, struct page **papPages)
446{
447 uintptr_t const uPtrFrom = (uintptr_t)pbStart;
448 uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1;
449 uint8_t *pbPage = (uint8_t *)uPtrLast;
450 size_t iPage = cPages;
451
452 /*
453 * Touch the pages first (paranoia^2).
454 */
455 if (fWrite) {
456 uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom;
457 while (iPage-- > 0) {
458 *pbProbe = *pbProbe;
459 pbProbe += PAGE_SIZE;
460 }
461 } else {
462 uint8_t const *pbProbe = (uint8_t const *)uPtrFrom;
463 while (iPage-- > 0) {
464 ASMProbeReadByte(pbProbe);
465 pbProbe += PAGE_SIZE;
466 }
467 }
468
469 /*
470 * Get the pages.
471 * Note! Fixes here probably applies to rtR0MemObjNativeLockKernel as well.
472 */
473 iPage = cPages;
474 if ( uPtrFrom >= (unsigned long)__va(0)
475 && uPtrLast < (unsigned long)high_memory) {
476 /* The physical page mapping area: */
477 while (iPage-- > 0) {
478 struct page *pPage = papPages[iPage] = virt_to_page(pbPage);
479 vbsf_get_page(pPage);
480 pbPage -= PAGE_SIZE;
481 }
482 } else {
483 /* This is vmalloc or some such thing, so go thru page tables: */
484 while (iPage-- > 0) {
485 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
486 if (pPage) {
487 papPages[iPage] = pPage;
488 vbsf_get_page(pPage);
489 pbPage -= PAGE_SIZE;
490 } else {
491 while (++iPage < cPages) {
492 pPage = papPages[iPage];
493 vbsf_put_page(pPage);
494 }
495 return -EFAULT;
496 }
497 }
498 }
499 return 0;
500}
501
502
503/**
504 * Catches kernel_read() and kernel_write() calls and works around them.
505 *
506 * The file_operations::read and file_operations::write callbacks supposedly
507 * hand us the user buffers to read into and write out of. To allow the kernel
508 * to read and write without allocating buffers in userland, kernel_read()
509 * and kernel_write() increase the user space address limit before calling us
510 * so that copyin/copyout won't reject it. Our problem is that get_user_pages()
511 * works on the userspace address space structures and will not be fooled by an
512 * increased addr_limit.
513 *
514 * This code tries to detect this situation and fake get_user_pages() for the
515 * kernel buffer.
516 */
517static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
518 struct page **papPages, bool *pfLockPgHack)
519{
520 /*
521 * Check that this is valid user memory that is actually in the kernel range.
522 */
523#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
524 if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
525 && uPtrFrom >= USER_DS.seg)
526#else
527 if ( access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
528 && uPtrFrom >= USER_DS.seg)
529#endif
530 {
531 int rc = vbsf_lock_kernel_pages((uint8_t *)uPtrFrom, fWrite, cPages, papPages);
532 if (rc == 0) {
533 *pfLockPgHack = true;
534 return 0;
535 }
536 }
537
538 return rcFailed;
539}
540
541
542/** Wrapper around get_user_pages. */
543DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
544{
545# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
546 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
547 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
548# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
549 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
550# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
551 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
552# else
553 struct task_struct *pTask = current;
554 size_t cPagesLocked;
555 down_read(&pTask->mm->mmap_sem);
556 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
557 up_read(&pTask->mm->mmap_sem);
558# endif
559 *pfLockPgHack = false;
560 if (cPagesLocked == cPages)
561 return 0;
562
563 /*
564 * It failed.
565 */
566 if (cPagesLocked < 0)
567 return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
568
569 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
570
571 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
572 return -EFAULT;
573}
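/*
 * For context, a rough sketch of what older kernel_read()/kernel_write()
 * implementations did (approximate, not code from this module), which is the
 * situation vbsf_lock_user_pages_failed_check_kernel() tries to detect:
 *
 *     mm_segment_t fsSaved = get_fs();
 *     set_fs(KERNEL_DS);                // widen addr_limit
 *     cb = file->f_op->read(file, (char __user *)pvKernelBuf, cb, &offFile);
 *     set_fs(fsSaved);
 *
 * The widened addr_limit makes copy_to_user()/copy_from_user() accept the
 * kernel buffer, but get_user_pages() still rejects it, so the fallback above
 * locks the kernel pages directly instead.
 */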
574
575
576/**
577 * Read function used when accessing files that are memory mapped.
578 *
579 * We read from the page cache here to present a coherent picture of the
580 * file content.
581 */
582static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
583{
584#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
585 struct iovec iov = { .iov_base = buf, .iov_len = size };
586 struct iov_iter iter;
587 struct kiocb kiocb;
588 ssize_t cbRet;
589
590 init_sync_kiocb(&kiocb, file);
591 kiocb.ki_pos = *off;
592 iov_iter_init(&iter, READ, &iov, 1, size);
593
594 cbRet = generic_file_read_iter(&kiocb, &iter);
595
596 *off = kiocb.ki_pos;
597 return cbRet;
598
599#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
600 struct iovec iov = { .iov_base = buf, .iov_len = size };
601 struct kiocb kiocb;
602 ssize_t cbRet;
603
604 init_sync_kiocb(&kiocb, file);
605 kiocb.ki_pos = *off;
606
607 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
608 if (cbRet == -EIOCBQUEUED)
609 cbRet = wait_on_sync_kiocb(&kiocb);
610
611 *off = kiocb.ki_pos;
612 return cbRet;
613
614#else /* 2.6.18 or earlier: */
615 return generic_file_read(file, buf, size, off);
616#endif
617}
618
619
620/**
621 * Fallback case of vbsf_reg_read() that locks the user buffers and lets the host
622 * write directly to them.
623 */
624static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
625 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
626{
627 /*
628 * Lock pages and execute the read, taking care not to pass the host
629 * more than it can handle in one go or more than we care to allocate
630 * page arrays for. The latter limit is set at just short of 32KB due
631 * to how the physical heap works.
632 */
633 struct page *apPagesStack[16];
634 struct page **papPages = &apPagesStack[0];
635 struct page **papPagesFree = NULL;
636 VBOXSFREADPGLSTREQ *pReq;
637 loff_t offFile = *off;
638 ssize_t cbRet = -ENOMEM;
639 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
640 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
641 bool fLockPgHack;
642
643 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
644 while (!pReq && cMaxPages > 4) {
645 cMaxPages /= 2;
646 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
647 }
648 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
649 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
650 if (pReq && papPages) {
651 cbRet = 0;
652 for (;;) {
653 /*
654 * Figure out how much to process now and lock the user pages.
655 */
656 int rc;
657 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
658 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
659 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
660 if (cPages <= cMaxPages)
661 cbChunk = size;
662 else {
663 cPages = cMaxPages;
664 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
665 }
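 /* Worked example with 4 KiB pages: buf at page offset 0xffc and size 0x3000
 gives cPages = 4; if cMaxPages is only 2 the chunk shrinks to
 (2 << PAGE_SHIFT) - 0xffc = 0x1004 bytes and the remainder is handled by
 later loop iterations. */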
666
667 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack);
668 if (rc == 0) {
669 size_t iPage = cPages;
670 while (iPage-- > 0)
671 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
672 } else {
673 cbRet = rc;
674 break;
675 }
676
677 /*
678 * Issue the request and unlock the pages.
679 */
680 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
681
682 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack);
683
684 if (RT_SUCCESS(rc)) {
685 /*
686 * Success, advance position and buffer.
687 */
688 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
689 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
690 cbRet += cbActual;
691 offFile += cbActual;
692 buf = (uint8_t *)buf + cbActual;
693 size -= cbActual;
694
695 /*
696 * Are we done already? If so commit the new file offset.
697 */
698 if (!size || cbActual < cbChunk) {
699 *off = offFile;
700 break;
701 }
702 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
703 /*
704 * The host probably doesn't have enough heap to handle the
705 * request, reduce the page count and retry.
706 */
707 cMaxPages /= 4;
708 Assert(cMaxPages > 0);
709 } else {
710 /*
711 * If we've successfully read stuff, return it rather than
712 * the error. (Not sure if this is such a great idea...)
713 */
714 if (cbRet > 0)
715 *off = offFile;
716 else
717 cbRet = -EPROTO;
718 break;
719 }
720 }
721 }
722 if (papPagesFree)
723 kfree(papPages);
724 if (pReq)
725 VbglR0PhysHeapFree(pReq);
726 return cbRet;
727}
728
729
730/**
731 * Read from a regular file.
732 *
733 * @param file the file
734 * @param buf the buffer
735 * @param size length of the buffer
736 * @param off offset within the file (in/out).
737 * @returns the number of read bytes on success, Linux error code otherwise
738 */
739static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
740{
741 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
742 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
743 struct vbsf_reg_info *sf_r = file->private_data;
744 struct address_space *mapping = inode->i_mapping;
745
746 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
747
748 if (!S_ISREG(inode->i_mode)) {
749 LogFunc(("read from non regular file %d\n", inode->i_mode));
750 return -EINVAL;
751 }
752
753 /** @todo XXX Check read permission according to inode->i_mode! */
754
755 if (!size)
756 return 0;
757
758 /*
758 * If there is a mapping and O_DIRECT isn't in effect, we must heed
759 * dirty pages in the mapping and read from them. For simplicity
761 * though, we just do page cache reading when there are writable
762 * mappings around with any kind of pages loaded.
763 */
764 if (vbsf_should_use_cached_read(file, mapping, sf_g))
765 return vbsf_reg_read_mapped(file, buf, size, off);
766
767 /*
768 * For small requests, try use an embedded buffer provided we get a heap block
769 * that does not cross page boundaries (see host code).
770 */
771 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
772 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
773 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
774 if (pReq) {
775 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
776 ssize_t cbRet;
777 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
778 if (RT_SUCCESS(vrc)) {
779 cbRet = pReq->Parms.cb32Read.u.value32;
780 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
781 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
782 *off += cbRet;
783 else
784 cbRet = -EFAULT;
785 } else
786 cbRet = -EPROTO;
787 VbglR0PhysHeapFree(pReq);
788 return cbRet;
789 }
790 VbglR0PhysHeapFree(pReq);
791 }
792 }
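 /* With 4 KiB pages the embedded path above covers requests up to 3072 bytes
 minus the VBOXSFREADEMBEDDEDREQ header; the extra alignment check ensures the
 whole request stays within a single page of the physical heap, as the host
 code expects. */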
793
794#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
795 /*
796 * For medium sized requests try use a bounce buffer.
797 */
798 if (size <= _64K /** @todo make this configurable? */) {
799 void *pvBounce = kmalloc(size, GFP_KERNEL);
800 if (pvBounce) {
801 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
802 if (pReq) {
803 ssize_t cbRet;
804 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
805 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
806 if (RT_SUCCESS(vrc)) {
807 cbRet = pReq->Parms.cb32Read.u.value32;
808 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
809 if (copy_to_user(buf, pvBounce, cbRet) == 0)
810 *off += cbRet;
811 else
812 cbRet = -EFAULT;
813 } else
814 cbRet = -EPROTO;
815 VbglR0PhysHeapFree(pReq);
816 kfree(pvBounce);
817 return cbRet;
818 }
819 kfree(pvBounce);
820 }
821 }
822#endif
823
824 return vbsf_reg_read_locking(file, buf, size, off, sf_g, sf_r);
825}
826
827
828/**
829 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
830 * the changes written via vbsf_reg_write are made visible to mmap users.
831 */
832DECLINLINE(void) vbsf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
833{
834 /*
835 * Only bother with this if the mapping has any pages in it.
836 *
837 * Note! According to the docs, the last parameter, end, is inclusive (we
838 * would have named it 'last' to indicate this).
839 *
840 * Note! The pre-2.6.12 function might not do enough to ensure consistency
841 * when any of the pages in the range is already mapped.
842 */
843# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
844 if (mapping)
845 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
846# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 21)
847 if (mapping && mapping->nrpages > 0)
848 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
849# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60) && 0 /** @todo invalidate_mapping_pages was added in 2.5.60, but exported in 2.6.21 */
850 if (mapping && mapping->nrpages > 0)
851 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
852# else
853 /** @todo ... */
854 RT_NOREF(mapping, offStart, offEnd);
855# endif
856}
857
858
859/**
860 * Fallback case of vbsf_reg_write() that locks the user buffers and lets the host
861 * write directly to them.
862 */
863static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
864 struct inode *inode, struct vbsf_inode_info *sf_i,
865 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
866{
867 /*
868 * Lock pages and execute the write, taking care not to pass the host
869 * more than it can handle in one go or more than we care to allocate
870 * page arrays for. The latter limit is set at just short of 32KB due
871 * to how the physical heap works.
872 */
873 struct page *apPagesStack[16];
874 struct page **papPages = &apPagesStack[0];
875 struct page **papPagesFree = NULL;
876 VBOXSFWRITEPGLSTREQ *pReq;
877 ssize_t cbRet = -ENOMEM;
878 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
879 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
880 bool fLockPgHack;
881
882 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
883 while (!pReq && cMaxPages > 4) {
884 cMaxPages /= 2;
885 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
886 }
887 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
888 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
889 if (pReq && papPages) {
890 cbRet = 0;
891 for (;;) {
892 /*
893 * Figure out how much to process now and lock the user pages.
894 */
895 int rc;
896 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
897 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
898 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
899 if (cPages <= cMaxPages)
900 cbChunk = size;
901 else {
902 cPages = cMaxPages;
903 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
904 }
905
906 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack);
907 if (rc == 0) {
908 size_t iPage = cPages;
909 while (iPage-- > 0)
910 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
911 } else {
912 cbRet = rc;
913 break;
914 }
915
916 /*
917 * Issue the request and unlock the pages.
918 */
919 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
920
921 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
922
923 if (RT_SUCCESS(rc)) {
924 /*
925 * Success, advance position and buffer.
926 */
927 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
928 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
929 cbRet += cbActual;
930 offFile += cbActual;
931 buf = (uint8_t *)buf + cbActual;
932 size -= cbActual;
933 if (offFile > i_size_read(inode))
934 i_size_write(inode, offFile);
935 vbsf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
936 sf_i->force_restat = 1; /* mtime (and size) may have changed */
937
938 /*
939 * Are we done already? If so commit the new file offset.
940 */
941 if (!size || cbActual < cbChunk) {
942 *off = offFile;
943 break;
944 }
945 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
946 /*
947 * The host probably doesn't have enough heap to handle the
948 * request, reduce the page count and retry.
949 */
950 cMaxPages /= 4;
951 Assert(cMaxPages > 0);
952 } else {
953 /*
954 * If we've successfully written stuff, return it rather than
955 * the error. (Not sure if this is such a great idea...)
956 */
957 if (cbRet > 0)
958 *off = offFile;
959 else
960 cbRet = -EPROTO;
961 break;
962 }
963 }
964 }
965 if (papPagesFree)
966 kfree(papPages);
967 if (pReq)
968 VbglR0PhysHeapFree(pReq);
969 return cbRet;
970}
971
972
973/**
974 * Write to a regular file.
975 *
976 * @param file the file
977 * @param buf the buffer
978 * @param size length of the buffer
979 * @param off offset within the file
980 * @returns the number of written bytes on success, Linux error code otherwise
981 */
982static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
983{
984 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
985 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
986 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
987 struct vbsf_reg_info *sf_r = file->private_data;
988 struct address_space *mapping = inode->i_mapping;
989 loff_t pos;
990
991 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
992 BUG_ON(!sf_i);
993 BUG_ON(!sf_g);
994 BUG_ON(!sf_r);
995 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
996
997 pos = *off;
998 /** @todo This should be handled by the host, it returning the new file
999 * offset when appending. We may have an outdated i_size value here! */
1000 if (file->f_flags & O_APPEND)
1001 pos = i_size_read(inode);
1002
1003 /** @todo XXX Check write permission according to inode->i_mode! */
1004
1005 if (!size) {
1006 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
1007 *off = pos;
1008 return 0;
1009 }
1010
1011 /*
1012 * If there are active writable mappings, coordinate with any
1013 * pending writes via those.
1014 */
1015 if ( mapping
1016 && mapping->nrpages > 0
1017 && mapping_writably_mapped(mapping)) {
1018#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1019 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
1020 if (err)
1021 return err;
1022#else
1023 /** @todo ... */
1024#endif
1025 }
1026
1027 /*
1028 * For small requests, try use an embedded buffer provided we get a heap block
1029 * that does not cross page boundaries (see host code).
1030 */
1031 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1032 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
1033 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1034 if ( pReq
1035 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1036 ssize_t cbRet;
1037 if (copy_from_user(pReq->abData, buf, size) == 0) {
1038 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1039 pos, (uint32_t)size);
1040 if (RT_SUCCESS(vrc)) {
1041 cbRet = pReq->Parms.cb32Write.u.value32;
1042 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1043 pos += cbRet;
1044 *off = pos;
1045 if (pos > i_size_read(inode))
1046 i_size_write(inode, pos);
1047 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1048 } else
1049 cbRet = -EPROTO;
1050 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1051 } else
1052 cbRet = -EFAULT;
1053
1054 VbglR0PhysHeapFree(pReq);
1055 return cbRet;
1056 }
1057 if (pReq)
1058 VbglR0PhysHeapFree(pReq);
1059 }
1060
1061#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
1062 /*
1063 * For medium sized requests try use a bounce buffer.
1064 */
1065 if (size <= _64K /** @todo make this configurable? */) {
1066 void *pvBounce = kmalloc(size, GFP_KERNEL);
1067 if (pvBounce) {
1068 if (copy_from_user(pvBounce, buf, size) == 0) {
1069 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1070 if (pReq) {
1071 ssize_t cbRet;
1072 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->Handle.hHost, pos,
1073 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
1074 if (RT_SUCCESS(vrc)) {
1075 cbRet = pReq->Parms.cb32Write.u.value32;
1076 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1077 pos += cbRet;
1078 *off = pos;
1079 if (pos > i_size_read(inode))
1080 i_size_write(inode, pos);
1081 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1082 } else
1083 cbRet = -EPROTO;
1084 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1085 VbglR0PhysHeapFree(pReq);
1086 kfree(pvBounce);
1087 return cbRet;
1088 }
1089 kfree(pvBounce);
1090 } else {
1091 kfree(pvBounce);
1092 return -EFAULT;
1093 }
1094 }
1095 }
1096#endif
1097
1098 return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
1099}
1100
1101#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
1102/*
1103 * Hide missing uio.h functionality in older kernels.
1104 */
1105
1106static size_t copy_from_iter(uint8_t *pbDst, size_t cbToCopy, struct iov_iter *pSrcIter)
1107{
1108 size_t const cbTotal = cbToCopy;
1109 Assert(iov_iter_count(pSrcIter) >= cbToCopy);
1110 if (pSrcIter->type & ITER_BVEC) {
1111 while (cbToCopy > 0) {
1112 size_t const offPage = (uintptr_t)pbDst & PAGE_OFFSET_MASK;
1113 size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
1114 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbDst);
1115 size_t cbCopied = copy_page_from_iter(pPage, offPage, cbThisCopy, pSrcIter);
1116 AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
1117 pbDst += cbCopied;
1118 cbToCopy -= cbCopied;
1119 if (cbCopied != cbThisCopy)
1120 break;
1121 }
1122 } else {
1123 while (cbToCopy > 0) {
1124 size_t cbThisCopy = iov_iter_single_seg_count(pSrcIter);
1125 if (cbThisCopy > 0) {
1126 if (cbThisCopy > cbToCopy)
1127 cbThisCopy = cbToCopy;
1128 if (pSrcIter->type & ITER_KVEC)
1129 memcpy(pbDst, (void *)pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy);
1130 else if (copy_from_user(pbDst, pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy) != 0)
1131 break;
1132 pbDst += cbThisCopy;
1133 cbToCopy -= cbThisCopy;
1134 }
1135 iov_iter_advance(pSrcIter, cbThisCopy);
1136 }
1137 }
1138 return cbTotal - cbToCopy;
1139}
1140
1141static size_t copy_to_iter(uint8_t const *pbSrc, size_t cbToCopy, struct iov_iter *pDstIter)
1142{
1143 size_t const cbTotal = cbToCopy;
1144 Assert(iov_iter_count(pDstIter) >= cbToCopy);
1145 if (pDstIter->type & ITER_BVEC) {
1146 while (cbToCopy > 0) {
1147 size_t const offPage = (uintptr_t)pbSrc & PAGE_OFFSET_MASK;
1148 size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
1149 struct page *pPage = rtR0MemObjLinuxVirtToPage((void *)pbSrc);
1150 size_t cbCopied = copy_page_to_iter(pPage, offPage, cbThisCopy, pDstIter);
1151 AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
1152 pbSrc += cbCopied;
1153 cbToCopy -= cbCopied;
1154 if (cbCopied != cbThisCopy)
1155 break;
1156 }
1157 } else {
1158 while (cbToCopy > 0) {
1159 size_t cbThisCopy = iov_iter_single_seg_count(pDstIter);
1160 if (cbThisCopy > 0) {
1161 if (cbThisCopy > cbToCopy)
1162 cbThisCopy = cbToCopy;
1163 if (pDstIter->type & ITER_KVEC)
1164 memcpy((void *)pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy);
1165 else if (copy_to_user(pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy) != 0) {
1166 break;
1167 }
1168 pbSrc += cbThisCopy;
1169 cbToCopy -= cbThisCopy;
1170 }
1171 iov_iter_advance(pDstIter, cbThisCopy);
1172 }
1173 }
1174 return cbTotal - cbToCopy;
1175}
1176
1177#endif /* 3.16.0 <= linux < 3.18.0 */
1178#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1179
1180/**
1181 * Companion to vbsf_iter_lock_pages().
1182 */
1183DECLINLINE(void) vbsf_iter_unlock_pages(struct iov_iter *iter, struct page **papPages, size_t cPages, bool fSetDirty)
1184{
1185 /* We don't mark kernel pages dirty: */
1186 if (iter->type & ITER_KVEC)
1187 fSetDirty = false;
1188
1189 while (cPages-- > 0)
1190 {
1191 struct page *pPage = papPages[cPages];
1192 if (fSetDirty && !PageReserved(pPage))
1193 SetPageDirty(pPage);
1194 vbsf_put_page(pPage);
1195 }
1196}
1197
1198
1199/**
1200 * Locks up to @a cMaxPages from the I/O vector iterator, advancing the
1201 * iterator.
1202 *
1203 * @returns 0 on success, negative errno value on failure.
1204 * @param iter The iterator to lock pages from.
1205 * @param fWrite Whether to write (true) or read (false) lock the pages.
1206 * @param pStash Where we stash peek results.
1207 * @param cMaxPages The maximum number of pages to get.
1208 * @param papPages Where to return the locked pages.
1209 * @param pcPages Where to return the number of pages.
1210 * @param poffPage0 Where to return the offset into the first page.
1211 * @param pcbChunk Where to return the number of bytes covered.
1212 */
1213static int vbsf_iter_lock_pages(struct iov_iter *iter, bool fWrite, struct vbsf_iter_stash *pStash, size_t cMaxPages,
1214 struct page **papPages, size_t *pcPages, size_t *poffPage0, size_t *pcbChunk)
1215{
1216 size_t cbChunk = 0;
1217 size_t cPages = 0;
1218 size_t offPage0 = 0;
1219 int rc = 0;
1220
1221 Assert(iov_iter_count(iter) + pStash->cb > 0);
1222 if (!(iter->type & ITER_KVEC)) {
1223 /*
1224 * Do we have a stashed page?
1225 */
1226 if (pStash->pPage) {
1227 papPages[0] = pStash->pPage;
1228 offPage0 = pStash->off;
1229 cbChunk = pStash->cb;
1230 cPages = 1;
1231 pStash->pPage = NULL;
1232 pStash->off = 0;
1233 pStash->cb = 0;
1234 if ( offPage0 + cbChunk < PAGE_SIZE
1235 || iov_iter_count(iter) == 0) {
1236 *poffPage0 = offPage0;
1237 *pcbChunk = cbChunk;
1238 *pcPages = cPages;
1239 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx (stashed)\n",
1240 rc, cPages, offPage0, cbChunk));
1241 return 0;
1242 }
1243 cMaxPages -= 1;
1244 SFLOG3(("vbsf_iter_lock_pages: Picked up stashed page: %#zx LB %#zx\n", offPage0, cbChunk));
1245 } else {
1246# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1247 /*
1248 * Copy out our starting point to assist rewinding.
1249 */
1250 pStash->offFromEnd = iov_iter_count(iter);
1251 pStash->Copy = *iter;
1252# endif
1253 }
1254
1255 /*
1256 * Get pages segment by segment.
1257 */
1258 do {
1259 /*
1260 * Make a special case of the first time thru here, since that's
1261 * the most typical scenario.
1262 */
1263 ssize_t cbSegRet;
1264 if (cPages == 0) {
1265# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
1266 while (!iov_iter_single_seg_count(iter)) /* Old code didn't skip empty segments which caused EFAULTs. */
1267 iov_iter_advance(iter, 0);
1268# endif
1269 cbSegRet = iov_iter_get_pages(iter, papPages, iov_iter_count(iter), cMaxPages, &offPage0);
1270 if (cbSegRet > 0) {
1271 iov_iter_advance(iter, cbSegRet);
1272 cbChunk = (size_t)cbSegRet;
1273 cPages = RT_ALIGN_Z(offPage0 + cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1274 cMaxPages -= cPages;
1275 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages -> %#zx @ %#zx; %#zx pages [first]\n", cbSegRet, offPage0, cPages));
1276 if ( cMaxPages == 0
1277 || ((offPage0 + (size_t)cbSegRet) & PAGE_OFFSET_MASK))
1278 break;
1279 } else {
1280 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1281 rc = (int)cbSegRet;
1282 break;
1283 }
1284 } else {
1285 /*
1286 * Probe first page of new segment to check that we've got a zero offset and
1287 * can continue on the current chunk. Stash the page if the offset isn't zero.
1288 */
1289 size_t offPgProbe;
1290 size_t cbSeg = iov_iter_single_seg_count(iter);
1291 while (!cbSeg) {
1292 iov_iter_advance(iter, 0);
1293 cbSeg = iov_iter_single_seg_count(iter);
1294 }
1295 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), 1, &offPgProbe);
1296 if (cbSegRet > 0) {
1297 iov_iter_advance(iter, cbSegRet); /** @todo maybe not do this if we stash the page? */
1298 Assert(offPgProbe + cbSegRet <= PAGE_SIZE);
1299 if (offPgProbe == 0) {
1300 cbChunk += cbSegRet;
1301 cPages += 1;
1302 cMaxPages -= 1;
1303 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx\n", cbSegRet, offPgProbe));
1304 if ( cMaxPages == 0
1305 || cbSegRet != PAGE_SIZE)
1306 break;
1307
1308 /*
1309 * Get the rest of the segment (if anything remaining).
1310 */
1311 cbSeg -= cbSegRet;
1312 if (cbSeg > 0) {
1313 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), cMaxPages, &offPgProbe);
1314 if (cbSegRet > 0) {
1315 size_t const cPgRet = RT_ALIGN_Z((size_t)cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1316 Assert(offPgProbe == 0);
1317 iov_iter_advance(iter, cbSegRet);
1318 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages() -> %#zx; %#zx pages\n", cbSegRet, cPgRet));
1319 cPages += cPgRet;
1320 cMaxPages -= cPgRet;
1321 cbChunk += cbSegRet;
1322 if ( cMaxPages == 0
1323 || ((size_t)cbSegRet & PAGE_OFFSET_MASK))
1324 break;
1325 } else {
1326 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1327 rc = (int)cbSegRet;
1328 break;
1329 }
1330 }
1331 } else {
1332 /* The segment didn't start at a page boundary, so stash it for
1333 the next round: */
1334 SFLOGFLOW(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx; stashed\n", cbSegRet, offPgProbe));
1335 Assert(papPages[cPages]);
1336 pStash->pPage = papPages[cPages];
1337 pStash->off = offPgProbe;
1338 pStash->cb = cbSegRet;
1339 break;
1340 }
1341 } else {
1342 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1343 rc = (int)cbSegRet;
1344 break;
1345 }
1346 }
1347 Assert(cMaxPages > 0);
1348 } while (iov_iter_count(iter) > 0);
1349
1350 } else {
1351 /*
1352 * The silly iov_iter_get_pages_alloc() function doesn't handle KVECs,
1353 * so everyone needs to do that by themselves.
1354 *
1355 * Note! Fixes here may apply to rtR0MemObjNativeLockKernel()
1356 * and vbsf_lock_user_pages_failed_check_kernel() as well.
1357 */
1358# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1359 pStash->offFromEnd = iov_iter_count(iter);
1360 pStash->Copy = *iter;
1361# endif
1362 do {
1363 uint8_t *pbBuf;
1364 size_t offStart;
1365 size_t cPgSeg;
1366
1367 size_t cbSeg = iov_iter_single_seg_count(iter);
1368 while (!cbSeg) {
1369 iov_iter_advance(iter, 0);
1370 cbSeg = iov_iter_single_seg_count(iter);
1371 }
1372
1373# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
1374 pbBuf = iter->kvec->iov_base + iter->iov_offset;
1375# else
1376 pbBuf = iter->iov->iov_base + iter->iov_offset;
1377# endif
1378 offStart = (uintptr_t)pbBuf & PAGE_OFFSET_MASK;
1379 if (!cPages)
1380 offPage0 = offStart;
1381 else if (offStart)
1382 break;
1383
1384 cPgSeg = RT_ALIGN_Z(cbSeg, PAGE_SIZE) >> PAGE_SHIFT;
1385 if (cPgSeg > cMaxPages) {
1386 cPgSeg = cMaxPages;
1387 cbSeg = (cPgSeg << PAGE_SHIFT) - offStart;
1388 }
1389
1390 rc = vbsf_lock_kernel_pages(pbBuf, fWrite, cPgSeg, &papPages[cPages]);
1391 if (rc == 0) {
1392 iov_iter_advance(iter, cbSeg);
1393 cbChunk += cbSeg;
1394 cPages += cPgSeg;
1395 cMaxPages -= cPgSeg;
1396 if ( cMaxPages == 0
1397 || ((offStart + cbSeg) & PAGE_OFFSET_MASK) != 0)
1398 break;
1399 } else
1400 break;
1401 } while (iov_iter_count(iter) > 0);
1402 }
1403
1404 /*
1405 * Clean up if we failed; set return values.
1406 */
1407 if (rc == 0) {
1408 /* likely */
1409 } else {
1410 if (cPages > 0)
1411 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1412 offPage0 = cbChunk = cPages = 0;
1413 }
1414 *poffPage0 = offPage0;
1415 *pcbChunk = cbChunk;
1416 *pcPages = cPages;
1417 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx\n", rc, cPages, offPage0, cbChunk));
1418 return rc;
1419}
1420
1421
1422/**
1423 * Rewinds the I/O vector.
1424 */
1425static bool vbsf_iter_rewind(struct iov_iter *iter, struct vbsf_iter_stash *pStash, size_t cbToRewind, size_t cbChunk)
1426{
1427 size_t cbExtra;
1428 if (!pStash->pPage) {
1429 cbExtra = 0;
1430 } else {
1431 cbExtra = pStash->cb;
1432 vbsf_put_page(pStash->pPage);
1433 pStash->pPage = NULL;
1434 pStash->cb = 0;
1435 pStash->off = 0;
1436 }
1437
1438# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
1439 iov_iter_revert(iter, cbToRewind + cbExtra);
1440 return true;
1441# else
1442 /** @todo impl this */
1443 return false;
1444# endif
1445}
1446
1447
1448/**
1449 * Cleans up the page locking stash.
1450 */
1451DECLINLINE(void) vbsf_iter_cleanup_stash(struct iov_iter *iter, struct vbsf_iter_stash *pStash)
1452{
1453 if (pStash->pPage)
1454 vbsf_iter_rewind(iter, pStash, 0, 0);
1455}
1456
1457
1458/**
1459 * Calculates the longest span of pages we could transfer to the host in a
1460 * single request.
1461 *
1462 * @returns Page count, non-zero.
1463 * @param iter The I/O vector iterator to inspect.
1464 */
1465static size_t vbsf_iter_max_span_of_pages(struct iov_iter *iter)
1466{
1467 size_t cPages;
1468 if (iter_is_iovec(iter) || (iter->type & ITER_KVEC)) {
1469 const struct iovec *pCurIov = iter->iov;
1470 size_t cLeft = iter->nr_segs;
1471 size_t cPagesSpan = 0;
1472
1473 /* iovec and kvec are identical, except for the __user tagging of iov_base. */
1474 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, struct kvec, iov_base);
1475 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, struct kvec, iov_len);
1476 AssertCompile(sizeof(struct iovec) == sizeof(struct kvec));
1477
1478 cPages = 1;
1479 AssertReturn(cLeft > 0, cPages);
1480
1481 /* Special case: segment offset. */
1482 if (iter->iov_offset > 0) {
1483 if (iter->iov_offset < pCurIov->iov_len) {
1484 size_t const cbSegLeft = pCurIov->iov_len - iter->iov_offset;
1485 size_t const offPage0 = ((uintptr_t)pCurIov->iov_base + iter->iov_offset) & PAGE_OFFSET_MASK;
1486 cPages = cPagesSpan = RT_ALIGN_Z(offPage0 + cbSegLeft, PAGE_SIZE) >> PAGE_SHIFT;
1487 if ((offPage0 + cbSegLeft) & PAGE_OFFSET_MASK)
1488 cPagesSpan = 0;
1489 }
1490 SFLOGFLOW(("vbsf_iter: seg[0]= %p LB %#zx\n", pCurIov->iov_base, pCurIov->iov_len));
1491 pCurIov++;
1492 cLeft--;
1493 }
1494
1495 /* Full segments. */
1496 while (cLeft-- > 0) {
1497 if (pCurIov->iov_len > 0) {
1498 size_t const offPage0 = (uintptr_t)pCurIov->iov_base & PAGE_OFFSET_MASK;
1499 if (offPage0 == 0) {
1500 if (!(pCurIov->iov_len & PAGE_OFFSET_MASK)) {
1501 cPagesSpan += pCurIov->iov_len >> PAGE_SHIFT;
1502 } else {
1503 cPagesSpan += RT_ALIGN_Z(pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1504 if (cPagesSpan > cPages)
1505 cPages = cPagesSpan;
1506 cPagesSpan = 0;
1507 }
1508 } else {
1509 if (cPagesSpan > cPages)
1510 cPages = cPagesSpan;
1511 if (!((offPage0 + pCurIov->iov_len) & PAGE_OFFSET_MASK)) {
1512 cPagesSpan = pCurIov->iov_len >> PAGE_SHIFT;
1513 } else {
1514 cPagesSpan += RT_ALIGN_Z(offPage0 + pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1515 if (cPagesSpan > cPages)
1516 cPages = cPagesSpan;
1517 cPagesSpan = 0;
1518 }
1519 }
1520 }
1521 SFLOGFLOW(("vbsf_iter: seg[%u]= %p LB %#zx\n", iter->nr_segs - cLeft, pCurIov->iov_base, pCurIov->iov_len));
1522 pCurIov++;
1523 }
1524 if (cPagesSpan > cPages)
1525 cPages = cPagesSpan;
1526 } else {
1527 /* Won't bother with accurate counts for the next two types, just make
1528 some rough estimates (do pipes have segments?): */
1529 size_t cSegs = iter->type & ITER_BVEC ? RT_MAX(1, iter->nr_segs) : 1;
1530 cPages = (iov_iter_count(iter) + (PAGE_SIZE * 2 - 2) * cSegs) >> PAGE_SHIFT;
1531 }
1532 SFLOGFLOW(("vbsf_iter_max_span_of_pages: returns %#zx\n", cPages));
1533 return cPages;
1534}
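/*
 * Worked example for the estimator above, assuming 4 KiB pages: a single
 * page-aligned 1 MiB iovec (iov_offset 0, iov_len 0x100000) has offPage0 == 0
 * and a page-aligned length, so cPagesSpan becomes 0x100000 >> PAGE_SHIFT = 256,
 * which is what vbsf_iter_max_span_of_pages() returns.
 */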
1535
1536
1537/**
1538 * Worker for vbsf_reg_read_iter() that deals with larger reads using page
1539 * locking.
1540 */
1541static ssize_t vbsf_reg_read_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToRead,
1542 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
1543{
1544 /*
1545 * Estimate how many pages we may possibly submit in a single request so
1546 * that we can allocate matching request buffer and page array.
1547 */
1548 struct page *apPagesStack[16];
1549 struct page **papPages = &apPagesStack[0];
1550 struct page **papPagesFree = NULL;
1551 VBOXSFREADPGLSTREQ *pReq;
1552 ssize_t cbRet = 0;
1553 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1554 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1555
1556 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1557 while (!pReq && cMaxPages > 4) {
1558 cMaxPages /= 2;
1559 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1560 }
1561 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1562 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1563 if (pReq && papPages) {
1564
1565 /*
1566 * The read loop.
1567 */
1568 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1569 do {
1570 /*
1571 * Grab as many pages as we can. This means that if adjacent
1572 * segments both start and end at a page boundary, we can
1573 * do them both in the same transfer from the host.
1574 */
1575 size_t cPages = 0;
1576 size_t cbChunk = 0;
1577 size_t offPage0 = 0;
1578 int rc = vbsf_iter_lock_pages(iter, true /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1579 if (rc == 0) {
1580 size_t iPage = cPages;
1581 while (iPage-- > 0)
1582 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1583 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1584 AssertStmt(cbChunk <= cbToRead, cbChunk = cbToRead);
1585 } else {
1586 cbRet = rc;
1587 break;
1588 }
1589
1590 /*
1591 * Issue the request and unlock the pages.
1592 */
1593 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, cbChunk, cPages);
1594 SFLOGFLOW(("vbsf_reg_read_iter_locking: VbglR0SfHostReqReadPgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x\n",
1595 rc, pReq->Parms.cb32Read.u.value32, cbChunk, cbToRead, cPages, offPage0));
1596
1597 vbsf_iter_unlock_pages(iter, papPages, cPages, true /*fSetDirty*/);
1598
1599 if (RT_SUCCESS(rc)) {
1600 /*
1601 * Success, advance position and buffer.
1602 */
1603 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
1604 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1605 cbRet += cbActual;
1606 kio->ki_pos += cbActual;
1607 cbToRead -= cbActual;
1608
1609 /*
1610 * Are we done already?
1611 */
1612 if (!cbToRead)
1613 break;
1614 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1615 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1616 iov_iter_truncate(iter, 0);
1617 break;
1618 }
1619 } else {
1620 /*
1621 * Try rewind the iter structure.
1622 */
1623 bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1624 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1625 /*
1626 * The host probably doesn't have enough heap to handle the
1627 * request, reduce the page count and retry.
1628 */
1629 cMaxPages /= 4;
1630 Assert(cMaxPages > 0);
1631 } else {
1632 /*
1633 * If we've successfully read stuff, return it rather than
1634 * the error. (Not sure if this is such a great idea...)
1635 */
1636 if (cbRet <= 0)
1637 cbRet = -EPROTO;
1638 break;
1639 }
1640 }
1641 } while (cbToRead > 0);
1642
1643 vbsf_iter_cleanup_stash(iter, &Stash);
1644 }
1645 else
1646 cbRet = -ENOMEM;
1647 if (papPagesFree)
1648 kfree(papPages);
1649 if (pReq)
1650 VbglR0PhysHeapFree(pReq);
1651 SFLOGFLOW(("vbsf_reg_read_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1652 return cbRet;
1653}
1654
1655
1656/**
1657 * Read into I/O vector iterator.
1658 *
1659 * @returns Number of bytes read on success, negative errno on error.
1660 * @param kio The kernel I/O control block (or something like that).
1661 * @param iter The I/O vector iterator describing the buffer.
1662 */
1663static ssize_t vbsf_reg_read_iter(struct kiocb *kio, struct iov_iter *iter)
1664{
1665 size_t cbToRead = iov_iter_count(iter);
1666 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1667 struct address_space *mapping = inode->i_mapping;
1668
1669 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1670 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1671
1672 SFLOGFLOW(("vbsf_reg_read_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1673 inode, kio->ki_filp, cbToRead, kio->ki_pos, iter->type));
1674 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1675
1676 /*
1677 * Do we have anything at all to do here?
1678 */
1679 if (!cbToRead)
1680 return 0;
1681
1682 /*
1683 * If there is a mapping and O_DIRECT isn't in effect, we must heed
1684 * dirty pages in the mapping and read from them. For simplicity
1685 * though, we just do page cache reading when there are writable
1686 * mappings around with any kind of pages loaded.
1687 */
1688 if (vbsf_should_use_cached_read(kio->ki_filp, mapping, sf_g))
1689 return generic_file_read_iter(kio, iter);
1690
1691 /*
1692 * For now we reject async I/O requests.
1693 */
1694 if (!is_sync_kiocb(kio)) {
1695 SFLOGFLOW(("vbsf_reg_read_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1696 return -EOPNOTSUPP;
1697 }
1698
1699 /*
1700 * For small requests, try to use an embedded buffer, provided we get a heap block
1701 * that does not cross page boundaries (see host code).
1702 */
1703 if (cbToRead <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
1704 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + cbToRead;
1705 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1706 if (pReq) {
1707 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1708 ssize_t cbRet;
1709 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, (uint32_t)cbToRead);
1710 if (RT_SUCCESS(vrc)) {
1711 cbRet = pReq->Parms.cb32Read.u.value32;
1712 AssertStmt(cbRet <= (ssize_t)cbToRead, cbRet = cbToRead);
1713 if (copy_to_iter(pReq->abData, cbRet, iter) == cbRet) {
1714 kio->ki_pos += cbRet;
1715 if (cbRet < cbToRead)
1716 iov_iter_truncate(iter, 0);
1717 } else
1718 cbRet = -EFAULT;
1719 } else
1720 cbRet = -EPROTO;
1721 VbglR0PhysHeapFree(pReq);
1722 SFLOGFLOW(("vbsf_reg_read_iter: returns %#zx (%zd)\n", cbRet, cbRet));
1723 return cbRet;
1724 }
1725 VbglR0PhysHeapFree(pReq);
1726 }
1727 }
1728
1729 /*
1730 * Otherwise do the page locking thing.
1731 */
1732 return vbsf_reg_read_iter_locking(kio, iter, cbToRead, sf_g, sf_r);
1733}
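
/*
 * Note on the embedded fast path above: the request header and data must fit
 * in a physical heap block that does not cross a page boundary, so the cut-off
 * is 3/4 of a page minus the request header.  For example, assuming 4 KiB
 * pages, reads up to 3072 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0])
 * bytes take this path; anything larger goes through the page-locking worker.
 */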
1734
1735
1736/**
1737 * Worker for vbsf_reg_write_iter() that deals with larger writes using page
1738 * locking.
1739 */
1740static ssize_t vbsf_reg_write_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToWrite, loff_t offFile,
1741 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
1742 struct inode *inode, struct vbsf_inode_info *sf_i, struct address_space *mapping)
1743{
1744 /*
1745 * Estimate how many pages we may possibly submit in a single request so
1746 * that we can allocate a matching request buffer and page array.
1747 */
1748 struct page *apPagesStack[16];
1749 struct page **papPages = &apPagesStack[0];
1750 struct page **papPagesFree = NULL;
1751 VBOXSFWRITEPGLSTREQ *pReq;
1752 ssize_t cbRet = 0;
1753 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1754 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1755
1756 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1757 while (!pReq && cMaxPages > 4) {
1758 cMaxPages /= 2;
1759 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1760 }
1761 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1762 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1763 if (pReq && papPages) {
1764
1765 /*
1766 * The write loop.
1767 */
1768 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1769 do {
1770 /*
1771 * Grab as many pages as we can. This means that if adjacent
1772 * segments both start and end at a page boundary, we can
1773 * do them both in the same transfer from the host.
1774 */
1775 size_t cPages = 0;
1776 size_t cbChunk = 0;
1777 size_t offPage0 = 0;
1778 int rc = vbsf_iter_lock_pages(iter, false /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1779 if (rc == 0) {
1780 size_t iPage = cPages;
1781 while (iPage-- > 0)
1782 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1783 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1784 AssertStmt(cbChunk <= cbToWrite, cbChunk = cbToWrite);
1785 } else {
1786 cbRet = rc;
1787 break;
1788 }
1789
1790 /*
1791 * Issue the request and unlock the pages.
1792 */
1793 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
1794 SFLOGFLOW(("vbsf_reg_write_iter_locking: VbglR0SfHostReqWritePgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x)\n",
1795 rc, pReq->Parms.cb32Write.u.value32, cbChunk, cbToWrite, cPages, offPage0));
1796
1797 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1798
1799 if (RT_SUCCESS(rc)) {
1800 /*
1801 * Success, advance position and buffer.
1802 */
1803 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
1804 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1805 cbRet += cbActual;
1806 offFile += cbActual;
1807 kio->ki_pos = offFile;
1808 cbToWrite -= cbActual;
1809 if (offFile > i_size_read(inode))
1810 i_size_write(inode, offFile);
1811 vbsf_reg_write_invalidate_mapping_range(mapping, offFile - cbActual, offFile);
1812 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1813
1814 /*
1815 * Are we done already?
1816 */
1817 if (!cbToWrite)
1818 break;
1819 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1820 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1821 iov_iter_truncate(iter, 0);
1822 break;
1823 }
1824 } else {
1825 /*
1826 * Try to rewind the iter structure.
1827 */
1828 bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1829 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1830 /*
1831 * The host probably doesn't have enough heap to handle the
1832 * request, reduce the page count and retry.
1833 */
1834 cMaxPages /= 4;
1835 Assert(cMaxPages > 0);
1836 } else {
1837 /*
1838 * If we've successfully written stuff, return it rather than
1839 * the error. (Not sure if this is such a great idea...)
1840 */
1841 if (cbRet <= 0)
1842 cbRet = -EPROTO;
1843 break;
1844 }
1845 }
1846 } while (cbToWrite > 0);
1847
1848 vbsf_iter_cleanup_stash(iter, &Stash);
1849 }
1850 else
1851 cbRet = -ENOMEM;
1852 if (papPagesFree)
1853 kfree(papPages);
1854 if (pReq)
1855 VbglR0PhysHeapFree(pReq);
1856 SFLOGFLOW(("vbsf_reg_write_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1857 return cbRet;
1858}
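
/*
 * Unlike the read loop, the write loop above also keeps the guest view
 * coherent after each successful chunk: it advances kio->ki_pos, grows i_size
 * when the write extends the file, invalidates the written range in the page
 * cache via vbsf_reg_write_invalidate_mapping_range(), and sets force_restat
 * so the next stat refetches mtime and size from the host.
 */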
1859
1860
1861
1862/**
1863 * Write from I/O vector iterator.
1864 *
1865 * @returns Number of bytes written on success, negative errno on error.
1866 * @param kio The kernel I/O control block (or something like that).
1867 * @param iter The I/O vector iterator describing the buffer.
1868 */
1869static ssize_t vbsf_reg_write_iter(struct kiocb *kio, struct iov_iter *iter)
1870{
1871 size_t cbToWrite = iov_iter_count(iter);
1872 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1873 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1874 struct address_space *mapping = inode->i_mapping;
1875
1876 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1877 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1878 loff_t offFile = kio->ki_pos;
1879
1880 SFLOGFLOW(("vbsf_reg_write_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1881 inode, kio->ki_filp, cbToWrite, offFile, iter->type));
1882 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1883
1884 /*
1885 * Enforce APPEND flag.
1886 */
1887 /** @todo This should be handled by the host, which should return the new file
1888 * offset when appending. We may have an outdated i_size value here! */
1889#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1890 if (kio->ki_flags & IOCB_APPEND)
1891#else
1892 if (kio->ki_filp->f_flags & O_APPEND)
1893#endif
1894 kio->ki_pos = offFile = i_size_read(inode);
1895
1896 /*
1897 * Do we have anything at all to do here?
1898 */
1899 if (!cbToWrite)
1900 return 0;
1901
1902 /*
1903 * For now we reject async I/O requests.
1904 */
1905 if (!is_sync_kiocb(kio)) {
1906 SFLOGFLOW(("vbsf_reg_write_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1907 return -EOPNOTSUPP;
1908 }
1909
1910 /*
1911 * If there are active writable mappings, coordinate with any
1912 * pending writes via those.
1913 */
1914 if ( mapping
1915 && mapping->nrpages > 0
1916 && mapping_writably_mapped(mapping)) {
1917#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1918 int err = filemap_fdatawait_range(mapping, offFile, offFile + cbToWrite - 1);
1919 if (err)
1920 return err;
1921#else
1922 /** @todo ... */
1923#endif
1924 }
1925
1926 /*
1927 * For small requests, try to use an embedded buffer, provided we get a heap block
1928 * that does not cross page boundaries (see host code).
1929 */
1930 if (cbToWrite <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1931 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + cbToWrite;
1932 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1933 if (pReq) {
1934 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1935 ssize_t cbRet;
1936 if (copy_from_iter(pReq->abData, cbToWrite, iter) == cbToWrite) {
1937 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1938 offFile, (uint32_t)cbToWrite);
1939 if (RT_SUCCESS(vrc)) {
1940 cbRet = pReq->Parms.cb32Write.u.value32;
1941 AssertStmt(cbRet <= (ssize_t)cbToWrite, cbRet = cbToWrite);
1942 kio->ki_pos = offFile += cbRet;
1943 if (offFile > i_size_read(inode))
1944 i_size_write(inode, offFile);
1945 vbsf_reg_write_invalidate_mapping_range(mapping, offFile - cbRet, offFile);
1946# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
1947 if ((size_t)cbRet < cbToWrite)
1948 iov_iter_revert(iter, cbToWrite - cbRet);
1949# endif
1950 } else
1951 cbRet = -EPROTO;
1952 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1953 } else
1954 cbRet = -EFAULT;
1955 VbglR0PhysHeapFree(pReq);
1956 SFLOGFLOW(("vbsf_reg_write_iter: returns %#zx (%zd)\n", cbRet, cbRet));
1957 return cbRet;
1958 }
1959 VbglR0PhysHeapFree(pReq);
1960 }
1961 }
1962
1963 /*
1964 * Otherwise do the page locking thing.
1965 */
1966 return vbsf_reg_write_iter_locking(kio, iter, cbToWrite, offFile, sf_g, sf_r, inode, sf_i, mapping);
1967}
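
/*
 * Rough decision tree for vbsf_reg_write_iter() above:
 *   O_APPEND                  -> restart at the current i_size (should really
 *                                be decided by the host, see the @todo).
 *   async kiocb               -> -EOPNOTSUPP for now.
 *   writable mappings present -> wait for pending writeback in the range.
 *   small write (~3/4 page)   -> single embedded request.
 *   everything else           -> vbsf_reg_write_iter_locking().
 */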
1968
1969#endif /* >= 3.16.0 */
1970
1971/**
1972 * Used by vbsf_reg_open() and vbsf_inode_atomic_open() to convert Linux open flags to shared folders create flags.
1973 *
1974 * @returns shared folders create flags.
1975 * @param fLnxOpen The linux O_XXX flags to convert.
1976 * @param pfHandle Pointer to vbsf_handle::fFlags.
1977 * @param pszCaller Caller, for logging purposes.
1978 */
1979uint32_t vbsf_linux_oflags_to_vbox(unsigned fLnxOpen, uint32_t *pfHandle, const char *pszCaller)
1980{
1981 uint32_t fVBoxFlags = SHFL_CF_ACCESS_DENYNONE;
1982
1983 /*
1984 * Disposition.
1985 */
1986 if (fLnxOpen & O_CREAT) {
1987 Log(("%s: O_CREAT set\n", pszCaller));
1988 fVBoxFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1989 if (fLnxOpen & O_EXCL) {
1990 Log(("%s: O_EXCL set\n", pszCaller));
1991 fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_EXISTS;
1992 } else if (fLnxOpen & O_TRUNC) {
1993 Log(("%s: O_TRUNC set\n", pszCaller));
1994 fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1995 } else
1996 fVBoxFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1997 } else {
1998 fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1999 if (fLnxOpen & O_TRUNC) {
2000 Log(("%s: O_TRUNC set\n", pszCaller));
2001 fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
2002 }
2003 }
2004
2005 /*
2006 * Access.
2007 */
2008 switch (fLnxOpen & O_ACCMODE) {
2009 case O_RDONLY:
2010 fVBoxFlags |= SHFL_CF_ACCESS_READ;
2011 *pfHandle |= VBSF_HANDLE_F_READ;
2012 break;
2013
2014 case O_WRONLY:
2015 fVBoxFlags |= SHFL_CF_ACCESS_WRITE;
2016 *pfHandle |= VBSF_HANDLE_F_WRITE;
2017 break;
2018
2019 case O_RDWR:
2020 fVBoxFlags |= SHFL_CF_ACCESS_READWRITE;
2021 *pfHandle |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
2022 break;
2023
2024 default:
2025 BUG();
2026 }
2027
2028 if (fLnxOpen & O_APPEND) {
2029 Log(("%s: O_APPEND set\n", pszCaller));
2030 fVBoxFlags |= SHFL_CF_ACCESS_APPEND;
2031 *pfHandle |= VBSF_HANDLE_F_APPEND;
2032 }
2033
2034 /*
2035 * Only directories?
2036 */
2037 if (fLnxOpen & O_DIRECTORY) {
2038 Log(("%s: O_DIRECTORY set\n", pszCaller));
2039 fVBoxFlags |= SHFL_CF_DIRECTORY;
2040 }
2041
2042 return fVBoxFlags;
2043}
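
/*
 * Illustrative example of the mapping above: open(path, O_CREAT | O_TRUNC |
 * O_WRONLY) yields SHFL_CF_ACCESS_DENYNONE | SHFL_CF_ACT_CREATE_IF_NEW
 * | SHFL_CF_ACT_OVERWRITE_IF_EXISTS | SHFL_CF_ACCESS_WRITE, while *pfHandle
 * gains VBSF_HANDLE_F_WRITE.
 */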
2044
2045
2046/**
2047 * Open a regular file.
2048 *
2049 * @param inode the inode
2050 * @param file the file
2051 * @returns 0 on success, Linux error code otherwise
2052 */
2053static int vbsf_reg_open(struct inode *inode, struct file *file)
2054{
2055 int rc, rc_linux = 0;
2056 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2057 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2058 struct vbsf_reg_info *sf_r;
2059 struct dentry *dentry = VBSF_GET_F_DENTRY(file);
2060 VBOXSFCREATEREQ *pReq;
2061
2062 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
2063 BUG_ON(!sf_g);
2064 BUG_ON(!sf_i);
2065
2066 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
2067 if (!sf_r) {
2068 LogRelFunc(("could not allocate reg info\n"));
2069 return -ENOMEM;
2070 }
2071
2072 RTListInit(&sf_r->Handle.Entry);
2073 sf_r->Handle.cRefs = 1;
2074 sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
2075 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
2076
2077 /* Already open? */
2078 if (sf_i->handle != SHFL_HANDLE_NIL) {
2079 /*
2080 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
2081 * O_CREAT, O_TRUNC: inherently true (the file was just created). Not sure
2082 * about the access flags (SHFL_CF_ACCESS_*).
2083 */
2084 sf_i->force_restat = 1;
2085 sf_r->Handle.hHost = sf_i->handle;
2086 sf_i->handle = SHFL_HANDLE_NIL;
2087 file->private_data = sf_r;
2088
2089 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
2090 vbsf_handle_append(sf_i, &sf_r->Handle);
2091 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
2092 return 0;
2093 }
2094
2095 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
2096 if (!pReq) {
2097 kfree(sf_r);
2098 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
2099 return -ENOMEM;
2100 }
2101 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
2102 RT_ZERO(pReq->CreateParms);
2103 pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
2104
2105 /* We check the value of pReq->CreateParms.Handle afterwards to
2106 * find out if the call succeeded or failed, as the API does not seem
2107 * to cleanly distinguish error and informational messages.
2108 *
2109 * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
2110 * to make the shared folders host service use our fMode parameter */
2111
2112 /* We ignore O_EXCL, as the Linux kernel seems to call create
2113 beforehand itself, so O_EXCL should always fail. */
2114 pReq->CreateParms.CreateFlags = vbsf_linux_oflags_to_vbox(file->f_flags & ~O_EXCL, &sf_r->Handle.fFlags, __FUNCTION__);
2115 pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
2116 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
2117 sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
2118 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
2119 if (RT_FAILURE(rc)) {
2120 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
2121 kfree(sf_r);
2122 VbglR0PhysHeapFree(pReq);
2123 return -RTErrConvertToErrno(rc);
2124 }
2125
2126 if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
2127 vbsf_dentry_chain_increase_ttl(dentry);
2128 rc_linux = 0;
2129 } else {
2130 switch (pReq->CreateParms.Result) {
2131 case SHFL_PATH_NOT_FOUND:
2132 rc_linux = -ENOENT;
2133 break;
2134 case SHFL_FILE_NOT_FOUND:
2135 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
2136 rc_linux = -ENOENT;
2137 break;
2138 case SHFL_FILE_EXISTS:
2139 vbsf_dentry_chain_increase_ttl(dentry);
2140 rc_linux = -EEXIST;
2141 break;
2142 default:
2143 vbsf_dentry_chain_increase_parent_ttl(dentry);
2144 rc_linux = 0;
2145 break;
2146 }
2147 }
2148
2149 sf_i->force_restat = 1; /** @todo Why?!? */
2150 sf_r->Handle.hHost = pReq->CreateParms.Handle;
2151 file->private_data = sf_r;
2152 vbsf_handle_append(sf_i, &sf_r->Handle);
2153 VbglR0PhysHeapFree(pReq);
2154 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
2155 return rc_linux;
2156}
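
/*
 * Note: the early return in vbsf_reg_open() (sf_i->handle != SHFL_HANDLE_NIL)
 * covers files just created via vbsf_create_worker(); the host handle is
 * simply transferred to this struct file instead of doing a second
 * VbglR0SfHostReqCreate round trip.
 */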
2157
2158
2159/**
2160 * Close a regular file.
2161 *
2162 * @param inode the inode
2163 * @param file the file
2164 * @returns 0 on success, Linux error code otherwise
2165 */
2166static int vbsf_reg_release(struct inode *inode, struct file *file)
2167{
2168 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2169 struct vbsf_reg_info *sf_r = file->private_data;
2170
2171 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
2172 if (sf_r) {
2173 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2174 Assert(sf_g);
2175
2176#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
2177 /* See the smbfs source (file.c). mmap in particular can cause data to be
2178 * written to the file after it is closed, which we can't cope with. We
2179 * copy and paste the body of filemap_write_and_wait() here as it was not
2180 * defined before 2.6.6 and not exported until quite a bit later. */
2181 /* filemap_write_and_wait(inode->i_mapping); */
2182 if (inode->i_mapping->nrpages
2183 && filemap_fdatawrite(inode->i_mapping) != -EIO)
2184 filemap_fdatawait(inode->i_mapping);
2185#endif
2186
2187 /* Release sf_r, closing the handle if we're the last user. */
2188 file->private_data = NULL;
2189 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
2190
2191 sf_i->handle = SHFL_HANDLE_NIL;
2192 }
2193 return 0;
2194}
2195
2196/**
2197 * Wrapper around generic/default seek function that ensures that we've got
2198 * the up-to-date file size when doing anything relative to EOF.
2199 *
2200 * The issue is that the host may extend the file while we weren't looking and
2201 * if the caller wishes to append data, it may end up overwriting existing data
2202 * if we operate with a stale size. So, we always retrieve the file size on EOF
2203 * relative seeks.
2204 */
2205static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
2206{
2207 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
2208
2209 switch (whence) {
2210#ifdef SEEK_HOLE
2211 case SEEK_HOLE:
2212 case SEEK_DATA:
2213#endif
2214 case SEEK_END: {
2215 struct vbsf_reg_info *sf_r = file->private_data;
2216 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost,
2217 true /*fForce*/, false /*fInodeLocked*/);
2218 if (rc == 0)
2219 break;
2220 return rc;
2221 }
2222 }
2223
2224#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
2225 return generic_file_llseek(file, off, whence);
2226#else
2227 return default_llseek(file, off, whence);
2228#endif
2229}
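
/*
 * Example of the stale-size problem vbsf_reg_llseek() guards against: the host
 * side appends to the file, then a guest process does lseek(fd, 0, SEEK_END).
 * Without the revalidation above the seek would land at the old, cached i_size
 * and a subsequent write could overwrite the newly appended data.
 */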
2230
2231/**
2232 * Flush region of file - chiefly mmap/msync.
2233 *
2234 * We cannot use the noop_fsync / simple_sync_file here as that means
2235 * msync(,,MS_SYNC) will return before the data hits the host, thereby
2236 * causing coherency issues with O_DIRECT access to the same file as
2237 * well as any host interaction with the file.
2238 */
2239#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
2240static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2241{
2242# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2243 return __generic_file_fsync(file, start, end, datasync);
2244# else
2245 return generic_file_fsync(file, start, end, datasync);
2246# endif
2247}
2248#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
2249static int vbsf_reg_fsync(struct file *file, int datasync)
2250{
2251 return generic_file_fsync(file, datasync);
2252}
2253#else /* < 2.6.35 */
2254static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
2255{
2256# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
2257 return simple_fsync(file, dentry, datasync);
2258# else
2259 int rc;
2260 struct inode *inode = dentry->d_inode;
2261 AssertReturn(inode, -EINVAL);
2262
2263 /** @todo What about file_fsync()? (<= 2.5.11) */
2264
2265# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2266 rc = sync_mapping_buffers(inode->i_mapping);
2267 if ( rc == 0
2268 && (inode->i_state & I_DIRTY)
2269 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
2270 ) {
2271 struct writeback_control wbc = {
2272 .sync_mode = WB_SYNC_ALL,
2273 .nr_to_write = 0
2274 };
2275 rc = sync_inode(inode, &wbc);
2276 }
2277# else /* < 2.5.12 */
2278 rc = fsync_inode_buffers(inode);
2279# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2280 rc |= fsync_inode_data_buffers(inode);
2281# endif
2282 /** @todo probably need to do more here... */
2283# endif /* < 2.5.12 */
2284 return rc;
2285# endif
2286}
2287#endif /* < 2.6.35 */
2288
2289
2290#ifdef SFLOG_ENABLED
2291/*
2292 * This is just for logging page faults and such.
2293 */
2294
2295/** Pointer to the ops generic_file_mmap returns the first time it's called. */
2296static struct vm_operations_struct const *g_pGenericFileVmOps = NULL;
2297/** Merge of g_LoggingVmOpsTemplate and g_pGenericFileVmOps. */
2298static struct vm_operations_struct g_LoggingVmOps;
2299
2300
2301/* Generic page fault callback: */
2302# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
2303static vm_fault_t vbsf_vmlog_fault(struct vm_fault *vmf)
2304{
2305 vm_fault_t rc;
2306 SFLOGFLOW(("vbsf_vmlog_fault: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
2307 rc = g_pGenericFileVmOps->fault(vmf);
2308 SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
2309 return rc;
2310}
2311# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2312static int vbsf_vmlog_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2313{
2314 int rc;
2315# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2316 SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->address));
2317# else
2318 SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
2319# endif
2320 rc = g_pGenericFileVmOps->fault(vma, vmf);
2321 SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
2322 return rc;
2323}
2324# endif
2325
2326
2327/* Special/generic page fault handler: */
2328# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 26)
2329# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 1)
2330static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
2331{
2332 struct page *page;
2333 SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p type=%p:{%#x}\n", vma, address, type, type ? *type : 0));
2334 page = g_pGenericFileVmOps->nopage(vma, address, type);
2335 SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
2336 return page;
2337}
2338# else
2339static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int write_access_or_unused)
2340{
2341 struct page *page;
2342 SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p wau=%d\n", vma, address, write_access_or_unused));
2343 page = g_pGenericFileVmOps->nopage(vma, address, write_access_or_unused);
2344 SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
2345 return page;
2346}
2347# endif /* < 2.6.26 */
2348
2349
2350/* Special page fault callback for making something writable: */
2351# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
2352static vm_fault_t vbsf_vmlog_page_mkwrite(struct vm_fault *vmf)
2353{
2354 vm_fault_t rc;
2355# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2356 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
2357# else
2358 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->virtual_address));
2359# endif
2360 rc = g_pGenericFileVmOps->page_mkwrite(vmf);
2361 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2362 return rc;
2363}
2364# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
2365static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2366{
2367 int rc;
2368 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
2369 rc = g_pGenericFileVmOps->page_mkwrite(vma, vmf);
2370 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2371 return rc;
2372}
2373# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
2374static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2375{
2376 int rc;
2377 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p page=%p\n", vma, page));
2378 rc = g_pGenericFileVmOps->page_mkwrite(vma, page);
2379 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2380 return rc;
2381}
2382# endif
2383
2384
2385/* Special page fault callback for mapping pages: */
2386# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2387static void vbsf_vmlog_map_pages(struct vm_fault *vmf, pgoff_t start, pgoff_t end)
2388{
2389 SFLOGFLOW(("vbsf_vmlog_map_pages: vmf=%p (flags=%#x addr=%p) start=%p end=%p\n", vmf, vmf->flags, vmf->address, start, end));
2390 g_pGenericFileVmOps->map_pages(vmf, start, end);
2391 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2392}
2393# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
2394static void vbsf_vmlog_map_pages(struct fault_env *fenv, pgoff_t start, pgoff_t end)
2395{
2396 SFLOGFLOW(("vbsf_vmlog_map_pages: fenv=%p (flags=%#x addr=%p) start=%p end=%p\n", fenv, fenv->flags, fenv->address, start, end));
2397 g_pGenericFileVmOps->map_pages(fenv, start, end);
2398 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2399}
2400# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
2401static void vbsf_vmlog_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
2402{
2403 SFLOGFLOW(("vbsf_vmlog_map_pages: vma=%p vmf=%p (flags=%#x addr=%p)\n", vma, vmf, vmf->flags, vmf->virtual_address));
2404 g_pGenericFileVmOps->map_pages(vma, vmf);
2405 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2406}
2407# endif
2408
2409
2410/** Overload template. */
2411static struct vm_operations_struct const g_LoggingVmOpsTemplate = {
2412# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2413 .fault = vbsf_vmlog_fault,
2414# endif
2415# if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 25)
2416 .nopage = vbsf_vmlog_nopage,
2417# endif
2418# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
2419 .page_mkwrite = vbsf_vmlog_page_mkwrite,
2420# endif
2421# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
2422 .map_pages = vbsf_vmlog_map_pages,
2423# endif
2424};
2425
2426/** file_operations::mmap wrapper for logging purposes. */
2427extern int vbsf_reg_mmap(struct file *file, struct vm_area_struct *vma)
2428{
2429 int rc;
2430 SFLOGFLOW(("vbsf_reg_mmap: file=%p vma=%p\n", file, vma));
2431 rc = generic_file_mmap(file, vma);
2432 if (rc == 0) {
2433 /* Merge the ops and template the first time thru (there's a race here). */
2434 if (g_pGenericFileVmOps == NULL) {
2435 uintptr_t const *puSrc1 = (uintptr_t *)vma->vm_ops;
2436 uintptr_t const *puSrc2 = (uintptr_t *)&g_LoggingVmOpsTemplate;
2437 uintptr_t volatile *puDst = (uintptr_t *)&g_LoggingVmOps;
2438 size_t cbLeft = sizeof(g_LoggingVmOps) / sizeof(*puDst);
2439 while (cbLeft-- > 0) {
2440 *puDst = *puSrc2 && *puSrc1 ? *puSrc2 : *puSrc1;
2441 puSrc1++;
2442 puSrc2++;
2443 puDst++;
2444 }
2445 g_pGenericFileVmOps = vma->vm_ops;
2446 vma->vm_ops = &g_LoggingVmOps;
2447 } else if (g_pGenericFileVmOps == vma->vm_ops)
2448 vma->vm_ops = &g_LoggingVmOps;
2449 else
2450 SFLOGFLOW(("vbsf_reg_mmap: Warning: vm_ops=%p, expected %p!\n", vma->vm_ops, g_pGenericFileVmOps));
2451 }
2452 SFLOGFLOW(("vbsf_reg_mmap: returns %d\n", rc));
2453 return rc;
2454}
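
/*
 * The merge loop in vbsf_reg_mmap() builds g_LoggingVmOps slot by slot: a
 * logging wrapper is installed only where both the template and the generic
 * ops provide an entry, otherwise the generic entry is kept.  This relies on
 * vm_operations_struct being treatable as an array of pointer-sized fields,
 * which is why the copy is done with uintptr_t accesses.
 */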
2455
2456#endif /* SFLOG_ENABLED */
2457
2458
2459/**
2460 * File operations for regular files.
2461 */
2462struct file_operations vbsf_reg_fops = {
2463 .open = vbsf_reg_open,
2464 .read = vbsf_reg_read,
2465 .write = vbsf_reg_write,
2466#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2467 .read_iter = vbsf_reg_read_iter,
2468 .write_iter = vbsf_reg_write_iter,
2469#endif
2470 .release = vbsf_reg_release,
2471#ifdef SFLOG_ENABLED
2472 .mmap = vbsf_reg_mmap,
2473#else
2474 .mmap = generic_file_mmap,
2475#endif
2476#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2477# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
2478/** @todo This code is known to cause caching of data which should not be
2479 * cached. Investigate. */
2480# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2481 .splice_read = vbsf_splice_read,
2482# else
2483 .sendfile = generic_file_sendfile,
2484# endif
2485 .aio_read = generic_file_aio_read,
2486 .aio_write = generic_file_aio_write,
2487# endif
2488#endif
2489 .llseek = vbsf_reg_llseek,
2490 .fsync = vbsf_reg_fsync,
2491};
2492
2493struct inode_operations vbsf_reg_iops = {
2494#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 18)
2495 .getattr = vbsf_inode_getattr,
2496#else
2497 .revalidate = vbsf_inode_revalidate,
2498#endif
2499 .setattr = vbsf_inode_setattr,
2500};
2501
2502
2503#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2504
2505/**
2506 * Used to read the content of a page into the page cache.
2507 *
2508 * Needed for mmap and reads+writes when the file is mmapped in a
2509 * shared+writeable fashion.
2510 */
2511static int vbsf_readpage(struct file *file, struct page *page)
2512{
2513 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
2514 int err;
2515
2516 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
2517 Assert(PageLocked(page));
2518
2519 if (PageUptodate(page)) {
2520 unlock_page(page);
2521 return 0;
2522 }
2523
2524 if (!is_bad_inode(inode)) {
2525 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2526 if (pReq) {
2527 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2528 struct vbsf_reg_info *sf_r = file->private_data;
2529 uint32_t cbRead;
2530 int vrc;
2531
2532 pReq->PgLst.offFirstPage = 0;
2533 pReq->PgLst.aPages[0] = page_to_phys(page);
2534 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
2535 pReq,
2536 sf_r->Handle.hHost,
2537 (uint64_t)page->index << PAGE_SHIFT,
2538 PAGE_SIZE,
2539 1 /*cPages*/);
2540
2541 cbRead = pReq->Parms.cb32Read.u.value32;
2542 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
2543 VbglR0PhysHeapFree(pReq);
2544
2545 if (RT_SUCCESS(vrc)) {
2546 if (cbRead == PAGE_SIZE) {
2547 /* likely */
2548 } else {
2549 uint8_t *pbMapped = (uint8_t *)kmap(page);
2550 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
2551 kunmap(page);
2552 /** @todo truncate the inode file size? */
2553 }
2554
2555 flush_dcache_page(page);
2556 SetPageUptodate(page);
2557 unlock_page(page);
2558 return 0;
2559 }
2560 err = -RTErrConvertToErrno(vrc);
2561 } else
2562 err = -ENOMEM;
2563 } else
2564 err = -EIO;
2565 SetPageError(page);
2566 unlock_page(page);
2567 return err;
2568}
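
/*
 * Short reads in vbsf_readpage() are treated as reading beyond end-of-file:
 * the remainder of the page is zero-filled before the page is marked up to
 * date, so the page cache never exposes stale data past EOF.
 */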
2569
2570
2571/**
2572 * Used to write out the content of a dirty page cache page to the host file.
2573 *
2574 * Needed for mmap and writes when the file is mmapped in a shared+writeable
2575 * fashion.
2576 */
2577static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
2578{
2579 struct address_space *mapping = page->mapping;
2580 struct inode *inode = mapping->host;
2581 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2582 struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
2583 int err;
2584
2585 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
2586 inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : SHFL_HANDLE_NIL));
2587
2588 if (pHandle) {
2589 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2590 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2591 if (pReq) {
2592 uint64_t const cbFile = i_size_read(inode);
2593 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
2594 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
2595 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
2596 int vrc;
2597
2598 pReq->PgLst.offFirstPage = 0;
2599 pReq->PgLst.aPages[0] = page_to_phys(page);
2600 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
2601 pReq,
2602 pHandle->hHost,
2603 offInFile,
2604 cbToWrite,
2605 1 /*cPages*/);
2606 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
2607 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
2608 vrc = VERR_WRITE_ERROR);
2609 VbglR0PhysHeapFree(pReq);
2610
2611 if (RT_SUCCESS(vrc)) {
2612 /* Update the inode if we've extended the file. */
2613 /** @todo is this necessary given the cbToWrite calc above? */
2614 uint64_t const offEndOfWrite = offInFile + cbToWrite;
2615 if ( offEndOfWrite > cbFile
2616 && offEndOfWrite > i_size_read(inode))
2617 i_size_write(inode, offEndOfWrite);
2618
2619 if (PageError(page))
2620 ClearPageError(page);
2621
2622 err = 0;
2623 } else {
2624 ClearPageUptodate(page);
2625 err = -EPROTO;
2626 }
2627 } else
2628 err = -ENOMEM;
2629 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
2630 } else {
2631 static uint64_t volatile s_cCalls = 0;
2632 if (s_cCalls++ < 16)
2633 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
2634 err = -EPROTO;
2635 }
2636 unlock_page(page);
2637 return err;
2638}
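
/*
 * Worked example for the cbToWrite calculation above, assuming 4 KiB pages and
 * i_size = 0x1800: the page at index 0 is not the EOF page and is written in
 * full (0x1000 bytes), while the page at index 1 (== cbFile >> PAGE_SHIFT)
 * only writes the 0x800 valid bytes, i.e. cbFile & PAGE_OFFSET_MASK.
 */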
2639
2640# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2641/**
2642 * Called when writing thru the page cache (which we shouldn't be doing).
2643 */
2644int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
2645 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
2646{
2647 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
2648 * the page cache for any writes AFAIK. We could just as well use
2649 * simple_write_begin & simple_write_end here if we think we really
2650 * need to have non-NULL function pointers in the table... */
2651 static uint64_t volatile s_cCalls = 0;
2652 if (s_cCalls++ < 16) {
2653 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2654 (unsigned long long)pos, len, flags);
2655 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2656 (unsigned long long)pos, len, flags);
2657# ifdef WARN_ON
2658 WARN_ON(1);
2659# endif
2660 }
2661 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
2662}
2663# endif /* KERNEL_VERSION >= 2.6.24 */
2664
2665# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2666/**
2667 * This is needed to make open accept O_DIRECT as well as dealing with direct
2668 * I/O requests if we don't intercept them earlier.
2669 */
2670# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
2671static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2672# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
2673static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2674# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2675static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2676# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
2677static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2678# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
2679static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2680# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
2681static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2682# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
2683static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2684# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
2685static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
2686# else
2687static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
2688# endif
2689{
2690 TRACE();
2691 return -EINVAL;
2692}
2693# endif
2694
2695/**
2696 * Address space (for the page cache) operations for regular files.
2697 *
2698 * @todo the FsPerf touch/flush (mmap) test fails on 4.4.0 (ubuntu 16.04 lts).
2699 */
2700struct address_space_operations vbsf_reg_aops = {
2701 .readpage = vbsf_readpage,
2702 .writepage = vbsf_writepage,
2703 /** @todo Need .writepages if we want msync performance... */
2704# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2705 .set_page_dirty = __set_page_dirty_buffers,
2706# endif
2707# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2708 .write_begin = vbsf_write_begin,
2709 .write_end = simple_write_end,
2710# else
2711 .prepare_write = simple_prepare_write,
2712 .commit_write = simple_commit_write,
2713# endif
2714# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2715 .direct_IO = vbsf_direct_IO,
2716# endif
2717};
2718
2719#endif /* LINUX_VERSION_CODE >= 2.6.0 */
2720