VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c @ 77735

Last change on this file since 77735 was 77735, checked in by vboxsync, 6 years ago

linux/vboxsf: build fix. bugref:9172

1/* $Id: regops.c 77735 2019-03-16 03:58:19Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31
32/*********************************************************************************************************************************
33* Header Files *
34*********************************************************************************************************************************/
35#include "vfsmod.h"
36#include <linux/uio.h>
37#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
38# include <linux/aio.h> /* struct kiocb before 4.1 */
39#endif
40#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/buffer_head.h>
42#endif
43#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
44 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
45# include <linux/writeback.h>
46#endif
47#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
48 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
49# include <linux/splice.h>
50#endif
51#include <iprt/err.h>
52
53#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
54# define SEEK_END 2
55#endif
56
57#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
58# define iter_is_iovec(a_pIter) ( !((a_pIter)->type & (ITER_KVEC | ITER_BVEC)) )
59#endif
60
61#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0)
62# define vm_fault_t int
63#endif
64
65
66/*********************************************************************************************************************************
67* Structures and Typedefs *
68*********************************************************************************************************************************/
69#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
70/** Used by vbsf_iter_lock_pages() to keep the first page of the next segment. */
71struct vbsf_iter_stash {
72 struct page *pPage;
73 size_t off;
74 size_t cb;
75# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
76 size_t offFromEnd;
77 struct iov_iter Copy;
78# endif
79};
80#endif /* >= 3.16.0 */
81/** Initializer for struct vbsf_iter_stash. */
82#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
83# define VBSF_ITER_STASH_INITIALIZER { NULL, 0 }
84#else
85# define VBSF_ITER_STASH_INITIALIZER { NULL, 0, ~(size_t)0 }
86#endif
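/*
 * The stash lets vbsf_iter_lock_pages() hang on to a partially consumed page
 * when a segment does not end on a page boundary, so the next call can resume
 * from it.  On kernels prior to 4.11 the Copy/offFromEnd members keep a
 * snapshot of the iterator, since those kernels lack iov_iter_revert(); see
 * vbsf_iter_rewind().
 */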
87
88
89
90/**
91 * Called when an inode is released to unlink all handles that might still
92 * be associated with it.
93 *
94 * @param pInodeInfo The inode which handles to drop.
95 */
96void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
97{
98 struct vbsf_handle *pCur, *pNext;
99 unsigned long fSavedFlags;
100 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
101 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
102
103 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
104 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
105 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
106 pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
107 RTListNodeRemove(&pCur->Entry);
108 }
109
110 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
111}
112
113
114/**
115 * Locates a handle that matches all the flags in @a fFlags.
116 *
117 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
118 * release it. NULL if no suitable handle was found.
119 * @param pInodeInfo The inode info to search.
120 * @param fFlagsSet The flags that must be set.
121 * @param fFlagsClear The flags that must be clear.
122 */
123struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
124{
125 struct vbsf_handle *pCur;
126 unsigned long fSavedFlags;
127 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
128
129 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
130 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
131 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
132 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
133 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
134 if (cRefs > 1) {
135 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
136 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
137 return pCur;
138 }
139 /* Oops, already being closed (safe as it's only ever increased here). */
140 ASMAtomicDecU32(&pCur->cRefs);
141 }
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
146 return NULL;
147}
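/*
 * Typical pairing, as a rough sketch only (the flag name VBSF_HANDLE_F_READ
 * and the exact vbsf_handle_release() signature live in vfsmod.h and are
 * assumed here):
 *
 *     struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_READ, 0);
 *     if (pHandle) {
 *         // ... issue host I/O against pHandle->hHost ...
 *         vbsf_handle_release(pHandle, sf_g, "vbsf_reg_read");
 *     }
 */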
148
149
150/**
151 * Slow worker for vbsf_handle_release() that does the freeing.
152 *
153 * @returns 0 (ref count).
154 * @param pHandle The handle to release.
155 * @param sf_g The info structure for the shared folder associated
156 * with the handle.
157 * @param pszCaller The caller name (for logging failures).
158 */
159uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
160{
161 int rc;
162 unsigned long fSavedFlags;
163
164 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
165
166 /*
167 * Remove from the list.
168 */
169 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
170
171 AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
172 Assert(pHandle->pInodeInfo);
173 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
174
175 if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
176 pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
177 RTListNodeRemove(&pHandle->Entry);
178 }
179
180 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
181
182 /*
183 * Actually destroy it.
184 */
185 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
186 if (RT_FAILURE(rc))
187 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
188 pHandle->hHost = SHFL_HANDLE_NIL;
189 pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
190 kfree(pHandle);
191 return 0;
192}
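/*
 * The fast path, vbsf_handle_release(), presumably just drops a reference
 * with an atomic decrement and only ends up here once the count reaches zero;
 * this slow variant then unlinks the handle and closes it on the host (the
 * exact fast-path shape lives in vfsmod.h and is assumed here).
 */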
193
194
195/**
196 * Appends a handle to a handle list.
197 *
198 * @param pInodeInfo The inode to add it to.
199 * @param pHandle The handle to add.
200 */
201void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
202{
203#ifdef VBOX_STRICT
204 struct vbsf_handle *pCur;
205#endif
206 unsigned long fSavedFlags;
207
208 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
209 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
210 ("%p %#x\n", pHandle, pHandle->fFlags));
211 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
212
213 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
214
215 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
216 ("%p %#x\n", pHandle, pHandle->fFlags));
217#ifdef VBOX_STRICT
218 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
219 Assert(pCur != pHandle);
220 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
221 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
222 }
223 pHandle->pInodeInfo = pInodeInfo;
224#endif
225
226 pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
227 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
228
229 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
230}
231
232
233#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
234 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
235
236/*
237 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
238 */
239
240static void vbsf_free_pipebuf(struct page *kpage)
241{
242 kunmap(kpage);
243 __free_pages(kpage, 0);
244}
245
246static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
247{
248 return 0;
249}
250
251static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
252{
253}
254
255static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
256{
257}
258
259static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
260{
261 return 0;
262}
263
264static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
265{
266 vbsf_free_pipebuf(pipe_buf->page);
267}
268
269static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
270{
271 return 0;
272}
273
274static struct pipe_buf_operations vbsf_pipe_buf_ops = {
275 .can_merge = 0,
276 .map = vbsf_pipe_buf_map,
277 .unmap = vbsf_pipe_buf_unmap,
278 .confirm = vbsf_pipe_buf_confirm,
279 .release = vbsf_pipe_buf_release,
280 .steal = vbsf_pipe_buf_steal,
281 .get = vbsf_pipe_buf_get,
282};
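/*
 * These are mostly no-op implementations: the page handed to the pipe is
 * allocated, filled and kmap()ed by vbsf_splice_read() before it is queued,
 * so there is nothing left to map, confirm or steal; only .release does real
 * work, freeing the page again via vbsf_free_pipebuf().
 */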
283
284static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
285 void *buf, uint32_t *nread, uint64_t pos)
286{
287 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
288 if (RT_FAILURE(rc)) {
289 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
290 rc));
291 return -EPROTO;
292 }
293 return 0;
294}
295
296# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
297# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
298
299ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
300{
301 size_t bytes_remaining = len;
302 loff_t orig_offset = *poffset;
303 loff_t offset = orig_offset;
304 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
305 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
306 struct vbsf_reg_info *sf_r = in->private_data;
307 ssize_t retval;
308 struct page *kpage = 0;
309 size_t nsent = 0;
310
311/** @todo rig up a FsPerf test for this code */
312 TRACE();
313 if (!S_ISREG(inode->i_mode)) {
314 LogFunc(("read from non regular file %d\n", inode->i_mode));
315 return -EINVAL;
316 }
317 if (!len) {
318 return 0;
319 }
320
321 LOCK_PIPE(pipe);
322
323 uint32_t req_size = 0;
324 while (bytes_remaining > 0) {
325 kpage = alloc_page(GFP_KERNEL);
326 if (unlikely(kpage == NULL)) {
327 UNLOCK_PIPE(pipe);
328 return -ENOMEM;
329 }
330 req_size = 0;
331 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
332 uint32_t chunk = 0;
333 void *kbuf = kmap(kpage);
334 while (chunk < req_size) {
335 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
336 if (retval < 0)
337 goto err;
338 if (nread == 0)
339 break;
340 chunk += nread;
341 offset += nread;
342 nread = req_size - chunk;
343 }
344 if (!pipe->readers) {
345 send_sig(SIGPIPE, current, 0);
346 retval = -EPIPE;
347 goto err;
348 }
349 if (pipe->nrbufs < PIPE_BUFFERS) {
350 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
351 pipebuf->page = kpage;
352 pipebuf->ops = &vbsf_pipe_buf_ops;
353 pipebuf->len = req_size;
354 pipebuf->offset = 0;
355 pipebuf->private = 0;
356 pipebuf->flags = 0;
357 pipe->nrbufs++;
358 nsent += req_size;
359 bytes_remaining -= req_size;
360 if (signal_pending(current))
361 break;
362 } else { /* pipe full */
363
364 if (flags & SPLICE_F_NONBLOCK) {
365 retval = -EAGAIN;
366 goto err;
367 }
368 vbsf_free_pipebuf(kpage);
369 break;
370 }
371 }
372 UNLOCK_PIPE(pipe);
373 if (!nsent && signal_pending(current))
374 return -ERESTARTSYS;
375 *poffset += nsent;
376 return offset - orig_offset;
377
378 err:
379 UNLOCK_PIPE(pipe);
380 vbsf_free_pipebuf(kpage);
381 return retval;
382}
383
384#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
385
386/**
387 * Helper for deciding whether we should do a read via the page cache or not.
388 *
389 * By default we will only use the page cache if there is a writable memory
390 * mapping of the file with a chance that it may have modified any of the pages
391 * already.
392 */
393DECLINLINE(bool) vbsf_should_use_cached_read(struct file *file, struct address_space *mapping, struct vbsf_super_info *sf_g)
394{
395 return mapping
396 && mapping->nrpages > 0
397 && mapping_writably_mapped(mapping)
398 && !(file->f_flags & O_DIRECT)
399 && 1 /** @todo make this behaviour configurable at mount time (sf_g) */;
400}
401
402/** Wrapper around put_page / page_cache_release. */
403DECLINLINE(void) vbsf_put_page(struct page *pPage)
404{
405#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
406 put_page(pPage);
407#else
408 page_cache_release(pPage);
409#endif
410}
411
412
413/** Wrapper around get_page / page_cache_get. */
414DECLINLINE(void) vbsf_get_page(struct page *pPage)
415{
416#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
417 get_page(pPage);
418#else
419 page_cache_get(pPage);
420#endif
421}
422
423
424/** Companion to vbsf_lock_user_pages(). */
425DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack)
426{
427 /* We don't mark kernel pages dirty: */
428 if (fLockPgHack)
429 fSetDirty = false;
430
431 while (cPages-- > 0)
432 {
433 struct page *pPage = papPages[cPages];
434 if (fSetDirty && !PageReserved(pPage))
435 SetPageDirty(pPage);
436 vbsf_put_page(pPage);
437 }
438}
439
440
441/**
442 * Worker for vbsf_lock_user_pages_failed_check_kernel() and
443 * vbsf_iter_lock_pages().
444 */
445static int vbsf_lock_kernel_pages(uint8_t *pbStart, bool fWrite, size_t cPages, struct page **papPages)
446{
447 uintptr_t const uPtrFrom = (uintptr_t)pbStart;
448 uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1;
449 uint8_t *pbPage = (uint8_t *)uPtrLast;
450 size_t iPage = cPages;
451
452 /*
453 * Touch the pages first (paranoia^2).
454 */
455 if (fWrite) {
456 uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom;
457 while (iPage-- > 0) {
458 *pbProbe = *pbProbe;
459 pbProbe += PAGE_SIZE;
460 }
461 } else {
462 uint8_t const *pbProbe = (uint8_t const *)uPtrFrom;
463 while (iPage-- > 0) {
464 ASMProbeReadByte(pbProbe);
465 pbProbe += PAGE_SIZE;
466 }
467 }
468
469 /*
470 * Get the pages.
471 * Note! Fixes here probably applies to rtR0MemObjNativeLockKernel as well.
472 */
473 iPage = cPages;
474 if ( uPtrFrom >= (unsigned long)__va(0)
475 && uPtrLast < (unsigned long)high_memory) {
476 /* The physical page mapping area: */
477 while (iPage-- > 0) {
478 struct page *pPage = papPages[iPage] = virt_to_page(pbPage);
479 vbsf_get_page(pPage);
480 pbPage -= PAGE_SIZE;
481 }
482 } else {
483 /* This is vmalloc or some such thing, so go thru page tables: */
484 while (iPage-- > 0) {
485 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
486 if (pPage) {
487 papPages[iPage] = pPage;
488 vbsf_get_page(pPage);
489 pbPage -= PAGE_SIZE;
490 } else {
491 while (++iPage < cPages) {
492 pPage = papPages[iPage];
493 vbsf_put_page(pPage);
494 }
495 return -EFAULT;
496 }
497 }
498 }
499 return 0;
500}
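/*
 * The pages are referenced back to front; should a vmalloc address fail to
 * translate, the error path above releases only the entries that were filled
 * in so far (indices iPage+1 .. cPages-1) before returning -EFAULT.
 */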
501
502
503/**
504 * Catches kernel_read() and kernel_write() calls and works around them.
505 *
506 * The file_operations::read and file_operations::write callbacks supposedly
507 * hands us the user buffers to read into and write out of. To allow the kernel
508 * to read and write without allocating buffers in userland, they kernel_read()
509 * and kernel_write() increases the user space address limit before calling us
510 * so that copyin/copyout won't reject it. Our problem is that get_user_pages()
511 * works on the userspace address space structures and will not be fooled by an
512 * increased addr_limit.
513 *
514 * This code tries to detect this situation and fake the page locking for the
515 * kernel buffer.
516 */
517static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
518 struct page **papPages, bool *pfLockPgHack)
519{
520 /*
521 * Check that this is valid user memory that is actually in the kernel range.
522 */
523#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
524 if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
525 && uPtrFrom >= USER_DS.seg)
526#else
527 if ( access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
528 && uPtrFrom >= USER_DS.seg)
529#endif
530 {
531 int rc = vbsf_lock_kernel_pages((uint8_t *)uPtrFrom, fWrite, cPages, papPages);
532 if (rc == 0) {
533 *pfLockPgHack = true;
534 return 0;
535 }
536 }
537
538 return rcFailed;
539}
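/*
 * Rough sequence this caters for (see the function comment above):
 * kernel_read() raises the address limit (set_fs(KERNEL_DS)) and calls our
 * ->read with a kernel buffer; get_user_pages() then fails because the
 * address has no user VMA, and we fall back to referencing the kernel pages
 * directly through vbsf_lock_kernel_pages().
 */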
540
541
542/** Wrapper around get_user_pages. */
543DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
544{
545# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
546 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
547 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
548# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
549 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
550# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
551 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
552# else
553 struct task_struct *pTask = current;
554 size_t cPagesLocked;
555 down_read(&pTask->mm->mmap_sem);
556 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
557 up_read(&pTask->mm->mmap_sem);
558# endif
559 *pfLockPgHack = false;
560 if (cPagesLocked == cPages)
561 return 0;
562
563 /*
564 * It failed.
565 */
566 if (cPagesLocked < 0)
567 return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
568
569 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
570
571 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
572 return -EFAULT;
573}
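/*
 * The #if ladder above tracks the get_user_pages API churn: 4.9+ passes a
 * combined FOLL_* flag mask, 4.6-4.8 pass separate write/force arguments,
 * 4.0-4.5 additionally require the task and mm, and older kernels only offer
 * plain get_user_pages(), which must be called with mmap_sem held.
 */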
574
575
576/**
577 * Read function used when accessing files that are memory mapped.
578 *
579 * We read from the page cache here to present a coherent picture of the
580 * file content.
581 */
582static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
583{
584#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
585 struct iovec iov = { .iov_base = buf, .iov_len = size };
586 struct iov_iter iter;
587 struct kiocb kiocb;
588 ssize_t cbRet;
589
590 init_sync_kiocb(&kiocb, file);
591 kiocb.ki_pos = *off;
592 iov_iter_init(&iter, READ, &iov, 1, size);
593
594 cbRet = generic_file_read_iter(&kiocb, &iter);
595
596 *off = kiocb.ki_pos;
597 return cbRet;
598
599#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
600 struct iovec iov = { .iov_base = buf, .iov_len = size };
601 struct kiocb kiocb;
602 ssize_t cbRet;
603
604 init_sync_kiocb(&kiocb, file);
605 kiocb.ki_pos = *off;
606
607 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
608 if (cbRet == -EIOCBQUEUED)
609 cbRet = wait_on_sync_kiocb(&kiocb);
610
611 *off = kiocb.ki_pos;
612 return cbRet;
613
614#else /* 2.6.18 or earlier: */
615 return generic_file_read(file, buf, size, off);
616#endif
617}
618
619
620/**
621 * Fallback case of vbsf_reg_read() that locks the user buffers and let the host
622 * write directly to them.
623 */
624static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
625 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
626{
627 /*
628 * Lock pages and execute the read, taking care not to pass the host
629 * more than it can handle in one go or more than we care to allocate
630 * page arrays for. The latter limit is set at just short of 32KB due
631 * to how the physical heap works.
632 */
633 struct page *apPagesStack[16];
634 struct page **papPages = &apPagesStack[0];
635 struct page **papPagesFree = NULL;
636 VBOXSFREADPGLSTREQ *pReq;
637 loff_t offFile = *off;
638 ssize_t cbRet = -ENOMEM;
639 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
640 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
641 bool fLockPgHack;
642
643 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
644 while (!pReq && cMaxPages > 4) {
645 cMaxPages /= 2;
646 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
647 }
648 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
649 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
650 if (pReq && papPages) {
651 cbRet = 0;
652 for (;;) {
653 /*
654 * Figure out how much to process now and lock the user pages.
655 */
656 int rc;
657 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
658 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
659 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
660 if (cPages <= cMaxPages)
661 cbChunk = size;
662 else {
663 cPages = cMaxPages;
664 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
665 }
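/*
 * Worked example with 4 KiB pages: a buffer starting 0x300 bytes into a page
 * with size = 0x5000 needs RT_ALIGN_Z(0x300 + 0x5000, 0x1000) >> 12 = 6 pages;
 * if cMaxPages is 4, this round is capped at cbChunk = (4 << 12) - 0x300 =
 * 0x3d00 bytes and the remainder is handled by the next loop iteration.
 */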
666
667 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack);
668 if (rc == 0) {
669 size_t iPage = cPages;
670 while (iPage-- > 0)
671 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
672 } else {
673 cbRet = rc;
674 break;
675 }
676
677 /*
678 * Issue the request and unlock the pages.
679 */
680 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
681
682 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack);
683
684 if (RT_SUCCESS(rc)) {
685 /*
686 * Success, advance position and buffer.
687 */
688 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
689 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
690 cbRet += cbActual;
691 offFile += cbActual;
692 buf = (uint8_t *)buf + cbActual;
693 size -= cbActual;
694
695 /*
696 * Are we done already? If so commit the new file offset.
697 */
698 if (!size || cbActual < cbChunk) {
699 *off = offFile;
700 break;
701 }
702 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
703 /*
704 * The host probably doesn't have enough heap to handle the
705 * request, reduce the page count and retry.
706 */
707 cMaxPages /= 4;
708 Assert(cMaxPages > 0);
709 } else {
710 /*
711 * If we've successfully read stuff, return it rather than
712 * the error. (Not sure if this is such a great idea...)
713 */
714 if (cbRet > 0)
715 *off = offFile;
716 else
717 cbRet = -EPROTO;
718 break;
719 }
720 }
721 }
722 if (papPagesFree)
723 kfree(papPages);
724 if (pReq)
725 VbglR0PhysHeapFree(pReq);
726 return cbRet;
727}
728
729
730/**
731 * Read from a regular file.
732 *
733 * @param file the file
734 * @param buf the buffer
735 * @param size length of the buffer
736 * @param off offset within the file (in/out).
737 * @returns the number of read bytes on success, Linux error code otherwise
738 */
739static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
740{
741 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
742 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
743 struct vbsf_reg_info *sf_r = file->private_data;
744 struct address_space *mapping = inode->i_mapping;
745
746 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
747
748 if (!S_ISREG(inode->i_mode)) {
749 LogFunc(("read from non regular file %d\n", inode->i_mode));
750 return -EINVAL;
751 }
752
753 /** @todo XXX Check read permission according to inode->i_mode! */
754
755 if (!size)
756 return 0;
757
758 /*
759 * If there is a mapping and O_DIRECT isn't in effect, we must heed
760 * dirty pages in the mapping and read from them. For simplicity
761 * though, we just do page cache reading when there are writable
762 * mappings around with any kind of pages loaded.
763 */
764 if (vbsf_should_use_cached_read(file, mapping, sf_g))
765 return vbsf_reg_read_mapped(file, buf, size, off);
766
767 /*
768 * For small requests, try to use an embedded buffer provided we get a heap block
769 * that does not cross page boundaries (see host code).
770 */
771 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
772 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
773 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
774 if (pReq) {
775 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
776 ssize_t cbRet;
777 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
778 if (RT_SUCCESS(vrc)) {
779 cbRet = pReq->Parms.cb32Read.u.value32;
780 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
781 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
782 *off += cbRet;
783 else
784 cbRet = -EFAULT;
785 } else
786 cbRet = -EPROTO;
787 VbglR0PhysHeapFree(pReq);
788 return cbRet;
789 }
790 VbglR0PhysHeapFree(pReq);
791 }
792 }
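/*
 * With 4 KiB pages the embedded path above covers reads up to
 * 3072 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) bytes; larger reads,
 * or the unlucky case of a heap block straddling a page boundary, fall
 * through to the page-locking path below.
 */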
793
794#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
795 /*
796 * For medium sized requests try use a bounce buffer.
797 */
798 if (size <= _64K /** @todo make this configurable? */) {
799 void *pvBounce = kmalloc(size, GFP_KERNEL);
800 if (pvBounce) {
801 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
802 if (pReq) {
803 ssize_t cbRet;
804 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
805 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
806 if (RT_SUCCESS(vrc)) {
807 cbRet = pReq->Parms.cb32Read.u.value32;
808 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
809 if (copy_to_user(buf, pvBounce, cbRet) == 0)
810 *off += cbRet;
811 else
812 cbRet = -EFAULT;
813 } else
814 cbRet = -EPROTO;
815 VbglR0PhysHeapFree(pReq);
816 kfree(pvBounce);
817 return cbRet;
818 }
819 kfree(pvBounce);
820 }
821 }
822#endif
823
824 return vbsf_reg_read_locking(file, buf, size, off, sf_g, sf_r);
825}
826
827
828/**
829 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
830 * the changes written via vbsf_reg_write are made visible to mmap users.
831 */
832DECLINLINE(void) vbsf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
833{
834 /*
835 * Only bother with this if the mapping has any pages in it.
836 *
837 * Note! According to the docs, the last parameter, end, is inclusive (we
838 * would have named it 'last' to indicate this).
839 *
840 * Note! The pre-2.6.12 function might not do enough to ensure consistency
841 * when any of the pages in the range is already mapped.
842 */
843# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
844 if (mapping)
845 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
846# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
847 if (mapping && mapping->nrpages > 0)
848 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
849# else
850 /** @todo ... */
851 RT_NOREF(mapping, offStart, offEnd);
852# endif
853}
854
855
856/**
857 * Fallback case of vbsf_reg_write() that locks the user buffers and let the host
858 * write directly to them.
859 */
860static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
861 struct inode *inode, struct vbsf_inode_info *sf_i,
862 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
863{
864 /*
865 * Lock pages and execute the write, taking care not to pass the host
866 * more than it can handle in one go or more than we care to allocate
867 * page arrays for. The latter limit is set at just short of 32KB due
868 * to how the physical heap works.
869 */
870 struct page *apPagesStack[16];
871 struct page **papPages = &apPagesStack[0];
872 struct page **papPagesFree = NULL;
873 VBOXSFWRITEPGLSTREQ *pReq;
874 ssize_t cbRet = -ENOMEM;
875 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
876 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
877 bool fLockPgHack;
878
879 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
880 while (!pReq && cMaxPages > 4) {
881 cMaxPages /= 2;
882 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
883 }
884 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
885 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
886 if (pReq && papPages) {
887 cbRet = 0;
888 for (;;) {
889 /*
890 * Figure out how much to process now and lock the user pages.
891 */
892 int rc;
893 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
894 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
895 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
896 if (cPages <= cMaxPages)
897 cbChunk = size;
898 else {
899 cPages = cMaxPages;
900 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
901 }
902
903 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack);
904 if (rc == 0) {
905 size_t iPage = cPages;
906 while (iPage-- > 0)
907 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
908 } else {
909 cbRet = rc;
910 break;
911 }
912
913 /*
914 * Issue the request and unlock the pages.
915 */
916 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
917
918 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
919
920 if (RT_SUCCESS(rc)) {
921 /*
922 * Success, advance position and buffer.
923 */
924 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
925 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
926 cbRet += cbActual;
927 offFile += cbActual;
928 buf = (uint8_t *)buf + cbActual;
929 size -= cbActual;
930 if (offFile > i_size_read(inode))
931 i_size_write(inode, offFile);
932 vbsf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
933 sf_i->force_restat = 1; /* mtime (and size) may have changed */
934
935 /*
936 * Are we done already? If so commit the new file offset.
937 */
938 if (!size || cbActual < cbChunk) {
939 *off = offFile;
940 break;
941 }
942 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
943 /*
944 * The host probably doesn't have enough heap to handle the
945 * request, reduce the page count and retry.
946 */
947 cMaxPages /= 4;
948 Assert(cMaxPages > 0);
949 } else {
950 /*
951 * If we've successfully written stuff, return it rather than
952 * the error. (Not sure if this is such a great idea...)
953 */
954 if (cbRet > 0)
955 *off = offFile;
956 else
957 cbRet = -EPROTO;
958 break;
959 }
960 }
961 }
962 if (papPagesFree)
963 kfree(papPages);
964 if (pReq)
965 VbglR0PhysHeapFree(pReq);
966 return cbRet;
967}
968
969
970/**
971 * Write to a regular file.
972 *
973 * @param file the file
974 * @param buf the buffer
975 * @param size length of the buffer
976 * @param off offset within the file
977 * @returns the number of written bytes on success, Linux error code otherwise
978 */
979static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
980{
981 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
982 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
983 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
984 struct vbsf_reg_info *sf_r = file->private_data;
985 struct address_space *mapping = inode->i_mapping;
986 loff_t pos;
987
988 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
989 BUG_ON(!sf_i);
990 BUG_ON(!sf_g);
991 BUG_ON(!sf_r);
992 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
993
994 pos = *off;
995 /** @todo This should be handled by the host, it returning the new file
996 * offset when appending. We may have an outdated i_size value here! */
997 if (file->f_flags & O_APPEND)
998 pos = i_size_read(inode);
999
1000 /** @todo XXX Check write permission according to inode->i_mode! */
1001
1002 if (!size) {
1003 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
1004 *off = pos;
1005 return 0;
1006 }
1007
1008 /*
1009 * If there are active writable mappings, coordinate with any
1010 * pending writes via those.
1011 */
1012 if ( mapping
1013 && mapping->nrpages > 0
1014 && mapping_writably_mapped(mapping)) {
1015#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1016 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
1017 if (err)
1018 return err;
1019#else
1020 /** @todo ... */
1021#endif
1022 }
1023
1024 /*
1025 * For small requests, try to use an embedded buffer provided we get a heap block
1026 * that does not cross page boundaries (see host code).
1027 */
1028 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1029 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
1030 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1031 if ( pReq
1032 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1033 ssize_t cbRet;
1034 if (copy_from_user(pReq->abData, buf, size) == 0) {
1035 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1036 pos, (uint32_t)size);
1037 if (RT_SUCCESS(vrc)) {
1038 cbRet = pReq->Parms.cb32Write.u.value32;
1039 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1040 pos += cbRet;
1041 *off = pos;
1042 if (pos > i_size_read(inode))
1043 i_size_write(inode, pos);
1044 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1045 } else
1046 cbRet = -EPROTO;
1047 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1048 } else
1049 cbRet = -EFAULT;
1050
1051 VbglR0PhysHeapFree(pReq);
1052 return cbRet;
1053 }
1054 if (pReq)
1055 VbglR0PhysHeapFree(pReq);
1056 }
1057
1058#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
1059 /*
1060 * For medium sized requests try use a bounce buffer.
1061 */
1062 if (size <= _64K /** @todo make this configurable? */) {
1063 void *pvBounce = kmalloc(size, GFP_KERNEL);
1064 if (pvBounce) {
1065 if (copy_from_user(pvBounce, buf, size) == 0) {
1066 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1067 if (pReq) {
1068 ssize_t cbRet;
1069 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->handle, pos,
1070 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
1071 if (RT_SUCCESS(vrc)) {
1072 cbRet = pReq->Parms.cb32Write.u.value32;
1073 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1074 pos += cbRet;
1075 *off = pos;
1076 if (pos > i_size_read(inode))
1077 i_size_write(inode, pos);
1078 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1079 } else
1080 cbRet = -EPROTO;
1081 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1082 VbglR0PhysHeapFree(pReq);
1083 kfree(pvBounce);
1084 return cbRet;
1085 }
1086 kfree(pvBounce);
1087 } else {
1088 kfree(pvBounce);
1089 return -EFAULT;
1090 }
1091 }
1092 }
1093#endif
1094
1095 return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
1096}
1097
1098#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
1099/*
1100 * Hide missing uio.h functionality in older kernels.
1101 */
1102
1103static size_t copy_from_iter(uint8_t *pbDst, size_t cbToCopy, struct iov_iter *pSrcIter)
1104{
1105 size_t const cbTotal = cbToCopy;
1106 Assert(iov_iter_count(pSrcIter) >= cbToCopy);
1107 if (pSrcIter->type & ITER_BVEC) {
1108 while (cbToCopy > 0) {
1109 size_t const offPage = (uintptr_t)pbDst & PAGE_OFFSET_MASK;
1110 size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
1111 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbDst);
1112 size_t cbCopied = copy_page_from_iter(pPage, offPage, cbThisCopy, pSrcIter);
1113 AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
1114 pbDst += cbCopied;
1115 cbToCopy -= cbCopied;
1116 if (cbCopied != cbThisCopy)
1117 break;
1118 }
1119 } else {
1120 while (cbToCopy > 0) {
1121 size_t cbThisCopy = iov_iter_single_seg_count(pSrcIter);
1122 if (cbThisCopy > 0) {
1123 if (cbThisCopy > cbToCopy)
1124 cbThisCopy = cbToCopy;
1125 if (pSrcIter->type & ITER_KVEC)
1126 memcpy(pbDst, (void *)pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy);
1127 else if (copy_from_user(pbDst, pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy) != 0)
1128 break;
1129 pbDst += cbThisCopy;
1130 cbToCopy -= cbThisCopy;
1131 }
1132 iov_iter_advance(pSrcIter, cbThisCopy);
1133 }
1134 }
1135 return cbTotal - cbToCopy;
1136}
1137
1138static size_t copy_to_iter(uint8_t const *pbSrc, size_t cbToCopy, struct iov_iter *pDstIter)
1139{
1140 size_t const cbTotal = cbToCopy;
1141 Assert(iov_iter_count(pDstIter) >= cbToCopy);
1142 if (pDstIter->type & ITER_BVEC) {
1143 while (cbToCopy > 0) {
1144 size_t const offPage = (uintptr_t)pbSrc & PAGE_OFFSET_MASK;
1145 size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
1146 struct page *pPage = rtR0MemObjLinuxVirtToPage((void *)pbSrc);
1147 size_t cbCopied = copy_page_to_iter(pPage, offPage, cbThisCopy, pDstIter);
1148 AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
1149 pbSrc += cbCopied;
1150 cbToCopy -= cbCopied;
1151 if (cbCopied != cbThisCopy)
1152 break;
1153 }
1154 } else {
1155 while (cbToCopy > 0) {
1156 size_t cbThisCopy = iov_iter_single_seg_count(pDstIter);
1157 if (cbThisCopy > 0) {
1158 if (cbThisCopy > cbToCopy)
1159 cbThisCopy = cbToCopy;
1160 if (pDstIter->type & ITER_KVEC)
1161 memcpy((void *)pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy);
1162 else if (copy_to_user(pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy) != 0) {
1163 break;
1164 }
1165 pbSrc += cbThisCopy;
1166 cbToCopy -= cbThisCopy;
1167 }
1168 iov_iter_advance(pDstIter, cbThisCopy);
1169 }
1170 }
1171 return cbTotal - cbToCopy;
1172}
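/*
 * Both helpers above mimic the copy_from_iter()/copy_to_iter() primitives
 * that appeared in 3.18: they return the number of bytes actually
 * transferred, and a short count means the loop stopped early, e.g. on a
 * fault in the user segment being copied.
 */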
1173
1174#endif /* 3.16.0 >= linux < 3.18.0 */
1175#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1176
1177/**
1178 * Companion to vbsf_iter_lock_pages().
1179 */
1180DECLINLINE(void) vbsf_iter_unlock_pages(struct iov_iter *iter, struct page **papPages, size_t cPages, bool fSetDirty)
1181{
1182 /* We don't mark kernel pages dirty: */
1183 if (iter->type & ITER_KVEC)
1184 fSetDirty = false;
1185
1186 while (cPages-- > 0)
1187 {
1188 struct page *pPage = papPages[cPages];
1189 if (fSetDirty && !PageReserved(pPage))
1190 SetPageDirty(pPage);
1191 vbsf_put_page(pPage);
1192 }
1193}
1194
1195
1196/**
1197 * Locks up to @a cMaxPages from the I/O vector iterator, advancing the
1198 * iterator.
1199 *
1200 * @returns 0 on success, negative errno value on failure.
1201 * @param iter The iterator to lock pages from.
1202 * @param fWrite Whether to write (true) or read (false) lock the pages.
1203 * @param pStash Where we stash peek results.
1204 * @param cMaxPages The maximum number of pages to get.
1205 * @param papPages Where to return the locked pages.
1206 * @param pcPages Where to return the number of pages.
1207 * @param poffPage0 Where to return the offset into the first page.
1208 * @param pcbChunk Where to return the number of bytes covered.
1209 */
1210static int vbsf_iter_lock_pages(struct iov_iter *iter, bool fWrite, struct vbsf_iter_stash *pStash, size_t cMaxPages,
1211 struct page **papPages, size_t *pcPages, size_t *poffPage0, size_t *pcbChunk)
1212{
1213 size_t cbChunk = 0;
1214 size_t cPages = 0;
1215 size_t offPage0 = 0;
1216 int rc = 0;
1217
1218 Assert(iov_iter_count(iter) + pStash->cb > 0);
1219 if (!(iter->type & ITER_KVEC)) {
1220 /*
1221 * Do we have a stashed page?
1222 */
1223 if (pStash->pPage) {
1224 papPages[0] = pStash->pPage;
1225 offPage0 = pStash->off;
1226 cbChunk = pStash->cb;
1227 cPages = 1;
1228 pStash->pPage = NULL;
1229 pStash->off = 0;
1230 pStash->cb = 0;
1231 if ( offPage0 + cbChunk < PAGE_SIZE
1232 || iov_iter_count(iter) == 0) {
1233 *poffPage0 = offPage0;
1234 *pcbChunk = cbChunk;
1235 *pcPages = cPages;
1236 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx (stashed)\n",
1237 rc, cPages, offPage0, cbChunk));
1238 return 0;
1239 }
1240 cMaxPages -= 1;
1241 SFLOG3(("vbsf_iter_lock_pages: Picked up stashed page: %#zx LB %#zx\n", offPage0, cbChunk));
1242 } else {
1243# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1244 /*
1245 * Copy out our starting point to assist rewinding.
1246 */
1247 pStash->offFromEnd = iov_iter_count(iter);
1248 pStash->Copy = *iter;
1249# endif
1250 }
1251
1252 /*
1253 * Get pages segment by segment.
1254 */
1255 do {
1256 /*
1257 * Make a special case of the first time thru here, since that's
1258 * the most typical scenario.
1259 */
1260 ssize_t cbSegRet;
1261 if (cPages == 0) {
1262# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0)
1263 while (!iov_iter_single_seg_count(iter)) /* Old code didn't skip empty segments which caused EFAULTs. */
1264 iov_iter_advance(iter, 0);
1265# endif
1266 cbSegRet = iov_iter_get_pages(iter, papPages, iov_iter_count(iter), cMaxPages, &offPage0);
1267 if (cbSegRet > 0) {
1268 iov_iter_advance(iter, cbSegRet);
1269 cbChunk = (size_t)cbSegRet;
1270 cPages = RT_ALIGN_Z(offPage0 + cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1271 cMaxPages -= cPages;
1272 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages -> %#zx @ %#zx; %#zx pages [first]\n", cbSegRet, offPage0, cPages));
1273 if ( cMaxPages == 0
1274 || ((offPage0 + (size_t)cbSegRet) & PAGE_OFFSET_MASK))
1275 break;
1276 } else {
1277 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1278 rc = (int)cbSegRet;
1279 break;
1280 }
1281 } else {
1282 /*
1283 * Probe first page of new segment to check that we've got a zero offset and
1284 * can continue on the current chunk. Stash the page if the offset isn't zero.
1285 */
1286 size_t offPgProbe;
1287 size_t cbSeg = iov_iter_single_seg_count(iter);
1288 while (!cbSeg) {
1289 iov_iter_advance(iter, 0);
1290 cbSeg = iov_iter_single_seg_count(iter);
1291 }
1292 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), 1, &offPgProbe);
1293 if (cbSegRet > 0) {
1294 iov_iter_advance(iter, cbSegRet); /** @todo maybe not do this if we stash the page? */
1295 Assert(offPgProbe + cbSegRet <= PAGE_SIZE);
1296 if (offPgProbe == 0) {
1297 cbChunk += cbSegRet;
1298 cPages += 1;
1299 cMaxPages -= 1;
1300 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx\n", cbSegRet, offPgProbe));
1301 if ( cMaxPages == 0
1302 || cbSegRet != PAGE_SIZE)
1303 break;
1304
1305 /*
1306 * Get the rest of the segment (if anything remaining).
1307 */
1308 cbSeg -= cbSegRet;
1309 if (cbSeg > 0) {
1310 cbSegRet = iov_iter_get_pages(iter, &papPages[cPages], iov_iter_count(iter), cMaxPages, &offPgProbe);
1311 if (cbSegRet > 0) {
1312 size_t const cPgRet = RT_ALIGN_Z((size_t)cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
1313 Assert(offPgProbe == 0);
1314 iov_iter_advance(iter, cbSegRet);
1315 SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages() -> %#zx; %#zx pages\n", cbSegRet, cPgRet));
1316 cPages += cPgRet;
1317 cMaxPages -= cPgRet;
1318 cbChunk += cbSegRet;
1319 if ( cMaxPages == 0
1320 || ((size_t)cbSegRet & PAGE_OFFSET_MASK))
1321 break;
1322 } else {
1323 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1324 rc = (int)cbSegRet;
1325 break;
1326 }
1327 }
1328 } else {
1329 /* The segment didn't start at a page boundary, so stash it for
1330 the next round: */
1331 SFLOGFLOW(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx; stashed\n", cbSegRet, offPgProbe));
1332 Assert(papPages[cPages]);
1333 pStash->pPage = papPages[cPages];
1334 pStash->off = offPgProbe;
1335 pStash->cb = cbSegRet;
1336 break;
1337 }
1338 } else {
1339 AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
1340 rc = (int)cbSegRet;
1341 break;
1342 }
1343 }
1344 Assert(cMaxPages > 0);
1345 } while (iov_iter_count(iter) > 0);
1346
1347 } else {
1348 /*
1349 * The silly iov_iter_get_pages_alloc() function doesn't handle KVECs,
1350 * so everyone needs to do that by themselves.
1351 *
1352 * Note! Fixes here may apply to rtR0MemObjNativeLockKernel()
1353 * and vbsf_lock_user_pages_failed_check_kernel() as well.
1354 */
1355# if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
1356 pStash->offFromEnd = iov_iter_count(iter);
1357 pStash->Copy = *iter;
1358# endif
1359 do {
1360 uint8_t *pbBuf;
1361 size_t offStart;
1362 size_t cPgSeg;
1363
1364 size_t cbSeg = iov_iter_single_seg_count(iter);
1365 while (!cbSeg) {
1366 iov_iter_advance(iter, 0);
1367 cbSeg = iov_iter_single_seg_count(iter);
1368 }
1369
1370# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
1371 pbBuf = iter->kvec->iov_base + iter->iov_offset;
1372# else
1373 pbBuf = iter->iov->iov_base + iter->iov_offset;
1374# endif
1375 offStart = (uintptr_t)pbBuf & PAGE_OFFSET_MASK;
1376 if (!cPages)
1377 offPage0 = offStart;
1378 else if (offStart)
1379 break;
1380
1381 cPgSeg = RT_ALIGN_Z(cbSeg, PAGE_SIZE) >> PAGE_SHIFT;
1382 if (cPgSeg > cMaxPages) {
1383 cPgSeg = cMaxPages;
1384 cbSeg = (cPgSeg << PAGE_SHIFT) - offStart;
1385 }
1386
1387 rc = vbsf_lock_kernel_pages(pbBuf, fWrite, cPgSeg, &papPages[cPages]);
1388 if (rc == 0) {
1389 iov_iter_advance(iter, cbSeg);
1390 cbChunk += cbSeg;
1391 cPages += cPgSeg;
1392 cMaxPages -= cPgSeg;
1393 if ( cMaxPages == 0
1394 || ((offStart + cbSeg) & PAGE_OFFSET_MASK) != 0)
1395 break;
1396 } else
1397 break;
1398 } while (iov_iter_count(iter) > 0);
1399 }
1400
1401 /*
1402 * Clean up if we failed; set return values.
1403 */
1404 if (rc == 0) {
1405 /* likely */
1406 } else {
1407 if (cPages > 0)
1408 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1409 offPage0 = cbChunk = cPages = 0;
1410 }
1411 *poffPage0 = offPage0;
1412 *pcbChunk = cbChunk;
1413 *pcPages = cPages;
1414 SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx\n", rc, cPages, offPage0, cbChunk));
1415 return rc;
1416}
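/*
 * Illustrative outcome, assuming cMaxPages is large enough: for an iovec with
 * a segment at page offset 0xf00 of length 0x100 followed by a page-aligned
 * segment of length 0x2000, a single call returns cPages = 3, offPage0 = 0xf00
 * and cbChunk = 0x2100, because the segments splice together at the page
 * boundary; a follow-up segment starting at a non-zero page offset would
 * instead be stashed for the next call.
 */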
1417
1418
1419/**
1420 * Rewinds the I/O vector.
1421 */
1422static bool vbsf_iter_rewind(struct iov_iter *iter, struct vbsf_iter_stash *pStash, size_t cbToRewind, size_t cbChunk)
1423{
1424 size_t cbExtra;
1425 if (!pStash->pPage) {
1426 cbExtra = 0;
1427 } else {
1428 cbExtra = pStash->cb;
1429 vbsf_put_page(pStash->pPage);
1430 pStash->pPage = NULL;
1431 pStash->cb = 0;
1432 pStash->off = 0;
1433 }
1434
1435# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
1436 iov_iter_revert(iter, cbToRewind + cbExtra);
1437 return true;
1438# else
1439 /** @todo impl this */
1440 return false;
1441# endif
1442}
1443
1444
1445/**
1446 * Cleans up the page locking stash.
1447 */
1448DECLINLINE(void) vbsf_iter_cleanup_stash(struct iov_iter *iter, struct vbsf_iter_stash *pStash)
1449{
1450 if (pStash->pPage)
1451 vbsf_iter_rewind(iter, pStash, 0, 0);
1452}
1453
1454
1455/**
1456 * Calculates the longest span of pages we could transfer to the host in a
1457 * single request.
1458 *
1459 * @returns Page count, non-zero.
1460 * @param iter The I/O vector iterator to inspect.
1461 */
1462static size_t vbsf_iter_max_span_of_pages(struct iov_iter *iter)
1463{
1464 size_t cPages;
1465 if (iter_is_iovec(iter) || (iter->type & ITER_KVEC)) {
1466 const struct iovec *pCurIov = iter->iov;
1467 size_t cLeft = iter->nr_segs;
1468 size_t cPagesSpan = 0;
1469
1470 /* iovec and kvec are identical, except for the __user tagging of iov_base. */
1471 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, struct kvec, iov_base);
1472 AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, struct kvec, iov_len);
1473 AssertCompile(sizeof(struct iovec) == sizeof(struct kvec));
1474
1475 cPages = 1;
1476 AssertReturn(cLeft > 0, cPages);
1477
1478 /* Special case: segment offset. */
1479 if (iter->iov_offset > 0) {
1480 if (iter->iov_offset < pCurIov->iov_len) {
1481 size_t const cbSegLeft = pCurIov->iov_len - iter->iov_offset;
1482 size_t const offPage0 = ((uintptr_t)pCurIov->iov_base + iter->iov_offset) & PAGE_OFFSET_MASK;
1483 cPages = cPagesSpan = RT_ALIGN_Z(offPage0 + cbSegLeft, PAGE_SIZE) >> PAGE_SHIFT;
1484 if ((offPage0 + cbSegLeft) & PAGE_OFFSET_MASK)
1485 cPagesSpan = 0;
1486 }
1487 SFLOGFLOW(("vbsf_iter: seg[0]= %p LB %#zx\n", pCurIov->iov_base, pCurIov->iov_len));
1488 pCurIov++;
1489 cLeft--;
1490 }
1491
1492 /* Full segments. */
1493 while (cLeft-- > 0) {
1494 if (pCurIov->iov_len > 0) {
1495 size_t const offPage0 = (uintptr_t)pCurIov->iov_base & PAGE_OFFSET_MASK;
1496 if (offPage0 == 0) {
1497 if (!(pCurIov->iov_len & PAGE_OFFSET_MASK)) {
1498 cPagesSpan += pCurIov->iov_len >> PAGE_SHIFT;
1499 } else {
1500 cPagesSpan += RT_ALIGN_Z(pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1501 if (cPagesSpan > cPages)
1502 cPages = cPagesSpan;
1503 cPagesSpan = 0;
1504 }
1505 } else {
1506 if (cPagesSpan > cPages)
1507 cPages = cPagesSpan;
1508 if (!((offPage0 + pCurIov->iov_len) & PAGE_OFFSET_MASK)) {
1509 cPagesSpan = pCurIov->iov_len >> PAGE_SHIFT;
1510 } else {
1511 cPagesSpan += RT_ALIGN_Z(offPage0 + pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
1512 if (cPagesSpan > cPages)
1513 cPages = cPagesSpan;
1514 cPagesSpan = 0;
1515 }
1516 }
1517 }
1518 SFLOGFLOW(("vbsf_iter: seg[%u]= %p LB %#zx\n", iter->nr_segs - cLeft, pCurIov->iov_base, pCurIov->iov_len));
1519 pCurIov++;
1520 }
1521 if (cPagesSpan > cPages)
1522 cPages = cPagesSpan;
1523 } else {
1524 /* Won't bother with accurate counts for the next two types, just make
1525 some rough estimates (do pipes have segments?):
1526 size_t cSegs = iter->type & ITER_BVEC ? RT_MAX(1, iter->nr_segs) : 1;
1527 cPages = (iov_iter_count(iter) + (PAGE_SIZE * 2 - 2) * cSegs) >> PAGE_SHIFT;
1528 }
1529 SFLOGFLOW(("vbsf_iter_max_span_of_pages: returns %#zx\n", cPages));
1530 return cPages;
1531}
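/*
 * Example with 4 KiB pages: three 0x1800 byte segments that each start on a
 * page boundary yield spans of two pages that cannot be merged (each ends
 * mid-page), so the function returns 2; a single page-aligned 0x20000 byte
 * segment would return 0x20.
 */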
1532
1533
1534/**
1535 * Worker for vbsf_reg_read_iter() that deals with larger reads using page
1536 * locking.
1537 */
1538static ssize_t vbsf_reg_read_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToRead,
1539 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
1540{
1541 /*
1542 * Estimate how many pages we may possibly submit in a single request so
1543 * that we can allocate matching request buffer and page array.
1544 */
1545 struct page *apPagesStack[16];
1546 struct page **papPages = &apPagesStack[0];
1547 struct page **papPagesFree = NULL;
1548 VBOXSFREADPGLSTREQ *pReq;
1549 ssize_t cbRet = 0;
1550 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1551 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1552
1553 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1554 while (!pReq && cMaxPages > 4) {
1555 cMaxPages /= 2;
1556 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
1557 }
1558 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1559 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1560 if (pReq && papPages) {
1561
1562 /*
1563 * The read loop.
1564 */
1565 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1566 do {
1567 /*
1568 * Grab as many pages as we can. This means that if adjacent
1569 * segments both start and end at a page boundary, we can
1570 * do them both in the same transfer from the host.
1571 */
1572 size_t cPages = 0;
1573 size_t cbChunk = 0;
1574 size_t offPage0 = 0;
1575 int rc = vbsf_iter_lock_pages(iter, true /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1576 if (rc == 0) {
1577 size_t iPage = cPages;
1578 while (iPage-- > 0)
1579 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1580 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1581 AssertStmt(cbChunk <= cbToRead, cbChunk = cbToRead);
1582 } else {
1583 cbRet = rc;
1584 break;
1585 }
1586
1587 /*
1588 * Issue the request and unlock the pages.
1589 */
1590 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, cbChunk, cPages);
1591 SFLOGFLOW(("vbsf_reg_read_iter_locking: VbglR0SfHostReqReadPgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x\n",
1592 rc, pReq->Parms.cb32Read.u.value32, cbChunk, cbToRead, cPages, offPage0));
1593
1594 vbsf_iter_unlock_pages(iter, papPages, cPages, true /*fSetDirty*/);
1595
1596 if (RT_SUCCESS(rc)) {
1597 /*
1598 * Success, advance position and buffer.
1599 */
1600 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
1601 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1602 cbRet += cbActual;
1603 kio->ki_pos += cbActual;
1604 cbToRead -= cbActual;
1605
1606 /*
1607 * Are we done already?
1608 */
1609 if (!cbToRead)
1610 break;
1611 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1612 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1613 iov_iter_truncate(iter, 0);
1614 break;
1615 }
1616 } else {
1617 /*
1618 * Try rewind the iter structure.
1619 */
1620 bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1621 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1622 /*
1623 * The host probably doesn't have enough heap to handle the
1624 * request, reduce the page count and retry.
1625 */
1626 cMaxPages /= 4;
1627 Assert(cMaxPages > 0);
1628 } else {
1629 /*
1630 * If we've successfully read stuff, return it rather than
1631 * the error. (Not sure if this is such a great idea...)
1632 */
1633 if (cbRet <= 0)
1634 cbRet = -EPROTO;
1635 break;
1636 }
1637 }
1638 } while (cbToRead > 0);
1639
1640 vbsf_iter_cleanup_stash(iter, &Stash);
1641 }
1642 else
1643 cbRet = -ENOMEM;
1644 if (papPagesFree)
1645 kfree(papPages);
1646 if (pReq)
1647 VbglR0PhysHeapFree(pReq);
1648 SFLOGFLOW(("vbsf_reg_read_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1649 return cbRet;
1650}
1651
1652
1653/**
1654 * Read into I/O vector iterator.
1655 *
1656 * @returns Number of bytes read on success, negative errno on error.
1657 * @param kio The kernel I/O control block (or something like that).
1658 * @param iter The I/O vector iterator describing the buffer.
1659 */
1660static ssize_t vbsf_reg_read_iter(struct kiocb *kio, struct iov_iter *iter)
1661{
1662 size_t cbToRead = iov_iter_count(iter);
1663 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1664 struct address_space *mapping = inode->i_mapping;
1665
1666 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1667 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1668
1669 SFLOGFLOW(("vbsf_reg_read_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1670 inode, kio->ki_filp, cbToRead, kio->ki_pos, iter->type));
1671 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1672
1673 /*
1674 * Do we have anything at all to do here?
1675 */
1676 if (!cbToRead)
1677 return 0;
1678
1679 /*
1680 * If there is a mapping and O_DIRECT isn't in effect, we must heed
1681 * dirty pages in the mapping and read from them. For simplicity
1682 * though, we just do page cache reading when there are writable
1683 * mappings around with any kind of pages loaded.
1684 */
1685 if (vbsf_should_use_cached_read(kio->ki_filp, mapping, sf_g))
1686 return generic_file_read_iter(kio, iter);
1687
1688 /*
1689 * For now we reject async I/O requests.
1690 */
1691 if (!is_sync_kiocb(kio)) {
1692 SFLOGFLOW(("vbsf_reg_read_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1693 return -EOPNOTSUPP;
1694 }
1695
1696 /*
1697     * For small requests, try to use an embedded buffer provided we get a heap block
1698     * that does not cross page boundaries (see host code).
1699 */
1700 if (cbToRead <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
1701 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + cbToRead;
1702 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1703 if (pReq) {
1704 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1705 ssize_t cbRet;
1706 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, (uint32_t)cbToRead);
1707 if (RT_SUCCESS(vrc)) {
1708 cbRet = pReq->Parms.cb32Read.u.value32;
1709 AssertStmt(cbRet <= (ssize_t)cbToRead, cbRet = cbToRead);
1710 if (copy_to_iter(pReq->abData, cbRet, iter) == cbRet) {
1711 kio->ki_pos += cbRet;
1712 if (cbRet < cbToRead)
1713 iov_iter_truncate(iter, 0);
1714 } else
1715 cbRet = -EFAULT;
1716 } else
1717 cbRet = -EPROTO;
1718 VbglR0PhysHeapFree(pReq);
1719 SFLOGFLOW(("vbsf_reg_read_iter: returns %#zx (%zd)\n", cbRet, cbRet));
1720 return cbRet;
1721 }
1722 VbglR0PhysHeapFree(pReq);
1723 }
1724 }
1725
1726 /*
1727 * Otherwise do the page locking thing.
1728 */
1729 return vbsf_reg_read_iter_locking(kio, iter, cbToRead, sf_g, sf_r);
1730}
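/*
 * Illustrative sketch (example only, not used by the driver): the embedded
 * fast path in vbsf_reg_read_iter() is taken when the payload plus request
 * header fits within roughly 3/4 of a page and the physical heap block does
 * not straddle a page boundary.  The helper name is hypothetical; the checks
 * mirror the ones made inline above.
 */
#if 0 /* example only */
static bool vbsf_example_embedded_read_ok(void const *pReq, size_t cbToRead)
{
    size_t const cbHdr = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]);
    if (cbToRead > PAGE_SIZE / 4 * 3 - cbHdr)   /* too big for the embedded buffer */
        return false;
    /* The host side requires the whole request to live within a single page. */
    return (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbHdr + cbToRead;
}
#endif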
1731
1732
1733/**
1734 * Worker for vbsf_reg_write_iter() that deals with larger writes using page
1735 * locking.
1736 */
1737static ssize_t vbsf_reg_write_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToWrite, loff_t offFile,
1738 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
1739 struct inode *inode, struct vbsf_inode_info *sf_i, struct address_space *mapping)
1740{
1741 /*
1742     * Estimate how many pages we may possibly submit in a single request so
1743     * that we can allocate a matching request buffer and page array.
1744 */
1745 struct page *apPagesStack[16];
1746 struct page **papPages = &apPagesStack[0];
1747 struct page **papPagesFree = NULL;
1748 VBOXSFWRITEPGLSTREQ *pReq;
1749 ssize_t cbRet = 0;
1750 size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
1751 cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 2), cMaxPages);
1752
1753 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1754 while (!pReq && cMaxPages > 4) {
1755 cMaxPages /= 2;
1756 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
1757 }
1758 if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
1759        papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
1760 if (pReq && papPages) {
1761
1762 /*
1763 * The write loop.
1764 */
1765 struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
1766 do {
1767 /*
1768 * Grab as many pages as we can. This means that if adjacent
1769             * segments both start and end at a page boundary, we can
1770 * do them both in the same transfer from the host.
1771 */
1772 size_t cPages = 0;
1773 size_t cbChunk = 0;
1774 size_t offPage0 = 0;
1775 int rc = vbsf_iter_lock_pages(iter, false /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
1776 if (rc == 0) {
1777 size_t iPage = cPages;
1778 while (iPage-- > 0)
1779 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
1780 pReq->PgLst.offFirstPage = (uint16_t)offPage0;
1781 AssertStmt(cbChunk <= cbToWrite, cbChunk = cbToWrite);
1782 } else {
1783 cbRet = rc;
1784 break;
1785 }
1786
1787 /*
1788 * Issue the request and unlock the pages.
1789 */
1790 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
1791        SFLOGFLOW(("vbsf_reg_write_iter_locking: VbglR0SfHostReqWritePgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x)\n",
1792 rc, pReq->Parms.cb32Write.u.value32, cbChunk, cbToWrite, cPages, offPage0));
1793
1794 vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
1795
1796 if (RT_SUCCESS(rc)) {
1797 /*
1798 * Success, advance position and buffer.
1799 */
1800 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
1801 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
1802 cbRet += cbActual;
1803 offFile += cbActual;
1804 kio->ki_pos = offFile;
1805 cbToWrite -= cbActual;
1806 if (offFile > i_size_read(inode))
1807 i_size_write(inode, offFile);
1808 vbsf_reg_write_invalidate_mapping_range(mapping, offFile - cbActual, offFile);
1809 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1810
1811 /*
1812 * Are we done already?
1813 */
1814 if (!cbToWrite)
1815 break;
1816 if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
1817 if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
1818 iov_iter_truncate(iter, 0);
1819 break;
1820 }
1821 } else {
1822 /*
1823                 * Try to rewind the iter structure.
1824 */
1825 bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
1826 if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
1827 /*
1828 * The host probably doesn't have enough heap to handle the
1829 * request, reduce the page count and retry.
1830 */
1831 cMaxPages /= 4;
1832 Assert(cMaxPages > 0);
1833 } else {
1834 /*
1835 * If we've successfully written stuff, return it rather than
1836 * the error. (Not sure if this is such a great idea...)
1837 */
1838 if (cbRet <= 0)
1839 cbRet = -EPROTO;
1840 break;
1841 }
1842 }
1843 } while (cbToWrite > 0);
1844
1845 vbsf_iter_cleanup_stash(iter, &Stash);
1846 }
1847 else
1848 cbRet = -ENOMEM;
1849 if (papPagesFree)
1850 kfree(papPages);
1851 if (pReq)
1852 VbglR0PhysHeapFree(pReq);
1853 SFLOGFLOW(("vbsf_reg_write_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
1854 return cbRet;
1855}
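/*
 * Illustrative sketch (example only): after each successful host write the
 * loop above keeps the guest view coherent by extending the cached inode size
 * when writing past EOF and invalidating the written byte range in the page
 * cache so that cached readers refetch it from the host.  The helper name is
 * hypothetical.
 */
#if 0 /* example only */
static void vbsf_example_after_host_write(struct inode *inode, struct address_space *mapping,
                                          loff_t offEnd, uint32_t cbWritten)
{
    if (offEnd > i_size_read(inode))
        i_size_write(inode, offEnd);
    vbsf_reg_write_invalidate_mapping_range(mapping, offEnd - cbWritten, offEnd);
}
#endif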
1856
1857
1858
1859/**
1860 * Write from I/O vector iterator.
1861 *
1862 * @returns Number of bytes written on success, negative errno on error.
1863 * @param kio The kernel I/O control block (or something like that).
1864 * @param iter The I/O vector iterator describing the buffer.
1865 */
1866static ssize_t vbsf_reg_write_iter(struct kiocb *kio, struct iov_iter *iter)
1867{
1868 size_t cbToWrite = iov_iter_count(iter);
1869 struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
1870 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1871 struct address_space *mapping = inode->i_mapping;
1872
1873 struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
1874 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1875 loff_t offFile = kio->ki_pos;
1876
1877 SFLOGFLOW(("vbsf_reg_write_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
1878 inode, kio->ki_filp, cbToWrite, offFile, iter->type));
1879 AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
1880
1881 /*
1882 * Enforce APPEND flag.
1883 */
1884 /** @todo This should be handled by the host, it returning the new file
1885 * offset when appending. We may have an outdated i_size value here! */
1886#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1887 if (kio->ki_flags & IOCB_APPEND)
1888#else
1889 if (kio->ki_filp->f_flags & O_APPEND)
1890#endif
1891 kio->ki_pos = offFile = i_size_read(inode);
1892
1893 /*
1894 * Do we have anything at all to do here?
1895 */
1896 if (!cbToWrite)
1897 return 0;
1898
1899 /*
1900     * For now we reject async I/O requests.
1901 */
1902 if (!is_sync_kiocb(kio)) {
1903 SFLOGFLOW(("vbsf_reg_write_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
1904 return -EOPNOTSUPP;
1905 }
1906
1907 /*
1908 * If there are active writable mappings, coordinate with any
1909 * pending writes via those.
1910 */
1911 if ( mapping
1912 && mapping->nrpages > 0
1913 && mapping_writably_mapped(mapping)) {
1914#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
1915 int err = filemap_fdatawait_range(mapping, offFile, offFile + cbToWrite - 1);
1916 if (err)
1917 return err;
1918#else
1919 /** @todo ... */
1920#endif
1921 }
1922
1923 /*
1924     * For small requests, try to use an embedded buffer provided we get a heap block
1925     * that does not cross page boundaries (see host code).
1926 */
1927 if (cbToWrite <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
1928 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + cbToWrite;
1929 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
1930 if (pReq) {
1931 if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
1932 ssize_t cbRet;
1933 if (copy_from_iter(pReq->abData, cbToWrite, iter) == cbToWrite) {
1934 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
1935 offFile, (uint32_t)cbToWrite);
1936 if (RT_SUCCESS(vrc)) {
1937 cbRet = pReq->Parms.cb32Write.u.value32;
1938 AssertStmt(cbRet <= (ssize_t)cbToWrite, cbRet = cbToWrite);
1939 kio->ki_pos = offFile += cbRet;
1940 if (offFile > i_size_read(inode))
1941 i_size_write(inode, offFile);
1942 vbsf_reg_write_invalidate_mapping_range(mapping, offFile - cbRet, offFile);
1943# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
1944 if ((size_t)cbRet < cbToWrite)
1945 iov_iter_revert(iter, cbToWrite - cbRet);
1946# endif
1947 } else
1948 cbRet = -EPROTO;
1949 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1950 } else
1951 cbRet = -EFAULT;
1952 VbglR0PhysHeapFree(pReq);
1953 SFLOGFLOW(("vbsf_reg_write_iter: returns %#zx (%zd)\n", cbRet, cbRet));
1954 return cbRet;
1955 }
1956 VbglR0PhysHeapFree(pReq);
1957 }
1958 }
1959
1960 /*
1961 * Otherwise do the page locking thing.
1962 */
1963 return vbsf_reg_write_iter_locking(kio, iter, cbToWrite, offFile, sf_g, sf_r, inode, sf_i, mapping);
1964}
1965
1966#endif /* >= 3.16.0 */
1967
1968/**
1969 * Used by vbsf_reg_open() and vbsf_inode_atomic_open() to
1970 *
1971 * @returns shared folders create flags.
1972 * @param fLnxOpen The linux O_XXX flags to convert.
1973 * @param pfHandle Pointer to vbsf_handle::fFlags.
1974 * @param pszCaller Caller, for logging purposes.
1975 */
1976uint32_t vbsf_linux_oflags_to_vbox(unsigned fLnxOpen, uint32_t *pfHandle, const char *pszCaller)
1977{
1978 uint32_t fVBoxFlags = SHFL_CF_ACCESS_DENYNONE;
1979
1980 /*
1981 * Disposition.
1982 */
1983 if (fLnxOpen & O_CREAT) {
1984 Log(("%s: O_CREAT set\n", pszCaller));
1985 fVBoxFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1986 if (fLnxOpen & O_EXCL) {
1987 Log(("%s: O_EXCL set\n", pszCaller));
1988 fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_EXISTS;
1989 } else if (fLnxOpen & O_TRUNC) {
1990 Log(("%s: O_TRUNC set\n", pszCaller));
1991 fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1992 } else
1993 fVBoxFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1994 } else {
1995 fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1996 if (fLnxOpen & O_TRUNC) {
1997 Log(("%s: O_TRUNC set\n", pszCaller));
1998 fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1999 }
2000 }
2001
2002 /*
2003 * Access.
2004 */
2005 switch (fLnxOpen & O_ACCMODE) {
2006 case O_RDONLY:
2007 fVBoxFlags |= SHFL_CF_ACCESS_READ;
2008 *pfHandle |= VBSF_HANDLE_F_READ;
2009 break;
2010
2011 case O_WRONLY:
2012 fVBoxFlags |= SHFL_CF_ACCESS_WRITE;
2013 *pfHandle |= VBSF_HANDLE_F_WRITE;
2014 break;
2015
2016 case O_RDWR:
2017 fVBoxFlags |= SHFL_CF_ACCESS_READWRITE;
2018 *pfHandle |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
2019 break;
2020
2021 default:
2022 BUG();
2023 }
2024
2025 if (fLnxOpen & O_APPEND) {
2026 Log(("%s: O_APPEND set\n", pszCaller));
2027 fVBoxFlags |= SHFL_CF_ACCESS_APPEND;
2028 *pfHandle |= VBSF_HANDLE_F_APPEND;
2029 }
2030
2031 /*
2032 * Only directories?
2033 */
2034 if (fLnxOpen & O_DIRECTORY) {
2035 Log(("%s: O_DIRECTORY set\n", pszCaller));
2036 fVBoxFlags |= SHFL_CF_DIRECTORY;
2037 }
2038
2039 return fVBoxFlags;
2040}
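/*
 * Illustrative example (not compiled, example only): a typical "create or
 * truncate for writing" open maps to the following flags according to the
 * conversion above.  The helper name is hypothetical.
 */
#if 0 /* example only */
static void vbsf_example_oflags_conversion(void)
{
    uint32_t fHandle = 0;
    uint32_t fVBox   = vbsf_linux_oflags_to_vbox(O_CREAT | O_TRUNC | O_WRONLY, &fHandle, __FUNCTION__);
    /* fVBox is SHFL_CF_ACCESS_DENYNONE | SHFL_CF_ACT_CREATE_IF_NEW
       | SHFL_CF_ACT_OVERWRITE_IF_EXISTS | SHFL_CF_ACCESS_WRITE,
       and fHandle is VBSF_HANDLE_F_WRITE. */
    (void)fVBox; (void)fHandle;
}
#endif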
2041
2042
2043/**
2044 * Open a regular file.
2045 *
2046 * @param inode the inode
2047 * @param file the file
2048 * @returns 0 on success, Linux error code otherwise
2049 */
2050static int vbsf_reg_open(struct inode *inode, struct file *file)
2051{
2052 int rc, rc_linux = 0;
2053 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2054 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2055 struct vbsf_reg_info *sf_r;
2056 struct dentry *dentry = VBSF_GET_F_DENTRY(file);
2057 VBOXSFCREATEREQ *pReq;
2058
2059 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
2060 BUG_ON(!sf_g);
2061 BUG_ON(!sf_i);
2062
2063 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
2064 if (!sf_r) {
2065 LogRelFunc(("could not allocate reg info\n"));
2066 return -ENOMEM;
2067 }
2068
2069 RTListInit(&sf_r->Handle.Entry);
2070 sf_r->Handle.cRefs = 1;
2071 sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
2072 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
2073
2074 /* Already open? */
2075 if (sf_i->handle != SHFL_HANDLE_NIL) {
2076 /*
2077 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
2078         * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
2079 * about the access flags (SHFL_CF_ACCESS_*).
2080 */
2081 sf_i->force_restat = 1;
2082 sf_r->Handle.hHost = sf_i->handle;
2083 sf_i->handle = SHFL_HANDLE_NIL;
2084 file->private_data = sf_r;
2085
2086 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
2087 vbsf_handle_append(sf_i, &sf_r->Handle);
2088 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
2089 return 0;
2090 }
2091
2092 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
2093 if (!pReq) {
2094 kfree(sf_r);
2095 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
2096 return -ENOMEM;
2097 }
2098 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
2099 RT_ZERO(pReq->CreateParms);
2100 pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
2101
2102 /* We check the value of pReq->CreateParms.Handle afterwards to
2103 * find out if the call succeeded or failed, as the API does not seem
2104 * to cleanly distinguish error and informational messages.
2105 *
2106 * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
2107 * to make the shared folders host service use our fMode parameter */
2108
2109 /* We ignore O_EXCL, as the Linux kernel seems to call create
2110 beforehand itself, so O_EXCL should always fail. */
2111 pReq->CreateParms.CreateFlags = vbsf_linux_oflags_to_vbox(file->f_flags & ~O_EXCL, &sf_r->Handle.fFlags, __FUNCTION__);
2112 pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
2113 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
2114 sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
2115 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
2116 if (RT_FAILURE(rc)) {
2117 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
2118 kfree(sf_r);
2119 VbglR0PhysHeapFree(pReq);
2120 return -RTErrConvertToErrno(rc);
2121 }
2122
2123 if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
2124 vbsf_dentry_chain_increase_ttl(dentry);
2125 rc_linux = 0;
2126 } else {
2127 switch (pReq->CreateParms.Result) {
2128 case SHFL_PATH_NOT_FOUND:
2129 rc_linux = -ENOENT;
2130 break;
2131 case SHFL_FILE_NOT_FOUND:
2132 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
2133 rc_linux = -ENOENT;
2134 break;
2135 case SHFL_FILE_EXISTS:
2136 vbsf_dentry_chain_increase_ttl(dentry);
2137 rc_linux = -EEXIST;
2138 break;
2139 default:
2140 vbsf_dentry_chain_increase_parent_ttl(dentry);
2141 rc_linux = 0;
2142 break;
2143 }
2144 }
2145
2146 sf_i->force_restat = 1; /** @todo Why?!? */
2147 sf_r->Handle.hHost = pReq->CreateParms.Handle;
2148 file->private_data = sf_r;
2149 vbsf_handle_append(sf_i, &sf_r->Handle);
2150 VbglR0PhysHeapFree(pReq);
2151 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
2152 return rc_linux;
2153}
2154
2155
2156/**
2157 * Close a regular file.
2158 *
2159 * @param inode the inode
2160 * @param file the file
2161 * @returns 0 on success, Linux error code otherwise
2162 */
2163static int vbsf_reg_release(struct inode *inode, struct file *file)
2164{
2165 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2166 struct vbsf_reg_info *sf_r = file->private_data;
2167
2168 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
2169 if (sf_r) {
2170 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2171 Assert(sf_g);
2172
2173#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
2174 /* See the smbfs source (file.c). mmap in particular can cause data to be
2175 * written to the file after it is closed, which we can't cope with. We
2176 * copy and paste the body of filemap_write_and_wait() here as it was not
2177 * defined before 2.6.6 and not exported until quite a bit later. */
2178 /* filemap_write_and_wait(inode->i_mapping); */
2179 if (inode->i_mapping->nrpages
2180 && filemap_fdatawrite(inode->i_mapping) != -EIO)
2181 filemap_fdatawait(inode->i_mapping);
2182#endif
2183
2184 /* Release sf_r, closing the handle if we're the last user. */
2185 file->private_data = NULL;
2186 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
2187
2188 sf_i->handle = SHFL_HANDLE_NIL;
2189 }
2190 return 0;
2191}
2192
2193/**
2194 * Wrapper around generic/default seek function that ensures that we've got
2195 * the up-to-date file size when doing anything relative to EOF.
2196 *
2197 * The issue is that the host may extend the file while we weren't looking and
2198 * if the caller wishes to append data, it may end up overwriting existing data
2199 * if we operate with a stale size. So, we always retrieve the file size on EOF
2200 * relative seeks.
2201 */
2202static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
2203{
2204 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
2205
2206 switch (whence) {
2207#ifdef SEEK_HOLE
2208 case SEEK_HOLE:
2209 case SEEK_DATA:
2210#endif
2211 case SEEK_END: {
2212 struct vbsf_reg_info *sf_r = file->private_data;
2213 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost,
2214 true /*fForce*/, false /*fInodeLocked*/);
2215 if (rc == 0)
2216 break;
2217 return rc;
2218 }
2219 }
2220
2221#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
2222 return generic_file_llseek(file, off, whence);
2223#else
2224 return default_llseek(file, off, whence);
2225#endif
2226}
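/*
 * Illustrative user-space example (example only): if the host or another
 * guest process has grown the file, a stale cached size would make the
 * SEEK_END below land short of the real end of file and the write could
 * clobber data; that is why the seek handler above revalidates the inode for
 * EOF-relative seeks.  The share path is made up for the example.
 */
#if 0 /* example only -- user-space code */
#include <fcntl.h>
#include <unistd.h>
static void example_append(void)
{
    int fd = open("/media/sf_share/log.txt", O_WRONLY);   /* hypothetical share */
    if (fd >= 0) {
        lseek(fd, 0, SEEK_END);          /* must observe the current host size */
        write(fd, "appended line\n", 14);
        close(fd);
    }
}
#endif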
2227
2228/**
2229 * Flush region of file - chiefly mmap/msync.
2230 *
2231 * We cannot use the noop_fsync / simple_sync_file here as that means
2232 * msync(,,MS_SYNC) will return before the data hits the host, thereby
2233 * causing coherency issues with O_DIRECT access to the same file as
2234 * well as any host interaction with the file.
2235 */
2236#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
2237static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2238{
2239# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2240 return __generic_file_fsync(file, start, end, datasync);
2241# else
2242 return generic_file_fsync(file, start, end, datasync);
2243# endif
2244}
2245#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
2246static int vbsf_reg_fsync(struct file *file, int datasync)
2247{
2248 return generic_file_fsync(file, datasync);
2249}
2250#else /* < 2.6.35 */
2251static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
2252{
2253# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
2254 return simple_fsync(file, dentry, datasync);
2255# else
2256 int rc;
2257 struct inode *inode = dentry->d_inode;
2258 AssertReturn(inode, -EINVAL);
2259
2260 /** @todo What about file_fsync()? (<= 2.5.11) */
2261
2262# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2263 rc = sync_mapping_buffers(inode->i_mapping);
2264 if ( rc == 0
2265 && (inode->i_state & I_DIRTY)
2266 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
2267 ) {
2268 struct writeback_control wbc = {
2269 .sync_mode = WB_SYNC_ALL,
2270 .nr_to_write = 0
2271 };
2272 rc = sync_inode(inode, &wbc);
2273 }
2274# else /* < 2.5.12 */
2275 rc = fsync_inode_buffers(inode);
2276# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2277 rc |= fsync_inode_data_buffers(inode);
2278# endif
2279 /** @todo probably need to do more here... */
2280# endif /* < 2.5.12 */
2281 return rc;
2282# endif
2283}
2284#endif /* < 2.6.35 */
2285
2286
2287#ifdef SFLOG_ENABLED
2288/*
2289 * This is just for logging page faults and such.
2290 */
2291
2292/** Pointer to the ops generic_file_mmap returns the first time it's called. */
2293static struct vm_operations_struct const *g_pGenericFileVmOps = NULL;
2294/** Merge of g_LoggingVmOpsTemplate and g_pGenericFileVmOps. */
2295static struct vm_operations_struct g_LoggingVmOps;
2296
2297
2298/* Generic page fault callback: */
2299# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
2300static vm_fault_t vbsf_vmlog_fault(struct vm_fault *vmf)
2301{
2302 vm_fault_t rc;
2303 SFLOGFLOW(("vbsf_vmlog_fault: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
2304 rc = g_pGenericFileVmOps->fault(vmf);
2305 SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
2306 return rc;
2307}
2308# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2309static int vbsf_vmlog_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2310{
2311 int rc;
2312# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2313 SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->address));
2314# else
2315 SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
2316# endif
2317 rc = g_pGenericFileVmOps->fault(vma, vmf);
2318 SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
2319 return rc;
2320}
2321# endif
2322
2323
2324/* Special/generic page fault handler: */
2325# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 26)
2326# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 1)
2327static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
2328{
2329 struct page *page;
2330 SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p type=%p:{%#x}\n", vma, address, type, type ? *type : 0));
2331 page = g_pGenericFileVmOps->nopage(vma, address, type);
2332 SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
2333 return page;
2334}
2335# else
2336static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int write_access_or_unused)
2337{
2338 struct page *page;
2339 SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p wau=%d\n", vma, address, write_access_or_unused));
2340 page = g_pGenericFileVmOps->nopage(vma, address, write_access_or_unused);
2341 SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
2342 return page;
2343}
2344# endif /* < 2.6.26 */
2345
2346
2347/* Special page fault callback for making something writable: */
2348# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
2349static vm_fault_t vbsf_vmlog_page_mkwrite(struct vm_fault *vmf)
2350{
2351 vm_fault_t rc;
2352# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2353 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
2354# else
2355 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->virtual_address));
2356# endif
2357 rc = g_pGenericFileVmOps->page_mkwrite(vmf);
2358 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2359 return rc;
2360}
2361# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 30)
2362static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2363{
2364 int rc;
2365 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
2366 rc = g_pGenericFileVmOps->page_mkwrite(vma, vmf);
2367 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2368 return rc;
2369}
2370# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
2371static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2372{
2373 int rc;
2374 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p page=%p\n", vma, page));
2375 rc = g_pGenericFileVmOps->page_mkwrite(vma, page);
2376 SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
2377 return rc;
2378}
2379# endif
2380
2381
2382/* Special page fault callback for mapping pages: */
2383# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
2384static void vbsf_vmlog_map_pages(struct vm_fault *vmf, pgoff_t start, pgoff_t end)
2385{
2386 SFLOGFLOW(("vbsf_vmlog_map_pages: vmf=%p (flags=%#x addr=%p) start=%p end=%p\n", vmf, vmf->flags, vmf->address, start, end));
2387 g_pGenericFileVmOps->map_pages(vmf, start, end);
2388 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2389}
2390# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
2391static void vbsf_vmlog_map_pages(struct fault_env *fenv, pgoff_t start, pgoff_t end)
2392{
2393 SFLOGFLOW(("vbsf_vmlog_map_pages: fenv=%p (flags=%#x addr=%p) start=%p end=%p\n", fenv, fenv->flags, fenv->address, start, end));
2394 g_pGenericFileVmOps->map_pages(fenv, start, end);
2395 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2396}
2397# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
2398static void vbsf_vmlog_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
2399{
2400 SFLOGFLOW(("vbsf_vmlog_map_pages: vma=%p vmf=%p (flags=%#x addr=%p)\n", vma, vmf, vmf->flags, vmf->virtual_address));
2401 g_pGenericFileVmOps->map_pages(vma, vmf);
2402 SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
2403}
2404# endif
2405
2406
2407/** Overload template. */
2408static struct vm_operations_struct const g_LoggingVmOpsTemplate = {
2409# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2410 .fault = vbsf_vmlog_fault,
2411# endif
2412# if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 25)
2413 .nopage = vbsf_vmlog_nopage,
2414# endif
2415# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
2416 .page_mkwrite = vbsf_vmlog_page_mkwrite,
2417# endif
2418# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
2419 .map_pages = vbsf_vmlog_map_pages,
2420# endif
2421};
2422
2423/** file_operations::mmap wrapper for logging purposes. */
2424extern int vbsf_reg_mmap(struct file *file, struct vm_area_struct *vma)
2425{
2426 int rc;
2427 SFLOGFLOW(("vbsf_reg_mmap: file=%p vma=%p\n", file, vma));
2428 rc = generic_file_mmap(file, vma);
2429 if (rc == 0) {
2430 /* Merge the ops and template the first time thru (there's a race here). */
2431 if (g_pGenericFileVmOps == NULL) {
2432 uintptr_t const *puSrc1 = (uintptr_t *)vma->vm_ops;
2433 uintptr_t const *puSrc2 = (uintptr_t *)&g_LoggingVmOpsTemplate;
2434 uintptr_t volatile *puDst = (uintptr_t *)&g_LoggingVmOps;
2435 size_t cbLeft = sizeof(g_LoggingVmOps) / sizeof(*puDst);
2436 while (cbLeft-- > 0) {
2437 *puDst = *puSrc2 && *puSrc1 ? *puSrc2 : *puSrc1;
2438 puSrc1++;
2439 puSrc2++;
2440 puDst++;
2441 }
2442 g_pGenericFileVmOps = vma->vm_ops;
2443 vma->vm_ops = &g_LoggingVmOps;
2444 } else if (g_pGenericFileVmOps == vma->vm_ops)
2445 vma->vm_ops = &g_LoggingVmOps;
2446 else
2447 SFLOGFLOW(("vbsf_reg_mmap: Warning: vm_ops=%p, expected %p!\n", vma->vm_ops, g_pGenericFileVmOps));
2448 }
2449 SFLOGFLOW(("vbsf_reg_mmap: returns %d\n", rc));
2450 return rc;
2451}
2452
2453#endif /* SFLOG_ENABLED */
2454
2455
2456/**
2457 * File operations for regular files.
2458 */
2459struct file_operations vbsf_reg_fops = {
2460 .open = vbsf_reg_open,
2461 .read = vbsf_reg_read,
2462 .write = vbsf_reg_write,
2463#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2464 .read_iter = vbsf_reg_read_iter,
2465 .write_iter = vbsf_reg_write_iter,
2466#endif
2467 .release = vbsf_reg_release,
2468#ifdef SFLOG_ENABLED
2469 .mmap = vbsf_reg_mmap,
2470#else
2471 .mmap = generic_file_mmap,
2472#endif
2473#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2474# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
2475/** @todo This code is known to cause caching of data which should not be
2476 * cached. Investigate. */
2477# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
2478 .splice_read = vbsf_splice_read,
2479# else
2480 .sendfile = generic_file_sendfile,
2481# endif
2482 .aio_read = generic_file_aio_read,
2483 .aio_write = generic_file_aio_write,
2484# endif
2485#endif
2486 .llseek = vbsf_reg_llseek,
2487 .fsync = vbsf_reg_fsync,
2488};
2489
2490struct inode_operations vbsf_reg_iops = {
2491#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 18)
2492 .getattr = vbsf_inode_getattr,
2493#else
2494 .revalidate = vbsf_inode_revalidate,
2495#endif
2496 .setattr = vbsf_inode_setattr,
2497};
2498
2499
2500#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
2501
2502/**
2503 * Used to read the content of a page into the page cache.
2504 *
2505 * Needed for mmap and reads+writes when the file is mmapped in a
2506 * shared+writeable fashion.
2507 */
2508static int vbsf_readpage(struct file *file, struct page *page)
2509{
2510 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
2511 int err;
2512
2513 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
2514 Assert(PageLocked(page));
2515
2516 if (PageUptodate(page)) {
2517 unlock_page(page);
2518 return 0;
2519 }
2520
2521 if (!is_bad_inode(inode)) {
2522 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2523 if (pReq) {
2524 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2525 struct vbsf_reg_info *sf_r = file->private_data;
2526 uint32_t cbRead;
2527 int vrc;
2528
2529 pReq->PgLst.offFirstPage = 0;
2530 pReq->PgLst.aPages[0] = page_to_phys(page);
2531 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
2532 pReq,
2533 sf_r->Handle.hHost,
2534 (uint64_t)page->index << PAGE_SHIFT,
2535 PAGE_SIZE,
2536 1 /*cPages*/);
2537
2538 cbRead = pReq->Parms.cb32Read.u.value32;
2539 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
2540 VbglR0PhysHeapFree(pReq);
2541
2542 if (RT_SUCCESS(vrc)) {
2543 if (cbRead == PAGE_SIZE) {
2544 /* likely */
2545 } else {
2546 uint8_t *pbMapped = (uint8_t *)kmap(page);
2547 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
2548 kunmap(page);
2549 /** @todo truncate the inode file size? */
2550 }
2551
2552 flush_dcache_page(page);
2553 SetPageUptodate(page);
2554 unlock_page(page);
2555 return 0;
2556 }
2557 err = -RTErrConvertToErrno(vrc);
2558 } else
2559 err = -ENOMEM;
2560 } else
2561 err = -EIO;
2562 SetPageError(page);
2563 unlock_page(page);
2564 return err;
2565}
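/*
 * Illustrative sketch (example only): when the host returns fewer than
 * PAGE_SIZE bytes, vbsf_readpage() above zero-fills the tail of the page
 * before marking it up to date, so a mapping of the last, partial page never
 * exposes stale memory.  The helper name is hypothetical.
 */
#if 0 /* example only */
static void vbsf_example_zero_page_tail(struct page *page, uint32_t cbRead)
{
    if (cbRead < PAGE_SIZE) {
        uint8_t *pbMapped = (uint8_t *)kmap(page);
        RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
        kunmap(page);
    }
}
#endif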
2566
2567
2568/**
2569 * Used to write out the content of a dirty page cache page to the host file.
2570 *
2571 * Needed for mmap and writes when the file is mmapped in a shared+writeable
2572 * fashion.
2573 */
2574static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
2575{
2576 struct address_space *mapping = page->mapping;
2577 struct inode *inode = mapping->host;
2578 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
2579 struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
2580 int err;
2581
2582 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
2583                inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0));
2584
2585 if (pHandle) {
2586 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
2587 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
2588 if (pReq) {
2589 uint64_t const cbFile = i_size_read(inode);
2590 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
2591 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
2592 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
2593 int vrc;
2594
2595 pReq->PgLst.offFirstPage = 0;
2596 pReq->PgLst.aPages[0] = page_to_phys(page);
2597 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
2598 pReq,
2599 pHandle->hHost,
2600 offInFile,
2601 cbToWrite,
2602 1 /*cPages*/);
2603 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
2604 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
2605 vrc = VERR_WRITE_ERROR);
2606 VbglR0PhysHeapFree(pReq);
2607
2608 if (RT_SUCCESS(vrc)) {
2609 /* Update the inode if we've extended the file. */
2610 /** @todo is this necessary given the cbToWrite calc above? */
2611 uint64_t const offEndOfWrite = offInFile + cbToWrite;
2612 if ( offEndOfWrite > cbFile
2613 && offEndOfWrite > i_size_read(inode))
2614 i_size_write(inode, offEndOfWrite);
2615
2616 if (PageError(page))
2617 ClearPageError(page);
2618
2619 err = 0;
2620 } else {
2621 ClearPageUptodate(page);
2622 err = -EPROTO;
2623 }
2624 } else
2625 err = -ENOMEM;
2626 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
2627 } else {
2628 static uint64_t volatile s_cCalls = 0;
2629 if (s_cCalls++ < 16)
2630 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
2631 err = -EPROTO;
2632 }
2633 unlock_page(page);
2634 return err;
2635}
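/*
 * Worked example (comment only, assumed numbers): with PAGE_SIZE = 0x1000 and
 * i_size = 0x2345, pages 0 and 1 are not the EOF page and are written in full
 * (cbToWrite = PAGE_SIZE), while page 2 satisfies
 * page->index == (cbFile >> PAGE_SHIFT) and only the valid tail is sent:
 * cbToWrite = (uint32_t)cbFile & PAGE_OFFSET_MASK = 0x345.
 */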
2636
2637# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2638/**
2639 * Called when writing thru the page cache (which we shouldn't be doing).
2640 */
2641int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
2642 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
2643{
2644 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
2645 * the page cache for any writes AFAIK. We could just as well use
2646 * simple_write_begin & simple_write_end here if we think we really
2647 * need to have non-NULL function pointers in the table... */
2648 static uint64_t volatile s_cCalls = 0;
2649 if (s_cCalls++ < 16) {
2650 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2651 (unsigned long long)pos, len, flags);
2652 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
2653 (unsigned long long)pos, len, flags);
2654# ifdef WARN_ON
2655 WARN_ON(1);
2656# endif
2657 }
2658 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
2659}
2660# endif /* KERNEL_VERSION >= 2.6.24 */
2661
2662# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2663/**
2664 * This is needed to make open accept O_DIRECT as well as dealing with direct
2665 * I/O requests if we don't intercept them earlier.
2666 */
2667# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
2668static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2669# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
2670static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2671# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
2672static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2673# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
2674static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2675# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
2676static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2677# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
2678static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2679# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
2680static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2681# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
2682static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
2683# else
2684static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
2685# endif
2686{
2687 TRACE();
2688 return -EINVAL;
2689}
2690# endif
2691
2692/**
2693 * Address space (for the page cache) operations for regular files.
2694 */
2695struct address_space_operations vbsf_reg_aops = {
2696 .readpage = vbsf_readpage,
2697 .writepage = vbsf_writepage,
2698 /** @todo Need .writepages if we want msync performance... */
2699# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
2700 .set_page_dirty = __set_page_dirty_buffers,
2701# endif
2702# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
2703 .write_begin = vbsf_write_begin,
2704 .write_end = simple_write_end,
2705# else
2706 .prepare_write = simple_prepare_write,
2707 .commit_write = simple_commit_write,
2708# endif
2709# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
2710 .direct_IO = vbsf_direct_IO,
2711# endif
2712};
2713
2714#endif /* LINUX_VERSION_CODE >= 2.6.0 */
2715