VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@77549

Last change on this file since 77549 was 77549, checked in by vboxsync, 6 years ago

linux/vboxsf: Workaround for kernel_read/write calls (get_user_pages fails), fixing issue with sporadic ENOEXEC from execve(). The workaround requires rtR0MemObjLinuxVirtToPage() from memobj-r0drv-linux.c. Various cleanups and adjustments. bugref:9172

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 56.4 KB
1/* $Id: regops.c 77549 2019-03-04 10:00:34Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31#include "vfsmod.h"
32#include <linux/uio.h>
33#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
34# include <linux/aio.h> /* struct kiocb before 4.1 */
35#endif
36#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
37# include <linux/buffer_head.h>
38#endif
39#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
40 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/writeback.h>
42#endif
43#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
44 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
45# include <linux/splice.h>
46#endif
47#include <iprt/err.h>
48
49#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
50# define SEEK_END 2
51#endif
52
53
54/**
55 * Called when an inode is released to unlink all handles that might possibly
56 * still be associated with it.
57 *
58 * @param pInodeInfo The inode which handles to drop.
59 */
60void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
61{
62 struct vbsf_handle *pCur, *pNext;
63 unsigned long fSavedFlags;
64 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
65 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
66
67 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
68 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
69 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
70 pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
71 RTListNodeRemove(&pCur->Entry);
72 }
73
74 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
75}
76
77
78/**
79 * Locates a handle that matches all the flags in @a fFlags.
80 *
81 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
82 * release it. NULL if no suitable handle was found.
83 * @param pInodeInfo The inode info to search.
84 * @param fFlagsSet The flags that must be set.
85 * @param fFlagsClear The flags that must be clear.
86 */
87struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
88{
89 struct vbsf_handle *pCur;
90 unsigned long fSavedFlags;
91 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
92
93 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
94 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
95 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
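    /* A match requires every fFlagsSet bit to be set and every fFlagsClear bit to be clear; one masked compare checks both. */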
96 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
97 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
98 if (cRefs > 1) {
99 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
100 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
101 return pCur;
102 }
103 /* Oops, already being closed (safe as it's only ever increased here). */
104 ASMAtomicDecU32(&pCur->cRefs);
105 }
106 }
107
108 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
109 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
110 return NULL;
111}
112
113
114/**
115 * Slow worker for vbsf_handle_release() that does the freeing.
116 *
117 * @returns 0 (ref count).
118 * @param pHandle The handle to release.
119 * @param sf_g The info structure for the shared folder associated
120 * with the handle.
121 * @param pszCaller The caller name (for logging failures).
122 */
123uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
124{
125 int rc;
126 unsigned long fSavedFlags;
127
128 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
129
130 /*
131 * Remove from the list.
132 */
133 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
134
135 AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
136 Assert(pHandle->pInodeInfo);
137 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
138
139 if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
140 pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
141 RTListNodeRemove(&pHandle->Entry);
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145
146 /*
147 * Actually destroy it.
148 */
149 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
150 if (RT_FAILURE(rc))
151 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
152 pHandle->hHost = SHFL_HANDLE_NIL;
153 pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
154 kfree(pHandle);
155 return 0;
156}
157
158
159/**
160 * Appends a handle to a handle list.
161 *
162 * @param pInodeInfo The inode to add it to.
163 * @param pHandle The handle to add.
164 */
165void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
166{
167#ifdef VBOX_STRICT
168 struct vbsf_handle *pCur;
169#endif
170 unsigned long fSavedFlags;
171
172 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
173 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
174 ("%p %#x\n", pHandle, pHandle->fFlags));
175 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
176
177 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
178
179 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
180 ("%p %#x\n", pHandle, pHandle->fFlags));
181#ifdef VBOX_STRICT
182 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
183 Assert(pCur != pHandle);
184 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
185 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
186 }
187 pHandle->pInodeInfo = pInodeInfo;
188#endif
189
190 pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
191 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
192
193 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
194}
195
196
197#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
198 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
199
200/*
201 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
202 */
203
204static void vbsf_free_pipebuf(struct page *kpage)
205{
206 kunmap(kpage);
207 __free_pages(kpage, 0);
208}
209
210static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
211{
212 return 0;
213}
214
215static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
216{
217}
218
219static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
220{
221}
222
223static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
224{
225 return 0;
226}
227
228static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
229{
230 vbsf_free_pipebuf(pipe_buf->page);
231}
232
233static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
234{
235 return 0;
236}
237
238static struct pipe_buf_operations vbsf_pipe_buf_ops = {
239 .can_merge = 0,
240 .map = vbsf_pipe_buf_map,
241 .unmap = vbsf_pipe_buf_unmap,
242 .confirm = vbsf_pipe_buf_confirm,
243 .release = vbsf_pipe_buf_release,
244 .steal = vbsf_pipe_buf_steal,
245 .get = vbsf_pipe_buf_get,
246};
247
248static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
249 void *buf, uint32_t *nread, uint64_t pos)
250{
251 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
252 if (RT_FAILURE(rc)) {
253 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
254 rc));
255 return -EPROTO;
256 }
257 return 0;
258}
259
260# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
261# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
262
263ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
264{
265 size_t bytes_remaining = len;
266 loff_t orig_offset = *poffset;
267 loff_t offset = orig_offset;
268 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
269 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
270 struct vbsf_reg_info *sf_r = in->private_data;
271 ssize_t retval;
272 struct page *kpage = 0;
273 size_t nsent = 0;
274
275/** @todo rig up a FsPerf test for this code */
276 TRACE();
277 if (!S_ISREG(inode->i_mode)) {
278 LogFunc(("read from non regular file %d\n", inode->i_mode));
279 return -EINVAL;
280 }
281 if (!len) {
282 return 0;
283 }
284
285 LOCK_PIPE(pipe);
286
287 uint32_t req_size = 0;
288 while (bytes_remaining > 0) {
289 kpage = alloc_page(GFP_KERNEL);
290 if (unlikely(kpage == NULL)) {
291 UNLOCK_PIPE(pipe);
292 return -ENOMEM;
293 }
294 req_size = 0;
295 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
296 uint32_t chunk = 0;
297 void *kbuf = kmap(kpage);
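    /* The host may return short reads, so loop until the page chunk holds req_size bytes or EOF (nread == 0) is reached. */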
298 while (chunk < req_size) {
299 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
300 if (retval < 0)
301 goto err;
302 if (nread == 0)
303 break;
304 chunk += nread;
305 offset += nread;
306 nread = req_size - chunk;
307 }
308 if (!pipe->readers) {
309 send_sig(SIGPIPE, current, 0);
310 retval = -EPIPE;
311 goto err;
312 }
313 if (pipe->nrbufs < PIPE_BUFFERS) {
314 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
315 pipebuf->page = kpage;
316 pipebuf->ops = &vbsf_pipe_buf_ops;
317 pipebuf->len = req_size;
318 pipebuf->offset = 0;
319 pipebuf->private = 0;
320 pipebuf->flags = 0;
321 pipe->nrbufs++;
322 nsent += req_size;
323 bytes_remaining -= req_size;
324 if (signal_pending(current))
325 break;
326 } else { /* pipe full */
327
328 if (flags & SPLICE_F_NONBLOCK) {
329 retval = -EAGAIN;
330 goto err;
331 }
332 vbsf_free_pipebuf(kpage);
333 break;
334 }
335 }
336 UNLOCK_PIPE(pipe);
337 if (!nsent && signal_pending(current))
338 return -ERESTARTSYS;
339 *poffset += nsent;
340 return offset - orig_offset;
341
342 err:
343 UNLOCK_PIPE(pipe);
344 vbsf_free_pipebuf(kpage);
345 return retval;
346}
347
348#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
349
350/** Wrapper around put_page / page_cache_release. */
351DECLINLINE(void) vbsf_put_page(struct page *pPage)
352{
353#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
354 put_page(pPage);
355#else
356 page_cache_release(pPage);
357#endif
358}
359
360
361/** Wrapper around get_page / page_cache_get. */
362DECLINLINE(void) vbsf_get_page(struct page *pPage)
363{
364#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
365 get_page(pPage);
366#else
367 page_cache_get(pPage);
368#endif
369}
370
371
372/** Companion to vbsf_lock_user_pages(). */
373DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack)
374{
375 /* We don't mark kernel pages dirty: */
376 if (fLockPgHack)
377 fSetDirty = false;
378
379 while (cPages-- > 0)
380 {
381 struct page *pPage = papPages[cPages];
382 if (fSetDirty && !PageReserved(pPage))
383 SetPageDirty(pPage);
384 vbsf_put_page(pPage);
385 }
386}
387
388
389/**
390 * Catches kernel_read() and kernel_write() calls and works around them.
391 *
392 * The file_operations::read and file_operations::write callbacks supposedly
393 * hand us the user buffers to read into and write out of. To let the kernel
394 * read and write without allocating buffers in userland, kernel_read() and
395 * kernel_write() raise the user space address limit before calling us so that
396 * copyin/copyout won't reject the addresses. Our problem is that
397 * get_user_pages() works on the userspace address space structures and will
398 * not be fooled by an increased addr_limit.
399 *
400 * This code tries to detect this situation and fake the get_user_pages()
401 * locking for the kernel buffer.
402 */
403static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
404 struct page **papPages, bool *pfLockPgHack)
405{
406 /*
407 * Check that this is valid user memory that is actually in the kernel range.
408 */
409#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
410 if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
411 && uPtrFrom >= USER_DS.seg)
412#else
413 if ( access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
414 && uPtrFrom >= USER_DS.seg)
415#endif
416 {
417 uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1;
418 uint8_t *pbPage = (uint8_t *)uPtrLast;
419 size_t iPage = cPages;
420
421 /*
422 * Touch the pages first (paranoia^2).
423 */
424 if (fWrite) {
425 uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom;
426 while (iPage-- > 0) {
427 *pbProbe = *pbProbe;
428 pbProbe += PAGE_SIZE;
429 }
430 } else {
431 uint8_t const *pbProbe = (uint8_t const *)uPtrFrom;
432 while (iPage-- > 0) {
433 ASMProbeReadByte(pbProbe);
434 pbProbe += PAGE_SIZE;
435 }
436 }
437
438 /*
439 * Get the pages.
440 * Note! Fixes here probably apply to rtR0MemObjNativeLockKernel as well.
441 */
442 iPage = cPages;
443 if ( uPtrFrom >= (unsigned long)__va(0)
444 && uPtrLast < (unsigned long)high_memory)
445 {
446 /* The physical page mapping area: */
447 while (iPage-- > 0)
448 {
449 struct page *pPage = papPages[iPage] = virt_to_page(pbPage);
450 vbsf_get_page(pPage);
451 pbPage -= PAGE_SIZE;
452 }
453 }
454 else
455 {
456 /* This is vmalloc or some such thing, so go thru page tables: */
457 while (iPage-- > 0)
458 {
459 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
460 if (pPage) {
461 papPages[iPage] = pPage;
462 vbsf_get_page(pPage);
463 pbPage -= PAGE_SIZE;
464 } else {
465 while (++iPage < cPages) {
466 pPage = papPages[iPage];
467 vbsf_put_page(pPage);
468 }
469 return rcFailed;
470 }
471 }
472 }
473 *pfLockPgHack = true;
474 return 0;
475 }
476
477 return rcFailed;
478}
479
480
481/** Wrapper around get_user_pages. */
482DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
483{
484# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
485 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
486 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
487# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
488 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
489# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
490 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
491# else
492 struct task_struct *pTask = current;
493 size_t cPagesLocked;
494 down_read(&pTask->mm->mmap_sem);
495 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
496 up_read(&pTask->mm->mmap_sem);
497# endif
498 *pfLockPgHack = false;
499 if (cPagesLocked == cPages)
500 return 0;
501
502 /*
503 * It failed.
504 */
505 if (cPagesLocked < 0)
506 return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
507
508 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
509
510 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
511 return -EFAULT;
512}
513
514
515/**
516 * Read function used when accessing files that are memory mapped.
517 *
518 * We read from the page cache here to present a coherent picture of the
519 * file content.
520 */
521static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
522{
523#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
524 struct iovec iov = { .iov_base = buf, .iov_len = size };
525 struct iov_iter iter;
526 struct kiocb kiocb;
527 ssize_t cbRet;
528
529 init_sync_kiocb(&kiocb, file);
530 kiocb.ki_pos = *off;
531 iov_iter_init(&iter, READ, &iov, 1, size);
532
533 cbRet = generic_file_read_iter(&kiocb, &iter);
534
535 *off = kiocb.ki_pos;
536 return cbRet;
537
538#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
539 struct iovec iov = { .iov_base = buf, .iov_len = size };
540 struct kiocb kiocb;
541 ssize_t cbRet;
542
543 init_sync_kiocb(&kiocb, file);
544 kiocb.ki_pos = *off;
545
546 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
547 if (cbRet == -EIOCBQUEUED)
548 cbRet = wait_on_sync_kiocb(&kiocb);
549
550 *off = kiocb.ki_pos;
551 return cbRet;
552
553#else /* 2.6.18 or earlier: */
554 return generic_file_read(file, buf, size, off);
555#endif
556}
557
558
559/**
560 * Fallback case of vbsf_reg_read() that locks the user buffers and lets the
561 * host write directly into them.
562 */
563static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
564 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
565{
566 /*
567 * Lock pages and execute the read, taking care not to pass the host
568 * more than it can handle in one go or more than we care to allocate
569 * page arrays for. The latter limit is set at just short of 32KB due
570 * to how the physical heap works.
571 */
572 struct page *apPagesStack[16];
573 struct page **papPages = &apPagesStack[0];
574 struct page **papPagesFree = NULL;
575 VBOXSFREADPGLSTREQ *pReq;
576 loff_t offFile = *off;
577 ssize_t cbRet = -ENOMEM;
578 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
579 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
580 bool fLockPgHack;
581
582 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
583 while (!pReq && cMaxPages > 4) {
584 cMaxPages /= 2;
585 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
586 }
587 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
588 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
589 if (pReq && papPages) {
590 cbRet = 0;
591 for (;;) {
592 /*
593 * Figure out how much to process now and lock the user pages.
594 */
595 int rc;
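    /* cbChunk starts out as the offset of buf within its page (for offFirstPage) and is then reused as the byte count for this iteration. */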
596 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
597 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
598 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
599 if (cPages <= cMaxPages)
600 cbChunk = size;
601 else {
602 cPages = cMaxPages;
603 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
604 }
605
606 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack);
607 if (rc == 0) {
608 size_t iPage = cPages;
609 while (iPage-- > 0)
610 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
611 } else {
612 /** @todo may need fallback here for kernel addresses during exec. sigh. */
613 cbRet = rc;
614 break;
615 }
616
617 /*
618 * Issue the request and unlock the pages.
619 */
620 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
621
622 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack);
623
624 if (RT_SUCCESS(rc)) {
625 /*
626 * Success, advance position and buffer.
627 */
628 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
629 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
630 cbRet += cbActual;
631 offFile += cbActual;
632 buf = (uint8_t *)buf + cbActual;
633 size -= cbActual;
634
635 /*
636 * Are we done already? If so commit the new file offset.
637 */
638 if (!size || cbActual < cbChunk) {
639 *off = offFile;
640 break;
641 }
642 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
643 /*
644 * The host probably doesn't have enough heap to handle the
645 * request, reduce the page count and retry.
646 */
647 cMaxPages /= 4;
648 Assert(cMaxPages > 0);
649 } else {
650 /*
651 * If we've successfully read stuff, return it rather than
652 * the error. (Not sure if this is such a great idea...)
653 */
654 if (cbRet > 0)
655 *off = offFile;
656 else
657 cbRet = -EPROTO;
658 break;
659 }
660 }
661 }
662 if (papPagesFree)
663 kfree(papPages);
664 if (pReq)
665 VbglR0PhysHeapFree(pReq);
666 return cbRet;
667}
668
669
670/**
671 * Read from a regular file.
672 *
673 * @param file the file
674 * @param buf the buffer
675 * @param size length of the buffer
676 * @param off offset within the file (in/out).
677 * @returns the number of read bytes on success, Linux error code otherwise
678 */
679static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
680{
681 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
682 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
683 struct vbsf_reg_info *sf_r = file->private_data;
684 struct address_space *mapping = inode->i_mapping;
685
686 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
687
688 if (!S_ISREG(inode->i_mode)) {
689 LogFunc(("read from non regular file %d\n", inode->i_mode));
690 return -EINVAL;
691 }
692
693 /** @todo XXX Check read permission according to inode->i_mode! */
694
695 if (!size)
696 return 0;
697
698 /*
699 * If there is a mapping and O_DIRECT isn't in effect, we must heed dirty
700 * pages in the mapping and read from them. For simplicity
701 * though, we just do page cache reading when there are writable
702 * mappings around with any kind of pages loaded.
703 */
704 if ( mapping
705 && mapping->nrpages > 0
706 && mapping_writably_mapped(mapping)
707 && !(file->f_flags & O_DIRECT)
708 && 1 /** @todo make this behaviour configurable */ )
709 return vbsf_reg_read_mapped(file, buf, size, off);
710
711 /*
712 * For small requests, try to use an embedded buffer, provided we get a heap
713 * block that does not cross page boundaries (see host code).
714 */
715 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
716 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
717 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
718 if ( pReq
719 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
720 ssize_t cbRet;
721 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
722 if (RT_SUCCESS(vrc)) {
723 cbRet = pReq->Parms.cb32Read.u.value32;
724 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
725 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
726 *off += cbRet;
727 else
728 cbRet = -EFAULT;
729 } else
730 cbRet = -EPROTO;
731 VbglR0PhysHeapFree(pReq);
732 return cbRet;
733 }
734 if (pReq)
735 VbglR0PhysHeapFree(pReq);
736 }
737
738#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
739 /*
740 * For medium sized requests try use a bounce buffer.
741 */
742 if (size <= _64K /** @todo make this configurable? */) {
743 void *pvBounce = kmalloc(size, GFP_KERNEL);
744 if (pvBounce) {
745 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
746 if (pReq) {
747 ssize_t cbRet;
748 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
749 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
750 if (RT_SUCCESS(vrc)) {
751 cbRet = pReq->Parms.cb32Read.u.value32;
752 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
753 if (copy_to_user(buf, pvBounce, cbRet) == 0)
754 *off += cbRet;
755 else
756 cbRet = -EFAULT;
757 } else
758 cbRet = -EPROTO;
759 VbglR0PhysHeapFree(pReq);
760 kfree(pvBounce);
761 return cbRet;
762 }
763 kfree(pvBounce);
764 }
765 }
766#endif
767
768 return vbsf_reg_read_locking(file, buf, size, off, sf_g, sf_r);
769}
770
771
772/**
773 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
774 * the changes written via vbsf_reg_write are made visible to mmap users.
775 */
776DECLINLINE(void) vbsf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
777{
778 /*
779 * Only bother with this if the mapping has any pages in it.
780 *
781 * Note! According to the docs, the last parameter, end, is inclusive (we
782 * would have named it 'last' to indicate this).
783 *
784 * Note! The pre-2.6.12 function might not do enough to ensure consistency
785 * when any of the pages in the range is already mapped.
786 */
787# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
788 if (mapping)
789 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
790# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
791 if (mapping && mapping->nrpages > 0)
792 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
793# else
794 /** @todo ... */
795 RT_NOREF(mapping, offStart, offEnd);
796# endif
797}
798
799
800/**
801 * Fallback case of vbsf_reg_write() that locks the user buffers and lets the
802 * host read directly from them.
803 */
804static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
805 struct inode *inode, struct vbsf_inode_info *sf_i,
806 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
807{
808 /*
809 * Lock pages and execute the write, taking care not to pass the host
810 * more than it can handle in one go or more than we care to allocate
811 * page arrays for. The latter limit is set at just short of 32KB due
812 * to how the physical heap works.
813 */
814 struct page *apPagesStack[16];
815 struct page **papPages = &apPagesStack[0];
816 struct page **papPagesFree = NULL;
817 VBOXSFWRITEPGLSTREQ *pReq;
818 ssize_t cbRet = -ENOMEM;
819 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
820 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
821 bool fLockPgHack;
822
823 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
824 while (!pReq && cMaxPages > 4) {
825 cMaxPages /= 2;
826 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
827 }
828 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
829 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
830 if (pReq && papPages) {
831 cbRet = 0;
832 for (;;) {
833 /*
834 * Figure out how much to process now and lock the user pages.
835 */
836 int rc;
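    /* As in the read path, cbChunk starts out as the offset of buf within its page and is then reused as the byte count for this iteration. */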
837 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
838 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
839 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
840 if (cPages <= cMaxPages)
841 cbChunk = size;
842 else {
843 cPages = cMaxPages;
844 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
845 }
846
847 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack);
848 if (rc == 0) {
849 size_t iPage = cPages;
850 while (iPage-- > 0)
851 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
852 } else {
853 cbRet = rc;
854 break;
855 }
856
857 /*
858 * Issue the request and unlock the pages.
859 */
860 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
861
862 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
863
864 if (RT_SUCCESS(rc)) {
865 /*
866 * Success, advance position and buffer.
867 */
868 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
869 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
870 cbRet += cbActual;
871 offFile += cbActual;
872 buf = (uint8_t *)buf + cbActual;
873 size -= cbActual;
874 if (offFile > i_size_read(inode))
875 i_size_write(inode, offFile);
876 vbsf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
877
878 /*
879 * Are we done already? If so commit the new file offset.
880 */
881 if (!size || cbActual < cbChunk) {
882 *off = offFile;
883 break;
884 }
885 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
886 /*
887 * The host probably doesn't have enough heap to handle the
888 * request, reduce the page count and retry.
889 */
890 cMaxPages /= 4;
891 Assert(cMaxPages > 0);
892 } else {
893 /*
894 * If we've successfully written stuff, return it rather than
895 * the error. (Not sure if this is such a great idea...)
896 */
897 if (cbRet > 0)
898 *off = offFile;
899 else
900 cbRet = -EPROTO;
901 break;
902 }
903 sf_i->force_restat = 1; /* mtime (and size) may have changed */
904 }
905 }
906 if (papPagesFree)
907 kfree(papPages);
908 if (pReq)
909 VbglR0PhysHeapFree(pReq);
910 return cbRet;
911}
912
913
914/**
915 * Write to a regular file.
916 *
917 * @param file the file
918 * @param buf the buffer
919 * @param size length of the buffer
920 * @param off offset within the file
921 * @returns the number of written bytes on success, Linux error code otherwise
922 */
923static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
924{
925 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
926 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
927 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
928 struct vbsf_reg_info *sf_r = file->private_data;
929 struct address_space *mapping = inode->i_mapping;
930 loff_t pos;
931
932 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
933 BUG_ON(!sf_i);
934 BUG_ON(!sf_g);
935 BUG_ON(!sf_r);
936
937 if (!S_ISREG(inode->i_mode)) {
938 LogFunc(("write to non regular file %d\n", inode->i_mode));
939 return -EINVAL;
940 }
941
942 pos = *off;
943 /** @todo This should be handled by the host, it returning the new file
944 * offset when appending. We may have an outdated i_size value here! */
945 if (file->f_flags & O_APPEND)
946 pos = i_size_read(inode);
947
948 /** @todo XXX Check write permission according to inode->i_mode! */
949
950 if (!size) {
951 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
952 *off = pos;
953 return 0;
954 }
955
956 /*
957 * If there are active writable mappings, coordinate with any
958 * pending writes via those.
959 */
960 if ( mapping
961 && mapping->nrpages > 0
962 && mapping_writably_mapped(mapping)) {
963#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
964 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
965 if (err)
966 return err;
967#else
968 /** @todo ... */
969#endif
970 }
971
972 /*
973 * For small requests, try to use an embedded buffer, provided we get a heap
974 * block that does not cross page boundaries (see host code).
975 */
976 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
977 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
978 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
979 if ( pReq
980 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
981 ssize_t cbRet;
982 if (copy_from_user(pReq->abData, buf, size) == 0) {
983 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
984 pos, (uint32_t)size);
985 if (RT_SUCCESS(vrc)) {
986 cbRet = pReq->Parms.cb32Write.u.value32;
987 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
988 pos += cbRet;
989 *off = pos;
990 if (pos > i_size_read(inode))
991 i_size_write(inode, pos);
992 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
993 } else
994 cbRet = -EPROTO;
995 sf_i->force_restat = 1; /* mtime (and size) may have changed */
996 } else
997 cbRet = -EFAULT;
998
999 VbglR0PhysHeapFree(pReq);
1000 return cbRet;
1001 }
1002 if (pReq)
1003 VbglR0PhysHeapFree(pReq);
1004 }
1005
1006#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
1007 /*
1008 * For medium sized requests try use a bounce buffer.
1009 */
1010 if (size <= _64K /** @todo make this configurable? */) {
1011 void *pvBounce = kmalloc(size, GFP_KERNEL);
1012 if (pvBounce) {
1013 if (copy_from_user(pvBounce, buf, size) == 0) {
1014 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1015 if (pReq) {
1016 ssize_t cbRet;
1017 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->Handle.hHost, pos,
1018 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
1019 if (RT_SUCCESS(vrc)) {
1020 cbRet = pReq->Parms.cb32Write.u.value32;
1021 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1022 pos += cbRet;
1023 *off = pos;
1024 if (pos > i_size_read(inode))
1025 i_size_write(inode, pos);
1026 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1027 } else
1028 cbRet = -EPROTO;
1029 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1030 VbglR0PhysHeapFree(pReq);
1031 kfree(pvBounce);
1032 return cbRet;
1033 }
1034 kfree(pvBounce);
1035 } else {
1036 kfree(pvBounce);
1037 return -EFAULT;
1038 }
1039 }
1040 }
1041#endif
1042
1043 return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
1044}
1045
1046
1047/**
1048 * Open a regular file.
1049 *
1050 * @param inode the inode
1051 * @param file the file
1052 * @returns 0 on success, Linux error code otherwise
1053 */
1054static int vbsf_reg_open(struct inode *inode, struct file *file)
1055{
1056 int rc, rc_linux = 0;
1057 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1058 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1059 struct vbsf_reg_info *sf_r;
1060 struct dentry *dentry = VBSF_GET_F_DENTRY(file);
1061 VBOXSFCREATEREQ *pReq;
1062
1063 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
1064 BUG_ON(!sf_g);
1065 BUG_ON(!sf_i);
1066
1067 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
1068 if (!sf_r) {
1069 LogRelFunc(("could not allocate reg info\n"));
1070 return -ENOMEM;
1071 }
1072
1073 RTListInit(&sf_r->Handle.Entry);
1074 sf_r->Handle.cRefs = 1;
1075 sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
1076 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
1077
1078 /* Already open? */
1079 if (sf_i->handle != SHFL_HANDLE_NIL) {
1080 /*
1081 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
1082 * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
1083 * about the access flags (SHFL_CF_ACCESS_*).
1084 */
1085 sf_i->force_restat = 1;
1086 sf_r->Handle.hHost = sf_i->handle;
1087 sf_i->handle = SHFL_HANDLE_NIL;
1088 file->private_data = sf_r;
1089
1090 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
1091 vbsf_handle_append(sf_i, &sf_r->Handle);
1092 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1093 return 0;
1094 }
1095
1096 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
1097 if (!pReq) {
1098 kfree(sf_r);
1099 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
1100 return -ENOMEM;
1101 }
1102 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
1103 RT_ZERO(pReq->CreateParms);
1104 pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
1105
1106 /* We check the value of pReq->CreateParms.Handle afterwards to
1107 * find out if the call succeeded or failed, as the API does not seem
1108 * to cleanly distinguish error and informational messages.
1109 *
1110 * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
1111 * to make the shared folders host service use our fMode parameter */
1112
1113 if (file->f_flags & O_CREAT) {
1114 LogFunc(("O_CREAT set\n"));
1115 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1116 /* We ignore O_EXCL, as the Linux kernel seems to call create
1117 beforehand itself, so O_EXCL should always fail. */
1118 if (file->f_flags & O_TRUNC) {
1119 LogFunc(("O_TRUNC set\n"));
1120 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1121 } else
1122 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1123 } else {
1124 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1125 if (file->f_flags & O_TRUNC) {
1126 LogFunc(("O_TRUNC set\n"));
1127 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1128 }
1129 }
1130
1131 switch (file->f_flags & O_ACCMODE) {
1132 case O_RDONLY:
1133 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READ;
1134 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ;
1135 break;
1136
1137 case O_WRONLY:
1138 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_WRITE;
1139 sf_r->Handle.fFlags |= VBSF_HANDLE_F_WRITE;
1140 break;
1141
1142 case O_RDWR:
1143 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READWRITE;
1144 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
1145 break;
1146
1147 default:
1148 BUG();
1149 }
1150
1151 if (file->f_flags & O_APPEND) {
1152 LogFunc(("O_APPEND set\n"));
1153 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_APPEND;
1154 sf_r->Handle.fFlags |= VBSF_HANDLE_F_APPEND;
1155 }
1156
1157 pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
1158 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
1159 sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
1160 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
1161 if (RT_FAILURE(rc)) {
1162 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
1163 kfree(sf_r);
1164 VbglR0PhysHeapFree(pReq);
1165 return -RTErrConvertToErrno(rc);
1166 }
1167
1168 if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
1169 vbsf_dentry_chain_increase_ttl(dentry);
1170 rc_linux = 0;
1171 } else {
1172 switch (pReq->CreateParms.Result) {
1173 case SHFL_PATH_NOT_FOUND:
1174 rc_linux = -ENOENT;
1175 break;
1176 case SHFL_FILE_NOT_FOUND:
1177 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
1178 rc_linux = -ENOENT;
1179 break;
1180 case SHFL_FILE_EXISTS:
1181 vbsf_dentry_chain_increase_ttl(dentry);
1182 rc_linux = -EEXIST;
1183 break;
1184 default:
1185 vbsf_dentry_chain_increase_parent_ttl(dentry);
1186 rc_linux = 0;
1187 break;
1188 }
1189 }
1190
1191 sf_i->force_restat = 1; /** @todo Why?!? */
1192 sf_r->Handle.hHost = pReq->CreateParms.Handle;
1193 file->private_data = sf_r;
1194 vbsf_handle_append(sf_i, &sf_r->Handle);
1195 VbglR0PhysHeapFree(pReq);
1196 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1197 return rc_linux;
1198}
1199
1200
1201/**
1202 * Close a regular file.
1203 *
1204 * @param inode the inode
1205 * @param file the file
1206 * @returns 0 on success, Linux error code otherwise
1207 */
1208static int vbsf_reg_release(struct inode *inode, struct file *file)
1209{
1210 struct vbsf_reg_info *sf_r;
1211 struct vbsf_super_info *sf_g;
1212 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1213
1214 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
1215 sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1216 sf_r = file->private_data;
1217
1218 BUG_ON(!sf_g);
1219 BUG_ON(!sf_r);
1220
1221#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
1222 /* See the smbfs source (file.c). mmap in particular can cause data to be
1223 * written to the file after it is closed, which we can't cope with. We
1224 * copy and paste the body of filemap_write_and_wait() here as it was not
1225 * defined before 2.6.6 and not exported until quite a bit later. */
1226 /* filemap_write_and_wait(inode->i_mapping); */
1227 if (inode->i_mapping->nrpages
1228 && filemap_fdatawrite(inode->i_mapping) != -EIO)
1229 filemap_fdatawait(inode->i_mapping);
1230#endif
1231
1232 /* Release sf_r, closing the handle if we're the last user. */
1233 file->private_data = NULL;
1234 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
1235
1236 sf_i->handle = SHFL_HANDLE_NIL;
1237 return 0;
1238}
1239
1240/**
1241 * Wrapper around generic/default seek function that ensures that we've got
1242 * the up-to-date file size when doing anything relative to EOF.
1243 *
1244 * The issue is that the host may extend the file while we weren't looking, and
1245 * if the caller wishes to append data it may end up overwriting existing data
1246 * if we operate with a stale size. So, we always retrieve the file size on
1247 * EOF-relative seeks.
1248 */
1249static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
1250{
1251 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
1252
1253 switch (whence) {
1254#ifdef SEEK_HOLE
1255 case SEEK_HOLE:
1256 case SEEK_DATA:
1257#endif
1258 case SEEK_END: {
1259 struct vbsf_reg_info *sf_r = file->private_data;
1260 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost, true /*fForce*/,
1261 false /*fInodeLocked*/);
1262 if (rc == 0)
1263 break;
1264 return rc;
1265 }
1266 }
1267
1268#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
1269 return generic_file_llseek(file, off, whence);
1270#else
1271 return default_llseek(file, off, whence);
1272#endif
1273}
1274
1275/**
1276 * Flush region of file - chiefly mmap/msync.
1277 *
1278 * We cannot use the noop_fsync / simple_sync_file here as that means
1279 * msync(,,MS_SYNC) will return before the data hits the host, thereby
1280 * causing coherency issues with O_DIRECT access to the same file as
1281 * well as any host interaction with the file.
1282 */
1283#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
1284static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1285{
1286# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1287 return __generic_file_fsync(file, start, end, datasync);
1288# else
1289 return generic_file_fsync(file, start, end, datasync);
1290# endif
1291}
1292#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
1293static int vbsf_reg_fsync(struct file *file, int datasync)
1294{
1295 return generic_file_fsync(file, datasync);
1296}
1297#else /* < 2.6.35 */
1298static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
1299{
1300# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
1301 return simple_fsync(file, dentry, datasync);
1302# else
1303 int rc;
1304 struct inode *inode = dentry->d_inode;
1305 AssertReturn(inode, -EINVAL);
1306
1307 /** @todo What about file_fsync()? (<= 2.5.11) */
1308
1309# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1310 rc = sync_mapping_buffers(inode->i_mapping);
1311 if ( rc == 0
1312 && (inode->i_state & I_DIRTY)
1313 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
1314 ) {
1315 struct writeback_control wbc = {
1316 .sync_mode = WB_SYNC_ALL,
1317 .nr_to_write = 0
1318 };
1319 rc = sync_inode(inode, &wbc);
1320 }
1321# else /* < 2.5.12 */
1322 rc = fsync_inode_buffers(inode);
1323# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1324 rc |= fsync_inode_data_buffers(inode);
1325# endif
1326 /** @todo probably need to do more here... */
1327# endif /* < 2.5.12 */
1328 return rc;
1329# endif
1330}
1331#endif /* < 2.6.35 */
1332
1333
1334/**
1335 * File operations for regular files.
1336 */
1337struct file_operations vbsf_reg_fops = {
1338 .read = vbsf_reg_read,
1339 .open = vbsf_reg_open,
1340 .write = vbsf_reg_write,
1341 .release = vbsf_reg_release,
1342 .mmap = generic_file_mmap,
1343#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1344# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
1345/** @todo This code is known to cause caching of data which should not be
1346 * cached. Investigate. */
1347# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
1348 .splice_read = vbsf_splice_read,
1349# else
1350 .sendfile = generic_file_sendfile,
1351# endif
1352 .aio_read = generic_file_aio_read,
1353 .aio_write = generic_file_aio_write,
1354# endif
1355#endif
1356 .llseek = vbsf_reg_llseek,
1357 .fsync = vbsf_reg_fsync,
1358};
1359
1360struct inode_operations vbsf_reg_iops = {
1361#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
1362 .revalidate = vbsf_inode_revalidate
1363#else
1364 .getattr = vbsf_inode_getattr,
1365 .setattr = vbsf_inode_setattr
1366#endif
1367};
1368
1369
1370#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1371
1372/**
1373 * Used to read the content of a page into the page cache.
1374 *
1375 * Needed for mmap and reads+writes when the file is mmapped in a
1376 * shared+writeable fashion.
1377 */
1378static int vbsf_readpage(struct file *file, struct page *page)
1379{
1380 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
1381 int err;
1382
1383 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
1384 Assert(PageLocked(page));
1385
1386 if (PageUptodate(page)) {
1387 unlock_page(page);
1388 return 0;
1389 }
1390
1391 if (!is_bad_inode(inode)) {
1392 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1393 if (pReq) {
1394 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1395 struct vbsf_reg_info *sf_r = file->private_data;
1396 uint32_t cbRead;
1397 int vrc;
1398
1399 pReq->PgLst.offFirstPage = 0;
1400 pReq->PgLst.aPages[0] = page_to_phys(page);
1401 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
1402 pReq,
1403 sf_r->Handle.hHost,
1404 (uint64_t)page->index << PAGE_SHIFT,
1405 PAGE_SIZE,
1406 1 /*cPages*/);
1407
1408 cbRead = pReq->Parms.cb32Read.u.value32;
1409 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
1410 VbglR0PhysHeapFree(pReq);
1411
1412 if (RT_SUCCESS(vrc)) {
1413 if (cbRead == PAGE_SIZE) {
1414 /* likely */
1415 } else {
1416 uint8_t *pbMapped = (uint8_t *)kmap(page);
1417 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
1418 kunmap(page);
1419 /** @todo truncate the inode file size? */
1420 }
1421
1422 flush_dcache_page(page);
1423 SetPageUptodate(page);
1424 unlock_page(page);
1425 return 0;
1426 }
1427 err = -RTErrConvertToErrno(vrc);
1428 } else
1429 err = -ENOMEM;
1430 } else
1431 err = -EIO;
1432 SetPageError(page);
1433 unlock_page(page);
1434 return err;
1435}
1436
1437
1438/**
1439 * Used to write out the content of a dirty page cache page to the host file.
1440 *
1441 * Needed for mmap and writes when the file is mmapped in a shared+writeable
1442 * fashion.
1443 */
1444static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
1445{
1446 struct address_space *mapping = page->mapping;
1447 struct inode *inode = mapping->host;
1448 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1449 struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
1450 int err;
1451
1452 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
1453 inode, page,(uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle->hHost));
1454
1455 if (pHandle) {
1456 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1457 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1458 if (pReq) {
1459 uint64_t const cbFile = i_size_read(inode);
1460 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
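    /* Write the full page unless it is the one containing EOF, in which case only write up to the end of the file. */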
1461 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
1462 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
1463 int vrc;
1464
1465 pReq->PgLst.offFirstPage = 0;
1466 pReq->PgLst.aPages[0] = page_to_phys(page);
1467 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
1468 pReq,
1469 pHandle->hHost,
1470 offInFile,
1471 cbToWrite,
1472 1 /*cPages*/);
1473 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
1474 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
1475 vrc = VERR_WRITE_ERROR);
1476 VbglR0PhysHeapFree(pReq);
1477
1478 if (RT_SUCCESS(vrc)) {
1479 /* Update the inode if we've extended the file. */
1480 /** @todo is this necessary given the cbToWrite calc above? */
1481 uint64_t const offEndOfWrite = offInFile + cbToWrite;
1482 if ( offEndOfWrite > cbFile
1483 && offEndOfWrite > i_size_read(inode))
1484 i_size_write(inode, offEndOfWrite);
1485
1486 if (PageError(page))
1487 ClearPageError(page);
1488
1489 err = 0;
1490 } else {
1491 ClearPageUptodate(page);
1492 err = -EPROTO;
1493 }
1494 } else
1495 err = -ENOMEM;
1496 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
1497 } else {
1498 static uint64_t volatile s_cCalls = 0;
1499 if (s_cCalls++ < 16)
1500 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
1501 err = -EPROTO;
1502 }
1503 unlock_page(page);
1504 return err;
1505}
1506
1507# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1508/**
1509 * Called when writing thru the page cache (which we shouldn't be doing).
1510 */
1511int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
1512 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
1513{
1514 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
1515 * the page cache for any writes AFAIK. We could just as well use
1516 * simple_write_begin & simple_write_end here if we think we really
1517 * need to have non-NULL function pointers in the table... */
1518 static uint64_t volatile s_cCalls = 0;
1519 if (s_cCalls++ < 16) {
1520 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1521 (unsigned long long)pos, len, flags);
1522 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1523 (unsigned long long)pos, len, flags);
1524# ifdef WARN_ON
1525 WARN_ON(1);
1526# endif
1527 }
1528 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
1529}
1530# endif /* KERNEL_VERSION >= 2.6.24 */
1531
1532# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1533/**
1534 * This is needed to make open accept O_DIRECT as well as dealing with direct
1535 * I/O requests if we don't intercept them earlier.
1536 */
1537# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
1538static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1539# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1540static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1541# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1542static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1543# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
1544static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1545# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
1546static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1547# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
1548static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1549# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
1550static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1551# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
1552static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
1553# else
1554static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *, unsigned long, int)
1555# endif
1556{
1557 TRACE();
1558 return -EINVAL;
1559}
1560# endif
1561
1562/**
1563 * Address space (for the page cache) operations for regular files.
1564 */
1565struct address_space_operations vbsf_reg_aops = {
1566 .readpage = vbsf_readpage,
1567 .writepage = vbsf_writepage,
1568 /** @todo Need .writepages if we want msync performance... */
1569# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1570 .set_page_dirty = __set_page_dirty_buffers,
1571# endif
1572# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1573 .write_begin = vbsf_write_begin,
1574 .write_end = simple_write_end,
1575# else
1576 .prepare_write = simple_prepare_write,
1577 .commit_write = simple_commit_write,
1578# endif
1579# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1580 .direct_IO = vbsf_direct_IO,
1581# endif
1582};
1583
1584#endif /* LINUX_VERSION_CODE >= 2.6.0 */
1585