VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@ 77526

Last change on this file since 77526 was 77526, checked in by vboxsync, 6 years ago

linux/vboxsf: We don't use tabs and our indent size is 4 not 8. Makes this a heck lot easier to maintain. bugref:9172

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 51.7 KB
1/* $Id: regops.c 77526 2019-03-01 12:15:29Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31#include "vfsmod.h"
32#include <linux/uio.h>
33#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
34# include <linux/aio.h> /* struct kiocb before 4.1 */
35#endif
36#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
37# include <linux/buffer_head.h>
38#endif
39#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
40 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/writeback.h>
42#endif
43#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
44 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
45# include <linux/splice.h>
46#endif
47#include <iprt/err.h>
48
49#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
50# define SEEK_END 2
51#endif
52
53
54/**
55 * Called when an inode is released to unlink all handles that might still
56 * be associated with it.
57 *
58 * @param pInodeInfo The inode which handles to drop.
59 */
60void sf_handle_drop_chain(struct sf_inode_info *pInodeInfo)
61{
62 struct sf_handle *pCur, *pNext;
63 unsigned long fSavedFlags;
64 SFLOGFLOW(("sf_handle_drop_chain: %p\n", pInodeInfo));
65 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
66
67 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct sf_handle, Entry) {
68 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
69 ("%p %#x\n", pCur, pCur->fFlags));
70 pCur->fFlags &= ~SF_HANDLE_F_ON_LIST; /* it is being taken off the list */
71 RTListNodeRemove(&pCur->Entry);
72 }
73
74 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
75}
76
77
78/**
79 * Locates a handle that matches all the flags in @a fFlags.
80 *
81 * @returns Pointer to handle on success (retained), use sf_handle_release() to
82 * release it. NULL if no suitable handle was found.
83 * @param pInodeInfo The inode info to search.
84 * @param fFlagsSet The flags that must be set.
85 * @param fFlagsClear The flags that must be clear.
86 */
87struct sf_handle *sf_handle_find(struct sf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
88{
89 struct sf_handle *pCur;
90 unsigned long fSavedFlags;
91 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
92
93 RTListForEach(&pInodeInfo->HandleList, pCur, struct sf_handle, Entry) {
94 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
95 ("%p %#x\n", pCur, pCur->fFlags));
96 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
97 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
98 if (cRefs > 1) {
99 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
100 SFLOGFLOW(("sf_handle_find: returns %p\n", pCur));
101 return pCur;
102 }
103 /* Oops, already being closed (safe as it's only ever increased here). */
104 ASMAtomicDecU32(&pCur->cRefs);
105 }
106 }
107
108 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
109 SFLOGFLOW(("sf_handle_find: returns NULL!\n"));
110 return NULL;
111}
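/*
 * Illustrative sketch: how a caller typically pairs sf_handle_find() with
 * sf_handle_release(); sf_writepage() further down does exactly this.  The
 * helper name and the -EBADF error choice are made up for the example.
 */
#if 0
static int sf_example_with_write_handle(struct inode *inode)
{
    struct sf_inode_info   *sf_i    = GET_INODE_INFO(inode);
    struct vbsf_super_info *sf_g    = VBSF_GET_SUPER_INFO(inode->i_sb);
    /* Want a handle opened for writing but not in append mode: */
    struct sf_handle       *pHandle = sf_handle_find(sf_i, SF_HANDLE_F_WRITE, SF_HANDLE_F_APPEND);
    if (!pHandle)
        return -EBADF;
    /* ... issue host requests using pHandle->hHost ... */
    sf_handle_release(pHandle, sf_g, "sf_example_with_write_handle");
    return 0;
}
#endif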
112
113
114/**
115 * Slow worker for sf_handle_release() that does the freeing.
116 *
117 * @returns 0 (ref count).
118 * @param pHandle The handle to release.
119 * @param sf_g The info structure for the shared folder associated
120 * with the handle.
121 * @param pszCaller The caller name (for logging failures).
122 */
123uint32_t sf_handle_release_slow(struct sf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
124{
125 int rc;
126 unsigned long fSavedFlags;
127
128 SFLOGFLOW(("sf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
129
130 /*
131 * Remove from the list.
132 */
133 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
134
135 AssertMsg((pHandle->fFlags & SF_HANDLE_F_MAGIC_MASK) == SF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
136 Assert(pHandle->pInodeInfo);
137 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
138
139 if (pHandle->fFlags & SF_HANDLE_F_ON_LIST) {
140 pHandle->fFlags &= ~SF_HANDLE_F_ON_LIST;
141 RTListNodeRemove(&pHandle->Entry);
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145
146 /*
147 * Actually destroy it.
148 */
149 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
150 if (RT_FAILURE(rc))
151 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
152 pHandle->hHost = SHFL_HANDLE_NIL;
153 pHandle->fFlags = SF_HANDLE_F_MAGIC_DEAD;
154 kfree(pHandle);
155 return 0;
156}
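/*
 * Illustrative sketch: the fast-path counterpart sf_handle_release() is not in
 * this file (presumably an inline in vfsmod.h); it is assumed to look roughly
 * like this, only calling the slow worker above when the last reference goes.
 */
#if 0
DECLINLINE(uint32_t) sf_handle_release(struct sf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
{
    uint32_t cRefs = ASMAtomicDecU32(&pHandle->cRefs);
    if (cRefs)
        return cRefs;                                         /* still in use elsewhere */
    return sf_handle_release_slow(pHandle, sf_g, pszCaller);  /* last user: close + free */
}
#endif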
157
158
159/**
160 * Appends a handle to a handle list.
161 *
162 * @param pInodeInfo The inode to add it to.
163 * @param pHandle The handle to add.
164 */
165void sf_handle_append(struct sf_inode_info *pInodeInfo, struct sf_handle *pHandle)
166{
167#ifdef VBOX_STRICT
168 struct sf_handle *pCur;
169#endif
170 unsigned long fSavedFlags;
171
172 SFLOGFLOW(("sf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
173 AssertMsg((pHandle->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == SF_HANDLE_F_MAGIC,
174 ("%p %#x\n", pHandle, pHandle->fFlags));
175 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
176
177 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
178
179 AssertMsg((pHandle->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == SF_HANDLE_F_MAGIC,
180 ("%p %#x\n", pHandle, pHandle->fFlags));
181#ifdef VBOX_STRICT
182 RTListForEach(&pInodeInfo->HandleList, pCur, struct sf_handle, Entry) {
183 Assert(pCur != pHandle);
184 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
185 ("%p %#x\n", pCur, pCur->fFlags));
186 }
187 pHandle->pInodeInfo = pInodeInfo;
188#endif
189
190 pHandle->fFlags |= SF_HANDLE_F_ON_LIST;
191 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
192
193 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
194}
195
196
197#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
198 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
199
200void free_pipebuf(struct page *kpage)
201{
202 kunmap(kpage);
203 __free_pages(kpage, 0);
204}
205
206void *sf_pipe_buf_map(struct pipe_inode_info *pipe,
207 struct pipe_buffer *pipe_buf, int atomic)
208{
209 return 0;
210}
211
212void sf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
213{
214}
215
216void sf_pipe_buf_unmap(struct pipe_inode_info *pipe,
217 struct pipe_buffer *pipe_buf, void *map_data)
218{
219}
220
221int sf_pipe_buf_steal(struct pipe_inode_info *pipe,
222 struct pipe_buffer *pipe_buf)
223{
224 return 0;
225}
226
227static void sf_pipe_buf_release(struct pipe_inode_info *pipe,
228 struct pipe_buffer *pipe_buf)
229{
230 free_pipebuf(pipe_buf->page);
231}
232
233int sf_pipe_buf_confirm(struct pipe_inode_info *info,
234 struct pipe_buffer *pipe_buf)
235{
236 return 0;
237}
238
239static struct pipe_buf_operations sf_pipe_buf_ops = {
240 .can_merge = 0,
241 .map = sf_pipe_buf_map,
242 .unmap = sf_pipe_buf_unmap,
243 .confirm = sf_pipe_buf_confirm,
244 .release = sf_pipe_buf_release,
245 .steal = sf_pipe_buf_steal,
246 .get = sf_pipe_buf_get,
247};
248
249static int sf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g,
250 struct sf_reg_info *sf_r, void *buf,
251 uint32_t * nread, uint64_t pos)
252{
253 int rc = VbglR0SfRead(&client_handle, &sf_g->map, sf_r->Handle.hHost,
254 pos, nread, buf, false /* already locked? */ );
255 if (RT_FAILURE(rc)) {
256 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
257 rc));
258 return -EPROTO;
259 }
260 return 0;
261}
262
263# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
264# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
265
266ssize_t
267sf_splice_read(struct file *in, loff_t * poffset,
268 struct pipe_inode_info *pipe, size_t len, unsigned int flags)
269{
270 size_t bytes_remaining = len;
271 loff_t orig_offset = *poffset;
272 loff_t offset = orig_offset;
273 struct inode *inode = GET_F_DENTRY(in)->d_inode;
274 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
275 struct sf_reg_info *sf_r = in->private_data;
276 ssize_t retval;
277 struct page *kpage = 0;
278 size_t nsent = 0;
279
280/** @todo rig up a FsPerf test for this code */
281 TRACE();
282 if (!S_ISREG(inode->i_mode)) {
283 LogFunc(("read from non regular file %d\n", inode->i_mode));
284 return -EINVAL;
285 }
286 if (!len) {
287 return 0;
288 }
289
290 LOCK_PIPE(pipe);
291
292 uint32_t req_size = 0;
293 while (bytes_remaining > 0) {
294 kpage = alloc_page(GFP_KERNEL);
295 if (unlikely(kpage == NULL)) {
296 UNLOCK_PIPE(pipe);
297 return -ENOMEM;
298 }
299 req_size = 0;
300 uint32_t nread = req_size =
301 (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
302 uint32_t chunk = 0;
303 void *kbuf = kmap(kpage);
304 while (chunk < req_size) {
305 retval =
306 sf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk,
307 &nread, offset);
308 if (retval < 0)
309 goto err;
310 if (nread == 0)
311 break;
312 chunk += nread;
313 offset += nread;
314 nread = req_size - chunk;
315 }
316 if (!pipe->readers) {
317 send_sig(SIGPIPE, current, 0);
318 retval = -EPIPE;
319 goto err;
320 }
321 if (pipe->nrbufs < PIPE_BUFFERS) {
322 struct pipe_buffer *pipebuf =
323 pipe->bufs +
324 ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS -
325 1));
326 pipebuf->page = kpage;
327 pipebuf->ops = &sf_pipe_buf_ops;
328 pipebuf->len = req_size;
329 pipebuf->offset = 0;
330 pipebuf->private = 0;
331 pipebuf->flags = 0;
332 pipe->nrbufs++;
333 nsent += req_size;
334 bytes_remaining -= req_size;
335 if (signal_pending(current))
336 break;
337 } else { /* pipe full */
338
339 if (flags & SPLICE_F_NONBLOCK) {
340 retval = -EAGAIN;
341 goto err;
342 }
343 free_pipebuf(kpage);
344 break;
345 }
346 }
347 UNLOCK_PIPE(pipe);
348 if (!nsent && signal_pending(current))
349 return -ERESTARTSYS;
350 *poffset += nsent;
351 return offset - orig_offset;
352
353 err:
354 UNLOCK_PIPE(pipe);
355 free_pipebuf(kpage);
356 return retval;
357}
358
359#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
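/*
 * Illustrative user-space sketch: a splice(2) call like the following is what
 * ends up in sf_splice_read() on 2.6.23..2.6.30 kernels, moving shared folder
 * data into a pipe without a user-space copy (the mount path is just an example):
 *
 *     int fds[2];
 *     pipe(fds);
 *     int fd = open("/media/sf_myshare/file.bin", O_RDONLY);
 *     ssize_t cb = splice(fd, NULL, fds[1], NULL, 65536, SPLICE_F_NONBLOCK);
 */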
360
361
362/** Companion to sf_lock_user_pages(). */
363DECLINLINE(void) sf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty)
364{
365 while (cPages-- > 0)
366 {
367 struct page *pPage = papPages[cPages];
368 if (fSetDirty && !PageReserved(pPage))
369 SetPageDirty(pPage);
370#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
371 put_page(pPage);
372#else
373 page_cache_release(pPage);
374#endif
375 }
376}
377
378
379/** Wrapper around get_user_pages. */
380DECLINLINE(int) sf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages)
381{
382# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
383 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
384 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
385# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
386 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
387# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
388 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages,
389 fWrite, 1 /*force*/, papPages);
390# else
391 struct task_struct *pTask = current;
392 size_t cPagesLocked;
393 down_read(&pTask->mm->mmap_sem);
394 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
395 up_read(&pTask->mm->mmap_sem);
396# endif
397 if (cPagesLocked == cPages)
398 return 0;
399 if (cPagesLocked < 0)
400 return cPagesLocked;
401
402 sf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/);
403
404 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
405 return -EFAULT;
406}
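/*
 * Illustrative sketch: how the callers below size the page array they hand to
 * sf_lock_user_pages().  For a buffer starting offInPage bytes into its first
 * page, cPages = ceil((offInPage + size) / PAGE_SIZE); e.g. a 10 byte access
 * straddling a page boundary still needs two pages locked.  The helper name is
 * made up for the example.
 */
#if 0
static size_t sf_example_calc_pages(uintptr_t uPtr, size_t cb)
{
    return ((uPtr & PAGE_OFFSET_MASK) + cb + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
}
#endif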
407
408
409/**
410 * Read function used when accessing files that are memory mapped.
411 *
412 * We read from the page cache here to present a coherent picture of the
413 * file content.
414 */
415static ssize_t sf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
416{
417#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
418 struct iovec iov = { .iov_base = buf, .iov_len = size };
419 struct iov_iter iter;
420 struct kiocb kiocb;
421 ssize_t cbRet;
422
423 init_sync_kiocb(&kiocb, file);
424 kiocb.ki_pos = *off;
425 iov_iter_init(&iter, READ, &iov, 1, size);
426
427 cbRet = generic_file_read_iter(&kiocb, &iter);
428
429 *off = kiocb.ki_pos;
430 return cbRet;
431
432#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
433 struct iovec iov = { .iov_base = buf, .iov_len = size };
434 struct kiocb kiocb;
435 ssize_t cbRet;
436
437 init_sync_kiocb(&kiocb, file);
438 kiocb.ki_pos = *off;
439
440 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
441 if (cbRet == -EIOCBQUEUED)
442 cbRet = wait_on_sync_kiocb(&kiocb);
443
444 *off = kiocb.ki_pos;
445 return cbRet;
446
447#else /* 2.6.18 or earlier: */
448 return generic_file_read(file, buf, size, off);
449#endif
450}
451
452
453/**
454 * Fallback case of sf_reg_read() that locks the user buffers and lets the
455 * host write directly to them.
456 */
457static ssize_t sf_reg_read_fallback(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
458 struct vbsf_super_info *sf_g, struct sf_reg_info *sf_r)
459{
460 /*
461 * Lock pages and execute the read, taking care not to pass the host
462 * more than it can handle in one go or more than we care to allocate
463 * page arrays for. The latter limit is set at just short of 32KB due
464 * to how the physical heap works.
465 */
466 struct page *apPagesStack[16];
467 struct page **papPages = &apPagesStack[0];
468 struct page **papPagesFree = NULL;
469 VBOXSFREADPGLSTREQ *pReq;
470 loff_t offFile = *off;
471 ssize_t cbRet = -ENOMEM;
472 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
473 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
474
475 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
476 while (!pReq && cMaxPages > 4) {
477 cMaxPages /= 2;
478 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
479 }
480 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
481 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
482 if (pReq && papPages) {
483 cbRet = 0;
484 for (;;) {
485 /*
486 * Figure out how much to process now and lock the user pages.
487 */
488 int rc;
489 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
490 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
491 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
492 if (cPages <= cMaxPages)
493 cbChunk = size;
494 else {
495 cPages = cMaxPages;
496 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
497 }
498
499 rc = sf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages);
500 if (rc == 0) {
501 size_t iPage = cPages;
502 while (iPage-- > 0)
503 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
504 } else {
505 cbRet = rc;
506 break;
507 }
508
509 /*
510 * Issue the request and unlock the pages.
511 */
512 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
513
514 sf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/);
515
516 if (RT_SUCCESS(rc)) {
517 /*
518 * Success, advance position and buffer.
519 */
520 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
521 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
522 cbRet += cbActual;
523 offFile += cbActual;
524 buf = (uint8_t *)buf + cbActual;
525 size -= cbActual;
526
527 /*
528 * Are we done already? If so commit the new file offset.
529 */
530 if (!size || cbActual < cbChunk) {
531 *off = offFile;
532 break;
533 }
534 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
535 /*
536 * The host probably doesn't have enough heap to handle the
537 * request, reduce the page count and retry.
538 */
539 cMaxPages /= 4;
540 Assert(cMaxPages > 0);
541 } else {
542 /*
543 * If we've successfully read stuff, return it rather than
544 * the error. (Not sure if this is such a great idea...)
545 */
546 if (cbRet > 0)
547 *off = offFile;
548 else
549 cbRet = -EPROTO;
550 break;
551 }
552 }
553 }
554 if (papPagesFree)
555 kfree(papPages);
556 if (pReq)
557 VbglR0PhysHeapFree(pReq);
558 return cbRet;
559}
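/*
 * Worked example for the chunking above (assuming 4 KiB pages): with
 * cMaxPages = 8 and a user buffer starting 0x300 bytes into a page, the first
 * chunk is 8 * 4096 - 0x300 = 32000 bytes, which leaves the buffer page
 * aligned, so subsequent chunks are full 32 KiB until the request is done.
 */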
560
561
562/**
563 * Read from a regular file.
564 *
565 * @param file the file
566 * @param buf the buffer
567 * @param size length of the buffer
568 * @param off offset within the file (in/out).
569 * @returns the number of read bytes on success, Linux error code otherwise
570 */
571static ssize_t sf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
572{
573 struct inode *inode = GET_F_DENTRY(file)->d_inode;
574 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
575 struct sf_reg_info *sf_r = file->private_data;
576 struct address_space *mapping = inode->i_mapping;
577
578 SFLOGFLOW(("sf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
579
580 if (!S_ISREG(inode->i_mode)) {
581 LogFunc(("read from non regular file %d\n", inode->i_mode));
582 return -EINVAL;
583 }
584
585 /** @todo XXX Check read permission according to inode->i_mode! */
586
587 if (!size)
588 return 0;
589
590 /*
591 * If there is a mapping and O_DIRECT isn't in effect, we must heed any
592 * dirty pages in the mapping and read from them. For simplicity
593 * though, we just do page cache reading when there are writable
594 * mappings around with any kind of pages loaded.
595 */
596 if ( mapping
597 && mapping->nrpages > 0
598 && mapping_writably_mapped(mapping)
599 && !(file->f_flags & O_DIRECT)
600 && 1 /** @todo make this behaviour configurable */ )
601 return sf_reg_read_mapped(file, buf, size, off);
602
603 /*
604 * For small requests, try to use an embedded buffer, provided we get a heap
605 * block that does not cross page boundaries (see host code).
606 */
607 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
608 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
609 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
610 if ( pReq
611 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
612 ssize_t cbRet;
613 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
614 if (RT_SUCCESS(vrc)) {
615 cbRet = pReq->Parms.cb32Read.u.value32;
616 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
617 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
618 *off += cbRet;
619 else
620 cbRet = -EFAULT;
621 } else
622 cbRet = -EPROTO;
623 VbglR0PhysHeapFree(pReq);
624 return cbRet;
625 }
626 if (pReq)
627 VbglR0PhysHeapFree(pReq);
628 }
629
630#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
631 /*
632 * For medium-sized requests, try to use a bounce buffer.
633 */
634 if (size <= _64K /** @todo make this configurable? */) {
635 void *pvBounce = kmalloc(size, GFP_KERNEL);
636 if (pvBounce) {
637 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
638 if (pReq) {
639 ssize_t cbRet;
640 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
641 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
642 if (RT_SUCCESS(vrc)) {
643 cbRet = pReq->Parms.cb32Read.u.value32;
644 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
645 if (copy_to_user(buf, pvBounce, cbRet) == 0)
646 *off += cbRet;
647 else
648 cbRet = -EFAULT;
649 } else
650 cbRet = -EPROTO;
651 VbglR0PhysHeapFree(pReq);
652 kfree(pvBounce);
653 return cbRet;
654 }
655 kfree(pvBounce);
656 }
657 }
658#endif
659
660 return sf_reg_read_fallback(file, buf, size, off, sf_g, sf_r);
661}
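/*
 * Worked example for the read path selection above (assuming 4 KiB pages): the
 * embedded-request limit is PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ,
 * abData[0]), i.e. a bit under 3 KiB, so small reads are copied through the
 * embedded buffer while larger ones go to sf_reg_read_fallback(), unless a
 * writable mapping with cached pages forced the page-cache path earlier.
 */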
662
663
664/**
665 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
666 * the changes written via sf_reg_write are made visible to mmap users.
667 */
668DECLINLINE(void) sf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
669{
670 /*
671 * Only bother with this if the mapping has any pages in it.
672 *
673 * Note! According to the docs, the last parameter, end, is inclusive (we
674 * would have named it 'last' to indicate this).
675 *
676 * Note! The pre-2.6.12 function might not do enough to ensure consistency
677 * when any of the pages in the range is already mapped.
678 */
679# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
680 if (mapping)
681 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
682# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
683 if (mapping && mapping->nrpages > 0)
684 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
685# else
686 /** @todo ... */
687 RT_NOREF(mapping, offStart, offEnd);
688# endif
689}
690
691
692/**
693 * Fallback case of sf_reg_write() that locks the user buffers and lets the
694 * host read directly from them.
695 */
696static ssize_t sf_reg_write_fallback(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
697 struct inode *inode, struct sf_inode_info *sf_i,
698 struct vbsf_super_info *sf_g, struct sf_reg_info *sf_r)
699{
700 /*
701 * Lock pages and execute the write, taking care not to pass the host
702 * more than it can handle in one go or more than we care to allocate
703 * page arrays for. The latter limit is set at just short of 32KB due
704 * to how the physical heap works.
705 */
706 struct page *apPagesStack[16];
707 struct page **papPages = &apPagesStack[0];
708 struct page **papPagesFree = NULL;
709 VBOXSFWRITEPGLSTREQ *pReq;
710 ssize_t cbRet = -ENOMEM;
711 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
712 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
713
714 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
715 while (!pReq && cMaxPages > 4) {
716 cMaxPages /= 2;
717 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
718 }
719 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
720 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
721 if (pReq && papPages) {
722 cbRet = 0;
723 for (;;) {
724 /*
725 * Figure out how much to process now and lock the user pages.
726 */
727 int rc;
728 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
729 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
730 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
731 if (cPages <= cMaxPages)
732 cbChunk = size;
733 else {
734 cPages = cMaxPages;
735 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
736 }
737
738 rc = sf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages);
739 if (rc == 0) {
740 size_t iPage = cPages;
741 while (iPage-- > 0)
742 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
743 } else {
744 cbRet = rc;
745 break;
746 }
747
748 /*
749 * Issue the request and unlock the pages.
750 */
751 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
752
753 sf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/);
754
755 if (RT_SUCCESS(rc)) {
756 /*
757 * Success, advance position and buffer.
758 */
759 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
760 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
761 cbRet += cbActual;
762 offFile += cbActual;
763 buf = (uint8_t *)buf + cbActual;
764 size -= cbActual;
765 if (offFile > i_size_read(inode))
766 i_size_write(inode, offFile);
767 sf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
768
769 /*
770 * Are we done already? If so commit the new file offset.
771 */
772 if (!size || cbActual < cbChunk) {
773 *off = offFile;
774 break;
775 }
776 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
777 /*
778 * The host probably doesn't have enough heap to handle the
779 * request, reduce the page count and retry.
780 */
781 cMaxPages /= 4;
782 Assert(cMaxPages > 0);
783 } else {
784 /*
785 * If we've successfully written stuff, return it rather than
786 * the error. (Not sure if this is such a great idea...)
787 */
788 if (cbRet > 0)
789 *off = offFile;
790 else
791 cbRet = -EPROTO;
792 break;
793 }
794 sf_i->force_restat = 1; /* mtime (and size) may have changed */
795 }
796 }
797 if (papPagesFree)
798 kfree(papPages);
799 if (pReq)
800 VbglR0PhysHeapFree(pReq);
801 return cbRet;
802}
803
804
805/**
806 * Write to a regular file.
807 *
808 * @param file the file
809 * @param buf the buffer
810 * @param size length of the buffer
811 * @param off offset within the file
812 * @returns the number of written bytes on success, Linux error code otherwise
813 */
814static ssize_t sf_reg_write(struct file *file, const char *buf, size_t size,
815 loff_t * off)
816{
817 struct inode *inode = GET_F_DENTRY(file)->d_inode;
818 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
819 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
820 struct sf_reg_info *sf_r = file->private_data;
821 struct address_space *mapping = inode->i_mapping;
822 loff_t pos;
823
824 SFLOGFLOW(("sf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
825 BUG_ON(!sf_i);
826 BUG_ON(!sf_g);
827 BUG_ON(!sf_r);
828
829 if (!S_ISREG(inode->i_mode)) {
830 LogFunc(("write to non regular file %d\n", inode->i_mode));
831 return -EINVAL;
832 }
833
834 pos = *off;
835 /** @todo This should be handled by the host, it returning the new file
836 * offset when appending. We may have an outdated i_size value here! */
837 if (file->f_flags & O_APPEND)
838 pos = i_size_read(inode);
839
840 /** @todo XXX Check write permission according to inode->i_mode! */
841
842 if (!size) {
843 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
844 *off = pos;
845 return 0;
846 }
847
848 /*
849 * If there are active writable mappings, coordinate with any
850 * pending writes via those.
851 */
852 if ( mapping
853 && mapping->nrpages > 0
854 && mapping_writably_mapped(mapping)) {
855#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
856 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
857 if (err)
858 return err;
859#else
860 /** @todo ... */
861#endif
862 }
863
864 /*
865 * For small requests, try to use an embedded buffer, provided we get a heap
866 * block that does not cross page boundaries (see host code).
867 */
868 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
869 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
870 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
871 if ( pReq
872 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
873 ssize_t cbRet;
874 if (copy_from_user(pReq->abData, buf, size) == 0) {
875 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
876 pos, (uint32_t)size);
877 if (RT_SUCCESS(vrc)) {
878 cbRet = pReq->Parms.cb32Write.u.value32;
879 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
880 pos += cbRet;
881 *off = pos;
882 if (pos > i_size_read(inode))
883 i_size_write(inode, pos);
884 sf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
885 } else
886 cbRet = -EPROTO;
887 sf_i->force_restat = 1; /* mtime (and size) may have changed */
888 } else
889 cbRet = -EFAULT;
890
891 VbglR0PhysHeapFree(pReq);
892 return cbRet;
893 }
894 if (pReq)
895 VbglR0PhysHeapFree(pReq);
896 }
897
898#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
899 /*
900 * For medium-sized requests, try to use a bounce buffer.
901 */
902 if (size <= _64K /** @todo make this configurable? */) {
903 void *pvBounce = kmalloc(size, GFP_KERNEL);
904 if (pvBounce) {
905 if (copy_from_user(pvBounce, buf, size) == 0) {
906 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
907 if (pReq) {
908 ssize_t cbRet;
909 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->Handle.hHost, pos,
910 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
911 if (RT_SUCCESS(vrc)) {
912 cbRet = pReq->Parms.cb32Write.u.value32;
913 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
914 pos += cbRet;
915 *off = pos;
916 if (pos > i_size_read(inode))
917 i_size_write(inode, pos);
918 sf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
919 } else
920 cbRet = -EPROTO;
921 sf_i->force_restat = 1; /* mtime (and size) may have changed */
922 VbglR0PhysHeapFree(pReq);
923 kfree(pvBounce);
924 return cbRet;
925 }
926 kfree(pvBounce);
927 } else {
928 kfree(pvBounce);
929 return -EFAULT;
930 }
931 }
932 }
933#endif
934
935 return sf_reg_write_fallback(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
936}
937
938
939/**
940 * Open a regular file.
941 *
942 * @param inode the inode
943 * @param file the file
944 * @returns 0 on success, Linux error code otherwise
945 */
946static int sf_reg_open(struct inode *inode, struct file *file)
947{
948 int rc, rc_linux = 0;
949 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
950 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
951 struct sf_reg_info *sf_r;
952#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
953 struct dentry *dentry = file_dentry(file);
954#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
955 struct dentry *dentry = file->f_path.dentry;
956#else
957 struct dentry *dentry = file->f_dentry;
958#endif
959 VBOXSFCREATEREQ *pReq;
960 SHFLCREATEPARMS *pCreateParms; /* temp glue */
961
962 SFLOGFLOW(("sf_reg_open: inode=%p file=%p flags=%#x %s\n",
963 inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
964 BUG_ON(!sf_g);
965 BUG_ON(!sf_i);
966
967 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
968 if (!sf_r) {
969 LogRelFunc(("could not allocate reg info\n"));
970 return -ENOMEM;
971 }
972
973 RTListInit(&sf_r->Handle.Entry);
974 sf_r->Handle.cRefs = 1;
975 sf_r->Handle.fFlags = SF_HANDLE_F_FILE | SF_HANDLE_F_MAGIC;
976 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
977
978 /* Already open? */
979 if (sf_i->handle != SHFL_HANDLE_NIL) {
980 /*
981 * This inode was created with sf_create_aux(). Check the CreateFlags:
982 * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
983 * about the access flags (SHFL_CF_ACCESS_*).
984 */
985 sf_i->force_restat = 1;
986 sf_r->Handle.hHost = sf_i->handle;
987 sf_i->handle = SHFL_HANDLE_NIL;
988 file->private_data = sf_r;
989
990 sf_r->Handle.fFlags |= SF_HANDLE_F_READ | SF_HANDLE_F_WRITE; /** @todo check */
991 sf_handle_append(sf_i, &sf_r->Handle);
992 SFLOGFLOW(("sf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
993 return 0;
994 }
995
996 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
997 if (!pReq) {
998 kfree(sf_r);
999 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
1000 return -ENOMEM;
1001 }
1002 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
1003 RT_ZERO(pReq->CreateParms);
1004 pCreateParms = &pReq->CreateParms;
1005 pCreateParms->Handle = SHFL_HANDLE_NIL;
1006
1007 /* We check the value of pCreateParms->Handle afterwards to find out if
1008 * the call succeeded or failed, as the API does not seem to cleanly
1009 * distinguish error and informational messages.
1010 *
1011 * Furthermore, we must set pCreateParms->Handle to SHFL_HANDLE_NIL to
1012 * make the shared folders host service use our fMode parameter */
1013
1014 if (file->f_flags & O_CREAT) {
1015 LogFunc(("O_CREAT set\n"));
1016 pCreateParms->CreateFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1017 /* We ignore O_EXCL, as the Linux kernel seems to call create
1018 beforehand itself, so O_EXCL should always fail. */
1019 if (file->f_flags & O_TRUNC) {
1020 LogFunc(("O_TRUNC set\n"));
1021 pCreateParms->CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1022 } else
1023 pCreateParms->CreateFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1024 } else {
1025 pCreateParms->CreateFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1026 if (file->f_flags & O_TRUNC) {
1027 LogFunc(("O_TRUNC set\n"));
1028 pCreateParms->CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1029 }
1030 }
1031
1032 switch (file->f_flags & O_ACCMODE) {
1033 case O_RDONLY:
1034 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_READ;
1035 sf_r->Handle.fFlags |= SF_HANDLE_F_READ;
1036 break;
1037
1038 case O_WRONLY:
1039 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_WRITE;
1040 sf_r->Handle.fFlags |= SF_HANDLE_F_WRITE;
1041 break;
1042
1043 case O_RDWR:
1044 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_READWRITE;
1045 sf_r->Handle.fFlags |= SF_HANDLE_F_READ | SF_HANDLE_F_WRITE;
1046 break;
1047
1048 default:
1049 BUG();
1050 }
1051
1052 if (file->f_flags & O_APPEND) {
1053 LogFunc(("O_APPEND set\n"));
1054 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_APPEND;
1055 sf_r->Handle.fFlags |= SF_HANDLE_F_APPEND;
1056 }
1057
1058 pCreateParms->Info.Attr.fMode = inode->i_mode;
1059 LogFunc(("sf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n", sf_i->path->String.utf8, file->f_flags, pCreateParms->CreateFlags));
1060 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
1061 if (RT_FAILURE(rc)) {
1062 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pCreateParms->CreateFlags, rc));
1063 kfree(sf_r);
1064 VbglR0PhysHeapFree(pReq);
1065 return -RTErrConvertToErrno(rc);
1066 }
1067
1068 if (pCreateParms->Handle != SHFL_HANDLE_NIL) {
1069 sf_dentry_chain_increase_ttl(dentry);
1070 rc_linux = 0;
1071 } else {
1072 switch (pCreateParms->Result) {
1073 case SHFL_PATH_NOT_FOUND:
1074 rc_linux = -ENOENT;
1075 break;
1076 case SHFL_FILE_NOT_FOUND:
1077 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
1078 rc_linux = -ENOENT;
1079 break;
1080 case SHFL_FILE_EXISTS:
1081 sf_dentry_chain_increase_ttl(dentry);
1082 rc_linux = -EEXIST;
1083 break;
1084 default:
1085 sf_dentry_chain_increase_parent_ttl(dentry);
1086 rc_linux = 0;
1087 break;
1088 }
1089 }
1090
1091 sf_i->force_restat = 1; /** @todo Why?!? */
1092 sf_r->Handle.hHost = pCreateParms->Handle;
1093 file->private_data = sf_r;
1094 sf_handle_append(sf_i, &sf_r->Handle);
1095 VbglR0PhysHeapFree(pReq);
1096 SFLOGFLOW(("sf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1097 return rc_linux;
1098}
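/*
 * Illustrative sketch: a guest-side open("f.txt", O_RDWR | O_CREAT | O_TRUNC, 0644)
 * reaches this function and, per the mapping above, sends the host
 * SHFL_CF_ACT_CREATE_IF_NEW | SHFL_CF_ACT_OVERWRITE_IF_EXISTS | SHFL_CF_ACCESS_READWRITE
 * with fMode taken from the inode; a plain open("f.txt", O_RDONLY) sends
 * SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ.
 */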
1099
1100
1101/**
1102 * Close a regular file.
1103 *
1104 * @param inode the inode
1105 * @param file the file
1106 * @returns 0 on success, Linux error code otherwise
1107 */
1108static int sf_reg_release(struct inode *inode, struct file *file)
1109{
1110 struct sf_reg_info *sf_r;
1111 struct vbsf_super_info *sf_g;
1112 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
1113
1114 SFLOGFLOW(("sf_reg_release: inode=%p file=%p\n", inode, file));
1115 sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1116 sf_r = file->private_data;
1117
1118 BUG_ON(!sf_g);
1119 BUG_ON(!sf_r);
1120
1121#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
1122 /* See the smbfs source (file.c). mmap in particular can cause data to be
1123 * written to the file after it is closed, which we can't cope with. We
1124 * copy and paste the body of filemap_write_and_wait() here as it was not
1125 * defined before 2.6.6 and not exported until quite a bit later. */
1126 /* filemap_write_and_wait(inode->i_mapping); */
1127 if (inode->i_mapping->nrpages
1128 && filemap_fdatawrite(inode->i_mapping) != -EIO)
1129 filemap_fdatawait(inode->i_mapping);
1130#endif
1131
1132 /* Release sf_r, closing the handle if we're the last user. */
1133 file->private_data = NULL;
1134 sf_handle_release(&sf_r->Handle, sf_g, "sf_reg_release");
1135
1136 sf_i->handle = SHFL_HANDLE_NIL;
1137 return 0;
1138}
1139
1140/**
1141 * Wrapper around generic/default seek function that ensures that we've got
1142 * the up-to-date file size when doing anything relative to EOF.
1143 *
1144 * The issue is that the host may extend the file while we weren't looking and
1145 * if the caller wishes to append data, it may end up overwriting existing data
1146 * if we operate with a stale size. So, we always retrieve the file size on EOF
1147 * relative seeks.
1148 */
1149static loff_t sf_reg_llseek(struct file *file, loff_t off, int whence)
1150{
1151 SFLOGFLOW(("sf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
1152
1153 switch (whence) {
1154#ifdef SEEK_HOLE
1155 case SEEK_HOLE:
1156 case SEEK_DATA:
1157#endif
1158 case SEEK_END: {
1159 struct sf_reg_info *sf_r = file->private_data;
1160 int rc = sf_inode_revalidate_with_handle(GET_F_DENTRY(file), sf_r->Handle.hHost, true /*fForce*/,
1161 false /*fInodeLocked*/);
1162 if (rc == 0)
1163 break;
1164 return rc;
1165 }
1166 }
1167
1168#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
1169 return generic_file_llseek(file, off, whence);
1170#else
1171 return default_llseek(file, off, whence);
1172#endif
1173}
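/*
 * Illustrative user-space sketch of why the revalidation above matters: without
 * it, the SEEK_END below could report a stale size if the host side has just
 * extended the file, and the subsequent write would clobber host-written data.
 *
 *     off_t cbFile = lseek(fd, 0, SEEK_END);   // must see host-side appends
 *     write(fd, buf, cb);                      // append at the true end of file
 */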
1174
1175/**
1176 * Flush region of file - chiefly mmap/msync.
1177 *
1178 * We cannot use the noop_fsync / simple_sync_file here as that means
1179 * msync(,,MS_SYNC) will return before the data hits the host, thereby
1180 * causing coherency issues with O_DIRECT access to the same file as
1181 * well as any host interaction with the file.
1182 */
1183#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
1184static int sf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1185{
1186# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1187 return __generic_file_fsync(file, start, end, datasync);
1188# else
1189 return generic_file_fsync(file, start, end, datasync);
1190# endif
1191}
1192#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
1193static int sf_reg_fsync(struct file *file, int datasync)
1194{
1195 return generic_file_fsync(file, datasync);
1196}
1197#else /* < 2.6.35 */
1198static int sf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
1199{
1200# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
1201 return simple_fsync(file, dentry, datasync);
1202# else
1203 int rc;
1204 struct inode *inode = dentry->d_inode;
1205 AssertReturn(inode, -EINVAL);
1206
1207 /** @todo What about file_fsync()? (<= 2.5.11) */
1208
1209# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1210 rc = sync_mapping_buffers(inode->i_mapping);
1211 if ( rc == 0
1212 && (inode->i_state & I_DIRTY)
1213 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
1214 ) {
1215 struct writeback_control wbc = {
1216 .sync_mode = WB_SYNC_ALL,
1217 .nr_to_write = 0
1218 };
1219 rc = sync_inode(inode, &wbc);
1220 }
1221# else /* < 2.5.12 */
1222 rc = fsync_inode_buffers(inode);
1223# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1224 rc |= fsync_inode_data_buffers(inode);
1225# endif
1226 /** @todo probably need to do more here... */
1227# endif /* < 2.5.12 */
1228 return rc;
1229# endif
1230}
1231#endif /* < 2.6.35 */
1232
1233
1234struct file_operations sf_reg_fops = {
1235 .read = sf_reg_read,
1236 .open = sf_reg_open,
1237 .write = sf_reg_write,
1238 .release = sf_reg_release,
1239 .mmap = generic_file_mmap,
1240#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1241# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
1242/** @todo This code is known to cause caching of data which should not be
1243 * cached. Investigate. */
1244# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
1245 .splice_read = sf_splice_read,
1246# else
1247 .sendfile = generic_file_sendfile,
1248# endif
1249 .aio_read = generic_file_aio_read,
1250 .aio_write = generic_file_aio_write,
1251# endif
1252#endif
1253 .llseek = sf_reg_llseek,
1254 .fsync = sf_reg_fsync,
1255};
1256
1257struct inode_operations sf_reg_iops = {
1258#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
1259 .revalidate = sf_inode_revalidate
1260#else
1261 .getattr = sf_getattr,
1262 .setattr = sf_setattr
1263#endif
1264};
1265
1266#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1267
1268/**
1269 * Used to read the content of a page into the page cache.
1270 *
1271 * Needed for mmap and reads+writes when the file is mmapped in a
1272 * shared+writeable fashion.
1273 */
1274static int sf_readpage(struct file *file, struct page *page)
1275{
1276 struct inode *inode = GET_F_DENTRY(file)->d_inode;
1277 int err;
1278
1279 SFLOGFLOW(("sf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
1280
1281 if (!is_bad_inode(inode)) {
1282 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1283 if (pReq) {
1284 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1285 struct sf_reg_info *sf_r = file->private_data;
1286 uint32_t cbRead;
1287 int vrc;
1288
1289 pReq->PgLst.offFirstPage = 0;
1290 pReq->PgLst.aPages[0] = page_to_phys(page);
1291 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
1292 pReq,
1293 sf_r->Handle.hHost,
1294 (uint64_t)page->index << PAGE_SHIFT,
1295 PAGE_SIZE,
1296 1 /*cPages*/);
1297
1298 cbRead = pReq->Parms.cb32Read.u.value32;
1299 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
1300 VbglR0PhysHeapFree(pReq);
1301
1302 if (RT_SUCCESS(vrc)) {
1303 if (cbRead == PAGE_SIZE) {
1304 /* likely */
1305 } else {
1306 uint8_t *pbMapped = (uint8_t *)kmap(page);
1307 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
1308 kunmap(page);
1309 /** @todo truncate the inode file size? */
1310 }
1311
1312 flush_dcache_page(page);
1313 SetPageUptodate(page);
1314 err = 0;
1315 } else
1316 err = -EPROTO;
1317 } else
1318 err = -ENOMEM;
1319 } else
1320 err = -EIO;
1321 unlock_page(page);
1322 return err;
1323}
1324
1325
1326/**
1327 * Used to write out the content of a dirty page cache page to the host file.
1328 *
1329 * Needed for mmap and writes when the file is mmapped in a shared+writeable
1330 * fashion.
1331 */
1332static int sf_writepage(struct page *page, struct writeback_control *wbc)
1333{
1334 struct address_space *mapping = page->mapping;
1335 struct inode *inode = mapping->host;
1336 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
1337 struct sf_handle *pHandle = sf_handle_find(sf_i, SF_HANDLE_F_WRITE, SF_HANDLE_F_APPEND);
1338 int err;
1339
1340 SFLOGFLOW(("sf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
1341 inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0));
1342
1343 if (pHandle) {
1344 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1345 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1346 if (pReq) {
1347 uint64_t const cbFile = i_size_read(inode);
1348 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
1349 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
1350 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
1351 int vrc;
1352
1353 pReq->PgLst.offFirstPage = 0;
1354 pReq->PgLst.aPages[0] = page_to_phys(page);
1355 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
1356 pReq,
1357 pHandle->hHost,
1358 offInFile,
1359 cbToWrite,
1360 1 /*cPages*/);
1361 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
1362 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
1363 vrc = VERR_WRITE_ERROR);
1364 VbglR0PhysHeapFree(pReq);
1365
1366 if (RT_SUCCESS(vrc)) {
1367 /* Update the inode if we've extended the file. */
1368 /** @todo is this necessary given the cbToWrite calc above? */
1369 uint64_t const offEndOfWrite = offInFile + cbToWrite;
1370 if ( offEndOfWrite > cbFile
1371 && offEndOfWrite > i_size_read(inode))
1372 i_size_write(inode, offEndOfWrite);
1373
1374 if (PageError(page))
1375 ClearPageError(page);
1376
1377 err = 0;
1378 } else {
1379 ClearPageUptodate(page);
1380 err = -EPROTO;
1381 }
1382 } else
1383 err = -ENOMEM;
1384 sf_handle_release(pHandle, sf_g, "sf_writepage");
1385 } else {
1386 static uint64_t volatile s_cCalls = 0;
1387 if (s_cCalls++ < 16)
1388 printk("sf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
1389 err = -EPROTO;
1390 }
1391 unlock_page(page);
1392 return err;
1393}
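/*
 * Worked example for the cbToWrite calculation above (assuming 4 KiB pages):
 * for a 10000 byte file, pages 0 and 1 are written out in full (4096 bytes
 * each), while page 2 is the EOF page (10000 >> PAGE_SHIFT == 2) and only
 * 10000 & PAGE_OFFSET_MASK = 1808 bytes of it are sent to the host.
 */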
1394
1395# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1396/**
1397 * Called when writing thru the page cache (which we shouldn't be doing).
1398 */
1399int sf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
1400 unsigned len, unsigned flags, struct page **pagep,
1401 void **fsdata)
1402{
1403 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
1404 * the page cache for any writes AFAIK. We could just as well use
1405 * simple_write_begin & simple_write_end here if we think we really
1406 * need to have non-NULL function pointers in the table... */
1407 static uint64_t volatile s_cCalls = 0;
1408 if (s_cCalls++ < 16) {
1409 printk("vboxsf: Unexpected call to sf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1410 (unsigned long long)pos, len, flags);
1411 RTLogBackdoorPrintf("vboxsf: Unexpected call to sf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1412 (unsigned long long)pos, len, flags);
1413# ifdef WARN_ON
1414 WARN_ON(1);
1415# endif
1416 }
1417 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
1418}
1419# endif /* KERNEL_VERSION >= 2.6.24 */
1420
1421# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1422/**
1423 * This is needed to make open accept O_DIRECT as well as dealing with direct
1424 * I/O requests if we don't intercept them earlier.
1425 */
1426# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
1427static ssize_t sf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1428# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1429static ssize_t sf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1430# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1431static ssize_t sf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1432# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
1433static ssize_t sf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1434# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
1435static int sf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1436# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
1437static int sf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1438# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
1439static int sf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1440# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
1441static int sf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
1442# else
1443static int sf_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
1444# endif
1445{
1446 TRACE();
1447 return -EINVAL;
1448}
1449# endif
1450
1451struct address_space_operations sf_reg_aops = {
1452 .readpage = sf_readpage,
1453 .writepage = sf_writepage,
1454 /** @todo Need .writepages if we want msync performance... */
1455# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1456 .set_page_dirty = __set_page_dirty_buffers,
1457# endif
1458# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1459 .write_begin = sf_write_begin,
1460 .write_end = simple_write_end,
1461# else
1462 .prepare_write = simple_prepare_write,
1463 .commit_write = simple_commit_write,
1464# endif
1465# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1466 .direct_IO = sf_direct_IO,
1467# endif
1468};
1469
1470#endif /* LINUX_VERSION_CODE >= 2.6.0 */
1471