VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@ 77539

Last change on this file since 77539 was 77536, checked in by vboxsync, 6 years ago

linux/vboxsf: Early version of non-buffered directories (disabled). bugref:9172

1/* $Id: regops.c 77536 2019-03-02 05:04:45Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31#include "vfsmod.h"
32#include <linux/uio.h>
33#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
34# include <linux/aio.h> /* struct kiocb before 4.1 */
35#endif
36#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
37# include <linux/buffer_head.h>
38#endif
39#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
40 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/writeback.h>
42#endif
43#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
44 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
45# include <linux/splice.h>
46#endif
47#include <iprt/err.h>
48
49#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
50# define SEEK_END 2
51#endif
52
53
54/**
55 * Called when an inode is released to unlink all handles that might possibly
56 * still be associated with it.
57 *
58 * @param pInodeInfo The inode which handles to drop.
59 */
60void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
61{
62 struct vbsf_handle *pCur, *pNext;
63 unsigned long fSavedFlags;
64 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
65 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
66
67 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
68 AssertMsg((pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST),
69 ("%p %#x\n", pCur, pCur->fFlags));
70 pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
71 RTListNodeRemove(&pCur->Entry);
72 }
73
74 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
75}
76
77
78/**
79 * Locates a handle that matches all the flags in @a fFlags.
80 *
81 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
82 * release it. NULL if no suitable handle was found.
83 * @param pInodeInfo The inode info to search.
84 * @param fFlagsSet The flags that must be set.
85 * @param fFlagsClear The flags that must be clear.
86 */
87struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
88{
89 struct vbsf_handle *pCur;
90 unsigned long fSavedFlags;
91 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
92
93 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
94 AssertMsg((pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST),
95 ("%p %#x\n", pCur, pCur->fFlags));
96 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
97 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
98 if (cRefs > 1) {
99 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
100 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
101 return pCur;
102 }
103 /* Oops, already being closed (safe as it's only ever increased here). */
104 ASMAtomicDecU32(&pCur->cRefs);
105 }
106 }
107
108 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
109 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
110 return NULL;
111}
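
The retain/release contract above is the one the page-cache callbacks later in this file rely on; as a minimal illustrative sketch of the pattern (sf_i and sf_g stand for the inode-info and super-info pointers used throughout this file):

    struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
    if (pHandle) {
        /* ... issue host I/O against pHandle->hHost ... */
        vbsf_handle_release(pHandle, sf_g, "my_caller_name_for_logging");
    }
    /* else: no handle with the required access is currently open; fail or open one. */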
112
113
114/**
115 * Slow worker for vbsf_handle_release() that does the freeing.
116 *
117 * @returns 0 (ref count).
118 * @param pHandle The handle to release.
119 * @param sf_g The info structure for the shared folder associated
120 * with the handle.
121 * @param pszCaller The caller name (for logging failures).
122 */
123uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
124{
125 int rc;
126 unsigned long fSavedFlags;
127
128 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
129
130 /*
131 * Remove from the list.
132 */
133 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
134
135 AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
136 Assert(pHandle->pInodeInfo);
137 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
138
139 if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
140 pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
141 RTListNodeRemove(&pHandle->Entry);
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145
146 /*
147 * Actually destroy it.
148 */
149 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
150 if (RT_FAILURE(rc))
151 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
152 pHandle->hHost = SHFL_HANDLE_NIL;
153 pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
154 kfree(pHandle);
155 return 0;
156}
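
For context, vbsf_handle_release() itself is an inline in vfsmod.h; roughly sketched (not the verbatim header, but this is the division of labour the comment above refers to), it drops a reference atomically and only invokes the slow worker above when the last reference goes away:

    /* Sketch of the fast path that fronts vbsf_handle_release_slow(). */
    DECLINLINE(uint32_t) vbsf_handle_release(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
    {
        uint32_t cRefs = ASMAtomicDecU32(&pHandle->cRefs);
        if (cRefs)
            return cRefs;                                           /* other users remain, nothing to do */
        return vbsf_handle_release_slow(pHandle, sf_g, pszCaller);  /* unlink from the inode and close the host handle */
    }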
157
158
159/**
160 * Appends a handle to a handle list.
161 *
162 * @param pInodeInfo The inode to add it to.
163 * @param pHandle The handle to add.
164 */
165void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
166{
167#ifdef VBOX_STRICT
168 struct vbsf_handle *pCur;
169#endif
170 unsigned long fSavedFlags;
171
172 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
173 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
174 ("%p %#x\n", pHandle, pHandle->fFlags));
175 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
176
177 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
178
179 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
180 ("%p %#x\n", pHandle, pHandle->fFlags));
181#ifdef VBOX_STRICT
182 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
183 Assert(pCur != pHandle);
184 AssertMsg((pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | vbSF_HANDLE_F_ON_LIST)) == (VBSF_HANDLE_F_MAGIC | vbSF_HANDLE_F_ON_LIST),
185 ("%p %#x\n", pCur, pCur->fFlags));
186 }
187 pHandle->pInodeInfo = pInodeInfo;
188#endif
189
190 pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
191 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
192
193 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
194}
195
196
197#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
198 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
199
200/*
201 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
202 */
203
204static void vbsf_free_pipebuf(struct page *kpage)
205{
206 kunmap(kpage);
207 __free_pages(kpage, 0);
208}
209
210static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
211{
212 return 0;
213}
214
215static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
216{
217}
218
219static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
220{
221}
222
223static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
224{
225 return 0;
226}
227
228static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
229{
230 vbsf_free_pipebuf(pipe_buf->page);
231}
232
233static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
234{
235 return 0;
236}
237
238static struct pipe_buf_operations vbsf_pipe_buf_ops = {
239 .can_merge = 0,
240 .map = vbsf_pipe_buf_map,
241 .unmap = vbsf_pipe_buf_unmap,
242 .confirm = vbsf_pipe_buf_confirm,
243 .release = vbsf_pipe_buf_release,
244 .steal = vbsf_pipe_buf_steal,
245 .get = vbsf_pipe_buf_get,
246};
247
248static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
249 void *buf, uint32_t *nread, uint64_t pos)
250{
251 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
252 if (RT_FAILURE(rc)) {
253 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
254 rc));
255 return -EPROTO;
256 }
257 return 0;
258}
259
260# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
261# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
262
263ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
264{
265 size_t bytes_remaining = len;
266 loff_t orig_offset = *poffset;
267 loff_t offset = orig_offset;
268 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
269 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
270 struct vbsf_reg_info *sf_r = in->private_data;
271 ssize_t retval;
272 struct page *kpage = 0;
273 size_t nsent = 0;
274
275/** @todo rig up a FsPerf test for this code */
276 TRACE();
277 if (!S_ISREG(inode->i_mode)) {
278 LogFunc(("read from non regular file %d\n", inode->i_mode));
279 return -EINVAL;
280 }
281 if (!len) {
282 return 0;
283 }
284
285 LOCK_PIPE(pipe);
286
287 uint32_t req_size = 0;
288 while (bytes_remaining > 0) {
289 kpage = alloc_page(GFP_KERNEL);
290 if (unlikely(kpage == NULL)) {
291 UNLOCK_PIPE(pipe);
292 return -ENOMEM;
293 }
294 req_size = 0;
295 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
296 uint32_t chunk = 0;
297 void *kbuf = kmap(kpage);
298 while (chunk < req_size) {
299 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
300 if (retval < 0)
301 goto err;
302 if (nread == 0)
303 break;
304 chunk += nread;
305 offset += nread;
306 nread = req_size - chunk;
307 }
308 if (!pipe->readers) {
309 send_sig(SIGPIPE, current, 0);
310 retval = -EPIPE;
311 goto err;
312 }
313 if (pipe->nrbufs < PIPE_BUFFERS) {
314 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
315 pipebuf->page = kpage;
316 pipebuf->ops = &vbsf_pipe_buf_ops;
317 pipebuf->len = req_size;
318 pipebuf->offset = 0;
319 pipebuf->private = 0;
320 pipebuf->flags = 0;
321 pipe->nrbufs++;
322 nsent += req_size;
323 bytes_remaining -= req_size;
324 if (signal_pending(current))
325 break;
326 } else { /* pipe full */
327
328 if (flags & SPLICE_F_NONBLOCK) {
329 retval = -EAGAIN;
330 goto err;
331 }
332 vbsf_free_pipebuf(kpage);
333 break;
334 }
335 }
336 UNLOCK_PIPE(pipe);
337 if (!nsent && signal_pending(current))
338 return -ERESTARTSYS;
339 *poffset += nsent;
340 return offset - orig_offset;
341
342 err:
343 UNLOCK_PIPE(pipe);
344 vbsf_free_pipebuf(kpage);
345 return retval;
346}
347
348#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
349
350
351/** Companion to vbsf_lock_user_pages(). */
352DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty)
353{
354 while (cPages-- > 0)
355 {
356 struct page *pPage = papPages[cPages];
357 if (fSetDirty && !PageReserved(pPage))
358 SetPageDirty(pPage);
359#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
360 put_page(pPage);
361#else
362 page_cache_release(pPage);
363#endif
364 }
365}
366
367
368/** Wrapper around get_user_pages. */
369DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages)
370{
371# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
372 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
373 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
374# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
375 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
376# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
377 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages,
378 fWrite, 1 /*force*/, papPages);
379# else
380 struct task_struct *pTask = current;
381 size_t cPagesLocked;
382 down_read(&pTask->mm->mmap_sem);
383 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
384 up_read(&pTask->mm->mmap_sem);
385# endif
386 if (cPagesLocked == cPages)
387 return 0;
388 if (cPagesLocked < 0)
389 return cPagesLocked;
390
391 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/);
392
393 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
394 return -EFAULT;
395}
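
Taken together with vbsf_unlock_user_pages() above, the calling convention is: lock, translate the pages to physical addresses for the host request, then unlock (marking pages dirty only when the host stored into them). A condensed sketch of the pairing as the fallback read/write paths below use it (buf and cb stand for the user buffer and byte count):

    struct page *apPages[16];
    size_t cPages = RT_ALIGN_Z(((uintptr_t)buf & PAGE_OFFSET_MASK) + cb, PAGE_SIZE) >> PAGE_SHIFT;
    int rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite: the host stores into them*/, apPages);
    if (rc == 0) {
        /* ... pass page_to_phys(apPages[i]) to the host request ... */
        vbsf_unlock_user_pages(apPages, cPages, true /*fSetDirty*/);
    }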
396
397
398/**
399 * Read function used when accessing files that are memory mapped.
400 *
401 * We read from the page cache here to present a coherent picture of the
402 * file content.
403 */
404static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
405{
406#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
407 struct iovec iov = { .iov_base = buf, .iov_len = size };
408 struct iov_iter iter;
409 struct kiocb kiocb;
410 ssize_t cbRet;
411
412 init_sync_kiocb(&kiocb, file);
413 kiocb.ki_pos = *off;
414 iov_iter_init(&iter, READ, &iov, 1, size);
415
416 cbRet = generic_file_read_iter(&kiocb, &iter);
417
418 *off = kiocb.ki_pos;
419 return cbRet;
420
421#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
422 struct iovec iov = { .iov_base = buf, .iov_len = size };
423 struct kiocb kiocb;
424 ssize_t cbRet;
425
426 init_sync_kiocb(&kiocb, file);
427 kiocb.ki_pos = *off;
428
429 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
430 if (cbRet == -EIOCBQUEUED)
431 cbRet = wait_on_sync_kiocb(&kiocb);
432
433 *off = kiocb.ki_pos;
434 return cbRet;
435
436#else /* 2.6.18 or earlier: */
437 return generic_file_read(file, buf, size, off);
438#endif
439}
440
441
442/**
443 * Fallback case of vbsf_reg_read() that locks the user buffers and lets the host
444 * write directly to them.
445 */
446static ssize_t vbsf_reg_read_fallback(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
447 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
448{
449 /*
450 * Lock pages and execute the read, taking care not to pass the host
451 * more than it can handle in one go or more than we care to allocate
452 * page arrays for. The latter limit is set at just short of 32KB due
453 * to how the physical heap works.
454 */
455 struct page *apPagesStack[16];
456 struct page **papPages = &apPagesStack[0];
457 struct page **papPagesFree = NULL;
458 VBOXSFREADPGLSTREQ *pReq;
459 loff_t offFile = *off;
460 ssize_t cbRet = -ENOMEM;
461 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
462 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
463
464 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
465 while (!pReq && cMaxPages > 4) {
466 cMaxPages /= 2;
467 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
468 }
469 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
470 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
471 if (pReq && papPages) {
472 cbRet = 0;
473 for (;;) {
474 /*
475 * Figure out how much to process now and lock the user pages.
476 */
477 int rc;
478 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
479 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
480 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
481 if (cPages <= cMaxPages)
482 cbChunk = size;
483 else {
484 cPages = cMaxPages;
485 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
486 }
487
488 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages);
489 if (rc == 0) {
490 size_t iPage = cPages;
491 while (iPage-- > 0)
492 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
493 } else {
494 cbRet = rc;
495 break;
496 }
497
498 /*
499 * Issue the request and unlock the pages.
500 */
501 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
502
503 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/);
504
505 if (RT_SUCCESS(rc)) {
506 /*
507 * Success, advance position and buffer.
508 */
509 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
510 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
511 cbRet += cbActual;
512 offFile += cbActual;
513 buf = (uint8_t *)buf + cbActual;
514 size -= cbActual;
515
516 /*
517 * Are we done already? If so commit the new file offset.
518 */
519 if (!size || cbActual < cbChunk) {
520 *off = offFile;
521 break;
522 }
523 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
524 /*
525 * The host probably doesn't have enough heap to handle the
526 * request, reduce the page count and retry.
527 */
528 cMaxPages /= 4;
529 Assert(cMaxPages > 0);
530 } else {
531 /*
532 * If we've successfully read stuff, return it rather than
533 * the error. (Not sure if this is such a great idea...)
534 */
535 if (cbRet > 0)
536 *off = offFile;
537 else
538 cbRet = -EPROTO;
539 break;
540 }
541 }
542 }
543 if (papPagesFree)
544 kfree(papPages);
545 if (pReq)
546 VbglR0PhysHeapFree(pReq);
547 return cbRet;
548}
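
To make the chunking arithmetic above concrete (assuming 4KB pages): for a user buffer starting 0x300 bytes into a page, size 0x5000 and cMaxPages = 4, the first pass computes cPages = RT_ALIGN_Z(0x300 + 0x5000, PAGE_SIZE) >> PAGE_SHIFT = 6, clips that to 4 pages, and transfers cbChunk = (4 << PAGE_SHIFT) - 0x300 = 0x3D00 bytes; the remaining 0x1300 bytes are handled on the next loop iteration.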
549
550
551/**
552 * Read from a regular file.
553 *
554 * @param file the file
555 * @param buf the buffer
556 * @param size length of the buffer
557 * @param off offset within the file (in/out).
558 * @returns the number of read bytes on success, Linux error code otherwise
559 */
560static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
561{
562 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
563 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
564 struct vbsf_reg_info *sf_r = file->private_data;
565 struct address_space *mapping = inode->i_mapping;
566
567 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
568
569 if (!S_ISREG(inode->i_mode)) {
570 LogFunc(("read from non regular file %d\n", inode->i_mode));
571 return -EINVAL;
572 }
573
574 /** @todo XXX Check read permission according to inode->i_mode! */
575
576 if (!size)
577 return 0;
578
579 /*
580 * If there is a mapping and O_DIRECT isn't in effect, we must at a minimum
581 * heed dirty pages in the mapping and read from them. For simplicity
582 * though, we just do page cache reading when there are writable
583 * mappings around with any kind of pages loaded.
584 */
585 if ( mapping
586 && mapping->nrpages > 0
587 && mapping_writably_mapped(mapping)
588 && !(file->f_flags & O_DIRECT)
589 && 1 /** @todo make this behaviour configurable */ )
590 return vbsf_reg_read_mapped(file, buf, size, off);
591
592 /*
593 * For small requests, try use an embedded buffer provided we get a heap block
594 * that does not cross page boundaries (see host code).
595 */
596 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
597 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
598 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
599 if ( pReq
600 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
601 ssize_t cbRet;
602 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
603 if (RT_SUCCESS(vrc)) {
604 cbRet = pReq->Parms.cb32Read.u.value32;
605 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
606 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
607 *off += cbRet;
608 else
609 cbRet = -EFAULT;
610 } else
611 cbRet = -EPROTO;
612 VbglR0PhysHeapFree(pReq);
613 return cbRet;
614 }
615 if (pReq)
616 VbglR0PhysHeapFree(pReq);
617 }
618
619#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
620 /*
621 * For medium sized requests try use a bounce buffer.
622 */
623 if (size <= _64K /** @todo make this configurable? */) {
624 void *pvBounce = kmalloc(size, GFP_KERNEL);
625 if (pvBounce) {
626 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
627 if (pReq) {
628 ssize_t cbRet;
629 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
630 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
631 if (RT_SUCCESS(vrc)) {
632 cbRet = pReq->Parms.cb32Read.u.value32;
633 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
634 if (copy_to_user(buf, pvBounce, cbRet) == 0)
635 *off += cbRet;
636 else
637 cbRet = -EFAULT;
638 } else
639 cbRet = -EPROTO;
640 VbglR0PhysHeapFree(pReq);
641 kfree(pvBounce);
642 return cbRet;
643 }
644 kfree(pvBounce);
645 }
646 }
647#endif
648
649 return vbsf_reg_read_fallback(file, buf, size, off, sf_g, sf_r);
650}
651
652
653/**
654 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
655 * the changes written via vbsf_reg_write are made visible to mmap users.
656 */
657DECLINLINE(void) vbsf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
658{
659 /*
660 * Only bother with this if the mapping has any pages in it.
661 *
662 * Note! According to the docs, the last parameter, end, is inclusive (we
663 * would have named it 'last' to indicate this).
664 *
665 * Note! The pre-2.6.12 function might not do enough to ensure consistency
666 * when any of the pages in the range is already mapped.
667 */
668# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
669 if (mapping)
670 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
671# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
672 if (mapping && mapping->nrpages > 0)
673 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
674# else
675 /** @todo ... */
676 RT_NOREF(mapping, offStart, offEnd);
677# endif
678}
679
680
681/**
682 * Fallback case of vbsf_reg_write() that locks the user buffers and lets the host
683 * read directly from them.
684 */
685static ssize_t vbsf_reg_write_fallback(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
686 struct inode *inode, struct vbsf_inode_info *sf_i,
687 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
688{
689 /*
690 * Lock pages and execute the write, taking care not to pass the host
691 * more than it can handle in one go or more than we care to allocate
692 * page arrays for. The latter limit is set at just short of 32KB due
693 * to how the physical heap works.
694 */
695 struct page *apPagesStack[16];
696 struct page **papPages = &apPagesStack[0];
697 struct page **papPagesFree = NULL;
698 VBOXSFWRITEPGLSTREQ *pReq;
699 ssize_t cbRet = -ENOMEM;
700 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
701 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
702
703 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
704 while (!pReq && cMaxPages > 4) {
705 cMaxPages /= 2;
706 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
707 }
708 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
709 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
710 if (pReq && papPages) {
711 cbRet = 0;
712 for (;;) {
713 /*
714 * Figure out how much to process now and lock the user pages.
715 */
716 int rc;
717 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
718 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
719 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
720 if (cPages <= cMaxPages)
721 cbChunk = size;
722 else {
723 cPages = cMaxPages;
724 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
725 }
726
727 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages);
728 if (rc == 0) {
729 size_t iPage = cPages;
730 while (iPage-- > 0)
731 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
732 } else {
733 cbRet = rc;
734 break;
735 }
736
737 /*
738 * Issue the request and unlock the pages.
739 */
740 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
741
742 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/);
743
744 if (RT_SUCCESS(rc)) {
745 /*
746 * Success, advance position and buffer.
747 */
748 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
749 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
750 cbRet += cbActual;
751 offFile += cbActual;
752 buf = (uint8_t *)buf + cbActual;
753 size -= cbActual;
754 if (offFile > i_size_read(inode))
755 i_size_write(inode, offFile);
756 vbsf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
757
758 /*
759 * Are we done already? If so commit the new file offset.
760 */
761 if (!size || cbActual < cbChunk) {
762 *off = offFile;
763 break;
764 }
765 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
766 /*
767 * The host probably doesn't have enough heap to handle the
768 * request, reduce the page count and retry.
769 */
770 cMaxPages /= 4;
771 Assert(cMaxPages > 0);
772 } else {
773 /*
774 * If we've successfully written stuff, return it rather than
775 * the error. (Not sure if this is such a great idea...)
776 */
777 if (cbRet > 0)
778 *off = offFile;
779 else
780 cbRet = -EPROTO;
781 break;
782 }
783 sf_i->force_restat = 1; /* mtime (and size) may have changed */
784 }
785 }
786 if (papPagesFree)
787 kfree(papPages);
788 if (pReq)
789 VbglR0PhysHeapFree(pReq);
790 return cbRet;
791}
792
793
794/**
795 * Write to a regular file.
796 *
797 * @param file the file
798 * @param buf the buffer
799 * @param size length of the buffer
800 * @param off offset within the file
801 * @returns the number of written bytes on success, Linux error code otherwise
802 */
803static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
804{
805 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
806 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
807 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
808 struct vbsf_reg_info *sf_r = file->private_data;
809 struct address_space *mapping = inode->i_mapping;
810 loff_t pos;
811
812 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
813 BUG_ON(!sf_i);
814 BUG_ON(!sf_g);
815 BUG_ON(!sf_r);
816
817 if (!S_ISREG(inode->i_mode)) {
818 LogFunc(("write to non regular file %d\n", inode->i_mode));
819 return -EINVAL;
820 }
821
822 pos = *off;
823 /** @todo This should be handled by the host, it returning the new file
824 * offset when appending. We may have an outdated i_size value here! */
825 if (file->f_flags & O_APPEND)
826 pos = i_size_read(inode);
827
828 /** @todo XXX Check write permission according to inode->i_mode! */
829
830 if (!size) {
831 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
832 *off = pos;
833 return 0;
834 }
835
836 /*
837 * If there are active writable mappings, coordinate with any
838 * pending writes via those.
839 */
840 if ( mapping
841 && mapping->nrpages > 0
842 && mapping_writably_mapped(mapping)) {
843#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
844 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
845 if (err)
846 return err;
847#else
848 /** @todo ... */
849#endif
850 }
851
852 /*
853 * For small requests, try use an embedded buffer provided we get a heap block
854 * that does not cross page boundaries (see host code).
855 */
856 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
857 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
858 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
859 if ( pReq
860 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
861 ssize_t cbRet;
862 if (copy_from_user(pReq->abData, buf, size) == 0) {
863 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
864 pos, (uint32_t)size);
865 if (RT_SUCCESS(vrc)) {
866 cbRet = pReq->Parms.cb32Write.u.value32;
867 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
868 pos += cbRet;
869 *off = pos;
870 if (pos > i_size_read(inode))
871 i_size_write(inode, pos);
872 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
873 } else
874 cbRet = -EPROTO;
875 sf_i->force_restat = 1; /* mtime (and size) may have changed */
876 } else
877 cbRet = -EFAULT;
878
879 VbglR0PhysHeapFree(pReq);
880 return cbRet;
881 }
882 if (pReq)
883 VbglR0PhysHeapFree(pReq);
884 }
885
886#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
887 /*
888 * For medium sized requests try use a bounce buffer.
889 */
890 if (size <= _64K /** @todo make this configurable? */) {
891 void *pvBounce = kmalloc(size, GFP_KERNEL);
892 if (pvBounce) {
893 if (copy_from_user(pvBounce, buf, size) == 0) {
894 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
895 if (pReq) {
896 ssize_t cbRet;
897 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->Handle.hHost, pos,
898 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
899 if (RT_SUCCESS(vrc)) {
900 cbRet = pReq->Parms.cb32Write.u.value32;
901 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
902 pos += cbRet;
903 *off = pos;
904 if (pos > i_size_read(inode))
905 i_size_write(inode, pos);
906 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
907 } else
908 cbRet = -EPROTO;
909 sf_i->force_restat = 1; /* mtime (and size) may have changed */
910 VbglR0PhysHeapFree(pReq);
911 kfree(pvBounce);
912 return cbRet;
913 }
914 kfree(pvBounce);
915 } else {
916 kfree(pvBounce);
917 return -EFAULT;
918 }
919 }
920 }
921#endif
922
923 return vbsf_reg_write_fallback(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
924}
925
926
927/**
928 * Open a regular file.
929 *
930 * @param inode the inode
931 * @param file the file
932 * @returns 0 on success, Linux error code otherwise
933 */
934static int vbsf_reg_open(struct inode *inode, struct file *file)
935{
936 int rc, rc_linux = 0;
937 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
938 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
939 struct vbsf_reg_info *sf_r;
940 struct dentry *dentry = VBSF_GET_F_DENTRY(file);
941 VBOXSFCREATEREQ *pReq;
942
943 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
944 BUG_ON(!sf_g);
945 BUG_ON(!sf_i);
946
947 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
948 if (!sf_r) {
949 LogRelFunc(("could not allocate reg info\n"));
950 return -ENOMEM;
951 }
952
953 RTListInit(&sf_r->Handle.Entry);
954 sf_r->Handle.cRefs = 1;
955 sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
956 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
957
958 /* Already open? */
959 if (sf_i->handle != SHFL_HANDLE_NIL) {
960 /*
961 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
962 * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
963 * about the access flags (SHFL_CF_ACCESS_*).
964 */
965 sf_i->force_restat = 1;
966 sf_r->Handle.hHost = sf_i->handle;
967 sf_i->handle = SHFL_HANDLE_NIL;
968 file->private_data = sf_r;
969
970 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
971 vbsf_handle_append(sf_i, &sf_r->Handle);
972 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
973 return 0;
974 }
975
976 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
977 if (!pReq) {
978 kfree(sf_r);
979 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
980 return -ENOMEM;
981 }
982 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
983 RT_ZERO(pReq->CreateParms);
984 pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
985
986 /* We check the value of pReq->CreateParms.Handle afterwards to
987 * find out if the call succeeded or failed, as the API does not seem
988 * to cleanly distinguish error and informational messages.
989 *
990 * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
991 * to make the shared folders host service use our fMode parameter */
992
993 if (file->f_flags & O_CREAT) {
994 LogFunc(("O_CREAT set\n"));
995 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
996 /* We ignore O_EXCL, as the Linux kernel seems to call create
997 beforehand itself, so O_EXCL should always fail. */
998 if (file->f_flags & O_TRUNC) {
999 LogFunc(("O_TRUNC set\n"));
1000 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1001 } else
1002 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1003 } else {
1004 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1005 if (file->f_flags & O_TRUNC) {
1006 LogFunc(("O_TRUNC set\n"));
1007 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1008 }
1009 }
1010
1011 switch (file->f_flags & O_ACCMODE) {
1012 case O_RDONLY:
1013 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READ;
1014 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ;
1015 break;
1016
1017 case O_WRONLY:
1018 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_WRITE;
1019 sf_r->Handle.fFlags |= VBSF_HANDLE_F_WRITE;
1020 break;
1021
1022 case O_RDWR:
1023 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READWRITE;
1024 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
1025 break;
1026
1027 default:
1028 BUG();
1029 }
1030
1031 if (file->f_flags & O_APPEND) {
1032 LogFunc(("O_APPEND set\n"));
1033 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_APPEND;
1034 sf_r->Handle.fFlags |= VBSF_HANDLE_F_APPEND;
1035 }
1036
1037 pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
1038 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
1039 sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
1040 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
1041 if (RT_FAILURE(rc)) {
1042 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
1043 kfree(sf_r);
1044 VbglR0PhysHeapFree(pReq);
1045 return -RTErrConvertToErrno(rc);
1046 }
1047
1048 if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
1049 vbsf_dentry_chain_increase_ttl(dentry);
1050 rc_linux = 0;
1051 } else {
1052 switch (pReq->CreateParms.Result) {
1053 case SHFL_PATH_NOT_FOUND:
1054 rc_linux = -ENOENT;
1055 break;
1056 case SHFL_FILE_NOT_FOUND:
1057 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
1058 rc_linux = -ENOENT;
1059 break;
1060 case SHFL_FILE_EXISTS:
1061 vbsf_dentry_chain_increase_ttl(dentry);
1062 rc_linux = -EEXIST;
1063 break;
1064 default:
1065 vbsf_dentry_chain_increase_parent_ttl(dentry);
1066 rc_linux = 0;
1067 break;
1068 }
1069 }
1070
1071 sf_i->force_restat = 1; /** @todo Why?!? */
1072 sf_r->Handle.hHost = pReq->CreateParms.Handle;
1073 file->private_data = sf_r;
1074 vbsf_handle_append(sf_i, &sf_r->Handle);
1075 VbglR0PhysHeapFree(pReq);
1076 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1077 return rc_linux;
1078}
1079
1080
1081/**
1082 * Close a regular file.
1083 *
1084 * @param inode the inode
1085 * @param file the file
1086 * @returns 0 on success, Linux error code otherwise
1087 */
1088static int vbsf_reg_release(struct inode *inode, struct file *file)
1089{
1090 struct vbsf_reg_info *sf_r;
1091 struct vbsf_super_info *sf_g;
1092 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1093
1094 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
1095 sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1096 sf_r = file->private_data;
1097
1098 BUG_ON(!sf_g);
1099 BUG_ON(!sf_r);
1100
1101#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
1102 /* See the smbfs source (file.c). mmap in particular can cause data to be
1103 * written to the file after it is closed, which we can't cope with. We
1104 * copy and paste the body of filemap_write_and_wait() here as it was not
1105 * defined before 2.6.6 and not exported until quite a bit later. */
1106 /* filemap_write_and_wait(inode->i_mapping); */
1107 if (inode->i_mapping->nrpages
1108 && filemap_fdatawrite(inode->i_mapping) != -EIO)
1109 filemap_fdatawait(inode->i_mapping);
1110#endif
1111
1112 /* Release sf_r, closing the handle if we're the last user. */
1113 file->private_data = NULL;
1114 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
1115
1116 sf_i->handle = SHFL_HANDLE_NIL;
1117 return 0;
1118}
1119
1120/**
1121 * Wrapper around generic/default seek function that ensures that we've got
1122 * the up-to-date file size when doing anything relative to EOF.
1123 *
1124 * The issue is that the host may extend the file while we weren't looking and
1125 * if the caller wishes to append data, it may end up overwriting existing data
1126 * if we operate with a stale size. So, we always retrieve the file size on EOF
1127 * relative seeks.
1128 */
1129static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
1130{
1131 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
1132
1133 switch (whence) {
1134#ifdef SEEK_HOLE
1135 case SEEK_HOLE:
1136 case SEEK_DATA:
1137#endif
1138 case SEEK_END: {
1139 struct vbsf_reg_info *sf_r = file->private_data;
1140 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost, true /*fForce*/,
1141 false /*fInodeLocked*/);
1142 if (rc == 0)
1143 break;
1144 return rc;
1145 }
1146 }
1147
1148#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
1149 return generic_file_llseek(file, off, whence);
1150#else
1151 return default_llseek(file, off, whence);
1152#endif
1153}
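
The race being guarded against is easiest to see from guest user space; a small illustrative program (the mount point and file name are made up):

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        /* Assume the host has just grown the file beyond the size cached in the
         * guest inode.  Without the revalidation above, SEEK_END would be computed
         * from the stale size and the write would land on top of existing data
         * instead of at the real end of the file. */
        int fd = open("/mnt/shared/log.txt", O_WRONLY);
        if (fd >= 0) {
            lseek(fd, 0, SEEK_END);
            write(fd, "appended\n", 9);
            close(fd);
        }
        return 0;
    }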
1154
1155/**
1156 * Flush region of file - chiefly mmap/msync.
1157 *
1158 * We cannot use the noop_fsync / simple_sync_file here as that means
1159 * msync(,,MS_SYNC) will return before the data hits the host, thereby
1160 * causing coherency issues with O_DIRECT access to the same file as
1161 * well as any host interaction with the file.
1162 */
1163#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
1164static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1165{
1166# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1167 return __generic_file_fsync(file, start, end, datasync);
1168# else
1169 return generic_file_fsync(file, start, end, datasync);
1170# endif
1171}
1172#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
1173static int vbsf_reg_fsync(struct file *file, int datasync)
1174{
1175 return generic_file_fsync(file, datasync);
1176}
1177#else /* < 2.6.35 */
1178static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
1179{
1180# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
1181 return simple_fsync(file, dentry, datasync);
1182# else
1183 int rc;
1184 struct inode *inode = dentry->d_inode;
1185 AssertReturn(inode, -EINVAL);
1186
1187 /** @todo What about file_fsync()? (<= 2.5.11) */
1188
1189# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1190 rc = sync_mapping_buffers(inode->i_mapping);
1191 if ( rc == 0
1192 && (inode->i_state & I_DIRTY)
1193 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
1194 ) {
1195 struct writeback_control wbc = {
1196 .sync_mode = WB_SYNC_ALL,
1197 .nr_to_write = 0
1198 };
1199 rc = sync_inode(inode, &wbc);
1200 }
1201# else /* < 2.5.12 */
1202 rc = fsync_inode_buffers(inode);
1203# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1204 rc |= fsync_inode_data_buffers(inode);
1205# endif
1206 /** @todo probably need to do more here... */
1207# endif /* < 2.5.12 */
1208 return rc;
1209# endif
1210}
1211#endif /* < 2.6.35 */
1212
1213
1214/**
1215 * File operations for regular files.
1216 */
1217struct file_operations vbsf_reg_fops = {
1218 .read = vbsf_reg_read,
1219 .open = vbsf_reg_open,
1220 .write = vbsf_reg_write,
1221 .release = vbsf_reg_release,
1222 .mmap = generic_file_mmap,
1223#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1224# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
1225/** @todo This code is known to cause caching of data which should not be
1226 * cached. Investigate. */
1227# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
1228 .splice_read = vbsf_splice_read,
1229# else
1230 .sendfile = generic_file_sendfile,
1231# endif
1232 .aio_read = generic_file_aio_read,
1233 .aio_write = generic_file_aio_write,
1234# endif
1235#endif
1236 .llseek = vbsf_reg_llseek,
1237 .fsync = vbsf_reg_fsync,
1238};
1239
1240struct inode_operations vbsf_reg_iops = {
1241#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
1242 .revalidate = vbsf_inode_revalidate
1243#else
1244 .getattr = vbsf_inode_getattr,
1245 .setattr = vbsf_inode_setattr
1246#endif
1247};
1248
1249
1250#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1251
1252/**
1253 * Used to read the content of a page into the page cache.
1254 *
1255 * Needed for mmap and reads+writes when the file is mmapped in a
1256 * shared+writeable fashion.
1257 */
1258static int vbsf_readpage(struct file *file, struct page *page)
1259{
1260 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
1261 int err;
1262
1263 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
1264
1265 if (!is_bad_inode(inode)) {
1266 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1267 if (pReq) {
1268 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1269 struct vbsf_reg_info *sf_r = file->private_data;
1270 uint32_t cbRead;
1271 int vrc;
1272
1273 pReq->PgLst.offFirstPage = 0;
1274 pReq->PgLst.aPages[0] = page_to_phys(page);
1275 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
1276 pReq,
1277 sf_r->Handle.hHost,
1278 (uint64_t)page->index << PAGE_SHIFT,
1279 PAGE_SIZE,
1280 1 /*cPages*/);
1281
1282 cbRead = pReq->Parms.cb32Read.u.value32;
1283 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
1284 VbglR0PhysHeapFree(pReq);
1285
1286 if (RT_SUCCESS(vrc)) {
1287 if (cbRead == PAGE_SIZE) {
1288 /* likely */
1289 } else {
1290 uint8_t *pbMapped = (uint8_t *)kmap(page);
1291 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
1292 kunmap(page);
1293 /** @todo truncate the inode file size? */
1294 }
1295
1296 flush_dcache_page(page);
1297 SetPageUptodate(page);
1298 err = 0;
1299 } else
1300 err = -EPROTO;
1301 } else
1302 err = -ENOMEM;
1303 } else
1304 err = -EIO;
1305 unlock_page(page);
1306 return err;
1307}
1308
1309
1310/**
1311 * Used to write out the content of a dirty page cache page to the host file.
1312 *
1313 * Needed for mmap and writes when the file is mmapped in a shared+writeable
1314 * fashion.
1315 */
1316static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
1317{
1318 struct address_space *mapping = page->mapping;
1319 struct inode *inode = mapping->host;
1320 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1321 struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
1322 int err;
1323
1324 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
1325 inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0));
1326
1327 if (pHandle) {
1328 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1329 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1330 if (pReq) {
1331 uint64_t const cbFile = i_size_read(inode);
1332 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
1333 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
1334 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
1335 int vrc;
1336
1337 pReq->PgLst.offFirstPage = 0;
1338 pReq->PgLst.aPages[0] = page_to_phys(page);
1339 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
1340 pReq,
1341 pHandle->hHost,
1342 offInFile,
1343 cbToWrite,
1344 1 /*cPages*/);
1345 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
1346 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
1347 vrc = VERR_WRITE_ERROR);
1348 VbglR0PhysHeapFree(pReq);
1349
1350 if (RT_SUCCESS(vrc)) {
1351 /* Update the inode if we've extended the file. */
1352 /** @todo is this necessary given the cbToWrite calc above? */
1353 uint64_t const offEndOfWrite = offInFile + cbToWrite;
1354 if ( offEndOfWrite > cbFile
1355 && offEndOfWrite > i_size_read(inode))
1356 i_size_write(inode, offEndOfWrite);
1357
1358 if (PageError(page))
1359 ClearPageError(page);
1360
1361 err = 0;
1362 } else {
1363 ClearPageUptodate(page);
1364 err = -EPROTO;
1365 }
1366 } else
1367 err = -ENOMEM;
1368 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
1369 } else {
1370 static uint64_t volatile s_cCalls = 0;
1371 if (s_cCalls++ < 16)
1372 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
1373 err = -EPROTO;
1374 }
1375 unlock_page(page);
1376 return err;
1377}
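
A quick worked example of the cbToWrite computation above (assuming 4KB pages): with i_size = 0x2600, pages 0 and 1 are not the EOF page (0x2600 >> PAGE_SHIFT == 2) and are written out in full PAGE_SIZE chunks, while page 2 writes only 0x600 bytes (0x2600 & PAGE_OFFSET_MASK), so data beyond end-of-file never reaches the host.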
1378
1379# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1380/**
1381 * Called when writing thru the page cache (which we shouldn't be doing).
1382 */
1383int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
1384 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
1385{
1386 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
1387 * the page cache for any writes AFAIK. We could just as well use
1388 * simple_write_begin & simple_write_end here if we think we really
1389 * need to have non-NULL function pointers in the table... */
1390 static uint64_t volatile s_cCalls = 0;
1391 if (s_cCalls++ < 16) {
1392 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1393 (unsigned long long)pos, len, flags);
1394 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1395 (unsigned long long)pos, len, flags);
1396# ifdef WARN_ON
1397 WARN_ON(1);
1398# endif
1399 }
1400 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
1401}
1402# endif /* KERNEL_VERSION >= 2.6.24 */
1403
1404# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1405/**
1406 * This is needed to make open accept O_DIRECT as well as dealing with direct
1407 * I/O requests if we don't intercept them earlier.
1408 */
1409# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
1410static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1411# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1412static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1413# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1414static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1415# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
1416static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1417# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
1418static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1419# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
1420static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1421# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
1422static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1423# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
1424static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
1425# else
1426static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
1427# endif
1428{
1429 TRACE();
1430 return -EINVAL;
1431}
1432# endif
1433
1434/**
1435 * Address space (for the page cache) operations for regular files.
1436 */
1437struct address_space_operations vbsf_reg_aops = {
1438 .readpage = vbsf_readpage,
1439 .writepage = vbsf_writepage,
1440 /** @todo Need .writepages if we want msync performance... */
1441# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1442 .set_page_dirty = __set_page_dirty_buffers,
1443# endif
1444# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1445 .write_begin = vbsf_write_begin,
1446 .write_end = simple_write_end,
1447# else
1448 .prepare_write = simple_prepare_write,
1449 .commit_write = simple_commit_write,
1450# endif
1451# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1452 .direct_IO = vbsf_direct_IO,
1453# endif
1454};
1455
1456#endif /* LINUX_VERSION_CODE >= 2.6.0 */
1457