VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@77549

Last change on this file since 77549 was 77549, checked in by vboxsync, 6 years ago

linux/vboxsf: Workaround for kernel_read/write calls (get_user_pages fails), fixing issue with sporadic ENOEXEC from execve(). The workaround requires rtR0MemObjLinuxVirtToPage() from memobj-r0drv-linux.c. Various cleanups and adjustments. bugref:9172

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 56.4 KB
1/* $Id: regops.c 77549 2019-03-04 10:00:34Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31#include "vfsmod.h"
32#include <linux/uio.h>
33#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
34# include <linux/aio.h> /* struct kiocb before 4.1 */
35#endif
36#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
37# include <linux/buffer_head.h>
38#endif
39#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
40 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/writeback.h>
42#endif
43#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
44 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
45# include <linux/splice.h>
46#endif
47#include <iprt/err.h>
48
49#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
50# define SEEK_END 2
51#endif
52
53
54/**
55 * Called when an inode is released to unlink all handles that might possibly
56 * still be associated with it.
57 *
58 * @param pInodeInfo The inode which handles to drop.
59 */
60void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
61{
62 struct vbsf_handle *pCur, *pNext;
63 unsigned long fSavedFlags;
64 SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
65 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
66
67 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
68 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
69 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
70 pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
71 RTListNodeRemove(&pCur->Entry);
72 }
73
74 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
75}
76
77
78/**
79 * Locates a handle that matches all the flags in @a fFlags.
80 *
81 * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
82 * release it. NULL if no suitable handle was found.
83 * @param pInodeInfo The inode info to search.
84 * @param fFlagsSet The flags that must be set.
85 * @param fFlagsClear The flags that must be clear.
86 */
87struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
88{
89 struct vbsf_handle *pCur;
90 unsigned long fSavedFlags;
91 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
92
93 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
94 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
95 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
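    /* A match requires every fFlagsSet bit to be set and every fFlagsClear bit to be clear; one masked compare checks both. */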
96 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
97 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
98 if (cRefs > 1) {
99 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
100 SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
101 return pCur;
102 }
103 /* Oops, already being closed (safe as it's only ever increased here). */
104 ASMAtomicDecU32(&pCur->cRefs);
105 }
106 }
107
108 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
109 SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
110 return NULL;
111}
112
113
114/**
115 * Slow worker for vbsf_handle_release() that does the freeing.
116 *
117 * @returns 0 (ref count).
118 * @param pHandle The handle to release.
119 * @param sf_g The info structure for the shared folder associated
120 * with the handle.
121 * @param pszCaller The caller name (for logging failures).
122 */
123uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
124{
125 int rc;
126 unsigned long fSavedFlags;
127
128 SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
129
130 /*
131 * Remove from the list.
132 */
133 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
134
135 AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
136 Assert(pHandle->pInodeInfo);
137 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
138
139 if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
140 pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
141 RTListNodeRemove(&pHandle->Entry);
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145
146 /*
147 * Actually destroy it.
148 */
149 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
150 if (RT_FAILURE(rc))
151 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
152 pHandle->hHost = SHFL_HANDLE_NIL;
153 pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
154 kfree(pHandle);
155 return 0;
156}
157
158
159/**
160 * Appends a handle to a handle list.
161 *
162 * @param pInodeInfo The inode to add it to.
163 * @param pHandle The handle to add.
164 */
165void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
166{
167#ifdef VBOX_STRICT
168 struct vbsf_handle *pCur;
169#endif
170 unsigned long fSavedFlags;
171
172 SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
173 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
174 ("%p %#x\n", pHandle, pHandle->fFlags));
175 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
176
177 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
178
179 AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
180 ("%p %#x\n", pHandle, pHandle->fFlags));
181#ifdef VBOX_STRICT
182 RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
183 Assert(pCur != pHandle);
184 AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
185 == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
186 }
187 pHandle->pInodeInfo = pInodeInfo;
188#endif
189
190 pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
191 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
192
193 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
194}
195
196
197#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
198 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
199
200/*
201 * Some pipe stuff we apparently need for 2.6.23-2.6.30.
202 */
203
204static void vbsf_free_pipebuf(struct page *kpage)
205{
206 kunmap(kpage);
207 __free_pages(kpage, 0);
208}
209
210static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
211{
212 return 0;
213}
214
215static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
216{
217}
218
219static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
220{
221}
222
223static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
224{
225 return 0;
226}
227
228static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
229{
230 vbsf_free_pipebuf(pipe_buf->page);
231}
232
233static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
234{
235 return 0;
236}
237
238static struct pipe_buf_operations vbsf_pipe_buf_ops = {
239 .can_merge = 0,
240 .map = vbsf_pipe_buf_map,
241 .unmap = vbsf_pipe_buf_unmap,
242 .confirm = vbsf_pipe_buf_confirm,
243 .release = vbsf_pipe_buf_release,
244 .steal = vbsf_pipe_buf_steal,
245 .get = vbsf_pipe_buf_get,
246};
247
248static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
249 void *buf, uint32_t *nread, uint64_t pos)
250{
251 int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
252 if (RT_FAILURE(rc)) {
253 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
254 rc));
255 return -EPROTO;
256 }
257 return 0;
258}
259
260# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
261# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
262
263ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
264{
265 size_t bytes_remaining = len;
266 loff_t orig_offset = *poffset;
267 loff_t offset = orig_offset;
268 struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
269 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
270 struct vbsf_reg_info *sf_r = in->private_data;
271 ssize_t retval;
272 struct page *kpage = 0;
273 size_t nsent = 0;
274
275/** @todo rig up a FsPerf test for this code */
276 TRACE();
277 if (!S_ISREG(inode->i_mode)) {
278 LogFunc(("read from non regular file %d\n", inode->i_mode));
279 return -EINVAL;
280 }
281 if (!len) {
282 return 0;
283 }
284
285 LOCK_PIPE(pipe);
286
287 uint32_t req_size = 0;
288 while (bytes_remaining > 0) {
289 kpage = alloc_page(GFP_KERNEL);
290 if (unlikely(kpage == NULL)) {
291 UNLOCK_PIPE(pipe);
292 return -ENOMEM;
293 }
294 req_size = 0;
295 uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
296 uint32_t chunk = 0;
297 void *kbuf = kmap(kpage);
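    /* The host may return short reads, so loop until the page chunk holds req_size bytes or EOF (nread == 0) is reached. */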
298 while (chunk < req_size) {
299 retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
300 if (retval < 0)
301 goto err;
302 if (nread == 0)
303 break;
304 chunk += nread;
305 offset += nread;
306 nread = req_size - chunk;
307 }
308 if (!pipe->readers) {
309 send_sig(SIGPIPE, current, 0);
310 retval = -EPIPE;
311 goto err;
312 }
313 if (pipe->nrbufs < PIPE_BUFFERS) {
314 struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
315 pipebuf->page = kpage;
316 pipebuf->ops = &vbsf_pipe_buf_ops;
317 pipebuf->len = req_size;
318 pipebuf->offset = 0;
319 pipebuf->private = 0;
320 pipebuf->flags = 0;
321 pipe->nrbufs++;
322 nsent += req_size;
323 bytes_remaining -= req_size;
324 if (signal_pending(current))
325 break;
326 } else { /* pipe full */
327
328 if (flags & SPLICE_F_NONBLOCK) {
329 retval = -EAGAIN;
330 goto err;
331 }
332 vbsf_free_pipebuf(kpage);
333 break;
334 }
335 }
336 UNLOCK_PIPE(pipe);
337 if (!nsent && signal_pending(current))
338 return -ERESTARTSYS;
339 *poffset += nsent;
340 return offset - orig_offset;
341
342 err:
343 UNLOCK_PIPE(pipe);
344 vbsf_free_pipebuf(kpage);
345 return retval;
346}
347
348#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
349
350/** Wrapper around put_page / page_cache_release. */
351DECLINLINE(void) vbsf_put_page(struct page *pPage)
352{
353#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
354 put_page(pPage);
355#else
356 page_cache_release(pPage);
357#endif
358}
359
360
361/** Wrapper around get_page / page_cache_get. */
362DECLINLINE(void) vbsf_get_page(struct page *pPage)
363{
364#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
365 get_page(pPage);
366#else
367 page_cache_get(pPage);
368#endif
369}
370
371
372/** Companion to vbsf_lock_user_pages(). */
373DECLINLINE(void) vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack)
374{
375 /* We don't mark kernel pages dirty: */
376 if (fLockPgHack)
377 fSetDirty = false;
378
379 while (cPages-- > 0)
380 {
381 struct page *pPage = papPages[cPages];
382 if (fSetDirty && !PageReserved(pPage))
383 SetPageDirty(pPage);
384 vbsf_put_page(pPage);
385 }
386}
387
388
389/**
390 * Catches kernel_read() and kernel_write() calls and works around them.
391 *
392 * The file_operations::read and file_operations::write callbacks supposedly
393 * hand us the user buffers to read into and write out of. To let the kernel
394 * read and write without allocating buffers in userland, kernel_read() and
395 * kernel_write() raise the user space address limit before calling us so that
396 * copyin/copyout won't reject the addresses. Our problem is that
397 * get_user_pages() works on the userspace address space structures and will
398 * not be fooled by an increased addr_limit.
399 *
400 * This code tries to detect this situation and fake the get_user_pages()
401 * locking for the kernel buffer.
402 */
403static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
404 struct page **papPages, bool *pfLockPgHack)
405{
406 /*
407 * Check that this is valid user memory that is actually in the kernel range.
408 */
409#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
410 if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
411 && uPtrFrom >= USER_DS.seg)
412#else
413 if ( access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
414 && uPtrFrom >= USER_DS.seg)
415#endif
416 {
417 uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1;
418 uint8_t *pbPage = (uint8_t *)uPtrLast;
419 size_t iPage = cPages;
420
421 /*
422 * Touch the pages first (paranoia^2).
423 */
424 if (fWrite) {
425 uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom;
426 while (iPage-- > 0) {
427 *pbProbe = *pbProbe;
428 pbProbe += PAGE_SIZE;
429 }
430 } else {
431 uint8_t const *pbProbe = (uint8_t const *)uPtrFrom;
432 while (iPage-- > 0) {
433 ASMProbeReadByte(pbProbe);
434 pbProbe += PAGE_SIZE;
435 }
436 }
437
438 /*
439 * Get the pages.
440 * Note! Fixes here probably apply to rtR0MemObjNativeLockKernel as well.
441 */
442 iPage = cPages;
443 if ( uPtrFrom >= (unsigned long)__va(0)
444 && uPtrLast < (unsigned long)high_memory)
445 {
446 /* The physical page mapping area: */
447 while (iPage-- > 0)
448 {
449 struct page *pPage = papPages[iPage] = virt_to_page(pbPage);
450 vbsf_get_page(pPage);
451 pbPage -= PAGE_SIZE;
452 }
453 }
454 else
455 {
456 /* This is vmalloc or some such thing, so go thru page tables: */
457 while (iPage-- > 0)
458 {
459 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
460 if (pPage) {
461 papPages[iPage] = pPage;
462 vbsf_get_page(pPage);
463 pbPage -= PAGE_SIZE;
464 } else {
465 while (++iPage < cPages) {
466 pPage = papPages[iPage];
467 vbsf_put_page(pPage);
468 }
469 return rcFailed;
470 }
471 }
472 }
473 *pfLockPgHack = true;
474 return 0;
475 }
476
477 return rcFailed;
478}
479
480
481/** Wrapper around get_user_pages. */
482DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
483{
484# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
485 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
486 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
487# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
488 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
489# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
490 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
491# else
492 struct task_struct *pTask = current;
493 size_t cPagesLocked;
494 down_read(&pTask->mm->mmap_sem);
495 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
496 up_read(&pTask->mm->mmap_sem);
497# endif
498 *pfLockPgHack = false;
499 if (cPagesLocked == cPages)
500 return 0;
501
502 /*
503 * It failed.
504 */
505 if (cPagesLocked < 0)
506 return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
507
508 vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
509
510 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
511 return -EFAULT;
512}
513
514
515/**
516 * Read function used when accessing files that are memory mapped.
517 *
518 * We read from the page cache here to present a coherent picture of the
519 * file content.
520 */
521static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
522{
523#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
524 struct iovec iov = { .iov_base = buf, .iov_len = size };
525 struct iov_iter iter;
526 struct kiocb kiocb;
527 ssize_t cbRet;
528
529 init_sync_kiocb(&kiocb, file);
530 kiocb.ki_pos = *off;
531 iov_iter_init(&iter, READ, &iov, 1, size);
532
533 cbRet = generic_file_read_iter(&kiocb, &iter);
534
535 *off = kiocb.ki_pos;
536 return cbRet;
537
538#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
539 struct iovec iov = { .iov_base = buf, .iov_len = size };
540 struct kiocb kiocb;
541 ssize_t cbRet;
542
543 init_sync_kiocb(&kiocb, file);
544 kiocb.ki_pos = *off;
545
546 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
547 if (cbRet == -EIOCBQUEUED)
548 cbRet = wait_on_sync_kiocb(&kiocb);
549
550 *off = kiocb.ki_pos;
551 return cbRet;
552
553#else /* 2.6.18 or earlier: */
554 return generic_file_read(file, buf, size, off);
555#endif
556}
557
558
559/**
560 * Fallback case of vbsf_reg_read() that locks the user buffers and lets the
561 * host write directly into them.
562 */
563static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
564 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
565{
566 /*
567 * Lock pages and execute the read, taking care not to pass the host
568 * more than it can handle in one go or more than we care to allocate
569 * page arrays for. The latter limit is set at just short of 32KB due
570 * to how the physical heap works.
571 */
572 struct page *apPagesStack[16];
573 struct page **papPages = &apPagesStack[0];
574 struct page **papPagesFree = NULL;
575 VBOXSFREADPGLSTREQ *pReq;
576 loff_t offFile = *off;
577 ssize_t cbRet = -ENOMEM;
578 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
579 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
580 bool fLockPgHack;
581
582 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
583 while (!pReq && cMaxPages > 4) {
584 cMaxPages /= 2;
585 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
586 }
587 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
588 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
589 if (pReq && papPages) {
590 cbRet = 0;
591 for (;;) {
592 /*
593 * Figure out how much to process now and lock the user pages.
594 */
595 int rc;
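    /* cbChunk starts out as the offset of buf within its page (for offFirstPage) and is then reused as the byte count for this iteration. */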
596 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
597 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
598 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
599 if (cPages <= cMaxPages)
600 cbChunk = size;
601 else {
602 cPages = cMaxPages;
603 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
604 }
605
606 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack);
607 if (rc == 0) {
608 size_t iPage = cPages;
609 while (iPage-- > 0)
610 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
611 } else {
612 /** @todo may need fallback here for kernel addresses during exec. sigh. */
613 cbRet = rc;
614 break;
615 }
616
617 /*
618 * Issue the request and unlock the pages.
619 */
620 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
621
622 vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack);
623
624 if (RT_SUCCESS(rc)) {
625 /*
626 * Success, advance position and buffer.
627 */
628 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
629 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
630 cbRet += cbActual;
631 offFile += cbActual;
632 buf = (uint8_t *)buf + cbActual;
633 size -= cbActual;
634
635 /*
636 * Are we done already? If so commit the new file offset.
637 */
638 if (!size || cbActual < cbChunk) {
639 *off = offFile;
640 break;
641 }
642 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
643 /*
644 * The host probably doesn't have enough heap to handle the
645 * request, reduce the page count and retry.
646 */
647 cMaxPages /= 4;
648 Assert(cMaxPages > 0);
649 } else {
650 /*
651 * If we've successfully read stuff, return it rather than
652 * the error. (Not sure if this is such a great idea...)
653 */
654 if (cbRet > 0)
655 *off = offFile;
656 else
657 cbRet = -EPROTO;
658 break;
659 }
660 }
661 }
662 if (papPagesFree)
663 kfree(papPages);
664 if (pReq)
665 VbglR0PhysHeapFree(pReq);
666 return cbRet;
667}
668
669
670/**
671 * Read from a regular file.
672 *
673 * @param file the file
674 * @param buf the buffer
675 * @param size length of the buffer
676 * @param off offset within the file (in/out).
677 * @returns the number of read bytes on success, Linux error code otherwise
678 */
679static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
680{
681 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
682 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
683 struct vbsf_reg_info *sf_r = file->private_data;
684 struct address_space *mapping = inode->i_mapping;
685
686 SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
687
688 if (!S_ISREG(inode->i_mode)) {
689 LogFunc(("read from non regular file %d\n", inode->i_mode));
690 return -EINVAL;
691 }
692
693 /** @todo XXX Check read permission according to inode->i_mode! */
694
695 if (!size)
696 return 0;
697
698 /*
699 * If there is a mapping and O_DIRECT isn't in effect, we must heed dirty
700 * pages in the mapping and read from them. For simplicity
701 * though, we just do page cache reading when there are writable
702 * mappings around with any kind of pages loaded.
703 */
704 if ( mapping
705 && mapping->nrpages > 0
706 && mapping_writably_mapped(mapping)
707 && !(file->f_flags & O_DIRECT)
708 && 1 /** @todo make this behaviour configurable */ )
709 return vbsf_reg_read_mapped(file, buf, size, off);
710
711 /*
712 * For small requests, try to use an embedded buffer, provided we get a heap
713 * block that does not cross page boundaries (see host code).
714 */
715 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
716 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
717 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
718 if ( pReq
719 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
720 ssize_t cbRet;
721 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
722 if (RT_SUCCESS(vrc)) {
723 cbRet = pReq->Parms.cb32Read.u.value32;
724 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
725 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
726 *off += cbRet;
727 else
728 cbRet = -EFAULT;
729 } else
730 cbRet = -EPROTO;
731 VbglR0PhysHeapFree(pReq);
732 return cbRet;
733 }
734 if (pReq)
735 VbglR0PhysHeapFree(pReq);
736 }
737
738#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
739 /*
740 * For medium sized requests try use a bounce buffer.
741 */
742 if (size <= _64K /** @todo make this configurable? */) {
743 void *pvBounce = kmalloc(size, GFP_KERNEL);
744 if (pvBounce) {
745 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
746 if (pReq) {
747 ssize_t cbRet;
748 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
749 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
750 if (RT_SUCCESS(vrc)) {
751 cbRet = pReq->Parms.cb32Read.u.value32;
752 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
753 if (copy_to_user(buf, pvBounce, cbRet) == 0)
754 *off += cbRet;
755 else
756 cbRet = -EFAULT;
757 } else
758 cbRet = -EPROTO;
759 VbglR0PhysHeapFree(pReq);
760 kfree(pvBounce);
761 return cbRet;
762 }
763 kfree(pvBounce);
764 }
765 }
766#endif
767
768 return vbsf_reg_read_locking(file, buf, size, off, sf_g, sf_r);
769}
770
771
772/**
773 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
774 * the changes written via vbsf_reg_write are made visible to mmap users.
775 */
776DECLINLINE(void) vbsf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
777{
778 /*
779 * Only bother with this if the mapping has any pages in it.
780 *
781 * Note! According to the docs, the last parameter, end, is inclusive (we
782 * would have named it 'last' to indicate this).
783 *
784 * Note! The pre-2.6.12 function might not do enough to ensure consistency
785 * when any of the pages in the range is already mapped.
786 */
787# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
788 if (mapping)
789 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
790# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
791 if (mapping && mapping->nrpages > 0)
792 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
793# else
794 /** @todo ... */
795 RT_NOREF(mapping, offStart, offEnd);
796# endif
797}
798
799
800/**
801 * Fallback case of vbsf_reg_write() that locks the user buffers and lets the
802 * host read directly from them.
803 */
804static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
805 struct inode *inode, struct vbsf_inode_info *sf_i,
806 struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r)
807{
808 /*
809 * Lock pages and execute the write, taking care not to pass the host
810 * more than it can handle in one go or more than we care to allocate
811 * page arrays for. The latter limit is set at just short of 32KB due
812 * to how the physical heap works.
813 */
814 struct page *apPagesStack[16];
815 struct page **papPages = &apPagesStack[0];
816 struct page **papPagesFree = NULL;
817 VBOXSFWRITEPGLSTREQ *pReq;
818 ssize_t cbRet = -ENOMEM;
819 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
820 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
821 bool fLockPgHack;
822
823 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
824 while (!pReq && cMaxPages > 4) {
825 cMaxPages /= 2;
826 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
827 }
828 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
829 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
830 if (pReq && papPages) {
831 cbRet = 0;
832 for (;;) {
833 /*
834 * Figure out how much to process now and lock the user pages.
835 */
836 int rc;
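    /* As in the read path, cbChunk starts out as the offset of buf within its page and is then reused as the byte count for this iteration. */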
837 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
838 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
839 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
840 if (cPages <= cMaxPages)
841 cbChunk = size;
842 else {
843 cPages = cMaxPages;
844 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
845 }
846
847 rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack);
848 if (rc == 0) {
849 size_t iPage = cPages;
850 while (iPage-- > 0)
851 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
852 } else {
853 cbRet = rc;
854 break;
855 }
856
857 /*
858 * Issue the request and unlock the pages.
859 */
860 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
861
862 vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
863
864 if (RT_SUCCESS(rc)) {
865 /*
866 * Success, advance position and buffer.
867 */
868 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
869 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
870 cbRet += cbActual;
871 offFile += cbActual;
872 buf = (uint8_t *)buf + cbActual;
873 size -= cbActual;
874 if (offFile > i_size_read(inode))
875 i_size_write(inode, offFile);
876 vbsf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
877
878 /*
879 * Are we done already? If so commit the new file offset.
880 */
881 if (!size || cbActual < cbChunk) {
882 *off = offFile;
883 break;
884 }
885 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
886 /*
887 * The host probably doesn't have enough heap to handle the
888 * request, reduce the page count and retry.
889 */
890 cMaxPages /= 4;
891 Assert(cMaxPages > 0);
892 } else {
893 /*
894 * If we've successfully written stuff, return it rather than
895 * the error. (Not sure if this is such a great idea...)
896 */
897 if (cbRet > 0)
898 *off = offFile;
899 else
900 cbRet = -EPROTO;
901 break;
902 }
903 sf_i->force_restat = 1; /* mtime (and size) may have changed */
904 }
905 }
906 if (papPagesFree)
907 kfree(papPages);
908 if (pReq)
909 VbglR0PhysHeapFree(pReq);
910 return cbRet;
911}
912
913
914/**
915 * Write to a regular file.
916 *
917 * @param file the file
918 * @param buf the buffer
919 * @param size length of the buffer
920 * @param off offset within the file
921 * @returns the number of written bytes on success, Linux error code otherwise
922 */
923static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
924{
925 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
926 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
927 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
928 struct vbsf_reg_info *sf_r = file->private_data;
929 struct address_space *mapping = inode->i_mapping;
930 loff_t pos;
931
932 SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
933 BUG_ON(!sf_i);
934 BUG_ON(!sf_g);
935 BUG_ON(!sf_r);
936
937 if (!S_ISREG(inode->i_mode)) {
938 LogFunc(("write to non regular file %d\n", inode->i_mode));
939 return -EINVAL;
940 }
941
942 pos = *off;
943 /** @todo This should be handled by the host, it returning the new file
944 * offset when appending. We may have an outdated i_size value here! */
945 if (file->f_flags & O_APPEND)
946 pos = i_size_read(inode);
947
948 /** @todo XXX Check write permission according to inode->i_mode! */
949
950 if (!size) {
951 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
952 *off = pos;
953 return 0;
954 }
955
956 /*
957 * If there are active writable mappings, coordinate with any
958 * pending writes via those.
959 */
960 if ( mapping
961 && mapping->nrpages > 0
962 && mapping_writably_mapped(mapping)) {
963#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
964 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
965 if (err)
966 return err;
967#else
968 /** @todo ... */
969#endif
970 }
971
972 /*
973 * For small requests, try to use an embedded buffer, provided we get a heap
974 * block that does not cross page boundaries (see host code).
975 */
976 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
977 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
978 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
979 if ( pReq
980 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
981 ssize_t cbRet;
982 if (copy_from_user(pReq->abData, buf, size) == 0) {
983 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
984 pos, (uint32_t)size);
985 if (RT_SUCCESS(vrc)) {
986 cbRet = pReq->Parms.cb32Write.u.value32;
987 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
988 pos += cbRet;
989 *off = pos;
990 if (pos > i_size_read(inode))
991 i_size_write(inode, pos);
992 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
993 } else
994 cbRet = -EPROTO;
995 sf_i->force_restat = 1; /* mtime (and size) may have changed */
996 } else
997 cbRet = -EFAULT;
998
999 VbglR0PhysHeapFree(pReq);
1000 return cbRet;
1001 }
1002 if (pReq)
1003 VbglR0PhysHeapFree(pReq);
1004 }
1005
1006#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
1007 /*
1008 * For medium sized requests try use a bounce buffer.
1009 */
1010 if (size <= _64K /** @todo make this configurable? */) {
1011 void *pvBounce = kmalloc(size, GFP_KERNEL);
1012 if (pvBounce) {
1013 if (copy_from_user(pvBounce, buf, size) == 0) {
1014 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1015 if (pReq) {
1016 ssize_t cbRet;
1017 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->Handle.hHost, pos,
1018 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
1019 if (RT_SUCCESS(vrc)) {
1020 cbRet = pReq->Parms.cb32Write.u.value32;
1021 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
1022 pos += cbRet;
1023 *off = pos;
1024 if (pos > i_size_read(inode))
1025 i_size_write(inode, pos);
1026 vbsf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
1027 } else
1028 cbRet = -EPROTO;
1029 sf_i->force_restat = 1; /* mtime (and size) may have changed */
1030 VbglR0PhysHeapFree(pReq);
1031 kfree(pvBounce);
1032 return cbRet;
1033 }
1034 kfree(pvBounce);
1035 } else {
1036 kfree(pvBounce);
1037 return -EFAULT;
1038 }
1039 }
1040 }
1041#endif
1042
1043 return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
1044}
1045
1046
1047/**
1048 * Open a regular file.
1049 *
1050 * @param inode the inode
1051 * @param file the file
1052 * @returns 0 on success, Linux error code otherwise
1053 */
1054static int vbsf_reg_open(struct inode *inode, struct file *file)
1055{
1056 int rc, rc_linux = 0;
1057 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1058 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1059 struct vbsf_reg_info *sf_r;
1060 struct dentry *dentry = VBSF_GET_F_DENTRY(file);
1061 VBOXSFCREATEREQ *pReq;
1062
1063 SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
1064 BUG_ON(!sf_g);
1065 BUG_ON(!sf_i);
1066
1067 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
1068 if (!sf_r) {
1069 LogRelFunc(("could not allocate reg info\n"));
1070 return -ENOMEM;
1071 }
1072
1073 RTListInit(&sf_r->Handle.Entry);
1074 sf_r->Handle.cRefs = 1;
1075 sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
1076 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
1077
1078 /* Already open? */
1079 if (sf_i->handle != SHFL_HANDLE_NIL) {
1080 /*
1081 * This inode was created with vbsf_create_worker(). Check the CreateFlags:
1082 * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
1083 * about the access flags (SHFL_CF_ACCESS_*).
1084 */
1085 sf_i->force_restat = 1;
1086 sf_r->Handle.hHost = sf_i->handle;
1087 sf_i->handle = SHFL_HANDLE_NIL;
1088 file->private_data = sf_r;
1089
1090 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
1091 vbsf_handle_append(sf_i, &sf_r->Handle);
1092 SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1093 return 0;
1094 }
1095
1096 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
1097 if (!pReq) {
1098 kfree(sf_r);
1099 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
1100 return -ENOMEM;
1101 }
1102 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
1103 RT_ZERO(pReq->CreateParms);
1104 pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
1105
1106 /* We check the value of pReq->CreateParms.Handle afterwards to
1107 * find out if the call succeeded or failed, as the API does not seem
1108 * to cleanly distinguish error and informational messages.
1109 *
1110 * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
1111 * to make the shared folders host service use our fMode parameter */
1112
1113 if (file->f_flags & O_CREAT) {
1114 LogFunc(("O_CREAT set\n"));
1115 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1116 /* We ignore O_EXCL, as the Linux kernel seems to call create
1117 beforehand itself, so O_EXCL should always fail. */
1118 if (file->f_flags & O_TRUNC) {
1119 LogFunc(("O_TRUNC set\n"));
1120 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1121 } else
1122 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1123 } else {
1124 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1125 if (file->f_flags & O_TRUNC) {
1126 LogFunc(("O_TRUNC set\n"));
1127 pReq->CreateParms.CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1128 }
1129 }
1130
1131 switch (file->f_flags & O_ACCMODE) {
1132 case O_RDONLY:
1133 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READ;
1134 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ;
1135 break;
1136
1137 case O_WRONLY:
1138 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_WRITE;
1139 sf_r->Handle.fFlags |= VBSF_HANDLE_F_WRITE;
1140 break;
1141
1142 case O_RDWR:
1143 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_READWRITE;
1144 sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
1145 break;
1146
1147 default:
1148 BUG();
1149 }
1150
1151 if (file->f_flags & O_APPEND) {
1152 LogFunc(("O_APPEND set\n"));
1153 pReq->CreateParms.CreateFlags |= SHFL_CF_ACCESS_APPEND;
1154 sf_r->Handle.fFlags |= VBSF_HANDLE_F_APPEND;
1155 }
1156
1157 pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
1158 LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
1159 sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
1160 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
1161 if (RT_FAILURE(rc)) {
1162 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
1163 kfree(sf_r);
1164 VbglR0PhysHeapFree(pReq);
1165 return -RTErrConvertToErrno(rc);
1166 }
1167
1168 if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
1169 vbsf_dentry_chain_increase_ttl(dentry);
1170 rc_linux = 0;
1171 } else {
1172 switch (pReq->CreateParms.Result) {
1173 case SHFL_PATH_NOT_FOUND:
1174 rc_linux = -ENOENT;
1175 break;
1176 case SHFL_FILE_NOT_FOUND:
1177 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
1178 rc_linux = -ENOENT;
1179 break;
1180 case SHFL_FILE_EXISTS:
1181 vbsf_dentry_chain_increase_ttl(dentry);
1182 rc_linux = -EEXIST;
1183 break;
1184 default:
1185 vbsf_dentry_chain_increase_parent_ttl(dentry);
1186 rc_linux = 0;
1187 break;
1188 }
1189 }
1190
1191 sf_i->force_restat = 1; /** @todo Why?!? */
1192 sf_r->Handle.hHost = pReq->CreateParms.Handle;
1193 file->private_data = sf_r;
1194 vbsf_handle_append(sf_i, &sf_r->Handle);
1195 VbglR0PhysHeapFree(pReq);
1196 SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1197 return rc_linux;
1198}
1199
1200
1201/**
1202 * Close a regular file.
1203 *
1204 * @param inode the inode
1205 * @param file the file
1206 * @returns 0 on success, Linux error code otherwise
1207 */
1208static int vbsf_reg_release(struct inode *inode, struct file *file)
1209{
1210 struct vbsf_reg_info *sf_r;
1211 struct vbsf_super_info *sf_g;
1212 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1213
1214 SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
1215 sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1216 sf_r = file->private_data;
1217
1218 BUG_ON(!sf_g);
1219 BUG_ON(!sf_r);
1220
1221#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
1222 /* See the smbfs source (file.c). mmap in particular can cause data to be
1223 * written to the file after it is closed, which we can't cope with. We
1224 * copy and paste the body of filemap_write_and_wait() here as it was not
1225 * defined before 2.6.6 and not exported until quite a bit later. */
1226 /* filemap_write_and_wait(inode->i_mapping); */
1227 if (inode->i_mapping->nrpages
1228 && filemap_fdatawrite(inode->i_mapping) != -EIO)
1229 filemap_fdatawait(inode->i_mapping);
1230#endif
1231
1232 /* Release sf_r, closing the handle if we're the last user. */
1233 file->private_data = NULL;
1234 vbsf_handle_release(&sf_r->Handle, sf_g, "vbsf_reg_release");
1235
1236 sf_i->handle = SHFL_HANDLE_NIL;
1237 return 0;
1238}
1239
1240/**
1241 * Wrapper around generic/default seek function that ensures that we've got
1242 * the up-to-date file size when doing anything relative to EOF.
1243 *
1244 * The issue is that the host may extend the file while we weren't looking, and
1245 * if the caller wishes to append data it may end up overwriting existing data
1246 * if we operate with a stale size. So, we always retrieve the file size on
1247 * EOF-relative seeks.
1248 */
1249static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
1250{
1251 SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
1252
1253 switch (whence) {
1254#ifdef SEEK_HOLE
1255 case SEEK_HOLE:
1256 case SEEK_DATA:
1257#endif
1258 case SEEK_END: {
1259 struct vbsf_reg_info *sf_r = file->private_data;
1260 int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost, true /*fForce*/,
1261 false /*fInodeLocked*/);
1262 if (rc == 0)
1263 break;
1264 return rc;
1265 }
1266 }
1267
1268#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
1269 return generic_file_llseek(file, off, whence);
1270#else
1271 return default_llseek(file, off, whence);
1272#endif
1273}
1274
1275/**
1276 * Flush region of file - chiefly mmap/msync.
1277 *
1278 * We cannot use the noop_fsync / simple_sync_file here as that means
1279 * msync(,,MS_SYNC) will return before the data hits the host, thereby
1280 * causing coherency issues with O_DIRECT access to the same file as
1281 * well as any host interaction with the file.
1282 */
1283#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
1284static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1285{
1286# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1287 return __generic_file_fsync(file, start, end, datasync);
1288# else
1289 return generic_file_fsync(file, start, end, datasync);
1290# endif
1291}
1292#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
1293static int vbsf_reg_fsync(struct file *file, int datasync)
1294{
1295 return generic_file_fsync(file, datasync);
1296}
1297#else /* < 2.6.35 */
1298static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
1299{
1300# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
1301 return simple_fsync(file, dentry, datasync);
1302# else
1303 int rc;
1304 struct inode *inode = dentry->d_inode;
1305 AssertReturn(inode, -EINVAL);
1306
1307 /** @todo What about file_fsync()? (<= 2.5.11) */
1308
1309# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1310 rc = sync_mapping_buffers(inode->i_mapping);
1311 if ( rc == 0
1312 && (inode->i_state & I_DIRTY)
1313 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
1314 ) {
1315 struct writeback_control wbc = {
1316 .sync_mode = WB_SYNC_ALL,
1317 .nr_to_write = 0
1318 };
1319 rc = sync_inode(inode, &wbc);
1320 }
1321# else /* < 2.5.12 */
1322 rc = fsync_inode_buffers(inode);
1323# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1324 rc |= fsync_inode_data_buffers(inode);
1325# endif
1326 /** @todo probably need to do more here... */
1327# endif /* < 2.5.12 */
1328 return rc;
1329# endif
1330}
1331#endif /* < 2.6.35 */
1332
1333
1334/**
1335 * File operations for regular files.
1336 */
1337struct file_operations vbsf_reg_fops = {
1338 .read = vbsf_reg_read,
1339 .open = vbsf_reg_open,
1340 .write = vbsf_reg_write,
1341 .release = vbsf_reg_release,
1342 .mmap = generic_file_mmap,
1343#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1344# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
1345/** @todo This code is known to cause caching of data which should not be
1346 * cached. Investigate. */
1347# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
1348 .splice_read = vbsf_splice_read,
1349# else
1350 .sendfile = generic_file_sendfile,
1351# endif
1352 .aio_read = generic_file_aio_read,
1353 .aio_write = generic_file_aio_write,
1354# endif
1355#endif
1356 .llseek = vbsf_reg_llseek,
1357 .fsync = vbsf_reg_fsync,
1358};
1359
1360struct inode_operations vbsf_reg_iops = {
1361#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
1362 .revalidate = vbsf_inode_revalidate
1363#else
1364 .getattr = vbsf_inode_getattr,
1365 .setattr = vbsf_inode_setattr
1366#endif
1367};
1368
1369
1370#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1371
1372/**
1373 * Used to read the content of a page into the page cache.
1374 *
1375 * Needed for mmap and reads+writes when the file is mmapped in a
1376 * shared+writeable fashion.
1377 */
1378static int vbsf_readpage(struct file *file, struct page *page)
1379{
1380 struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
1381 int err;
1382
1383 SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
1384 Assert(PageLocked(page));
1385
1386 if (PageUptodate(page)) {
1387 unlock_page(page);
1388 return 0;
1389 }
1390
1391 if (!is_bad_inode(inode)) {
1392 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1393 if (pReq) {
1394 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1395 struct vbsf_reg_info *sf_r = file->private_data;
1396 uint32_t cbRead;
1397 int vrc;
1398
1399 pReq->PgLst.offFirstPage = 0;
1400 pReq->PgLst.aPages[0] = page_to_phys(page);
1401 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
1402 pReq,
1403 sf_r->Handle.hHost,
1404 (uint64_t)page->index << PAGE_SHIFT,
1405 PAGE_SIZE,
1406 1 /*cPages*/);
1407
1408 cbRead = pReq->Parms.cb32Read.u.value32;
1409 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
1410 VbglR0PhysHeapFree(pReq);
1411
1412 if (RT_SUCCESS(vrc)) {
1413 if (cbRead == PAGE_SIZE) {
1414 /* likely */
1415 } else {
1416 uint8_t *pbMapped = (uint8_t *)kmap(page);
1417 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
1418 kunmap(page);
1419 /** @todo truncate the inode file size? */
1420 }
1421
1422 flush_dcache_page(page);
1423 SetPageUptodate(page);
1424 unlock_page(page);
1425 return 0;
1426 }
1427 err = -RTErrConvertToErrno(vrc);
1428 } else
1429 err = -ENOMEM;
1430 } else
1431 err = -EIO;
1432 SetPageError(page);
1433 unlock_page(page);
1434 return err;
1435}
1436
1437
1438/**
1439 * Used to write out the content of a dirty page cache page to the host file.
1440 *
1441 * Needed for mmap and writes when the file is mmapped in a shared+writeable
1442 * fashion.
1443 */
1444static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
1445{
1446 struct address_space *mapping = page->mapping;
1447 struct inode *inode = mapping->host;
1448 struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
1449 struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
1450 int err;
1451
1452 SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
1453 inode, page,(uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle->hHost));
1454
1455 if (pHandle) {
1456 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1457 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1458 if (pReq) {
1459 uint64_t const cbFile = i_size_read(inode);
1460 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
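    /* Write the full page unless it is the one containing EOF, in which case only write up to the end of the file. */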
1461 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
1462 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
1463 int vrc;
1464
1465 pReq->PgLst.offFirstPage = 0;
1466 pReq->PgLst.aPages[0] = page_to_phys(page);
1467 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
1468 pReq,
1469 pHandle->hHost,
1470 offInFile,
1471 cbToWrite,
1472 1 /*cPages*/);
1473 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
1474 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
1475 vrc = VERR_WRITE_ERROR);
1476 VbglR0PhysHeapFree(pReq);
1477
1478 if (RT_SUCCESS(vrc)) {
1479 /* Update the inode if we've extended the file. */
1480 /** @todo is this necessary given the cbToWrite calc above? */
1481 uint64_t const offEndOfWrite = offInFile + cbToWrite;
1482 if ( offEndOfWrite > cbFile
1483 && offEndOfWrite > i_size_read(inode))
1484 i_size_write(inode, offEndOfWrite);
1485
1486 if (PageError(page))
1487 ClearPageError(page);
1488
1489 err = 0;
1490 } else {
1491 ClearPageUptodate(page);
1492 err = -EPROTO;
1493 }
1494 } else
1495 err = -ENOMEM;
1496 vbsf_handle_release(pHandle, sf_g, "vbsf_writepage");
1497 } else {
1498 static uint64_t volatile s_cCalls = 0;
1499 if (s_cCalls++ < 16)
1500 printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
1501 err = -EPROTO;
1502 }
1503 unlock_page(page);
1504 return err;
1505}
1506
1507# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1508/**
1509 * Called when writing thru the page cache (which we shouldn't be doing).
1510 */
1511int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
1512 unsigned len, unsigned flags, struct page **pagep, void **fsdata)
1513{
1514 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
1515 * the page cache for any writes AFAIK. We could just as well use
1516 * simple_write_begin & simple_write_end here if we think we really
1517 * need to have non-NULL function pointers in the table... */
1518 static uint64_t volatile s_cCalls = 0;
1519 if (s_cCalls++ < 16) {
1520 printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1521 (unsigned long long)pos, len, flags);
1522 RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1523 (unsigned long long)pos, len, flags);
1524# ifdef WARN_ON
1525 WARN_ON(1);
1526# endif
1527 }
1528 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
1529}
1530# endif /* KERNEL_VERSION >= 2.6.24 */
1531
1532# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1533/**
1534 * This is needed to make open accept O_DIRECT as well as dealing with direct
1535 * I/O requests if we don't intercept them earlier.
1536 */
1537# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
1538static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1539# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1540static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1541# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1542static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1543# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
1544static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1545# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
1546static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1547# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
1548static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1549# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
1550static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1551# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
1552static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
1553# else
1554static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *, unsigned long, int)
1555# endif
1556{
1557 TRACE();
1558 return -EINVAL;
1559}
1560# endif
1561
1562/**
1563 * Address space (for the page cache) operations for regular files.
1564 */
1565struct address_space_operations vbsf_reg_aops = {
1566 .readpage = vbsf_readpage,
1567 .writepage = vbsf_writepage,
1568 /** @todo Need .writepages if we want msync performance... */
1569# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1570 .set_page_dirty = __set_page_dirty_buffers,
1571# endif
1572# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1573 .write_begin = vbsf_write_begin,
1574 .write_end = simple_write_end,
1575# else
1576 .prepare_write = simple_prepare_write,
1577 .commit_write = simple_commit_write,
1578# endif
1579# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1580 .direct_IO = vbsf_direct_IO,
1581# endif
1582};
1583
1584#endif /* LINUX_VERSION_CODE >= 2.6.0 */
1585