VirtualBox

source: vbox/trunk/src/VBox/Additions/linux/sharedfolders/regops.c@ 77526

Last change on this file since 77526 was 77526, checked in by vboxsync, 6 years ago

linux/vboxsf: We don't use tabs and our indent size is 4 not 8. Makes this a heck lot easier to maintain. bugref:9172

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 51.7 KB
1/* $Id: regops.c 77526 2019-03-01 12:15:29Z vboxsync $ */
2/** @file
3 * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * Permission is hereby granted, free of charge, to any person
10 * obtaining a copy of this software and associated documentation
11 * files (the "Software"), to deal in the Software without
12 * restriction, including without limitation the rights to use,
13 * copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following
16 * conditions:
17 *
18 * The above copyright notice and this permission notice shall be
19 * included in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 * OTHER DEALINGS IN THE SOFTWARE.
29 */
30
31#include "vfsmod.h"
32#include <linux/uio.h>
33#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 32)
34# include <linux/aio.h> /* struct kiocb before 4.1 */
35#endif
36#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
37# include <linux/buffer_head.h>
38#endif
39#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) \
40 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
41# include <linux/writeback.h>
42#endif
43#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
44 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
45# include <linux/splice.h>
46#endif
47#include <iprt/err.h>
48
49#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18)
50# define SEEK_END 2
51#endif
52
53
54/**
55 * Called when an inode is released to unlink all handles that might still
56 * be associated with it.
57 *
58 * @param pInodeInfo The inode which handles to drop.
59 */
60void sf_handle_drop_chain(struct sf_inode_info *pInodeInfo)
61{
62 struct sf_handle *pCur, *pNext;
63 unsigned long fSavedFlags;
64 SFLOGFLOW(("sf_handle_drop_chain: %p\n", pInodeInfo));
65 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
66
67 RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct sf_handle, Entry) {
68 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
69 ("%p %#x\n", pCur, pCur->fFlags));
70 pCur->fFlags &= ~SF_HANDLE_F_ON_LIST; /* it is being taken off the list */
71 RTListNodeRemove(&pCur->Entry);
72 }
73
74 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
75}
76
77
78/**
79 * Locates a handle that matches all the flags in @a fFlags.
80 *
81 * @returns Pointer to handle on success (retained), use sf_handle_release() to
82 * release it. NULL if no suitable handle was found.
83 * @param pInodeInfo The inode info to search.
84 * @param fFlagsSet The flags that must be set.
85 * @param fFlagsClear The flags that must be clear.
86 */
87struct sf_handle *sf_handle_find(struct sf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
88{
89 struct sf_handle *pCur;
90 unsigned long fSavedFlags;
91 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
92
93 RTListForEach(&pInodeInfo->HandleList, pCur, struct sf_handle, Entry) {
94 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
95 ("%p %#x\n", pCur, pCur->fFlags));
96 if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
97 uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
98 if (cRefs > 1) {
99 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
100 SFLOGFLOW(("sf_handle_find: returns %p\n", pCur));
101 return pCur;
102 }
103 /* Oops, already being closed (safe as it's only ever increased here). */
104 ASMAtomicDecU32(&pCur->cRefs);
105 }
106 }
107
108 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
109 SFLOGFLOW(("sf_handle_find: returns NULL!\n"));
110 return NULL;
111}
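/*
 * Illustrative sketch: how a caller typically pairs sf_handle_find() with
 * sf_handle_release(); sf_writepage() further down does exactly this.  The
 * helper name and the -EBADF error choice are made up for the example.
 */
#if 0
static int sf_example_with_write_handle(struct inode *inode)
{
    struct sf_inode_info   *sf_i    = GET_INODE_INFO(inode);
    struct vbsf_super_info *sf_g    = VBSF_GET_SUPER_INFO(inode->i_sb);
    /* Want a handle opened for writing but not in append mode: */
    struct sf_handle       *pHandle = sf_handle_find(sf_i, SF_HANDLE_F_WRITE, SF_HANDLE_F_APPEND);
    if (!pHandle)
        return -EBADF;
    /* ... issue host requests using pHandle->hHost ... */
    sf_handle_release(pHandle, sf_g, "sf_example_with_write_handle");
    return 0;
}
#endif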
112
113
114/**
115 * Slow worker for sf_handle_release() that does the freeing.
116 *
117 * @returns 0 (ref count).
118 * @param pHandle The handle to release.
119 * @param sf_g The info structure for the shared folder associated
120 * with the handle.
121 * @param pszCaller The caller name (for logging failures).
122 */
123uint32_t sf_handle_release_slow(struct sf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
124{
125 int rc;
126 unsigned long fSavedFlags;
127
128 SFLOGFLOW(("sf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
129
130 /*
131 * Remove from the list.
132 */
133 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
134
135 AssertMsg((pHandle->fFlags & SF_HANDLE_F_MAGIC_MASK) == SF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
136 Assert(pHandle->pInodeInfo);
137 Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
138
139 if (pHandle->fFlags & SF_HANDLE_F_ON_LIST) {
140 pHandle->fFlags &= ~SF_HANDLE_F_ON_LIST;
141 RTListNodeRemove(&pHandle->Entry);
142 }
143
144 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
145
146 /*
147 * Actually destroy it.
148 */
149 rc = VbglR0SfHostReqCloseSimple(sf_g->map.root, pHandle->hHost);
150 if (RT_FAILURE(rc))
151 LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
152 pHandle->hHost = SHFL_HANDLE_NIL;
153 pHandle->fFlags = SF_HANDLE_F_MAGIC_DEAD;
154 kfree(pHandle);
155 return 0;
156}
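/*
 * Illustrative sketch: the fast-path counterpart sf_handle_release() is not in
 * this file (presumably an inline in vfsmod.h); it is assumed to look roughly
 * like this, only calling the slow worker above when the last reference goes.
 */
#if 0
DECLINLINE(uint32_t) sf_handle_release(struct sf_handle *pHandle, struct vbsf_super_info *sf_g, const char *pszCaller)
{
    uint32_t cRefs = ASMAtomicDecU32(&pHandle->cRefs);
    if (cRefs)
        return cRefs;                                         /* still in use elsewhere */
    return sf_handle_release_slow(pHandle, sf_g, pszCaller);  /* last user: close + free */
}
#endif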
157
158
159/**
160 * Appends a handle to a handle list.
161 *
162 * @param pInodeInfo The inode to add it to.
163 * @param pHandle The handle to add.
164 */
165void sf_handle_append(struct sf_inode_info *pInodeInfo, struct sf_handle *pHandle)
166{
167#ifdef VBOX_STRICT
168 struct sf_handle *pCur;
169#endif
170 unsigned long fSavedFlags;
171
172 SFLOGFLOW(("sf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
173 AssertMsg((pHandle->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == SF_HANDLE_F_MAGIC,
174 ("%p %#x\n", pHandle, pHandle->fFlags));
175 Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
176
177 spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
178
179 AssertMsg((pHandle->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == SF_HANDLE_F_MAGIC,
180 ("%p %#x\n", pHandle, pHandle->fFlags));
181#ifdef VBOX_STRICT
182 RTListForEach(&pInodeInfo->HandleList, pCur, struct sf_handle, Entry) {
183 Assert(pCur != pHandle);
184 AssertMsg((pCur->fFlags & (SF_HANDLE_F_MAGIC_MASK | SF_HANDLE_F_ON_LIST)) == (SF_HANDLE_F_MAGIC | SF_HANDLE_F_ON_LIST),
185 ("%p %#x\n", pCur, pCur->fFlags));
186 }
187 pHandle->pInodeInfo = pInodeInfo;
188#endif
189
190 pHandle->fFlags |= SF_HANDLE_F_ON_LIST;
191 RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
192
193 spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
194}
195
196
197#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
198 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
199
200void free_pipebuf(struct page *kpage)
201{
202 kunmap(kpage);
203 __free_pages(kpage, 0);
204}
205
206void *sf_pipe_buf_map(struct pipe_inode_info *pipe,
207 struct pipe_buffer *pipe_buf, int atomic)
208{
209 return 0;
210}
211
212void sf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
213{
214}
215
216void sf_pipe_buf_unmap(struct pipe_inode_info *pipe,
217 struct pipe_buffer *pipe_buf, void *map_data)
218{
219}
220
221int sf_pipe_buf_steal(struct pipe_inode_info *pipe,
222 struct pipe_buffer *pipe_buf)
223{
224 return 0;
225}
226
227static void sf_pipe_buf_release(struct pipe_inode_info *pipe,
228 struct pipe_buffer *pipe_buf)
229{
230 free_pipebuf(pipe_buf->page);
231}
232
233int sf_pipe_buf_confirm(struct pipe_inode_info *info,
234 struct pipe_buffer *pipe_buf)
235{
236 return 0;
237}
238
239static struct pipe_buf_operations sf_pipe_buf_ops = {
240 .can_merge = 0,
241 .map = sf_pipe_buf_map,
242 .unmap = sf_pipe_buf_unmap,
243 .confirm = sf_pipe_buf_confirm,
244 .release = sf_pipe_buf_release,
245 .steal = sf_pipe_buf_steal,
246 .get = sf_pipe_buf_get,
247};
248
249static int sf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g,
250 struct sf_reg_info *sf_r, void *buf,
251 uint32_t * nread, uint64_t pos)
252{
253 int rc = VbglR0SfRead(&client_handle, &sf_g->map, sf_r->Handle.hHost,
254 pos, nread, buf, false /* already locked? */ );
255 if (RT_FAILURE(rc)) {
256 LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
257 rc));
258 return -EPROTO;
259 }
260 return 0;
261}
262
263# define LOCK_PIPE(pipe) do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
264# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
265
266ssize_t
267sf_splice_read(struct file *in, loff_t * poffset,
268 struct pipe_inode_info *pipe, size_t len, unsigned int flags)
269{
270 size_t bytes_remaining = len;
271 loff_t orig_offset = *poffset;
272 loff_t offset = orig_offset;
273 struct inode *inode = GET_F_DENTRY(in)->d_inode;
274 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
275 struct sf_reg_info *sf_r = in->private_data;
276 ssize_t retval;
277 struct page *kpage = 0;
278 size_t nsent = 0;
279
280/** @todo rig up a FsPerf test for this code */
281 TRACE();
282 if (!S_ISREG(inode->i_mode)) {
283 LogFunc(("read from non regular file %d\n", inode->i_mode));
284 return -EINVAL;
285 }
286 if (!len) {
287 return 0;
288 }
289
290 LOCK_PIPE(pipe);
291
292 uint32_t req_size = 0;
293 while (bytes_remaining > 0) {
294 kpage = alloc_page(GFP_KERNEL);
295 if (unlikely(kpage == NULL)) {
296 UNLOCK_PIPE(pipe);
297 return -ENOMEM;
298 }
299 req_size = 0;
300 uint32_t nread = req_size =
301 (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
302 uint32_t chunk = 0;
303 void *kbuf = kmap(kpage);
304 while (chunk < req_size) {
305 retval =
306 sf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk,
307 &nread, offset);
308 if (retval < 0)
309 goto err;
310 if (nread == 0)
311 break;
312 chunk += nread;
313 offset += nread;
314 nread = req_size - chunk;
315 }
316 if (!pipe->readers) {
317 send_sig(SIGPIPE, current, 0);
318 retval = -EPIPE;
319 goto err;
320 }
321 if (pipe->nrbufs < PIPE_BUFFERS) {
322 struct pipe_buffer *pipebuf =
323 pipe->bufs +
324 ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS -
325 1));
326 pipebuf->page = kpage;
327 pipebuf->ops = &sf_pipe_buf_ops;
328 pipebuf->len = req_size;
329 pipebuf->offset = 0;
330 pipebuf->private = 0;
331 pipebuf->flags = 0;
332 pipe->nrbufs++;
333 nsent += req_size;
334 bytes_remaining -= req_size;
335 if (signal_pending(current))
336 break;
337 } else { /* pipe full */
338
339 if (flags & SPLICE_F_NONBLOCK) {
340 retval = -EAGAIN;
341 goto err;
342 }
343 free_pipebuf(kpage);
344 break;
345 }
346 }
347 UNLOCK_PIPE(pipe);
348 if (!nsent && signal_pending(current))
349 return -ERESTARTSYS;
350 *poffset += nsent;
351 return offset - orig_offset;
352
353 err:
354 UNLOCK_PIPE(pipe);
355 free_pipebuf(kpage);
356 return retval;
357}
358
359#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
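/*
 * Illustrative user-space sketch: a splice(2) call like the following is what
 * ends up in sf_splice_read() on 2.6.23..2.6.30 kernels, moving shared folder
 * data into a pipe without a user-space copy (the mount path is just an example):
 *
 *     int fds[2];
 *     pipe(fds);
 *     int fd = open("/media/sf_myshare/file.bin", O_RDONLY);
 *     ssize_t cb = splice(fd, NULL, fds[1], NULL, 65536, SPLICE_F_NONBLOCK);
 */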
360
361
362/** Companion to sf_lock_user_pages(). */
363DECLINLINE(void) sf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty)
364{
365 while (cPages-- > 0)
366 {
367 struct page *pPage = papPages[cPages];
368 if (fSetDirty && !PageReserved(pPage))
369 SetPageDirty(pPage);
370#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
371 put_page(pPage);
372#else
373 page_cache_release(pPage);
374#endif
375 }
376}
377
378
379/** Wrapper around get_user_pages. */
380DECLINLINE(int) sf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages)
381{
382# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
383 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
384 fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
385# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
386 ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
387# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
388 ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages,
389 fWrite, 1 /*force*/, papPages);
390# else
391 struct task_struct *pTask = current;
392 size_t cPagesLocked;
393 down_read(&pTask->mm->mmap_sem);
394 cPagesLocked = get_user_pages(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
395 up_read(&pTask->mm->mmap_sem);
396# endif
397 if (cPagesLocked == cPages)
398 return 0;
399 if (cPagesLocked < 0)
400 return cPagesLocked;
401
402 sf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/);
403
404 /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
405 return -EFAULT;
406}
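/*
 * Illustrative sketch: how the callers below size the page array they hand to
 * sf_lock_user_pages().  For a buffer starting offInPage bytes into its first
 * page, cPages = ceil((offInPage + size) / PAGE_SIZE); e.g. a 10 byte access
 * straddling a page boundary still needs two pages locked.  The helper name is
 * made up for the example.
 */
#if 0
static size_t sf_example_calc_pages(uintptr_t uPtr, size_t cb)
{
    return ((uPtr & PAGE_OFFSET_MASK) + cb + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
}
#endif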
407
408
409/**
410 * Read function used when accessing files that are memory mapped.
411 *
412 * We read from the page cache here to present a coherent picture of the
413 * file content.
414 */
415static ssize_t sf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
416{
417#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
418 struct iovec iov = { .iov_base = buf, .iov_len = size };
419 struct iov_iter iter;
420 struct kiocb kiocb;
421 ssize_t cbRet;
422
423 init_sync_kiocb(&kiocb, file);
424 kiocb.ki_pos = *off;
425 iov_iter_init(&iter, READ, &iov, 1, size);
426
427 cbRet = generic_file_read_iter(&kiocb, &iter);
428
429 *off = kiocb.ki_pos;
430 return cbRet;
431
432#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
433 struct iovec iov = { .iov_base = buf, .iov_len = size };
434 struct kiocb kiocb;
435 ssize_t cbRet;
436
437 init_sync_kiocb(&kiocb, file);
438 kiocb.ki_pos = *off;
439
440 cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
441 if (cbRet == -EIOCBQUEUED)
442 cbRet = wait_on_sync_kiocb(&kiocb);
443
444 *off = kiocb.ki_pos;
445 return cbRet;
446
447#else /* 2.6.18 or earlier: */
448 return generic_file_read(file, buf, size, off);
449#endif
450}
451
452
453/**
454 * Fallback case of sf_reg_read() that locks the user buffers and lets the
455 * host write directly to them.
456 */
457static ssize_t sf_reg_read_fallback(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
458 struct vbsf_super_info *sf_g, struct sf_reg_info *sf_r)
459{
460 /*
461 * Lock pages and execute the read, taking care not to pass the host
462 * more than it can handle in one go or more than we care to allocate
463 * page arrays for. The latter limit is set at just short of 32KB due
464 * to how the physical heap works.
465 */
466 struct page *apPagesStack[16];
467 struct page **papPages = &apPagesStack[0];
468 struct page **papPagesFree = NULL;
469 VBOXSFREADPGLSTREQ *pReq;
470 loff_t offFile = *off;
471 ssize_t cbRet = -ENOMEM;
472 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
473 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
474
475 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
476 while (!pReq && cMaxPages > 4) {
477 cMaxPages /= 2;
478 pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
479 }
480 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
481 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
482 if (pReq && papPages) {
483 cbRet = 0;
484 for (;;) {
485 /*
486 * Figure out how much to process now and lock the user pages.
487 */
488 int rc;
489 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
490 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
491 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
492 if (cPages <= cMaxPages)
493 cbChunk = size;
494 else {
495 cPages = cMaxPages;
496 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
497 }
498
499 rc = sf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages);
500 if (rc == 0) {
501 size_t iPage = cPages;
502 while (iPage-- > 0)
503 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
504 } else {
505 cbRet = rc;
506 break;
507 }
508
509 /*
510 * Issue the request and unlock the pages.
511 */
512 rc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
513
514 sf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/);
515
516 if (RT_SUCCESS(rc)) {
517 /*
518 * Success, advance position and buffer.
519 */
520 uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
521 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
522 cbRet += cbActual;
523 offFile += cbActual;
524 buf = (uint8_t *)buf + cbActual;
525 size -= cbActual;
526
527 /*
528 * Are we done already? If so commit the new file offset.
529 */
530 if (!size || cbActual < cbChunk) {
531 *off = offFile;
532 break;
533 }
534 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
535 /*
536 * The host probably doesn't have enough heap to handle the
537 * request, reduce the page count and retry.
538 */
539 cMaxPages /= 4;
540 Assert(cMaxPages > 0);
541 } else {
542 /*
543 * If we've successfully read stuff, return it rather than
544 * the error. (Not sure if this is such a great idea...)
545 */
546 if (cbRet > 0)
547 *off = offFile;
548 else
549 cbRet = -EPROTO;
550 break;
551 }
552 }
553 }
554 if (papPagesFree)
555 kfree(papPages);
556 if (pReq)
557 VbglR0PhysHeapFree(pReq);
558 return cbRet;
559}
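/*
 * Worked example for the chunking above (assuming 4 KiB pages): with
 * cMaxPages = 8 and a user buffer starting 0x300 bytes into a page, the first
 * chunk is 8 * 4096 - 0x300 = 32000 bytes, which leaves the buffer page
 * aligned, so subsequent chunks are full 32 KiB until the request is done.
 */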
560
561
562/**
563 * Read from a regular file.
564 *
565 * @param file the file
566 * @param buf the buffer
567 * @param size length of the buffer
568 * @param off offset within the file (in/out).
569 * @returns the number of read bytes on success, Linux error code otherwise
570 */
571static ssize_t sf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
572{
573 struct inode *inode = GET_F_DENTRY(file)->d_inode;
574 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
575 struct sf_reg_info *sf_r = file->private_data;
576 struct address_space *mapping = inode->i_mapping;
577
578 SFLOGFLOW(("sf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
579
580 if (!S_ISREG(inode->i_mode)) {
581 LogFunc(("read from non regular file %d\n", inode->i_mode));
582 return -EINVAL;
583 }
584
585 /** @todo XXX Check read permission according to inode->i_mode! */
586
587 if (!size)
588 return 0;
589
590 /*
591 * If there is a mapping and O_DIRECT isn't in effect, we must heed any
592 * dirty pages in the mapping and read from them. For simplicity
593 * though, we just do page cache reading when there are writable
594 * mappings around with any kind of pages loaded.
595 */
596 if ( mapping
597 && mapping->nrpages > 0
598 && mapping_writably_mapped(mapping)
599 && !(file->f_flags & O_DIRECT)
600 && 1 /** @todo make this behaviour configurable */ )
601 return sf_reg_read_mapped(file, buf, size, off);
602
603 /*
604 * For small requests, try to use an embedded buffer, provided we get a heap
605 * block that does not cross page boundaries (see host code).
606 */
607 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
608 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
609 VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
610 if ( pReq
611 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
612 ssize_t cbRet;
613 int vrc = VbglR0SfHostReqReadEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
614 if (RT_SUCCESS(vrc)) {
615 cbRet = pReq->Parms.cb32Read.u.value32;
616 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
617 if (copy_to_user(buf, pReq->abData, cbRet) == 0)
618 *off += cbRet;
619 else
620 cbRet = -EFAULT;
621 } else
622 cbRet = -EPROTO;
623 VbglR0PhysHeapFree(pReq);
624 return cbRet;
625 }
626 if (pReq)
627 VbglR0PhysHeapFree(pReq);
628 }
629
630#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
631 /*
632 * For medium-sized requests, try to use a bounce buffer.
633 */
634 if (size <= _64K /** @todo make this configurable? */) {
635 void *pvBounce = kmalloc(size, GFP_KERNEL);
636 if (pvBounce) {
637 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
638 if (pReq) {
639 ssize_t cbRet;
640 int vrc = VbglR0SfHostReqReadContig(sf_g->map.root, pReq, sf_r->Handle.hHost, *off,
641 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
642 if (RT_SUCCESS(vrc)) {
643 cbRet = pReq->Parms.cb32Read.u.value32;
644 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
645 if (copy_to_user(buf, pvBounce, cbRet) == 0)
646 *off += cbRet;
647 else
648 cbRet = -EFAULT;
649 } else
650 cbRet = -EPROTO;
651 VbglR0PhysHeapFree(pReq);
652 kfree(pvBounce);
653 return cbRet;
654 }
655 kfree(pvBounce);
656 }
657 }
658#endif
659
660 return sf_reg_read_fallback(file, buf, size, off, sf_g, sf_r);
661}
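/*
 * Worked example for the read path selection above (assuming 4 KiB pages): the
 * embedded-request limit is PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ,
 * abData[0]), i.e. a bit under 3 KiB, so small reads are copied through the
 * embedded buffer while larger ones go to sf_reg_read_fallback(), unless a
 * writable mapping with cached pages forced the page-cache path earlier.
 */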
662
663
664/**
665 * Wrapper around invalidate_mapping_pages() for page cache invalidation so that
666 * the changes written via sf_reg_write are made visible to mmap users.
667 */
668DECLINLINE(void) sf_reg_write_invalidate_mapping_range(struct address_space *mapping, loff_t offStart, loff_t offEnd)
669{
670 /*
671 * Only bother with this if the mapping has any pages in it.
672 *
673 * Note! According to the docs, the last parameter, end, is inclusive (we
674 * would have named it 'last' to indicate this).
675 *
676 * Note! The pre-2.6.12 function might not do enough to ensure consistency
677 * when any of the pages in the range is already mapped.
678 */
679# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
680 if (mapping)
681 invalidate_inode_pages2_range(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
682# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 60)
683 if (mapping && mapping->nrpages > 0)
684 invalidate_mapping_pages(mapping, offStart >> PAGE_SHIFT, (offEnd - 1) >> PAGE_SHIFT);
685# else
686 /** @todo ... */
687 RT_NOREF(mapping, offStart, offEnd);
688# endif
689}
690
691
692/**
693 * Fallback case of sf_reg_write() that locks the user buffers and lets the
694 * host read directly from them.
695 */
696static ssize_t sf_reg_write_fallback(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
697 struct inode *inode, struct sf_inode_info *sf_i,
698 struct vbsf_super_info *sf_g, struct sf_reg_info *sf_r)
699{
700 /*
701 * Lock pages and execute the write, taking care not to pass the host
702 * more than it can handle in one go or more than we care to allocate
703 * page arrays for. The latter limit is set at just short of 32KB due
704 * to how the physical heap works.
705 */
706 struct page *apPagesStack[16];
707 struct page **papPages = &apPagesStack[0];
708 struct page **papPagesFree = NULL;
709 VBOXSFWRITEPGLSTREQ *pReq;
710 ssize_t cbRet = -ENOMEM;
711 size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
712 size_t cMaxPages = RT_MIN(RT_MAX(sf_g->cMaxIoPages, 1), cPages);
713
714 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
715 while (!pReq && cMaxPages > 4) {
716 cMaxPages /= 2;
717 pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
718 }
719 if (pReq && cPages > RT_ELEMENTS(apPagesStack))
720 papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
721 if (pReq && papPages) {
722 cbRet = 0;
723 for (;;) {
724 /*
725 * Figure out how much to process now and lock the user pages.
726 */
727 int rc;
728 size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
729 pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
730 cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
731 if (cPages <= cMaxPages)
732 cbChunk = size;
733 else {
734 cPages = cMaxPages;
735 cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
736 }
737
738 rc = sf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages);
739 if (rc == 0) {
740 size_t iPage = cPages;
741 while (iPage-- > 0)
742 pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
743 } else {
744 cbRet = rc;
745 break;
746 }
747
748 /*
749 * Issue the request and unlock the pages.
750 */
751 rc = VbglR0SfHostReqWritePgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
752
753 sf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/);
754
755 if (RT_SUCCESS(rc)) {
756 /*
757 * Success, advance position and buffer.
758 */
759 uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
760 AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
761 cbRet += cbActual;
762 offFile += cbActual;
763 buf = (uint8_t *)buf + cbActual;
764 size -= cbActual;
765 if (offFile > i_size_read(inode))
766 i_size_write(inode, offFile);
767 sf_reg_write_invalidate_mapping_range(inode->i_mapping, offFile - cbActual, offFile);
768
769 /*
770 * Are we done already? If so commit the new file offset.
771 */
772 if (!size || cbActual < cbChunk) {
773 *off = offFile;
774 break;
775 }
776 } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
777 /*
778 * The host probably doesn't have enough heap to handle the
779 * request, reduce the page count and retry.
780 */
781 cMaxPages /= 4;
782 Assert(cMaxPages > 0);
783 } else {
784 /*
785 * If we've successfully written stuff, return it rather than
786 * the error. (Not sure if this is such a great idea...)
787 */
788 if (cbRet > 0)
789 *off = offFile;
790 else
791 cbRet = -EPROTO;
792 break;
793 }
794 sf_i->force_restat = 1; /* mtime (and size) may have changed */
795 }
796 }
797 if (papPagesFree)
798 kfree(papPages);
799 if (pReq)
800 VbglR0PhysHeapFree(pReq);
801 return cbRet;
802}
803
804
805/**
806 * Write to a regular file.
807 *
808 * @param file the file
809 * @param buf the buffer
810 * @param size length of the buffer
811 * @param off offset within the file
812 * @returns the number of written bytes on success, Linux error code otherwise
813 */
814static ssize_t sf_reg_write(struct file *file, const char *buf, size_t size,
815 loff_t * off)
816{
817 struct inode *inode = GET_F_DENTRY(file)->d_inode;
818 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
819 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
820 struct sf_reg_info *sf_r = file->private_data;
821 struct address_space *mapping = inode->i_mapping;
822 loff_t pos;
823
824 SFLOGFLOW(("sf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
825 BUG_ON(!sf_i);
826 BUG_ON(!sf_g);
827 BUG_ON(!sf_r);
828
829 if (!S_ISREG(inode->i_mode)) {
830 LogFunc(("write to non regular file %d\n", inode->i_mode));
831 return -EINVAL;
832 }
833
834 pos = *off;
835 /** @todo This should be handled by the host, it returning the new file
836 * offset when appending. We may have an outdated i_size value here! */
837 if (file->f_flags & O_APPEND)
838 pos = i_size_read(inode);
839
840 /** @todo XXX Check write permission according to inode->i_mode! */
841
842 if (!size) {
843 if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
844 *off = pos;
845 return 0;
846 }
847
848 /*
849 * If there are active writable mappings, coordinate with any
850 * pending writes via those.
851 */
852 if ( mapping
853 && mapping->nrpages > 0
854 && mapping_writably_mapped(mapping)) {
855#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32)
856 int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
857 if (err)
858 return err;
859#else
860 /** @todo ... */
861#endif
862 }
863
864 /*
865 * For small requests, try to use an embedded buffer, provided we get a heap
866 * block that does not cross page boundaries (see host code).
867 */
868 if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
869 uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
870 VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
871 if ( pReq
872 && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
873 ssize_t cbRet;
874 if (copy_from_user(pReq->abData, buf, size) == 0) {
875 int vrc = VbglR0SfHostReqWriteEmbedded(sf_g->map.root, pReq, sf_r->Handle.hHost,
876 pos, (uint32_t)size);
877 if (RT_SUCCESS(vrc)) {
878 cbRet = pReq->Parms.cb32Write.u.value32;
879 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
880 pos += cbRet;
881 *off = pos;
882 if (pos > i_size_read(inode))
883 i_size_write(inode, pos);
884 sf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
885 } else
886 cbRet = -EPROTO;
887 sf_i->force_restat = 1; /* mtime (and size) may have changed */
888 } else
889 cbRet = -EFAULT;
890
891 VbglR0PhysHeapFree(pReq);
892 return cbRet;
893 }
894 if (pReq)
895 VbglR0PhysHeapFree(pReq);
896 }
897
898#if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
899 /*
900 * For medium-sized requests, try to use a bounce buffer.
901 */
902 if (size <= _64K /** @todo make this configurable? */) {
903 void *pvBounce = kmalloc(size, GFP_KERNEL);
904 if (pvBounce) {
905 if (copy_from_user(pvBounce, buf, size) == 0) {
906 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
907 if (pReq) {
908 ssize_t cbRet;
909 int vrc = VbglR0SfHostReqWriteContig(sf_g->map.root, pReq, sf_r->Handle.hHost, pos,
910 (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
911 if (RT_SUCCESS(vrc)) {
912 cbRet = pReq->Parms.cb32Write.u.value32;
913 AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
914 pos += cbRet;
915 *off = pos;
916 if (pos > i_size_read(inode))
917 i_size_write(inode, pos);
918 sf_reg_write_invalidate_mapping_range(mapping, pos - cbRet, pos);
919 } else
920 cbRet = -EPROTO;
921 sf_i->force_restat = 1; /* mtime (and size) may have changed */
922 VbglR0PhysHeapFree(pReq);
923 kfree(pvBounce);
924 return cbRet;
925 }
926 kfree(pvBounce);
927 } else {
928 kfree(pvBounce);
929 return -EFAULT;
930 }
931 }
932 }
933#endif
934
935 return sf_reg_write_fallback(file, buf, size, off, pos, inode, sf_i, sf_g, sf_r);
936}
937
938
939/**
940 * Open a regular file.
941 *
942 * @param inode the inode
943 * @param file the file
944 * @returns 0 on success, Linux error code otherwise
945 */
946static int sf_reg_open(struct inode *inode, struct file *file)
947{
948 int rc, rc_linux = 0;
949 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
950 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
951 struct sf_reg_info *sf_r;
952#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
953 struct dentry *dentry = file_dentry(file);
954#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20)
955 struct dentry *dentry = file->f_path.dentry;
956#else
957 struct dentry *dentry = file->f_dentry;
958#endif
959 VBOXSFCREATEREQ *pReq;
960 SHFLCREATEPARMS *pCreateParms; /* temp glue */
961
962 SFLOGFLOW(("sf_reg_open: inode=%p file=%p flags=%#x %s\n",
963 inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
964 BUG_ON(!sf_g);
965 BUG_ON(!sf_i);
966
967 sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
968 if (!sf_r) {
969 LogRelFunc(("could not allocate reg info\n"));
970 return -ENOMEM;
971 }
972
973 RTListInit(&sf_r->Handle.Entry);
974 sf_r->Handle.cRefs = 1;
975 sf_r->Handle.fFlags = SF_HANDLE_F_FILE | SF_HANDLE_F_MAGIC;
976 sf_r->Handle.hHost = SHFL_HANDLE_NIL;
977
978 /* Already open? */
979 if (sf_i->handle != SHFL_HANDLE_NIL) {
980 /*
981 * This inode was created with sf_create_aux(). Check the CreateFlags:
982 * O_CREAT, O_TRUNC: inherently true (file was just created). Not sure
983 * about the access flags (SHFL_CF_ACCESS_*).
984 */
985 sf_i->force_restat = 1;
986 sf_r->Handle.hHost = sf_i->handle;
987 sf_i->handle = SHFL_HANDLE_NIL;
988 file->private_data = sf_r;
989
990 sf_r->Handle.fFlags |= SF_HANDLE_F_READ | SF_HANDLE_F_WRITE; /** @todo check */
991 sf_handle_append(sf_i, &sf_r->Handle);
992 SFLOGFLOW(("sf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
993 return 0;
994 }
995
996 pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
997 if (!pReq) {
998 kfree(sf_r);
999 LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
1000 return -ENOMEM;
1001 }
1002 memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
1003 RT_ZERO(pReq->CreateParms);
1004 pCreateParms = &pReq->CreateParms;
1005 pCreateParms->Handle = SHFL_HANDLE_NIL;
1006
1007 /* We check the value of pCreateParms->Handle afterwards to find out if
1008 * the call succeeded or failed, as the API does not seem to cleanly
1009 * distinguish error and informational messages.
1010 *
1011 * Furthermore, we must set pCreateParms->Handle to SHFL_HANDLE_NIL to
1012 * make the shared folders host service use our fMode parameter */
1013
1014 if (file->f_flags & O_CREAT) {
1015 LogFunc(("O_CREAT set\n"));
1016 pCreateParms->CreateFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
1017 /* We ignore O_EXCL, as the Linux kernel seems to call create
1018 beforehand itself, so O_EXCL should always fail. */
1019 if (file->f_flags & O_TRUNC) {
1020 LogFunc(("O_TRUNC set\n"));
1021 pCreateParms->CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1022 } else
1023 pCreateParms->CreateFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
1024 } else {
1025 pCreateParms->CreateFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
1026 if (file->f_flags & O_TRUNC) {
1027 LogFunc(("O_TRUNC set\n"));
1028 pCreateParms->CreateFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
1029 }
1030 }
1031
1032 switch (file->f_flags & O_ACCMODE) {
1033 case O_RDONLY:
1034 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_READ;
1035 sf_r->Handle.fFlags |= SF_HANDLE_F_READ;
1036 break;
1037
1038 case O_WRONLY:
1039 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_WRITE;
1040 sf_r->Handle.fFlags |= SF_HANDLE_F_WRITE;
1041 break;
1042
1043 case O_RDWR:
1044 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_READWRITE;
1045 sf_r->Handle.fFlags |= SF_HANDLE_F_READ | SF_HANDLE_F_WRITE;
1046 break;
1047
1048 default:
1049 BUG();
1050 }
1051
1052 if (file->f_flags & O_APPEND) {
1053 LogFunc(("O_APPEND set\n"));
1054 pCreateParms->CreateFlags |= SHFL_CF_ACCESS_APPEND;
1055 sf_r->Handle.fFlags |= SF_HANDLE_F_APPEND;
1056 }
1057
1058 pCreateParms->Info.Attr.fMode = inode->i_mode;
1059 LogFunc(("sf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n", sf_i->path->String.utf8, file->f_flags, pCreateParms->CreateFlags));
1060 rc = VbglR0SfHostReqCreate(sf_g->map.root, pReq);
1061 if (RT_FAILURE(rc)) {
1062 LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pCreateParms->CreateFlags, rc));
1063 kfree(sf_r);
1064 VbglR0PhysHeapFree(pReq);
1065 return -RTErrConvertToErrno(rc);
1066 }
1067
1068 if (pCreateParms->Handle != SHFL_HANDLE_NIL) {
1069 sf_dentry_chain_increase_ttl(dentry);
1070 rc_linux = 0;
1071 } else {
1072 switch (pCreateParms->Result) {
1073 case SHFL_PATH_NOT_FOUND:
1074 rc_linux = -ENOENT;
1075 break;
1076 case SHFL_FILE_NOT_FOUND:
1077 /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
1078 rc_linux = -ENOENT;
1079 break;
1080 case SHFL_FILE_EXISTS:
1081 sf_dentry_chain_increase_ttl(dentry);
1082 rc_linux = -EEXIST;
1083 break;
1084 default:
1085 sf_dentry_chain_increase_parent_ttl(dentry);
1086 rc_linux = 0;
1087 break;
1088 }
1089 }
1090
1091 sf_i->force_restat = 1; /** @todo Why?!? */
1092 sf_r->Handle.hHost = pCreateParms->Handle;
1093 file->private_data = sf_r;
1094 sf_handle_append(sf_i, &sf_r->Handle);
1095 VbglR0PhysHeapFree(pReq);
1096 SFLOGFLOW(("sf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
1097 return rc_linux;
1098}
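/*
 * Illustrative sketch: a guest-side open("f.txt", O_RDWR | O_CREAT | O_TRUNC, 0644)
 * reaches this function and, per the mapping above, sends the host
 * SHFL_CF_ACT_CREATE_IF_NEW | SHFL_CF_ACT_OVERWRITE_IF_EXISTS | SHFL_CF_ACCESS_READWRITE
 * with fMode taken from the inode; a plain open("f.txt", O_RDONLY) sends
 * SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ.
 */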
1099
1100
1101/**
1102 * Close a regular file.
1103 *
1104 * @param inode the inode
1105 * @param file the file
1106 * @returns 0 on success, Linux error code otherwise
1107 */
1108static int sf_reg_release(struct inode *inode, struct file *file)
1109{
1110 struct sf_reg_info *sf_r;
1111 struct vbsf_super_info *sf_g;
1112 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
1113
1114 SFLOGFLOW(("sf_reg_release: inode=%p file=%p\n", inode, file));
1115 sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1116 sf_r = file->private_data;
1117
1118 BUG_ON(!sf_g);
1119 BUG_ON(!sf_r);
1120
1121#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
1122 /* See the smbfs source (file.c). mmap in particular can cause data to be
1123 * written to the file after it is closed, which we can't cope with. We
1124 * copy and paste the body of filemap_write_and_wait() here as it was not
1125 * defined before 2.6.6 and not exported until quite a bit later. */
1126 /* filemap_write_and_wait(inode->i_mapping); */
1127 if (inode->i_mapping->nrpages
1128 && filemap_fdatawrite(inode->i_mapping) != -EIO)
1129 filemap_fdatawait(inode->i_mapping);
1130#endif
1131
1132 /* Release sf_r, closing the handle if we're the last user. */
1133 file->private_data = NULL;
1134 sf_handle_release(&sf_r->Handle, sf_g, "sf_reg_release");
1135
1136 sf_i->handle = SHFL_HANDLE_NIL;
1137 return 0;
1138}
1139
1140/**
1141 * Wrapper around generic/default seek function that ensures that we've got
1142 * the up-to-date file size when doing anything relative to EOF.
1143 *
1144 * The issue is that the host may extend the file while we weren't looking and
1145 * if the caller wishes to append data, it may end up overwriting existing data
1146 * if we operate with a stale size. So, we always retrieve the file size on EOF
1147 * relative seeks.
1148 */
1149static loff_t sf_reg_llseek(struct file *file, loff_t off, int whence)
1150{
1151 SFLOGFLOW(("sf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
1152
1153 switch (whence) {
1154#ifdef SEEK_HOLE
1155 case SEEK_HOLE:
1156 case SEEK_DATA:
1157#endif
1158 case SEEK_END: {
1159 struct sf_reg_info *sf_r = file->private_data;
1160 int rc = sf_inode_revalidate_with_handle(GET_F_DENTRY(file), sf_r->Handle.hHost, true /*fForce*/,
1161 false /*fInodeLocked*/);
1162 if (rc == 0)
1163 break;
1164 return rc;
1165 }
1166 }
1167
1168#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8)
1169 return generic_file_llseek(file, off, whence);
1170#else
1171 return default_llseek(file, off, whence);
1172#endif
1173}
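/*
 * Illustrative user-space sketch of why the revalidation above matters: without
 * it, the SEEK_END below could report a stale size if the host side has just
 * extended the file, and the subsequent write would clobber host-written data.
 *
 *     off_t cbFile = lseek(fd, 0, SEEK_END);   // must see host-side appends
 *     write(fd, buf, cb);                      // append at the true end of file
 */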
1174
1175/**
1176 * Flush region of file - chiefly mmap/msync.
1177 *
1178 * We cannot use the noop_fsync / simple_sync_file here as that means
1179 * msync(,,MS_SYNC) will return before the data hits the host, thereby
1180 * causing coherency issues with O_DIRECT access to the same file as
1181 * well as any host interaction with the file.
1182 */
1183#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
1184static int sf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1185{
1186# if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1187 return __generic_file_fsync(file, start, end, datasync);
1188# else
1189 return generic_file_fsync(file, start, end, datasync);
1190# endif
1191}
1192#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
1193static int sf_reg_fsync(struct file *file, int datasync)
1194{
1195 return generic_file_fsync(file, datasync);
1196}
1197#else /* < 2.6.35 */
1198static int sf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
1199{
1200# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
1201 return simple_fsync(file, dentry, datasync);
1202# else
1203 int rc;
1204 struct inode *inode = dentry->d_inode;
1205 AssertReturn(inode, -EINVAL);
1206
1207 /** @todo What about file_fsync()? (<= 2.5.11) */
1208
1209# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1210 rc = sync_mapping_buffers(inode->i_mapping);
1211 if ( rc == 0
1212 && (inode->i_state & I_DIRTY)
1213 && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
1214 ) {
1215 struct writeback_control wbc = {
1216 .sync_mode = WB_SYNC_ALL,
1217 .nr_to_write = 0
1218 };
1219 rc = sync_inode(inode, &wbc);
1220 }
1221# else /* < 2.5.12 */
1222 rc = fsync_inode_buffers(inode);
1223# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1224 rc |= fsync_inode_data_buffers(inode);
1225# endif
1226 /** @todo probably need to do more here... */
1227# endif /* < 2.5.12 */
1228 return rc;
1229# endif
1230}
1231#endif /* < 2.6.35 */
1232
1233
1234struct file_operations sf_reg_fops = {
1235 .read = sf_reg_read,
1236 .open = sf_reg_open,
1237 .write = sf_reg_write,
1238 .release = sf_reg_release,
1239 .mmap = generic_file_mmap,
1240#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1241# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
1242/** @todo This code is known to cause caching of data which should not be
1243 * cached. Investigate. */
1244# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
1245 .splice_read = sf_splice_read,
1246# else
1247 .sendfile = generic_file_sendfile,
1248# endif
1249 .aio_read = generic_file_aio_read,
1250 .aio_write = generic_file_aio_write,
1251# endif
1252#endif
1253 .llseek = sf_reg_llseek,
1254 .fsync = sf_reg_fsync,
1255};
1256
1257struct inode_operations sf_reg_iops = {
1258#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
1259 .revalidate = sf_inode_revalidate
1260#else
1261 .getattr = sf_getattr,
1262 .setattr = sf_setattr
1263#endif
1264};
1265
1266#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1267
1268/**
1269 * Used to read the content of a page into the page cache.
1270 *
1271 * Needed for mmap and reads+writes when the file is mmapped in a
1272 * shared+writeable fashion.
1273 */
1274static int sf_readpage(struct file *file, struct page *page)
1275{
1276 struct inode *inode = GET_F_DENTRY(file)->d_inode;
1277 int err;
1278
1279 SFLOGFLOW(("sf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
1280
1281 if (!is_bad_inode(inode)) {
1282 VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1283 if (pReq) {
1284 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1285 struct sf_reg_info *sf_r = file->private_data;
1286 uint32_t cbRead;
1287 int vrc;
1288
1289 pReq->PgLst.offFirstPage = 0;
1290 pReq->PgLst.aPages[0] = page_to_phys(page);
1291 vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root,
1292 pReq,
1293 sf_r->Handle.hHost,
1294 (uint64_t)page->index << PAGE_SHIFT,
1295 PAGE_SIZE,
1296 1 /*cPages*/);
1297
1298 cbRead = pReq->Parms.cb32Read.u.value32;
1299 AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE);
1300 VbglR0PhysHeapFree(pReq);
1301
1302 if (RT_SUCCESS(vrc)) {
1303 if (cbRead == PAGE_SIZE) {
1304 /* likely */
1305 } else {
1306 uint8_t *pbMapped = (uint8_t *)kmap(page);
1307 RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead);
1308 kunmap(page);
1309 /** @todo truncate the inode file size? */
1310 }
1311
1312 flush_dcache_page(page);
1313 SetPageUptodate(page);
1314 err = 0;
1315 } else
1316 err = -EPROTO;
1317 } else
1318 err = -ENOMEM;
1319 } else
1320 err = -EIO;
1321 unlock_page(page);
1322 return err;
1323}
1324
1325
1326/**
1327 * Used to write out the content of a dirty page cache page to the host file.
1328 *
1329 * Needed for mmap and writes when the file is mmapped in a shared+writeable
1330 * fashion.
1331 */
1332static int sf_writepage(struct page *page, struct writeback_control *wbc)
1333{
1334 struct address_space *mapping = page->mapping;
1335 struct inode *inode = mapping->host;
1336 struct sf_inode_info *sf_i = GET_INODE_INFO(inode);
1337 struct sf_handle *pHandle = sf_handle_find(sf_i, SF_HANDLE_F_WRITE, SF_HANDLE_F_APPEND);
1338 int err;
1339
1340 SFLOGFLOW(("sf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
1341 inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0));
1342
1343 if (pHandle) {
1344 struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
1345 VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
1346 if (pReq) {
1347 uint64_t const cbFile = i_size_read(inode);
1348 uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
1349 uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
1350 : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
1351 int vrc;
1352
1353 pReq->PgLst.offFirstPage = 0;
1354 pReq->PgLst.aPages[0] = page_to_phys(page);
1355 vrc = VbglR0SfHostReqWritePgLst(sf_g->map.root,
1356 pReq,
1357 pHandle->hHost,
1358 offInFile,
1359 cbToWrite,
1360 1 /*cPages*/);
1361 AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
1362 ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite),
1363 vrc = VERR_WRITE_ERROR);
1364 VbglR0PhysHeapFree(pReq);
1365
1366 if (RT_SUCCESS(vrc)) {
1367 /* Update the inode if we've extended the file. */
1368 /** @todo is this necessary given the cbToWrite calc above? */
1369 uint64_t const offEndOfWrite = offInFile + cbToWrite;
1370 if ( offEndOfWrite > cbFile
1371 && offEndOfWrite > i_size_read(inode))
1372 i_size_write(inode, offEndOfWrite);
1373
1374 if (PageError(page))
1375 ClearPageError(page);
1376
1377 err = 0;
1378 } else {
1379 ClearPageUptodate(page);
1380 err = -EPROTO;
1381 }
1382 } else
1383 err = -ENOMEM;
1384 sf_handle_release(pHandle, sf_g, "sf_writepage");
1385 } else {
1386 static uint64_t volatile s_cCalls = 0;
1387 if (s_cCalls++ < 16)
1388 printk("sf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
1389 err = -EPROTO;
1390 }
1391 unlock_page(page);
1392 return err;
1393}
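/*
 * Worked example for the cbToWrite calculation above (assuming 4 KiB pages):
 * for a 10000 byte file, pages 0 and 1 are written out in full (4096 bytes
 * each), while page 2 is the EOF page (10000 >> PAGE_SHIFT == 2) and only
 * 10000 & PAGE_OFFSET_MASK = 1808 bytes of it are sent to the host.
 */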
1394
1395# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1396/**
1397 * Called when writing thru the page cache (which we shouldn't be doing).
1398 */
1399int sf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
1400 unsigned len, unsigned flags, struct page **pagep,
1401 void **fsdata)
1402{
1403 /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
1404 * the page cache for any writes AFAIK. We could just as well use
1405 * simple_write_begin & simple_write_end here if we think we really
1406 * need to have non-NULL function pointers in the table... */
1407 static uint64_t volatile s_cCalls = 0;
1408 if (s_cCalls++ < 16) {
1409 printk("vboxsf: Unexpected call to sf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1410 (unsigned long long)pos, len, flags);
1411 RTLogBackdoorPrintf("vboxsf: Unexpected call to sf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
1412 (unsigned long long)pos, len, flags);
1413# ifdef WARN_ON
1414 WARN_ON(1);
1415# endif
1416 }
1417 return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
1418}
1419# endif /* KERNEL_VERSION >= 2.6.24 */
1420
1421# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1422/**
1423 * This is needed to make open accept O_DIRECT as well as dealing with direct
1424 * I/O requests if we don't intercept them earlier.
1425 */
1426# if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
1427static ssize_t sf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1428# elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
1429static ssize_t sf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1430# elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
1431static ssize_t sf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
1432# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 6)
1433static ssize_t sf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1434# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 55)
1435static int sf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1436# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 41)
1437static int sf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1438# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 35)
1439static int sf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1440# elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 26)
1441static int sf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
1442# else
1443static int sf_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, unsigned long blocknr, int blocksize)
1444# endif
1445{
1446 TRACE();
1447 return -EINVAL;
1448}
1449# endif
1450
1451struct address_space_operations sf_reg_aops = {
1452 .readpage = sf_readpage,
1453 .writepage = sf_writepage,
1454 /** @todo Need .writepages if we want msync performance... */
1455# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 12)
1456 .set_page_dirty = __set_page_dirty_buffers,
1457# endif
1458# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
1459 .write_begin = sf_write_begin,
1460 .write_end = simple_write_end,
1461# else
1462 .prepare_write = simple_prepare_write,
1463 .commit_write = simple_commit_write,
1464# endif
1465# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 10)
1466 .direct_IO = sf_direct_IO,
1467# endif
1468};
1469
1470#endif /* LINUX_VERSION_CODE >= 2.6.0 */
1471