VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c @ 4160

Last change on this file: r4160, checked in by vboxsync, 17 years ago

Some bug fixes and allocation fallbacks.

  • Property svn:keywords set to Id Rev
File size: 35.9 KB
Line 
1/* $Revision: 4160 $ */
2/** @file
3 * innotek Portable Runtime - Ring-0 Memory Objects, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include "the-linux-kernel.h"
23
24#include <iprt/memobj.h>
25#include <iprt/alloc.h>
26#include <iprt/assert.h>
27#include <iprt/log.h>
28#include <iprt/string.h>
29#include <iprt/process.h>
30#include "internal/memobj.h"
31
32
33/*******************************************************************************
34* Structures and Typedefs *
35*******************************************************************************/
36/**
37 * The Linux version of the memory object structure.
38 */
39typedef struct RTR0MEMOBJLNX
40{
41 /** The core structure. */
42 RTR0MEMOBJINTERNAL Core;
43 /** Set if the allocation is contiguous.
44 * This means it has to be given back as one chunk. */
45 bool fContiguous;
46 /** Set if we've vmap'ed the memory into ring-0. */
47 bool fMappedToRing0;
48 /** The number of pages in the apPages array. */
49 size_t cPages;
50 /** Array of struct page pointers. (variable size) */
51 struct page *apPages[1];
52} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
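
/*
 * Note: the structure is allocated with enough trailing space for the whole
 * page array, e.g.
 *     rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb)
 * so apPages acts as a cPages-sized array even though it is declared with a
 * single element.
 */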
53
54
55/**
56 * Helper that converts from a RTR0PROCESS handle to a linux task.
57 *
58 * @returns The corresponding Linux task.
59 * @param R0Process IPRT ring-0 process handle.
60 */
61struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
62{
63 /** @todo fix rtR0ProcessToLinuxTask!! */
64 return R0Process == RTR0ProcHandleSelf() ? current : NULL;
65}
66
67
68/**
69 * Compute order. Some functions allocate 2^order pages.
70 *
71 * @returns order.
72 * @param cPages Number of pages.
73 */
74static int rtR0MemObjLinuxOrder(size_t cPages)
75{
76 int iOrder;
77 size_t cTmp;
78
79 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
80 ;
81 if (cPages & ~((size_t)1 << iOrder))
82 ++iOrder;
83
84 return iOrder;
85}
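
/*
 * For illustration, the rounding-up behaviour:
 *     cPages = 1 -> order 0 (1 page)
 *     cPages = 2 -> order 1 (2 pages)
 *     cPages = 3 -> order 2 (4 pages)
 *     cPages = 4 -> order 2 (4 pages)
 *     cPages = 5 -> order 3 (8 pages)
 * i.e. the smallest order such that (1 << order) >= cPages.
 */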
86
87
88/**
89 * Converts from RTMEM_PROT_* to Linux PAGE_*.
90 *
91 * @returns Linux page protection constant.
92 * @param fProt The IPRT protection mask.
93 * @param fKernel Whether it applies to kernel or user space.
94 */
95static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
96{
97 switch (fProt)
98 {
99 default:
100 AssertMsgFailed(("%#x %d\n", fProt, fKernel));
101 case RTMEM_PROT_NONE:
102 return PAGE_NONE;
103
104 case RTMEM_PROT_READ:
105 return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
106
107 case RTMEM_PROT_WRITE:
108 case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
109 return fKernel ? PAGE_KERNEL : PAGE_SHARED;
110
111 case RTMEM_PROT_EXEC:
112 case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
113#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
114 if (fKernel)
115 {
116 pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
117 pgprot_val(fPg) &= ~_PAGE_RW;
118 return fPg;
119 }
120 return PAGE_READONLY_EXEC;
121#else
122 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
123#endif
124
125 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
126 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
127 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
128 }
129}
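
/*
 * For example, rtR0MemObjLinuxConvertProt(RTMEM_PROT_READ | RTMEM_PROT_WRITE, true)
 * yields PAGE_KERNEL, while the same mask with fKernel = false yields PAGE_SHARED.
 */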
130
131
132/**
133 * Internal worker that allocates physical pages and creates the memory object for them.
134 *
135 * @returns IPRT status code.
136 * @param ppMemLnx Where to store the memory object pointer.
137 * @param enmType The object type.
138 * @param cb The number of bytes to allocate.
139 * @param fFlagsLnx The page allocation flags (GFPs).
140 * @param fContiguous Whether the allocation must be contiguous.
141 */
142static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb, unsigned fFlagsLnx, bool fContiguous)
143{
144 size_t iPage;
145 size_t cPages = cb >> PAGE_SHIFT;
146 struct page *paPages;
147
148 /*
149 * Allocate a memory object structure that's large enough to contain
150 * the page pointer array.
151 */
152 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
153 if (!pMemLnx)
154 return VERR_NO_MEMORY;
155 pMemLnx->cPages = cPages;
156
157 /*
158 * Allocate the pages.
159 * For small allocations we'll try contiguous first and then fall back on page by page.
160 */
161#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
162 if ( fContiguous
163 || cb <= PAGE_SIZE * 2)
164 {
165 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cb >> PAGE_SHIFT));
166 if (paPages)
167 {
168 fContiguous = true;
169 for (iPage = 0; iPage < cPages; iPage++)
170 pMemLnx->apPages[iPage] = &paPages[iPage];
171 }
172 else if (fContiguous)
173 {
174 rtR0MemObjDelete(&pMemLnx->Core);
175 return VERR_NO_MEMORY;
176 }
177 }
178
179 if (!fContiguous)
180 {
181 for (iPage = 0; iPage < cPages; iPage++)
182 {
183 pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx);
184 if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
185 {
186 while (iPage-- > 0)
187 __free_page(pMemLnx->apPages[iPage]);
188 rtR0MemObjDelete(&pMemLnx->Core);
189 return VERR_NO_MEMORY;
190 }
191 }
192 }
193
194#else /* < 2.4.22 */
195 /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
196 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cb >> PAGE_SHIFT));
197 if (!paPages)
198 {
199 rtR0MemObjDelete(&pMemLnx->Core);
200 return VERR_NO_MEMORY;
201 }
202 for (iPage = 0; iPage < cPages; iPage++)
203 {
204 pMemLnx->apPages[iPage] = &paPages[iPage];
205 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
206 MY_CHANGE_PAGE_ATTR(pMemLnx->apPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
207 if (PageHighMem(pMemLnx->apPages[iPage]))
208 BUG();
209 }
210
211 fContiguous = true;
212#endif /* < 2.4.22 */
213 pMemLnx->fContiguous = fContiguous;
214
215 /*
216 * Reserve the pages.
217 */
218 for (iPage = 0; iPage < cPages; iPage++)
219 SetPageReserved(pMemLnx->apPages[iPage]);
220
221 *ppMemLnx = pMemLnx;
222 return VINF_SUCCESS;
223}
224
225
226/**
227 * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
228 *
229 * This method does NOT free the object.
230 *
231 * @param pMemLnx The object whose physical pages should be freed.
232 */
233static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
234{
235 size_t iPage = pMemLnx->cPages;
236 if (iPage > 0)
237 {
238 /*
239 * Restore the page flags.
240 */
241 while (iPage-- > 0)
242 {
243 ClearPageReserved(pMemLnx->apPages[iPage]);
244#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
245#else
246 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
247 MY_CHANGE_PAGE_ATTR(pMemLnx->apPages[iPage], 1, PAGE_KERNEL);
248#endif
249 }
250
251 /*
252 * Free the pages.
253 */
254#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
255 if (!pMemLnx->fContiguous)
256 {
257 iPage = pMemLnx->cPages;
258 while (iPage-- > 0)
259 __free_page(pMemLnx->apPages[iPage]);
260 }
261 else
262#endif
263 __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
264
265 pMemLnx->cPages = 0;
266 }
267}
268
269
270/**
271 * Maps the allocation into ring-0.
272 *
273 * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJLNX::fMappedToRing0 members.
274 *
275 * Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
276 * space, so we'll use that mapping if possible. If execute access is required, we'll
277 * play safe and do our own mapping.
278 *
279 * @returns IPRT status code.
280 * @param pMemLnx The linux memory object to map.
281 * @param fExecutable Whether execute access is required.
282 */
283static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
284{
285 int rc = VINF_SUCCESS;
286
287 /*
288 * Choose mapping strategy.
289 */
290 bool fMustMap = fExecutable
291 || !pMemLnx->fContiguous;
292 if (!fMustMap)
293 {
294 size_t iPage = pMemLnx->cPages;
295 while (iPage-- > 0)
296 if (PageHighMem(pMemLnx->apPages[iPage]))
297 {
298 fMustMap = true;
299 break;
300 }
301 }
302
303 Assert(!pMemLnx->Core.pv);
304 Assert(!pMemLnx->fMappedToRing0);
305
306 if (fMustMap)
307 {
308 /*
309 * Use vmap - 2.4.22 and later.
310 */
311#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
312 pgprot_t fPg;
313 pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
314# ifdef _PAGE_NX
315 if (!fExecutable)
316 pgprot_val(fPg) |= _PAGE_NX;
317# endif
318
319# ifdef VM_MAP
320 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
321# else
322 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
323# endif
324 if (pMemLnx->Core.pv)
325 pMemLnx->fMappedToRing0 = true;
326 else
327 rc = VERR_MAP_FAILED;
328#else /* < 2.4.22 */
329 rc = VERR_NOT_SUPPORTED;
330#endif
331 }
332 else
333 {
334 /*
335 * Use the kernel RAM mapping.
336 */
337 pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
338 Assert(pMemLnx->Core.pv);
339 }
340
341 return rc;
342}
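
/*
 * In other words: anything that is non-contiguous, executable or located in
 * high memory gets its own vmap'ed ring-0 mapping, while a plain contiguous
 * low-memory allocation just reuses the kernel's linear mapping via
 * phys_to_virt.
 */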
343
344
345/**
346 * Undoes what rtR0MemObjLinuxVMap() did.
347 *
348 * @param pMemLnx The linux memory object.
349 */
350static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
351{
352#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
353 if (pMemLnx->fMappedToRing0)
354 {
355 Assert(pMemLnx->Core.pv);
356 vunmap(pMemLnx->Core.pv);
357 pMemLnx->fMappedToRing0 = false;
358 }
359#else /* < 2.4.22 */
360 Assert(!pMemLnx->fMappedToRing0);
361#endif
362 pMemLnx->Core.pv = NULL;
363}
364
365
366int rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
367{
368 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
369
370 /*
371 * Release any memory that we've allocated or locked.
372 */
373 switch (pMemLnx->Core.enmType)
374 {
375 case RTR0MEMOBJTYPE_LOW:
376 case RTR0MEMOBJTYPE_PAGE:
377 case RTR0MEMOBJTYPE_CONT:
378 case RTR0MEMOBJTYPE_PHYS:
379 rtR0MemObjLinuxVUnmap(pMemLnx);
380 rtR0MemObjLinuxFreePages(pMemLnx);
381 break;
382
383 case RTR0MEMOBJTYPE_LOCK:
384 if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
385 {
386 size_t iPage;
387 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
388 Assert(pTask);
389 if (pTask && pTask->mm)
390 down_read(&pTask->mm->mmap_sem);
391
392 iPage = pMemLnx->cPages;
393 while (iPage-- > 0)
394 {
395 if (!PageReserved(pMemLnx->apPages[iPage]))
396 SetPageDirty(pMemLnx->apPages[iPage]);
397 page_cache_release(pMemLnx->apPages[iPage]);
398 }
399
400 if (pTask && pTask->mm)
401 up_read(&pTask->mm->mmap_sem);
402 }
403 else
404 AssertFailed(); /* not implemented for R0 */
405 break;
406
407 case RTR0MEMOBJTYPE_RES_VIRT:
408 Assert(pMemLnx->Core.pv);
409 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
410 {
411 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
412 Assert(pTask);
413 if (pTask && pTask->mm)
414 {
415 down_write(&pTask->mm->mmap_sem);
416 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
417 up_write(&pTask->mm->mmap_sem);
418 }
419 }
420 else
421 {
422 vunmap(pMemLnx->Core.pv);
423
424 Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
425 __free_page(pMemLnx->apPages[0]);
426 pMemLnx->apPages[0] = NULL;
427 pMemLnx->cPages = 0;
428 }
429 pMemLnx->Core.pv = NULL;
430 break;
431
432 case RTR0MEMOBJTYPE_MAPPING:
433 Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
434 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
435 {
436 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
437 Assert(pTask);
438 if (pTask && pTask->mm)
439 {
440 down_write(&pTask->mm->mmap_sem);
441 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
442 up_write(&pTask->mm->mmap_sem);
443 }
444 }
445 else
446 vunmap(pMemLnx->Core.pv);
447 pMemLnx->Core.pv = NULL;
448 break;
449
450 default:
451 AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
452 return VERR_INTERNAL_ERROR;
453 }
454 return VINF_SUCCESS;
455}
456
457
458int rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
459{
460 PRTR0MEMOBJLNX pMemLnx;
461 int rc;
462
463#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
464 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_HIGHUSER, false /* non-contiguous */);
465#else
466 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_USER, false /* non-contiguous */);
467#endif
468 if (RT_SUCCESS(rc))
469 {
470 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
471 if (RT_SUCCESS(rc))
472 {
473 *ppMem = &pMemLnx->Core;
474 return rc;
475 }
476
477 rtR0MemObjLinuxFreePages(pMemLnx);
478 rtR0MemObjDelete(&pMemLnx->Core);
479 }
480
481 return rc;
482}
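
/*
 * Illustrative sketch only: ring-0 code normally reaches this backend through
 * the generic API declared in iprt/memobj.h rather than calling the
 * rtR0MemObjNative* workers directly. The helper name below is made up for the
 * example and the block is compiled out.
 */
#if 0
static int rtR0MemObjLinuxUsageSketch(void)
{
    RTR0MEMOBJ hMemObj;
    int rc = RTR0MemObjAllocPage(&hMemObj, 4 * PAGE_SIZE, false /* fExecutable */);
    if (RT_SUCCESS(rc))
    {
        void    *pv    = RTR0MemObjAddress(hMemObj);            /* the ring-0 mapping made above */
        RTHCPHYS Phys0 = RTR0MemObjGetPagePhysAddr(hMemObj, 0); /* physical address of the first page */
        NOREF(pv); NOREF(Phys0);
        rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
    }
    return rc;
}
#endif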
483
484
485int rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
486{
487 PRTR0MEMOBJLNX pMemLnx;
488 int rc;
489
490#ifdef RT_ARCH_AMD64
491# ifdef GFP_DMA32
492 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA32, false /* non-contiguous */);
493 if (RT_FAILURE(rc))
494# endif
495 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA, false /* non-contiguous */);
496#else
497 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_USER, false /* non-contiguous */);
498#endif
499 if (RT_SUCCESS(rc))
500 {
501 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
502 if (RT_SUCCESS(rc))
503 {
504 *ppMem = &pMemLnx->Core;
505 return rc;
506 }
507
508 rtR0MemObjLinuxFreePages(pMemLnx);
509 rtR0MemObjDelete(&pMemLnx->Core);
510 }
511
512 return rc;
513}
514
515
516int rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
517{
518 PRTR0MEMOBJLNX pMemLnx;
519 int rc;
520
521#ifdef RT_ARCH_AMD64
522# ifdef GFP_DMA32
523 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_DMA32, true /* contiguous */);
524 if (RT_FAILURE(rc))
525# endif
526 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_DMA, true /* contiguous */);
527#else
528 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_USER, true /* contiguous */);
529#endif
530 if (RT_SUCCESS(rc))
531 {
532 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
533 if (RT_SUCCESS(rc))
534 {
535#ifdef RT_STRICT
536 size_t iPage = pMemLnx->cPages;
537 while (iPage-- > 0)
538 Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
539#endif
540 pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
541 *ppMem = &pMemLnx->Core;
542 return rc;
543 }
544
545 rtR0MemObjLinuxFreePages(pMemLnx);
546 rtR0MemObjDelete(&pMemLnx->Core);
547 }
548
549 return rc;
550}
551
552
553/**
554 * Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
555 *
556 * @returns IPRT status.
557 * @param ppMem Where to store the memory object pointer on success.
558 * @param enmType The object type.
559 * @param cb The size of the allocation.
560 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
561 * @param fGfp The Linux GFP flags to use for the allocation.
562 */
563static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest, unsigned fGfp)
564{
565 PRTR0MEMOBJLNX pMemLnx;
566 int rc;
567
568 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, fGfp,
569 enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */);
570 if (RT_FAILURE(rc))
571 return rc;
572
573 /*
574 * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
575 */
576 if (PhysHighest != NIL_RTHCPHYS)
577 {
578 size_t iPage = pMemLnx->cPages;
579 while (iPage-- > 0)
580 if (page_to_phys(pMemLnx->apPages[iPage]) >= PhysHighest)
581 {
582 rtR0MemObjLinuxFreePages(pMemLnx);
583 rtR0MemObjDelete(&pMemLnx->Core);
584 return VERR_NO_MEMORY;
585 }
586 }
587
588 /*
589 * Complete the object.
590 */
591 if (enmType == RTR0MEMOBJTYPE_PHYS)
592 {
593 pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
594 pMemLnx->Core.u.Phys.fAllocated = true;
595 }
596 *ppMem = &pMemLnx->Core;
597 return rc;
598}
599
600
601/**
602 * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
603 *
604 * @returns IPRT status.
605 * @param ppMem Where to store the memory object pointer on success.
606 * @param enmType The object type.
607 * @param cb The size of the allocation.
608 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
609 */
610static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest)
611{
612 int rc;
613
614 /*
615 * There are two clear cases: the <=16MB one and the anything-goes one.
616 * When the physical address limit is somewhere in between those two we'll
617 * just have to try, starting with HIGHUSER and working our way through the
618 * different types, hoping we'll get lucky.
619 *
620 * We should probably move this physical address restriction logic up to
621 * the page alloc function as it would be more efficient there. But since
622 * we don't expect this to be a performance issue just yet it can wait.
623 */
624 if (PhysHighest == NIL_RTHCPHYS)
625 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_HIGHUSER);
626 else if (PhysHighest <= _1M * 16)
627 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_DMA);
628 else
629 {
630 rc = VERR_NO_MEMORY;
631 if (RT_FAILURE(rc))
632 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_HIGHUSER);
633 if (RT_FAILURE(rc))
634 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_USER);
635#ifdef GFP_DMA32
636 if (RT_FAILURE(rc))
637 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_DMA32);
638#endif
639 if (RT_FAILURE(rc))
640 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, PhysHighest, GFP_DMA);
641 }
642 return rc;
643}
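
/*
 * Example: with PhysHighest = _4G (i.e. between the two clear cases) the code
 * above tries GFP_HIGHUSER, then GFP_USER, then GFP_DMA32 when available and
 * finally GFP_DMA, and rtR0MemObjLinuxAllocPhysSub2 rejects any result that
 * lands above the limit.
 */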
644
645
646int rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
647{
648 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, PhysHighest);
649}
650
651
652int rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
653{
654 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PhysHighest);
655}
656
657
658int rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb)
659{
660 /*
661 * All we need to do here is to validate that we can use
662 * ioremap on the specified address (32/64-bit dma_addr_t).
663 */
664 PRTR0MEMOBJLNX pMemLnx;
665 dma_addr_t PhysAddr = Phys;
666 AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
667
668 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb);
669 if (!pMemLnx)
670 return VERR_NO_MEMORY;
671
672 pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
673 pMemLnx->Core.u.Phys.fAllocated = false;
674 Assert(!pMemLnx->cPages);
675 *ppMem = &pMemLnx->Core;
676 return VINF_SUCCESS;
677}
678
679
680int rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, RTR0PROCESS R0Process)
681{
682 const int cPages = cb >> PAGE_SHIFT;
683 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
684 struct vm_area_struct **papVMAs;
685 PRTR0MEMOBJLNX pMemLnx;
686 int rc;
687
688 /*
689 * Check for valid task and size overflows.
690 */
691 if (!pTask)
692 return VERR_NOT_SUPPORTED;
693 if (((size_t)cPages << PAGE_SHIFT) != cb)
694 return VERR_OUT_OF_RANGE;
695
696 /*
697 * Allocate the memory object and a temporary buffer for the VMAs.
698 */
699 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
700 if (!pMemLnx)
701 return VERR_NO_MEMORY;
702
703 papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
704 if (papVMAs)
705 {
706 down_read(&pTask->mm->mmap_sem);
707
708 /*
709 * Get user pages.
710 */
711 rc = get_user_pages(pTask, /* Task for fault accounting. */
712 pTask->mm, /* Whose pages. */
713 R3Ptr, /* Where from. */
714 cPages, /* How many pages. */
715 1, /* Write to memory. */
716 0, /* force. */
717 &pMemLnx->apPages[0], /* Page array. */
718 papVMAs); /* vmas */
719 if (rc == cPages)
720 {
721 /*
722 * Flush dcache (required?) and protect against fork.
723 */
724 /** @todo The Linux fork() protection will require more work if this API
725 * is to be used for anything but locking VM pages. */
726 while (rc-- > 0)
727 {
728 flush_dcache_page(pMemLnx->apPages[rc]);
729 papVMAs[rc]->vm_flags |= VM_DONTCOPY;
730 }
731
732 up_read(&pTask->mm->mmap_sem);
733
734 RTMemFree(papVMAs);
735
736 pMemLnx->Core.u.Lock.R0Process = R0Process;
737 pMemLnx->cPages = cPages;
738 Assert(!pMemLnx->fMappedToRing0);
739 *ppMem = &pMemLnx->Core;
740
741 return VINF_SUCCESS;
742 }
743
744 /*
745 * Failed - we need to unlock any pages that we managed to lock.
746 */
747 while (rc-- > 0)
748 {
749 if (!PageReserved(pMemLnx->apPages[rc]))
750 SetPageDirty(pMemLnx->apPages[rc]);
751 page_cache_release(pMemLnx->apPages[rc]);
752 }
753
754 up_read(&pTask->mm->mmap_sem);
755
756 RTMemFree(papVMAs);
757 rc = VERR_LOCK_FAILED;
758 }
759
760 rtR0MemObjDelete(&pMemLnx->Core);
761 return rc;
762}
763
764
765int rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb)
766{
767 /* What is there to lock? Should/Can we fake this? */
768 return VERR_NOT_SUPPORTED;
769}
770
771
772int rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
773{
774#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
775 const size_t cPages = cb >> PAGE_SHIFT;
776 struct page *pDummyPage;
777 struct page **papPages;
778
779 /* check for unsupported stuff. */
780 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
781 AssertMsgReturn(uAlignment <= PAGE_SIZE, ("%#x\n", uAlignment), VERR_NOT_SUPPORTED);
782
783 /*
784 * Allocate a dummy page and create a page pointer array for vmap such that
785 * the dummy page is mapped all over the reserved area.
786 */
787 pDummyPage = alloc_page(GFP_HIGHUSER);
788 if (!pDummyPage)
789 return VERR_NO_MEMORY;
790 papPages = RTMemAlloc(sizeof(*papPages) * cPages);
791 if (papPages)
792 {
793 void *pv;
794 size_t iPage = cPages;
795 while (iPage-- > 0)
796 papPages[iPage] = pDummyPage;
797# ifdef VM_MAP
798 pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
799# else
800 pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
801# endif
802 RTMemFree(papPages);
803 if (pv)
804 {
805 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
806 if (pMemLnx)
807 {
808 pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
809 pMemLnx->cPages = 1;
810 pMemLnx->apPages[0] = pDummyPage;
811 *ppMem = &pMemLnx->Core;
812 return VINF_SUCCESS;
813 }
814 vunmap(pv);
815 }
816 }
817 __free_page(pDummyPage);
818 return VERR_NO_MEMORY;
819
820#else /* < 2.4.22 */
821 /*
822 * Could probably use ioremap here, but the caller is in a better position than us
823 * to select some safe physical memory.
824 */
825 return VERR_NOT_SUPPORTED;
826#endif
827}
828
829
830/**
831 * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
832 * an empty user space mapping.
833 *
834 * The caller takes care of acquiring the mmap_sem of the task.
835 *
836 * @returns Pointer to the mapping.
837 * (void *)-1 on failure.
838 * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
839 * @param cb The size of the mapping.
840 * @param uAlignment The alignment of the mapping.
841 * @param pTask The Linux task to create this mapping in.
842 * @param fProt The RTMEM_PROT_* mask.
843 */
844static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
845{
846 unsigned fLnxProt;
847 unsigned long ulAddr;
848
849 /*
850 * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
851 */
852 fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
853 if (fProt == RTMEM_PROT_NONE)
854 fLnxProt = PROT_NONE;
855 else
856 {
857 fLnxProt = 0;
858 if (fProt & RTMEM_PROT_READ)
859 fLnxProt |= PROT_READ;
860 if (fProt & RTMEM_PROT_WRITE)
861 fLnxProt |= PROT_WRITE;
862 if (fProt & RTMEM_PROT_EXEC)
863 fLnxProt |= PROT_EXEC;
864 }
865
866 if (R3PtrFixed != (RTR3PTR)-1)
867 ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
868 else
869 {
870 ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
871 if ( !(ulAddr & ~PAGE_MASK)
872 && (ulAddr & (uAlignment - 1)))
873 {
874 /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
875 * up alignment gaps. This is of course complicated by fragmentation (which we might have caused
876 * ourselves) and further by there being two mmap strategies (top / bottom). */
877 /* For now, just ignore uAlignment requirements... */
878 }
879 }
880 if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
881 return (void *)-1;
882 return (void *)ulAddr;
883}
884
885
886int rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
887{
888 PRTR0MEMOBJLNX pMemLnx;
889 void *pv;
890 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
891 if (!pTask)
892 return VERR_NOT_SUPPORTED;
893
894 /*
895 * Let rtR0MemObjLinuxDoMmap do the difficult bits.
896 */
897 down_write(&pTask->mm->mmap_sem);
898 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
899 up_write(&pTask->mm->mmap_sem);
900 if (pv == (void *)-1)
901 return VERR_NO_MEMORY;
902
903 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
904 if (!pMemLnx)
905 {
906 down_write(&pTask->mm->mmap_sem);
907 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, cb);
908 up_write(&pTask->mm->mmap_sem);
909 return VERR_NO_MEMORY;
910 }
911
912 pMemLnx->Core.u.ResVirt.R0Process = R0Process;
913 *ppMem = &pMemLnx->Core;
914 return VINF_SUCCESS;
915}
916
917
918int rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment, unsigned fProt)
919{
920 int rc = VERR_NO_MEMORY;
921 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
922 PRTR0MEMOBJLNX pMemLnx;
923
924 /* Fail if requested to do something we can't. */
925 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
926 AssertMsgReturn(uAlignment <= PAGE_SIZE, ("%#x\n", uAlignment), VERR_NOT_SUPPORTED);
927
928 /*
929 * Create the IPRT memory object.
930 */
931 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
932 if (pMemLnx)
933 {
934 if (pMemLnxToMap->cPages)
935 {
936#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
937 /*
938 * Use vmap - 2.4.22 and later.
939 */
940 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
941# ifdef VM_MAP
942 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_MAP, fPg);
943# else
944 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_ALLOC, fPg);
945# endif
946 if (pMemLnx->Core.pv)
947 {
948 pMemLnx->fMappedToRing0 = true;
949 rc = VINF_SUCCESS;
950 }
951 else
952 rc = VERR_MAP_FAILED;
953
954#else /* < 2.4.22 */
955 /*
956 * The only option here is to share mappings if possible and forget about fProt.
957 */
958 if (rtR0MemObjIsRing3(pMemToMap))
959 rc = VERR_NOT_SUPPORTED;
960 else
961 {
962 rc = VINF_SUCCESS;
963 if (!pMemLnxToMap->Core.pv)
964 rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
965 if (RT_SUCCESS(rc))
966 {
967 Assert(pMemLnxToMap->Core.pv);
968 pMemLnx->Core.pv = pMemLnxToMap->Core.pv;
969 }
970 }
971#endif
972 }
973 else
974 {
975 /*
976 * MMIO / physical memory.
977 */
978 Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
979 pMemLnx->Core.pv = ioremap(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb);
980 if (pMemLnx->Core.pv)
981 {
982 /** @todo fix protection. */
983 rc = VINF_SUCCESS;
984 }
985 }
986 if (RT_SUCCESS(rc))
987 {
988 pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
989 *ppMem = &pMemLnx->Core;
990 return VINF_SUCCESS;
991 }
992 rtR0MemObjDelete(&pMemLnx->Core);
993 }
994
995 return rc;
996}
997
998
999int rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed, size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
1000{
1001 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1002 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1003 int rc = VERR_NO_MEMORY;
1004 PRTR0MEMOBJLNX pMemLnx;
1005
1006 /*
1007 * Check for restrictions.
1008 */
1009 if (!pTask)
1010 return VERR_NOT_SUPPORTED;
1011
1012 /*
1013 * Create the IPRT memory object.
1014 */
1015 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1016 if (pMemLnx)
1017 {
1018 /*
1019 * Allocate user space mapping.
1020 */
1021 void *pv;
1022 down_write(&pTask->mm->mmap_sem);
1023 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, pMemLnxToMap->Core.cb, uAlignment, pTask, fProt);
1024 if (pv != (void *)-1)
1025 {
1026 /*
1027 * Map page by page into the mmap area.
1028 * This is generic, paranoid and not very efficient.
1029 */
1030 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
1031 unsigned long ulAddrCur = (unsigned long)pv;
1032 const size_t cPages = pMemLnxToMap->Core.cb >> PAGE_SHIFT;
1033 size_t iPage;
1034 rc = 0;
1035 if (pMemLnxToMap->cPages)
1036 {
1037 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1038 {
1039#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1040 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1041 AssertBreak(vma, rc = VERR_INTERNAL_ERROR);
1042#endif
1043
1044#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1045 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1046#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1047 rc = remap_page_range(vma, ulAddrCur, page_to_phys(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1048#else /* 2.4 */
1049 rc = remap_page_range(ulAddrCur, page_to_phys(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1050#endif
1051 if (rc)
1052 break;
1053 }
1054 }
1055 else
1056 {
1057 RTHCPHYS Phys;
1058 if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
1059 Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
1060 else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
1061 Phys = pMemLnxToMap->Core.u.Cont.Phys;
1062 else
1063 {
1064 AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
1065 Phys = NIL_RTHCPHYS;
1066 }
1067 if (Phys != NIL_RTHCPHYS)
1068 {
1069 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
1070 {
1071#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1072 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1073 AssertBreak(vma, rc = VERR_INTERNAL_ERROR);
1074#endif
1075
1076#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1077 rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1078#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1079 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1080#else /* 2.4 */
1081 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1082#endif
1083 if (rc)
1084 break;
1085 }
1086 }
1087 }
1088 if (!rc)
1089 {
1090 up_write(&pTask->mm->mmap_sem);
1091
1092 pMemLnx->Core.pv = pv;
1093 pMemLnx->Core.u.Mapping.R0Process = R0Process;
1094 *ppMem = &pMemLnx->Core;
1095 return VINF_SUCCESS;
1096 }
1097
1098 /*
1099 * Bail out.
1100 */
1101 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, pMemLnxToMap->Core.cb);
1102 if (rc != VERR_INTERNAL_ERROR)
1103 rc = VERR_NO_MEMORY;
1104 }
1105
1106 up_write(&pTask->mm->mmap_sem);
1107
1108 rtR0MemObjDelete(&pMemLnx->Core);
1109 }
1110
1111 return rc;
1112}
1113
1114
1115RTHCPHYS rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
1116{
1117 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
1118
1119 if (pMemLnx->cPages)
1120 return page_to_phys(pMemLnx->apPages[iPage]);
1121
1122 switch (pMemLnx->Core.enmType)
1123 {
1124 case RTR0MEMOBJTYPE_CONT:
1125 return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
1126
1127 case RTR0MEMOBJTYPE_PHYS:
1128 return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
1129
1130 /* the parent knows */
1131 case RTR0MEMOBJTYPE_MAPPING:
1132 return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
1133
1134 /* cPages > 0 */
1135 case RTR0MEMOBJTYPE_LOW:
1136 case RTR0MEMOBJTYPE_LOCK:
1137 case RTR0MEMOBJTYPE_PHYS_NC:
1138 case RTR0MEMOBJTYPE_PAGE:
1139 default:
1140 AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
1141 /* fall thru */
1142
1143 case RTR0MEMOBJTYPE_RES_VIRT:
1144 return NIL_RTHCPHYS;
1145 }
1146}
1147