VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c@4155

Last change on this file since 4155 was 4155, checked in by vboxsync, 17 years ago

RTR0MemGetAddressR3 & RTR0MemObjLockUser. Linux memobj impl.

  • Property svn:keywords set to Id
File size: 35.0 KB
Line 
1/* $Id: memobj-r0drv-linux.c 4155 2007-08-15 19:41:26Z vboxsync $ */
2/** @file
3 * innotek Portable Runtime - Ring-0 Memory Objects, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include "the-linux-kernel.h"
23
24#include <iprt/memobj.h>
25#include <iprt/alloc.h>
26#include <iprt/assert.h>
27#include <iprt/log.h>
28//#include <iprt/param.h>
29#include <iprt/string.h>
30#include <iprt/process.h>
31#include "internal/memobj.h"
32
33
34/*******************************************************************************
35* Structures and Typedefs *
36*******************************************************************************/
37/**
38 * The Linux version of the memory object structure.
39 */
40typedef struct RTR0MEMOBJLNX
41{
42 /** The core structure. */
43 RTR0MEMOBJINTERNAL Core;
44 /** Set if the allocation is contiguous.
45 * This means it has to be given back as one chunk. */
46 bool fContiguous;
47 /** Set if we've vmap'ed the memory into ring-0. */
48 bool fMappedToRing0;
49 /** The number of pages in the apPages array. */
50 size_t cPages;
51 /** Array of struct page pointers. (variable size) */
52 struct page *apPages[1];
53} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
54
55
56/**
57 * Helper that converts from a RTR0PROCESS handle to a linux task.
58 *
59 * @returns The corresponding Linux task.
60 * @param R0Process IPRT ring-0 process handle.
61 */
62struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
63{
64 /** @todo fix rtR0ProcessToLinuxTask!! */
65 return R0Process == RTR0ProcHandleSelf() ? current : NULL;
66}
67
68
69/**
70 * Compute order. Some functions allocate 2^order pages.
71 *
72 * @returns order.
73 * @param cPages Number of pages.
74 */
75static int rtR0MemObjLinuxOrder(size_t cPages)
76{
77 int iOrder;
78 size_t cTmp;
79
80 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
81 ;
82 if (cPages & ~((size_t)1 << iOrder))
83 ++iOrder;
84
85 return iOrder;
86}
87
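To make the rounding behaviour of rtR0MemObjLinuxOrder() concrete, here is a minimal user-space sketch of the same computation; the helper and test harness names are ours, not part of IPRT.

#include <stdio.h>
#include <stddef.h>

/* Same computation as rtR0MemObjLinuxOrder(): find the highest set bit,
 * then round up when cPages is not an exact power of two. */
static int order_of(size_t cPages)
{
    int iOrder = 0;
    size_t cTmp = cPages;
    while (cTmp >>= 1)
        ++iOrder;
    if (cPages & ~((size_t)1 << iOrder))
        ++iOrder;
    return iOrder;
}

int main(void)
{
    /* Expected: 1 -> 0, 2 -> 1, 3 -> 2, 4 -> 2, 5..8 -> 3, 9 -> 4. */
    size_t cPages;
    for (cPages = 1; cPages <= 9; cPages++)
        printf("cPages=%zu -> order %d (covers %zu pages)\n",
               cPages, order_of(cPages), (size_t)1 << order_of(cPages));
    return 0;
}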
88
89/**
90 * Converts from RTMEM_PROT_* to Linux PAGE_*.
91 *
92 * @returns Linux page protection constant.
93 * @param fProt The IPRT protection mask.
94 * @param fKernel Whether it applies to kernel or user space.
95 */
96static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
97{
98 switch (fProt)
99 {
100 default:
101 AssertMsgFailed(("%#x %d\n", fProt, fKernel));
102 case RTMEM_PROT_NONE:
103 return PAGE_NONE;
104
105 case RTMEM_PROT_READ:
106 return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
107
108 case RTMEM_PROT_WRITE:
109 case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
110 return fKernel ? PAGE_KERNEL : PAGE_SHARED;
111
112 case RTMEM_PROT_EXEC:
113 case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
114#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
115 if (fKernel)
116 {
117 pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
118 pgprot_val(fPg) &= ~_PAGE_RW;
119 return fPg;
120 }
121 return PAGE_READONLY_EXEC;
122#else
123 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
124#endif
125
126 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
127 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
128 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
129 }
130}
131
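A note on the converter's input: the RTMEM_PROT_* values are single bits that callers OR together, and the switch above matches the combined value rather than testing bits one at a time. A stand-in sketch of that dispatch style follows; the bit values are assumed for illustration only, the authoritative ones live in iprt/mem.h, and the strings merely name the outcome.

/* Assumed stand-ins for the RTMEM_PROT_* bits (illustration only). */
#define MY_PROT_NONE   0x0
#define MY_PROT_READ   0x1
#define MY_PROT_WRITE  0x2
#define MY_PROT_EXEC   0x4

/* Mirrors the case grouping used by rtR0MemObjLinuxConvertProt(): write
 * is grouped with read/write, exec with read/exec, and so on. */
static const char *describe_prot(unsigned fProt)
{
    switch (fProt)
    {
        case MY_PROT_NONE:                                  return "no access";
        case MY_PROT_READ:                                  return "read-only";
        case MY_PROT_WRITE:
        case MY_PROT_WRITE | MY_PROT_READ:                  return "read/write";
        case MY_PROT_EXEC:
        case MY_PROT_EXEC | MY_PROT_READ:                   return "read/execute";
        case MY_PROT_WRITE | MY_PROT_EXEC:
        case MY_PROT_WRITE | MY_PROT_EXEC | MY_PROT_READ:   return "read/write/execute";
        default:                                            return "unexpected combination";
    }
}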
132
133/**
134 * Internal worker that allocates physical pages and creates the memory object for them.
135 *
136 * @returns IPRT status code.
137 * @param ppMemLnx Where to store the memory object pointer.
138 * @param enmType The object type.
139 * @param cb The number of bytes to allocate.
140 * @param fFlagsLnx The page allocation flags (GFPs).
141 * @param fContiguous Whether the allocation must be contiguous.
142 */
143static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb, unsigned fFlagsLnx, bool fContiguous)
144{
145 size_t iPage;
146 size_t cPages = cb >> PAGE_SHIFT;
147 struct page *paPages;
148
149 /*
150 * Allocate a memory object structure that's large enough to contain
151 * the page pointer array.
152 */
153 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
154 if (!pMemLnx)
155 return VERR_NO_MEMORY;
156 pMemLnx->cPages = cPages;
157
158 /*
159 * Allocate the pages.
160 * For small allocations we'll try contiguous first and then fall back on page by page.
161 */
162#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
163 if ( fContiguous
164 || cb <= PAGE_SIZE * 2)
165 {
166 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
167 if (paPages)
168 {
169 fContiguous = true;
170 for (iPage = 0; iPage < cPages; iPage++)
171 pMemLnx->apPages[iPage] = &paPages[iPage];
172 }
173 else if (fContiguous)
174 {
175 rtR0MemObjDelete(&pMemLnx->Core);
176 return VERR_NO_MEMORY;
177 }
178 }
179
180 if (!fContiguous)
181 {
182 for (iPage = 0; iPage < cPages; iPage++)
183 {
184 pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx);
185 if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
186 {
187 while (iPage-- > 0)
188 __free_page(pMemLnx->apPages[iPage]);
189 rtR0MemObjDelete(&pMemLnx->Core);
190 return VERR_NO_MEMORY;
191 }
192 }
193 }
194
195#else /* < 2.4.22 */
196 /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
197 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
198 if (!paPages)
199 {
200 rtR0MemObjDelete(&pMemLnx->Core);
201 return VERR_NO_MEMORY;
202 }
203 for (iPage = 0; iPage < cPages; iPage++)
204 {
205 pMemLnx->apPages[iPage] = &paPages[iPage];
206 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
207 MY_CHANGE_PAGE_ATTR(pMemLnx->apPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
208 if (PageHighMem(pMemLnx->apPages[iPage]))
209 BUG();
210 }
211
212 fContiguous = true; /* alloc_pages() gave us one physically contiguous block */
213#endif /* < 2.4.22 */
214 pMemLnx->fContiguous = fContiguous;
215
216 /*
217 * Reserve the pages.
218 */
219 for (iPage = 0; iPage < cPages; iPage++)
220 SetPageReserved(pMemLnx->apPages[iPage]);
221
222 *ppMemLnx = pMemLnx;
223 return VINF_SUCCESS;
224}
225
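The rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), ...) call above sizes the object so that the one-element apPages tail actually holds cPages entries. A minimal user-space sketch of that sizing pattern, with hypothetical names standing in for the IPRT ones:

#include <stddef.h>
#include <stdlib.h>

/* Stand-in for RTR0MEMOBJLNX: a fixed header plus a trailing page-pointer
 * array that is over-allocated to the requested length. */
struct pageobj
{
    size_t cPages;
    void  *apPages[1];  /* really cPages entries long */
};

static struct pageobj *pageobj_alloc(size_t cPages)
{
    /* Plays the role of RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]):
     * offset of the array plus cPages pointer slots. */
    size_t cbObj = offsetof(struct pageobj, apPages) + cPages * sizeof(void *);
    struct pageobj *pObj = (struct pageobj *)calloc(1, cbObj);
    if (pObj)
        pObj->cPages = cPages;
    return pObj;  /* header and array are one block, released with a single free() */
}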
226
227/**
228 * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
229 *
230 * This method does NOT free the object.
231 *
232 * @param pMemLnx The object which physical pages should be freed.
233 */
234static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
235{
236 size_t iPage = pMemLnx->cPages;
237 if (iPage > 0)
238 {
239 /*
240 * Restore the page flags.
241 */
242 while (iPage-- > 0)
243 {
244 ClearPageReserved(pMemLnx->apPages[iPage]);
245#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
246#else
247 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
248 MY_CHANGE_PAGE_ATTR(pMemLnx->apPages[iPage], 1, PAGE_KERNEL);
249#endif
250 }
251
252 /*
253 * Free the pages.
254 */
255#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
256 if (!pMemLnx->fContiguous)
257 {
258 iPage = pMemLnx->cPages;
259 while (iPage-- > 0)
260 __free_page(pMemLnx->apPages[iPage]);
261 }
262 else
263#endif
264 __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
265
266 pMemLnx->cPages = 0;
267 }
268}
269
270
271/**
272 * Maps the allocation into ring-0.
273 *
274 * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJLNX::fMappedToRing0 members.
275 *
276 * Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
277 * space, so we'll use that mapping if possible. If execute access is required, we'll
278 * play safe and do our own mapping.
279 *
280 * @returns IPRT status code.
281 * @param pMemLnx The linux memory object to map.
282 * @param fExecutable Whether execute access is required.
283 */
284static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
285{
286 int rc = VINF_SUCCESS;
287
288 /*
289 * Choose mapping strategy.
290 */
291 bool fMustMap = fExecutable
292 || !pMemLnx->fContiguous;
293 if (!fMustMap)
294 {
295 size_t iPage = pMemLnx->cPages;
296 while (iPage-- > 0)
297 if (PageHighMem(pMemLnx->apPages[iPage]))
298 {
299 fMustMap = true;
300 break;
301 }
302 }
303
304 Assert(!pMemLnx->Core.pv);
305 Assert(!pMemLnx->fMappedToRing0);
306
307 if (fMustMap)
308 {
309 /*
310 * Use vmap - 2.4.22 and later.
311 */
312#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
313 pgprot_t fPg;
314 pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
315# ifdef _PAGE_NX
316 if (!fExecutable)
317 pgprot_val(fPg) |= _PAGE_NX;
318# endif
319
320# ifdef VM_MAP
321 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
322# else
323 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
324# endif
325 if (pMemLnx->Core.pv)
326 pMemLnx->fMappedToRing0 = true;
327 else
328 rc = VERR_MAP_FAILED;
329#else /* < 2.4.22 */
330 rc = VERR_NOT_SUPPORTED;
331#endif
332 }
333 else
334 {
335 /*
336 * Use the kernel RAM mapping.
337 */
338 pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
339 Assert(pMemLnx->Core.pv);
340 }
341
342 return rc;
343}
344
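The mapping decision above boils down to three independent reasons for building a fresh vmap rather than reusing the kernel's direct mapping: executable mappings get their own protection, non-contiguous allocations have no single direct-mapped range, and highmem pages are not in the direct map at all. Pulled out as a pure predicate for readability (a sketch, not IPRT code):

#include <stdbool.h>

/* true when rtR0MemObjLinuxVMap() has to call vmap() instead of using
 * phys_to_virt() on the first page. */
static bool need_vmap(bool fExecutable, bool fContiguous, bool fAnyHighMemPage)
{
    return fExecutable || !fContiguous || fAnyHighMemPage;
}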
345
346/**
347 * Undoes what rtR0MemObjLinuxVMap() did.
348 *
349 * @param pMemLnx The linux memory object.
350 */
351static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
352{
353#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
354 if (pMemLnx->fMappedToRing0)
355 {
356 Assert(pMemLnx->Core.pv);
357 vunmap(pMemLnx->Core.pv);
358 pMemLnx->fMappedToRing0 = false;
359 }
360#else /* < 2.4.22 */
361 Assert(!pMemLnx->fMappedToRing0);
362#endif
363 pMemLnx->Core.pv = NULL;
364}
365
366
367int rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
368{
369 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
370
371 /*
372 * Release any memory that we've allocated or locked.
373 */
374 switch (pMemLnx->Core.enmType)
375 {
376 case RTR0MEMOBJTYPE_LOW:
377 case RTR0MEMOBJTYPE_PAGE:
378 case RTR0MEMOBJTYPE_CONT:
379 case RTR0MEMOBJTYPE_PHYS:
380 rtR0MemObjLinuxVUnmap(pMemLnx);
381 rtR0MemObjLinuxFreePages(pMemLnx);
382 break;
383
384 case RTR0MEMOBJTYPE_LOCK:
385 if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
386 {
387 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
388 size_t iPage;
389 down_read(&pTask->mm->mmap_sem);
390
391 iPage = pMemLnx->cPages;
392 while (iPage-- > 0)
393 {
394 if (!PageReserved(pMemLnx->apPages[iPage]))
395 SetPageDirty(pMemLnx->apPages[iPage]);
396 page_cache_release(pMemLnx->apPages[iPage]);
397 }
398
399 up_read(&pTask->mm->mmap_sem);
400 }
401 else
402 AssertFailed(); /* not implemented for R0 */
403 break;
404
405 case RTR0MEMOBJTYPE_RES_VIRT:
406 Assert(pMemLnx->Core.pv);
407 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
408 {
409 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.ResVirt.R0Process);
410 down_write(&pTask->mm->mmap_sem);
411 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
412 up_write(&pTask->mm->mmap_sem);
413 }
414 else
415 {
416 vunmap(pMemLnx->Core.pv);
417
418 Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
419 __free_page(pMemLnx->apPages[0]);
420 pMemLnx->apPages[0] = NULL;
421 pMemLnx->cPages = 0;
422 }
423 pMemLnx->Core.pv = NULL;
424 break;
425
426 case RTR0MEMOBJTYPE_MAPPING:
427 Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
428 if (pMemLnx->Core.u.Mapping.R0Process != NIL_RTR0PROCESS)
429 {
430 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Mapping.R0Process);
431 down_write(&pTask->mm->mmap_sem);
432 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
433 up_write(&pTask->mm->mmap_sem);
434 }
435 else
436 vunmap(pMemLnx->Core.pv);
437 pMemLnx->Core.pv = NULL;
438 break;
439
440 default:
441 AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
442 return VERR_INTERNAL_ERROR;
443 }
444 return VINF_SUCCESS;
445}
446
447
448int rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
449{
450 PRTR0MEMOBJLNX pMemLnx;
451 int rc;
452
453#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
454 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_HIGHUSER, false /* non-contiguous */);
455#else
456 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, GFP_USER, false /* non-contiguous */);
457#endif
458 if (RT_SUCCESS(rc))
459 {
460 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
461 if (RT_SUCCESS(rc))
462 {
463 *ppMem = &pMemLnx->Core;
464 return rc;
465 }
466
467 rtR0MemObjLinuxFreePages(pMemLnx);
468 rtR0MemObjDelete(&pMemLnx->Core);
469 }
470
471 return rc;
472}
473
474
475int rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
476{
477 PRTR0MEMOBJLNX pMemLnx;
478 int rc;
479
480#ifdef RT_ARCH_AMD64
481# ifdef GFP_DMA32
482 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA32, false /* non-contiguous */);
483# else
484 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_DMA, false /* non-contiguous */);
485# endif
486#else
487 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, GFP_USER, false /* non-contiguous */);
488#endif
489 if (RT_SUCCESS(rc))
490 {
491 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
492 if (RT_SUCCESS(rc))
493 {
494 *ppMem = &pMemLnx->Core;
495 return rc;
496 }
497
498 rtR0MemObjLinuxFreePages(pMemLnx);
499 rtR0MemObjDelete(&pMemLnx->Core);
500 }
501
502 return rc;
503}
504
505
506int rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
507{
508 PRTR0MEMOBJLNX pMemLnx;
509 int rc;
510
511#ifdef RT_ARCH_AMD64
512# ifdef GFP_DMA32
513 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_DMA32, true /* contiguous */);
514# else
515 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_DMA, true /* contiguous */);
516# endif
517#else
518 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, GFP_USER, true /* contiguous */);
519#endif
520 if (RT_SUCCESS(rc))
521 {
522 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
523 if (RT_SUCCESS(rc))
524 {
525#ifdef RT_STRICT
526 size_t iPage = pMemLnx->cPages;
527 while (iPage-- > 0)
528 Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
529#endif
530 pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
531 *ppMem = &pMemLnx->Core;
532 return rc;
533 }
534
535 rtR0MemObjLinuxFreePages(pMemLnx);
536 rtR0MemObjDelete(&pMemLnx->Core);
537 }
538
539 return rc;
540}
541
542
543/**
544 * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
545 *
546 * @returns IPRT status code.
547 * @param ppMem Where to store the memory object pointer on success.
548 * @param enmType The object type.
549 * @param cb The size of the allocation.
550 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
551 * @param fGfp The Linux GFP flags to use for the allocation.
552 */
553static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType, size_t cb, RTHCPHYS PhysHighest, unsigned fGfp)
554{
555 PRTR0MEMOBJLNX pMemLnx;
556 int rc;
557
558 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, fGfp,
559 enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */);
560 if (RT_FAILURE(rc))
561 return rc;
562
563 /*
564 * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
565 */
566 if (PhysHighest != NIL_RTHCPHYS)
567 {
568 size_t iPage = pMemLnx->cPages;
569 while (iPage-- > 0)
570 if (page_to_phys(pMemLnx->apPages[iPage]) >= PhysHighest)
571 {
572 rtR0MemObjLinuxFreePages(pMemLnx);
573 rtR0MemObjDelete(&pMemLnx->Core);
574 return VERR_NO_MEMORY;
575 }
576 }
577
578 /*
579 * Complete the object.
580 */
581 if (enmType == RTR0MEMOBJTYPE_PHYS)
582 {
583 pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
584 pMemLnx->Core.u.Phys.fAllocated = true;
585 }
586 *ppMem = &pMemLnx->Core;
587 return rc;
588}
589
590
591int rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
592{
593 unsigned fGfp;
594 unsigned fGfp2;
595 int rc;
596
597 /*
598 * Calc the allocation flags for the first and second allocation attempt
599 * and perform the allocation(s).
600 */
601 if (PhysHighest == NIL_RTHCPHYS)
602 fGfp2 = fGfp = GFP_HIGHUSER;
603#ifdef GFP_DMA32
604 else if (PhysHighest >= _4G)
605 fGfp2 = fGfp = GFP_DMA32;
606#endif
607 else if (PhysHighest <= _1M * 16)
608 fGfp2 = fGfp = GFP_DMA;
609 else
610 {
611 fGfp = GFP_USER;
612 fGfp2 = GFP_DMA;
613 }
614 rc = rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, PhysHighest, fGfp);
615 if ( RT_FAILURE(rc)
616 && fGfp2 != fGfp)
617 rc = rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, PhysHighest, fGfp2);
618 return rc;
619}
620
621
622int rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
623{
624 unsigned fGfp;
625 unsigned fGfp2;
626 int rc;
627
628 /*
629 * Calc the allocation flags for the first and second allocation attempt
630 * and perform the allocation(s).
631 */
632 if (PhysHighest == NIL_RTHCPHYS)
633 fGfp2 = fGfp = GFP_HIGHUSER;
634#ifdef GFP_DMA32
635 else if (PhysHighest >= _4G)
636 fGfp2 = fGfp = GFP_DMA32;
637#endif
638 else if (PhysHighest <= _1M * 16)
639 fGfp2 = fGfp = GFP_DMA;
640 else
641 {
642 fGfp = GFP_USER;
643 fGfp2 = GFP_DMA;
644 }
645 rc = rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PhysHighest, fGfp);
646 if ( RT_FAILURE(rc)
647 && fGfp2 != fGfp)
648 rc = rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PhysHighest, fGfp2);
649 return rc;
650}
651
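Both physical allocators above derive a primary and a fallback zone from PhysHighest before handing off to rtR0MemObjLinuxAllocPhysSub. The decision ladder, restated as a self-contained sketch; the enum and helper are stand-ins for illustration, while the real code passes the kernel GFP_* constants directly.

#include <stdint.h>

#define MY_NIL_PHYS UINT64_MAX                    /* stands in for NIL_RTHCPHYS */
#define MY_4G       UINT64_C(0x100000000)
#define MY_16M      (UINT64_C(16) * 1024 * 1024)

enum my_zone { MY_GFP_DMA, MY_GFP_DMA32, MY_GFP_USER, MY_GFP_HIGHUSER };

/* First and second allocation attempt, mirroring the if/else ladder above;
 * fHaveDma32 models the "#ifdef GFP_DMA32" conditional. */
static void pick_gfp(uint64_t PhysHighest, int fHaveDma32,
                     enum my_zone *pfGfp, enum my_zone *pfGfp2)
{
    if (PhysHighest == MY_NIL_PHYS)
        *pfGfp2 = *pfGfp = MY_GFP_HIGHUSER;       /* no address limit at all */
    else if (fHaveDma32 && PhysHighest >= MY_4G)
        *pfGfp2 = *pfGfp = MY_GFP_DMA32;          /* anything below 4GB satisfies the limit */
    else if (PhysHighest <= MY_16M)
        *pfGfp2 = *pfGfp = MY_GFP_DMA;            /* must sit below 16MB */
    else
    {
        *pfGfp  = MY_GFP_USER;                    /* try the normal zone first... */
        *pfGfp2 = MY_GFP_DMA;                     /* ...retry low if pages land above the limit */
    }
}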
652
653int rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb)
654{
655 /*
656 * All we need to do here is to validate that we can use
657 * ioremap on the specified address (32/64-bit dma_addr_t).
658 */
659 PRTR0MEMOBJLNX pMemLnx;
660 dma_addr_t PhysAddr = Phys;
661 AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
662
663 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb);
664 if (!pMemLnx)
665 return VERR_NO_MEMORY;
666
667 pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
668 pMemLnx->Core.u.Phys.fAllocated = false;
669 Assert(!pMemLnx->cPages);
670 *ppMem = &pMemLnx->Core;
671 return VINF_SUCCESS;
672}
673
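The AssertMsgReturn above rejects addresses that would be silently truncated when stored in a 32-bit dma_addr_t. The same assign-and-compare-back check, as a tiny user-space sketch with a fixed 32-bit stand-in type:

#include <stdint.h>

/* Non-zero when Phys survives a round trip through a 32-bit "dma address"
 * unchanged, i.e. no bits above bit 31 would be lost. */
static int phys_fits_32bit(uint64_t Phys)
{
    uint32_t NarrowAddr = (uint32_t)Phys;   /* plays the role of a 32-bit dma_addr_t */
    return NarrowAddr == Phys;
}
/* phys_fits_32bit(0xfffff000)  -> 1
 * phys_fits_32bit(0x100000000) -> 0 */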
674
675int rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, RTR0PROCESS R0Process)
676{
677 const int cPages = cb >> PAGE_SHIFT;
678 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
679 struct vm_area_struct **papVMAs;
680 PRTR0MEMOBJLNX pMemLnx;
681 int rc;
682
683 /*
684 * Check for valid task and size overflows.
685 */
686 if (!pTask)
687 return VERR_NOT_SUPPORTED;
688 if (((size_t)cPages << PAGE_SHIFT) != cb)
689 return VERR_OUT_OF_RANGE;
690
691 /*
692 * Allocate the memory object and a temporary buffer for the VMAs.
693 */
694 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
695 if (!pMemLnx)
696 return VERR_NO_MEMORY;
697
698 papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
699 if (papVMAs)
700 {
701 down_read(&pTask->mm->mmap_sem);
702
703 /*
704 * Get user pages.
705 */
706 rc = get_user_pages(pTask, /* Task for fault accounting. */
707 pTask->mm, /* Whose pages. */
708 R3Ptr, /* Where from. */
709 cPages, /* How many pages. */
710 1, /* Write to memory. */
711 0, /* force. */
712 &pMemLnx->apPages[0], /* Page array. */
713 papVMAs); /* vmas */
714 if (rc == cPages)
715 {
716 /*
717 * Flush dcache (required?) and protect against fork.
718 */
719 /** @todo The Linux fork() protection will require more work if this API
720 * is to be used for anything but locking VM pages. */
721 while (rc-- > 0)
722 {
723 flush_dcache_page(pMemLnx->apPages[rc]);
724 papVMAs[rc]->vm_flags |= VM_DONTCOPY;
725 }
726
727 up_read(&pTask->mm->mmap_sem);
728
729 RTMemFree(papVMAs);
730
731 pMemLnx->Core.u.Lock.R0Process = R0Process;
732 pMemLnx->cPages = cPages;
733 Assert(!pMemLnx->fMappedToRing0);
734 *ppMem = &pMemLnx->Core;
735
736 return VINF_SUCCESS;
737 }
738
739 /*
740 * Failed - we need to unlock any pages that we succeeded in locking.
741 */
742 while (rc-- > 0)
743 {
744 if (!PageReserved(pMemLnx->apPages[rc]))
745 SetPageDirty(pMemLnx->apPages[rc]);
746 page_cache_release(pMemLnx->apPages[rc]);
747 }
748
749 up_read(&pTask->mm->mmap_sem);
750
751 RTMemFree(papVMAs);
752 rc = VERR_LOCK_FAILED;
753 }
754
755 rtR0MemObjDelete(&pMemLnx->Core);
756 return rc;
757}
758
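get_user_pages() reports how many pages it actually pinned, and the failure path above walks back exactly that many with while (rc-- > 0). That partial-failure rollback idiom, pulled out as a generic user-space sketch (the typedefs and names are ours, not IPRT's):

#include <stddef.h>
#include <stdbool.h>

typedef bool (*acquire_fn)(size_t iItem, void *pvCtx);
typedef void (*release_fn)(size_t iItem, void *pvCtx);

/* Acquire cItems resources in order; on the first failure, release the
 * ones already acquired in reverse order and report failure. */
static bool acquire_all(size_t cItems, acquire_fn pfnAcquire, release_fn pfnRelease, void *pvCtx)
{
    size_t i;
    for (i = 0; i < cItems; i++)
        if (!pfnAcquire(i, pvCtx))
        {
            while (i-- > 0)
                pfnRelease(i, pvCtx);
            return false;
        }
    return true;
}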
759
760int rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb)
761{
762 /* What is there to lock? Should/Can we fake this? */
763 return VERR_NOT_SUPPORTED;
764}
765
766
767int rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
768{
769#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
770 const size_t cPages = cb >> PAGE_SHIFT;
771 struct page *pDummyPage;
772 struct page **papPages;
773
774 /* check for unsupported stuff. */
775 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
776 AssertMsgReturn(uAlignment <= PAGE_SIZE, ("%#x\n", uAlignment), VERR_NOT_SUPPORTED);
777
778 /*
779 * Allocate a dummy page and create a page pointer array for vmap such that
780 * the dummy page is mapped all over the reserved area.
781 */
782 pDummyPage = alloc_page(GFP_HIGHUSER);
783 if (!pDummyPage)
784 return VERR_NO_MEMORY;
785 papPages = RTMemAlloc(sizeof(*papPages) * cPages);
786 if (papPages)
787 {
788 void *pv;
789 size_t iPage = cPages;
790 while (iPage-- > 0)
791 papPages[iPage] = pDummyPage;
792# ifdef VM_MAP
793 pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
794# else
795 pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
796# endif
797 RTMemFree(papPages);
798 if (pv)
799 {
800 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
801 if (pMemLnx)
802 {
803 pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
804 pMemLnx->cPages = 1;
805 pMemLnx->apPages[0] = pDummyPage;
806 *ppMem = &pMemLnx->Core;
807 return VINF_SUCCESS;
808 }
809 vunmap(pv);
810 }
811 }
812 __free_page(pDummyPage);
813 return VERR_NO_MEMORY;
814
815#else /* < 2.4.22 */
816 /*
817 * Could probably use ioremap here, but the caller is in a better position than us
818 * to select some safe physical memory.
819 */
820 return VERR_NOT_SUPPORTED;
821#endif
822}
823
824
825/**
826 * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
827 * an empty user space mapping.
828 *
829 * The caller takes care of acquiring the mmap_sem of the task.
830 *
831 * @returns Pointer to the mapping.
832 * (void *)-1 on failure.
833 * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
834 * @param cb The size of the mapping.
835 * @param uAlignment The alignment of the mapping.
836 * @param pTask The Linux task to create this mapping in.
837 * @param fProt The RTMEM_PROT_* mask.
838 */
839static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
840{
841 unsigned fLnxProt;
842 unsigned long ulAddr;
843
844 /*
845 * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
846 */
847 fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
848 if (fProt == RTMEM_PROT_NONE)
849 fLnxProt = PROT_NONE;
850 else
851 {
852 fLnxProt = 0;
853 if (fProt & RTMEM_PROT_READ)
854 fLnxProt |= PROT_READ;
855 if (fProt & RTMEM_PROT_WRITE)
856 fLnxProt |= PROT_WRITE;
857 if (fProt & RTMEM_PROT_EXEC)
858 fLnxProt |= PROT_EXEC;
859 }
860
861 if (R3PtrFixed != (RTR3PTR)-1)
862 ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
863 else
864 {
865 ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
866 if ( !(ulAddr & ~PAGE_MASK)
867 && (ulAddr & (uAlignment - 1)))
868 {
869 /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
870 * up alignment gaps. This is of course complicated by fragmentation (which we might have caused
871 * ourselves) and further by there being two mmap strategies (top / bottom). */
872 /* For now, just ignore uAlignment requirements... */
873 }
874 }
875 if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
876 return (void *)-1;
877 return (void *)ulAddr;
878}
879
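Two bit tests in rtR0MemObjLinuxDoMmap() are worth spelling out: do_mmap() of this era returns either a page-aligned address or a small negative errno cast to unsigned long, so checking the bits below the page boundary doubles as the failure test (the '~PAGE_MASK == PAGE_OFFSET_MASK' comment hints at this), and a power-of-two alignment request can be verified with a simple mask. A user-space sketch, assuming the usual 4K page size:

#include <stdio.h>

#define MY_PAGE_SIZE 4096UL
#define MY_PAGE_MASK (~(MY_PAGE_SIZE - 1))

/* A failed mmap-style return is -errno, which always has bits set below the
 * page boundary; a successful return is page aligned and has none. */
static int mmap_failed(unsigned long ulAddr)
{
    return (ulAddr & ~MY_PAGE_MASK) != 0;
}

/* Alignment test, valid for power-of-two uAlignment values. */
static int misaligned(unsigned long ulAddr, unsigned long uAlignment)
{
    return (ulAddr & (uAlignment - 1)) != 0;
}

int main(void)
{
    printf("%d\n", mmap_failed(0x7f2a40000000UL));     /* 0: plausible mapping address */
    printf("%d\n", mmap_failed((unsigned long)-12L));  /* 1: -ENOMEM style error value */
    printf("%d\n", misaligned(0x1000, 0x2000));        /* 1: 4K address, 8K alignment */
    return 0;
}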
880
881int rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
882{
883 PRTR0MEMOBJLNX pMemLnx;
884 void *pv;
885 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
886 if (!pTask)
887 return VERR_NOT_SUPPORTED;
888
889 /*
890 * Let rtR0MemObjLinuxDoMmap do the difficult bits.
891 */
892 down_write(&pTask->mm->mmap_sem);
893 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
894 up_write(&pTask->mm->mmap_sem);
895 if (pv == (void *)-1)
896 return VERR_NO_MEMORY;
897
898 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
899 if (!pMemLnx)
900 {
901 down_write(&pTask->mm->mmap_sem);
902 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, cb);
903 up_write(&pTask->mm->mmap_sem);
904 return VERR_NO_MEMORY;
905 }
906
907 pMemLnx->Core.u.ResVirt.R0Process = R0Process;
908 *ppMem = &pMemLnx->Core;
909 return VINF_SUCCESS;
910}
911
912
913int rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment, unsigned fProt)
914{
915 int rc = VERR_NO_MEMORY;
916 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
917 PRTR0MEMOBJLNX pMemLnx;
918
919 /* Fail if requested to do something we can't. */
920 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
921 AssertMsgReturn(uAlignment <= PAGE_SIZE, ("%#x\n", uAlignment), VERR_NOT_SUPPORTED);
922
923 /*
924 * Create the IPRT memory object.
925 */
926 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
927 if (pMemLnx)
928 {
929 if (pMemLnxToMap->cPages)
930 {
931#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
932 /*
933 * Use vmap - 2.4.22 and later.
934 */
935 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
936# ifdef VM_MAP
937 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_MAP, fPg);
938# else
939 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_ALLOC, fPg);
940# endif
941 if (pMemLnx->Core.pv)
942 {
943 pMemLnx->fMappedToRing0 = true;
944 rc = VINF_SUCCESS;
945 }
946 else
947 rc = VERR_MAP_FAILED;
948
949#else /* < 2.4.22 */
950 /*
951 * Only option here is to share mappings if possible and forget about fProt.
952 */
953 if (rtR0MemObjIsRing3(pMemToMap))
954 rc = VERR_NOT_SUPPORTED;
955 else
956 {
957 rc = VINF_SUCCESS;
958 if (!pMemLnxToMap->Core.pv)
959 rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
960 if (RT_SUCCESS(rc))
961 {
962 Assert(pMemLnxToMap->Core.pv);
963 pMemLnx->Core.pv = pMemLnxToMap->Core.pv;
964 }
965 }
966#endif
967 }
968 else
969 {
970 /*
971 * MMIO / physical memory.
972 */
973 Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
974 pMemLnx->Core.pv = ioremap(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb);
975 if (pMemLnx->Core.pv)
976 {
977 /** @todo fix protection. */
978 rc = VINF_SUCCESS;
979 }
980 }
981 if (RT_SUCCESS(rc))
982 {
983 pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
984 *ppMem = &pMemLnx->Core;
985 return VINF_SUCCESS;
986 }
987 rtR0MemObjDelete(&pMemLnx->Core);
988 }
989
990 return rc;
991}
992
993
994int rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed, size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
995{
996 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
997 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
998 int rc = VERR_NO_MEMORY;
999 PRTR0MEMOBJLNX pMemLnx;
1000
1001 /*
1002 * Check for restrictions.
1003 */
1004 if (!pTask)
1005 return VERR_NOT_SUPPORTED;
1006
1007 /*
1008 * Create the IPRT memory object.
1009 */
1010 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1011 if (pMemLnx)
1012 {
1013 /*
1014 * Allocate user space mapping.
1015 */
1016 void *pv;
1017 down_write(&pTask->mm->mmap_sem);
1018 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, pMemLnxToMap->Core.cb, uAlignment, pTask, fProt);
1019 if (pv != (void *)-1)
1020 {
1021 /*
1022 * Map page by page into the mmap area.
1023 * This is generic, paranoid and not very efficient.
1024 */
1025 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
1026 unsigned long ulAddrCur = (unsigned long)pv;
1027 const size_t cPages = pMemLnxToMap->Core.cb >> PAGE_SHIFT;
1028 size_t iPage;
1029 rc = 0;
1030 if (pMemLnxToMap->cPages)
1031 {
1032 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1033 {
1034#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1035 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1036 AssertBreak(vma, rc = VERR_INTERNAL_ERROR);
1037#endif
1038
1039#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1040 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1041#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1042 rc = remap_page_range(vma, ulAddrCur, page_to_phys(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1043#else /* 2.4 */
1044 rc = remap_page_range(ulAddrCur, page_to_phys(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1045#endif
1046 if (rc)
1047 break;
1048 }
1049 }
1050 else
1051 {
1052 RTHCPHYS Phys;
1053 if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
1054 Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
1055 else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
1056 Phys = pMemLnxToMap->Core.u.Cont.Phys;
1057 else
1058 {
1059 AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
1060 Phys = NIL_RTHCPHYS;
1061 }
1062 if (Phys != NIL_RTHCPHYS)
1063 {
1064 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
1065 {
1066#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1067 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1068 AssertBreak(vma, rc = VERR_INTERNAL_ERROR);
1069#endif
1070
1071#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1072 rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1073#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1074 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1075#else /* 2.4 */
1076 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1077#endif
1078 if (rc)
1079 break;
1080 }
1081 }
1082 }
1083 if (!rc)
1084 {
1085 up_write(&pTask->mm->mmap_sem);
1086
1087 pMemLnx->Core.pv = pv;
1088 pMemLnx->Core.u.Mapping.R0Process = R0Process;
1089 *ppMem = &pMemLnx->Core;
1090 return VINF_SUCCESS;
1091 }
1092
1093 /*
1094 * Bail out.
1095 */
1096 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, pMemLnxToMap->Core.cb);
1097 if (rc != VERR_INTERNAL_ERROR)
1098 rc = VERR_NO_MEMORY;
1099 }
1100
1101 up_write(&pTask->mm->mmap_sem);
1102
1103 rtR0MemObjDelete(&pMemLnx->Core);
1104 }
1105
1106 return rc;
1107}
1108
1109
1110RTHCPHYS rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
1111{
1112 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
1113
1114 if (pMemLnx->cPages)
1115 return page_to_phys(pMemLnx->apPages[iPage]);
1116
1117 switch (pMemLnx->Core.enmType)
1118 {
1119 case RTR0MEMOBJTYPE_CONT:
1120 return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
1121
1122 case RTR0MEMOBJTYPE_PHYS:
1123 return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
1124
1125 /* the parent knows */
1126 case RTR0MEMOBJTYPE_MAPPING:
1127 return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
1128
1129 /* cPages > 0 */
1130 case RTR0MEMOBJTYPE_LOW:
1131 case RTR0MEMOBJTYPE_LOCK:
1132 case RTR0MEMOBJTYPE_PHYS_NC:
1133 case RTR0MEMOBJTYPE_PAGE:
1134 default:
1135 AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
1136 /* fall thru */
1137
1138 case RTR0MEMOBJTYPE_RES_VIRT:
1139 return NIL_RTHCPHYS;
1140 }
1141}
1142
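For the contiguous and pre-registered physical object types above, the per-page address is plain arithmetic on the stored base address. A small sketch of that offset calculation, assuming 4K pages (names are illustrative):

#include <stdint.h>
#include <stddef.h>

#define MY_PAGE_SHIFT 12   /* 4K pages */

/* Page index -> physical address within a physically contiguous range,
 * e.g. base 0x1000000 and iPage 3 give 0x1003000. */
static uint64_t cont_page_phys(uint64_t PhysBase, size_t iPage)
{
    return PhysBase + ((uint64_t)iPage << MY_PAGE_SHIFT);
}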