VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c@ 34296

Last change on this file since 34296 was 33642, checked in by vboxsync, 14 years ago

this flag is not available in Linux 2.4

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Rev
File size: 46.9 KB
1/* $Revision: 33642 $ */
2/** @file
3 * IPRT - Ring-0 Memory Objects, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "the-linux-kernel.h"
32
33#include <iprt/memobj.h>
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/log.h>
37#include <iprt/process.h>
38#include <iprt/string.h>
39#include "internal/memobj.h"
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/* early 2.6 kernels */
46#ifndef PAGE_SHARED_EXEC
47# define PAGE_SHARED_EXEC PAGE_SHARED
48#endif
49#ifndef PAGE_READONLY_EXEC
50# define PAGE_READONLY_EXEC PAGE_READONLY
51#endif
52
53/*
54 * 2.6.29+ kernels don't work with remap_pfn_range() anymore because
55 * track_pfn_vma_new() is apparently not defined for non-RAM pages.
56 * It should be safe to use vm_insert_page() on older kernels as well.
57 */
58#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
59# define VBOX_USE_INSERT_PAGE
60#endif
61#if defined(CONFIG_X86_PAE) \
62 && ( HAVE_26_STYLE_REMAP_PAGE_RANGE \
63 || (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)))
64# define VBOX_USE_PAE_HACK
65#endif
66
67
68/*******************************************************************************
69* Structures and Typedefs *
70*******************************************************************************/
71/**
72 * The Linux version of the memory object structure.
73 */
74typedef struct RTR0MEMOBJLNX
75{
76 /** The core structure. */
77 RTR0MEMOBJINTERNAL Core;
78 /** Set if the allocation is contiguous.
79 * This means it has to be given back as one chunk. */
80 bool fContiguous;
81 /** Set if we've vmap'ed the memory into ring-0. */
82 bool fMappedToRing0;
84 /** The number of pages in the apPages array. */
84 size_t cPages;
85 /** Array of struct page pointers. (variable size) */
86 struct page *apPages[1];
87} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
88
89
90static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx);
91
92
93/**
94 * Helper that converts from a RTR0PROCESS handle to a linux task.
95 *
96 * @returns The corresponding Linux task.
97 * @param R0Process IPRT ring-0 process handle.
98 */
99struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
100{
101 /** @todo fix rtR0ProcessToLinuxTask!! */
102 return R0Process == RTR0ProcHandleSelf() ? current : NULL;
103}
104
105
106/**
107 * Compute order. Some functions allocate 2^order pages.
108 *
109 * @returns order.
110 * @param cPages Number of pages.
111 */
112static int rtR0MemObjLinuxOrder(size_t cPages)
113{
114 int iOrder;
115 size_t cTmp;
116
117 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
118 ;
119 if (cPages & ~((size_t)1 << iOrder))
120 ++iOrder;
121
122 return iOrder;
123}
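
/*
 * Worked example (illustrative, not part of the original file): the loop above
 * computes floor(log2(cPages)) and the final check rounds up when cPages is
 * not a power of two, since alloc_pages(flags, order) always hands back
 * 2^order pages.  A user-space rendering of the same arithmetic:
 */
#if 0 /* example only, not built with this file */
#include <stdio.h>
#include <stddef.h>

static int exampleOrder(size_t cPages)
{
    int iOrder;
    size_t cTmp;
    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    if (cPages & ~((size_t)1 << iOrder))
        ++iOrder;                           /* round up for non-powers of two */
    return iOrder;
}

int main(void)
{
    size_t acPages[] = { 1, 2, 3, 512 };    /* -> orders 0, 1, 2, 9 */
    size_t i;
    for (i = 0; i < sizeof(acPages) / sizeof(acPages[0]); i++)
        printf("%zu pages -> order %d\n", acPages[i], exampleOrder(acPages[i]));
    return 0;
}
#endif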
124
125
126/**
127 * Converts from RTMEM_PROT_* to Linux PAGE_*.
128 *
129 * @returns Linux page protection constant.
130 * @param fProt The IPRT protection mask.
131 * @param fKernel Whether it applies to kernel or user space.
132 */
133static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
134{
135 switch (fProt)
136 {
137 default:
138 AssertMsgFailed(("%#x %d\n", fProt, fKernel));
139 case RTMEM_PROT_NONE:
140 return PAGE_NONE;
141
142 case RTMEM_PROT_READ:
143 return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
144
145 case RTMEM_PROT_WRITE:
146 case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
147 return fKernel ? PAGE_KERNEL : PAGE_SHARED;
148
149 case RTMEM_PROT_EXEC:
150 case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
151#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
152 if (fKernel)
153 {
154 pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
155 pgprot_val(fPg) &= ~_PAGE_RW;
156 return fPg;
157 }
158 return PAGE_READONLY_EXEC;
159#else
160 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
161#endif
162
163 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
164 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
165 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
166 }
167}
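
/*
 * Illustrative example (not part of the original file): a ring-0 read/write
 * request resolves to PAGE_KERNEL via the switch above, while the same IPRT
 * mask for ring-3 resolves to PAGE_SHARED.
 */
#if 0 /* example only, not built with this file */
static pgprot_t exampleKernelRwProt(void)
{
    return rtR0MemObjLinuxConvertProt(RTMEM_PROT_READ | RTMEM_PROT_WRITE, true /* fKernel */);
}
#endif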
168
169
170/**
171 * Internal worker that allocates physical pages and creates the memory object for them.
172 *
173 * @returns IPRT status code.
174 * @param ppMemLnx Where to store the memory object pointer.
175 * @param enmType The object type.
176 * @param cb The number of bytes to allocate.
177 * @param uAlignment The alignment of the physical memory.
178 * Only valid if fContiguous == true, ignored otherwise.
179 * @param fFlagsLnx The page allocation flags (GFPs).
180 * @param fContiguous Whether the allocation must be contiguous.
181 */
182static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb,
183 size_t uAlignment, unsigned fFlagsLnx, bool fContiguous)
184{
185 size_t iPage;
186 size_t const cPages = cb >> PAGE_SHIFT;
187 struct page *paPages;
188
189 /*
190 * Allocate a memory object structure that's large enough to contain
191 * the page pointer array.
192 */
193 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
194 if (!pMemLnx)
195 return VERR_NO_MEMORY;
196 pMemLnx->cPages = cPages;
197
198 if (cPages > 255)
199 {
200# ifdef __GFP_NORETRY
201 /* Not available in Linux 2.4.0 */
202 fFlagsLnx |= __GFP_NORETRY;
203# endif
204# ifdef __GFP_NOMEMALLOC
205 /* Introduced with Linux 2.6.12: Don't use emergency reserves */
206 fFlagsLnx |= __GFP_NOMEMALLOC;
207# endif
208 }
209
210 /*
211 * Allocate the pages.
212 * For small allocations we'll try contiguous first and then fall back on page by page.
213 */
214#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
215 if ( fContiguous
216 || cb <= PAGE_SIZE * 2)
217 {
218# ifdef VBOX_USE_INSERT_PAGE
219 paPages = alloc_pages(fFlagsLnx | __GFP_COMP, rtR0MemObjLinuxOrder(cPages));
220# else
221 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
222# endif
223 if (paPages)
224 {
225 fContiguous = true;
226 for (iPage = 0; iPage < cPages; iPage++)
227 pMemLnx->apPages[iPage] = &paPages[iPage];
228 }
229 else if (fContiguous)
230 {
231 rtR0MemObjDelete(&pMemLnx->Core);
232 return VERR_NO_MEMORY;
233 }
234 }
235
236 if (!fContiguous)
237 {
238 for (iPage = 0; iPage < cPages; iPage++)
239 {
240 pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx);
241 if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
242 {
243 while (iPage-- > 0)
244 __free_page(pMemLnx->apPages[iPage]);
245 rtR0MemObjDelete(&pMemLnx->Core);
246 return VERR_NO_MEMORY;
247 }
248 }
249 }
250
251#else /* < 2.4.22 */
252 /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
253 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
254 if (!paPages)
255 {
256 rtR0MemObjDelete(&pMemLnx->Core);
257 return VERR_NO_MEMORY;
258 }
259 for (iPage = 0; iPage < cPages; iPage++)
260 {
261 pMemLnx->apPages[iPage] = &paPages[iPage];
262 MY_SET_PAGES_EXEC(pMemLnx->apPages[iPage], 1);
263 if (PageHighMem(pMemLnx->apPages[iPage]))
264 BUG();
265 }
266
267 fContiguous = true;
268#endif /* < 2.4.22 */
269 pMemLnx->fContiguous = fContiguous;
270
271 /*
272 * Reserve the pages.
273 */
274 for (iPage = 0; iPage < cPages; iPage++)
275 SetPageReserved(pMemLnx->apPages[iPage]);
276
277 /*
278 * Note that the physical address of memory allocated with alloc_pages(flags, order)
279 * is always 2^(PAGE_SHIFT+order)-aligned.
280 */
281 if ( fContiguous
282 && uAlignment > PAGE_SIZE)
283 {
284 /*
285 * Check for alignment constraints. The physical address of memory allocated with
286 * alloc_pages(flags, order) is always 2^(PAGE_SHIFT+order)-aligned.
287 */
288 if (RT_UNLIKELY(page_to_phys(pMemLnx->apPages[0]) & (uAlignment - 1)))
289 {
290 /*
291 * This should never happen!
292 */
293 printk("rtR0MemObjLinuxAllocPages(cb=0x%lx, uAlignment=0x%lx): alloc_pages(..., %d) returned physical memory at 0x%lx!\n",
294 (unsigned long)cb, (unsigned long)uAlignment, rtR0MemObjLinuxOrder(cPages), (unsigned long)page_to_phys(pMemLnx->apPages[0]));
295 rtR0MemObjLinuxFreePages(pMemLnx);
296 return VERR_NO_MEMORY;
297 }
298 }
299
300 *ppMemLnx = pMemLnx;
301 return VINF_SUCCESS;
302}
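
/*
 * Illustrative sketch (not part of the original file) of how the allocation
 * worker above and the free worker below are paired.  The error paths inside
 * rtR0MemObjLinuxAllocPages() clean up after themselves, so a caller only has
 * to undo a successful allocation.
 */
#if 0 /* example only, not built with this file */
static int exampleAllocAndFree(size_t cb)
{
    PRTR0MEMOBJLNX pMemLnx;
    int rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE,
                                       GFP_HIGHUSER, false /* non-contiguous */);
    if (RT_SUCCESS(rc))
    {
        /* ... use pMemLnx->apPages[0..cPages-1] ... */
        rtR0MemObjLinuxFreePages(pMemLnx);  /* releases the pages, not the object */
        rtR0MemObjDelete(&pMemLnx->Core);   /* releases the object itself */
    }
    return rc;
}
#endif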
303
304
305/**
306 * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
307 *
308 * This method does NOT free the object.
309 *
310 * @param pMemLnx The object whose physical pages should be freed.
311 */
312static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
313{
314 size_t iPage = pMemLnx->cPages;
315 if (iPage > 0)
316 {
317 /*
318 * Restore the page flags.
319 */
320 while (iPage-- > 0)
321 {
322 ClearPageReserved(pMemLnx->apPages[iPage]);
323#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
324#else
325 MY_SET_PAGES_NOEXEC(pMemLnx->apPages[iPage], 1);
326#endif
327 }
328
329 /*
330 * Free the pages.
331 */
332#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
333 if (!pMemLnx->fContiguous)
334 {
335 iPage = pMemLnx->cPages;
336 while (iPage-- > 0)
337 __free_page(pMemLnx->apPages[iPage]);
338 }
339 else
340#endif
341 __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
342
343 pMemLnx->cPages = 0;
344 }
345}
346
347
348/**
349 * Maps the allocation into ring-0.
350 *
351 * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJLNX::fMappedToRing0 members.
352 *
353 * Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
354 * space, so we'll use that mapping if possible. If execute access is required, we'll
355 * play safe and do our own mapping.
356 *
357 * @returns IPRT status code.
358 * @param pMemLnx The linux memory object to map.
359 * @param fExecutable Whether execute access is required.
360 */
361static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
362{
363 int rc = VINF_SUCCESS;
364
365 /*
366 * Choose mapping strategy.
367 */
368 bool fMustMap = fExecutable
369 || !pMemLnx->fContiguous;
370 if (!fMustMap)
371 {
372 size_t iPage = pMemLnx->cPages;
373 while (iPage-- > 0)
374 if (PageHighMem(pMemLnx->apPages[iPage]))
375 {
376 fMustMap = true;
377 break;
378 }
379 }
380
381 Assert(!pMemLnx->Core.pv);
382 Assert(!pMemLnx->fMappedToRing0);
383
384 if (fMustMap)
385 {
386 /*
387 * Use vmap - 2.4.22 and later.
388 */
389#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
390 pgprot_t fPg;
391 pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
392# ifdef _PAGE_NX
393 if (!fExecutable)
394 pgprot_val(fPg) |= _PAGE_NX;
395# endif
396
397# ifdef VM_MAP
398 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
399# else
400 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
401# endif
402 if (pMemLnx->Core.pv)
403 pMemLnx->fMappedToRing0 = true;
404 else
405 rc = VERR_MAP_FAILED;
406#else /* < 2.4.22 */
407 rc = VERR_NOT_SUPPORTED;
408#endif
409 }
410 else
411 {
412 /*
413 * Use the kernel RAM mapping.
414 */
415 pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
416 Assert(pMemLnx->Core.pv);
417 }
418
419 return rc;
420}
421
422
423/**
424 * Undoes what rtR0MemObjLinuxVMap() did.
425 *
426 * @param pMemLnx The linux memory object.
427 */
428static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
429{
430#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
431 if (pMemLnx->fMappedToRing0)
432 {
433 Assert(pMemLnx->Core.pv);
434 vunmap(pMemLnx->Core.pv);
435 pMemLnx->fMappedToRing0 = false;
436 }
437#else /* < 2.4.22 */
438 Assert(!pMemLnx->fMappedToRing0);
439#endif
440 pMemLnx->Core.pv = NULL;
441}
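
/*
 * Illustrative sketch (not part of the original file) of the bare kernel API
 * the two helpers above wrap: vmap() builds a virtually contiguous ring-0
 * mapping over an arbitrary set of struct page pointers and vunmap() tears it
 * down again.
 */
#if 0 /* example only, not built with this file */
static void exampleVMapAndVUnmap(struct page **papPages, unsigned int cPages)
{
    /* VM_MAP is the flag meant for mapping pages that are already allocated (2.4.22+). */
    void *pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL);
    if (pv)
    {
        /* ... access all the pages through one virtually contiguous range ... */
        vunmap(pv);
    }
}
#endif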
442
443
444int rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
445{
446 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
447
448 /*
449 * Release any memory that we've allocated or locked.
450 */
451 switch (pMemLnx->Core.enmType)
452 {
453 case RTR0MEMOBJTYPE_LOW:
454 case RTR0MEMOBJTYPE_PAGE:
455 case RTR0MEMOBJTYPE_CONT:
456 case RTR0MEMOBJTYPE_PHYS:
457 case RTR0MEMOBJTYPE_PHYS_NC:
458 rtR0MemObjLinuxVUnmap(pMemLnx);
459 rtR0MemObjLinuxFreePages(pMemLnx);
460 break;
461
462 case RTR0MEMOBJTYPE_LOCK:
463 if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
464 {
465 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
466 size_t iPage;
467 Assert(pTask);
468 if (pTask && pTask->mm)
469 down_read(&pTask->mm->mmap_sem);
470
471 iPage = pMemLnx->cPages;
472 while (iPage-- > 0)
473 {
474 if (!PageReserved(pMemLnx->apPages[iPage]))
475 SetPageDirty(pMemLnx->apPages[iPage]);
476 page_cache_release(pMemLnx->apPages[iPage]);
477 }
478
479 if (pTask && pTask->mm)
480 up_read(&pTask->mm->mmap_sem);
481 }
482 /* else: kernel memory - nothing to do here. */
483 break;
484
485 case RTR0MEMOBJTYPE_RES_VIRT:
486 Assert(pMemLnx->Core.pv);
487 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
488 {
489 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
490 Assert(pTask);
491 if (pTask && pTask->mm)
492 {
493 down_write(&pTask->mm->mmap_sem);
494 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
495 up_write(&pTask->mm->mmap_sem);
496 }
497 }
498 else
499 {
500 vunmap(pMemLnx->Core.pv);
501
502 Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
503 __free_page(pMemLnx->apPages[0]);
504 pMemLnx->apPages[0] = NULL;
505 pMemLnx->cPages = 0;
506 }
507 pMemLnx->Core.pv = NULL;
508 break;
509
510 case RTR0MEMOBJTYPE_MAPPING:
511 Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
512 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
513 {
514 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
515 Assert(pTask);
516 if (pTask && pTask->mm)
517 {
518 down_write(&pTask->mm->mmap_sem);
519 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
520 up_write(&pTask->mm->mmap_sem);
521 }
522 }
523 else
524 vunmap(pMemLnx->Core.pv);
525 pMemLnx->Core.pv = NULL;
526 break;
527
528 default:
529 AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
530 return VERR_INTERNAL_ERROR;
531 }
532 return VINF_SUCCESS;
533}
534
535
536int rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
537{
538 PRTR0MEMOBJLNX pMemLnx;
539 int rc;
540
541#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
542 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_HIGHUSER, false /* non-contiguous */);
543#else
544 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_USER, false /* non-contiguous */);
545#endif
546 if (RT_SUCCESS(rc))
547 {
548 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
549 if (RT_SUCCESS(rc))
550 {
551 *ppMem = &pMemLnx->Core;
552 return rc;
553 }
554
555 rtR0MemObjLinuxFreePages(pMemLnx);
556 rtR0MemObjDelete(&pMemLnx->Core);
557 }
558
559 return rc;
560}
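
/*
 * Illustrative sketch (not part of the original file): how a ring-0 IPRT
 * client typically reaches the native worker above.  The public prototypes
 * live in iprt/memobj.h; the signatures used here are an approximation, so
 * treat this as a sketch rather than a reference.
 */
#if 0 /* example only, not built with this file */
static int exampleUsePageAlloc(void)
{
    RTR0MEMOBJ hMemObj;
    int rc = RTR0MemObjAllocPage(&hMemObj, 4 * PAGE_SIZE, false /* fExecutable */);
    if (RT_SUCCESS(rc))
    {
        void *pv = RTR0MemObjAddress(hMemObj); /* the ring-0 mapping set up by rtR0MemObjLinuxVMap() */
        memset(pv, 0, 4 * PAGE_SIZE);
        rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
    }
    return rc;
}
#endif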
561
562
563int rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
564{
565 PRTR0MEMOBJLNX pMemLnx;
566 int rc;
567
568 /* Try to avoid GFP_DMA. GFP_DMA32 was introduced with Linux 2.6.15. */
569#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
570 /* ZONE_DMA32: 0-4GB */
571 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA32, false /* non-contiguous */);
572 if (RT_FAILURE(rc))
573#endif
574#ifdef RT_ARCH_AMD64
575 /* ZONE_DMA: 0-16MB */
576 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA, false /* non-contiguous */);
577#else
578# ifdef CONFIG_X86_PAE
579# endif
580 /* ZONE_NORMAL: 0-896MB */
581 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_USER, false /* non-contiguous */);
582#endif
583 if (RT_SUCCESS(rc))
584 {
585 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
586 if (RT_SUCCESS(rc))
587 {
588 *ppMem = &pMemLnx->Core;
589 return rc;
590 }
591
592 rtR0MemObjLinuxFreePages(pMemLnx);
593 rtR0MemObjDelete(&pMemLnx->Core);
594 }
595
596 return rc;
597}
598
599
600int rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
601{
602 PRTR0MEMOBJLNX pMemLnx;
603 int rc;
604
605#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
606 /* ZONE_DMA32: 0-4GB */
607 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA32, true /* contiguous */);
608 if (RT_FAILURE(rc))
609#endif
610#ifdef RT_ARCH_AMD64
611 /* ZONE_DMA: 0-16MB */
612 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA, true /* contiguous */);
613#else
614 /* ZONE_NORMAL (32-bit hosts): 0-896MB */
615 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_USER, true /* contiguous */);
616#endif
617 if (RT_SUCCESS(rc))
618 {
619 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
620 if (RT_SUCCESS(rc))
621 {
622#if defined(RT_STRICT) && (defined(RT_ARCH_AMD64) || defined(CONFIG_HIGHMEM64G))
623 size_t iPage = pMemLnx->cPages;
624 while (iPage-- > 0)
625 Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
626#endif
627 pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
628 *ppMem = &pMemLnx->Core;
629 return rc;
630 }
631
632 rtR0MemObjLinuxFreePages(pMemLnx);
633 rtR0MemObjDelete(&pMemLnx->Core);
634 }
635
636 return rc;
637}
638
639
640/**
641 * Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
642 *
643 * @returns IPRT status.
644 * @param ppMem Where to store the memory object pointer on success.
645 * @param enmType The object type.
646 * @param cb The size of the allocation.
647 * @param uAlignment The alignment of the physical memory.
648 * Only valid for fContiguous == true, ignored otherwise.
649 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
650 * @param fGfp The Linux GFP flags to use for the allocation.
651 */
652static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
653 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest, unsigned fGfp)
654{
655 PRTR0MEMOBJLNX pMemLnx;
656 int rc;
657
658 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, uAlignment, fGfp,
659 enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */);
660 if (RT_FAILURE(rc))
661 return rc;
662
663 /*
664 * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
665 */
666 if (PhysHighest != NIL_RTHCPHYS)
667 {
668 size_t iPage = pMemLnx->cPages;
669 while (iPage-- > 0)
670 if (page_to_phys(pMemLnx->apPages[iPage]) >= PhysHighest)
671 {
672 rtR0MemObjLinuxFreePages(pMemLnx);
673 rtR0MemObjDelete(&pMemLnx->Core);
674 return VERR_NO_MEMORY;
675 }
676 }
677
678 /*
679 * Complete the object.
680 */
681 if (enmType == RTR0MEMOBJTYPE_PHYS)
682 {
683 pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
684 pMemLnx->Core.u.Phys.fAllocated = true;
685 }
686 *ppMem = &pMemLnx->Core;
687 return rc;
688}
689
690
691/**
692 * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
693 *
694 * @returns IPRT status.
695 * @param ppMem Where to store the memory object pointer on success.
696 * @param enmType The object type.
697 * @param cb The size of the allocation.
698 * @param uAlignment The alignment of the physical memory.
699 * Only valid for enmType == RTR0MEMOBJTYPE_PHYS, ignored otherwise.
700 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
701 */
702static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
703 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest)
704{
705 int rc;
706
707 /*
708 * There are two clear cases and that's the <=16MB and anything-goes ones.
709 * When the physical address limit is somewhere in-between those two we'll
710 * just have to try, starting with HIGHUSER and working our way through the
711 * different types, hoping we'll get lucky.
712 *
713 * We should probably move this physical address restriction logic up to
714 * the page alloc function as it would be more efficient there. But since
715 * we don't expect this to be a performance issue just yet it can wait.
716 */
717 if (PhysHighest == NIL_RTHCPHYS)
718 /* ZONE_HIGHMEM: the whole physical memory */
719 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
720 else if (PhysHighest <= _1M * 16)
721 /* ZONE_DMA: 0-16MB */
722 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
723 else
724 {
725 rc = VERR_NO_MEMORY;
726 if (RT_FAILURE(rc))
727 /* ZONE_HIGHMEM: the whole physical memory */
728 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
729 if (RT_FAILURE(rc))
730 /* ZONE_NORMAL: 0-896MB */
731 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_USER);
732#ifdef GFP_DMA32
733 if (RT_FAILURE(rc))
734 /* ZONE_DMA32: 0-4GB */
735 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA32);
736#endif
737 if (RT_FAILURE(rc))
738 /* ZONE_DMA: 0-16MB */
739 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
740 }
741 return rc;
742}
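
/*
 * Illustrative example (not part of the original file): with PhysHighest set
 * just below 4GB the worker above takes the "in-between" branch and tries
 * GFP_HIGHUSER, then GFP_USER, then GFP_DMA32 (where available) and finally
 * GFP_DMA, keeping the first allocation whose pages all lie below the limit.
 */
#if 0 /* example only, not built with this file */
static int exampleAllocPhysBelow4G(PPRTR0MEMOBJINTERNAL ppMem, size_t cb)
{
    return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb,
                                       PAGE_SIZE /* uAlignment */, _4G - 1 /* PhysHighest */);
}
#endif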
743
744
745int rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment)
746{
747 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, uAlignment, PhysHighest);
748}
749
750
751int rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
752{
753 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PAGE_SIZE, PhysHighest);
754}
755
756
757int rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy)
758{
759 /*
760 * All we need to do here is to validate that we can use
761 * ioremap on the specified address (32/64-bit dma_addr_t).
762 */
763 PRTR0MEMOBJLNX pMemLnx;
764 dma_addr_t PhysAddr = Phys;
765 AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
766
767 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb);
768 if (!pMemLnx)
769 return VERR_NO_MEMORY;
770
771 pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
772 pMemLnx->Core.u.Phys.fAllocated = false;
773 pMemLnx->Core.u.Phys.uCachePolicy = uCachePolicy;
774 Assert(!pMemLnx->cPages);
775 *ppMem = &pMemLnx->Core;
776 return VINF_SUCCESS;
777}
778
779
780int rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess, RTR0PROCESS R0Process)
781{
782 const int cPages = cb >> PAGE_SHIFT;
783 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
784 struct vm_area_struct **papVMAs;
785 PRTR0MEMOBJLNX pMemLnx;
786 int rc = VERR_NO_MEMORY;
787 NOREF(fAccess);
788
789 /*
790 * Check for valid task and size overflows.
791 */
792 if (!pTask)
793 return VERR_NOT_SUPPORTED;
794 if (((size_t)cPages << PAGE_SHIFT) != cb)
795 return VERR_OUT_OF_RANGE;
796
797 /*
798 * Allocate the memory object and a temporary buffer for the VMAs.
799 */
800 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
801 if (!pMemLnx)
802 return VERR_NO_MEMORY;
803
804 papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
805 if (papVMAs)
806 {
807 down_read(&pTask->mm->mmap_sem);
808
809 /*
810 * Get user pages.
811 */
812 rc = get_user_pages(pTask, /* Task for fault accounting. */
813 pTask->mm, /* Whose pages. */
814 R3Ptr, /* Where from. */
815 cPages, /* How many pages. */
816 1, /* Write to memory. */
817 0, /* force. */
818 &pMemLnx->apPages[0], /* Page array. */
819 papVMAs); /* vmas */
820 if (rc == cPages)
821 {
822 /*
823 * Flush dcache (required?), protect against fork and _really_ pin the page
824 * table entries. get_user_pages() will protect against swapping out the
825 * pages but it will NOT protect against removing page table entries. This
826 * can be achieved with
827 * - using mlock / mmap(..., MAP_LOCKED, ...) from userland. This requires
828 * an appropriate limit set up with setrlimit(..., RLIMIT_MEMLOCK, ...).
829 * Typical Linux distributions permit only a limited amount of locked memory
830 * (e.g. 32KB).
831 * - setting the PageReserved bit (as we do in rtR0MemObjLinuxAllocPages()),
832 * or by
833 * - setting the VM_LOCKED flag. This is the same as doing mlock() without
834 * a range check.
835 */
836 /** @todo The Linux fork() protection will require more work if this API
837 * is to be used for anything but locking VM pages. */
838 while (rc-- > 0)
839 {
840 flush_dcache_page(pMemLnx->apPages[rc]);
841 papVMAs[rc]->vm_flags |= (VM_DONTCOPY | VM_LOCKED);
842 }
843
844 up_read(&pTask->mm->mmap_sem);
845
846 RTMemFree(papVMAs);
847
848 pMemLnx->Core.u.Lock.R0Process = R0Process;
849 pMemLnx->cPages = cPages;
850 Assert(!pMemLnx->fMappedToRing0);
851 *ppMem = &pMemLnx->Core;
852
853 return VINF_SUCCESS;
854 }
855
856 /*
857 * Failed - we need to unlock any pages that we succeeded to lock.
858 */
859 while (rc-- > 0)
860 {
861 if (!PageReserved(pMemLnx->apPages[rc]))
862 SetPageDirty(pMemLnx->apPages[rc]);
863 page_cache_release(pMemLnx->apPages[rc]);
864 }
865
866 up_read(&pTask->mm->mmap_sem);
867
868 RTMemFree(papVMAs);
869 rc = VERR_LOCK_FAILED;
870 }
871
872 rtR0MemObjDelete(&pMemLnx->Core);
873 return rc;
874}
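
/*
 * Illustrative sketch (not part of the original file): locking a user buffer
 * from ring-0 through the IPRT front end that ends up in the worker above.
 * The public prototype lives in iprt/memobj.h; the signature used here is an
 * approximation.
 */
#if 0 /* example only, not built with this file */
static int exampleLockUserBuffer(RTR3PTR R3Ptr, size_t cb)
{
    RTR0MEMOBJ hMemObj;
    int rc = RTR0MemObjLockUser(&hMemObj, R3Ptr, cb, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
                                RTR0ProcHandleSelf());
    if (RT_SUCCESS(rc))
    {
        /* The pages stay pinned (get_user_pages() + VM_LOCKED) until the object is freed. */
        rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
    }
    return rc;
}
#endif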
875
876
877int rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
878{
879 void *pvLast = (uint8_t *)pv + cb - 1;
880 size_t const cPages = cb >> PAGE_SHIFT;
881 PRTR0MEMOBJLNX pMemLnx;
882 bool fLinearMapping;
883 int rc;
884 uint8_t *pbPage;
885 size_t iPage;
886 NOREF(fAccess);
887
888 /*
889 * Classify the memory and check that we can deal with it.
890 */
891#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
892 fLinearMapping = virt_addr_valid(pvLast) && virt_addr_valid(pv);
893#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0)
894 fLinearMapping = VALID_PAGE(virt_to_page(pvLast)) && VALID_PAGE(virt_to_page(pv));
895#else
896# error "not supported"
897#endif
898 if (!fLinearMapping)
899 {
900#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 19)
901 if ( !RTR0MemKernelIsValidAddr(pv)
902 || !RTR0MemKernelIsValidAddr(pv + cb))
903#endif
904 return VERR_INVALID_PARAMETER;
905 }
906
907 /*
908 * Allocate the memory object.
909 */
910 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, pv, cb);
911 if (!pMemLnx)
912 return VERR_NO_MEMORY;
913
914 /*
915 * Gather the pages.
916 * We ASSUME all kernel pages are non-swappable.
917 */
918 rc = VINF_SUCCESS;
919 pbPage = (uint8_t *)pvLast;
920 iPage = cPages;
921#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 19)
922 if (!fLinearMapping)
923 {
924 while (iPage-- > 0)
925 {
926 struct page *pPage = vmalloc_to_page(pbPage);
927 if (RT_UNLIKELY(!pPage))
928 {
929 rc = VERR_LOCK_FAILED;
930 break;
931 }
932 pMemLnx->apPages[iPage] = pPage;
933 pbPage -= PAGE_SIZE;
934 }
935 }
936 else
937#endif
938 {
939 while (iPage-- > 0)
940 {
941 pMemLnx->apPages[iPage] = virt_to_page(pbPage);
942 pbPage -= PAGE_SIZE;
943 }
944 }
945 if (RT_SUCCESS(rc))
946 {
947 /*
948 * Complete the memory object and return.
949 */
950 pMemLnx->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
951 pMemLnx->cPages = cPages;
952 Assert(!pMemLnx->fMappedToRing0);
953 *ppMem = &pMemLnx->Core;
954
955 return VINF_SUCCESS;
956 }
957
958 rtR0MemObjDelete(&pMemLnx->Core);
959 return rc;
960}
961
962
963int rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
964{
965#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
966 const size_t cPages = cb >> PAGE_SHIFT;
967 struct page *pDummyPage;
968 struct page **papPages;
969
970 /* check for unsupported stuff. */
971 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
972 if (uAlignment > PAGE_SIZE)
973 return VERR_NOT_SUPPORTED;
974
975 /*
976 * Allocate a dummy page and create a page pointer array for vmap such that
977 * the dummy page is mapped all over the reserved area.
978 */
979 pDummyPage = alloc_page(GFP_HIGHUSER);
980 if (!pDummyPage)
981 return VERR_NO_MEMORY;
982 papPages = RTMemAlloc(sizeof(*papPages) * cPages);
983 if (papPages)
984 {
985 void *pv;
986 size_t iPage = cPages;
987 while (iPage-- > 0)
988 papPages[iPage] = pDummyPage;
989# ifdef VM_MAP
990 pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
991# else
992 pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
993# endif
994 RTMemFree(papPages);
995 if (pv)
996 {
997 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
998 if (pMemLnx)
999 {
1000 pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
1001 pMemLnx->cPages = 1;
1002 pMemLnx->apPages[0] = pDummyPage;
1003 *ppMem = &pMemLnx->Core;
1004 return VINF_SUCCESS;
1005 }
1006 vunmap(pv);
1007 }
1008 }
1009 __free_page(pDummyPage);
1010 return VERR_NO_MEMORY;
1011
1012#else /* < 2.4.22 */
1013 /*
1014 * Could probably use ioremap here, but the caller is in a better position than us
1015 * to select some safe physical memory.
1016 */
1017 return VERR_NOT_SUPPORTED;
1018#endif
1019}
1020
1021
1022/**
1023 * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
1024 * an empty user space mapping.
1025 *
1026 * The caller takes care of acquiring the mmap_sem of the task.
1027 *
1028 * @returns Pointer to the mapping.
1029 * (void *)-1 on failure.
1030 * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
1031 * @param cb The size of the mapping.
1032 * @param uAlignment The alignment of the mapping.
1033 * @param pTask The Linux task to create this mapping in.
1034 * @param fProt The RTMEM_PROT_* mask.
1035 */
1036static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
1037{
1038 unsigned fLnxProt;
1039 unsigned long ulAddr;
1040
1041 /*
1042 * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
1043 */
1044 fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
1045 if (fProt == RTMEM_PROT_NONE)
1046 fLnxProt = PROT_NONE;
1047 else
1048 {
1049 fLnxProt = 0;
1050 if (fProt & RTMEM_PROT_READ)
1051 fLnxProt |= PROT_READ;
1052 if (fProt & RTMEM_PROT_WRITE)
1053 fLnxProt |= PROT_WRITE;
1054 if (fProt & RTMEM_PROT_EXEC)
1055 fLnxProt |= PROT_EXEC;
1056 }
1057
1058 if (R3PtrFixed != (RTR3PTR)-1)
1059 ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
1060 else
1061 {
1062 ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1063 if ( !(ulAddr & ~PAGE_MASK)
1064 && (ulAddr & (uAlignment - 1)))
1065 {
1066 /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
1067 * up alignment gaps. This is of course complicated by fragmentation (which we might have caused
1068 * ourselves) and further by there being two mmap strategies (top / bottom). */
1069 /* For now, just ignore uAlignment requirements... */
1070 }
1071 }
1072 if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
1073 return (void *)-1;
1074 return (void *)ulAddr;
1075}
1076
1077
1078int rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
1079{
1080 PRTR0MEMOBJLNX pMemLnx;
1081 void *pv;
1082 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1083 if (!pTask)
1084 return VERR_NOT_SUPPORTED;
1085
1086 /*
1087 * Check that the specified alignment is supported.
1088 */
1089 if (uAlignment > PAGE_SIZE)
1090 return VERR_NOT_SUPPORTED;
1091
1092 /*
1093 * Let rtR0MemObjLinuxDoMmap do the difficult bits.
1094 */
1095 down_write(&pTask->mm->mmap_sem);
1096 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
1097 up_write(&pTask->mm->mmap_sem);
1098 if (pv == (void *)-1)
1099 return VERR_NO_MEMORY;
1100
1101 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
1102 if (!pMemLnx)
1103 {
1104 down_write(&pTask->mm->mmap_sem);
1105 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, cb);
1106 up_write(&pTask->mm->mmap_sem);
1107 return VERR_NO_MEMORY;
1108 }
1109
1110 pMemLnx->Core.u.ResVirt.R0Process = R0Process;
1111 *ppMem = &pMemLnx->Core;
1112 return VINF_SUCCESS;
1113}
1114
1115
1116int rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment,
1117 unsigned fProt, size_t offSub, size_t cbSub)
1118{
1119 int rc = VERR_NO_MEMORY;
1120 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1121 PRTR0MEMOBJLNX pMemLnx;
1122
1123 /* Fail if requested to do something we can't. */
1124 AssertMsgReturn(!offSub && !cbSub, ("%#x %#x\n", offSub, cbSub), VERR_NOT_SUPPORTED);
1125 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
1126 if (uAlignment > PAGE_SIZE)
1127 return VERR_NOT_SUPPORTED;
1128
1129 /*
1130 * Create the IPRT memory object.
1131 */
1132 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1133 if (pMemLnx)
1134 {
1135 if (pMemLnxToMap->cPages)
1136 {
1137#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1138 /*
1139 * Use vmap - 2.4.22 and later.
1140 */
1141 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
1142# ifdef VM_MAP
1143 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_MAP, fPg);
1144# else
1145 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_ALLOC, fPg);
1146# endif
1147 if (pMemLnx->Core.pv)
1148 {
1149 pMemLnx->fMappedToRing0 = true;
1150 rc = VINF_SUCCESS;
1151 }
1152 else
1153 rc = VERR_MAP_FAILED;
1154
1155#else /* < 2.4.22 */
1156 /*
1157 * The only option here is to share mappings if possible and forget about fProt.
1158 */
1159 if (rtR0MemObjIsRing3(pMemToMap))
1160 rc = VERR_NOT_SUPPORTED;
1161 else
1162 {
1163 rc = VINF_SUCCESS;
1164 if (!pMemLnxToMap->Core.pv)
1165 rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
1166 if (RT_SUCCESS(rc))
1167 {
1168 Assert(pMemLnxToMap->Core.pv);
1169 pMemLnx->Core.pv = pMemLnxToMap->Core.pv;
1170 }
1171 }
1172#endif
1173 }
1174 else
1175 {
1176 /*
1177 * MMIO / physical memory.
1178 */
1179 Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
1180 pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
1181 ? ioremap_nocache(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb)
1182 : ioremap(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb);
1183 if (pMemLnx->Core.pv)
1184 {
1185 /** @todo fix protection. */
1186 rc = VINF_SUCCESS;
1187 }
1188 }
1189 if (RT_SUCCESS(rc))
1190 {
1191 pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
1192 *ppMem = &pMemLnx->Core;
1193 return VINF_SUCCESS;
1194 }
1195 rtR0MemObjDelete(&pMemLnx->Core);
1196 }
1197
1198 return rc;
1199}
1200
1201
1202#ifdef VBOX_USE_PAE_HACK
1203/**
1204 * Replace the PFN of a PTE with the address of the actual page.
1205 *
1206 * The caller maps a reserved dummy page at the address with the desired access
1207 * and flags.
1208 *
1209 * This hack is required for older Linux kernels which don't provide
1210 * remap_pfn_range().
1211 *
1212 * @returns 0 on success, -ENOMEM on failure.
1213 * @param mm The memory context.
1214 * @param ulAddr The mapping address.
1215 * @param Phys The physical address of the page to map.
1216 */
1217static int rtR0MemObjLinuxFixPte(struct mm_struct *mm, unsigned long ulAddr, RTHCPHYS Phys)
1218{
1219 int rc = -ENOMEM;
1220 pgd_t *pgd;
1221
1222 spin_lock(&mm->page_table_lock);
1223
1224 pgd = pgd_offset(mm, ulAddr);
1225 if (!pgd_none(*pgd) && !pgd_bad(*pgd))
1226 {
1227 pmd_t *pmd = pmd_offset(pgd, ulAddr);
1228 if (!pmd_none(*pmd))
1229 {
1230 pte_t *ptep = pte_offset_map(pmd, ulAddr);
1231 if (ptep)
1232 {
1233 pte_t pte = *ptep;
1234 pte.pte_high &= 0xfff00000;
1235 pte.pte_high |= ((Phys >> 32) & 0x000fffff);
1236 pte.pte_low &= 0x00000fff;
1237 pte.pte_low |= (Phys & 0xfffff000);
1238 set_pte(ptep, pte);
1239 pte_unmap(ptep);
1240 rc = 0;
1241 }
1242 }
1243 }
1244
1245 spin_unlock(&mm->page_table_lock);
1246 return rc;
1247}
1248#endif /* VBOX_USE_PAE_HACK */
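
/*
 * Worked example (illustrative, not part of the original file) of the PAE PTE
 * split done by rtR0MemObjLinuxFixPte() above: the low 32 bits of the physical
 * address minus the 12 flag bits go into pte_low, bits 32..51 go into the low
 * 20 bits of pte_high.
 */
#if 0 /* example only, not built with this file */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t Phys     = UINT64_C(0x0000123456789000);              /* 4KB-aligned address above 4GB */
    uint32_t pte_low  = (uint32_t)(Phys & UINT64_C(0xfffff000));   /* 0x56789000 */
    uint32_t pte_high = (uint32_t)((Phys >> 32) & 0x000fffff);     /* 0x00001234 */
    printf("pte_low=%#010x pte_high=%#010x\n", pte_low, pte_high);
    return 0;
}
#endif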
1249
1250
1251int rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed, size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
1252{
1253 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1254 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1255 int rc = VERR_NO_MEMORY;
1256 PRTR0MEMOBJLNX pMemLnx;
1257#ifdef VBOX_USE_PAE_HACK
1258 struct page *pDummyPage;
1259 RTHCPHYS DummyPhys;
1260#endif
1261
1262 /*
1263 * Check for restrictions.
1264 */
1265 if (!pTask)
1266 return VERR_NOT_SUPPORTED;
1267 if (uAlignment > PAGE_SIZE)
1268 return VERR_NOT_SUPPORTED;
1269
1270#ifdef VBOX_USE_PAE_HACK
1271 /*
1272 * Allocate a dummy page for use when mapping the memory.
1273 */
1274 pDummyPage = alloc_page(GFP_USER);
1275 if (!pDummyPage)
1276 return VERR_NO_MEMORY;
1277 SetPageReserved(pDummyPage);
1278 DummyPhys = page_to_phys(pDummyPage);
1279#endif
1280
1281 /*
1282 * Create the IPRT memory object.
1283 */
1284 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1285 if (pMemLnx)
1286 {
1287 /*
1288 * Allocate user space mapping.
1289 */
1290 void *pv;
1291 down_write(&pTask->mm->mmap_sem);
1292 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, pMemLnxToMap->Core.cb, uAlignment, pTask, fProt);
1293 if (pv != (void *)-1)
1294 {
1295 /*
1296 * Map page by page into the mmap area.
1297 * This is generic, paranoid and not very efficient.
1298 */
1299 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
1300 unsigned long ulAddrCur = (unsigned long)pv;
1301 const size_t cPages = pMemLnxToMap->Core.cb >> PAGE_SHIFT;
1302 size_t iPage;
1303
1304 rc = 0;
1305 if (pMemLnxToMap->cPages)
1306 {
1307 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1308 {
1309#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1310 RTHCPHYS Phys = page_to_phys(pMemLnxToMap->apPages[iPage]);
1311#endif
1312#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1313 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1314 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1315#endif
1316#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1317 /* remap_page_range() limitation on x86 */
1318 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1319#endif
1320
1321#if defined(VBOX_USE_INSERT_PAGE) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
1322 rc = vm_insert_page(vma, ulAddrCur, pMemLnxToMap->apPages[iPage]);
1323 vma->vm_flags |= VM_RESERVED; /* This flag helps make 100% sure that bad stuff won't happen (swap, core dumps, etc.). */
1324#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1325 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1326#elif defined(VBOX_USE_PAE_HACK)
1327 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1328 if (!rc)
1329 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1330#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1331 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1332#else /* 2.4 */
1333 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1334#endif
1335 if (rc)
1336 {
1337 rc = VERR_NO_MEMORY;
1338 break;
1339 }
1340 }
1341 }
1342 else
1343 {
1344 RTHCPHYS Phys;
1345 if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
1346 Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
1347 else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
1348 Phys = pMemLnxToMap->Core.u.Cont.Phys;
1349 else
1350 {
1351 AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
1352 Phys = NIL_RTHCPHYS;
1353 }
1354 if (Phys != NIL_RTHCPHYS)
1355 {
1356 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
1357 {
1358#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1359 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1360 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1361#endif
1362#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1363 /* remap_page_range() limitation on x86 */
1364 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1365#endif
1366
1367#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1368 rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1369#elif defined(VBOX_USE_PAE_HACK)
1370 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1371 if (!rc)
1372 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1373#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1374 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1375#else /* 2.4 */
1376 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1377#endif
1378 if (rc)
1379 {
1380 rc = VERR_NO_MEMORY;
1381 break;
1382 }
1383 }
1384 }
1385 }
1386 if (!rc)
1387 {
1388 up_write(&pTask->mm->mmap_sem);
1389#ifdef VBOX_USE_PAE_HACK
1390 __free_page(pDummyPage);
1391#endif
1392
1393 pMemLnx->Core.pv = pv;
1394 pMemLnx->Core.u.Mapping.R0Process = R0Process;
1395 *ppMem = &pMemLnx->Core;
1396 return VINF_SUCCESS;
1397 }
1398
1399 /*
1400 * Bail out.
1401 */
1402 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, pMemLnxToMap->Core.cb);
1403 }
1404 up_write(&pTask->mm->mmap_sem);
1405 rtR0MemObjDelete(&pMemLnx->Core);
1406 }
1407#ifdef VBOX_USE_PAE_HACK
1408 __free_page(pDummyPage);
1409#endif
1410
1411 return rc;
1412}
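
/*
 * Illustrative sketch (not part of the original file) of the two kernel
 * primitives the function above alternates between when populating the user
 * mapping: vm_insert_page() on the VBOX_USE_INSERT_PAGE path and
 * remap_pfn_range() on 2.6.11+ (older kernels need remap_page_range(), as the
 * function above shows).  The helper below is hypothetical.
 */
#if 0 /* example only, not built with this file */
static int exampleMapOnePageIntoVma(struct vm_area_struct *vma, struct page *pPage)
{
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
    return vm_insert_page(vma, vma->vm_start, pPage);
# else
    return remap_pfn_range(vma, vma->vm_start, page_to_pfn(pPage), PAGE_SIZE, vma->vm_page_prot);
# endif
}
#endif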
1413
1414
1415int rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
1416{
1417 NOREF(pMem);
1418 NOREF(offSub);
1419 NOREF(cbSub);
1420 NOREF(fProt);
1421 return VERR_NOT_SUPPORTED;
1422}
1423
1424
1425RTHCPHYS rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
1426{
1427 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
1428
1429 if (pMemLnx->cPages)
1430 return page_to_phys(pMemLnx->apPages[iPage]);
1431
1432 switch (pMemLnx->Core.enmType)
1433 {
1434 case RTR0MEMOBJTYPE_CONT:
1435 return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
1436
1437 case RTR0MEMOBJTYPE_PHYS:
1438 return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
1439
1440 /* the parent knows */
1441 case RTR0MEMOBJTYPE_MAPPING:
1442 return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
1443
1444 /* cPages > 0 */
1445 case RTR0MEMOBJTYPE_LOW:
1446 case RTR0MEMOBJTYPE_LOCK:
1447 case RTR0MEMOBJTYPE_PHYS_NC:
1448 case RTR0MEMOBJTYPE_PAGE:
1449 default:
1450 AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
1451 /* fall thru */
1452
1453 case RTR0MEMOBJTYPE_RES_VIRT:
1454 return NIL_RTHCPHYS;
1455 }
1456}
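
/*
 * Illustrative sketch (not part of the original file): walking the physical
 * pages of an object through the IPRT front end backed by the function above.
 * Public prototypes are in iprt/memobj.h; the signatures used here are an
 * approximation.
 */
#if 0 /* example only, not built with this file */
static void exampleDumpPhysPages(RTR0MEMOBJ hMemObj)
{
    size_t const cPages = RTR0MemObjSize(hMemObj) >> PAGE_SHIFT;
    size_t iPage;
    for (iPage = 0; iPage < cPages; iPage++)
    {
        RTHCPHYS Phys = RTR0MemObjGetPagePhysAddr(hMemObj, iPage);
        /* NIL_RTHCPHYS is returned for objects without backing pages (e.g. RES_VIRT). */
        printk("page %lu at %#llx\n", (unsigned long)iPage, (unsigned long long)Phys);
    }
}
#endif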
1457