source: vbox/trunk/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c@ 39744

Last change on this file since 39744 was 39744, checked in by vboxsync, 13 years ago

rtr0memobj: Status code adjustments.

1/* $Revision: 39744 $ */
2/** @file
3 * IPRT - Ring-0 Memory Objects, Linux.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "the-linux-kernel.h"
32
33#include <iprt/memobj.h>
34#include <iprt/alloc.h>
35#include <iprt/assert.h>
36#include <iprt/log.h>
37#include <iprt/process.h>
38#include <iprt/string.h>
39#include "internal/memobj.h"
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/* early 2.6 kernels */
46#ifndef PAGE_SHARED_EXEC
47# define PAGE_SHARED_EXEC PAGE_SHARED
48#endif
49#ifndef PAGE_READONLY_EXEC
50# define PAGE_READONLY_EXEC PAGE_READONLY
51#endif
52
53/*
54 * 2.6.29+ kernels don't work with remap_pfn_range() anymore because
55 * track_pfn_vma_new() is apparently not defined for non-RAM pages.
56 * It should be safe to use vm_insert_page() on older kernels as well.
57 */
58#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
59# define VBOX_USE_INSERT_PAGE
60#endif
61#if defined(CONFIG_X86_PAE) \
62 && ( defined(HAVE_26_STYLE_REMAP_PAGE_RANGE) \
63 || ( LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) \
64 && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)))
65# define VBOX_USE_PAE_HACK
66#endif
67
68
69/*******************************************************************************
70* Structures and Typedefs *
71*******************************************************************************/
72/**
73 * The Linux version of the memory object structure.
74 */
75typedef struct RTR0MEMOBJLNX
76{
77 /** The core structure. */
78 RTR0MEMOBJINTERNAL Core;
79 /** Set if the allocation is contiguous.
80 * This means it has to be given back as one chunk. */
81 bool fContiguous;
82 /** Set if we've vmap'ed the memory into ring-0. */
83 bool fMappedToRing0;
84 /** The number of pages in the apPages array. */
85 size_t cPages;
86 /** Array of struct page pointers. (variable size) */
87 struct page *apPages[1];
88} RTR0MEMOBJLNX, *PRTR0MEMOBJLNX;
89
90
91static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx);
92
93
94/**
95 * Helper that converts from an RTR0PROCESS handle to a Linux task.
96 *
97 * @returns The corresponding Linux task.
98 * @param R0Process IPRT ring-0 process handle.
99 */
100static struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
101{
102 /** @todo fix rtR0ProcessToLinuxTask!! */
103 return R0Process == RTR0ProcHandleSelf() ? current : NULL;
104}
105
106
107/**
108 * Compute order. Some functions allocate 2^order pages.
109 *
110 * @returns order.
111 * @param cPages Number of pages.
112 */
113static int rtR0MemObjLinuxOrder(size_t cPages)
114{
115 int iOrder;
116 size_t cTmp;
117
118 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
119 ;
120 if (cPages & ~((size_t)1 << iOrder))
121 ++iOrder;
122
123 return iOrder;
124}
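/*
 * A minimal usage sketch of the order computation above, kept under #if 0 so
 * it is never built; the example function name is made up for illustration.
 */
#if 0
static void rtR0MemObjLinuxOrderExample(void)
{
    Assert(rtR0MemObjLinuxOrder(1) == 0); /* 1 page  -> 2^0 = 1 page. */
    Assert(rtR0MemObjLinuxOrder(2) == 1); /* 2 pages -> 2^1 = 2 pages. */
    Assert(rtR0MemObjLinuxOrder(3) == 2); /* 3 pages round up to 2^2 = 4 pages. */
    Assert(rtR0MemObjLinuxOrder(8) == 3); /* exact power of two, no rounding. */
}
#endif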
125
126
127/**
128 * Converts from RTMEM_PROT_* to Linux PAGE_*.
129 *
130 * @returns Linux page protection constant.
131 * @param fProt The IPRT protection mask.
132 * @param fKernel Whether it applies to kernel or user space.
133 */
134static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
135{
136 switch (fProt)
137 {
138 default:
139 AssertMsgFailed(("%#x %d\n", fProt, fKernel));
140 case RTMEM_PROT_NONE:
141 return PAGE_NONE;
142
143 case RTMEM_PROT_READ:
144 return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
145
146 case RTMEM_PROT_WRITE:
147 case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
148 return fKernel ? PAGE_KERNEL : PAGE_SHARED;
149
150 case RTMEM_PROT_EXEC:
151 case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
152#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
153 if (fKernel)
154 {
155 pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
156 pgprot_val(fPg) &= ~_PAGE_RW;
157 return fPg;
158 }
159 return PAGE_READONLY_EXEC;
160#else
161 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
162#endif
163
164 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
165 case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
166 return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
167 }
168}
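/*
 * A minimal usage sketch of the protection conversion above, kept under #if 0
 * so it is never built; the example function name is made up for illustration.
 */
#if 0
static void rtR0MemObjLinuxConvertProtExample(void)
{
    /* Read-only protection for a ring-0 mapping vs. read/write for a ring-3 one. */
    pgprot_t fPgKernelRo = rtR0MemObjLinuxConvertProt(RTMEM_PROT_READ, true /* kernel */);
    pgprot_t fPgUserRw   = rtR0MemObjLinuxConvertProt(RTMEM_PROT_READ | RTMEM_PROT_WRITE, false /* user */);
    NOREF(fPgKernelRo); NOREF(fPgUserRw);
}
#endif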
169
170
171/**
172 * Internal worker that allocates physical pages and creates the memory object for them.
173 *
174 * @returns IPRT status code.
175 * @param ppMemLnx Where to store the memory object pointer.
176 * @param enmType The object type.
177 * @param cb The number of bytes to allocate.
178 * @param uAlignment The alignment of the physical memory.
179 * Only valid if fContiguous == true, ignored otherwise.
180 * @param fFlagsLnx The page allocation flags (GFPs).
181 * @param fContiguous Whether the allocation must be contiguous.
182 * @param rcNoMem What to return when we're out of pages.
183 */
184static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb,
185 size_t uAlignment, unsigned fFlagsLnx, bool fContiguous, int rcNoMem)
186{
187 size_t iPage;
188 size_t const cPages = cb >> PAGE_SHIFT;
189 struct page *paPages;
190
191 /*
192 * Allocate a memory object structure that's large enough to contain
193 * the page pointer array.
194 */
195 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), enmType, NULL, cb);
196 if (!pMemLnx)
197 return VERR_NO_MEMORY;
198 pMemLnx->cPages = cPages;
199
200 if (cPages > 255)
201 {
202# ifdef __GFP_REPEAT
203 /* Try hard to allocate the memory, but the allocation attempt might fail. */
204 fFlagsLnx |= __GFP_REPEAT;
205# endif
206# ifdef __GFP_NOMEMALLOC
207 /* Introduced with Linux 2.6.12: Don't use emergency reserves */
208 fFlagsLnx |= __GFP_NOMEMALLOC;
209# endif
210 }
211
212 /*
213 * Allocate the pages.
214 * For small allocations we'll try contiguous first and then fall back on page by page.
215 */
216#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
217 if ( fContiguous
218 || cb <= PAGE_SIZE * 2)
219 {
220# ifdef VBOX_USE_INSERT_PAGE
221 paPages = alloc_pages(fFlagsLnx | __GFP_COMP, rtR0MemObjLinuxOrder(cPages));
222# else
223 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
224# endif
225 if (paPages)
226 {
227 fContiguous = true;
228 for (iPage = 0; iPage < cPages; iPage++)
229 pMemLnx->apPages[iPage] = &paPages[iPage];
230 }
231 else if (fContiguous)
232 {
233 rtR0MemObjDelete(&pMemLnx->Core);
234 return rcNoMem;
235 }
236 }
237
238 if (!fContiguous)
239 {
240 for (iPage = 0; iPage < cPages; iPage++)
241 {
242 pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx);
243 if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
244 {
245 while (iPage-- > 0)
246 __free_page(pMemLnx->apPages[iPage]);
247 rtR0MemObjDelete(&pMemLnx->Core);
248 return rcNoMem;
249 }
250 }
251 }
252
253#else /* < 2.4.22 */
254 /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
255 paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
256 if (!paPages)
257 {
258 rtR0MemObjDelete(&pMemLnx->Core);
259 return rcNoMem;
260 }
261 for (iPage = 0; iPage < cPages; iPage++)
262 {
263 pMemLnx->apPages[iPage] = &paPages[iPage];
264 MY_SET_PAGES_EXEC(pMemLnx->apPages[iPage], 1);
265 if (PageHighMem(pMemLnx->apPages[iPage]))
266 BUG();
267 }
268
269 fContiguous = true;
270#endif /* < 2.4.22 */
271 pMemLnx->fContiguous = fContiguous;
272
273 /*
274 * Reserve the pages.
275 */
276 for (iPage = 0; iPage < cPages; iPage++)
277 SetPageReserved(pMemLnx->apPages[iPage]);
278
279 /*
280 * Note that the physical address of memory allocated with alloc_pages(flags, order)
281 * is always 2^(PAGE_SHIFT+order)-aligned.
282 */
283 if ( fContiguous
284 && uAlignment > PAGE_SIZE)
285 {
286 /*
287 * Check for alignment constraints. The physical address of memory allocated with
288 * alloc_pages(flags, order) is always 2^(PAGE_SHIFT+order)-aligned.
289 */
290 if (RT_UNLIKELY(page_to_phys(pMemLnx->apPages[0]) & (uAlignment - 1)))
291 {
292 /*
293 * This should never happen!
294 */
295 printk("rtR0MemObjLinuxAllocPages(cb=0x%lx, uAlignment=0x%lx): alloc_pages(..., %d) returned physical memory at 0x%lx!\n",
296 (unsigned long)cb, (unsigned long)uAlignment, rtR0MemObjLinuxOrder(cPages), (unsigned long)page_to_phys(pMemLnx->apPages[0]));
297 rtR0MemObjLinuxFreePages(pMemLnx);
298 return rcNoMem;
299 }
300 }
301
302 *ppMemLnx = pMemLnx;
303 return VINF_SUCCESS;
304}
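/*
 * A minimal usage sketch of the allocation worker above, kept under #if 0 so
 * it is never built; it mirrors the pattern the rtR0MemObjNativeAlloc*
 * functions below follow (allocate, use, free, delete).  The example function
 * name is made up for illustration.
 */
#if 0
static int rtR0MemObjLinuxAllocPagesExample(void)
{
    PRTR0MEMOBJLNX pMemLnx;
    int rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, 8 * PAGE_SIZE, PAGE_SIZE,
                                       GFP_HIGHUSER, false /* non-contiguous */, VERR_NO_MEMORY);
    if (RT_SUCCESS(rc))
    {
        /* ... use pMemLnx->apPages[0..7] here ... */
        rtR0MemObjLinuxFreePages(pMemLnx);
        rtR0MemObjDelete(&pMemLnx->Core);
    }
    return rc;
}
#endif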
305
306
307/**
308 * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
309 *
310 * This method does NOT free the object.
311 *
312 * @param pMemLnx The object whose physical pages should be freed.
313 */
314static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
315{
316 size_t iPage = pMemLnx->cPages;
317 if (iPage > 0)
318 {
319 /*
320 * Restore the page flags.
321 */
322 while (iPage-- > 0)
323 {
324 ClearPageReserved(pMemLnx->apPages[iPage]);
325#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
326#else
327 MY_SET_PAGES_NOEXEC(pMemLnx->apPages[iPage], 1);
328#endif
329 }
330
331 /*
332 * Free the pages.
333 */
334#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
335 if (!pMemLnx->fContiguous)
336 {
337 iPage = pMemLnx->cPages;
338 while (iPage-- > 0)
339 __free_page(pMemLnx->apPages[iPage]);
340 }
341 else
342#endif
343 __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
344
345 pMemLnx->cPages = 0;
346 }
347}
348
349
350/**
351 * Maps the allocation into ring-0.
352 *
353 * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJLNX::fMappedToRing0 members.
354 *
355 * Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
356 * space, so we'll use that mapping if possible. If execute access is required, we'll
357 * play safe and do our own mapping.
358 *
359 * @returns IPRT status code.
360 * @param pMemLnx The linux memory object to map.
361 * @param fExecutable Whether execute access is required.
362 */
363static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
364{
365 int rc = VINF_SUCCESS;
366
367 /*
368 * Choose mapping strategy.
369 */
370 bool fMustMap = fExecutable
371 || !pMemLnx->fContiguous;
372 if (!fMustMap)
373 {
374 size_t iPage = pMemLnx->cPages;
375 while (iPage-- > 0)
376 if (PageHighMem(pMemLnx->apPages[iPage]))
377 {
378 fMustMap = true;
379 break;
380 }
381 }
382
383 Assert(!pMemLnx->Core.pv);
384 Assert(!pMemLnx->fMappedToRing0);
385
386 if (fMustMap)
387 {
388 /*
389 * Use vmap - 2.4.22 and later.
390 */
391#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
392 pgprot_t fPg;
393 pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
394# ifdef _PAGE_NX
395 if (!fExecutable)
396 pgprot_val(fPg) |= _PAGE_NX;
397# endif
398
399# ifdef VM_MAP
400 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
401# else
402 pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
403# endif
404 if (pMemLnx->Core.pv)
405 pMemLnx->fMappedToRing0 = true;
406 else
407 rc = VERR_MAP_FAILED;
408#else /* < 2.4.22 */
409 rc = VERR_NOT_SUPPORTED;
410#endif
411 }
412 else
413 {
414 /*
415 * Use the kernel RAM mapping.
416 */
417 pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
418 Assert(pMemLnx->Core.pv);
419 }
420
421 return rc;
422}
423
424
425/**
426 * Undoes what rtR0MemObjLinuxVMap() did.
427 *
428 * @param pMemLnx The linux memory object.
429 */
430static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
431{
432#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
433 if (pMemLnx->fMappedToRing0)
434 {
435 Assert(pMemLnx->Core.pv);
436 vunmap(pMemLnx->Core.pv);
437 pMemLnx->fMappedToRing0 = false;
438 }
439#else /* < 2.4.22 */
440 Assert(!pMemLnx->fMappedToRing0);
441#endif
442 pMemLnx->Core.pv = NULL;
443}
444
445
446DECLHIDDEN(int) rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
447{
448 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
449
450 /*
451 * Release any memory that we've allocated or locked.
452 */
453 switch (pMemLnx->Core.enmType)
454 {
455 case RTR0MEMOBJTYPE_LOW:
456 case RTR0MEMOBJTYPE_PAGE:
457 case RTR0MEMOBJTYPE_CONT:
458 case RTR0MEMOBJTYPE_PHYS:
459 case RTR0MEMOBJTYPE_PHYS_NC:
460 rtR0MemObjLinuxVUnmap(pMemLnx);
461 rtR0MemObjLinuxFreePages(pMemLnx);
462 break;
463
464 case RTR0MEMOBJTYPE_LOCK:
465 if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
466 {
467 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
468 size_t iPage;
469 Assert(pTask);
470 if (pTask && pTask->mm)
471 down_read(&pTask->mm->mmap_sem);
472
473 iPage = pMemLnx->cPages;
474 while (iPage-- > 0)
475 {
476 if (!PageReserved(pMemLnx->apPages[iPage]))
477 SetPageDirty(pMemLnx->apPages[iPage]);
478 page_cache_release(pMemLnx->apPages[iPage]);
479 }
480
481 if (pTask && pTask->mm)
482 up_read(&pTask->mm->mmap_sem);
483 }
484 /* else: kernel memory - nothing to do here. */
485 break;
486
487 case RTR0MEMOBJTYPE_RES_VIRT:
488 Assert(pMemLnx->Core.pv);
489 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
490 {
491 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
492 Assert(pTask);
493 if (pTask && pTask->mm)
494 {
495 down_write(&pTask->mm->mmap_sem);
496 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
497 up_write(&pTask->mm->mmap_sem);
498 }
499 }
500 else
501 {
502 vunmap(pMemLnx->Core.pv);
503
504 Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
505 __free_page(pMemLnx->apPages[0]);
506 pMemLnx->apPages[0] = NULL;
507 pMemLnx->cPages = 0;
508 }
509 pMemLnx->Core.pv = NULL;
510 break;
511
512 case RTR0MEMOBJTYPE_MAPPING:
513 Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
514 if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
515 {
516 struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
517 Assert(pTask);
518 if (pTask && pTask->mm)
519 {
520 down_write(&pTask->mm->mmap_sem);
521 MY_DO_MUNMAP(pTask->mm, (unsigned long)pMemLnx->Core.pv, pMemLnx->Core.cb);
522 up_write(&pTask->mm->mmap_sem);
523 }
524 }
525 else
526 vunmap(pMemLnx->Core.pv);
527 pMemLnx->Core.pv = NULL;
528 break;
529
530 default:
531 AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
532 return VERR_INTERNAL_ERROR;
533 }
534 return VINF_SUCCESS;
535}
536
537
538DECLHIDDEN(int) rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
539{
540 PRTR0MEMOBJLNX pMemLnx;
541 int rc;
542
543#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
544 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_HIGHUSER,
545 false /* non-contiguous */, VERR_NO_MEMORY);
546#else
547 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_USER,
548 false /* non-contiguous */, VERR_NO_MEMORY);
549#endif
550 if (RT_SUCCESS(rc))
551 {
552 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
553 if (RT_SUCCESS(rc))
554 {
555 *ppMem = &pMemLnx->Core;
556 return rc;
557 }
558
559 rtR0MemObjLinuxFreePages(pMemLnx);
560 rtR0MemObjDelete(&pMemLnx->Core);
561 }
562
563 return rc;
564}
565
566
567DECLHIDDEN(int) rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
568{
569 PRTR0MEMOBJLNX pMemLnx;
570 int rc;
571
572 /* Try to avoid GFP_DMA. GFP_DMA32 was introduced with Linux 2.6.15. */
573#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
574 /* ZONE_DMA32: 0-4GB */
575 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA32,
576 false /* non-contiguous */, VERR_NO_LOW_MEMORY);
577 if (RT_FAILURE(rc))
578#endif
579#ifdef RT_ARCH_AMD64
580 /* ZONE_DMA: 0-16MB */
581 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA,
582 false /* non-contiguous */, VERR_NO_LOW_MEMORY);
583#else
584# ifdef CONFIG_X86_PAE
585# endif
586 /* ZONE_NORMAL: 0-896MB */
587 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_USER,
588 false /* non-contiguous */, VERR_NO_LOW_MEMORY);
589#endif
590 if (RT_SUCCESS(rc))
591 {
592 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
593 if (RT_SUCCESS(rc))
594 {
595 *ppMem = &pMemLnx->Core;
596 return rc;
597 }
598
599 rtR0MemObjLinuxFreePages(pMemLnx);
600 rtR0MemObjDelete(&pMemLnx->Core);
601 }
602
603 return rc;
604}
605
606
607DECLHIDDEN(int) rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable)
608{
609 PRTR0MEMOBJLNX pMemLnx;
610 int rc;
611
612#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
613 /* ZONE_DMA32: 0-4GB */
614 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA32,
615 true /* contiguous */, VERR_NO_CONT_MEMORY);
616 if (RT_FAILURE(rc))
617#endif
618#ifdef RT_ARCH_AMD64
619 /* ZONE_DMA: 0-16MB */
620 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA,
621 true /* contiguous */, VERR_NO_CONT_MEMORY);
622#else
623 /* ZONE_NORMAL (32-bit hosts): 0-896MB */
624 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_USER,
625 true /* contiguous */, VERR_NO_CONT_MEMORY);
626#endif
627 if (RT_SUCCESS(rc))
628 {
629 rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
630 if (RT_SUCCESS(rc))
631 {
632#if defined(RT_STRICT) && (defined(RT_ARCH_AMD64) || defined(CONFIG_HIGHMEM64G))
633 size_t iPage = pMemLnx->cPages;
634 while (iPage-- > 0)
635 Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
636#endif
637 pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
638 *ppMem = &pMemLnx->Core;
639 return rc;
640 }
641
642 rtR0MemObjLinuxFreePages(pMemLnx);
643 rtR0MemObjDelete(&pMemLnx->Core);
644 }
645
646 return rc;
647}
648
649
650/**
651 * Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
652 *
653 * @returns IPRT status code.
654 * @param ppMem Where to store the memory object pointer on success.
655 * @param enmType The object type.
656 * @param cb The size of the allocation.
657 * @param uAlignment The alignment of the physical memory.
658 * Only valid for enmType == RTR0MEMOBJTYPE_PHYS, ignored otherwise.
659 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
660 * @param fGfp The Linux GFP flags to use for the allocation.
661 */
662static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
663 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest, unsigned fGfp)
664{
665 PRTR0MEMOBJLNX pMemLnx;
666 int rc;
667
668 rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, uAlignment, fGfp,
669 enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */,
670 VERR_NO_PHYS_MEMORY);
671 if (RT_FAILURE(rc))
672 return rc;
673
674 /*
675 * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
676 */
677 if (PhysHighest != NIL_RTHCPHYS)
678 {
679 size_t iPage = pMemLnx->cPages;
680 while (iPage-- > 0)
681 if (page_to_phys(pMemLnx->apPages[iPage]) >= PhysHighest)
682 {
683 rtR0MemObjLinuxFreePages(pMemLnx);
684 rtR0MemObjDelete(&pMemLnx->Core);
685 return VERR_NO_MEMORY;
686 }
687 }
688
689 /*
690 * Complete the object.
691 */
692 if (enmType == RTR0MEMOBJTYPE_PHYS)
693 {
694 pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
695 pMemLnx->Core.u.Phys.fAllocated = true;
696 }
697 *ppMem = &pMemLnx->Core;
698 return rc;
699}
700
701
702/**
703 * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
704 *
705 * @returns IPRT status code.
706 * @param ppMem Where to store the memory object pointer on success.
707 * @param enmType The object type.
708 * @param cb The size of the allocation.
709 * @param uAlignment The alignment of the physical memory.
710 * Only valid for enmType == RTR0MEMOBJTYPE_PHYS, ignored otherwise.
711 * @param PhysHighest See rtR0MemObjNativeAllocPhys.
712 */
713static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
714 size_t cb, size_t uAlignment, RTHCPHYS PhysHighest)
715{
716 int rc;
717
718 /*
719 * There are two clear cases and that's the <=16MB and anything-goes ones.
720 * When the physical address limit is somewhere in-between those two we'll
721 * just have to try, starting with HIGHUSER and working our way thru the
722 * different types, hoping we'll get lucky.
723 *
724 * We should probably move this physical address restriction logic up to
725 * the page alloc function as it would be more efficient there. But since
726 * we don't expect this to be a performance issue just yet it can wait.
727 */
728 if (PhysHighest == NIL_RTHCPHYS)
729 /* ZONE_HIGHMEM: the whole physical memory */
730 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
731 else if (PhysHighest <= _1M * 16)
732 /* ZONE_DMA: 0-16MB */
733 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
734 else
735 {
736 rc = VERR_NO_MEMORY;
737 if (RT_FAILURE(rc))
738 /* ZONE_HIGHMEM: the whole physical memory */
739 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_HIGHUSER);
740 if (RT_FAILURE(rc))
741 /* ZONE_NORMAL: 0-896MB */
742 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_USER);
743#ifdef GFP_DMA32
744 if (RT_FAILURE(rc))
745 /* ZONE_DMA32: 0-4GB */
746 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA32);
747#endif
748 if (RT_FAILURE(rc))
749 /* ZONE_DMA: 0-16MB */
750 rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, GFP_DMA);
751 }
752 return rc;
753}
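/*
 * A minimal usage sketch of the zone fallback above, kept under #if 0 so it
 * is never built: a request restricted to addresses below 4GB walks
 * GFP_HIGHUSER -> GFP_USER -> GFP_DMA32 (if defined) -> GFP_DMA until all
 * pages satisfy the limit.  The example function name is made up for
 * illustration.
 */
#if 0
static int rtR0MemObjLinuxAllocPhysBelow4GExample(PPRTR0MEMOBJINTERNAL ppMem)
{
    return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, 4 * PAGE_SIZE,
                                       PAGE_SIZE /* uAlignment */, _4G - 1 /* PhysHighest */);
}
#endif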
754
755
756/**
757 * Translates a kernel virtual address to a linux page structure by walking the
758 * page tables.
759 *
760 * @note We do assume that the page tables will not change as we are walking
761 * them. This assumption is rather forced by the fact that I could not
762 * immediately see any way of preventing this from happening. So, we
763 * take some extra care when accessing them.
764 *
765 * Because of this, we don't want to use this function on memory where
766 * attribute changes to nearby pages are likely to cause large pages to
767 * be used or split up. So, don't use this for the linear mapping of
768 * physical memory.
769 *
770 * @returns Pointer to the page structure or NULL if it could not be found.
771 * @param pv The kernel virtual address.
772 */
773static struct page *rtR0MemObjLinuxVirtToPage(void *pv)
774{
775 unsigned long ulAddr = (unsigned long)pv;
776 unsigned long pfn;
777 struct page *pPage;
778 pte_t *pEntry;
779 union
780 {
781 pgd_t Global;
782#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
783 pud_t Upper;
784#endif
785 pmd_t Middle;
786 pte_t Entry;
787 } u;
788
789 /* Should this happen in a situation this code will be called in? And if
790 * so, can it change under our feet? See also
791 * "Documentation/vm/active_mm.txt" in the kernel sources. */
792 if (RT_UNLIKELY(!current->active_mm))
793 return NULL;
794 u.Global = *pgd_offset(current->active_mm, ulAddr);
795 if (RT_UNLIKELY(pgd_none(u.Global)))
796 return NULL;
797
798#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
799 u.Upper = *pud_offset(&u.Global, ulAddr);
800 if (RT_UNLIKELY(pud_none(u.Upper)))
801 return NULL;
802# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
803 if (pud_large(u.Upper))
804 {
805 pPage = pud_page(u.Upper);
806 AssertReturn(pPage, NULL);
807 pfn = page_to_pfn(pPage); /* doing the safe way... */
808 pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PUD_SHIFT - PAGE_SHIFT)) - 1);
809 return pfn_to_page(pfn);
810 }
811# endif
812
813 u.Middle = *pmd_offset(&u.Upper, ulAddr);
814#else /* < 2.6.11 */
815 u.Middle = *pmd_offset(&u.Global, ulAddr);
816#endif /* < 2.6.11 */
817 if (RT_UNLIKELY(pmd_none(u.Middle)))
818 return NULL;
819#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
820 if (pmd_large(u.Middle))
821 {
822 pPage = pmd_page(u.Middle);
823 AssertReturn(pPage, NULL);
824 pfn = page_to_pfn(pPage); /* doing the safe way... */
825 pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PMD_SHIFT - PAGE_SHIFT)) - 1);
826 return pfn_to_page(pfn);
827 }
828#endif
829
830#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) || defined(pte_offset_map) /* As usual, RHEL 3 had pte_offset_map earlier. */
831 pEntry = pte_offset_map(&u.Middle, ulAddr);
832#else
833 pEntry = pte_offset(&u.Middle, ulAddr);
834#endif
835 if (RT_UNLIKELY(!pEntry))
836 return NULL;
837 u.Entry = *pEntry;
838 pte_unmap(pEntry);
839
840 if (RT_UNLIKELY(!pte_present(u.Entry)))
841 return NULL;
842 return pte_page(u.Entry);
843}
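/*
 * A minimal usage sketch of the page-table walker above, kept under #if 0 so
 * it is never built: translate a vmap/vmalloc style kernel address into a
 * physical address.  Per the note above, this must not be used on the linear
 * mapping of physical memory.  The example function name is made up for
 * illustration.
 */
#if 0
static RTHCPHYS rtR0MemObjLinuxVirtToPhysExample(void *pv)
{
    struct page *pPage = rtR0MemObjLinuxVirtToPage(pv);
    if (!pPage)
        return NIL_RTHCPHYS;
    return page_to_phys(pPage) + ((unsigned long)pv & ~PAGE_MASK);
}
#endif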
844
845
846DECLHIDDEN(int) rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment)
847{
848 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, uAlignment, PhysHighest);
849}
850
851
852DECLHIDDEN(int) rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest)
853{
854 return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PAGE_SIZE, PhysHighest);
855}
856
857
858DECLHIDDEN(int) rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy)
859{
860 /*
861 * All we need to do here is to validate that we can use
862 * ioremap on the specified address (32/64-bit dma_addr_t).
863 */
864 PRTR0MEMOBJLNX pMemLnx;
865 dma_addr_t PhysAddr = Phys;
866 AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
867
868 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb);
869 if (!pMemLnx)
870 return VERR_NO_MEMORY;
871
872 pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
873 pMemLnx->Core.u.Phys.fAllocated = false;
874 pMemLnx->Core.u.Phys.uCachePolicy = uCachePolicy;
875 Assert(!pMemLnx->cPages);
876 *ppMem = &pMemLnx->Core;
877 return VINF_SUCCESS;
878}
879
880
881DECLHIDDEN(int) rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess, RTR0PROCESS R0Process)
882{
883 const int cPages = cb >> PAGE_SHIFT;
884 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
885 struct vm_area_struct **papVMAs;
886 PRTR0MEMOBJLNX pMemLnx;
887 int rc = VERR_NO_MEMORY;
888 NOREF(fAccess);
889
890 /*
891 * Check for valid task and size overflows.
892 */
893 if (!pTask)
894 return VERR_NOT_SUPPORTED;
895 if (((size_t)cPages << PAGE_SHIFT) != cb)
896 return VERR_OUT_OF_RANGE;
897
898 /*
899 * Allocate the memory object and a temporary buffer for the VMAs.
900 */
901 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, (void *)R3Ptr, cb);
902 if (!pMemLnx)
903 return VERR_NO_MEMORY;
904
905 papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
906 if (papVMAs)
907 {
908 down_read(&pTask->mm->mmap_sem);
909
910 /*
911 * Get user pages.
912 */
913 rc = get_user_pages(pTask, /* Task for fault accounting. */
914 pTask->mm, /* Whose pages. */
915 R3Ptr, /* Where from. */
916 cPages, /* How many pages. */
917 1, /* Write to memory. */
918 0, /* force. */
919 &pMemLnx->apPages[0], /* Page array. */
920 papVMAs); /* vmas */
921 if (rc == cPages)
922 {
923 /*
924 * Flush dcache (required?), protect against fork and _really_ pin the page
925 * table entries. get_user_pages() will protect against swapping out the
926 * pages but it will NOT protect against removing page table entries. This
927 * can be achieved with
928 * - using mlock / mmap(..., MAP_LOCKED, ...) from userland. This requires
929 * an appropriate limit set up with setrlimit(..., RLIMIT_MEMLOCK, ...).
930 * Usual Linux distributions support only a limited size of locked pages
931 * (e.g. 32KB).
932 * - setting the PageReserved bit (as we do in rtR0MemObjLinuxAllocPages()),
933 * or by
934 * - setting the VM_LOCKED flag. This is the same as doing mlock() without
935 * a range check.
936 */
937 /** @todo The Linux fork() protection will require more work if this API
938 * is to be used for anything but locking VM pages. */
939 while (rc-- > 0)
940 {
941 flush_dcache_page(pMemLnx->apPages[rc]);
942 papVMAs[rc]->vm_flags |= (VM_DONTCOPY | VM_LOCKED);
943 }
944
945 up_read(&pTask->mm->mmap_sem);
946
947 RTMemFree(papVMAs);
948
949 pMemLnx->Core.u.Lock.R0Process = R0Process;
950 pMemLnx->cPages = cPages;
951 Assert(!pMemLnx->fMappedToRing0);
952 *ppMem = &pMemLnx->Core;
953
954 return VINF_SUCCESS;
955 }
956
957 /*
958 * Failed - we need to unlock any pages that we succeeded to lock.
959 */
960 while (rc-- > 0)
961 {
962 if (!PageReserved(pMemLnx->apPages[rc]))
963 SetPageDirty(pMemLnx->apPages[rc]);
964 page_cache_release(pMemLnx->apPages[rc]);
965 }
966
967 up_read(&pTask->mm->mmap_sem);
968
969 RTMemFree(papVMAs);
970 rc = VERR_LOCK_FAILED;
971 }
972
973 rtR0MemObjDelete(&pMemLnx->Core);
974 return rc;
975}
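/*
 * A minimal user-space sketch of the mlock() alternative mentioned in the
 * comment inside rtR0MemObjNativeLockUser() above, kept under #if 0 since it
 * is ring-3 code and does not belong in this ring-0 file: raise
 * RLIMIT_MEMLOCK (needs privilege or a sufficient hard limit) and lock a
 * buffer so its page table entries stay resident.  Names are illustrative.
 */
#if 0
#include <sys/mman.h>
#include <sys/resource.h>

static int lockBufferExample(void *pvBuf, size_t cbBuf)
{
    struct rlimit Limit;
    Limit.rlim_cur = cbBuf;
    Limit.rlim_max = cbBuf;
    if (setrlimit(RLIMIT_MEMLOCK, &Limit) != 0)
        return -1;
    return mlock(pvBuf, cbBuf); /* pinned until munlock() or process exit */
}
#endif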
976
977
978DECLHIDDEN(int) rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess)
979{
980 void *pvLast = (uint8_t *)pv + cb - 1;
981 size_t const cPages = cb >> PAGE_SHIFT;
982 PRTR0MEMOBJLNX pMemLnx;
983 bool fLinearMapping;
984 int rc;
985 uint8_t *pbPage;
986 size_t iPage;
987 NOREF(fAccess);
988
989 if ( !RTR0MemKernelIsValidAddr(pv)
990 || !RTR0MemKernelIsValidAddr(pv + cb))
991 return VERR_INVALID_PARAMETER;
992
993 /*
994 * The lower part of the kernel memory has a linear mapping between
995 * physical and virtual addresses. So we take a short cut here. This is
996 * assumed to be the cleanest way to handle those addresses (and the code
997 * is well tested, though the test for determining it is not very nice).
998 * If we ever decide it isn't we can still remove it.
999 */
1000#if 0
1001 fLinearMapping = (unsigned long)pvLast < VMALLOC_START;
1002#else
1003 fLinearMapping = (unsigned long)pv >= (unsigned long)__va(0)
1004 && (unsigned long)pvLast < (unsigned long)high_memory;
1005#endif
1006
1007 /*
1008 * Allocate the memory object.
1009 */
1010 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_OFFSETOF(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK, pv, cb);
1011 if (!pMemLnx)
1012 return VERR_NO_MEMORY;
1013
1014 /*
1015 * Gather the pages.
1016 * We ASSUME all kernel pages are non-swappable and non-movable.
1017 */
1018 rc = VINF_SUCCESS;
1019 pbPage = (uint8_t *)pvLast;
1020 iPage = cPages;
1021 if (!fLinearMapping)
1022 {
1023 while (iPage-- > 0)
1024 {
1025 struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
1026 if (RT_UNLIKELY(!pPage))
1027 {
1028 rc = VERR_LOCK_FAILED;
1029 break;
1030 }
1031 pMemLnx->apPages[iPage] = pPage;
1032 pbPage -= PAGE_SIZE;
1033 }
1034 }
1035 else
1036 {
1037 while (iPage-- > 0)
1038 {
1039 pMemLnx->apPages[iPage] = virt_to_page(pbPage);
1040 pbPage -= PAGE_SIZE;
1041 }
1042 }
1043 if (RT_SUCCESS(rc))
1044 {
1045 /*
1046 * Complete the memory object and return.
1047 */
1048 pMemLnx->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
1049 pMemLnx->cPages = cPages;
1050 Assert(!pMemLnx->fMappedToRing0);
1051 *ppMem = &pMemLnx->Core;
1052
1053 return VINF_SUCCESS;
1054 }
1055
1056 rtR0MemObjDelete(&pMemLnx->Core);
1057 return rc;
1058}
1059
1060
1061DECLHIDDEN(int) rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment)
1062{
1063#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1064 const size_t cPages = cb >> PAGE_SHIFT;
1065 struct page *pDummyPage;
1066 struct page **papPages;
1067
1068 /* check for unsupported stuff. */
1069 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
1070 if (uAlignment > PAGE_SIZE)
1071 return VERR_NOT_SUPPORTED;
1072
1073 /*
1074 * Allocate a dummy page and create a page pointer array for vmap such that
1075 * the dummy page is mapped all over the reserved area.
1076 */
1077 pDummyPage = alloc_page(GFP_HIGHUSER);
1078 if (!pDummyPage)
1079 return VERR_NO_MEMORY;
1080 papPages = RTMemAlloc(sizeof(*papPages) * cPages);
1081 if (papPages)
1082 {
1083 void *pv;
1084 size_t iPage = cPages;
1085 while (iPage-- > 0)
1086 papPages[iPage] = pDummyPage;
1087# ifdef VM_MAP
1088 pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
1089# else
1090 pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
1091# endif
1092 RTMemFree(papPages);
1093 if (pv)
1094 {
1095 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
1096 if (pMemLnx)
1097 {
1098 pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
1099 pMemLnx->cPages = 1;
1100 pMemLnx->apPages[0] = pDummyPage;
1101 *ppMem = &pMemLnx->Core;
1102 return VINF_SUCCESS;
1103 }
1104 vunmap(pv);
1105 }
1106 }
1107 __free_page(pDummyPage);
1108 return VERR_NO_MEMORY;
1109
1110#else /* < 2.4.22 */
1111 /*
1112 * Could probably use ioremap here, but the caller is in a better position than us
1113 * to select some safe physical memory.
1114 */
1115 return VERR_NOT_SUPPORTED;
1116#endif
1117}
1118
1119
1120/**
1121 * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
1122 * an empty user space mapping.
1123 *
1124 * The caller takes care of acquiring the mmap_sem of the task.
1125 *
1126 * @returns Pointer to the mapping.
1127 * (void *)-1 on failure.
1128 * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
1129 * @param cb The size of the mapping.
1130 * @param uAlignment The alignment of the mapping.
1131 * @param pTask The Linux task to create this mapping in.
1132 * @param fProt The RTMEM_PROT_* mask.
1133 */
1134static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
1135{
1136 unsigned fLnxProt;
1137 unsigned long ulAddr;
1138
1139 /*
1140 * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
1141 */
1142 fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
1143 if (fProt == RTMEM_PROT_NONE)
1144 fLnxProt = PROT_NONE;
1145 else
1146 {
1147 fLnxProt = 0;
1148 if (fProt & RTMEM_PROT_READ)
1149 fLnxProt |= PROT_READ;
1150 if (fProt & RTMEM_PROT_WRITE)
1151 fLnxProt |= PROT_WRITE;
1152 if (fProt & RTMEM_PROT_EXEC)
1153 fLnxProt |= PROT_EXEC;
1154 }
1155
1156 if (R3PtrFixed != (RTR3PTR)-1)
1157 ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
1158 else
1159 {
1160 ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1161 if ( !(ulAddr & ~PAGE_MASK)
1162 && (ulAddr & (uAlignment - 1)))
1163 {
1164 /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
1165 * up alignment gaps. This is of course complicated by fragmentation (which we might have caused
1166 * ourselves) and further by there being two mmap strategies (top / bottom). */
1167 /* For now, just ignore uAlignment requirements... */
1168 }
1169 }
1170 if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
1171 return (void *)-1;
1172 return (void *)ulAddr;
1173}
1174
1175
1176DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
1177{
1178 PRTR0MEMOBJLNX pMemLnx;
1179 void *pv;
1180 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1181 if (!pTask)
1182 return VERR_NOT_SUPPORTED;
1183
1184 /*
1185 * Check that the specified alignment is supported.
1186 */
1187 if (uAlignment > PAGE_SIZE)
1188 return VERR_NOT_SUPPORTED;
1189
1190 /*
1191 * Let rtR0MemObjLinuxDoMmap do the difficult bits.
1192 */
1193 down_write(&pTask->mm->mmap_sem);
1194 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
1195 up_write(&pTask->mm->mmap_sem);
1196 if (pv == (void *)-1)
1197 return VERR_NO_MEMORY;
1198
1199 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb);
1200 if (!pMemLnx)
1201 {
1202 down_write(&pTask->mm->mmap_sem);
1203 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, cb);
1204 up_write(&pTask->mm->mmap_sem);
1205 return VERR_NO_MEMORY;
1206 }
1207
1208 pMemLnx->Core.u.ResVirt.R0Process = R0Process;
1209 *ppMem = &pMemLnx->Core;
1210 return VINF_SUCCESS;
1211}
1212
1213
1214DECLHIDDEN(int) rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap,
1215 void *pvFixed, size_t uAlignment,
1216 unsigned fProt, size_t offSub, size_t cbSub)
1217{
1218 int rc = VERR_NO_MEMORY;
1219 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1220 PRTR0MEMOBJLNX pMemLnx;
1221
1222 /* Fail if requested to do something we can't. */
1223 AssertMsgReturn(!offSub && !cbSub, ("%#x %#x\n", offSub, cbSub), VERR_NOT_SUPPORTED);
1224 AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
1225 if (uAlignment > PAGE_SIZE)
1226 return VERR_NOT_SUPPORTED;
1227
1228 /*
1229 * Create the IPRT memory object.
1230 */
1231 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1232 if (pMemLnx)
1233 {
1234 if (pMemLnxToMap->cPages)
1235 {
1236#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1237 /*
1238 * Use vmap - 2.4.22 and later.
1239 */
1240 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
1241# ifdef VM_MAP
1242 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_MAP, fPg);
1243# else
1244 pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[0], pMemLnxToMap->cPages, VM_ALLOC, fPg);
1245# endif
1246 if (pMemLnx->Core.pv)
1247 {
1248 pMemLnx->fMappedToRing0 = true;
1249 rc = VINF_SUCCESS;
1250 }
1251 else
1252 rc = VERR_MAP_FAILED;
1253
1254#else /* < 2.4.22 */
1255 /*
1256 * Only option here is to share mappings if possible and forget about fProt.
1257 */
1258 if (rtR0MemObjIsRing3(pMemToMap))
1259 rc = VERR_NOT_SUPPORTED;
1260 else
1261 {
1262 rc = VINF_SUCCESS;
1263 if (!pMemLnxToMap->Core.pv)
1264 rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
1265 if (RT_SUCCESS(rc))
1266 {
1267 Assert(pMemLnxToMap->Core.pv);
1268 pMemLnx->Core.pv = pMemLnxToMap->Core.pv;
1269 }
1270 }
1271#endif
1272 }
1273 else
1274 {
1275 /*
1276 * MMIO / physical memory.
1277 */
1278 Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
1279 pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
1280 ? ioremap_nocache(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb)
1281 : ioremap(pMemLnxToMap->Core.u.Phys.PhysBase, pMemLnxToMap->Core.cb);
1282 if (pMemLnx->Core.pv)
1283 {
1284 /** @todo fix protection. */
1285 rc = VINF_SUCCESS;
1286 }
1287 }
1288 if (RT_SUCCESS(rc))
1289 {
1290 pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
1291 *ppMem = &pMemLnx->Core;
1292 return VINF_SUCCESS;
1293 }
1294 rtR0MemObjDelete(&pMemLnx->Core);
1295 }
1296
1297 return rc;
1298}
1299
1300
1301#ifdef VBOX_USE_PAE_HACK
1302/**
1303 * Replace the PFN of a PTE with the address of the actual page.
1304 *
1305 * The caller maps a reserved dummy page at the address with the desired access
1306 * and flags.
1307 *
1308 * This hack is required for older Linux kernels which don't provide
1309 * remap_pfn_range().
1310 *
1311 * @returns 0 on success, -ENOMEM on failure.
1312 * @param mm The memory context.
1313 * @param ulAddr The mapping address.
1314 * @param Phys The physical address of the page to map.
1315 */
1316static int rtR0MemObjLinuxFixPte(struct mm_struct *mm, unsigned long ulAddr, RTHCPHYS Phys)
1317{
1318 int rc = -ENOMEM;
1319 pgd_t *pgd;
1320
1321 spin_lock(&mm->page_table_lock);
1322
1323 pgd = pgd_offset(mm, ulAddr);
1324 if (!pgd_none(*pgd) && !pgd_bad(*pgd))
1325 {
1326 pmd_t *pmd = pmd_offset(pgd, ulAddr);
1327 if (!pmd_none(*pmd))
1328 {
1329 pte_t *ptep = pte_offset_map(pmd, ulAddr);
1330 if (ptep)
1331 {
1332 pte_t pte = *ptep;
1333 pte.pte_high &= 0xfff00000;
1334 pte.pte_high |= ((Phys >> 32) & 0x000fffff);
1335 pte.pte_low &= 0x00000fff;
1336 pte.pte_low |= (Phys & 0xfffff000);
1337 set_pte(ptep, pte);
1338 pte_unmap(ptep);
1339 rc = 0;
1340 }
1341 }
1342 }
1343
1344 spin_unlock(&mm->page_table_lock);
1345 return rc;
1346}
1347#endif /* VBOX_USE_PAE_HACK */
1348
1349
1350DECLHIDDEN(int) rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed,
1351 size_t uAlignment, unsigned fProt, RTR0PROCESS R0Process)
1352{
1353 struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
1354 PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
1355 int rc = VERR_NO_MEMORY;
1356 PRTR0MEMOBJLNX pMemLnx;
1357#ifdef VBOX_USE_PAE_HACK
1358 struct page *pDummyPage;
1359 RTHCPHYS DummyPhys;
1360#endif
1361
1362 /*
1363 * Check for restrictions.
1364 */
1365 if (!pTask)
1366 return VERR_NOT_SUPPORTED;
1367 if (uAlignment > PAGE_SIZE)
1368 return VERR_NOT_SUPPORTED;
1369
1370#ifdef VBOX_USE_PAE_HACK
1371 /*
1372 * Allocate a dummy page for use when mapping the memory.
1373 */
1374 pDummyPage = alloc_page(GFP_USER);
1375 if (!pDummyPage)
1376 return VERR_NO_MEMORY;
1377 SetPageReserved(pDummyPage);
1378 DummyPhys = page_to_phys(pDummyPage);
1379#endif
1380
1381 /*
1382 * Create the IPRT memory object.
1383 */
1384 pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, pMemLnxToMap->Core.cb);
1385 if (pMemLnx)
1386 {
1387 /*
1388 * Allocate user space mapping.
1389 */
1390 void *pv;
1391 down_write(&pTask->mm->mmap_sem);
1392 pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, pMemLnxToMap->Core.cb, uAlignment, pTask, fProt);
1393 if (pv != (void *)-1)
1394 {
1395 /*
1396 * Map page by page into the mmap area.
1397 * This is generic, paranoid and not very efficient.
1398 */
1399 pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
1400 unsigned long ulAddrCur = (unsigned long)pv;
1401 const size_t cPages = pMemLnxToMap->Core.cb >> PAGE_SHIFT;
1402 size_t iPage;
1403
1404 rc = 0;
1405 if (pMemLnxToMap->cPages)
1406 {
1407 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1408 {
1409#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1410 RTHCPHYS Phys = page_to_phys(pMemLnxToMap->apPages[iPage]);
1411#endif
1412#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1413 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1414 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1415#endif
1416#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1417 /* remap_page_range() limitation on x86 */
1418 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1419#endif
1420
1421#if defined(VBOX_USE_INSERT_PAGE) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
1422 rc = vm_insert_page(vma, ulAddrCur, pMemLnxToMap->apPages[iPage]);
1423 vma->vm_flags |= VM_RESERVED; /* This flag helps make 100% sure some bad stuff won't happen (swap, core, ++). */
1424#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1425 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
1426#elif defined(VBOX_USE_PAE_HACK)
1427 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1428 if (!rc)
1429 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1430#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1431 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1432#else /* 2.4 */
1433 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1434#endif
1435 if (rc)
1436 {
1437 rc = VERR_NO_MEMORY;
1438 break;
1439 }
1440 }
1441 }
1442 else
1443 {
1444 RTHCPHYS Phys;
1445 if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
1446 Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
1447 else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
1448 Phys = pMemLnxToMap->Core.u.Cont.Phys;
1449 else
1450 {
1451 AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
1452 Phys = NIL_RTHCPHYS;
1453 }
1454 if (Phys != NIL_RTHCPHYS)
1455 {
1456 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
1457 {
1458#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1459 struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
1460 AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
1461#endif
1462#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && defined(RT_ARCH_X86)
1463 /* remap_page_range() limitation on x86 */
1464 AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
1465#endif
1466
1467#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1468 rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1469#elif defined(VBOX_USE_PAE_HACK)
1470 rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
1471 if (!rc)
1472 rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
1473#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1474 rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
1475#else /* 2.4 */
1476 rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
1477#endif
1478 if (rc)
1479 {
1480 rc = VERR_NO_MEMORY;
1481 break;
1482 }
1483 }
1484 }
1485 }
1486 if (!rc)
1487 {
1488 up_write(&pTask->mm->mmap_sem);
1489#ifdef VBOX_USE_PAE_HACK
1490 __free_page(pDummyPage);
1491#endif
1492
1493 pMemLnx->Core.pv = pv;
1494 pMemLnx->Core.u.Mapping.R0Process = R0Process;
1495 *ppMem = &pMemLnx->Core;
1496 return VINF_SUCCESS;
1497 }
1498
1499 /*
1500 * Bail out.
1501 */
1502 MY_DO_MUNMAP(pTask->mm, (unsigned long)pv, pMemLnxToMap->Core.cb);
1503 }
1504 up_write(&pTask->mm->mmap_sem);
1505 rtR0MemObjDelete(&pMemLnx->Core);
1506 }
1507#ifdef VBOX_USE_PAE_HACK
1508 __free_page(pDummyPage);
1509#endif
1510
1511 return rc;
1512}
1513
1514
1515DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
1516{
1517 NOREF(pMem);
1518 NOREF(offSub);
1519 NOREF(cbSub);
1520 NOREF(fProt);
1521 return VERR_NOT_SUPPORTED;
1522}
1523
1524
1525DECLHIDDEN(RTHCPHYS) rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
1526{
1527 PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
1528
1529 if (pMemLnx->cPages)
1530 return page_to_phys(pMemLnx->apPages[iPage]);
1531
1532 switch (pMemLnx->Core.enmType)
1533 {
1534 case RTR0MEMOBJTYPE_CONT:
1535 return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
1536
1537 case RTR0MEMOBJTYPE_PHYS:
1538 return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
1539
1540 /* the parent knows */
1541 case RTR0MEMOBJTYPE_MAPPING:
1542 return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
1543
1544 /* cPages > 0 */
1545 case RTR0MEMOBJTYPE_LOW:
1546 case RTR0MEMOBJTYPE_LOCK:
1547 case RTR0MEMOBJTYPE_PHYS_NC:
1548 case RTR0MEMOBJTYPE_PAGE:
1549 default:
1550 AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
1551 /* fall thru */
1552
1553 case RTR0MEMOBJTYPE_RES_VIRT:
1554 return NIL_RTHCPHYS;
1555 }
1556}
1557