VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c@ 85546

Last change on this file since 85546 was 85505, checked in by vboxsync, 4 years ago

IPRT/alloc-r0drv-linux.c,SUPDrv-linux.c: RTMEMALLOC_EXEC_HEAP for 5.8+ (more on this later as it doesn't really work). bugref:9801

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 15.0 KB
/* $Id: alloc-r0drv-linux.c 85505 2020-07-29 10:03:29Z vboxsync $ */
/** @file
 * IPRT - Memory Allocation, Ring-0 Driver, Linux.
 */

/*
 * Copyright (C) 2006-2020 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "the-linux-kernel.h"
#include "internal/iprt.h"
#include <iprt/mem.h>

#include <iprt/assert.h>
#include <iprt/errcore.h>
#include "r0drv/alloc-r0drv.h"

#if (defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)) && !defined(RTMEMALLOC_EXEC_HEAP)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) && LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0)
/**
 * Starting with 2.6.23 we can use __get_vm_area and map_vm_area to allocate
 * memory in the module range. This is preferable to the exec heap below.
 */
#  define RTMEMALLOC_EXEC_VM_AREA
# else
/**
 * We need memory in the module range (~2GB to ~0); this can only be obtained
 * through APIs that are not exported (see module_alloc()).
 *
 * So, we'll have to create a quick and dirty heap here using BSS memory.
 * Very annoying and it's going to restrict us!
 */
#  define RTMEMALLOC_EXEC_HEAP
# endif
#endif

#ifdef RTMEMALLOC_EXEC_HEAP
# include <iprt/heap.h>
# include <iprt/spinlock.h>
# include <iprt/errcore.h>
#endif

#include "internal/initterm.h"


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Extended header used for headers marked with RTMEMHDR_FLAG_EXEC_VM_AREA.
 *
 * This is used with allocating executable memory, for things like generated
 * code and loaded modules.
 */
typedef struct RTMEMLNXHDREX
{
    /** The VM area for this allocation. */
    struct vm_struct   *pVmArea;
    /** Unused padding; keeps the structure at 32 bytes (see the size assertion below). */
    void               *pvDummy;
    /** The header we present to the generic API. */
    RTMEMHDR            Hdr;
} RTMEMLNXHDREX;
AssertCompileSize(RTMEMLNXHDREX, 32);
/** Pointer to an extended memory header. */
typedef RTMEMLNXHDREX *PRTMEMLNXHDREX;
#endif


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
#ifdef RTMEMALLOC_EXEC_HEAP
/** The heap. */
static RTHEAPSIMPLE g_HeapExec = NIL_RTHEAPSIMPLE;
/** Spinlock protecting the heap. */
static RTSPINLOCK   g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif


/**
 * API for cleaning up the heap spinlock on IPRT termination.
 * Like RTR0MemExecDonate, this is specific to AMD64 Linux/GNU.
 */
DECLHIDDEN(void) rtR0MemExecCleanup(void)
{
#ifdef RTMEMALLOC_EXEC_HEAP
    RTSpinlockDestroy(g_HeapExecSpinlock);
    g_HeapExecSpinlock = NIL_RTSPINLOCK;
#endif
}


/**
 * Donate read+write+execute memory to the exec heap.
 *
 * This API is specific to AMD64 and Linux/GNU. A kernel module that desires to
 * use RTMemExecAlloc on AMD64 Linux/GNU will have to donate some statically
 * allocated memory in the module if it wishes for GCC generated code to work.
 * GCC can currently only generate modules that work in the address range
 * ~2GB to ~0.
 *
 * The API accepts only a single donation.
 *
 * @returns IPRT status code.
 * @retval  VERR_NOT_SUPPORTED if the code isn't enabled.
 * @param   pvMemory    Pointer to the memory block.
 * @param   cb          The size of the memory block.
 */
RTR0DECL(int) RTR0MemExecDonate(void *pvMemory, size_t cb)
{
#ifdef RTMEMALLOC_EXEC_HEAP
    int rc;
    AssertReturn(g_HeapExec == NIL_RTHEAPSIMPLE, VERR_WRONG_ORDER);

    rc = RTSpinlockCreate(&g_HeapExecSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTR0MemExecDonate");
    if (RT_SUCCESS(rc))
    {
        rc = RTHeapSimpleInit(&g_HeapExec, pvMemory, cb);
        if (RT_FAILURE(rc))
            rtR0MemExecCleanup();
    }
    return rc;
#else
    RT_NOREF_PV(pvMemory); RT_NOREF_PV(cb);
    return VERR_NOT_SUPPORTED;
#endif
}
RT_EXPORT_SYMBOL(RTR0MemExecDonate);


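/*
 * Usage sketch (editorial addition, not part of the original file): a module
 * that wants RTMemExecAlloc to work on AMD64 Linux donates a block of its own
 * statically allocated (BSS) memory once, early in its init code. The buffer
 * name, the function name and the size here are hypothetical.
 */
#if 0
static uint8_t g_abExecMemory[786432]; /* lives in the module image, i.e. within the ~2GB..~0 range */

static int vboxdrvLinuxInitExecHeap(void)
{
    /* Fails with VERR_WRONG_ORDER on a second donation and with
       VERR_NOT_SUPPORTED when RTMEMALLOC_EXEC_HEAP isn't compiled in. */
    return RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
}
#endif
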
#ifdef RTMEMALLOC_EXEC_VM_AREA
/**
 * Allocate executable kernel memory in the module range.
 *
 * @returns Pointer to an allocation header on success. NULL on failure.
 *
 * @param   cb          The size the user requested.
 */
static PRTMEMHDR rtR0MemAllocExecVmArea(size_t cb)
{
    size_t const        cbAlloc = RT_ALIGN_Z(sizeof(RTMEMLNXHDREX) + cb, PAGE_SIZE);
    size_t const        cPages  = cbAlloc >> PAGE_SHIFT;
    struct page       **papPages;
    struct vm_struct   *pVmArea;
    size_t              iPage;

    pVmArea = __get_vm_area(cbAlloc, VM_ALLOC, MODULES_VADDR, MODULES_END);
    if (!pVmArea)
        return NULL;
    pVmArea->nr_pages = 0;    /* paranoia? */
    pVmArea->pages    = NULL; /* paranoia? */

    papPages = (struct page **)kmalloc(cPages * sizeof(papPages[0]), GFP_KERNEL | __GFP_NOWARN);
    if (!papPages)
    {
        vunmap(pVmArea->addr);
        return NULL;
    }

    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN);
        if (!papPages[iPage])
            break;
    }
    if (iPage == cPages)
    {
        /*
         * Map the pages.
         *
         * Not entirely sure we really need to set nr_pages and pages here, but
         * they provide a very convenient place for storing something we need
         * in the free function, if nothing else...
         */
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
        struct page **papPagesIterator = papPages;
# endif
        pVmArea->nr_pages = cPages;
        pVmArea->pages    = papPages;
        if (!map_vm_area(pVmArea, PAGE_KERNEL_EXEC,
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
                         &papPagesIterator
# else
                         papPages
# endif
                         ))
        {
            PRTMEMLNXHDREX pHdrEx = (PRTMEMLNXHDREX)pVmArea->addr;
            pHdrEx->pVmArea = pVmArea;
            pHdrEx->pvDummy = NULL;
            return &pHdrEx->Hdr;
        }
        /* bail out */
# if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
        pVmArea->nr_pages = papPagesIterator - papPages;
# endif
    }

    vunmap(pVmArea->addr);

    while (iPage-- > 0)
        __free_page(papPages[iPage]);
    kfree(papPages);

    return NULL;
}
#endif /* RTMEMALLOC_EXEC_VM_AREA */


/**
 * OS specific allocation function.
 */
DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
{
    PRTMEMHDR pHdr;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Allocate.
     */
    if (fFlags & RTMEMHDR_FLAG_EXEC)
    {
        if (fFlags & RTMEMHDR_FLAG_ANY_CTX)
            return VERR_NOT_SUPPORTED;

#if defined(RT_ARCH_AMD64)
# ifdef RTMEMALLOC_EXEC_HEAP
        if (g_HeapExec != NIL_RTHEAPSIMPLE)
        {
            RTSpinlockAcquire(g_HeapExecSpinlock);
            pHdr = (PRTMEMHDR)RTHeapSimpleAlloc(g_HeapExec, cb + sizeof(*pHdr), 0);
            RTSpinlockRelease(g_HeapExecSpinlock);
            fFlags |= RTMEMHDR_FLAG_EXEC_HEAP;
        }
        else
            pHdr = NULL;

# elif defined(RTMEMALLOC_EXEC_VM_AREA)
        pHdr = rtR0MemAllocExecVmArea(cb);
        fFlags |= RTMEMHDR_FLAG_EXEC_VM_AREA;

# else  /* !RTMEMALLOC_EXEC_HEAP */
#  error "you do not want to go here..."
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
# endif /* !RTMEMALLOC_EXEC_HEAP */

#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
        pHdr = (PRTMEMHDR)__vmalloc(cb + sizeof(*pHdr), GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, MY_PAGE_KERNEL_EXEC);
#else
        pHdr = (PRTMEMHDR)vmalloc(cb + sizeof(*pHdr));
#endif
    }
    else
    {
        if (
#if 1 /* vmalloc has serious performance issues, avoid it. */
               cb <= PAGE_SIZE*16 - sizeof(*pHdr)
#else
               cb <= PAGE_SIZE
#endif
            || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
           )
        {
            fFlags |= RTMEMHDR_FLAG_KMALLOC;
            pHdr = kmalloc(cb + sizeof(*pHdr),
                           (fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC) ? (GFP_ATOMIC | __GFP_NOWARN)
                                                                  : (GFP_KERNEL | __GFP_NOWARN));
            if (RT_UNLIKELY(   !pHdr
                            && cb > PAGE_SIZE
                            && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
            {
                fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
                pHdr = vmalloc(cb + sizeof(*pHdr));
            }
        }
        else
            pHdr = vmalloc(cb + sizeof(*pHdr));
    }
    if (RT_UNLIKELY(!pHdr))
    {
        IPRT_LINUX_RESTORE_EFL_AC();
        return VERR_NO_MEMORY;
    }

    /*
     * Initialize.
     */
    pHdr->u32Magic = RTMEMHDR_MAGIC;
    pHdr->fFlags   = fFlags;
    pHdr->cb       = cb;
    pHdr->cbReq    = cb;

    *ppHdr = pHdr;
    IPRT_LINUX_RESTORE_EFL_AC();
    return VINF_SUCCESS;
}


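/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the platform-independent ring-0 wrappers layered on top of
 * rtR0MemAllocEx()/rtR0MemFree() hand the caller the memory that follows the
 * header, roughly like this (simplified, hypothetical helper name):
 */
#if 0
void *rtMemAllocSketch(size_t cb, uint32_t fFlags)
{
    PRTMEMHDR pHdr = NULL;
    int rc = rtR0MemAllocEx(cb, fFlags, &pHdr);
    if (RT_FAILURE(rc))
        return NULL;
    return pHdr + 1; /* the user block starts immediately after the header */
}
#endif
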
/**
 * OS specific free function.
 */
DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
{
    IPRT_LINUX_SAVE_EFL_AC();

    pHdr->u32Magic += 1;
    if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
        kfree(pHdr);
#ifdef RTMEMALLOC_EXEC_HEAP
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_HEAP)
    {
        RTSpinlockAcquire(g_HeapExecSpinlock);
        RTHeapSimpleFree(g_HeapExec, pHdr);
        RTSpinlockRelease(g_HeapExecSpinlock);
    }
#endif
#ifdef RTMEMALLOC_EXEC_VM_AREA
    else if (pHdr->fFlags & RTMEMHDR_FLAG_EXEC_VM_AREA)
    {
        PRTMEMLNXHDREX pHdrEx    = RT_FROM_MEMBER(pHdr, RTMEMLNXHDREX, Hdr);
        size_t         iPage     = pHdrEx->pVmArea->nr_pages;
        struct page  **papPages  = pHdrEx->pVmArea->pages;
        void          *pvMapping = pHdrEx->pVmArea->addr;

        vunmap(pvMapping);

        while (iPage-- > 0)
            __free_page(papPages[iPage]);
        kfree(papPages);
    }
#endif
    else
        vfree(pHdr);

    IPRT_LINUX_RESTORE_EFL_AC();
}


/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * @returns order.
 * @param   cPages      Number of pages.
 */
static int CalcPowerOf2Order(unsigned long cPages)
{
    int             iOrder;
    unsigned long   cTmp;

    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    if (cPages & ~(1 << iOrder))
        ++iOrder;

    return iOrder;
}


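/*
 * Worked example (editorial note, not part of the original file): the loop
 * computes floor(log2(cPages)) and the final test rounds up whenever cPages
 * is not an exact power of two, so 2^order pages always cover the request:
 *
 *   CalcPowerOf2Order(1) == 0   (2^0 = 1 page)
 *   CalcPowerOf2Order(2) == 1   (2^1 = 2 pages)
 *   CalcPowerOf2Order(3) == 2   (2^2 = 4 pages, one page wasted)
 *   CalcPowerOf2Order(4) == 2
 *   CalcPowerOf2Order(5) == 3   (2^3 = 8 pages)
 */
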
/**
 * Allocates physically contiguous memory (below 4GB).
 * The allocation is page aligned and the content is undefined.
 *
 * @returns Pointer to the memory block. This is page aligned.
 * @param   pPhys   Where to store the physical address.
 * @param   cb      The allocation size in bytes. This is always
 *                  rounded up to PAGE_SIZE.
 */
RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
{
    int             cOrder;
    unsigned        cPages;
    struct page    *paPages;
    void           *pvRet;
    IPRT_LINUX_SAVE_EFL_AC();

    /*
     * Validate input.
     */
    Assert(VALID_PTR(pPhys));
    Assert(cb > 0);

    /*
     * Align the size, compute the page count and order, then allocate the pages.
     */
    cb = RT_ALIGN_Z(cb, PAGE_SIZE);
    cPages = cb >> PAGE_SHIFT;
    cOrder = CalcPowerOf2Order(cPages);
#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
    /* ZONE_DMA32: 0-4GB */
    paPages = alloc_pages(GFP_DMA32 | __GFP_NOWARN, cOrder);
    if (!paPages)
#endif
#ifdef RT_ARCH_AMD64
        /* ZONE_DMA: 0-16MB */
        paPages = alloc_pages(GFP_DMA | __GFP_NOWARN, cOrder);
#else
        /* ZONE_NORMAL: 0-896MB */
        paPages = alloc_pages(GFP_USER | __GFP_NOWARN, cOrder);
#endif
    if (paPages)
    {
        /*
         * Reserve the pages, asserting along the way that they are contiguous.
         */
        unsigned iPage;
        for (iPage = 0; iPage < cPages; iPage++)
        {
            Assert(!PageHighMem(&paPages[iPage]));
            if (iPage + 1 < cPages)
            {
                AssertMsg(   (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
                          == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
                          &&    page_to_phys(&paPages[iPage]) + PAGE_SIZE
                             == page_to_phys(&paPages[iPage + 1]),
                          ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
                           (long long)page_to_phys(&paPages[iPage]),     phys_to_virt(page_to_phys(&paPages[iPage])),
                           (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
            }

            SetPageReserved(&paPages[iPage]);
        }
        *pPhys = page_to_phys(paPages);
        pvRet  = phys_to_virt(page_to_phys(paPages));
    }
    else
        pvRet = NULL;

    IPRT_LINUX_RESTORE_EFL_AC();
    return pvRet;
}
RT_EXPORT_SYMBOL(RTMemContAlloc);


/**
 * Frees memory allocated using RTMemContAlloc().
 *
 * @param   pv      Pointer returned by RTMemContAlloc().
 * @param   cb      The cb parameter passed to RTMemContAlloc().
 */
RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
{
    if (pv)
    {
        int             cOrder;
        unsigned        cPages;
        unsigned        iPage;
        struct page    *paPages;
        IPRT_LINUX_SAVE_EFL_AC();

        /* validate */
        AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
        Assert(cb > 0);

        /* calc order and get pages */
        cb = RT_ALIGN_Z(cb, PAGE_SIZE);
        cPages = cb >> PAGE_SHIFT;
        cOrder = CalcPowerOf2Order(cPages);
        paPages = virt_to_page(pv);

        /*
         * Restore the page attributes, then free the pages.
         */
        for (iPage = 0; iPage < cPages; iPage++)
        {
            ClearPageReserved(&paPages[iPage]);
        }
        __free_pages(paPages, cOrder);
        IPRT_LINUX_RESTORE_EFL_AC();
    }
}
RT_EXPORT_SYMBOL(RTMemContFree);
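
/*
 * Usage sketch (editorial addition, not part of the original file): a driver
 * needing a physically contiguous, DMA-capable scratch buffer would pair the
 * two exports like this. The size and variable names are hypothetical.
 */
#if 0
{
    RTCCPHYS PhysAddr = 0;
    void    *pvBuf    = RTMemContAlloc(&PhysAddr, 16 * _1K); /* rounded up to a page multiple */
    if (pvBuf)
    {
        /* ... program the device with PhysAddr, access the buffer via pvBuf ... */
        RTMemContFree(pvBuf, 16 * _1K); /* pass the same cb as given to the alloc */
    }
}
#endif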