VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 1890

Last change on this file since 1890 was 1890, checked in by vboxsync, 18 years ago

Attempt to fix ALSA on Linux kernels <= 2.6.17: use mmap not memalign for allocating pages. Use madvise or mprotect to separate VM area structs inside the kernel. Most SUP* functions work on cPages now (not cBytes anymore). The free functions take a cPages parameter which is used for munmap on Linux.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 51.1 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21/*******************************************************************************
22* Header Files *
23*******************************************************************************/
24#include "SUPDRV.h"
25#include "version-generated.h"
26
27#include <iprt/assert.h>
28#include <iprt/spinlock.h>
29#include <iprt/semaphore.h>
30#include <iprt/initterm.h>
31#include <iprt/process.h>
32#include <iprt/err.h>
33#include <iprt/mem.h>
34
35#include <linux/module.h>
36#include <linux/kernel.h>
37#include <linux/init.h>
38#include <linux/fs.h>
39#include <linux/mm.h>
40#include <linux/pagemap.h>
41#include <linux/slab.h>
42#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
43# include <linux/jiffies.h>
44#endif
45#include <asm/mman.h>
46#include <asm/io.h>
47#include <asm/uaccess.h>
48#ifdef CONFIG_DEVFS_FS
49# include <linux/devfs_fs_kernel.h>
50#endif
51#ifdef CONFIG_VBOXDRV_AS_MISC
52# include <linux/miscdevice.h>
53#endif
54#ifdef CONFIG_X86_LOCAL_APIC
55# include <asm/apic.h>
56# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
57# include <asm/nmi.h>
58# endif
59#endif
60
61#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
62# ifndef page_to_pfn
63# define page_to_pfn(page) ((page) - mem_map)
64# endif
65# include <asm/pgtable.h>
66# define global_flush_tlb __flush_tlb_global
67#endif
68
69/* devfs defines */
70#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
71# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
72
73# define VBOX_REGISTER_DEVFS() \
74({ \
75 void *rc = NULL; \
76 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
77 S_IFCHR | S_IRUGO | S_IWUGO, \
78 DEVICE_NAME) == 0) \
79 rc = (void *)' '; /* return not NULL */ \
80 rc; \
81 })
82
83# define VBOX_UNREGISTER_DEVFS(handle) \
84 devfs_remove(DEVICE_NAME);
85
86# else /* < 2.6.0 */
87
88# define VBOX_REGISTER_DEVFS() \
89 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
90 DEVICE_MAJOR, 0, \
91 S_IFCHR | S_IRUGO | S_IWUGO, \
92 &gFileOpsVBoxDrv, NULL)
93
94# define VBOX_UNREGISTER_DEVFS(handle) \
95 if (handle != NULL) \
96 devfs_unregister(handle)
97
98# endif /* < 2.6.0 */
99#endif /* CONFIG_DEV_FS && !CONFIG_VBOXDEV_AS_MISC */
100
101#ifndef CONFIG_VBOXDRV_AS_MISC
102# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
103# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
104# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
105# else
106# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
107# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
108# endif
109#endif /* !CONFIG_VBOXDRV_AS_MISC */
110
111
112#ifdef CONFIG_X86_HIGH_ENTRY
113# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
114#endif
115
116/*
117 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
118 */
119#if defined(__AMD64__)
120# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
121#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
122# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
123#else
124# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
125#endif
126
127/*
128 * The redhat hack section.
129 * - The current hacks are for 2.4.21-15.EL only.
130 */
131#ifndef NO_REDHAT_HACKS
132/* accounting. */
133# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
134# ifdef VM_ACCOUNT
135# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
136# endif
137# endif
138
139/* backported remap_page_range. */
140# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
141# include <asm/tlb.h>
142# ifdef tlb_vma /* probably not good enough... */
143# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
144# endif
145# endif
146
147# ifndef __AMD64__
148/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
149 * the page attributes from PAGE_KERNEL to something else, because there appears
150 * to be a bug in one of the many patches that redhat applied.
151 * It should be safe to do this on less buggy linux kernels too. ;-)
152 */
153# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
154 do { \
155 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
156 change_page_attr(pPages, cPages, prot); \
157 change_page_attr(pPages, cPages, prot); \
158 } while (0)
159# endif
160#endif /* !NO_REDHAT_HACKS */
161
162
163#ifndef MY_DO_MUNMAP
164# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
165#endif
166
167#ifndef MY_CHANGE_PAGE_ATTR
168# ifdef __AMD64__ /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
169# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
170 do { \
171 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
172 change_page_attr(pPages, cPages, prot); \
173 } while (0)
174# else
175# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
176# endif
177#endif
178
179
180/** @def ONE_MSEC_IN_JIFFIES
181 * The number of jiffies that make up 1 millisecond. This is only actually used
182 * when HZ is > 1000. */
183#if HZ <= 1000
184# define ONE_MSEC_IN_JIFFIES 0
185#elif !(HZ % 1000)
186# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
187#else
188# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
189# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
190#endif
191
192
193/*******************************************************************************
194* Defined Constants And Macros *
195*******************************************************************************/
196/**
197 * Device extention & session data association structure.
198 */
199static SUPDRVDEVEXT g_DevExt;
200
201/** Timer structure for the GIP update. */
202static struct timer_list g_GipTimer;
203/** Pointer to the page structure for the GIP. */
204struct page *g_pGipPage;
205
206/** Registered devfs device handle. */
207#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
208# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
209static void *g_hDevFsVBoxDrv = NULL;
210# else
211static devfs_handle_t g_hDevFsVBoxDrv = NULL;
212# endif
213#endif
214
215#ifndef CONFIG_VBOXDRV_AS_MISC
216/** Module major number */
217#define DEVICE_MAJOR 234
218/** Saved major device number */
219static int g_iModuleMajor;
220#endif /* !CONFIG_VBOXDRV_AS_MISC */
221
222/** The module name. */
223#define DEVICE_NAME "vboxdrv"
224
225#ifdef __AMD64__
226/**
227 * Memory for the executable memory heap (in IPRT).
228 */
229extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
230__asm__(".section execmemory, \"awx\", @progbits\n\t"
231 ".align 32\n\t"
232 ".globl g_abExecMemory\n"
233 "g_abExecMemory:\n\t"
234 ".zero 1572864\n\t"
235 ".type g_abExecMemory, @object\n\t"
236 ".size g_abExecMemory, 1572864\n\t"
237 ".text\n\t");
238#endif
239
240
241/*******************************************************************************
242* Internal Functions *
243*******************************************************************************/
244static int VBoxSupDrvInit(void);
245static void VBoxSupDrvUnload(void);
246static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp);
247static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp);
248static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
249 unsigned int IOCmd, unsigned long IOArg);
250static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
251static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt);
252static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt);
253static void VBoxSupGipTimer(unsigned long ulUser);
254#ifdef CONFIG_SMP
255static void VBoxSupGipTimerPerCpu(unsigned long ulUser);
256static void VBoxSupGipResumePerCpu(void *pvUser);
257#endif
258static int VBoxSupDrvOrder(unsigned long size);
259static int VBoxSupDrvErr2LinuxErr(int);
260
261
/** The file_operations structure hooked up to the character device.
 * Uses the old GNU designated-initializer (label:) syntax that predates C99. */
static struct file_operations gFileOpsVBoxDrv =
{
    owner:   THIS_MODULE,            /* Keeps the module pinned while a handle is open. */
    open:    VBoxSupDrvCreate,       /* open("/dev/vboxdrv") -> create a driver session. */
    release: VBoxSupDrvClose,        /* Last close -> tear the session down. */
    ioctl:   VBoxSupDrvDeviceControl,/* All requests go through ioctl. */
};
270
#ifdef CONFIG_VBOXDRV_AS_MISC
/** The miscdevice structure used when registering via the misc subsystem
 * instead of claiming a fixed character-device major. */
static struct miscdevice gMiscDevice =
{
    minor: MISC_DYNAMIC_MINOR,  /* Let the misc core assign a free minor. */
    name:  DEVICE_NAME,
    fops:  &gFileOpsVBoxDrv,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
     LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
    devfs_name: DEVICE_NAME,    /* The devfs_name member only exists in 2.6.0..2.6.17. */
# endif
};
#endif
284
285
286/**
287 * Initialize module.
288 *
289 * @returns appropriate status code.
290 */
291static int __init VBoxSupDrvInit(void)
292{
293 int rc;
294
295 dprintf(("VBoxDrv::ModuleInit\n"));
296
297#ifdef CONFIG_X86_LOCAL_APIC
298 /*
299 * If an NMI occurs while we are inside the world switcher the macine will crash.
300 * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
301 * compared with another counter increased in the timer interrupt handler. Therefore
302 * we don't allow to setup an NMI watchdog.
303 */
304# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
305 /*
306 * First test: NMI actiated? Works only works with Linux 2.6 -- 2.4 does not export
307 * the nmi_watchdog variable.
308 */
309# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
310 /*
311 * Permanent IO_APIC mode active? No way to handle this!
312 */
313 if (nmi_watchdog == NMI_IO_APIC)
314 {
315 printk(KERN_ERR DEVICE_NAME
316 ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
317 DEVICE_NAME
318 ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
319 DEVICE_NAME
320 ": command line.\n");
321 return -EINVAL;
322 }
323
324 /*
325 * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never enabled again
326 */
327 atomic_set(&nmi_active, -1);
328 printk(KERN_INFO DEVICE_NAME ": Trying to deactivate NMI watchdog permanently...\n");
329
330 /*
331 * Now fall through and see if it actually was enabled before. If so, fail
332 * as we cannot deactivate it cleanly from here.
333 */
334# else /* < 2.6.19 */
335 /*
336 * Older 2.6 kernels: nmi_watchdog is not initalized by default
337 */
338 if (nmi_watchdog != NMI_NONE)
339 goto nmi_activated;
340# endif
341# endif /* >= 2.6.0 */
342
343 /*
344 * Second test: Interrupt generated by performance counter not masked and can
345 * generate an NMI. Works also with Linux 2.4.
346 */
347 {
348 unsigned int v, ver, maxlvt;
349
350 v = apic_read(APIC_LVR);
351 ver = GET_APIC_VERSION(v);
352 /* 82489DXs do not report # of LVT entries. */
353 maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
354 if (maxlvt >= 4)
355 {
356 /* Read status of performance counter IRQ vector */
357 v = apic_read(APIC_LVTPC);
358
359 /* performance counter generates NMI and is not masked? */
360 if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
361 {
362# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
363 printk(KERN_ERR DEVICE_NAME
364 ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
365 DEVICE_NAME
366 ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
367 return -EINVAL;
368# else /* < 2.6.19 */
369# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
370nmi_activated:
371# endif
372 printk(KERN_ERR DEVICE_NAME
373 ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
374 DEVICE_NAME
375 ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
376 return -EINVAL;
377# endif /* >= 2.6.19 */
378 }
379 }
380 }
381# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
382 printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
383# endif /* >= 2.6.19 */
384#endif /* CONFIG_X86_LOCAL_APIC */
385
386#ifdef CONFIG_VBOXDRV_AS_MISC
387 rc = misc_register(&gMiscDevice);
388 if (rc)
389 {
390 printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
391 return rc;
392 }
393#else /* !CONFIG_VBOXDRV_AS_MISC */
394 /*
395 * Register character device.
396 */
397 g_iModuleMajor = DEVICE_MAJOR;
398 rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
399 if (rc < 0)
400 {
401 dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
402 return rc;
403 }
404
405 /*
406 * Save returned module major number
407 */
408 if (DEVICE_MAJOR != 0)
409 g_iModuleMajor = DEVICE_MAJOR;
410 else
411 g_iModuleMajor = rc;
412 rc = 0;
413
414#ifdef CONFIG_DEVFS_FS
415 /*
416 * Register a device entry
417 */
418 g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
419 if (g_hDevFsVBoxDrv == NULL)
420 {
421 dprintf(("devfs_register failed!\n"));
422 rc = -EINVAL;
423 }
424#endif
425#endif /* !CONFIG_VBOXDRV_AS_MISC */
426 if (!rc)
427 {
428 /*
429 * Initialize the runtime.
430 * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
431 */
432 rc = RTR0Init(0);
433 if (RT_SUCCESS(rc))
434 {
435#ifdef __AMD64__
436 rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
437#endif
438 /*
439 * Initialize the device extension.
440 */
441 if (RT_SUCCESS(rc))
442 rc = supdrvInitDevExt(&g_DevExt);
443 if (!rc)
444 {
445 /*
446 * Create the GIP page.
447 */
448 rc = VBoxSupDrvInitGip(&g_DevExt);
449 if (!rc)
450 {
451 dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
452 return rc;
453 }
454
455 supdrvDeleteDevExt(&g_DevExt);
456 }
457 else
458 rc = -EINVAL;
459 RTR0Term();
460 }
461 else
462 rc = -EINVAL;
463
464 /*
465 * Failed, cleanup and return the error code.
466 */
467#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
468 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
469#endif
470 }
471#ifdef CONFIG_VBOXDRV_AS_MISC
472 misc_deregister(&gMiscDevice);
473 dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
474#else
475 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
476 dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
477#endif
478 return rc;
479}
480
481
482/**
483 * Unload the module.
484 */
485static void __exit VBoxSupDrvUnload(void)
486{
487 int rc;
488 dprintf(("VBoxSupDrvUnload\n"));
489
490 /*
491 * I Don't think it's possible to unload a driver which processes have
492 * opened, at least we'll blindly assume that here.
493 */
494#ifdef CONFIG_VBOXDRV_AS_MISC
495 rc = misc_deregister(&gMiscDevice);
496 if (rc < 0)
497 {
498 dprintf(("misc_deregister failed with rc=%#x\n", rc));
499 }
500#else /* !CONFIG_VBOXDRV_AS_MISC */
501#ifdef CONFIG_DEVFS_FS
502 /*
503 * Unregister a device entry
504 */
505 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
506#endif // devfs
507 rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
508 if (rc < 0)
509 {
510 dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
511 }
512#endif /* !CONFIG_VBOXDRV_AS_MISC */
513
514 /*
515 * Destroy GIP, delete the device extension and terminate IPRT.
516 */
517 VBoxSupDrvTermGip(&g_DevExt);
518 supdrvDeleteDevExt(&g_DevExt);
519 RTR0Term();
520}
521
522
523/**
524 * Device open. Called on open /dev/vboxdrv
525 *
526 * @param pInode Pointer to inode info structure.
527 * @param pFilp Associated file pointer.
528 */
529static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp)
530{
531 int rc;
532 PSUPDRVSESSION pSession;
533 dprintf(("VBoxSupDrvCreate: pFilp=%p\n", pFilp));
534
535 /*
536 * Call common code for the rest.
537 */
538 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
539 if (!rc)
540 {
541 pSession->Uid = current->euid;
542 pSession->Gid = current->egid;
543 pSession->Process = RTProcSelf();
544 pSession->R0Process = RTR0ProcHandleSelf();
545 }
546
547 dprintf(("VBoxSupDrvCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
548 pFilp->private_data = pSession;
549
550 return VBoxSupDrvErr2LinuxErr(rc);
551}
552
553
554/**
555 * Close device.
556 *
557 * @param pInode Pointer to inode info structure.
558 * @param pFilp Associated file pointer.
559 */
560static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp)
561{
562 dprintf(("VBoxSupDrvClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
563 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
564 pFilp->private_data = NULL;
565 return 0;
566}
567
568
569/**
570 * Device I/O Control entry point.
571 *
572 * @param pInode Pointer to inode info structure.
573 * @param pFilp Associated file pointer.
574 * @param IOCmd The function specified to ioctl().
575 * @param IOArg The argument specified to ioctl().
576 */
577static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
578 unsigned int IOCmd, unsigned long IOArg)
579{
580 int rc;
581 SUPDRVIOCTLDATA Args;
582 void *pvBuf = NULL;
583 int cbBuf = 0;
584 unsigned cbOut = 0;
585
586 dprintf2(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p\n", pFilp, IOCmd, (void *)IOArg));
587
588 /*
589 * Copy ioctl data structure from user space.
590 */
591 if (_IOC_SIZE(IOCmd) != sizeof(SUPDRVIOCTLDATA))
592 {
593 dprintf(("VBoxSupDrvDeviceControl: incorrect input length! cbArgs=%d\n", _IOC_SIZE(IOCmd)));
594 return -EINVAL;
595 }
596 if (copy_from_user(&Args, (void *)IOArg, _IOC_SIZE(IOCmd)))
597 {
598 dprintf(("VBoxSupDrvDeviceControl: copy_from_user(&Args) failed.\n"));
599 return -EFAULT;
600 }
601
602 /*
603 * Allocate and copy user space input data buffer to kernel space.
604 */
605 if (Args.cbIn > 0 || Args.cbOut > 0)
606 {
607 cbBuf = max(Args.cbIn, Args.cbOut);
608 pvBuf = vmalloc(cbBuf);
609 if (pvBuf == NULL)
610 {
611 dprintf(("VBoxSupDrvDeviceControl: failed to allocate buffer of %d bytes.\n", cbBuf));
612 return -ENOMEM;
613 }
614
615 if (copy_from_user(pvBuf, (void *)Args.pvIn, Args.cbIn))
616 {
617 dprintf(("VBoxSupDrvDeviceControl: copy_from_user(pvBuf) failed.\n"));
618 vfree(pvBuf);
619 return -EFAULT;
620 }
621 }
622
623 /*
624 * Process the IOCtl.
625 */
626 rc = supdrvIOCtl(IOCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data,
627 pvBuf, Args.cbIn, pvBuf, Args.cbOut, &cbOut);
628
629 /*
630 * Copy ioctl data and output buffer back to user space.
631 */
632 if (rc)
633 {
634 dprintf(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p failed, rc=%d (linux rc=%d)\n",
635 pFilp, IOCmd, (void *)IOArg, rc, VBoxSupDrvErr2LinuxErr(rc)));
636 rc = VBoxSupDrvErr2LinuxErr(rc);
637 }
638 else if (cbOut > 0)
639 {
640 if (pvBuf != NULL && cbOut <= cbBuf)
641 {
642 if (copy_to_user((void *)Args.pvOut, pvBuf, cbOut))
643 {
644 dprintf(("copy_to_user failed.\n"));
645 rc = -EFAULT;
646 }
647 }
648 else
649 {
650 dprintf(("WHAT!?! supdrvIOCtl messed up! cbOut=%d cbBuf=%d pvBuf=%p\n", cbOut, cbBuf, pvBuf));
651 rc = -EPERM;
652 }
653 }
654
655 if (pvBuf)
656 vfree(pvBuf);
657
658 dprintf2(("VBoxSupDrvDeviceControl: returns %d\n", rc));
659 return rc;
660}
661
662
663/**
664 * Initializes any OS specific object creator fields.
665 */
666void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
667{
668 NOREF(pObj);
669 NOREF(pSession);
670}
671
672
673/**
674 * Checks if the session can access the object.
675 *
676 * @returns true if a decision has been made.
677 * @returns false if the default access policy should be applied.
678 *
679 * @param pObj The object in question.
680 * @param pSession The session wanting to access the object.
681 * @param pszObjName The object name, can be NULL.
682 * @param prc Where to store the result when returning true.
683 */
684bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
685{
686 NOREF(pObj);
687 NOREF(pSession);
688 NOREF(pszObjName);
689 NOREF(prc);
690 return false;
691}
692
693
694/**
695 * Compute order. Some functions allocate 2^order pages.
696 *
697 * @returns order.
698 * @param cPages Number of pages.
699 */
700static int VBoxSupDrvOrder(unsigned long cPages)
701{
702 int iOrder;
703 unsigned long cTmp;
704
705 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
706 ;
707 if (cPages & ~(1 << iOrder))
708 ++iOrder;
709
710 return iOrder;
711}
712
713
714/**
715 * OS Specific code for locking down memory.
716 *
717 * @returns 0 on success.
718 * @returns SUPDRV_ERR_* on failure.
719 * @param pMem Pointer to memory.
720 * This is not linked in anywhere.
721 * @param paPages Array which should be filled with the address of the physical pages.
722 *
723 * @remark See sgl_map_user_pages() for an example of an similar function.
724 */
725int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
726{
727 int rc;
728 struct page **papPages;
729 unsigned iPage;
730 unsigned cPages = pMem->cb >> PAGE_SHIFT;
731 unsigned long pv = (unsigned long)pMem->pvR3;
732 struct vm_area_struct **papVMAs;
733
734 /*
735 * Allocate page pointer array.
736 */
737 papPages = vmalloc(cPages * sizeof(*papPages));
738 if (!papPages)
739 return SUPDRV_ERR_NO_MEMORY;
740
741 /*
742 * Allocate the VMA pointer array.
743 */
744 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
745 if (!papVMAs)
746 return SUPDRV_ERR_NO_MEMORY;
747
748 /*
749 * Get user pages.
750 */
751 down_read(&current->mm->mmap_sem);
752 rc = get_user_pages(current, /* Task for fault acounting. */
753 current->mm, /* Whose pages. */
754 (unsigned long)pv, /* Where from. */
755 cPages, /* How many pages. */
756 1, /* Write to memory. */
757 0, /* force. */
758 papPages, /* Page array. */
759 papVMAs); /* vmas */
760 if (rc != cPages)
761 {
762 up_read(&current->mm->mmap_sem);
763 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
764 return SUPDRV_ERR_LOCK_FAILED;
765 }
766
767 for (iPage = 0; iPage < cPages; iPage++)
768 flush_dcache_page(papPages[iPage]);
769 up_read(&current->mm->mmap_sem);
770
771 pMem->u.locked.papPages = papPages;
772 pMem->u.locked.cPages = cPages;
773
774 /*
775 * Get addresses, protect against fork()
776 */
777 for (iPage = 0; iPage < cPages; iPage++)
778 {
779 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
780 paPages[iPage].uReserved = 0;
781 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
782 }
783
784 vfree(papVMAs);
785
786 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
787 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
788 return 0;
789}
790
791
792/**
793 * Unlocks the memory pointed to by pv.
794 *
795 * @param pMem Pointer to memory to unlock.
796 *
797 * @remark See sgl_unmap_user_pages() for an example of an similar function.
798 */
799void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
800{
801 unsigned iPage;
802 dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
803 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
804
805 /*
806 * Loop thru the pages and release them.
807 */
808 for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
809 {
810 if (!PageReserved(pMem->u.locked.papPages[iPage]))
811 SetPageDirty(pMem->u.locked.papPages[iPage]);
812 page_cache_release(pMem->u.locked.papPages[iPage]);
813 }
814
815 /* free the page array */
816 vfree(pMem->u.locked.papPages);
817 pMem->u.locked.cPages = 0;
818}
819
820
821/**
822 * OS Specific code for allocating page aligned memory with continuous fixed
823 * physical paged backing.
824 *
825 * @returns 0 on success.
826 * @returns SUPDRV_ERR_* on failure.
827 * @param pMem Memory reference record of the memory to be allocated.
828 * (This is not linked in anywhere.)
829 * @param ppvR0 Where to store the virtual address of the ring-0 mapping. (optional)
830 * @param ppvR3 Where to store the virtual address of the ring-3 mapping.
831 * @param pHCPhys Where to store the physical address.
832 */
833int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
834{
835 struct page *paPages;
836 unsigned iPage;
837 unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
838 unsigned cPages = cbAligned >> PAGE_SHIFT;
839 unsigned cOrder = VBoxSupDrvOrder(cPages);
840 unsigned long ulAddr;
841 dma_addr_t HCPhys;
842 int rc = 0;
843 pgprot_t pgFlags;
844 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
845
846 Assert(ppvR3);
847 Assert(pHCPhys);
848
849 /*
850 * Allocate page pointer array.
851 */
852#ifdef __AMD64__ /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
853 paPages = alloc_pages(GFP_DMA, cOrder);
854#else
855 paPages = alloc_pages(GFP_USER, cOrder);
856#endif
857 if (!paPages)
858 return SUPDRV_ERR_NO_MEMORY;
859
860 /*
861 * Lock the pages.
862 */
863 for (iPage = 0; iPage < cPages; iPage++)
864 {
865 SetPageReserved(&paPages[iPage]);
866 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
867 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
868#ifdef DEBUG
869 if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
870 {
871 dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
872 iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
873 BUG();
874 }
875#endif
876 }
877 HCPhys = page_to_phys(paPages);
878
879 /*
880 * Allocate user space mapping and put the physical pages into it.
881 */
882 down_write(&current->mm->mmap_sem);
883 ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
884 if (!(ulAddr & ~PAGE_MASK))
885 {
886#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
887 int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
888#else
889 int rc2 = 0;
890 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
891 if (vma)
892#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
893 rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
894#else
895 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
896#endif
897 else
898 {
899 rc = SUPDRV_ERR_NO_MEMORY;
900 dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
901 }
902#endif
903 if (rc2)
904 {
905 rc = SUPDRV_ERR_NO_MEMORY;
906 dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
907 }
908 }
909 else
910 {
911 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
912 rc = SUPDRV_ERR_NO_MEMORY;
913 }
914 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
915
916 /*
917 * Success?
918 */
919 if (!rc)
920 {
921 *pHCPhys = HCPhys;
922 *ppvR3 = ulAddr;
923 if (ppvR0)
924 *ppvR0 = (void *)ulAddr;
925 pMem->pvR3 = ulAddr;
926 pMem->pvR0 = NULL;
927 pMem->u.cont.paPages = paPages;
928 pMem->u.cont.cPages = cPages;
929 pMem->cb = cbAligned;
930
931 dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
932 pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
933 global_flush_tlb();
934 return 0;
935 }
936
937 /*
938 * Failure, cleanup and be gone.
939 */
940 down_write(&current->mm->mmap_sem);
941 if (ulAddr & ~PAGE_MASK)
942 MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
943 for (iPage = 0; iPage < cPages; iPage++)
944 {
945 ClearPageReserved(&paPages[iPage]);
946 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
947 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
948 }
949 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
950 __free_pages(paPages, cOrder);
951
952 global_flush_tlb();
953 return rc;
954}
955
956
957/**
958 * Frees contiguous memory.
959 *
960 * @param pMem Memory reference record of the memory to be freed.
961 */
962void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
963{
964 unsigned iPage;
965
966 dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
967 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));
968
969 /*
970 * do_exit() destroys the mm before closing files.
971 * I really hope it cleans up our stuff properly...
972 */
973 if (current->mm)
974 {
975 down_write(&current->mm->mmap_sem);
976 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
977 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
978 }
979
980 /*
981 * Change page attributes freeing the pages.
982 */
983 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
984 {
985 ClearPageReserved(&pMem->u.cont.paPages[iPage]);
986 if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
987 MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
988 }
989 __free_pages(pMem->u.cont.paPages, VBoxSupDrvOrder(pMem->u.cont.cPages));
990
991 pMem->u.cont.cPages = 0;
992}
993
994
995/**
996 * Allocates memory which mapped into both kernel and user space.
997 * The returned memory is page aligned and so is the allocation.
998 *
999 * @returns 0 on success.
1000 * @returns SUPDRV_ERR_* on failure.
1001 * @param pMem Memory reference record of the memory to be allocated.
1002 * (This is not linked in anywhere.)
1003 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1004 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1005 */
1006int VBOXCALL supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1007{
1008 const unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
1009 const unsigned cPages = cbAligned >> PAGE_SHIFT;
1010#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1011 unsigned cOrder = VBoxSupDrvOrder(cPages);
1012 struct page *paPages;
1013#endif
1014 struct page **papPages;
1015 unsigned iPage;
1016 pgprot_t pgFlags;
1017 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
1018
1019 /*
1020 * Allocate array with page pointers.
1021 */
1022 pMem->u.mem.cPages = 0;
1023 pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
1024 if (!papPages)
1025 return SUPDRV_ERR_NO_MEMORY;
1026
1027 /*
1028 * Allocate the pages.
1029 */
1030#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1031 for (iPage = 0; iPage < cPages; iPage++)
1032 {
1033 papPages[iPage] = alloc_page(GFP_HIGHUSER);
1034 if (!papPages[iPage])
1035 {
1036 pMem->u.mem.cPages = iPage;
1037 supdrvOSMemFreeOne(pMem);
1038 return SUPDRV_ERR_NO_MEMORY;
1039 }
1040 }
1041
1042#else /* < 2.4.22 */
1043 paPages = alloc_pages(GFP_USER, cOrder);
1044 if (!paPages)
1045 {
1046 supdrvOSMemFreeOne(pMem);
1047 return SUPDRV_ERR_NO_MEMORY;
1048 }
1049 for (iPage = 0; iPage < cPages; iPage++)
1050 {
1051 papPages[iPage] = &paPages[iPage];
1052 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1053 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
1054 if (PageHighMem(papPages[iPage]))
1055 BUG();
1056 }
1057#endif
1058 pMem->u.mem.cPages = cPages;
1059
1060 /*
1061 * Reserve the pages.
1062 */
1063 for (iPage = 0; iPage < cPages; iPage++)
1064 SetPageReserved(papPages[iPage]);
1065
1066 /*
1067 * Create the Ring-0 mapping.
1068 */
1069 if (ppvR0)
1070 {
1071#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1072# ifdef VM_MAP
1073 *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
1074# else
1075 *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
1076# endif
1077#else
1078 *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
1079#endif
1080 }
1081 if (pMem->pvR0 || !ppvR0)
1082 {
1083 /*
1084 * Create the ring3 mapping.
1085 */
1086 if (ppvR3)
1087 *ppvR3 = pMem->pvR3 = VBoxSupDrvMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
1088 if (pMem->pvR3 || !ppvR3)
1089 return 0;
1090 dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
1091 }
1092 else
1093 dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));
1094
1095 supdrvOSMemFreeOne(pMem);
1096 return SUPDRV_ERR_NO_MEMORY;
1097}
1098
1099
1100/**
1101 * Get the physical addresses of the pages in the allocation.
 * This is called while holding the bundle spinlock.
1103 *
1104 * @param pMem Memory reference record of the memory.
1105 * @param paPages Where to store the page addresses.
1106 */
1107void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1108{
1109 unsigned iPage;
1110 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1111 {
1112 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1113 paPages[iPage].uReserved = 0;
1114 }
1115}
1116
1117
1118/**
1119 * Frees memory allocated by supdrvOSMemAllocOne().
1120 *
1121 * @param pMem Memory reference record of the memory to be free.
1122 */
1123void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1124{
1125 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1126 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1127
1128 /*
1129 * Unmap the user mapping (if any).
1130 * do_exit() destroys the mm before closing files.
1131 */
1132 if (pMem->pvR3 && current->mm)
1133 {
1134 down_write(&current->mm->mmap_sem);
1135 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1136 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1137 }
1138 pMem->pvR3 = NIL_RTR3PTR;
1139
1140 /*
1141 * Unmap the kernel mapping (if any).
1142 */
1143 if (pMem->pvR0)
1144 {
1145#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1146 vunmap(pMem->pvR0);
1147#endif
1148 pMem->pvR0 = NULL;
1149 }
1150
1151 /*
1152 * Free the physical pages.
1153 */
1154 if (pMem->u.mem.papPages)
1155 {
1156 struct page **papPages = pMem->u.mem.papPages;
1157 const unsigned cPages = pMem->u.mem.cPages;
1158 unsigned iPage;
1159
1160 /* Restore the page flags. */
1161 for (iPage = 0; iPage < cPages; iPage++)
1162 {
1163 ClearPageReserved(papPages[iPage]);
1164#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1165 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1166 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1167#endif
1168 }
1169
1170 /* Free the pages. */
1171#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1172 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1173 __free_page(papPages[iPage]);
1174#else
1175 if (cPages > 0)
1176 __free_pages(papPages[0], VBoxSupDrvOrder(cPages));
1177#endif
1178 /* Free the page pointer array. */
1179 kfree(papPages);
1180 pMem->u.mem.papPages = NULL;
1181 }
1182 pMem->u.mem.cPages = 0;
1183}
1184
1185
1186/**
1187 * Maps a range of pages into user space.
1188 *
1189 * @returns Pointer to the user space mapping on success.
1190 * @returns NULL on failure.
1191 * @param papPages Array of the pages to map.
1192 * @param cPages Number of pages to map.
1193 * @param fProt The mapping protection.
1194 * @param pgFlags The page level protection.
1195 */
1196static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1197{
1198 int rc = SUPDRV_ERR_NO_MEMORY;
1199 unsigned long ulAddr;
1200
1201 /*
1202 * Allocate user space mapping.
1203 */
1204 down_write(&current->mm->mmap_sem);
1205 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1206 if (!(ulAddr & ~PAGE_MASK))
1207 {
1208 /*
1209 * Map page by page into the mmap area.
1210 * This is generic, paranoid and not very efficient.
1211 */
1212 int rc = 0;
1213 unsigned long ulAddrCur = ulAddr;
1214 unsigned iPage;
1215 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1216 {
1217#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1218 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1219 if (!vma)
1220 break;
1221#endif
1222
1223#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1224 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1225#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1226 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1227#else /* 2.4 */
1228 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1229#endif
1230 if (rc)
1231 break;
1232 }
1233
1234 /*
1235 * Successful?
1236 */
1237 if (iPage >= cPages)
1238 {
1239 up_write(&current->mm->mmap_sem);
1240 return ulAddr;
1241 }
1242
1243 /* no, cleanup! */
1244 if (rc)
1245 dprintf(("VBoxSupDrvMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1246 else
1247 dprintf(("VBoxSupDrvMapUser: find_vma failed!\n"));
1248
1249 MY_DO_MUNMAP(current->mm, ulAddr, cPages << PAGE_SHIFT);
1250 }
1251 else
1252 {
1253 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1254 rc = SUPDRV_ERR_NO_MEMORY;
1255 }
1256 up_write(&current->mm->mmap_sem);
1257
1258 return NIL_RTR3PTR;
1259}
1260
1261
/**
 * Initializes the GIP (Global Information Page).
 *
 * Allocates and reserves a single page, lets the common code initialize
 * its content, and prepares the update timer(s). Timers are prepared but
 * not started here; supdrvOSGipResume() arms them.
 *
 * @returns negative errno.
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    dma_addr_t HCPhys;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxSupDrvInitGip:\n"));

    /*
     * Allocate the page.
     */
    pPage = alloc_pages(GFP_USER, 0);
    if (!pPage)
    {
        dprintf(("VBoxSupDrvInitGip: failed to allocate the GIP page\n"));
        return -ENOMEM;
    }

    /*
     * Lock the page.
     */
    SetPageReserved(pPage);
    g_pGipPage = pPage;

    /*
     * Call common initialization routine.
     */
    HCPhys = page_to_phys(pPage);
    pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
    pDevExt->ulLastJiffies = jiffies;
#ifdef TICK_NSEC
    /* Derive the initial monotonic time from jiffies using TICK_NSEC. */
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
             TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#else
    /* NOTE(review): this fallback scales by 1000000/HZ, which looks like
       microseconds while the TICK_NSEC branch is nanoseconds - verify the
       intended unit on kernels without TICK_NSEC. */
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
             (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#endif
    /* The GIP update frequency is capped at 1000 Hz. */
    supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
                  HZ <= 1000 ? HZ : 1000);

    /*
     * Initialize the timer.
     */
    init_timer(&g_GipTimer);
    g_GipTimer.data = (unsigned long)pDevExt;
    g_GipTimer.function = VBoxSupGipTimer;
    g_GipTimer.expires = jiffies;
#ifdef CONFIG_SMP
    /* Prepare one per-CPU timer (used in async TSC mode). */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
    {
        pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
        pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
        pDevExt->aCPUs[i].iSmpProcessorId = -512;   /* presumably an "invalid" marker; set for real in VBoxSupGipResumePerCpu(). */
        init_timer(&pDevExt->aCPUs[i].Timer);
        pDevExt->aCPUs[i].Timer.data = i;
        pDevExt->aCPUs[i].Timer.function = VBoxSupGipTimerPerCpu;
        pDevExt->aCPUs[i].Timer.expires = jiffies;
    }
#endif

    return 0;
}
1334
1335
1336/**
1337 * Terminates the GIP.
1338 *
1339 * @returns negative errno.
1340 * @param pDevExt Instance data. GIP stuff may be updated.
1341 */
1342static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt)
1343{
1344 struct page *pPage;
1345 PSUPGLOBALINFOPAGE pGip;
1346#ifdef CONFIG_SMP
1347 unsigned i;
1348#endif
1349 dprintf(("VBoxSupDrvTermGip:\n"));
1350
1351 /*
1352 * Delete the timer if it's pending.
1353 */
1354 if (timer_pending(&g_GipTimer))
1355 del_timer_sync(&g_GipTimer);
1356#ifdef CONFIG_SMP
1357 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1358 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1359 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1360#endif
1361
1362 /*
1363 * Uninitialize the content.
1364 */
1365 pGip = pDevExt->pGip;
1366 pDevExt->pGip = NULL;
1367 if (pGip)
1368 supdrvGipTerm(pGip);
1369
1370 /*
1371 * Free the page.
1372 */
1373 pPage = g_pGipPage;
1374 g_pGipPage = NULL;
1375 if (pPage)
1376 {
1377 ClearPageReserved(pPage);
1378 __free_pages(pPage, 0);
1379 }
1380
1381 return 0;
1382}
1383
1384/**
1385 * Timer callback function.
1386 *
1387 * In ASYNC TSC mode this is called on the primary CPU, and we're
1388 * assuming that the CPU remains online.
1389 *
1390 * @param ulUser The device extension pointer.
1391 */
1392static void VBoxSupGipTimer(unsigned long ulUser)
1393{
1394 PSUPDRVDEVEXT pDevExt;
1395 PSUPGLOBALINFOPAGE pGip;
1396 unsigned long ulNow;
1397 unsigned long ulDiff;
1398 uint64_t u64Monotime;
1399 unsigned long SavedFlags;
1400
1401 local_irq_save(SavedFlags);
1402
1403 pDevExt = (PSUPDRVDEVEXT)ulUser;
1404 pGip = pDevExt->pGip;
1405 ulNow = jiffies;
1406 ulDiff = ulNow - pDevExt->ulLastJiffies;
1407
1408 pDevExt->ulLastJiffies = ulNow;
1409#ifdef TICK_NSEC
1410 u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
1411#else
1412 u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
1413#endif
1414 ASMAtomicXchgU64(&pDevExt->u64LastMonotime, u64Monotime);
1415 if (RT_LIKELY(pGip))
1416 supdrvGipUpdate(pDevExt->pGip, u64Monotime);
1417 mod_timer(&g_GipTimer, jiffies + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
1418
1419 local_irq_restore(SavedFlags);
1420}
1421
1422
#ifdef CONFIG_SMP
/**
 * Timer callback function for the other CPUs.
 *
 * Each per-CPU timer updates only its own slot in the device extension
 * and GIP CPU arrays. The slot index is the APIC ID, which must match
 * the id the timer was registered with (iLnxCPU).
 *
 * @param   iLnxCPU     The APIC ID of this timer.
 */
static void VBoxSupGipTimerPerCpu(unsigned long iLnxCPU)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    uint8_t iCPU;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep interrupts off so we cannot be migrated/preempted mid-update. */
    local_irq_save(SavedFlags);

    pDevExt = &g_DevExt;
    pGip = pDevExt->pGip;
    iCPU = ASMGetApicId();

    /* RT_ELEMENTS() is compile-time, so this bound check does not dereference pGip. */
    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        /* Inside this branch iCPU == iLnxCPU, so the mixed indexing below is equivalent. */
        if (RT_LIKELY(iCPU == iLnxCPU))
        {
            unsigned long ulNow = jiffies;
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iLnxCPU].ulLastJiffies;

            pDevExt->aCPUs[iLnxCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
            /* Advance this CPU's monotonic clock by the elapsed jiffies. */
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
            ASMAtomicXchgU64(&pDevExt->aCPUs[iCPU].u64LastMonotime, u64Monotime);
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            /* Re-arm: next jiffy when HZ <= 1000, else roughly one millisecond out. */
            mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d != timer-cpuid=%d)\n",
                   iCPU, iLnxCPU, smp_processor_id(), pDevExt->aCPUs[iLnxCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%d cpuid=%d\n",
               iCPU, RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);
}
#endif /* CONFIG_SMP */
1472
1473
1474/**
1475 * Maps the GIP into user space.
1476 *
1477 * @returns negative errno.
1478 * @param pDevExt Instance data.
1479 */
1480int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE *ppGip)
1481{
1482 int rc = 0;
1483 unsigned long ulAddr;
1484 unsigned long HCPhys = pDevExt->HCPhysGip;
1485 pgprot_t pgFlags;
1486 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1487 dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1488
1489 /*
1490 * Allocate user space mapping and put the physical pages into it.
1491 */
1492 down_write(&current->mm->mmap_sem);
1493 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1494 if (!(ulAddr & ~PAGE_MASK))
1495 {
1496#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1497 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1498#else
1499 int rc2 = 0;
1500 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1501 if (vma)
1502#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1503 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1504#else
1505 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1506#endif
1507 else
1508 {
1509 rc = SUPDRV_ERR_NO_MEMORY;
1510 dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1511 }
1512#endif
1513 if (rc2)
1514 {
1515 rc = SUPDRV_ERR_NO_MEMORY;
1516 dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1517 }
1518 }
1519 else
1520 {
1521 dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1522 rc = SUPDRV_ERR_NO_MEMORY;
1523 }
1524 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1525
1526 /*
1527 * Success?
1528 */
1529 if (!rc)
1530 {
1531 *ppGip = (PCSUPGLOBALINFOPAGE)ulAddr;
1532 dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1533 return 0;
1534 }
1535
1536 /*
1537 * Failure, cleanup and be gone.
1538 */
1539 if (ulAddr & ~PAGE_MASK)
1540 {
1541 down_write(&current->mm->mmap_sem);
1542 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1543 up_write(&current->mm->mmap_sem);
1544 }
1545
1546 dprintf2(("supdrvOSGipMap: returns %d\n", rc));
1547 return rc;
1548}
1549
1550
1551/**
1552 * Maps the GIP into user space.
1553 *
1554 * @returns negative errno.
1555 * @param pDevExt Instance data.
1556 */
1557int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE pGip)
1558{
1559 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1560 if (current->mm)
1561 {
1562 down_write(&current->mm->mmap_sem);
1563 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1564 up_write(&current->mm->mmap_sem);
1565 }
1566 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1567 return 0;
1568}
1569
1570
/**
 * Resumes the GIP updating.
 *
 * Re-arms the global update timer; in async TSC mode the per-CPU timers
 * on the other CPUs are re-armed as well via an SMP call.
 *
 * @param   pDevExt     Instance data.
 */
void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
{
    dprintf2(("supdrvOSGipResume:\n"));
    /* Non-async mode (or UP build): only the global timer is needed. */
#ifdef CONFIG_SMP
    if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
#endif
        mod_timer(&g_GipTimer, jiffies);
#ifdef CONFIG_SMP
    else
    {
        /* Async TSC mode: arm the local timer here and have the other
           CPUs arm their own per-CPU timers. */
        mod_timer(&g_GipTimer, jiffies);
        smp_call_function(VBoxSupGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
    }
#endif
}
1591
1592
#ifdef CONFIG_SMP
/**
 * Callback for resuming GIP updating on the other CPUs.
 *
 * This is only used when the GIP is in async tsc mode. Runs on each CPU,
 * records its Linux processor id and re-arms its per-CPU update timer.
 *
 * @param   pvUser      Pointer to the device instance.
 */
static void VBoxSupGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint8_t iApicId = ASMGetApicId();

    if (RT_LIKELY(iApicId < RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        pDevExt->aCPUs[iApicId].iSmpProcessorId = smp_processor_id();
        mod_timer(&pDevExt->aCPUs[iApicId].Timer, jiffies);
    }
    else
        printk("vboxdrv: error: apicid=%d max=%d cpuid=%d\n",
               iApicId, RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
}
#endif /* CONFIG_SMP */
1617
1618
1619/**
1620 * Suspends the GIP updating.
1621 *
1622 * @param pDevExt Instance data.
1623 */
1624void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1625{
1626#ifdef CONFIG_SMP
1627 unsigned i;
1628#endif
1629 dprintf2(("supdrvOSGipSuspend:\n"));
1630
1631 if (timer_pending(&g_GipTimer))
1632 del_timer_sync(&g_GipTimer);
1633#ifdef CONFIG_SMP
1634 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1635 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1636 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1637#endif
1638}
1639
1640
/**
 * Get the current CPU count.
 * @returns Number of cpus.
 */
unsigned VBOXCALL supdrvOSGetCPUCount(void)
{
#ifdef CONFIG_SMP
# ifdef num_present_cpus
    /* Newer kernels provide num_present_cpus() (as a macro, hence the #ifdef). */
    return num_present_cpus();
# else
    /* Older SMP kernels export the count as a global variable. */
    return smp_num_cpus;
# endif
#else
    /* Uniprocessor build. */
    return 1;
#endif
}
1657
1658/**
1659 * Force async tsc mode.
1660 * @todo add a module argument for this.
1661 */
1662bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
1663{
1664 return false;
1665}
1666
1667
1668/**
1669 * Converts a supdrv error code to an linux error code.
1670 *
1671 * @returns corresponding linux error code.
1672 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1673 */
1674static int VBoxSupDrvErr2LinuxErr(int rc)
1675{
1676 switch (rc)
1677 {
1678 case 0: return 0;
1679 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1680 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1681 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1682 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1683 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1684 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1685 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1686 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1687 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1688 }
1689
1690 return -EPERM;
1691}
1692
1693
/**
 * Formatted printing to the kernel log for ring-0 code.
 * Output is truncated to 511 characters. Always returns 0.
 */
RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
{
#if 1
    va_list args;
    char szMsg[512];

    va_start(args, pszFormat);
    vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
    szMsg[sizeof(szMsg) - 1] = '\0';    /* belt and braces termination. */
    printk("%s", szMsg);                /* "%s" guard: never feed szMsg as a format string. */
    va_end(args);
#else
    /* Disabled, non-compiling experiment kept for reference:
       forward to printf - needs some more GCC hacking to fix ebp... */
    __asm__ __volatile__ ("mov %0, %esp\n\t"
                          "jmp %1\n\t",
                          :: "r" ((uintptr_t)&pszFormat - 4),
                             "m" (printk));
#endif
    return 0;
}
1714
1715
/** Runtime assert implementation for Linux Ring-0.
 * Logs the failed expression and its location; does not panic or stop
 * execution - the caller continues after the assertion. */
RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
{
    printk("!!Assertion Failed!!\n"
           "Expression: %s\n"
           "Location : %s(%d) %s\n",
           pszExpr, pszFile, uLine, pszFunction);
}
1724
1725
1726/** Runtime assert implementation for Linux Ring-0. */
1727RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1728{ /* forwarder. */
1729 va_list ap;
1730 char msg[256];
1731
1732 va_start(ap, pszFormat);
1733 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1734 msg[sizeof(msg) - 1] = '\0';
1735 printk("%s", msg);
1736 va_end(ap);
1737}
1738
1739
/* GCC C++ hack: dummy C++ exception personality symbol so that linking
   succeeds without the C++ runtime. The value is an obvious poison
   pattern since it must never actually be used. */
unsigned __gxx_personality_v0 = 0xcccccccc;


/* Module entry/exit points. */
module_init(VBoxSupDrvInit);
module_exit(VBoxSupDrvUnload);

MODULE_AUTHOR("InnoTek Systemberatung GmbH");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
/* Stringification helpers for embedding the numeric IOC version. */
#define xstr(s) str(s)
#define str(s) #s
MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
#endif
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette