VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 2980

Last change on this file since 2980 was 2980, checked in by vboxsync, 18 years ago

InnoTek -> innotek: actual code changes (headers follow).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 51.9 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21/*******************************************************************************
22* Header Files *
23*******************************************************************************/
24#include "SUPDRV.h"
25#include "version-generated.h"
26
27#include <iprt/assert.h>
28#include <iprt/spinlock.h>
29#include <iprt/semaphore.h>
30#include <iprt/initterm.h>
31#include <iprt/process.h>
32#include <iprt/err.h>
33#include <iprt/mem.h>
34
35#include <linux/module.h>
36#include <linux/kernel.h>
37#include <linux/init.h>
38#include <linux/fs.h>
39#include <linux/mm.h>
40#include <linux/pagemap.h>
41#include <linux/sched.h>
42#include <linux/slab.h>
43#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
44# include <linux/jiffies.h>
45#endif
46#include <asm/mman.h>
47#include <asm/io.h>
48#include <asm/uaccess.h>
49#ifdef CONFIG_DEVFS_FS
50# include <linux/devfs_fs_kernel.h>
51#endif
52#ifdef CONFIG_VBOXDRV_AS_MISC
53# include <linux/miscdevice.h>
54#endif
55#ifdef CONFIG_X86_LOCAL_APIC
56# include <asm/apic.h>
57# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
58# include <asm/nmi.h>
59# endif
60#endif
61
62#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
63# ifndef page_to_pfn
64# define page_to_pfn(page) ((page) - mem_map)
65# endif
66# include <asm/pgtable.h>
67# define global_flush_tlb __flush_tlb_global
68#endif
69
70/* devfs defines */
71#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
72# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
73
74# define VBOX_REGISTER_DEVFS() \
75({ \
76 void *rc = NULL; \
77 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
78 S_IFCHR | S_IRUGO | S_IWUGO, \
79 DEVICE_NAME) == 0) \
80 rc = (void *)' '; /* return not NULL */ \
81 rc; \
82 })
83
84# define VBOX_UNREGISTER_DEVFS(handle) \
85 devfs_remove(DEVICE_NAME);
86
87# else /* < 2.6.0 */
88
89# define VBOX_REGISTER_DEVFS() \
90 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
91 DEVICE_MAJOR, 0, \
92 S_IFCHR | S_IRUGO | S_IWUGO, \
93 &gFileOpsVBoxDrv, NULL)
94
95# define VBOX_UNREGISTER_DEVFS(handle) \
96 if (handle != NULL) \
97 devfs_unregister(handle)
98
99# endif /* < 2.6.0 */
100#endif /* CONFIG_DEV_FS && !CONFIG_VBOXDEV_AS_MISC */
101
102#ifndef CONFIG_VBOXDRV_AS_MISC
103# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
104# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
105# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
106# else
107# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
108# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
109# endif
110#endif /* !CONFIG_VBOXDRV_AS_MISC */
111
112
113#ifdef CONFIG_X86_HIGH_ENTRY
114# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
115#endif
116
117/*
118 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
119 */
120#if defined(__AMD64__)
121# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
122#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
123# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
124#else
125# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
126#endif
127
128/*
129 * The redhat hack section.
130 * - The current hacks are for 2.4.21-15.EL only.
131 */
132#ifndef NO_REDHAT_HACKS
133/* accounting. */
134# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
135# ifdef VM_ACCOUNT
136# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
137# endif
138# endif
139
140/* backported remap_page_range. */
141# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
142# include <asm/tlb.h>
143# ifdef tlb_vma /* probably not good enough... */
144# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
145# endif
146# endif
147
148# ifndef __AMD64__
149/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
150 * the page attributes from PAGE_KERNEL to something else, because there appears
151 * to be a bug in one of the many patches that redhat applied.
152 * It should be safe to do this on less buggy linux kernels too. ;-)
153 */
154# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
155 do { \
156 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
157 change_page_attr(pPages, cPages, prot); \
158 change_page_attr(pPages, cPages, prot); \
159 } while (0)
160# endif
161#endif /* !NO_REDHAT_HACKS */
162
163
164#ifndef MY_DO_MUNMAP
165# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
166#endif
167
168#ifndef MY_CHANGE_PAGE_ATTR
169# ifdef __AMD64__ /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
170# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
171 do { \
172 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
173 change_page_attr(pPages, cPages, prot); \
174 } while (0)
175# else
176# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
177# endif
178#endif
179
180
181/** @def ONE_MSEC_IN_JIFFIES
182 * The number of jiffies that make up 1 millisecond. This is only actually used
183 * when HZ is > 1000. */
184#if HZ <= 1000
185# define ONE_MSEC_IN_JIFFIES 0
186#elif !(HZ % 1000)
187# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
188#else
189# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
190# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
191#endif
192
193
194/*******************************************************************************
195* Defined Constants And Macros *
196*******************************************************************************/
/**
 * Device extension & session data association structure.
 */
200static SUPDRVDEVEXT g_DevExt;
201
202/** Timer structure for the GIP update. */
203static struct timer_list g_GipTimer;
204/** Pointer to the page structure for the GIP. */
205struct page *g_pGipPage;
206
207/** Registered devfs device handle. */
208#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
209# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
210static void *g_hDevFsVBoxDrv = NULL;
211# else
212static devfs_handle_t g_hDevFsVBoxDrv = NULL;
213# endif
214#endif
215
216#ifndef CONFIG_VBOXDRV_AS_MISC
217/** Module major number */
218#define DEVICE_MAJOR 234
219/** Saved major device number */
220static int g_iModuleMajor;
221#endif /* !CONFIG_VBOXDRV_AS_MISC */
222
223/** The module name. */
224#define DEVICE_NAME "vboxdrv"
225
226#ifdef __AMD64__
227/**
228 * Memory for the executable memory heap (in IPRT).
229 */
230extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
231__asm__(".section execmemory, \"awx\", @progbits\n\t"
232 ".align 32\n\t"
233 ".globl g_abExecMemory\n"
234 "g_abExecMemory:\n\t"
235 ".zero 1572864\n\t"
236 ".type g_abExecMemory, @object\n\t"
237 ".size g_abExecMemory, 1572864\n\t"
238 ".text\n\t");
239#endif
240
241
242/*******************************************************************************
243* Internal Functions *
244*******************************************************************************/
245static int VBoxSupDrvInit(void);
246static void VBoxSupDrvUnload(void);
247static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp);
248static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp);
249static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
250 unsigned int IOCmd, unsigned long IOArg);
251static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
252static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt);
253static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt);
254static void VBoxSupGipTimer(unsigned long ulUser);
255#ifdef CONFIG_SMP
256static void VBoxSupGipTimerPerCpu(unsigned long ulUser);
257static void VBoxSupGipResumePerCpu(void *pvUser);
258#endif
259static int VBoxSupDrvOrder(unsigned long size);
260static int VBoxSupDrvErr2LinuxErr(int);
261
262
263/** The file_operations structure. */
264static struct file_operations gFileOpsVBoxDrv =
265{
266 owner: THIS_MODULE,
267 open: VBoxSupDrvCreate,
268 release: VBoxSupDrvClose,
269 ioctl: VBoxSupDrvDeviceControl,
270};
271
#ifdef CONFIG_VBOXDRV_AS_MISC
/** The miscdevice structure used when the driver registers itself as a misc
 * device (dynamic minor) instead of claiming its own character device major. */
static struct miscdevice gMiscDevice =
{
    minor: MISC_DYNAMIC_MINOR,
    name: DEVICE_NAME,
    fops: &gFileOpsVBoxDrv,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
     LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
    /* The devfs_name member only exists in this kernel version window. */
    devfs_name: DEVICE_NAME,
# endif
};
#endif
285
286
/**
 * Initialize module.
 *
 * Refuses to load when an NMI watchdog is (or may become) active, registers
 * the character / misc device, then brings up the IPRT runtime, the device
 * extension and the GIP page.  On any failure everything already registered
 * is torn down again before returning.
 *
 * @returns 0 on success, negative Linux error code on failure.
 */
static int __init VBoxSupDrvInit(void)
{
    int rc;

    dprintf(("VBoxDrv::ModuleInit\n"));

#ifdef CONFIG_X86_LOCAL_APIC
    /*
     * If an NMI occurs while we are inside the world switcher the machine will crash.
     * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
     * compared with another counter increased in the timer interrupt handler. Therefore
     * we don't allow to setup an NMI watchdog.
     */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
    /*
     * First test: NMI activated?  Only works with Linux 2.6 -- 2.4 does not export
     * the nmi_watchdog variable.
     */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    /*
     * Permanent IO_APIC mode active? No way to handle this!
     */
    if (nmi_watchdog == NMI_IO_APIC)
    {
        printk(KERN_ERR DEVICE_NAME
               ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
               DEVICE_NAME
               ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
               DEVICE_NAME
               ": command line.\n");
        return -EINVAL;
    }

    /*
     * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again
     */
    atomic_set(&nmi_active, -1);
    printk(KERN_INFO DEVICE_NAME ": Trying to deactivate NMI watchdog permanently...\n");

    /*
     * Now fall through and see if it actually was enabled before. If so, fail
     * as we cannot deactivate it cleanly from here.
     */
#  else /* < 2.6.19 */
    /*
     * Older 2.6 kernels: nmi_watchdog is not initialized by default
     */
    if (nmi_watchdog != NMI_NONE)
        goto nmi_activated;
#  endif
# endif /* >= 2.6.0 */

    /*
     * Second test: Interrupt generated by performance counter not masked and can
     * generate an NMI. Works also with Linux 2.4.
     */
    {
        unsigned int v, ver, maxlvt;

        v = apic_read(APIC_LVR);
        ver = GET_APIC_VERSION(v);
        /* 82489DXs do not report # of LVT entries. */
        maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
        if (maxlvt >= 4)
        {
            /* Read status of performance counter IRQ vector */
            v = apic_read(APIC_LVTPC);

            /* performance counter generates NMI and is not masked? */
            if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
            {
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
                       DEVICE_NAME
                       ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# else /* < 2.6.19 */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
nmi_activated:
#  endif
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
                       DEVICE_NAME
                       ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# endif /* >= 2.6.19 */
            }
        }
    }
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
# endif /* >= 2.6.19 */
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_register(&gMiscDevice);
    if (rc)
    {
        printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
        return rc;
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
    /*
     * Register character device.
     */
    g_iModuleMajor = DEVICE_MAJOR;
    rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
    if (rc < 0)
    {
        dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
        return rc;
    }

    /*
     * Save returned module major number.
     * A zero DEVICE_MAJOR requests a dynamic major, returned in rc.
     */
    if (DEVICE_MAJOR != 0)
        g_iModuleMajor = DEVICE_MAJOR;
    else
        g_iModuleMajor = rc;
    rc = 0;

#ifdef CONFIG_DEVFS_FS
    /*
     * Register a device entry
     */
    g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
    if (g_hDevFsVBoxDrv == NULL)
    {
        dprintf(("devfs_register failed!\n"));
        rc = -EINVAL;
    }
#endif
#endif /* !CONFIG_VBOXDRV_AS_MISC */
    if (!rc)
    {
        /*
         * Initialize the runtime.
         * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
         */
        rc = RTR0Init(0);
        if (RT_SUCCESS(rc))
        {
#ifdef __AMD64__
            rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
#endif
            /*
             * Initialize the device extension.
             */
            if (RT_SUCCESS(rc))
                rc = supdrvInitDevExt(&g_DevExt);
            if (!rc)
            {
                /*
                 * Create the GIP page.
                 */
                rc = VBoxSupDrvInitGip(&g_DevExt);
                if (!rc)
                {
                    /* Full success: everything stays registered. */
                    dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
                    return rc;
                }

                supdrvDeleteDevExt(&g_DevExt);
            }
            else
                rc = -EINVAL;
            RTR0Term();
        }
        else
            rc = -EINVAL;

        /*
         * Failed, cleanup and return the error code.
         */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
        VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif
    }
#ifdef CONFIG_VBOXDRV_AS_MISC
    misc_deregister(&gMiscDevice);
    dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
#else
    VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
#endif
    return rc;
}
481
482
/**
 * Unload the module.
 *
 * Tears things down in reverse order of VBoxSupDrvInit: the misc/character
 * device first, then the GIP, the device extension and finally the IPRT
 * runtime.
 */
static void __exit VBoxSupDrvUnload(void)
{
    int rc;
    dprintf(("VBoxSupDrvUnload\n"));

    /*
     * I don't think it's possible to unload a driver which processes have
     * opened, at least we'll blindly assume that here.
     */
#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_deregister(&gMiscDevice);
    if (rc < 0)
    {
        dprintf(("misc_deregister failed with rc=%#x\n", rc));
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
#ifdef CONFIG_DEVFS_FS
    /*
     * Unregister a device entry
     */
    VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif // devfs
    rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    if (rc < 0)
    {
        dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
    }
#endif /* !CONFIG_VBOXDRV_AS_MISC */

    /*
     * Destroy GIP, delete the device extension and terminate IPRT.
     */
    VBoxSupDrvTermGip(&g_DevExt);
    supdrvDeleteDevExt(&g_DevExt);
    RTR0Term();
}
522
523
524/**
525 * Device open. Called on open /dev/vboxdrv
526 *
527 * @param pInode Pointer to inode info structure.
528 * @param pFilp Associated file pointer.
529 */
530static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp)
531{
532 int rc;
533 PSUPDRVSESSION pSession;
534 dprintf(("VBoxSupDrvCreate: pFilp=%p\n", pFilp));
535
536 /*
537 * Call common code for the rest.
538 */
539 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
540 if (!rc)
541 {
542 pSession->Uid = current->euid;
543 pSession->Gid = current->egid;
544 pSession->Process = RTProcSelf();
545 pSession->R0Process = RTR0ProcHandleSelf();
546 }
547
548 dprintf(("VBoxSupDrvCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
549 pFilp->private_data = pSession;
550
551 return VBoxSupDrvErr2LinuxErr(rc);
552}
553
554
555/**
556 * Close device.
557 *
558 * @param pInode Pointer to inode info structure.
559 * @param pFilp Associated file pointer.
560 */
561static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp)
562{
563 dprintf(("VBoxSupDrvClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
564 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
565 pFilp->private_data = NULL;
566 return 0;
567}
568
569
/**
 * Device I/O Control entry point.
 *
 * Copies the SUPDRVIOCTLDATA header from user space, stages the in/out
 * payload in one vmalloc'ed kernel buffer, dispatches to the platform
 * independent supdrvIOCtl(), and copies the result back.
 *
 * @returns 0 on success, negative Linux error code on failure.
 * @param pInode Pointer to inode info structure.
 * @param pFilp Associated file pointer.
 * @param IOCmd The function specified to ioctl().
 * @param IOArg The argument specified to ioctl().
 */
static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
                                   unsigned int IOCmd, unsigned long IOArg)
{
    int rc;
    SUPDRVIOCTLDATA Args;
    void *pvBuf = NULL;
    int cbBuf = 0;
    unsigned cbOut = 0;

    dprintf2(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p\n", pFilp, IOCmd, (void *)IOArg));

    /*
     * Copy ioctl data structure from user space.
     * The size encoded in the ioctl command must match the header exactly.
     */
    if (_IOC_SIZE(IOCmd) != sizeof(SUPDRVIOCTLDATA))
    {
        dprintf(("VBoxSupDrvDeviceControl: incorrect input length! cbArgs=%d\n", _IOC_SIZE(IOCmd)));
        return -EINVAL;
    }
    if (copy_from_user(&Args, (void *)IOArg, _IOC_SIZE(IOCmd)))
    {
        dprintf(("VBoxSupDrvDeviceControl: copy_from_user(&Args) failed.\n"));
        return -EFAULT;
    }

    /*
     * Allocate and copy user space input data buffer to kernel space.
     * One buffer of max(cbIn, cbOut) serves both directions.
     * NOTE(review): cbIn/cbOut come straight from user space and are not
     * range-checked here; an absurd size only fails via vmalloc -- confirm
     * that supdrvIOCtl() validates the sizes per function.
     */
    if (Args.cbIn > 0 || Args.cbOut > 0)
    {
        cbBuf = max(Args.cbIn, Args.cbOut);
        pvBuf = vmalloc(cbBuf);
        if (pvBuf == NULL)
        {
            dprintf(("VBoxSupDrvDeviceControl: failed to allocate buffer of %d bytes.\n", cbBuf));
            return -ENOMEM;
        }

        if (copy_from_user(pvBuf, (void *)Args.pvIn, Args.cbIn))
        {
            dprintf(("VBoxSupDrvDeviceControl: copy_from_user(pvBuf) failed.\n"));
            vfree(pvBuf);
            return -EFAULT;
        }
    }

    /*
     * Process the IOCtl.
     */
    rc = supdrvIOCtl(IOCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data,
                     pvBuf, Args.cbIn, pvBuf, Args.cbOut, &cbOut);

    /*
     * Copy ioctl data and output buffer back to user space.
     */
    if (rc)
    {
        dprintf(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p failed, rc=%d (linux rc=%d)\n",
                 pFilp, IOCmd, (void *)IOArg, rc, VBoxSupDrvErr2LinuxErr(rc)));
        rc = VBoxSupDrvErr2LinuxErr(rc);
    }
    else if (cbOut > 0)
    {
        /* Sanity: supdrvIOCtl must never report more output than the buffer holds. */
        if (pvBuf != NULL && cbOut <= cbBuf)
        {
            if (copy_to_user((void *)Args.pvOut, pvBuf, cbOut))
            {
                dprintf(("copy_to_user failed.\n"));
                rc = -EFAULT;
            }
        }
        else
        {
            dprintf(("WHAT!?! supdrvIOCtl messed up! cbOut=%d cbBuf=%d pvBuf=%p\n", cbOut, cbBuf, pvBuf));
            rc = -EPERM;
        }
    }

    if (pvBuf)
        vfree(pvBuf);

    dprintf2(("VBoxSupDrvDeviceControl: returns %d\n", rc));
    return rc;
}
662
663
664/**
665 * Initializes any OS specific object creator fields.
666 */
667void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
668{
669 NOREF(pObj);
670 NOREF(pSession);
671}
672
673
674/**
675 * Checks if the session can access the object.
676 *
677 * @returns true if a decision has been made.
678 * @returns false if the default access policy should be applied.
679 *
680 * @param pObj The object in question.
681 * @param pSession The session wanting to access the object.
682 * @param pszObjName The object name, can be NULL.
683 * @param prc Where to store the result when returning true.
684 */
685bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
686{
687 NOREF(pObj);
688 NOREF(pSession);
689 NOREF(pszObjName);
690 NOREF(prc);
691 return false;
692}
693
694
/**
 * Compute allocation order. Some functions allocate 2^order pages.
 *
 * @returns order (smallest iOrder such that 2^iOrder >= cPages).
 * @param   cPages  Number of pages.
 */
static int VBoxSupDrvOrder(unsigned long cPages)
{
    int iOrder;
    unsigned long cTmp;

    /* iOrder = floor(log2(cPages)). */
    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    /* Round up when cPages is not an exact power of two.
       Fix: shift 1UL, not int 1 -- for cPages >= 2^31 on LP64 the old
       '1 << iOrder' was undefined / truncated to 32 bits. */
    if (cPages & ~(1UL << iOrder))
        ++iOrder;

    return iOrder;
}
713
714
715/**
716 * OS Specific code for locking down memory.
717 *
718 * @returns 0 on success.
719 * @returns SUPDRV_ERR_* on failure.
720 * @param pMem Pointer to memory.
721 * This is not linked in anywhere.
722 * @param paPages Array which should be filled with the address of the physical pages.
723 *
724 * @remark See sgl_map_user_pages() for an example of an similar function.
725 */
726int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
727{
728 int rc;
729 struct page **papPages;
730 unsigned iPage;
731 unsigned cPages = pMem->cb >> PAGE_SHIFT;
732 unsigned long pv = (unsigned long)pMem->pvR3;
733 struct vm_area_struct **papVMAs;
734
735 /*
736 * Allocate page pointer array.
737 */
738 papPages = vmalloc(cPages * sizeof(*papPages));
739 if (!papPages)
740 return SUPDRV_ERR_NO_MEMORY;
741
742 /*
743 * Allocate the VMA pointer array.
744 */
745 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
746 if (!papVMAs)
747 return SUPDRV_ERR_NO_MEMORY;
748
749 /*
750 * Get user pages.
751 */
752 down_read(&current->mm->mmap_sem);
753 rc = get_user_pages(current, /* Task for fault acounting. */
754 current->mm, /* Whose pages. */
755 (unsigned long)pv, /* Where from. */
756 cPages, /* How many pages. */
757 1, /* Write to memory. */
758 0, /* force. */
759 papPages, /* Page array. */
760 papVMAs); /* vmas */
761 if (rc != cPages)
762 {
763 up_read(&current->mm->mmap_sem);
764 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
765 return SUPDRV_ERR_LOCK_FAILED;
766 }
767
768 for (iPage = 0; iPage < cPages; iPage++)
769 flush_dcache_page(papPages[iPage]);
770 up_read(&current->mm->mmap_sem);
771
772 pMem->u.locked.papPages = papPages;
773 pMem->u.locked.cPages = cPages;
774
775 /*
776 * Get addresses, protect against fork()
777 */
778 for (iPage = 0; iPage < cPages; iPage++)
779 {
780 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
781 paPages[iPage].uReserved = 0;
782 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
783 }
784
785 vfree(papVMAs);
786
787 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
788 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
789 return 0;
790}
791
792
/**
 * Unlocks the memory pointed to by pv.
 *
 * Releases the page references taken by get_user_pages() in
 * supdrvOSLockMemOne() and frees the page pointer array.
 *
 * @param   pMem    Pointer to memory to unlock.
 *
 * @remark  See sgl_unmap_user_pages() for an example of a similar function.
 */
void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
{
    unsigned iPage;
    dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
              pMem->pvR3, pMem->cb, pMem->u.locked.papPages));

    /*
     * Loop thru the pages and release them.
     * Mark pages dirty so modified data isn't discarded; reserved pages
     * are skipped as they are never paged out.
     */
    for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
    {
        if (!PageReserved(pMem->u.locked.papPages[iPage]))
            SetPageDirty(pMem->u.locked.papPages[iPage]);
        page_cache_release(pMem->u.locked.papPages[iPage]);
    }

    /* free the page array */
    vfree(pMem->u.locked.papPages);
    pMem->u.locked.cPages = 0;
}
820
821
822/**
823 * OS Specific code for allocating page aligned memory with continuous fixed
824 * physical paged backing.
825 *
826 * @returns 0 on success.
827 * @returns SUPDRV_ERR_* on failure.
828 * @param pMem Memory reference record of the memory to be allocated.
829 * (This is not linked in anywhere.)
830 * @param ppvR0 Where to store the virtual address of the ring-0 mapping. (optional)
831 * @param ppvR3 Where to store the virtual address of the ring-3 mapping.
832 * @param pHCPhys Where to store the physical address.
833 */
834int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
835{
836 struct page *paPages;
837 unsigned iPage;
838 unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
839 unsigned cPages = cbAligned >> PAGE_SHIFT;
840 unsigned cOrder = VBoxSupDrvOrder(cPages);
841 unsigned long ulAddr;
842 dma_addr_t HCPhys;
843 int rc = 0;
844 pgprot_t pgFlags;
845 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
846
847 Assert(ppvR3);
848 Assert(pHCPhys);
849
850 /*
851 * Allocate page pointer array.
852 */
853#ifdef __AMD64__ /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
854 paPages = alloc_pages(GFP_DMA, cOrder);
855#else
856 paPages = alloc_pages(GFP_USER, cOrder);
857#endif
858 if (!paPages)
859 return SUPDRV_ERR_NO_MEMORY;
860
861 /*
862 * Lock the pages.
863 */
864 for (iPage = 0; iPage < cPages; iPage++)
865 {
866 SetPageReserved(&paPages[iPage]);
867 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
868 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
869#ifdef DEBUG
870 if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
871 {
872 dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
873 iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
874 BUG();
875 }
876#endif
877 }
878 HCPhys = page_to_phys(paPages);
879
880 /*
881 * Allocate user space mapping and put the physical pages into it.
882 */
883 down_write(&current->mm->mmap_sem);
884 ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
885 if (!(ulAddr & ~PAGE_MASK))
886 {
887#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
888 int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
889#else
890 int rc2 = 0;
891 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
892 if (vma)
893#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
894 rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
895#else
896 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
897#endif
898 else
899 {
900 rc = SUPDRV_ERR_NO_MEMORY;
901 dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
902 }
903#endif
904 if (rc2)
905 {
906 rc = SUPDRV_ERR_NO_MEMORY;
907 dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
908 }
909 }
910 else
911 {
912 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
913 rc = SUPDRV_ERR_NO_MEMORY;
914 }
915 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
916
917 /*
918 * Success?
919 */
920 if (!rc)
921 {
922 *pHCPhys = HCPhys;
923 *ppvR3 = ulAddr;
924 if (ppvR0)
925 *ppvR0 = (void *)ulAddr;
926 pMem->pvR3 = ulAddr;
927 pMem->pvR0 = NULL;
928 pMem->u.cont.paPages = paPages;
929 pMem->u.cont.cPages = cPages;
930 pMem->cb = cbAligned;
931
932 dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
933 pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
934 global_flush_tlb();
935 return 0;
936 }
937
938 /*
939 * Failure, cleanup and be gone.
940 */
941 down_write(&current->mm->mmap_sem);
942 if (ulAddr & ~PAGE_MASK)
943 MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
944 for (iPage = 0; iPage < cPages; iPage++)
945 {
946 ClearPageReserved(&paPages[iPage]);
947 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
948 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
949 }
950 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
951 __free_pages(paPages, cOrder);
952
953 global_flush_tlb();
954 return rc;
955}
956
957
/**
 * Frees contiguous memory allocated by supdrvOSContAllocOne().
 *
 * Unmaps the ring-3 mapping (when the mm still exists), restores the page
 * attributes and frees the physically contiguous page run.
 *
 * @param   pMem    Memory reference record of the memory to be freed.
 */
void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
{
    unsigned iPage;

    dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
              pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));

    /*
     * do_exit() destroys the mm before closing files.
     * I really hope it cleans up our stuff properly...
     */
    if (current->mm)
    {
        down_write(&current->mm->mmap_sem);
        MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
        up_write(&current->mm->mmap_sem); /* check when we can leave this. */
    }

    /*
     * Change page attributes freeing the pages.
     */
    for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
    {
        ClearPageReserved(&pMem->u.cont.paPages[iPage]);
        if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
    }
    /* The run was allocated with alloc_pages(); free with the matching order. */
    __free_pages(pMem->u.cont.paPages, VBoxSupDrvOrder(pMem->u.cont.cPages));

    pMem->u.cont.cPages = 0;
}
994
995
/**
 * Allocates memory which mapped into both kernel and user space.
 * The returned memory is page aligned and so is the allocation.
 *
 * @returns 0 on success.
 * @returns SUPDRV_ERR_* on failure.
 * @param   pMem    Memory reference record of the memory to be allocated.
 *                  (This is not linked in anywhere.)
 * @param   ppvR0   Where to store the address of the Ring-0 mapping.
 * @param   ppvR3   Where to store the address of the Ring-3 mapping.
 */
int VBOXCALL supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
{
    const unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
    const unsigned cPages = cbAligned >> PAGE_SHIFT;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
    /* Pre-2.4.22 has no vmap(); a physically contiguous run is required. */
    unsigned cOrder = VBoxSupDrvOrder(cPages);
    struct page *paPages;
#endif
    struct page **papPages;
    unsigned iPage;
    pgprot_t pgFlags;
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;

    /*
     * Allocate array with page pointers.
     * The array is stored in pMem right away so supdrvOSMemFreeOne() can
     * clean up on any of the failure paths below.
     */
    pMem->u.mem.cPages = 0;
    pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
    if (!papPages)
        return SUPDRV_ERR_NO_MEMORY;

    /*
     * Allocate the pages.
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_HIGHUSER);
        if (!papPages[iPage])
        {
            /* Record how many pages were obtained so the cleanup frees
               exactly those. */
            pMem->u.mem.cPages = iPage;
            supdrvOSMemFreeOne(pMem);
            return SUPDRV_ERR_NO_MEMORY;
        }
    }

#else /* < 2.4.22 */
    paPages = alloc_pages(GFP_USER, cOrder);
    if (!paPages)
    {
        supdrvOSMemFreeOne(pMem);
        return SUPDRV_ERR_NO_MEMORY;
    }
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = &paPages[iPage];
        if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
        if (PageHighMem(papPages[iPage]))
            BUG();
    }
#endif
    pMem->u.mem.cPages = cPages;

    /*
     * Reserve the pages.
     */
    for (iPage = 0; iPage < cPages; iPage++)
        SetPageReserved(papPages[iPage]);

    /*
     * Create the Ring-0 mapping.
     */
    if (ppvR0)
    {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
# ifdef VM_MAP
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
# else
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
# endif
#else
        /* Physically contiguous low-mem run: the direct mapping suffices. */
        *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
#endif
    }
    if (pMem->pvR0 || !ppvR0)
    {
        /*
         * Create the ring3 mapping.
         */
        if (ppvR3)
            *ppvR3 = pMem->pvR3 = VBoxSupDrvMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
        if (pMem->pvR3 || !ppvR3)
            return 0;
        dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
    }
    else
        dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));

    supdrvOSMemFreeOne(pMem);
    return SUPDRV_ERR_NO_MEMORY;
}
1099
1100
1101/**
1102 * Get the physical addresses of the pages in the allocation.
1103 * This is called while inside bundle the spinlock.
1104 *
1105 * @param pMem Memory reference record of the memory.
1106 * @param paPages Where to store the page addresses.
1107 */
1108void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1109{
1110 unsigned iPage;
1111 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1112 {
1113 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1114 paPages[iPage].uReserved = 0;
1115 }
1116}
1117
1118
1119/**
1120 * Frees memory allocated by supdrvOSMemAllocOne().
1121 *
1122 * @param pMem Memory reference record of the memory to be free.
1123 */
1124void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1125{
1126 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1127 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1128
1129 /*
1130 * Unmap the user mapping (if any).
1131 * do_exit() destroys the mm before closing files.
1132 */
1133 if (pMem->pvR3 && current->mm)
1134 {
1135 down_write(&current->mm->mmap_sem);
1136 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1137 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1138 }
1139 pMem->pvR3 = NIL_RTR3PTR;
1140
1141 /*
1142 * Unmap the kernel mapping (if any).
1143 */
1144 if (pMem->pvR0)
1145 {
1146#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1147 vunmap(pMem->pvR0);
1148#endif
1149 pMem->pvR0 = NULL;
1150 }
1151
1152 /*
1153 * Free the physical pages.
1154 */
1155 if (pMem->u.mem.papPages)
1156 {
1157 struct page **papPages = pMem->u.mem.papPages;
1158 const unsigned cPages = pMem->u.mem.cPages;
1159 unsigned iPage;
1160
1161 /* Restore the page flags. */
1162 for (iPage = 0; iPage < cPages; iPage++)
1163 {
1164 ClearPageReserved(papPages[iPage]);
1165#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1166 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1167 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1168#endif
1169 }
1170
1171 /* Free the pages. */
1172#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1173 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1174 __free_page(papPages[iPage]);
1175#else
1176 if (cPages > 0)
1177 __free_pages(papPages[0], VBoxSupDrvOrder(cPages));
1178#endif
1179 /* Free the page pointer array. */
1180 kfree(papPages);
1181 pMem->u.mem.papPages = NULL;
1182 }
1183 pMem->u.mem.cPages = 0;
1184}
1185
1186
1187/**
1188 * Maps a range of pages into user space.
1189 *
1190 * @returns Pointer to the user space mapping on success.
1191 * @returns NULL on failure.
1192 * @param papPages Array of the pages to map.
1193 * @param cPages Number of pages to map.
1194 * @param fProt The mapping protection.
1195 * @param pgFlags The page level protection.
1196 */
1197static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1198{
1199 int rc = SUPDRV_ERR_NO_MEMORY;
1200 unsigned long ulAddr;
1201
1202 /*
1203 * Allocate user space mapping.
1204 */
1205 down_write(&current->mm->mmap_sem);
1206 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1207 if (!(ulAddr & ~PAGE_MASK))
1208 {
1209 /*
1210 * Map page by page into the mmap area.
1211 * This is generic, paranoid and not very efficient.
1212 */
1213 int rc = 0;
1214 unsigned long ulAddrCur = ulAddr;
1215 unsigned iPage;
1216 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1217 {
1218#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1219 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1220 if (!vma)
1221 break;
1222#endif
1223
1224#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1225 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1226#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1227 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1228#else /* 2.4 */
1229 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1230#endif
1231 if (rc)
1232 break;
1233 }
1234
1235 /*
1236 * Successful?
1237 */
1238 if (iPage >= cPages)
1239 {
1240 up_write(&current->mm->mmap_sem);
1241 return ulAddr;
1242 }
1243
1244 /* no, cleanup! */
1245 if (rc)
1246 dprintf(("VBoxSupDrvMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1247 else
1248 dprintf(("VBoxSupDrvMapUser: find_vma failed!\n"));
1249
1250 MY_DO_MUNMAP(current->mm, ulAddr, cPages << PAGE_SHIFT);
1251 }
1252 else
1253 {
1254 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1255 rc = SUPDRV_ERR_NO_MEMORY;
1256 }
1257 up_write(&current->mm->mmap_sem);
1258
1259 return NIL_RTR3PTR;
1260}
1261
1262
1263/**
1264 * Initializes the GIP.
1265 *
1266 * @returns negative errno.
1267 * @param pDevExt Instance data. GIP stuff may be updated.
1268 */
static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    dma_addr_t HCPhys;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxSupDrvInitGip:\n"));

    /*
     * Allocate the single page backing the GIP.
     */
    pPage = alloc_pages(GFP_USER, 0);
    if (!pPage)
    {
        dprintf(("VBoxSupDrvInitGip: failed to allocate the GIP page\n"));
        return -ENOMEM;
    }

    /*
     * Reserve the page so the VM subsystem leaves it alone, and stash it in
     * the global for VBoxSupDrvTermGip() to free.
     */
    SetPageReserved(pPage);
    g_pGipPage = pPage;

    /*
     * Establish the initial timestamps and call the common init routine.
     */
    HCPhys = page_to_phys(pPage);
    pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
    pDevExt->ulLastJiffies = jiffies;
#ifdef TICK_NSEC
    /* TICK_NSEC is the exact nanosecond length of one jiffy. */
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
             TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#else
    /* NOTE(review): 1000000/HZ is MICROseconds per jiffy, while the TICK_NSEC
       branch uses nanoseconds - this path looks 1000x off; verify on kernels
       without TICK_NSEC. */
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
             (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#endif
    supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
                  HZ <= 1000 ? HZ : 1000);

    /*
     * Initialize the update timer. It is not armed here (no add_timer);
     * supdrvOSGipResume() starts it via mod_timer.
     */
    init_timer(&g_GipTimer);
    g_GipTimer.data = (unsigned long)pDevExt;
    g_GipTimer.function = VBoxSupGipTimer;
    g_GipTimer.expires = jiffies;
#ifdef CONFIG_SMP
    /* One extra timer per CPU for the async TSC mode. */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
    {
        pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
        pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
        pDevExt->aCPUs[i].iSmpProcessorId = -512; /* invalid until VBoxSupGipResumePerCpu fills it in */
        init_timer(&pDevExt->aCPUs[i].Timer);
        pDevExt->aCPUs[i].Timer.data = i; /* the expected APIC ID for this timer */
        pDevExt->aCPUs[i].Timer.function = VBoxSupGipTimerPerCpu;
        pDevExt->aCPUs[i].Timer.expires = jiffies;
    }
#endif

    return 0;
}
1335
1336
1337/**
1338 * Terminates the GIP.
1339 *
1340 * @returns negative errno.
1341 * @param pDevExt Instance data. GIP stuff may be updated.
1342 */
static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxSupDrvTermGip:\n"));

    /*
     * Delete the timers if they are pending, waiting for running callbacks
     * to finish (del_timer_sync).
     */
    if (timer_pending(&g_GipTimer))
        del_timer_sync(&g_GipTimer);
#ifdef CONFIG_SMP
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
        if (timer_pending(&pDevExt->aCPUs[i].Timer))
            del_timer_sync(&pDevExt->aCPUs[i].Timer);
#endif

    /*
     * Detach the GIP from the device extension and uninitialize the content.
     */
    pGip = pDevExt->pGip;
    pDevExt->pGip = NULL;
    if (pGip)
        supdrvGipTerm(pGip);

    /*
     * Free the backing page, undoing the SetPageReserved() done at init.
     */
    pPage = g_pGipPage;
    g_pGipPage = NULL;
    if (pPage)
    {
        ClearPageReserved(pPage);
        __free_pages(pPage, 0);
    }

    return 0;
}
1384
1385/**
1386 * Timer callback function.
1387 *
1388 * In ASYNC TSC mode this is called on the primary CPU, and we're
1389 * assuming that the CPU remains online.
1390 *
1391 * @param ulUser The device extension pointer.
1392 */
static void VBoxSupGipTimer(unsigned long ulUser)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    unsigned long ulNow;
    unsigned long ulDiff;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep interrupts off while reading jiffies and updating the timestamps. */
    local_irq_save(SavedFlags);

    pDevExt = (PSUPDRVDEVEXT)ulUser;
    pGip = pDevExt->pGip;
    ulNow = jiffies;

#ifdef CONFIG_SMP
    if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
    {
        /* Async TSC mode: advance the per-CPU monotonic time of the CPU we
           are executing on, identified by its APIC ID. */
        uint8_t iCPU = ASMGetApicId();
        ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
        pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
        u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
        /* NOTE(review): 1000000/HZ is microseconds per jiffy; see the same
           concern in VBoxSupDrvInitGip. */
        u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
        pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
    }
    else
#endif /* CONFIG_SMP */
    {
        /* Synchronous mode: one global monotonic timestamp. */
        ulDiff = ulNow - pDevExt->ulLastJiffies;
        pDevExt->ulLastJiffies = ulNow;
#ifdef TICK_NSEC
        u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
#else
        u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
        pDevExt->u64LastMonotime = u64Monotime;
    }
    if (RT_LIKELY(pGip))
        supdrvGipUpdate(pDevExt->pGip, u64Monotime);
    /* Re-arm unless suspended: next jiffy when HZ <= 1000, else ~1ms later. */
    if (RT_LIKELY(!pDevExt->fGIPSuspended))
        mod_timer(&g_GipTimer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));

    local_irq_restore(SavedFlags);
}
1440
1441
1442#ifdef CONFIG_SMP
1443/**
1444 * Timer callback function for the other CPUs.
1445 *
1446 * @param iTimerCPU The APIC ID of this timer.
1447 */
static void VBoxSupGipTimerPerCpu(unsigned long iTimerCPU)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    uint8_t iCPU;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep interrupts off while reading jiffies and updating the timestamps. */
    local_irq_save(SavedFlags);

    pDevExt = &g_DevExt;
    pGip = pDevExt->pGip;
    /* iTimerCPU carries the APIC ID this timer was armed for; compare it with
       the APIC ID of the CPU actually running the callback below. */
    iCPU = ASMGetApicId();

    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        if (RT_LIKELY(iTimerCPU == iCPU))
        {
            /* Advance this CPU's monotonic time and push it into the GIP. */
            unsigned long ulNow = jiffies;
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
            pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
            pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            /* Re-arm unless suspended: next jiffy when HZ <= 1000, else ~1ms. */
            if (RT_LIKELY(!pDevExt->fGIPSuspended))
                mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d !=? timer-cpuid=%d)\n",
                   iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%d cpuid=%d\n",
               iCPU, RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);
}
1490#endif /* CONFIG_SMP */
1491
1492
1493/**
1494 * Maps the GIP into user space.
1495 *
1496 * @returns negative errno.
1497 * @param pDevExt Instance data.
1498 */
1499int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE *ppGip)
1500{
1501 int rc = 0;
1502 unsigned long ulAddr;
1503 unsigned long HCPhys = pDevExt->HCPhysGip;
1504 pgprot_t pgFlags;
1505 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1506 dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1507
1508 /*
1509 * Allocate user space mapping and put the physical pages into it.
1510 */
1511 down_write(&current->mm->mmap_sem);
1512 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1513 if (!(ulAddr & ~PAGE_MASK))
1514 {
1515#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1516 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1517#else
1518 int rc2 = 0;
1519 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1520 if (vma)
1521#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1522 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1523#else
1524 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1525#endif
1526 else
1527 {
1528 rc = SUPDRV_ERR_NO_MEMORY;
1529 dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1530 }
1531#endif
1532 if (rc2)
1533 {
1534 rc = SUPDRV_ERR_NO_MEMORY;
1535 dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1536 }
1537 }
1538 else
1539 {
1540 dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1541 rc = SUPDRV_ERR_NO_MEMORY;
1542 }
1543 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1544
1545 /*
1546 * Success?
1547 */
1548 if (!rc)
1549 {
1550 *ppGip = (PCSUPGLOBALINFOPAGE)ulAddr;
1551 dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1552 return 0;
1553 }
1554
1555 /*
1556 * Failure, cleanup and be gone.
1557 */
1558 if (ulAddr & ~PAGE_MASK)
1559 {
1560 down_write(&current->mm->mmap_sem);
1561 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1562 up_write(&current->mm->mmap_sem);
1563 }
1564
1565 dprintf2(("supdrvOSGipMap: returns %d\n", rc));
1566 return rc;
1567}
1568
1569
1570/**
1571 * Maps the GIP into user space.
1572 *
1573 * @returns negative errno.
1574 * @param pDevExt Instance data.
1575 */
1576int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE pGip)
1577{
1578 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1579 if (current->mm)
1580 {
1581 down_write(&current->mm->mmap_sem);
1582 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1583 up_write(&current->mm->mmap_sem);
1584 }
1585 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1586 return 0;
1587}
1588
1589
1590/**
1591 * Resumes the GIP updating.
1592 *
1593 * @param pDevExt Instance data.
1594 */
void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
{
    dprintf2(("supdrvOSGipResume:\n"));
    /* Clear the suspended flag first so the timer callbacks will re-arm. */
    ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
#ifdef CONFIG_SMP
    if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
#endif
        /* Synchronous TSC (or UP build): the one global timer suffices. */
        mod_timer(&g_GipTimer, jiffies);
#ifdef CONFIG_SMP
    else
    {
        /* Async TSC: restart the global timer here and the per-CPU timers on
           their own CPUs via the IPI callback. */
        mod_timer(&g_GipTimer, jiffies);
        smp_call_function(VBoxSupGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
    }
#endif
}
1611
1612
1613#ifdef CONFIG_SMP
1614/**
1615 * Callback for resuming GIP updating on the other CPUs.
1616 *
1617 * This is only used when the GIP is in async tsc mode.
1618 *
1619 * @param pvUser Pointer to the device instance.
1620 */
static void VBoxSupGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint8_t iCPU = ASMGetApicId();

    /* Guard against APIC IDs beyond the GIP per-CPU array. */
    if (RT_UNLIKELY(iCPU >= RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        printk("vboxdrv: error: apicid=%d max=%d cpuid=%d\n",
               iCPU, RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
        return;
    }

    /* Record the scheduler CPU id (used in diagnostics messages) and arm
       this CPU's GIP update timer. */
    pDevExt->aCPUs[iCPU].iSmpProcessorId = smp_processor_id();
    mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies);
}
1636#endif /* CONFIG_SMP */
1637
1638
1639/**
1640 * Suspends the GIP updating.
1641 *
1642 * @param pDevExt Instance data.
1643 */
void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
{
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf2(("supdrvOSGipSuspend:\n"));
    /* Set the flag first so a callback racing us will not re-arm its timer. */
    ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);

    /* Stop the global timer and, on SMP, all the per-CPU timers, waiting for
       running callbacks to finish (del_timer_sync). */
    if (timer_pending(&g_GipTimer))
        del_timer_sync(&g_GipTimer);
#ifdef CONFIG_SMP
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
        if (timer_pending(&pDevExt->aCPUs[i].Timer))
            del_timer_sync(&pDevExt->aCPUs[i].Timer);
#endif
}
1660
1661
1662/**
1663 * Get the current CPU count.
1664 * @returns Number of cpus.
1665 */
unsigned VBOXCALL supdrvOSGetCPUCount(void)
{
#ifdef CONFIG_SMP
# ifdef num_present_cpus
    /* 2.6-style API: number of CPUs present in the system. */
    return num_present_cpus();
# else
    /* 2.4-style global CPU count. */
    return smp_num_cpus;
# endif
#else
    /* Uniprocessor kernel. */
    return 1;
#endif
}
1678
1679/**
1680 * Force async tsc mode.
1681 * @todo add a module argument for this.
1682 */
bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
{
    /* Async TSC mode is never forced on Linux; no module parameter exists
       for it yet (see the @todo above). */
    return false;
}
1687
1688
1689/**
1690 * Converts a supdrv error code to an linux error code.
1691 *
1692 * @returns corresponding linux error code.
1693 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1694 */
1695static int VBoxSupDrvErr2LinuxErr(int rc)
1696{
1697 switch (rc)
1698 {
1699 case 0: return 0;
1700 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1701 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1702 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1703 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1704 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1705 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1706 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1707 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1708 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1709 case SUPDRV_ERR_IDT_FAILED: return -1000;
1710 }
1711
1712 return -EPERM;
1713}
1714
1715
RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
{
#if 1
    /* Format into a stack buffer and hand it to printk as a plain string
       (avoids printk interpreting any %-sequences in the formatted output). */
    va_list args;
    char szMsg[512];

    va_start(args, pszFormat);
    vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
    szMsg[sizeof(szMsg) - 1] = '\0'; /* belt and braces: guarantee termination */
    printk("%s", szMsg);
    va_end(args);
#else
    /* forward to printf - needs some more GCC hacking to fix ebp... */
    /* NOTE(review): this disabled branch does not compile as written (stray
       comma before the constraint list); kept for reference only. */
    __asm__ __volatile__ ("mov %0, %esp\n\t"
                          "jmp %1\n\t",
                          :: "r" ((uintptr_t)&pszFormat - 4),
                             "m" (printk));
#endif
    return 0;
}
1736
1737
1738/** Runtime assert implementation for Linux Ring-0. */
1739RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
1740{
1741 printk("!!Assertion Failed!!\n"
1742 "Expression: %s\n"
1743 "Location : %s(%d) %s\n",
1744 pszExpr, pszFile, uLine, pszFunction);
1745}
1746
1747
1748/** Runtime assert implementation for Linux Ring-0. */
1749RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1750{ /* forwarder. */
1751 va_list ap;
1752 char msg[256];
1753
1754 va_start(ap, pszFormat);
1755 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1756 msg[sizeof(msg) - 1] = '\0';
1757 printk("%s", msg);
1758 va_end(ap);
1759}
1760
1761
/* GCC C++ hack. NOTE(review): presumably a dummy personality symbol so linked
   C++ objects resolve without pulling in the C++ runtime - confirm. */
unsigned __gxx_personality_v0 = 0xcccccccc;


/* Module entry and exit points. */
module_init(VBoxSupDrvInit);
module_exit(VBoxSupDrvUnload);

MODULE_AUTHOR("innotek GmbH");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
/* Stringify SUPDRVIOC_VERSION so it can be appended to the version string. */
#define xstr(s) str(s)
#define str(s) #s
MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette