VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 1666

Last change on this file since 1666 was 1666, checked in by vboxsync, 18 years ago

make VBOX_VERSION visible with modinfo

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 50.1 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21/*******************************************************************************
22* Header Files *
23*******************************************************************************/
24#include "SUPDRV.h"
25#include "version-generated.h"
26
27#include <iprt/assert.h>
28#include <iprt/spinlock.h>
29#include <iprt/semaphore.h>
30#include <iprt/initterm.h>
31#include <iprt/process.h>
32#include <iprt/err.h>
33#include <iprt/mem.h>
34
35#include <linux/module.h>
36#include <linux/kernel.h>
37#include <linux/init.h>
38#include <linux/fs.h>
39#include <linux/mm.h>
40#include <linux/pagemap.h>
41#include <linux/slab.h>
42#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
43# include <linux/jiffies.h>
44#endif
45#include <asm/mman.h>
46#include <asm/io.h>
47#include <asm/uaccess.h>
48#ifdef CONFIG_DEVFS_FS
49# include <linux/devfs_fs_kernel.h>
50#endif
51#ifdef CONFIG_VBOXDRV_AS_MISC
52# include <linux/miscdevice.h>
53#endif
54#ifdef CONFIG_X86_LOCAL_APIC
55# include <asm/apic.h>
56# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
57# include <asm/nmi.h>
58# endif
59#endif
60
61#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
62# ifndef page_to_pfn
63# define page_to_pfn(page) ((page) - mem_map)
64# endif
65# include <asm/pgtable.h>
66# define global_flush_tlb __flush_tlb_global
67#endif
68
69/* devfs defines */
70#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
71# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
72
73# define VBOX_REGISTER_DEVFS() \
74({ \
75 void *rc = NULL; \
76 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
77 S_IFCHR | S_IRUGO | S_IWUGO, \
78 DEVICE_NAME) == 0) \
79 rc = (void *)' '; /* return not NULL */ \
80 rc; \
81 })
82
83# define VBOX_UNREGISTER_DEVFS(handle) \
84 devfs_remove(DEVICE_NAME);
85
86# else /* < 2.6.0 */
87
88# define VBOX_REGISTER_DEVFS() \
89 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
90 DEVICE_MAJOR, 0, \
91 S_IFCHR | S_IRUGO | S_IWUGO, \
92 &gFileOpsVBoxDrv, NULL)
93
94# define VBOX_UNREGISTER_DEVFS(handle) \
95 if (handle != NULL) \
96 devfs_unregister(handle)
97
98# endif /* < 2.6.0 */
99#endif /* CONFIG_DEV_FS && !CONFIG_VBOXDEV_AS_MISC */
100
101#ifndef CONFIG_VBOXDRV_AS_MISC
102# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
103# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
104# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
105# else
106# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
107# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
108# endif
109#endif /* !CONFIG_VBOXDRV_AS_MISC */
110
111
112#ifdef CONFIG_X86_HIGH_ENTRY
113# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
114#endif
115
116/*
117 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
118 */
119#if defined(__AMD64__)
120# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
121#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
122# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
123#else
124# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
125#endif
126
127/*
128 * The redhat hack section.
129 * - The current hacks are for 2.4.21-15.EL only.
130 */
131#ifndef NO_REDHAT_HACKS
132/* accounting. */
133# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
134# ifdef VM_ACCOUNT
135# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
136# endif
137# endif
138
139/* backported remap_page_range. */
140# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
141# include <asm/tlb.h>
142# ifdef tlb_vma /* probably not good enough... */
143# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
144# endif
145# endif
146
147# ifndef __AMD64__
148/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
149 * the page attributes from PAGE_KERNEL to something else, because there appears
150 * to be a bug in one of the many patches that redhat applied.
151 * It should be safe to do this on less buggy linux kernels too. ;-)
152 */
153# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
154 do { \
155 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
156 change_page_attr(pPages, cPages, prot); \
157 change_page_attr(pPages, cPages, prot); \
158 } while (0)
159# endif
160#endif /* !NO_REDHAT_HACKS */
161
162
163#ifndef MY_DO_MUNMAP
164# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
165#endif
166
167#ifndef MY_CHANGE_PAGE_ATTR
168# ifdef __AMD64__ /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
169# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
170 do { \
171 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
172 change_page_attr(pPages, cPages, prot); \
173 } while (0)
174# else
175# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
176# endif
177#endif
178
179
180/** @def ONE_MSEC_IN_JIFFIES
181 * The number of jiffies that make up 1 millisecond. This is only actually used
182 * when HZ is > 1000. */
183#if HZ <= 1000
184# define ONE_MSEC_IN_JIFFIES 0
185#elif !(HZ % 1000)
186# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
187#else
188# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
189# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
190#endif
191
192
193/*******************************************************************************
194* Defined Constants And Macros *
195*******************************************************************************/
196/**
197 * Device extention & session data association structure.
198 */
199static SUPDRVDEVEXT g_DevExt;
200
201/** Timer structure for the GIP update. */
202static struct timer_list g_GipTimer;
203/** Pointer to the page structure for the GIP. */
204struct page *g_pGipPage;
205
206/** Registered devfs device handle. */
207#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
208# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
209static void *g_hDevFsVBoxDrv = NULL;
210# else
211static devfs_handle_t g_hDevFsVBoxDrv = NULL;
212# endif
213#endif
214
215#ifndef CONFIG_VBOXDRV_AS_MISC
216/** Module major number */
217#define DEVICE_MAJOR 234
218/** Saved major device number */
219static int g_iModuleMajor;
220#endif /* !CONFIG_VBOXDRV_AS_MISC */
221
222/** The module name. */
223#define DEVICE_NAME "vboxdrv"
224
225#ifdef __AMD64__
226/**
227 * Memory for the executable memory heap (in IPRT).
228 */
229extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
230__asm__(".section execmemory, \"awx\", @progbits\n\t"
231 ".align 32\n\t"
232 ".globl g_abExecMemory\n"
233 "g_abExecMemory:\n\t"
234 ".zero 1572864\n\t"
235 ".type g_abExecMemory, @object\n\t"
236 ".size g_abExecMemory, 1572864\n\t"
237 ".text\n\t");
238#endif
239
240
241/*******************************************************************************
242* Internal Functions *
243*******************************************************************************/
244static int VBoxSupDrvInit(void);
245static void VBoxSupDrvUnload(void);
246static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp);
247static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp);
248static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
249 unsigned int IOCmd, unsigned long IOArg);
250static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
251static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt);
252static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt);
253static void VBoxSupGipTimer(unsigned long ulUser);
254#ifdef CONFIG_SMP
255static void VBoxSupGipTimerPerCpu(unsigned long ulUser);
256static void VBoxSupGipResumePerCpu(void *pvUser);
257#endif
258static int VBoxSupDrvOrder(unsigned long size);
259static int VBoxSupDrvErr2LinuxErr(int);
260
261
/** The file_operations structure - entry points invoked by the kernel for
 *  open/close/ioctl on /dev/vboxdrv. Uses GNU designated-initializer
 *  (label:) syntax as was common in older kernel code. */
static struct file_operations gFileOpsVBoxDrv =
{
    owner:      THIS_MODULE,
    open:       VBoxSupDrvCreate,
    release:    VBoxSupDrvClose,
    ioctl:      VBoxSupDrvDeviceControl,
};
270
#ifdef CONFIG_VBOXDRV_AS_MISC
/** The miscdevice structure - used when the driver registers as a misc
 *  device with a dynamically assigned minor number instead of claiming a
 *  fixed character-device major. */
static struct miscdevice gMiscDevice =
{
    minor:      MISC_DYNAMIC_MINOR,
    name:       DEVICE_NAME,
    fops:       &gFileOpsVBoxDrv,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
     LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
    /* the devfs_name field only exists in kernels 2.6.0 .. 2.6.17. */
    devfs_name: DEVICE_NAME,
# endif
};
#endif
284
285
/**
 * Initialize module.
 *
 * Refuses to load if an NMI watchdog is (or may become) active, since an NMI
 * occurring inside the world switcher would crash the machine. Then registers
 * the device node (misc device or character device depending on config),
 * initializes the IPRT runtime, the device extension and the GIP page.
 *
 * @returns appropriate status code (0 on success, negative errno on failure).
 */
static int __init VBoxSupDrvInit(void)
{
    int       rc;

    dprintf(("VBoxDrv::ModuleInit\n"));

#ifdef CONFIG_X86_LOCAL_APIC
    /*
     * If an NMI occurs while we are inside the world switcher the machine will crash.
     * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
     * compared with another counter increased in the timer interrupt handler. Therefore
     * we don't allow to setup an NMI watchdog.
     */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
    /*
     * First test: NMI activated? Only works with Linux 2.6 -- 2.4 does not export
     * the nmi_watchdog variable.
     */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    /*
     * Permanent IO_APIC mode active? No way to handle this!
     */
    if (nmi_watchdog == NMI_IO_APIC)
    {
        printk(KERN_ERR DEVICE_NAME
               ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
               DEVICE_NAME
               ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
               DEVICE_NAME
               ": command line.\n");
        return -EINVAL;
    }

    /*
     * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again.
     */
    atomic_set(&nmi_active, -1);
    printk(KERN_INFO DEVICE_NAME ": Trying to deactivate NMI watchdog permanently...\n");

    /*
     * Now fall through and see if it actually was enabled before. If so, fail
     * as we cannot deactivate it cleanly from here.
     */
#  else /* < 2.6.19 */
    /*
     * Older 2.6 kernels: nmi_watchdog is not initialized by default.
     */
    if (nmi_watchdog != NMI_NONE)
        goto nmi_activated;
#  endif
# endif /* >= 2.6.0 */

    /*
     * Second test: Interrupt generated by performance counter not masked and can
     * generate an NMI. Works also with Linux 2.4.
     */
    {
        unsigned int v, ver, maxlvt;

        v = apic_read(APIC_LVR);
        ver = GET_APIC_VERSION(v);
        /* 82489DXs do not report # of LVT entries. */
        maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
        if (maxlvt >= 4)
        {
            /* Read status of performance counter IRQ vector */
            v = apic_read(APIC_LVTPC);

            /* performance counter generates NMI and is not masked? */
            if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
            {
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
                       DEVICE_NAME
                       ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# else /* < 2.6.19 */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
nmi_activated:
#  endif
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
                       DEVICE_NAME
                       ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# endif /* >= 2.6.19 */
            }
        }
    }
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
# endif /* >= 2.6.19 */
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_register(&gMiscDevice);
    if (rc)
    {
        printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
        return rc;
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
    /*
     * Register character device.
     */
    g_iModuleMajor = DEVICE_MAJOR;
    rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
    if (rc < 0)
    {
        dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
        return rc;
    }

    /*
     * Save returned module major number. When DEVICE_MAJOR is 0 the kernel
     * assigned a dynamic major and returned it in rc.
     */
    if (DEVICE_MAJOR != 0)
        g_iModuleMajor = DEVICE_MAJOR;
    else
        g_iModuleMajor = rc;
    rc = 0;

#ifdef CONFIG_DEVFS_FS
    /*
     * Register a devfs device entry.
     */
    g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
    if (g_hDevFsVBoxDrv == NULL)
    {
        dprintf(("devfs_register failed!\n"));
        rc = -EINVAL;
    }
#endif
#endif /* !CONFIG_VBOXDRV_AS_MISC */
    if (!rc)
    {
        /*
         * Initialize the runtime.
         * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
         */
        rc = RTR0Init(0);
        if (RT_SUCCESS(rc))
        {
#ifdef __AMD64__
            rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
#endif
            /*
             * Initialize the device extension.
             */
            if (RT_SUCCESS(rc))
                rc = supdrvInitDevExt(&g_DevExt);
            if (!rc)
            {
                /*
                 * Create the GIP page.
                 */
                rc = VBoxSupDrvInitGip(&g_DevExt);
                if (!rc)
                {
                    dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
                    return rc;
                }

                supdrvDeleteDevExt(&g_DevExt);
            }
            else
                rc = -EINVAL;
            RTR0Term();
        }
        else
            rc = -EINVAL;

        /*
         * Failed, cleanup and return the error code.
         */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
        VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif
    }
#ifdef CONFIG_VBOXDRV_AS_MISC
    misc_deregister(&gMiscDevice);
    dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
#else
    VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
#endif
    return rc;
}
480
481
/**
 * Unload the module.
 *
 * Reverses VBoxSupDrvInit: unregisters the device node, destroys the GIP,
 * deletes the device extension and terminates the IPRT runtime.
 */
static void __exit VBoxSupDrvUnload(void)
{
    int                 rc;
    dprintf(("VBoxSupDrvUnload\n"));

    /*
     * I Don't think it's possible to unload a driver which processes have
     * opened, at least we'll blindly assume that here.
     */
#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_deregister(&gMiscDevice);
    if (rc < 0)
    {
        dprintf(("misc_deregister failed with rc=%#x\n", rc));
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
#ifdef CONFIG_DEVFS_FS
    /*
     * Unregister the devfs device entry.
     */
    VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif /* devfs */
    rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    if (rc < 0)
    {
        dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
    }
#endif /* !CONFIG_VBOXDRV_AS_MISC */

    /*
     * Destroy GIP, delete the device extension and terminate IPRT.
     */
    VBoxSupDrvTermGip(&g_DevExt);
    supdrvDeleteDevExt(&g_DevExt);
    RTR0Term();
}
521
522
523/**
524 * Device open. Called on open /dev/vboxdrv
525 *
526 * @param pInode Pointer to inode info structure.
527 * @param pFilp Associated file pointer.
528 */
529static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp)
530{
531 int rc;
532 PSUPDRVSESSION pSession;
533 dprintf(("VBoxSupDrvCreate: pFilp=%p\n", pFilp));
534
535 /*
536 * Call common code for the rest.
537 */
538 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
539 if (!rc)
540 {
541 pSession->Uid = current->euid;
542 pSession->Gid = current->egid;
543 pSession->Process = RTProcSelf();
544 pSession->R0Process = RTR0ProcHandleSelf();
545 }
546
547 dprintf(("VBoxSupDrvCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
548 pFilp->private_data = pSession;
549
550 return VBoxSupDrvErr2LinuxErr(rc);
551}
552
553
554/**
555 * Close device.
556 *
557 * @param pInode Pointer to inode info structure.
558 * @param pFilp Associated file pointer.
559 */
560static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp)
561{
562 dprintf(("VBoxSupDrvClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
563 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
564 pFilp->private_data = NULL;
565 return 0;
566}
567
568
/**
 * Device I/O Control entry point.
 *
 * Copies the SUPDRVIOCTLDATA descriptor and the user input buffer into
 * kernel space, hands the request to the common supdrvIOCtl() code, and
 * copies any output data back to user space.
 *
 * @returns 0 on success, negative Linux errno on failure.
 * @param   pInode      Pointer to inode info structure.
 * @param   pFilp       Associated file pointer.
 * @param   IOCmd       The function specified to ioctl().
 * @param   IOArg       The argument specified to ioctl().
 */
static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
                                   unsigned int IOCmd, unsigned long IOArg)
{
    int                 rc;
    SUPDRVIOCTLDATA     Args;
    void               *pvBuf = NULL;
    int                 cbBuf = 0;
    unsigned            cbOut = 0;

    dprintf2(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p\n", pFilp, IOCmd, (void *)IOArg));

    /*
     * Copy ioctl data structure from user space.
     * The encoded size must match exactly; anything else is a caller bug.
     */
    if (_IOC_SIZE(IOCmd) != sizeof(SUPDRVIOCTLDATA))
    {
        dprintf(("VBoxSupDrvDeviceControl: incorrect input length! cbArgs=%d\n", _IOC_SIZE(IOCmd)));
        return -EINVAL;
    }
    if (copy_from_user(&Args, (void *)IOArg, _IOC_SIZE(IOCmd)))
    {
        dprintf(("VBoxSupDrvDeviceControl: copy_from_user(&Args) failed.\n"));
        return -EFAULT;
    }

    /*
     * Allocate and copy user space input data buffer to kernel space.
     * A single buffer serves as both input and output, sized for the larger.
     */
    if (Args.cbIn > 0 || Args.cbOut > 0)
    {
        cbBuf = max(Args.cbIn, Args.cbOut);
        pvBuf = vmalloc(cbBuf);
        if (pvBuf == NULL)
        {
            dprintf(("VBoxSupDrvDeviceControl: failed to allocate buffer of %d bytes.\n", cbBuf));
            return -ENOMEM;
        }

        if (copy_from_user(pvBuf, (void *)Args.pvIn, Args.cbIn))
        {
            dprintf(("VBoxSupDrvDeviceControl: copy_from_user(pvBuf) failed.\n"));
            vfree(pvBuf);
            return -EFAULT;
        }
    }

    /*
     * Process the IOCtl in the common (OS-independent) code.
     */
    rc = supdrvIOCtl(IOCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data,
                     pvBuf, Args.cbIn, pvBuf, Args.cbOut, &cbOut);

    /*
     * Copy ioctl data and output buffer back to user space.
     */
    if (rc)
    {
        dprintf(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p failed, rc=%d (linux rc=%d)\n",
                 pFilp, IOCmd, (void *)IOArg, rc, VBoxSupDrvErr2LinuxErr(rc)));
        rc = VBoxSupDrvErr2LinuxErr(rc);
    }
    else if (cbOut > 0)
    {
        if (pvBuf != NULL && cbOut <= cbBuf)
        {
            if (copy_to_user((void *)Args.pvOut, pvBuf, cbOut))
            {
                dprintf(("copy_to_user failed.\n"));
                rc = -EFAULT;
            }
        }
        else
        {
            /* supdrvIOCtl() reported more output than the buffer holds - internal error. */
            dprintf(("WHAT!?! supdrvIOCtl messed up! cbOut=%d cbBuf=%d pvBuf=%p\n", cbOut, cbBuf, pvBuf));
            rc = -EPERM;
        }
    }

    if (pvBuf)
        vfree(pvBuf);

    dprintf2(("VBoxSupDrvDeviceControl: returns %d\n", rc));
    return rc;
}
661
662
663/**
664 * Initializes any OS specific object creator fields.
665 */
666void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
667{
668 NOREF(pObj);
669 NOREF(pSession);
670}
671
672
673/**
674 * Checks if the session can access the object.
675 *
676 * @returns true if a decision has been made.
677 * @returns false if the default access policy should be applied.
678 *
679 * @param pObj The object in question.
680 * @param pSession The session wanting to access the object.
681 * @param pszObjName The object name, can be NULL.
682 * @param prc Where to store the result when returning true.
683 */
684bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
685{
686 NOREF(pObj);
687 NOREF(pSession);
688 NOREF(pszObjName);
689 NOREF(prc);
690 return false;
691}
692
693
/**
 * Compute the allocation order for a page count. Some functions
 * (alloc_pages, __free_pages) allocate/free 2^order pages.
 *
 * @returns order: log2 of the smallest power of two >= cPages.
 * @param   cPages      Number of pages.
 */
static int VBoxSupDrvOrder(unsigned long cPages)
{
    int             iOrder;
    unsigned long   cTmp;

    /* iOrder = floor(log2(cPages)). */
    for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
        ;
    /* Round up when cPages is not an exact power of two. Use 1UL so the
       shift is performed in unsigned long; the previous (1 << iOrder)
       was an int shift, which sign-extends when widened and is undefined
       for iOrder >= 31 on LP64 hosts. */
    if (cPages & ~(1UL << iOrder))
        ++iOrder;

    return iOrder;
}
712
713
714/**
715 * OS Specific code for locking down memory.
716 *
717 * @returns 0 on success.
718 * @returns SUPDRV_ERR_* on failure.
719 * @param pMem Pointer to memory.
720 * This is not linked in anywhere.
721 * @param paPages Array which should be filled with the address of the physical pages.
722 *
723 * @remark See sgl_map_user_pages() for an example of an similar function.
724 */
725int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
726{
727 int rc;
728 struct page **papPages;
729 unsigned iPage;
730 unsigned cPages = pMem->cb >> PAGE_SHIFT;
731 unsigned long pv = (unsigned long)pMem->pvR3;
732 struct vm_area_struct **papVMAs;
733
734 /*
735 * Allocate page pointer array.
736 */
737 papPages = vmalloc(cPages * sizeof(*papPages));
738 if (!papPages)
739 return SUPDRV_ERR_NO_MEMORY;
740
741 /*
742 * Allocate the VMA pointer array.
743 */
744 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
745 if (!papVMAs)
746 return SUPDRV_ERR_NO_MEMORY;
747
748 /*
749 * Get user pages.
750 */
751 down_read(&current->mm->mmap_sem);
752 rc = get_user_pages(current, /* Task for fault acounting. */
753 current->mm, /* Whose pages. */
754 (unsigned long)pv, /* Where from. */
755 cPages, /* How many pages. */
756 1, /* Write to memory. */
757 0, /* force. */
758 papPages, /* Page array. */
759 papVMAs); /* vmas */
760 if (rc != cPages)
761 {
762 up_read(&current->mm->mmap_sem);
763 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
764 return SUPDRV_ERR_LOCK_FAILED;
765 }
766
767 for (iPage = 0; iPage < cPages; iPage++)
768 flush_dcache_page(papPages[iPage]);
769 up_read(&current->mm->mmap_sem);
770
771 pMem->u.locked.papPages = papPages;
772 pMem->u.locked.cPages = cPages;
773
774 /*
775 * Get addresses, protect against fork()
776 */
777 for (iPage = 0; iPage < cPages; iPage++)
778 {
779 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
780 paPages[iPage].uReserved = 0;
781 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
782 }
783
784 vfree(papVMAs);
785
786 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
787 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
788 return 0;
789}
790
791
/**
 * Unlocks the memory referenced by pMem (pinned by supdrvOSLockMemOne).
 *
 * @param   pMem        Memory reference record of the locked memory.
 *
 * @remark  See sgl_unmap_user_pages() for an example of a similar function.
 */
void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
{
    unsigned        iPage;
    dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
              pMem->pvR3, pMem->cb, pMem->u.locked.papPages));

    /*
     * Loop thru the pages and release them.
     */
    for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
    {
        /* Mark non-reserved pages dirty before unpinning so the VM
           subsystem won't discard data written through the mapping
           (the pages were pinned writable). */
        if (!PageReserved(pMem->u.locked.papPages[iPage]))
            SetPageDirty(pMem->u.locked.papPages[iPage]);
        page_cache_release(pMem->u.locked.papPages[iPage]);
    }

    /* free the page array */
    vfree(pMem->u.locked.papPages);
    pMem->u.locked.cPages = 0;
}
820
821
/**
 * OS Specific code for allocating page aligned memory with continuous fixed
 * physical paged backing.
 *
 * @returns 0 on success.
 * @returns SUPDRV_ERR_* on failure.
 * @param   pMem    Memory reference record of the memory to be allocated.
 *                  (This is not linked in anywhere.)
 * @param   ppvR0   Where to store the virtual address of the ring-0 mapping. (optional)
 * @param   ppvR3   Where to store the virtual address of the ring-3 mapping.
 * @param   pHCPhys Where to store the physical address.
 */
int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
{
    struct page *paPages;
    unsigned    iPage;
    unsigned    cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
    unsigned    cPages = cbAligned >> PAGE_SHIFT;
    unsigned    cOrder = VBoxSupDrvOrder(cPages); /* alloc_pages() wants a 2^order count. */
    unsigned long ulAddr;
    dma_addr_t  HCPhys;
    int         rc = 0;
    pgprot_t    pgFlags;
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;

    Assert(ppvR3);
    Assert(pHCPhys);

    /*
     * Allocate the physically contiguous page run.
     */
#ifdef __AMD64__ /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
    paPages = alloc_pages(GFP_DMA, cOrder);
#else
    paPages = alloc_pages(GFP_USER, cOrder);
#endif
    if (!paPages)
        return SUPDRV_ERR_NO_MEMORY;

    /*
     * Lock the pages.
     */
    for (iPage = 0; iPage < cPages; iPage++)
    {
        SetPageReserved(&paPages[iPage]);
        if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
#ifdef DEBUG
        /* Sanity check: pages from a single alloc_pages() call must be
           physically contiguous. */
        if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
        {
            dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
                     iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
            BUG();
        }
#endif
    }
    HCPhys = page_to_phys(paPages);

    /*
     * Allocate user space mapping and put the physical pages into it.
     */
    down_write(&current->mm->mmap_sem);
    ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
    /* A page-aligned result means success; error codes are small negative
       values and thus not page aligned. */
    if (!(ulAddr & ~PAGE_MASK))
    {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
        int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
#else
        int rc2 = 0;
        struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
        if (vma)
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
            rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
#else
            rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
#endif
        else
        {
            rc = SUPDRV_ERR_NO_MEMORY;
            dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
        }
#endif
        if (rc2)
        {
            rc = SUPDRV_ERR_NO_MEMORY;
            dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
        }
    }
    else
    {
        dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
        rc = SUPDRV_ERR_NO_MEMORY;
    }
    up_write(&current->mm->mmap_sem);   /* not quite sure when to give this up. */

    /*
     * Success?
     */
    if (!rc)
    {
        *pHCPhys = HCPhys;
        *ppvR3 = ulAddr;
        if (ppvR0)
            *ppvR0 = (void *)ulAddr;
        pMem->pvR3           = ulAddr;
        pMem->pvR0           = NULL;
        pMem->u.cont.paPages = paPages;
        pMem->u.cont.cPages  = cPages;
        pMem->cb             = cbAligned;

        dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
                  pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
        global_flush_tlb();
        return 0;
    }

    /*
     * Failure, cleanup and be gone.
     */
    down_write(&current->mm->mmap_sem);
    /* NOTE(review): this condition looks inverted - do_mmap success was the
       page-aligned (!(ulAddr & ~PAGE_MASK)) case above, so the munmap seems
       to run only when there is nothing mapped. Verify before changing. */
    if (ulAddr & ~PAGE_MASK)
        MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
    for (iPage = 0; iPage < cPages; iPage++)
    {
        ClearPageReserved(&paPages[iPage]);
        if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
    }
    up_write(&current->mm->mmap_sem);   /* check when we can leave this. */
    __free_pages(paPages, cOrder);

    global_flush_tlb();
    return rc;
}
956
957
/**
 * Frees contiguous memory allocated by supdrvOSContAllocOne().
 *
 * @param   pMem    Memory reference record of the memory to be freed.
 */
void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
{
    unsigned    iPage;

    dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
              pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));

    /*
     * do_exit() destroys the mm before closing files, so skip the unmap
     * when the mm is already gone.
     * I really hope it cleans up our stuff properly...
     */
    if (current->mm)
    {
        down_write(&current->mm->mmap_sem);
        MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
        up_write(&current->mm->mmap_sem);   /* check when we can leave this. */
    }

    /*
     * Restore page attributes, un-reserve and free the pages.
     */
    for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
    {
        ClearPageReserved(&pMem->u.cont.paPages[iPage]);
        if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
    }
    __free_pages(pMem->u.cont.paPages, VBoxSupDrvOrder(pMem->u.cont.cPages));

    pMem->u.cont.cPages = 0;
}
994
995
/**
 * Allocates memory which is mapped into both kernel and user space.
 * The returned memory is page aligned and so is the allocation.
 *
 * @returns 0 on success.
 * @returns SUPDRV_ERR_* on failure.
 * @param   pMem    Memory reference record of the memory to be allocated.
 *                  (This is not linked in anywhere.)
 * @param   ppvR0   Where to store the address of the Ring-0 mapping.
 * @param   ppvR3   Where to store the address of the Ring-3 mapping.
 */
int VBOXCALL supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
{
    const unsigned  cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
    const unsigned  cPages = cbAligned >> PAGE_SHIFT;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
    unsigned        cOrder = VBoxSupDrvOrder(cPages);
    struct page    *paPages;
#endif
    struct page   **papPages;
    unsigned        iPage;
    pgprot_t        pgFlags;
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;

    /*
     * Allocate array with page pointers.
     */
    pMem->u.mem.cPages = 0;
    pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
    if (!papPages)
        return SUPDRV_ERR_NO_MEMORY;

    /*
     * Allocate the pages.
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
    /* >= 2.4.22: allocate individual (possibly highmem) pages. */
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_HIGHUSER);
        if (!papPages[iPage])
        {
            /* Record how many pages we actually got so the cleanup in
               supdrvOSMemFreeOne() releases exactly those. */
            pMem->u.mem.cPages = iPage;
            supdrvOSMemFreeOne(pMem);
            return SUPDRV_ERR_NO_MEMORY;
        }
    }

#else /* < 2.4.22 */
    /* Older kernels: allocate one contiguous run and build the page
       pointer array over it. */
    paPages = alloc_pages(GFP_USER, cOrder);
    if (!paPages)
    {
        supdrvOSMemFreeOne(pMem);
        return SUPDRV_ERR_NO_MEMORY;
    }
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = &paPages[iPage];
        if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
        if (PageHighMem(papPages[iPage]))
            BUG();
    }
#endif
    pMem->u.mem.cPages = cPages;

    /*
     * Reserve the pages.
     */
    for (iPage = 0; iPage < cPages; iPage++)
        SetPageReserved(papPages[iPage]);

    /*
     * Create the Ring-0 mapping.
     */
    if (ppvR0)
    {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
# ifdef VM_MAP
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
# else
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
# endif
#else
        /* Contiguous lowmem pages: the direct mapping serves as ring-0 address. */
        *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
#endif
    }
    if (pMem->pvR0 || !ppvR0)
    {
        /*
         * Create the ring3 mapping.
         */
        if (ppvR3)
            *ppvR3 = pMem->pvR3 = VBoxSupDrvMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
        if (pMem->pvR3 || !ppvR3)
            return 0;
        dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
    }
    else
        dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));

    supdrvOSMemFreeOne(pMem);
    return SUPDRV_ERR_NO_MEMORY;
}
1099
1100
1101/**
1102 * Get the physical addresses of the pages in the allocation.
1103 * This is called while inside bundle the spinlock.
1104 *
1105 * @param pMem Memory reference record of the memory.
1106 * @param paPages Where to store the page addresses.
1107 */
1108void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1109{
1110 unsigned iPage;
1111 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1112 {
1113 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1114 paPages[iPage].uReserved = 0;
1115 }
1116}
1117
1118
1119/**
1120 * Frees memory allocated by supdrvOSMemAllocOne().
1121 *
1122 * @param pMem Memory reference record of the memory to be free.
1123 */
1124void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1125{
1126 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1127 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1128
1129 /*
1130 * Unmap the user mapping (if any).
1131 * do_exit() destroys the mm before closing files.
1132 */
1133 if (pMem->pvR3 && current->mm)
1134 {
1135 down_write(&current->mm->mmap_sem);
1136 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1137 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1138 }
1139 pMem->pvR3 = NIL_RTR3PTR;
1140
1141 /*
1142 * Unmap the kernel mapping (if any).
1143 */
1144 if (pMem->pvR0)
1145 {
1146#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1147 vunmap(pMem->pvR0);
1148#endif
1149 pMem->pvR0 = NULL;
1150 }
1151
1152 /*
1153 * Free the physical pages.
1154 */
1155 if (pMem->u.mem.papPages)
1156 {
1157 struct page **papPages = pMem->u.mem.papPages;
1158 const unsigned cPages = pMem->u.mem.cPages;
1159 unsigned iPage;
1160
1161 /* Restore the page flags. */
1162 for (iPage = 0; iPage < cPages; iPage++)
1163 {
1164 ClearPageReserved(papPages[iPage]);
1165#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1166 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1167 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1168#endif
1169 }
1170
1171 /* Free the pages. */
1172#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1173 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1174 __free_page(papPages[iPage]);
1175#else
1176 if (cPages > 0)
1177 __free_pages(papPages[0], VBoxSupDrvOrder(cPages));
1178#endif
1179 /* Free the page pointer array. */
1180 kfree(papPages);
1181 pMem->u.mem.papPages = NULL;
1182 }
1183 pMem->u.mem.cPages = 0;
1184}
1185
1186
1187/**
1188 * Maps a range of pages into user space.
1189 *
1190 * @returns Pointer to the user space mapping on success.
1191 * @returns NULL on failure.
1192 * @param papPages Array of the pages to map.
1193 * @param cPages Number of pages to map.
1194 * @param fProt The mapping protection.
1195 * @param pgFlags The page level protection.
1196 */
1197static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1198{
1199 int rc = SUPDRV_ERR_NO_MEMORY;
1200 unsigned long ulAddr;
1201
1202 /*
1203 * Allocate user space mapping.
1204 */
1205 down_write(&current->mm->mmap_sem);
1206 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1207 if (!(ulAddr & ~PAGE_MASK))
1208 {
1209 /*
1210 * Map page by page into the mmap area.
1211 * This is generic, paranoid and not very efficient.
1212 */
1213 int rc = 0;
1214 unsigned long ulAddrCur = ulAddr;
1215 unsigned iPage;
1216 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1217 {
1218#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1219 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1220 if (!vma)
1221 break;
1222#endif
1223
1224#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1225 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1226#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1227 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1228#else /* 2.4 */
1229 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1230#endif
1231 if (rc)
1232 break;
1233 }
1234
1235 /*
1236 * Successful?
1237 */
1238 if (iPage >= cPages)
1239 {
1240 up_write(&current->mm->mmap_sem);
1241 return ulAddr;
1242 }
1243
1244 /* no, cleanup! */
1245 if (rc)
1246 dprintf(("VBoxSupDrvMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1247 else
1248 dprintf(("VBoxSupDrvMapUser: find_vma failed!\n"));
1249
1250 MY_DO_MUNMAP(current->mm, ulAddr, cPages * PAGE_SIZE);
1251 }
1252 else
1253 {
1254 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1255 rc = SUPDRV_ERR_NO_MEMORY;
1256 }
1257 up_write(&current->mm->mmap_sem);
1258
1259 return NIL_RTR3PTR;
1260}
1261
1262
/**
 * Initializes the GIP (Global Information Page).
 *
 * Allocates and pins the page, hands it to the common initialization code
 * and prepares the update timer(s).
 *
 * @returns 0 on success, negative errno on failure.
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    dma_addr_t HCPhys;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxSupDrvInitGip:\n"));

    /*
     * Allocate the page.
     */
    pPage = alloc_pages(GFP_USER, 0);
    if (!pPage)
    {
        dprintf(("VBoxSupDrvInitGip: failed to allocate the GIP page\n"));
        return -ENOMEM;
    }

    /*
     * Lock the page so the VM subsystem leaves it alone; it will later be
     * mapped read-only into user space.
     */
    SetPageReserved(pPage);
    g_pGipPage = pPage;

    /*
     * Call common initialization routine.
     * The monotonic time is seeded from jiffies: TICK_NSEC gives exact
     * nanoseconds per jiffy where available.
     * (NOTE(review): the non-TICK_NSEC fallback scales by 1000000/HZ, i.e.
     * microseconds rather than nanoseconds - the same factor is used
     * consistently in the timer callbacks, but confirm the unit that
     * supdrvGipInit() expects.)
     */
    HCPhys = page_to_phys(pPage);
    pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
    pDevExt->ulLastJiffies = jiffies;
#ifdef TICK_NSEC
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
             TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#else
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
             (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#endif
    /* The GIP update frequency is capped at 1000Hz. */
    supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
                  HZ <= 1000 ? HZ : 1000);

    /*
     * Initialize the timer(s). init_timer() does not start them; they are
     * first armed by the resume path (mod_timer) - TODO confirm.
     */
    init_timer(&g_GipTimer);
    g_GipTimer.data = (unsigned long)pDevExt;
    g_GipTimer.function = VBoxSupGipTimer;
    g_GipTimer.expires = jiffies;
#ifdef CONFIG_SMP
    /* One timer per CPU for async TSC mode. */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
    {
        init_timer(&pDevExt->aCPUs[i].Timer);
        pDevExt->aCPUs[i].Timer.data = (unsigned long)pDevExt;
        pDevExt->aCPUs[i].Timer.function = VBoxSupGipTimerPerCpu;
        pDevExt->aCPUs[i].Timer.expires = jiffies;
    }
#endif

    return 0;
}
1332
1333
1334/**
1335 * Terminates the GIP.
1336 *
1337 * @returns negative errno.
1338 * @param pDevExt Instance data. GIP stuff may be updated.
1339 */
1340static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt)
1341{
1342 struct page *pPage;
1343 PSUPGLOBALINFOPAGE pGip;
1344#ifdef CONFIG_SMP
1345 unsigned i;
1346#endif
1347 dprintf(("VBoxSupDrvTermGip:\n"));
1348
1349 /*
1350 * Delete the timer if it's pending.
1351 */
1352 if (timer_pending(&g_GipTimer))
1353 del_timer_sync(&g_GipTimer);
1354#ifdef CONFIG_SMP
1355 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1356 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1357 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1358#endif
1359
1360 /*
1361 * Uninitialize the content.
1362 */
1363 pGip = pDevExt->pGip;
1364 pDevExt->pGip = NULL;
1365 if (pGip)
1366 supdrvGipTerm(pGip);
1367
1368 /*
1369 * Free the page.
1370 */
1371 pPage = g_pGipPage;
1372 g_pGipPage = NULL;
1373 if (pPage)
1374 {
1375 ClearPageReserved(pPage);
1376 __free_pages(pPage, 0);
1377 }
1378
1379 return 0;
1380}
1381
1382/**
1383 * Timer callback function.
1384 *
1385 * In ASYNC TSC mode this is called on the primary CPU, and we're
1386 * assuming that the CPU remains online.
1387 *
1388 * @param ulUser The device extension pointer.
1389 */
1390static void VBoxSupGipTimer(unsigned long ulUser)
1391{
1392 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)ulUser;
1393 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1394 unsigned long ulNow = jiffies;
1395 unsigned long ulDiff = ulNow - pDevExt->ulLastJiffies;
1396 uint64_t u64Monotime;
1397 pDevExt->ulLastJiffies = ulNow;
1398#ifdef TICK_NSEC
1399 u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
1400#else
1401 u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
1402#endif
1403 ASMAtomicXchgU64(&pDevExt->u64LastMonotime, u64Monotime);
1404 if (RT_LIKELY(pGip))
1405 supdrvGipUpdate(pDevExt->pGip, u64Monotime);
1406 mod_timer(&g_GipTimer, jiffies + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
1407}
1408
1409
1410#ifdef CONFIG_SMP
1411/**
1412 * Timer callback function for the other CPUs.
1413 *
1414 * @param ulUser The device extension pointer.
1415 */
1416static void VBoxSupGipTimerPerCpu(unsigned long ulUser)
1417{
1418 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)ulUser;
1419 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1420 unsigned long ulNow = jiffies;
1421 unsigned long ulDiff = ulNow - pDevExt->ulLastJiffies;
1422 uint64_t u64Monotime;
1423 uint8_t iCPU = ASMGetApicId();
1424
1425 if (RT_UNLIKELY(iCPU >= RT_ELEMENTS(pGip->aCPUs)))
1426 {
1427 printk("vboxdrv: error: apicid=%d max=%d cpuid=%d\n",
1428 iCPU, RT_ELEMENTS(pGip->aCPUs), smp_processor_id());
1429 return;
1430 }
1431
1432 pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
1433#ifdef TICK_NSEC
1434 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
1435#else
1436 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
1437#endif
1438 ASMAtomicXchgU64(&pDevExt->aCPUs[iCPU].u64LastMonotime, u64Monotime);
1439 if (RT_LIKELY(pGip))
1440 supdrvGipUpdatePerCpu(pGip, pDevExt->aCPUs[iCPU].u64LastMonotime, iCPU);
1441 mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
1442}
1443#endif /* CONFIG_SMP */
1444
1445
/**
 * Maps the GIP into the calling process' user space.
 *
 * @returns 0 on success, SUPDRV_ERR_NO_MEMORY on failure.
 *          (NOTE(review): the old doc said 'negative errno', but this
 *          function actually returns SUPDRV_ERR_* status codes.)
 * @param   pDevExt     Instance data.
 * @param   ppGip       Where to return the ring-3 address of the mapping.
 */
int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE *ppGip)
{
    int rc = 0;
    unsigned long ulAddr;
    unsigned long HCPhys = pDevExt->HCPhysGip;
    pgprot_t pgFlags;
    /* Read-only, user accessible page. */
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
    dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));

    /*
     * Allocate user space mapping and put the physical pages into it.
     */
    down_write(&current->mm->mmap_sem);
    ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
    if (!(ulAddr & ~PAGE_MASK))
    {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
        /* 2.4-style API without the vma argument. */
        int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
#else
        int rc2 = 0;
        struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
        if (vma)
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
            rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
#else
            rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
#endif
        else
        {
            rc = SUPDRV_ERR_NO_MEMORY;
            dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
        }
#endif
        if (rc2)
        {
            rc = SUPDRV_ERR_NO_MEMORY;
            dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
        }
    }
    else
    {
        dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
        rc = SUPDRV_ERR_NO_MEMORY;
    }
    up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */

    /*
     * Success?
     */
    if (!rc)
    {
        *ppGip = (PCSUPGLOBALINFOPAGE)ulAddr;
        dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
        return 0;
    }

    /*
     * Failure, cleanup and be gone.
     * NOTE(review): this condition looks inverted - it is true when
     * do_mmap FAILED (ulAddr then holds a non-page-aligned error value,
     * so there is nothing to unmap) and false when the area was mapped
     * but remapping failed, which would leak the VMA. Confirm and fix.
     */
    if (ulAddr & ~PAGE_MASK)
    {
        down_write(&current->mm->mmap_sem);
        MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
        up_write(&current->mm->mmap_sem);
    }

    dprintf2(("supdrvOSGipMap: returns %d\n", rc));
    return rc;
}
1521
1522
1523/**
1524 * Maps the GIP into user space.
1525 *
1526 * @returns negative errno.
1527 * @param pDevExt Instance data.
1528 */
1529int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE pGip)
1530{
1531 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1532 if (current->mm)
1533 {
1534 down_write(&current->mm->mmap_sem);
1535 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1536 up_write(&current->mm->mmap_sem);
1537 }
1538 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1539 return 0;
1540}
1541
1542
/**
 * Resumes the GIP updating by re-arming the update timer(s).
 *
 * In async TSC mode each CPU has its own timer which is re-armed on its
 * respective CPU via smp_call_function().
 *
 * @param   pDevExt     Instance data.
 */
void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
{
    dprintf2(("supdrvOSGipResume:\n"));
    /* Note: the braceless 'if' below pairs with the 'else' inside the
       second CONFIG_SMP section; on non-SMP kernels only the mod_timer
       statement remains. */
#ifdef CONFIG_SMP
    if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
#endif
        mod_timer(&g_GipTimer, jiffies);
#ifdef CONFIG_SMP
    else
    {
        /* Async TSC: arm the primary timer here; smp_call_function()
           invokes the callback on all OTHER online CPUs. */
        mod_timer(&g_GipTimer, jiffies);
        smp_call_function(VBoxSupGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
    }
#endif
}
1563
1564
1565#ifdef CONFIG_SMP
1566/**
1567 * Callback for resuming GIP updating on the other CPUs.
1568 *
1569 * This is only used when the GIP is in async tsc mode.
1570 *
1571 * @param pvUser Pointer to the device instance.
1572 */
1573static void VBoxSupGipResumePerCpu(void *pvUser)
1574{
1575 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1576 uint8_t iCPU = ASMGetApicId();
1577
1578
1579 if (RT_UNLIKELY(iCPU >= RT_ELEMENTS(pDevExt->pGip->aCPUs)))
1580 {
1581 printk("vboxdrv: error: apicid=%d max=%d cpuid=%d\n",
1582 iCPU, RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
1583 return;
1584 }
1585
1586 mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies);
1587}
1588#endif /* CONFIG_SMP */
1589
1590
1591/**
1592 * Suspends the GIP updating.
1593 *
1594 * @param pDevExt Instance data.
1595 */
1596void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1597{
1598#ifdef CONFIG_SMP
1599 unsigned i;
1600#endif
1601 dprintf2(("supdrvOSGipSuspend:\n"));
1602
1603 if (timer_pending(&g_GipTimer))
1604 del_timer_sync(&g_GipTimer);
1605#ifdef CONFIG_SMP
1606 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1607 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1608 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1609#endif
1610}
1611
1612
1613/**
1614 * Get the current CPU count.
1615 * @returns Number of cpus.
1616 */
1617unsigned VBOXCALL supdrvOSGetCPUCount(void)
1618{
1619#ifdef CONFIG_SMP
1620# ifdef num_present_cpus
1621 return num_present_cpus();
1622# else
1623 return smp_num_cpus;
1624# endif
1625#else
1626 return 1;
1627#endif
1628}
1629
1630
1631/**
1632 * Converts a supdrv error code to an linux error code.
1633 *
1634 * @returns corresponding linux error code.
1635 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1636 */
1637static int VBoxSupDrvErr2LinuxErr(int rc)
1638{
1639 switch (rc)
1640 {
1641 case 0: return 0;
1642 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1643 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1644 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1645 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1646 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1647 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1648 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1649 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1650 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1651 }
1652
1653 return -EPERM;
1654}
1655
1656
/**
 * Ring-0 printf-style logging, forwarded to printk.
 *
 * @returns 0.
 * @param   pszFormat   printf-style format string.
 */
RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
{
#if 1
    va_list args;
    char szMsg[512];    /* messages longer than this are silently truncated */

    va_start(args, pszFormat);
    vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
    szMsg[sizeof(szMsg) - 1] = '\0';
    printk("%s", szMsg);
    va_end(args);
#else
    /* forward to printf - needs some more GCC hacking to fix ebp...
       (disabled experiment; not valid as written, kept for reference) */
    __asm__ __volatile__ ("mov %0, %esp\n\t"
                          "jmp %1\n\t",
                          :: "r" ((uintptr_t)&pszFormat - 4),
                             "m" (printk));
#endif
    return 0;
}
1677
1678
/** Runtime assert implementation for Linux Ring-0.
 * Logs the failed expression and its source location via printk; it does
 * not panic or otherwise stop the caller. */
RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
{
    printk("!!Assertion Failed!!\n"
           "Expression: %s\n"
           "Location : %s(%d) %s\n",
           pszExpr, pszFile, uLine, pszFunction);
}
1687
1688
1689/** Runtime assert implementation for Linux Ring-0. */
1690RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1691{ /* forwarder. */
1692 va_list ap;
1693 char msg[256];
1694
1695 va_start(ap, pszFormat);
1696 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1697 msg[sizeof(msg) - 1] = '\0';
1698 printk("%s", msg);
1699 va_end(ap);
1700}
1701
1702
/* GCC C++ hack: dummy definition so C++ translation units linked into this
   module resolve the exception-personality symbol without pulling in
   libstdc++. The pattern value is never used - TODO confirm. */
unsigned __gxx_personality_v0 = 0xcccccccc;
1705
1706
/* Module entry and exit points. */
module_init(VBoxSupDrvInit);
module_exit(VBoxSupDrvUnload);

MODULE_AUTHOR("InnoTek Systemberatung GmbH");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
/* Stringify the numeric SUPDRVIOC_VERSION so 'modinfo' shows both the
   product version and the IOC interface version. */
#define xstr(s) str(s)
#define str(s) #s
MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette