VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 2373

Last change on this file since 2373 was 2373, checked in by vboxsync, 18 years ago

When in async mode we must update the aCPUs data everywhere since we don't know which CPU the operation will be resumed on.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 51.9 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006 InnoTek Systemberatung GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * If you received this file as part of a commercial VirtualBox
17 * distribution, then only the terms of your commercial VirtualBox
18 * license agreement apply instead of the previous paragraph.
19 */
20
21/*******************************************************************************
22* Header Files *
23*******************************************************************************/
24#include "SUPDRV.h"
25#include "version-generated.h"
26
27#include <iprt/assert.h>
28#include <iprt/spinlock.h>
29#include <iprt/semaphore.h>
30#include <iprt/initterm.h>
31#include <iprt/process.h>
32#include <iprt/err.h>
33#include <iprt/mem.h>
34
35#include <linux/module.h>
36#include <linux/kernel.h>
37#include <linux/init.h>
38#include <linux/fs.h>
39#include <linux/mm.h>
40#include <linux/pagemap.h>
41#include <linux/slab.h>
42#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
43# include <linux/jiffies.h>
44#endif
45#include <asm/mman.h>
46#include <asm/io.h>
47#include <asm/uaccess.h>
48#ifdef CONFIG_DEVFS_FS
49# include <linux/devfs_fs_kernel.h>
50#endif
51#ifdef CONFIG_VBOXDRV_AS_MISC
52# include <linux/miscdevice.h>
53#endif
54#ifdef CONFIG_X86_LOCAL_APIC
55# include <asm/apic.h>
56# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
57# include <asm/nmi.h>
58# endif
59#endif
60
61#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
62# ifndef page_to_pfn
63# define page_to_pfn(page) ((page) - mem_map)
64# endif
65# include <asm/pgtable.h>
66# define global_flush_tlb __flush_tlb_global
67#endif
68
69/* devfs defines */
70#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
71# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
72
73# define VBOX_REGISTER_DEVFS() \
74({ \
75 void *rc = NULL; \
76 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
77 S_IFCHR | S_IRUGO | S_IWUGO, \
78 DEVICE_NAME) == 0) \
79 rc = (void *)' '; /* return not NULL */ \
80 rc; \
81 })
82
83# define VBOX_UNREGISTER_DEVFS(handle) \
84 devfs_remove(DEVICE_NAME);
85
86# else /* < 2.6.0 */
87
88# define VBOX_REGISTER_DEVFS() \
89 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
90 DEVICE_MAJOR, 0, \
91 S_IFCHR | S_IRUGO | S_IWUGO, \
92 &gFileOpsVBoxDrv, NULL)
93
94# define VBOX_UNREGISTER_DEVFS(handle) \
95 if (handle != NULL) \
96 devfs_unregister(handle)
97
98# endif /* < 2.6.0 */
99#endif /* CONFIG_DEV_FS && !CONFIG_VBOXDEV_AS_MISC */
100
101#ifndef CONFIG_VBOXDRV_AS_MISC
102# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
103# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
104# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
105# else
106# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
107# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
108# endif
109#endif /* !CONFIG_VBOXDRV_AS_MISC */
110
111
112#ifdef CONFIG_X86_HIGH_ENTRY
113# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
114#endif
115
116/*
117 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
118 */
119#if defined(__AMD64__)
120# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
121#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
122# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
123#else
124# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
125#endif
126
127/*
128 * The redhat hack section.
129 * - The current hacks are for 2.4.21-15.EL only.
130 */
131#ifndef NO_REDHAT_HACKS
132/* accounting. */
133# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
134# ifdef VM_ACCOUNT
135# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
136# endif
137# endif
138
139/* backported remap_page_range. */
140# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
141# include <asm/tlb.h>
142# ifdef tlb_vma /* probably not good enough... */
143# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
144# endif
145# endif
146
147# ifndef __AMD64__
148/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
149 * the page attributes from PAGE_KERNEL to something else, because there appears
150 * to be a bug in one of the many patches that redhat applied.
151 * It should be safe to do this on less buggy linux kernels too. ;-)
152 */
153# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
154 do { \
155 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
156 change_page_attr(pPages, cPages, prot); \
157 change_page_attr(pPages, cPages, prot); \
158 } while (0)
159# endif
160#endif /* !NO_REDHAT_HACKS */
161
162
163#ifndef MY_DO_MUNMAP
164# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
165#endif
166
167#ifndef MY_CHANGE_PAGE_ATTR
168# ifdef __AMD64__ /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
169# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
170 do { \
171 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
172 change_page_attr(pPages, cPages, prot); \
173 } while (0)
174# else
175# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
176# endif
177#endif
178
179
180/** @def ONE_MSEC_IN_JIFFIES
181 * The number of jiffies that make up 1 millisecond. This is only actually used
182 * when HZ is > 1000. */
183#if HZ <= 1000
184# define ONE_MSEC_IN_JIFFIES 0
185#elif !(HZ % 1000)
186# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
187#else
188# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
189# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
190#endif
191
192
193/*******************************************************************************
194* Defined Constants And Macros *
195*******************************************************************************/
196/**
197 * Device extention & session data association structure.
198 */
199static SUPDRVDEVEXT g_DevExt;
200
201/** Timer structure for the GIP update. */
202static struct timer_list g_GipTimer;
203/** Pointer to the page structure for the GIP. */
204struct page *g_pGipPage;
205
206/** Registered devfs device handle. */
207#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
208# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
209static void *g_hDevFsVBoxDrv = NULL;
210# else
211static devfs_handle_t g_hDevFsVBoxDrv = NULL;
212# endif
213#endif
214
215#ifndef CONFIG_VBOXDRV_AS_MISC
216/** Module major number */
217#define DEVICE_MAJOR 234
218/** Saved major device number */
219static int g_iModuleMajor;
220#endif /* !CONFIG_VBOXDRV_AS_MISC */
221
222/** The module name. */
223#define DEVICE_NAME "vboxdrv"
224
225#ifdef __AMD64__
226/**
227 * Memory for the executable memory heap (in IPRT).
228 */
229extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
230__asm__(".section execmemory, \"awx\", @progbits\n\t"
231 ".align 32\n\t"
232 ".globl g_abExecMemory\n"
233 "g_abExecMemory:\n\t"
234 ".zero 1572864\n\t"
235 ".type g_abExecMemory, @object\n\t"
236 ".size g_abExecMemory, 1572864\n\t"
237 ".text\n\t");
238#endif
239
240
241/*******************************************************************************
242* Internal Functions *
243*******************************************************************************/
244static int VBoxSupDrvInit(void);
245static void VBoxSupDrvUnload(void);
246static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp);
247static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp);
248static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
249 unsigned int IOCmd, unsigned long IOArg);
250static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
251static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt);
252static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt);
253static void VBoxSupGipTimer(unsigned long ulUser);
254#ifdef CONFIG_SMP
255static void VBoxSupGipTimerPerCpu(unsigned long ulUser);
256static void VBoxSupGipResumePerCpu(void *pvUser);
257#endif
258static int VBoxSupDrvOrder(unsigned long size);
259static int VBoxSupDrvErr2LinuxErr(int);
260
261
/** The file_operations structure.
 * Hooked up by register_chrdev()/misc_register() below. Uses GNU "label:"
 * designated-initializer syntax; only open, release and ioctl are provided,
 * everything else falls back to the kernel defaults. */
static struct file_operations gFileOpsVBoxDrv =
{
    owner:      THIS_MODULE,
    open:       VBoxSupDrvCreate,
    release:    VBoxSupDrvClose,
    ioctl:      VBoxSupDrvDeviceControl,
};
270
#ifdef CONFIG_VBOXDRV_AS_MISC
/** The miscdevice structure.
 * Used when the driver is registered as a misc device (dynamic minor number)
 * instead of a classic character device with the fixed DEVICE_MAJOR. */
static struct miscdevice gMiscDevice =
{
    minor:      MISC_DYNAMIC_MINOR,
    name:       DEVICE_NAME,
    fops:       &gFileOpsVBoxDrv,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
     LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
    /* The devfs_name member only exists in the 2.6.0 - 2.6.17 window. */
    devfs_name: DEVICE_NAME,
# endif
};
#endif
284
285
/**
 * Initialize module.
 *
 * Refuses to load if an NMI watchdog appears to be active (it would crash the
 * world switcher), then registers the device node, initializes IPRT and the
 * device extension, and finally creates the GIP page.
 *
 * @returns appropriate status code (0 on success, negative errno on failure).
 */
static int __init VBoxSupDrvInit(void)
{
    int       rc;

    dprintf(("VBoxDrv::ModuleInit\n"));

#ifdef CONFIG_X86_LOCAL_APIC
    /*
     * If an NMI occurs while we are inside the world switcher the machine will crash.
     * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
     * compared with another counter increased in the timer interrupt handler. Therefore
     * we don't allow to setup an NMI watchdog.
     */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
    /*
     * First test: NMI activated? Only works with Linux 2.6 -- 2.4 does not export
     * the nmi_watchdog variable.
     */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    /*
     * Permanent IO_APIC mode active? No way to handle this!
     */
    if (nmi_watchdog == NMI_IO_APIC)
    {
        printk(KERN_ERR DEVICE_NAME
               ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
               DEVICE_NAME
               ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
               DEVICE_NAME
               ": command line.\n");
        return -EINVAL;
    }

    /*
     * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again
     */
    atomic_set(&nmi_active, -1);
    printk(KERN_INFO DEVICE_NAME ": Trying to deactivate NMI watchdog permanently...\n");

    /*
     * Now fall through and see if it actually was enabled before. If so, fail
     * as we cannot deactivate it cleanly from here.
     */
#  else /* < 2.6.19 */
    /*
     * Older 2.6 kernels: nmi_watchdog is not initialized by default
     */
    if (nmi_watchdog != NMI_NONE)
        goto nmi_activated;
#  endif
# endif /* >= 2.6.0 */

    /*
     * Second test: Interrupt generated by performance counter not masked and can
     * generate an NMI. Works also with Linux 2.4.
     */
    {
        unsigned int v, ver, maxlvt;

        v = apic_read(APIC_LVR);
        ver = GET_APIC_VERSION(v);
        /* 82489DXs do not report # of LVT entries. */
        maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
        if (maxlvt >= 4)
        {
            /* Read status of performance counter IRQ vector */
            v = apic_read(APIC_LVTPC);

            /* performance counter generates NMI and is not masked? */
            if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
            {
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
                       DEVICE_NAME
                       ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# else /* < 2.6.19 */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
nmi_activated:
#  endif
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
                       DEVICE_NAME
                       ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# endif /* >= 2.6.19 */
            }
        }
    }
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
# endif /* >= 2.6.19 */
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_register(&gMiscDevice);
    if (rc)
    {
        printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
        return rc;
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
    /*
     * Register character device.
     */
    g_iModuleMajor = DEVICE_MAJOR;
    rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
    if (rc < 0)
    {
        dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
        return rc;
    }

    /*
     * Save returned module major number. With DEVICE_MAJOR == 0 the kernel
     * would have picked a dynamic major and returned it in rc.
     */
    if (DEVICE_MAJOR != 0)
        g_iModuleMajor = DEVICE_MAJOR;
    else
        g_iModuleMajor = rc;
    rc = 0;

#ifdef CONFIG_DEVFS_FS
    /*
     * Register a device entry
     */
    g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
    if (g_hDevFsVBoxDrv == NULL)
    {
        dprintf(("devfs_register failed!\n"));
        rc = -EINVAL;
    }
#endif
#endif /* !CONFIG_VBOXDRV_AS_MISC */
    if (!rc)
    {
        /*
         * Initialize the runtime.
         * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
         */
        rc = RTR0Init(0);
        if (RT_SUCCESS(rc))
        {
#ifdef __AMD64__
            rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
#endif
            /*
             * Initialize the device extension.
             */
            if (RT_SUCCESS(rc))
                rc = supdrvInitDevExt(&g_DevExt);
            if (!rc)
            {
                /*
                 * Create the GIP page.
                 */
                rc = VBoxSupDrvInitGip(&g_DevExt);
                if (!rc)
                {
                    dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
                    return rc;
                }

                supdrvDeleteDevExt(&g_DevExt);
            }
            else
                rc = -EINVAL;
            RTR0Term();
        }
        else
            rc = -EINVAL;

        /*
         * Failed, cleanup and return the error code.
         */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
        VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif
    }
#ifdef CONFIG_VBOXDRV_AS_MISC
    misc_deregister(&gMiscDevice);
    dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
#else
    VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
#endif
    return rc;
}
480
481
/**
 * Unload the module.
 *
 * Tears down in the reverse order of VBoxSupDrvInit: device node first, then
 * GIP, device extension and IPRT.
 */
static void __exit VBoxSupDrvUnload(void)
{
    int                rc;
    dprintf(("VBoxSupDrvUnload\n"));

    /*
     * I Don't think it's possible to unload a driver which processes have
     * opened, at least we'll blindly assume that here.
     */
#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_deregister(&gMiscDevice);
    if (rc < 0)
    {
        dprintf(("misc_deregister failed with rc=%#x\n", rc));
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
#ifdef CONFIG_DEVFS_FS
    /*
     * Unregister a device entry
     */
    VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif // devfs
    rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    if (rc < 0)
    {
        dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
    }
#endif /* !CONFIG_VBOXDRV_AS_MISC */

    /*
     * Destroy GIP, delete the device extension and terminate IPRT.
     */
    VBoxSupDrvTermGip(&g_DevExt);
    supdrvDeleteDevExt(&g_DevExt);
    RTR0Term();
}
521
522
523/**
524 * Device open. Called on open /dev/vboxdrv
525 *
526 * @param pInode Pointer to inode info structure.
527 * @param pFilp Associated file pointer.
528 */
529static int VBoxSupDrvCreate(struct inode *pInode, struct file *pFilp)
530{
531 int rc;
532 PSUPDRVSESSION pSession;
533 dprintf(("VBoxSupDrvCreate: pFilp=%p\n", pFilp));
534
535 /*
536 * Call common code for the rest.
537 */
538 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
539 if (!rc)
540 {
541 pSession->Uid = current->euid;
542 pSession->Gid = current->egid;
543 pSession->Process = RTProcSelf();
544 pSession->R0Process = RTR0ProcHandleSelf();
545 }
546
547 dprintf(("VBoxSupDrvCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
548 pFilp->private_data = pSession;
549
550 return VBoxSupDrvErr2LinuxErr(rc);
551}
552
553
554/**
555 * Close device.
556 *
557 * @param pInode Pointer to inode info structure.
558 * @param pFilp Associated file pointer.
559 */
560static int VBoxSupDrvClose(struct inode *pInode, struct file *pFilp)
561{
562 dprintf(("VBoxSupDrvClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
563 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
564 pFilp->private_data = NULL;
565 return 0;
566}
567
568
569/**
570 * Device I/O Control entry point.
571 *
572 * @param pInode Pointer to inode info structure.
573 * @param pFilp Associated file pointer.
574 * @param IOCmd The function specified to ioctl().
575 * @param IOArg The argument specified to ioctl().
576 */
577static int VBoxSupDrvDeviceControl(struct inode *pInode, struct file *pFilp,
578 unsigned int IOCmd, unsigned long IOArg)
579{
580 int rc;
581 SUPDRVIOCTLDATA Args;
582 void *pvBuf = NULL;
583 int cbBuf = 0;
584 unsigned cbOut = 0;
585
586 dprintf2(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p\n", pFilp, IOCmd, (void *)IOArg));
587
588 /*
589 * Copy ioctl data structure from user space.
590 */
591 if (_IOC_SIZE(IOCmd) != sizeof(SUPDRVIOCTLDATA))
592 {
593 dprintf(("VBoxSupDrvDeviceControl: incorrect input length! cbArgs=%d\n", _IOC_SIZE(IOCmd)));
594 return -EINVAL;
595 }
596 if (copy_from_user(&Args, (void *)IOArg, _IOC_SIZE(IOCmd)))
597 {
598 dprintf(("VBoxSupDrvDeviceControl: copy_from_user(&Args) failed.\n"));
599 return -EFAULT;
600 }
601
602 /*
603 * Allocate and copy user space input data buffer to kernel space.
604 */
605 if (Args.cbIn > 0 || Args.cbOut > 0)
606 {
607 cbBuf = max(Args.cbIn, Args.cbOut);
608 pvBuf = vmalloc(cbBuf);
609 if (pvBuf == NULL)
610 {
611 dprintf(("VBoxSupDrvDeviceControl: failed to allocate buffer of %d bytes.\n", cbBuf));
612 return -ENOMEM;
613 }
614
615 if (copy_from_user(pvBuf, (void *)Args.pvIn, Args.cbIn))
616 {
617 dprintf(("VBoxSupDrvDeviceControl: copy_from_user(pvBuf) failed.\n"));
618 vfree(pvBuf);
619 return -EFAULT;
620 }
621 }
622
623 /*
624 * Process the IOCtl.
625 */
626 rc = supdrvIOCtl(IOCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data,
627 pvBuf, Args.cbIn, pvBuf, Args.cbOut, &cbOut);
628
629 /*
630 * Copy ioctl data and output buffer back to user space.
631 */
632 if (rc)
633 {
634 dprintf(("VBoxSupDrvDeviceControl: pFilp=%p IOCmd=%x IOArg=%p failed, rc=%d (linux rc=%d)\n",
635 pFilp, IOCmd, (void *)IOArg, rc, VBoxSupDrvErr2LinuxErr(rc)));
636 rc = VBoxSupDrvErr2LinuxErr(rc);
637 }
638 else if (cbOut > 0)
639 {
640 if (pvBuf != NULL && cbOut <= cbBuf)
641 {
642 if (copy_to_user((void *)Args.pvOut, pvBuf, cbOut))
643 {
644 dprintf(("copy_to_user failed.\n"));
645 rc = -EFAULT;
646 }
647 }
648 else
649 {
650 dprintf(("WHAT!?! supdrvIOCtl messed up! cbOut=%d cbBuf=%d pvBuf=%p\n", cbOut, cbBuf, pvBuf));
651 rc = -EPERM;
652 }
653 }
654
655 if (pvBuf)
656 vfree(pvBuf);
657
658 dprintf2(("VBoxSupDrvDeviceControl: returns %d\n", rc));
659 return rc;
660}
661
662
663/**
664 * Initializes any OS specific object creator fields.
665 */
666void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
667{
668 NOREF(pObj);
669 NOREF(pSession);
670}
671
672
673/**
674 * Checks if the session can access the object.
675 *
676 * @returns true if a decision has been made.
677 * @returns false if the default access policy should be applied.
678 *
679 * @param pObj The object in question.
680 * @param pSession The session wanting to access the object.
681 * @param pszObjName The object name, can be NULL.
682 * @param prc Where to store the result when returning true.
683 */
684bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
685{
686 NOREF(pObj);
687 NOREF(pSession);
688 NOREF(pszObjName);
689 NOREF(prc);
690 return false;
691}
692
693
/**
 * Compute order. Some functions allocate 2^order pages.
 *
 * The order is the position of the most significant bit of cPages, rounded
 * up by one when cPages is not an exact power of two.
 *
 * @returns order.
 * @param   cPages  Number of pages.
 */
static int      VBoxSupDrvOrder(unsigned long cPages)
{
    unsigned long   cShifted = cPages;
    int             iOrder = 0;

    /* Locate the most significant set bit. */
    while (cShifted >>= 1)
        iOrder++;

    /* Round up unless cPages is exactly 2^iOrder. */
    if (cPages & ~(1 << iOrder))
        iOrder++;

    return iOrder;
}
712
713
714/**
715 * OS Specific code for locking down memory.
716 *
717 * @returns 0 on success.
718 * @returns SUPDRV_ERR_* on failure.
719 * @param pMem Pointer to memory.
720 * This is not linked in anywhere.
721 * @param paPages Array which should be filled with the address of the physical pages.
722 *
723 * @remark See sgl_map_user_pages() for an example of an similar function.
724 */
725int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
726{
727 int rc;
728 struct page **papPages;
729 unsigned iPage;
730 unsigned cPages = pMem->cb >> PAGE_SHIFT;
731 unsigned long pv = (unsigned long)pMem->pvR3;
732 struct vm_area_struct **papVMAs;
733
734 /*
735 * Allocate page pointer array.
736 */
737 papPages = vmalloc(cPages * sizeof(*papPages));
738 if (!papPages)
739 return SUPDRV_ERR_NO_MEMORY;
740
741 /*
742 * Allocate the VMA pointer array.
743 */
744 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
745 if (!papVMAs)
746 return SUPDRV_ERR_NO_MEMORY;
747
748 /*
749 * Get user pages.
750 */
751 down_read(&current->mm->mmap_sem);
752 rc = get_user_pages(current, /* Task for fault acounting. */
753 current->mm, /* Whose pages. */
754 (unsigned long)pv, /* Where from. */
755 cPages, /* How many pages. */
756 1, /* Write to memory. */
757 0, /* force. */
758 papPages, /* Page array. */
759 papVMAs); /* vmas */
760 if (rc != cPages)
761 {
762 up_read(&current->mm->mmap_sem);
763 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
764 return SUPDRV_ERR_LOCK_FAILED;
765 }
766
767 for (iPage = 0; iPage < cPages; iPage++)
768 flush_dcache_page(papPages[iPage]);
769 up_read(&current->mm->mmap_sem);
770
771 pMem->u.locked.papPages = papPages;
772 pMem->u.locked.cPages = cPages;
773
774 /*
775 * Get addresses, protect against fork()
776 */
777 for (iPage = 0; iPage < cPages; iPage++)
778 {
779 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
780 paPages[iPage].uReserved = 0;
781 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
782 }
783
784 vfree(papVMAs);
785
786 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
787 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
788 return 0;
789}
790
791
792/**
793 * Unlocks the memory pointed to by pv.
794 *
795 * @param pMem Pointer to memory to unlock.
796 *
797 * @remark See sgl_unmap_user_pages() for an example of an similar function.
798 */
799void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
800{
801 unsigned iPage;
802 dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
803 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
804
805 /*
806 * Loop thru the pages and release them.
807 */
808 for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
809 {
810 if (!PageReserved(pMem->u.locked.papPages[iPage]))
811 SetPageDirty(pMem->u.locked.papPages[iPage]);
812 page_cache_release(pMem->u.locked.papPages[iPage]);
813 }
814
815 /* free the page array */
816 vfree(pMem->u.locked.papPages);
817 pMem->u.locked.cPages = 0;
818}
819
820
/**
 * OS Specific code for allocating page aligned memory with continuous fixed
 * physical paged backing.
 *
 * @returns 0 on success.
 * @returns SUPDRV_ERR_* on failure.
 * @param   pMem    Memory reference record of the memory to be allocated.
 *                  (This is not linked in anywhere.)
 * @param   ppvR0   Where to store the virtual address of the ring-0 mapping. (optional)
 * @param   ppvR3   Where to store the virtual address of the ring-3 mapping.
 * @param   pHCPhys Where to store the physical address.
 */
int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
{
    struct page *paPages;
    unsigned    iPage;
    unsigned    cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
    unsigned    cPages = cbAligned >> PAGE_SHIFT;
    unsigned    cOrder = VBoxSupDrvOrder(cPages);
    unsigned long ulAddr;
    dma_addr_t  HCPhys;
    int         rc = 0;
    pgprot_t    pgFlags;
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;

    Assert(ppvR3);
    Assert(pHCPhys);

    /*
     * Allocate the physically contiguous page block (2^cOrder pages).
     */
#ifdef __AMD64__ /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
    paPages = alloc_pages(GFP_DMA, cOrder);
#else
    paPages = alloc_pages(GFP_USER, cOrder);
#endif
    if (!paPages)
        return SUPDRV_ERR_NO_MEMORY;

    /*
     * Lock the pages: mark them reserved and make low-memory pages executable.
     */
    for (iPage = 0; iPage < cPages; iPage++)
    {
        SetPageReserved(&paPages[iPage]);
        if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
#ifdef DEBUG
        /* Sanity check that alloc_pages() really gave us a contiguous run. */
        if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
        {
            dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
                     iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
            BUG();
        }
#endif
    }
    HCPhys = page_to_phys(paPages);

    /*
     * Allocate user space mapping and put the physical pages into it.
     * The remap API differs across the supported kernel versions.
     */
    down_write(&current->mm->mmap_sem);
    ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
    if (!(ulAddr & ~PAGE_MASK))
    {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
        int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
#else
        int rc2 = 0;
        struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
        if (vma)
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
            rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
#else
            rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
#endif
        else
        {
            rc = SUPDRV_ERR_NO_MEMORY;
            dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
        }
#endif
        if (rc2)
        {
            rc = SUPDRV_ERR_NO_MEMORY;
            dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
        }
    }
    else
    {
        dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
        rc = SUPDRV_ERR_NO_MEMORY;
    }
    up_write(&current->mm->mmap_sem);   /* not quite sure when to give this up. */

    /*
     * Success?
     */
    if (!rc)
    {
        *pHCPhys = HCPhys;
        *ppvR3 = ulAddr;
        if (ppvR0)
            *ppvR0 = (void *)ulAddr;
        pMem->pvR3 = ulAddr;
        pMem->pvR0 = NULL;
        pMem->u.cont.paPages = paPages;
        pMem->u.cont.cPages = cPages;
        pMem->cb = cbAligned;

        dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
                  pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
        global_flush_tlb();
        return 0;
    }

    /*
     * Failure, cleanup and be gone: unmap (if mapped), restore page
     * attributes and hand the block back to the page allocator.
     */
    down_write(&current->mm->mmap_sem);
    if (ulAddr & ~PAGE_MASK)
        MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
    for (iPage = 0; iPage < cPages; iPage++)
    {
        ClearPageReserved(&paPages[iPage]);
        if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
    }
    up_write(&current->mm->mmap_sem);   /* check when we can leave this. */
    __free_pages(paPages, cOrder);

    global_flush_tlb();
    return rc;
}
955
956
957/**
958 * Frees contiguous memory.
959 *
960 * @param pMem Memory reference record of the memory to be freed.
961 */
962void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
963{
964 unsigned iPage;
965
966 dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
967 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));
968
969 /*
970 * do_exit() destroys the mm before closing files.
971 * I really hope it cleans up our stuff properly...
972 */
973 if (current->mm)
974 {
975 down_write(&current->mm->mmap_sem);
976 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
977 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
978 }
979
980 /*
981 * Change page attributes freeing the pages.
982 */
983 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
984 {
985 ClearPageReserved(&pMem->u.cont.paPages[iPage]);
986 if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
987 MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
988 }
989 __free_pages(pMem->u.cont.paPages, VBoxSupDrvOrder(pMem->u.cont.cPages));
990
991 pMem->u.cont.cPages = 0;
992}
993
994
/**
 * Allocates memory which mapped into both kernel and user space.
 * The returned memory is page aligned and so is the allocation.
 *
 * @returns 0 on success.
 * @returns SUPDRV_ERR_* on failure.
 * @param   pMem    Memory reference record of the memory to be allocated.
 *                  (This is not linked in anywhere.)
 * @param   ppvR0   Where to store the address of the Ring-0 mapping.
 * @param   ppvR3   Where to store the address of the Ring-3 mapping.
 */
int  VBOXCALL   supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
{
    const unsigned  cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
    const unsigned  cPages = cbAligned >> PAGE_SHIFT;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
    /* Pre-2.4.22 kernels lack vmap(); one contiguous low-memory block is
       needed there so phys_to_virt() can serve as the ring-0 mapping. */
    unsigned        cOrder = VBoxSupDrvOrder(cPages);
    struct page    *paPages;
#endif
    struct page   **papPages;
    unsigned        iPage;
    pgprot_t        pgFlags;
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;

    /*
     * Allocate array with page pointers.
     */
    pMem->u.mem.cPages = 0;
    pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
    if (!papPages)
        return SUPDRV_ERR_NO_MEMORY;

    /*
     * Allocate the pages. On partial failure u.mem.cPages records how far we
     * got so supdrvOSMemFreeOne() can clean up correctly.
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_HIGHUSER);
        if (!papPages[iPage])
        {
            pMem->u.mem.cPages = iPage;
            supdrvOSMemFreeOne(pMem);
            return SUPDRV_ERR_NO_MEMORY;
        }
    }

#else /* < 2.4.22 */
    paPages = alloc_pages(GFP_USER, cOrder);
    if (!paPages)
    {
        supdrvOSMemFreeOne(pMem);
        return SUPDRV_ERR_NO_MEMORY;
    }
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = &paPages[iPage];
        if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
        if (PageHighMem(papPages[iPage]))
            BUG();
    }
#endif
    pMem->u.mem.cPages = cPages;

    /*
     * Reserve the pages.
     */
    for (iPage = 0; iPage < cPages; iPage++)
        SetPageReserved(papPages[iPage]);

    /*
     * Create the Ring-0 mapping.
     */
    if (ppvR0)
    {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
# ifdef VM_MAP
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
# else
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
# endif
#else
        *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
#endif
    }
    if (pMem->pvR0 || !ppvR0)
    {
        /*
         * Create the ring3 mapping.
         */
        if (ppvR3)
            *ppvR3 = pMem->pvR3 = VBoxSupDrvMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
        if (pMem->pvR3 || !ppvR3)
            return 0;
        dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
    }
    else
        dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));

    supdrvOSMemFreeOne(pMem);
    return SUPDRV_ERR_NO_MEMORY;
}
1098
1099
1100/**
1101 * Get the physical addresses of the pages in the allocation.
1102 * This is called while inside bundle the spinlock.
1103 *
1104 * @param pMem Memory reference record of the memory.
1105 * @param paPages Where to store the page addresses.
1106 */
1107void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1108{
1109 unsigned iPage;
1110 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1111 {
1112 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1113 paPages[iPage].uReserved = 0;
1114 }
1115}
1116
1117
1118/**
1119 * Frees memory allocated by supdrvOSMemAllocOne().
1120 *
1121 * @param pMem Memory reference record of the memory to be free.
1122 */
1123void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1124{
1125 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1126 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1127
1128 /*
1129 * Unmap the user mapping (if any).
1130 * do_exit() destroys the mm before closing files.
1131 */
1132 if (pMem->pvR3 && current->mm)
1133 {
1134 down_write(&current->mm->mmap_sem);
1135 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1136 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1137 }
1138 pMem->pvR3 = NIL_RTR3PTR;
1139
1140 /*
1141 * Unmap the kernel mapping (if any).
1142 */
1143 if (pMem->pvR0)
1144 {
1145#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1146 vunmap(pMem->pvR0);
1147#endif
1148 pMem->pvR0 = NULL;
1149 }
1150
1151 /*
1152 * Free the physical pages.
1153 */
1154 if (pMem->u.mem.papPages)
1155 {
1156 struct page **papPages = pMem->u.mem.papPages;
1157 const unsigned cPages = pMem->u.mem.cPages;
1158 unsigned iPage;
1159
1160 /* Restore the page flags. */
1161 for (iPage = 0; iPage < cPages; iPage++)
1162 {
1163 ClearPageReserved(papPages[iPage]);
1164#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1165 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1166 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1167#endif
1168 }
1169
1170 /* Free the pages. */
1171#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1172 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1173 __free_page(papPages[iPage]);
1174#else
1175 if (cPages > 0)
1176 __free_pages(papPages[0], VBoxSupDrvOrder(cPages));
1177#endif
1178 /* Free the page pointer array. */
1179 kfree(papPages);
1180 pMem->u.mem.papPages = NULL;
1181 }
1182 pMem->u.mem.cPages = 0;
1183}
1184
1185
1186/**
1187 * Maps a range of pages into user space.
1188 *
1189 * @returns Pointer to the user space mapping on success.
1190 * @returns NULL on failure.
1191 * @param papPages Array of the pages to map.
1192 * @param cPages Number of pages to map.
1193 * @param fProt The mapping protection.
1194 * @param pgFlags The page level protection.
1195 */
1196static RTR3PTR VBoxSupDrvMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1197{
1198 int rc = SUPDRV_ERR_NO_MEMORY;
1199 unsigned long ulAddr;
1200
1201 /*
1202 * Allocate user space mapping.
1203 */
1204 down_write(&current->mm->mmap_sem);
1205 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1206 if (!(ulAddr & ~PAGE_MASK))
1207 {
1208 /*
1209 * Map page by page into the mmap area.
1210 * This is generic, paranoid and not very efficient.
1211 */
1212 int rc = 0;
1213 unsigned long ulAddrCur = ulAddr;
1214 unsigned iPage;
1215 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1216 {
1217#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1218 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1219 if (!vma)
1220 break;
1221#endif
1222
1223#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1224 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1225#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1226 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1227#else /* 2.4 */
1228 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1229#endif
1230 if (rc)
1231 break;
1232 }
1233
1234 /*
1235 * Successful?
1236 */
1237 if (iPage >= cPages)
1238 {
1239 up_write(&current->mm->mmap_sem);
1240 return ulAddr;
1241 }
1242
1243 /* no, cleanup! */
1244 if (rc)
1245 dprintf(("VBoxSupDrvMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1246 else
1247 dprintf(("VBoxSupDrvMapUser: find_vma failed!\n"));
1248
1249 MY_DO_MUNMAP(current->mm, ulAddr, cPages << PAGE_SHIFT);
1250 }
1251 else
1252 {
1253 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1254 rc = SUPDRV_ERR_NO_MEMORY;
1255 }
1256 up_write(&current->mm->mmap_sem);
1257
1258 return NIL_RTR3PTR;
1259}
1260
1261
/**
 * Initializes the GIP.
 *
 * Allocates and pins the page backing the Global Information Page,
 * hands it to the common initialization code, and prepares (but does
 * not start) the update timer(s).
 *
 * @returns negative errno.
 * @param pDevExt Instance data. GIP stuff may be updated.
 */
static int VBoxSupDrvInitGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    dma_addr_t HCPhys;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxSupDrvInitGip:\n"));

    /*
     * Allocate the page.
     */
    pPage = alloc_pages(GFP_USER, 0);
    if (!pPage)
    {
        dprintf(("VBoxSupDrvInitGip: failed to allocate the GIP page\n"));
        return -ENOMEM;
    }

    /*
     * Lock the page (keeps the VM/swap code away from it).
     */
    SetPageReserved(pPage);
    g_pGipPage = pPage;

    /*
     * Call common initialization routine.
     */
    HCPhys = page_to_phys(pPage);
    pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
    pDevExt->ulLastJiffies = jiffies;
#ifdef TICK_NSEC
    /* TICK_NSEC is the length of a jiffy in nanoseconds (more precise than 1000000/HZ). */
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
             TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#else
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
    dprintf(("VBoxSupDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
             (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#endif
    /* Update frequency is capped at 1kHz. */
    supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
                  HZ <= 1000 ? HZ : 1000);

    /*
     * Initialize the timer (armed later by the resume code).
     */
    init_timer(&g_GipTimer);
    g_GipTimer.data = (unsigned long)pDevExt;
    g_GipTimer.function = VBoxSupGipTimer;
    g_GipTimer.expires = jiffies;
#ifdef CONFIG_SMP
    /* Per-CPU timers used when the GIP runs in async TSC mode; the timer data
       is the CPU index/APIC id the timer belongs to. */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
    {
        pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
        pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
        pDevExt->aCPUs[i].iSmpProcessorId = -512;   /* presumably an 'unknown' marker - set for real in resume */
        init_timer(&pDevExt->aCPUs[i].Timer);
        pDevExt->aCPUs[i].Timer.data = i;
        pDevExt->aCPUs[i].Timer.function = VBoxSupGipTimerPerCpu;
        pDevExt->aCPUs[i].Timer.expires = jiffies;
    }
#endif

    return 0;
}
1334
1335
1336/**
1337 * Terminates the GIP.
1338 *
1339 * @returns negative errno.
1340 * @param pDevExt Instance data. GIP stuff may be updated.
1341 */
1342static int VBoxSupDrvTermGip(PSUPDRVDEVEXT pDevExt)
1343{
1344 struct page *pPage;
1345 PSUPGLOBALINFOPAGE pGip;
1346#ifdef CONFIG_SMP
1347 unsigned i;
1348#endif
1349 dprintf(("VBoxSupDrvTermGip:\n"));
1350
1351 /*
1352 * Delete the timer if it's pending.
1353 */
1354 if (timer_pending(&g_GipTimer))
1355 del_timer_sync(&g_GipTimer);
1356#ifdef CONFIG_SMP
1357 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1358 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1359 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1360#endif
1361
1362 /*
1363 * Uninitialize the content.
1364 */
1365 pGip = pDevExt->pGip;
1366 pDevExt->pGip = NULL;
1367 if (pGip)
1368 supdrvGipTerm(pGip);
1369
1370 /*
1371 * Free the page.
1372 */
1373 pPage = g_pGipPage;
1374 g_pGipPage = NULL;
1375 if (pPage)
1376 {
1377 ClearPageReserved(pPage);
1378 __free_pages(pPage, 0);
1379 }
1380
1381 return 0;
1382}
1383
/**
 * Timer callback function.
 *
 * Advances the monotonic timestamp by the number of jiffies elapsed since
 * the last tick and pushes the new time into the GIP. In ASYNC TSC mode
 * this is called on the primary CPU, and we're assuming that the CPU
 * remains online.
 *
 * @param ulUser The device extension pointer.
 */
static void VBoxSupGipTimer(unsigned long ulUser)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    unsigned long ulNow;
    unsigned long ulDiff;       /* jiffies elapsed since the previous tick */
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep interrupts off so the time update is not torn. */
    local_irq_save(SavedFlags);

    pDevExt = (PSUPDRVDEVEXT)ulUser;
    pGip = pDevExt->pGip;
    ulNow = jiffies;

#ifdef CONFIG_SMP
    if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
    {
        /* Async TSC mode: update the per-CPU record for the CPU we woke up on. */
        uint8_t iCPU = ASMGetApicId();
        ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
        pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
        u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
        u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
        pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
    }
    else
#endif /* CONFIG_SMP */
    {
        /* Synchronous mode (or UP kernel): a single global timestamp suffices. */
        ulDiff = ulNow - pDevExt->ulLastJiffies;
        pDevExt->ulLastJiffies = ulNow;
#ifdef TICK_NSEC
        u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
#else
        u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
        pDevExt->u64LastMonotime = u64Monotime;
    }
    if (RT_LIKELY(pGip))
        supdrvGipUpdate(pDevExt->pGip, u64Monotime);
    /* Re-arm unless suspended; aim for ~1ms granularity (delta 0 when HZ <= 1000). */
    if (RT_LIKELY(!pDevExt->fGIPSuspended))
        mod_timer(&g_GipTimer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));

    local_irq_restore(SavedFlags);
}
1439
1440
#ifdef CONFIG_SMP
/**
 * Timer callback function for the other CPUs.
 *
 * Each CPU has its own timer in async TSC mode; the timer data holds the
 * APIC id the timer was armed for, which must match the CPU it fires on
 * or the update is skipped with an error message.
 *
 * @param iTimerCPU The APIC ID of this timer.
 */
static void VBoxSupGipTimerPerCpu(unsigned long iTimerCPU)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    uint8_t iCPU;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep interrupts off so the time update is not torn. */
    local_irq_save(SavedFlags);

    pDevExt = &g_DevExt;
    pGip = pDevExt->pGip;
    iCPU = ASMGetApicId();

    /* RT_ELEMENTS is a compile-time size, so this is safe even before the pGip check. */
    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        if (RT_LIKELY(iTimerCPU == iCPU))
        {
            unsigned long ulNow = jiffies;
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
            pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
            pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            /* Re-arm unless suspended; aim for ~1ms granularity. */
            if (RT_LIKELY(!pDevExt->fGIPSuspended))
                mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d !=? timer-cpuid=%d)\n",
                   iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%d cpuid=%d\n",
               iCPU, RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);
}
#endif /* CONFIG_SMP */
1490
1491
1492/**
1493 * Maps the GIP into user space.
1494 *
1495 * @returns negative errno.
1496 * @param pDevExt Instance data.
1497 */
1498int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE *ppGip)
1499{
1500 int rc = 0;
1501 unsigned long ulAddr;
1502 unsigned long HCPhys = pDevExt->HCPhysGip;
1503 pgprot_t pgFlags;
1504 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1505 dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1506
1507 /*
1508 * Allocate user space mapping and put the physical pages into it.
1509 */
1510 down_write(&current->mm->mmap_sem);
1511 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1512 if (!(ulAddr & ~PAGE_MASK))
1513 {
1514#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1515 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1516#else
1517 int rc2 = 0;
1518 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1519 if (vma)
1520#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1521 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1522#else
1523 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1524#endif
1525 else
1526 {
1527 rc = SUPDRV_ERR_NO_MEMORY;
1528 dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1529 }
1530#endif
1531 if (rc2)
1532 {
1533 rc = SUPDRV_ERR_NO_MEMORY;
1534 dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1535 }
1536 }
1537 else
1538 {
1539 dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1540 rc = SUPDRV_ERR_NO_MEMORY;
1541 }
1542 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1543
1544 /*
1545 * Success?
1546 */
1547 if (!rc)
1548 {
1549 *ppGip = (PCSUPGLOBALINFOPAGE)ulAddr;
1550 dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1551 return 0;
1552 }
1553
1554 /*
1555 * Failure, cleanup and be gone.
1556 */
1557 if (ulAddr & ~PAGE_MASK)
1558 {
1559 down_write(&current->mm->mmap_sem);
1560 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1561 up_write(&current->mm->mmap_sem);
1562 }
1563
1564 dprintf2(("supdrvOSGipMap: returns %d\n", rc));
1565 return rc;
1566}
1567
1568
1569/**
1570 * Maps the GIP into user space.
1571 *
1572 * @returns negative errno.
1573 * @param pDevExt Instance data.
1574 */
1575int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PCSUPGLOBALINFOPAGE pGip)
1576{
1577 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1578 if (current->mm)
1579 {
1580 down_write(&current->mm->mmap_sem);
1581 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1582 up_write(&current->mm->mmap_sem);
1583 }
1584 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1585 return 0;
1586}
1587
1588
/**
 * Resumes the GIP updating.
 *
 * Clears the suspended flag and re-arms the update timer(s). In async TSC
 * mode the per-CPU timers are re-armed on their own CPUs through
 * smp_call_function().
 *
 * @param pDevExt Instance data.
 */
void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
{
    dprintf2(("supdrvOSGipResume:\n"));
    ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
#ifdef CONFIG_SMP
    if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
#endif
        mod_timer(&g_GipTimer, jiffies);
#ifdef CONFIG_SMP
    else
    {
        /* Async TSC: restart the primary timer here and the others on their CPUs. */
        mod_timer(&g_GipTimer, jiffies);
        smp_call_function(VBoxSupGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
    }
#endif
}
1610
1611
#ifdef CONFIG_SMP
/**
 * Callback for resuming GIP updating on the other CPUs.
 *
 * This is only used when the GIP is in async tsc mode. Records the SMP
 * processor id for this APIC id and restarts the per-CPU update timer.
 *
 * @param pvUser Pointer to the device instance.
 */
static void VBoxSupGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint8_t iApicId = ASMGetApicId();

    /* Guard against an APIC id outside the per-CPU array. */
    if (RT_UNLIKELY(iApicId >= RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        printk("vboxdrv: error: apicid=%d max=%d cpuid=%d\n",
               iApicId, RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
        return;
    }

    pDevExt->aCPUs[iApicId].iSmpProcessorId = smp_processor_id();
    mod_timer(&pDevExt->aCPUs[iApicId].Timer, jiffies);
}
#endif /* CONFIG_SMP */
1636
1637
1638/**
1639 * Suspends the GIP updating.
1640 *
1641 * @param pDevExt Instance data.
1642 */
1643void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1644{
1645#ifdef CONFIG_SMP
1646 unsigned i;
1647#endif
1648 dprintf2(("supdrvOSGipSuspend:\n"));
1649 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);
1650
1651 if (timer_pending(&g_GipTimer))
1652 del_timer_sync(&g_GipTimer);
1653#ifdef CONFIG_SMP
1654 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1655 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1656 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1657#endif
1658}
1659
1660
/**
 * Get the current CPU count.
 * @returns Number of cpus.
 */
unsigned VBOXCALL supdrvOSGetCPUCount(void)
{
#ifdef CONFIG_SMP
# ifdef num_present_cpus
    /* 2.6-style cpumask API. */
    return num_present_cpus();
# else
    /* Older SMP kernels export the count as a variable. */
    return smp_num_cpus;
# endif
#else
    /* UP kernel. */
    return 1;
#endif
}
1677
/**
 * Force async tsc mode.
 *
 * Never forced on Linux at present.
 * @todo add a module argument for this.
 */
bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
{
    return false;
}
1686
1687
1688/**
1689 * Converts a supdrv error code to an linux error code.
1690 *
1691 * @returns corresponding linux error code.
1692 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1693 */
1694static int VBoxSupDrvErr2LinuxErr(int rc)
1695{
1696 switch (rc)
1697 {
1698 case 0: return 0;
1699 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1700 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1701 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1702 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1703 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1704 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1705 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1706 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1707 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1708 case SUPDRV_ERR_IDT_FAILED: return -1000;
1709 }
1710
1711 return -EPERM;
1712}
1713
1714
/**
 * Ring-0 printf for the support library.
 *
 * Formats into a 512-byte stack buffer and forwards the result to printk();
 * longer messages are silently truncated.
 *
 * @returns 0.
 * @param pszFormat printf style format string.
 */
RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
{
#if 1
    va_list args;
    char szMsg[512];

    va_start(args, pszFormat);
    vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
    szMsg[sizeof(szMsg) - 1] = '\0';    /* paranoia: guarantee termination */
    printk("%s", szMsg);
    va_end(args);
#else
    /* Dead branch (see '#if 1' above); unfinished experiment, does not compile.
       forward to printf - needs some more GCC hacking to fix ebp... */
    __asm__ __volatile__ ("mov %0, %esp\n\t"
                          "jmp %1\n\t",
                          :: "r" ((uintptr_t)&pszFormat - 4),
                          "m" (printk));
#endif
    return 0;
}
1735
1736
1737/** Runtime assert implementation for Linux Ring-0. */
1738RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
1739{
1740 printk("!!Assertion Failed!!\n"
1741 "Expression: %s\n"
1742 "Location : %s(%d) %s\n",
1743 pszExpr, pszFile, uLine, pszFunction);
1744}
1745
1746
1747/** Runtime assert implementation for Linux Ring-0. */
1748RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1749{ /* forwarder. */
1750 va_list ap;
1751 char msg[256];
1752
1753 va_start(ap, pszFormat);
1754 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1755 msg[sizeof(msg) - 1] = '\0';
1756 printk("%s", msg);
1757 va_end(ap);
1758}
1759
1760
/* GCC C++ hack - stub for the personality routine referenced by g++ generated
   code, presumably just to satisfy the linker. */
unsigned __gxx_personality_v0 = 0xcccccccc;


/* Module entry and exit points (defined elsewhere in this file). */
module_init(VBoxSupDrvInit);
module_exit(VBoxSupDrvUnload);

MODULE_AUTHOR("InnoTek Systemberatung GmbH");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
/* Stringification helpers so the numeric SUPDRVIOC_VERSION expands into the version tag. */
#define xstr(s) str(s)
#define str(s) #s
MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette