VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 4800

Last change on this file since 4800 was 4800, checked in by vboxsync, 17 years ago

Redid the supdrv interface. works on windows and linux while the other OSes still needs some adjusting/testing. internal networking is temporarily broken as the SUPCallVMMR0Ex interface is being reworked (this is what all this is really about).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 57.9 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 * Some lines of code to disable the local APIC on x86_64 machines taken
16 * from a Mandriva patch by Gwenole Beauchesne <[email protected]>.
17 */
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include "SUPDRV.h"
23#include "version-generated.h"
24
25#include <iprt/assert.h>
26#include <iprt/spinlock.h>
27#include <iprt/semaphore.h>
28#include <iprt/initterm.h>
29#include <iprt/process.h>
30#include <iprt/err.h>
31#include <iprt/mem.h>
32
33#include <linux/module.h>
34#include <linux/kernel.h>
35#include <linux/init.h>
36#include <linux/fs.h>
37#include <linux/mm.h>
38#include <linux/pagemap.h>
39#include <linux/sched.h>
40#include <linux/slab.h>
41#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
42# include <linux/jiffies.h>
43#endif
44#include <asm/mman.h>
45#include <asm/io.h>
46#include <asm/uaccess.h>
47#ifdef CONFIG_DEVFS_FS
48# include <linux/devfs_fs_kernel.h>
49#endif
50#ifdef CONFIG_VBOXDRV_AS_MISC
51# include <linux/miscdevice.h>
52#endif
53#ifdef CONFIG_X86_LOCAL_APIC
54# include <asm/apic.h>
55# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
56# include <asm/nmi.h>
57# endif
58#endif
59
60#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
61# ifndef page_to_pfn
62# define page_to_pfn(page) ((page) - mem_map)
63# endif
64# include <asm/pgtable.h>
65# define global_flush_tlb __flush_tlb_global
66#endif
67
68#include <iprt/mem.h>
69
70
71/* devfs defines */
72#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
73# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
74
75# define VBOX_REGISTER_DEVFS() \
76({ \
77 void *rc = NULL; \
78 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
79 S_IFCHR | S_IRUGO | S_IWUGO, \
80 DEVICE_NAME) == 0) \
81 rc = (void *)' '; /* return not NULL */ \
82 rc; \
83 })
84
85# define VBOX_UNREGISTER_DEVFS(handle) \
86 devfs_remove(DEVICE_NAME);
87
88# else /* < 2.6.0 */
89
90# define VBOX_REGISTER_DEVFS() \
91 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
92 DEVICE_MAJOR, 0, \
93 S_IFCHR | S_IRUGO | S_IWUGO, \
94 &gFileOpsVBoxDrv, NULL)
95
96# define VBOX_UNREGISTER_DEVFS(handle) \
97 if (handle != NULL) \
98 devfs_unregister(handle)
99
100# endif /* < 2.6.0 */
#endif /* CONFIG_DEVFS_FS && !CONFIG_VBOXDRV_AS_MISC */
102
103#ifndef CONFIG_VBOXDRV_AS_MISC
104# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
105# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
106# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
107# else
108# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
109# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
110# endif
111#endif /* !CONFIG_VBOXDRV_AS_MISC */
112
113
114#ifdef CONFIG_X86_HIGH_ENTRY
115# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
116#endif
117
118/*
119 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
120 */
121#if defined(RT_ARCH_AMD64)
122# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
123#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
124# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
125#else
126# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
127#endif
128
129/*
130 * The redhat hack section.
131 * - The current hacks are for 2.4.21-15.EL only.
132 */
133#ifndef NO_REDHAT_HACKS
134/* accounting. */
135# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
136# ifdef VM_ACCOUNT
137# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
138# endif
139# endif
140
141/* backported remap_page_range. */
142# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
143# include <asm/tlb.h>
144# ifdef tlb_vma /* probably not good enough... */
145# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
146# endif
147# endif
148
149# ifndef RT_ARCH_AMD64
150/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
151 * the page attributes from PAGE_KERNEL to something else, because there appears
152 * to be a bug in one of the many patches that redhat applied.
153 * It should be safe to do this on less buggy linux kernels too. ;-)
154 */
155# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
156 do { \
157 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
158 change_page_attr(pPages, cPages, prot); \
159 change_page_attr(pPages, cPages, prot); \
160 } while (0)
161# endif
162#endif /* !NO_REDHAT_HACKS */
163
164
165#ifndef MY_DO_MUNMAP
166# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
167#endif
168
169#ifndef MY_CHANGE_PAGE_ATTR
170# ifdef RT_ARCH_AMD64 /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
171# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
172 do { \
173 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
174 change_page_attr(pPages, cPages, prot); \
175 } while (0)
176# else
177# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
178# endif
179#endif
180
181
182/** @def ONE_MSEC_IN_JIFFIES
183 * The number of jiffies that make up 1 millisecond. This is only actually used
184 * when HZ is > 1000. */
185#if HZ <= 1000
186# define ONE_MSEC_IN_JIFFIES 0
187#elif !(HZ % 1000)
188# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
189#else
190# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
191# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
192#endif
193
194#ifdef CONFIG_X86_LOCAL_APIC
195
196/* If an NMI occurs while we are inside the world switcher the machine will
197 * crash. The Linux NMI watchdog generates periodic NMIs increasing a counter
198 * which is compared with another counter increased in the timer interrupt
199 * handler. We disable the NMI watchdog.
200 *
201 * - Linux >= 2.6.21: The watchdog is disabled by default on i386 and x86_64.
202 * - Linux < 2.6.21: The watchdog is normally enabled by default on x86_64
203 * and disabled on i386.
204 */
205# if defined(RT_ARCH_AMD64)
206# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
207# define DO_DISABLE_NMI 1
208# endif
209# endif
210
211# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
212extern int nmi_active;
213# define nmi_atomic_read(P) *(P)
214# define nmi_atomic_set(P, V) *(P) = (V)
215# define nmi_atomic_dec(P) nmi_atomic_set(P, 0)
216# else
217# define nmi_atomic_read(P) atomic_read(P)
218# define nmi_atomic_set(P, V) atomic_set(P, V)
219# define nmi_atomic_dec(P) atomic_dec(P)
220# endif
221
222# ifndef X86_FEATURE_ARCH_PERFMON
223# define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
224# endif
225# ifndef MSR_ARCH_PERFMON_EVENTSEL0
226# define MSR_ARCH_PERFMON_EVENTSEL0 0x186
227# endif
228# ifndef ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
229# define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
230# endif
231
232#endif /* CONFIG_X86_LOCAL_APIC */
233
234
235/*******************************************************************************
236* Defined Constants And Macros *
237*******************************************************************************/
/**
 * Device extension & session data association structure.
 */
241static SUPDRVDEVEXT g_DevExt;
242
243/** Timer structure for the GIP update. */
244static struct timer_list g_GipTimer;
245/** Pointer to the page structure for the GIP. */
246struct page *g_pGipPage;
247
248/** Registered devfs device handle. */
249#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
250# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
251static void *g_hDevFsVBoxDrv = NULL;
252# else
253static devfs_handle_t g_hDevFsVBoxDrv = NULL;
254# endif
255#endif
256
257#ifndef CONFIG_VBOXDRV_AS_MISC
258/** Module major number */
259#define DEVICE_MAJOR 234
260/** Saved major device number */
261static int g_iModuleMajor;
262#endif /* !CONFIG_VBOXDRV_AS_MISC */
263
264/** The module name. */
265#define DEVICE_NAME "vboxdrv"
266
267#ifdef RT_ARCH_AMD64
268/**
269 * Memory for the executable memory heap (in IPRT).
270 */
271extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
272__asm__(".section execmemory, \"awx\", @progbits\n\t"
273 ".align 32\n\t"
274 ".globl g_abExecMemory\n"
275 "g_abExecMemory:\n\t"
276 ".zero 1572864\n\t"
277 ".type g_abExecMemory, @object\n\t"
278 ".size g_abExecMemory, 1572864\n\t"
279 ".text\n\t");
280#endif
281
282
283/*******************************************************************************
284* Internal Functions *
285*******************************************************************************/
286static int VBoxDrvLinuxInit(void);
287static void VBoxDrvLinuxUnload(void);
288static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp);
289static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp);
290static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
291static int VBoxDrvLinuxIOCtlSlow(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
292#ifndef USE_NEW_OS_INTERFACE_FOR_MM
293static RTR3PTR VBoxDrvLinuxMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
294#endif /* !USE_NEW_OS_INTERFACE_FOR_MM */
295static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt);
296static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt);
297static void VBoxDrvLinuxGipTimer(unsigned long ulUser);
298#ifdef CONFIG_SMP
299static void VBoxDrvLinuxGipTimerPerCpu(unsigned long ulUser);
300static void VBoxDrvLinuxGipResumePerCpu(void *pvUser);
301#endif
302static int VBoxDrvLinuxErr2LinuxErr(int);
303
304
305/** The file_operations structure. */
306static struct file_operations gFileOpsVBoxDrv =
307{
308 owner: THIS_MODULE,
309 open: VBoxDrvLinuxCreate,
310 release: VBoxDrvLinuxClose,
311 ioctl: VBoxDrvLinuxIOCtl,
312};
313
314#ifdef CONFIG_VBOXDRV_AS_MISC
315/** The miscdevice structure. */
316static struct miscdevice gMiscDevice =
317{
318 minor: MISC_DYNAMIC_MINOR,
319 name: DEVICE_NAME,
320 fops: &gFileOpsVBoxDrv,
321# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
322 LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
323 devfs_name: DEVICE_NAME,
324# endif
325};
326#endif
327
328#ifdef CONFIG_X86_LOCAL_APIC
329# ifdef DO_DISABLE_NMI
330
/** Stop the AMD K7/K8 NMI watchdog by clearing performance event select 0 (x86_64 only). */
static int stop_k7_watchdog(void)
{
    /* Zeroing EVNTSEL0 disables the perf counter that drives the watchdog NMI. */
    wrmsr(MSR_K7_EVNTSEL0, 0, 0);
    return 1; /* always reports "stopped" so the caller decrements nmi_active */
}
337
/** Stop Intel P4 NMI watchdog (x86_64 only). */
static int stop_p4_watchdog(void)
{
    /* Clear both IQ counter configuration registers and the CRU ESCR feeding them. */
    wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
    wrmsr(MSR_P4_IQ_CCCR1, 0, 0);
    wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
    return 1; /* always reports "stopped" */
}
346
/** Stop the watchdog via the architectural perfmon interface (CPUID leaf 10 / 0AH). */
static int stop_intel_arch_watchdog(void)
{
    unsigned ebx;

    /* CPUID.0AH:EBX bit 0 set means the unhalted-core-cycles event is NOT available,
       i.e. the event select MSR is what the watchdog would be using -- clear it. */
    ebx = cpuid_ebx(10);
    if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
        wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
    return 1;
}
357
/** Stop the NMI watchdog on the current CPU (intended to run via on_each_cpu). */
static void vbox_stop_apic_nmi_watchdog(void *unused)
{
    int stopped = 0;

    /* only support LOCAL and IO APICs for now */
    if ((nmi_watchdog != NMI_LOCAL_APIC) &&
        (nmi_watchdog != NMI_IO_APIC))
        return;

    if (nmi_watchdog == NMI_LOCAL_APIC)
    {
        /* Pick the vendor-specific way of turning the watchdog counter off. */
        switch (boot_cpu_data.x86_vendor)
        {
            case X86_VENDOR_AMD:
                /* "Screwdriver" models are left alone (mirrors the kernel's own NMI code). */
                if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
                    return;
                stopped = stop_k7_watchdog();
                break;
            case X86_VENDOR_INTEL:
                /* Prefer the architectural perfmon interface when the CPU advertises it. */
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                {
                    stopped = stop_intel_arch_watchdog();
                    break;
                }
                stopped = stop_p4_watchdog();
                break;
            default:
                return;
        }
    }

    /* Account for one less CPU with an active watchdog. */
    if (stopped)
        nmi_atomic_dec(&nmi_active);
}
393
/** Disable the LAPIC NMI watchdog on all CPUs and mark it globally inactive. */
static void disable_lapic_nmi_watchdog(void)
{
    BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

    /* Nothing to do if no CPU currently has it active. */
    if (nmi_atomic_read(&nmi_active) <= 0)
        return;

    /* Run the per-CPU stop routine everywhere, waiting for completion. */
    on_each_cpu(vbox_stop_apic_nmi_watchdog, NULL, 1, 1);

    /* Every CPU must have dropped its reference by now. */
    BUG_ON(nmi_atomic_read(&nmi_active) != 0);

    /* tell do_nmi() and others that we're not active any more */
    nmi_watchdog = NMI_NONE;
}
409
/** Per-CPU worker: mask the performance-counter LVT entry if it delivers NMIs. */
static void nmi_cpu_shutdown(void * dummy)
{
    unsigned int vERR, vPC;

    vPC = apic_read(APIC_LVTPC);

    /* Only touch the entry when it is an unmasked NMI source. */
    if ((GET_APIC_DELIVERY_MODE(vPC) == APIC_MODE_NMI) && !(vPC & APIC_LVT_MASKED))
    {
        /* Mask LVTERR around the LVTPC write to avoid spurious error interrupts,
           then restore it -- the write order here is deliberate. */
        vERR = apic_read(APIC_LVTERR);
        apic_write(APIC_LVTERR, vERR | APIC_LVT_MASKED);
        apic_write(APIC_LVTPC, vPC | APIC_LVT_MASKED);
        apic_write(APIC_LVTERR, vERR);
    }
}
425
/** Mask the perf-counter NMI source on every CPU, including the calling one. */
static void nmi_shutdown(void)
{
    /* (wait=1 so all CPUs are done before we return; retry=0) */
    on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
}
430# endif /* DO_DISABLE_NMI */
431#endif /* CONFIG_X86_LOCAL_APIC */
432
/**
 * Initialize module.
 *
 * Refuses to load while an NMI watchdog is (or could become) active, then
 * registers the character/misc device, initializes IPRT, the device
 * extension and finally the GIP page.
 *
 * @returns appropriate status code (0 on success, negative errno on failure).
 */
static int __init VBoxDrvLinuxInit(void)
{
    int rc;

    dprintf(("VBoxDrv::ModuleInit\n"));

#ifdef CONFIG_X86_LOCAL_APIC
    /*
     * If an NMI occurs while we are inside the world switcher the machine will crash.
     * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
     * compared with another counter increased in the timer interrupt handler. Therefore
     * we don't allow to setup an NMI watchdog.
     */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
    /*
     * First test: NMI activated? Only works with Linux 2.6 -- 2.4 does not export
     * the nmi_watchdog variable.
     */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
      (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
#   ifdef DO_DISABLE_NMI
    if (nmi_atomic_read(&nmi_active) > 0)
    {
        printk(KERN_INFO DEVICE_NAME ": Trying to deactivate the NMI watchdog...\n");

        switch (nmi_watchdog)
        {
            case NMI_LOCAL_APIC:
                disable_lapic_nmi_watchdog();
                break;
            case NMI_NONE:
                nmi_atomic_dec(&nmi_active);
                break;
        }

        if (nmi_atomic_read(&nmi_active) == 0)
        {
            nmi_shutdown();
            printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
        }
        else
            printk(KERN_INFO DEVICE_NAME ": Failed!\n");
    }
#   endif /* DO_DISABLE_NMI */

    /*
     * Permanent IO_APIC mode active? No way to handle this!
     */
    if (nmi_watchdog == NMI_IO_APIC)
    {
        printk(KERN_ERR DEVICE_NAME
               ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
               DEVICE_NAME
               ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
               DEVICE_NAME
               ": command line.\n");
        return -EINVAL;
    }

    /*
     * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again
     */
    nmi_atomic_set(&nmi_active, -1);
    printk(KERN_INFO DEVICE_NAME ": Trying to deactivate the NMI watchdog permanently...\n");

    /*
     * Now fall through and see if it actually was enabled before. If so, fail
     * as we cannot deactivate it cleanly from here.
     */
#  else /* < 2.6.19 */
    /*
     * Older 2.6 kernels: nmi_watchdog is not initialized by default
     */
    if (nmi_watchdog != NMI_NONE)
        goto nmi_activated;
#  endif
# endif /* >= 2.6.0 */

    /*
     * Second test: Interrupt generated by performance counter not masked and can
     * generate an NMI. Works also with Linux 2.4.
     */
    {
        unsigned int v, ver, maxlvt;

        v = apic_read(APIC_LVR);
        ver = GET_APIC_VERSION(v);
        /* 82489DXs do not report # of LVT entries. */
        maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
        if (maxlvt >= 4)
        {
            /* Read status of performance counter IRQ vector */
            v = apic_read(APIC_LVTPC);

            /* performance counter generates NMI and is not masked? */
            if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
            {
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
     (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
                       DEVICE_NAME
                       ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# else /* < 2.6.19 */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
nmi_activated:
#  endif
                printk(KERN_ERR DEVICE_NAME
                       ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
                       DEVICE_NAME
                       ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# endif /* >= 2.6.19 */
            }
        }
    }
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
# endif /* >= 2.6.19 */
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_VBOXDRV_AS_MISC
    /* Let the misc-device framework assign a dynamic minor for the device node. */
    rc = misc_register(&gMiscDevice);
    if (rc)
    {
        printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
        return rc;
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
    /*
     * Register character device.
     */
    g_iModuleMajor = DEVICE_MAJOR;
    rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
    if (rc < 0)
    {
        dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
        return rc;
    }

    /*
     * Save returned module major number.
     * (A zero DEVICE_MAJOR means the kernel picked one and returned it in rc.)
     */
    if (DEVICE_MAJOR != 0)
        g_iModuleMajor = DEVICE_MAJOR;
    else
        g_iModuleMajor = rc;
    rc = 0;

#ifdef CONFIG_DEVFS_FS
    /*
     * Register a device entry
     */
    g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
    if (g_hDevFsVBoxDrv == NULL)
    {
        dprintf(("devfs_register failed!\n"));
        rc = -EINVAL;
    }
#endif
#endif /* !CONFIG_VBOXDRV_AS_MISC */
    if (!rc)
    {
        /*
         * Initialize the runtime.
         * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
         */
        rc = RTR0Init(0);
        if (RT_SUCCESS(rc))
        {
#ifdef RT_ARCH_AMD64
            rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
#endif
            /*
             * Initialize the device extension.
             */
            if (RT_SUCCESS(rc))
                rc = supdrvInitDevExt(&g_DevExt);
            if (!rc)
            {
                /*
                 * Create the GIP page.
                 */
                rc = VBoxDrvLinuxInitGip(&g_DevExt);
                if (!rc)
                {
                    /* Fully initialized -- this is the only success exit. */
                    dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
                    return rc;
                }

                supdrvDeleteDevExt(&g_DevExt);
            }
            else
                rc = -EINVAL;
            RTR0Term();
        }
        else
            rc = -EINVAL;

        /*
         * Failed, cleanup and return the error code.
         */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
        VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif
    }
#ifdef CONFIG_VBOXDRV_AS_MISC
    misc_deregister(&gMiscDevice);
    dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
#else
    VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
#endif
    return rc;
}
654
655
/**
 * Unload the module.
 *
 * Reverses VBoxDrvLinuxInit: unregisters the device node, destroys the GIP,
 * deletes the device extension and terminates IPRT.
 */
static void __exit VBoxDrvLinuxUnload(void)
{
    int rc;
    dprintf(("VBoxDrvLinuxUnload\n"));

    /*
     * I Don't think it's possible to unload a driver which processes have
     * opened, at least we'll blindly assume that here.
     */
#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_deregister(&gMiscDevice);
    if (rc < 0)
    {
        dprintf(("misc_deregister failed with rc=%#x\n", rc));
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
#ifdef CONFIG_DEVFS_FS
    /*
     * Unregister a device entry
     */
    VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif // devfs
    rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    if (rc < 0)
    {
        dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
    }
#endif /* !CONFIG_VBOXDRV_AS_MISC */

    /*
     * Destroy GIP, delete the device extension and terminate IPRT.
     */
    VBoxDrvLinuxTermGip(&g_DevExt);
    supdrvDeleteDevExt(&g_DevExt);
    RTR0Term();
}
695
696
697/**
698 * Device open. Called on open /dev/vboxdrv
699 *
700 * @param pInode Pointer to inode info structure.
701 * @param pFilp Associated file pointer.
702 */
703static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp)
704{
705 int rc;
706 PSUPDRVSESSION pSession;
707 dprintf(("VBoxDrvLinuxCreate: pFilp=%p\n", pFilp));
708
709 /*
710 * Call common code for the rest.
711 */
712 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
713 if (!rc)
714 {
715 pSession->Uid = current->euid;
716 pSession->Gid = current->egid;
717 pSession->Process = RTProcSelf();
718 pSession->R0Process = RTR0ProcHandleSelf();
719 }
720
721 dprintf(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
722 pFilp->private_data = pSession;
723
724 return VBoxDrvLinuxErr2LinuxErr(rc);
725}
726
727
/**
 * Close device.
 *
 * @returns 0 always.
 * @param   pInode  Pointer to inode info structure.
 * @param   pFilp   Associated file pointer; private_data holds the session
 *                  created in VBoxDrvLinuxCreate.
 */
static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp)
{
    dprintf(("VBoxDrvLinuxClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
    /* Tear down the session and clear the link so it cannot be used again. */
    supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
    pFilp->private_data = NULL;
    return 0;
}
741
742
/**
 * Device I/O Control entry point.
 *
 * Dispatches the three high-speed requests directly to supdrvIOCtlFast() and
 * everything else to the buffering slow path.
 *
 * @returns VBox status (fast path) or 0/negative errno (slow path).
 * @param   pInode  Pointer to inode info structure.
 * @param   pFilp   Associated file pointer.
 * @param   uCmd    The function specified to ioctl().
 * @param   ulArg   The argument specified to ioctl().
 */
static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
{
#ifdef VBOX_WITHOUT_IDT_PATCHING
    /*
     * Deal with the high-speed IOCtls that take their arguments from
     * the session and uCmd, and only return a VBox status code.
     */
    if (RT_LIKELY(   uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
                  || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
                  || uCmd == SUP_IOCTL_FAST_DO_NOP))
        return supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data); /* fix: was 'iCmd',
                                       an undeclared identifier -- would not compile with
                                       VBOX_WITHOUT_IDT_PATCHING defined. */
#endif
    return VBoxDrvLinuxIOCtlSlow(pInode, pFilp, uCmd, ulArg);
}
765
766
/**
 * Device I/O Control entry point, slow (buffering) variant.
 *
 * Validates the user request header, buffers the request in kernel memory,
 * hands it to the common supdrvIOCtl() code and copies the output back.
 *
 * @returns 0 on success, negative errno on failure.
 * @param   pInode  Pointer to inode info structure.
 * @param   pFilp   Associated file pointer.
 * @param   uCmd    The function specified to ioctl().
 * @param   ulArg   The argument specified to ioctl(); user-space pointer to a
 *                  SUPREQHDR-prefixed request buffer.
 */
static int VBoxDrvLinuxIOCtlSlow(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
{
    int rc;
    SUPREQHDR Hdr;
    PSUPREQHDR pHdr;
    uint32_t cbBuf;

    dprintf2(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p\n", pFilp, uCmd, (void *)ulArg));

    /*
     * Read the header.
     */
    if (RT_UNLIKELY(copy_from_user(&Hdr, (void *)ulArg, sizeof(Hdr))))
    {
        dprintf(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x.\n", ulArg, uCmd));
        return -EFAULT;
    }
    /* The magic guards against garbage requests and version mismatches. */
    if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
    {
        dprintf(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
        return -EINVAL;
    }

    /*
     * Buffer the request.
     * The buffer must be big enough for both the input and the output payload.
     */
    cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
    if (RT_UNLIKELY(cbBuf > _1M*16))
    {
        dprintf(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
        return -E2BIG;
    }
    /* A size encoded in the ioctl number (when non-zero) must agree with the header. */
    if (RT_UNLIKELY(cbBuf != _IOC_SIZE(uCmd) && _IOC_SIZE(uCmd)))
    {
        dprintf(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x.\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
        return -EINVAL;
    }
    pHdr = RTMemAlloc(cbBuf);
    if (RT_UNLIKELY(!pHdr))
    {
        OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x.\n", cbBuf, uCmd));
        return -ENOMEM;
    }
    /* Copy the whole input payload (cbIn <= cbBuf by construction of cbBuf). */
    if (RT_UNLIKELY(copy_from_user(pHdr, (void *)ulArg, Hdr.cbIn)))
    {
        dprintf(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x.\n", ulArg, Hdr.cbIn, uCmd));
        RTMemFree(pHdr);
        return -EFAULT;
    }

    /*
     * Process the IOCtl.
     */
    rc = supdrvIOCtl(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data, pHdr);

    /*
     * Copy ioctl data and output buffer back to user space.
     */
    if (RT_LIKELY(!rc))
    {
        uint32_t cbOut = pHdr->cbOut;
        /* Clamp a bogus output size rather than overrunning the user buffer. */
        if (RT_UNLIKELY(cbOut > cbBuf))
        {
            OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
            cbOut = cbBuf;
        }
        if (RT_UNLIKELY(copy_to_user((void *)ulArg, pHdr, cbOut)))
        {
            /* this is really bad! */
            OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
            rc = -EFAULT;
        }
    }
    else
    {
        dprintf(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
        rc = -EINVAL;
    }
    RTMemFree(pHdr);

    dprintf2(("VBoxDrvLinuxIOCtl: returns %d\n", rc));
    return rc;
}
858
859
/**
 * Initializes any OS specific object creator fields.
 * No-op on Linux; both arguments are deliberately unused.
 */
void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
{
    NOREF(pObj);
    NOREF(pSession);
}
868
869
/**
 * Checks if the session can access the object.
 *
 * Linux has no OS specific access policy, so this never makes a decision
 * and the caller always falls back to the default policy.
 *
 * @returns true if a decision has been made.
 * @returns false if the default access policy should be applied.
 *
 * @param   pObj        The object in question.
 * @param   pSession    The session wanting to access the object.
 * @param   pszObjName  The object name, can be NULL.
 * @param   prc         Where to store the result when returning true.
 */
bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
{
    NOREF(pObj);
    NOREF(pSession);
    NOREF(pszObjName);
    NOREF(prc);
    return false;
}
889
890
891#ifndef USE_NEW_OS_INTERFACE_FOR_MM
892
/**
 * Computes the allocation order for a page count, i.e. the smallest order
 * such that 2^order >= cPages (some functions allocate 2^order pages).
 *
 * @returns order.
 * @param   cPages  Number of pages.
 */
static int VBoxDrvOrder(unsigned long cPages)
{
    unsigned long cLeft = cPages;
    int cShift = 0;

    /* floor(log2(cPages)): count how many times the value can be halved. */
    while (cLeft >>= 1)
        ++cShift;

    /* Round up unless cPages is an exact power of two. */
    if (cPages & ~(1 << cShift))
        ++cShift;

    return cShift;
}
911
912
913/**
914 * OS Specific code for locking down memory.
915 *
916 * @returns 0 on success.
917 * @returns SUPDRV_ERR_* on failure.
918 * @param pMem Pointer to memory.
919 * This is not linked in anywhere.
920 * @param paPages Array which should be filled with the address of the physical pages.
921 *
922 * @remark See sgl_map_user_pages() for an example of an similar function.
923 */
924int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
925{
926 int rc;
927 struct page **papPages;
928 unsigned iPage;
929 unsigned cPages = pMem->cb >> PAGE_SHIFT;
930 unsigned long pv = (unsigned long)pMem->pvR3;
931 struct vm_area_struct **papVMAs;
932
933 /*
934 * Allocate page pointer array.
935 */
936 papPages = vmalloc(cPages * sizeof(*papPages));
937 if (!papPages)
938 return SUPDRV_ERR_NO_MEMORY;
939
940 /*
941 * Allocate the VMA pointer array.
942 */
943 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
944 if (!papVMAs)
945 return SUPDRV_ERR_NO_MEMORY;
946
947 /*
948 * Get user pages.
949 */
950 down_read(&current->mm->mmap_sem);
951 rc = get_user_pages(current, /* Task for fault acounting. */
952 current->mm, /* Whose pages. */
953 (unsigned long)pv, /* Where from. */
954 cPages, /* How many pages. */
955 1, /* Write to memory. */
956 0, /* force. */
957 papPages, /* Page array. */
958 papVMAs); /* vmas */
959 if (rc != cPages)
960 {
961 up_read(&current->mm->mmap_sem);
962 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
963 return SUPDRV_ERR_LOCK_FAILED;
964 }
965
966 for (iPage = 0; iPage < cPages; iPage++)
967 flush_dcache_page(papPages[iPage]);
968 up_read(&current->mm->mmap_sem);
969
970 pMem->u.locked.papPages = papPages;
971 pMem->u.locked.cPages = cPages;
972
973 /*
974 * Get addresses, protect against fork()
975 */
976 for (iPage = 0; iPage < cPages; iPage++)
977 {
978 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
979 paPages[iPage].uReserved = 0;
980 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
981 }
982
983 vfree(papVMAs);
984
985 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
986 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
987 return 0;
988}
989
990
/**
 * Unlocks the memory pointed to by pv.
 *
 * @param   pMem  Pointer to memory to unlock.
 *
 * @remark  See sgl_unmap_user_pages() for an example of a similar function.
 */
void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
{
    unsigned iPage;
    dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
              pMem->pvR3, pMem->cb, pMem->u.locked.papPages));

    /*
     * Loop thru the pages and release them.
     */
    for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
    {
        /* Mark non-reserved pages dirty so modified contents aren't lost on reclaim. */
        if (!PageReserved(pMem->u.locked.papPages[iPage]))
            SetPageDirty(pMem->u.locked.papPages[iPage]);
        /* Drop the reference taken by get_user_pages() in supdrvOSLockMemOne(). */
        page_cache_release(pMem->u.locked.papPages[iPage]);
    }

    /* free the page array */
    vfree(pMem->u.locked.papPages);
    pMem->u.locked.cPages = 0;
}
1018
1019
1020/**
1021 * OS Specific code for allocating page aligned memory with continuous fixed
1022 * physical paged backing.
1023 *
1024 * @returns 0 on success.
1025 * @returns SUPDRV_ERR_* on failure.
1026 * @param pMem Memory reference record of the memory to be allocated.
1027 * (This is not linked in anywhere.)
1028 * @param ppvR0 Where to store the virtual address of the ring-0 mapping. (optional)
1029 * @param ppvR3 Where to store the virtual address of the ring-3 mapping.
1030 * @param pHCPhys Where to store the physical address.
1031 */
1032int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1033{
1034 struct page *paPages;
1035 unsigned iPage;
1036 unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
1037 unsigned cPages = cbAligned >> PAGE_SHIFT;
1038 unsigned cOrder = VBoxDrvOrder(cPages);
1039 unsigned long ulAddr;
1040 dma_addr_t HCPhys;
1041 int rc = 0;
1042 pgprot_t pgFlags;
1043 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
1044
1045 Assert(ppvR3);
1046 Assert(pHCPhys);
1047
1048 /*
1049 * Allocate page pointer array.
1050 */
1051#ifdef RT_ARCH_AMD64 /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
1052 paPages = alloc_pages(GFP_DMA, cOrder);
1053#else
1054 paPages = alloc_pages(GFP_USER, cOrder);
1055#endif
1056 if (!paPages)
1057 return SUPDRV_ERR_NO_MEMORY;
1058
1059 /*
1060 * Lock the pages.
1061 */
1062 for (iPage = 0; iPage < cPages; iPage++)
1063 {
1064 SetPageReserved(&paPages[iPage]);
1065 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1066 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
1067#ifdef DEBUG
1068 if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
1069 {
1070 dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
1071 iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
1072 BUG();
1073 }
1074#endif
1075 }
1076 HCPhys = page_to_phys(paPages);
1077
1078 /*
1079 * Allocate user space mapping and put the physical pages into it.
1080 */
1081 down_write(&current->mm->mmap_sem);
1082 ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
1083 if (!(ulAddr & ~PAGE_MASK))
1084 {
1085#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1086 int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
1087#else
1088 int rc2 = 0;
1089 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1090 if (vma)
1091#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1092 rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
1093#else
1094 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
1095#endif
1096 else
1097 {
1098 rc = SUPDRV_ERR_NO_MEMORY;
1099 dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
1100 }
1101#endif
1102 if (rc2)
1103 {
1104 rc = SUPDRV_ERR_NO_MEMORY;
1105 dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
1106 }
1107 }
1108 else
1109 {
1110 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1111 rc = SUPDRV_ERR_NO_MEMORY;
1112 }
1113 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1114
1115 /*
1116 * Success?
1117 */
1118 if (!rc)
1119 {
1120 *pHCPhys = HCPhys;
1121 *ppvR3 = ulAddr;
1122 if (ppvR0)
1123 *ppvR0 = (void *)ulAddr;
1124 pMem->pvR3 = ulAddr;
1125 pMem->pvR0 = NULL;
1126 pMem->u.cont.paPages = paPages;
1127 pMem->u.cont.cPages = cPages;
1128 pMem->cb = cbAligned;
1129
1130 dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
1131 pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
1132 global_flush_tlb();
1133 return 0;
1134 }
1135
1136 /*
1137 * Failure, cleanup and be gone.
1138 */
1139 down_write(&current->mm->mmap_sem);
1140 if (ulAddr & ~PAGE_MASK)
1141 MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
1142 for (iPage = 0; iPage < cPages; iPage++)
1143 {
1144 ClearPageReserved(&paPages[iPage]);
1145 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1146 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
1147 }
1148 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1149 __free_pages(paPages, cOrder);
1150
1151 global_flush_tlb();
1152 return rc;
1153}
1154
1155
/**
 * Frees contiguous memory allocated by supdrvOSContAllocOne().
 *
 * Removes the ring-3 mapping (if the mm still exists), restores the page
 * attributes and releases the higher-order page allocation.
 *
 * @param   pMem    Memory reference record of the memory to be freed.
 */
void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
{
    unsigned iPage;

    dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
              pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));

    /*
     * do_exit() destroys the mm before closing files.
     * I really hope it cleans up our stuff properly...
     */
    if (current->mm)
    {
        down_write(&current->mm->mmap_sem);
        MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
        up_write(&current->mm->mmap_sem); /* check when we can leave this. */
    }

    /*
     * Change page attributes freeing the pages.
     * Undoes the SetPageReserved() / MY_PAGE_KERNEL_EXEC done at allocation.
     */
    for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
    {
        ClearPageReserved(&pMem->u.cont.paPages[iPage]);
        if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
    }
    /* The pages came from one alloc_pages() call, so free them as one order-N block. */
    __free_pages(pMem->u.cont.paPages, VBoxDrvOrder(pMem->u.cont.cPages));

    pMem->u.cont.cPages = 0;
}
1192
1193
/**
 * Allocates memory which mapped into both kernel and user space.
 * The returned memory is page aligned and so is the allocation.
 *
 * On kernels >= 2.4.22 the pages are allocated individually (may be highmem
 * and non-contiguous); older kernels fall back to one contiguous order-N
 * allocation.
 *
 * @returns 0 on success.
 * @returns SUPDRV_ERR_* on failure.
 * @param   pMem    Memory reference record of the memory to be allocated.
 *                  (This is not linked in anywhere.)
 * @param   ppvR0   Where to store the address of the Ring-0 mapping.
 * @param   ppvR3   Where to store the address of the Ring-3 mapping.
 */
int VBOXCALL supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
{
    const unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
    const unsigned cPages = cbAligned >> PAGE_SHIFT;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
    unsigned cOrder = VBoxDrvOrder(cPages);
    struct page *paPages;
#endif
    struct page **papPages;
    unsigned iPage;
    pgprot_t pgFlags;
    pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;

    /*
     * Allocate array with page pointers.
     */
    pMem->u.mem.cPages = 0;
    pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
    if (!papPages)
        return SUPDRV_ERR_NO_MEMORY;

    /*
     * Allocate the pages.
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = alloc_page(GFP_HIGHUSER);
        if (!papPages[iPage])
        {
            /* Record how many pages were actually allocated so the cleanup
               below only touches valid entries. */
            pMem->u.mem.cPages = iPage;
            supdrvOSMemFreeOne(pMem);
            return SUPDRV_ERR_NO_MEMORY;
        }
    }

#else /* < 2.4.22 */
    paPages = alloc_pages(GFP_USER, cOrder);
    if (!paPages)
    {
        supdrvOSMemFreeOne(pMem);
        return SUPDRV_ERR_NO_MEMORY;
    }
    for (iPage = 0; iPage < cPages; iPage++)
    {
        papPages[iPage] = &paPages[iPage];
        if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
            MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
        /* GFP_USER must not hand out highmem pages on these old kernels. */
        if (PageHighMem(papPages[iPage]))
            BUG();
    }
#endif
    pMem->u.mem.cPages = cPages;

    /*
     * Reserve the pages so the VM subsystem leaves them alone.
     */
    for (iPage = 0; iPage < cPages; iPage++)
        SetPageReserved(papPages[iPage]);

    /*
     * Create the Ring-0 mapping.
     */
    if (ppvR0)
    {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
# ifdef VM_MAP
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
# else
        *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
# endif
#else
        /* Contiguous lowmem allocation: the direct mapping serves as the R0 view. */
        *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
#endif
    }
    /* NOTE(review): the success checks below read pMem->pvR0/pvR3 even when the
       corresponding ppv pointer is NULL - assumes the caller pre-zeroed *pMem;
       TODO confirm. */
    if (pMem->pvR0 || !ppvR0)
    {
        /*
         * Create the ring3 mapping.
         */
        if (ppvR3)
            *ppvR3 = pMem->pvR3 = VBoxDrvLinuxMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
        if (pMem->pvR3 || !ppvR3)
            return 0;
        dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
    }
    else
        dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));

    /* Failure: release everything allocated so far. */
    supdrvOSMemFreeOne(pMem);
    return SUPDRV_ERR_NO_MEMORY;
}
1297
1298
1299/**
1300 * Get the physical addresses of the pages in the allocation.
1301 * This is called while inside bundle the spinlock.
1302 *
1303 * @param pMem Memory reference record of the memory.
1304 * @param paPages Where to store the page addresses.
1305 */
1306void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1307{
1308 unsigned iPage;
1309 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1310 {
1311 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1312 paPages[iPage].uReserved = 0;
1313 }
1314}
1315
1316
1317/**
1318 * Frees memory allocated by supdrvOSMemAllocOne().
1319 *
1320 * @param pMem Memory reference record of the memory to be free.
1321 */
1322void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1323{
1324 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1325 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1326
1327 /*
1328 * Unmap the user mapping (if any).
1329 * do_exit() destroys the mm before closing files.
1330 */
1331 if (pMem->pvR3 && current->mm)
1332 {
1333 down_write(&current->mm->mmap_sem);
1334 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1335 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1336 }
1337 pMem->pvR3 = NIL_RTR3PTR;
1338
1339 /*
1340 * Unmap the kernel mapping (if any).
1341 */
1342 if (pMem->pvR0)
1343 {
1344#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1345 vunmap(pMem->pvR0);
1346#endif
1347 pMem->pvR0 = NULL;
1348 }
1349
1350 /*
1351 * Free the physical pages.
1352 */
1353 if (pMem->u.mem.papPages)
1354 {
1355 struct page **papPages = pMem->u.mem.papPages;
1356 const unsigned cPages = pMem->u.mem.cPages;
1357 unsigned iPage;
1358
1359 /* Restore the page flags. */
1360 for (iPage = 0; iPage < cPages; iPage++)
1361 {
1362 ClearPageReserved(papPages[iPage]);
1363#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1364 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1365 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1366#endif
1367 }
1368
1369 /* Free the pages. */
1370#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1371 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1372 __free_page(papPages[iPage]);
1373#else
1374 if (cPages > 0)
1375 __free_pages(papPages[0], VBoxDrvOrder(cPages));
1376#endif
1377 /* Free the page pointer array. */
1378 kfree(papPages);
1379 pMem->u.mem.papPages = NULL;
1380 }
1381 pMem->u.mem.cPages = 0;
1382}
1383
1384
1385/**
1386 * Maps a range of pages into user space.
1387 *
1388 * @returns Pointer to the user space mapping on success.
1389 * @returns NULL on failure.
1390 * @param papPages Array of the pages to map.
1391 * @param cPages Number of pages to map.
1392 * @param fProt The mapping protection.
1393 * @param pgFlags The page level protection.
1394 */
1395static RTR3PTR VBoxDrvLinuxMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1396{
1397 int rc = SUPDRV_ERR_NO_MEMORY;
1398 unsigned long ulAddr;
1399
1400 /*
1401 * Allocate user space mapping.
1402 */
1403 down_write(&current->mm->mmap_sem);
1404 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1405 if (!(ulAddr & ~PAGE_MASK))
1406 {
1407 /*
1408 * Map page by page into the mmap area.
1409 * This is generic, paranoid and not very efficient.
1410 */
1411 int rc = 0;
1412 unsigned long ulAddrCur = ulAddr;
1413 unsigned iPage;
1414 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1415 {
1416#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1417 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1418 if (!vma)
1419 break;
1420#endif
1421
1422#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1423 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1424#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1425 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1426#else /* 2.4 */
1427 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1428#endif
1429 if (rc)
1430 break;
1431 }
1432
1433 /*
1434 * Successful?
1435 */
1436 if (iPage >= cPages)
1437 {
1438 up_write(&current->mm->mmap_sem);
1439 return ulAddr;
1440 }
1441
1442 /* no, cleanup! */
1443 if (rc)
1444 dprintf(("VBoxDrvLinuxMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1445 else
1446 dprintf(("VBoxDrvLinuxMapUser: find_vma failed!\n"));
1447
1448 MY_DO_MUNMAP(current->mm, ulAddr, cPages << PAGE_SHIFT);
1449 }
1450 else
1451 {
1452 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1453 rc = SUPDRV_ERR_NO_MEMORY;
1454 }
1455 up_write(&current->mm->mmap_sem);
1456
1457 return NIL_RTR3PTR;
1458}
1459
1460#endif /* !USE_NEW_OS_INTERFACE_FOR_MM */
1461
1462
/**
 * Initializes the GIP (Global Information Page).
 *
 * Allocates and reserves one page for the GIP, initializes the common GIP
 * state via supdrvGipInit(), and sets up the update timer(s) - one global
 * timer, plus (on SMP) one per-CPU timer used in async TSC mode.
 *
 * @returns 0 on success, negative errno on failure.
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    dma_addr_t HCPhys;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxDrvLinuxInitGip:\n"));

    /*
     * Allocate the page.
     */
    pPage = alloc_pages(GFP_USER, 0);
    if (!pPage)
    {
        dprintf(("VBoxDrvLinuxInitGip: failed to allocate the GIP page\n"));
        return -ENOMEM;
    }

    /*
     * Lock the page (reserved pages are skipped by the swapper).
     */
    SetPageReserved(pPage);
    g_pGipPage = pPage;

    /*
     * Call common initialization routine.
     * The interval-per-tick baseline is derived from jiffies; TICK_NSEC is
     * used when available for better precision.
     */
    HCPhys = page_to_phys(pPage);
    pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
    pDevExt->ulLastJiffies = jiffies;
#ifdef TICK_NSEC
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
    dprintf(("VBoxDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
             TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#else
    pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
    dprintf(("VBoxDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
             (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
#endif
    /* Update frequency is capped at 1000 Hz. */
    supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
                  HZ <= 1000 ? HZ : 1000);

    /*
     * Initialize the timer. (It is armed later; expires is just seeded.)
     */
    init_timer(&g_GipTimer);
    g_GipTimer.data = (unsigned long)pDevExt;
    g_GipTimer.function = VBoxDrvLinuxGipTimer;
    g_GipTimer.expires = jiffies;
#ifdef CONFIG_SMP
    /* Per-CPU timers for async TSC mode; .data carries the CPU index. */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
    {
        pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
        pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
        pDevExt->aCPUs[i].iSmpProcessorId = -512; /* invalid marker until resumed on that CPU */
        init_timer(&pDevExt->aCPUs[i].Timer);
        pDevExt->aCPUs[i].Timer.data = i;
        pDevExt->aCPUs[i].Timer.function = VBoxDrvLinuxGipTimerPerCpu;
        pDevExt->aCPUs[i].Timer.expires = jiffies;
    }
#endif

    return 0;
}
1535
1536
/**
 * Terminates the GIP.
 *
 * Stops all GIP update timers, uninitializes the common GIP state and frees
 * the GIP page.
 *
 * @returns 0 (always succeeds).
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt)
{
    struct page *pPage;
    PSUPGLOBALINFOPAGE pGip;
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf(("VBoxDrvLinuxTermGip:\n"));

    /*
     * Delete the timer if it's pending.
     * del_timer_sync() also waits for a running callback to finish.
     */
    if (timer_pending(&g_GipTimer))
        del_timer_sync(&g_GipTimer);
#ifdef CONFIG_SMP
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
        if (timer_pending(&pDevExt->aCPUs[i].Timer))
            del_timer_sync(&pDevExt->aCPUs[i].Timer);
#endif

    /*
     * Uninitialize the content.
     * Clear the pointer first so concurrent readers see NULL.
     */
    pGip = pDevExt->pGip;
    pDevExt->pGip = NULL;
    if (pGip)
        supdrvGipTerm(pGip);

    /*
     * Free the page (undo the SetPageReserved from init).
     */
    pPage = g_pGipPage;
    g_pGipPage = NULL;
    if (pPage)
    {
        ClearPageReserved(pPage);
        __free_pages(pPage, 0);
    }

    return 0;
}
1584
/**
 * Timer callback function.
 *
 * In ASYNC TSC mode this is called on the primary CPU, and we're
 * assuming that the CPU remains online.
 *
 * Advances the monotonic time stamp from the jiffies delta, pushes the
 * update into the GIP and re-arms the timer (unless suspended).
 *
 * @param   ulUser  The device extension pointer.
 */
static void VBoxDrvLinuxGipTimer(unsigned long ulUser)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    unsigned long ulNow;
    unsigned long ulDiff;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep the whole update atomic wrt. local interrupts. */
    local_irq_save(SavedFlags);

    pDevExt = (PSUPDRVDEVEXT)ulUser;
    pGip = pDevExt->pGip;
    ulNow = jiffies;

#ifdef CONFIG_SMP
    if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
    {
        /* Async TSC mode: account the time on this CPU's per-CPU entry,
           indexed by APIC id. */
        uint8_t iCPU = ASMGetApicId();
        ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
        pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
        u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
        u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
        pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
    }
    else
#endif /* CONFIG_SMP */
    {
        /* Synchronous mode: single global time line. */
        ulDiff = ulNow - pDevExt->ulLastJiffies;
        pDevExt->ulLastJiffies = ulNow;
#ifdef TICK_NSEC
        u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
#else
        u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
        pDevExt->u64LastMonotime = u64Monotime;
    }
    if (RT_LIKELY(pGip))
        supdrvGipUpdate(pDevExt->pGip, u64Monotime);
    /* Re-arm: next jiffy when HZ <= 1000, otherwise roughly one millisecond. */
    if (RT_LIKELY(!pDevExt->fGIPSuspended))
        mod_timer(&g_GipTimer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));

    local_irq_restore(SavedFlags);
}
1640
1641
#ifdef CONFIG_SMP
/**
 * Timer callback function for the other CPUs.
 *
 * Used in async TSC mode only: each CPU advances its own per-CPU monotonic
 * time line (indexed by APIC id) and re-arms its own timer. Complains if
 * the timer fired on the wrong CPU or with a bogus APIC id.
 *
 * @param   iTimerCPU   The APIC ID of this timer.
 */
static void VBoxDrvLinuxGipTimerPerCpu(unsigned long iTimerCPU)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    uint8_t iCPU;
    uint64_t u64Monotime;
    unsigned long SavedFlags;

    /* Keep the whole update atomic wrt. local interrupts. */
    local_irq_save(SavedFlags);

    pDevExt = &g_DevExt;
    pGip = pDevExt->pGip;
    iCPU = ASMGetApicId();

    /* (RT_ELEMENTS is sizeof-based, so this does not dereference pGip.) */
    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        if (RT_LIKELY(iTimerCPU == iCPU))
        {
            unsigned long ulNow = jiffies;
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
            pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
#ifdef TICK_NSEC
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
#else
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
#endif
            pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            /* Re-arm: next jiffy when HZ <= 1000, otherwise ~1ms. */
            if (RT_LIKELY(!pDevExt->fGIPSuspended))
                mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d !=? timer-cpuid=%d)\n",
                   iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%lu cpuid=%d\n",
               iCPU, (unsigned long)RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);
}
#endif /* CONFIG_SMP */
1691
1692
1693/**
1694 * Maps the GIP into user space.
1695 *
1696 * @returns negative errno.
1697 * @param pDevExt Instance data.
1698 */
1699int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE *ppGip)
1700{
1701 int rc = 0;
1702 unsigned long ulAddr;
1703 unsigned long HCPhys = pDevExt->HCPhysGip;
1704 pgprot_t pgFlags;
1705 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1706 dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1707
1708 /*
1709 * Allocate user space mapping and put the physical pages into it.
1710 */
1711 down_write(&current->mm->mmap_sem);
1712 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1713 if (!(ulAddr & ~PAGE_MASK))
1714 {
1715#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1716 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1717#else
1718 int rc2 = 0;
1719 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1720 if (vma)
1721#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1722 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1723#else
1724 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1725#endif
1726 else
1727 {
1728 rc = SUPDRV_ERR_NO_MEMORY;
1729 dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1730 }
1731#endif
1732 if (rc2)
1733 {
1734 rc = SUPDRV_ERR_NO_MEMORY;
1735 dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1736 }
1737 }
1738 else
1739 {
1740 dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1741 rc = SUPDRV_ERR_NO_MEMORY;
1742 }
1743 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1744
1745 /*
1746 * Success?
1747 */
1748 if (!rc)
1749 {
1750 *ppGip = (PSUPGLOBALINFOPAGE)ulAddr;
1751 dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1752 return 0;
1753 }
1754
1755 /*
1756 * Failure, cleanup and be gone.
1757 */
1758 if (ulAddr & ~PAGE_MASK)
1759 {
1760 down_write(&current->mm->mmap_sem);
1761 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1762 up_write(&current->mm->mmap_sem);
1763 }
1764
1765 dprintf2(("supdrvOSGipMap: returns %d\n", rc));
1766 return rc;
1767}
1768
1769
1770/**
1771 * Maps the GIP into user space.
1772 *
1773 * @returns negative errno.
1774 * @param pDevExt Instance data.
1775 */
1776int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
1777{
1778 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1779 if (current->mm)
1780 {
1781 down_write(&current->mm->mmap_sem);
1782 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1783 up_write(&current->mm->mmap_sem);
1784 }
1785 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1786 return 0;
1787}
1788
1789
/**
 * Resumes the GIP updating.
 *
 * Clears the suspended flag and re-arms the update timer(s). In async TSC
 * mode (SMP) the per-CPU timers are re-armed on their own CPUs via an IPI.
 *
 * NOTE: the if/else below is split by #ifdef CONFIG_SMP - on UP kernels
 * only the unconditional mod_timer() remains.
 *
 * @param   pDevExt     Instance data.
 */
void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
{
    dprintf2(("supdrvOSGipResume:\n"));
    ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
#ifdef CONFIG_SMP
    if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
#endif
        mod_timer(&g_GipTimer, jiffies);
#ifdef CONFIG_SMP
    else
    {
        /* Async TSC: restart the master timer and have every CPU restart its own. */
        mod_timer(&g_GipTimer, jiffies);
        smp_call_function(VBoxDrvLinuxGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
    }
#endif
}
1811
1812
#ifdef CONFIG_SMP
/**
 * Callback for resuming GIP updating on the other CPUs.
 *
 * This is only used when the GIP is in async tsc mode.
 *
 * @param   pvUser  Pointer to the device instance.
 */
static void VBoxDrvLinuxGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint8_t       iCPU    = ASMGetApicId();

    /* Sanity check the APIC id before indexing the per-CPU arrays. */
    if (RT_LIKELY(iCPU < RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        pDevExt->aCPUs[iCPU].iSmpProcessorId = smp_processor_id();
        mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies);
    }
    else
        printk("vboxdrv: error: apicid=%d max=%lu cpuid=%d\n",
               iCPU, (unsigned long)RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
}
#endif /* CONFIG_SMP */
1837
1838
/**
 * Suspends the GIP updating.
 *
 * Sets the suspended flag (so running callbacks do not re-arm themselves)
 * and then synchronously stops all pending update timers.
 *
 * @param   pDevExt     Instance data.
 */
void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
{
#ifdef CONFIG_SMP
    unsigned i;
#endif
    dprintf2(("supdrvOSGipSuspend:\n"));
    ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);

    if (timer_pending(&g_GipTimer))
        del_timer_sync(&g_GipTimer);
#ifdef CONFIG_SMP
    /* Async TSC mode also runs one timer per CPU; stop those too. */
    for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
        if (timer_pending(&pDevExt->aCPUs[i].Timer))
            del_timer_sync(&pDevExt->aCPUs[i].Timer);
#endif
}
1860
1861
/**
 * Get the current CPU count.
 *
 * Picks the best primitive the kernel offers: present CPUs if available,
 * falling back to online CPUs, then to the old smp_num_cpus variable.
 *
 * @returns Number of cpus (1 on non-SMP kernels).
 */
unsigned VBOXCALL supdrvOSGetCPUCount(void)
{
#ifdef CONFIG_SMP
# if defined(num_present_cpus)
    return num_present_cpus();
# elif defined(num_online_cpus)
    return num_online_cpus();
# else
    return smp_num_cpus;
# endif
#else
    return 1;
#endif
}
1880
1881/**
1882 * Force async tsc mode.
1883 * @todo add a module argument for this.
1884 */
1885bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
1886{
1887 return false;
1888}
1889
1890
1891/**
1892 * Converts a supdrv error code to an linux error code.
1893 *
1894 * @returns corresponding linux error code.
1895 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1896 */
1897static int VBoxDrvLinuxErr2LinuxErr(int rc)
1898{
1899 switch (rc)
1900 {
1901 case 0: return 0;
1902 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1903 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1904 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1905 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1906 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1907 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1908 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1909 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1910 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1911 case SUPDRV_ERR_IDT_FAILED: return -1000;
1912 }
1913
1914 return -EPERM;
1915}
1916
1917
1918RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
1919{
1920#if 1
1921 va_list args;
1922 char szMsg[512];
1923
1924 va_start(args, pszFormat);
1925 vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
1926 szMsg[sizeof(szMsg) - 1] = '\0';
1927 printk("%s", szMsg);
1928 va_end(args);
1929#else
1930 /* forward to printf - needs some more GCC hacking to fix ebp... */
1931 __asm__ __volatile__ ("mov %0, %esp\n\t"
1932 "jmp %1\n\t",
1933 :: "r" ((uintptr_t)&pszFormat - 4),
1934 "m" (printk));
1935#endif
1936 return 0;
1937}
1938
1939
/** Runtime assert implementation for Linux Ring-0.
 * Logs the failed expression and its source location to the kernel log;
 * does not panic - execution continues after the report. */
RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
{
    printk("!!Assertion Failed!!\n"
           "Expression: %s\n"
           "Location  : %s(%d) %s\n",
           pszExpr, pszFile, uLine, pszFunction);
}
1948
1949
1950/** Runtime assert implementation for Linux Ring-0. */
1951RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1952{ /* forwarder. */
1953 va_list ap;
1954 char msg[256];
1955
1956 va_start(ap, pszFormat);
1957 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1958 msg[sizeof(msg) - 1] = '\0';
1959 printk("%s", msg);
1960 va_end(ap);
1961}
1962
1963
/* GCC C++ hack: dummy definition satisfying the __gxx_personality_v0 symbol
   that GCC emits references to from C++ translation units, so C++ code can be
   linked into this C kernel module without libstdc++.
   NOTE(review): 0xcccccccc is just a recognizable poison value - the symbol is
   never used as a real personality routine here. */
unsigned __gxx_personality_v0 = 0xcccccccc;
1966
1967
/* Register the module entry/exit points and metadata with the kernel. */
module_init(VBoxDrvLinuxInit);
module_exit(VBoxDrvLinuxUnload);

MODULE_AUTHOR("innotek GmbH");
MODULE_DESCRIPTION("VirtualBox Support Driver");
MODULE_LICENSE("GPL");
#ifdef MODULE_VERSION
/* Two-level stringification so the numeric SUPDRVIOC_VERSION macro is
   expanded before being turned into a string. */
#define xstr(s) str(s)
#define str(s) #s
MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette