VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@4811

Last change on this file since 4811 was 4811, checked in by vboxsync, 17 years ago

Split VMMR0Entry into VMMR0EntryInt, VMMR0EntryFast and VMMr0EntryEx. This will prevent the SUPCallVMMR0Ex path from causing harm and messing up the paths that have to be optimized.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 57.9 KB
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License as published by the Free Software Foundation,
12 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
13 * distribution. VirtualBox OSE is distributed in the hope that it will
14 * be useful, but WITHOUT ANY WARRANTY of any kind.
15 * Some lines of code to disable the local APIC on x86_64 machines taken
16 * from a Mandriva patch by Gwenole Beauchesne <[email protected]>.
17 */
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include "SUPDRV.h"
23#include "version-generated.h"
24
25#include <iprt/assert.h>
26#include <iprt/spinlock.h>
27#include <iprt/semaphore.h>
28#include <iprt/initterm.h>
29#include <iprt/process.h>
30#include <iprt/err.h>
31#include <iprt/mem.h>
32
33#include <linux/module.h>
34#include <linux/kernel.h>
35#include <linux/init.h>
36#include <linux/fs.h>
37#include <linux/mm.h>
38#include <linux/pagemap.h>
39#include <linux/sched.h>
40#include <linux/slab.h>
41#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
42# include <linux/jiffies.h>
43#endif
44#include <asm/mman.h>
45#include <asm/io.h>
46#include <asm/uaccess.h>
47#ifdef CONFIG_DEVFS_FS
48# include <linux/devfs_fs_kernel.h>
49#endif
50#ifdef CONFIG_VBOXDRV_AS_MISC
51# include <linux/miscdevice.h>
52#endif
53#ifdef CONFIG_X86_LOCAL_APIC
54# include <asm/apic.h>
55# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
56# include <asm/nmi.h>
57# endif
58#endif
59
60#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
61# ifndef page_to_pfn
62# define page_to_pfn(page) ((page) - mem_map)
63# endif
64# include <asm/pgtable.h>
65# define global_flush_tlb __flush_tlb_global
66#endif
67
68#include <iprt/mem.h>
69
70
71/* devfs defines */
72#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
73# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
74
75# define VBOX_REGISTER_DEVFS() \
76({ \
77 void *rc = NULL; \
78 if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
79 S_IFCHR | S_IRUGO | S_IWUGO, \
80 DEVICE_NAME) == 0) \
81 rc = (void *)' '; /* return not NULL */ \
82 rc; \
83 })
84
85# define VBOX_UNREGISTER_DEVFS(handle) \
86 devfs_remove(DEVICE_NAME);
87
88# else /* < 2.6.0 */
89
90# define VBOX_REGISTER_DEVFS() \
91 devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
92 DEVICE_MAJOR, 0, \
93 S_IFCHR | S_IRUGO | S_IWUGO, \
94 &gFileOpsVBoxDrv, NULL)
95
96# define VBOX_UNREGISTER_DEVFS(handle) \
97 if (handle != NULL) \
98 devfs_unregister(handle)
99
100# endif /* < 2.6.0 */
101#endif /* CONFIG_DEVFS_FS && !CONFIG_VBOXDRV_AS_MISC */
102
103#ifndef CONFIG_VBOXDRV_AS_MISC
104# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
105# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
106# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
107# else
108# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
109# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
110# endif
111#endif /* !CONFIG_VBOXDRV_AS_MISC */
112
113
114#ifdef CONFIG_X86_HIGH_ENTRY
115# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
116#endif
117
118/*
119 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
120 */
121#if defined(RT_ARCH_AMD64)
122# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
123#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
124# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
125#else
126# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
127#endif
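/*
 * Editor's note: on PAE-enabled x86 the branch above rebuilds what
 * PAGE_KERNEL_EXEC would expand to, adding _PAGE_GLOBAL only when the CPU
 * supports global pages (cpu_has_pge) so the mapping survives ordinary
 * CR3-based TLB flushes; on AMD64 the kernel exports PAGE_KERNEL_EXEC
 * directly, and everywhere else the code falls back to plain PAGE_KERNEL
 * (which on non-PAE x86 is executable anyway, as there is no NX bit).
 */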
128
129/*
130 * The redhat hack section.
131 * - The current hacks are for 2.4.21-15.EL only.
132 */
133#ifndef NO_REDHAT_HACKS
134/* accounting. */
135# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
136# ifdef VM_ACCOUNT
137# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
138# endif
139# endif
140
141/* backported remap_page_range. */
142# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
143# include <asm/tlb.h>
144# ifdef tlb_vma /* probably not good enough... */
145# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
146# endif
147# endif
148
149# ifndef RT_ARCH_AMD64
150/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
151 * the page attributes from PAGE_KERNEL to something else, because there appears
152 * to be a bug in one of the many patches that redhat applied.
153 * It should be safe to do this on less buggy linux kernels too. ;-)
154 */
155# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
156 do { \
157 if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
158 change_page_attr(pPages, cPages, prot); \
159 change_page_attr(pPages, cPages, prot); \
160 } while (0)
161# endif
162#endif /* !NO_REDHAT_HACKS */
163
164
165#ifndef MY_DO_MUNMAP
166# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
167#endif
168
169#ifndef MY_CHANGE_PAGE_ATTR
170# ifdef RT_ARCH_AMD64 /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
171# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
172 do { \
173 change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
174 change_page_attr(pPages, cPages, prot); \
175 } while (0)
176# else
177# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
178# endif
179#endif
180
181
182/** @def ONE_MSEC_IN_JIFFIES
183 * The number of jiffies that make up 1 millisecond. This is only actually used
184 * when HZ is > 1000. */
185#if HZ <= 1000
186# define ONE_MSEC_IN_JIFFIES 0
187#elif !(HZ % 1000)
188# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
189#else
190# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
191# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
192#endif
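/*
 * Editor's note: worked values for the ONE_MSEC_IN_JIFFIES macro above.
 * The GIP timers below re-arm with this value only when HZ > 1000:
 *
 *   HZ =  100  ->  0   (HZ <= 1000: the timer fires every jiffy anyway)
 *   HZ = 1000  ->  0   (same branch)
 *   HZ = 2000  ->  2   (2000 / 1000, i.e. two jiffies per millisecond)
 *   HZ = 1024  ->  compile-time #error, caught by the !(HZ % 1000) check
 */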
193
194#ifdef CONFIG_X86_LOCAL_APIC
195
196/* If an NMI occurs while we are inside the world switcher the machine will
197 * crash. The Linux NMI watchdog generates periodic NMIs increasing a counter
198 * which is compared with another counter increased in the timer interrupt
199 * handler. We disable the NMI watchdog.
200 *
201 * - Linux >= 2.6.21: The watchdog is disabled by default on i386 and x86_64.
202 * - Linux < 2.6.21: The watchdog is normally enabled by default on x86_64
203 * and disabled on i386.
204 */
205# if defined(RT_ARCH_AMD64)
206# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21)
207# define DO_DISABLE_NMI 1
208# endif
209# endif
210
211# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
212extern int nmi_active;
213# define nmi_atomic_read(P) *(P)
214# define nmi_atomic_set(P, V) *(P) = (V)
215# define nmi_atomic_dec(P) nmi_atomic_set(P, 0)
216# else
217# define nmi_atomic_read(P) atomic_read(P)
218# define nmi_atomic_set(P, V) atomic_set(P, V)
219# define nmi_atomic_dec(P) atomic_dec(P)
220# endif
221
222# ifndef X86_FEATURE_ARCH_PERFMON
223# define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
224# endif
225# ifndef MSR_ARCH_PERFMON_EVENTSEL0
226# define MSR_ARCH_PERFMON_EVENTSEL0 0x186
227# endif
228# ifndef ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
229# define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
230# endif
231
232#endif /* CONFIG_X86_LOCAL_APIC */
233
234
235/*******************************************************************************
236* Defined Constants And Macros *
237*******************************************************************************/
238/**
239 * Device extension & session data association structure.
240 */
241static SUPDRVDEVEXT g_DevExt;
242
243/** Timer structure for the GIP update. */
244static struct timer_list g_GipTimer;
245/** Pointer to the page structure for the GIP. */
246struct page *g_pGipPage;
247
248/** Registered devfs device handle. */
249#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
250# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
251static void *g_hDevFsVBoxDrv = NULL;
252# else
253static devfs_handle_t g_hDevFsVBoxDrv = NULL;
254# endif
255#endif
256
257#ifndef CONFIG_VBOXDRV_AS_MISC
258/** Module major number */
259#define DEVICE_MAJOR 234
260/** Saved major device number */
261static int g_iModuleMajor;
262#endif /* !CONFIG_VBOXDRV_AS_MISC */
263
264/** The module name. */
265#define DEVICE_NAME "vboxdrv"
266
267#ifdef RT_ARCH_AMD64
268/**
269 * Memory for the executable memory heap (in IPRT).
270 */
271extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
272__asm__(".section execmemory, \"awx\", @progbits\n\t"
273 ".align 32\n\t"
274 ".globl g_abExecMemory\n"
275 "g_abExecMemory:\n\t"
276 ".zero 1572864\n\t"
277 ".type g_abExecMemory, @object\n\t"
278 ".size g_abExecMemory, 1572864\n\t"
279 ".text\n\t");
280#endif
281
282
283/*******************************************************************************
284* Internal Functions *
285*******************************************************************************/
286static int VBoxDrvLinuxInit(void);
287static void VBoxDrvLinuxUnload(void);
288static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp);
289static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp);
290static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
291static int VBoxDrvLinuxIOCtlSlow(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
292#ifndef USE_NEW_OS_INTERFACE_FOR_MM
293static RTR3PTR VBoxDrvLinuxMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags);
294#endif /* !USE_NEW_OS_INTERFACE_FOR_MM */
295static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt);
296static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt);
297static void VBoxDrvLinuxGipTimer(unsigned long ulUser);
298#ifdef CONFIG_SMP
299static void VBoxDrvLinuxGipTimerPerCpu(unsigned long ulUser);
300static void VBoxDrvLinuxGipResumePerCpu(void *pvUser);
301#endif
302static int VBoxDrvLinuxErr2LinuxErr(int);
303
304
305/** The file_operations structure. */
306static struct file_operations gFileOpsVBoxDrv =
307{
308 owner: THIS_MODULE,
309 open: VBoxDrvLinuxCreate,
310 release: VBoxDrvLinuxClose,
311 ioctl: VBoxDrvLinuxIOCtl,
312};
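/*
 * Editor's note: the structure above uses the old GNU "field:" initializer
 * syntax that GCC accepted before C99 designated initializers became the
 * kernel norm. A sketch of the equivalent modern form (same members, same
 * handlers) for comparison:
 */
#if 0
static struct file_operations gFileOpsVBoxDrv_c99 =
{
    .owner   = THIS_MODULE,       /* pins the module while the fd is open */
    .open    = VBoxDrvLinuxCreate,
    .release = VBoxDrvLinuxClose,
    .ioctl   = VBoxDrvLinuxIOCtl, /* BKL-style ioctl entry on these kernels */
};
#endif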
313
314#ifdef CONFIG_VBOXDRV_AS_MISC
315/** The miscdevice structure. */
316static struct miscdevice gMiscDevice =
317{
318 minor: MISC_DYNAMIC_MINOR,
319 name: DEVICE_NAME,
320 fops: &gFileOpsVBoxDrv,
321# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
322 LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
323 devfs_name: DEVICE_NAME,
324# endif
325};
326#endif
327
328#ifdef CONFIG_X86_LOCAL_APIC
329# ifdef DO_DISABLE_NMI
330
331/** Stop AMD NMI watchdog (x86_64 only). */
332static int stop_k7_watchdog(void)
333{
334 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
335 return 1;
336}
337
338/** Stop Intel P4 NMI watchdog (x86_64 only). */
339static int stop_p4_watchdog(void)
340{
341 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
342 wrmsr(MSR_P4_IQ_CCCR1, 0, 0);
343 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
344 return 1;
345}
346
347/** The new method of detecting the event counter */
348static int stop_intel_arch_watchdog(void)
349{
350 unsigned ebx;
351
352 ebx = cpuid_ebx(10);
353 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
354 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
355 return 1;
356}
357
358/** Stop NMI watchdog. */
359static void vbox_stop_apic_nmi_watchdog(void *unused)
360{
361 int stopped = 0;
362
363 /* only support LOCAL and IO APICs for now */
364 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
365 (nmi_watchdog != NMI_IO_APIC))
366 return;
367
368 if (nmi_watchdog == NMI_LOCAL_APIC)
369 {
370 switch (boot_cpu_data.x86_vendor)
371 {
372 case X86_VENDOR_AMD:
373 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
374 return;
375 stopped = stop_k7_watchdog();
376 break;
377 case X86_VENDOR_INTEL:
378 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
379 {
380 stopped = stop_intel_arch_watchdog();
381 break;
382 }
383 stopped = stop_p4_watchdog();
384 break;
385 default:
386 return;
387 }
388 }
389
390 if (stopped)
391 nmi_atomic_dec(&nmi_active);
392}
393
394/** Disable LAPIC NMI watchdog. */
395static void disable_lapic_nmi_watchdog(void)
396{
397 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
398
399 if (nmi_atomic_read(&nmi_active) <= 0)
400 return;
401
402 on_each_cpu(vbox_stop_apic_nmi_watchdog, NULL, 1, 1);
403
404 BUG_ON(nmi_atomic_read(&nmi_active) != 0);
405
406 /* tell do_nmi() and others that we're not active any more */
407 nmi_watchdog = NMI_NONE;
408}
409
410/** Shutdown NMI. */
411static void nmi_cpu_shutdown(void * dummy)
412{
413 unsigned int vERR, vPC;
414
415 vPC = apic_read(APIC_LVTPC);
416
417 if ((GET_APIC_DELIVERY_MODE(vPC) == APIC_MODE_NMI) && !(vPC & APIC_LVT_MASKED))
418 {
419 vERR = apic_read(APIC_LVTERR);
420 apic_write(APIC_LVTERR, vERR | APIC_LVT_MASKED);
421 apic_write(APIC_LVTPC, vPC | APIC_LVT_MASKED);
422 apic_write(APIC_LVTERR, vERR);
423 }
424}
425
426static void nmi_shutdown(void)
427{
428 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
429}
430# endif /* DO_DISABLE_NMI */
431#endif /* CONFIG_X86_LOCAL_APIC */
432
433/**
434 * Initialize module.
435 *
436 * @returns appropriate status code.
437 */
438static int __init VBoxDrvLinuxInit(void)
439{
440 int rc;
441
442 dprintf(("VBoxDrv::ModuleInit\n"));
443
444#ifdef CONFIG_X86_LOCAL_APIC
445 /*
446 * If an NMI occurs while we are inside the world switcher the machine will crash.
447 * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
448 * compared with another counter increased in the timer interrupt handler. Therefore
449 * we do not allow an NMI watchdog to be set up.
450 */
451# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
452 /*
453 * First test: NMI activated? This only works with Linux 2.6 -- 2.4 does not export
454 * the nmi_watchdog variable.
455 */
456# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
457 (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
458# ifdef DO_DISABLE_NMI
459 if (nmi_atomic_read(&nmi_active) > 0)
460 {
461 printk(KERN_INFO DEVICE_NAME ": Trying to deactivate the NMI watchdog...\n");
462
463 switch (nmi_watchdog)
464 {
465 case NMI_LOCAL_APIC:
466 disable_lapic_nmi_watchdog();
467 break;
468 case NMI_NONE:
469 nmi_atomic_dec(&nmi_active);
470 break;
471 }
472
473 if (nmi_atomic_read(&nmi_active) == 0)
474 {
475 nmi_shutdown();
476 printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
477 }
478 else
479 printk(KERN_INFO DEVICE_NAME ": Failed!\n");
480 }
481# endif /* DO_DISABLE_NMI */
482
483 /*
484 * Permanent IO_APIC mode active? No way to handle this!
485 */
486 if (nmi_watchdog == NMI_IO_APIC)
487 {
488 printk(KERN_ERR DEVICE_NAME
489 ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
490 DEVICE_NAME
491 ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
492 DEVICE_NAME
493 ": command line.\n");
494 return -EINVAL;
495 }
496
497 /*
498 * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again
499 */
500 nmi_atomic_set(&nmi_active, -1);
501 printk(KERN_INFO DEVICE_NAME ": Trying to deactivate the NMI watchdog permanently...\n");
502
503 /*
504 * Now fall through and see if it actually was enabled before. If so, fail
505 * as we cannot deactivate it cleanly from here.
506 */
507# else /* < 2.6.19 */
508 /*
509 * Older 2.6 kernels: nmi_watchdog is not initialized by default
510 */
511 if (nmi_watchdog != NMI_NONE)
512 goto nmi_activated;
513# endif
514# endif /* >= 2.6.0 */
515
516 /*
517 * Second test: Is the interrupt generated by the performance counter unmasked
518 * and able to generate an NMI? This also works with Linux 2.4.
519 */
520 {
521 unsigned int v, ver, maxlvt;
522
523 v = apic_read(APIC_LVR);
524 ver = GET_APIC_VERSION(v);
525 /* 82489DXs do not report # of LVT entries. */
526 maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
527 if (maxlvt >= 4)
528 {
529 /* Read status of performance counter IRQ vector */
530 v = apic_read(APIC_LVTPC);
531
532 /* performance counter generates NMI and is not masked? */
533 if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
534 {
535# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
536 (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
537 printk(KERN_ERR DEVICE_NAME
538 ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
539 DEVICE_NAME
540 ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
541 return -EINVAL;
542# else /* < 2.6.19 */
543# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
544nmi_activated:
545# endif
546 printk(KERN_ERR DEVICE_NAME
547 ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
548 DEVICE_NAME
549 ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
550 return -EINVAL;
551# endif /* >= 2.6.19 */
552 }
553 }
554 }
555# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
556 printk(KERN_INFO DEVICE_NAME ": Successfully done.\n");
557# endif /* >= 2.6.19 */
558#endif /* CONFIG_X86_LOCAL_APIC */
559
560#ifdef CONFIG_VBOXDRV_AS_MISC
561 rc = misc_register(&gMiscDevice);
562 if (rc)
563 {
564 printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
565 return rc;
566 }
567#else /* !CONFIG_VBOXDRV_AS_MISC */
568 /*
569 * Register character device.
570 */
571 g_iModuleMajor = DEVICE_MAJOR;
572 rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
573 if (rc < 0)
574 {
575 dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
576 return rc;
577 }
578
579 /*
580 * Save returned module major number
581 */
582 if (DEVICE_MAJOR != 0)
583 g_iModuleMajor = DEVICE_MAJOR;
584 else
585 g_iModuleMajor = rc;
586 rc = 0;
587
588#ifdef CONFIG_DEVFS_FS
589 /*
590 * Register a device entry
591 */
592 g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
593 if (g_hDevFsVBoxDrv == NULL)
594 {
595 dprintf(("devfs_register failed!\n"));
596 rc = -EINVAL;
597 }
598#endif
599#endif /* !CONFIG_VBOXDRV_AS_MISC */
600 if (!rc)
601 {
602 /*
603 * Initialize the runtime.
604 * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
605 */
606 rc = RTR0Init(0);
607 if (RT_SUCCESS(rc))
608 {
609#ifdef RT_ARCH_AMD64
610 rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
611#endif
612 /*
613 * Initialize the device extension.
614 */
615 if (RT_SUCCESS(rc))
616 rc = supdrvInitDevExt(&g_DevExt);
617 if (!rc)
618 {
619 /*
620 * Create the GIP page.
621 */
622 rc = VBoxDrvLinuxInitGip(&g_DevExt);
623 if (!rc)
624 {
625 dprintf(("VBoxDrv::ModuleInit returning %#x\n", rc));
626 return rc;
627 }
628
629 supdrvDeleteDevExt(&g_DevExt);
630 }
631 else
632 rc = -EINVAL;
633 RTR0Term();
634 }
635 else
636 rc = -EINVAL;
637
638 /*
639 * Failed, cleanup and return the error code.
640 */
641#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
642 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
643#endif
644 }
645#ifdef CONFIG_VBOXDRV_AS_MISC
646 misc_deregister(&gMiscDevice);
647 dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
648#else
649 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
650 dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
651#endif
652 return rc;
653}
654
655
656/**
657 * Unload the module.
658 */
659static void __exit VBoxDrvLinuxUnload(void)
660{
661 int rc;
662 dprintf(("VBoxDrvLinuxUnload\n"));
663
664 /*
665 * I don't think it's possible to unload a driver which processes have
666 * opened; at least we'll blindly assume that here.
667 */
668#ifdef CONFIG_VBOXDRV_AS_MISC
669 rc = misc_deregister(&gMiscDevice);
670 if (rc < 0)
671 {
672 dprintf(("misc_deregister failed with rc=%#x\n", rc));
673 }
674#else /* !CONFIG_VBOXDRV_AS_MISC */
675#ifdef CONFIG_DEVFS_FS
676 /*
677 * Unregister a device entry
678 */
679 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
680#endif // devfs
681 rc = VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
682 if (rc < 0)
683 {
684 dprintf(("unregister_chrdev failed with rc=%#x (major:%d)\n", rc, g_iModuleMajor));
685 }
686#endif /* !CONFIG_VBOXDRV_AS_MISC */
687
688 /*
689 * Destroy GIP, delete the device extension and terminate IPRT.
690 */
691 VBoxDrvLinuxTermGip(&g_DevExt);
692 supdrvDeleteDevExt(&g_DevExt);
693 RTR0Term();
694}
695
696
697/**
698 * Device open. Called when /dev/vboxdrv is opened.
699 *
700 * @param pInode Pointer to inode info structure.
701 * @param pFilp Associated file pointer.
702 */
703static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp)
704{
705 int rc;
706 PSUPDRVSESSION pSession;
707 dprintf(("VBoxDrvLinuxCreate: pFilp=%p\n", pFilp));
708
709 /*
710 * Call common code for the rest.
711 */
712 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
713 if (!rc)
714 {
715 pSession->Uid = current->euid;
716 pSession->Gid = current->egid;
717 pSession->Process = RTProcSelf();
718 pSession->R0Process = RTR0ProcHandleSelf();
719 }
720
721 dprintf(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d\n", &g_DevExt, pSession, rc));
722 pFilp->private_data = pSession;
723
724 return VBoxDrvLinuxErr2LinuxErr(rc);
725}
726
727
728/**
729 * Close device.
730 *
731 * @param pInode Pointer to inode info structure.
732 * @param pFilp Associated file pointer.
733 */
734static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp)
735{
736 dprintf(("VBoxDrvLinuxClose: pFilp=%p private_data=%p\n", pFilp, pFilp->private_data));
737 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
738 pFilp->private_data = NULL;
739 return 0;
740}
741
742
743/**
744 * Device I/O Control entry point.
745 *
746 * @param pInode Pointer to inode info structure.
747 * @param pFilp Associated file pointer.
748 * @param uCmd The function specified to ioctl().
749 * @param ulArg The argument specified to ioctl().
750 */
751static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
752{
753 /*
754 * Deal with the high-speed ioctls that take their arguments from
755 * the session and uCmd, and only return a VBox status code.
756 */
757 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
758 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
759 || uCmd == SUP_IOCTL_FAST_DO_NOP))
760 return supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
761 return VBoxDrvLinuxIOCtlSlow(pInode, pFilp, uCmd, ulArg);
762}
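/*
 * Editor's note: a user-space sketch (not part of the driver) of driving the
 * fast path above. Because the fast ioctls take their arguments from the
 * session, the caller passes no data buffer; the ioctl return value carries
 * the VBox status. SUP_IOCTL_FAST_DO_NOP comes from the SUP headers included
 * at the top of this file.
 */
#if 0
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int sup_fast_nop_example(void)
{
    int fd = open("/dev/vboxdrv", O_RDWR);
    int rc;
    if (fd < 0)
        return -1;
    rc = ioctl(fd, SUP_IOCTL_FAST_DO_NOP, 0); /* no argument structure at all */
    close(fd);
    return rc;
}
#endif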
763
764
765/**
766 * Device I/O Control entry point, slow variant that buffers the request.
767 *
768 * @param pInode Pointer to inode info structure.
769 * @param pFilp Associated file pointer.
770 * @param uCmd The function specified to ioctl().
771 * @param ulArg The argument specified to ioctl().
772 */
773static int VBoxDrvLinuxIOCtlSlow(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
774{
775 int rc;
776 SUPREQHDR Hdr;
777 PSUPREQHDR pHdr;
778 uint32_t cbBuf;
779
780 dprintf2(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p\n", pFilp, uCmd, (void *)ulArg));
781
782 /*
783 * Read the header.
784 */
785 if (RT_UNLIKELY(copy_from_user(&Hdr, (void *)ulArg, sizeof(Hdr))))
786 {
787 dprintf(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x.\n", ulArg, uCmd));
788 return -EFAULT;
789 }
790 if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
791 {
792 dprintf(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
793 return -EINVAL;
794 }
795
796 /*
797 * Buffer the request.
798 */
799 cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
800 if (RT_UNLIKELY(cbBuf > _1M*16))
801 {
802 dprintf(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
803 return -E2BIG;
804 }
805 if (RT_UNLIKELY(cbBuf != _IOC_SIZE(uCmd) && _IOC_SIZE(uCmd)))
806 {
807 dprintf(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x.\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
808 return -EINVAL;
809 }
810 pHdr = RTMemAlloc(cbBuf);
811 if (RT_UNLIKELY(!pHdr))
812 {
813 OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x.\n", cbBuf, uCmd));
814 return -ENOMEM;
815 }
816 if (RT_UNLIKELY(copy_from_user(pHdr, (void *)ulArg, Hdr.cbIn)))
817 {
818 dprintf(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x.\n", ulArg, Hdr.cbIn, uCmd));
819 RTMemFree(pHdr);
820 return -EFAULT;
821 }
822
823 /*
824 * Process the IOCtl.
825 */
826 rc = supdrvIOCtl(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data, pHdr);
827
828 /*
829 * Copy ioctl data and output buffer back to user space.
830 */
831 if (RT_LIKELY(!rc))
832 {
833 uint32_t cbOut = pHdr->cbOut;
834 if (RT_UNLIKELY(cbOut > cbBuf))
835 {
836 OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
837 cbOut = cbBuf;
838 }
839 if (RT_UNLIKELY(copy_to_user((void *)ulArg, pHdr, cbOut)))
840 {
841 /* this is really bad! */
842 OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
843 rc = -EFAULT;
844 }
845 }
846 else
847 {
848 dprintf(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
849 rc = -EINVAL;
850 }
851 RTMemFree(pHdr);
852
853 dprintf2(("VBoxDrvLinuxIOCtl: returns %d\n", rc));
854 return rc;
855}
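/*
 * Editor's note: a minimal sketch of what the slow path above expects from
 * user space. The SUPREQHDR field names (fFlags, cbIn, cbOut) match their
 * use in VBoxDrvLinuxIOCtlSlow(); the flag setup is simplified and the
 * command number is a placeholder, not the real SUP_IOCTL_* encoding.
 */
#if 0
#include <sys/ioctl.h>

static int sup_slow_call_example(int fd, unsigned int uCmd, SUPREQHDR *pHdr, uint32_t cbReq)
{
    pHdr->fFlags = SUPREQHDR_FLAGS_MAGIC; /* must pass the magic-mask check */
    pHdr->cbIn   = cbReq;                 /* driver buffers max(cbIn, cbOut)... */
    pHdr->cbOut  = cbReq;                 /* ...and rejects anything over 16MB */
    return ioctl(fd, uCmd, pHdr);         /* header+payload copied in, then back out */
}
#endif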
856
857
858/**
859 * Initializes any OS specific object creator fields.
860 */
861void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
862{
863 NOREF(pObj);
864 NOREF(pSession);
865}
866
867
868/**
869 * Checks if the session can access the object.
870 *
871 * @returns true if a decision has been made.
872 * @returns false if the default access policy should be applied.
873 *
874 * @param pObj The object in question.
875 * @param pSession The session wanting to access the object.
876 * @param pszObjName The object name, can be NULL.
877 * @param prc Where to store the result when returning true.
878 */
879bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
880{
881 NOREF(pObj);
882 NOREF(pSession);
883 NOREF(pszObjName);
884 NOREF(prc);
885 return false;
886}
887
888
889#ifndef USE_NEW_OS_INTERFACE_FOR_MM
890
891/**
892 * Compute order. Some functions allocate 2^order pages.
893 *
894 * @returns order.
895 * @param cPages Number of pages.
896 */
897static int VBoxDrvOrder(unsigned long cPages)
898{
899 int iOrder;
900 unsigned long cTmp;
901
902 for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
903 ;
904 if (cPages & ~(1 << iOrder))
905 ++iOrder;
906
907 return iOrder;
908}
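/*
 * Editor's note: worked examples for VBoxDrvOrder() above. The loop computes
 * floor(log2(cPages)) and the final test rounds up when cPages is not a
 * power of two:
 *
 *   cPages = 1 -> order 0 (1 page)     cPages = 4 -> order 2 (4 pages)
 *   cPages = 3 -> order 2 (4 pages)    cPages = 5 -> order 3 (8 pages)
 */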
909
910
911/**
912 * OS Specific code for locking down memory.
913 *
914 * @returns 0 on success.
915 * @returns SUPDRV_ERR_* on failure.
916 * @param pMem Pointer to memory.
917 * This is not linked in anywhere.
918 * @param paPages Array which should be filled with the address of the physical pages.
919 *
920 * @remark See sgl_map_user_pages() for an example of a similar function.
921 */
922int VBOXCALL supdrvOSLockMemOne(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
923{
924 int rc;
925 struct page **papPages;
926 unsigned iPage;
927 unsigned cPages = pMem->cb >> PAGE_SHIFT;
928 unsigned long pv = (unsigned long)pMem->pvR3;
929 struct vm_area_struct **papVMAs;
930
931 /*
932 * Allocate page pointer array.
933 */
934 papPages = vmalloc(cPages * sizeof(*papPages));
935 if (!papPages)
936 return SUPDRV_ERR_NO_MEMORY;
937
938 /*
939 * Allocate the VMA pointer array.
940 */
941 papVMAs = vmalloc(cPages * sizeof(*papVMAs));
942 if (!papVMAs)
943 return SUPDRV_ERR_NO_MEMORY;
944
945 /*
946 * Get user pages.
947 */
948 down_read(&current->mm->mmap_sem);
949 rc = get_user_pages(current, /* Task for fault accounting. */
950 current->mm, /* Whose pages. */
951 (unsigned long)pv, /* Where from. */
952 cPages, /* How many pages. */
953 1, /* Write to memory. */
954 0, /* force. */
955 papPages, /* Page array. */
956 papVMAs); /* vmas */
957 if (rc != cPages)
958 {
959 up_read(&current->mm->mmap_sem);
960 dprintf(("supdrvOSLockMemOne: get_user_pages failed. rc=%d\n", rc));
961 return SUPDRV_ERR_LOCK_FAILED;
962 }
963
964 for (iPage = 0; iPage < cPages; iPage++)
965 flush_dcache_page(papPages[iPage]);
966 up_read(&current->mm->mmap_sem);
967
968 pMem->u.locked.papPages = papPages;
969 pMem->u.locked.cPages = cPages;
970
971 /*
972 * Get addresses, protect against fork()
973 */
974 for (iPage = 0; iPage < cPages; iPage++)
975 {
976 paPages[iPage].Phys = page_to_phys(papPages[iPage]);
977 paPages[iPage].uReserved = 0;
978 papVMAs[iPage]->vm_flags |= VM_DONTCOPY;
979 }
980
981 vfree(papVMAs);
982
983 dprintf2(("supdrvOSLockMemOne: pvR3=%p cb=%d papPages=%p\n",
984 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
985 return 0;
986}
987
988
989/**
990 * Unlocks the memory pointed to by pv.
991 *
992 * @param pMem Pointer to memory to unlock.
993 *
994 * @remark See sgl_unmap_user_pages() for an example of a similar function.
995 */
996void VBOXCALL supdrvOSUnlockMemOne(PSUPDRVMEMREF pMem)
997{
998 unsigned iPage;
999 dprintf2(("supdrvOSUnlockMemOne: pvR3=%p cb=%d papPages=%p\n",
1000 pMem->pvR3, pMem->cb, pMem->u.locked.papPages));
1001
1002 /*
1003 * Loop through the pages and release them.
1004 */
1005 for (iPage = 0; iPage < pMem->u.locked.cPages; iPage++)
1006 {
1007 if (!PageReserved(pMem->u.locked.papPages[iPage]))
1008 SetPageDirty(pMem->u.locked.papPages[iPage]);
1009 page_cache_release(pMem->u.locked.papPages[iPage]);
1010 }
1011
1012 /* free the page array */
1013 vfree(pMem->u.locked.papPages);
1014 pMem->u.locked.cPages = 0;
1015}
1016
1017
1018/**
1019 * OS specific code for allocating page-aligned memory with contiguous fixed
1020 * physical page backing.
1021 *
1022 * @returns 0 on success.
1023 * @returns SUPDRV_ERR_* on failure.
1024 * @param pMem Memory reference record of the memory to be allocated.
1025 * (This is not linked in anywhere.)
1026 * @param ppvR0 Where to store the virtual address of the ring-0 mapping. (optional)
1027 * @param ppvR3 Where to store the virtual address of the ring-3 mapping.
1028 * @param pHCPhys Where to store the physical address.
1029 */
1030int VBOXCALL supdrvOSContAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1031{
1032 struct page *paPages;
1033 unsigned iPage;
1034 unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
1035 unsigned cPages = cbAligned >> PAGE_SHIFT;
1036 unsigned cOrder = VBoxDrvOrder(cPages);
1037 unsigned long ulAddr;
1038 dma_addr_t HCPhys;
1039 int rc = 0;
1040 pgprot_t pgFlags;
1041 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
1042
1043 Assert(ppvR3);
1044 Assert(pHCPhys);
1045
1046 /*
1047 * Allocate page pointer array.
1048 */
1049#ifdef RT_ARCH_AMD64 /** @todo check out if there is a correct way of getting memory below 4GB (physically). */
1050 paPages = alloc_pages(GFP_DMA, cOrder);
1051#else
1052 paPages = alloc_pages(GFP_USER, cOrder);
1053#endif
1054 if (!paPages)
1055 return SUPDRV_ERR_NO_MEMORY;
1056
1057 /*
1058 * Lock the pages.
1059 */
1060 for (iPage = 0; iPage < cPages; iPage++)
1061 {
1062 SetPageReserved(&paPages[iPage]);
1063 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1064 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
1065#ifdef DEBUG
1066 if (iPage + 1 < cPages && (page_to_phys((&paPages[iPage])) + 0x1000) != page_to_phys((&paPages[iPage + 1])))
1067 {
1068 dprintf(("supdrvOSContAllocOne: Pages are not continuous!!!! iPage=%d phys=%llx physnext=%llx\n",
1069 iPage, (long long)page_to_phys((&paPages[iPage])), (long long)page_to_phys((&paPages[iPage + 1]))));
1070 BUG();
1071 }
1072#endif
1073 }
1074 HCPhys = page_to_phys(paPages);
1075
1076 /*
1077 * Allocate user space mapping and put the physical pages into it.
1078 */
1079 down_write(&current->mm->mmap_sem);
1080 ulAddr = do_mmap(NULL, 0, cbAligned, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0);
1081 if (!(ulAddr & ~PAGE_MASK))
1082 {
1083#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1084 int rc2 = remap_page_range(ulAddr, HCPhys, cbAligned, pgFlags);
1085#else
1086 int rc2 = 0;
1087 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1088 if (vma)
1089#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1090 rc2 = remap_page_range(vma, ulAddr, HCPhys, cbAligned, pgFlags);
1091#else
1092 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, cbAligned, pgFlags);
1093#endif
1094 else
1095 {
1096 rc = SUPDRV_ERR_NO_MEMORY;
1097 dprintf(("supdrvOSContAllocOne: no vma found for ulAddr=%#lx!\n", ulAddr));
1098 }
1099#endif
1100 if (rc2)
1101 {
1102 rc = SUPDRV_ERR_NO_MEMORY;
1103 dprintf(("supdrvOSContAllocOne: remap_page_range failed rc2=%d\n", rc2));
1104 }
1105 }
1106 else
1107 {
1108 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1109 rc = SUPDRV_ERR_NO_MEMORY;
1110 }
1111 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1112
1113 /*
1114 * Success?
1115 */
1116 if (!rc)
1117 {
1118 *pHCPhys = HCPhys;
1119 *ppvR3 = ulAddr;
1120 if (ppvR0)
1121 *ppvR0 = (void *)ulAddr;
1122 pMem->pvR3 = ulAddr;
1123 pMem->pvR0 = NULL;
1124 pMem->u.cont.paPages = paPages;
1125 pMem->u.cont.cPages = cPages;
1126 pMem->cb = cbAligned;
1127
1128 dprintf2(("supdrvOSContAllocOne: pvR0=%p pvR3=%p cb=%d paPages=%p *pHCPhys=%lx *ppvR0=*ppvR3=%p\n",
1129 pMem->pvR0, pMem->pvR3, pMem->cb, paPages, (unsigned long)*pHCPhys, *ppvR3));
1130 global_flush_tlb();
1131 return 0;
1132 }
1133
1134 /*
1135 * Failure, cleanup and be gone.
1136 */
1137 down_write(&current->mm->mmap_sem);
1138 if (ulAddr & ~PAGE_MASK)
1139 MY_DO_MUNMAP(current->mm, ulAddr, pMem->cb);
1140 for (iPage = 0; iPage < cPages; iPage++)
1141 {
1142 ClearPageReserved(&paPages[iPage]);
1143 if (!PageHighMem(&paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1144 MY_CHANGE_PAGE_ATTR(&paPages[iPage], 1, PAGE_KERNEL);
1145 }
1146 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1147 __free_pages(paPages, cOrder);
1148
1149 global_flush_tlb();
1150 return rc;
1151}
1152
1153
1154/**
1155 * Frees contiguous memory.
1156 *
1157 * @param pMem Memory reference record of the memory to be freed.
1158 */
1159void VBOXCALL supdrvOSContFreeOne(PSUPDRVMEMREF pMem)
1160{
1161 unsigned iPage;
1162
1163 dprintf2(("supdrvOSContFreeOne: pvR0=%p pvR3=%p cb=%d paPages=%p\n",
1164 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.cont.paPages));
1165
1166 /*
1167 * do_exit() destroys the mm before closing files.
1168 * I really hope it cleans up our stuff properly...
1169 */
1170 if (current->mm)
1171 {
1172 down_write(&current->mm->mmap_sem);
1173 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, pMem->cb);
1174 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1175 }
1176
1177 /*
1178 * Change page attributes freeing the pages.
1179 */
1180 for (iPage = 0; iPage < pMem->u.cont.cPages; iPage++)
1181 {
1182 ClearPageReserved(&pMem->u.cont.paPages[iPage]);
1183 if (!PageHighMem(&pMem->u.cont.paPages[iPage]) && pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1184 MY_CHANGE_PAGE_ATTR(&pMem->u.cont.paPages[iPage], 1, PAGE_KERNEL);
1185 }
1186 __free_pages(pMem->u.cont.paPages, VBoxDrvOrder(pMem->u.cont.cPages));
1187
1188 pMem->u.cont.cPages = 0;
1189}
1190
1191
1192/**
1193 * Allocates memory which is mapped into both kernel and user space.
1194 * The returned memory is page aligned and so is the allocation.
1195 *
1196 * @returns 0 on success.
1197 * @returns SUPDRV_ERR_* on failure.
1198 * @param pMem Memory reference record of the memory to be allocated.
1199 * (This is not linked in anywhere.)
1200 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1201 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1202 */
1203int VBOXCALL supdrvOSMemAllocOne(PSUPDRVMEMREF pMem, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1204{
1205 const unsigned cbAligned = RT_ALIGN(pMem->cb, PAGE_SIZE);
1206 const unsigned cPages = cbAligned >> PAGE_SHIFT;
1207#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1208 unsigned cOrder = VBoxDrvOrder(cPages);
1209 struct page *paPages;
1210#endif
1211 struct page **papPages;
1212 unsigned iPage;
1213 pgprot_t pgFlags;
1214 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_RW | _PAGE_USER;
1215
1216 /*
1217 * Allocate array with page pointers.
1218 */
1219 pMem->u.mem.cPages = 0;
1220 pMem->u.mem.papPages = papPages = kmalloc(sizeof(papPages[0]) * cPages, GFP_KERNEL);
1221 if (!papPages)
1222 return SUPDRV_ERR_NO_MEMORY;
1223
1224 /*
1225 * Allocate the pages.
1226 */
1227#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1228 for (iPage = 0; iPage < cPages; iPage++)
1229 {
1230 papPages[iPage] = alloc_page(GFP_HIGHUSER);
1231 if (!papPages[iPage])
1232 {
1233 pMem->u.mem.cPages = iPage;
1234 supdrvOSMemFreeOne(pMem);
1235 return SUPDRV_ERR_NO_MEMORY;
1236 }
1237 }
1238
1239#else /* < 2.4.22 */
1240 paPages = alloc_pages(GFP_USER, cOrder);
1241 if (!paPages)
1242 {
1243 supdrvOSMemFreeOne(pMem);
1244 return SUPDRV_ERR_NO_MEMORY;
1245 }
1246 for (iPage = 0; iPage < cPages; iPage++)
1247 {
1248 papPages[iPage] = &paPages[iPage];
1249 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1250 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, MY_PAGE_KERNEL_EXEC);
1251 if (PageHighMem(papPages[iPage]))
1252 BUG();
1253 }
1254#endif
1255 pMem->u.mem.cPages = cPages;
1256
1257 /*
1258 * Reserve the pages.
1259 */
1260 for (iPage = 0; iPage < cPages; iPage++)
1261 SetPageReserved(papPages[iPage]);
1262
1263 /*
1264 * Create the Ring-0 mapping.
1265 */
1266 if (ppvR0)
1267 {
1268#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1269# ifdef VM_MAP
1270 *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_MAP, pgFlags);
1271# else
1272 *ppvR0 = pMem->pvR0 = vmap(papPages, cPages, VM_ALLOC, pgFlags);
1273# endif
1274#else
1275 *ppvR0 = pMem->pvR0 = phys_to_virt(page_to_phys(papPages[0]));
1276#endif
1277 }
1278 if (pMem->pvR0 || !ppvR0)
1279 {
1280 /*
1281 * Create the ring3 mapping.
1282 */
1283 if (ppvR3)
1284 *ppvR3 = pMem->pvR3 = VBoxDrvLinuxMapUser(papPages, cPages, PROT_READ | PROT_WRITE | PROT_EXEC, pgFlags);
1285 if (pMem->pvR3 || !ppvR3)
1286 return 0;
1287 dprintf(("supdrvOSMemAllocOne: failed to map into r3! cPages=%u\n", cPages));
1288 }
1289 else
1290 dprintf(("supdrvOSMemAllocOne: failed to map into r0! cPages=%u\n", cPages));
1291
1292 supdrvOSMemFreeOne(pMem);
1293 return SUPDRV_ERR_NO_MEMORY;
1294}
1295
1296
1297/**
1298 * Get the physical addresses of the pages in the allocation.
1299 * This is called while holding the bundle spinlock.
1300 *
1301 * @param pMem Memory reference record of the memory.
1302 * @param paPages Where to store the page addresses.
1303 */
1304void VBOXCALL supdrvOSMemGetPages(PSUPDRVMEMREF pMem, PSUPPAGE paPages)
1305{
1306 unsigned iPage;
1307 for (iPage = 0; iPage < pMem->u.mem.cPages; iPage++)
1308 {
1309 paPages[iPage].Phys = page_to_phys(pMem->u.mem.papPages[iPage]);
1310 paPages[iPage].uReserved = 0;
1311 }
1312}
1313
1314
1315/**
1316 * Frees memory allocated by supdrvOSMemAllocOne().
1317 *
1318 * @param pMem Memory reference record of the memory to be freed.
1319 */
1320void VBOXCALL supdrvOSMemFreeOne(PSUPDRVMEMREF pMem)
1321{
1322 dprintf2(("supdrvOSMemFreeOne: pvR0=%p pvR3=%p cb=%d cPages=%d papPages=%p\n",
1323 pMem->pvR0, pMem->pvR3, pMem->cb, pMem->u.mem.cPages, pMem->u.mem.papPages));
1324
1325 /*
1326 * Unmap the user mapping (if any).
1327 * do_exit() destroys the mm before closing files.
1328 */
1329 if (pMem->pvR3 && current->mm)
1330 {
1331 down_write(&current->mm->mmap_sem);
1332 MY_DO_MUNMAP(current->mm, (unsigned long)pMem->pvR3, RT_ALIGN(pMem->cb, PAGE_SIZE));
1333 up_write(&current->mm->mmap_sem); /* check when we can leave this. */
1334 }
1335 pMem->pvR3 = NIL_RTR3PTR;
1336
1337 /*
1338 * Unmap the kernel mapping (if any).
1339 */
1340 if (pMem->pvR0)
1341 {
1342#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1343 vunmap(pMem->pvR0);
1344#endif
1345 pMem->pvR0 = NULL;
1346 }
1347
1348 /*
1349 * Free the physical pages.
1350 */
1351 if (pMem->u.mem.papPages)
1352 {
1353 struct page **papPages = pMem->u.mem.papPages;
1354 const unsigned cPages = pMem->u.mem.cPages;
1355 unsigned iPage;
1356
1357 /* Restore the page flags. */
1358 for (iPage = 0; iPage < cPages; iPage++)
1359 {
1360 ClearPageReserved(papPages[iPage]);
1361#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22)
1362 if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL))
1363 MY_CHANGE_PAGE_ATTR(papPages[iPage], 1, PAGE_KERNEL);
1364#endif
1365 }
1366
1367 /* Free the pages. */
1368#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22)
1369 for (iPage = 0; iPage < cPages; iPage++)
1370 __free_page(papPages[iPage]);
1371#else
1372 if (cPages > 0)
1373 __free_pages(papPages[0], VBoxDrvOrder(cPages));
1374#endif
1375 /* Free the page pointer array. */
1376 kfree(papPages);
1377 pMem->u.mem.papPages = NULL;
1378 }
1379 pMem->u.mem.cPages = 0;
1380}
1381
1382
1383/**
1384 * Maps a range of pages into user space.
1385 *
1386 * @returns Pointer to the user space mapping on success.
1387 * @returns NULL on failure.
1388 * @param papPages Array of the pages to map.
1389 * @param cPages Number of pages to map.
1390 * @param fProt The mapping protection.
1391 * @param pgFlags The page level protection.
1392 */
1393static RTR3PTR VBoxDrvLinuxMapUser(struct page **papPages, unsigned cPages, unsigned fProt, pgprot_t pgFlags)
1394{
1395 int rc = SUPDRV_ERR_NO_MEMORY;
1396 unsigned long ulAddr;
1397
1398 /*
1399 * Allocate user space mapping.
1400 */
1401 down_write(&current->mm->mmap_sem);
1402 ulAddr = do_mmap(NULL, 0, cPages * PAGE_SIZE, fProt, MAP_SHARED | MAP_ANONYMOUS, 0);
1403 if (!(ulAddr & ~PAGE_MASK))
1404 {
1405 /*
1406 * Map page by page into the mmap area.
1407 * This is generic, paranoid and not very efficient.
1408 */
1409 rc = 0;
1410 unsigned long ulAddrCur = ulAddr;
1411 unsigned iPage;
1412 for (iPage = 0; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
1413 {
1414#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1415 struct vm_area_struct *vma = find_vma(current->mm, ulAddrCur);
1416 if (!vma)
1417 break;
1418#endif
1419
1420#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11)
1421 rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(papPages[iPage]), PAGE_SIZE, pgFlags);
1422#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1423 rc = remap_page_range(vma, ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1424#else /* 2.4 */
1425 rc = remap_page_range(ulAddrCur, page_to_phys(papPages[iPage]), PAGE_SIZE, pgFlags);
1426#endif
1427 if (rc)
1428 break;
1429 }
1430
1431 /*
1432 * Successful?
1433 */
1434 if (iPage >= cPages)
1435 {
1436 up_write(&current->mm->mmap_sem);
1437 return ulAddr;
1438 }
1439
1440 /* no, cleanup! */
1441 if (rc)
1442 dprintf(("VBoxDrvLinuxMapUser: remap_[page|pfn]_range failed! rc=%d\n", rc));
1443 else
1444 dprintf(("VBoxDrvLinuxMapUser: find_vma failed!\n"));
1445
1446 MY_DO_MUNMAP(current->mm, ulAddr, cPages << PAGE_SHIFT);
1447 }
1448 else
1449 {
1450 dprintf(("supdrvOSContAllocOne: do_mmap failed ulAddr=%#lx\n", ulAddr));
1451 rc = SUPDRV_ERR_NO_MEMORY;
1452 }
1453 up_write(&current->mm->mmap_sem);
1454
1455 return NIL_RTR3PTR;
1456}
1457
1458#endif /* !USE_NEW_OS_INTERFACE_FOR_MM */
1459
1460
1461/**
1462 * Initializes the GIP.
1463 *
1464 * @returns negative errno.
1465 * @param pDevExt Instance data. GIP stuff may be updated.
1466 */
1467static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt)
1468{
1469 struct page *pPage;
1470 dma_addr_t HCPhys;
1471 PSUPGLOBALINFOPAGE pGip;
1472#ifdef CONFIG_SMP
1473 unsigned i;
1474#endif
1475 dprintf(("VBoxDrvLinuxInitGip:\n"));
1476
1477 /*
1478 * Allocate the page.
1479 */
1480 pPage = alloc_pages(GFP_USER, 0);
1481 if (!pPage)
1482 {
1483 dprintf(("VBoxDrvLinuxInitGip: failed to allocate the GIP page\n"));
1484 return -ENOMEM;
1485 }
1486
1487 /*
1488 * Lock the page.
1489 */
1490 SetPageReserved(pPage);
1491 g_pGipPage = pPage;
1492
1493 /*
1494 * Call common initialization routine.
1495 */
1496 HCPhys = page_to_phys(pPage);
1497 pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
1498 pDevExt->ulLastJiffies = jiffies;
1499#ifdef TICK_NSEC
1500 pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
1501 dprintf(("VBoxDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
1502 TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
1503#else
1504 pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * (1000000 / HZ);
1505 dprintf(("VBoxDrvInitGIP: TICK_NSEC=%d HZ=%d jiffies=%ld now=%lld\n",
1506 (int)(1000000 / HZ), HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
1507#endif
1508 supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
1509 HZ <= 1000 ? HZ : 1000);
1510
1511 /*
1512 * Initialize the timer.
1513 */
1514 init_timer(&g_GipTimer);
1515 g_GipTimer.data = (unsigned long)pDevExt;
1516 g_GipTimer.function = VBoxDrvLinuxGipTimer;
1517 g_GipTimer.expires = jiffies;
1518#ifdef CONFIG_SMP
1519 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1520 {
1521 pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
1522 pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
1523 pDevExt->aCPUs[i].iSmpProcessorId = -512;
1524 init_timer(&pDevExt->aCPUs[i].Timer);
1525 pDevExt->aCPUs[i].Timer.data = i;
1526 pDevExt->aCPUs[i].Timer.function = VBoxDrvLinuxGipTimerPerCpu;
1527 pDevExt->aCPUs[i].Timer.expires = jiffies;
1528 }
1529#endif
1530
1531 return 0;
1532}
1533
1534
1535/**
1536 * Terminates the GIP.
1537 *
1538 * @returns negative errno.
1539 * @param pDevExt Instance data. GIP stuff may be updated.
1540 */
1541static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt)
1542{
1543 struct page *pPage;
1544 PSUPGLOBALINFOPAGE pGip;
1545#ifdef CONFIG_SMP
1546 unsigned i;
1547#endif
1548 dprintf(("VBoxDrvLinuxTermGip:\n"));
1549
1550 /*
1551 * Delete the timer if it's pending.
1552 */
1553 if (timer_pending(&g_GipTimer))
1554 del_timer_sync(&g_GipTimer);
1555#ifdef CONFIG_SMP
1556 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1557 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1558 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1559#endif
1560
1561 /*
1562 * Uninitialize the content.
1563 */
1564 pGip = pDevExt->pGip;
1565 pDevExt->pGip = NULL;
1566 if (pGip)
1567 supdrvGipTerm(pGip);
1568
1569 /*
1570 * Free the page.
1571 */
1572 pPage = g_pGipPage;
1573 g_pGipPage = NULL;
1574 if (pPage)
1575 {
1576 ClearPageReserved(pPage);
1577 __free_pages(pPage, 0);
1578 }
1579
1580 return 0;
1581}
1582
1583/**
1584 * Timer callback function.
1585 *
1586 * In ASYNC TSC mode this is called on the primary CPU, and we're
1587 * assuming that the CPU remains online.
1588 *
1589 * @param ulUser The device extension pointer.
1590 */
1591static void VBoxDrvLinuxGipTimer(unsigned long ulUser)
1592{
1593 PSUPDRVDEVEXT pDevExt;
1594 PSUPGLOBALINFOPAGE pGip;
1595 unsigned long ulNow;
1596 unsigned long ulDiff;
1597 uint64_t u64Monotime;
1598 unsigned long SavedFlags;
1599
1600 local_irq_save(SavedFlags);
1601
1602 pDevExt = (PSUPDRVDEVEXT)ulUser;
1603 pGip = pDevExt->pGip;
1604 ulNow = jiffies;
1605
1606#ifdef CONFIG_SMP
1607 if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1608 {
1609 uint8_t iCPU = ASMGetApicId();
1610 ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
1611 pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
1612#ifdef TICK_NSEC
1613 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
1614#else
1615 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
1616#endif
1617 pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
1618 }
1619 else
1620#endif /* CONFIG_SMP */
1621 {
1622 ulDiff = ulNow - pDevExt->ulLastJiffies;
1623 pDevExt->ulLastJiffies = ulNow;
1624#ifdef TICK_NSEC
1625 u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
1626#else
1627 u64Monotime = pDevExt->u64LastMonotime + ulDiff * (1000000 / HZ);
1628#endif
1629 pDevExt->u64LastMonotime = u64Monotime;
1630 }
1631 if (RT_LIKELY(pGip))
1632 supdrvGipUpdate(pDevExt->pGip, u64Monotime);
1633 if (RT_LIKELY(!pDevExt->fGIPSuspended))
1634 mod_timer(&g_GipTimer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
1635
1636 local_irq_restore(SavedFlags);
1637}
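/*
 * Editor's note: worked numbers for the jiffies-to-monotime step above,
 * assuming the TICK_NSEC branch. With HZ=1000, TICK_NSEC is 1000000, so a
 * delta of one jiffy advances u64Monotime by 1000000 ns -- exactly the 1ms
 * update interval supdrvGipInit() was told about; with HZ=250 (TICK_NSEC
 * 4000000) each jiffy contributes 4ms and the update rate is capped by HZ.
 */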
1638
1639
1640#ifdef CONFIG_SMP
1641/**
1642 * Timer callback function for the other CPUs.
1643 *
1644 * @param iTimerCPU The APIC ID of this timer.
1645 */
1646static void VBoxDrvLinuxGipTimerPerCpu(unsigned long iTimerCPU)
1647{
1648 PSUPDRVDEVEXT pDevExt;
1649 PSUPGLOBALINFOPAGE pGip;
1650 uint8_t iCPU;
1651 uint64_t u64Monotime;
1652 unsigned long SavedFlags;
1653
1654 local_irq_save(SavedFlags);
1655
1656 pDevExt = &g_DevExt;
1657 pGip = pDevExt->pGip;
1658 iCPU = ASMGetApicId();
1659
1660 if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
1661 {
1662 if (RT_LIKELY(iTimerCPU == iCPU))
1663 {
1664 unsigned long ulNow = jiffies;
1665 unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
1666 pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
1667#ifdef TICK_NSEC
1668 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
1669#else
1670 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * (1000000 / HZ);
1671#endif
1672 pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
1673 if (RT_LIKELY(pGip))
1674 supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
1675 if (RT_LIKELY(!pDevExt->fGIPSuspended))
1676 mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + (HZ <= 1000 ? 0 : ONE_MSEC_IN_JIFFIES));
1677 }
1678 else
1679 printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d !=? timer-cpuid=%d)\n",
1680 iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
1681 }
1682 else
1683 printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%lu cpuid=%d\n",
1684 iCPU, (unsigned long)RT_ELEMENTS(pGip->aCPUs), smp_processor_id());
1685
1686 local_irq_restore(SavedFlags);
1687}
1688#endif /* CONFIG_SMP */
1689
1690
1691/**
1692 * Maps the GIP into user space.
1693 *
1694 * @returns negative errno.
1695 * @param pDevExt Instance data.
1696 */
1697int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE *ppGip)
1698{
1699 int rc = 0;
1700 unsigned long ulAddr;
1701 unsigned long HCPhys = pDevExt->HCPhysGip;
1702 pgprot_t pgFlags;
1703 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1704 dprintf2(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1705
1706 /*
1707 * Allocate user space mapping and put the physical pages into it.
1708 */
1709 down_write(&current->mm->mmap_sem);
1710 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1711 if (!(ulAddr & ~PAGE_MASK))
1712 {
1713#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1714 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1715#else
1716 int rc2 = 0;
1717 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1718 if (vma)
1719#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1720 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1721#else
1722 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1723#endif
1724 else
1725 {
1726 rc = SUPDRV_ERR_NO_MEMORY;
1727 dprintf(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1728 }
1729#endif
1730 if (rc2)
1731 {
1732 rc = SUPDRV_ERR_NO_MEMORY;
1733 dprintf(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1734 }
1735 }
1736 else
1737 {
1738 dprintf(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1739 rc = SUPDRV_ERR_NO_MEMORY;
1740 }
1741 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1742
1743 /*
1744 * Success?
1745 */
1746 if (!rc)
1747 {
1748 *ppGip = (PSUPGLOBALINFOPAGE)ulAddr;
1749 dprintf2(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1750 return 0;
1751 }
1752
1753 /*
1754 * Failure, cleanup and be gone.
1755 */
1756 if (ulAddr & ~PAGE_MASK)
1757 {
1758 down_write(&current->mm->mmap_sem);
1759 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1760 up_write(&current->mm->mmap_sem);
1761 }
1762
1763 dprintf2(("supdrvOSGipMap: returns %d\n", rc));
1764 return rc;
1765}
1766
1767
1768/**
1769 * Unmaps the GIP from user space.
1770 *
1771 * @returns negative errno.
1772 * @param pDevExt Instance data.
1773 */
1774int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
1775{
1776 dprintf2(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1777 if (current->mm)
1778 {
1779 down_write(&current->mm->mmap_sem);
1780 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1781 up_write(&current->mm->mmap_sem);
1782 }
1783 dprintf2(("supdrvOSGipUnmap: returns 0\n"));
1784 return 0;
1785}
1786
1787
1788/**
1789 * Resumes the GIP updating.
1790 *
1791 * @param pDevExt Instance data.
1792 */
1793void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
1794{
1795 dprintf2(("supdrvOSGipResume:\n"));
1796 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
1797#ifdef CONFIG_SMP
1798 if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1799#endif
1800 mod_timer(&g_GipTimer, jiffies);
1801#ifdef CONFIG_SMP
1802 else
1803 {
1804 mod_timer(&g_GipTimer, jiffies);
1805 smp_call_function(VBoxDrvLinuxGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
1806 }
1807#endif
1808}
1809
1810
1811#ifdef CONFIG_SMP
1812/**
1813 * Callback for resuming GIP updating on the other CPUs.
1814 *
1815 * This is only used when the GIP is in async tsc mode.
1816 *
1817 * @param pvUser Pointer to the device instance.
1818 */
1819static void VBoxDrvLinuxGipResumePerCpu(void *pvUser)
1820{
1821 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1822 uint8_t iCPU = ASMGetApicId();
1823
1824 if (RT_UNLIKELY(iCPU >= RT_ELEMENTS(pDevExt->pGip->aCPUs)))
1825 {
1826 printk("vboxdrv: error: apicid=%d max=%lu cpuid=%d\n",
1827 iCPU, (unsigned long)RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
1828 return;
1829 }
1830
1831 pDevExt->aCPUs[iCPU].iSmpProcessorId = smp_processor_id();
1832 mod_timer(&pDevExt->aCPUs[iCPU].Timer, jiffies);
1833}
1834#endif /* CONFIG_SMP */
1835
1836
1837/**
1838 * Suspends the GIP updating.
1839 *
1840 * @param pDevExt Instance data.
1841 */
1842void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1843{
1844#ifdef CONFIG_SMP
1845 unsigned i;
1846#endif
1847 dprintf2(("supdrvOSGipSuspend:\n"));
1848 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);
1849
1850 if (timer_pending(&g_GipTimer))
1851 del_timer_sync(&g_GipTimer);
1852#ifdef CONFIG_SMP
1853 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1854 if (timer_pending(&pDevExt->aCPUs[i].Timer))
1855 del_timer_sync(&pDevExt->aCPUs[i].Timer);
1856#endif
1857}
1858
1859
1860/**
1861 * Get the current CPU count.
1862 * @returns Number of cpus.
1863 */
1864unsigned VBOXCALL supdrvOSGetCPUCount(void)
1865{
1866#ifdef CONFIG_SMP
1867# if defined(num_present_cpus)
1868 return num_present_cpus();
1869# elif defined(num_online_cpus)
1870 return num_online_cpus();
1871# else
1872 return smp_num_cpus;
1873# endif
1874#else
1875 return 1;
1876#endif
1877}
1878
1879/**
1880 * Force async tsc mode.
1881 * @todo add a module argument for this.
1882 */
1883bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
1884{
1885 return false;
1886}
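/*
 * Editor's note: a sketch of the module argument the @todo above asks for,
 * using the standard 2.6 module_param() machinery; the parameter name is
 * invented for illustration.
 */
#if 0
static int force_async_tsc = 0;
module_param(force_async_tsc, int, 0444);
MODULE_PARM_DESC(force_async_tsc, "force the GIP into async TSC mode");

bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
{
    return force_async_tsc != 0;
}
#endif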
1887
1888
1889/**
1890 * Converts a supdrv error code to a Linux error code.
1891 *
1892 * @returns corresponding Linux error code.
1893 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1894 */
1895static int VBoxDrvLinuxErr2LinuxErr(int rc)
1896{
1897 switch (rc)
1898 {
1899 case 0: return 0;
1900 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1901 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1902 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1903 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1904 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1905 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1906 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1907 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1908 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1909 case SUPDRV_ERR_IDT_FAILED: return -1000;
1910 }
1911
1912 return -EPERM;
1913}
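/*
 * Editor's note: what the table above means for a user-space caller. The
 * negative errno returned from, say, VBoxDrvLinuxCreate() surfaces as a
 * positive errno value after the failed system call; a sketch:
 */
#if 0
#include <errno.h>
#include <fcntl.h>

static void sup_open_example(void)
{
    int fd = open("/dev/vboxdrv", O_RDWR);
    if (fd < 0 && errno == ENOLCK)
        ; /* the driver failed with SUPDRV_ERR_LOCK_FAILED */
}
#endif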
1914
1915
1916RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
1917{
1918#if 1
1919 va_list args;
1920 char szMsg[512];
1921
1922 va_start(args, pszFormat);
1923 vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
1924 szMsg[sizeof(szMsg) - 1] = '\0';
1925 printk("%s", szMsg);
1926 va_end(args);
1927#else
1928 /* forward to printf - needs some more GCC hacking to fix ebp... */
1929 __asm__ __volatile__ ("mov %0, %esp\n\t"
1930 "jmp %1\n\t",
1931 :: "r" ((uintptr_t)&pszFormat - 4),
1932 "m" (printk));
1933#endif
1934 return 0;
1935}
1936
1937
1938/** Runtime assert implementation for Linux Ring-0. */
1939RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
1940{
1941 printk("!!Assertion Failed!!\n"
1942 "Expression: %s\n"
1943 "Location : %s(%d) %s\n",
1944 pszExpr, pszFile, uLine, pszFunction);
1945}
1946
1947
1948/** Runtime assert implementation for Linux Ring-0. */
1949RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1950{ /* forwarder. */
1951 va_list ap;
1952 char msg[256];
1953
1954 va_start(ap, pszFormat);
1955 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1956 msg[sizeof(msg) - 1] = '\0';
1957 printk("%s", msg);
1958 va_end(ap);
1959}
1960
1961
1962/* GCC C++ hack. */
1963unsigned __gxx_personality_v0 = 0xcccccccc;
1964
1965
1966module_init(VBoxDrvLinuxInit);
1967module_exit(VBoxDrvLinuxUnload);
1968
1969MODULE_AUTHOR("innotek GmbH");
1970MODULE_DESCRIPTION("VirtualBox Support Driver");
1971MODULE_LICENSE("GPL");
1972#ifdef MODULE_VERSION
1973#define xstr(s) str(s)
1974#define str(s) #s
1975MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
1976#endif