VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 8465

Last change on this file since 8465 was 8347, checked in by vboxsync, 17 years ago

gcc warnings

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.3 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 *
25 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
26 * Clara, CA 95054 USA or visit http://www.sun.com if you need
27 * additional information or have any questions.
28 * Some lines of code to disable the local APIC on x86_64 machines taken
29 * from a Mandriva patch by Gwenole Beauchesne <[email protected]>.
30 */
31
32/*******************************************************************************
33* Header Files *
34*******************************************************************************/
35#include "SUPDRV.h"
36#include "the-linux-kernel.h"
37#include "version-generated.h"
38
39#include <iprt/assert.h>
40#include <iprt/spinlock.h>
41#include <iprt/semaphore.h>
42#include <iprt/initterm.h>
43#include <iprt/process.h>
44#include <iprt/err.h>
45#include <iprt/mem.h>
46#include <iprt/log.h>
47#include <iprt/mp.h>
48
49#include <linux/sched.h>
50#ifdef CONFIG_DEVFS_FS
51# include <linux/devfs_fs_kernel.h>
52#endif
53#ifdef CONFIG_VBOXDRV_AS_MISC
54# include <linux/miscdevice.h>
55#endif
56#ifdef CONFIG_X86_LOCAL_APIC
57# include <asm/apic.h>
58# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
59# include <asm/nmi.h>
60# endif
61#endif
62
63#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
64# include <asm/pgtable.h>
65# define global_flush_tlb __flush_tlb_global
66#endif
67
68#include <iprt/mem.h>
69
70
/*
 * devfs defines.
 *
 * VBOX_REGISTER_DEVFS() evaluates to a non-NULL handle on success and NULL on
 * failure.  VBOX_UNREGISTER_DEVFS(handle) is a single statement (wrapped in
 * do { } while (0)) so it is safe inside unbraced if/else bodies.
 */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)

/* 2.6 devfs has no handle type; fake a non-NULL one on success. */
#  define VBOX_REGISTER_DEVFS()                         \
({                                                      \
    void *pvVBoxDevFsRc = NULL;                         \
    if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0),           \
                      S_IFCHR | S_IRUGO | S_IWUGO,      \
                      DEVICE_NAME) == 0)                \
        pvVBoxDevFsRc = (void *)' '; /* return not NULL */ \
    pvVBoxDevFsRc;                                      \
 })

/* The handle is not needed on 2.6, the entry is removed by name. */
#  define VBOX_UNREGISTER_DEVFS(handle)                 \
    do {                                                \
        devfs_remove(DEVICE_NAME);                      \
    } while (0)

# else /* < 2.6.0 */

#  define VBOX_REGISTER_DEVFS()                         \
    devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
                   DEVICE_MAJOR, 0,                     \
                   S_IFCHR | S_IRUGO | S_IWUGO,         \
                   &gFileOpsVBoxDrv, NULL)

#  define VBOX_UNREGISTER_DEVFS(handle)                 \
    do {                                                \
        if ((handle) != NULL)                           \
            devfs_unregister(handle);                   \
    } while (0)

# endif /* < 2.6.0 */
#endif /* CONFIG_DEVFS_FS && !CONFIG_VBOXDRV_AS_MISC */
102
103#ifndef CONFIG_VBOXDRV_AS_MISC
104# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
105# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
106# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
107# else
108# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
109# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
110# endif
111#endif /* !CONFIG_VBOXDRV_AS_MISC */
112
113
114#ifdef CONFIG_X86_HIGH_ENTRY
115# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
116#endif
117
/*
 * Page protection for kernel mappings that must be executable.
 *
 *  - AMD64: PAGE_KERNEL_EXEC is used as-is.
 *  - x86 with PAE and PAGE_KERNEL_EXEC defined: the protection is assembled
 *    from _PAGE_KERNEL_EXEC, adding _PAGE_GLOBAL when the CPU has PGE.
 *    (__PAGE_KERNEL_EXEC is not exported, so PAGE_KERNEL_EXEC itself cannot
 *    be used here.)
 *  - Everything else: plain PAGE_KERNEL.
 */
#if defined(RT_ARCH_AMD64)
# define MY_PAGE_KERNEL_EXEC    PAGE_KERNEL_EXEC
#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
# define MY_PAGE_KERNEL_EXEC    __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
#else
# define MY_PAGE_KERNEL_EXEC    PAGE_KERNEL
#endif
128
/*
 * The redhat hack section.
 *  - The current hacks are for 2.4.21-15.EL only.
 *  - Define NO_REDHAT_HACKS to compile them out.
 */
#ifndef NO_REDHAT_HACKS
/* accounting: pre-2.6 kernels that define VM_ACCOUNT get a four-argument do_munmap call. */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
#  ifdef VM_ACCOUNT
#   define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
#  endif
# endif

/* backported remap_page_range: tlb_vma from <asm/tlb.h> is used as the indicator. */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
#  include <asm/tlb.h>
#  ifdef tlb_vma /* probably not good enough... */
#   define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
#  endif
# endif

#endif /* !NO_REDHAT_HACKS */


/* Default: the regular three-argument do_munmap. */
#ifndef MY_DO_MUNMAP
# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
#endif
155
156
/** @def ONE_MSEC_IN_JIFFIES
 * The number of jiffies that make up 1 millisecond. Must be at least 1!
 *
 * For HZ <= 1000 this is clamped to 1 jiffy.  For HZ above 1000 only exact
 * multiples of 1000 are accepted; anything else is a compile-time error. */
#if HZ <= 1000
# define ONE_MSEC_IN_JIFFIES       1
#elif !(HZ % 1000)
# define ONE_MSEC_IN_JIFFIES       (HZ / 1000)
#else
# define ONE_MSEC_IN_JIFFIES       ((HZ + 999) / 1000)
# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
#endif
167
/** @def TICK_NSEC
 * The time between ticks in nsec.
 *
 * Fallback for kernels that do not provide TICK_NSEC themselves: one second
 * is 1 000 000 000 ns, so a tick lasts 1000000000 / HZ ns. */
#ifndef TICK_NSEC
# define TICK_NSEC (1000000000UL / HZ)
#endif
173
#ifdef CONFIG_X86_LOCAL_APIC

/* If an NMI occurs while we are inside the world switcher the machine will
 * crash. The Linux NMI watchdog generates periodic NMIs increasing a counter
 * which is compared with another counter increased in the timer interrupt
 * handler. We disable the NMI watchdog.
 *
 * - Linux >= 2.6.21: The watchdog is disabled by default on i386 and x86_64.
 * - Linux <  2.6.21: The watchdog is normally enabled by default on x86_64
 *                    and disabled on i386.
 */
/* DO_DISABLE_NMI: actively stop the watchdog; only on AMD64, pre-2.6.21, non-RedHat-KABI builds. */
# if defined(RT_ARCH_AMD64)
#  if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21) && !defined(VBOX_REDHAT_KABI)
#   define DO_DISABLE_NMI 1
#  endif
# endif

/*
 * Accessors for nmi_active: a plain int before 2.6.19 (declared here), accessed
 * through the kernel's atomic_* helpers from 2.6.19 on.  Note that on the old
 * kernels "dec" simply stores 0.
 */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
extern int nmi_active;
#  define nmi_atomic_read(P)    *(P)
#  define nmi_atomic_set(P, V)  *(P) = (V)
#  define nmi_atomic_dec(P)     nmi_atomic_set(P, 0)
# else
#  define nmi_atomic_read(P)    atomic_read(P)
#  define nmi_atomic_set(P, V)  atomic_set(P, V)
#  define nmi_atomic_dec(P)     atomic_dec(P)
# endif

/* Fallback definitions for kernels whose headers lack the architectural perfmon constants. */
# ifndef X86_FEATURE_ARCH_PERFMON
#  define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
# endif
# ifndef MSR_ARCH_PERFMON_EVENTSEL0
#  define MSR_ARCH_PERFMON_EVENTSEL0 0x186
# endif
# ifndef ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
#  define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
# endif

#endif /* CONFIG_X86_LOCAL_APIC */
213
/* Two-level stringification: xstr(X) expands X first and then turns the result into a string literal. */
#define xstr(s) str(s)
#define str(s) #s
216
217/*******************************************************************************
218* Defined Constants And Macros *
219*******************************************************************************/
/**
 * The single device extension instance; sessions are created against it.
 */
static SUPDRVDEVEXT         g_DevExt;

/** Timer structure for the GIP update. */
static VBOXKTIMER           g_GipTimer;
/** Pointer to the page structure for the GIP (NULL while no GIP page is allocated). */
struct page                *g_pGipPage;

/** Registered devfs device handle (opaque non-NULL marker on 2.6, real devfs handle on 2.4). */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
static void                *g_hDevFsVBoxDrv = NULL;
# else
static devfs_handle_t       g_hDevFsVBoxDrv = NULL;
# endif
#endif

#ifndef CONFIG_VBOXDRV_AS_MISC
/** Module major number */
#define DEVICE_MAJOR   234
/** Saved major device number */
static int                  g_iModuleMajor;
#endif /* !CONFIG_VBOXDRV_AS_MISC */

/** Module parameter.
 * Not prefixed because the name is used by macros at the end of this file.
 * Also set to 1 by module init when asynchronous TSC mode is detected. */
static int force_async_tsc = 0;

/** The module name. */
#define DEVICE_NAME    "vboxdrv"
252
#ifdef RT_ARCH_AMD64
/**
 * Memory for the executable memory heap (in IPRT).
 *
 * The array is defined in assembly so that it lands in its own "execmemory"
 * section with the "awx" flags (allocatable, writable, executable), 32 byte
 * aligned and zero filled.  Module init donates it to IPRT via
 * RTR0MemExecDonate().  The size in the declaration and the three size
 * values in the assembly must be kept in sync.
 */
extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
__asm__(".section execmemory, \"awx\", @progbits\n\t"
        ".align 32\n\t"
        ".globl g_abExecMemory\n"
        "g_abExecMemory:\n\t"
        ".zero 1572864\n\t"
        ".type g_abExecMemory, @object\n\t"
        ".size g_abExecMemory, 1572864\n\t"
        ".text\n\t");
#endif
267
268
269/*******************************************************************************
270* Internal Functions *
271*******************************************************************************/
/** Kernel timer callback type; the signature depends on whether hrtimers (VBOX_HRTIMER) or classic timers are used. */
#ifdef VBOX_HRTIMER
typedef enum hrtimer_restart (*PFNVBOXKTIMER)(struct hrtimer *);
#else
typedef void (*PFNVBOXKTIMER)(unsigned long);
#endif
277
278static int VBoxDrvLinuxInit(void);
279static void VBoxDrvLinuxUnload(void);
280static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp);
281static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp);
282#ifdef HAVE_UNLOCKED_IOCTL
283static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
284#else
285static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
286#endif
287static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
288static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt);
289static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt);
290#ifdef VBOX_HRTIMER
291static enum hrtimer_restart VBoxDrvLinuxGipTimer(struct hrtimer *pTimer);
292#else
293static void VBoxDrvLinuxGipTimer(unsigned long ulUser);
294#endif
295#ifdef CONFIG_SMP
296# ifdef VBOX_HRTIMER
297static enum hrtimer_restart VBoxDrvLinuxGipTimerPerCpu(struct hrtimer *pTimer);
298# else
299static void VBoxDrvLinuxGipTimerPerCpu(unsigned long ulUser);
300# endif
301static void VBoxDrvLinuxGipResumePerCpu(void *pvUser);
302#endif
303static int VBoxDrvLinuxErr2LinuxErr(int);
304
305
306/** The file_operations structure. */
307static struct file_operations gFileOpsVBoxDrv =
308{
309 owner: THIS_MODULE,
310 open: VBoxDrvLinuxCreate,
311 release: VBoxDrvLinuxClose,
312#ifdef HAVE_UNLOCKED_IOCTL
313 unlocked_ioctl: VBoxDrvLinuxIOCtl,
314#else
315 ioctl: VBoxDrvLinuxIOCtl,
316#endif
317};
318
#ifdef CONFIG_VBOXDRV_AS_MISC
/** Misc device descriptor used when the driver registers as a misc device (dynamic minor). */
static struct miscdevice gMiscDevice =
{
    .minor      = MISC_DYNAMIC_MINOR,
    .name       = DEVICE_NAME,
    .fops       = &gFileOpsVBoxDrv,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
     LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
    .devfs_name = DEVICE_NAME,
# endif
};
#endif
332
333static inline void vbox_ktimer_init(PVBOXKTIMER pTimer, PFNVBOXKTIMER pfnFunction, unsigned long ulData)
334{
335#ifdef VBOX_HRTIMER
336 hrtimer_init(pTimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
337 pTimer->function = pfnFunction;
338#else
339 init_timer(pTimer);
340 pTimer->data = ulData;
341 pTimer->function = pfnFunction;
342 pTimer->expires = jiffies;
343#endif
344}
345
346static inline void vbox_ktimer_start(PVBOXKTIMER pTimer)
347{
348#ifdef VBOX_HRTIMER
349 hrtimer_start(pTimer, ktime_add_ns(ktime_get(), 1000000), HRTIMER_MODE_ABS);
350#else
351 mod_timer(pTimer, jiffies);
352#endif
353}
354
355static inline void vbox_ktimer_stop(PVBOXKTIMER pTimer)
356{
357#ifdef VBOX_HRTIMER
358 hrtimer_cancel(pTimer);
359#else
360 if (timer_pending(pTimer))
361 del_timer_sync(pTimer);
362#endif
363}
364
365#ifdef CONFIG_X86_LOCAL_APIC
366# ifdef DO_DISABLE_NMI
367
/**
 * Stop AMD NMI watchdog (x86_64 only) by clearing MSR_K7_EVNTSEL0.
 *
 * @returns 1, always.
 */
static int stop_k7_watchdog(void)
{
    wrmsr(MSR_K7_EVNTSEL0, 0, 0);
    return 1;
}
374
/**
 * Stop Intel P4 NMI watchdog (x86_64 only) by clearing the IQ_CCCR0,
 * IQ_CCCR1 and CRU_ESCR0 MSRs.
 *
 * @returns 1, always.
 */
static int stop_p4_watchdog(void)
{
    wrmsr(MSR_P4_IQ_CCCR0,  0, 0);
    wrmsr(MSR_P4_IQ_CCCR1,  0, 0);
    wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
    return 1;
}
383
384/** The new method of detecting the event counter */
385static int stop_intel_arch_watchdog(void)
386{
387 unsigned ebx;
388
389 ebx = cpuid_ebx(10);
390 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
391 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
392 return 1;
393}
394
395/** Stop NMI watchdog. */
396static void vbox_stop_apic_nmi_watchdog(void *unused)
397{
398 int stopped = 0;
399
400 /* only support LOCAL and IO APICs for now */
401 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
402 (nmi_watchdog != NMI_IO_APIC))
403 return;
404
405 if (nmi_watchdog == NMI_LOCAL_APIC)
406 {
407 switch (boot_cpu_data.x86_vendor)
408 {
409 case X86_VENDOR_AMD:
410 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
411 return;
412 stopped = stop_k7_watchdog();
413 break;
414 case X86_VENDOR_INTEL:
415 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
416 {
417 stopped = stop_intel_arch_watchdog();
418 break;
419 }
420 stopped = stop_p4_watchdog();
421 break;
422 default:
423 return;
424 }
425 }
426
427 if (stopped)
428 nmi_atomic_dec(&nmi_active);
429}
430
431/** Disable LAPIC NMI watchdog. */
432static void disable_lapic_nmi_watchdog(void)
433{
434 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
435
436 if (nmi_atomic_read(&nmi_active) <= 0)
437 return;
438
439 on_each_cpu(vbox_stop_apic_nmi_watchdog, NULL, 1, 1);
440
441 BUG_ON(nmi_atomic_read(&nmi_active) != 0);
442
443 /* tell do_nmi() and others that we're not active any more */
444 nmi_watchdog = NMI_NONE;
445}
446
447/** Shutdown NMI. */
448static void nmi_cpu_shutdown(void * dummy)
449{
450 unsigned int vERR, vPC;
451
452 vPC = apic_read(APIC_LVTPC);
453
454 if ((GET_APIC_DELIVERY_MODE(vPC) == APIC_MODE_NMI) && !(vPC & APIC_LVT_MASKED))
455 {
456 vERR = apic_read(APIC_LVTERR);
457 apic_write(APIC_LVTERR, vERR | APIC_LVT_MASKED);
458 apic_write(APIC_LVTPC, vPC | APIC_LVT_MASKED);
459 apic_write(APIC_LVTERR, vERR);
460 }
461}
462
/** Runs nmi_cpu_shutdown() on every CPU. */
static void nmi_shutdown(void)
{
    on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
}
467# endif /* DO_DISABLE_NMI */
468#endif /* CONFIG_X86_LOCAL_APIC */
469
470
/**
 * Initialize module.
 *
 * Steps: refuse to load (or try to disable the watchdog) when the NMI
 * watchdog is active, determine the TSC mode, register the device (misc or
 * character device, plus devfs entry), initialize IPRT, the device extension
 * and the GIP.  Everything registered so far is undone on failure.
 *
 * @returns 0 on success, negative errno on failure (-EINVAL for an active NMI
 *          watchdog and for IPRT / device extension init failures, otherwise
 *          the status of the failing registration or GIP init call).
 */
static int __init VBoxDrvLinuxInit(void)
{
    int         rc;
    bool        fAsync;
    uint64_t    u64DiffCores;

    dprintf(("VBoxDrv::ModuleInit\n"));

#ifdef CONFIG_X86_LOCAL_APIC
    /*
     * If an NMI occurs while we are inside the world switcher the machine will crash.
     * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
     * compared with another counter increased in the timer interrupt handler. Therefore
     * we don't allow to setup an NMI watchdog.
     */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && !defined(VBOX_REDHAT_KABI)
    /*
     * First test: NMI activated? Only works with Linux 2.6 -- 2.4 does not export
     * the nmi_watchdog variable.
     */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
      (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
#   ifdef DO_DISABLE_NMI
    if (nmi_atomic_read(&nmi_active) > 0)
    {
        printk(KERN_DEBUG DEVICE_NAME ": Trying to deactivate the NMI watchdog...\n");

        switch (nmi_watchdog)
        {
            case NMI_LOCAL_APIC:
                disable_lapic_nmi_watchdog();
                break;
            case NMI_NONE:
                nmi_atomic_dec(&nmi_active);
                break;
        }

        if (nmi_atomic_read(&nmi_active) == 0)
        {
            nmi_shutdown();
            printk(KERN_DEBUG DEVICE_NAME ": Successfully done.\n");
        }
        else
            printk(KERN_DEBUG DEVICE_NAME ": Failed!\n");
    }
#   endif /* DO_DISABLE_NMI */

    /*
     * Permanent IO_APIC mode active? No way to handle this!
     */
    if (nmi_watchdog == NMI_IO_APIC)
    {
        printk(KERN_ERR DEVICE_NAME
               ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
                        DEVICE_NAME
               ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
                        DEVICE_NAME
               ": command line.\n");
        return -EINVAL;
    }

    /*
     * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again
     */
    nmi_atomic_set(&nmi_active, -1);
    printk(KERN_DEBUG DEVICE_NAME ": Trying to deactivate the NMI watchdog permanently...\n");

    /*
     * Now fall through and see if it actually was enabled before. If so, fail
     * as we cannot deactivate it cleanly from here.
     */
#  else /* < 2.6.19 */
    /*
     * Older 2.6 kernels: nmi_watchdog is not initialized by default
     */
    if (nmi_watchdog != NMI_NONE)
        goto nmi_activated;
#  endif
# endif /* >= 2.6.0 && !defined(VBOX_REDHAT_KABI) */

    /*
     * Second test: Interrupt generated by performance counter not masked and can
     * generate an NMI. Works also with Linux 2.4.
     */
    {
        unsigned int v, ver, maxlvt;

        v = apic_read(APIC_LVR);
        ver = GET_APIC_VERSION(v);
        /* 82489DXs do not report # of LVT entries. */
        maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
        if (maxlvt >= 4)
        {
            /* Read status of performance counter IRQ vector */
            v = apic_read(APIC_LVTPC);

            /* performance counter generates NMI and is not masked? */
            if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
            {
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
     (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
                printk(KERN_ERR DEVICE_NAME
                ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
                                DEVICE_NAME
                ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# else /* < 2.6.19 */
#  if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && !defined(VBOX_REDHAT_KABI)
                /* Jump target of the first test on older 2.6 kernels. */
nmi_activated:
#  endif
                printk(KERN_ERR DEVICE_NAME
                ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
                                DEVICE_NAME
                ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
                return -EINVAL;
# endif /* >= 2.6.19 */
            }
        }
    }
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
    printk(KERN_DEBUG DEVICE_NAME ": Successfully done.\n");
# endif /* >= 2.6.19 */
#endif /* CONFIG_X86_LOCAL_APIC */

    /*
     * Check for synchronous/asynchronous TSC mode.
     */
    printk(KERN_DEBUG DEVICE_NAME ": Found %u processor cores.\n", (unsigned)RTMpGetOnlineCount());
    fAsync = supdrvDetermineAsyncTsc(&u64DiffCores);
    /* no 64-bit arithmetics here, we assume that the TSC difference between the cores is < 2^32 */
    printk(KERN_DEBUG DEVICE_NAME ": fAsync=%d u64DiffCores=%u.\n", fAsync, (uint32_t)u64DiffCores);
    if (fAsync)
        force_async_tsc = 1;

#ifdef CONFIG_VBOXDRV_AS_MISC
    rc = misc_register(&gMiscDevice);
    if (rc)
    {
        printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
        return rc;
    }
#else  /* !CONFIG_VBOXDRV_AS_MISC */
    /*
     * Register character device.
     */
    g_iModuleMajor = DEVICE_MAJOR;
    rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
    if (rc < 0)
    {
        dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
        return rc;
    }

    /*
     * Save returned module major number (a dynamic major is returned in rc
     * when DEVICE_MAJOR is 0).
     */
    if (DEVICE_MAJOR != 0)
        g_iModuleMajor = DEVICE_MAJOR;
    else
        g_iModuleMajor = rc;
    rc = 0;

#ifdef CONFIG_DEVFS_FS
    /*
     * Register a device entry
     */
    g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
    if (g_hDevFsVBoxDrv == NULL)
    {
        dprintf(("devfs_register failed!\n"));
        rc = -EINVAL;
    }
#endif
#endif /* !CONFIG_VBOXDRV_AS_MISC */
    if (!rc)
    {
        /*
         * Initialize the runtime.
         * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
         */
        rc = RTR0Init(0);
        if (RT_SUCCESS(rc))
        {
#ifdef RT_ARCH_AMD64
            rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
#endif
            /*
             * Initialize the device extension.
             */
            if (RT_SUCCESS(rc))
                rc = supdrvInitDevExt(&g_DevExt);
            if (!rc)
            {
                /*
                 * Create the GIP page.
                 */
                rc = VBoxDrvLinuxInitGip(&g_DevExt);
                if (!rc)
                {
                    /* Success: report the modes and return. */
                    printk(KERN_INFO DEVICE_NAME ": TSC mode is %s, kernel timer mode is "
#ifdef VBOX_HRTIMER
                           "'high-res'"
#else
                           "'normal'"
#endif
                           ".\n",
                           g_DevExt.pGip->u32Mode == SUPGIPMODE_SYNC_TSC ? "'synchronous'" : "'asynchronous'");
                    LogFlow(("VBoxDrv::ModuleInit returning %#x\n", rc));
                    printk(KERN_DEBUG DEVICE_NAME ": Successfully loaded version "
                           VBOX_VERSION_STRING " (interface " xstr(SUPDRVIOC_VERSION) ").\n");
                    return rc;
                }

                supdrvDeleteDevExt(&g_DevExt);
            }
            else
                rc = -EINVAL;   /* exec memory donation or device extension init failed */
            RTR0Term();
        }
        else
            rc = -EINVAL;       /* IPRT init failed */

        /*
         * Failed, cleanup and return the error code.
         */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
        VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
#endif
    }
    /* Only reached on failure: undo the device registration. */
#ifdef CONFIG_VBOXDRV_AS_MISC
    misc_deregister(&gMiscDevice);
    dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
#else
    VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
    dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
#endif
    return rc;
}
714
715
716/**
717 * Unload the module.
718 */
719static void __exit VBoxDrvLinuxUnload(void)
720{
721 int rc;
722 dprintf(("VBoxDrvLinuxUnload\n"));
723 NOREF(rc);
724
725 /*
726 * I Don't think it's possible to unload a driver which processes have
727 * opened, at least we'll blindly assume that here.
728 */
729#ifdef CONFIG_VBOXDRV_AS_MISC
730 rc = misc_deregister(&gMiscDevice);
731 if (rc < 0)
732 {
733 dprintf(("misc_deregister failed with rc=%#x\n", rc));
734 }
735#else /* !CONFIG_VBOXDRV_AS_MISC */
736# ifdef CONFIG_DEVFS_FS
737 /*
738 * Unregister a device entry
739 */
740 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
741# endif /* devfs */
742 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
743#endif /* !CONFIG_VBOXDRV_AS_MISC */
744
745 /*
746 * Destroy GIP, delete the device extension and terminate IPRT.
747 */
748 VBoxDrvLinuxTermGip(&g_DevExt);
749 supdrvDeleteDevExt(&g_DevExt);
750 RTR0Term();
751}
752
753
754/**
755 * Device open. Called on open /dev/vboxdrv
756 *
757 * @param pInode Pointer to inode info structure.
758 * @param pFilp Associated file pointer.
759 */
760static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp)
761{
762 int rc;
763 PSUPDRVSESSION pSession;
764 Log(("VBoxDrvLinuxCreate: pFilp=%p pid=%d/%d %s\n", pFilp, RTProcSelf(), current->pid, current->comm));
765
766 /*
767 * Call common code for the rest.
768 */
769 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
770 if (!rc)
771 {
772 pSession->Uid = current->euid;
773 pSession->Gid = current->egid;
774 pSession->Process = RTProcSelf();
775 pSession->R0Process = RTR0ProcHandleSelf();
776 }
777
778 pFilp->private_data = pSession;
779
780 Log(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d/%d (pid=%d/%d %s)\n",
781 &g_DevExt, pSession, rc, VBoxDrvLinuxErr2LinuxErr(rc),
782 RTProcSelf(), current->pid, current->comm));
783 return VBoxDrvLinuxErr2LinuxErr(rc);
784}
785
786
787/**
788 * Close device.
789 *
790 * @param pInode Pointer to inode info structure.
791 * @param pFilp Associated file pointer.
792 */
793static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp)
794{
795 Log(("VBoxDrvLinuxClose: pFilp=%p pSession=%p pid=%d/%d %s\n",
796 pFilp, pFilp->private_data, RTProcSelf(), current->pid, current->comm));
797 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
798 pFilp->private_data = NULL;
799 return 0;
800}
801
802
803/**
804 * Device I/O Control entry point.
805 *
806 * @param pFilp Associated file pointer.
807 * @param uCmd The function specified to ioctl().
808 * @param ulArg The argument specified to ioctl().
809 */
810#ifdef HAVE_UNLOCKED_IOCTL
811static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
812#else
813static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
814#endif
815{
816 /*
817 * Deal with the two high-speed IOCtl that takes it's arguments from
818 * the session and iCmd, and only returns a VBox status code.
819 */
820#ifdef HAVE_UNLOCKED_IOCTL
821 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
822 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
823 || uCmd == SUP_IOCTL_FAST_DO_NOP))
824 return supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
825 return VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg);
826
827#else /* !HAVE_UNLOCKED_IOCTL */
828
829 int rc;
830 unlock_kernel();
831 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
832 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
833 || uCmd == SUP_IOCTL_FAST_DO_NOP))
834 rc = supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
835 else
836 rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg);
837 lock_kernel();
838 return rc;
839#endif /* !HAVE_UNLOCKED_IOCTL */
840}
841
842
843/**
844 * Device I/O Control entry point.
845 *
846 * @param pFilp Associated file pointer.
847 * @param uCmd The function specified to ioctl().
848 * @param ulArg The argument specified to ioctl().
849 */
850static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
851{
852 int rc;
853 SUPREQHDR Hdr;
854 PSUPREQHDR pHdr;
855 uint32_t cbBuf;
856
857 Log6(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p pid=%d/%d\n", pFilp, uCmd, (void *)ulArg, RTProcSelf(), current->pid));
858
859 /*
860 * Read the header.
861 */
862 if (RT_UNLIKELY(copy_from_user(&Hdr, (void *)ulArg, sizeof(Hdr))))
863 {
864 Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x.\n", ulArg, uCmd));
865 return -EFAULT;
866 }
867 if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
868 {
869 Log(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
870 return -EINVAL;
871 }
872
873 /*
874 * Buffer the request.
875 */
876 cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
877 if (RT_UNLIKELY(cbBuf > _1M*16))
878 {
879 Log(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
880 return -E2BIG;
881 }
882 if (RT_UNLIKELY(cbBuf != _IOC_SIZE(uCmd) && _IOC_SIZE(uCmd)))
883 {
884 Log(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x.\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
885 return -EINVAL;
886 }
887 pHdr = RTMemAlloc(cbBuf);
888 if (RT_UNLIKELY(!pHdr))
889 {
890 OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x.\n", cbBuf, uCmd));
891 return -ENOMEM;
892 }
893 if (RT_UNLIKELY(copy_from_user(pHdr, (void *)ulArg, Hdr.cbIn)))
894 {
895 Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x.\n", ulArg, Hdr.cbIn, uCmd));
896 RTMemFree(pHdr);
897 return -EFAULT;
898 }
899
900 /*
901 * Process the IOCtl.
902 */
903 rc = supdrvIOCtl(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data, pHdr);
904
905 /*
906 * Copy ioctl data and output buffer back to user space.
907 */
908 if (RT_LIKELY(!rc))
909 {
910 uint32_t cbOut = pHdr->cbOut;
911 if (RT_UNLIKELY(cbOut > cbBuf))
912 {
913 OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
914 cbOut = cbBuf;
915 }
916 if (RT_UNLIKELY(copy_to_user((void *)ulArg, pHdr, cbOut)))
917 {
918 /* this is really bad! */
919 OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
920 rc = -EFAULT;
921 }
922 }
923 else
924 {
925 Log(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
926 rc = -EINVAL;
927 }
928 RTMemFree(pHdr);
929
930 Log6(("VBoxDrvLinuxIOCtl: returns %d (pid=%d/%d)\n", rc, RTProcSelf(), current->pid));
931 return rc;
932}
933
934
/**
 * Initializes any OS specific object creator fields.
 *
 * Nothing to do on Linux; both parameters are unused.
 *
 * @param   pObj        The object being created.
 * @param   pSession    The session creating the object.
 */
void VBOXCALL   supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
{
    NOREF(pObj);
    NOREF(pSession);
}
943
944
/**
 * Checks if the session can access the object.
 *
 * The Linux implementation never makes a decision: it always returns false
 * and leaves *prc untouched.
 *
 * @returns true if a decision has been made.
 * @returns false if the default access policy should be applied.
 *
 * @param   pObj        The object in question.
 * @param   pSession    The session wanting to access the object.
 * @param   pszObjName  The object name, can be NULL.
 * @param   prc         Where to store the result when returning true.
 */
bool VBOXCALL   supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
{
    NOREF(pObj);
    NOREF(pSession);
    NOREF(pszObjName);
    NOREF(prc);
    return false;
}
964
965
966/**
967 * Initializes the GIP.
968 *
969 * @returns negative errno.
970 * @param pDevExt Instance data. GIP stuff may be updated.
971 */
972static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt)
973{
974 struct page *pPage;
975 dma_addr_t HCPhys;
976 PSUPGLOBALINFOPAGE pGip;
977#ifdef CONFIG_SMP
978 unsigned i;
979#endif
980 LogFlow(("VBoxDrvLinuxInitGip:\n"));
981
982 /*
983 * Allocate the page.
984 */
985 pPage = alloc_pages(GFP_USER, 0);
986 if (!pPage)
987 {
988 Log(("VBoxDrvLinuxInitGip: failed to allocate the GIP page\n"));
989 return -ENOMEM;
990 }
991
992 /*
993 * Lock the page.
994 */
995 SetPageReserved(pPage);
996 g_pGipPage = pPage;
997
998 /*
999 * Call common initialization routine.
1000 */
1001 HCPhys = page_to_phys(pPage);
1002 pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
1003 pDevExt->ulLastJiffies = jiffies;
1004 pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
1005 Log(("VBoxDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
1006 TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
1007 supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
1008 HZ <= 1000 ? HZ : 1000);
1009
1010 /*
1011 * Initialize the timer.
1012 */
1013 vbox_ktimer_init(&g_GipTimer, VBoxDrvLinuxGipTimer, (unsigned long)pDevExt);
1014#ifdef CONFIG_SMP
1015 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1016 {
1017 pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
1018 pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
1019 pDevExt->aCPUs[i].iSmpProcessorId = -512;
1020 vbox_ktimer_init(&pDevExt->aCPUs[i].Timer, VBoxDrvLinuxGipTimerPerCpu, i);
1021 }
1022#endif
1023
1024 return 0;
1025}
1026
1027
1028/**
1029 * Terminates the GIP.
1030 *
1031 * @returns negative errno.
1032 * @param pDevExt Instance data. GIP stuff may be updated.
1033 */
1034static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt)
1035{
1036 struct page *pPage;
1037 PSUPGLOBALINFOPAGE pGip;
1038#ifdef CONFIG_SMP
1039 unsigned i;
1040#endif
1041 LogFlow(("VBoxDrvLinuxTermGip:\n"));
1042
1043 /*
1044 * Delete the timer if it's pending.
1045 */
1046 vbox_ktimer_stop(&g_GipTimer);
1047#ifdef CONFIG_SMP
1048 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1049 vbox_ktimer_stop(&pDevExt->aCPUs[i].Timer);
1050#endif
1051
1052 /*
1053 * Uninitialize the content.
1054 */
1055 pGip = pDevExt->pGip;
1056 pDevExt->pGip = NULL;
1057 if (pGip)
1058 supdrvGipTerm(pGip);
1059
1060 /*
1061 * Free the page.
1062 */
1063 pPage = g_pGipPage;
1064 g_pGipPage = NULL;
1065 if (pPage)
1066 {
1067 ClearPageReserved(pPage);
1068 __free_pages(pPage, 0);
1069 }
1070
1071 return 0;
1072}
1073
1074/**
1075 * Timer callback function.
1076 *
1077 * In ASYNC TSC mode this is called on the primary CPU, and we're
1078 * assuming that the CPU remains online.
1079 *
1080 * @param ulUser The device extension pointer.
1081 */
1082#ifdef VBOX_HRTIMER
1083static enum hrtimer_restart VBoxDrvLinuxGipTimer(struct hrtimer *pTimer)
1084#else
1085static void VBoxDrvLinuxGipTimer(unsigned long ulUser)
1086#endif
1087{
1088 PSUPDRVDEVEXT pDevExt;
1089 PSUPGLOBALINFOPAGE pGip;
1090 unsigned long ulNow;
1091 unsigned long ulDiff;
1092 uint64_t u64Monotime;
1093 unsigned long SavedFlags;
1094#ifdef VBOX_HRTIMER
1095 ktime_t KtNow;
1096#endif
1097
1098 local_irq_save(SavedFlags);
1099
1100 ulNow = jiffies;
1101#ifdef VBOX_HRTIMER
1102 KtNow = ktime_get();
1103 pDevExt = &g_DevExt;
1104#else
1105 pDevExt = (PSUPDRVDEVEXT)ulUser;
1106#endif
1107 pGip = pDevExt->pGip;
1108
1109#ifdef CONFIG_SMP
1110 if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1111 {
1112 uint8_t iCPU = ASMGetApicId();
1113 ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
1114 pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
1115 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
1116 pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
1117 }
1118 else
1119#endif /* CONFIG_SMP */
1120 {
1121 ulDiff = ulNow - pDevExt->ulLastJiffies;
1122 pDevExt->ulLastJiffies = ulNow;
1123 u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
1124 pDevExt->u64LastMonotime = u64Monotime;
1125 }
1126 if (RT_LIKELY(pGip))
1127 supdrvGipUpdate(pDevExt->pGip, u64Monotime);
1128 if (RT_LIKELY(!pDevExt->fGIPSuspended))
1129 {
1130#ifdef VBOX_HRTIMER
1131 hrtimer_forward(&g_GipTimer, KtNow, ktime_set(0, 1000000));
1132#else
1133 mod_timer(&g_GipTimer, ulNow + ONE_MSEC_IN_JIFFIES);
1134#endif
1135 }
1136
1137 local_irq_restore(SavedFlags);
1138
1139#ifdef VBOX_HRTIMER
1140 return pDevExt->fGIPSuspended ? HRTIMER_NORESTART : HRTIMER_RESTART;
1141#endif
1142}
1143
1144
#ifdef CONFIG_SMP
/**
 * Timer callback function for the other CPUs.
 *
 * Updates the per-CPU monotonic timestamp and the per-CPU GIP data for the
 * CPU the callback is running on, then re-arms that CPU's timer unless GIP
 * updating has been suspended. If the callback runs on a CPU other than the
 * one the timer belongs to, or the APIC ID is out of range, an error is
 * logged and the timer is not re-armed.
 *
 * @param   iTimerCPU   The APIC ID of this timer. hrtimer builds receive the
 *                      expired timer instead and take the APIC ID of the
 *                      current CPU, as hrtimers carry no user data.
 */
# ifdef VBOX_HRTIMER
static enum hrtimer_restart VBoxDrvLinuxGipTimerPerCpu(struct hrtimer *pTimer)
# else
static void VBoxDrvLinuxGipTimerPerCpu(unsigned long iTimerCPU)
# endif
{
    PSUPDRVDEVEXT       pDevExt;
    PSUPGLOBALINFOPAGE  pGip;
    uint8_t             iCPU;
    uint64_t            u64Monotime;
    unsigned long       SavedFlags;
    unsigned long       ulNow;
    bool                fRearm = false;
# ifdef VBOX_HRTIMER
    unsigned long       iTimerCPU;
    ktime_t             KtNow;
# endif

    local_irq_save(SavedFlags);

    ulNow   = jiffies;
    pDevExt = &g_DevExt;
    pGip    = pDevExt->pGip;
    iCPU    = ASMGetApicId();
# ifdef VBOX_HRTIMER
    iTimerCPU = iCPU; /* XXX hrtimer does not support a 'data' field */
    KtNow   = ktime_get();
# endif

    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        if (RT_LIKELY(iTimerCPU == iCPU))
        {
            /*
             * Widen the jiffies delta to 64 bits before scaling it to
             * nanoseconds; the product wraps in 'unsigned long' on 32-bit
             * hosts once the delta exceeds a few seconds worth of ticks.
             */
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
            pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + (uint64_t)ulDiff * TICK_NSEC;
            pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);

            /* Sample the suspend flag once; the return value below relies on it. */
            fRearm = !pDevExt->fGIPSuspended;
            if (RT_LIKELY(fRearm))
            {
# ifdef VBOX_HRTIMER
                hrtimer_forward(&pDevExt->aCPUs[iCPU].Timer, KtNow, ktime_set(0, 1000000));
# else
                mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + ONE_MSEC_IN_JIFFIES);
# endif
            }
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d !=? timer-cpuid=%d)\n",
                   iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%lu cpuid=%d\n",
               iCPU, (unsigned long)RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);

# ifdef VBOX_HRTIMER
    /*
     * Only ask for a restart when the timer was actually forwarded above,
     * so that the error paths do not restart a timer whose expiry was
     * never moved.
     */
    return fRearm ? HRTIMER_RESTART : HRTIMER_NORESTART;
# endif
}
#endif /* CONFIG_SMP */
1213
1214
1215/**
1216 * Maps the GIP into user space.
1217 *
1218 * @returns negative errno.
1219 * @param pDevExt Instance data.
1220 */
1221int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE *ppGip)
1222{
1223 int rc = 0;
1224 unsigned long ulAddr;
1225 unsigned long HCPhys = pDevExt->HCPhysGip;
1226 pgprot_t pgFlags;
1227 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1228 LogFlow(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1229
1230 /*
1231 * Allocate user space mapping and put the physical pages into it.
1232 */
1233 down_write(&current->mm->mmap_sem);
1234 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1235 if (!(ulAddr & ~PAGE_MASK))
1236 {
1237#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1238 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1239#else
1240 int rc2 = 0;
1241 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1242 if (vma)
1243#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1244 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1245#else
1246 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1247#endif
1248 else
1249 {
1250 rc = SUPDRV_ERR_NO_MEMORY;
1251 Log(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1252 }
1253#endif
1254 if (rc2)
1255 {
1256 rc = SUPDRV_ERR_NO_MEMORY;
1257 Log(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1258 }
1259 }
1260 else
1261 {
1262 Log(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1263 rc = SUPDRV_ERR_NO_MEMORY;
1264 }
1265 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1266
1267 /*
1268 * Success?
1269 */
1270 if (!rc)
1271 {
1272 *ppGip = (PSUPGLOBALINFOPAGE)ulAddr;
1273 LogFlow(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1274 return 0;
1275 }
1276
1277 /*
1278 * Failure, cleanup and be gone.
1279 */
1280 if (ulAddr & ~PAGE_MASK)
1281 {
1282 down_write(&current->mm->mmap_sem);
1283 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1284 up_write(&current->mm->mmap_sem);
1285 }
1286
1287 LogFlow(("supdrvOSGipMap: returns %d\n", rc));
1288 return rc;
1289}
1290
1291
1292/**
1293 * Maps the GIP into user space.
1294 *
1295 * @returns negative errno.
1296 * @param pDevExt Instance data.
1297 */
1298int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
1299{
1300 LogFlow(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1301 if (current->mm)
1302 {
1303 down_write(&current->mm->mmap_sem);
1304 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1305 up_write(&current->mm->mmap_sem);
1306 }
1307 LogFlow(("supdrvOSGipUnmap: returns 0\n"));
1308 return 0;
1309}
1310
1311
1312/**
1313 * Resumes the GIP updating.
1314 *
1315 * @param pDevExt Instance data.
1316 */
1317void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
1318{
1319 LogFlow(("supdrvOSGipResume:\n"));
1320 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
1321#ifdef CONFIG_SMP
1322 if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1323 {
1324#endif
1325 vbox_ktimer_start(&g_GipTimer);
1326#ifdef CONFIG_SMP
1327 }
1328 else
1329 {
1330 vbox_ktimer_start(&g_GipTimer);
1331 smp_call_function(VBoxDrvLinuxGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
1332 }
1333#endif
1334}
1335
1336
#ifdef CONFIG_SMP
/**
 * Callback for resuming GIP updating on the other CPUs.
 *
 * This is only used when the GIP is in async tsc mode. It records the Linux
 * processor id for the APIC ID of the calling CPU and starts that CPU's
 * timer. An out of range APIC ID is logged and otherwise ignored.
 *
 * @param   pvUser      Pointer to the device instance.
 */
static void VBoxDrvLinuxGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT const pThis  = (PSUPDRVDEVEXT)pvUser;
    uint8_t const       idApic = ASMGetApicId();

    if (RT_LIKELY(idApic < RT_ELEMENTS(pThis->pGip->aCPUs)))
    {
        pThis->aCPUs[idApic].iSmpProcessorId = smp_processor_id();
        vbox_ktimer_start(&pThis->aCPUs[idApic].Timer);
    }
    else
        printk("vboxdrv: error: apicid=%d max=%lu cpuid=%d\n",
               idApic, (unsigned long)RT_ELEMENTS(pThis->pGip->aCPUs), smp_processor_id());
}
#endif /* CONFIG_SMP */
1361
1362
1363/**
1364 * Suspends the GIP updating.
1365 *
1366 * @param pDevExt Instance data.
1367 */
1368void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1369{
1370#ifdef CONFIG_SMP
1371 unsigned i;
1372#endif
1373 LogFlow(("supdrvOSGipSuspend:\n"));
1374 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);
1375
1376 vbox_ktimer_stop(&g_GipTimer);
1377#ifdef CONFIG_SMP
1378 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1379 vbox_ktimer_stop(&pDevExt->aCPUs[i].Timer);
1380#endif
1381}
1382
1383
1384/**
1385 * Get the current CPU count.
1386 * @returns Number of cpus.
1387 */
1388unsigned VBOXCALL supdrvOSGetCPUCount(void)
1389{
1390#ifdef CONFIG_SMP
1391# if defined(num_present_cpus) && !defined(VBOX_REDHAT_KABI)
1392 return num_present_cpus();
1393# elif defined(num_possible_cpus)
1394 return num_possible_cpus();
1395# else
1396 return smp_num_cpus;
1397# endif
1398#else
1399 return 1;
1400#endif
1401}
1402
1403/**
1404 * Force async tsc mode.
1405 * @todo add a module argument for this.
1406 */
1407bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
1408{
1409 return force_async_tsc != 0;
1410}
1411
1412
1413/**
1414 * Converts a supdrv error code to an linux error code.
1415 *
1416 * @returns corresponding linux error code.
1417 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1418 */
1419static int VBoxDrvLinuxErr2LinuxErr(int rc)
1420{
1421 switch (rc)
1422 {
1423 case 0: return 0;
1424 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1425 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1426 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1427 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1428 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1429 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1430 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1431 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1432 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1433 case SUPDRV_ERR_IDT_FAILED: return -1000;
1434 }
1435
1436 return -EPERM;
1437}
1438
1439
1440RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
1441{
1442#if 1
1443 va_list args;
1444 char szMsg[512];
1445
1446 va_start(args, pszFormat);
1447 vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
1448 szMsg[sizeof(szMsg) - 1] = '\0';
1449 printk("%s", szMsg);
1450 va_end(args);
1451#else
1452 /* forward to printf - needs some more GCC hacking to fix ebp... */
1453 __asm__ __volatile__ ("mov %0, %esp\n\t"
1454 "jmp %1\n\t",
1455 :: "r" ((uintptr_t)&pszFormat - 4),
1456 "m" (printk));
1457#endif
1458 return 0;
1459}
1460
1461
1462/** Runtime assert implementation for Linux Ring-0. */
1463RTDECL(bool) RTAssertDoBreakpoint(void)
1464{
1465 return true;
1466}
1467
1468
1469/** Runtime assert implementation for Linux Ring-0. */
1470RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
1471{
1472 printk("!!Assertion Failed!!\n"
1473 "Expression: %s\n"
1474 "Location : %s(%d) %s\n",
1475 pszExpr, pszFile, uLine, pszFunction);
1476}
1477
1478
1479/** Runtime assert implementation for Linux Ring-0. */
1480RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1481{ /* forwarder. */
1482 va_list ap;
1483 char msg[256];
1484
1485 va_start(ap, pszFormat);
1486 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1487 msg[sizeof(msg) - 1] = '\0';
1488 printk("%s", msg);
1489 va_end(ap);
1490}
1491
1492
/*
 * GCC C++ hack: provides a definition of the __gxx_personality_v0 symbol,
 * filled with a 0xcc byte pattern.
 * NOTE(review): presumably this satisfies references from objects built
 * with the C++ compiler - confirm against the build setup.
 */
unsigned __gxx_personality_v0 = 0xccccccccU;
1495
1496
1497module_init(VBoxDrvLinuxInit);
1498module_exit(VBoxDrvLinuxUnload);
1499
1500MODULE_AUTHOR("Sun Microsystems, Inc.");
1501MODULE_DESCRIPTION("VirtualBox Support Driver");
1502MODULE_LICENSE("GPL");
1503#ifdef MODULE_VERSION
1504MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
1505#endif
1506
1507#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1508module_param(force_async_tsc, int, 0444);
1509#else
1510MODULE_PARM(force_async_tsc, "i");
1511#endif
1512MODULE_PARM_DESC(force_async_tsc, "force the asynchronous TSC mode");
1513
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette