VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 7985

Last change on this file since 7985 was 7902, checked in by vboxsync, 17 years ago

the asynchronous TSC detection code is generic code

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.1 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 * Some lines of code to disable the local APIC on x86_64 machines taken
25 * from a Mandriva patch by Gwenole Beauchesne <[email protected]>.
26 */
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "SUPDRV.h"
32#include "the-linux-kernel.h"
33#include "version-generated.h"
34
35#include <iprt/assert.h>
36#include <iprt/spinlock.h>
37#include <iprt/semaphore.h>
38#include <iprt/initterm.h>
39#include <iprt/process.h>
40#include <iprt/err.h>
41#include <iprt/mem.h>
42#include <iprt/log.h>
43#include <iprt/mp.h>
44
45#include <linux/sched.h>
46#ifdef CONFIG_DEVFS_FS
47# include <linux/devfs_fs_kernel.h>
48#endif
49#ifdef CONFIG_VBOXDRV_AS_MISC
50# include <linux/miscdevice.h>
51#endif
52#ifdef CONFIG_X86_LOCAL_APIC
53# include <asm/apic.h>
54# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
55# include <asm/nmi.h>
56# endif
57#endif
58
59#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
60# include <asm/pgtable.h>
61# define global_flush_tlb __flush_tlb_global
62#endif
63
64#include <iprt/mem.h>
65
66
/* devfs defines */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)

/*
 * 2.6 devfs: create the character device node.  devfs_mk_cdev() does not
 * hand out a handle, so a dummy non-NULL pointer is produced on success to
 * give callers the same "NULL means failure" contract as the 2.4 variant.
 */
#  define VBOX_REGISTER_DEVFS() \
({ \
    void *rc = NULL; \
    if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0), \
                      S_IFCHR | S_IRUGO | S_IWUGO, \
                      DEVICE_NAME) == 0) \
        rc = (void *)' '; /* return not NULL */ \
    rc; \
})

/*
 * The handle is ignored here; the node is removed by name.  Wrapped in
 * do/while(0) so the macro is exactly one statement and the caller supplies
 * the terminating semicolon.
 */
#  define VBOX_UNREGISTER_DEVFS(handle) \
    do { devfs_remove(DEVICE_NAME); } while (0)

# else /* < 2.6.0 */

/* 2.4 devfs: returns the devfs handle, NULL on failure. */
#  define VBOX_REGISTER_DEVFS() \
    devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT, \
                   DEVICE_MAJOR, 0, \
                   S_IFCHR | S_IRUGO | S_IWUGO, \
                   &gFileOpsVBoxDrv, NULL)

/*
 * Unregisters only when a handle was obtained.  The do/while(0) wrapper keeps
 * the embedded 'if' from capturing a following 'else' at the call site.
 */
#  define VBOX_UNREGISTER_DEVFS(handle) \
    do { \
        if ((handle) != NULL) \
            devfs_unregister(handle); \
    } while (0)

# endif /* < 2.6.0 */
#endif /* CONFIG_DEVFS_FS && !CONFIG_VBOXDRV_AS_MISC */
98
/*
 * Character device (un)registration wrappers, only needed when the driver is
 * not built as a misc device.  With devfs configured on kernels up to and
 * including 2.6.0 the devfs chrdev helpers are used; otherwise the plain
 * register_chrdev()/unregister_chrdev() calls.
 */
99#ifndef CONFIG_VBOXDRV_AS_MISC
100# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
101# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
102# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
103# else
104# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
105# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
106# endif
107#endif /* !CONFIG_VBOXDRV_AS_MISC */
108
109
110#ifdef CONFIG_X86_HIGH_ENTRY
111# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
112#endif
113
114/*
115 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
 *
 * MY_PAGE_KERNEL_EXEC is selected as follows:
 *  - AMD64: PAGE_KERNEL_EXEC as provided by the kernel.
 *  - x86 with PAE, when PAGE_KERNEL_EXEC is defined: assembled by hand from
 *    _PAGE_KERNEL_EXEC, adding _PAGE_GLOBAL when the CPU has PGE.
 *  - everything else: plain PAGE_KERNEL.
116 */
117#if defined(RT_ARCH_AMD64)
118# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
119#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
120# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
121#else
122# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
123#endif
124
125/*
126 * The redhat hack section.
127 * - The current hacks are for 2.4.21-15.EL only.
 * - The whole section can be compiled out by defining NO_REDHAT_HACKS.
128 */
129#ifndef NO_REDHAT_HACKS
130/* accounting. */
/* On pre-2.6 kernels that define VM_ACCOUNT, do_munmap() is called with a
 * fourth argument; MY_DO_MUNMAP hides that difference (0 is passed). */
131# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
132# ifdef VM_ACCOUNT
133# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
134# endif
135# endif
136
137/* backported remap_page_range. */
/* The presence of the tlb_vma macro in <asm/tlb.h> is used as the (admittedly
 * weak) indicator for the backport on a pre-2.6 kernel. */
138# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
139# include <asm/tlb.h>
140# ifdef tlb_vma /* probably not good enough... */
141# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
142# endif
143# endif
144
145#endif /* !NO_REDHAT_HACKS */
146
147
/* Default: the three-argument do_munmap() when no hack above defined it. */
148#ifndef MY_DO_MUNMAP
149# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
150#endif
151
152
153/** @def ONE_MSEC_IN_JIFFIES
154 * The number of jiffies that make up 1 millisecond. Must be at least 1! */
/* HZ <= 1000: a jiffy lasts at least 1ms, so one jiffy is used.
 * HZ a multiple of 1000: exact division.
 * Anything else: the rounded-up value is defined, but the build is aborted
 * by the #error right after it. */
155#if HZ <= 1000
156# define ONE_MSEC_IN_JIFFIES 1
157#elif !(HZ % 1000)
158# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
159#else
160# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
161# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
162#endif
163
/** @def TICK_NSEC
 * The time between ticks in nanoseconds.
 * Fallback for kernels that do not define TICK_NSEC themselves: one second
 * (10^9 ns) divided by the tick rate. */
#ifndef TICK_NSEC
# define TICK_NSEC (1000000000UL / HZ)
#endif
169
170#ifdef CONFIG_X86_LOCAL_APIC
171
172/* If an NMI occurs while we are inside the world switcher the machine will
173 * crash. The Linux NMI watchdog generates periodic NMIs increasing a counter
174 * which is compared with another counter increased in the timer interrupt
175 * handler. We disable the NMI watchdog.
176 *
177 * - Linux >= 2.6.21: The watchdog is disabled by default on i386 and x86_64.
178 * - Linux < 2.6.21: The watchdog is normally enabled by default on x86_64
179 * and disabled on i386.
 *
 * Consequently DO_DISABLE_NMI (active disabling) is only defined for AMD64
 * builds against kernels older than 2.6.21, and never for VBOX_REDHAT_KABI.
180 */
181# if defined(RT_ARCH_AMD64)
182# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21) && !defined(VBOX_REDHAT_KABI)
183# define DO_DISABLE_NMI 1
184# endif
185# endif
186
/* Accessors for nmi_active.  Before 2.6.19 it is declared here as a plain
 * int and accessed directly; note that nmi_atomic_dec() then stores 0 instead
 * of decrementing.  From 2.6.19 on the atomic_* API is used. */
187# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
188extern int nmi_active;
189# define nmi_atomic_read(P) *(P)
190# define nmi_atomic_set(P, V) *(P) = (V)
191# define nmi_atomic_dec(P) nmi_atomic_set(P, 0)
192# else
193# define nmi_atomic_read(P) atomic_read(P)
194# define nmi_atomic_set(P, V) atomic_set(P, V)
195# define nmi_atomic_dec(P) atomic_dec(P)
196# endif
197
/* Fallback definitions for kernels whose headers lack these constants. */
198# ifndef X86_FEATURE_ARCH_PERFMON
199# define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
200# endif
201# ifndef MSR_ARCH_PERFMON_EVENTSEL0
202# define MSR_ARCH_PERFMON_EVENTSEL0 0x186
203# endif
204# ifndef ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
205# define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
206# endif
207
208#endif /* CONFIG_X86_LOCAL_APIC */
209
/* Two-level stringification: xstr(X) expands the macro X first and then turns
 * the result into a string literal; str(X) stringifies its argument as
 * written. */
210#define xstr(s) str(s)
211#define str(s) #s
212
213/*******************************************************************************
214* Defined Constants And Macros *
215*******************************************************************************/
216/**
217 * Device extension & session data association structure.
218 */
219static SUPDRVDEVEXT g_DevExt;
220
221/** Timer structure for the GIP update. */
222static VBOXKTIMER g_GipTimer;
223/** Pointer to the page structure for the GIP. */
224struct page *g_pGipPage;
225
226/** Registered devfs device handle. */
227#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
228# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
229static void *g_hDevFsVBoxDrv = NULL;
230# else
231static devfs_handle_t g_hDevFsVBoxDrv = NULL;
232# endif
233#endif
234
235#ifndef CONFIG_VBOXDRV_AS_MISC
236/** Module major number */
237#define DEVICE_MAJOR 234
238/** Saved major device number */
239static int g_iModuleMajor;
240#endif /* !CONFIG_VBOXDRV_AS_MISC */
241
242/** Module parameter.
243 * Not prefixed because the name is used by macros at the end of this file. */
244static int force_async_tsc = 0;
245
246/** The module name. */
247#define DEVICE_NAME "vboxdrv"
248
249#ifdef RT_ARCH_AMD64
250/**
251 * Memory for the executable memory heap (in IPRT).
 *
 * The array is only declared in C; the storage itself is emitted by the
 * assembly block below, which places 1572864 zero bytes, 32-byte aligned,
 * in a section named "execmemory" flagged allocatable, writable and
 * executable ("awx"), and then switches back to .text.  The size in the
 * C declaration and the two sizes in the assembly must be kept in sync.
252 */
253extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
254__asm__(".section execmemory, \"awx\", @progbits\n\t"
255 ".align 32\n\t"
256 ".globl g_abExecMemory\n"
257 "g_abExecMemory:\n\t"
258 ".zero 1572864\n\t"
259 ".type g_abExecMemory, @object\n\t"
260 ".size g_abExecMemory, 1572864\n\t"
261 ".text\n\t");
262#endif
263
264
265/*******************************************************************************
266* Internal Functions *
267*******************************************************************************/
/* Timer callback type: the hrtimer flavour receives the timer and returns a
 * restart decision, the classic flavour receives an unsigned long cookie. */
268#ifdef VBOX_HRTIMER
269typedef enum hrtimer_restart (*PFNVBOXKTIMER)(struct hrtimer *);
270#else
271typedef void (*PFNVBOXKTIMER)(unsigned long);
272#endif
273
274static int VBoxDrvLinuxInit(void);
275static void VBoxDrvLinuxUnload(void);
276static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp);
277static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp);
278#ifdef HAVE_UNLOCKED_IOCTL
279static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
280#else
281static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
282#endif
283static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
284static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt);
285static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt);
286#ifdef VBOX_HRTIMER
287static enum hrtimer_restart VBoxDrvLinuxGipTimer(struct hrtimer *pTimer);
288#else
289static void VBoxDrvLinuxGipTimer(unsigned long ulUser);
290#endif
291#ifdef CONFIG_SMP
292# ifdef VBOX_HRTIMER
293static enum hrtimer_restart VBoxDrvLinuxGipTimerPerCpu(struct hrtimer *pTimer);
294# else
295static void VBoxDrvLinuxGipTimerPerCpu(unsigned long ulUser);
296# endif
297static void VBoxDrvLinuxGipResumePerCpu(void *pvUser);
298#endif
299static int VBoxDrvLinuxErr2LinuxErr(int);
300
301
302/** The file_operations structure. */
303static struct file_operations gFileOpsVBoxDrv =
304{
305 owner: THIS_MODULE,
306 open: VBoxDrvLinuxCreate,
307 release: VBoxDrvLinuxClose,
308#ifdef HAVE_UNLOCKED_IOCTL
309 unlocked_ioctl: VBoxDrvLinuxIOCtl,
310#else
311 ioctl: VBoxDrvLinuxIOCtl,
312#endif
313};
314
#ifdef CONFIG_VBOXDRV_AS_MISC
/** The miscdevice structure: dynamically assigned minor, served by
 * gFileOpsVBoxDrv.  The devfs name is only filled in for 2.6.0..2.6.17. */
static struct miscdevice gMiscDevice =
{
    .minor      = MISC_DYNAMIC_MINOR,
    .name       = DEVICE_NAME,
    .fops       = &gFileOpsVBoxDrv,
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
     LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
    .devfs_name = DEVICE_NAME,
# endif
};
#endif
328
329static inline void vbox_ktimer_init(PVBOXKTIMER pTimer, PFNVBOXKTIMER pfnFunction, unsigned long ulData)
330{
331#ifdef VBOX_HRTIMER
332 hrtimer_init(pTimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
333 pTimer->function = pfnFunction;
334#else
335 init_timer(pTimer);
336 pTimer->data = ulData;
337 pTimer->function = pfnFunction;
338 pTimer->expires = jiffies;
339#endif
340}
341
342static inline void vbox_ktimer_start(PVBOXKTIMER pTimer)
343{
344#ifdef VBOX_HRTIMER
345 hrtimer_start(pTimer, ktime_add_ns(ktime_get(), 1000000), HRTIMER_MODE_ABS);
346#else
347 mod_timer(pTimer, jiffies);
348#endif
349}
350
351static inline void vbox_ktimer_stop(PVBOXKTIMER pTimer)
352{
353#ifdef VBOX_HRTIMER
354 hrtimer_cancel(pTimer);
355#else
356 if (timer_pending(pTimer))
357 del_timer_sync(pTimer);
358#endif
359}
360
361#ifdef CONFIG_X86_LOCAL_APIC
362# ifdef DO_DISABLE_NMI
363
364/** Stop AMD NMI watchdog (x86_64 only). */
365static int stop_k7_watchdog(void)
366{
367 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
368 return 1;
369}
370
371/** Stop Intel P4 NMI watchdog (x86_64 only). */
372static int stop_p4_watchdog(void)
373{
374 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
375 wrmsr(MSR_P4_IQ_CCCR1, 0, 0);
376 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
377 return 1;
378}
379
380/** The new method of detecting the event counter */
381static int stop_intel_arch_watchdog(void)
382{
383 unsigned ebx;
384
385 ebx = cpuid_ebx(10);
386 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
387 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
388 return 1;
389}
390
391/** Stop NMI watchdog. */
392static void vbox_stop_apic_nmi_watchdog(void *unused)
393{
394 int stopped = 0;
395
396 /* only support LOCAL and IO APICs for now */
397 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
398 (nmi_watchdog != NMI_IO_APIC))
399 return;
400
401 if (nmi_watchdog == NMI_LOCAL_APIC)
402 {
403 switch (boot_cpu_data.x86_vendor)
404 {
405 case X86_VENDOR_AMD:
406 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
407 return;
408 stopped = stop_k7_watchdog();
409 break;
410 case X86_VENDOR_INTEL:
411 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
412 {
413 stopped = stop_intel_arch_watchdog();
414 break;
415 }
416 stopped = stop_p4_watchdog();
417 break;
418 default:
419 return;
420 }
421 }
422
423 if (stopped)
424 nmi_atomic_dec(&nmi_active);
425}
426
427/** Disable LAPIC NMI watchdog. */
428static void disable_lapic_nmi_watchdog(void)
429{
430 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
431
432 if (nmi_atomic_read(&nmi_active) <= 0)
433 return;
434
435 on_each_cpu(vbox_stop_apic_nmi_watchdog, NULL, 1, 1);
436
437 BUG_ON(nmi_atomic_read(&nmi_active) != 0);
438
439 /* tell do_nmi() and others that we're not active any more */
440 nmi_watchdog = NMI_NONE;
441}
442
443/** Shutdown NMI. */
444static void nmi_cpu_shutdown(void * dummy)
445{
446 unsigned int vERR, vPC;
447
448 vPC = apic_read(APIC_LVTPC);
449
450 if ((GET_APIC_DELIVERY_MODE(vPC) == APIC_MODE_NMI) && !(vPC & APIC_LVT_MASKED))
451 {
452 vERR = apic_read(APIC_LVTERR);
453 apic_write(APIC_LVTERR, vERR | APIC_LVT_MASKED);
454 apic_write(APIC_LVTPC, vPC | APIC_LVT_MASKED);
455 apic_write(APIC_LVTERR, vERR);
456 }
457}
458
459static void nmi_shutdown(void)
460{
461 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
462}
463# endif /* DO_DISABLE_NMI */
464#endif /* CONFIG_X86_LOCAL_APIC */
465
466
467/**
468 * Initialize module.
 *
 * Steps, in order: refuse to load (or try to disable the watchdog) when an
 * NMI watchdog is active, determine the TSC mode, register the device (misc
 * device, or character device plus optional devfs node), initialize IPRT,
 * donate the executable memory block on AMD64, initialize the device
 * extension and finally create the GIP.  On failure everything registered
 * so far is undone again.
469 *
470 * @returns appropriate status code.
 *          0 on success; otherwise a negative errno value (-EINVAL, the
 *          registration error, or the VBoxDrvLinuxInitGip() status).
471 */
472static int __init VBoxDrvLinuxInit(void)
473{
474 int rc;
475 bool fAsync;
476 uint64_t u64DiffCores;
477
478 dprintf(("VBoxDrv::ModuleInit\n"));
479
480#ifdef CONFIG_X86_LOCAL_APIC
481 /*
482 * If an NMI occurs while we are inside the world switcher the machine will crash.
483 * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
484 * compared with another counter increased in the timer interrupt handler. Therefore
485 * we don't allow to setup an NMI watchdog.
486 */
487# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && !defined(VBOX_REDHAT_KABI)
488 /*
489 * First test: NMI activated? Only works with Linux 2.6 -- 2.4 does not export
490 * the nmi_watchdog variable.
491 */
492# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
493 (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
494# ifdef DO_DISABLE_NMI
495 if (nmi_atomic_read(&nmi_active) > 0)
496 {
497 printk(KERN_DEBUG DEVICE_NAME ": Trying to deactivate the NMI watchdog...\n");
498
499 switch (nmi_watchdog)
500 {
501 case NMI_LOCAL_APIC:
502 disable_lapic_nmi_watchdog();
503 break;
504 case NMI_NONE:
505 nmi_atomic_dec(&nmi_active);
506 break;
507 }
508
509 if (nmi_atomic_read(&nmi_active) == 0)
510 {
511 nmi_shutdown();
512 printk(KERN_DEBUG DEVICE_NAME ": Successfully done.\n");
513 }
514 else
515 printk(KERN_DEBUG DEVICE_NAME ": Failed!\n");
516 }
517# endif /* DO_DISABLE_NMI */
518
519 /*
520 * Permanent IO_APIC mode active? No way to handle this!
521 */
522 if (nmi_watchdog == NMI_IO_APIC)
523 {
524 printk(KERN_ERR DEVICE_NAME
525 ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
526 DEVICE_NAME
527 ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
528 DEVICE_NAME
529 ": command line.\n");
530 return -EINVAL;
531 }
532
533 /*
534 * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never be enabled again
535 */
536 nmi_atomic_set(&nmi_active, -1);
537 printk(KERN_DEBUG DEVICE_NAME ": Trying to deactivate the NMI watchdog permanently...\n");
538
539 /*
540 * Now fall through and see if it actually was enabled before. If so, fail
541 * as we cannot deactivate it cleanly from here.
542 */
543# else /* < 2.6.19 */
544 /*
545 * Older 2.6 kernels: nmi_watchdog is not initialized by default
546 */
 /* The label targeted here lives inside the nested block of the second test
  * below; it only exists in this same preprocessor configuration. */
547 if (nmi_watchdog != NMI_NONE)
548 goto nmi_activated;
549# endif
550# endif /* >= 2.6.0 && !defined(VBOX_REDHAT_KABI) */
551
552 /*
553 * Second test: Interrupt generated by performance counter not masked and can
554 * generate an NMI. Works also with Linux 2.4.
555 */
556 {
557 unsigned int v, ver, maxlvt;
558
559 v = apic_read(APIC_LVR);
560 ver = GET_APIC_VERSION(v);
561 /* 82489DXs do not report # of LVT entries. */
562 maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
563 if (maxlvt >= 4)
564 {
565 /* Read status of performance counter IRQ vector */
566 v = apic_read(APIC_LVTPC);
567
568 /* performance counter generates NMI and is not masked? */
569 if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
570 {
571# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
572 (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
573 printk(KERN_ERR DEVICE_NAME
574 ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
575 DEVICE_NAME
576 ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
577 return -EINVAL;
578# else /* < 2.6.19 */
579# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && !defined(VBOX_REDHAT_KABI)
580nmi_activated:
581# endif
582 printk(KERN_ERR DEVICE_NAME
583 ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
584 DEVICE_NAME
585 ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
586 return -EINVAL;
587# endif /* >= 2.6.19 */
588 }
589 }
590 }
591# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
592 printk(KERN_DEBUG DEVICE_NAME ": Successfully done.\n");
593# endif /* >= 2.6.19 */
594#endif /* CONFIG_X86_LOCAL_APIC */
595
596 /*
597 * Check for synchronous/asynchronous TSC mode.
 *
 * NOTE(review): RTMpGetOnlineCount() and supdrvDetermineAsyncTsc() run
 * before RTR0Init() further down -- confirm both are safe to call before
 * the runtime has been initialized.
598 */
599 printk(KERN_DEBUG DEVICE_NAME ": Found %u processor cores.\n", RTMpGetOnlineCount());
600 fAsync = supdrvDetermineAsyncTsc(&u64DiffCores);
601 /* no 64-bit arithmetics here, we assume that the TSC difference between the cores is < 2^32 */
602 printk(KERN_DEBUG DEVICE_NAME ": fAsync=%d u64DiffCores=%u.\n", fAsync, (uint32_t)u64DiffCores);
 /* A detected asynchronous TSC overrides the module parameter. */
603 if (fAsync)
604 force_async_tsc = 1;
605
606#ifdef CONFIG_VBOXDRV_AS_MISC
607 rc = misc_register(&gMiscDevice);
608 if (rc)
609 {
610 printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
611 return rc;
612 }
613#else /* !CONFIG_VBOXDRV_AS_MISC */
614 /*
615 * Register character device.
616 */
617 g_iModuleMajor = DEVICE_MAJOR;
618 rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
619 if (rc < 0)
620 {
621 dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
622 return rc;
623 }
624
625 /*
626 * Save returned module major number
 * (with a fixed DEVICE_MAJOR that value is kept, otherwise the number
 * returned by the registration call is used).
627 */
628 if (DEVICE_MAJOR != 0)
629 g_iModuleMajor = DEVICE_MAJOR;
630 else
631 g_iModuleMajor = rc;
632 rc = 0;
633
634#ifdef CONFIG_DEVFS_FS
635 /*
636 * Register a device entry
637 */
638 g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
639 if (g_hDevFsVBoxDrv == NULL)
640 {
641 dprintf(("devfs_register failed!\n"));
642 rc = -EINVAL;
643 }
644#endif
645#endif /* !CONFIG_VBOXDRV_AS_MISC */
646 if (!rc)
647 {
648 /*
649 * Initialize the runtime.
650 * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
651 */
652 rc = RTR0Init(0);
653 if (RT_SUCCESS(rc))
654 {
655#ifdef RT_ARCH_AMD64
656 rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
657#endif
658 /*
659 * Initialize the device extension.
660 */
661 if (RT_SUCCESS(rc))
662 rc = supdrvInitDevExt(&g_DevExt);
663 if (!rc)
664 {
665 /*
666 * Create the GIP page.
667 */
668 rc = VBoxDrvLinuxInitGip(&g_DevExt);
669 if (!rc)
670 {
671 printk(KERN_INFO DEVICE_NAME ": TSC mode is %s, kernel timer mode is "
672#ifdef VBOX_HRTIMER
673 "'high-res'"
674#else
675 "'normal'"
676#endif
677 ".\n",
678 g_DevExt.pGip->u32Mode == SUPGIPMODE_SYNC_TSC ? "'synchronous'" : "'asynchronous'");
679 LogFlow(("VBoxDrv::ModuleInit returning %#x\n", rc));
680 printk(KERN_DEBUG DEVICE_NAME ": Successfully loaded version "
681 VBOX_VERSION_STRING " (interface " xstr(SUPDRVIOC_VERSION) ").\n");
 /* The only successful exit; everything below is failure cleanup. */
682 return rc;
683 }
684
685 supdrvDeleteDevExt(&g_DevExt);
686 }
687 else
688 rc = -EINVAL;
689 RTR0Term();
690 }
691 else
692 rc = -EINVAL;
693
694 /*
695 * Failed, cleanup and return the error code.
696 */
697#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
698 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
699#endif
700 }
 /* Reached on every failure after the device registration succeeded,
  * including a failed devfs registration: undo the registration. */
701#ifdef CONFIG_VBOXDRV_AS_MISC
702 misc_deregister(&gMiscDevice);
703 dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
704#else
705 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
706 dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
707#endif
708 return rc;
709}
710
711
712/**
713 * Unload the module.
714 */
715static void __exit VBoxDrvLinuxUnload(void)
716{
717 int rc;
718 dprintf(("VBoxDrvLinuxUnload\n"));
719 NOREF(rc);
720
721 /*
722 * I Don't think it's possible to unload a driver which processes have
723 * opened, at least we'll blindly assume that here.
724 */
725#ifdef CONFIG_VBOXDRV_AS_MISC
726 rc = misc_deregister(&gMiscDevice);
727 if (rc < 0)
728 {
729 dprintf(("misc_deregister failed with rc=%#x\n", rc));
730 }
731#else /* !CONFIG_VBOXDRV_AS_MISC */
732# ifdef CONFIG_DEVFS_FS
733 /*
734 * Unregister a device entry
735 */
736 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
737# endif /* devfs */
738 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
739#endif /* !CONFIG_VBOXDRV_AS_MISC */
740
741 /*
742 * Destroy GIP, delete the device extension and terminate IPRT.
743 */
744 VBoxDrvLinuxTermGip(&g_DevExt);
745 supdrvDeleteDevExt(&g_DevExt);
746 RTR0Term();
747}
748
749
750/**
751 * Device open. Called on open /dev/vboxdrv
752 *
753 * @param pInode Pointer to inode info structure.
754 * @param pFilp Associated file pointer.
755 */
756static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp)
757{
758 int rc;
759 PSUPDRVSESSION pSession;
760 Log(("VBoxDrvLinuxCreate: pFilp=%p pid=%d/%d %s\n", pFilp, RTProcSelf(), current->pid, current->comm));
761
762 /*
763 * Call common code for the rest.
764 */
765 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
766 if (!rc)
767 {
768 pSession->Uid = current->euid;
769 pSession->Gid = current->egid;
770 pSession->Process = RTProcSelf();
771 pSession->R0Process = RTR0ProcHandleSelf();
772 }
773
774 pFilp->private_data = pSession;
775
776 Log(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d/%d (pid=%d/%d %s)\n",
777 &g_DevExt, pSession, rc, VBoxDrvLinuxErr2LinuxErr(rc),
778 RTProcSelf(), current->pid, current->comm));
779 return VBoxDrvLinuxErr2LinuxErr(rc);
780}
781
782
783/**
784 * Close device.
785 *
786 * @param pInode Pointer to inode info structure.
787 * @param pFilp Associated file pointer.
788 */
789static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp)
790{
791 Log(("VBoxDrvLinuxClose: pFilp=%p pSession=%p pid=%d/%d %s\n",
792 pFilp, pFilp->private_data, RTProcSelf(), current->pid, current->comm));
793 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
794 pFilp->private_data = NULL;
795 return 0;
796}
797
798
799/**
800 * Device I/O Control entry point.
801 *
802 * @param pFilp Associated file pointer.
803 * @param uCmd The function specified to ioctl().
804 * @param ulArg The argument specified to ioctl().
805 */
806#ifdef HAVE_UNLOCKED_IOCTL
807static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
808#else
809static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
810#endif
811{
812 /*
813 * Deal with the two high-speed IOCtl that takes it's arguments from
814 * the session and iCmd, and only returns a VBox status code.
815 */
816#ifdef HAVE_UNLOCKED_IOCTL
817 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
818 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
819 || uCmd == SUP_IOCTL_FAST_DO_NOP))
820 return supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
821 return VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg);
822
823#else /* !HAVE_UNLOCKED_IOCTL */
824
825 int rc;
826 unlock_kernel();
827 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
828 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
829 || uCmd == SUP_IOCTL_FAST_DO_NOP))
830 rc = supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
831 else
832 rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg);
833 lock_kernel();
834 return rc;
835#endif /* !HAVE_UNLOCKED_IOCTL */
836}
837
838
839/**
840 * Device I/O Control entry point.
841 *
842 * @param pFilp Associated file pointer.
843 * @param uCmd The function specified to ioctl().
844 * @param ulArg The argument specified to ioctl().
845 */
846static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
847{
848 int rc;
849 SUPREQHDR Hdr;
850 PSUPREQHDR pHdr;
851 uint32_t cbBuf;
852
853 Log6(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p pid=%d/%d\n", pFilp, uCmd, (void *)ulArg, RTProcSelf(), current->pid));
854
855 /*
856 * Read the header.
857 */
858 if (RT_UNLIKELY(copy_from_user(&Hdr, (void *)ulArg, sizeof(Hdr))))
859 {
860 Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x.\n", ulArg, uCmd));
861 return -EFAULT;
862 }
863 if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
864 {
865 Log(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
866 return -EINVAL;
867 }
868
869 /*
870 * Buffer the request.
871 */
872 cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
873 if (RT_UNLIKELY(cbBuf > _1M*16))
874 {
875 Log(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
876 return -E2BIG;
877 }
878 if (RT_UNLIKELY(cbBuf != _IOC_SIZE(uCmd) && _IOC_SIZE(uCmd)))
879 {
880 Log(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x.\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
881 return -EINVAL;
882 }
883 pHdr = RTMemAlloc(cbBuf);
884 if (RT_UNLIKELY(!pHdr))
885 {
886 OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x.\n", cbBuf, uCmd));
887 return -ENOMEM;
888 }
889 if (RT_UNLIKELY(copy_from_user(pHdr, (void *)ulArg, Hdr.cbIn)))
890 {
891 Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x.\n", ulArg, Hdr.cbIn, uCmd));
892 RTMemFree(pHdr);
893 return -EFAULT;
894 }
895
896 /*
897 * Process the IOCtl.
898 */
899 rc = supdrvIOCtl(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data, pHdr);
900
901 /*
902 * Copy ioctl data and output buffer back to user space.
903 */
904 if (RT_LIKELY(!rc))
905 {
906 uint32_t cbOut = pHdr->cbOut;
907 if (RT_UNLIKELY(cbOut > cbBuf))
908 {
909 OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
910 cbOut = cbBuf;
911 }
912 if (RT_UNLIKELY(copy_to_user((void *)ulArg, pHdr, cbOut)))
913 {
914 /* this is really bad! */
915 OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
916 rc = -EFAULT;
917 }
918 }
919 else
920 {
921 Log(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
922 rc = -EINVAL;
923 }
924 RTMemFree(pHdr);
925
926 Log6(("VBoxDrvLinuxIOCtl: returns %d (pid=%d/%d)\n", rc, RTProcSelf(), current->pid));
927 return rc;
928}
929
930
931/**
932 * Initializes any OS specific object creator fields.
933 */
934void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
935{
936 NOREF(pObj);
937 NOREF(pSession);
938}
939
940
941/**
942 * Checks if the session can access the object.
943 *
944 * @returns true if a decision has been made.
945 * @returns false if the default access policy should be applied.
946 *
947 * @param pObj The object in question.
948 * @param pSession The session wanting to access the object.
949 * @param pszObjName The object name, can be NULL.
950 * @param prc Where to store the result when returning true.
951 */
952bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
953{
954 NOREF(pObj);
955 NOREF(pSession);
956 NOREF(pszObjName);
957 NOREF(prc);
958 return false;
959}
960
961
962/**
963 * Initializes the GIP.
964 *
965 * @returns negative errno.
966 * @param pDevExt Instance data. GIP stuff may be updated.
967 */
968static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt)
969{
970 struct page *pPage;
971 dma_addr_t HCPhys;
972 PSUPGLOBALINFOPAGE pGip;
973#ifdef CONFIG_SMP
974 unsigned i;
975#endif
976 LogFlow(("VBoxDrvLinuxInitGip:\n"));
977
978 /*
979 * Allocate the page.
980 */
981 pPage = alloc_pages(GFP_USER, 0);
982 if (!pPage)
983 {
984 Log(("VBoxDrvLinuxInitGip: failed to allocate the GIP page\n"));
985 return -ENOMEM;
986 }
987
988 /*
989 * Lock the page.
990 */
991 SetPageReserved(pPage);
992 g_pGipPage = pPage;
993
994 /*
995 * Call common initialization routine.
996 */
997 HCPhys = page_to_phys(pPage);
998 pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
999 pDevExt->ulLastJiffies = jiffies;
1000 pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
1001 Log(("VBoxDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
1002 TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
1003 supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
1004 HZ <= 1000 ? HZ : 1000);
1005
1006 /*
1007 * Initialize the timer.
1008 */
1009 vbox_ktimer_init(&g_GipTimer, VBoxDrvLinuxGipTimer, (unsigned long)pDevExt);
1010#ifdef CONFIG_SMP
1011 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1012 {
1013 pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
1014 pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
1015 pDevExt->aCPUs[i].iSmpProcessorId = -512;
1016 vbox_ktimer_init(&pDevExt->aCPUs[i].Timer, VBoxDrvLinuxGipTimerPerCpu, i);
1017 }
1018#endif
1019
1020 return 0;
1021}
1022
1023
1024/**
1025 * Terminates the GIP.
1026 *
1027 * @returns negative errno.
1028 * @param pDevExt Instance data. GIP stuff may be updated.
1029 */
1030static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt)
1031{
1032 struct page *pPage;
1033 PSUPGLOBALINFOPAGE pGip;
1034#ifdef CONFIG_SMP
1035 unsigned i;
1036#endif
1037 LogFlow(("VBoxDrvLinuxTermGip:\n"));
1038
1039 /*
1040 * Delete the timer if it's pending.
1041 */
1042 vbox_ktimer_stop(&g_GipTimer);
1043#ifdef CONFIG_SMP
1044 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1045 vbox_ktimer_stop(&pDevExt->aCPUs[i].Timer);
1046#endif
1047
1048 /*
1049 * Uninitialize the content.
1050 */
1051 pGip = pDevExt->pGip;
1052 pDevExt->pGip = NULL;
1053 if (pGip)
1054 supdrvGipTerm(pGip);
1055
1056 /*
1057 * Free the page.
1058 */
1059 pPage = g_pGipPage;
1060 g_pGipPage = NULL;
1061 if (pPage)
1062 {
1063 ClearPageReserved(pPage);
1064 __free_pages(pPage, 0);
1065 }
1066
1067 return 0;
1068}
1069
1070/**
1071 * Timer callback function.
1072 *
1073 * In ASYNC TSC mode this is called on the primary CPU, and we're
1074 * assuming that the CPU remains online.
 *
 * Advances the monotonic time by the number of jiffies elapsed since the
 * previous invocation (times TICK_NSEC), pushes it into the GIP and re-arms
 * the timer one millisecond ahead unless the GIP is suspended.  Runs with
 * local interrupts disabled.
1075 *
1076 * @param ulUser The device extension pointer.
 *               (Classic timer build only; the hrtimer build receives the
 *               timer instead and always uses g_DevExt.)
1077 */
1078#ifdef VBOX_HRTIMER
1079static enum hrtimer_restart VBoxDrvLinuxGipTimer(struct hrtimer *pTimer)
1080#else
1081static void VBoxDrvLinuxGipTimer(unsigned long ulUser)
1082#endif
1083{
1084 PSUPDRVDEVEXT pDevExt;
1085 PSUPGLOBALINFOPAGE pGip;
1086 unsigned long ulNow;
1087 unsigned long ulDiff;
1088 uint64_t u64Monotime;
1089 unsigned long SavedFlags;
1090#ifdef VBOX_HRTIMER
1091 ktime_t KtNow;
1092#endif
1093
1094 local_irq_save(SavedFlags);
1095
1096 ulNow = jiffies;
1097#ifdef VBOX_HRTIMER
1098 KtNow = ktime_get();
1099 pDevExt = &g_DevExt;
1100#else
1101 pDevExt = (PSUPDRVDEVEXT)ulUser;
1102#endif
1103 pGip = pDevExt->pGip;
1104
1105#ifdef CONFIG_SMP
 /* Async TSC mode keeps separate time bookkeeping per CPU, selected by the
  * APIC ID of the CPU executing this callback. */
1106 if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1107 {
 /* NOTE(review): the APIC ID indexes aCPUs without a bounds check --
  * confirm the array covers every possible 8-bit APIC ID. */
1108 uint8_t iCPU = ASMGetApicId();
1109 ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
1110 pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
1111 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
1112 pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
1113 }
1114 else
1115#endif /* CONFIG_SMP */
1116 {
 /* Sync TSC mode (or no GIP yet): one global time base. */
1117 ulDiff = ulNow - pDevExt->ulLastJiffies;
1118 pDevExt->ulLastJiffies = ulNow;
1119 u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
1120 pDevExt->u64LastMonotime = u64Monotime;
1121 }
1122 if (RT_LIKELY(pGip))
1123 supdrvGipUpdate(pDevExt->pGip, u64Monotime);
 /* Re-arm one millisecond ahead; a suspended GIP lets the timer lapse. */
1124 if (RT_LIKELY(!pDevExt->fGIPSuspended))
1125 {
1126#ifdef VBOX_HRTIMER
1127 hrtimer_forward(&g_GipTimer, KtNow, ktime_set(0, 1000000));
1128#else
1129 mod_timer(&g_GipTimer, ulNow + ONE_MSEC_IN_JIFFIES);
1130#endif
1131 }
1132
1133 local_irq_restore(SavedFlags);
1134
1135#ifdef VBOX_HRTIMER
 /* NOTE(review): fGIPSuspended is read a second time here; if it changes
  * after the check above, the restart decision and the hrtimer_forward()
  * call can disagree. */
1136 return pDevExt->fGIPSuspended ? HRTIMER_NORESTART : HRTIMER_RESTART;
1137#endif
1138}
1139
1140
#ifdef CONFIG_SMP
/**
 * Timer callback function for the other CPUs.
 *
 * Updates the per-CPU monotonic time and GIP data for the CPU it runs on and
 * re-arms that CPU's timer unless GIP updating has been suspended.  If the
 * APIC ID is out of range, or the timer fires on a CPU other than the one it
 * belongs to, an error is logged and the timer is not re-armed.
 *
 * @param   iTimerCPU   The APIC ID of this timer (non-hrtimer builds).
 *                      hrtimer builds have no user argument and use the
 *                      APIC ID of the executing CPU instead.
 */
#ifdef VBOX_HRTIMER
static enum hrtimer_restart VBoxDrvLinuxGipTimerPerCpu(struct hrtimer *pTimer)
#else
static void VBoxDrvLinuxGipTimerPerCpu(unsigned long iTimerCPU)
#endif
{
    PSUPDRVDEVEXT           pDevExt;
    PSUPGLOBALINFOPAGE      pGip;
    uint8_t                 iCPU;
    uint64_t                u64Monotime;
    unsigned long           SavedFlags;
    unsigned long           ulNow;
# ifdef VBOX_HRTIMER
    unsigned long           iTimerCPU;
    ktime_t                 KtNow;
    /* Only ask for a restart when the timer has actually been forwarded. */
    enum hrtimer_restart    enmRestart = HRTIMER_NORESTART;
# endif

    local_irq_save(SavedFlags);

    ulNow   = jiffies;
    pDevExt = &g_DevExt;
    pGip    = pDevExt->pGip;
    iCPU    = ASMGetApicId();
# ifdef VBOX_HRTIMER
    iTimerCPU = iCPU; /* XXX hrtimer does not support a 'data' field */
    KtNow = ktime_get();
# endif

    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        if (RT_LIKELY(iTimerCPU == iCPU))
        {
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
            pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
            /* 64-bit multiplication: 'unsigned long' would wrap on 32-bit hosts. */
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + (uint64_t)ulDiff * TICK_NSEC;
            pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            if (RT_LIKELY(!pDevExt->fGIPSuspended))
            {
# ifdef VBOX_HRTIMER
                hrtimer_forward(&pDevExt->aCPUs[iCPU].Timer, KtNow, ktime_set(0, 1000000));
                enmRestart = HRTIMER_RESTART;
# else
                mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + ONE_MSEC_IN_JIFFIES);
# endif
            }
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%lu (cpuid=%d !=? timer-cpuid=%d)\n",
                   iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%lu cpuid=%d\n",
               iCPU, (unsigned long)RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);

# ifdef VBOX_HRTIMER
    return enmRestart;
# endif
}
#endif /* CONFIG_SMP */
1209
1210
1211/**
1212 * Maps the GIP into user space.
1213 *
1214 * @returns negative errno.
1215 * @param pDevExt Instance data.
1216 */
1217int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE *ppGip)
1218{
1219 int rc = 0;
1220 unsigned long ulAddr;
1221 unsigned long HCPhys = pDevExt->HCPhysGip;
1222 pgprot_t pgFlags;
1223 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1224 LogFlow(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1225
1226 /*
1227 * Allocate user space mapping and put the physical pages into it.
1228 */
1229 down_write(&current->mm->mmap_sem);
1230 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1231 if (!(ulAddr & ~PAGE_MASK))
1232 {
1233#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1234 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1235#else
1236 int rc2 = 0;
1237 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1238 if (vma)
1239#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1240 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1241#else
1242 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1243#endif
1244 else
1245 {
1246 rc = SUPDRV_ERR_NO_MEMORY;
1247 Log(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1248 }
1249#endif
1250 if (rc2)
1251 {
1252 rc = SUPDRV_ERR_NO_MEMORY;
1253 Log(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1254 }
1255 }
1256 else
1257 {
1258 Log(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1259 rc = SUPDRV_ERR_NO_MEMORY;
1260 }
1261 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1262
1263 /*
1264 * Success?
1265 */
1266 if (!rc)
1267 {
1268 *ppGip = (PSUPGLOBALINFOPAGE)ulAddr;
1269 LogFlow(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1270 return 0;
1271 }
1272
1273 /*
1274 * Failure, cleanup and be gone.
1275 */
1276 if (ulAddr & ~PAGE_MASK)
1277 {
1278 down_write(&current->mm->mmap_sem);
1279 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1280 up_write(&current->mm->mmap_sem);
1281 }
1282
1283 LogFlow(("supdrvOSGipMap: returns %d\n", rc));
1284 return rc;
1285}
1286
1287
1288/**
1289 * Maps the GIP into user space.
1290 *
1291 * @returns negative errno.
1292 * @param pDevExt Instance data.
1293 */
1294int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
1295{
1296 LogFlow(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1297 if (current->mm)
1298 {
1299 down_write(&current->mm->mmap_sem);
1300 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1301 up_write(&current->mm->mmap_sem);
1302 }
1303 LogFlow(("supdrvOSGipUnmap: returns 0\n"));
1304 return 0;
1305}
1306
1307
1308/**
1309 * Resumes the GIP updating.
1310 *
1311 * @param pDevExt Instance data.
1312 */
1313void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
1314{
1315 LogFlow(("supdrvOSGipResume:\n"));
1316 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
1317#ifdef CONFIG_SMP
1318 if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1319 {
1320#endif
1321 vbox_ktimer_start(&g_GipTimer);
1322#ifdef CONFIG_SMP
1323 }
1324 else
1325 {
1326 vbox_ktimer_start(&g_GipTimer);
1327 smp_call_function(VBoxDrvLinuxGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
1328 }
1329#endif
1330}
1331
1332
#ifdef CONFIG_SMP
/**
 * Callback for resuming GIP updating on the other CPUs.
 *
 * Records the Linux processor id for the executing CPU's APIC ID and starts
 * that CPU's GIP timer.  An out-of-range APIC ID is logged and otherwise
 * ignored.
 *
 * This is only used when the GIP is in async tsc mode.
 *
 * @param   pvUser      Pointer to the device instance.
 */
static void VBoxDrvLinuxGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT   pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint8_t         idApic  = ASMGetApicId();

    if (RT_LIKELY(idApic < RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        pDevExt->aCPUs[idApic].iSmpProcessorId = smp_processor_id();
        vbox_ktimer_start(&pDevExt->aCPUs[idApic].Timer);
    }
    else
    {
        printk("vboxdrv: error: apicid=%d max=%lu cpuid=%d\n",
               idApic, (unsigned long)RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
    }
}
#endif /* CONFIG_SMP */
1357
1358
1359/**
1360 * Suspends the GIP updating.
1361 *
1362 * @param pDevExt Instance data.
1363 */
1364void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1365{
1366#ifdef CONFIG_SMP
1367 unsigned i;
1368#endif
1369 LogFlow(("supdrvOSGipSuspend:\n"));
1370 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);
1371
1372 vbox_ktimer_stop(&g_GipTimer);
1373#ifdef CONFIG_SMP
1374 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1375 vbox_ktimer_stop(&pDevExt->aCPUs[i].Timer);
1376#endif
1377}
1378
1379
1380/**
1381 * Get the current CPU count.
1382 * @returns Number of cpus.
1383 */
1384unsigned VBOXCALL supdrvOSGetCPUCount(void)
1385{
1386#ifdef CONFIG_SMP
1387# if defined(num_present_cpus) && !defined(VBOX_REDHAT_KABI)
1388 return num_present_cpus();
1389# elif defined(num_possible_cpus)
1390 return num_possible_cpus();
1391# else
1392 return smp_num_cpus;
1393# endif
1394#else
1395 return 1;
1396#endif
1397}
1398
1399/**
1400 * Force async tsc mode.
1401 * @todo add a module argument for this.
1402 */
1403bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
1404{
1405 return force_async_tsc != 0;
1406}
1407
1408
1409/**
1410 * Converts a supdrv error code to an linux error code.
1411 *
1412 * @returns corresponding linux error code.
1413 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1414 */
1415static int VBoxDrvLinuxErr2LinuxErr(int rc)
1416{
1417 switch (rc)
1418 {
1419 case 0: return 0;
1420 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1421 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1422 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1423 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1424 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1425 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1426 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1427 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1428 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1429 case SUPDRV_ERR_IDT_FAILED: return -1000;
1430 }
1431
1432 return -EPERM;
1433}
1434
1435
1436RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
1437{
1438#if 1
1439 va_list args;
1440 char szMsg[512];
1441
1442 va_start(args, pszFormat);
1443 vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
1444 szMsg[sizeof(szMsg) - 1] = '\0';
1445 printk("%s", szMsg);
1446 va_end(args);
1447#else
1448 /* forward to printf - needs some more GCC hacking to fix ebp... */
1449 __asm__ __volatile__ ("mov %0, %esp\n\t"
1450 "jmp %1\n\t",
1451 :: "r" ((uintptr_t)&pszFormat - 4),
1452 "m" (printk));
1453#endif
1454 return 0;
1455}
1456
1457
1458/** Runtime assert implementation for Linux Ring-0. */
1459RTDECL(bool) RTAssertDoBreakpoint(void)
1460{
1461 return true;
1462}
1463
1464
1465/** Runtime assert implementation for Linux Ring-0. */
1466RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
1467{
1468 printk("!!Assertion Failed!!\n"
1469 "Expression: %s\n"
1470 "Location : %s(%d) %s\n",
1471 pszExpr, pszFile, uLine, pszFunction);
1472}
1473
1474
1475/** Runtime assert implementation for Linux Ring-0. */
1476RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1477{ /* forwarder. */
1478 va_list ap;
1479 char msg[256];
1480
1481 va_start(ap, pszFormat);
1482 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1483 msg[sizeof(msg) - 1] = '\0';
1484 printk("%s", msg);
1485 va_end(ap);
1486}
1487
1488
1489/* GCC C++ hack. */
1490unsigned __gxx_personality_v0 = 0xcccccccc;
1491
1492
1493module_init(VBoxDrvLinuxInit);
1494module_exit(VBoxDrvLinuxUnload);
1495
1496MODULE_AUTHOR("innotek GmbH");
1497MODULE_DESCRIPTION("VirtualBox Support Driver");
1498MODULE_LICENSE("GPL");
1499#ifdef MODULE_VERSION
1500MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
1501#endif
1502
1503#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1504module_param(force_async_tsc, int, 0444);
1505#else
1506MODULE_PARM(force_async_tsc, "i");
1507#endif
1508MODULE_PARM_DESC(force_async_tsc, "force the asynchronous TSC mode");
1509
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette