VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c@ 7869

Last change on this file since 7869 was 7869, checked in by vboxsync, 17 years ago

Linux: detect asynchronous TSCs

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 44.9 KB
Line 
1/** @file
2 * The VirtualBox Support Driver - Linux hosts.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 * Some lines of code to disable the local APIC on x86_64 machines taken
25 * from a Mandriva patch by Gwenole Beauchesne <[email protected]>.
26 */
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "SUPDRV.h"
32#include "the-linux-kernel.h"
33#include "version-generated.h"
34
35#include <iprt/assert.h>
36#include <iprt/spinlock.h>
37#include <iprt/semaphore.h>
38#include <iprt/initterm.h>
39#include <iprt/process.h>
40#include <iprt/err.h>
41#include <iprt/mem.h>
42#include <iprt/log.h>
43#include <iprt/mp.h>
44
45#include <linux/sched.h>
46#ifdef CONFIG_DEVFS_FS
47# include <linux/devfs_fs_kernel.h>
48#endif
49#ifdef CONFIG_VBOXDRV_AS_MISC
50# include <linux/miscdevice.h>
51#endif
52#ifdef CONFIG_X86_LOCAL_APIC
53# include <asm/apic.h>
54# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
55# include <asm/nmi.h>
56# endif
57#endif
58
59#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
60# include <asm/pgtable.h>
61# define global_flush_tlb __flush_tlb_global
62#endif
63
64#include <iprt/mem.h>
65
66
/* devfs defines */
#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)

/** Creates the devfs character device node.
 * Evaluates to a non-NULL dummy handle on success and to NULL on failure. */
#  define VBOX_REGISTER_DEVFS()                             \
({                                                         \
    void *pvDevFsHandle = NULL;                            \
    if (devfs_mk_cdev(MKDEV(DEVICE_MAJOR, 0),              \
                      S_IFCHR | S_IRUGO | S_IWUGO,         \
                      DEVICE_NAME) == 0)                   \
        pvDevFsHandle = (void *)' '; /* return not NULL */ \
    pvDevFsHandle;                                         \
})

/** Removes the devfs node. The handle is not needed on 2.6 and is not evaluated.
 * Wrapped in do/while(0) so the macro behaves like a single statement. */
#  define VBOX_UNREGISTER_DEVFS(handle)                     \
    do { devfs_remove(DEVICE_NAME); } while (0)

# else /* < 2.6.0 */

/** Registers the devfs entry and evaluates to the handle returned by devfs_register(). */
#  define VBOX_REGISTER_DEVFS()                             \
    devfs_register(NULL, DEVICE_NAME, DEVFS_FL_DEFAULT,    \
                   DEVICE_MAJOR, 0,                        \
                   S_IFCHR | S_IRUGO | S_IWUGO,            \
                   &gFileOpsVBoxDrv, NULL)

/** Unregisters the devfs entry if the handle is not NULL.
 * Wrapped in do/while(0) so that an 'else' following the macro cannot bind to
 * the internal 'if'. */
#  define VBOX_UNREGISTER_DEVFS(handle)                     \
    do {                                                   \
        if ((handle) != NULL)                              \
            devfs_unregister(handle);                      \
    } while (0)

# endif /* < 2.6.0 */
#endif /* CONFIG_DEVFS_FS && !CONFIG_VBOXDRV_AS_MISC */
98
99#ifndef CONFIG_VBOXDRV_AS_MISC
100# if defined(CONFIG_DEVFS_FS) && LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 0)
101# define VBOX_REGISTER_DEVICE(a,b,c) devfs_register_chrdev(a,b,c)
102# define VBOX_UNREGISTER_DEVICE(a,b) devfs_unregister_chrdev(a,b)
103# else
104# define VBOX_REGISTER_DEVICE(a,b,c) register_chrdev(a,b,c)
105# define VBOX_UNREGISTER_DEVICE(a,b) unregister_chrdev(a,b)
106# endif
107#endif /* !CONFIG_VBOXDRV_AS_MISC */
108
109
110#ifdef CONFIG_X86_HIGH_ENTRY
111# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
112#endif
113
114/*
115 * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
116 */
117#if defined(RT_ARCH_AMD64)
118# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
119#elif defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
120# define MY_PAGE_KERNEL_EXEC __pgprot(cpu_has_pge ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
121#else
122# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
123#endif
124
125/*
126 * The redhat hack section.
127 * - The current hacks are for 2.4.21-15.EL only.
128 */
129#ifndef NO_REDHAT_HACKS
130/* accounting. */
131# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
132# ifdef VM_ACCOUNT
133# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c, 0) /* should it be 1 or 0? */
134# endif
135# endif
136
137/* backported remap_page_range. */
138# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
139# include <asm/tlb.h>
140# ifdef tlb_vma /* probably not good enough... */
141# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
142# endif
143# endif
144
145#endif /* !NO_REDHAT_HACKS */
146
147
148#ifndef MY_DO_MUNMAP
149# define MY_DO_MUNMAP(a,b,c) do_munmap(a, b, c)
150#endif
151
152
153/** @def ONE_MSEC_IN_JIFFIES
154 * The number of jiffies that make up 1 millisecond. Must be at least 1! */
155#if HZ <= 1000
156# define ONE_MSEC_IN_JIFFIES 1
157#elif !(HZ % 1000)
158# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
159#else
160# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
161# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
162#endif
163
/** @def TICK_NSEC
 * The time between ticks in nsec.
 * Fallback for kernels that do not provide TICK_NSEC themselves: one second
 * (10^9 ns) divided by the tick frequency. */
#ifndef TICK_NSEC
# define TICK_NSEC (1000000000UL / HZ)
#endif
169
170#ifdef CONFIG_X86_LOCAL_APIC
171
172/* If an NMI occurs while we are inside the world switcher the machine will
173 * crash. The Linux NMI watchdog generates periodic NMIs increasing a counter
174 * which is compared with another counter increased in the timer interrupt
175 * handler. We disable the NMI watchdog.
176 *
177 * - Linux >= 2.6.21: The watchdog is disabled by default on i386 and x86_64.
178 * - Linux < 2.6.21: The watchdog is normally enabled by default on x86_64
179 * and disabled on i386.
180 */
181# if defined(RT_ARCH_AMD64)
182# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 21) && !defined(VBOX_REDHAT_KABI)
183# define DO_DISABLE_NMI 1
184# endif
185# endif
186
187# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
188extern int nmi_active;
189# define nmi_atomic_read(P) *(P)
190# define nmi_atomic_set(P, V) *(P) = (V)
191# define nmi_atomic_dec(P) nmi_atomic_set(P, 0)
192# else
193# define nmi_atomic_read(P) atomic_read(P)
194# define nmi_atomic_set(P, V) atomic_set(P, V)
195# define nmi_atomic_dec(P) atomic_dec(P)
196# endif
197
198# ifndef X86_FEATURE_ARCH_PERFMON
199# define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
200# endif
201# ifndef MSR_ARCH_PERFMON_EVENTSEL0
202# define MSR_ARCH_PERFMON_EVENTSEL0 0x186
203# endif
204# ifndef ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT
205# define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
206# endif
207
208#endif /* CONFIG_X86_LOCAL_APIC */
209
210#define xstr(s) str(s)
211#define str(s) #s
212
213/*******************************************************************************
214* Defined Constants And Macros *
215*******************************************************************************/
/**
 * Device extension & session data association structure.
 */
219static SUPDRVDEVEXT g_DevExt;
220
221/** Timer structure for the GIP update. */
222static VBOXKTIMER g_GipTimer;
223/** Pointer to the page structure for the GIP. */
224struct page *g_pGipPage;
225
226/** Registered devfs device handle. */
227#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
228# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
229static void *g_hDevFsVBoxDrv = NULL;
230# else
231static devfs_handle_t g_hDevFsVBoxDrv = NULL;
232# endif
233#endif
234
235#ifndef CONFIG_VBOXDRV_AS_MISC
236/** Module major number */
237#define DEVICE_MAJOR 234
238/** Saved major device number */
239static int g_iModuleMajor;
240#endif /* !CONFIG_VBOXDRV_AS_MISC */
241
242/** Module parameter.
243 * Not prefixed because the name is used by macros and the end of this file. */
244static int force_async_tsc = 0;
245
246/** The module name. */
247#define DEVICE_NAME "vboxdrv"
248
249#ifdef RT_ARCH_AMD64
250/**
251 * Memory for the executable memory heap (in IPRT).
252 */
253extern uint8_t g_abExecMemory[1572864]; /* 1.5 MB */
254__asm__(".section execmemory, \"awx\", @progbits\n\t"
255 ".align 32\n\t"
256 ".globl g_abExecMemory\n"
257 "g_abExecMemory:\n\t"
258 ".zero 1572864\n\t"
259 ".type g_abExecMemory, @object\n\t"
260 ".size g_abExecMemory, 1572864\n\t"
261 ".text\n\t");
262#endif
263
264
265/*******************************************************************************
266* Internal Functions *
267*******************************************************************************/
268#ifdef VBOX_HRTIMER
269typedef enum hrtimer_restart (*PFNVBOXKTIMER)(struct hrtimer *);
270#else
271typedef void (*PFNVBOXKTIMER)(unsigned long);
272#endif
273
274static int VBoxDrvLinuxInit(void);
275static void VBoxDrvLinuxUnload(void);
276static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp);
277static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp);
278#ifdef HAVE_UNLOCKED_IOCTL
279static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
280#else
281static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
282#endif
283static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
284static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt);
285static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt);
286#ifdef VBOX_HRTIMER
287static enum hrtimer_restart VBoxDrvLinuxGipTimer(struct hrtimer *pTimer);
288#else
289static void VBoxDrvLinuxGipTimer(unsigned long ulUser);
290#endif
291#ifdef CONFIG_SMP
292# ifdef VBOX_HRTIMER
293static enum hrtimer_restart VBoxDrvLinuxGipTimerPerCpu(struct hrtimer *pTimer);
294# else
295static void VBoxDrvLinuxGipTimerPerCpu(unsigned long ulUser);
296# endif
297static void VBoxDrvLinuxGipResumePerCpu(void *pvUser);
298#endif
299static int VBoxDrvLinuxErr2LinuxErr(int);
300
301
302/** The file_operations structure. */
303static struct file_operations gFileOpsVBoxDrv =
304{
305 owner: THIS_MODULE,
306 open: VBoxDrvLinuxCreate,
307 release: VBoxDrvLinuxClose,
308#ifdef HAVE_UNLOCKED_IOCTL
309 unlocked_ioctl: VBoxDrvLinuxIOCtl,
310#else
311 ioctl: VBoxDrvLinuxIOCtl,
312#endif
313};
314
315#ifdef CONFIG_VBOXDRV_AS_MISC
316/** The miscdevice structure. */
317static struct miscdevice gMiscDevice =
318{
319 minor: MISC_DYNAMIC_MINOR,
320 name: DEVICE_NAME,
321 fops: &gFileOpsVBoxDrv,
322# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && \
323 LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 17)
324 devfs_name: DEVICE_NAME,
325# endif
326};
327#endif
328
329static inline void vbox_ktimer_init(PVBOXKTIMER pTimer, PFNVBOXKTIMER pfnFunction, unsigned long ulData)
330{
331#ifdef VBOX_HRTIMER
332 hrtimer_init(pTimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
333 pTimer->function = pfnFunction;
334#else
335 init_timer(pTimer);
336 pTimer->data = ulData;
337 pTimer->function = pfnFunction;
338 pTimer->expires = jiffies;
339#endif
340}
341
342static inline void vbox_ktimer_start(PVBOXKTIMER pTimer)
343{
344#ifdef VBOX_HRTIMER
345 hrtimer_start(pTimer, ktime_add_ns(ktime_get(), 1000000), HRTIMER_MODE_ABS);
346#else
347 mod_timer(pTimer, jiffies);
348#endif
349}
350
351static inline void vbox_ktimer_stop(PVBOXKTIMER pTimer)
352{
353#ifdef VBOX_HRTIMER
354 hrtimer_cancel(pTimer);
355#else
356 if (timer_pending(pTimer))
357 del_timer_sync(pTimer);
358#endif
359}
360
361#ifdef CONFIG_X86_LOCAL_APIC
362# ifdef DO_DISABLE_NMI
363
364/** Stop AMD NMI watchdog (x86_64 only). */
365static int stop_k7_watchdog(void)
366{
367 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
368 return 1;
369}
370
371/** Stop Intel P4 NMI watchdog (x86_64 only). */
372static int stop_p4_watchdog(void)
373{
374 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
375 wrmsr(MSR_P4_IQ_CCCR1, 0, 0);
376 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
377 return 1;
378}
379
380/** The new method of detecting the event counter */
381static int stop_intel_arch_watchdog(void)
382{
383 unsigned ebx;
384
385 ebx = cpuid_ebx(10);
386 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
387 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
388 return 1;
389}
390
391/** Stop NMI watchdog. */
392static void vbox_stop_apic_nmi_watchdog(void *unused)
393{
394 int stopped = 0;
395
396 /* only support LOCAL and IO APICs for now */
397 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
398 (nmi_watchdog != NMI_IO_APIC))
399 return;
400
401 if (nmi_watchdog == NMI_LOCAL_APIC)
402 {
403 switch (boot_cpu_data.x86_vendor)
404 {
405 case X86_VENDOR_AMD:
406 if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
407 return;
408 stopped = stop_k7_watchdog();
409 break;
410 case X86_VENDOR_INTEL:
411 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
412 {
413 stopped = stop_intel_arch_watchdog();
414 break;
415 }
416 stopped = stop_p4_watchdog();
417 break;
418 default:
419 return;
420 }
421 }
422
423 if (stopped)
424 nmi_atomic_dec(&nmi_active);
425}
426
427/** Disable LAPIC NMI watchdog. */
428static void disable_lapic_nmi_watchdog(void)
429{
430 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
431
432 if (nmi_atomic_read(&nmi_active) <= 0)
433 return;
434
435 on_each_cpu(vbox_stop_apic_nmi_watchdog, NULL, 1, 1);
436
437 BUG_ON(nmi_atomic_read(&nmi_active) != 0);
438
439 /* tell do_nmi() and others that we're not active any more */
440 nmi_watchdog = NMI_NONE;
441}
442
443/** Shutdown NMI. */
444static void nmi_cpu_shutdown(void * dummy)
445{
446 unsigned int vERR, vPC;
447
448 vPC = apic_read(APIC_LVTPC);
449
450 if ((GET_APIC_DELIVERY_MODE(vPC) == APIC_MODE_NMI) && !(vPC & APIC_LVT_MASKED))
451 {
452 vERR = apic_read(APIC_LVTERR);
453 apic_write(APIC_LVTERR, vERR | APIC_LVT_MASKED);
454 apic_write(APIC_LVTPC, vPC | APIC_LVT_MASKED);
455 apic_write(APIC_LVTERR, vERR);
456 }
457}
458
459static void nmi_shutdown(void)
460{
461 on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
462}
463# endif /* DO_DISABLE_NMI */
464#endif /* CONFIG_X86_LOCAL_APIC */
465
466
467/**
468 * Determine if the time stamp counters of the CPU cores are asynchronous.
469 */
470static uint64_t g_aTsc[8][8];
471
472static DECLCALLBACK(void) Worker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
473{
474 int iSlot = *(int*)pvUser1;
475 int iCpu = *(int*)pvUser2;
476 g_aTsc[iSlot][iCpu] = ASMReadTSC();
477}
478
479/*
480 * When using the default/normal timer code it is essential that the time stamp counter
481 * (TSC) runs never backwards, that is, a read operation to the counter should return
482 * a bigger value than any previous read operation. This is guaranteed by the latest
483 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
484 * case we have to choose the asynchronous timer mode.
485 */
486static void VBoxDetermineAsyncTsc(void)
487{
488 uint64_t u64Diff, u64DiffMin, u64DiffMax, u64TscLast;
489 int iSlot, iCpu;
490 bool fBackwards = false;
491 int cCpu = RTMpGetOnlineCount();
492
493 printk(KERN_DEBUG DEVICE_NAME ": Found %u cores.\n", cCpu);
494 if (cCpu < 2)
495 return;
496
497 if (cCpu > RT_ELEMENTS(g_aTsc))
498 cCpu = RT_ELEMENTS(g_aTsc);
499
500 for (iSlot = 0; iSlot < RT_ELEMENTS(g_aTsc); iSlot++)
501 {
502 for (iCpu = 0; iCpu < cCpu; iCpu++)
503 RTMpOnSpecific(iCpu, Worker, &iSlot, &iCpu);
504 }
505
506 u64DiffMin = (uint64_t)~0;
507 u64TscLast = 0;
508 for (iSlot = 0; iSlot < RT_ELEMENTS(g_aTsc); iSlot++)
509 {
510 uint64_t u64Tsc0 = g_aTsc[iSlot][0];
511 u64DiffMax = 0;
512 if (u64Tsc0 <= u64TscLast)
513 fBackwards = true;
514 u64TscLast = u64Tsc0;
515 for (iCpu = 1; iCpu < cCpu; iCpu++)
516 {
517 uint64_t u64TscN = g_aTsc[iSlot][iCpu];
518 if (u64TscN <= u64TscLast)
519 fBackwards = true;
520 u64TscLast = u64TscN;
521 u64Diff = u64TscN > u64Tsc0 ? u64TscN - u64Tsc0 : u64Tsc0 - u64TscN;
522 if (u64DiffMax < u64Diff)
523 u64DiffMax = u64Diff;
524 }
525 if (u64DiffMin > u64DiffMax)
526 u64DiffMin = u64DiffMax;
527 }
528 /* Don't depend on 64-bit arithmetics in the printk code. We assume that the difference between both
529 * cores is smaller than 2^32. */
530 printk(KERN_DEBUG DEVICE_NAME ": fBackwards=%d u64DiffMin=%u.\n", fBackwards, (uint32_t)u64DiffMin);
531 if (fBackwards)
532 force_async_tsc = 1;
533}
534
535/**
536 * Initialize module.
537 *
538 * @returns appropriate status code.
539 */
540static int __init VBoxDrvLinuxInit(void)
541{
542 int rc;
543
544 dprintf(("VBoxDrv::ModuleInit\n"));
545
546#ifdef CONFIG_X86_LOCAL_APIC
547 /*
548 * If an NMI occurs while we are inside the world switcher the macine will crash.
549 * The Linux NMI watchdog generates periodic NMIs increasing a counter which is
550 * compared with another counter increased in the timer interrupt handler. Therefore
551 * we don't allow to setup an NMI watchdog.
552 */
553# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && !defined(VBOX_REDHAT_KABI)
554 /*
555 * First test: NMI actiated? Works only works with Linux 2.6 -- 2.4 does not export
556 * the nmi_watchdog variable.
557 */
558# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
559 (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
560# ifdef DO_DISABLE_NMI
561 if (nmi_atomic_read(&nmi_active) > 0)
562 {
563 printk(KERN_DEBUG DEVICE_NAME ": Trying to deactivate the NMI watchdog...\n");
564
565 switch (nmi_watchdog)
566 {
567 case NMI_LOCAL_APIC:
568 disable_lapic_nmi_watchdog();
569 break;
570 case NMI_NONE:
571 nmi_atomic_dec(&nmi_active);
572 break;
573 }
574
575 if (nmi_atomic_read(&nmi_active) == 0)
576 {
577 nmi_shutdown();
578 printk(KERN_DEBUG DEVICE_NAME ": Successfully done.\n");
579 }
580 else
581 printk(KERN_DEBUG DEVICE_NAME ": Failed!\n");
582 }
583# endif /* DO_DISABLE_NMI */
584
585 /*
586 * Permanent IO_APIC mode active? No way to handle this!
587 */
588 if (nmi_watchdog == NMI_IO_APIC)
589 {
590 printk(KERN_ERR DEVICE_NAME
591 ": NMI watchdog in IO_APIC mode active -- refused to load the kernel module!\n"
592 DEVICE_NAME
593 ": Please disable the NMI watchdog by specifying 'nmi_watchdog=0' at kernel\n"
594 DEVICE_NAME
595 ": command line.\n");
596 return -EINVAL;
597 }
598
599 /*
600 * See arch/i386/kernel/nmi.c on >= 2.6.19: -1 means it can never enabled again
601 */
602 nmi_atomic_set(&nmi_active, -1);
603 printk(KERN_DEBUG DEVICE_NAME ": Trying to deactivate the NMI watchdog permanently...\n");
604
605 /*
606 * Now fall through and see if it actually was enabled before. If so, fail
607 * as we cannot deactivate it cleanly from here.
608 */
609# else /* < 2.6.19 */
610 /*
611 * Older 2.6 kernels: nmi_watchdog is not initalized by default
612 */
613 if (nmi_watchdog != NMI_NONE)
614 goto nmi_activated;
615# endif
616# endif /* >= 2.6.0 && !defined(VBOX_REDHAT_KABI) */
617
618 /*
619 * Second test: Interrupt generated by performance counter not masked and can
620 * generate an NMI. Works also with Linux 2.4.
621 */
622 {
623 unsigned int v, ver, maxlvt;
624
625 v = apic_read(APIC_LVR);
626 ver = GET_APIC_VERSION(v);
627 /* 82489DXs do not report # of LVT entries. */
628 maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
629 if (maxlvt >= 4)
630 {
631 /* Read status of performance counter IRQ vector */
632 v = apic_read(APIC_LVTPC);
633
634 /* performance counter generates NMI and is not masked? */
635 if ((GET_APIC_DELIVERY_MODE(v) == APIC_MODE_NMI) && !(v & APIC_LVT_MASKED))
636 {
637# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) || \
638 (defined CONFIG_X86_64 && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0))
639 printk(KERN_ERR DEVICE_NAME
640 ": NMI watchdog either active or at least initialized. Please disable the NMI\n"
641 DEVICE_NAME
642 ": watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
643 return -EINVAL;
644# else /* < 2.6.19 */
645# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0) && !defined(VBOX_REDHAT_KABI)
646nmi_activated:
647# endif
648 printk(KERN_ERR DEVICE_NAME
649 ": NMI watchdog active -- refused to load the kernel module! Please disable\n"
650 DEVICE_NAME
651 ": the NMI watchdog by specifying 'nmi_watchdog=0' at kernel command line.\n");
652 return -EINVAL;
653# endif /* >= 2.6.19 */
654 }
655 }
656 }
657# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19)
658 printk(KERN_DEBUG DEVICE_NAME ": Successfully done.\n");
659# endif /* >= 2.6.19 */
660#endif /* CONFIG_X86_LOCAL_APIC */
661
662 /*
663 * Check for synchronous/asynchronous TSC mode.
664 */
665 VBoxDetermineAsyncTsc();
666
667#ifdef CONFIG_VBOXDRV_AS_MISC
668 rc = misc_register(&gMiscDevice);
669 if (rc)
670 {
671 printk(KERN_ERR DEVICE_NAME ": Can't register misc device! rc=%d\n", rc);
672 return rc;
673 }
674#else /* !CONFIG_VBOXDRV_AS_MISC */
675 /*
676 * Register character device.
677 */
678 g_iModuleMajor = DEVICE_MAJOR;
679 rc = VBOX_REGISTER_DEVICE((dev_t)g_iModuleMajor, DEVICE_NAME, &gFileOpsVBoxDrv);
680 if (rc < 0)
681 {
682 dprintf(("VBOX_REGISTER_DEVICE failed with rc=%#x!\n", rc));
683 return rc;
684 }
685
686 /*
687 * Save returned module major number
688 */
689 if (DEVICE_MAJOR != 0)
690 g_iModuleMajor = DEVICE_MAJOR;
691 else
692 g_iModuleMajor = rc;
693 rc = 0;
694
695#ifdef CONFIG_DEVFS_FS
696 /*
697 * Register a device entry
698 */
699 g_hDevFsVBoxDrv = VBOX_REGISTER_DEVFS();
700 if (g_hDevFsVBoxDrv == NULL)
701 {
702 dprintf(("devfs_register failed!\n"));
703 rc = -EINVAL;
704 }
705#endif
706#endif /* !CONFIG_VBOXDRV_AS_MISC */
707 if (!rc)
708 {
709 /*
710 * Initialize the runtime.
711 * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
712 */
713 rc = RTR0Init(0);
714 if (RT_SUCCESS(rc))
715 {
716#ifdef RT_ARCH_AMD64
717 rc = RTR0MemExecDonate(&g_abExecMemory[0], sizeof(g_abExecMemory));
718#endif
719 /*
720 * Initialize the device extension.
721 */
722 if (RT_SUCCESS(rc))
723 rc = supdrvInitDevExt(&g_DevExt);
724 if (!rc)
725 {
726 /*
727 * Create the GIP page.
728 */
729 rc = VBoxDrvLinuxInitGip(&g_DevExt);
730 if (!rc)
731 {
732 printk(KERN_INFO DEVICE_NAME ": TSC mode is %s, kernel timer mode is "
733#ifdef VBOX_HRTIMER
734 "'high-res'"
735#else
736 "'normal'"
737#endif
738 ".\n",
739 g_DevExt.pGip->u32Mode == SUPGIPMODE_SYNC_TSC ? "'synchronous'" : "'asynchronous'");
740 LogFlow(("VBoxDrv::ModuleInit returning %#x\n", rc));
741 printk(KERN_DEBUG DEVICE_NAME ": Successfully loaded version "
742 VBOX_VERSION_STRING " (interface " xstr(SUPDRVIOC_VERSION) ").\n");
743 return rc;
744 }
745
746 supdrvDeleteDevExt(&g_DevExt);
747 }
748 else
749 rc = -EINVAL;
750 RTR0Term();
751 }
752 else
753 rc = -EINVAL;
754
755 /*
756 * Failed, cleanup and return the error code.
757 */
758#if defined(CONFIG_DEVFS_FS) && !defined(CONFIG_VBOXDRV_AS_MISC)
759 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
760#endif
761 }
762#ifdef CONFIG_VBOXDRV_AS_MISC
763 misc_deregister(&gMiscDevice);
764 dprintf(("VBoxDrv::ModuleInit returning %#x (minor:%d)\n", rc, gMiscDevice.minor));
765#else
766 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
767 dprintf(("VBoxDrv::ModuleInit returning %#x (major:%d)\n", rc, g_iModuleMajor));
768#endif
769 return rc;
770}
771
772
773/**
774 * Unload the module.
775 */
776static void __exit VBoxDrvLinuxUnload(void)
777{
778 int rc;
779 dprintf(("VBoxDrvLinuxUnload\n"));
780 NOREF(rc);
781
782 /*
783 * I Don't think it's possible to unload a driver which processes have
784 * opened, at least we'll blindly assume that here.
785 */
786#ifdef CONFIG_VBOXDRV_AS_MISC
787 rc = misc_deregister(&gMiscDevice);
788 if (rc < 0)
789 {
790 dprintf(("misc_deregister failed with rc=%#x\n", rc));
791 }
792#else /* !CONFIG_VBOXDRV_AS_MISC */
793# ifdef CONFIG_DEVFS_FS
794 /*
795 * Unregister a device entry
796 */
797 VBOX_UNREGISTER_DEVFS(g_hDevFsVBoxDrv);
798# endif /* devfs */
799 VBOX_UNREGISTER_DEVICE(g_iModuleMajor, DEVICE_NAME);
800#endif /* !CONFIG_VBOXDRV_AS_MISC */
801
802 /*
803 * Destroy GIP, delete the device extension and terminate IPRT.
804 */
805 VBoxDrvLinuxTermGip(&g_DevExt);
806 supdrvDeleteDevExt(&g_DevExt);
807 RTR0Term();
808}
809
810
811/**
812 * Device open. Called on open /dev/vboxdrv
813 *
814 * @param pInode Pointer to inode info structure.
815 * @param pFilp Associated file pointer.
816 */
817static int VBoxDrvLinuxCreate(struct inode *pInode, struct file *pFilp)
818{
819 int rc;
820 PSUPDRVSESSION pSession;
821 Log(("VBoxDrvLinuxCreate: pFilp=%p pid=%d/%d %s\n", pFilp, RTProcSelf(), current->pid, current->comm));
822
823 /*
824 * Call common code for the rest.
825 */
826 rc = supdrvCreateSession(&g_DevExt, (PSUPDRVSESSION *)&pSession);
827 if (!rc)
828 {
829 pSession->Uid = current->euid;
830 pSession->Gid = current->egid;
831 pSession->Process = RTProcSelf();
832 pSession->R0Process = RTR0ProcHandleSelf();
833 }
834
835 pFilp->private_data = pSession;
836
837 Log(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d/%d (pid=%d/%d %s)\n",
838 &g_DevExt, pSession, rc, VBoxDrvLinuxErr2LinuxErr(rc),
839 RTProcSelf(), current->pid, current->comm));
840 return VBoxDrvLinuxErr2LinuxErr(rc);
841}
842
843
844/**
845 * Close device.
846 *
847 * @param pInode Pointer to inode info structure.
848 * @param pFilp Associated file pointer.
849 */
850static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp)
851{
852 Log(("VBoxDrvLinuxClose: pFilp=%p pSession=%p pid=%d/%d %s\n",
853 pFilp, pFilp->private_data, RTProcSelf(), current->pid, current->comm));
854 supdrvCloseSession(&g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
855 pFilp->private_data = NULL;
856 return 0;
857}
858
859
860/**
861 * Device I/O Control entry point.
862 *
863 * @param pFilp Associated file pointer.
864 * @param uCmd The function specified to ioctl().
865 * @param ulArg The argument specified to ioctl().
866 */
867#ifdef HAVE_UNLOCKED_IOCTL
868static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
869#else
870static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
871#endif
872{
873 /*
874 * Deal with the two high-speed IOCtl that takes it's arguments from
875 * the session and iCmd, and only returns a VBox status code.
876 */
877#ifdef HAVE_UNLOCKED_IOCTL
878 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
879 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
880 || uCmd == SUP_IOCTL_FAST_DO_NOP))
881 return supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
882 return VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg);
883
884#else /* !HAVE_UNLOCKED_IOCTL */
885
886 int rc;
887 unlock_kernel();
888 if (RT_LIKELY( uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
889 || uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
890 || uCmd == SUP_IOCTL_FAST_DO_NOP))
891 rc = supdrvIOCtlFast(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data);
892 else
893 rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg);
894 lock_kernel();
895 return rc;
896#endif /* !HAVE_UNLOCKED_IOCTL */
897}
898
899
900/**
901 * Device I/O Control entry point.
902 *
903 * @param pFilp Associated file pointer.
904 * @param uCmd The function specified to ioctl().
905 * @param ulArg The argument specified to ioctl().
906 */
907static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
908{
909 int rc;
910 SUPREQHDR Hdr;
911 PSUPREQHDR pHdr;
912 uint32_t cbBuf;
913
914 Log6(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p pid=%d/%d\n", pFilp, uCmd, (void *)ulArg, RTProcSelf(), current->pid));
915
916 /*
917 * Read the header.
918 */
919 if (RT_UNLIKELY(copy_from_user(&Hdr, (void *)ulArg, sizeof(Hdr))))
920 {
921 Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x.\n", ulArg, uCmd));
922 return -EFAULT;
923 }
924 if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
925 {
926 Log(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
927 return -EINVAL;
928 }
929
930 /*
931 * Buffer the request.
932 */
933 cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
934 if (RT_UNLIKELY(cbBuf > _1M*16))
935 {
936 Log(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
937 return -E2BIG;
938 }
939 if (RT_UNLIKELY(cbBuf != _IOC_SIZE(uCmd) && _IOC_SIZE(uCmd)))
940 {
941 Log(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x.\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
942 return -EINVAL;
943 }
944 pHdr = RTMemAlloc(cbBuf);
945 if (RT_UNLIKELY(!pHdr))
946 {
947 OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x.\n", cbBuf, uCmd));
948 return -ENOMEM;
949 }
950 if (RT_UNLIKELY(copy_from_user(pHdr, (void *)ulArg, Hdr.cbIn)))
951 {
952 Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x.\n", ulArg, Hdr.cbIn, uCmd));
953 RTMemFree(pHdr);
954 return -EFAULT;
955 }
956
957 /*
958 * Process the IOCtl.
959 */
960 rc = supdrvIOCtl(uCmd, &g_DevExt, (PSUPDRVSESSION)pFilp->private_data, pHdr);
961
962 /*
963 * Copy ioctl data and output buffer back to user space.
964 */
965 if (RT_LIKELY(!rc))
966 {
967 uint32_t cbOut = pHdr->cbOut;
968 if (RT_UNLIKELY(cbOut > cbBuf))
969 {
970 OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
971 cbOut = cbBuf;
972 }
973 if (RT_UNLIKELY(copy_to_user((void *)ulArg, pHdr, cbOut)))
974 {
975 /* this is really bad! */
976 OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
977 rc = -EFAULT;
978 }
979 }
980 else
981 {
982 Log(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
983 rc = -EINVAL;
984 }
985 RTMemFree(pHdr);
986
987 Log6(("VBoxDrvLinuxIOCtl: returns %d (pid=%d/%d)\n", rc, RTProcSelf(), current->pid));
988 return rc;
989}
990
991
992/**
993 * Initializes any OS specific object creator fields.
994 */
995void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
996{
997 NOREF(pObj);
998 NOREF(pSession);
999}
1000
1001
1002/**
1003 * Checks if the session can access the object.
1004 *
1005 * @returns true if a decision has been made.
1006 * @returns false if the default access policy should be applied.
1007 *
1008 * @param pObj The object in question.
1009 * @param pSession The session wanting to access the object.
1010 * @param pszObjName The object name, can be NULL.
1011 * @param prc Where to store the result when returning true.
1012 */
1013bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
1014{
1015 NOREF(pObj);
1016 NOREF(pSession);
1017 NOREF(pszObjName);
1018 NOREF(prc);
1019 return false;
1020}
1021
1022
1023/**
1024 * Initializes the GIP.
1025 *
1026 * @returns negative errno.
1027 * @param pDevExt Instance data. GIP stuff may be updated.
1028 */
1029static int VBoxDrvLinuxInitGip(PSUPDRVDEVEXT pDevExt)
1030{
1031 struct page *pPage;
1032 dma_addr_t HCPhys;
1033 PSUPGLOBALINFOPAGE pGip;
1034#ifdef CONFIG_SMP
1035 unsigned i;
1036#endif
1037 LogFlow(("VBoxDrvLinuxInitGip:\n"));
1038
1039 /*
1040 * Allocate the page.
1041 */
1042 pPage = alloc_pages(GFP_USER, 0);
1043 if (!pPage)
1044 {
1045 Log(("VBoxDrvLinuxInitGip: failed to allocate the GIP page\n"));
1046 return -ENOMEM;
1047 }
1048
1049 /*
1050 * Lock the page.
1051 */
1052 SetPageReserved(pPage);
1053 g_pGipPage = pPage;
1054
1055 /*
1056 * Call common initialization routine.
1057 */
1058 HCPhys = page_to_phys(pPage);
1059 pGip = (PSUPGLOBALINFOPAGE)page_address(pPage);
1060 pDevExt->ulLastJiffies = jiffies;
1061 pDevExt->u64LastMonotime = (uint64_t)pDevExt->ulLastJiffies * TICK_NSEC;
1062 Log(("VBoxDrvInitGIP: TICK_NSEC=%ld HZ=%d jiffies=%ld now=%lld\n",
1063 TICK_NSEC, HZ, pDevExt->ulLastJiffies, pDevExt->u64LastMonotime));
1064 supdrvGipInit(pDevExt, pGip, HCPhys, pDevExt->u64LastMonotime,
1065 HZ <= 1000 ? HZ : 1000);
1066
1067 /*
1068 * Initialize the timer.
1069 */
1070 vbox_ktimer_init(&g_GipTimer, VBoxDrvLinuxGipTimer, (unsigned long)pDevExt);
1071#ifdef CONFIG_SMP
1072 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1073 {
1074 pDevExt->aCPUs[i].u64LastMonotime = pDevExt->u64LastMonotime;
1075 pDevExt->aCPUs[i].ulLastJiffies = pDevExt->ulLastJiffies;
1076 pDevExt->aCPUs[i].iSmpProcessorId = -512;
1077 vbox_ktimer_init(&pDevExt->aCPUs[i].Timer, VBoxDrvLinuxGipTimerPerCpu, i);
1078 }
1079#endif
1080
1081 return 0;
1082}
1083
1084
1085/**
1086 * Terminates the GIP.
1087 *
1088 * @returns negative errno.
1089 * @param pDevExt Instance data. GIP stuff may be updated.
1090 */
1091static int VBoxDrvLinuxTermGip(PSUPDRVDEVEXT pDevExt)
1092{
1093 struct page *pPage;
1094 PSUPGLOBALINFOPAGE pGip;
1095#ifdef CONFIG_SMP
1096 unsigned i;
1097#endif
1098 LogFlow(("VBoxDrvLinuxTermGip:\n"));
1099
1100 /*
1101 * Delete the timer if it's pending.
1102 */
1103 vbox_ktimer_stop(&g_GipTimer);
1104#ifdef CONFIG_SMP
1105 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1106 vbox_ktimer_stop(&pDevExt->aCPUs[i].Timer);
1107#endif
1108
1109 /*
1110 * Uninitialize the content.
1111 */
1112 pGip = pDevExt->pGip;
1113 pDevExt->pGip = NULL;
1114 if (pGip)
1115 supdrvGipTerm(pGip);
1116
1117 /*
1118 * Free the page.
1119 */
1120 pPage = g_pGipPage;
1121 g_pGipPage = NULL;
1122 if (pPage)
1123 {
1124 ClearPageReserved(pPage);
1125 __free_pages(pPage, 0);
1126 }
1127
1128 return 0;
1129}
1130
1131/**
1132 * Timer callback function.
1133 *
1134 * In ASYNC TSC mode this is called on the primary CPU, and we're
1135 * assuming that the CPU remains online.
1136 *
1137 * @param ulUser The device extension pointer.
1138 */
1139#ifdef VBOX_HRTIMER
1140static enum hrtimer_restart VBoxDrvLinuxGipTimer(struct hrtimer *pTimer)
1141#else
1142static void VBoxDrvLinuxGipTimer(unsigned long ulUser)
1143#endif
1144{
1145 PSUPDRVDEVEXT pDevExt;
1146 PSUPGLOBALINFOPAGE pGip;
1147 unsigned long ulNow;
1148 unsigned long ulDiff;
1149 uint64_t u64Monotime;
1150 unsigned long SavedFlags;
1151#ifdef VBOX_HRTIMER
1152 ktime_t KtNow;
1153#endif
1154
1155 local_irq_save(SavedFlags);
1156
1157 ulNow = jiffies;
1158#ifdef VBOX_HRTIMER
1159 KtNow = ktime_get();
1160 pDevExt = &g_DevExt;
1161#else
1162 pDevExt = (PSUPDRVDEVEXT)ulUser;
1163#endif
1164 pGip = pDevExt->pGip;
1165
1166#ifdef CONFIG_SMP
1167 if (pGip && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1168 {
1169 uint8_t iCPU = ASMGetApicId();
1170 ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
1171 pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
1172 u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + ulDiff * TICK_NSEC;
1173 pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
1174 }
1175 else
1176#endif /* CONFIG_SMP */
1177 {
1178 ulDiff = ulNow - pDevExt->ulLastJiffies;
1179 pDevExt->ulLastJiffies = ulNow;
1180 u64Monotime = pDevExt->u64LastMonotime + ulDiff * TICK_NSEC;
1181 pDevExt->u64LastMonotime = u64Monotime;
1182 }
1183 if (RT_LIKELY(pGip))
1184 supdrvGipUpdate(pDevExt->pGip, u64Monotime);
1185 if (RT_LIKELY(!pDevExt->fGIPSuspended))
1186 {
1187#ifdef VBOX_HRTIMER
1188 hrtimer_forward(&g_GipTimer, KtNow, ktime_set(0, 1000000));
1189#else
1190 mod_timer(&g_GipTimer, ulNow + ONE_MSEC_IN_JIFFIES);
1191#endif
1192 }
1193
1194 local_irq_restore(SavedFlags);
1195
1196#ifdef VBOX_HRTIMER
1197 return pDevExt->fGIPSuspended ? HRTIMER_NORESTART : HRTIMER_RESTART;
1198#endif
1199}
1200
1201
#ifdef CONFIG_SMP
/**
 * Timer callback function for the other CPUs.
 *
 * Advances the per-CPU monotonic timestamp of the CPU it runs on by the
 * jiffies elapsed since its previous run, passes it to
 * supdrvGipUpdatePerCpu() and, unless GIP updating is suspended, re-arms the
 * per-CPU timer one millisecond ahead. An error is logged via printk when the
 * APIC ID is out of range or when the callback runs on a CPU other than the
 * one the timer belongs to; nothing is updated in those cases.
 *
 * @returns (VBOX_HRTIMER builds only) HRTIMER_NORESTART when GIP updating is
 *          suspended, HRTIMER_RESTART otherwise.
 * @param   iTimerCPU   The APIC ID of this timer (non-hrtimer builds).
 * @param   pTimer      The expired timer (VBOX_HRTIMER builds). Not used; the
 *                      current APIC ID stands in for iTimerCPU.
 */
#ifdef VBOX_HRTIMER
static enum hrtimer_restart VBoxDrvLinuxGipTimerPerCpu(struct hrtimer *pTimer)
#else
static void VBoxDrvLinuxGipTimerPerCpu(unsigned long iTimerCPU)
#endif
{
    PSUPDRVDEVEXT       pDevExt;
    PSUPGLOBALINFOPAGE  pGip;
    uint8_t             iCPU;
    uint64_t            u64Monotime;
    unsigned long       SavedFlags;
    unsigned long       ulNow;
# ifdef VBOX_HRTIMER
    unsigned long       iTimerCPU;
    ktime_t             KtNow;
# endif

    local_irq_save(SavedFlags);

    ulNow   = jiffies;
    pDevExt = &g_DevExt;
    pGip    = pDevExt->pGip;
    iCPU    = ASMGetApicId();
# ifdef VBOX_HRTIMER
    iTimerCPU = iCPU; /* XXX hrtimer does not support a 'data' field */
    KtNow     = ktime_get();
# endif

    if (RT_LIKELY(iCPU < RT_ELEMENTS(pGip->aCPUs)))
    {
        if (RT_LIKELY(iTimerCPU == iCPU))
        {
            unsigned long ulDiff = ulNow - pDevExt->aCPUs[iCPU].ulLastJiffies;
            pDevExt->aCPUs[iCPU].ulLastJiffies = ulNow;
            /* Widen before multiplying: with a 32-bit unsigned long the product
               would wrap once more than a few seconds worth of jiffies elapsed. */
            u64Monotime = pDevExt->aCPUs[iCPU].u64LastMonotime + (uint64_t)ulDiff * TICK_NSEC;
            pDevExt->aCPUs[iCPU].u64LastMonotime = u64Monotime;
            if (RT_LIKELY(pGip))
                supdrvGipUpdatePerCpu(pGip, u64Monotime, iCPU);
            if (RT_LIKELY(!pDevExt->fGIPSuspended))
            {
# ifdef VBOX_HRTIMER
                hrtimer_forward(&pDevExt->aCPUs[iCPU].Timer, KtNow, ktime_set(0, 1000000));
# else
                mod_timer(&pDevExt->aCPUs[iCPU].Timer, ulNow + ONE_MSEC_IN_JIFFIES);
# endif
            }
        }
        else
            printk("vboxdrv: error: GIP CPU update timer executing on the wrong CPU: apicid=%d != timer-apicid=%ld (cpuid=%d !=? timer-cpuid=%d)\n",
                   iCPU, iTimerCPU, smp_processor_id(), pDevExt->aCPUs[iTimerCPU].iSmpProcessorId);
    }
    else
        printk("vboxdrv: error: APIC ID is bogus (GIP CPU update): apicid=%d max=%lu cpuid=%d\n",
               iCPU, (unsigned long)RT_ELEMENTS(pGip->aCPUs), smp_processor_id());

    local_irq_restore(SavedFlags);

# ifdef VBOX_HRTIMER
    return pDevExt->fGIPSuspended ? HRTIMER_NORESTART : HRTIMER_RESTART;
# endif
}
#endif /* CONFIG_SMP */
1270
1271
1272/**
1273 * Maps the GIP into user space.
1274 *
1275 * @returns negative errno.
1276 * @param pDevExt Instance data.
1277 */
1278int VBOXCALL supdrvOSGipMap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE *ppGip)
1279{
1280 int rc = 0;
1281 unsigned long ulAddr;
1282 unsigned long HCPhys = pDevExt->HCPhysGip;
1283 pgprot_t pgFlags;
1284 pgprot_val(pgFlags) = _PAGE_PRESENT | _PAGE_USER;
1285 LogFlow(("supdrvOSGipMap: ppGip=%p\n", ppGip));
1286
1287 /*
1288 * Allocate user space mapping and put the physical pages into it.
1289 */
1290 down_write(&current->mm->mmap_sem);
1291 ulAddr = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, 0);
1292 if (!(ulAddr & ~PAGE_MASK))
1293 {
1294#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
1295 int rc2 = remap_page_range(ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1296#else
1297 int rc2 = 0;
1298 struct vm_area_struct *vma = find_vma(current->mm, ulAddr);
1299 if (vma)
1300#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 11)
1301 rc2 = remap_page_range(vma, ulAddr, HCPhys, PAGE_SIZE, pgFlags);
1302#else
1303 rc2 = remap_pfn_range(vma, ulAddr, HCPhys >> PAGE_SHIFT, PAGE_SIZE, pgFlags);
1304#endif
1305 else
1306 {
1307 rc = SUPDRV_ERR_NO_MEMORY;
1308 Log(("supdrvOSGipMap: no vma found for ulAddr=%#lx!\n", ulAddr));
1309 }
1310#endif
1311 if (rc2)
1312 {
1313 rc = SUPDRV_ERR_NO_MEMORY;
1314 Log(("supdrvOSGipMap: remap_page_range failed rc2=%d\n", rc2));
1315 }
1316 }
1317 else
1318 {
1319 Log(("supdrvOSGipMap: do_mmap failed ulAddr=%#lx\n", ulAddr));
1320 rc = SUPDRV_ERR_NO_MEMORY;
1321 }
1322 up_write(&current->mm->mmap_sem); /* not quite sure when to give this up. */
1323
1324 /*
1325 * Success?
1326 */
1327 if (!rc)
1328 {
1329 *ppGip = (PSUPGLOBALINFOPAGE)ulAddr;
1330 LogFlow(("supdrvOSGipMap: ppGip=%p\n", *ppGip));
1331 return 0;
1332 }
1333
1334 /*
1335 * Failure, cleanup and be gone.
1336 */
1337 if (ulAddr & ~PAGE_MASK)
1338 {
1339 down_write(&current->mm->mmap_sem);
1340 MY_DO_MUNMAP(current->mm, ulAddr, PAGE_SIZE);
1341 up_write(&current->mm->mmap_sem);
1342 }
1343
1344 LogFlow(("supdrvOSGipMap: returns %d\n", rc));
1345 return rc;
1346}
1347
1348
1349/**
1350 * Maps the GIP into user space.
1351 *
1352 * @returns negative errno.
1353 * @param pDevExt Instance data.
1354 */
1355int VBOXCALL supdrvOSGipUnmap(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
1356{
1357 LogFlow(("supdrvOSGipUnmap: pGip=%p\n", pGip));
1358 if (current->mm)
1359 {
1360 down_write(&current->mm->mmap_sem);
1361 MY_DO_MUNMAP(current->mm, (unsigned long)pGip, PAGE_SIZE);
1362 up_write(&current->mm->mmap_sem);
1363 }
1364 LogFlow(("supdrvOSGipUnmap: returns 0\n"));
1365 return 0;
1366}
1367
1368
1369/**
1370 * Resumes the GIP updating.
1371 *
1372 * @param pDevExt Instance data.
1373 */
1374void VBOXCALL supdrvOSGipResume(PSUPDRVDEVEXT pDevExt)
1375{
1376 LogFlow(("supdrvOSGipResume:\n"));
1377 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, false);
1378#ifdef CONFIG_SMP
1379 if (pDevExt->pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1380 {
1381#endif
1382 vbox_ktimer_start(&g_GipTimer);
1383#ifdef CONFIG_SMP
1384 }
1385 else
1386 {
1387 vbox_ktimer_start(&g_GipTimer);
1388 smp_call_function(VBoxDrvLinuxGipResumePerCpu, pDevExt, 0 /* retry */, 1 /* wait */);
1389 }
1390#endif
1391}
1392
1393
#ifdef CONFIG_SMP
/**
 * Per-CPU worker for resuming GIP updating, run via smp_call_function().
 *
 * This is only used when the GIP is in async tsc mode. It records the
 * smp_processor_id() of the executing CPU in the aCPUs entry indexed by the
 * APIC ID and starts that entry's timer. An out of range APIC ID is reported
 * via printk and otherwise ignored.
 *
 * @param   pvUser      Pointer to the device instance.
 */
static void VBoxDrvLinuxGipResumePerCpu(void *pvUser)
{
    PSUPDRVDEVEXT   pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint8_t         idApic  = ASMGetApicId();

    if (RT_LIKELY(idApic < RT_ELEMENTS(pDevExt->pGip->aCPUs)))
    {
        pDevExt->aCPUs[idApic].iSmpProcessorId = smp_processor_id();
        vbox_ktimer_start(&pDevExt->aCPUs[idApic].Timer);
    }
    else
        printk("vboxdrv: error: apicid=%d max=%lu cpuid=%d\n",
               idApic, (unsigned long)RT_ELEMENTS(pDevExt->pGip->aCPUs), smp_processor_id());
}
#endif /* CONFIG_SMP */
1418
1419
1420/**
1421 * Suspends the GIP updating.
1422 *
1423 * @param pDevExt Instance data.
1424 */
1425void VBOXCALL supdrvOSGipSuspend(PSUPDRVDEVEXT pDevExt)
1426{
1427#ifdef CONFIG_SMP
1428 unsigned i;
1429#endif
1430 LogFlow(("supdrvOSGipSuspend:\n"));
1431 ASMAtomicXchgU8(&pDevExt->fGIPSuspended, true);
1432
1433 vbox_ktimer_stop(&g_GipTimer);
1434#ifdef CONFIG_SMP
1435 for (i = 0; i < RT_ELEMENTS(pDevExt->aCPUs); i++)
1436 vbox_ktimer_stop(&pDevExt->aCPUs[i].Timer);
1437#endif
1438}
1439
1440
1441/**
1442 * Get the current CPU count.
1443 * @returns Number of cpus.
1444 */
1445unsigned VBOXCALL supdrvOSGetCPUCount(void)
1446{
1447#ifdef CONFIG_SMP
1448# if defined(num_present_cpus) && !defined(VBOX_REDHAT_KABI)
1449 return num_present_cpus();
1450# elif defined(num_possible_cpus)
1451 return num_possible_cpus();
1452# else
1453 return smp_num_cpus;
1454# endif
1455#else
1456 return 1;
1457#endif
1458}
1459
1460/**
1461 * Force async tsc mode.
1462 * @todo add a module argument for this.
1463 */
1464bool VBOXCALL supdrvOSGetForcedAsyncTscMode(void)
1465{
1466 return force_async_tsc != 0;
1467}
1468
1469
1470/**
1471 * Converts a supdrv error code to an linux error code.
1472 *
1473 * @returns corresponding linux error code.
1474 * @param rc supdrv error code (SUPDRV_ERR_* defines).
1475 */
1476static int VBoxDrvLinuxErr2LinuxErr(int rc)
1477{
1478 switch (rc)
1479 {
1480 case 0: return 0;
1481 case SUPDRV_ERR_GENERAL_FAILURE: return -EACCES;
1482 case SUPDRV_ERR_INVALID_PARAM: return -EINVAL;
1483 case SUPDRV_ERR_INVALID_MAGIC: return -EILSEQ;
1484 case SUPDRV_ERR_INVALID_HANDLE: return -ENXIO;
1485 case SUPDRV_ERR_INVALID_POINTER: return -EFAULT;
1486 case SUPDRV_ERR_LOCK_FAILED: return -ENOLCK;
1487 case SUPDRV_ERR_ALREADY_LOADED: return -EEXIST;
1488 case SUPDRV_ERR_PERMISSION_DENIED: return -EPERM;
1489 case SUPDRV_ERR_VERSION_MISMATCH: return -ENOSYS;
1490 case SUPDRV_ERR_IDT_FAILED: return -1000;
1491 }
1492
1493 return -EPERM;
1494}
1495
1496
1497RTDECL(int) SUPR0Printf(const char *pszFormat, ...)
1498{
1499#if 1
1500 va_list args;
1501 char szMsg[512];
1502
1503 va_start(args, pszFormat);
1504 vsnprintf(szMsg, sizeof(szMsg) - 1, pszFormat, args);
1505 szMsg[sizeof(szMsg) - 1] = '\0';
1506 printk("%s", szMsg);
1507 va_end(args);
1508#else
1509 /* forward to printf - needs some more GCC hacking to fix ebp... */
1510 __asm__ __volatile__ ("mov %0, %esp\n\t"
1511 "jmp %1\n\t",
1512 :: "r" ((uintptr_t)&pszFormat - 4),
1513 "m" (printk));
1514#endif
1515 return 0;
1516}
1517
1518
1519/** Runtime assert implementation for Linux Ring-0. */
1520RTDECL(bool) RTAssertDoBreakpoint(void)
1521{
1522 return true;
1523}
1524
1525
1526/** Runtime assert implementation for Linux Ring-0. */
1527RTDECL(void) AssertMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
1528{
1529 printk("!!Assertion Failed!!\n"
1530 "Expression: %s\n"
1531 "Location : %s(%d) %s\n",
1532 pszExpr, pszFile, uLine, pszFunction);
1533}
1534
1535
1536/** Runtime assert implementation for Linux Ring-0. */
1537RTDECL(void) AssertMsg2(const char *pszFormat, ...)
1538{ /* forwarder. */
1539 va_list ap;
1540 char msg[256];
1541
1542 va_start(ap, pszFormat);
1543 vsnprintf(msg, sizeof(msg) - 1, pszFormat, ap);
1544 msg[sizeof(msg) - 1] = '\0';
1545 printk("%s", msg);
1546 va_end(ap);
1547}
1548
1549
1550/* GCC C++ hack: defines __gxx_personality_v0 as a plain data word.
 * NOTE(review): presumably only here so that references to the g++ exception
 * personality symbol resolve when the module is loaded - confirm. */
1551unsigned __gxx_personality_v0 = 0xcccccccc;
1552
1553
/* Module entry and exit points. */
1554module_init(VBoxDrvLinuxInit);
1555module_exit(VBoxDrvLinuxUnload);
1556
/* Module meta data. */
1557MODULE_AUTHOR("innotek GmbH");
1558MODULE_DESCRIPTION("VirtualBox Support Driver");
1559MODULE_LICENSE("GPL");
/* The version string is only emitted when the MODULE_VERSION macro is available. */
1560#ifdef MODULE_VERSION
1561MODULE_VERSION(VBOX_VERSION_STRING " (" xstr(SUPDRVIOC_VERSION) ")");
1562#endif
1563
/* Integer module parameter backing supdrvOSGetForcedAsyncTscMode(); declared
 * with module_param (mode 0444) on 2.6+ kernels and with MODULE_PARM on
 * older ones. */
1564#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
1565module_param(force_async_tsc, int, 0444);
1566#else
1567MODULE_PARM(force_async_tsc, "i");
1568#endif
1569MODULE_PARM_DESC(force_async_tsc, "force the asynchronous TSC mode");
1570
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette