VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/solaris/vbi/i86pc/os/vbi.c@ 30378

Last change on this file since 30378 was 30378, checked in by vboxsync, 15 years ago

Solaris/vbi: spaces->tabs.

  • Property svn:eol-style set to native
File size: 29.2 KB
Line 
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Private interfaces for VirtualBox access to Solaris kernel internal
28 * facilities.
29 *
30 * See sys/vbi.h for what each function does.
31 */
32
33#include <sys/kmem.h>
34#include <sys/types.h>
35#include <sys/mman.h>
36#include <sys/thread.h>
37#include <sys/mutex.h>
38#include <sys/condvar.h>
39#include <sys/sdt.h>
40#include <sys/schedctl.h>
41#include <sys/time.h>
42#include <sys/sysmacros.h>
43#include <sys/cmn_err.h>
44#include <sys/vmsystm.h>
45#include <sys/cyclic.h>
46#include <sys/class.h>
47#include <sys/cpuvar.h>
48#include <sys/kobj.h>
49#include <sys/x_call.h>
50#include <sys/x86_archext.h>
51#include <vm/hat.h>
52#include <vm/seg_vn.h>
53#include <vm/seg_kmem.h>
54#include <sys/ddi.h>
55#include <sys/sunddi.h>
56#include <sys/modctl.h>
57#include <sys/machparam.h>
58#include <sys/utsname.h>
59
60#include "vbi.h"
61
/* Current process as a proc_t; vbi_proc() returns void * for interface neutrality. */
#define VBIPROC() ((proc_t *)vbi_proc())

/*
 * We have to use dl_lookup to find contig_free().
 */
extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
extern void contig_free(void *, size_t);
/* Weak: contig_free() is static in older kernels, so the symbol may be absent. */
#pragma weak contig_free
/* NULL when the weak symbol did not resolve; fixed up in vbi_init(). */
static void (*p_contig_free)(void *, size_t) = contig_free;
71
72/*
73 * Workarounds for running on old versions of solaris with different cross call
 * interfaces. If we find xc_init_cpu() in the kernel, then just use the defined
75 * interfaces for xc_call() from the include file where the xc_call()
76 * interfaces just takes a pointer to a ulong_t array. The array must be long
77 * enough to hold "ncpus" bits at runtime.
 *
79 * The reason for the hacks is that using the type "cpuset_t" is pretty much
80 * impossible from code built outside the Solaris source repository that wants
81 * to run on multiple releases of Solaris.
82 *
83 * For old style xc_call()s, 32 bit solaris and older 64 bit versions use
84 * "ulong_t" as cpuset_t.
85 *
86 * Later versions of 64 bit Solaris used: struct {ulong_t words[x];}
87 * where "x" depends on NCPU.
88 *
89 * We detect the difference in 64 bit support by checking the kernel value of
90 * max_cpuid, which always holds the compiled value of NCPU - 1.
91 *
92 * If Solaris increases NCPU to more than 256, this module will continue
93 * to work on all versions of Solaris as long as the number of installed
94 * CPUs in the machine is <= VBI_NCPU. If VBI_NCPU is increased, this code
95 * has to be re-written some to provide compatibility with older Solaris which
96 * expects cpuset_t to be based on NCPU==256 -- or we discontinue support
97 * of old Nevada/S10.
98 */
/* Nonzero when the kernel lacks xc_init_cpu() and the old xc_call() ABI applies. */
static int use_old = 0;
/* Nonzero when the old ABI's cpuset_t is a plain ulong_t (NCPU <= word size). */
static int use_old_with_ulong = 0;
/* Untyped trampoline so both old- and new-style argument shapes can be passed. */
static void (*p_xc_call)() = (void (*)())xc_call;

/* Compile-time upper bound on CPUs addressable in a cross call; see comment above. */
#define VBI_NCPU 256
#define VBI_SET_WORDS (VBI_NCPU / (sizeof (ulong_t) * 8))
/* Local stand-in for the kernel's cpuset_t: a VBI_NCPU-bit vector. */
typedef struct vbi_cpuset {
	ulong_t words[VBI_SET_WORDS];
} vbi_cpuset_t;
#define X_CALL_HIPRI (2)	/* for old Solaris interface */
109
110/*
111 * module linkage stuff
112 */
#if 0
/* Disabled miscmod linkage: vbi is linked into the VBox modules, not loaded alone. */
static struct modlmisc vbi_modlmisc = {
	&mod_miscops, "VirtualBox Interfaces V8"
};

static struct modlinkage vbi_modlinkage = {
	MODREV_1, { (void *)&vbi_modlmisc, NULL }
};
#endif

extern uintptr_t kernelbase;
/* True when v lies in the kernel's address range (at or above kernelbase). */
#define IS_KERNEL(v) ((uintptr_t)(v) >= kernelbase)

#if 0
/* Disabled debug chatter switch. */
static int vbi_verbose = 0;

#define VBI_VERBOSE(msg) {if (vbi_verbose) cmn_err(CE_WARN, msg);}
#endif
131
/* Introduced in v8 */
/* Set by vbi_init() after the offset sanity checks pass; gates other entry points. */
static int vbi_is_initialized = 0;

/* Introduced in v6 */
/* 1 when running on a 5.11 (Nevada/OpenSolaris) kernel, 0 for Solaris 10. */
static int vbi_is_nevada = 0;

/*
 * Hard-coded byte offsets of the cpu_t and kthread_t fields we must poke
 * (cpu_runrun, cpu_kprunrun, t_preempt), per release and data model.  The
 * structures are not part of the DDI, so the offsets were taken from the
 * respective kernel builds; vbi_init() selects and sanity-checks a set.
 */
#ifdef _LP64
/* 64-bit Solaris 10 offsets */
/* CPU */
static int off_s10_cpu_runrun = 232;
static int off_s10_cpu_kprunrun = 233;
/* kthread_t */
static int off_s10_t_preempt = 42;

/* 64-bit Solaris 11 (Nevada/OpenSolaris) offsets */
/* CPU */
static int off_s11_cpu_runrun = 216;
static int off_s11_cpu_kprunrun = 217;
/* kthread_t */
static int off_s11_t_preempt = 42;
#else
/* 32-bit Solaris 10 offsets */
/* CPU */
static int off_s10_cpu_runrun = 124;
static int off_s10_cpu_kprunrun = 125;
/* kthread_t */
static int off_s10_t_preempt = 26;

/* 32-bit Solaris 11 (Nevada/OpenSolaris) offsets */
/* CPU */
static int off_s11_cpu_runrun = 112;
static int off_s11_cpu_kprunrun = 113;
/* kthread_t */
static int off_s11_t_preempt = 26;
#endif


/* Which offsets will be used */
static int off_cpu_runrun = -1;
static int off_cpu_kprunrun = -1;
static int off_t_preempt = -1;

/* Field accessors built on the selected offsets (byte-wide fields). */
#define VBI_T_PREEMPT (*((char *)curthread + off_t_preempt))
#define VBI_CPU_KPRUNRUN (*((char *)CPU + off_cpu_kprunrun))
#define VBI_CPU_RUNRUN (*((char *)CPU + off_cpu_runrun))

/* Shadow the kernel's inline versions with offset-based equivalents below. */
#undef kpreempt_disable
#undef kpreempt_enable

/* Bump the thread's preempt-disable count. */
#define VBI_PREEMPT_DISABLE() \
	{ \
		VBI_T_PREEMPT++; \
		ASSERT(VBI_T_PREEMPT >= 1); \
	}
/* Drop the count; on the last drop, honor any pending preemption request. */
#define VBI_PREEMPT_ENABLE() \
	{ \
		ASSERT(VBI_T_PREEMPT >= 1); \
		if (--VBI_T_PREEMPT == 0 && \
		    VBI_CPU_RUNRUN) \
			kpreempt(KPREEMPT_SYNC); \
	}

/* End of v6 intro */
195
#if 0
/* Disabled module-load entry point (vbi is not built as a standalone miscmod). */
int
_init(void)
{
	int err = vbi_init();
	if (!err)
		err = mod_install(&vbi_modlinkage);
	return (err);
}
#endif
206
207int
208vbi_init(void)
209{
210 /*
211 * Check to see if this version of virtualbox interface module will work
212 * with the kernel.
213 */
214 if (kobj_getsymvalue("xc_init_cpu", 1) != NULL) {
215 /*
216 * Our bit vector storage needs to be large enough for the
217 * actual number of CPUs running in the sytem.
218 */
219 if (ncpus > VBI_NCPU) {
220 cmn_err(CE_NOTE, "cpu count mismatch.\n");
221 return (EINVAL);
222 }
223 } else {
224 use_old = 1;
225 if (max_cpuid + 1 == sizeof(ulong_t) * 8)
226 use_old_with_ulong = 1;
227 else if (max_cpuid + 1 != VBI_NCPU)
228 {
229 cmn_err(CE_NOTE, "cpuset_t size mismatch. probably too old a kernel.\n");
230 return (EINVAL); /* cpuset_t size mismatch */
231 }
232 }
233
234 /*
235 * In older versions of Solaris contig_free() is a static routine.
236 */
237 if (p_contig_free == NULL) {
238 p_contig_free = (void (*)(void *, size_t))
239 kobj_getsymvalue("contig_free", 1);
240 if (p_contig_free == NULL) {
241 cmn_err(CE_NOTE, " contig_free() not found in kernel\n");
242 return (EINVAL);
243 }
244 }
245
246 /*
247 * Check if this is S10 or Nevada
248 */
249 if (!strncmp(utsname.release, "5.11", sizeof("5.11") - 1)) {
250 /* Nevada detected... */
251 vbi_is_nevada = 1;
252
253 off_cpu_runrun = off_s11_cpu_runrun;
254 off_cpu_kprunrun = off_s11_cpu_kprunrun;
255 off_t_preempt = off_s11_t_preempt;
256 } else {
257 /* Solaris 10 detected... */
258 vbi_is_nevada = 0;
259
260 off_cpu_runrun = off_s10_cpu_runrun;
261 off_cpu_kprunrun = off_s10_cpu_kprunrun;
262 off_t_preempt = off_s10_t_preempt;
263 }
264
265 /*
266 * Sanity checking...
267 */
268 /* CPU */
269 char crr = VBI_CPU_RUNRUN;
270 char krr = VBI_CPU_KPRUNRUN;
271 if ( (crr < 0 || crr > 1)
272 || (krr < 0 || krr > 1)) {
273 cmn_err(CE_NOTE, ":CPU structure sanity check failed! OS version mismatch.\n");
274 return EINVAL;
275 }
276
277 /* Thread */
278 char t_preempt = VBI_T_PREEMPT;
279 if (t_preempt < 0 || t_preempt > 32) {
280 cmn_err(CE_NOTE, ":Thread structure sanity check failed! OS version mismatch.\n");
281 return EINVAL;
282 }
283
284 vbi_is_initialized = 1;
285
286 return (0);
287}
288
#if 0
/* Disabled module-unload/info entry points (see the #if 0 linkage above). */
int
_fini(void)
{
	int err = mod_remove(&vbi_modlinkage);
	if (err != 0)
		return (err);

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&vbi_modlinkage, modinfop));
}
#endif
306
307
/*
 * Template DMA attributes for vbi_internal_alloc(); each call copies this
 * and overrides the high limit, alignment, and scatter/gather length.
 */
static ddi_dma_attr_t base_attr = {
	DMA_ATTR_V0,		/* Version Number */
	(uint64_t)0,		/* lower limit */
	(uint64_t)0,		/* high limit */
	(uint64_t)0xffffffff,	/* counter limit */
	(uint64_t)PAGESIZE,	/* pagesize alignment */
	(uint64_t)PAGESIZE,	/* pagesize burst size */
	(uint64_t)PAGESIZE,	/* pagesize effective DMA size */
	(uint64_t)0xffffffff,	/* max DMA xfer size */
	(uint64_t)0xffffffff,	/* segment boundary */
	1,			/* list length (1 for contiguous) */
	1,			/* device granularity */
	0			/* bus-specific flags */
};
322
323static void *
324vbi_internal_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
325{
326 ddi_dma_attr_t attr;
327 pfn_t pfn;
328 void *ptr;
329 uint_t npages;
330
331 if ((size & PAGEOFFSET) != 0)
332 return (NULL);
333 npages = (size + PAGESIZE - 1) >> PAGESHIFT;
334 if (npages == 0)
335 return (NULL);
336
337 attr = base_attr;
338 attr.dma_attr_addr_hi = *phys;
339 attr.dma_attr_align = alignment;
340 if (!contig)
341 attr.dma_attr_sgllen = npages;
342 ptr = contig_alloc(size, &attr, PAGESIZE, 1);
343
344 if (ptr == NULL) {
345 cmn_err(CE_NOTE, "vbi_internal_alloc() failure for %lu bytes", size);
346 return (NULL);
347 }
348
349 pfn = hat_getpfnum(kas.a_hat, (caddr_t)ptr);
350 if (pfn == PFN_INVALID)
351 panic("vbi_contig_alloc(): hat_getpfnum() failed\n");
352 *phys = (uint64_t)pfn << PAGESHIFT;
353 return (ptr);
354}
355
356void *
357vbi_contig_alloc(uint64_t *phys, size_t size)
358{
359 /* Obsolete */
360 return (vbi_internal_alloc(phys, size, PAGESIZE /* alignment */, 1 /* contiguous */));
361}
362
363void
364vbi_contig_free(void *va, size_t size)
365{
366 /* Obsolete */
367 p_contig_free(va, size);
368}
369
370void *
371vbi_kernel_map(uint64_t pa, size_t size, uint_t prot)
372{
373 caddr_t va;
374
375 if ((pa & PAGEOFFSET) || (size & PAGEOFFSET)) {
376 cmn_err(CE_NOTE, "vbi_kernel_map() bad pa (0x%lx) or size (%lu)", pa, size);
377 return (NULL);
378 }
379
380 va = vmem_alloc(heap_arena, size, VM_SLEEP);
381
382 hat_devload(kas.a_hat, va, size, (pfn_t)(pa >> PAGESHIFT),
383 prot, HAT_LOAD | HAT_LOAD_LOCK | HAT_UNORDERED_OK);
384
385 return (va);
386}
387
/*
 * Tear down a mapping created by vbi_kernel_map() or vbi_user_map().
 * Kernel addresses are unloaded from the HAT and the VA returned to
 * heap_arena; user addresses are unmapped from the current process's
 * address space under the as range lock.
 */
void
vbi_unmap(void *va, size_t size)
{
	if (IS_KERNEL(va)) {
		hat_unload(kas.a_hat, va, size, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
		vmem_free(heap_arena, va, size);
	} else {
		struct as *as = VBIPROC()->p_as;

		as_rangelock(as);
		(void) as_unmap(as, va, size);
		as_rangeunlock(as);
	}
}
402
403void *
404vbi_curthread(void)
405{
406 return (curthread);
407}
408
/*
 * Report whether the caller should yield: returns 1 when a kernel
 * preemption is pending (cpu_kprunrun set) and the only preempt-disable
 * hold on this thread is the one we take here, else 0.
 */
int
vbi_yield(void)
{
	int rv = 0;

	/* Pin to this CPU so the cpu_kprunrun read is against our own cpu_t. */
	vbi_preempt_disable();

	char tpr = VBI_T_PREEMPT;
	char kpr = VBI_CPU_KPRUNRUN;
	if (tpr == 1 && kpr)
		rv = 1;

	vbi_preempt_enable();
	return (rv);
}
424
425uint64_t
426vbi_timer_granularity(void)
427{
428 return (nsec_per_tick);
429}
430
/* State for one cyclic-backed timer created by vbi_timer_create(). */
typedef struct vbi_timer {
	cyc_handler_t	vbi_handler;	/* cyclic callback descriptor */
	cyclic_id_t	vbi_cyclic;	/* CYCLIC_NONE while stopped */
	uint64_t	vbi_interval;	/* period in ns; 0 means one-shot */
	void		(*vbi_func)();	/* client callback */
	void		*vbi_arg1;
	void		*vbi_arg2;
} vbi_timer_t;

/*
 * Cyclic handler: for one-shot timers (interval 0) tear the cyclic down
 * before invoking the client callback.
 */
static void
vbi_timer_callback(void *arg)
{
	vbi_timer_t *t = arg;

	if (t->vbi_interval == 0)
		vbi_timer_stop(arg);
	t->vbi_func(t->vbi_arg1, t->vbi_arg2);
}
449
450void *
451vbi_timer_create(void *callback, void *arg1, void *arg2, uint64_t interval)
452{
453 vbi_timer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
454
455 t->vbi_func = (void (*)())callback;
456 t->vbi_arg1 = arg1;
457 t->vbi_arg2 = arg2;
458 t->vbi_handler.cyh_func = vbi_timer_callback;
459 t->vbi_handler.cyh_arg = (void *)t;
460 t->vbi_handler.cyh_level = CY_LOCK_LEVEL;
461 t->vbi_cyclic = CYCLIC_NONE;
462 t->vbi_interval = interval;
463 return (t);
464}
465
466void
467vbi_timer_destroy(void *timer)
468{
469 vbi_timer_t *t = timer;
470 if (t != NULL) {
471 vbi_timer_stop(timer);
472 kmem_free(t, sizeof (*t));
473 }
474}
475
/*
 * Arm the timer to fire `when` nanoseconds from now.  cyclic_add()
 * requires cpu_lock.  For one-shot timers (interval 0) the interval
 * field still needs some value; the callback removes the cyclic on
 * first fire before it can matter.
 */
void
vbi_timer_start(void *timer, uint64_t when)
{
	vbi_timer_t *t = timer;
	cyc_time_t fire_time;
	uint64_t interval = t->vbi_interval;

	mutex_enter(&cpu_lock);
	when += gethrtime();	/* convert relative delay to absolute hrtime */
	fire_time.cyt_when = when;
	if (interval == 0)
		fire_time.cyt_interval = when;
	else
		fire_time.cyt_interval = interval;
	t->vbi_cyclic = cyclic_add(&t->vbi_handler, &fire_time);
	mutex_exit(&cpu_lock);
}
493
/*
 * Disarm the timer if it is armed.  The CYCLIC_NONE check is repeated
 * under cpu_lock because the unlocked first check is only a fast-path
 * short cut; the locked one is authoritative.
 */
void
vbi_timer_stop(void *timer)
{
	vbi_timer_t *t = timer;

	if (t->vbi_cyclic == CYCLIC_NONE)
		return;
	mutex_enter(&cpu_lock);
	if (t->vbi_cyclic != CYCLIC_NONE) {
		cyclic_remove(t->vbi_cyclic);
		t->vbi_cyclic = CYCLIC_NONE;
	}
	mutex_exit(&cpu_lock);
}
508
509uint64_t
510vbi_tod(void)
511{
512 timestruc_t ts;
513
514 mutex_enter(&tod_lock);
515 ts = tod_get();
516 mutex_exit(&tod_lock);
517 return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
518}
519
520
521void *
522vbi_proc(void)
523{
524 proc_t *p;
525 drv_getparm(UPROCP, &p);
526 return (p);
527}
528
/*
 * Change a kernel thread's scheduling priority.  thread_change_pri()
 * must be called with the thread locked.
 */
void
vbi_set_priority(void *thread, int priority)
{
	kthread_t *t = thread;

	thread_lock(t);
	(void) thread_change_pri(t, priority, 0);
	thread_unlock(t);
}
538
539void *
540vbi_thread_create(void *func, void *arg, size_t len, int priority)
541{
542 kthread_t *t;
543
544 t = thread_create(NULL, NULL, (void (*)())func, arg, len,
545 VBIPROC(), TS_RUN, priority);
546 return (t);
547}
548
/* Terminate the calling kernel thread; does not return. */
void
vbi_thread_exit(void)
{
	thread_exit();
}
554
555void *
556vbi_text_alloc(size_t size)
557{
558 return (segkmem_alloc(heaptext_arena, size, KM_SLEEP));
559}
560
561void
562vbi_text_free(void *va, size_t size)
563{
564 segkmem_free(heaptext_arena, va, size);
565}
566
/* Id of the CPU the caller is currently running on. */
int
vbi_cpu_id(void)
{
	return (CPU->cpu_id);
}

/* Highest possible CPU id (kernel's compiled NCPU - 1). */
int
vbi_max_cpu_id(void)
{
	return (max_cpuid);
}

/* Maximum number of CPU slots, i.e. max_cpuid + 1. */
int
vbi_cpu_maxcount(void)
{
	return (max_cpuid + 1);
}

/* Number of CPUs currently configured. */
int
vbi_cpu_count(void)
{
	return (ncpus);
}
590
591int
592vbi_cpu_online(int c)
593{
594 int x;
595
596 mutex_enter(&cpu_lock);
597 x = cpu_is_online(cpu[c]);
598 mutex_exit(&cpu_lock);
599 return (x);
600}
601
/* Disable kernel preemption for the caller (nests; see VBI_PREEMPT_DISABLE). */
void
vbi_preempt_disable(void)
{
	VBI_PREEMPT_DISABLE();
}

/* Re-enable preemption; honors a pending request when the count hits zero. */
void
vbi_preempt_enable(void)
{
	VBI_PREEMPT_ENABLE();
}
613
/*
 * Run func(arg) on every CPU via a cross call.  Three ABI variants are
 * handled (selected at vbi_init()): new xc_call() taking a ulong_t bit
 * array, old xc_call() with a struct-valued cpuset, and old xc_call()
 * with a plain ulong_t cpuset.
 */
void
vbi_execute_on_all(void *func, void *arg)
{
	vbi_cpuset_t set;
	int i;

	/* All bits set: target every CPU. */
	for (i = 0; i < VBI_SET_WORDS; ++i)
		set.words[i] = (ulong_t)-1L;
	if (use_old) {
		if (use_old_with_ulong) {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set.words[0], (xc_func_t)func);
		} else {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set, (xc_func_t)func);
		}
	} else {
		xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
	}
}
634
/*
 * Run func(arg) on every CPU except the caller's.  Same three-way ABI
 * dispatch as vbi_execute_on_all(); the caller's bit is cleared from
 * the target set before the cross call.
 */
void
vbi_execute_on_others(void *func, void *arg)
{
	vbi_cpuset_t set;
	int i;

	for (i = 0; i < VBI_SET_WORDS; ++i)
		set.words[i] = (ulong_t)-1L;
	BT_CLEAR(set.words, vbi_cpu_id());	/* exclude ourselves */
	if (use_old) {
		if (use_old_with_ulong) {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set.words[0], (xc_func_t)func);
		} else {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set, (xc_func_t)func);
		}
	} else {
		xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
	}
}
656
/*
 * Run func(arg) on the single CPU with id c.  Same three-way ABI
 * dispatch as vbi_execute_on_all(); only c's bit is set in the target
 * set.
 */
void
vbi_execute_on_one(void *func, void *arg, int c)
{
	vbi_cpuset_t set;
	int i;

	for (i = 0; i < VBI_SET_WORDS; ++i)
		set.words[i] = 0;
	BT_SET(set.words, c);
	if (use_old) {
		if (use_old_with_ulong) {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set.words[0], (xc_func_t)func);
		} else {
			p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
			    set, (xc_func_t)func);
		}
	} else {
		xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
	}
}
678
/*
 * Fault in and lock a user virtual range for the given access mode.
 * Kernel ranges are a no-op (x86 kernel mappings are always locked).
 * *handle is always set to NULL; the handle is unused on this platform.
 * Returns 0 on success, -1 if the soft lock failed.
 */
int
vbi_lock_va(void *addr, size_t len, int access, void **handle)
{
	faultcode_t err;

	/*
	 * kernel mappings on x86 are always locked, so only handle user.
	 */
	*handle = NULL;
	if (!IS_KERNEL(addr)) {
		err = as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
		    (caddr_t)addr, len, F_SOFTLOCK, access);
		if (err != 0) {
			cmn_err(CE_NOTE, "vbi_lock_va() failed to lock");
			return (-1);
		}
	}
	return (0);
}
698
/*ARGSUSED*/
/*
 * Release a soft lock taken by vbi_lock_va().  handle is unused; access
 * and the address range must match the locking call.
 */
void
vbi_unlock_va(void *addr, size_t len, int access, void *handle)
{
	if (!IS_KERNEL(addr))
		as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
		    (caddr_t)addr, len, F_SOFTUNLOCK, access);
}
707
/*
 * Translate a virtual address (kernel or current-process user) to a
 * physical address.  Returns (uint64_t)-1 when no mapping exists.
 */
uint64_t
vbi_va_to_pa(void *addr)
{
	struct hat *hat;
	pfn_t pfn;
	uintptr_t v = (uintptr_t)addr;

	if (IS_KERNEL(v))
		hat = kas.a_hat;
	else
		hat = VBIPROC()->p_as->a_hat;
	pfn = hat_getpfnum(hat, (caddr_t)(v & PAGEMASK));
	if (pfn == PFN_INVALID)
		return (-(uint64_t)1);
	/* Re-attach the page offset that was masked off for the lookup. */
	return (((uint64_t)pfn << PAGESHIFT) | (v & PAGEOFFSET));
}
724
725
/* Arguments handed to segvbi_create() through as_map(). */
struct segvbi_crargs {
	uint64_t *palist;	/* one physical address per page to map */
	uint_t prot;		/* protection bits for the mappings */
};

/* Per-segment private data; only the protections are recorded. */
struct segvbi_data {
	uint_t prot;
};

/* Forward declaration; the table is defined after the ops themselves. */
static struct seg_ops segvbi_ops;
736
/*
 * as_map() callback: build a segvbi segment over seg's range, then load
 * locked device mappings for each page listed in the creation args.
 * Always returns 0.
 */
static int
segvbi_create(struct seg *seg, void *args)
{
	struct segvbi_crargs *a = args;
	struct segvbi_data *data;
	struct as *as = seg->s_as;
	caddr_t va;
	ulong_t pgcnt;
	ulong_t p;

	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
	data->prot = a->prot | PROT_USER;	/* user space must reach these pages */

	seg->s_ops = &segvbi_ops;
	seg->s_data = data;

	/*
	 * now load locked mappings to the pages
	 */
	va = seg->s_base;
	pgcnt = (seg->s_size + PAGESIZE - 1) >> PAGESHIFT;
	for (p = 0; p < pgcnt; ++p, va += PAGESIZE) {
		hat_devload(as->a_hat, va,
		    PAGESIZE, a->palist[p] >> PAGESHIFT,
		    data->prot | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
	}

	return (0);
}
767
/*
 * Duplicate a seg and return new segment in newseg.
 */
static int
segvbi_dup(struct seg *seg, struct seg *newseg)
{
	struct segvbi_data *data = seg->s_data;
	struct segvbi_data *ndata;

	/* Only the private data is copied; mappings are not re-loaded here. */
	ndata = kmem_zalloc(sizeof (*data), KM_SLEEP);
	ndata->prot = data->prot;
	newseg->s_ops = &segvbi_ops;
	newseg->s_data = ndata;

	return (0);
}
784
/*
 * Unmap part or all of a segvbi segment.  Only whole-segment unmaps are
 * supported (ENOTSUP otherwise); misaligned or out-of-range requests
 * are a caller bug and panic.
 */
static int
segvbi_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
		panic("segvbi_unmap");

	if (addr != seg->s_base || len != seg->s_size)
		return (ENOTSUP);

	hat_unload(seg->s_as->a_hat, addr, len,
	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);

	seg_free(seg);
	return (0);
}
801
802static void
803segvbi_free(struct seg *seg)
804{
805 struct segvbi_data *data = seg->s_data;
806 kmem_free(data, sizeof (*data));
807}
808
/*
 * We would demand fault if the (u)read() path would SEGOP_FAULT()
 * on buffers mapped in via vbi_user_map() i.e. prefaults before DMA.
 * Don't fail in such case where we're called directly, see #5047.
 */
static int
segvbi_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	return (0);
}

/* Fault-ahead: nothing to do, mappings are pre-loaded and locked. */
static int
segvbi_faulta(struct seg *seg, caddr_t addr)
{
	return (0);
}
826
/* Protection changes are not permitted on segvbi segments. */
static int
segvbi_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EACCES);
}

/* Protection queries via checkprot are not supported. */
static int
segvbi_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}

/* No read-ahead clustering for device-style mappings. */
static int
segvbi_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (-1);
}

/* Nothing to sync: pages are locked in and never dirtied to backing store. */
static int
segvbi_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	return (0);
}
850
851static size_t
852segvbi_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
853{
854 size_t v;
855
856 for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
857 len -= PAGESIZE, v += PAGESIZE)
858 *vec++ = 1;
859 return (v);
860}
861
/* Lock operations are a no-op: mappings are already locked at creation. */
static int
segvbi_lockop(struct seg *seg, caddr_t addr,
    size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
{
	return (0);
}

/*
 * Report the (uniform) protections for each page in [addr, addr+len).
 * Fills protv back-to-front with the single prot value stored at
 * segment creation.
 */
static int
segvbi_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segvbi_data *data = seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
	if (pgno != 0)
	{
		do
		{
			pgno--;
			protv[pgno] = data->prot;
		} while (pgno != 0);
	}
	return (0);
}
884
/* Offset within the segment's notional backing object: distance from s_base. */
static u_offset_t
segvbi_getoffset(struct seg *seg, caddr_t addr)
{
	return ((uintptr_t)addr - (uintptr_t)seg->s_base);
}

/* segvbi mappings always behave as MAP_SHARED. */
static int
segvbi_gettype(struct seg *seg, caddr_t addr)
{
	return (MAP_SHARED);
}

/* Dummy vnode handed out by segvbi_getvp(); there is no real backing file. */
static vnode_t vbivp;

static int
segvbi_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	*vpp = &vbivp;
	return (0);
}

/* madvise()-style hints are accepted and ignored. */
static int
segvbi_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	return (0);
}
911
/* Crash-dump support: nothing to contribute. */
static void
segvbi_dump(struct seg *seg)
{}

/* Page lists for I/O are not supported on segvbi segments. */
static int
segvbi_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

/* Large-page promotion is not supported. */
static int
segvbi_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

/* No memid backing; fails any shmat-style lookups. */
static int
segvbi_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
{
	return (ENODEV);
}

/* No NUMA memory placement policy attached. */
static lgrp_mem_policy_info_t *
segvbi_getpolicy(struct seg *seg, caddr_t addr)
{
	return (NULL);
}

/* Advertise no optional segment capabilities. */
static int
segvbi_capable(struct seg *seg, segcapability_t capability)
{
	return (0);
}
946
/*
 * Segment driver vector for segvbi mappings.  Order must match the
 * kernel's struct seg_ops; swapout is unused and left NULL.
 */
static struct seg_ops segvbi_ops = {
	segvbi_dup,
	segvbi_unmap,
	segvbi_free,
	segvbi_fault,
	segvbi_faulta,
	segvbi_setprot,
	segvbi_checkprot,
	(int (*)())segvbi_kluster,
	(size_t (*)(struct seg *))NULL, /* swapout */
	segvbi_sync,
	segvbi_incore,
	segvbi_lockop,
	segvbi_getprot,
	segvbi_getoffset,
	segvbi_gettype,
	segvbi_getvp,
	segvbi_advise,
	segvbi_dump,
	segvbi_pagelock,
	segvbi_setpagesize,
	segvbi_getmemid,
	segvbi_getpolicy,
	segvbi_capable
};
972
973
974
975/*
976 * Interfaces to inject physical pages into user address space
977 * and later remove them.
978 */
/*
 * Map the physical pages in palist into the current process at an
 * address chosen by map_addr(); *va returns the chosen address.
 * Returns 0 on success, ENOMEM when no address range is available, or
 * the as_map() error.  Undo with vbi_unmap().
 */
int
vbi_user_map(caddr_t *va, uint_t prot, uint64_t *palist, size_t len)
{
	struct as *as = VBIPROC()->p_as;
	struct segvbi_crargs args;
	int error = 0;

	args.palist = palist;
	args.prot = prot;
	as_rangelock(as);
	map_addr(va, len, 0, 0, MAP_SHARED);
	if (*va != NULL)
	{
		error = as_map(as, *va, len, segvbi_create, &args);
	}
	else
		error = ENOMEM;
	if (error)
		cmn_err(CE_NOTE, "vbi_user_map() failed error=%d", error);
	as_rangeunlock(as);
	return (error);
}
1001
1002
1003/*
1004 * This is revision 2 of the interface.
1005 */
1006
/* Client callback registration handed to the cpu_setup notifier. */
struct vbi_cpu_watch {
	void (*vbi_cpu_func)();	/* called as func(arg, cpu_id, online) */
	void *vbi_cpu_arg;
};

/*
 * cpu_setup notifier: forward CPU_ON/CPU_OFF transitions to the client
 * as func(arg, cpu, 1/0); all other state changes are ignored.
 */
static int
vbi_watcher(cpu_setup_t state, int icpu, void *arg)
{
	vbi_cpu_watch_t *w = arg;
	int online;

	if (state == CPU_ON)
		online = 1;
	else if (state == CPU_OFF)
		online = 0;
	else
		return (0);
	w->vbi_cpu_func(w->vbi_cpu_arg, icpu, online);
	return (0);
}
1027
/*
 * Register func(arg, cpu, online) to be called on CPU online/offline
 * transitions.  When current_too is set, func is also invoked once,
 * under cpu_lock, for every CPU already online.  Returns the watch
 * handle for vbi_ignore_cpus().
 */
vbi_cpu_watch_t *
vbi_watch_cpus(void (*func)(), void *arg, int current_too)
{
	int c;
	vbi_cpu_watch_t *w;

	w = kmem_alloc(sizeof (*w), KM_SLEEP);
	w->vbi_cpu_func = func;
	w->vbi_cpu_arg = arg;
	mutex_enter(&cpu_lock);
	register_cpu_setup_func(vbi_watcher, w);
	if (current_too) {
		for (c = 0; c < ncpus; ++c) {
			if (cpu_is_online(cpu[c]))
				func(arg, c, 1);
		}
	}
	mutex_exit(&cpu_lock);
	return (w);
}
1048
1049void
1050vbi_ignore_cpus(vbi_cpu_watch_t *w)
1051{
1052 mutex_enter(&cpu_lock);
1053 unregister_cpu_setup_func(vbi_watcher, w);
1054 mutex_exit(&cpu_lock);
1055 kmem_free(w, sizeof (*w));
1056}
1057
1058/*
1059 * Simple timers are pretty much a pass through to the cyclic subsystem.
1060 */
/* State for one simple (single-CPU) cyclic timer. */
struct vbi_stimer {
	cyc_handler_t	s_handler;	/* cyclic callback descriptor */
	cyc_time_t	s_fire_time;	/* when/interval used at arming */
	cyclic_id_t	s_cyclic;	/* installed cyclic */
	uint64_t	s_tick;		/* fire count, starts at 1 on first fire */
	void		(*s_func)(void *, uint64_t);
	void		*s_arg;
};

/* Cyclic handler: deliver the client callback with an incrementing tick. */
static void
vbi_stimer_func(void *arg)
{
	vbi_stimer_t *t = arg;
	t->s_func(t->s_arg, ++t->s_tick);
}
1076
1077extern vbi_stimer_t *
1078vbi_stimer_begin(
1079 void (*func)(void *, uint64_t),
1080 void *arg,
1081 uint64_t when,
1082 uint64_t interval,
1083 int on_cpu)
1084{
1085 vbi_stimer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1086
1087 t->s_handler.cyh_func = vbi_stimer_func;
1088 t->s_handler.cyh_arg = t;
1089 t->s_handler.cyh_level = CY_LOCK_LEVEL;
1090 t->s_tick = 0;
1091 t->s_func = func;
1092 t->s_arg = arg;
1093
1094 mutex_enter(&cpu_lock);
1095 if (on_cpu != VBI_ANY_CPU && !cpu_is_online(cpu[on_cpu])) {
1096 t = NULL;
1097 goto done;
1098 }
1099
1100 when += gethrtime();
1101 t->s_fire_time.cyt_when = when;
1102 if (interval == 0)
1103 t->s_fire_time.cyt_interval = INT64_MAX - when;
1104 else
1105 t->s_fire_time.cyt_interval = interval;
1106 t->s_cyclic = cyclic_add(&t->s_handler, &t->s_fire_time);
1107 if (on_cpu != VBI_ANY_CPU)
1108 cyclic_bind(t->s_cyclic, cpu[on_cpu], NULL);
1109done:
1110 mutex_exit(&cpu_lock);
1111 return (t);
1112}
1113
1114extern void
1115vbi_stimer_end(vbi_stimer_t *t)
1116{
1117 mutex_enter(&cpu_lock);
1118 cyclic_remove(t->s_cyclic);
1119 mutex_exit(&cpu_lock);
1120 kmem_free(t, sizeof (*t));
1121}
1122
/*
 * Global timers are more complicated. They include a counter on the callback,
 * that indicates the first call on a given cpu.
 */
struct vbi_gtimer {
	uint64_t	*g_counters;	/* per-CPU fire counts, indexed by cpu id */
	void		(*g_func)(void *, uint64_t);
	void		*g_arg;
	uint64_t	g_when;		/* absolute first-fire hrtime */
	uint64_t	g_interval;	/* period in ns (never 0; see begin) */
	cyclic_id_t	g_cyclic;	/* omni-cyclic id */
};

/* Per-CPU cyclic handler: deliver callback with that CPU's fire count. */
static void
vbi_gtimer_func(void *arg)
{
	vbi_gtimer_t *t = arg;
	t->g_func(t->g_arg, ++t->g_counters[vbi_cpu_id()]);
}
1142
/*
 * Whenever a cpu is onlined, need to reset the g_counters[] for it to zero.
 */
static void
vbi_gtimer_online(void *arg, cpu_t *pcpu, cyc_handler_t *h, cyc_time_t *ct)
{
	vbi_gtimer_t *t = arg;
	hrtime_t now;

	t->g_counters[pcpu->cpu_id] = 0;
	h->cyh_func = vbi_gtimer_func;
	h->cyh_arg = t;
	h->cyh_level = CY_LOCK_LEVEL;
	now = gethrtime();
	/*
	 * If the original start time already passed, phase this CPU in
	 * half an interval from now instead of firing immediately.
	 */
	if (t->g_when < now)
		ct->cyt_when = now + t->g_interval / 2;
	else
		ct->cyt_when = t->g_when;
	ct->cyt_interval = t->g_interval;
}
1163
1164
/*
 * Create an all-CPUs (omni-cyclic) periodic timer: func(arg, count)
 * fires on every CPU, first `when` ns from now, then every `interval`
 * ns.  One-shot (interval 0) is not supported and returns NULL.
 * End with vbi_gtimer_end().
 */
vbi_gtimer_t *
vbi_gtimer_begin(
	void (*func)(void *, uint64_t),
	void *arg,
	uint64_t when,
	uint64_t interval)
{
	vbi_gtimer_t *t;
	cyc_omni_handler_t omni;

	/*
	 * one shot global timer is not supported yet.
	 */
	if (interval == 0)
		return (NULL);

	t = kmem_zalloc(sizeof (*t), KM_SLEEP);
	t->g_counters = kmem_zalloc(ncpus * sizeof (uint64_t), KM_SLEEP);
	t->g_when = when + gethrtime();
	t->g_interval = interval;
	t->g_arg = arg;
	t->g_func = func;
	t->g_cyclic = CYCLIC_NONE;

	/* vbi_gtimer_online() supplies the per-CPU handler and fire time. */
	omni.cyo_online = (void (*)())vbi_gtimer_online;
	omni.cyo_offline = NULL;
	omni.cyo_arg = t;

	mutex_enter(&cpu_lock);
	t->g_cyclic = cyclic_add_omni(&omni);
	mutex_exit(&cpu_lock);
	return (t);
}
1198
/*
 * Disarm a global timer and release its storage.
 * NOTE(review): g_counters is freed using the current value of ncpus;
 * this assumes ncpus has not grown since vbi_gtimer_begin() — confirm.
 */
extern void
vbi_gtimer_end(vbi_gtimer_t *t)
{
	mutex_enter(&cpu_lock);
	cyclic_remove(t->g_cyclic);
	mutex_exit(&cpu_lock);
	kmem_free(t->g_counters, ncpus * sizeof (uint64_t));
	kmem_free(t, sizeof (*t));
}
1208
1209int
1210vbi_is_preempt_enabled(void)
1211{
1212 if (vbi_is_initialized) {
1213 char tpr = VBI_T_PREEMPT;
1214 return (tpr == 0);
1215 } else {
1216 cmn_err(CE_NOTE, "vbi_is_preempt_enabled: called without initializing vbi!\n");
1217 return 1;
1218 }
1219}
1220
1221void
1222vbi_poke_cpu(int c)
1223{
1224 if (c < ncpus)
1225 poke_cpu(c);
1226}
1227
1228/*
1229 * This is revision 5 of the interface.
1230 */
1231
/*
 * Allocate page-aligned memory wholly below physical address `phys`.
 * Pages need not be physically contiguous.  Note that phys is passed by
 * value: the physical address vbi_internal_alloc() writes back is
 * discarded — callers only receive the VA.
 */
void *
vbi_lowmem_alloc(uint64_t phys, size_t size)
{
	return (vbi_internal_alloc(&phys, size, PAGESIZE /* alignment */, 0 /* non-contiguous */));
}

/* Release memory from vbi_lowmem_alloc(); size must match the allocation. */
void
vbi_lowmem_free(void *va, size_t size)
{
	p_contig_free(va, size);
}
1243
1244/*
1245 * This is revision 6 of the interface.
1246 */
1247
1248int
1249vbi_is_preempt_pending(void)
1250{
1251 char crr = VBI_CPU_RUNRUN;
1252 char krr = VBI_CPU_KPRUNRUN;
1253 return crr != 0 || krr != 0;
1254}
1255
1256/*
1257 * This is revision 7 of the interface.
1258 */
1259
/*
 * Revision 7 general allocator: page-multiple `size`, constrained below
 * *phys, with caller-chosen alignment and contiguity.  On success *phys
 * returns the physical address of the first page.
 */
void *
vbi_phys_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
{
	return (vbi_internal_alloc(phys, size, alignment, contig));
}

/* Release memory from vbi_phys_alloc(); size must match the allocation. */
void
vbi_phys_free(void *va, size_t size)
{
	p_contig_free(va, size);
}
1271
1272
1273/*
1274 * This is revision 8 of the interface.
1275 */
/* Dummy vnode used to tag pages taken from the free/cache lists below. */
static vnode_t vbipagevp;
1277
/*
 * Allocate `size` bytes worth of raw kernel pages (page_t's), not mapped
 * anywhere.  On success returns the page array (free with
 * vbi_pages_free()) and sets *phys to the physical address of the first
 * page; on failure every partial step is rolled back and NULL returned.
 * Pages are returned holding the exclusive lock; see vbi_pages_premap().
 */
page_t **
vbi_pages_alloc(uint64_t *phys, size_t size)
{
	/*
	 * the page freelist and cachelist both hold pages that are not mapped into any address space.
	 * the cachelist is not really free pages but when memory is exhausted they'll be moved to the
	 * free lists.
	 * it's the total of the free+cache list that we see on the 'free' column in vmstat.
	 */
	page_t **pp_pages = NULL;
	pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;

	/* reserve available memory for pages */
	int rc = page_resv(npages, KM_NOSLEEP);
	if (rc)
	{
		/* create the pages */
		rc = page_create_wait(npages, 0 /* flags */);
		if (rc)
		{
			/* alloc space for page_t pointer array */
			size_t pp_size = npages * sizeof(page_t *);
			pp_pages = kmem_zalloc(pp_size, KM_SLEEP);
			if (pp_pages)
			{
				/*
				 * get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
				 * we don't have the 'virtAddr' to which this memory may be mapped.
				 */
				seg_t kernseg;
				kernseg.s_as = &kas;
				caddr_t virtAddr = NULL;
				for (int64_t i = 0; i < npages; i++, virtAddr += PAGESIZE)
				{
					/* get a page from the freelist */
					page_t *ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
									PAGESIZE, 0 /* flags */, NULL /* local group */);
					if (!ppage)
					{
						/* try from the cachelist */
						ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
									0 /* flags */, NULL /* local group */);
						if (!ppage)
						{
							/* roll back: return unfilled slots, free pages taken so far */
							page_create_putback(npages - i);
							while (--i >= 0)
								page_free(pp_pages[i], 0 /* don't need, move to tail */);
							kmem_free(pp_pages, pp_size);
							page_unresv(npages);
							return NULL;
						}

						/* remove association with the vnode for pages from the cachelist */
						if (!PP_ISAGED(ppage))
							page_hashout(ppage, NULL /* mutex */);
					}

					PP_CLRFREE(ppage);	/* Page is not free */
					PP_CLRAGED(ppage);	/* Page is not hashed in */
					pp_pages[i] = ppage;
				}

				/*
				 * we now have the pages locked exclusively, before they are mapped in
				 * we must downgrade the lock.
				 */
				*phys = (uint64_t)page_pptonum(pp_pages[0]) << PAGESHIFT;
				return pp_pages;
			}

			page_create_putback(npages);
		}

		page_unresv(npages);
	}

	return NULL;
}
1357
1358
/*
 * Return pages obtained from vbi_pages_alloc() to the system.  Each page
 * must be re-acquired exclusively before page_free(); if the shared lock
 * can't be upgraded in place, drop it and spin on a fresh exclusive lock.
 */
void
vbi_pages_free(page_t **pp_pages, size_t size)
{
	pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
	size_t pp_size = npages * sizeof(page_t *);
	for (pgcnt_t i = 0; i < npages; i++)
	{
		/* we need to exclusive lock the pages before freeing them */
		int rc = page_tryupgrade(pp_pages[i]);
		if (!rc)
		{
			page_unlock(pp_pages[i]);
			while (!page_lock(pp_pages[i], SE_EXCL, NULL /* mutex */, P_RECLAIM))
				;
		}

		page_free(pp_pages[i], 0 /* don't need, move to tail */);
	}

	kmem_free(pp_pages, pp_size);
	page_unresv(npages);
}
1381
1382
/*
 * Prepare pages from vbi_pages_alloc() for mapping: downgrade each
 * page's lock from exclusive to shared and report its physical address
 * in pphysaddrs[].  Returns 0 on success, -1 if pphysaddrs is NULL.
 */
int
vbi_pages_premap(page_t **pp_pages, size_t size, uint64_t *pphysaddrs)
{
	if (!pphysaddrs)
		return -1;

	pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
	for (pgcnt_t i = 0; i < npages; i++)
	{
		/*
		 * prepare pages for mapping into kernel/user space, we need to
		 * downgrade the exclusive page lock to a shared lock if the
		 * pages is locked exclusively.
		 */
		if (page_tryupgrade(pp_pages[i]) == 1)
			page_downgrade(pp_pages[i]);
		pphysaddrs[i] = vbi_page_to_pa(pp_pages, i);
	}

	return 0;
}
1404
1405
1406uint64_t
1407vbi_page_to_pa(page_t **pp_pages, pgcnt_t i)
1408{
1409 pfn_t pfn = page_pptonum(pp_pages[i]);
1410 if (pfn == PFN_INVALID)
1411 panic("vbi_page_to_pa: page_pptonum() failed\n");
1412 return (uint64_t)pfn << PAGESHIFT;
1413}
1414
1415/*
1416 * As more functions are added, they should start with a comment indicating
1417 * the revision and above this point in the file and the revision level should
1418 * be increased. Also change vbi_modlmisc at the top of the file.
1419 */
/* Interface revision implemented by this build; see the comment above. */
uint_t vbi_revision_level = 8;
1421
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette