VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/solaris/vbi/i86pc/os/vbi.c@ 37947

Last change on this file since 37947 was 37947, checked in by vboxsync, 13 years ago

Runtime/r0drv/Solaris/vbi: temporary fix for modified t_preempt offset in newer S11 kernels. Also some cleanup.

  • Property svn:eol-style set to native
File size: 34.8 KB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010-2011 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Private interfaces for VirtualBox access to Solaris kernel internal
28 * facilities.
29 *
30 * See sys/vbi.h for what each function does.
31 */
32
33#include <sys/kmem.h>
34#include <sys/types.h>
35#include <sys/mman.h>
36#include <sys/thread.h>
37#include <sys/mutex.h>
38#include <sys/condvar.h>
39#include <sys/sdt.h>
40#include <sys/schedctl.h>
41#include <sys/time.h>
42#include <sys/sysmacros.h>
43#include <sys/cmn_err.h>
44#include <sys/vmsystm.h>
45#include <sys/cyclic.h>
46#include <sys/class.h>
47#include <sys/cpuvar.h>
48#include <sys/kobj.h>
49#include <sys/x_call.h>
50#include <sys/x86_archext.h>
51#include <vm/hat.h>
52#include <vm/seg_vn.h>
53#include <vm/seg_kmem.h>
54#include <sys/ddi.h>
55#include <sys/sunddi.h>
56#include <sys/modctl.h>
57#include <sys/machparam.h>
58#include <sys/utsname.h>
59
60#include <iprt/assert.h>
61
62#include "vbi.h"
63
64#define VBIPROC() ((proc_t *)vbi_proc())
65
66/*
67 * We may have to look up contig_free() at runtime (via kobj_getsymvalue()), since it is a static routine in older kernels.
68 */
69extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
70extern void contig_free(void *, size_t);
71#pragma weak contig_free
72static void (*p_contig_free)(void *, size_t) = contig_free;
73
74/*
75 * We look up kflt_init() at runtime to detect kernels with a kernel page freelist (kflt);
76 * on those we fall back to kernel pages once user pages can no longer be had from the freelists and cachelists.
77 */
78/* Introduced in v9 */
79static int use_kflt = 0;
80static page_t *vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize);
81
82
83/*
84 * Workarounds for running on old versions of Solaris with different cross-call
85 * interfaces. If we find xc_init_cpu() in the kernel, we just use the xc_call()
86 * interface declared in the include file, where xc_call() takes a pointer to a
87 * ulong_t array. The array must be long enough to hold "ncpus" bits at
88 * runtime.
89 *
90 * The reason for the hacks is that using the type "cpuset_t" is pretty much
91 * impossible from code built outside the Solaris source repository that wants
92 * to run on multiple releases of Solaris.
93 *
94 * For old style xc_call()s, 32-bit Solaris and older 64-bit versions use
95 * "ulong_t" as cpuset_t.
96 *
97 * Later versions of 64 bit Solaris used: struct {ulong_t words[x];}
98 * where "x" depends on NCPU.
99 *
100 * We detect the difference in 64 bit support by checking the kernel value of
101 * max_cpuid, which always holds the compiled value of NCPU - 1.
102 *
103 * If Solaris increases NCPU to more than 256, this module will continue
104 * to work on all versions of Solaris as long as the number of installed
105 * CPUs in the machine is <= VBI_NCPU. If VBI_NCPU is increased, this code
106 * has to be partially rewritten to stay compatible with older Solaris, which
107 * expects cpuset_t to be based on NCPU==256 -- or we discontinue support
108 * of old Nevada/S10.
109 */
110static int use_old = 0;
111static int use_old_with_ulong = 0;
112static void (*p_xc_call)() = (void (*)())xc_call;
113
114#define VBI_NCPU 256
115#define VBI_SET_WORDS (VBI_NCPU / (sizeof (ulong_t) * 8))
116typedef struct vbi_cpuset {
117 ulong_t words[VBI_SET_WORDS];
118} vbi_cpuset_t;
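/*
 * With VBI_NCPU == 256, VBI_SET_WORDS works out to 4 words on 64-bit kernels
 * (256 / (8 * 8)) and 8 words on 32-bit kernels (256 / (4 * 8)); the 64-bit
 * case matches the struct-based cpuset_t layout described above for NCPU == 256.
 */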
119#define X_CALL_HIPRI (2) /* for old Solaris interface */
120
121/*
122 * module linkage stuff
123 */
124#if 0
125static struct modlmisc vbi_modlmisc = {
126 &mod_miscops, "VirtualBox Interfaces V8"
127};
128
129static struct modlinkage vbi_modlinkage = {
130 MODREV_1, { (void *)&vbi_modlmisc, NULL }
131};
132#endif
133
134extern uintptr_t kernelbase;
135#define IS_KERNEL(v) ((uintptr_t)(v) >= kernelbase)
136
137#if 0
138static int vbi_verbose = 0;
139
140#define VBI_VERBOSE(msg) {if (vbi_verbose) cmn_err(CE_WARN, msg);}
141#endif
142
143/* Introduced in v8 */
144static int vbi_is_initialized = 0;
145
146/* Introduced in v6 */
147static int vbi_is_nevada = 0;
148
149#ifdef _LP64
150/* 64-bit Solaris 10 offsets */
151/* CPU */
152static int off_s10_cpu_runrun = 232;
153static int off_s10_cpu_kprunrun = 233;
154/* kthread_t */
155static int off_s10_t_preempt = 42;
156
157/* 64-bit Solaris 11 (Nevada/OpenSolaris) offsets */
158/* CPU */
159static int off_s11_cpu_runrun = 216;
160static int off_s11_cpu_kprunrun = 217;
161/* kthread_t */
162static int off_s11_t_preempt = 42;
163
164/* 64-bit Solaris 11 snv_166+ offsets (CR 7037143) */
165static int off_s11_t_preempt_new = 48;
166#else
167/* 32-bit Solaris 10 offsets */
168/* CPU */
169static int off_s10_cpu_runrun = 124;
170static int off_s10_cpu_kprunrun = 125;
171/* kthread_t */
172static int off_s10_t_preempt = 26;
173
174/* 32-bit Solaris 11 (Nevada/OpenSolaris) offsets */
175/* CPU */
176static int off_s11_cpu_runrun = 112;
177static int off_s11_cpu_kprunrun = 113;
178/* kthread_t */
179static int off_s11_t_preempt = 26;
180#endif
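/*
 * The tables above hard-code the byte offsets of cpu_t.cpu_runrun,
 * cpu_t.cpu_kprunrun and kthread_t.t_preempt for each supported kernel
 * generation.  vbi_init() picks one set based on utsname and then
 * sanity-checks the values read through them before use.
 */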
181
182
183/* Which offsets will be used */
184static int off_cpu_runrun = -1;
185static int off_cpu_kprunrun = -1;
186static int off_t_preempt = -1;
187
188#define VBI_T_PREEMPT (*((char *)curthread + off_t_preempt))
189#define VBI_CPU_KPRUNRUN (*((char *)CPU + off_cpu_kprunrun))
190#define VBI_CPU_RUNRUN (*((char *)CPU + off_cpu_runrun))
191
192#undef kpreempt_disable
193#undef kpreempt_enable
194
195#define VBI_PREEMPT_DISABLE() \
196 { \
197 VBI_T_PREEMPT++; \
198 ASSERT(VBI_T_PREEMPT >= 1); \
199 }
200#define VBI_PREEMPT_ENABLE() \
201 { \
202 ASSERT(VBI_T_PREEMPT >= 1); \
203 if (--VBI_T_PREEMPT == 0 && \
204 VBI_CPU_RUNRUN) \
205 kpreempt(KPREEMPT_SYNC); \
206 }
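/*
 * These mirror kpreempt_disable()/kpreempt_enable(), but poke t_preempt and
 * cpu_runrun through the byte offsets probed above, so the module does not
 * depend on the kthread_t/cpu_t layout of whatever kernel it was built against.
 */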
207
208/* End of v6 intro */
209
210#if 0
211int
212_init(void)
213{
214 int err = vbi_init();
215 if (!err)
216 err = mod_install(&vbi_modlinkage);
217 return (err);
218}
219#endif
220
221int
222vbi_init(void)
223{
224 /*
225 * Check whether this version of the VirtualBox interface module will work
226 * with the kernel.
227 */
228 if (kobj_getsymvalue("xc_init_cpu", 1) != NULL) {
229 /*
230 * Our bit vector storage needs to be large enough for the
231 * actual number of CPUs running in the system.
232 */
233 if (ncpus > VBI_NCPU) {
234 cmn_err(CE_NOTE, "cpu count mismatch.\n");
235 return (EINVAL);
236 }
237 } else {
238 use_old = 1;
239 if (max_cpuid + 1 == sizeof(ulong_t) * 8)
240 use_old_with_ulong = 1;
241 else if (max_cpuid + 1 != VBI_NCPU)
242 {
243 cmn_err(CE_NOTE, "cpuset_t size mismatch. probably too old a kernel.\n");
244 return (EINVAL); /* cpuset_t size mismatch */
245 }
246 }
247
248 /*
249 * In older versions of Solaris contig_free() is a static routine.
250 */
251 if (p_contig_free == NULL) {
252 p_contig_free = (void (*)(void *, size_t))
253 kobj_getsymvalue("contig_free", 1);
254 if (p_contig_free == NULL) {
255 cmn_err(CE_NOTE, " contig_free() not found in kernel\n");
256 return (EINVAL);
257 }
258 }
259
260 /*
261 * Use kernel page freelist flags to get pages from kernel page freelists
262 * while allocating physical pages, once the user pages are exhausted.
263 * snv_161+, see @bugref{5632}.
264 */
265 if (kobj_getsymvalue("kflt_init", 1) != NULL)
266 {
267 int *p_kflt_disable = (int*)kobj_getsymvalue("kflt_disable", 1); /* amd64 only, on 32-bit kflt's are disabled. */
268 if (p_kflt_disable && *p_kflt_disable == 0)
269 {
270 use_kflt = 1;
271 }
272 }
273
274
275 /*
276 * Check if this is S10 or Nevada
277 */
278 if (!strncmp(utsname.release, "5.11", sizeof("5.11") - 1)) {
279 /* Nevada detected... */
280 vbi_is_nevada = 1;
281
282 off_cpu_runrun = off_s11_cpu_runrun;
283 off_cpu_kprunrun = off_s11_cpu_kprunrun;
284 off_t_preempt = off_s11_t_preempt;
285
286#ifdef _LP64
287 /* Only 64-bit kernels */
288 long snv_version = 0;
289 if (!strncmp(utsname.version, "snv_", 4))
290 {
291 ddi_strtol(utsname.version + 4, NULL /* endptr */, 0, &snv_version);
292 if (snv_version >= 166)
293 {
294 off_t_preempt = off_s11_t_preempt_new;
295 cmn_err(CE_NOTE, "here\n");
296 }
297
298 cmn_err(CE_NOTE, "Detected S11 version %ld: Preemption offset=%d\n", snv_version, off_t_preempt);
299 }
300 else
301 cmn_err(CE_NOTE, "WARNING!! Cannot determine version. Assuming pre snv_166. Preemption offset=%d may be busted!\n", off_t_preempt);
302#endif
303 } else {
304 /* Solaris 10 detected... */
305 vbi_is_nevada = 0;
306
307 off_cpu_runrun = off_s10_cpu_runrun;
308 off_cpu_kprunrun = off_s10_cpu_kprunrun;
309 off_t_preempt = off_s10_t_preempt;
310 }
311
312 /*
313 * Sanity checking...
314 */
315 /* CPU */
316 char crr = VBI_CPU_RUNRUN;
317 char krr = VBI_CPU_KPRUNRUN;
318 if ( (crr < 0 || crr > 1)
319 || (krr < 0 || krr > 1)) {
320 cmn_err(CE_NOTE, ":CPU structure sanity check failed! OS version mismatch.\n");
321 return EINVAL;
322 }
323
324 /* Thread */
325 char t_preempt = VBI_T_PREEMPT;
326 if (t_preempt < 0 || t_preempt > 32) {
327 cmn_err(CE_NOTE, ":Thread structure sanity check failed! OS version mismatch.\n");
328 return EINVAL;
329 }
330
331 vbi_is_initialized = 1;
332
333 return (0);
334}
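/*
 * Illustrative only (not part of the original interface contract): a consumer
 * of these interfaces would be expected to call vbi_init() once, before any
 * other vbi_* function, and to refuse to load if it fails, e.g.:
 *
 *	if (vbi_init() != 0)
 *		return (ENOTSUP);
 *
 * The sanity checks above make vbi_init() fail cleanly rather than letting
 * the preemption macros poke random bytes in kthread_t/cpu_t.
 */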
335
336#if 0
337int
338_fini(void)
339{
340 int err = mod_remove(&vbi_modlinkage);
341 if (err != 0)
342 return (err);
343
344 return (0);
345}
346
347int
348_info(struct modinfo *modinfop)
349{
350 return (mod_info(&vbi_modlinkage, modinfop));
351}
352#endif
353
354
355static ddi_dma_attr_t base_attr = {
356 DMA_ATTR_V0, /* Version Number */
357 (uint64_t)0, /* lower limit */
358 (uint64_t)0, /* high limit */
359 (uint64_t)0xffffffff, /* counter limit */
360 (uint64_t)PAGESIZE, /* pagesize alignment */
361 (uint64_t)PAGESIZE, /* pagesize burst size */
362 (uint64_t)PAGESIZE, /* pagesize effective DMA size */
363 (uint64_t)0xffffffff, /* max DMA xfer size */
364 (uint64_t)0xffffffff, /* segment boundary */
365 1, /* list length (1 for contiguous) */
366 1, /* device granularity */
367 0 /* bus-specific flags */
368};
369
370static void *
371vbi_internal_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
372{
373 ddi_dma_attr_t attr;
374 pfn_t pfn;
375 void *ptr;
376 uint_t npages;
377
378 if ((size & PAGEOFFSET) != 0)
379 return (NULL);
380 npages = (size + PAGESIZE - 1) >> PAGESHIFT;
381 if (npages == 0)
382 return (NULL);
383
384 attr = base_attr;
385 attr.dma_attr_addr_hi = *phys;
386 attr.dma_attr_align = alignment;
387 if (!contig)
388 attr.dma_attr_sgllen = npages;
389 ptr = contig_alloc(size, &attr, PAGESIZE, 1);
390
391 if (ptr == NULL) {
392 cmn_err(CE_NOTE, "vbi_internal_alloc() failure for %lu bytes contig=%d", size, contig);
393 return (NULL);
394 }
395
396 pfn = hat_getpfnum(kas.a_hat, (caddr_t)ptr);
397 if (pfn == PFN_INVALID)
398 panic("vbi_internal_alloc(): hat_getpfnum() failed\n");
399 *phys = (uint64_t)pfn << PAGESHIFT;
400 return (ptr);
401}
402
403void *
404vbi_contig_alloc(uint64_t *phys, size_t size)
405{
406 /* Obsolete */
407 return (vbi_internal_alloc(phys, size, PAGESIZE /* alignment */, 1 /* contiguous */));
408}
409
410void
411vbi_contig_free(void *va, size_t size)
412{
413 /* Obsolete */
414 p_contig_free(va, size);
415}
416
417void *
418vbi_kernel_map(uint64_t pa, size_t size, uint_t prot)
419{
420 caddr_t va;
421
422 if ((pa & PAGEOFFSET) || (size & PAGEOFFSET)) {
423 cmn_err(CE_NOTE, "vbi_kernel_map() bad pa (0x%lx) or size (%lu)", pa, size);
424 return (NULL);
425 }
426
427 va = vmem_alloc(heap_arena, size, VM_SLEEP);
428
429 hat_devload(kas.a_hat, va, size, (pfn_t)(pa >> PAGESHIFT),
430 prot, HAT_LOAD | HAT_LOAD_LOCK | HAT_UNORDERED_OK);
431
432 return (va);
433}
434
435void
436vbi_unmap(void *va, size_t size)
437{
438 if (IS_KERNEL(va)) {
439 hat_unload(kas.a_hat, va, size, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
440 vmem_free(heap_arena, va, size);
441 } else {
442 struct as *as = VBIPROC()->p_as;
443
444 as_rangelock(as);
445 (void) as_unmap(as, va, size);
446 as_rangeunlock(as);
447 }
448}
449
450void *
451vbi_curthread(void)
452{
453 return (curthread);
454}
455
456int
457vbi_yield(void)
458{
459 int rv = 0;
460
461 vbi_preempt_disable();
462
463 char tpr = VBI_T_PREEMPT;
464 char kpr = VBI_CPU_KPRUNRUN;
465 if (tpr == 1 && kpr)
466 rv = 1;
467
468 vbi_preempt_enable();
469 return (rv);
470}
471
472uint64_t
473vbi_timer_granularity(void)
474{
475 return (nsec_per_tick);
476}
477
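/*
 * Legacy timer wrappers around the cyclic subsystem (see also the
 * vbi_stimer/vbi_gtimer interfaces further down).  An interval of 0 means
 * one-shot: the callback stops the cyclic itself after the first firing.
 */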
478typedef struct vbi_timer {
479 cyc_handler_t vbi_handler;
480 cyclic_id_t vbi_cyclic;
481 uint64_t vbi_interval;
482 void (*vbi_func)();
483 void *vbi_arg1;
484 void *vbi_arg2;
485} vbi_timer_t;
486
487static void
488vbi_timer_callback(void *arg)
489{
490 vbi_timer_t *t = arg;
491
492 if (t->vbi_interval == 0)
493 vbi_timer_stop(arg);
494 t->vbi_func(t->vbi_arg1, t->vbi_arg2);
495}
496
497void *
498vbi_timer_create(void *callback, void *arg1, void *arg2, uint64_t interval)
499{
500 vbi_timer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
501
502 t->vbi_func = (void (*)())callback;
503 t->vbi_arg1 = arg1;
504 t->vbi_arg2 = arg2;
505 t->vbi_handler.cyh_func = vbi_timer_callback;
506 t->vbi_handler.cyh_arg = (void *)t;
507 t->vbi_handler.cyh_level = CY_LOCK_LEVEL;
508 t->vbi_cyclic = CYCLIC_NONE;
509 t->vbi_interval = interval;
510 return (t);
511}
512
513void
514vbi_timer_destroy(void *timer)
515{
516 vbi_timer_t *t = timer;
517 if (t != NULL) {
518 vbi_timer_stop(timer);
519 kmem_free(t, sizeof (*t));
520 }
521}
522
523void
524vbi_timer_start(void *timer, uint64_t when)
525{
526 vbi_timer_t *t = timer;
527 cyc_time_t fire_time;
528 uint64_t interval = t->vbi_interval;
529
530 mutex_enter(&cpu_lock);
531 when += gethrtime();
532 fire_time.cyt_when = when;
533 if (interval == 0)
534 fire_time.cyt_interval = when;
535 else
536 fire_time.cyt_interval = interval;
537 t->vbi_cyclic = cyclic_add(&t->vbi_handler, &fire_time);
538 mutex_exit(&cpu_lock);
539}
540
541void
542vbi_timer_stop(void *timer)
543{
544 vbi_timer_t *t = timer;
545
546 if (t->vbi_cyclic == CYCLIC_NONE)
547 return;
548 mutex_enter(&cpu_lock);
549 if (t->vbi_cyclic != CYCLIC_NONE) {
550 cyclic_remove(t->vbi_cyclic);
551 t->vbi_cyclic = CYCLIC_NONE;
552 }
553 mutex_exit(&cpu_lock);
554}
555
556uint64_t
557vbi_tod(void)
558{
559 timestruc_t ts;
560
561 mutex_enter(&tod_lock);
562 ts = tod_get();
563 mutex_exit(&tod_lock);
564 return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
565}
566
567
568void *
569vbi_proc(void)
570{
571 proc_t *p;
572 drv_getparm(UPROCP, &p);
573 return (p);
574}
575
576void
577vbi_set_priority(void *thread, int priority)
578{
579 kthread_t *t = thread;
580
581 thread_lock(t);
582 (void) thread_change_pri(t, priority, 0);
583 thread_unlock(t);
584}
585
586void *
587vbi_thread_create(void (*func)(void *), void *arg, size_t len, int priority)
588{
589 kthread_t *t;
590
591 t = thread_create(NULL, NULL, (void (*)())func, arg, len,
592 VBIPROC(), TS_RUN, priority);
593 return (t);
594}
595
596void
597vbi_thread_exit(void)
598{
599 thread_exit();
600}
601
602void *
603vbi_text_alloc(size_t size)
604{
605 return (segkmem_alloc(heaptext_arena, size, KM_SLEEP));
606}
607
608void
609vbi_text_free(void *va, size_t size)
610{
611 segkmem_free(heaptext_arena, va, size);
612}
613
614int
615vbi_cpu_id(void)
616{
617 return (CPU->cpu_id);
618}
619
620int
621vbi_max_cpu_id(void)
622{
623 return (max_cpuid);
624}
625
626int
627vbi_cpu_maxcount(void)
628{
629 return (max_cpuid + 1);
630}
631
632int
633vbi_cpu_count(void)
634{
635 return (ncpus);
636}
637
638int
639vbi_cpu_online(int c)
640{
641 int x;
642
643 mutex_enter(&cpu_lock);
644 x = cpu_is_online(cpu[c]);
645 mutex_exit(&cpu_lock);
646 return (x);
647}
648
649void
650vbi_preempt_disable(void)
651{
652 VBI_PREEMPT_DISABLE();
653}
654
655void
656vbi_preempt_enable(void)
657{
658 VBI_PREEMPT_ENABLE();
659}
660
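/*
 * The vbi_execute_on_*() functions below broadcast cross calls.  Each builds
 * a vbi_cpuset_t bit vector and dispatches through one of the three ABI
 * variants detected in vbi_init():
 *  - new kernels (xc_init_cpu() present): xc_call() takes a pointer to the
 *    ulong_t array;
 *  - old kernels where cpuset_t is a plain ulong_t: the single word is passed
 *    by value, together with X_CALL_HIPRI;
 *  - other old kernels: the whole struct is passed by value, also with
 *    X_CALL_HIPRI.
 */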
661void
662vbi_execute_on_all(void *func, void *arg)
663{
664 vbi_cpuset_t set;
665 int i;
666
667 for (i = 0; i < VBI_SET_WORDS; ++i)
668 set.words[i] = (ulong_t)-1L;
669 if (use_old) {
670 if (use_old_with_ulong) {
671 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
672 set.words[0], (xc_func_t)func);
673 } else {
674 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
675 set, (xc_func_t)func);
676 }
677 } else {
678 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
679 }
680}
681
682void
683vbi_execute_on_others(void *func, void *arg)
684{
685 vbi_cpuset_t set;
686 int i;
687
688 for (i = 0; i < VBI_SET_WORDS; ++i)
689 set.words[i] = (ulong_t)-1L;
690 BT_CLEAR(set.words, vbi_cpu_id());
691 if (use_old) {
692 if (use_old_with_ulong) {
693 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
694 set.words[0], (xc_func_t)func);
695 } else {
696 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
697 set, (xc_func_t)func);
698 }
699 } else {
700 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
701 }
702}
703
704void
705vbi_execute_on_one(void *func, void *arg, int c)
706{
707 vbi_cpuset_t set;
708 int i;
709
710 for (i = 0; i < VBI_SET_WORDS; ++i)
711 set.words[i] = 0;
712 BT_SET(set.words, c);
713 if (use_old) {
714 if (use_old_with_ulong) {
715 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
716 set.words[0], (xc_func_t)func);
717 } else {
718 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
719 set, (xc_func_t)func);
720 }
721 } else {
722 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
723 }
724}
725
726int
727vbi_lock_va(void *addr, size_t len, int access, void **handle)
728{
729 faultcode_t err;
730
731 /*
732 * kernel mappings on x86 are always locked, so only handle user.
733 */
734 *handle = NULL;
735 if (!IS_KERNEL(addr)) {
736 err = as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
737 (caddr_t)addr, len, F_SOFTLOCK, access);
738 if (err != 0) {
739 cmn_err(CE_NOTE, "vbi_lock_va() failed to lock");
740 return (-1);
741 }
742 }
743 return (0);
744}
745
746/*ARGSUSED*/
747void
748vbi_unlock_va(void *addr, size_t len, int access, void *handle)
749{
750 if (!IS_KERNEL(addr))
751 as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
752 (caddr_t)addr, len, F_SOFTUNLOCK, access);
753}
754
755uint64_t
756vbi_va_to_pa(void *addr)
757{
758 struct hat *hat;
759 pfn_t pfn;
760 uintptr_t v = (uintptr_t)addr;
761
762 if (IS_KERNEL(v))
763 hat = kas.a_hat;
764 else
765 hat = VBIPROC()->p_as->a_hat;
766 pfn = hat_getpfnum(hat, (caddr_t)(v & PAGEMASK));
767 if (pfn == PFN_INVALID)
768 return (-(uint64_t)1);
769 return (((uint64_t)pfn << PAGESHIFT) | (v & PAGEOFFSET));
770}
771
772
773struct segvbi_crargs {
774 uint64_t *palist;
775 uint_t prot;
776};
777
778struct segvbi_data {
779 uint_t prot;
780};
781
782static struct seg_ops segvbi_ops;
783
784static int
785segvbi_create(struct seg *seg, void *args)
786{
787 struct segvbi_crargs *a = args;
788 struct segvbi_data *data;
789 struct as *as = seg->s_as;
790 caddr_t va;
791 ulong_t pgcnt;
792 ulong_t p;
793
794 hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
795 data = kmem_zalloc(sizeof (*data), KM_SLEEP);
796 data->prot = a->prot | PROT_USER;
797
798 seg->s_ops = &segvbi_ops;
799 seg->s_data = data;
800
801 /*
802 * now load locked mappings to the pages
803 */
804 va = seg->s_base;
805 pgcnt = (seg->s_size + PAGESIZE - 1) >> PAGESHIFT;
806 for (p = 0; p < pgcnt; ++p, va += PAGESIZE) {
807 hat_devload(as->a_hat, va,
808 PAGESIZE, a->palist[p] >> PAGESHIFT,
809 data->prot | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
810 }
811
812 return (0);
813}
814
815/*
816 * Duplicate a seg and return new segment in newseg.
817 */
818static int
819segvbi_dup(struct seg *seg, struct seg *newseg)
820{
821 struct segvbi_data *data = seg->s_data;
822 struct segvbi_data *ndata;
823
824 ndata = kmem_zalloc(sizeof (*data), KM_SLEEP);
825 ndata->prot = data->prot;
826 newseg->s_ops = &segvbi_ops;
827 newseg->s_data = ndata;
828
829 return (0);
830}
831
832static int
833segvbi_unmap(struct seg *seg, caddr_t addr, size_t len)
834{
835 if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
836 (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
837 panic("segvbi_unmap");
838
839 if (addr != seg->s_base || len != seg->s_size)
840 return (ENOTSUP);
841
842 hat_unload(seg->s_as->a_hat, addr, len,
843 HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
844
845 seg_free(seg);
846 return (0);
847}
848
849static void
850segvbi_free(struct seg *seg)
851{
852 struct segvbi_data *data = seg->s_data;
853 kmem_free(data, sizeof (*data));
854}
855
856/*
857 * The (u)read() path may SEGOP_FAULT() on buffers that were mapped in via
858 * vbi_user_map(), i.e. it prefaults them before DMA. Those mappings are
859 * already loaded and locked, so don't fail when we're called that way; see #5047.
860 */
861static int
862segvbi_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
863 enum fault_type type, enum seg_rw rw)
864{
865 return (0);
866}
867
868static int
869segvbi_faulta(struct seg *seg, caddr_t addr)
870{
871 return (0);
872}
873
874static int
875segvbi_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
876{
877 return (EACCES);
878}
879
880static int
881segvbi_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
882{
883 return (EINVAL);
884}
885
886static int
887segvbi_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
888{
889 return (-1);
890}
891
892static int
893segvbi_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
894{
895 return (0);
896}
897
898static size_t
899segvbi_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
900{
901 size_t v;
902
903 for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
904 len -= PAGESIZE, v += PAGESIZE)
905 *vec++ = 1;
906 return (v);
907}
908
909static int
910segvbi_lockop(struct seg *seg, caddr_t addr,
911 size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
912{
913 return (0);
914}
915
916static int
917segvbi_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
918{
919 struct segvbi_data *data = seg->s_data;
920 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
921 if (pgno != 0)
922 {
923 do
924 {
925 pgno--;
926 protv[pgno] = data->prot;
927 } while (pgno != 0);
928 }
929 return (0);
930}
931
932static u_offset_t
933segvbi_getoffset(struct seg *seg, caddr_t addr)
934{
935 return ((uintptr_t)addr - (uintptr_t)seg->s_base);
936}
937
938static int
939segvbi_gettype(struct seg *seg, caddr_t addr)
940{
941 return (MAP_SHARED);
942}
943
944static vnode_t vbivp;
945
946static int
947segvbi_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
948{
949 *vpp = &vbivp;
950 return (0);
951}
952
953static int
954segvbi_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
955{
956 return (0);
957}
958
959static void
960segvbi_dump(struct seg *seg)
961{}
962
963static int
964segvbi_pagelock(struct seg *seg, caddr_t addr, size_t len,
965 struct page ***ppp, enum lock_type type, enum seg_rw rw)
966{
967 return (ENOTSUP);
968}
969
970static int
971segvbi_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
972{
973 return (ENOTSUP);
974}
975
976static int
977segvbi_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
978{
979 return (ENODEV);
980}
981
982static lgrp_mem_policy_info_t *
983segvbi_getpolicy(struct seg *seg, caddr_t addr)
984{
985 return (NULL);
986}
987
988static int
989segvbi_capable(struct seg *seg, segcapability_t capability)
990{
991 return (0);
992}
993
994static struct seg_ops segvbi_ops = {
995 segvbi_dup,
996 segvbi_unmap,
997 segvbi_free,
998 segvbi_fault,
999 segvbi_faulta,
1000 segvbi_setprot,
1001 segvbi_checkprot,
1002 (int (*)())segvbi_kluster,
1003 (size_t (*)(struct seg *))NULL, /* swapout */
1004 segvbi_sync,
1005 segvbi_incore,
1006 segvbi_lockop,
1007 segvbi_getprot,
1008 segvbi_getoffset,
1009 segvbi_gettype,
1010 segvbi_getvp,
1011 segvbi_advise,
1012 segvbi_dump,
1013 segvbi_pagelock,
1014 segvbi_setpagesize,
1015 segvbi_getmemid,
1016 segvbi_getpolicy,
1017 segvbi_capable
1018};
1019
1020
1021
1022/*
1023 * Interfaces to inject physical pages into user address space
1024 * and later remove them.
1025 */
1026int
1027vbi_user_map(caddr_t *va, uint_t prot, uint64_t *palist, size_t len)
1028{
1029 struct as *as = VBIPROC()->p_as;
1030 struct segvbi_crargs args;
1031 int error = 0;
1032
1033 args.palist = palist;
1034 args.prot = prot;
1035 as_rangelock(as);
1036 map_addr(va, len, 0, 0, MAP_SHARED);
1037 if (*va != NULL)
1038 error = as_map(as, *va, len, segvbi_create, &args);
1039 else
1040 error = ENOMEM;
1041 if (error)
1042 cmn_err(CE_NOTE, "vbi_user_map() failed error=%d", error);
1043 as_rangeunlock(as);
1044 return (error);
1045}
1046
1047
1048/*
1049 * This is revision 2 of the interface.
1050 */
1051
1052struct vbi_cpu_watch {
1053 void (*vbi_cpu_func)(void *, int, int);
1054 void *vbi_cpu_arg;
1055};
1056
1057static int
1058vbi_watcher(cpu_setup_t state, int icpu, void *arg)
1059{
1060 vbi_cpu_watch_t *w = arg;
1061 int online;
1062
1063 if (state == CPU_ON)
1064 online = 1;
1065 else if (state == CPU_OFF)
1066 online = 0;
1067 else
1068 return (0);
1069 w->vbi_cpu_func(w->vbi_cpu_arg, icpu, online);
1070 return (0);
1071}
1072
1073vbi_cpu_watch_t *
1074vbi_watch_cpus(void (*func)(void *, int, int), void *arg, int current_too)
1075{
1076 int c;
1077 vbi_cpu_watch_t *w;
1078
1079 w = kmem_alloc(sizeof (*w), KM_SLEEP);
1080 w->vbi_cpu_func = func;
1081 w->vbi_cpu_arg = arg;
1082 mutex_enter(&cpu_lock);
1083 register_cpu_setup_func(vbi_watcher, w);
1084 if (current_too) {
1085 for (c = 0; c < ncpus; ++c) {
1086 if (cpu_is_online(cpu[c]))
1087 func(arg, c, 1);
1088 }
1089 }
1090 mutex_exit(&cpu_lock);
1091 return (w);
1092}
1093
1094void
1095vbi_ignore_cpus(vbi_cpu_watch_t *w)
1096{
1097 mutex_enter(&cpu_lock);
1098 unregister_cpu_setup_func(vbi_watcher, w);
1099 mutex_exit(&cpu_lock);
1100 kmem_free(w, sizeof (*w));
1101}
1102
1103/*
1104 * Simple timers are pretty much a pass through to the cyclic subsystem.
1105 */
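/*
 * An interval of 0 requests a one-shot timer: vbi_stimer_begin() pushes
 * cyt_interval out to INT64_MAX - when, so the cyclic will not refire before
 * vbi_stimer_end() removes it.
 */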
1106struct vbi_stimer {
1107 cyc_handler_t s_handler;
1108 cyc_time_t s_fire_time;
1109 cyclic_id_t s_cyclic;
1110 uint64_t s_tick;
1111 void (*s_func)(void *, uint64_t);
1112 void *s_arg;
1113};
1114
1115static void
1116vbi_stimer_func(void *arg)
1117{
1118 vbi_stimer_t *t = arg;
1119 t->s_func(t->s_arg, ++t->s_tick);
1120}
1121
1122extern vbi_stimer_t *
1123vbi_stimer_begin(
1124 void (*func)(void *, uint64_t),
1125 void *arg,
1126 uint64_t when,
1127 uint64_t interval,
1128 int on_cpu)
1129{
1130 vbi_stimer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1131
1132 t->s_handler.cyh_func = vbi_stimer_func;
1133 t->s_handler.cyh_arg = t;
1134 t->s_handler.cyh_level = CY_LOCK_LEVEL;
1135 t->s_tick = 0;
1136 t->s_func = func;
1137 t->s_arg = arg;
1138
1139 mutex_enter(&cpu_lock);
1140 	if (on_cpu != VBI_ANY_CPU && !cpu_is_online(cpu[on_cpu])) {
 		kmem_free(t, sizeof (*t));	/* don't leak the timer we just allocated */
1141 		t = NULL;
1142 		goto done;
1143 	}
1144
1145 when += gethrtime();
1146 t->s_fire_time.cyt_when = when;
1147 if (interval == 0)
1148 t->s_fire_time.cyt_interval = INT64_MAX - when;
1149 else
1150 t->s_fire_time.cyt_interval = interval;
1151 t->s_cyclic = cyclic_add(&t->s_handler, &t->s_fire_time);
1152 if (on_cpu != VBI_ANY_CPU)
1153 cyclic_bind(t->s_cyclic, cpu[on_cpu], NULL);
1154done:
1155 mutex_exit(&cpu_lock);
1156 return (t);
1157}
1158
1159extern void
1160vbi_stimer_end(vbi_stimer_t *t)
1161{
1162 mutex_enter(&cpu_lock);
1163 cyclic_remove(t->s_cyclic);
1164 mutex_exit(&cpu_lock);
1165 kmem_free(t, sizeof (*t));
1166}
1167
1168/*
1169 * Global timers are more complicated. The callback is handed a per-cpu tick
1170 * counter whose value is 1 on the first call on a given cpu.
1171 */
1172struct vbi_gtimer {
1173 uint64_t *g_counters;
1174 void (*g_func)(void *, uint64_t);
1175 void *g_arg;
1176 uint64_t g_when;
1177 uint64_t g_interval;
1178 cyclic_id_t g_cyclic;
1179};
1180
1181static void
1182vbi_gtimer_func(void *arg)
1183{
1184 vbi_gtimer_t *t = arg;
1185 t->g_func(t->g_arg, ++t->g_counters[vbi_cpu_id()]);
1186}
1187
1188/*
1189 * Whenever a cpu is onlined, its g_counters[] entry needs to be reset to zero.
1190 */
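/*
 * If the requested start time has already passed (e.g. a cpu comes online
 * after the timer was started), the first firing on that cpu is deferred by
 * half an interval from now instead of firing immediately.
 */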
1191static void
1192vbi_gtimer_online(void *arg, cpu_t *pcpu, cyc_handler_t *h, cyc_time_t *ct)
1193{
1194 vbi_gtimer_t *t = arg;
1195 hrtime_t now;
1196
1197 t->g_counters[pcpu->cpu_id] = 0;
1198 h->cyh_func = vbi_gtimer_func;
1199 h->cyh_arg = t;
1200 h->cyh_level = CY_LOCK_LEVEL;
1201 now = gethrtime();
1202 if (t->g_when < now)
1203 ct->cyt_when = now + t->g_interval / 2;
1204 else
1205 ct->cyt_when = t->g_when;
1206 ct->cyt_interval = t->g_interval;
1207}
1208
1209
1210vbi_gtimer_t *
1211vbi_gtimer_begin(
1212 void (*func)(void *, uint64_t),
1213 void *arg,
1214 uint64_t when,
1215 uint64_t interval)
1216{
1217 vbi_gtimer_t *t;
1218 cyc_omni_handler_t omni;
1219
1220 /*
1221 * One-shot global timers are not supported yet.
1222 */
1223 if (interval == 0)
1224 return (NULL);
1225
1226 t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1227 t->g_counters = kmem_zalloc(ncpus * sizeof (uint64_t), KM_SLEEP);
1228 t->g_when = when + gethrtime();
1229 t->g_interval = interval;
1230 t->g_arg = arg;
1231 t->g_func = func;
1232 t->g_cyclic = CYCLIC_NONE;
1233
1234 omni.cyo_online = (void (*)(void *, cpu_t *, cyc_handler_t *, cyc_time_t *))vbi_gtimer_online;
1235 omni.cyo_offline = NULL;
1236 omni.cyo_arg = t;
1237
1238 mutex_enter(&cpu_lock);
1239 t->g_cyclic = cyclic_add_omni(&omni);
1240 mutex_exit(&cpu_lock);
1241 return (t);
1242}
1243
1244extern void
1245vbi_gtimer_end(vbi_gtimer_t *t)
1246{
1247 mutex_enter(&cpu_lock);
1248 cyclic_remove(t->g_cyclic);
1249 mutex_exit(&cpu_lock);
1250 kmem_free(t->g_counters, ncpus * sizeof (uint64_t));
1251 kmem_free(t, sizeof (*t));
1252}
1253
1254int
1255vbi_is_preempt_enabled(void)
1256{
1257 if (vbi_is_initialized) {
1258 char tpr = VBI_T_PREEMPT;
1259 return (tpr == 0);
1260 } else {
1261 cmn_err(CE_NOTE, "vbi_is_preempt_enabled: called without initializing vbi!\n");
1262 return 1;
1263 }
1264}
1265
1266void
1267vbi_poke_cpu(int c)
1268{
1269 if (c < ncpus)
1270 poke_cpu(c);
1271}
1272
1273/*
1274 * This is revision 5 of the interface.
1275 */
1276
1277void *
1278vbi_lowmem_alloc(uint64_t phys, size_t size)
1279{
1280 return (vbi_internal_alloc(&phys, size, PAGESIZE /* alignment */, 0 /* non-contiguous */));
1281}
1282
1283void
1284vbi_lowmem_free(void *va, size_t size)
1285{
1286 p_contig_free(va, size);
1287}
1288
1289/*
1290 * This is revision 6 of the interface.
1291 */
1292
1293int
1294vbi_is_preempt_pending(void)
1295{
1296 char crr = VBI_CPU_RUNRUN;
1297 char krr = VBI_CPU_KPRUNRUN;
1298 return crr != 0 || krr != 0;
1299}
1300
1301/*
1302 * This is revision 7 of the interface.
1303 */
1304
1305void *
1306vbi_phys_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
1307{
1308 return (vbi_internal_alloc(phys, size, alignment, contig));
1309}
1310
1311void
1312vbi_phys_free(void *va, size_t size)
1313{
1314 p_contig_free(va, size);
1315}
1316
1317
1318/*
1319 * This is revision 8 of the interface.
1320 */
1321static vnode_t vbipagevp;
1322
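/*
 * Revision 8 page-list allocator: vbi_pages_alloc() reserves memory and pulls
 * individual pages off the free/cache lists (returned exclusively locked),
 * vbi_pages_premap() downgrades the locks to shared so the pages can be
 * mapped, and vbi_pages_free() re-takes exclusive locks and returns the pages.
 * vbipagevp above is just the dummy vnode handed to page_get_freelist() and
 * page_get_cachelist().
 */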
1323page_t **
1324vbi_pages_alloc(uint64_t *phys, size_t size)
1325{
1326 /*
1327 * The page freelist and cachelist both hold pages that are not mapped into any address space.
1328 * The cachelist does not hold truly free pages; when memory is exhausted they are moved over
1329 * to the freelist.
1330 * It is the total of the free + cache lists that shows up in the 'free' column of vmstat.
1331 */
1332 page_t **pp_pages = NULL;
1333 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1334
1335 /* reserve available memory for pages */
1336 int rc = page_resv(npages, KM_NOSLEEP);
1337 if (rc)
1338 {
1339 /* create the pages */
1340 rc = page_create_wait(npages, 0 /* flags */);
1341 if (rc)
1342 {
1343 /* alloc space for page_t pointer array */
1344 size_t pp_size = npages * sizeof(page_t *);
1345 pp_pages = kmem_zalloc(pp_size, KM_SLEEP);
1346 if (pp_pages)
1347 {
1348 /*
1349 * Get pages from kseg; the 'virtAddr' here is only used for page colouring, but
1350 * unfortunately we don't know the 'virtAddr' at which this memory will eventually be mapped.
1351 */
1352 caddr_t virtAddr = NULL;
1353 for (int64_t i = 0; i < npages; i++, virtAddr += PAGESIZE)
1354 {
1355 /* get a page from the freelists */
1356 page_t *ppage = vbi_page_get_fromlist(1 /* freelist */, virtAddr, PAGESIZE);
1357 if (!ppage)
1358 {
1359 /* try from the cachelists */
1360 ppage = vbi_page_get_fromlist(2 /* cachelist */, virtAddr, PAGESIZE);
1361 if (!ppage)
1362 {
1363 /* allocation failed; back out everything done so far */
1364 page_create_putback(npages - i);
1365 while (--i >= 0)
1366 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1367 kmem_free(pp_pages, pp_size);
1368 page_unresv(npages);
1369 return NULL;
1370 }
1371
1372 /* remove association with the vnode for pages from the cachelist */
1373 if (!PP_ISAGED(ppage))
1374 page_hashout(ppage, NULL /* mutex */);
1375 }
1376
1377 PP_CLRFREE(ppage); /* Page is not free */
1378 PP_CLRAGED(ppage); /* Page is not hashed in */
1379 pp_pages[i] = ppage;
1380 }
1381
1382 /*
1383 * We now hold the pages exclusively locked; the locks must be downgraded
1384 * before the pages are mapped in.
1385 */
1386 *phys = (uint64_t)page_pptonum(pp_pages[0]) << PAGESHIFT;
1387 return pp_pages;
1388 }
1389
1390 page_create_putback(npages);
1391 }
1392
1393 page_unresv(npages);
1394 }
1395
1396 return NULL;
1397}
1398
1399
1400void
1401vbi_pages_free(page_t **pp_pages, size_t size)
1402{
1403 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1404 size_t pp_size = npages * sizeof(page_t *);
1405 for (pgcnt_t i = 0; i < npages; i++)
1406 {
1407 /* we need to exclusively lock the pages before freeing them */
1408 int rc = page_tryupgrade(pp_pages[i]);
1409 if (!rc)
1410 {
1411 page_unlock(pp_pages[i]);
1412 while (!page_lock(pp_pages[i], SE_EXCL, NULL /* mutex */, P_RECLAIM))
1413 ;
1414 }
1415
1416 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1417 }
1418
1419 kmem_free(pp_pages, pp_size);
1420 page_unresv(npages);
1421}
1422
1423
1424int
1425vbi_pages_premap(page_t **pp_pages, size_t size, uint64_t *pphysaddrs)
1426{
1427 if (!pphysaddrs)
1428 return -1;
1429
1430 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1431 for (pgcnt_t i = 0; i < npages; i++)
1432 {
1433 /*
1434 * Prepare the pages for mapping into kernel/user space: we need to
1435 * downgrade the exclusive page lock to a shared lock if the
1436 * page is still locked exclusively.
1437 */
1438 if (page_tryupgrade(pp_pages[i]) == 1)
1439 page_downgrade(pp_pages[i]);
1440 pphysaddrs[i] = vbi_page_to_pa(pp_pages, i);
1441 }
1442
1443 return 0;
1444}
1445
1446
1447uint64_t
1448vbi_page_to_pa(page_t **pp_pages, pgcnt_t i)
1449{
1450 pfn_t pfn = page_pptonum(pp_pages[i]);
1451 if (pfn == PFN_INVALID)
1452 panic("vbi_page_to_pa: page_pptonum() failed\n");
1453 return (uint64_t)pfn << PAGESHIFT;
1454}
1455
1456
1457static page_t *
1458vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize)
1459{
1460 /* pgsize only applies when using the freelist */
1461 seg_t kernseg;
1462 kernseg.s_as = &kas;
1463 page_t *ppage = NULL;
1464 if (freelist == 1)
1465 {
1466 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1467 pgsize, 0 /* flags */, NULL /* local group */);
1468 if (!ppage && use_kflt)
1469 {
1470 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1471 pgsize, 0x0200 /* PG_KFLT */, NULL /* local group */);
1472 }
1473 }
1474 else
1475 {
1476 /* cachelist */
1477 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1478 0 /* flags */, NULL /* local group */);
1479 if (!ppage && use_kflt)
1480 {
1481 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1482 0x0200 /* PG_KFLT */, NULL /* local group */);
1483 }
1484 }
1485 return ppage;
1486}
1487
1488
1489/*
1490 * Large page code.
1491 */
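/*
 * vbi_large_page_alloc() pulls one naturally aligned, physically contiguous
 * run of npages off the freelist as a single large page and marks every
 * constituent page in-use; vbi_large_page_premap() downgrades the constituent
 * page locks to shared before the page is mapped; vbi_large_page_free()
 * re-takes exclusive locks and frees the whole run.
 */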
1492
1493page_t *
1494vbi_large_page_alloc(uint64_t *pphys, size_t pgsize)
1495{
1496 pgcnt_t const npages = pgsize >> PAGESHIFT;
1497 page_t *pproot, *pp, *pplist;
1498 pgcnt_t ipage;
1499 caddr_t vaddr;
1500 seg_t kernseg;
1501 int rc;
1502
1503 /*
1504 * Reserve available memory for a large page and create it.
1505 */
1506 rc = page_resv(npages, KM_NOSLEEP);
1507 if (!rc)
1508 return NULL;
1509
1510 rc = page_create_wait(npages, 0 /* flags */);
1511 if (!rc) {
1512 page_unresv(npages);
1513 return NULL;
1514 }
1515
1516 /*
1517 * Get a page off the free list. We set vaddr to 0 since we don't know
1518 * where the memory is going to be mapped.
1519 */
1520 vaddr = NULL;
1521 kernseg.s_as = &kas;
1522 pproot = vbi_page_get_fromlist(1 /* freelist */, vaddr, pgsize);
1523 if (!pproot)
1524 {
1525 page_create_putback(npages);
1526 page_unresv(npages);
1527 return NULL;
1528 }
1529 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1530
1531 /*
1532 * Mark all the sub-pages as non-free and not-hashed-in.
1533 * It is paramount that we destroy the list (before freeing it).
1534 */
1535 pplist = pproot;
1536 for (ipage = 0; ipage < npages; ipage++) {
1537 pp = pplist;
1538 AssertPtr(pp);
1539 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1540 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1541 page_sub(&pplist, pp);
1542 AssertMsg(PP_ISFREE(pp), ("%p\n", pp));
1543 AssertMsg(pp->p_szc == pproot->p_szc, ("%p - %d expected %d \n", pp, pp->p_szc, pproot->p_szc));
1544
1545 PP_CLRFREE(pp);
1546 PP_CLRAGED(pp);
1547 }
1548
1549 *pphys = (uint64_t)page_pptonum(pproot) << PAGESHIFT;
1550 AssertMsg(!(*pphys & (pgsize - 1)), ("%llx %zx\n", *pphys, pgsize));
1551 return pproot;
1552}
1553
1554void
1555vbi_large_page_free(page_t *pproot, size_t pgsize)
1556{
1557 pgcnt_t const npages = pgsize >> PAGESHIFT;
1558 pgcnt_t ipage;
1559
1560 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1561 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1562
1563 /*
1564 * We need to exclusively lock the sub-pages before freeing
1565 * the large one.
1566 */
1567 for (ipage = 0; ipage < npages; ipage++) {
1568 page_t *pp = page_nextn(pproot, ipage);
1569 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1570 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1571 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1572
1573 int rc = page_tryupgrade(pp);
1574 if (!rc) {
1575 page_unlock(pp);
1576 while (!page_lock(pp, SE_EXCL, NULL /* mutex */, P_RECLAIM)) {
1577 /*nothing*/;
1578 }
1579 }
1580 }
1581
1582 /*
1583 * Free the large page and unreserve the memory.
1584 */
1585 page_free_pages(pproot);
1586 page_unresv(npages);
1587}
1588
1589int
1590vbi_large_page_premap(page_t *pproot, size_t pgsize)
1591{
1592 pgcnt_t const npages = pgsize >> PAGESHIFT;
1593 pgcnt_t ipage;
1594
1595 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1596 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1597
1598 /*
1599 * We need to downgrade the sub-pages from exclusive to shared locking
1600 * because they cannot be mapped in while we hold them exclusively (cf. vbi_pages_premap()).
1601 */
1602 for (ipage = 0; ipage < npages; ipage++) {
1603 page_t *pp = page_nextn(pproot, ipage);
1604 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1605 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1606 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1607
1608 if (page_tryupgrade(pp) == 1)
1609 page_downgrade(pp);
1610 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1611 }
1612
1613 return 0;
1614}
1615
1616
1617/*
1618 * As more functions are added, they should be placed above this point in the
1619 * file, start with a comment indicating the interface revision, and the
1620 * revision level below should be increased. Also change vbi_modlmisc at the top of the file.
1621 *
1622 * NOTE! We'll start caring about this if anything in here ever makes it into
1623 * the Solaris kernel proper.
1624 */
1625uint_t vbi_revision_level = 9;
1626