VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/solaris/vbi/i86pc/os/vbi.c@39208

Last change on this file since 39208 was 39208, checked in by vboxsync, 13 years ago

Solaris/vbi Solaris/NetFlt: close ctf file opens.

  • Property svn:eol-style set to native
File size: 34.0 KB
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010-2011 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Private interfaces for VirtualBox access to Solaris kernel internal
 * facilities.
 *
 * See sys/vbi.h for what each function does.
 */

#include <sys/kmem.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/thread.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/sdt.h>
#include <sys/schedctl.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/vmsystm.h>
#include <sys/cyclic.h>
#include <sys/class.h>
#include <sys/cpuvar.h>
#include <sys/kobj.h>
#include <sys/x_call.h>
#include <sys/x86_archext.h>
#include <vm/hat.h>
#include <vm/seg_vn.h>
#include <vm/seg_kmem.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/machparam.h>
#include <sys/utsname.h>
#include <sys/ctf_api.h>

#include <iprt/assert.h>

#include "vbi.h"

#define VBIPROC() ((proc_t *)vbi_proc())

/*
 * We have to use a runtime symbol lookup (kobj_getsymvalue()) to find
 * contig_free(), since older kernels keep it static.
 */
extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
extern void contig_free(void *, size_t);
#pragma weak contig_free
static void (*p_contig_free)(void *, size_t) = contig_free;

/*
 * We have to look up kflt_init() at runtime to detect whether we can use
 * kernel pages from the freelists once we no longer get user pages from
 * the freelist and cachelists.
 */
/* Introduced in v9 */
static int use_kflt = 0;
static page_t *vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize);


/*
 * Workarounds for running on old versions of solaris with different cross call
 * interfaces. If we find xc_init_cpu() in the kernel, then just use the defined
 * interfaces for xc_call() from the include file, where xc_call() just takes a
 * pointer to a ulong_t array. The array must be long enough to hold "ncpus"
 * bits at runtime.
 *
 * The reason for the hacks is that using the type "cpuset_t" is pretty much
 * impossible from code built outside the Solaris source repository that wants
 * to run on multiple releases of Solaris.
 *
 * For old style xc_call()s, 32 bit solaris and older 64 bit versions use
 * "ulong_t" as cpuset_t.
 *
 * Later versions of 64 bit Solaris used: struct {ulong_t words[x];}
 * where "x" depends on NCPU.
 *
 * We detect the difference in 64 bit support by checking the kernel value of
 * max_cpuid, which always holds the compiled value of NCPU - 1.
 *
 * If Solaris increases NCPU to more than 256, this module will continue
 * to work on all versions of Solaris as long as the number of installed
 * CPUs in the machine is <= VBI_NCPU. If VBI_NCPU is increased, this code
 * has to be rewritten somewhat to provide compatibility with older Solaris,
 * which expects cpuset_t to be based on NCPU==256 -- or we discontinue support
 * of old Nevada/S10.
 */
static int use_old = 0;
static int use_old_with_ulong = 0;
static void (*p_xc_call)() = (void (*)())xc_call;

#define VBI_NCPU	256
#define VBI_SET_WORDS	(VBI_NCPU / (sizeof (ulong_t) * 8)) /* e.g. 4 words for 256 CPUs with 64-bit ulong_t */
typedef struct vbi_cpuset {
    ulong_t words[VBI_SET_WORDS];
} vbi_cpuset_t;
#define X_CALL_HIPRI	(2)	/* for old Solaris interface */
121
122/*
123 * module linkage stuff
124 */
125#if 0
126static struct modlmisc vbi_modlmisc = {
127 &mod_miscops, "VirtualBox Interfaces V8"
128};
129
130static struct modlinkage vbi_modlinkage = {
131 MODREV_1, { (void *)&vbi_modlmisc, NULL }
132};
133#endif
134
135extern uintptr_t kernelbase;
136#define IS_KERNEL(v) ((uintptr_t)(v) >= kernelbase)
137
138#if 0
139static int vbi_verbose = 0;
140
141#define VBI_VERBOSE(msg) {if (vbi_verbose) cmn_err(CE_WARN, msg);}
142#endif
143
/* Introduced in v8 */
static int vbi_is_initialized = 0;

/* Which offsets will be used */
static int off_cpu_runrun = -1;
static int off_cpu_kprunrun = -1;
static int off_t_preempt = -1;

#define VBI_T_PREEMPT		(*((char *)curthread + off_t_preempt))
#define VBI_CPU_KPRUNRUN	(*((char *)CPU + off_cpu_kprunrun))
#define VBI_CPU_RUNRUN		(*((char *)CPU + off_cpu_runrun))

#undef kpreempt_disable
#undef kpreempt_enable

#define VBI_PREEMPT_DISABLE() \
    { \
        VBI_T_PREEMPT++; \
        ASSERT(VBI_T_PREEMPT >= 1); \
    }
#define VBI_PREEMPT_ENABLE() \
    { \
        ASSERT(VBI_T_PREEMPT >= 1); \
        if (--VBI_T_PREEMPT == 0 && \
            VBI_CPU_RUNRUN) \
            kpreempt(KPREEMPT_SYNC); \
    }

/* End of v6 intro */

#if 0
int
_init(void)
{
    int err = vbi_init();
    if (!err)
        err = mod_install(&vbi_modlinkage);
    return (err);
}
#endif

static int
vbi_get_ctf_member_offset(ctf_file_t *ctfp, const char *structname, const char *membername, int *offset)
{
    AssertReturn(ctfp, CTF_ERR);
    AssertReturn(structname, CTF_ERR);
    AssertReturn(membername, CTF_ERR);
    AssertReturn(offset, CTF_ERR);

    ctf_id_t typeident = ctf_lookup_by_name(ctfp, structname);
    if (typeident != CTF_ERR)
    {
        ctf_membinfo_t memberinfo;
        bzero(&memberinfo, sizeof(memberinfo));
        if (ctf_member_info(ctfp, typeident, membername, &memberinfo) != CTF_ERR)
        {
            *offset = (memberinfo.ctm_offset >> 3);
            cmn_err(CE_CONT, "!%s::%s at %d\n", structname, membername, *offset);
            return (0);
        }
        else
            cmn_err(CE_NOTE, "ctf_member_info failed for struct %s member %s\n", structname, membername);
    }
    else
        cmn_err(CE_NOTE, "ctf_lookup_by_name failed for struct %s\n", structname);

    return (CTF_ERR);
}


int
vbi_init(void)
{
    /*
     * Check to see if this version of the virtualbox interface module will
     * work with the kernel.
     */
    if (kobj_getsymvalue("xc_init_cpu", 1) != NULL) {
        /*
         * Our bit vector storage needs to be large enough for the
         * actual number of CPUs running in the system.
         */
        if (ncpus > VBI_NCPU) {
            cmn_err(CE_NOTE, "cpu count mismatch.\n");
            return (EINVAL);
        }
    } else {
        use_old = 1;
        if (max_cpuid + 1 == sizeof(ulong_t) * 8)
            use_old_with_ulong = 1;
        else if (max_cpuid + 1 != VBI_NCPU)
        {
            cmn_err(CE_NOTE, "cpuset_t size mismatch. probably too old a kernel.\n");
            return (EINVAL); /* cpuset_t size mismatch */
        }
    }

    /*
     * In older versions of Solaris contig_free() is a static routine.
     */
    if (p_contig_free == NULL) {
        p_contig_free = (void (*)(void *, size_t))
            kobj_getsymvalue("contig_free", 1);
        if (p_contig_free == NULL) {
            cmn_err(CE_NOTE, "contig_free() not found in kernel\n");
            return (EINVAL);
        }
    }

    /*
     * Use kernel page freelist flags to get pages from kernel page freelists
     * while allocating physical pages, once the user pages are exhausted.
     * snv_161+, see @bugref{5632}.
     */
    if (kobj_getsymvalue("kflt_init", 1) != NULL)
    {
        int *p_kflt_disable = (int *)kobj_getsymvalue("kflt_disable", 1); /* amd64 only, on 32-bit kflt's are disabled. */
        if (p_kflt_disable && *p_kflt_disable == 0)
        {
            use_kflt = 1;
        }
    }

    /*
     * CTF probing for fluid, private members.
     */
    int err = 0;
    modctl_t *genunix_modctl = mod_hold_by_name("genunix");
    if (genunix_modctl)
    {
        ctf_file_t *ctfp = ctf_modopen(genunix_modctl->mod_mp, &err);
        if (ctfp)
        {
            do {
                err = vbi_get_ctf_member_offset(ctfp, "kthread_t", "t_preempt", &off_t_preempt); AssertBreak(!err);
                err = vbi_get_ctf_member_offset(ctfp, "cpu_t", "cpu_runrun", &off_cpu_runrun); AssertBreak(!err);
                err = vbi_get_ctf_member_offset(ctfp, "cpu_t", "cpu_kprunrun", &off_cpu_kprunrun); AssertBreak(!err);
            } while (0);
            ctf_close(ctfp);
        }

        mod_release_mod(genunix_modctl);
    }
    else
    {
        cmn_err(CE_NOTE, "failed to open module genunix.\n");
        err = EINVAL;
    }

    if (err)
        return (EINVAL);

    vbi_is_initialized = 1;

    return (0);
}
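
/*
 * Illustrative usage sketch (editor's example, not part of the original
 * file; example_attach is a hypothetical caller): vbi_init() must succeed
 * before any other vbi_* service is used, and it fails with EINVAL on
 * kernels this vbi build does not understand.
 */
#if 0
static int
example_attach(void)
{
    if (vbi_init() != 0)
        return (EINVAL);    /* kernel/vbi version mismatch */
    /* vbi_* services may be used from here on */
    return (0);
}
#endif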

#if 0
int
_fini(void)
{
    int err = mod_remove(&vbi_modlinkage);
    if (err != 0)
        return (err);

    return (0);
}

int
_info(struct modinfo *modinfop)
{
    return (mod_info(&vbi_modlinkage, modinfop));
}
#endif

static ddi_dma_attr_t base_attr = {
    DMA_ATTR_V0,            /* Version Number */
    (uint64_t)0,            /* lower limit */
    (uint64_t)0,            /* high limit */
    (uint64_t)0xffffffff,   /* counter limit */
    (uint64_t)PAGESIZE,     /* pagesize alignment */
    (uint64_t)PAGESIZE,     /* pagesize burst size */
    (uint64_t)PAGESIZE,     /* pagesize effective DMA size */
    (uint64_t)0xffffffff,   /* max DMA xfer size */
    (uint64_t)0xffffffff,   /* segment boundary */
    1,                      /* list length (1 for contiguous) */
    1,                      /* device granularity */
    0                       /* bus-specific flags */
};

static void *
vbi_internal_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
{
    ddi_dma_attr_t attr;
    pfn_t pfn;
    void *ptr;
    uint_t npages;

    if ((size & PAGEOFFSET) != 0)
        return (NULL);
    npages = (size + PAGESIZE - 1) >> PAGESHIFT;
    if (npages == 0)
        return (NULL);

    attr = base_attr;
    attr.dma_attr_addr_hi = *phys;
    attr.dma_attr_align = alignment;
    if (!contig)
        attr.dma_attr_sgllen = npages;
    ptr = contig_alloc(size, &attr, PAGESIZE, 1);

    if (ptr == NULL) {
        cmn_err(CE_NOTE, "vbi_internal_alloc() failure for %lu bytes contig=%d", size, contig);
        return (NULL);
    }

    pfn = hat_getpfnum(kas.a_hat, (caddr_t)ptr);
    if (pfn == PFN_INVALID)
        panic("vbi_contig_alloc(): hat_getpfnum() failed\n");
    *phys = (uint64_t)pfn << PAGESHIFT;
    return (ptr);
}

void *
vbi_contig_alloc(uint64_t *phys, size_t size)
{
    /* Obsolete */
    return (vbi_internal_alloc(phys, size, PAGESIZE /* alignment */, 1 /* contiguous */));
}

void
vbi_contig_free(void *va, size_t size)
{
    /* Obsolete */
    p_contig_free(va, size);
}

void *
vbi_kernel_map(uint64_t pa, size_t size, uint_t prot)
{
    caddr_t va;

    if ((pa & PAGEOFFSET) || (size & PAGEOFFSET)) {
        cmn_err(CE_NOTE, "vbi_kernel_map() bad pa (0x%lx) or size (%lu)", pa, size);
        return (NULL);
    }

    va = vmem_alloc(heap_arena, size, VM_SLEEP);

    hat_devload(kas.a_hat, va, size, (pfn_t)(pa >> PAGESHIFT),
        prot, HAT_LOAD | HAT_LOAD_LOCK | HAT_UNORDERED_OK);

    return (va);
}

void
vbi_unmap(void *va, size_t size)
{
    if (IS_KERNEL(va)) {
        hat_unload(kas.a_hat, va, size, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
        vmem_free(heap_arena, va, size);
    } else {
        struct as *as = VBIPROC()->p_as;

        as_rangelock(as);
        (void) as_unmap(as, va, size);
        as_rangeunlock(as);
    }
}
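
/*
 * Illustrative usage sketch (editor's example; the address handling below
 * is hypothetical): a page-aligned physical range can be given a temporary
 * kernel mapping and torn down again with vbi_unmap().
 */
#if 0
static void
example_peek_phys(uint64_t pa)
{
    void *va = vbi_kernel_map(pa, PAGESIZE, PROT_READ);
    if (va != NULL) {
        /* ... read the page through va ... */
        vbi_unmap(va, PAGESIZE);
    }
}
#endif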

void *
vbi_curthread(void)
{
    return (curthread);
}

int
vbi_yield(void)
{
    int rv = 0;

    vbi_preempt_disable();

    char tpr = VBI_T_PREEMPT;
    char kpr = VBI_CPU_KPRUNRUN;
    if (tpr == 1 && kpr)
        rv = 1;

    vbi_preempt_enable();
    return (rv);
}

uint64_t
vbi_timer_granularity(void)
{
    return (nsec_per_tick);
}

typedef struct vbi_timer {
    cyc_handler_t vbi_handler;
    cyclic_id_t vbi_cyclic;
    uint64_t vbi_interval;
    void (*vbi_func)();
    void *vbi_arg1;
    void *vbi_arg2;
} vbi_timer_t;

static void
vbi_timer_callback(void *arg)
{
    vbi_timer_t *t = arg;

    if (t->vbi_interval == 0)
        vbi_timer_stop(arg);
    t->vbi_func(t->vbi_arg1, t->vbi_arg2);
}

void *
vbi_timer_create(void *callback, void *arg1, void *arg2, uint64_t interval)
{
    vbi_timer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);

    t->vbi_func = (void (*)())callback;
    t->vbi_arg1 = arg1;
    t->vbi_arg2 = arg2;
    t->vbi_handler.cyh_func = vbi_timer_callback;
    t->vbi_handler.cyh_arg = (void *)t;
    t->vbi_handler.cyh_level = CY_LOCK_LEVEL;
    t->vbi_cyclic = CYCLIC_NONE;
    t->vbi_interval = interval;
    return (t);
}

void
vbi_timer_destroy(void *timer)
{
    vbi_timer_t *t = timer;
    if (t != NULL) {
        vbi_timer_stop(timer);
        kmem_free(t, sizeof (*t));
    }
}

void
vbi_timer_start(void *timer, uint64_t when)
{
    vbi_timer_t *t = timer;
    cyc_time_t fire_time;
    uint64_t interval = t->vbi_interval;

    mutex_enter(&cpu_lock);
    when += gethrtime();
    fire_time.cyt_when = when;
    if (interval == 0)
        fire_time.cyt_interval = when;
    else
        fire_time.cyt_interval = interval;
    t->vbi_cyclic = cyclic_add(&t->vbi_handler, &fire_time);
    mutex_exit(&cpu_lock);
}

void
vbi_timer_stop(void *timer)
{
    vbi_timer_t *t = timer;

    if (t->vbi_cyclic == CYCLIC_NONE)
        return;
    mutex_enter(&cpu_lock);
    if (t->vbi_cyclic != CYCLIC_NONE) {
        cyclic_remove(t->vbi_cyclic);
        t->vbi_cyclic = CYCLIC_NONE;
    }
    mutex_exit(&cpu_lock);
}
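
/*
 * Illustrative usage sketch (editor's example; example_tick and the
 * interval value are hypothetical): a periodic timer fires every 10ms
 * until stopped and destroyed. Callbacks run from cyclic context at
 * CY_LOCK_LEVEL, so they must not block.
 */
#if 0
static void
example_tick(void *arg1, void *arg2)
{
    /* ... short, non-blocking periodic work ... */
}

static void
example_timer_demo(void)
{
    void *t = vbi_timer_create((void *)example_tick, NULL, NULL,
        10000000 /* 10ms interval in ns */);
    vbi_timer_start(t, 0 /* first fire: now */);
    /* ... */
    vbi_timer_destroy(t);   /* stops the cyclic and frees the timer */
}
#endif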

uint64_t
vbi_tod(void)
{
    timestruc_t ts;

    mutex_enter(&tod_lock);
    ts = tod_get();
    mutex_exit(&tod_lock);
    return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}


void *
vbi_proc(void)
{
    proc_t *p;
    drv_getparm(UPROCP, &p);
    return (p);
}

void
vbi_set_priority(void *thread, int priority)
{
    kthread_t *t = thread;

    thread_lock(t);
    (void) thread_change_pri(t, priority, 0);
    thread_unlock(t);
}

void *
vbi_thread_create(void (*func)(void *), void *arg, size_t len, int priority)
{
    kthread_t *t;

    t = thread_create(NULL, NULL, (void (*)())func, arg, len,
        VBIPROC(), TS_RUN, priority);
    return (t);
}

void
vbi_thread_exit(void)
{
    thread_exit();
}
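
/*
 * Illustrative usage sketch (editor's example; example_worker and the use
 * of minclsyspri as the priority are hypothetical): a kernel thread created
 * through vbi_thread_create() should end by calling vbi_thread_exit().
 */
#if 0
static void
example_worker(void *arg)
{
    /* ... do work ... */
    vbi_thread_exit();
}

static void
example_spawn(void)
{
    (void) vbi_thread_create(example_worker, NULL, 0 /* len */, minclsyspri);
}
#endif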

void *
vbi_text_alloc(size_t size)
{
    return (segkmem_alloc(heaptext_arena, size, KM_SLEEP));
}

void
vbi_text_free(void *va, size_t size)
{
    segkmem_free(heaptext_arena, va, size);
}

int
vbi_cpu_id(void)
{
    return (CPU->cpu_id);
}

int
vbi_max_cpu_id(void)
{
    return (max_cpuid);
}

int
vbi_cpu_maxcount(void)
{
    return (max_cpuid + 1);
}

int
vbi_cpu_count(void)
{
    return (ncpus);
}

int
vbi_cpu_online(int c)
{
    int x;

    mutex_enter(&cpu_lock);
    x = cpu_is_online(cpu[c]);
    mutex_exit(&cpu_lock);
    return (x);
}

void
vbi_preempt_disable(void)
{
    VBI_PREEMPT_DISABLE();
}

void
vbi_preempt_enable(void)
{
    VBI_PREEMPT_ENABLE();
}
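
/*
 * Illustrative usage sketch (editor's example): preemption-sensitive work,
 * e.g. sampling the current CPU id consistently, is bracketed by the two
 * calls above; they nest via the kthread_t t_preempt count.
 */
#if 0
static int
example_stable_cpu_id(void)
{
    int c;

    vbi_preempt_disable();
    c = vbi_cpu_id();   /* the thread cannot migrate while preemption is off */
    vbi_preempt_enable();
    return (c);
}
#endif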

void
vbi_execute_on_all(void *func, void *arg)
{
    vbi_cpuset_t set;
    int i;

    for (i = 0; i < VBI_SET_WORDS; ++i)
        set.words[i] = (ulong_t)-1L;
    if (use_old) {
        if (use_old_with_ulong) {
            p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
                set.words[0], (xc_func_t)func);
        } else {
            p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
                set, (xc_func_t)func);
        }
    } else {
        xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
    }
}

void
vbi_execute_on_others(void *func, void *arg)
{
    vbi_cpuset_t set;
    int i;

    for (i = 0; i < VBI_SET_WORDS; ++i)
        set.words[i] = (ulong_t)-1L;
    BT_CLEAR(set.words, vbi_cpu_id());
    if (use_old) {
        if (use_old_with_ulong) {
            p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
                set.words[0], (xc_func_t)func);
        } else {
            p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
                set, (xc_func_t)func);
        }
    } else {
        xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
    }
}

void
vbi_execute_on_one(void *func, void *arg, int c)
{
    vbi_cpuset_t set;
    int i;

    for (i = 0; i < VBI_SET_WORDS; ++i)
        set.words[i] = 0;
    BT_SET(set.words, c);
    if (use_old) {
        if (use_old_with_ulong) {
            p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
                set.words[0], (xc_func_t)func);
        } else {
            p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
                set, (xc_func_t)func);
        }
    } else {
        xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
    }
}
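
/*
 * Illustrative usage sketch (editor's example; example_flush is a
 * hypothetical handler and its single-argument shape is an assumption --
 * the vbi wrappers above pass the handler through an (xc_func_t) cast and
 * hand it arg as the first cross-call argument): handlers run on each
 * target CPU at high interrupt level, so they must be short and lock-free.
 */
#if 0
static int
example_flush(xc_arg_t arg)
{
    /* ... per-CPU work, e.g. invalidating a CPU-local cache ... */
    return (0);
}

static void
example_flush_all_cpus(void *ctx)
{
    vbi_execute_on_all((void *)example_flush, ctx);
}
#endif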

int
vbi_lock_va(void *addr, size_t len, int access, void **handle)
{
    faultcode_t err;

    /*
     * kernel mappings on x86 are always locked, so only handle user.
     */
    *handle = NULL;
    if (!IS_KERNEL(addr)) {
        err = as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
            (caddr_t)addr, len, F_SOFTLOCK, access);
        if (err != 0) {
            cmn_err(CE_NOTE, "vbi_lock_va() failed to lock");
            return (-1);
        }
    }
    return (0);
}

/*ARGSUSED*/
void
vbi_unlock_va(void *addr, size_t len, int access, void *handle)
{
    if (!IS_KERNEL(addr))
        as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
            (caddr_t)addr, len, F_SOFTUNLOCK, access);
}

uint64_t
vbi_va_to_pa(void *addr)
{
    struct hat *hat;
    pfn_t pfn;
    uintptr_t v = (uintptr_t)addr;

    if (IS_KERNEL(v))
        hat = kas.a_hat;
    else
        hat = VBIPROC()->p_as->a_hat;
    pfn = hat_getpfnum(hat, (caddr_t)(v & PAGEMASK));
    if (pfn == PFN_INVALID)
        return (-(uint64_t)1);
    return (((uint64_t)pfn << PAGESHIFT) | (v & PAGEOFFSET));
}
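
/*
 * Illustrative usage sketch (editor's example; example_pin_and_translate is
 * hypothetical, S_WRITE is the seg_rw access value passed through to
 * as_fault()): a user buffer is soft-locked before translating its address
 * for DMA and unlocked afterwards; without the lock the page could be paged
 * out between translation and use.
 */
#if 0
static int
example_pin_and_translate(void *uaddr, uint64_t *pa)
{
    void *handle;

    if (vbi_lock_va(uaddr, PAGESIZE, S_WRITE, &handle) != 0)
        return (-1);
    *pa = vbi_va_to_pa(uaddr);
    /* ... program DMA with *pa and wait for completion ... */
    vbi_unlock_va(uaddr, PAGESIZE, S_WRITE, handle);
    return (0);
}
#endif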


struct segvbi_crargs {
    uint64_t *palist;
    uint_t prot;
};

struct segvbi_data {
    uint_t prot;
};

static struct seg_ops segvbi_ops;

static int
segvbi_create(struct seg *seg, void *args)
{
    struct segvbi_crargs *a = args;
    struct segvbi_data *data;
    struct as *as = seg->s_as;
    caddr_t va;
    ulong_t pgcnt;
    ulong_t p;

    hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
    data = kmem_zalloc(sizeof (*data), KM_SLEEP);
    data->prot = a->prot | PROT_USER;

    seg->s_ops = &segvbi_ops;
    seg->s_data = data;

    /*
     * now load locked mappings to the pages
     */
    va = seg->s_base;
    pgcnt = (seg->s_size + PAGESIZE - 1) >> PAGESHIFT;
    for (p = 0; p < pgcnt; ++p, va += PAGESIZE) {
        hat_devload(as->a_hat, va,
            PAGESIZE, a->palist[p] >> PAGESHIFT,
            data->prot | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
    }

    return (0);
}

/*
 * Duplicate a seg and return new segment in newseg.
 */
static int
segvbi_dup(struct seg *seg, struct seg *newseg)
{
    struct segvbi_data *data = seg->s_data;
    struct segvbi_data *ndata;

    ndata = kmem_zalloc(sizeof (*data), KM_SLEEP);
    ndata->prot = data->prot;
    newseg->s_ops = &segvbi_ops;
    newseg->s_data = ndata;

    return (0);
}

static int
segvbi_unmap(struct seg *seg, caddr_t addr, size_t len)
{
    if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
        (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
        panic("segvbi_unmap");

    if (addr != seg->s_base || len != seg->s_size)
        return (ENOTSUP);

    hat_unload(seg->s_as->a_hat, addr, len,
        HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);

    seg_free(seg);
    return (0);
}

static void
segvbi_free(struct seg *seg)
{
    struct segvbi_data *data = seg->s_data;
    kmem_free(data, sizeof (*data));
}

/*
 * We would demand fault if the (u)read() path were to SEGOP_FAULT()
 * on buffers mapped in via vbi_user_map(), i.e. prefaults before DMA.
 * Don't fail in the case where we're called directly, see #5047.
 */
static int
segvbi_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
    return (0);
}

static int
segvbi_faulta(struct seg *seg, caddr_t addr)
{
    return (0);
}

static int
segvbi_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
    return (EACCES);
}

static int
segvbi_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
    return (EINVAL);
}

static int
segvbi_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
    return (-1);
}

static int
segvbi_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
    return (0);
}

static size_t
segvbi_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
    size_t v;

    for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
        len -= PAGESIZE, v += PAGESIZE)
        *vec++ = 1;
    return (v);
}

static int
segvbi_lockop(struct seg *seg, caddr_t addr,
    size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
{
    return (0);
}

static int
segvbi_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
    struct segvbi_data *data = seg->s_data;
    size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
    if (pgno != 0)
    {
        do
        {
            pgno--;
            protv[pgno] = data->prot;
        } while (pgno != 0);
    }
    return (0);
}

static u_offset_t
segvbi_getoffset(struct seg *seg, caddr_t addr)
{
    return ((uintptr_t)addr - (uintptr_t)seg->s_base);
}

static int
segvbi_gettype(struct seg *seg, caddr_t addr)
{
    return (MAP_SHARED);
}

static vnode_t vbivp;

static int
segvbi_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
    *vpp = &vbivp;
    return (0);
}

static int
segvbi_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
    return (0);
}

static void
segvbi_dump(struct seg *seg)
{}

static int
segvbi_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
    return (ENOTSUP);
}

static int
segvbi_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
    return (ENOTSUP);
}

static int
segvbi_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
{
    return (ENODEV);
}

static lgrp_mem_policy_info_t *
segvbi_getpolicy(struct seg *seg, caddr_t addr)
{
    return (NULL);
}

static int
segvbi_capable(struct seg *seg, segcapability_t capability)
{
    return (0);
}

static struct seg_ops segvbi_ops = {
    segvbi_dup,
    segvbi_unmap,
    segvbi_free,
    segvbi_fault,
    segvbi_faulta,
    segvbi_setprot,
    segvbi_checkprot,
    (int (*)())segvbi_kluster,
    (size_t (*)(struct seg *))NULL, /* swapout */
    segvbi_sync,
    segvbi_incore,
    segvbi_lockop,
    segvbi_getprot,
    segvbi_getoffset,
    segvbi_gettype,
    segvbi_getvp,
    segvbi_advise,
    segvbi_dump,
    segvbi_pagelock,
    segvbi_setpagesize,
    segvbi_getmemid,
    segvbi_getpolicy,
    segvbi_capable
};



/*
 * Interfaces to inject physical pages into user address space
 * and later remove them.
 */
int
vbi_user_map(caddr_t *va, uint_t prot, uint64_t *palist, size_t len)
{
    struct as *as = VBIPROC()->p_as;
    struct segvbi_crargs args;
    int error = 0;

    args.palist = palist;
    args.prot = prot;
    as_rangelock(as);
    map_addr(va, len, 0, 0, MAP_SHARED);
    if (*va != NULL)
        error = as_map(as, *va, len, segvbi_create, &args);
    else
        error = ENOMEM;
    if (error)
        cmn_err(CE_NOTE, "vbi_user_map() failed error=%d", error);
    as_rangeunlock(as);
    return (error);
}


/*
 * This is revision 2 of the interface.
 */

struct vbi_cpu_watch {
    void (*vbi_cpu_func)(void *, int, int);
    void *vbi_cpu_arg;
};

static int
vbi_watcher(cpu_setup_t state, int icpu, void *arg)
{
    vbi_cpu_watch_t *w = arg;
    int online;

    if (state == CPU_ON)
        online = 1;
    else if (state == CPU_OFF)
        online = 0;
    else
        return (0);
    w->vbi_cpu_func(w->vbi_cpu_arg, icpu, online);
    return (0);
}

vbi_cpu_watch_t *
vbi_watch_cpus(void (*func)(void *, int, int), void *arg, int current_too)
{
    int c;
    vbi_cpu_watch_t *w;

    w = kmem_alloc(sizeof (*w), KM_SLEEP);
    w->vbi_cpu_func = func;
    w->vbi_cpu_arg = arg;
    mutex_enter(&cpu_lock);
    register_cpu_setup_func(vbi_watcher, w);
    if (current_too) {
        for (c = 0; c < ncpus; ++c) {
            if (cpu_is_online(cpu[c]))
                func(arg, c, 1);
        }
    }
    mutex_exit(&cpu_lock);
    return (w);
}

void
vbi_ignore_cpus(vbi_cpu_watch_t *w)
{
    mutex_enter(&cpu_lock);
    unregister_cpu_setup_func(vbi_watcher, w);
    mutex_exit(&cpu_lock);
    kmem_free(w, sizeof (*w));
}
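
/*
 * Illustrative usage sketch (editor's example; example_cpu_change is
 * hypothetical): the callback receives the watcher's arg, the CPU id and
 * 1/0 for online/offline; with current_too set it is also invoked once for
 * every CPU that is already online at registration time.
 */
#if 0
static void
example_cpu_change(void *arg, int icpu, int online)
{
    cmn_err(CE_CONT, "!cpu %d is now %s\n", icpu, online ? "online" : "offline");
}

static void
example_watch_demo(void)
{
    vbi_cpu_watch_t *w = vbi_watch_cpus(example_cpu_change, NULL, 1);
    /* ... */
    vbi_ignore_cpus(w);
}
#endif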

/*
 * Simple timers are pretty much a pass through to the cyclic subsystem.
 */
struct vbi_stimer {
    cyc_handler_t s_handler;
    cyc_time_t s_fire_time;
    cyclic_id_t s_cyclic;
    uint64_t s_tick;
    void (*s_func)(void *, uint64_t);
    void *s_arg;
};

static void
vbi_stimer_func(void *arg)
{
    vbi_stimer_t *t = arg;
    t->s_func(t->s_arg, ++t->s_tick);
}

extern vbi_stimer_t *
vbi_stimer_begin(
    void (*func)(void *, uint64_t),
    void *arg,
    uint64_t when,
    uint64_t interval,
    int on_cpu)
{
    vbi_stimer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);

    t->s_handler.cyh_func = vbi_stimer_func;
    t->s_handler.cyh_arg = t;
    t->s_handler.cyh_level = CY_LOCK_LEVEL;
    t->s_tick = 0;
    t->s_func = func;
    t->s_arg = arg;

    mutex_enter(&cpu_lock);
    if (on_cpu != VBI_ANY_CPU && !cpu_is_online(cpu[on_cpu])) {
        kmem_free(t, sizeof (*t)); /* target cpu offline: fail without leaking the timer */
        t = NULL;
        goto done;
    }

    when += gethrtime();
    t->s_fire_time.cyt_when = when;
    if (interval == 0)
        t->s_fire_time.cyt_interval = INT64_MAX - when;
    else
        t->s_fire_time.cyt_interval = interval;
    t->s_cyclic = cyclic_add(&t->s_handler, &t->s_fire_time);
    if (on_cpu != VBI_ANY_CPU)
        cyclic_bind(t->s_cyclic, cpu[on_cpu], NULL);
done:
    mutex_exit(&cpu_lock);
    return (t);
}

extern void
vbi_stimer_end(vbi_stimer_t *t)
{
    mutex_enter(&cpu_lock);
    cyclic_remove(t->s_cyclic);
    mutex_exit(&cpu_lock);
    kmem_free(t, sizeof (*t));
}
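
/*
 * Illustrative usage sketch (editor's example; example_stick and the
 * interval are hypothetical): a simple timer bound to CPU 0, firing every
 * second and passing a monotonically increasing tick count to the callback.
 */
#if 0
static void
example_stick(void *arg, uint64_t tick)
{
    /* tick is 1 on the first call, 2 on the second, ... */
}

static void
example_stimer_demo(void)
{
    vbi_stimer_t *t = vbi_stimer_begin(example_stick, NULL,
        0 /* start now */, 1000000000 /* 1s */, 0 /* bind to cpu 0 */);
    if (t != NULL)
        vbi_stimer_end(t);
}
#endif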

/*
 * Global timers are more complicated. They include a per-CPU counter that is
 * passed to the callback, so the callback can tell how many times it has
 * fired on a given cpu (and in particular whether this is the first call
 * there since the cpu came online).
 */
struct vbi_gtimer {
    uint64_t *g_counters;
    void (*g_func)(void *, uint64_t);
    void *g_arg;
    uint64_t g_when;
    uint64_t g_interval;
    cyclic_id_t g_cyclic;
};

static void
vbi_gtimer_func(void *arg)
{
    vbi_gtimer_t *t = arg;
    t->g_func(t->g_arg, ++t->g_counters[vbi_cpu_id()]);
}

/*
 * Whenever a cpu is onlined, need to reset the g_counters[] for it to zero.
 */
static void
vbi_gtimer_online(void *arg, cpu_t *pcpu, cyc_handler_t *h, cyc_time_t *ct)
{
    vbi_gtimer_t *t = arg;
    hrtime_t now;

    t->g_counters[pcpu->cpu_id] = 0;
    h->cyh_func = vbi_gtimer_func;
    h->cyh_arg = t;
    h->cyh_level = CY_LOCK_LEVEL;
    now = gethrtime();
    if (t->g_when < now)
        ct->cyt_when = now + t->g_interval / 2;
    else
        ct->cyt_when = t->g_when;
    ct->cyt_interval = t->g_interval;
}


vbi_gtimer_t *
vbi_gtimer_begin(
    void (*func)(void *, uint64_t),
    void *arg,
    uint64_t when,
    uint64_t interval)
{
    vbi_gtimer_t *t;
    cyc_omni_handler_t omni;

    /*
     * one shot global timer is not supported yet.
     */
    if (interval == 0)
        return (NULL);

    t = kmem_zalloc(sizeof (*t), KM_SLEEP);
    t->g_counters = kmem_zalloc(ncpus * sizeof (uint64_t), KM_SLEEP);
    t->g_when = when + gethrtime();
    t->g_interval = interval;
    t->g_arg = arg;
    t->g_func = func;
    t->g_cyclic = CYCLIC_NONE;

    omni.cyo_online = (void (*)(void *, cpu_t *, cyc_handler_t *, cyc_time_t *))vbi_gtimer_online;
    omni.cyo_offline = NULL;
    omni.cyo_arg = t;

    mutex_enter(&cpu_lock);
    t->g_cyclic = cyclic_add_omni(&omni);
    mutex_exit(&cpu_lock);
    return (t);
}

extern void
vbi_gtimer_end(vbi_gtimer_t *t)
{
    mutex_enter(&cpu_lock);
    cyclic_remove(t->g_cyclic);
    mutex_exit(&cpu_lock);
    kmem_free(t->g_counters, ncpus * sizeof (uint64_t));
    kmem_free(t, sizeof (*t));
}
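
/*
 * Illustrative usage sketch (editor's example; example_gtick is
 * hypothetical): a global timer fires on every online CPU; a count of 1
 * marks the first invocation on that particular CPU.
 */
#if 0
static void
example_gtick(void *arg, uint64_t count)
{
    if (count == 1) {
        /* first call on this CPU since it came online */
    }
}

static void
example_gtimer_demo(void)
{
    vbi_gtimer_t *t = vbi_gtimer_begin(example_gtick, NULL, 0, 10000000 /* 10ms */);
    if (t != NULL)
        vbi_gtimer_end(t);
}
#endif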

int
vbi_is_preempt_enabled(void)
{
    if (vbi_is_initialized) {
        char tpr = VBI_T_PREEMPT;
        return (tpr == 0);
    } else {
        cmn_err(CE_NOTE, "vbi_is_preempt_enabled: called without initializing vbi!\n");
        return (1);
    }
}

void
vbi_poke_cpu(int c)
{
    if (c < ncpus)
        poke_cpu(c);
}

/*
 * This is revision 5 of the interface.
 */

void *
vbi_lowmem_alloc(uint64_t phys, size_t size)
{
    return (vbi_internal_alloc(&phys, size, PAGESIZE /* alignment */, 0 /* non-contiguous */));
}

void
vbi_lowmem_free(void *va, size_t size)
{
    p_contig_free(va, size);
}

/*
 * This is revision 6 of the interface.
 */

int
vbi_is_preempt_pending(void)
{
    char crr = VBI_CPU_RUNRUN;
    char krr = VBI_CPU_KPRUNRUN;
    return (crr != 0 || krr != 0);
}

/*
 * This is revision 7 of the interface.
 */

void *
vbi_phys_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
{
    return (vbi_internal_alloc(phys, size, alignment, contig));
}

void
vbi_phys_free(void *va, size_t size)
{
    p_contig_free(va, size);
}


/*
 * This is revision 8 of the interface.
 */
static vnode_t vbipagevp;

page_t **
vbi_pages_alloc(uint64_t *phys, size_t size)
{
    /*
     * The page freelist and cachelist both hold pages that are not mapped
     * into any address space. The cachelist is not really free pages, but
     * when memory is exhausted they'll be moved to the freelists. It's the
     * total of the free and cache lists that shows up in the 'free' column
     * of vmstat.
     */
    page_t **pp_pages = NULL;
    pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;

    /* reserve available memory for pages */
    int rc = page_resv(npages, KM_NOSLEEP);
    if (rc)
    {
        /* create the pages */
        rc = page_create_wait(npages, 0 /* flags */);
        if (rc)
        {
            /* alloc space for page_t pointer array */
            size_t pp_size = npages * sizeof(page_t *);
            pp_pages = kmem_zalloc(pp_size, KM_SLEEP);
            if (pp_pages)
            {
                /*
                 * Get pages from kseg. The 'virtAddr' here is only for
                 * colouring, but unfortunately we don't have the 'virtAddr'
                 * to which this memory may be mapped.
                 */
                caddr_t virtAddr = NULL;
                for (int64_t i = 0; i < npages; i++, virtAddr += PAGESIZE)
                {
                    /* get a page from the freelists */
                    page_t *ppage = vbi_page_get_fromlist(1 /* freelist */, virtAddr, PAGESIZE);
                    if (!ppage)
                    {
                        /* try from the cachelists */
                        ppage = vbi_page_get_fromlist(2 /* cachelist */, virtAddr, PAGESIZE);
                        if (!ppage)
                        {
                            /* out of pages: unwind everything done so far */
                            page_create_putback(npages - i);
                            while (--i >= 0)
                                page_free(pp_pages[i], 0 /* don't need, move to tail */);
                            kmem_free(pp_pages, pp_size);
                            page_unresv(npages);
                            return NULL;
                        }

                        /* remove association with the vnode for pages from the cachelist */
                        if (!PP_ISAGED(ppage))
                            page_hashout(ppage, NULL /* mutex */);
                    }

                    PP_CLRFREE(ppage);  /* Page is not free */
                    PP_CLRAGED(ppage);  /* Page is not hashed in */
                    pp_pages[i] = ppage;
                }

                /*
                 * We now have the pages locked exclusively; before they are
                 * mapped in we must downgrade the lock.
                 */
                *phys = (uint64_t)page_pptonum(pp_pages[0]) << PAGESHIFT;
                return pp_pages;
            }

            page_create_putback(npages);
        }

        page_unresv(npages);
    }

    return NULL;
}


void
vbi_pages_free(page_t **pp_pages, size_t size)
{
    pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
    size_t pp_size = npages * sizeof(page_t *);
    for (pgcnt_t i = 0; i < npages; i++)
    {
        /* we need to exclusive lock the pages before freeing them */
        int rc = page_tryupgrade(pp_pages[i]);
        if (!rc)
        {
            page_unlock(pp_pages[i]);
            while (!page_lock(pp_pages[i], SE_EXCL, NULL /* mutex */, P_RECLAIM))
                ;
        }

        page_free(pp_pages[i], 0 /* don't need, move to tail */);
    }

    kmem_free(pp_pages, pp_size);
    page_unresv(npages);
}


int
vbi_pages_premap(page_t **pp_pages, size_t size, uint64_t *pphysaddrs)
{
    if (!pphysaddrs)
        return -1;

    pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
    for (pgcnt_t i = 0; i < npages; i++)
    {
        /*
         * Prepare pages for mapping into kernel/user space. We need to
         * downgrade the exclusive page lock to a shared lock if the
         * page is locked exclusively.
         */
        if (page_tryupgrade(pp_pages[i]) == 1)
            page_downgrade(pp_pages[i]);
        pphysaddrs[i] = vbi_page_to_pa(pp_pages, i);
    }

    return 0;
}
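
/*
 * Illustrative usage sketch (editor's example; example_pages_demo is
 * hypothetical and size is assumed page-aligned): the revision 8 life
 * cycle is alloc -> premap -> map into the caller's address space with
 * vbi_user_map() -> unmap -> free.
 */
#if 0
static int
example_pages_demo(size_t size)
{
    uint64_t phys;
    page_t **pages = vbi_pages_alloc(&phys, size);
    if (pages == NULL)
        return (ENOMEM);

    uint64_t *palist = kmem_alloc((size >> PAGESHIFT) * sizeof(uint64_t), KM_SLEEP);
    caddr_t uva = NULL;
    if (vbi_pages_premap(pages, size, palist) == 0 &&
        vbi_user_map(&uva, PROT_READ | PROT_WRITE, palist, size) == 0)
    {
        /* ... use the mapping at uva ... */
        vbi_unmap(uva, size);
    }
    kmem_free(palist, (size >> PAGESHIFT) * sizeof(uint64_t));
    vbi_pages_free(pages, size);
    return (0);
}
#endif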


uint64_t
vbi_page_to_pa(page_t **pp_pages, pgcnt_t i)
{
    pfn_t pfn = page_pptonum(pp_pages[i]);
    if (pfn == PFN_INVALID)
        panic("vbi_page_to_pa: page_pptonum() failed\n");
    return (uint64_t)pfn << PAGESHIFT;
}


static page_t *
vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize)
{
    /* pgsize only applies when using the freelist */
    seg_t kernseg;
    kernseg.s_as = &kas;
    page_t *ppage = NULL;
    if (freelist == 1)
    {
        ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
            pgsize, 0 /* flags */, NULL /* local group */);
        if (!ppage && use_kflt)
        {
            ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
                pgsize, 0x0200 /* PG_KFLT */, NULL /* local group */);
        }
    }
    else
    {
        /* cachelist */
        ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
            0 /* flags */, NULL /* local group */);
        if (!ppage && use_kflt)
        {
            ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
                0x0200 /* PG_KFLT */, NULL /* local group */);
        }
    }
    return ppage;
}


/*
 * Large page code.
 */

page_t *
vbi_large_page_alloc(uint64_t *pphys, size_t pgsize)
{
    pgcnt_t const npages = pgsize >> PAGESHIFT;
    page_t *pproot, *pp, *pplist;
    pgcnt_t ipage;
    caddr_t vaddr;
    seg_t kernseg;
    int rc;

    /*
     * Reserve available memory for a large page and create it.
     */
    rc = page_resv(npages, KM_NOSLEEP);
    if (!rc)
        return NULL;

    rc = page_create_wait(npages, 0 /* flags */);
    if (!rc) {
        page_unresv(npages);
        return NULL;
    }

    /*
     * Get a page off the free list. We set vaddr to 0 since we don't know
     * where the memory is going to be mapped.
     */
    vaddr = NULL;
    kernseg.s_as = &kas;
    pproot = vbi_page_get_fromlist(1 /* freelist */, vaddr, pgsize);
    if (!pproot)
    {
        page_create_putback(npages);
        page_unresv(npages);
        return NULL;
    }
    AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));

    /*
     * Mark all the sub-pages as non-free and not-hashed-in.
     * It is paramount that we destroy the list (before freeing it).
     */
    pplist = pproot;
    for (ipage = 0; ipage < npages; ipage++) {
        pp = pplist;
        AssertPtr(pp);
        AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
            ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
        page_sub(&pplist, pp);
        AssertMsg(PP_ISFREE(pp), ("%p\n", pp));
        AssertMsg(pp->p_szc == pproot->p_szc, ("%p - %d expected %d \n", pp, pp->p_szc, pproot->p_szc));

        PP_CLRFREE(pp);
        PP_CLRAGED(pp);
    }

    *pphys = (uint64_t)page_pptonum(pproot) << PAGESHIFT;
    AssertMsg(!(*pphys & (pgsize - 1)), ("%llx %zx\n", *pphys, pgsize));
    return pproot;
}

void
vbi_large_page_free(page_t *pproot, size_t pgsize)
{
    pgcnt_t const npages = pgsize >> PAGESHIFT;
    pgcnt_t ipage;

    Assert(page_get_pagecnt(pproot->p_szc) == npages);
    AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));

    /*
     * We need to exclusively lock the sub-pages before freeing
     * the large one.
     */
    for (ipage = 0; ipage < npages; ipage++) {
        page_t *pp = page_nextn(pproot, ipage);
        AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
            ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
        AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));

        int rc = page_tryupgrade(pp);
        if (!rc) {
            page_unlock(pp);
            while (!page_lock(pp, SE_EXCL, NULL /* mutex */, P_RECLAIM)) {
                /*nothing*/;
            }
        }
    }

    /*
     * Free the large page and unreserve the memory.
     */
    page_free_pages(pproot);
    page_unresv(npages);
}

int
vbi_large_page_premap(page_t *pproot, size_t pgsize)
{
    pgcnt_t const npages = pgsize >> PAGESHIFT;
    pgcnt_t ipage;

    Assert(page_get_pagecnt(pproot->p_szc) == npages);
    AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));

    /*
     * We need to downgrade the sub-pages from exclusive to shared locking
     * before they can be mapped in (cf. vbi_pages_premap()).
     */
    for (ipage = 0; ipage < npages; ipage++) {
        page_t *pp = page_nextn(pproot, ipage);
        AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
            ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
        AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));

        if (page_tryupgrade(pp) == 1)
            page_downgrade(pp);
        AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
    }

    return 0;
}
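
/*
 * Illustrative usage sketch (editor's example; example_large_demo is
 * hypothetical and the 2MB size is an assumption about the large page
 * size in use): allocate a large page, prepare it for mapping, free it.
 */
#if 0
static int
example_large_demo(void)
{
    uint64_t phys;
    size_t const pgsize = 2 * 1024 * 1024;   /* assumes 2MB large pages */
    page_t *pproot = vbi_large_page_alloc(&phys, pgsize);
    if (pproot == NULL)
        return (ENOMEM);
    (void) vbi_large_page_premap(pproot, pgsize);
    /* ... map the range starting at phys ... */
    vbi_large_page_free(pproot, pgsize);
    return (0);
}
#endif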


/*
 * As more functions are added, they should start with a comment indicating
 * their revision, be added above this point in the file, and the revision
 * level should be increased. Also change vbi_modlmisc at the top of the file.
 *
 * NOTE! We'll start caring about this if anything in here ever makes it into
 * the solaris kernel proper.
 */
uint_t vbi_revision_level = 9;
1591