VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/solaris/vbi/i86pc/os/vbi.c@37948

Last change on this file since 37948 was 37948, checked in by vboxsync, 13 years ago

Runtime/r0drv/Solaris/vbi: temporary t_preempt offset fix.

  • Property svn:eol-style set to native
File size: 34.8 KB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010-2011 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Private interfaces for VirtualBox access to Solaris kernel internal
28 * facilities.
29 *
30 * See sys/vbi.h for what each function does.
31 */
32
33#include <sys/kmem.h>
34#include <sys/types.h>
35#include <sys/mman.h>
36#include <sys/thread.h>
37#include <sys/mutex.h>
38#include <sys/condvar.h>
39#include <sys/sdt.h>
40#include <sys/schedctl.h>
41#include <sys/time.h>
42#include <sys/sysmacros.h>
43#include <sys/cmn_err.h>
44#include <sys/vmsystm.h>
45#include <sys/cyclic.h>
46#include <sys/class.h>
47#include <sys/cpuvar.h>
48#include <sys/kobj.h>
49#include <sys/x_call.h>
50#include <sys/x86_archext.h>
51#include <vm/hat.h>
52#include <vm/seg_vn.h>
53#include <vm/seg_kmem.h>
54#include <sys/ddi.h>
55#include <sys/sunddi.h>
56#include <sys/modctl.h>
57#include <sys/machparam.h>
58#include <sys/utsname.h>
59
60#include <iprt/assert.h>
61
62#include "vbi.h"
63
64#define VBIPROC() ((proc_t *)vbi_proc())
65
66/*
67 * We have to look up contig_free() at runtime (via kobj_getsymvalue()) because older kernels do not export it.
68 */
69extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
70extern void contig_free(void *, size_t);
71#pragma weak contig_free
72static void (*p_contig_free)(void *, size_t) = contig_free;
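/*
 * A minimal sketch (kept under #if 0, not part of the build) of the runtime
 * fallback that vbi_init() performs further down when the weak contig_free
 * reference above is unresolved.  Only kobj_getsymvalue() from <sys/kobj.h>
 * is assumed.
 */
#if 0
static int
vbi_resolve_contig_free(void)
{
	if (p_contig_free == NULL) {
		/* weak symbol unresolved; look the routine up by name instead */
		p_contig_free = (void (*)(void *, size_t))
		    kobj_getsymvalue("contig_free", 1);
	}
	return (p_contig_free != NULL ? 0 : EINVAL);
}
#endif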
73
74/*
75 * We look up kflt_init() at runtime so that, when user pages can no longer be
76 * taken from the freelists and cachelists, we can fall back to kernel pages from the kernel freelists.
77 */
78/* Introduced in v9 */
79static int use_kflt = 0;
80static page_t *vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize);
81
82
83/*
84 * Workarounds for running on old versions of solaris with different cross call
85 * interfaces. If we find xc_init_cpu() in the kernel, then just use the defined
86 * interfaces for xc_call() from the include file where the xc_call()
87 * interfaces just take a pointer to a ulong_t array. The array must be long
88 * enough to hold "ncpus" bits at runtime.
89 *
90 * The reason for the hacks is that using the type "cpuset_t" is pretty much
91 * impossible from code built outside the Solaris source repository that wants
92 * to run on multiple releases of Solaris.
93 *
94 * For old style xc_call()s, 32 bit solaris and older 64 bit versions use
95 * "ulong_t" as cpuset_t.
96 *
97 * Later versions of 64 bit Solaris used: struct {ulong_t words[x];}
98 * where "x" depends on NCPU.
99 *
100 * We detect the difference in 64 bit support by checking the kernel value of
101 * max_cpuid, which always holds the compiled value of NCPU - 1.
102 *
103 * If Solaris increases NCPU to more than 256, this module will continue
104 * to work on all versions of Solaris as long as the number of installed
105 * CPUs in the machine is <= VBI_NCPU. If VBI_NCPU is increased, this code
106 * has to be re-written some to provide compatibility with older Solaris which
107 * expects cpuset_t to be based on NCPU==256 -- or we discontinue support
108 * of old Nevada/S10.
109 */
110static int use_old = 0;
111static int use_old_with_ulong = 0;
112static void (*p_xc_call)() = (void (*)())xc_call;
113
114#define VBI_NCPU 256
115#define VBI_SET_WORDS (VBI_NCPU / (sizeof (ulong_t) * 8))
116typedef struct vbi_cpuset {
117 ulong_t words[VBI_SET_WORDS];
118} vbi_cpuset_t;
119#define X_CALL_HIPRI (2) /* for old Solaris interface */
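/*
 * A minimal sketch (kept under #if 0) of how vbi_cpuset_t is sized and
 * manipulated: with VBI_NCPU == 256 and a 64-bit ulong_t, VBI_SET_WORDS is
 * 256 / (8 * 8) == 4 words, i.e. 256 bits, one per possible CPU.  The
 * BT_SET()/BT_CLEAR() macros used later in this file operate on that word
 * array; CPU 5 below is a hypothetical example.
 */
#if 0
static void
vbi_cpuset_sketch(void)
{
	vbi_cpuset_t set;
	int i;

	for (i = 0; i < VBI_SET_WORDS; ++i)	/* start with an empty set */
		set.words[i] = 0;
	BT_SET(set.words, 5);			/* add CPU 5 to the set */
	ASSERT(BT_TEST(set.words, 5));
	BT_CLEAR(set.words, 5);			/* and remove it again */
}
#endif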
120
121/*
122 * module linkage stuff
123 */
124#if 0
125static struct modlmisc vbi_modlmisc = {
126 &mod_miscops, "VirtualBox Interfaces V8"
127};
128
129static struct modlinkage vbi_modlinkage = {
130 MODREV_1, { (void *)&vbi_modlmisc, NULL }
131};
132#endif
133
134extern uintptr_t kernelbase;
135#define IS_KERNEL(v) ((uintptr_t)(v) >= kernelbase)
136
137#if 0
138static int vbi_verbose = 0;
139
140#define VBI_VERBOSE(msg) {if (vbi_verbose) cmn_err(CE_WARN, msg);}
141#endif
142
143/* Introduced in v8 */
144static int vbi_is_initialized = 0;
145
146/* Introduced in v6 */
147static int vbi_is_nevada = 0;
148
149#ifdef _LP64
150/* 64-bit Solaris 10 offsets */
151/* CPU */
152static int off_s10_cpu_runrun = 232;
153static int off_s10_cpu_kprunrun = 233;
154/* kthread_t */
155static int off_s10_t_preempt = 42;
156
157/* 64-bit Solaris 11 (Nevada/OpenSolaris) offsets */
158/* CPU */
159static int off_s11_cpu_runrun = 216;
160static int off_s11_cpu_kprunrun = 217;
161/* kthread_t */
162static int off_s11_t_preempt = 42;
163
164/* 64-bit Solaris 11 snv_166+ offsets (CR 7037143) */
165static int off_s11_t_preempt_new = 48;
166#else
167/* 32-bit Solaris 10 offsets */
168/* CPU */
169static int off_s10_cpu_runrun = 124;
170static int off_s10_cpu_kprunrun = 125;
171/* kthread_t */
172static int off_s10_t_preempt = 26;
173
174/* 32-bit Solaris 11 (Nevada/OpenSolaris) offsets */
175/* CPU */
176static int off_s11_cpu_runrun = 112;
177static int off_s11_cpu_kprunrun = 113;
178/* kthread_t */
179static int off_s11_t_preempt = 26;
180#endif
181
182
183/* Which offsets will be used */
184static int off_cpu_runrun = -1;
185static int off_cpu_kprunrun = -1;
186static int off_t_preempt = -1;
187
188#define VBI_T_PREEMPT (*((char *)curthread + off_t_preempt))
189#define VBI_CPU_KPRUNRUN (*((char *)CPU + off_cpu_kprunrun))
190#define VBI_CPU_RUNRUN (*((char *)CPU + off_cpu_runrun))
191
192#undef kpreempt_disable
193#undef kpreempt_enable
194
195#define VBI_PREEMPT_DISABLE() \
196 { \
197 VBI_T_PREEMPT++; \
198 ASSERT(VBI_T_PREEMPT >= 1); \
199 }
200#define VBI_PREEMPT_ENABLE() \
201 { \
202 ASSERT(VBI_T_PREEMPT >= 1); \
203 if (--VBI_T_PREEMPT == 0 && \
204 VBI_CPU_RUNRUN) \
205 kpreempt(KPREEMPT_SYNC); \
206 }
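/*
 * A minimal sketch (kept under #if 0) of what VBI_T_PREEMPT amounts to once
 * vbi_init() has chosen off_t_preempt: the current thread's preemption-disable
 * count is read as a raw byte at that offset rather than through the kthread_t
 * definition, so a wrong offset would touch an unrelated field -- hence the
 * range checks in vbi_init().  The offset 48 is the snv_166+ 64-bit value from
 * the table above.
 */
#if 0
static char
vbi_read_t_preempt_sketch(void)
{
	char *thr = (char *)curthread;

	return (thr[48]);	/* preemption-disable count on snv_166+ (64-bit) */
}
#endif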
207
208/* End of v6 intro */
209
210#if 0
211int
212_init(void)
213{
214 int err = vbi_init();
215 if (!err)
216 err = mod_install(&vbi_modlinkage);
217 return (err);
218}
219#endif
220
221int
222vbi_init(void)
223{
224 /*
225 * Check whether this version of the virtualbox interface module will work
226 * with the running kernel.
227 */
228 if (kobj_getsymvalue("xc_init_cpu", 1) != NULL) {
229 /*
230 * Our bit vector storage needs to be large enough for the
231 * actual number of CPUs running in the system.
232 */
233 if (ncpus > VBI_NCPU) {
234 cmn_err(CE_NOTE, "cpu count mismatch.\n");
235 return (EINVAL);
236 }
237 } else {
238 use_old = 1;
239 if (max_cpuid + 1 == sizeof(ulong_t) * 8)
240 use_old_with_ulong = 1;
241 else if (max_cpuid + 1 != VBI_NCPU)
242 {
243 cmn_err(CE_NOTE, "cpuset_t size mismatch. probably too old a kernel.\n");
244 return (EINVAL); /* cpuset_t size mismatch */
245 }
246 }
247
248 /*
249 * In older versions of Solaris contig_free() is a static routine.
250 */
251 if (p_contig_free == NULL) {
252 p_contig_free = (void (*)(void *, size_t))
253 kobj_getsymvalue("contig_free", 1);
254 if (p_contig_free == NULL) {
255 cmn_err(CE_NOTE, " contig_free() not found in kernel\n");
256 return (EINVAL);
257 }
258 }
259
260 /*
261 * Use kernel page freelist flags to get pages from the kernel page freelists
262 * while allocating physical pages, once the user pages are exhausted.
263 * snv_161+, see @bugref{5632}.
264 */
265 if (kobj_getsymvalue("kflt_init", 1) != NULL)
266 {
267 int *p_kflt_disable = (int*)kobj_getsymvalue("kflt_disable", 1); /* amd64 only, on 32-bit kflt's are disabled. */
268 if (p_kflt_disable && *p_kflt_disable == 0)
269 {
270 use_kflt = 1;
271 }
272 }
273
274
275 /*
276 * Check if this is S10 or Nevada
277 */
278 if (!strncmp(utsname.release, "5.11", sizeof("5.11") - 1)) {
279 /* Nevada detected... */
280 vbi_is_nevada = 1;
281
282 off_cpu_runrun = off_s11_cpu_runrun;
283 off_cpu_kprunrun = off_s11_cpu_kprunrun;
284 off_t_preempt = off_s11_t_preempt;
285
286#ifdef _LP64
287 /* Only 64-bit kernels */
288 long snv_version = 0;
289 if ( !strncmp(utsname.version, "snv_", 4)
290 && strlen(utsname.version) > 4)
291 {
292 ddi_strtol(utsname.version + 4, NULL /* endptr */, 0, &snv_version);
293 if (snv_version >= 166)
294 {
295 off_t_preempt = off_s11_t_preempt_new;
296 cmn_err(CE_NOTE, "here\n");
297 }
298
299 cmn_err(CE_NOTE, "Detected S11 version %ld: Preemption offset=%d\n", snv_version, off_t_preempt);
300 }
301 else
302 cmn_err(CE_NOTE, "WARNING!! Cannot determine version. Assuming pre snv_166. Preemption offset=%d may be busted!\n", off_t_preempt);
303#endif
304 } else {
305 /* Solaris 10 detected... */
306 vbi_is_nevada = 0;
307
308 off_cpu_runrun = off_s10_cpu_runrun;
309 off_cpu_kprunrun = off_s10_cpu_kprunrun;
310 off_t_preempt = off_s10_t_preempt;
311 }
312
313 /*
314 * Sanity checking...
315 */
316 /* CPU */
317 char crr = VBI_CPU_RUNRUN;
318 char krr = VBI_CPU_KPRUNRUN;
319 if ( (crr < 0 || crr > 1)
320 || (krr < 0 || krr > 1)) {
321 cmn_err(CE_NOTE, "CPU structure sanity check failed! OS version mismatch.\n");
322 return (EINVAL);
323 }
324
325 /* Thread */
326 char t_preempt = VBI_T_PREEMPT;
327 if (t_preempt < 0 || t_preempt > 32) {
328 cmn_err(CE_NOTE, "Thread structure sanity check failed! OS version mismatch.\n");
329 return (EINVAL);
330 }
331
332 vbi_is_initialized = 1;
333
334 return (0);
335}
336
337#if 0
338int
339_fini(void)
340{
341 int err = mod_remove(&vbi_modlinkage);
342 if (err != 0)
343 return (err);
344
345 return (0);
346}
347
348int
349_info(struct modinfo *modinfop)
350{
351 return (mod_info(&vbi_modlinkage, modinfop));
352}
353#endif
354
355
356static ddi_dma_attr_t base_attr = {
357 DMA_ATTR_V0, /* Version Number */
358 (uint64_t)0, /* lower limit */
359 (uint64_t)0, /* high limit */
360 (uint64_t)0xffffffff, /* counter limit */
361 (uint64_t)PAGESIZE, /* pagesize alignment */
362 (uint64_t)PAGESIZE, /* pagesize burst size */
363 (uint64_t)PAGESIZE, /* pagesize effective DMA size */
364 (uint64_t)0xffffffff, /* max DMA xfer size */
365 (uint64_t)0xffffffff, /* segment boundary */
366 1, /* list length (1 for contiguous) */
367 1, /* device granularity */
368 0 /* bus-specific flags */
369};
370
371static void *
372vbi_internal_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
373{
374 ddi_dma_attr_t attr;
375 pfn_t pfn;
376 void *ptr;
377 uint_t npages;
378
379 if ((size & PAGEOFFSET) != 0)
380 return (NULL);
381 npages = (size + PAGESIZE - 1) >> PAGESHIFT;
382 if (npages == 0)
383 return (NULL);
384
385 attr = base_attr;
386 attr.dma_attr_addr_hi = *phys;
387 attr.dma_attr_align = alignment;
388 if (!contig)
389 attr.dma_attr_sgllen = npages;
390 ptr = contig_alloc(size, &attr, PAGESIZE, 1);
391
392 if (ptr == NULL) {
393 cmn_err(CE_NOTE, "vbi_internal_alloc() failure for %lu bytes contig=%d", size, contig);
394 return (NULL);
395 }
396
397 pfn = hat_getpfnum(kas.a_hat, (caddr_t)ptr);
398 if (pfn == PFN_INVALID)
399 panic("vbi_internal_alloc(): hat_getpfnum() failed\n");
400 *phys = (uint64_t)pfn << PAGESHIFT;
401 return (ptr);
402}
403
404void *
405vbi_contig_alloc(uint64_t *phys, size_t size)
406{
407 /* Obsolete */
408 return (vbi_internal_alloc(phys, size, PAGESIZE /* alignment */, 1 /* contiguous */));
409}
410
411void
412vbi_contig_free(void *va, size_t size)
413{
414 /* Obsolete */
415 p_contig_free(va, size);
416}
417
418void *
419vbi_kernel_map(uint64_t pa, size_t size, uint_t prot)
420{
421 caddr_t va;
422
423 if ((pa & PAGEOFFSET) || (size & PAGEOFFSET)) {
424 cmn_err(CE_NOTE, "vbi_kernel_map() bad pa (0x%lx) or size (%lu)", pa, size);
425 return (NULL);
426 }
427
428 va = vmem_alloc(heap_arena, size, VM_SLEEP);
429
430 hat_devload(kas.a_hat, va, size, (pfn_t)(pa >> PAGESHIFT),
431 prot, HAT_LOAD | HAT_LOAD_LOCK | HAT_UNORDERED_OK);
432
433 return (va);
434}
435
436void
437vbi_unmap(void *va, size_t size)
438{
439 if (IS_KERNEL(va)) {
440 hat_unload(kas.a_hat, va, size, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
441 vmem_free(heap_arena, va, size);
442 } else {
443 struct as *as = VBIPROC()->p_as;
444
445 as_rangelock(as);
446 (void) as_unmap(as, va, size);
447 as_rangeunlock(as);
448 }
449}
450
451void *
452vbi_curthread(void)
453{
454 return (curthread);
455}
456
457int
458vbi_yield(void)
459{
460 int rv = 0;
461
462 vbi_preempt_disable();
463
464 char tpr = VBI_T_PREEMPT;
465 char kpr = VBI_CPU_KPRUNRUN;
466 if (tpr == 1 && kpr)
467 rv = 1;
468
469 vbi_preempt_enable();
470 return (rv);
471}
472
473uint64_t
474vbi_timer_granularity(void)
475{
476 return (nsec_per_tick);
477}
478
479typedef struct vbi_timer {
480 cyc_handler_t vbi_handler;
481 cyclic_id_t vbi_cyclic;
482 uint64_t vbi_interval;
483 void (*vbi_func)();
484 void *vbi_arg1;
485 void *vbi_arg2;
486} vbi_timer_t;
487
488static void
489vbi_timer_callback(void *arg)
490{
491 vbi_timer_t *t = arg;
492
493 if (t->vbi_interval == 0)
494 vbi_timer_stop(arg);
495 t->vbi_func(t->vbi_arg1, t->vbi_arg2);
496}
497
498void *
499vbi_timer_create(void *callback, void *arg1, void *arg2, uint64_t interval)
500{
501 vbi_timer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
502
503 t->vbi_func = (void (*)())callback;
504 t->vbi_arg1 = arg1;
505 t->vbi_arg2 = arg2;
506 t->vbi_handler.cyh_func = vbi_timer_callback;
507 t->vbi_handler.cyh_arg = (void *)t;
508 t->vbi_handler.cyh_level = CY_LOCK_LEVEL;
509 t->vbi_cyclic = CYCLIC_NONE;
510 t->vbi_interval = interval;
511 return (t);
512}
513
514void
515vbi_timer_destroy(void *timer)
516{
517 vbi_timer_t *t = timer;
518 if (t != NULL) {
519 vbi_timer_stop(timer);
520 kmem_free(t, sizeof (*t));
521 }
522}
523
524void
525vbi_timer_start(void *timer, uint64_t when)
526{
527 vbi_timer_t *t = timer;
528 cyc_time_t fire_time;
529 uint64_t interval = t->vbi_interval;
530
531 mutex_enter(&cpu_lock);
532 when += gethrtime();
533 fire_time.cyt_when = when;
534 if (interval == 0)
535 fire_time.cyt_interval = when;
536 else
537 fire_time.cyt_interval = interval;
538 t->vbi_cyclic = cyclic_add(&t->vbi_handler, &fire_time);
539 mutex_exit(&cpu_lock);
540}
541
542void
543vbi_timer_stop(void *timer)
544{
545 vbi_timer_t *t = timer;
546
547 if (t->vbi_cyclic == CYCLIC_NONE)
548 return;
549 mutex_enter(&cpu_lock);
550 if (t->vbi_cyclic != CYCLIC_NONE) {
551 cyclic_remove(t->vbi_cyclic);
552 t->vbi_cyclic = CYCLIC_NONE;
553 }
554 mutex_exit(&cpu_lock);
555}
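/*
 * A minimal usage sketch (kept under #if 0) of the cyclic-backed timer API
 * above: create a periodic timer, arm it, and tear it down.  The callback and
 * the 10ms values are hypothetical.
 */
#if 0
static void
vbi_timer_sketch_cb(void *arg1, void *arg2)
{
	/* runs at CY_LOCK_LEVEL from the cyclic subsystem; must not block */
}

static void
vbi_timer_sketch(void)
{
	/* interval != 0 makes it periodic; 0 would make it a one-shot */
	void *t = vbi_timer_create((void *)vbi_timer_sketch_cb, NULL, NULL,
	    10000000 /* 10ms period, in nanoseconds */);

	vbi_timer_start(t, 10000000 /* first fire 10ms from now */);
	/* ... */
	vbi_timer_stop(t);
	vbi_timer_destroy(t);
}
#endif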
556
557uint64_t
558vbi_tod(void)
559{
560 timestruc_t ts;
561
562 mutex_enter(&tod_lock);
563 ts = tod_get();
564 mutex_exit(&tod_lock);
565 return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
566}
567
568
569void *
570vbi_proc(void)
571{
572 proc_t *p;
573 drv_getparm(UPROCP, &p);
574 return (p);
575}
576
577void
578vbi_set_priority(void *thread, int priority)
579{
580 kthread_t *t = thread;
581
582 thread_lock(t);
583 (void) thread_change_pri(t, priority, 0);
584 thread_unlock(t);
585}
586
587void *
588vbi_thread_create(void (*func)(void *), void *arg, size_t len, int priority)
589{
590 kthread_t *t;
591
592 t = thread_create(NULL, NULL, (void (*)())func, arg, len,
593 VBIPROC(), TS_RUN, priority);
594 return (t);
595}
596
597void
598vbi_thread_exit(void)
599{
600 thread_exit();
601}
602
603void *
604vbi_text_alloc(size_t size)
605{
606 return (segkmem_alloc(heaptext_arena, size, KM_SLEEP));
607}
608
609void
610vbi_text_free(void *va, size_t size)
611{
612 segkmem_free(heaptext_arena, va, size);
613}
614
615int
616vbi_cpu_id(void)
617{
618 return (CPU->cpu_id);
619}
620
621int
622vbi_max_cpu_id(void)
623{
624 return (max_cpuid);
625}
626
627int
628vbi_cpu_maxcount(void)
629{
630 return (max_cpuid + 1);
631}
632
633int
634vbi_cpu_count(void)
635{
636 return (ncpus);
637}
638
639int
640vbi_cpu_online(int c)
641{
642 int x;
643
644 mutex_enter(&cpu_lock);
645 x = cpu_is_online(cpu[c]);
646 mutex_exit(&cpu_lock);
647 return (x);
648}
649
650void
651vbi_preempt_disable(void)
652{
653 VBI_PREEMPT_DISABLE();
654}
655
656void
657vbi_preempt_enable(void)
658{
659 VBI_PREEMPT_ENABLE();
660}
661
662void
663vbi_execute_on_all(void *func, void *arg)
664{
665 vbi_cpuset_t set;
666 int i;
667
668 for (i = 0; i < VBI_SET_WORDS; ++i)
669 set.words[i] = (ulong_t)-1L;
670 if (use_old) {
671 if (use_old_with_ulong) {
672 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
673 set.words[0], (xc_func_t)func);
674 } else {
675 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
676 set, (xc_func_t)func);
677 }
678 } else {
679 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
680 }
681}
682
683void
684vbi_execute_on_others(void *func, void *arg)
685{
686 vbi_cpuset_t set;
687 int i;
688
689 for (i = 0; i < VBI_SET_WORDS; ++i)
690 set.words[i] = (ulong_t)-1L;
691 BT_CLEAR(set.words, vbi_cpu_id());
692 if (use_old) {
693 if (use_old_with_ulong) {
694 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
695 set.words[0], (xc_func_t)func);
696 } else {
697 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
698 set, (xc_func_t)func);
699 }
700 } else {
701 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
702 }
703}
704
705void
706vbi_execute_on_one(void *func, void *arg, int c)
707{
708 vbi_cpuset_t set;
709 int i;
710
711 for (i = 0; i < VBI_SET_WORDS; ++i)
712 set.words[i] = 0;
713 BT_SET(set.words, c);
714 if (use_old) {
715 if (use_old_with_ulong) {
716 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
717 set.words[0], (xc_func_t)func);
718 } else {
719 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
720 set, (xc_func_t)func);
721 }
722 } else {
723 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
724 }
725}
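/*
 * A minimal usage sketch (kept under #if 0) of the cross call wrappers above.
 * The handler runs at high interrupt level on the targeted CPU(s) and must
 * not block; the handler name is hypothetical.
 */
#if 0
static void
vbi_xcall_sketch_handler(void *arg)
{
	/* e.g. flush a per-CPU cache or poke a per-CPU register */
}

static void
vbi_xcall_sketch(void)
{
	vbi_execute_on_all((void *)vbi_xcall_sketch_handler, NULL);
	vbi_execute_on_others((void *)vbi_xcall_sketch_handler, NULL);
	vbi_execute_on_one((void *)vbi_xcall_sketch_handler, NULL, vbi_cpu_id());
}
#endif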
726
727int
728vbi_lock_va(void *addr, size_t len, int access, void **handle)
729{
730 faultcode_t err;
731
732 /*
733 * kernel mappings on x86 are always locked, so only handle user.
734 */
735 *handle = NULL;
736 if (!IS_KERNEL(addr)) {
737 err = as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
738 (caddr_t)addr, len, F_SOFTLOCK, access);
739 if (err != 0) {
740 cmn_err(CE_NOTE, "vbi_lock_va() failed to lock");
741 return (-1);
742 }
743 }
744 return (0);
745}
746
747/*ARGSUSED*/
748void
749vbi_unlock_va(void *addr, size_t len, int access, void *handle)
750{
751 if (!IS_KERNEL(addr))
752 as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
753 (caddr_t)addr, len, F_SOFTUNLOCK, access);
754}
755
756uint64_t
757vbi_va_to_pa(void *addr)
758{
759 struct hat *hat;
760 pfn_t pfn;
761 uintptr_t v = (uintptr_t)addr;
762
763 if (IS_KERNEL(v))
764 hat = kas.a_hat;
765 else
766 hat = VBIPROC()->p_as->a_hat;
767 pfn = hat_getpfnum(hat, (caddr_t)(v & PAGEMASK));
768 if (pfn == PFN_INVALID)
769 return (-(uint64_t)1);
770 return (((uint64_t)pfn << PAGESHIFT) | (v & PAGEOFFSET));
771}
772
773
774struct segvbi_crargs {
775 uint64_t *palist;
776 uint_t prot;
777};
778
779struct segvbi_data {
780 uint_t prot;
781};
782
783static struct seg_ops segvbi_ops;
784
785static int
786segvbi_create(struct seg *seg, void *args)
787{
788 struct segvbi_crargs *a = args;
789 struct segvbi_data *data;
790 struct as *as = seg->s_as;
791 caddr_t va;
792 ulong_t pgcnt;
793 ulong_t p;
794
795 hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
796 data = kmem_zalloc(sizeof (*data), KM_SLEEP);
797 data->prot = a->prot | PROT_USER;
798
799 seg->s_ops = &segvbi_ops;
800 seg->s_data = data;
801
802 /*
803 * now load locked mappings to the pages
804 */
805 va = seg->s_base;
806 pgcnt = (seg->s_size + PAGESIZE - 1) >> PAGESHIFT;
807 for (p = 0; p < pgcnt; ++p, va += PAGESIZE) {
808 hat_devload(as->a_hat, va,
809 PAGESIZE, a->palist[p] >> PAGESHIFT,
810 data->prot | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
811 }
812
813 return (0);
814}
815
816/*
817 * Duplicate a seg and return new segment in newseg.
818 */
819static int
820segvbi_dup(struct seg *seg, struct seg *newseg)
821{
822 struct segvbi_data *data = seg->s_data;
823 struct segvbi_data *ndata;
824
825 ndata = kmem_zalloc(sizeof (*data), KM_SLEEP);
826 ndata->prot = data->prot;
827 newseg->s_ops = &segvbi_ops;
828 newseg->s_data = ndata;
829
830 return (0);
831}
832
833static int
834segvbi_unmap(struct seg *seg, caddr_t addr, size_t len)
835{
836 if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
837 (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
838 panic("segvbi_unmap");
839
840 if (addr != seg->s_base || len != seg->s_size)
841 return (ENOTSUP);
842
843 hat_unload(seg->s_as->a_hat, addr, len,
844 HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
845
846 seg_free(seg);
847 return (0);
848}
849
850static void
851segvbi_free(struct seg *seg)
852{
853 struct segvbi_data *data = seg->s_data;
854 kmem_free(data, sizeof (*data));
855}
856
857/*
858 * The (u)read() path may SEGOP_FAULT() buffers that were mapped in via
859 * vbi_user_map(), i.e. prefault them before DMA. Since those mappings are
860 * already loaded and locked, don't fail when we're called directly; see #5047.
861 */
862static int
863segvbi_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
864 enum fault_type type, enum seg_rw rw)
865{
866 return (0);
867}
868
869static int
870segvbi_faulta(struct seg *seg, caddr_t addr)
871{
872 return (0);
873}
874
875static int
876segvbi_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
877{
878 return (EACCES);
879}
880
881static int
882segvbi_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
883{
884 return (EINVAL);
885}
886
887static int
888segvbi_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
889{
890 return (-1);
891}
892
893static int
894segvbi_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
895{
896 return (0);
897}
898
899static size_t
900segvbi_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
901{
902 size_t v;
903
904 for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
905 len -= PAGESIZE, v += PAGESIZE)
906 *vec++ = 1;
907 return (v);
908}
909
910static int
911segvbi_lockop(struct seg *seg, caddr_t addr,
912 size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
913{
914 return (0);
915}
916
917static int
918segvbi_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
919{
920 struct segvbi_data *data = seg->s_data;
921 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
922 if (pgno != 0)
923 {
924 do
925 {
926 pgno--;
927 protv[pgno] = data->prot;
928 } while (pgno != 0);
929 }
930 return (0);
931}
932
933static u_offset_t
934segvbi_getoffset(struct seg *seg, caddr_t addr)
935{
936 return ((uintptr_t)addr - (uintptr_t)seg->s_base);
937}
938
939static int
940segvbi_gettype(struct seg *seg, caddr_t addr)
941{
942 return (MAP_SHARED);
943}
944
945static vnode_t vbivp;
946
947static int
948segvbi_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
949{
950 *vpp = &vbivp;
951 return (0);
952}
953
954static int
955segvbi_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
956{
957 return (0);
958}
959
960static void
961segvbi_dump(struct seg *seg)
962{}
963
964static int
965segvbi_pagelock(struct seg *seg, caddr_t addr, size_t len,
966 struct page ***ppp, enum lock_type type, enum seg_rw rw)
967{
968 return (ENOTSUP);
969}
970
971static int
972segvbi_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
973{
974 return (ENOTSUP);
975}
976
977static int
978segvbi_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
979{
980 return (ENODEV);
981}
982
983static lgrp_mem_policy_info_t *
984segvbi_getpolicy(struct seg *seg, caddr_t addr)
985{
986 return (NULL);
987}
988
989static int
990segvbi_capable(struct seg *seg, segcapability_t capability)
991{
992 return (0);
993}
994
995static struct seg_ops segvbi_ops = {
996 segvbi_dup,
997 segvbi_unmap,
998 segvbi_free,
999 segvbi_fault,
1000 segvbi_faulta,
1001 segvbi_setprot,
1002 segvbi_checkprot,
1003 (int (*)())segvbi_kluster,
1004 (size_t (*)(struct seg *))NULL, /* swapout */
1005 segvbi_sync,
1006 segvbi_incore,
1007 segvbi_lockop,
1008 segvbi_getprot,
1009 segvbi_getoffset,
1010 segvbi_gettype,
1011 segvbi_getvp,
1012 segvbi_advise,
1013 segvbi_dump,
1014 segvbi_pagelock,
1015 segvbi_setpagesize,
1016 segvbi_getmemid,
1017 segvbi_getpolicy,
1018 segvbi_capable
1019};
1020
1021
1022
1023/*
1024 * Interfaces to inject physical pages into user address space
1025 * and later remove them.
1026 */
1027int
1028vbi_user_map(caddr_t *va, uint_t prot, uint64_t *palist, size_t len)
1029{
1030 struct as *as = VBIPROC()->p_as;
1031 struct segvbi_crargs args;
1032 int error = 0;
1033
1034 args.palist = palist;
1035 args.prot = prot;
1036 as_rangelock(as);
1037 map_addr(va, len, 0, 0, MAP_SHARED);
1038 if (*va != NULL)
1039 error = as_map(as, *va, len, segvbi_create, &args);
1040 else
1041 error = ENOMEM;
1042 if (error)
1043 cmn_err(CE_NOTE, "vbi_user_map() failed error=%d", error);
1044 as_rangeunlock(as);
1045 return (error);
1046}
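/*
 * A minimal usage sketch (kept under #if 0) of injecting physical pages into
 * the calling process with vbi_user_map(): build a page-aligned physical
 * address list, then let the segvbi segment driver above map it.  The
 * single-page example and the below-4GB limit are hypothetical; prototypes
 * are assumed to come from vbi.h.
 */
#if 0
static int
vbi_user_map_sketch(void)
{
	uint64_t phys = 0xffffffffULL;		/* upper limit: allocate below 4GB */
	uint64_t palist[1];
	caddr_t va = NULL;
	void *p;

	p = vbi_phys_alloc(&phys, PAGESIZE, PAGESIZE, 1 /* contiguous */);
	if (p == NULL)
		return (ENOMEM);
	palist[0] = phys;			/* actual physical address on return */
	if (vbi_user_map(&va, PROT_READ | PROT_WRITE, palist, PAGESIZE) != 0) {
		vbi_phys_free(p, PAGESIZE);
		return (ENOMEM);
	}
	/* ... the user address 'va' now maps the page ... */
	vbi_unmap(va, PAGESIZE);
	vbi_phys_free(p, PAGESIZE);
	return (0);
}
#endif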
1047
1048
1049/*
1050 * This is revision 2 of the interface.
1051 */
1052
1053struct vbi_cpu_watch {
1054 void (*vbi_cpu_func)(void *, int, int);
1055 void *vbi_cpu_arg;
1056};
1057
1058static int
1059vbi_watcher(cpu_setup_t state, int icpu, void *arg)
1060{
1061 vbi_cpu_watch_t *w = arg;
1062 int online;
1063
1064 if (state == CPU_ON)
1065 online = 1;
1066 else if (state == CPU_OFF)
1067 online = 0;
1068 else
1069 return (0);
1070 w->vbi_cpu_func(w->vbi_cpu_arg, icpu, online);
1071 return (0);
1072}
1073
1074vbi_cpu_watch_t *
1075vbi_watch_cpus(void (*func)(void *, int, int), void *arg, int current_too)
1076{
1077 int c;
1078 vbi_cpu_watch_t *w;
1079
1080 w = kmem_alloc(sizeof (*w), KM_SLEEP);
1081 w->vbi_cpu_func = func;
1082 w->vbi_cpu_arg = arg;
1083 mutex_enter(&cpu_lock);
1084 register_cpu_setup_func(vbi_watcher, w);
1085 if (current_too) {
1086 for (c = 0; c < ncpus; ++c) {
1087 if (cpu_is_online(cpu[c]))
1088 func(arg, c, 1);
1089 }
1090 }
1091 mutex_exit(&cpu_lock);
1092 return (w);
1093}
1094
1095void
1096vbi_ignore_cpus(vbi_cpu_watch_t *w)
1097{
1098 mutex_enter(&cpu_lock);
1099 unregister_cpu_setup_func(vbi_watcher, w);
1100 mutex_exit(&cpu_lock);
1101 kmem_free(w, sizeof (*w));
1102}
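/*
 * A minimal usage sketch (kept under #if 0) of the CPU online/offline
 * notification hooks above.  The callback is hypothetical; passing a non-zero
 * current_too reports the CPUs that are already online at registration time.
 */
#if 0
static void
vbi_cpu_watch_sketch_cb(void *arg, int icpu, int online)
{
	cmn_err(CE_NOTE, "cpu %d is now %s", icpu, online ? "online" : "offline");
}

static void
vbi_cpu_watch_sketch(void)
{
	vbi_cpu_watch_t *w = vbi_watch_cpus(vbi_cpu_watch_sketch_cb, NULL, 1);

	/* ... */
	vbi_ignore_cpus(w);
}
#endif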
1103
1104/*
1105 * Simple timers are pretty much a pass through to the cyclic subsystem.
1106 */
1107struct vbi_stimer {
1108 cyc_handler_t s_handler;
1109 cyc_time_t s_fire_time;
1110 cyclic_id_t s_cyclic;
1111 uint64_t s_tick;
1112 void (*s_func)(void *, uint64_t);
1113 void *s_arg;
1114};
1115
1116static void
1117vbi_stimer_func(void *arg)
1118{
1119 vbi_stimer_t *t = arg;
1120 t->s_func(t->s_arg, ++t->s_tick);
1121}
1122
1123extern vbi_stimer_t *
1124vbi_stimer_begin(
1125 void (*func)(void *, uint64_t),
1126 void *arg,
1127 uint64_t when,
1128 uint64_t interval,
1129 int on_cpu)
1130{
1131 vbi_stimer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1132
1133 t->s_handler.cyh_func = vbi_stimer_func;
1134 t->s_handler.cyh_arg = t;
1135 t->s_handler.cyh_level = CY_LOCK_LEVEL;
1136 t->s_tick = 0;
1137 t->s_func = func;
1138 t->s_arg = arg;
1139
1140 mutex_enter(&cpu_lock);
1141 if (on_cpu != VBI_ANY_CPU && !cpu_is_online(cpu[on_cpu])) {
1142 t = NULL;
1143 goto done;
1144 }
1145
1146 when += gethrtime();
1147 t->s_fire_time.cyt_when = when;
1148 if (interval == 0)
1149 t->s_fire_time.cyt_interval = INT64_MAX - when;
1150 else
1151 t->s_fire_time.cyt_interval = interval;
1152 t->s_cyclic = cyclic_add(&t->s_handler, &t->s_fire_time);
1153 if (on_cpu != VBI_ANY_CPU)
1154 cyclic_bind(t->s_cyclic, cpu[on_cpu], NULL);
1155done:
1156 mutex_exit(&cpu_lock);
1157 return (t);
1158}
1159
1160extern void
1161vbi_stimer_end(vbi_stimer_t *t)
1162{
1163 mutex_enter(&cpu_lock);
1164 cyclic_remove(t->s_cyclic);
1165 mutex_exit(&cpu_lock);
1166 kmem_free(t, sizeof (*t));
1167}
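/*
 * A minimal usage sketch (kept under #if 0) of the simple timer above: a
 * periodic tick bound to the current CPU.  The callback and the 1ms values
 * are hypothetical; VBI_ANY_CPU (from vbi.h) may be passed when no CPU
 * binding is wanted.
 */
#if 0
static void
vbi_stimer_sketch_cb(void *arg, uint64_t tick)
{
	/* 'tick' counts invocations, starting at 1 */
}

static void
vbi_stimer_sketch(void)
{
	vbi_stimer_t *t = vbi_stimer_begin(vbi_stimer_sketch_cb, NULL,
	    1000000 /* first fire: 1ms from now */,
	    1000000 /* period: 1ms */,
	    vbi_cpu_id());

	if (t != NULL)
		vbi_stimer_end(t);
}
#endif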
1168
1169/*
1170 * Global timers are more complicated. They include a counter on the callback,
1171 * that indicates the first call on a given cpu.
1172 */
1173struct vbi_gtimer {
1174 uint64_t *g_counters;
1175 void (*g_func)(void *, uint64_t);
1176 void *g_arg;
1177 uint64_t g_when;
1178 uint64_t g_interval;
1179 cyclic_id_t g_cyclic;
1180};
1181
1182static void
1183vbi_gtimer_func(void *arg)
1184{
1185 vbi_gtimer_t *t = arg;
1186 t->g_func(t->g_arg, ++t->g_counters[vbi_cpu_id()]);
1187}
1188
1189/*
1190 * Whenever a cpu is onlined, need to reset the g_counters[] for it to zero.
1191 */
1192static void
1193vbi_gtimer_online(void *arg, cpu_t *pcpu, cyc_handler_t *h, cyc_time_t *ct)
1194{
1195 vbi_gtimer_t *t = arg;
1196 hrtime_t now;
1197
1198 t->g_counters[pcpu->cpu_id] = 0;
1199 h->cyh_func = vbi_gtimer_func;
1200 h->cyh_arg = t;
1201 h->cyh_level = CY_LOCK_LEVEL;
1202 now = gethrtime();
1203 if (t->g_when < now)
1204 ct->cyt_when = now + t->g_interval / 2;
1205 else
1206 ct->cyt_when = t->g_when;
1207 ct->cyt_interval = t->g_interval;
1208}
1209
1210
1211vbi_gtimer_t *
1212vbi_gtimer_begin(
1213 void (*func)(void *, uint64_t),
1214 void *arg,
1215 uint64_t when,
1216 uint64_t interval)
1217{
1218 vbi_gtimer_t *t;
1219 cyc_omni_handler_t omni;
1220
1221 /*
1222 * one shot global timer is not supported yet.
1223 */
1224 if (interval == 0)
1225 return (NULL);
1226
1227 t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1228 t->g_counters = kmem_zalloc(ncpus * sizeof (uint64_t), KM_SLEEP);
1229 t->g_when = when + gethrtime();
1230 t->g_interval = interval;
1231 t->g_arg = arg;
1232 t->g_func = func;
1233 t->g_cyclic = CYCLIC_NONE;
1234
1235 omni.cyo_online = (void (*)(void *, cpu_t *, cyc_handler_t *, cyc_time_t *))vbi_gtimer_online;
1236 omni.cyo_offline = NULL;
1237 omni.cyo_arg = t;
1238
1239 mutex_enter(&cpu_lock);
1240 t->g_cyclic = cyclic_add_omni(&omni);
1241 mutex_exit(&cpu_lock);
1242 return (t);
1243}
1244
1245extern void
1246vbi_gtimer_end(vbi_gtimer_t *t)
1247{
1248 mutex_enter(&cpu_lock);
1249 cyclic_remove(t->g_cyclic);
1250 mutex_exit(&cpu_lock);
1251 kmem_free(t->g_counters, ncpus * sizeof (uint64_t));
1252 kmem_free(t, sizeof (*t));
1253}
1254
1255int
1256vbi_is_preempt_enabled(void)
1257{
1258 if (vbi_is_initialized) {
1259 char tpr = VBI_T_PREEMPT;
1260 return (tpr == 0);
1261 } else {
1262 cmn_err(CE_NOTE, "vbi_is_preempt_enabled: called without initializing vbi!\n");
1263 return 1;
1264 }
1265}
1266
1267void
1268vbi_poke_cpu(int c)
1269{
1270 if (c < ncpus)
1271 poke_cpu(c);
1272}
1273
1274/*
1275 * This is revision 5 of the interface.
1276 */
1277
1278void *
1279vbi_lowmem_alloc(uint64_t phys, size_t size)
1280{
1281 return (vbi_internal_alloc(&phys, size, PAGESIZE /* alignment */, 0 /* non-contiguous */));
1282}
1283
1284void
1285vbi_lowmem_free(void *va, size_t size)
1286{
1287 p_contig_free(va, size);
1288}
1289
1290/*
1291 * This is revision 6 of the interface.
1292 */
1293
1294int
1295vbi_is_preempt_pending(void)
1296{
1297 char crr = VBI_CPU_RUNRUN;
1298 char krr = VBI_CPU_KPRUNRUN;
1299 return crr != 0 || krr != 0;
1300}
1301
1302/*
1303 * This is revision 7 of the interface.
1304 */
1305
1306void *
1307vbi_phys_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
1308{
1309 return (vbi_internal_alloc(phys, size, alignment, contig));
1310}
1311
1312void
1313vbi_phys_free(void *va, size_t size)
1314{
1315 p_contig_free(va, size);
1316}
1317
1318
1319/*
1320 * This is revision 8 of the interface.
1321 */
1322static vnode_t vbipagevp;
1323
1324page_t **
1325vbi_pages_alloc(uint64_t *phys, size_t size)
1326{
1327 /*
1328 * The page freelist and cachelist both hold pages that are not mapped into any address space.
1329 * The cachelist pages are not truly free, but when memory is exhausted they are moved to the
1330 * freelist.
1331 * It is the total of the freelist and cachelist that shows up in the 'free' column of vmstat.
1332 */
1333 page_t **pp_pages = NULL;
1334 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1335
1336 /* reserve available memory for pages */
1337 int rc = page_resv(npages, KM_NOSLEEP);
1338 if (rc)
1339 {
1340 /* create the pages */
1341 rc = page_create_wait(npages, 0 /* flags */);
1342 if (rc)
1343 {
1344 /* alloc space for page_t pointer array */
1345 size_t pp_size = npages * sizeof(page_t *);
1346 pp_pages = kmem_zalloc(pp_size, KM_SLEEP);
1347 if (pp_pages)
1348 {
1349 /*
1350 * get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
1351 * we don't have the 'virtAddr' to which this memory may be mapped.
1352 */
1353 caddr_t virtAddr = NULL;
1354 for (int64_t i = 0; i < npages; i++, virtAddr += PAGESIZE)
1355 {
1356 /* get a page from the freelists */
1357 page_t *ppage = vbi_page_get_fromlist(1 /* freelist */, virtAddr, PAGESIZE);
1358 if (!ppage)
1359 {
1360 /* try from the cachelists */
1361 ppage = vbi_page_get_fromlist(2 /* cachelist */, virtAddr, PAGESIZE);
1362 if (!ppage)
1363 {
1364 /* damn */
1365 page_create_putback(npages - i);
1366 while (--i >= 0)
1367 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1368 kmem_free(pp_pages, pp_size);
1369 page_unresv(npages);
1370 return NULL;
1371 }
1372
1373 /* remove association with the vnode for pages from the cachelist */
1374 if (!PP_ISAGED(ppage))
1375 page_hashout(ppage, NULL /* mutex */);
1376 }
1377
1378 PP_CLRFREE(ppage); /* Page is not free */
1379 PP_CLRAGED(ppage); /* Page is not hashed in */
1380 pp_pages[i] = ppage;
1381 }
1382
1383 /*
1384 * We now have the pages locked exclusively; before they are mapped in,
1385 * the lock must be downgraded to a shared lock.
1386 */
1387 *phys = (uint64_t)page_pptonum(pp_pages[0]) << PAGESHIFT;
1388 return pp_pages;
1389 }
1390
1391 page_create_putback(npages);
1392 }
1393
1394 page_unresv(npages);
1395 }
1396
1397 return NULL;
1398}
1399
1400
1401void
1402vbi_pages_free(page_t **pp_pages, size_t size)
1403{
1404 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1405 size_t pp_size = npages * sizeof(page_t *);
1406 for (pgcnt_t i = 0; i < npages; i++)
1407 {
1408 /* we need to exclusively lock the pages before freeing them */
1409 int rc = page_tryupgrade(pp_pages[i]);
1410 if (!rc)
1411 {
1412 page_unlock(pp_pages[i]);
1413 while (!page_lock(pp_pages[i], SE_EXCL, NULL /* mutex */, P_RECLAIM))
1414 ;
1415 }
1416
1417 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1418 }
1419
1420 kmem_free(pp_pages, pp_size);
1421 page_unresv(npages);
1422}
1423
1424
1425int
1426vbi_pages_premap(page_t **pp_pages, size_t size, uint64_t *pphysaddrs)
1427{
1428 if (!pphysaddrs)
1429 return -1;
1430
1431 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1432 for (pgcnt_t i = 0; i < npages; i++)
1433 {
1434 /*
1435 * Prepare the pages for mapping into kernel/user space: downgrade the
1436 * exclusive page lock to a shared lock wherever a page is still locked
1437 * exclusively.
1438 */
1439 if (page_tryupgrade(pp_pages[i]) == 1)
1440 page_downgrade(pp_pages[i]);
1441 pphysaddrs[i] = vbi_page_to_pa(pp_pages, i);
1442 }
1443
1444 return 0;
1445}
1446
1447
1448uint64_t
1449vbi_page_to_pa(page_t **pp_pages, pgcnt_t i)
1450{
1451 pfn_t pfn = page_pptonum(pp_pages[i]);
1452 if (pfn == PFN_INVALID)
1453 panic("vbi_page_to_pa: page_pptonum() failed\n");
1454 return (uint64_t)pfn << PAGESHIFT;
1455}
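/*
 * A minimal usage sketch (kept under #if 0) tying the revision 8 page
 * routines above together: allocate backing pages, downgrade their locks and
 * collect the physical addresses with vbi_pages_premap(), then release
 * everything.  The two-page size is hypothetical.
 */
#if 0
static int
vbi_pages_sketch(void)
{
	size_t size = 2 * PAGESIZE;
	uint64_t pa[2];
	uint64_t first_pa;
	page_t **pages;

	pages = vbi_pages_alloc(&first_pa, size);
	if (pages == NULL)
		return (ENOMEM);
	if (vbi_pages_premap(pages, size, pa) != 0) {
		vbi_pages_free(pages, size);
		return (EINVAL);
	}
	/* ... hand 'pa' to vbi_user_map() or a hat_devload() loop ... */
	vbi_pages_free(pages, size);
	return (0);
}
#endif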
1456
1457
1458static page_t *
1459vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize)
1460{
1461 /* pgsize only applies when using the freelist */
1462 seg_t kernseg;
1463 kernseg.s_as = &kas;
1464 page_t *ppage = NULL;
1465 if (freelist == 1)
1466 {
1467 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1468 pgsize, 0 /* flags */, NULL /* local group */);
1469 if (!ppage && use_kflt)
1470 {
1471 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1472 pgsize, 0x0200 /* PG_KFLT */, NULL /* local group */);
1473 }
1474 }
1475 else
1476 {
1477 /* cachelist */
1478 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1479 0 /* flags */, NULL /* local group */);
1480 if (!ppage && use_kflt)
1481 {
1482 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1483 0x0200 /* PG_KFLT */, NULL /* local group */);
1484 }
1485 }
1486 return ppage;
1487}
1488
1489
1490/*
1491 * Large page code.
1492 */
1493
1494page_t *
1495vbi_large_page_alloc(uint64_t *pphys, size_t pgsize)
1496{
1497 pgcnt_t const npages = pgsize >> PAGESHIFT;
1498 page_t *pproot, *pp, *pplist;
1499 pgcnt_t ipage;
1500 caddr_t vaddr;
1501 seg_t kernseg;
1502 int rc;
1503
1504 /*
1505 * Reserve available memory for a large page and create it.
1506 */
1507 rc = page_resv(npages, KM_NOSLEEP);
1508 if (!rc)
1509 return NULL;
1510
1511 rc = page_create_wait(npages, 0 /* flags */);
1512 if (!rc) {
1513 page_unresv(npages);
1514 return NULL;
1515 }
1516
1517 /*
1518 * Get a page off the free list. We set vaddr to 0 since we don't know
1519 * where the memory is going to be mapped.
1520 */
1521 vaddr = NULL;
1522 kernseg.s_as = &kas;
1523 pproot = vbi_page_get_fromlist(1 /* freelist */, vaddr, pgsize);
1524 if (!pproot)
1525 {
1526 page_create_putback(npages);
1527 page_unresv(npages);
1528 return NULL;
1529 }
1530 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1531
1532 /*
1533 * Mark all the sub-pages as non-free and not-hashed-in.
1534 * It is paramount that we destroy the list (before freeing it).
1535 */
1536 pplist = pproot;
1537 for (ipage = 0; ipage < npages; ipage++) {
1538 pp = pplist;
1539 AssertPtr(pp);
1540 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1541 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1542 page_sub(&pplist, pp);
1543 AssertMsg(PP_ISFREE(pp), ("%p\n", pp));
1544 AssertMsg(pp->p_szc == pproot->p_szc, ("%p - %d expected %d \n", pp, pp->p_szc, pproot->p_szc));
1545
1546 PP_CLRFREE(pp);
1547 PP_CLRAGED(pp);
1548 }
1549
1550 *pphys = (uint64_t)page_pptonum(pproot) << PAGESHIFT;
1551 AssertMsg(!(*pphys & (pgsize - 1)), ("%llx %zx\n", *pphys, pgsize));
1552 return pproot;
1553}
1554
1555void
1556vbi_large_page_free(page_t *pproot, size_t pgsize)
1557{
1558 pgcnt_t const npages = pgsize >> PAGESHIFT;
1559 pgcnt_t ipage;
1560
1561 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1562 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1563
1564 /*
1565 * We need to exclusively lock the sub-pages before freeing
1566 * the large one.
1567 */
1568 for (ipage = 0; ipage < npages; ipage++) {
1569 page_t *pp = page_nextn(pproot, ipage);
1570 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1571 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1572 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1573
1574 int rc = page_tryupgrade(pp);
1575 if (!rc) {
1576 page_unlock(pp);
1577 while (!page_lock(pp, SE_EXCL, NULL /* mutex */, P_RECLAIM)) {
1578 /*nothing*/;
1579 }
1580 }
1581 }
1582
1583 /*
1584 * Free the large page and unreserve the memory.
1585 */
1586 page_free_pages(pproot);
1587 page_unresv(npages);
1588}
1589
1590int
1591vbi_large_page_premap(page_t *pproot, size_t pgsize)
1592{
1593 pgcnt_t const npages = pgsize >> PAGESHIFT;
1594 pgcnt_t ipage;
1595
1596 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1597 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1598
1599 /*
1600 * We need to downgrade the sub-pages from exclusive to shared locking
1601 * because otherwise we cannot <you go figure>.
1602 */
1603 for (ipage = 0; ipage < npages; ipage++) {
1604 page_t *pp = page_nextn(pproot, ipage);
1605 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1606 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1607 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1608
1609 if (page_tryupgrade(pp) == 1)
1610 page_downgrade(pp);
1611 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1612 }
1613
1614 return 0;
1615}
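/*
 * A minimal usage sketch (kept under #if 0) of the large page routines above:
 * allocate one large page, downgrade its constituent page locks for mapping,
 * then free it.  The 2MB size is hypothetical and must be a page size the
 * hardware supports.
 */
#if 0
static int
vbi_large_page_sketch(void)
{
	size_t const pgsize = 2 * 1024 * 1024;
	uint64_t phys;
	page_t *pp;

	pp = vbi_large_page_alloc(&phys, pgsize);
	if (pp == NULL)
		return (ENOMEM);
	(void) vbi_large_page_premap(pp, pgsize);
	/* ... map 'phys' with the large page size, then tear the mapping down ... */
	vbi_large_page_free(pp, pgsize);
	return (0);
}
#endif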
1616
1617
1618/*
1619 * As more functions are added, they should be placed above this point in the
1620 * file, each starting with a comment indicating the interface revision, and the
1621 * revision level should be increased. Also change vbi_modlmisc at the top of the file.
1622 *
1623 * NOTE! We'll start caring about this if anything in here ever makes it into
1624 * the solaris kernel proper.
1625 */
1626uint_t vbi_revision_level = 9;
1627