VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/solaris/vbi/i86pc/os/vbi.c@37948

Last change on this file since 37948 was 37948, checked in by vboxsync, 13 years ago

Runtime/r0drv/Solaris/vbi: temporary t_preempt offset fix.

  • Property svn:eol-style set to native
File size: 34.8 KB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010-2011 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Private interfaces for VirtualBox access to Solaris kernel internal
28 * facilities.
29 *
30 * See sys/vbi.h for what each function does.
31 */
32
33#include <sys/kmem.h>
34#include <sys/types.h>
35#include <sys/mman.h>
36#include <sys/thread.h>
37#include <sys/mutex.h>
38#include <sys/condvar.h>
39#include <sys/sdt.h>
40#include <sys/schedctl.h>
41#include <sys/time.h>
42#include <sys/sysmacros.h>
43#include <sys/cmn_err.h>
44#include <sys/vmsystm.h>
45#include <sys/cyclic.h>
46#include <sys/class.h>
47#include <sys/cpuvar.h>
48#include <sys/kobj.h>
49#include <sys/x_call.h>
50#include <sys/x86_archext.h>
51#include <vm/hat.h>
52#include <vm/seg_vn.h>
53#include <vm/seg_kmem.h>
54#include <sys/ddi.h>
55#include <sys/sunddi.h>
56#include <sys/modctl.h>
57#include <sys/machparam.h>
58#include <sys/utsname.h>
59
60#include <iprt/assert.h>
61
62#include "vbi.h"
63
64#define VBIPROC() ((proc_t *)vbi_proc())
65
66/*
67 * We have to look up contig_free() at runtime (via kobj_getsymvalue()) because older kernels do not export it.
68 */
69extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
70extern void contig_free(void *, size_t);
71#pragma weak contig_free
72static void (*p_contig_free)(void *, size_t) = contig_free;
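/*
 * A minimal sketch (kept under #if 0, not part of the build) of the runtime
 * fallback that vbi_init() performs further down when the weak contig_free
 * reference above is unresolved.  Only kobj_getsymvalue() from <sys/kobj.h>
 * is assumed.
 */
#if 0
static int
vbi_resolve_contig_free(void)
{
	if (p_contig_free == NULL) {
		/* weak symbol unresolved; look the routine up by name instead */
		p_contig_free = (void (*)(void *, size_t))
		    kobj_getsymvalue("contig_free", 1);
	}
	return (p_contig_free != NULL ? 0 : EINVAL);
}
#endif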
73
74/*
75 * We look up kflt_init() at runtime so that, when user pages can no longer be
76 * taken from the freelists and cachelists, we can fall back to kernel pages from the kernel freelists.
77 */
78/* Introduced in v9 */
79static int use_kflt = 0;
80static page_t *vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize);
81
82
83/*
84 * Workarounds for running on old versions of solaris with different cross call
85 * interfaces. If we find xc_init_cpu() in the kernel, then just use the defined
86 * interfaces for xc_call() from the include file where the xc_call()
87 * interfaces just take a pointer to a ulong_t array. The array must be long
88 * enough to hold "ncpus" bits at runtime.
89 *
90 * The reason for the hacks is that using the type "cpuset_t" is pretty much
91 * impossible from code built outside the Solaris source repository that wants
92 * to run on multiple releases of Solaris.
93 *
94 * For old style xc_call()s, 32 bit solaris and older 64 bit versions use
95 * "ulong_t" as cpuset_t.
96 *
97 * Later versions of 64 bit Solaris used: struct {ulong_t words[x];}
98 * where "x" depends on NCPU.
99 *
100 * We detect the difference in 64 bit support by checking the kernel value of
101 * max_cpuid, which always holds the compiled value of NCPU - 1.
102 *
103 * If Solaris increases NCPU to more than 256, this module will continue
104 * to work on all versions of Solaris as long as the number of installed
105 * CPUs in the machine is <= VBI_NCPU. If VBI_NCPU is increased, this code
106 * has to be re-written some to provide compatibility with older Solaris which
107 * expects cpuset_t to be based on NCPU==256 -- or we discontinue support
108 * of old Nevada/S10.
109 */
110static int use_old = 0;
111static int use_old_with_ulong = 0;
112static void (*p_xc_call)() = (void (*)())xc_call;
113
114#define VBI_NCPU 256
115#define VBI_SET_WORDS (VBI_NCPU / (sizeof (ulong_t) * 8))
116typedef struct vbi_cpuset {
117 ulong_t words[VBI_SET_WORDS];
118} vbi_cpuset_t;
119#define X_CALL_HIPRI (2) /* for old Solaris interface */
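/*
 * A minimal sketch (kept under #if 0) of how vbi_cpuset_t is sized and
 * manipulated: with VBI_NCPU == 256 and a 64-bit ulong_t, VBI_SET_WORDS is
 * 256 / (8 * 8) == 4 words, i.e. 256 bits, one per possible CPU.  The
 * BT_SET()/BT_CLEAR() macros used later in this file operate on that word
 * array; CPU 5 below is a hypothetical example.
 */
#if 0
static void
vbi_cpuset_sketch(void)
{
	vbi_cpuset_t set;
	int i;

	for (i = 0; i < VBI_SET_WORDS; ++i)	/* start with an empty set */
		set.words[i] = 0;
	BT_SET(set.words, 5);			/* add CPU 5 to the set */
	ASSERT(BT_TEST(set.words, 5));
	BT_CLEAR(set.words, 5);			/* and remove it again */
}
#endif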
120
121/*
122 * module linkage stuff
123 */
124#if 0
125static struct modlmisc vbi_modlmisc = {
126 &mod_miscops, "VirtualBox Interfaces V8"
127};
128
129static struct modlinkage vbi_modlinkage = {
130 MODREV_1, { (void *)&vbi_modlmisc, NULL }
131};
132#endif
133
134extern uintptr_t kernelbase;
135#define IS_KERNEL(v) ((uintptr_t)(v) >= kernelbase)
136
137#if 0
138static int vbi_verbose = 0;
139
140#define VBI_VERBOSE(msg) {if (vbi_verbose) cmn_err(CE_WARN, msg);}
141#endif
142
143/* Introduced in v8 */
144static int vbi_is_initialized = 0;
145
146/* Introduced in v6 */
147static int vbi_is_nevada = 0;
148
149#ifdef _LP64
150/* 64-bit Solaris 10 offsets */
151/* CPU */
152static int off_s10_cpu_runrun = 232;
153static int off_s10_cpu_kprunrun = 233;
154/* kthread_t */
155static int off_s10_t_preempt = 42;
156
157/* 64-bit Solaris 11 (Nevada/OpenSolaris) offsets */
158/* CPU */
159static int off_s11_cpu_runrun = 216;
160static int off_s11_cpu_kprunrun = 217;
161/* kthread_t */
162static int off_s11_t_preempt = 42;
163
164/* 64-bit Solaris 11 snv_166+ offsets (CR 7037143) */
165static int off_s11_t_preempt_new = 48;
166#else
167/* 32-bit Solaris 10 offsets */
168/* CPU */
169static int off_s10_cpu_runrun = 124;
170static int off_s10_cpu_kprunrun = 125;
171/* kthread_t */
172static int off_s10_t_preempt = 26;
173
174/* 32-bit Solaris 11 (Nevada/OpenSolaris) offsets */
175/* CPU */
176static int off_s11_cpu_runrun = 112;
177static int off_s11_cpu_kprunrun = 113;
178/* kthread_t */
179static int off_s11_t_preempt = 26;
180#endif
181
182
183/* Which offsets will be used */
184static int off_cpu_runrun = -1;
185static int off_cpu_kprunrun = -1;
186static int off_t_preempt = -1;
187
188#define VBI_T_PREEMPT (*((char *)curthread + off_t_preempt))
189#define VBI_CPU_KPRUNRUN (*((char *)CPU + off_cpu_kprunrun))
190#define VBI_CPU_RUNRUN (*((char *)CPU + off_cpu_runrun))
191
192#undef kpreempt_disable
193#undef kpreempt_enable
194
195#define VBI_PREEMPT_DISABLE() \
196 { \
197 VBI_T_PREEMPT++; \
198 ASSERT(VBI_T_PREEMPT >= 1); \
199 }
200#define VBI_PREEMPT_ENABLE() \
201 { \
202 ASSERT(VBI_T_PREEMPT >= 1); \
203 if (--VBI_T_PREEMPT == 0 && \
204 VBI_CPU_RUNRUN) \
205 kpreempt(KPREEMPT_SYNC); \
206 }
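/*
 * A minimal sketch (kept under #if 0) of what VBI_T_PREEMPT amounts to once
 * vbi_init() has chosen off_t_preempt: the current thread's preemption-disable
 * count is read as a raw byte at that offset rather than through the kthread_t
 * definition, so a wrong offset would touch an unrelated field -- hence the
 * range checks in vbi_init().  The offset 48 is the snv_166+ 64-bit value from
 * the table above.
 */
#if 0
static char
vbi_read_t_preempt_sketch(void)
{
	char *thr = (char *)curthread;

	return (thr[48]);	/* preemption-disable count on snv_166+ (64-bit) */
}
#endif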
207
208/* End of v6 intro */
209
210#if 0
211int
212_init(void)
213{
214 int err = vbi_init();
215 if (!err)
216 err = mod_install(&vbi_modlinkage);
217 return (err);
218}
219#endif
220
221int
222vbi_init(void)
223{
224 /*
225 * Check whether this version of the virtualbox interface module will work
226 * with the running kernel.
227 */
228 if (kobj_getsymvalue("xc_init_cpu", 1) != NULL) {
229 /*
230 * Our bit vector storage needs to be large enough for the
231 * actual number of CPUs running in the system.
232 */
233 if (ncpus > VBI_NCPU) {
234 cmn_err(CE_NOTE, "cpu count mismatch.\n");
235 return (EINVAL);
236 }
237 } else {
238 use_old = 1;
239 if (max_cpuid + 1 == sizeof(ulong_t) * 8)
240 use_old_with_ulong = 1;
241 else if (max_cpuid + 1 != VBI_NCPU)
242 {
243 cmn_err(CE_NOTE, "cpuset_t size mismatch. probably too old a kernel.\n");
244 return (EINVAL); /* cpuset_t size mismatch */
245 }
246 }
247
248 /*
249 * In older versions of Solaris contig_free() is a static routine.
250 */
251 if (p_contig_free == NULL) {
252 p_contig_free = (void (*)(void *, size_t))
253 kobj_getsymvalue("contig_free", 1);
254 if (p_contig_free == NULL) {
255 cmn_err(CE_NOTE, " contig_free() not found in kernel\n");
256 return (EINVAL);
257 }
258 }
259
260 /*
261 * Use kernel page freelist flags to get pages from the kernel page freelists
262 * while allocating physical pages, once the user pages are exhausted.
263 * snv_161+, see @bugref{5632}.
264 */
265 if (kobj_getsymvalue("kflt_init", 1) != NULL)
266 {
267 int *p_kflt_disable = (int*)kobj_getsymvalue("kflt_disable", 1); /* amd64 only, on 32-bit kflt's are disabled. */
268 if (p_kflt_disable && *p_kflt_disable == 0)
269 {
270 use_kflt = 1;
271 }
272 }
273
274
275 /*
276 * Check if this is S10 or Nevada
277 */
278 if (!strncmp(utsname.release, "5.11", sizeof("5.11") - 1)) {
279 /* Nevada detected... */
280 vbi_is_nevada = 1;
281
282 off_cpu_runrun = off_s11_cpu_runrun;
283 off_cpu_kprunrun = off_s11_cpu_kprunrun;
284 off_t_preempt = off_s11_t_preempt;
285
286#ifdef _LP64
287 /* Only 64-bit kernels */
288 long snv_version = 0;
289 if ( !strncmp(utsname.version, "snv_", 4)
290 && strlen(utsname.version) > 4)
291 {
292 ddi_strtol(utsname.version + 4, NULL /* endptr */, 0, &snv_version);
293 if (snv_version >= 166)
294 {
295 off_t_preempt = off_s11_t_preempt_new;
296 cmn_err(CE_NOTE, "here\n");
297 }
298
299 cmn_err(CE_NOTE, "Detected S11 version %ld: Preemption offset=%d\n", snv_version, off_t_preempt);
300 }
301 else
302 cmn_err(CE_NOTE, "WARNING!! Cannot determine version. Assuming pre snv_166. Preemption offset=%d may be busted!\n", off_t_preempt);
303#endif
304 } else {
305 /* Solaris 10 detected... */
306 vbi_is_nevada = 0;
307
308 off_cpu_runrun = off_s10_cpu_runrun;
309 off_cpu_kprunrun = off_s10_cpu_kprunrun;
310 off_t_preempt = off_s10_t_preempt;
311 }
312
313 /*
314 * Sanity checking...
315 */
316 /* CPU */
317 char crr = VBI_CPU_RUNRUN;
318 char krr = VBI_CPU_KPRUNRUN;
319 if ( (crr < 0 || crr > 1)
320 || (krr < 0 || krr > 1)) {
321 cmn_err(CE_NOTE, "CPU structure sanity check failed! OS version mismatch.\n");
322 return (EINVAL);
323 }
324
325 /* Thread */
326 char t_preempt = VBI_T_PREEMPT;
327 if (t_preempt < 0 || t_preempt > 32) {
328 cmn_err(CE_NOTE, "Thread structure sanity check failed! OS version mismatch.\n");
329 return (EINVAL);
330 }
331
332 vbi_is_initialized = 1;
333
334 return (0);
335}
336
337#if 0
338int
339_fini(void)
340{
341 int err = mod_remove(&vbi_modlinkage);
342 if (err != 0)
343 return (err);
344
345 return (0);
346}
347
348int
349_info(struct modinfo *modinfop)
350{
351 return (mod_info(&vbi_modlinkage, modinfop));
352}
353#endif
354
355
356static ddi_dma_attr_t base_attr = {
357 DMA_ATTR_V0, /* Version Number */
358 (uint64_t)0, /* lower limit */
359 (uint64_t)0, /* high limit */
360 (uint64_t)0xffffffff, /* counter limit */
361 (uint64_t)PAGESIZE, /* pagesize alignment */
362 (uint64_t)PAGESIZE, /* pagesize burst size */
363 (uint64_t)PAGESIZE, /* pagesize effective DMA size */
364 (uint64_t)0xffffffff, /* max DMA xfer size */
365 (uint64_t)0xffffffff, /* segment boundary */
366 1, /* list length (1 for contiguous) */
367 1, /* device granularity */
368 0 /* bus-specific flags */
369};
370
371static void *
372vbi_internal_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
373{
374 ddi_dma_attr_t attr;
375 pfn_t pfn;
376 void *ptr;
377 uint_t npages;
378
379 if ((size & PAGEOFFSET) != 0)
380 return (NULL);
381 npages = (size + PAGESIZE - 1) >> PAGESHIFT;
382 if (npages == 0)
383 return (NULL);
384
385 attr = base_attr;
386 attr.dma_attr_addr_hi = *phys;
387 attr.dma_attr_align = alignment;
388 if (!contig)
389 attr.dma_attr_sgllen = npages;
390 ptr = contig_alloc(size, &attr, PAGESIZE, 1);
391
392 if (ptr == NULL) {
393 cmn_err(CE_NOTE, "vbi_internal_alloc() failure for %lu bytes contig=%d", size, contig);
394 return (NULL);
395 }
396
397 pfn = hat_getpfnum(kas.a_hat, (caddr_t)ptr);
398 if (pfn == PFN_INVALID)
399 panic("vbi_internal_alloc(): hat_getpfnum() failed\n");
400 *phys = (uint64_t)pfn << PAGESHIFT;
401 return (ptr);
402}
403
404void *
405vbi_contig_alloc(uint64_t *phys, size_t size)
406{
407 /* Obsolete */
408 return (vbi_internal_alloc(phys, size, PAGESIZE /* alignment */, 1 /* contiguous */));
409}
410
411void
412vbi_contig_free(void *va, size_t size)
413{
414 /* Obsolete */
415 p_contig_free(va, size);
416}
417
418void *
419vbi_kernel_map(uint64_t pa, size_t size, uint_t prot)
420{
421 caddr_t va;
422
423 if ((pa & PAGEOFFSET) || (size & PAGEOFFSET)) {
424 cmn_err(CE_NOTE, "vbi_kernel_map() bad pa (0x%lx) or size (%lu)", pa, size);
425 return (NULL);
426 }
427
428 va = vmem_alloc(heap_arena, size, VM_SLEEP);
429
430 hat_devload(kas.a_hat, va, size, (pfn_t)(pa >> PAGESHIFT),
431 prot, HAT_LOAD | HAT_LOAD_LOCK | HAT_UNORDERED_OK);
432
433 return (va);
434}
435
436void
437vbi_unmap(void *va, size_t size)
438{
439 if (IS_KERNEL(va)) {
440 hat_unload(kas.a_hat, va, size, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
441 vmem_free(heap_arena, va, size);
442 } else {
443 struct as *as = VBIPROC()->p_as;
444
445 as_rangelock(as);
446 (void) as_unmap(as, va, size);
447 as_rangeunlock(as);
448 }
449}
450
451void *
452vbi_curthread(void)
453{
454 return (curthread);
455}
456
457int
458vbi_yield(void)
459{
460 int rv = 0;
461
462 vbi_preempt_disable();
463
464 char tpr = VBI_T_PREEMPT;
465 char kpr = VBI_CPU_KPRUNRUN;
466 if (tpr == 1 && kpr)
467 rv = 1;
468
469 vbi_preempt_enable();
470 return (rv);
471}
472
473uint64_t
474vbi_timer_granularity(void)
475{
476 return (nsec_per_tick);
477}
478
479typedef struct vbi_timer {
480 cyc_handler_t vbi_handler;
481 cyclic_id_t vbi_cyclic;
482 uint64_t vbi_interval;
483 void (*vbi_func)();
484 void *vbi_arg1;
485 void *vbi_arg2;
486} vbi_timer_t;
487
488static void
489vbi_timer_callback(void *arg)
490{
491 vbi_timer_t *t = arg;
492
493 if (t->vbi_interval == 0)
494 vbi_timer_stop(arg);
495 t->vbi_func(t->vbi_arg1, t->vbi_arg2);
496}
497
498void *
499vbi_timer_create(void *callback, void *arg1, void *arg2, uint64_t interval)
500{
501 vbi_timer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
502
503 t->vbi_func = (void (*)())callback;
504 t->vbi_arg1 = arg1;
505 t->vbi_arg2 = arg2;
506 t->vbi_handler.cyh_func = vbi_timer_callback;
507 t->vbi_handler.cyh_arg = (void *)t;
508 t->vbi_handler.cyh_level = CY_LOCK_LEVEL;
509 t->vbi_cyclic = CYCLIC_NONE;
510 t->vbi_interval = interval;
511 return (t);
512}
513
514void
515vbi_timer_destroy(void *timer)
516{
517 vbi_timer_t *t = timer;
518 if (t != NULL) {
519 vbi_timer_stop(timer);
520 kmem_free(t, sizeof (*t));
521 }
522}
523
524void
525vbi_timer_start(void *timer, uint64_t when)
526{
527 vbi_timer_t *t = timer;
528 cyc_time_t fire_time;
529 uint64_t interval = t->vbi_interval;
530
531 mutex_enter(&cpu_lock);
532 when += gethrtime();
533 fire_time.cyt_when = when;
534 if (interval == 0)
535 fire_time.cyt_interval = when;
536 else
537 fire_time.cyt_interval = interval;
538 t->vbi_cyclic = cyclic_add(&t->vbi_handler, &fire_time);
539 mutex_exit(&cpu_lock);
540}
541
542void
543vbi_timer_stop(void *timer)
544{
545 vbi_timer_t *t = timer;
546
547 if (t->vbi_cyclic == CYCLIC_NONE)
548 return;
549 mutex_enter(&cpu_lock);
550 if (t->vbi_cyclic != CYCLIC_NONE) {
551 cyclic_remove(t->vbi_cyclic);
552 t->vbi_cyclic = CYCLIC_NONE;
553 }
554 mutex_exit(&cpu_lock);
555}
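/*
 * A minimal usage sketch (kept under #if 0) of the cyclic-backed timer API
 * above: create a periodic timer, arm it, and tear it down.  The callback and
 * the 10ms values are hypothetical.
 */
#if 0
static void
vbi_timer_sketch_cb(void *arg1, void *arg2)
{
	/* runs at CY_LOCK_LEVEL from the cyclic subsystem; must not block */
}

static void
vbi_timer_sketch(void)
{
	/* interval != 0 makes it periodic; 0 would make it a one-shot */
	void *t = vbi_timer_create((void *)vbi_timer_sketch_cb, NULL, NULL,
	    10000000 /* 10ms period, in nanoseconds */);

	vbi_timer_start(t, 10000000 /* first fire 10ms from now */);
	/* ... */
	vbi_timer_stop(t);
	vbi_timer_destroy(t);
}
#endif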
556
557uint64_t
558vbi_tod(void)
559{
560 timestruc_t ts;
561
562 mutex_enter(&tod_lock);
563 ts = tod_get();
564 mutex_exit(&tod_lock);
565 return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
566}
567
568
569void *
570vbi_proc(void)
571{
572 proc_t *p;
573 drv_getparm(UPROCP, &p);
574 return (p);
575}
576
577void
578vbi_set_priority(void *thread, int priority)
579{
580 kthread_t *t = thread;
581
582 thread_lock(t);
583 (void) thread_change_pri(t, priority, 0);
584 thread_unlock(t);
585}
586
587void *
588vbi_thread_create(void (*func)(void *), void *arg, size_t len, int priority)
589{
590 kthread_t *t;
591
592 t = thread_create(NULL, NULL, (void (*)())func, arg, len,
593 VBIPROC(), TS_RUN, priority);
594 return (t);
595}
596
597void
598vbi_thread_exit(void)
599{
600 thread_exit();
601}
602
603void *
604vbi_text_alloc(size_t size)
605{
606 return (segkmem_alloc(heaptext_arena, size, KM_SLEEP));
607}
608
609void
610vbi_text_free(void *va, size_t size)
611{
612 segkmem_free(heaptext_arena, va, size);
613}
614
615int
616vbi_cpu_id(void)
617{
618 return (CPU->cpu_id);
619}
620
621int
622vbi_max_cpu_id(void)
623{
624 return (max_cpuid);
625}
626
627int
628vbi_cpu_maxcount(void)
629{
630 return (max_cpuid + 1);
631}
632
633int
634vbi_cpu_count(void)
635{
636 return (ncpus);
637}
638
639int
640vbi_cpu_online(int c)
641{
642 int x;
643
644 mutex_enter(&cpu_lock);
645 x = cpu_is_online(cpu[c]);
646 mutex_exit(&cpu_lock);
647 return (x);
648}
649
650void
651vbi_preempt_disable(void)
652{
653 VBI_PREEMPT_DISABLE();
654}
655
656void
657vbi_preempt_enable(void)
658{
659 VBI_PREEMPT_ENABLE();
660}
661
662void
663vbi_execute_on_all(void *func, void *arg)
664{
665 vbi_cpuset_t set;
666 int i;
667
668 for (i = 0; i < VBI_SET_WORDS; ++i)
669 set.words[i] = (ulong_t)-1L;
670 if (use_old) {
671 if (use_old_with_ulong) {
672 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
673 set.words[0], (xc_func_t)func);
674 } else {
675 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
676 set, (xc_func_t)func);
677 }
678 } else {
679 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
680 }
681}
682
683void
684vbi_execute_on_others(void *func, void *arg)
685{
686 vbi_cpuset_t set;
687 int i;
688
689 for (i = 0; i < VBI_SET_WORDS; ++i)
690 set.words[i] = (ulong_t)-1L;
691 BT_CLEAR(set.words, vbi_cpu_id());
692 if (use_old) {
693 if (use_old_with_ulong) {
694 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
695 set.words[0], (xc_func_t)func);
696 } else {
697 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
698 set, (xc_func_t)func);
699 }
700 } else {
701 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
702 }
703}
704
705void
706vbi_execute_on_one(void *func, void *arg, int c)
707{
708 vbi_cpuset_t set;
709 int i;
710
711 for (i = 0; i < VBI_SET_WORDS; ++i)
712 set.words[i] = 0;
713 BT_SET(set.words, c);
714 if (use_old) {
715 if (use_old_with_ulong) {
716 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
717 set.words[0], (xc_func_t)func);
718 } else {
719 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
720 set, (xc_func_t)func);
721 }
722 } else {
723 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
724 }
725}
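/*
 * A minimal usage sketch (kept under #if 0) of the cross call wrappers above.
 * The handler runs at high interrupt level on the targeted CPU(s) and must
 * not block; the handler name is hypothetical.
 */
#if 0
static void
vbi_xcall_sketch_handler(void *arg)
{
	/* e.g. flush a per-CPU cache or poke a per-CPU register */
}

static void
vbi_xcall_sketch(void)
{
	vbi_execute_on_all((void *)vbi_xcall_sketch_handler, NULL);
	vbi_execute_on_others((void *)vbi_xcall_sketch_handler, NULL);
	vbi_execute_on_one((void *)vbi_xcall_sketch_handler, NULL, vbi_cpu_id());
}
#endif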
726
727int
728vbi_lock_va(void *addr, size_t len, int access, void **handle)
729{
730 faultcode_t err;
731
732 /*
733 * kernel mappings on x86 are always locked, so only handle user.
734 */
735 *handle = NULL;
736 if (!IS_KERNEL(addr)) {
737 err = as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
738 (caddr_t)addr, len, F_SOFTLOCK, access);
739 if (err != 0) {
740 cmn_err(CE_NOTE, "vbi_lock_va() failed to lock");
741 return (-1);
742 }
743 }
744 return (0);
745}
746
747/*ARGSUSED*/
748void
749vbi_unlock_va(void *addr, size_t len, int access, void *handle)
750{
751 if (!IS_KERNEL(addr))
752 as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
753 (caddr_t)addr, len, F_SOFTUNLOCK, access);
754}
755
756uint64_t
757vbi_va_to_pa(void *addr)
758{
759 struct hat *hat;
760 pfn_t pfn;
761 uintptr_t v = (uintptr_t)addr;
762
763 if (IS_KERNEL(v))
764 hat = kas.a_hat;
765 else
766 hat = VBIPROC()->p_as->a_hat;
767 pfn = hat_getpfnum(hat, (caddr_t)(v & PAGEMASK));
768 if (pfn == PFN_INVALID)
769 return (-(uint64_t)1);
770 return (((uint64_t)pfn << PAGESHIFT) | (v & PAGEOFFSET));
771}
772
773
774struct segvbi_crargs {
775 uint64_t *palist;
776 uint_t prot;
777};
778
779struct segvbi_data {
780 uint_t prot;
781};
782
783static struct seg_ops segvbi_ops;
784
785static int
786segvbi_create(struct seg *seg, void *args)
787{
788 struct segvbi_crargs *a = args;
789 struct segvbi_data *data;
790 struct as *as = seg->s_as;
791 caddr_t va;
792 ulong_t pgcnt;
793 ulong_t p;
794
795 hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
796 data = kmem_zalloc(sizeof (*data), KM_SLEEP);
797 data->prot = a->prot | PROT_USER;
798
799 seg->s_ops = &segvbi_ops;
800 seg->s_data = data;
801
802 /*
803 * now load locked mappings to the pages
804 */
805 va = seg->s_base;
806 pgcnt = (seg->s_size + PAGESIZE - 1) >> PAGESHIFT;
807 for (p = 0; p < pgcnt; ++p, va += PAGESIZE) {
808 hat_devload(as->a_hat, va,
809 PAGESIZE, a->palist[p] >> PAGESHIFT,
810 data->prot | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
811 }
812
813 return (0);
814}
815
816/*
817 * Duplicate a seg and return new segment in newseg.
818 */
819static int
820segvbi_dup(struct seg *seg, struct seg *newseg)
821{
822 struct segvbi_data *data = seg->s_data;
823 struct segvbi_data *ndata;
824
825 ndata = kmem_zalloc(sizeof (*data), KM_SLEEP);
826 ndata->prot = data->prot;
827 newseg->s_ops = &segvbi_ops;
828 newseg->s_data = ndata;
829
830 return (0);
831}
832
833static int
834segvbi_unmap(struct seg *seg, caddr_t addr, size_t len)
835{
836 if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
837 (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
838 panic("segvbi_unmap");
839
840 if (addr != seg->s_base || len != seg->s_size)
841 return (ENOTSUP);
842
843 hat_unload(seg->s_as->a_hat, addr, len,
844 HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
845
846 seg_free(seg);
847 return (0);
848}
849
850static void
851segvbi_free(struct seg *seg)
852{
853 struct segvbi_data *data = seg->s_data;
854 kmem_free(data, sizeof (*data));
855}
856
857/*
858 * The (u)read() path may SEGOP_FAULT() buffers that were mapped in via
859 * vbi_user_map(), i.e. prefault them before DMA. Since those mappings are
860 * already loaded and locked, don't fail when we're called directly; see #5047.
861 */
862static int
863segvbi_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
864 enum fault_type type, enum seg_rw rw)
865{
866 return (0);
867}
868
869static int
870segvbi_faulta(struct seg *seg, caddr_t addr)
871{
872 return (0);
873}
874
875static int
876segvbi_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
877{
878 return (EACCES);
879}
880
881static int
882segvbi_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
883{
884 return (EINVAL);
885}
886
887static int
888segvbi_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
889{
890 return (-1);
891}
892
893static int
894segvbi_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
895{
896 return (0);
897}
898
899static size_t
900segvbi_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
901{
902 size_t v;
903
904 for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
905 len -= PAGESIZE, v += PAGESIZE)
906 *vec++ = 1;
907 return (v);
908}
909
910static int
911segvbi_lockop(struct seg *seg, caddr_t addr,
912 size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
913{
914 return (0);
915}
916
917static int
918segvbi_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
919{
920 struct segvbi_data *data = seg->s_data;
921 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
922 if (pgno != 0)
923 {
924 do
925 {
926 pgno--;
927 protv[pgno] = data->prot;
928 } while (pgno != 0);
929 }
930 return (0);
931}
932
933static u_offset_t
934segvbi_getoffset(struct seg *seg, caddr_t addr)
935{
936 return ((uintptr_t)addr - (uintptr_t)seg->s_base);
937}
938
939static int
940segvbi_gettype(struct seg *seg, caddr_t addr)
941{
942 return (MAP_SHARED);
943}
944
945static vnode_t vbivp;
946
947static int
948segvbi_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
949{
950 *vpp = &vbivp;
951 return (0);
952}
953
954static int
955segvbi_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
956{
957 return (0);
958}
959
960static void
961segvbi_dump(struct seg *seg)
962{}
963
964static int
965segvbi_pagelock(struct seg *seg, caddr_t addr, size_t len,
966 struct page ***ppp, enum lock_type type, enum seg_rw rw)
967{
968 return (ENOTSUP);
969}
970
971static int
972segvbi_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
973{
974 return (ENOTSUP);
975}
976
977static int
978segvbi_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
979{
980 return (ENODEV);
981}
982
983static lgrp_mem_policy_info_t *
984segvbi_getpolicy(struct seg *seg, caddr_t addr)
985{
986 return (NULL);
987}
988
989static int
990segvbi_capable(struct seg *seg, segcapability_t capability)
991{
992 return (0);
993}
994
995static struct seg_ops segvbi_ops = {
996 segvbi_dup,
997 segvbi_unmap,
998 segvbi_free,
999 segvbi_fault,
1000 segvbi_faulta,
1001 segvbi_setprot,
1002 segvbi_checkprot,
1003 (int (*)())segvbi_kluster,
1004 (size_t (*)(struct seg *))NULL, /* swapout */
1005 segvbi_sync,
1006 segvbi_incore,
1007 segvbi_lockop,
1008 segvbi_getprot,
1009 segvbi_getoffset,
1010 segvbi_gettype,
1011 segvbi_getvp,
1012 segvbi_advise,
1013 segvbi_dump,
1014 segvbi_pagelock,
1015 segvbi_setpagesize,
1016 segvbi_getmemid,
1017 segvbi_getpolicy,
1018 segvbi_capable
1019};
1020
1021
1022
1023/*
1024 * Interfaces to inject physical pages into user address space
1025 * and later remove them.
1026 */
1027int
1028vbi_user_map(caddr_t *va, uint_t prot, uint64_t *palist, size_t len)
1029{
1030 struct as *as = VBIPROC()->p_as;
1031 struct segvbi_crargs args;
1032 int error = 0;
1033
1034 args.palist = palist;
1035 args.prot = prot;
1036 as_rangelock(as);
1037 map_addr(va, len, 0, 0, MAP_SHARED);
1038 if (*va != NULL)
1039 error = as_map(as, *va, len, segvbi_create, &args);
1040 else
1041 error = ENOMEM;
1042 if (error)
1043 cmn_err(CE_NOTE, "vbi_user_map() failed error=%d", error);
1044 as_rangeunlock(as);
1045 return (error);
1046}
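/*
 * A minimal usage sketch (kept under #if 0) of injecting physical pages into
 * the calling process with vbi_user_map(): build a page-aligned physical
 * address list, then let the segvbi segment driver above map it.  The
 * single-page example and the below-4GB limit are hypothetical; prototypes
 * are assumed to come from vbi.h.
 */
#if 0
static int
vbi_user_map_sketch(void)
{
	uint64_t phys = 0xffffffffULL;		/* upper limit: allocate below 4GB */
	uint64_t palist[1];
	caddr_t va = NULL;
	void *p;

	p = vbi_phys_alloc(&phys, PAGESIZE, PAGESIZE, 1 /* contiguous */);
	if (p == NULL)
		return (ENOMEM);
	palist[0] = phys;			/* actual physical address on return */
	if (vbi_user_map(&va, PROT_READ | PROT_WRITE, palist, PAGESIZE) != 0) {
		vbi_phys_free(p, PAGESIZE);
		return (ENOMEM);
	}
	/* ... the user address 'va' now maps the page ... */
	vbi_unmap(va, PAGESIZE);
	vbi_phys_free(p, PAGESIZE);
	return (0);
}
#endif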
1047
1048
1049/*
1050 * This is revision 2 of the interface.
1051 */
1052
1053struct vbi_cpu_watch {
1054 void (*vbi_cpu_func)(void *, int, int);
1055 void *vbi_cpu_arg;
1056};
1057
1058static int
1059vbi_watcher(cpu_setup_t state, int icpu, void *arg)
1060{
1061 vbi_cpu_watch_t *w = arg;
1062 int online;
1063
1064 if (state == CPU_ON)
1065 online = 1;
1066 else if (state == CPU_OFF)
1067 online = 0;
1068 else
1069 return (0);
1070 w->vbi_cpu_func(w->vbi_cpu_arg, icpu, online);
1071 return (0);
1072}
1073
1074vbi_cpu_watch_t *
1075vbi_watch_cpus(void (*func)(void *, int, int), void *arg, int current_too)
1076{
1077 int c;
1078 vbi_cpu_watch_t *w;
1079
1080 w = kmem_alloc(sizeof (*w), KM_SLEEP);
1081 w->vbi_cpu_func = func;
1082 w->vbi_cpu_arg = arg;
1083 mutex_enter(&cpu_lock);
1084 register_cpu_setup_func(vbi_watcher, w);
1085 if (current_too) {
1086 for (c = 0; c < ncpus; ++c) {
1087 if (cpu_is_online(cpu[c]))
1088 func(arg, c, 1);
1089 }
1090 }
1091 mutex_exit(&cpu_lock);
1092 return (w);
1093}
1094
1095void
1096vbi_ignore_cpus(vbi_cpu_watch_t *w)
1097{
1098 mutex_enter(&cpu_lock);
1099 unregister_cpu_setup_func(vbi_watcher, w);
1100 mutex_exit(&cpu_lock);
1101 kmem_free(w, sizeof (*w));
1102}
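/*
 * A minimal usage sketch (kept under #if 0) of the CPU online/offline
 * notification hooks above.  The callback is hypothetical; passing a non-zero
 * current_too reports the CPUs that are already online at registration time.
 */
#if 0
static void
vbi_cpu_watch_sketch_cb(void *arg, int icpu, int online)
{
	cmn_err(CE_NOTE, "cpu %d is now %s", icpu, online ? "online" : "offline");
}

static void
vbi_cpu_watch_sketch(void)
{
	vbi_cpu_watch_t *w = vbi_watch_cpus(vbi_cpu_watch_sketch_cb, NULL, 1);

	/* ... */
	vbi_ignore_cpus(w);
}
#endif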
1103
1104/*
1105 * Simple timers are pretty much a pass through to the cyclic subsystem.
1106 */
1107struct vbi_stimer {
1108 cyc_handler_t s_handler;
1109 cyc_time_t s_fire_time;
1110 cyclic_id_t s_cyclic;
1111 uint64_t s_tick;
1112 void (*s_func)(void *, uint64_t);
1113 void *s_arg;
1114};
1115
1116static void
1117vbi_stimer_func(void *arg)
1118{
1119 vbi_stimer_t *t = arg;
1120 t->s_func(t->s_arg, ++t->s_tick);
1121}
1122
1123extern vbi_stimer_t *
1124vbi_stimer_begin(
1125 void (*func)(void *, uint64_t),
1126 void *arg,
1127 uint64_t when,
1128 uint64_t interval,
1129 int on_cpu)
1130{
1131 vbi_stimer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1132
1133 t->s_handler.cyh_func = vbi_stimer_func;
1134 t->s_handler.cyh_arg = t;
1135 t->s_handler.cyh_level = CY_LOCK_LEVEL;
1136 t->s_tick = 0;
1137 t->s_func = func;
1138 t->s_arg = arg;
1139
1140 mutex_enter(&cpu_lock);
1141 if (on_cpu != VBI_ANY_CPU && !cpu_is_online(cpu[on_cpu])) {
1142 t = NULL;
1143 goto done;
1144 }
1145
1146 when += gethrtime();
1147 t->s_fire_time.cyt_when = when;
1148 if (interval == 0)
1149 t->s_fire_time.cyt_interval = INT64_MAX - when;
1150 else
1151 t->s_fire_time.cyt_interval = interval;
1152 t->s_cyclic = cyclic_add(&t->s_handler, &t->s_fire_time);
1153 if (on_cpu != VBI_ANY_CPU)
1154 cyclic_bind(t->s_cyclic, cpu[on_cpu], NULL);
1155done:
1156 mutex_exit(&cpu_lock);
1157 return (t);
1158}
1159
1160extern void
1161vbi_stimer_end(vbi_stimer_t *t)
1162{
1163 mutex_enter(&cpu_lock);
1164 cyclic_remove(t->s_cyclic);
1165 mutex_exit(&cpu_lock);
1166 kmem_free(t, sizeof (*t));
1167}
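/*
 * A minimal usage sketch (kept under #if 0) of the simple timer above: a
 * periodic tick bound to the current CPU.  The callback and the 1ms values
 * are hypothetical; VBI_ANY_CPU (from vbi.h) may be passed when no CPU
 * binding is wanted.
 */
#if 0
static void
vbi_stimer_sketch_cb(void *arg, uint64_t tick)
{
	/* 'tick' counts invocations, starting at 1 */
}

static void
vbi_stimer_sketch(void)
{
	vbi_stimer_t *t = vbi_stimer_begin(vbi_stimer_sketch_cb, NULL,
	    1000000 /* first fire: 1ms from now */,
	    1000000 /* period: 1ms */,
	    vbi_cpu_id());

	if (t != NULL)
		vbi_stimer_end(t);
}
#endif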
1168
1169/*
1170 * Global timers are more complicated. They include a counter on the callback,
1171 * that indicates the first call on a given cpu.
1172 */
1173struct vbi_gtimer {
1174 uint64_t *g_counters;
1175 void (*g_func)(void *, uint64_t);
1176 void *g_arg;
1177 uint64_t g_when;
1178 uint64_t g_interval;
1179 cyclic_id_t g_cyclic;
1180};
1181
1182static void
1183vbi_gtimer_func(void *arg)
1184{
1185 vbi_gtimer_t *t = arg;
1186 t->g_func(t->g_arg, ++t->g_counters[vbi_cpu_id()]);
1187}
1188
1189/*
1190 * Whenever a cpu is onlined, need to reset the g_counters[] for it to zero.
1191 */
1192static void
1193vbi_gtimer_online(void *arg, cpu_t *pcpu, cyc_handler_t *h, cyc_time_t *ct)
1194{
1195 vbi_gtimer_t *t = arg;
1196 hrtime_t now;
1197
1198 t->g_counters[pcpu->cpu_id] = 0;
1199 h->cyh_func = vbi_gtimer_func;
1200 h->cyh_arg = t;
1201 h->cyh_level = CY_LOCK_LEVEL;
1202 now = gethrtime();
1203 if (t->g_when < now)
1204 ct->cyt_when = now + t->g_interval / 2;
1205 else
1206 ct->cyt_when = t->g_when;
1207 ct->cyt_interval = t->g_interval;
1208}
1209
1210
1211vbi_gtimer_t *
1212vbi_gtimer_begin(
1213 void (*func)(void *, uint64_t),
1214 void *arg,
1215 uint64_t when,
1216 uint64_t interval)
1217{
1218 vbi_gtimer_t *t;
1219 cyc_omni_handler_t omni;
1220
1221 /*
1222 * one shot global timer is not supported yet.
1223 */
1224 if (interval == 0)
1225 return (NULL);
1226
1227 t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1228 t->g_counters = kmem_zalloc(ncpus * sizeof (uint64_t), KM_SLEEP);
1229 t->g_when = when + gethrtime();
1230 t->g_interval = interval;
1231 t->g_arg = arg;
1232 t->g_func = func;
1233 t->g_cyclic = CYCLIC_NONE;
1234
1235 omni.cyo_online = (void (*)(void *, cpu_t *, cyc_handler_t *, cyc_time_t *))vbi_gtimer_online;
1236 omni.cyo_offline = NULL;
1237 omni.cyo_arg = t;
1238
1239 mutex_enter(&cpu_lock);
1240 t->g_cyclic = cyclic_add_omni(&omni);
1241 mutex_exit(&cpu_lock);
1242 return (t);
1243}
1244
1245extern void
1246vbi_gtimer_end(vbi_gtimer_t *t)
1247{
1248 mutex_enter(&cpu_lock);
1249 cyclic_remove(t->g_cyclic);
1250 mutex_exit(&cpu_lock);
1251 kmem_free(t->g_counters, ncpus * sizeof (uint64_t));
1252 kmem_free(t, sizeof (*t));
1253}
1254
1255int
1256vbi_is_preempt_enabled(void)
1257{
1258 if (vbi_is_initialized) {
1259 char tpr = VBI_T_PREEMPT;
1260 return (tpr == 0);
1261 } else {
1262 cmn_err(CE_NOTE, "vbi_is_preempt_enabled: called without initializing vbi!\n");
1263 return 1;
1264 }
1265}
1266
1267void
1268vbi_poke_cpu(int c)
1269{
1270 if (c < ncpus)
1271 poke_cpu(c);
1272}
1273
1274/*
1275 * This is revision 5 of the interface.
1276 */
1277
1278void *
1279vbi_lowmem_alloc(uint64_t phys, size_t size)
1280{
1281 return (vbi_internal_alloc(&phys, size, PAGESIZE /* alignment */, 0 /* non-contiguous */));
1282}
1283
1284void
1285vbi_lowmem_free(void *va, size_t size)
1286{
1287 p_contig_free(va, size);
1288}
1289
1290/*
1291 * This is revision 6 of the interface.
1292 */
1293
1294int
1295vbi_is_preempt_pending(void)
1296{
1297 char crr = VBI_CPU_RUNRUN;
1298 char krr = VBI_CPU_KPRUNRUN;
1299 return crr != 0 || krr != 0;
1300}
1301
1302/*
1303 * This is revision 7 of the interface.
1304 */
1305
1306void *
1307vbi_phys_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
1308{
1309 return (vbi_internal_alloc(phys, size, alignment, contig));
1310}
1311
1312void
1313vbi_phys_free(void *va, size_t size)
1314{
1315 p_contig_free(va, size);
1316}
1317
1318
1319/*
1320 * This is revision 8 of the interface.
1321 */
1322static vnode_t vbipagevp;
1323
1324page_t **
1325vbi_pages_alloc(uint64_t *phys, size_t size)
1326{
1327 /*
1328 * The page freelist and cachelist both hold pages that are not mapped into any address space.
1329 * The cachelist pages are not truly free, but when memory is exhausted they are moved to the
1330 * freelist.
1331 * It is the total of the freelist and cachelist that shows up in the 'free' column of vmstat.
1332 */
1333 page_t **pp_pages = NULL;
1334 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1335
1336 /* reserve available memory for pages */
1337 int rc = page_resv(npages, KM_NOSLEEP);
1338 if (rc)
1339 {
1340 /* create the pages */
1341 rc = page_create_wait(npages, 0 /* flags */);
1342 if (rc)
1343 {
1344 /* alloc space for page_t pointer array */
1345 size_t pp_size = npages * sizeof(page_t *);
1346 pp_pages = kmem_zalloc(pp_size, KM_SLEEP);
1347 if (pp_pages)
1348 {
1349 /*
1350 * get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
1351 * we don't have the 'virtAddr' to which this memory may be mapped.
1352 */
1353 caddr_t virtAddr = NULL;
1354 for (int64_t i = 0; i < npages; i++, virtAddr += PAGESIZE)
1355 {
1356 /* get a page from the freelists */
1357 page_t *ppage = vbi_page_get_fromlist(1 /* freelist */, virtAddr, PAGESIZE);
1358 if (!ppage)
1359 {
1360 /* try from the cachelists */
1361 ppage = vbi_page_get_fromlist(2 /* cachelist */, virtAddr, PAGESIZE);
1362 if (!ppage)
1363 {
1364 /* damn */
1365 page_create_putback(npages - i);
1366 while (--i >= 0)
1367 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1368 kmem_free(pp_pages, pp_size);
1369 page_unresv(npages);
1370 return NULL;
1371 }
1372
1373 /* remove association with the vnode for pages from the cachelist */
1374 if (!PP_ISAGED(ppage))
1375 page_hashout(ppage, NULL /* mutex */);
1376 }
1377
1378 PP_CLRFREE(ppage); /* Page is not free */
1379 PP_CLRAGED(ppage); /* Page is not hashed in */
1380 pp_pages[i] = ppage;
1381 }
1382
1383 /*
1384 * We now have the pages locked exclusively; before they are mapped in,
1385 * the lock must be downgraded to a shared lock.
1386 */
1387 *phys = (uint64_t)page_pptonum(pp_pages[0]) << PAGESHIFT;
1388 return pp_pages;
1389 }
1390
1391 page_create_putback(npages);
1392 }
1393
1394 page_unresv(npages);
1395 }
1396
1397 return NULL;
1398}
1399
1400
1401void
1402vbi_pages_free(page_t **pp_pages, size_t size)
1403{
1404 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1405 size_t pp_size = npages * sizeof(page_t *);
1406 for (pgcnt_t i = 0; i < npages; i++)
1407 {
1408 /* we need to exclusively lock the pages before freeing them */
1409 int rc = page_tryupgrade(pp_pages[i]);
1410 if (!rc)
1411 {
1412 page_unlock(pp_pages[i]);
1413 while (!page_lock(pp_pages[i], SE_EXCL, NULL /* mutex */, P_RECLAIM))
1414 ;
1415 }
1416
1417 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1418 }
1419
1420 kmem_free(pp_pages, pp_size);
1421 page_unresv(npages);
1422}
1423
1424
1425int
1426vbi_pages_premap(page_t **pp_pages, size_t size, uint64_t *pphysaddrs)
1427{
1428 if (!pphysaddrs)
1429 return -1;
1430
1431 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1432 for (pgcnt_t i = 0; i < npages; i++)
1433 {
1434 /*
1435 * Prepare the pages for mapping into kernel/user space: downgrade the
1436 * exclusive page lock to a shared lock wherever a page is still locked
1437 * exclusively.
1438 */
1439 if (page_tryupgrade(pp_pages[i]) == 1)
1440 page_downgrade(pp_pages[i]);
1441 pphysaddrs[i] = vbi_page_to_pa(pp_pages, i);
1442 }
1443
1444 return 0;
1445}
1446
1447
1448uint64_t
1449vbi_page_to_pa(page_t **pp_pages, pgcnt_t i)
1450{
1451 pfn_t pfn = page_pptonum(pp_pages[i]);
1452 if (pfn == PFN_INVALID)
1453 panic("vbi_page_to_pa: page_pptonum() failed\n");
1454 return (uint64_t)pfn << PAGESHIFT;
1455}
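/*
 * A minimal usage sketch (kept under #if 0) tying the revision 8 page
 * routines above together: allocate backing pages, downgrade their locks and
 * collect the physical addresses with vbi_pages_premap(), then release
 * everything.  The two-page size is hypothetical.
 */
#if 0
static int
vbi_pages_sketch(void)
{
	size_t size = 2 * PAGESIZE;
	uint64_t pa[2];
	uint64_t first_pa;
	page_t **pages;

	pages = vbi_pages_alloc(&first_pa, size);
	if (pages == NULL)
		return (ENOMEM);
	if (vbi_pages_premap(pages, size, pa) != 0) {
		vbi_pages_free(pages, size);
		return (EINVAL);
	}
	/* ... hand 'pa' to vbi_user_map() or a hat_devload() loop ... */
	vbi_pages_free(pages, size);
	return (0);
}
#endif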
1456
1457
1458static page_t *
1459vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize)
1460{
1461 /* pgsize only applies when using the freelist */
1462 seg_t kernseg;
1463 kernseg.s_as = &kas;
1464 page_t *ppage = NULL;
1465 if (freelist == 1)
1466 {
1467 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1468 pgsize, 0 /* flags */, NULL /* local group */);
1469 if (!ppage && use_kflt)
1470 {
1471 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1472 pgsize, 0x0200 /* PG_KFLT */, NULL /* local group */);
1473 }
1474 }
1475 else
1476 {
1477 /* cachelist */
1478 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1479 0 /* flags */, NULL /* local group */);
1480 if (!ppage && use_kflt)
1481 {
1482 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1483 0x0200 /* PG_KFLT */, NULL /* local group */);
1484 }
1485 }
1486 return ppage;
1487}
1488
1489
1490/*
1491 * Large page code.
1492 */
1493
1494page_t *
1495vbi_large_page_alloc(uint64_t *pphys, size_t pgsize)
1496{
1497 pgcnt_t const npages = pgsize >> PAGESHIFT;
1498 page_t *pproot, *pp, *pplist;
1499 pgcnt_t ipage;
1500 caddr_t vaddr;
1501 seg_t kernseg;
1502 int rc;
1503
1504 /*
1505 * Reserve available memory for a large page and create it.
1506 */
1507 rc = page_resv(npages, KM_NOSLEEP);
1508 if (!rc)
1509 return NULL;
1510
1511 rc = page_create_wait(npages, 0 /* flags */);
1512 if (!rc) {
1513 page_unresv(npages);
1514 return NULL;
1515 }
1516
1517 /*
1518 * Get a page off the free list. We set vaddr to 0 since we don't know
1519 * where the memory is going to be mapped.
1520 */
1521 vaddr = NULL;
1522 kernseg.s_as = &kas;
1523 pproot = vbi_page_get_fromlist(1 /* freelist */, vaddr, pgsize);
1524 if (!pproot)
1525 {
1526 page_create_putback(npages);
1527 page_unresv(npages);
1528 return NULL;
1529 }
1530 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1531
1532 /*
1533 * Mark all the sub-pages as non-free and not-hashed-in.
1534 * It is paramount that we destroy the list (before freeing it).
1535 */
1536 pplist = pproot;
1537 for (ipage = 0; ipage < npages; ipage++) {
1538 pp = pplist;
1539 AssertPtr(pp);
1540 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1541 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1542 page_sub(&pplist, pp);
1543 AssertMsg(PP_ISFREE(pp), ("%p\n", pp));
1544 AssertMsg(pp->p_szc == pproot->p_szc, ("%p - %d expected %d \n", pp, pp->p_szc, pproot->p_szc));
1545
1546 PP_CLRFREE(pp);
1547 PP_CLRAGED(pp);
1548 }
1549
1550 *pphys = (uint64_t)page_pptonum(pproot) << PAGESHIFT;
1551 AssertMsg(!(*pphys & (pgsize - 1)), ("%llx %zx\n", *pphys, pgsize));
1552 return pproot;
1553}
1554
1555void
1556vbi_large_page_free(page_t *pproot, size_t pgsize)
1557{
1558 pgcnt_t const npages = pgsize >> PAGESHIFT;
1559 pgcnt_t ipage;
1560
1561 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1562 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1563
1564 /*
1565 * We need to exclusively lock the sub-pages before freeing
1566 * the large one.
1567 */
1568 for (ipage = 0; ipage < npages; ipage++) {
1569 page_t *pp = page_nextn(pproot, ipage);
1570 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1571 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1572 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1573
1574 int rc = page_tryupgrade(pp);
1575 if (!rc) {
1576 page_unlock(pp);
1577 while (!page_lock(pp, SE_EXCL, NULL /* mutex */, P_RECLAIM)) {
1578 /*nothing*/;
1579 }
1580 }
1581 }
1582
1583 /*
1584 * Free the large page and unreserve the memory.
1585 */
1586 page_free_pages(pproot);
1587 page_unresv(npages);
1588}
1589
1590int
1591vbi_large_page_premap(page_t *pproot, size_t pgsize)
1592{
1593 pgcnt_t const npages = pgsize >> PAGESHIFT;
1594 pgcnt_t ipage;
1595
1596 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1597 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1598
1599 /*
1600 * We need to downgrade the sub-pages from exclusive to shared locking
1601 * because otherwise we cannot <you go figure>.
1602 */
1603 for (ipage = 0; ipage < npages; ipage++) {
1604 page_t *pp = page_nextn(pproot, ipage);
1605 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1606 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1607 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1608
1609 if (page_tryupgrade(pp) == 1)
1610 page_downgrade(pp);
1611 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1612 }
1613
1614 return 0;
1615}
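/*
 * A minimal usage sketch (kept under #if 0) of the large page routines above:
 * allocate one large page, downgrade its constituent page locks for mapping,
 * then free it.  The 2MB size is hypothetical and must be a page size the
 * hardware supports.
 */
#if 0
static int
vbi_large_page_sketch(void)
{
	size_t const pgsize = 2 * 1024 * 1024;
	uint64_t phys;
	page_t *pp;

	pp = vbi_large_page_alloc(&phys, pgsize);
	if (pp == NULL)
		return (ENOMEM);
	(void) vbi_large_page_premap(pp, pgsize);
	/* ... map 'phys' with the large page size, then tear the mapping down ... */
	vbi_large_page_free(pp, pgsize);
	return (0);
}
#endif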
1616
1617
1618/*
1619 * As more functions are added, they should be placed above this point in the
1620 * file, each starting with a comment indicating the interface revision, and the
1621 * revision level should be increased. Also change vbi_modlmisc at the top of the file.
1622 *
1623 * NOTE! We'll start caring about this if anything in here ever makes it into
1624 * the solaris kernel proper.
1625 */
1626uint_t vbi_revision_level = 9;
1627