VirtualBox

source: vbox/trunk/src/VBox/Runtime/r0drv/solaris/vbi/i86pc/os/vbi.c@ 37947

Last change on this file since 37947 was 37947, checked in by vboxsync, 13 years ago

Runtime/r0drv/Solaris/vbi: temporary fix for modified t_preempt offset in newer S11 kernels. Also some cleanup.

  • Property svn:eol-style set to native
File size: 34.8 KB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010-2011 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Private interfaces for VirtualBox access to Solaris kernel internal
28 * facilities.
29 *
30 * See sys/vbi.h for what each function does.
31 */
32
33#include <sys/kmem.h>
34#include <sys/types.h>
35#include <sys/mman.h>
36#include <sys/thread.h>
37#include <sys/mutex.h>
38#include <sys/condvar.h>
39#include <sys/sdt.h>
40#include <sys/schedctl.h>
41#include <sys/time.h>
42#include <sys/sysmacros.h>
43#include <sys/cmn_err.h>
44#include <sys/vmsystm.h>
45#include <sys/cyclic.h>
46#include <sys/class.h>
47#include <sys/cpuvar.h>
48#include <sys/kobj.h>
49#include <sys/x_call.h>
50#include <sys/x86_archext.h>
51#include <vm/hat.h>
52#include <vm/seg_vn.h>
53#include <vm/seg_kmem.h>
54#include <sys/ddi.h>
55#include <sys/sunddi.h>
56#include <sys/modctl.h>
57#include <sys/machparam.h>
58#include <sys/utsname.h>
59
60#include <iprt/assert.h>
61
62#include "vbi.h"
63
64#define VBIPROC() ((proc_t *)vbi_proc())
65
66/*
67 * We may have to look up contig_free() at runtime (via kobj_getsymvalue()), since it is a static routine in older kernels.
68 */
69extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
70extern void contig_free(void *, size_t);
71#pragma weak contig_free
72static void (*p_contig_free)(void *, size_t) = contig_free;
73
74/*
75 * We look up kflt_init() at runtime to detect kernels with a kernel page freelist (kflt);
76 * on those we fall back to kernel pages once user pages can no longer be had from the freelists and cachelists.
77 */
78/* Introduced in v9 */
79static int use_kflt = 0;
80static page_t *vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize);
81
82
83/*
84 * Workarounds for running on old versions of Solaris with different cross-call
85 * interfaces. If we find xc_init_cpu() in the kernel, we just use the xc_call()
86 * interface declared in the include file, where xc_call() takes a pointer to a
87 * ulong_t array. The array must be long enough to hold "ncpus" bits at
88 * runtime.
89 *
90 * The reason for the hacks is that using the type "cpuset_t" is pretty much
91 * impossible from code built outside the Solaris source repository that wants
92 * to run on multiple releases of Solaris.
93 *
94 * For old style xc_call()s, 32-bit Solaris and older 64-bit versions use
95 * "ulong_t" as cpuset_t.
96 *
97 * Later versions of 64 bit Solaris used: struct {ulong_t words[x];}
98 * where "x" depends on NCPU.
99 *
100 * We detect the difference in 64 bit support by checking the kernel value of
101 * max_cpuid, which always holds the compiled value of NCPU - 1.
102 *
103 * If Solaris increases NCPU to more than 256, this module will continue
104 * to work on all versions of Solaris as long as the number of installed
105 * CPUs in the machine is <= VBI_NCPU. If VBI_NCPU is increased, this code
106 * has to be partially rewritten to stay compatible with older Solaris, which
107 * expects cpuset_t to be based on NCPU==256 -- or we discontinue support
108 * of old Nevada/S10.
109 */
110static int use_old = 0;
111static int use_old_with_ulong = 0;
112static void (*p_xc_call)() = (void (*)())xc_call;
113
114#define VBI_NCPU 256
115#define VBI_SET_WORDS (VBI_NCPU / (sizeof (ulong_t) * 8))
116typedef struct vbi_cpuset {
117 ulong_t words[VBI_SET_WORDS];
118} vbi_cpuset_t;
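/*
 * With VBI_NCPU == 256, VBI_SET_WORDS works out to 4 words on 64-bit kernels
 * (256 / (8 * 8)) and 8 words on 32-bit kernels (256 / (4 * 8)); the 64-bit
 * case matches the struct-based cpuset_t layout described above for NCPU == 256.
 */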
119#define X_CALL_HIPRI (2) /* for old Solaris interface */
120
121/*
122 * module linkage stuff
123 */
124#if 0
125static struct modlmisc vbi_modlmisc = {
126 &mod_miscops, "VirtualBox Interfaces V8"
127};
128
129static struct modlinkage vbi_modlinkage = {
130 MODREV_1, { (void *)&vbi_modlmisc, NULL }
131};
132#endif
133
134extern uintptr_t kernelbase;
135#define IS_KERNEL(v) ((uintptr_t)(v) >= kernelbase)
136
137#if 0
138static int vbi_verbose = 0;
139
140#define VBI_VERBOSE(msg) {if (vbi_verbose) cmn_err(CE_WARN, msg);}
141#endif
142
143/* Introduced in v8 */
144static int vbi_is_initialized = 0;
145
146/* Introduced in v6 */
147static int vbi_is_nevada = 0;
148
149#ifdef _LP64
150/* 64-bit Solaris 10 offsets */
151/* CPU */
152static int off_s10_cpu_runrun = 232;
153static int off_s10_cpu_kprunrun = 233;
154/* kthread_t */
155static int off_s10_t_preempt = 42;
156
157/* 64-bit Solaris 11 (Nevada/OpenSolaris) offsets */
158/* CPU */
159static int off_s11_cpu_runrun = 216;
160static int off_s11_cpu_kprunrun = 217;
161/* kthread_t */
162static int off_s11_t_preempt = 42;
163
164/* 64-bit Solaris 11 snv_166+ offsets (CR 7037143) */
165static int off_s11_t_preempt_new = 48;
166#else
167/* 32-bit Solaris 10 offsets */
168/* CPU */
169static int off_s10_cpu_runrun = 124;
170static int off_s10_cpu_kprunrun = 125;
171/* kthread_t */
172static int off_s10_t_preempt = 26;
173
174/* 32-bit Solaris 11 (Nevada/OpenSolaris) offsets */
175/* CPU */
176static int off_s11_cpu_runrun = 112;
177static int off_s11_cpu_kprunrun = 113;
178/* kthread_t */
179static int off_s11_t_preempt = 26;
180#endif
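/*
 * The tables above hard-code the byte offsets of cpu_t.cpu_runrun,
 * cpu_t.cpu_kprunrun and kthread_t.t_preempt for each supported kernel
 * generation.  vbi_init() picks one set based on utsname and then
 * sanity-checks the values read through them before use.
 */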
181
182
183/* Which offsets will be used */
184static int off_cpu_runrun = -1;
185static int off_cpu_kprunrun = -1;
186static int off_t_preempt = -1;
187
188#define VBI_T_PREEMPT (*((char *)curthread + off_t_preempt))
189#define VBI_CPU_KPRUNRUN (*((char *)CPU + off_cpu_kprunrun))
190#define VBI_CPU_RUNRUN (*((char *)CPU + off_cpu_runrun))
191
192#undef kpreempt_disable
193#undef kpreempt_enable
194
195#define VBI_PREEMPT_DISABLE() \
196 { \
197 VBI_T_PREEMPT++; \
198 ASSERT(VBI_T_PREEMPT >= 1); \
199 }
200#define VBI_PREEMPT_ENABLE() \
201 { \
202 ASSERT(VBI_T_PREEMPT >= 1); \
203 if (--VBI_T_PREEMPT == 0 && \
204 VBI_CPU_RUNRUN) \
205 kpreempt(KPREEMPT_SYNC); \
206 }
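/*
 * These mirror kpreempt_disable()/kpreempt_enable(), but poke t_preempt and
 * cpu_runrun through the byte offsets probed above, so the module does not
 * depend on the kthread_t/cpu_t layout of whatever kernel it was built against.
 */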
207
208/* End of v6 intro */
209
210#if 0
211int
212_init(void)
213{
214 int err = vbi_init();
215 if (!err)
216 err = mod_install(&vbi_modlinkage);
217 return (err);
218}
219#endif
220
221int
222vbi_init(void)
223{
224 /*
225 * Check whether this version of the VirtualBox interface module will work
226 * with the kernel.
227 */
228 if (kobj_getsymvalue("xc_init_cpu", 1) != NULL) {
229 /*
230 * Our bit vector storage needs to be large enough for the
231 * actual number of CPUs running in the system.
232 */
233 if (ncpus > VBI_NCPU) {
234 cmn_err(CE_NOTE, "cpu count mismatch.\n");
235 return (EINVAL);
236 }
237 } else {
238 use_old = 1;
239 if (max_cpuid + 1 == sizeof(ulong_t) * 8)
240 use_old_with_ulong = 1;
241 else if (max_cpuid + 1 != VBI_NCPU)
242 {
243 cmn_err(CE_NOTE, "cpuset_t size mismatch. probably too old a kernel.\n");
244 return (EINVAL); /* cpuset_t size mismatch */
245 }
246 }
247
248 /*
249 * In older versions of Solaris contig_free() is a static routine.
250 */
251 if (p_contig_free == NULL) {
252 p_contig_free = (void (*)(void *, size_t))
253 kobj_getsymvalue("contig_free", 1);
254 if (p_contig_free == NULL) {
255 cmn_err(CE_NOTE, " contig_free() not found in kernel\n");
256 return (EINVAL);
257 }
258 }
259
260 /*
261 * Use kernel page freelist flags to get pages from kernel page freelists
262 * while allocating physical pages, once the user pages are exhausted.
263 * snv_161+, see @bugref{5632}.
264 */
265 if (kobj_getsymvalue("kflt_init", 1) != NULL)
266 {
267 int *p_kflt_disable = (int*)kobj_getsymvalue("kflt_disable", 1); /* amd64 only, on 32-bit kflt's are disabled. */
268 if (p_kflt_disable && *p_kflt_disable == 0)
269 {
270 use_kflt = 1;
271 }
272 }
273
274
275 /*
276 * Check if this is S10 or Nevada
277 */
278 if (!strncmp(utsname.release, "5.11", sizeof("5.11") - 1)) {
279 /* Nevada detected... */
280 vbi_is_nevada = 1;
281
282 off_cpu_runrun = off_s11_cpu_runrun;
283 off_cpu_kprunrun = off_s11_cpu_kprunrun;
284 off_t_preempt = off_s11_t_preempt;
285
286#ifdef _LP64
287 /* Only 64-bit kernels */
288 long snv_version = 0;
289 if (!strncmp(utsname.version, "snv_", 4))
290 {
291 ddi_strtol(utsname.version + 4, NULL /* endptr */, 0, &snv_version);
292 if (snv_version >= 166)
293 {
294 off_t_preempt = off_s11_t_preempt_new;
295 cmn_err(CE_NOTE, "here\n");
296 }
297
298 cmn_err(CE_NOTE, "Detected S11 version %ld: Preemption offset=%d\n", snv_version, off_t_preempt);
299 }
300 else
301 cmn_err(CE_NOTE, "WARNING!! Cannot determine version. Assuming pre snv_166. Preemption offset=%d may be busted!\n", off_t_preempt);
302#endif
303 } else {
304 /* Solaris 10 detected... */
305 vbi_is_nevada = 0;
306
307 off_cpu_runrun = off_s10_cpu_runrun;
308 off_cpu_kprunrun = off_s10_cpu_kprunrun;
309 off_t_preempt = off_s10_t_preempt;
310 }
311
312 /*
313 * Sanity checking...
314 */
315 /* CPU */
316 char crr = VBI_CPU_RUNRUN;
317 char krr = VBI_CPU_KPRUNRUN;
318 if ( (crr < 0 || crr > 1)
319 || (krr < 0 || krr > 1)) {
320 cmn_err(CE_NOTE, ":CPU structure sanity check failed! OS version mismatch.\n");
321 return EINVAL;
322 }
323
324 /* Thread */
325 char t_preempt = VBI_T_PREEMPT;
326 if (t_preempt < 0 || t_preempt > 32) {
327 cmn_err(CE_NOTE, ":Thread structure sanity check failed! OS version mismatch.\n");
328 return EINVAL;
329 }
330
331 vbi_is_initialized = 1;
332
333 return (0);
334}
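/*
 * Illustrative only (not part of the original interface contract): a consumer
 * of these interfaces would be expected to call vbi_init() once, before any
 * other vbi_* function, and to refuse to load if it fails, e.g.:
 *
 *	if (vbi_init() != 0)
 *		return (ENOTSUP);
 *
 * The sanity checks above make vbi_init() fail cleanly rather than letting
 * the preemption macros poke random bytes in kthread_t/cpu_t.
 */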
335
336#if 0
337int
338_fini(void)
339{
340 int err = mod_remove(&vbi_modlinkage);
341 if (err != 0)
342 return (err);
343
344 return (0);
345}
346
347int
348_info(struct modinfo *modinfop)
349{
350 return (mod_info(&vbi_modlinkage, modinfop));
351}
352#endif
353
354
355static ddi_dma_attr_t base_attr = {
356 DMA_ATTR_V0, /* Version Number */
357 (uint64_t)0, /* lower limit */
358 (uint64_t)0, /* high limit */
359 (uint64_t)0xffffffff, /* counter limit */
360 (uint64_t)PAGESIZE, /* pagesize alignment */
361 (uint64_t)PAGESIZE, /* pagesize burst size */
362 (uint64_t)PAGESIZE, /* pagesize effective DMA size */
363 (uint64_t)0xffffffff, /* max DMA xfer size */
364 (uint64_t)0xffffffff, /* segment boundary */
365 1, /* list length (1 for contiguous) */
366 1, /* device granularity */
367 0 /* bus-specific flags */
368};
369
370static void *
371vbi_internal_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
372{
373 ddi_dma_attr_t attr;
374 pfn_t pfn;
375 void *ptr;
376 uint_t npages;
377
378 if ((size & PAGEOFFSET) != 0)
379 return (NULL);
380 npages = (size + PAGESIZE - 1) >> PAGESHIFT;
381 if (npages == 0)
382 return (NULL);
383
384 attr = base_attr;
385 attr.dma_attr_addr_hi = *phys;
386 attr.dma_attr_align = alignment;
387 if (!contig)
388 attr.dma_attr_sgllen = npages;
389 ptr = contig_alloc(size, &attr, PAGESIZE, 1);
390
391 if (ptr == NULL) {
392 cmn_err(CE_NOTE, "vbi_internal_alloc() failure for %lu bytes contig=%d", size, contig);
393 return (NULL);
394 }
395
396 pfn = hat_getpfnum(kas.a_hat, (caddr_t)ptr);
397 if (pfn == PFN_INVALID)
398 panic("vbi_internal_alloc(): hat_getpfnum() failed\n");
399 *phys = (uint64_t)pfn << PAGESHIFT;
400 return (ptr);
401}
402
403void *
404vbi_contig_alloc(uint64_t *phys, size_t size)
405{
406 /* Obsolete */
407 return (vbi_internal_alloc(phys, size, PAGESIZE /* alignment */, 1 /* contiguous */));
408}
409
410void
411vbi_contig_free(void *va, size_t size)
412{
413 /* Obsolete */
414 p_contig_free(va, size);
415}
416
417void *
418vbi_kernel_map(uint64_t pa, size_t size, uint_t prot)
419{
420 caddr_t va;
421
422 if ((pa & PAGEOFFSET) || (size & PAGEOFFSET)) {
423 cmn_err(CE_NOTE, "vbi_kernel_map() bad pa (0x%lx) or size (%lu)", pa, size);
424 return (NULL);
425 }
426
427 va = vmem_alloc(heap_arena, size, VM_SLEEP);
428
429 hat_devload(kas.a_hat, va, size, (pfn_t)(pa >> PAGESHIFT),
430 prot, HAT_LOAD | HAT_LOAD_LOCK | HAT_UNORDERED_OK);
431
432 return (va);
433}
434
435void
436vbi_unmap(void *va, size_t size)
437{
438 if (IS_KERNEL(va)) {
439 hat_unload(kas.a_hat, va, size, HAT_UNLOAD | HAT_UNLOAD_UNLOCK);
440 vmem_free(heap_arena, va, size);
441 } else {
442 struct as *as = VBIPROC()->p_as;
443
444 as_rangelock(as);
445 (void) as_unmap(as, va, size);
446 as_rangeunlock(as);
447 }
448}
449
450void *
451vbi_curthread(void)
452{
453 return (curthread);
454}
455
456int
457vbi_yield(void)
458{
459 int rv = 0;
460
461 vbi_preempt_disable();
462
463 char tpr = VBI_T_PREEMPT;
464 char kpr = VBI_CPU_KPRUNRUN;
465 if (tpr == 1 && kpr)
466 rv = 1;
467
468 vbi_preempt_enable();
469 return (rv);
470}
471
472uint64_t
473vbi_timer_granularity(void)
474{
475 return (nsec_per_tick);
476}
477
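/*
 * Legacy timer wrappers around the cyclic subsystem (see also the
 * vbi_stimer/vbi_gtimer interfaces further down).  An interval of 0 means
 * one-shot: the callback stops the cyclic itself after the first firing.
 */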
478typedef struct vbi_timer {
479 cyc_handler_t vbi_handler;
480 cyclic_id_t vbi_cyclic;
481 uint64_t vbi_interval;
482 void (*vbi_func)();
483 void *vbi_arg1;
484 void *vbi_arg2;
485} vbi_timer_t;
486
487static void
488vbi_timer_callback(void *arg)
489{
490 vbi_timer_t *t = arg;
491
492 if (t->vbi_interval == 0)
493 vbi_timer_stop(arg);
494 t->vbi_func(t->vbi_arg1, t->vbi_arg2);
495}
496
497void *
498vbi_timer_create(void *callback, void *arg1, void *arg2, uint64_t interval)
499{
500 vbi_timer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
501
502 t->vbi_func = (void (*)())callback;
503 t->vbi_arg1 = arg1;
504 t->vbi_arg2 = arg2;
505 t->vbi_handler.cyh_func = vbi_timer_callback;
506 t->vbi_handler.cyh_arg = (void *)t;
507 t->vbi_handler.cyh_level = CY_LOCK_LEVEL;
508 t->vbi_cyclic = CYCLIC_NONE;
509 t->vbi_interval = interval;
510 return (t);
511}
512
513void
514vbi_timer_destroy(void *timer)
515{
516 vbi_timer_t *t = timer;
517 if (t != NULL) {
518 vbi_timer_stop(timer);
519 kmem_free(t, sizeof (*t));
520 }
521}
522
523void
524vbi_timer_start(void *timer, uint64_t when)
525{
526 vbi_timer_t *t = timer;
527 cyc_time_t fire_time;
528 uint64_t interval = t->vbi_interval;
529
530 mutex_enter(&cpu_lock);
531 when += gethrtime();
532 fire_time.cyt_when = when;
533 if (interval == 0)
534 fire_time.cyt_interval = when;
535 else
536 fire_time.cyt_interval = interval;
537 t->vbi_cyclic = cyclic_add(&t->vbi_handler, &fire_time);
538 mutex_exit(&cpu_lock);
539}
540
541void
542vbi_timer_stop(void *timer)
543{
544 vbi_timer_t *t = timer;
545
546 if (t->vbi_cyclic == CYCLIC_NONE)
547 return;
548 mutex_enter(&cpu_lock);
549 if (t->vbi_cyclic != CYCLIC_NONE) {
550 cyclic_remove(t->vbi_cyclic);
551 t->vbi_cyclic = CYCLIC_NONE;
552 }
553 mutex_exit(&cpu_lock);
554}
555
556uint64_t
557vbi_tod(void)
558{
559 timestruc_t ts;
560
561 mutex_enter(&tod_lock);
562 ts = tod_get();
563 mutex_exit(&tod_lock);
564 return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
565}
566
567
568void *
569vbi_proc(void)
570{
571 proc_t *p;
572 drv_getparm(UPROCP, &p);
573 return (p);
574}
575
576void
577vbi_set_priority(void *thread, int priority)
578{
579 kthread_t *t = thread;
580
581 thread_lock(t);
582 (void) thread_change_pri(t, priority, 0);
583 thread_unlock(t);
584}
585
586void *
587vbi_thread_create(void (*func)(void *), void *arg, size_t len, int priority)
588{
589 kthread_t *t;
590
591 t = thread_create(NULL, NULL, (void (*)())func, arg, len,
592 VBIPROC(), TS_RUN, priority);
593 return (t);
594}
595
596void
597vbi_thread_exit(void)
598{
599 thread_exit();
600}
601
602void *
603vbi_text_alloc(size_t size)
604{
605 return (segkmem_alloc(heaptext_arena, size, KM_SLEEP));
606}
607
608void
609vbi_text_free(void *va, size_t size)
610{
611 segkmem_free(heaptext_arena, va, size);
612}
613
614int
615vbi_cpu_id(void)
616{
617 return (CPU->cpu_id);
618}
619
620int
621vbi_max_cpu_id(void)
622{
623 return (max_cpuid);
624}
625
626int
627vbi_cpu_maxcount(void)
628{
629 return (max_cpuid + 1);
630}
631
632int
633vbi_cpu_count(void)
634{
635 return (ncpus);
636}
637
638int
639vbi_cpu_online(int c)
640{
641 int x;
642
643 mutex_enter(&cpu_lock);
644 x = cpu_is_online(cpu[c]);
645 mutex_exit(&cpu_lock);
646 return (x);
647}
648
649void
650vbi_preempt_disable(void)
651{
652 VBI_PREEMPT_DISABLE();
653}
654
655void
656vbi_preempt_enable(void)
657{
658 VBI_PREEMPT_ENABLE();
659}
660
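/*
 * The vbi_execute_on_*() functions below broadcast cross calls.  Each builds
 * a vbi_cpuset_t bit vector and dispatches through one of the three ABI
 * variants detected in vbi_init():
 *  - new kernels (xc_init_cpu() present): xc_call() takes a pointer to the
 *    ulong_t array;
 *  - old kernels where cpuset_t is a plain ulong_t: the single word is passed
 *    by value, together with X_CALL_HIPRI;
 *  - other old kernels: the whole struct is passed by value, also with
 *    X_CALL_HIPRI.
 */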
661void
662vbi_execute_on_all(void *func, void *arg)
663{
664 vbi_cpuset_t set;
665 int i;
666
667 for (i = 0; i < VBI_SET_WORDS; ++i)
668 set.words[i] = (ulong_t)-1L;
669 if (use_old) {
670 if (use_old_with_ulong) {
671 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
672 set.words[0], (xc_func_t)func);
673 } else {
674 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
675 set, (xc_func_t)func);
676 }
677 } else {
678 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
679 }
680}
681
682void
683vbi_execute_on_others(void *func, void *arg)
684{
685 vbi_cpuset_t set;
686 int i;
687
688 for (i = 0; i < VBI_SET_WORDS; ++i)
689 set.words[i] = (ulong_t)-1L;
690 BT_CLEAR(set.words, vbi_cpu_id());
691 if (use_old) {
692 if (use_old_with_ulong) {
693 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
694 set.words[0], (xc_func_t)func);
695 } else {
696 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
697 set, (xc_func_t)func);
698 }
699 } else {
700 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
701 }
702}
703
704void
705vbi_execute_on_one(void *func, void *arg, int c)
706{
707 vbi_cpuset_t set;
708 int i;
709
710 for (i = 0; i < VBI_SET_WORDS; ++i)
711 set.words[i] = 0;
712 BT_SET(set.words, c);
713 if (use_old) {
714 if (use_old_with_ulong) {
715 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
716 set.words[0], (xc_func_t)func);
717 } else {
718 p_xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI,
719 set, (xc_func_t)func);
720 }
721 } else {
722 xc_call((xc_arg_t)arg, 0, 0, &set.words[0], (xc_func_t)func);
723 }
724}
725
726int
727vbi_lock_va(void *addr, size_t len, int access, void **handle)
728{
729 faultcode_t err;
730
731 /*
732 * kernel mappings on x86 are always locked, so only handle user.
733 */
734 *handle = NULL;
735 if (!IS_KERNEL(addr)) {
736 err = as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
737 (caddr_t)addr, len, F_SOFTLOCK, access);
738 if (err != 0) {
739 cmn_err(CE_NOTE, "vbi_lock_va() failed to lock");
740 return (-1);
741 }
742 }
743 return (0);
744}
745
746/*ARGSUSED*/
747void
748vbi_unlock_va(void *addr, size_t len, int access, void *handle)
749{
750 if (!IS_KERNEL(addr))
751 as_fault(VBIPROC()->p_as->a_hat, VBIPROC()->p_as,
752 (caddr_t)addr, len, F_SOFTUNLOCK, access);
753}
754
755uint64_t
756vbi_va_to_pa(void *addr)
757{
758 struct hat *hat;
759 pfn_t pfn;
760 uintptr_t v = (uintptr_t)addr;
761
762 if (IS_KERNEL(v))
763 hat = kas.a_hat;
764 else
765 hat = VBIPROC()->p_as->a_hat;
766 pfn = hat_getpfnum(hat, (caddr_t)(v & PAGEMASK));
767 if (pfn == PFN_INVALID)
768 return (-(uint64_t)1);
769 return (((uint64_t)pfn << PAGESHIFT) | (v & PAGEOFFSET));
770}
771
772
773struct segvbi_crargs {
774 uint64_t *palist;
775 uint_t prot;
776};
777
778struct segvbi_data {
779 uint_t prot;
780};
781
782static struct seg_ops segvbi_ops;
783
784static int
785segvbi_create(struct seg *seg, void *args)
786{
787 struct segvbi_crargs *a = args;
788 struct segvbi_data *data;
789 struct as *as = seg->s_as;
790 caddr_t va;
791 ulong_t pgcnt;
792 ulong_t p;
793
794 hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
795 data = kmem_zalloc(sizeof (*data), KM_SLEEP);
796 data->prot = a->prot | PROT_USER;
797
798 seg->s_ops = &segvbi_ops;
799 seg->s_data = data;
800
801 /*
802 * now load locked mappings to the pages
803 */
804 va = seg->s_base;
805 pgcnt = (seg->s_size + PAGESIZE - 1) >> PAGESHIFT;
806 for (p = 0; p < pgcnt; ++p, va += PAGESIZE) {
807 hat_devload(as->a_hat, va,
808 PAGESIZE, a->palist[p] >> PAGESHIFT,
809 data->prot | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
810 }
811
812 return (0);
813}
814
815/*
816 * Duplicate a seg and return new segment in newseg.
817 */
818static int
819segvbi_dup(struct seg *seg, struct seg *newseg)
820{
821 struct segvbi_data *data = seg->s_data;
822 struct segvbi_data *ndata;
823
824 ndata = kmem_zalloc(sizeof (*data), KM_SLEEP);
825 ndata->prot = data->prot;
826 newseg->s_ops = &segvbi_ops;
827 newseg->s_data = ndata;
828
829 return (0);
830}
831
832static int
833segvbi_unmap(struct seg *seg, caddr_t addr, size_t len)
834{
835 if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
836 (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
837 panic("segvbi_unmap");
838
839 if (addr != seg->s_base || len != seg->s_size)
840 return (ENOTSUP);
841
842 hat_unload(seg->s_as->a_hat, addr, len,
843 HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
844
845 seg_free(seg);
846 return (0);
847}
848
849static void
850segvbi_free(struct seg *seg)
851{
852 struct segvbi_data *data = seg->s_data;
853 kmem_free(data, sizeof (*data));
854}
855
856/*
857 * The (u)read() path may SEGOP_FAULT() on buffers that were mapped in via
858 * vbi_user_map(), i.e. it prefaults them before DMA. Those mappings are
859 * already loaded and locked, so don't fail when we're called that way; see #5047.
860 */
861static int
862segvbi_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
863 enum fault_type type, enum seg_rw rw)
864{
865 return (0);
866}
867
868static int
869segvbi_faulta(struct seg *seg, caddr_t addr)
870{
871 return (0);
872}
873
874static int
875segvbi_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
876{
877 return (EACCES);
878}
879
880static int
881segvbi_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
882{
883 return (EINVAL);
884}
885
886static int
887segvbi_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
888{
889 return (-1);
890}
891
892static int
893segvbi_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
894{
895 return (0);
896}
897
898static size_t
899segvbi_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
900{
901 size_t v;
902
903 for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
904 len -= PAGESIZE, v += PAGESIZE)
905 *vec++ = 1;
906 return (v);
907}
908
909static int
910segvbi_lockop(struct seg *seg, caddr_t addr,
911 size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
912{
913 return (0);
914}
915
916static int
917segvbi_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
918{
919 struct segvbi_data *data = seg->s_data;
920 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
921 if (pgno != 0)
922 {
923 do
924 {
925 pgno--;
926 protv[pgno] = data->prot;
927 } while (pgno != 0);
928 }
929 return (0);
930}
931
932static u_offset_t
933segvbi_getoffset(struct seg *seg, caddr_t addr)
934{
935 return ((uintptr_t)addr - (uintptr_t)seg->s_base);
936}
937
938static int
939segvbi_gettype(struct seg *seg, caddr_t addr)
940{
941 return (MAP_SHARED);
942}
943
944static vnode_t vbivp;
945
946static int
947segvbi_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
948{
949 *vpp = &vbivp;
950 return (0);
951}
952
953static int
954segvbi_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
955{
956 return (0);
957}
958
959static void
960segvbi_dump(struct seg *seg)
961{}
962
963static int
964segvbi_pagelock(struct seg *seg, caddr_t addr, size_t len,
965 struct page ***ppp, enum lock_type type, enum seg_rw rw)
966{
967 return (ENOTSUP);
968}
969
970static int
971segvbi_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
972{
973 return (ENOTSUP);
974}
975
976static int
977segvbi_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
978{
979 return (ENODEV);
980}
981
982static lgrp_mem_policy_info_t *
983segvbi_getpolicy(struct seg *seg, caddr_t addr)
984{
985 return (NULL);
986}
987
988static int
989segvbi_capable(struct seg *seg, segcapability_t capability)
990{
991 return (0);
992}
993
994static struct seg_ops segvbi_ops = {
995 segvbi_dup,
996 segvbi_unmap,
997 segvbi_free,
998 segvbi_fault,
999 segvbi_faulta,
1000 segvbi_setprot,
1001 segvbi_checkprot,
1002 (int (*)())segvbi_kluster,
1003 (size_t (*)(struct seg *))NULL, /* swapout */
1004 segvbi_sync,
1005 segvbi_incore,
1006 segvbi_lockop,
1007 segvbi_getprot,
1008 segvbi_getoffset,
1009 segvbi_gettype,
1010 segvbi_getvp,
1011 segvbi_advise,
1012 segvbi_dump,
1013 segvbi_pagelock,
1014 segvbi_setpagesize,
1015 segvbi_getmemid,
1016 segvbi_getpolicy,
1017 segvbi_capable
1018};
1019
1020
1021
1022/*
1023 * Interfaces to inject physical pages into user address space
1024 * and later remove them.
1025 */
1026int
1027vbi_user_map(caddr_t *va, uint_t prot, uint64_t *palist, size_t len)
1028{
1029 struct as *as = VBIPROC()->p_as;
1030 struct segvbi_crargs args;
1031 int error = 0;
1032
1033 args.palist = palist;
1034 args.prot = prot;
1035 as_rangelock(as);
1036 map_addr(va, len, 0, 0, MAP_SHARED);
1037 if (*va != NULL)
1038 error = as_map(as, *va, len, segvbi_create, &args);
1039 else
1040 error = ENOMEM;
1041 if (error)
1042 cmn_err(CE_NOTE, "vbi_user_map() failed error=%d", error);
1043 as_rangeunlock(as);
1044 return (error);
1045}
1046
1047
1048/*
1049 * This is revision 2 of the interface.
1050 */
1051
1052struct vbi_cpu_watch {
1053 void (*vbi_cpu_func)(void *, int, int);
1054 void *vbi_cpu_arg;
1055};
1056
1057static int
1058vbi_watcher(cpu_setup_t state, int icpu, void *arg)
1059{
1060 vbi_cpu_watch_t *w = arg;
1061 int online;
1062
1063 if (state == CPU_ON)
1064 online = 1;
1065 else if (state == CPU_OFF)
1066 online = 0;
1067 else
1068 return (0);
1069 w->vbi_cpu_func(w->vbi_cpu_arg, icpu, online);
1070 return (0);
1071}
1072
1073vbi_cpu_watch_t *
1074vbi_watch_cpus(void (*func)(void *, int, int), void *arg, int current_too)
1075{
1076 int c;
1077 vbi_cpu_watch_t *w;
1078
1079 w = kmem_alloc(sizeof (*w), KM_SLEEP);
1080 w->vbi_cpu_func = func;
1081 w->vbi_cpu_arg = arg;
1082 mutex_enter(&cpu_lock);
1083 register_cpu_setup_func(vbi_watcher, w);
1084 if (current_too) {
1085 for (c = 0; c < ncpus; ++c) {
1086 if (cpu_is_online(cpu[c]))
1087 func(arg, c, 1);
1088 }
1089 }
1090 mutex_exit(&cpu_lock);
1091 return (w);
1092}
1093
1094void
1095vbi_ignore_cpus(vbi_cpu_watch_t *w)
1096{
1097 mutex_enter(&cpu_lock);
1098 unregister_cpu_setup_func(vbi_watcher, w);
1099 mutex_exit(&cpu_lock);
1100 kmem_free(w, sizeof (*w));
1101}
1102
1103/*
1104 * Simple timers are pretty much a pass through to the cyclic subsystem.
1105 */
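/*
 * An interval of 0 requests a one-shot timer: vbi_stimer_begin() pushes
 * cyt_interval out to INT64_MAX - when, so the cyclic will not refire before
 * vbi_stimer_end() removes it.
 */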
1106struct vbi_stimer {
1107 cyc_handler_t s_handler;
1108 cyc_time_t s_fire_time;
1109 cyclic_id_t s_cyclic;
1110 uint64_t s_tick;
1111 void (*s_func)(void *, uint64_t);
1112 void *s_arg;
1113};
1114
1115static void
1116vbi_stimer_func(void *arg)
1117{
1118 vbi_stimer_t *t = arg;
1119 t->s_func(t->s_arg, ++t->s_tick);
1120}
1121
1122extern vbi_stimer_t *
1123vbi_stimer_begin(
1124 void (*func)(void *, uint64_t),
1125 void *arg,
1126 uint64_t when,
1127 uint64_t interval,
1128 int on_cpu)
1129{
1130 vbi_stimer_t *t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1131
1132 t->s_handler.cyh_func = vbi_stimer_func;
1133 t->s_handler.cyh_arg = t;
1134 t->s_handler.cyh_level = CY_LOCK_LEVEL;
1135 t->s_tick = 0;
1136 t->s_func = func;
1137 t->s_arg = arg;
1138
1139 mutex_enter(&cpu_lock);
1140 	if (on_cpu != VBI_ANY_CPU && !cpu_is_online(cpu[on_cpu])) {
 		kmem_free(t, sizeof (*t));	/* don't leak the timer we just allocated */
1141 		t = NULL;
1142 		goto done;
1143 	}
1144
1145 when += gethrtime();
1146 t->s_fire_time.cyt_when = when;
1147 if (interval == 0)
1148 t->s_fire_time.cyt_interval = INT64_MAX - when;
1149 else
1150 t->s_fire_time.cyt_interval = interval;
1151 t->s_cyclic = cyclic_add(&t->s_handler, &t->s_fire_time);
1152 if (on_cpu != VBI_ANY_CPU)
1153 cyclic_bind(t->s_cyclic, cpu[on_cpu], NULL);
1154done:
1155 mutex_exit(&cpu_lock);
1156 return (t);
1157}
1158
1159extern void
1160vbi_stimer_end(vbi_stimer_t *t)
1161{
1162 mutex_enter(&cpu_lock);
1163 cyclic_remove(t->s_cyclic);
1164 mutex_exit(&cpu_lock);
1165 kmem_free(t, sizeof (*t));
1166}
1167
1168/*
1169 * Global timers are more complicated. The callback is handed a per-cpu tick
1170 * counter whose value is 1 on the first call on a given cpu.
1171 */
1172struct vbi_gtimer {
1173 uint64_t *g_counters;
1174 void (*g_func)(void *, uint64_t);
1175 void *g_arg;
1176 uint64_t g_when;
1177 uint64_t g_interval;
1178 cyclic_id_t g_cyclic;
1179};
1180
1181static void
1182vbi_gtimer_func(void *arg)
1183{
1184 vbi_gtimer_t *t = arg;
1185 t->g_func(t->g_arg, ++t->g_counters[vbi_cpu_id()]);
1186}
1187
1188/*
1189 * Whenever a cpu is onlined, its g_counters[] entry needs to be reset to zero.
1190 */
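/*
 * If the requested start time has already passed (e.g. a cpu comes online
 * after the timer was started), the first firing on that cpu is deferred by
 * half an interval from now instead of firing immediately.
 */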
1191static void
1192vbi_gtimer_online(void *arg, cpu_t *pcpu, cyc_handler_t *h, cyc_time_t *ct)
1193{
1194 vbi_gtimer_t *t = arg;
1195 hrtime_t now;
1196
1197 t->g_counters[pcpu->cpu_id] = 0;
1198 h->cyh_func = vbi_gtimer_func;
1199 h->cyh_arg = t;
1200 h->cyh_level = CY_LOCK_LEVEL;
1201 now = gethrtime();
1202 if (t->g_when < now)
1203 ct->cyt_when = now + t->g_interval / 2;
1204 else
1205 ct->cyt_when = t->g_when;
1206 ct->cyt_interval = t->g_interval;
1207}
1208
1209
1210vbi_gtimer_t *
1211vbi_gtimer_begin(
1212 void (*func)(void *, uint64_t),
1213 void *arg,
1214 uint64_t when,
1215 uint64_t interval)
1216{
1217 vbi_gtimer_t *t;
1218 cyc_omni_handler_t omni;
1219
1220 /*
1221 * One-shot global timers are not supported yet.
1222 */
1223 if (interval == 0)
1224 return (NULL);
1225
1226 t = kmem_zalloc(sizeof (*t), KM_SLEEP);
1227 t->g_counters = kmem_zalloc(ncpus * sizeof (uint64_t), KM_SLEEP);
1228 t->g_when = when + gethrtime();
1229 t->g_interval = interval;
1230 t->g_arg = arg;
1231 t->g_func = func;
1232 t->g_cyclic = CYCLIC_NONE;
1233
1234 omni.cyo_online = (void (*)(void *, cpu_t *, cyc_handler_t *, cyc_time_t *))vbi_gtimer_online;
1235 omni.cyo_offline = NULL;
1236 omni.cyo_arg = t;
1237
1238 mutex_enter(&cpu_lock);
1239 t->g_cyclic = cyclic_add_omni(&omni);
1240 mutex_exit(&cpu_lock);
1241 return (t);
1242}
1243
1244extern void
1245vbi_gtimer_end(vbi_gtimer_t *t)
1246{
1247 mutex_enter(&cpu_lock);
1248 cyclic_remove(t->g_cyclic);
1249 mutex_exit(&cpu_lock);
1250 kmem_free(t->g_counters, ncpus * sizeof (uint64_t));
1251 kmem_free(t, sizeof (*t));
1252}
1253
1254int
1255vbi_is_preempt_enabled(void)
1256{
1257 if (vbi_is_initialized) {
1258 char tpr = VBI_T_PREEMPT;
1259 return (tpr == 0);
1260 } else {
1261 cmn_err(CE_NOTE, "vbi_is_preempt_enabled: called without initializing vbi!\n");
1262 return 1;
1263 }
1264}
1265
1266void
1267vbi_poke_cpu(int c)
1268{
1269 if (c < ncpus)
1270 poke_cpu(c);
1271}
1272
1273/*
1274 * This is revision 5 of the interface.
1275 */
1276
1277void *
1278vbi_lowmem_alloc(uint64_t phys, size_t size)
1279{
1280 return (vbi_internal_alloc(&phys, size, PAGESIZE /* alignment */, 0 /* non-contiguous */));
1281}
1282
1283void
1284vbi_lowmem_free(void *va, size_t size)
1285{
1286 p_contig_free(va, size);
1287}
1288
1289/*
1290 * This is revision 6 of the interface.
1291 */
1292
1293int
1294vbi_is_preempt_pending(void)
1295{
1296 char crr = VBI_CPU_RUNRUN;
1297 char krr = VBI_CPU_KPRUNRUN;
1298 return crr != 0 || krr != 0;
1299}
1300
1301/*
1302 * This is revision 7 of the interface.
1303 */
1304
1305void *
1306vbi_phys_alloc(uint64_t *phys, size_t size, uint64_t alignment, int contig)
1307{
1308 return (vbi_internal_alloc(phys, size, alignment, contig));
1309}
1310
1311void
1312vbi_phys_free(void *va, size_t size)
1313{
1314 p_contig_free(va, size);
1315}
1316
1317
1318/*
1319 * This is revision 8 of the interface.
1320 */
1321static vnode_t vbipagevp;
1322
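/*
 * Revision 8 page-list allocator: vbi_pages_alloc() reserves memory and pulls
 * individual pages off the free/cache lists (returned exclusively locked),
 * vbi_pages_premap() downgrades the locks to shared so the pages can be
 * mapped, and vbi_pages_free() re-takes exclusive locks and returns the pages.
 * vbipagevp above is just the dummy vnode handed to page_get_freelist() and
 * page_get_cachelist().
 */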
1323page_t **
1324vbi_pages_alloc(uint64_t *phys, size_t size)
1325{
1326 /*
1327 * The page freelist and cachelist both hold pages that are not mapped into any address space.
1328 * The cachelist does not hold truly free pages; when memory is exhausted they are moved over
1329 * to the freelist.
1330 * It is the total of the free + cache lists that shows up in the 'free' column of vmstat.
1331 */
1332 page_t **pp_pages = NULL;
1333 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1334
1335 /* reserve available memory for pages */
1336 int rc = page_resv(npages, KM_NOSLEEP);
1337 if (rc)
1338 {
1339 /* create the pages */
1340 rc = page_create_wait(npages, 0 /* flags */);
1341 if (rc)
1342 {
1343 /* alloc space for page_t pointer array */
1344 size_t pp_size = npages * sizeof(page_t *);
1345 pp_pages = kmem_zalloc(pp_size, KM_SLEEP);
1346 if (pp_pages)
1347 {
1348 /*
1349 * Get pages from kseg; the 'virtAddr' here is only used for page colouring, but
1350 * unfortunately we don't know the 'virtAddr' at which this memory will eventually be mapped.
1351 */
1352 caddr_t virtAddr = NULL;
1353 for (int64_t i = 0; i < npages; i++, virtAddr += PAGESIZE)
1354 {
1355 /* get a page from the freelists */
1356 page_t *ppage = vbi_page_get_fromlist(1 /* freelist */, virtAddr, PAGESIZE);
1357 if (!ppage)
1358 {
1359 /* try from the cachelists */
1360 ppage = vbi_page_get_fromlist(2 /* cachelist */, virtAddr, PAGESIZE);
1361 if (!ppage)
1362 {
1363 /* allocation failed; back out everything done so far */
1364 page_create_putback(npages - i);
1365 while (--i >= 0)
1366 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1367 kmem_free(pp_pages, pp_size);
1368 page_unresv(npages);
1369 return NULL;
1370 }
1371
1372 /* remove association with the vnode for pages from the cachelist */
1373 if (!PP_ISAGED(ppage))
1374 page_hashout(ppage, NULL /* mutex */);
1375 }
1376
1377 PP_CLRFREE(ppage); /* Page is not free */
1378 PP_CLRAGED(ppage); /* Page is not hashed in */
1379 pp_pages[i] = ppage;
1380 }
1381
1382 /*
1383 * We now hold the pages exclusively locked; the locks must be downgraded
1384 * before the pages are mapped in.
1385 */
1386 *phys = (uint64_t)page_pptonum(pp_pages[0]) << PAGESHIFT;
1387 return pp_pages;
1388 }
1389
1390 page_create_putback(npages);
1391 }
1392
1393 page_unresv(npages);
1394 }
1395
1396 return NULL;
1397}
1398
1399
1400void
1401vbi_pages_free(page_t **pp_pages, size_t size)
1402{
1403 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1404 size_t pp_size = npages * sizeof(page_t *);
1405 for (pgcnt_t i = 0; i < npages; i++)
1406 {
1407 /* we need to exclusively lock the pages before freeing them */
1408 int rc = page_tryupgrade(pp_pages[i]);
1409 if (!rc)
1410 {
1411 page_unlock(pp_pages[i]);
1412 while (!page_lock(pp_pages[i], SE_EXCL, NULL /* mutex */, P_RECLAIM))
1413 ;
1414 }
1415
1416 page_free(pp_pages[i], 0 /* don't need, move to tail */);
1417 }
1418
1419 kmem_free(pp_pages, pp_size);
1420 page_unresv(npages);
1421}
1422
1423
1424int
1425vbi_pages_premap(page_t **pp_pages, size_t size, uint64_t *pphysaddrs)
1426{
1427 if (!pphysaddrs)
1428 return -1;
1429
1430 pgcnt_t npages = (size + PAGESIZE - 1) >> PAGESHIFT;
1431 for (pgcnt_t i = 0; i < npages; i++)
1432 {
1433 /*
1434 * Prepare the pages for mapping into kernel/user space: we need to
1435 * downgrade the exclusive page lock to a shared lock if the
1436 * page is still locked exclusively.
1437 */
1438 if (page_tryupgrade(pp_pages[i]) == 1)
1439 page_downgrade(pp_pages[i]);
1440 pphysaddrs[i] = vbi_page_to_pa(pp_pages, i);
1441 }
1442
1443 return 0;
1444}
1445
1446
1447uint64_t
1448vbi_page_to_pa(page_t **pp_pages, pgcnt_t i)
1449{
1450 pfn_t pfn = page_pptonum(pp_pages[i]);
1451 if (pfn == PFN_INVALID)
1452 panic("vbi_page_to_pa: page_pptonum() failed\n");
1453 return (uint64_t)pfn << PAGESHIFT;
1454}
1455
1456
1457static page_t *
1458vbi_page_get_fromlist(uint_t freelist, caddr_t virtAddr, size_t pgsize)
1459{
1460 /* pgsize only applies when using the freelist */
1461 seg_t kernseg;
1462 kernseg.s_as = &kas;
1463 page_t *ppage = NULL;
1464 if (freelist == 1)
1465 {
1466 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1467 pgsize, 0 /* flags */, NULL /* local group */);
1468 if (!ppage && use_kflt)
1469 {
1470 ppage = page_get_freelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1471 pgsize, 0x0200 /* PG_KFLT */, NULL /* local group */);
1472 }
1473 }
1474 else
1475 {
1476 /* cachelist */
1477 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1478 0 /* flags */, NULL /* local group */);
1479 if (!ppage && use_kflt)
1480 {
1481 ppage = page_get_cachelist(&vbipagevp, 0 /* offset */, &kernseg, virtAddr,
1482 0x0200 /* PG_KFLT */, NULL /* local group */);
1483 }
1484 }
1485 return ppage;
1486}
1487
1488
1489/*
1490 * Large page code.
1491 */
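/*
 * vbi_large_page_alloc() pulls one naturally aligned, physically contiguous
 * run of npages off the freelist as a single large page and marks every
 * constituent page in-use; vbi_large_page_premap() downgrades the constituent
 * page locks to shared before the page is mapped; vbi_large_page_free()
 * re-takes exclusive locks and frees the whole run.
 */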
1492
1493page_t *
1494vbi_large_page_alloc(uint64_t *pphys, size_t pgsize)
1495{
1496 pgcnt_t const npages = pgsize >> PAGESHIFT;
1497 page_t *pproot, *pp, *pplist;
1498 pgcnt_t ipage;
1499 caddr_t vaddr;
1500 seg_t kernseg;
1501 int rc;
1502
1503 /*
1504 * Reserve available memory for a large page and create it.
1505 */
1506 rc = page_resv(npages, KM_NOSLEEP);
1507 if (!rc)
1508 return NULL;
1509
1510 rc = page_create_wait(npages, 0 /* flags */);
1511 if (!rc) {
1512 page_unresv(npages);
1513 return NULL;
1514 }
1515
1516 /*
1517 * Get a page off the free list. We set vaddr to 0 since we don't know
1518 * where the memory is going to be mapped.
1519 */
1520 vaddr = NULL;
1521 kernseg.s_as = &kas;
1522 pproot = vbi_page_get_fromlist(1 /* freelist */, vaddr, pgsize);
1523 if (!pproot)
1524 {
1525 page_create_putback(npages);
1526 page_unresv(npages);
1527 return NULL;
1528 }
1529 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1530
1531 /*
1532 * Mark all the sub-pages as non-free and not-hashed-in.
1533 * It is paramount that we destroy the list (before freeing it).
1534 */
1535 pplist = pproot;
1536 for (ipage = 0; ipage < npages; ipage++) {
1537 pp = pplist;
1538 AssertPtr(pp);
1539 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1540 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1541 page_sub(&pplist, pp);
1542 AssertMsg(PP_ISFREE(pp), ("%p\n", pp));
1543 AssertMsg(pp->p_szc == pproot->p_szc, ("%p - %d expected %d \n", pp, pp->p_szc, pproot->p_szc));
1544
1545 PP_CLRFREE(pp);
1546 PP_CLRAGED(pp);
1547 }
1548
1549 *pphys = (uint64_t)page_pptonum(pproot) << PAGESHIFT;
1550 AssertMsg(!(*pphys & (pgsize - 1)), ("%llx %zx\n", *pphys, pgsize));
1551 return pproot;
1552}
1553
1554void
1555vbi_large_page_free(page_t *pproot, size_t pgsize)
1556{
1557 pgcnt_t const npages = pgsize >> PAGESHIFT;
1558 pgcnt_t ipage;
1559
1560 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1561 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1562
1563 /*
1564 * We need to exclusively lock the sub-pages before freeing
1565 * the large one.
1566 */
1567 for (ipage = 0; ipage < npages; ipage++) {
1568 page_t *pp = page_nextn(pproot, ipage);
1569 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1570 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1571 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1572
1573 int rc = page_tryupgrade(pp);
1574 if (!rc) {
1575 page_unlock(pp);
1576 while (!page_lock(pp, SE_EXCL, NULL /* mutex */, P_RECLAIM)) {
1577 /*nothing*/;
1578 }
1579 }
1580 }
1581
1582 /*
1583 * Free the large page and unreserve the memory.
1584 */
1585 page_free_pages(pproot);
1586 page_unresv(npages);
1587}
1588
1589int
1590vbi_large_page_premap(page_t *pproot, size_t pgsize)
1591{
1592 pgcnt_t const npages = pgsize >> PAGESHIFT;
1593 pgcnt_t ipage;
1594
1595 Assert(page_get_pagecnt(pproot->p_szc) == npages);
1596 AssertMsg(!(page_pptonum(pproot) & (npages - 1)), ("%p:%lx npages=%lx\n", pproot, page_pptonum(pproot), npages));
1597
1598 /*
1599 * We need to downgrade the sub-pages from exclusive to shared locking
1600 * because they cannot be mapped in while we hold them exclusively (cf. vbi_pages_premap()).
1601 */
1602 for (ipage = 0; ipage < npages; ipage++) {
1603 page_t *pp = page_nextn(pproot, ipage);
1604 AssertMsg(page_pptonum(pp) == ipage + page_pptonum(pproot),
1605 ("%p:%lx %lx+%lx\n", pp, page_pptonum(pp), ipage, page_pptonum(pproot)));
1606 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1607
1608 if (page_tryupgrade(pp) == 1)
1609 page_downgrade(pp);
1610 AssertMsg(!PP_ISFREE(pp), ("%p\n", pp));
1611 }
1612
1613 return 0;
1614}
1615
1616
1617/*
1618 * As more functions are added, they should be placed above this point in the
1619 * file, start with a comment indicating the interface revision, and the
1620 * revision level below should be increased. Also change vbi_modlmisc at the top of the file.
1621 *
1622 * NOTE! We'll start caring about this if anything in here ever makes it into
1623 * the Solaris kernel proper.
1624 */
1625uint_t vbi_revision_level = 9;
1626