1 | /*
|
---|
2 | * CDDL HEADER START
|
---|
3 | *
|
---|
4 | * The contents of this file are subject to the terms of the
|
---|
5 | * Common Development and Distribution License (the "License").
|
---|
6 | * You may not use this file except in compliance with the License.
|
---|
7 | *
|
---|
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
---|
9 | * or http://www.opensolaris.org/os/licensing.
|
---|
10 | * See the License for the specific language governing permissions
|
---|
11 | * and limitations under the License.
|
---|
12 | *
|
---|
13 | * When distributing Covered Code, include this CDDL HEADER in each
|
---|
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
---|
15 | * If applicable, add the following below this CDDL HEADER, with the
|
---|
16 | * fields enclosed by brackets "[]" replaced with your own identifying
|
---|
17 | * information: Portions Copyright [yyyy] [name of copyright owner]
|
---|
18 | *
|
---|
19 | * CDDL HEADER END
|
---|
20 | */
|
---|
21 |
|
---|
22 | /*
|
---|
23 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
---|
24 | */
|
---|
25 |
|
---|
26 | /*
|
---|
27 | * DTrace - Dynamic Tracing for Solaris
|
---|
28 | *
|
---|
29 | * This is the implementation of the Solaris Dynamic Tracing framework
|
---|
30 | * (DTrace). The user-visible interface to DTrace is described at length in
|
---|
31 | * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
|
---|
32 | * library, the in-kernel DTrace framework, and the DTrace providers are
|
---|
33 | * described in the block comments in the <sys/dtrace.h> header file. The
|
---|
34 | * internal architecture of DTrace is described in the block comments in the
|
---|
35 | * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
|
---|
36 | * implementation very much assume mastery of all of these sources; if one has
|
---|
37 | * an unanswered question about the implementation, one should consult them
|
---|
38 | * first.
|
---|
39 | *
|
---|
40 | * The functions here are ordered roughly as follows:
|
---|
41 | *
|
---|
42 | * - Probe context functions
|
---|
43 | * - Probe hashing functions
|
---|
44 | * - Non-probe context utility functions
|
---|
45 | * - Matching functions
|
---|
46 | * - Provider-to-Framework API functions
|
---|
47 | * - Probe management functions
|
---|
48 | * - DIF object functions
|
---|
49 | * - Format functions
|
---|
50 | * - Predicate functions
|
---|
51 | * - ECB functions
|
---|
52 | * - Buffer functions
|
---|
53 | * - Enabling functions
|
---|
54 | * - DOF functions
|
---|
55 | * - Anonymous enabling functions
|
---|
56 | * - Consumer state functions
|
---|
57 | * - Helper functions
|
---|
58 | * - Hook functions
|
---|
59 | * - Driver cookbook functions
|
---|
60 | *
|
---|
61 | * Each group of functions begins with a block comment labelled the "DTrace
|
---|
62 | * [Group] Functions", allowing one to find each block by searching forward
|
---|
63 | * on capital-f functions.
|
---|
64 | */
|
---|
65 | #ifndef VBOX
|
---|
66 | #include <sys/errno.h>
|
---|
67 | #include <sys/stat.h>
|
---|
68 | #include <sys/modctl.h>
|
---|
69 | #include <sys/conf.h>
|
---|
70 | #include <sys/systm.h>
|
---|
71 | #include <sys/ddi.h>
|
---|
72 | #include <sys/sunddi.h>
|
---|
73 | #include <sys/cpuvar.h>
|
---|
74 | #include <sys/kmem.h>
|
---|
75 | #include <sys/strsubr.h>
|
---|
76 | #include <sys/sysmacros.h>
|
---|
77 | #include <sys/dtrace_impl.h>
|
---|
78 | #include <sys/atomic.h>
|
---|
79 | #include <sys/cmn_err.h>
|
---|
80 | #include <sys/mutex_impl.h>
|
---|
81 | #include <sys/rwlock_impl.h>
|
---|
82 | #include <sys/ctf_api.h>
|
---|
83 | #include <sys/panic.h>
|
---|
84 | #include <sys/priv_impl.h>
|
---|
85 | #include <sys/policy.h>
|
---|
86 | #include <sys/cred_impl.h>
|
---|
87 | #include <sys/procfs_isa.h>
|
---|
88 | #include <sys/taskq.h>
|
---|
89 | #include <sys/mkdev.h>
|
---|
90 | #include <sys/kdi.h>
|
---|
91 | #include <sys/zone.h>
|
---|
92 | #include <sys/socket.h>
|
---|
93 | #include <netinet/in.h>
|
---|
94 |
|
---|
95 | #else /* VBOX */
|
---|
96 | # include <sys/dtrace_impl.h>
|
---|
97 | # include <VBox/sup.h>
|
---|
98 | # include <iprt/assert.h>
|
---|
99 | # include <iprt/cpuset.h>
|
---|
100 | # include <iprt/err.h>
|
---|
101 | # include <iprt/mem.h>
|
---|
102 | # include <iprt/mp.h>
|
---|
103 | # include <iprt/string.h>
|
---|
104 | # include <iprt/process.h>
|
---|
105 | # include <iprt/thread.h>
|
---|
106 | # include <iprt/timer.h>
|
---|
107 | # include <limits.h>
|
---|
108 |
|
---|
109 | /*
|
---|
110 | * Use asm.h to implement some of the simple stuff in dtrace_asm.s.
|
---|
111 | */
|
---|
112 | # include <iprt/asm.h>
|
---|
113 | # include <iprt/asm-amd64-x86.h>
|
---|
114 | # define dtrace_casptr(a_ppvDst, a_pvOld, a_pvNew) \
|
---|
115 | VBoxDtCompareAndSwapPtr((void * volatile *)a_ppvDst, a_pvOld, a_pvNew)
|
---|
116 | DECLINLINE(void *) VBoxDtCompareAndSwapPtr(void * volatile *ppvDst, void *pvOld, void *pvNew)
|
---|
117 | {
|
---|
118 | void *pvRet;
|
---|
119 | ASMAtomicCmpXchgExPtrVoid(ppvDst, pvNew, pvOld, &pvRet);
|
---|
120 | return pvRet;
|
---|
121 | }
|
---|
122 |
|
---|
123 | # define dtrace_cas32(a_pu32Dst, a_pu32Old, a_pu32New) \
|
---|
124 | VBoxDtCompareAndSwapU32(a_pu32Dst, a_pu32Old, a_pu32New)
|
---|
125 | DECLINLINE(uint32_t) VBoxDtCompareAndSwapU32(uint32_t volatile *pu32Dst, uint32_t u32Old, uint32_t u32New)
|
---|
126 | {
|
---|
127 | uint32_t u32Ret;
|
---|
128 | ASMAtomicCmpXchgExU32(pu32Dst, u32New, u32Old, &u32Ret);
|
---|
129 | return u32Ret;
|
---|
130 | }
|
---|
131 |
|
---|
/*
 * Memory barrier and interrupt control primitives, expressed in terms of
 * the IPRT assembly helpers.  dtrace_interrupt_enable() takes the flags
 * value to hand to ASMSetFlags().
 */
# define dtrace_membar_consumer()		ASMReadFence()
# define dtrace_membar_producer()		ASMWriteFence()
# define dtrace_interrupt_disable()		ASMIntDisableFlags()
# define dtrace_interrupt_enable(a_EFL)		ASMSetFlags(a_EFL)
|
---|
136 |
|
---|
137 | /*
|
---|
138 | * NULL must be set to 0 or we'll end up with a billion warnings(=errors).
|
---|
139 | */
|
---|
140 | # undef NULL
|
---|
141 | # define NULL (0)
|
---|
142 | #endif /* VBOX */
|
---|
143 |
|
---|
/**
 * Check whether the given address is a valid kernel address.
 *
 * The argument may be a uintptr_t or a uint64_t.  In the VBOX build a value
 * whose type is not pointer-sized must survive the round trip through
 * uintptr_t unchanged before it is handed to RTR0MemKernelIsValidAddr();
 * in the native build the address is simply compared against KERNELBASE.
 */
#ifdef VBOX
# define VBDT_IS_VALID_KRNL_ADDR(a_uAddr) \
	(   (sizeof(a_uAddr) == sizeof(uintptr_t) || (uintptr_t)(a_uAddr) == (a_uAddr)) \
	 && RTR0MemKernelIsValidAddr((void *)(uintptr_t)(a_uAddr)) )
#else
# define VBDT_IS_VALID_KRNL_ADDR(a_uAddr)	((a_uAddr) >= KERNELBASE)
#endif
|
---|
153 |
|
---|
154 |
|
---|
155 | /*
|
---|
156 | * DTrace Tunable Variables
|
---|
157 | *
|
---|
158 | * The following variables may be tuned by adding a line to /etc/system that
|
---|
159 | * includes both the name of the DTrace module ("dtrace") and the name of the
|
---|
160 | * variable. For example:
|
---|
161 | *
|
---|
162 | * set dtrace:dtrace_destructive_disallow = 1
|
---|
163 | *
|
---|
164 | * In general, the only variables that one should be tuning this way are those
|
---|
165 | * that affect system-wide DTrace behavior, and for which the default behavior
|
---|
166 | * is undesirable. Most of these variables are tunable on a per-consumer
|
---|
167 | * basis using DTrace options, and need not be tuned on a system-wide basis.
|
---|
168 | * When tuning these variables, avoid pathological values; while some attempt
|
---|
169 | * is made to verify the integrity of these variables, they are not considered
|
---|
170 | * part of the supported interface to DTrace, and they are therefore not
|
---|
171 | * checked comprehensively. Further, these variables should not be tuned
|
---|
172 | * dynamically via "mdb -kw" or other means; they should only be tuned via
|
---|
173 | * /etc/system.
|
---|
174 | */
|
---|
175 | int dtrace_destructive_disallow = 0;
|
---|
176 | dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
|
---|
177 | size_t dtrace_difo_maxsize = (256 * 1024);
|
---|
178 | dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
|
---|
179 | size_t dtrace_global_maxsize = (16 * 1024);
|
---|
180 | size_t dtrace_actions_max = (16 * 1024);
|
---|
181 | size_t dtrace_retain_max = 1024;
|
---|
182 | dtrace_optval_t dtrace_helper_actions_max = 32;
|
---|
183 | dtrace_optval_t dtrace_helper_providers_max = 32;
|
---|
184 | dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
|
---|
185 | size_t dtrace_strsize_default = 256;
|
---|
186 | dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */
|
---|
187 | dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */
|
---|
188 | dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
|
---|
189 | dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
|
---|
190 | dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
|
---|
191 | dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
|
---|
192 | dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */
|
---|
193 | dtrace_optval_t dtrace_nspec_default = 1;
|
---|
194 | dtrace_optval_t dtrace_specsize_default = 32 * 1024;
|
---|
195 | dtrace_optval_t dtrace_stackframes_default = 20;
|
---|
196 | dtrace_optval_t dtrace_ustackframes_default = 20;
|
---|
197 | dtrace_optval_t dtrace_jstackframes_default = 50;
|
---|
198 | dtrace_optval_t dtrace_jstackstrsize_default = 512;
|
---|
199 | int dtrace_msgdsize_max = 128;
|
---|
200 | hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */
|
---|
201 | hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */
|
---|
202 | int dtrace_devdepth_max = 32;
|
---|
203 | int dtrace_err_verbose;
|
---|
204 | hrtime_t dtrace_deadman_interval = NANOSEC;
|
---|
205 | hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
|
---|
206 | hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
|
---|
207 |
|
---|
208 | /*
|
---|
209 | * DTrace External Variables
|
---|
210 | *
|
---|
211 | * As dtrace(7D) is a kernel module, any DTrace variables are obviously
|
---|
212 | * available to DTrace consumers via the backtick (`) syntax. One of these,
|
---|
213 | * dtrace_zero, is made deliberately so: it is provided as a source of
|
---|
214 | * well-known, zero-filled memory. While this variable is not documented,
|
---|
215 | * it is used by some translators as an implementation detail.
|
---|
216 | */
|
---|
/* 256 bytes of well-known, zero-filled memory. */
const char dtrace_zero[256] = { 0 };
|
---|
218 |
|
---|
219 | /*
|
---|
220 | * DTrace Internal Variables
|
---|
221 | */
|
---|
222 | #ifndef VBOX
|
---|
223 | static dev_info_t *dtrace_devi; /* device info */
|
---|
224 | #endif
|
---|
225 | static vmem_t *dtrace_arena; /* probe ID arena */
|
---|
226 | #ifndef VBOX
|
---|
227 | static vmem_t *dtrace_minor; /* minor number arena */
|
---|
228 | static taskq_t *dtrace_taskq; /* task queue */
|
---|
229 | #endif
|
---|
230 | static dtrace_probe_t **dtrace_probes; /* array of all probes */
|
---|
231 | static VBDTTYPE(uint32_t,int) dtrace_nprobes; /* number of probes */
|
---|
232 | static dtrace_provider_t *dtrace_provider; /* provider list */
|
---|
233 | static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
|
---|
234 | static int dtrace_opens; /* number of opens */
|
---|
235 | static int dtrace_helpers; /* number of helpers */
|
---|
236 | #ifndef VBOX
|
---|
237 | static void *dtrace_softstate; /* softstate pointer */
|
---|
238 | #endif
|
---|
239 | static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
|
---|
240 | static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
|
---|
241 | static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
|
---|
242 | static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
|
---|
243 | static int dtrace_toxranges; /* number of toxic ranges */
|
---|
244 | static int dtrace_toxranges_max; /* size of toxic range array */
|
---|
245 | static dtrace_anon_t dtrace_anon; /* anonymous enabling */
|
---|
246 | static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
|
---|
247 | static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
|
---|
248 | #ifndef VBOX
|
---|
249 | static kthread_t *dtrace_panicked; /* panicking thread */
|
---|
250 | #endif
|
---|
251 | static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
|
---|
252 | static dtrace_genid_t dtrace_probegen; /* current probe generation */
|
---|
253 | static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
|
---|
254 | static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
|
---|
255 | static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
|
---|
256 | static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
|
---|
257 | static int dtrace_dynvar_failclean; /* dynvars failed to clean */
|
---|
258 |
|
---|
259 | /*
|
---|
260 | * DTrace Locking
|
---|
261 | * DTrace is protected by three (relatively coarse-grained) locks:
|
---|
262 | *
|
---|
263 | * (1) dtrace_lock is required to manipulate essentially any DTrace state,
|
---|
264 | * including enabling state, probes, ECBs, consumer state, helper state,
|
---|
265 | * etc. Importantly, dtrace_lock is _not_ required when in probe context;
|
---|
266 | * probe context is lock-free -- synchronization is handled via the
|
---|
267 | * dtrace_sync() cross call mechanism.
|
---|
268 | *
|
---|
269 | * (2) dtrace_provider_lock is required when manipulating provider state, or
|
---|
270 | * when provider state must be held constant.
|
---|
271 | *
|
---|
272 | * (3) dtrace_meta_lock is required when manipulating meta provider state, or
|
---|
273 | * when meta provider state must be held constant.
|
---|
274 | *
|
---|
275 | * The lock ordering between these three locks is dtrace_meta_lock before
|
---|
276 | * dtrace_provider_lock before dtrace_lock. (In particular, there are
|
---|
277 | * several places where dtrace_provider_lock is held by the framework as it
|
---|
278 | * calls into the providers -- which then call back into the framework,
|
---|
279 | * grabbing dtrace_lock.)
|
---|
280 | *
|
---|
281 | * There are two other locks in the mix: mod_lock and cpu_lock. With respect
|
---|
282 | * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
|
---|
283 | * role as a coarse-grained lock; it is acquired before both of these locks.
|
---|
284 | * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
|
---|
285 | * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
|
---|
286 | * mod_lock is similar with respect to dtrace_provider_lock in that it must be
|
---|
287 | * acquired _between_ dtrace_provider_lock and dtrace_lock.
|
---|
288 | */
|
---|
289 | static kmutex_t dtrace_lock; /* probe state lock */
|
---|
290 | static kmutex_t dtrace_provider_lock; /* provider state lock */
|
---|
291 | static kmutex_t dtrace_meta_lock; /* meta-provider state lock */
|
---|
292 |
|
---|
293 | /*
|
---|
294 | * DTrace Provider Variables
|
---|
295 | *
|
---|
296 | * These are the variables relating to DTrace as a provider (that is, the
|
---|
297 | * provider of the BEGIN, END, and ERROR probes).
|
---|
298 | */
|
---|
299 | static dtrace_pattr_t dtrace_provider_attr = {
|
---|
300 | { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
|
---|
301 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
|
---|
302 | { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
|
---|
303 | { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
|
---|
304 | { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
|
---|
305 | };
|
---|
306 |
|
---|
/*
 * Do-nothing stub; takes no arguments and returns nothing.
 */
static void
dtrace_nullop(void)
{
	/* intentionally empty */
}
|
---|
310 |
|
---|
/*
 * Do-nothing stub for slots that expect an int result; always returns 0.
 */
static int
dtrace_enable_nullop(void)
{
	const int result = 0;

	return (result);
}
|
---|
316 |
|
---|
317 | static dtrace_pops_t dtrace_provider_ops = {
|
---|
318 | (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
|
---|
319 | (void (*)(void *, struct modctl *))dtrace_nullop,
|
---|
320 | (int (*)(void *, dtrace_id_t, void *))(uintptr_t)dtrace_enable_nullop,
|
---|
321 | (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
|
---|
322 | (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
|
---|
323 | (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
|
---|
324 | NULL,
|
---|
325 | NULL,
|
---|
326 | NULL,
|
---|
327 | (void (*)(void *, dtrace_id_t, void *))dtrace_nullop
|
---|
328 | };
|
---|
329 |
|
---|
330 | static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */
|
---|
331 | static dtrace_id_t dtrace_probeid_end; /* special END probe */
|
---|
332 | dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
|
---|
333 |
|
---|
334 | /*
|
---|
335 | * DTrace Helper Tracing Variables
|
---|
336 | */
|
---|
uint32_t	dtrace_helptrace_next = 0;
uint32_t	dtrace_helptrace_nlocals;
char		*dtrace_helptrace_buffer;
int		dtrace_helptrace_bufsize = 512 * 1024;	/* 512 KB */

/* Helper tracing defaults to on only when DEBUG is defined. */
#ifndef DEBUG
int		dtrace_helptrace_enabled = 0;
#else
int		dtrace_helptrace_enabled = 1;
#endif
|
---|
347 |
|
---|
348 | /*
|
---|
349 | * DTrace Error Hashing
|
---|
350 | *
|
---|
351 | * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
|
---|
352 | * table. This is very useful for checking coverage of tests that are
|
---|
353 | * expected to induce DIF or DOF processing errors, and may be useful for
|
---|
354 | * debugging problems in the DIF code generator or in DOF generation. The
|
---|
355 | * error hash may be examined with the ::dtrace_errhash MDB dcmd.
|
---|
356 | */
|
---|
#ifdef DEBUG
/* Error-hash bookkeeping; compiled only when DEBUG is defined. */
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char	*dtrace_errlast;
static kthread_t	*dtrace_errthread;
static kmutex_t		dtrace_errlock;
#endif
|
---|
363 |
|
---|
364 | /*
|
---|
365 | * DTrace Macros and Constants
|
---|
366 | *
|
---|
367 | * These are various macros that are useful in various spots in the
|
---|
368 | * implementation, along with a few random constants that have no meaning
|
---|
369 | * outside of the implementation. There is no real structure to this cpp
|
---|
370 | * mishmash -- but is there ever?
|
---|
371 | */
|
---|
/*
 * Probe hash helpers.  Each one reaches into a probe at a byte offset that
 * is recorded in the hash itself (dth_stroffs, dth_nextoffs, dth_prevoffs).
 */

/* Hash the string whose pointer lives dth_stroffs bytes into the probe. */
#define	DTRACE_HASHSTR(hash, probe) \
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

/*
 * Address of the probe's next/prev link.  The expansion is wrapped in an
 * outer pair of parentheses so that a postfix operator applied to the macro
 * (subscript, "->") binds to the cast result and not to the integer sum
 * underneath the cast.
 */
#define	DTRACE_HASHNEXT(hash, probe) \
	((dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs))

#define	DTRACE_HASHPREV(hash, probe) \
	((dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs))

/* Non-zero when both probes' hashed strings compare equal with strcmp(). */
#define	DTRACE_HASHEQ(hash, lhs, rhs) \
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

/* Three 32-bit words, i.e. 12 bytes. */
#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)
|
---|
388 |
|
---|
389 | /*
|
---|
390 | * The key for a thread-local variable consists of the lower 61 bits of the
|
---|
391 | * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
|
---|
392 | * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
|
---|
393 | * equal to a variable identifier. This is necessary (but not sufficient) to
|
---|
394 | * assure that global associative arrays never collide with thread-local
|
---|
395 | * variables. To guarantee that they cannot collide, we must also define the
|
---|
396 | * order for keying dynamic variables. That order is:
|
---|
397 | *
|
---|
398 | * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
|
---|
399 | *
|
---|
400 | * Because the variable-key and the tls-key are in orthogonal spaces, there is
|
---|
401 | * no way for a global variable key signature to match a thread-local key
|
---|
402 | * signature.
|
---|
403 | */
|
---|
/*
 * Both variants are wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe in an unbraced if/else); callers terminate
 * the invocation with a semicolon.
 *
 * Native variant: counts the bits of CPU->cpu_intr_actv above LOCK_LEVEL
 * into "intr" and stores that in bits 61..63, with the low 61 bits taken
 * from curthread->t_did + DIF_VARIABLE_MAX.
 *
 * VBOX variant: the low 61 bits come from the native thread handle plus
 * DIF_VARIABLE_MAX; only bit 61 is used, set when
 * RTThreadIsInInterrupt() reports true.
 */
#ifndef VBOX
#define	DTRACE_TLS_THRKEY(where) do {					\
	uint_t intr = 0;						\
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1);		\
	for (; actv; actv >>= 1)					\
		intr++;							\
	ASSERT(intr < (1 << 3));					\
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) &		\
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61);	\
} while (0)
#else
#define	DTRACE_TLS_THRKEY(where) do { \
	(where) = (((uintptr_t)RTThreadNativeSelf() + DIF_VARIABLE_MAX) & (RT_BIT_64(61) - 1)) \
	    | (RTThreadIsInInterrupt(NIL_RTTHREAD) ? RT_BIT_64(61) : 0); \
} while (0)
#endif
|
---|
420 |
|
---|
/*
 * Byte-swap helpers.  DT_BSWAP_N(x) reverses the low N/8 bytes of x; any
 * higher bytes of x are ignored.  The operand is widened to uint64_t before
 * any shifting so that a narrow or signed argument can neither overflow a
 * signed int when shifted left nor be shifted by its full width.  The
 * result therefore always has type uint64_t.  Note that x is evaluated
 * more than once; do not pass an expression with side effects.
 */
#define	DT_BSWAP_8(x)	((uint64_t)(x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((uint64_t)(x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((uint64_t)(x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((uint64_t)(x) >> 32))
|
---|
425 |
|
---|
/* Mask selecting the low 32 bits of a 64-bit value. */
#define	DT_MASK_LO 0x00000000FFFFFFFFULL

/*
 * Store "what", converted to "type", at byte offset "offset" from "tomax".
 * The offset is parenthesized before being cast so that an expression
 * argument is evaluated in full first.  The expansion ends in a semicolon
 * of its own (kept for existing callers), so it is a complete statement.
 */
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)(offset))) = (type)(what);
|
---|
430 |
|
---|
/*
 * Alignment guard for use inside a function that returns an integer.  When
 * "addr" has any of the low bits selected by (size - 1) set, it raises
 * CPU_DTRACE_BADALIGN in *flags, records the address in the current CPU's
 * cpuc_dtrace_illval, and returns 0 from the enclosing function.  Every
 * argument is parenthesized so that expression arguments expand correctly.
 * The check compiles to nothing when __i386 is defined.
 */
#ifndef __i386
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if ((addr) & ((size) - 1)) {					\
		*(flags) |= CPU_DTRACE_BADALIGN;			\
		cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = (addr);	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif
|
---|
441 |
|
---|
/*
 * True when the region [testaddr, testaddr + testsz) lies entirely inside
 * [baseaddr, baseaddr + basesz).  Three conditions must all hold:
 *
 *   - the start of the test region is at an offset below basesz from
 *     baseaddr (an address under baseaddr wraps to a huge offset and fails);
 *   - the end of the test region is at an offset of at most basesz;
 *   - testaddr + testsz did not wrap around.
 *
 * The arithmetic relies on unsigned wrap-around, so negative sizes are
 * rejected.  A test region of size 0 is accepted.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz)		\
	(   (testaddr) - (baseaddr) < (basesz)				\
	 && (testaddr) + (testsz) - (baseaddr) <= (basesz)		\
	 && (testaddr) + (testsz) >= (testaddr))
|
---|
452 |
|
---|
/*
 * True when alloc_sz more bytes fit in the scratch region, i.e. when the
 * space left between dtms_scratch_ptr and the end of the region
 * (dtms_scratch_base + dtms_scratch_size) is at least alloc_sz.  alloc_sz
 * is kept alone on the right of the comparison so that it takes part in no
 * arithmetic that could overflow or underflow.  Unlike DTRACE_INRANGE no
 * further checks are made, because dtms_scratch_ptr is known to lie within
 * the region.  A request for zero bytes is accepted.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz)				\
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size -	\
	    (mstate)->dtms_scratch_ptr >= (alloc_sz))
|
---|
463 |
|
---|
/*
 * DTRACE_LOADFUNC(bits) defines dtrace_load<bits>(addr), which returns the
 * uint<bits>_t found at addr, or 0 after flagging the current CPU when the
 * load is refused or fails.
 *
 * Both variants first walk dtrace_toxrange[]: if [addr, addr + size)
 * overlaps any toxic range, CPU_DTRACE_BADADDR is set, addr is recorded in
 * cpuc_dtrace_illval and 0 is returned.
 *
 * Native variant: runs DTRACE_ALIGNCHECK first, then performs the load with
 * CPU_DTRACE_NOFAULT set and yields 0 if CPU_DTRACE_FAULT is set afterwards.
 *
 * VBOX variant: no alignment check; the load goes through
 * RTR0MemKernelCopyFrom().  If that reports VERR_NOT_SUPPORTED the address
 * is dereferenced directly; any other failure is treated like a toxic
 * address.
 */
#ifndef VBOX
#define	DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
VBDTSTATIC uint##bits##_t						\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval;						\
	int i;								\
	processorid_t me = VBDT_GET_CPUID();				\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[me].cpuc_dtrace_flags;				\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		/* Refuse the load if it overlaps toxic range i. */	\
		if (addr < dtrace_toxrange[i].dtt_limit &&		\
		    addr + size > dtrace_toxrange[i].dtt_base) {	\
			*flags |= CPU_DTRACE_BADADDR;			\
			cpu_core[me].cpuc_dtrace_illval = addr;		\
			return (0);					\
		}							\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	/*CSTYLED*/							\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return ((*flags & CPU_DTRACE_FAULT) ? 0 : rval);		\
}
#else /* VBOX */
# define DTRACE_LOADFUNC(bits)						\
VBDTSTATIC uint##bits##_t						\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t const size = bits / NBBY;				\
	uint##bits##_t rval;						\
	processorid_t me;						\
	int i, rc;							\
									\
	/*DTRACE_ALIGNCHECK(addr, size, flags);*/			\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		/* Refuse the load if it overlaps toxic range i. */	\
		if (addr < dtrace_toxrange[i].dtt_limit &&		\
		    addr + size > dtrace_toxrange[i].dtt_base) {	\
			me = VBDT_GET_CPUID();				\
			cpu_core[me].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; \
			cpu_core[me].cpuc_dtrace_illval = addr;		\
			return (0);					\
		}							\
	}								\
									\
	rc = RTR0MemKernelCopyFrom(&rval, (void const *)addr, size);	\
	if (RT_SUCCESS(rc))						\
		return rval;						\
									\
	/* If not supported, pray it won't fault... */			\
	if (rc == VERR_NOT_SUPPORTED)					\
		return *(uint##bits##_t const *)addr;			\
									\
	/* The copy failed outright: report the address as bad. */	\
	me = VBDT_GET_CPUID();						\
	cpu_core[me].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;		\
	cpu_core[me].cpuc_dtrace_illval = addr;				\
	return (0);							\
}

#endif /* VBOX */
|
---|
547 |
|
---|
/* Pointer-sized load: the 64-bit loader when _LP64 is defined, else 32-bit. */
#ifndef _LP64
#define	dtrace_loadptr	dtrace_load32
#else
#define	dtrace_loadptr	dtrace_load64
#endif
|
---|
553 |
|
---|
#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

/* The negative value is parenthesized so it expands safely in any expression. */
#define	DTRACE_MATCH_FAIL	(-1)
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1

/* Non-zero when the probe's dtpr_func string is not empty. */
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64
|
---|
563 |
|
---|
/*
 * Translate a set of CPU_DTRACE_* flag bits into one DTRACEFLT_* code.  The
 * tests run top to bottom and the first bit found set decides the result;
 * DTRACEFLT_UNKNOWN is produced when none of the listed bits is set.
 */
#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR)   ? DTRACEFLT_BADADDR   :	\
	 ((flags) & CPU_DTRACE_ILLOP)     ? DTRACEFLT_ILLOP     :	\
	 ((flags) & CPU_DTRACE_DIVZERO)   ? DTRACEFLT_DIVZERO   :	\
	 ((flags) & CPU_DTRACE_KPRIV)     ? DTRACEFLT_KPRIV     :	\
	 ((flags) & CPU_DTRACE_UPRIV)     ? DTRACEFLT_UPRIV     :	\
	 ((flags) & CPU_DTRACE_TUPOFLOW)  ? DTRACEFLT_TUPOFLOW  :	\
	 ((flags) & CPU_DTRACE_BADALIGN)  ? DTRACEFLT_BADALIGN  :	\
	 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	 ((flags) & CPU_DTRACE_BADSTACK)  ? DTRACEFLT_BADSTACK  :	\
	 DTRACEFLT_UNKNOWN)
|
---|
575 |
|
---|
/*
 * Non-zero when the action is a DIF expression (DTRACEACT_DIFEXPR) whose
 * DIF object has a return type of kind DIF_TYPE_STRING.  dta_difo is only
 * dereferenced once the action kind has matched.
 */
#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
|
---|
579 |
|
---|
580 | static size_t dtrace_strlen(const char *, size_t);
|
---|
581 | static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
|
---|
582 | static void dtrace_enabling_provide(dtrace_provider_t *);
|
---|
583 | static int dtrace_enabling_match(dtrace_enabling_t *, int *);
|
---|
584 | static void dtrace_enabling_matchall(void);
|
---|
585 | static dtrace_state_t *dtrace_anon_grab(void);
|
---|
586 | #ifndef VBOX
|
---|
587 | static uint64_t dtrace_helper(int, dtrace_mstate_t *,
|
---|
588 | dtrace_state_t *, uint64_t, uint64_t);
|
---|
589 | static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
|
---|
590 | #endif
|
---|
591 | static void dtrace_buffer_drop(dtrace_buffer_t *);
|
---|
592 | static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
|
---|
593 | dtrace_state_t *, dtrace_mstate_t *);
|
---|
594 | static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
|
---|
595 | dtrace_optval_t);
|
---|
596 | static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
|
---|
597 | #ifndef VBOX
|
---|
598 | static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
|
---|
599 | #endif
|
---|
600 |
|
---|
601 | /*
|
---|
602 | * DTrace Probe Context Functions
|
---|
603 | *
|
---|
604 | * These functions are called from probe context. Because probe context is
|
---|
605 | * any context in which C may be called, arbitrary locks may be held,
|
---|
606 | * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
|
---|
607 | * As a result, functions called from probe context may only call other DTrace
|
---|
608 | * support functions -- they may not interact at all with the system at large.
|
---|
609 | * (Note that the ASSERT macro is made probe-context safe by redefining it in
|
---|
610 | * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
|
---|
611 | * loads are to be performed from probe context, they _must_ be in terms of
|
---|
612 | * the safe dtrace_load*() variants.
|
---|
613 | *
|
---|
614 | * Some functions in this block are not actually called from probe context;
|
---|
615 | * for these functions, there will be a comment above the function reading
|
---|
616 | * "Note: not called from probe context."
|
---|
617 | */
|
---|
/*
 * Variadic front end to dtrace_vpanic(): collects the arguments that follow
 * "format" into a va_list and hands both to dtrace_vpanic().
 */
void
dtrace_panic(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	dtrace_vpanic(format, args);
	va_end(args);
}
|
---|
627 |
|
---|
#ifndef VBOX /* We have our own assertion machinery. */
/*
 * Assertion-failure handler: reports the failed expression text "a", the
 * file name "f" and the line number "l" through dtrace_panic().
 */
int
dtrace_assfail(const char *a, const char *f, int l)
{
	uintptr_t opaque;

	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * The value returned only has to be something that no compiler,
	 * however clever, can optimize away.
	 */
	opaque = (uintptr_t)f;
	return (a[opaque]);
}
#endif
|
---|
641 |
|
---|
/*
 * Atomically bump the given error counter from probe context.
 *
 * Most counters written in probe context are per-CPU, but a few error
 * conditions are arcane enough not to merit per-CPU storage and are shared.
 * Concurrent increments of such a counter from several CPUs will hurt
 * scalability, but that is not expected to happen at any rate in a
 * correctly constructed enabling.
 */
static void
dtrace_error(uint32_t *counter)
{
	uint32_t seen, next;

	for (;;) {
		seen = *counter;
		next = seen + 1;

		/*
		 * Never let the counter wrap back to zero: once an error has
		 * been seen the counter must stay non-zero, so a wrap lands
		 * on 1 instead.  (The counter is 32 bits wide because a
		 * 64-bit compare-and-swap is not guaranteed.)  This is about
		 * as likely as the overflow of predicate cache IDs discussed
		 * in dtrace_predicate_create().
		 */
		if (next == 0)
			next = 1;

		/* Done once the swap observed the value we sampled. */
		if (dtrace_cas32(counter, seen, next) == seen)
			break;
	}
}
|
---|
680 |
|
---|
681 | /*
|
---|
682 | * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
|
---|
683 | * uint8_t, a uint16_t, a uint32_t and a uint64_t.
|
---|
684 | */
|
---|
685 | DTRACE_LOADFUNC(8)
|
---|
686 | DTRACE_LOADFUNC(16)
|
---|
687 | DTRACE_LOADFUNC(32)
|
---|
688 | DTRACE_LOADFUNC(64)
|
---|
689 |
|
---|
690 | static int
|
---|
691 | dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
|
---|
692 | {
|
---|
693 | if (dest < mstate->dtms_scratch_base)
|
---|
694 | return (0);
|
---|
695 |
|
---|
696 | if (dest + size < dest)
|
---|
697 | return (0);
|
---|
698 |
|
---|
699 | if (dest + size > mstate->dtms_scratch_ptr)
|
---|
700 | return (0);
|
---|
701 |
|
---|
702 | return (1);
|
---|
703 | }
|
---|
704 |
|
---|
705 | static int
|
---|
706 | dtrace_canstore_statvar(uint64_t addr, size_t sz,
|
---|
707 | dtrace_statvar_t **svars, int nsvars)
|
---|
708 | {
|
---|
709 | int i;
|
---|
710 |
|
---|
711 | for (i = 0; i < nsvars; i++) {
|
---|
712 | dtrace_statvar_t *svar = svars[i];
|
---|
713 |
|
---|
714 | if (svar == NULL || svar->dtsv_size == 0)
|
---|
715 | continue;
|
---|
716 |
|
---|
717 | if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
|
---|
718 | return (1);
|
---|
719 | }
|
---|
720 |
|
---|
721 | return (0);
|
---|
722 | }
|
---|
723 |
|
---|
724 | /*
|
---|
725 | * Check to see if the address is within a memory region to which a store may
|
---|
726 | * be issued. This includes the DTrace scratch areas, and any DTrace variable
|
---|
727 | * region. The caller of dtrace_canstore() is responsible for performing any
|
---|
728 | * alignment checks that are needed before stores are actually executed.
|
---|
729 | */
|
---|
730 | static int
|
---|
731 | dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
|
---|
732 | dtrace_vstate_t *vstate)
|
---|
733 | {
|
---|
734 | /*
|
---|
735 | * First, check to see if the address is in scratch space...
|
---|
736 | */
|
---|
737 | if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
|
---|
738 | mstate->dtms_scratch_size))
|
---|
739 | return (1);
|
---|
740 |
|
---|
741 | /*
|
---|
742 | * Now check to see if it's a dynamic variable. This check will pick
|
---|
743 | * up both thread-local variables and any global dynamically-allocated
|
---|
744 | * variables.
|
---|
745 | */
|
---|
746 | if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
|
---|
747 | vstate->dtvs_dynvars.dtds_size)) {
|
---|
748 | dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
|
---|
749 | uintptr_t base = (uintptr_t)dstate->dtds_base +
|
---|
750 | (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
|
---|
751 | uintptr_t chunkoffs;
|
---|
752 |
|
---|
753 | /*
|
---|
754 | * Before we assume that we can store here, we need to make
|
---|
755 | * sure that it isn't in our metadata -- storing to our
|
---|
756 | * dynamic variable metadata would corrupt our state. For
|
---|
757 | * the range to not include any dynamic variable metadata,
|
---|
758 | * it must:
|
---|
759 | *
|
---|
760 | * (1) Start above the hash table that is at the base of
|
---|
761 | * the dynamic variable space
|
---|
762 | *
|
---|
763 | * (2) Have a starting chunk offset that is beyond the
|
---|
764 | * dtrace_dynvar_t that is at the base of every chunk
|
---|
765 | *
|
---|
766 | * (3) Not span a chunk boundary
|
---|
767 | *
|
---|
768 | */
|
---|
769 | if (addr < base)
|
---|
770 | return (0);
|
---|
771 |
|
---|
772 | chunkoffs = (addr - base) % dstate->dtds_chunksize;
|
---|
773 |
|
---|
774 | if (chunkoffs < sizeof (dtrace_dynvar_t))
|
---|
775 | return (0);
|
---|
776 |
|
---|
777 | if (chunkoffs + sz > dstate->dtds_chunksize)
|
---|
778 | return (0);
|
---|
779 |
|
---|
780 | return (1);
|
---|
781 | }
|
---|
782 |
|
---|
783 | /*
|
---|
784 | * Finally, check the static local and global variables. These checks
|
---|
785 | * take the longest, so we perform them last.
|
---|
786 | */
|
---|
787 | if (dtrace_canstore_statvar(addr, sz,
|
---|
788 | vstate->dtvs_locals, vstate->dtvs_nlocals))
|
---|
789 | return (1);
|
---|
790 |
|
---|
791 | if (dtrace_canstore_statvar(addr, sz,
|
---|
792 | vstate->dtvs_globals, vstate->dtvs_nglobals))
|
---|
793 | return (1);
|
---|
794 |
|
---|
795 | return (0);
|
---|
796 | }
|
---|
797 |
|
---|
798 |
|
---|
799 | /*
|
---|
800 | * Convenience routine to check to see if the address is within a memory
|
---|
801 | * region in which a load may be issued given the user's privilege level;
|
---|
802 | * if not, it sets the appropriate error flags and loads 'addr' into the
|
---|
803 | * illegal value slot.
|
---|
804 | *
|
---|
805 | * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
|
---|
806 | * appropriate memory access protection.
|
---|
807 | */
|
---|
808 | static int
|
---|
809 | dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
|
---|
810 | dtrace_vstate_t *vstate)
|
---|
811 | {
|
---|
812 | volatile uintptr_t *illval = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval;
|
---|
813 |
|
---|
814 | /*
|
---|
815 | * If we hold the privilege to read from kernel memory, then
|
---|
816 | * everything is readable.
|
---|
817 | */
|
---|
818 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
|
---|
819 | return (1);
|
---|
820 |
|
---|
821 | /*
|
---|
822 | * You can obviously read that which you can store.
|
---|
823 | */
|
---|
824 | if (dtrace_canstore(addr, sz, mstate, vstate))
|
---|
825 | return (1);
|
---|
826 |
|
---|
827 | /*
|
---|
828 | * We're allowed to read from our own string table.
|
---|
829 | */
|
---|
830 | if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
|
---|
831 | mstate->dtms_difo->dtdo_strlen))
|
---|
832 | return (1);
|
---|
833 |
|
---|
834 | DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
|
---|
835 | *illval = addr;
|
---|
836 | return (0);
|
---|
837 | }
|
---|
838 |
|
---|
839 | /*
|
---|
840 | * Convenience routine to check to see if a given string is within a memory
|
---|
841 | * region in which a load may be issued given the user's privilege level;
|
---|
842 | * this exists so that we don't need to issue unnecessary dtrace_strlen()
|
---|
843 | * calls in the event that the user has all privileges.
|
---|
844 | */
|
---|
845 | static int
|
---|
846 | dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
|
---|
847 | dtrace_vstate_t *vstate)
|
---|
848 | {
|
---|
849 | size_t strsz;
|
---|
850 |
|
---|
851 | /*
|
---|
852 | * If we hold the privilege to read from kernel memory, then
|
---|
853 | * everything is readable.
|
---|
854 | */
|
---|
855 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
|
---|
856 | return (1);
|
---|
857 |
|
---|
858 | strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
|
---|
859 | if (dtrace_canload(addr, strsz, mstate, vstate))
|
---|
860 | return (1);
|
---|
861 |
|
---|
862 | return (0);
|
---|
863 | }
|
---|
864 |
|
---|
865 | /*
|
---|
866 | * Convenience routine to check to see if a given variable is within a memory
|
---|
867 | * region in which a load may be issued given the user's privilege level.
|
---|
868 | */
|
---|
869 | static int
|
---|
870 | dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
|
---|
871 | dtrace_vstate_t *vstate)
|
---|
872 | {
|
---|
873 | size_t sz;
|
---|
874 | ASSERT(type->dtdt_flags & DIF_TF_BYREF);
|
---|
875 |
|
---|
876 | /*
|
---|
877 | * If we hold the privilege to read from kernel memory, then
|
---|
878 | * everything is readable.
|
---|
879 | */
|
---|
880 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
|
---|
881 | return (1);
|
---|
882 |
|
---|
883 | if (type->dtdt_kind == DIF_TYPE_STRING)
|
---|
884 | sz = dtrace_strlen(src,
|
---|
885 | vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
|
---|
886 | else
|
---|
887 | sz = type->dtdt_size;
|
---|
888 |
|
---|
889 | return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
|
---|
890 | }
|
---|
891 |
|
---|
892 | /*
|
---|
893 | * Compare two strings using safe loads.
|
---|
894 | */
|
---|
895 | static int
|
---|
896 | dtrace_strncmp(char *s1, char *s2, size_t limit)
|
---|
897 | {
|
---|
898 | uint8_t c1, c2;
|
---|
899 | volatile uint16_t *flags;
|
---|
900 |
|
---|
901 | if (s1 == s2 || limit == 0)
|
---|
902 | return (0);
|
---|
903 |
|
---|
904 | flags = (volatile uint16_t *)&cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
905 |
|
---|
906 | do {
|
---|
907 | if (s1 == NULL) {
|
---|
908 | c1 = '\0';
|
---|
909 | } else {
|
---|
910 | c1 = dtrace_load8((uintptr_t)s1++);
|
---|
911 | }
|
---|
912 |
|
---|
913 | if (s2 == NULL) {
|
---|
914 | c2 = '\0';
|
---|
915 | } else {
|
---|
916 | c2 = dtrace_load8((uintptr_t)s2++);
|
---|
917 | }
|
---|
918 |
|
---|
919 | if (c1 != c2)
|
---|
920 | return (c1 - c2);
|
---|
921 | } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));
|
---|
922 |
|
---|
923 | return (0);
|
---|
924 | }
|
---|
925 |
|
---|
/*
 * Compute strlen(s) using safe memory accesses.
 *
 *   s    - string to measure; read one byte at a time via dtrace_load8()
 *   lim  - maximum number of bytes to examine, which guarantees completion
 *
 * Returns the number of bytes preceding the first NUL byte, or lim if no
 * NUL byte is found within the first lim bytes.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	/*
	 * The counter has the same type as the limit and the return value.
	 * A narrower counter could never compare equal to a limit larger
	 * than its own maximum, which would silently defeat the bound.
	 */
	size_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}
942 |
|
---|
943 | /*
|
---|
944 | * Check if an address falls within a toxic region.
|
---|
945 | */
|
---|
946 | static int
|
---|
947 | dtrace_istoxic(uintptr_t kaddr, size_t size)
|
---|
948 | {
|
---|
949 | uintptr_t taddr, tsize;
|
---|
950 | int i;
|
---|
951 |
|
---|
952 | for (i = 0; i < dtrace_toxranges; i++) {
|
---|
953 | taddr = dtrace_toxrange[i].dtt_base;
|
---|
954 | tsize = dtrace_toxrange[i].dtt_limit - taddr;
|
---|
955 |
|
---|
956 | if (kaddr - taddr < tsize) {
|
---|
957 | DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
|
---|
958 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = kaddr;
|
---|
959 | return (1);
|
---|
960 | }
|
---|
961 |
|
---|
962 | if (taddr - kaddr < size) {
|
---|
963 | DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
|
---|
964 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = taddr;
|
---|
965 | return (1);
|
---|
966 | }
|
---|
967 | }
|
---|
968 |
|
---|
969 | return (0);
|
---|
970 | }
|
---|
971 |
|
---|
/*
 * Copy len bytes from src to dst using safe memory accesses.  The source is
 * assumed to be unsafe memory named by the DIF program and is read through
 * dtrace_load8(); the destination is assumed to be DTrace-managed memory
 * that may be stored to directly.  As with standard bcopy, overlapping
 * copies are handled properly: the copy runs forwards when dst is at or
 * below src and backwards otherwise.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	uint8_t *to = dst;
	const uint8_t *from = src;

	if (len == 0)
		return;

	if (to <= from) {
		while (len-- != 0)
			*to++ = dtrace_load8((uintptr_t)from++);
		return;
	}

	/* Destination is above source: copy from the tail downwards. */
	from += len;
	to += len;

	while (len-- != 0)
		*--to = dtrace_load8((uintptr_t)--from);
}
999 |
|
---|
/*
 * Copy bytes from src to dst using safe memory accesses, stopping after len
 * bytes or after a NUL byte has been copied, whichever comes first.  The
 * source is assumed to be unsafe memory named by the DIF program; the
 * destination is assumed to be DTrace-managed memory that may be stored to
 * directly.  Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	uint8_t *to = dst;
	const uint8_t *from = src;

	for (; len != 0; len--) {
		uint8_t ch = dtrace_load8((uintptr_t)from++);

		*to++ = ch;

		if (ch == '\0')
			break;
	}
}
1019 |
|
---|
1020 | /*
|
---|
1021 | * Copy src to dst, deriving the size and type from the specified (BYREF)
|
---|
1022 | * variable type. The src is assumed to be unsafe memory specified by the DIF
|
---|
1023 | * program. The dst is assumed to be DTrace variable memory that is of the
|
---|
1024 | * specified type; we assume that we can store to directly.
|
---|
1025 | */
|
---|
1026 | static void
|
---|
1027 | dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
|
---|
1028 | {
|
---|
1029 | ASSERT(type->dtdt_flags & DIF_TF_BYREF);
|
---|
1030 |
|
---|
1031 | if (type->dtdt_kind == DIF_TYPE_STRING) {
|
---|
1032 | dtrace_strcpy(src, dst, type->dtdt_size);
|
---|
1033 | } else {
|
---|
1034 | dtrace_bcopy(src, dst, type->dtdt_size);
|
---|
1035 | }
|
---|
1036 | }
|
---|
1037 |
|
---|
1038 | /*
|
---|
1039 | * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
|
---|
1040 | * unsafe memory specified by the DIF program. The s2 data is assumed to be
|
---|
1041 | * safe memory that we can access directly because it is managed by DTrace.
|
---|
1042 | */
|
---|
1043 | static int
|
---|
1044 | dtrace_bcmp(const void *s1, const void *s2, size_t len)
|
---|
1045 | {
|
---|
1046 | volatile uint16_t *flags;
|
---|
1047 |
|
---|
1048 | flags = (volatile uint16_t *)&cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
1049 |
|
---|
1050 | if (s1 == s2)
|
---|
1051 | return (0);
|
---|
1052 |
|
---|
1053 | if (s1 == NULL || s2 == NULL)
|
---|
1054 | return (1);
|
---|
1055 |
|
---|
1056 | if (s1 != s2 && len != 0) {
|
---|
1057 | const uint8_t *ps1 = s1;
|
---|
1058 | const uint8_t *ps2 = s2;
|
---|
1059 |
|
---|
1060 | do {
|
---|
1061 | if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
|
---|
1062 | return (1);
|
---|
1063 | } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
|
---|
1064 | }
|
---|
1065 | return (0);
|
---|
1066 | }
|
---|
1067 |
|
---|
/*
 * Zero len bytes starting at dst with a plain byte-by-byte loop.  This is
 * meant for safe, DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	unsigned char *bytes = dst;
	size_t i;

	for (i = 0; i < len; i++)
		bytes[i] = 0;
}
1080 |
|
---|
/*
 * Add two 128-bit values, each held as two 64-bit words with the low word
 * at index 0, and store the result in sum.  The result is computed before
 * anything is stored, so sum may alias either addend.
 */
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	/* The low-word sum wrapped iff it is smaller than an operand. */
	uint64_t carry = (lo < addend1[0] || lo < addend2[0]) ? 1 : 0;
	uint64_t hi = addend1[1] + addend2[1] + carry;

	sum[0] = lo;
	sum[1] = hi;
}
1093 |
|
---|
/*
 * Shift the 128-bit value in a by b bits, in place.  The value is held as
 * two 64-bit words with the low word in a[0] and the high word in a[1].
 *
 *   b > 0   shift left by b bits
 *   b < 0   logical shift right by -b bits
 *   b == 0  leave the value untouched
 *
 * Shifting by 128 bits or more in either direction yields zero.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	unsigned int n;

	if (b == 0)
		return;

	/*
	 * Take the magnitude in unsigned arithmetic so that the most
	 * negative int does not overflow on negation.
	 */
	n = (b < 0) ? 0U - (unsigned int)b : (unsigned int)b;

	/*
	 * Shifting a 64-bit word by 64 or more is undefined in C, so the
	 * all-bits-shifted-out case is handled explicitly.
	 */
	if (n >= 128) {
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		if (n >= 64) {
			a[0] = a[1] >> (n - 64);
			a[1] = 0;
		} else {
			/*
			 * The low n bits of the high word become the high n
			 * bits of the low word.  No mask is needed: the left
			 * shift by (64 - n) already discards everything else.
			 */
			a[0] = (a[0] >> n) | (a[1] << (64 - n));
			a[1] >>= n;
		}
	} else {
		if (n >= 64) {
			a[1] = a[0] << (n - 64);
			a[0] = 0;
		} else {
			/*
			 * The high n bits of the low word become the low n
			 * bits of the high word.
			 */
			a[1] = (a[1] << n) | (a[0] >> (64 - n));
			a[0] <<= n;
		}
	}
}
1130 |
|
---|
1131 | /*
|
---|
1132 | * The basic idea is to break the 2 64-bit values into 4 32-bit values,
|
---|
1133 | * use native multiplication on those, and then re-combine into the
|
---|
1134 | * resulting 128-bit value.
|
---|
1135 | *
|
---|
1136 | * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
|
---|
1137 | * hi1 * hi2 << 64 +
|
---|
1138 | * hi1 * lo2 << 32 +
|
---|
1139 | * hi2 * lo1 << 32 +
|
---|
1140 | * lo1 * lo2
|
---|
1141 | */
|
---|
1142 | static void
|
---|
1143 | dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
|
---|
1144 | {
|
---|
1145 | uint64_t hi1, hi2, lo1, lo2;
|
---|
1146 | uint64_t tmp[2];
|
---|
1147 |
|
---|
1148 | hi1 = factor1 >> 32;
|
---|
1149 | hi2 = factor2 >> 32;
|
---|
1150 |
|
---|
1151 | lo1 = factor1 & DT_MASK_LO;
|
---|
1152 | lo2 = factor2 & DT_MASK_LO;
|
---|
1153 |
|
---|
1154 | product[0] = lo1 * lo2;
|
---|
1155 | product[1] = hi1 * hi2;
|
---|
1156 |
|
---|
1157 | tmp[0] = hi1 * lo2;
|
---|
1158 | tmp[1] = 0;
|
---|
1159 | dtrace_shift_128(tmp, 32);
|
---|
1160 | dtrace_add_128(product, tmp, product);
|
---|
1161 |
|
---|
1162 | tmp[0] = hi2 * lo1;
|
---|
1163 | tmp[1] = 0;
|
---|
1164 | dtrace_shift_128(tmp, 32);
|
---|
1165 | dtrace_add_128(product, tmp, product);
|
---|
1166 | }
|
---|
1167 |
|
---|
1168 | /*
|
---|
1169 | * This privilege check should be used by actions and subroutines to
|
---|
1170 | * verify that the user credentials of the process that enabled the
|
---|
1171 | * invoking ECB match the target credentials
|
---|
1172 | */
|
---|
1173 | static int
|
---|
1174 | dtrace_priv_proc_common_user(dtrace_state_t *state)
|
---|
1175 | {
|
---|
1176 | cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
|
---|
1177 |
|
---|
1178 | /*
|
---|
1179 | * We should always have a non-NULL state cred here, since if cred
|
---|
1180 | * is null (anonymous tracing), we fast-path bypass this routine.
|
---|
1181 | */
|
---|
1182 | ASSERT(s_cr != NULL);
|
---|
1183 |
|
---|
1184 | if ((cr = CRED()) != NULL &&
|
---|
1185 | s_cr->cr_uid == cr->cr_uid &&
|
---|
1186 | s_cr->cr_uid == cr->cr_ruid &&
|
---|
1187 | s_cr->cr_uid == cr->cr_suid &&
|
---|
1188 | s_cr->cr_gid == cr->cr_gid &&
|
---|
1189 | s_cr->cr_gid == cr->cr_rgid &&
|
---|
1190 | s_cr->cr_gid == cr->cr_sgid)
|
---|
1191 | return (1);
|
---|
1192 |
|
---|
1193 | return (0);
|
---|
1194 | }
|
---|
1195 |
|
---|
1196 | /*
|
---|
1197 | * This privilege check should be used by actions and subroutines to
|
---|
1198 | * verify that the zone of the process that enabled the invoking ECB
|
---|
1199 | * matches the target credentials
|
---|
1200 | */
|
---|
1201 | static int
|
---|
1202 | dtrace_priv_proc_common_zone(dtrace_state_t *state)
|
---|
1203 | {
|
---|
1204 | cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
|
---|
1205 |
|
---|
1206 | /*
|
---|
1207 | * We should always have a non-NULL state cred here, since if cred
|
---|
1208 | * is null (anonymous tracing), we fast-path bypass this routine.
|
---|
1209 | */
|
---|
1210 | ASSERT(s_cr != NULL);
|
---|
1211 |
|
---|
1212 | if ((cr = CRED()) != NULL &&
|
---|
1213 | s_cr->cr_zone == cr->cr_zone)
|
---|
1214 | return (1);
|
---|
1215 |
|
---|
1216 | return (0);
|
---|
1217 | }
|
---|
1218 |
|
---|
1219 | /*
|
---|
1220 | * This privilege check should be used by actions and subroutines to
|
---|
1221 | * verify that the process has not setuid or changed credentials.
|
---|
1222 | */
|
---|
1223 | static int
|
---|
1224 | dtrace_priv_proc_common_nocd(VBDTVOID)
|
---|
1225 | {
|
---|
1226 | #ifndef VBOX
|
---|
1227 | proc_t *proc;
|
---|
1228 |
|
---|
1229 | if ((proc = VBDT_GET_PROC()) != NULL &&
|
---|
1230 | !(proc->p_flag & SNOCD))
|
---|
1231 | return (1);
|
---|
1232 |
|
---|
1233 | return (0);
|
---|
1234 | #else
|
---|
1235 | return (1);
|
---|
1236 | #endif
|
---|
1237 | }
|
---|
1238 |
|
---|
1239 | #ifndef VBOX
|
---|
1240 | static int
|
---|
1241 | dtrace_priv_proc_destructive(dtrace_state_t *state)
|
---|
1242 | {
|
---|
1243 | int action = state->dts_cred.dcr_action;
|
---|
1244 |
|
---|
1245 | if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
|
---|
1246 | dtrace_priv_proc_common_zone(state) == 0)
|
---|
1247 | goto bad;
|
---|
1248 |
|
---|
1249 | if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
|
---|
1250 | dtrace_priv_proc_common_user(state) == 0)
|
---|
1251 | goto bad;
|
---|
1252 |
|
---|
1253 | if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
|
---|
1254 | dtrace_priv_proc_common_nocd() == 0)
|
---|
1255 | goto bad;
|
---|
1256 |
|
---|
1257 | return (1);
|
---|
1258 |
|
---|
1259 | bad:
|
---|
1260 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
|
---|
1261 |
|
---|
1262 | return (0);
|
---|
1263 | }
|
---|
1264 | #endif /* !VBOX */
|
---|
1265 |
|
---|
1266 | static int
|
---|
1267 | dtrace_priv_proc_control(dtrace_state_t *state)
|
---|
1268 | {
|
---|
1269 | if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
|
---|
1270 | return (1);
|
---|
1271 |
|
---|
1272 | if (dtrace_priv_proc_common_zone(state) &&
|
---|
1273 | dtrace_priv_proc_common_user(state) &&
|
---|
1274 | dtrace_priv_proc_common_nocd())
|
---|
1275 | return (1);
|
---|
1276 |
|
---|
1277 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
|
---|
1278 |
|
---|
1279 | return (0);
|
---|
1280 | }
|
---|
1281 |
|
---|
1282 | static int
|
---|
1283 | dtrace_priv_proc(dtrace_state_t *state)
|
---|
1284 | {
|
---|
1285 | if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
|
---|
1286 | return (1);
|
---|
1287 |
|
---|
1288 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
|
---|
1289 |
|
---|
1290 | return (0);
|
---|
1291 | }
|
---|
1292 |
|
---|
1293 | static int
|
---|
1294 | dtrace_priv_kernel(dtrace_state_t *state)
|
---|
1295 | {
|
---|
1296 | if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
|
---|
1297 | return (1);
|
---|
1298 |
|
---|
1299 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
|
---|
1300 |
|
---|
1301 | return (0);
|
---|
1302 | }
|
---|
1303 |
|
---|
1304 | static int
|
---|
1305 | dtrace_priv_kernel_destructive(dtrace_state_t *state)
|
---|
1306 | {
|
---|
1307 | if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
|
---|
1308 | return (1);
|
---|
1309 |
|
---|
1310 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
|
---|
1311 |
|
---|
1312 | return (0);
|
---|
1313 | }
|
---|
1314 |
|
---|
1315 | /*
|
---|
1316 | * Note: not called from probe context. This function is called
|
---|
1317 | * asynchronously (and at a regular interval) from outside of probe context to
|
---|
1318 | * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
|
---|
1319 | * cleaning is explained in detail in <sys/dtrace_impl.h>.
|
---|
1320 | */
|
---|
1321 | VBDTSTATIC void
|
---|
1322 | dtrace_dynvar_clean(dtrace_dstate_t *dstate)
|
---|
1323 | {
|
---|
1324 | dtrace_dynvar_t *dirty;
|
---|
1325 | dtrace_dstate_percpu_t *dcpu;
|
---|
1326 | dtrace_dynvar_t **rinsep;
|
---|
1327 | int i, j, work = 0;
|
---|
1328 |
|
---|
1329 | for (i = 0; i < NCPU; i++) {
|
---|
1330 | dcpu = &dstate->dtds_percpu[i];
|
---|
1331 | rinsep = &dcpu->dtdsc_rinsing;
|
---|
1332 |
|
---|
1333 | /*
|
---|
1334 | * If the dirty list is NULL, there is no dirty work to do.
|
---|
1335 | */
|
---|
1336 | if (dcpu->dtdsc_dirty == NULL)
|
---|
1337 | continue;
|
---|
1338 |
|
---|
1339 | if (dcpu->dtdsc_rinsing != NULL) {
|
---|
1340 | /*
|
---|
1341 | * If the rinsing list is non-NULL, then it is because
|
---|
1342 | * this CPU was selected to accept another CPU's
|
---|
1343 | * dirty list -- and since that time, dirty buffers
|
---|
1344 | * have accumulated. This is a highly unlikely
|
---|
1345 | * condition, but we choose to ignore the dirty
|
---|
1346 | * buffers -- they'll be picked up a future cleanse.
|
---|
1347 | */
|
---|
1348 | continue;
|
---|
1349 | }
|
---|
1350 |
|
---|
1351 | if (dcpu->dtdsc_clean != NULL) {
|
---|
1352 | /*
|
---|
1353 | * If the clean list is non-NULL, then we're in a
|
---|
1354 | * situation where a CPU has done deallocations (we
|
---|
1355 | * have a non-NULL dirty list) but no allocations (we
|
---|
1356 | * also have a non-NULL clean list). We can't simply
|
---|
1357 | * move the dirty list into the clean list on this
|
---|
1358 | * CPU, yet we also don't want to allow this condition
|
---|
1359 | * to persist, lest a short clean list prevent a
|
---|
1360 | * massive dirty list from being cleaned (which in
|
---|
1361 | * turn could lead to otherwise avoidable dynamic
|
---|
1362 | * drops). To deal with this, we look for some CPU
|
---|
1363 | * with a NULL clean list, NULL dirty list, and NULL
|
---|
1364 | * rinsing list -- and then we borrow this CPU to
|
---|
1365 | * rinse our dirty list.
|
---|
1366 | */
|
---|
1367 | for (j = 0; j < NCPU; j++) {
|
---|
1368 | dtrace_dstate_percpu_t *rinser;
|
---|
1369 |
|
---|
1370 | rinser = &dstate->dtds_percpu[j];
|
---|
1371 |
|
---|
1372 | if (rinser->dtdsc_rinsing != NULL)
|
---|
1373 | continue;
|
---|
1374 |
|
---|
1375 | if (rinser->dtdsc_dirty != NULL)
|
---|
1376 | continue;
|
---|
1377 |
|
---|
1378 | if (rinser->dtdsc_clean != NULL)
|
---|
1379 | continue;
|
---|
1380 |
|
---|
1381 | rinsep = &rinser->dtdsc_rinsing;
|
---|
1382 | break;
|
---|
1383 | }
|
---|
1384 |
|
---|
1385 | if (j == NCPU) {
|
---|
1386 | /*
|
---|
1387 | * We were unable to find another CPU that
|
---|
1388 | * could accept this dirty list -- we are
|
---|
1389 | * therefore unable to clean it now.
|
---|
1390 | */
|
---|
1391 | dtrace_dynvar_failclean++;
|
---|
1392 | continue;
|
---|
1393 | }
|
---|
1394 | }
|
---|
1395 |
|
---|
1396 | work = 1;
|
---|
1397 |
|
---|
1398 | /*
|
---|
1399 | * Atomically move the dirty list aside.
|
---|
1400 | */
|
---|
1401 | do {
|
---|
1402 | dirty = dcpu->dtdsc_dirty;
|
---|
1403 |
|
---|
1404 | /*
|
---|
1405 | * Before we zap the dirty list, set the rinsing list.
|
---|
1406 | * (This allows for a potential assertion in
|
---|
1407 | * dtrace_dynvar(): if a free dynamic variable appears
|
---|
1408 | * on a hash chain, either the dirty list or the
|
---|
1409 | * rinsing list for some CPU must be non-NULL.)
|
---|
1410 | */
|
---|
1411 | *rinsep = dirty;
|
---|
1412 | dtrace_membar_producer();
|
---|
1413 | } while (dtrace_casptr(&dcpu->dtdsc_dirty,
|
---|
1414 | dirty, NULL) != dirty);
|
---|
1415 | }
|
---|
1416 |
|
---|
1417 | if (!work) {
|
---|
1418 | /*
|
---|
1419 | * We have no work to do; we can simply return.
|
---|
1420 | */
|
---|
1421 | return;
|
---|
1422 | }
|
---|
1423 |
|
---|
1424 | dtrace_sync();
|
---|
1425 |
|
---|
1426 | for (i = 0; i < NCPU; i++) {
|
---|
1427 | dcpu = &dstate->dtds_percpu[i];
|
---|
1428 |
|
---|
1429 | if (dcpu->dtdsc_rinsing == NULL)
|
---|
1430 | continue;
|
---|
1431 |
|
---|
1432 | /*
|
---|
1433 | * We are now guaranteed that no hash chain contains a pointer
|
---|
1434 | * into this dirty list; we can make it clean.
|
---|
1435 | */
|
---|
1436 | ASSERT(dcpu->dtdsc_clean == NULL);
|
---|
1437 | dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
|
---|
1438 | dcpu->dtdsc_rinsing = NULL;
|
---|
1439 | }
|
---|
1440 |
|
---|
1441 | /*
|
---|
1442 | * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
|
---|
1443 | * sure that all CPUs have seen all of the dtdsc_clean pointers.
|
---|
1444 | * This prevents a race whereby a CPU incorrectly decides that
|
---|
1445 | * the state should be something other than DTRACE_DSTATE_CLEAN
|
---|
1446 | * after dtrace_dynvar_clean() has completed.
|
---|
1447 | */
|
---|
1448 | dtrace_sync();
|
---|
1449 |
|
---|
1450 | dstate->dtds_state = DTRACE_DSTATE_CLEAN;
|
---|
1451 | }
|
---|
1452 |
|
---|
1453 | /*
|
---|
1454 | * Depending on the value of the op parameter, this function looks-up,
|
---|
1455 | * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
|
---|
1456 | * allocation is requested, this function will return a pointer to a
|
---|
1457 | * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
|
---|
1458 | * variable can be allocated. If NULL is returned, the appropriate counter
|
---|
1459 | * will be incremented.
|
---|
1460 | */
|
---|
1461 | VBDTSTATIC dtrace_dynvar_t *
|
---|
1462 | dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
|
---|
1463 | dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
|
---|
1464 | dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
|
---|
1465 | {
|
---|
1466 | uint64_t hashval = DTRACE_DYNHASH_VALID;
|
---|
1467 | dtrace_dynhash_t *hash = dstate->dtds_hash;
|
---|
1468 | dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
|
---|
1469 | processorid_t me = VBDT_GET_CPUID(), cpu = me;
|
---|
1470 | dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
|
---|
1471 | size_t bucket, ksize;
|
---|
1472 | size_t chunksize = dstate->dtds_chunksize;
|
---|
1473 | uintptr_t kdata, lock, nstate;
|
---|
1474 | uint_t i;
|
---|
1475 |
|
---|
1476 | ASSERT(nkeys != 0);
|
---|
1477 |
|
---|
1478 | /*
|
---|
1479 | * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
|
---|
1480 | * algorithm. For the by-value portions, we perform the algorithm in
|
---|
1481 | * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
|
---|
1482 | * bit, and seems to have only a minute effect on distribution. For
|
---|
1483 | * the by-reference data, we perform "One-at-a-time" iterating (safely)
|
---|
1484 | * over each referenced byte. It's painful to do this, but it's much
|
---|
1485 | * better than pathological hash distribution. The efficacy of the
|
---|
1486 | * hashing algorithm (and a comparison with other algorithms) may be
|
---|
1487 | * found by running the ::dtrace_dynstat MDB dcmd.
|
---|
1488 | */
|
---|
1489 | for (i = 0; i < nkeys; i++) {
|
---|
1490 | if (key[i].dttk_size == 0) {
|
---|
1491 | uint64_t val = key[i].dttk_value;
|
---|
1492 |
|
---|
1493 | hashval += (val >> 48) & 0xffff;
|
---|
1494 | hashval += (hashval << 10);
|
---|
1495 | hashval ^= (hashval >> 6);
|
---|
1496 |
|
---|
1497 | hashval += (val >> 32) & 0xffff;
|
---|
1498 | hashval += (hashval << 10);
|
---|
1499 | hashval ^= (hashval >> 6);
|
---|
1500 |
|
---|
1501 | hashval += (val >> 16) & 0xffff;
|
---|
1502 | hashval += (hashval << 10);
|
---|
1503 | hashval ^= (hashval >> 6);
|
---|
1504 |
|
---|
1505 | hashval += val & 0xffff;
|
---|
1506 | hashval += (hashval << 10);
|
---|
1507 | hashval ^= (hashval >> 6);
|
---|
1508 | } else {
|
---|
1509 | /*
|
---|
1510 | * This is incredibly painful, but it beats the hell
|
---|
1511 | * out of the alternative.
|
---|
1512 | */
|
---|
1513 | uint64_t j, size = key[i].dttk_size;
|
---|
1514 | uintptr_t base = (uintptr_t)key[i].dttk_value;
|
---|
1515 |
|
---|
1516 | if (!dtrace_canload(base, size, mstate, vstate))
|
---|
1517 | break;
|
---|
1518 |
|
---|
1519 | for (j = 0; j < size; j++) {
|
---|
1520 | hashval += dtrace_load8(base + j);
|
---|
1521 | hashval += (hashval << 10);
|
---|
1522 | hashval ^= (hashval >> 6);
|
---|
1523 | }
|
---|
1524 | }
|
---|
1525 | }
|
---|
1526 |
|
---|
1527 | if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
|
---|
1528 | return (NULL);
|
---|
1529 |
|
---|
1530 | hashval += (hashval << 3);
|
---|
1531 | hashval ^= (hashval >> 11);
|
---|
1532 | hashval += (hashval << 15);
|
---|
1533 |
|
---|
1534 | /*
|
---|
1535 | * There is a remote chance (ideally, 1 in 2^31) that our hashval
|
---|
1536 | * comes out to be one of our two sentinel hash values. If this
|
---|
1537 | * actually happens, we set the hashval to be a value known to be a
|
---|
1538 | * non-sentinel value.
|
---|
1539 | */
|
---|
1540 | if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
|
---|
1541 | hashval = DTRACE_DYNHASH_VALID;
|
---|
1542 |
|
---|
1543 | /*
|
---|
1544 | * Yes, it's painful to do a divide here. If the cycle count becomes
|
---|
1545 | * important here, tricks can be pulled to reduce it. (However, it's
|
---|
1546 | * critical that hash collisions be kept to an absolute minimum;
|
---|
1547 | * they're much more painful than a divide.) It's better to have a
|
---|
1548 | * solution that generates few collisions and still keeps things
|
---|
1549 | * relatively simple.
|
---|
1550 | */
|
---|
1551 | bucket = hashval % dstate->dtds_hashsize;
|
---|
1552 |
|
---|
1553 | if (op == DTRACE_DYNVAR_DEALLOC) {
|
---|
1554 | volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;
|
---|
1555 |
|
---|
1556 | for (;;) {
|
---|
1557 | while ((lock = *lockp) & 1)
|
---|
1558 | continue;
|
---|
1559 |
|
---|
1560 | if (dtrace_casptr((void *)lockp,
|
---|
1561 | (void *)lock, (void *)(lock + 1)) == (void *)lock)
|
---|
1562 | break;
|
---|
1563 | }
|
---|
1564 |
|
---|
1565 | dtrace_membar_producer();
|
---|
1566 | }
|
---|
1567 |
|
---|
1568 | top:
|
---|
1569 | prev = NULL;
|
---|
1570 | lock = hash[bucket].dtdh_lock;
|
---|
1571 |
|
---|
1572 | dtrace_membar_consumer();
|
---|
1573 |
|
---|
1574 | start = hash[bucket].dtdh_chain;
|
---|
1575 | ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
|
---|
1576 | start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
|
---|
1577 | op != DTRACE_DYNVAR_DEALLOC));
|
---|
1578 |
|
---|
1579 | for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
|
---|
1580 | dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
|
---|
1581 | dtrace_key_t *dkey = &dtuple->dtt_key[0];
|
---|
1582 |
|
---|
1583 | if (dvar->dtdv_hashval != hashval) {
|
---|
1584 | if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
|
---|
1585 | /*
|
---|
1586 | * We've reached the sink, and therefore the
|
---|
1587 | * end of the hash chain; we can kick out of
|
---|
1588 | * the loop knowing that we have seen a valid
|
---|
1589 | * snapshot of state.
|
---|
1590 | */
|
---|
1591 | ASSERT(dvar->dtdv_next == NULL);
|
---|
1592 | ASSERT(dvar == &dtrace_dynhash_sink);
|
---|
1593 | break;
|
---|
1594 | }
|
---|
1595 |
|
---|
1596 | if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
|
---|
1597 | /*
|
---|
1598 | * We've gone off the rails: somewhere along
|
---|
1599 | * the line, one of the members of this hash
|
---|
1600 | * chain was deleted. Note that we could also
|
---|
1601 | * detect this by simply letting this loop run
|
---|
1602 | * to completion, as we would eventually hit
|
---|
1603 | * the end of the dirty list. However, we
|
---|
1604 | * want to avoid running the length of the
|
---|
1605 | * dirty list unnecessarily (it might be quite
|
---|
1606 | * long), so we catch this as early as
|
---|
1607 | * possible by detecting the hash marker. In
|
---|
1608 | * this case, we simply set dvar to NULL and
|
---|
1609 | * break; the conditional after the loop will
|
---|
1610 | * send us back to top.
|
---|
1611 | */
|
---|
1612 | dvar = NULL;
|
---|
1613 | break;
|
---|
1614 | }
|
---|
1615 |
|
---|
1616 | goto next;
|
---|
1617 | }
|
---|
1618 |
|
---|
1619 | if (dtuple->dtt_nkeys != nkeys)
|
---|
1620 | goto next;
|
---|
1621 |
|
---|
1622 | for (i = 0; i < nkeys; i++, dkey++) {
|
---|
1623 | if (dkey->dttk_size != key[i].dttk_size)
|
---|
1624 | goto next; /* size or type mismatch */
|
---|
1625 |
|
---|
1626 | if (dkey->dttk_size != 0) {
|
---|
1627 | if (dtrace_bcmp(
|
---|
1628 | (void *)(uintptr_t)key[i].dttk_value,
|
---|
1629 | (void *)(uintptr_t)dkey->dttk_value,
|
---|
1630 | dkey->dttk_size))
|
---|
1631 | goto next;
|
---|
1632 | } else {
|
---|
1633 | if (dkey->dttk_value != key[i].dttk_value)
|
---|
1634 | goto next;
|
---|
1635 | }
|
---|
1636 | }
|
---|
1637 |
|
---|
1638 | if (op != DTRACE_DYNVAR_DEALLOC)
|
---|
1639 | return (dvar);
|
---|
1640 |
|
---|
1641 | ASSERT(dvar->dtdv_next == NULL ||
|
---|
1642 | dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);
|
---|
1643 |
|
---|
1644 | if (prev != NULL) {
|
---|
1645 | ASSERT(hash[bucket].dtdh_chain != dvar);
|
---|
1646 | ASSERT(start != dvar);
|
---|
1647 | ASSERT(prev->dtdv_next == dvar);
|
---|
1648 | prev->dtdv_next = dvar->dtdv_next;
|
---|
1649 | } else {
|
---|
1650 | if (dtrace_casptr(&hash[bucket].dtdh_chain,
|
---|
1651 | start, dvar->dtdv_next) != start) {
|
---|
1652 | /*
|
---|
1653 | * We have failed to atomically swing the
|
---|
1654 | * hash table head pointer, presumably because
|
---|
1655 | * of a conflicting allocation on another CPU.
|
---|
1656 | * We need to reread the hash chain and try
|
---|
1657 | * again.
|
---|
1658 | */
|
---|
1659 | goto top;
|
---|
1660 | }
|
---|
1661 | }
|
---|
1662 |
|
---|
1663 | dtrace_membar_producer();
|
---|
1664 |
|
---|
1665 | /*
|
---|
1666 | * Now set the hash value to indicate that it's free.
|
---|
1667 | */
|
---|
1668 | ASSERT(hash[bucket].dtdh_chain != dvar);
|
---|
1669 | dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
|
---|
1670 |
|
---|
1671 | dtrace_membar_producer();
|
---|
1672 |
|
---|
1673 | /*
|
---|
1674 | * Set the next pointer to point at the dirty list, and
|
---|
1675 | * atomically swing the dirty pointer to the newly freed dvar.
|
---|
1676 | */
|
---|
1677 | do {
|
---|
1678 | next = dcpu->dtdsc_dirty;
|
---|
1679 | dvar->dtdv_next = next;
|
---|
1680 | } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);
|
---|
1681 |
|
---|
1682 | /*
|
---|
1683 | * Finally, unlock this hash bucket.
|
---|
1684 | */
|
---|
1685 | ASSERT(hash[bucket].dtdh_lock == lock);
|
---|
1686 | ASSERT(lock & 1);
|
---|
1687 | hash[bucket].dtdh_lock++;
|
---|
1688 |
|
---|
1689 | return (NULL);
|
---|
1690 | next:
|
---|
1691 | prev = dvar;
|
---|
1692 | continue;
|
---|
1693 | }
|
---|
1694 |
|
---|
1695 | if (dvar == NULL) {
|
---|
1696 | /*
|
---|
1697 | * If dvar is NULL, it is because we went off the rails:
|
---|
1698 | * one of the elements that we traversed in the hash chain
|
---|
1699 | * was deleted while we were traversing it. In this case,
|
---|
1700 | * we assert that we aren't doing a dealloc (deallocs lock
|
---|
1701 | * the hash bucket to prevent themselves from racing with
|
---|
1702 | * one another), and retry the hash chain traversal.
|
---|
1703 | */
|
---|
1704 | ASSERT(op != DTRACE_DYNVAR_DEALLOC);
|
---|
1705 | goto top;
|
---|
1706 | }
|
---|
1707 |
|
---|
1708 | if (op != DTRACE_DYNVAR_ALLOC) {
|
---|
1709 | /*
|
---|
1710 | * If we are not to allocate a new variable, we want to
|
---|
1711 | * return NULL now. Before we return, check that the value
|
---|
1712 | * of the lock word hasn't changed. If it has, we may have
|
---|
1713 | * seen an inconsistent snapshot.
|
---|
1714 | */
|
---|
1715 | if (op == DTRACE_DYNVAR_NOALLOC) {
|
---|
1716 | if (hash[bucket].dtdh_lock != lock)
|
---|
1717 | goto top;
|
---|
1718 | } else {
|
---|
1719 | ASSERT(op == DTRACE_DYNVAR_DEALLOC);
|
---|
1720 | ASSERT(hash[bucket].dtdh_lock == lock);
|
---|
1721 | ASSERT(lock & 1);
|
---|
1722 | hash[bucket].dtdh_lock++;
|
---|
1723 | }
|
---|
1724 |
|
---|
1725 | return (NULL);
|
---|
1726 | }
|
---|
1727 |
|
---|
1728 | /*
|
---|
1729 | * We need to allocate a new dynamic variable. The size we need is the
|
---|
1730 | * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
|
---|
1731 | * size of any auxiliary key data (rounded up to 8-byte alignment) plus
|
---|
1732 | * the size of any referred-to data (dsize). We then round the final
|
---|
1733 | * size up to the chunksize for allocation.
|
---|
1734 | */
|
---|
1735 | for (ksize = 0, i = 0; i < nkeys; i++)
|
---|
1736 | ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
|
---|
1737 |
|
---|
1738 | /*
|
---|
1739 | * This should be pretty much impossible, but could happen if, say,
|
---|
1740 | * strange DIF specified the tuple. Ideally, this should be an
|
---|
1741 | * assertion and not an error condition -- but that requires that the
|
---|
1742 | * chunksize calculation in dtrace_difo_chunksize() be absolutely
|
---|
1743 | * bullet-proof. (That is, it must not be able to be fooled by
|
---|
1744 | * malicious DIF.) Given the lack of backwards branches in DIF,
|
---|
1745 | * solving this would presumably not amount to solving the Halting
|
---|
1746 | * Problem -- but it still seems awfully hard.
|
---|
1747 | */
|
---|
1748 | if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
|
---|
1749 | ksize + dsize > chunksize) {
|
---|
1750 | dcpu->dtdsc_drops++;
|
---|
1751 | return (NULL);
|
---|
1752 | }
|
---|
1753 |
|
---|
1754 | nstate = DTRACE_DSTATE_EMPTY;
|
---|
1755 |
|
---|
1756 | do {
|
---|
1757 | retry:
|
---|
1758 | free = dcpu->dtdsc_free;
|
---|
1759 |
|
---|
1760 | if (free == NULL) {
|
---|
1761 | dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
|
---|
1762 | void *rval;
|
---|
1763 |
|
---|
1764 | if (clean == NULL) {
|
---|
1765 | /*
|
---|
1766 | * We're out of dynamic variable space on
|
---|
1767 | * this CPU. Unless we have tried all CPUs,
|
---|
1768 | * we'll try to allocate from a different
|
---|
1769 | * CPU.
|
---|
1770 | */
|
---|
1771 | switch (dstate->dtds_state) {
|
---|
1772 | case DTRACE_DSTATE_CLEAN: {
|
---|
1773 | void *sp = &dstate->dtds_state;
|
---|
1774 |
|
---|
1775 | if (++cpu >= NCPU)
|
---|
1776 | cpu = 0;
|
---|
1777 |
|
---|
1778 | if (dcpu->dtdsc_dirty != NULL &&
|
---|
1779 | nstate == DTRACE_DSTATE_EMPTY)
|
---|
1780 | nstate = DTRACE_DSTATE_DIRTY;
|
---|
1781 |
|
---|
1782 | if (dcpu->dtdsc_rinsing != NULL)
|
---|
1783 | nstate = DTRACE_DSTATE_RINSING;
|
---|
1784 |
|
---|
1785 | dcpu = &dstate->dtds_percpu[cpu];
|
---|
1786 |
|
---|
1787 | if (cpu != me)
|
---|
1788 | goto retry;
|
---|
1789 |
|
---|
1790 | (void) dtrace_cas32(sp,
|
---|
1791 | DTRACE_DSTATE_CLEAN, nstate);
|
---|
1792 |
|
---|
1793 | /*
|
---|
1794 | * To increment the correct bean
|
---|
1795 | * counter, take another lap.
|
---|
1796 | */
|
---|
1797 | goto retry;
|
---|
1798 | }
|
---|
1799 |
|
---|
1800 | case DTRACE_DSTATE_DIRTY:
|
---|
1801 | dcpu->dtdsc_dirty_drops++;
|
---|
1802 | break;
|
---|
1803 |
|
---|
1804 | case DTRACE_DSTATE_RINSING:
|
---|
1805 | dcpu->dtdsc_rinsing_drops++;
|
---|
1806 | break;
|
---|
1807 |
|
---|
1808 | case DTRACE_DSTATE_EMPTY:
|
---|
1809 | dcpu->dtdsc_drops++;
|
---|
1810 | break;
|
---|
1811 | }
|
---|
1812 |
|
---|
1813 | DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
|
---|
1814 | return (NULL);
|
---|
1815 | }
|
---|
1816 |
|
---|
1817 | /*
|
---|
1818 | * The clean list appears to be non-empty. We want to
|
---|
1819 | * move the clean list to the free list; we start by
|
---|
1820 | * moving the clean pointer aside.
|
---|
1821 | */
|
---|
1822 | if (dtrace_casptr(&dcpu->dtdsc_clean,
|
---|
1823 | clean, NULL) != clean) {
|
---|
1824 | /*
|
---|
1825 | * We are in one of two situations:
|
---|
1826 | *
|
---|
1827 | * (a) The clean list was switched to the
|
---|
1828 | * free list by another CPU.
|
---|
1829 | *
|
---|
1830 | * (b) The clean list was added to by the
|
---|
1831 | * cleansing cyclic.
|
---|
1832 | *
|
---|
1833 | * In either of these situations, we can
|
---|
1834 | * just reattempt the free list allocation.
|
---|
1835 | */
|
---|
1836 | goto retry;
|
---|
1837 | }
|
---|
1838 |
|
---|
1839 | ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
|
---|
1840 |
|
---|
1841 | /*
|
---|
1842 | * Now we'll move the clean list to our free list.
|
---|
1843 | * It's impossible for this to fail: the only way
|
---|
1844 | * the free list can be updated is through this
|
---|
1845 | * code path, and only one CPU can own the clean list.
|
---|
1846 | * Thus, it would only be possible for this to fail if
|
---|
1847 | * this code were racing with dtrace_dynvar_clean().
|
---|
1848 | * (That is, if dtrace_dynvar_clean() updated the clean
|
---|
1849 | * list, and we ended up racing to update the free
|
---|
1850 | * list.) This race is prevented by the dtrace_sync()
|
---|
1851 | * in dtrace_dynvar_clean() -- which flushes the
|
---|
1852 | * owners of the clean lists out before resetting
|
---|
1853 | * the clean lists.
|
---|
1854 | */
|
---|
1855 | dcpu = &dstate->dtds_percpu[me];
|
---|
1856 | rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
|
---|
1857 | ASSERT(rval == NULL);
|
---|
1858 | goto retry;
|
---|
1859 | }
|
---|
1860 |
|
---|
1861 | dvar = free;
|
---|
1862 | new_free = dvar->dtdv_next;
|
---|
1863 | } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
|
---|
1864 |
|
---|
1865 | /*
|
---|
1866 | * We have now allocated a new chunk. We copy the tuple keys into the
|
---|
1867 | * tuple array and copy any referenced key data into the data space
|
---|
1868 | * following the tuple array. As we do this, we relocate dttk_value
|
---|
1869 | * in the final tuple to point to the key data address in the chunk.
|
---|
1870 | */
|
---|
1871 | kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
|
---|
1872 | dvar->dtdv_data = (void *)(kdata + ksize);
|
---|
1873 | dvar->dtdv_tuple.dtt_nkeys = nkeys;
|
---|
1874 |
|
---|
1875 | for (i = 0; i < nkeys; i++) {
|
---|
1876 | dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
|
---|
1877 | size_t kesize = key[i].dttk_size;
|
---|
1878 |
|
---|
1879 | if (kesize != 0) {
|
---|
1880 | dtrace_bcopy(
|
---|
1881 | (const void *)(uintptr_t)key[i].dttk_value,
|
---|
1882 | (void *)kdata, kesize);
|
---|
1883 | dkey->dttk_value = kdata;
|
---|
1884 | kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
|
---|
1885 | } else {
|
---|
1886 | dkey->dttk_value = key[i].dttk_value;
|
---|
1887 | }
|
---|
1888 |
|
---|
1889 | dkey->dttk_size = kesize;
|
---|
1890 | }
|
---|
1891 |
|
---|
1892 | ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
|
---|
1893 | dvar->dtdv_hashval = hashval;
|
---|
1894 | dvar->dtdv_next = start;
|
---|
1895 |
|
---|
1896 | if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
|
---|
1897 | return (dvar);
|
---|
1898 |
|
---|
1899 | /*
|
---|
1900 | * The cas has failed. Either another CPU is adding an element to
|
---|
1901 | * this hash chain, or another CPU is deleting an element from this
|
---|
1902 | * hash chain. The simplest way to deal with both of these cases
|
---|
1903 | * (though not necessarily the most efficient) is to free our
|
---|
1904 | * allocated block and tail-call ourselves. Note that the free is
|
---|
1905 | * to the dirty list and _not_ to the free list. This is to prevent
|
---|
1906 | * races with allocators, above.
|
---|
1907 | */
|
---|
1908 | dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
|
---|
1909 |
|
---|
1910 | dtrace_membar_producer();
|
---|
1911 |
|
---|
1912 | do {
|
---|
1913 | free = dcpu->dtdsc_dirty;
|
---|
1914 | dvar->dtdv_next = free;
|
---|
1915 | } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);
|
---|
1916 |
|
---|
1917 | return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
|
---|
1918 | }
|
---|
1919 |
|
---|
/*
 * Aggregating function for min():  replace the stored value with the new
 * value when the new value is smaller.  Both quantities are compared as
 * signed 64-bit integers.  The third argument is not used.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	int64_t stored = (int64_t)*oval;
	int64_t candidate = (int64_t)nval;

	RT_NOREF_PV(arg);

	if (candidate >= stored)
		return;

	*oval = nval;
}
|
---|
1928 |
|
---|
/*
 * Aggregating function for max():  replace the stored value with the new
 * value when the new value is larger.  Both quantities are compared as
 * signed 64-bit integers.  The third argument is not used.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	int64_t stored = (int64_t)*oval;
	int64_t candidate = (int64_t)nval;

	RT_NOREF_PV(arg);

	if (candidate <= stored)
		return;

	*oval = nval;
}
|
---|
1937 |
|
---|
1938 | static void
|
---|
1939 | dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
|
---|
1940 | {
|
---|
1941 | int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
|
---|
1942 | int64_t val = (int64_t)nval;
|
---|
1943 |
|
---|
1944 | if (val < 0) {
|
---|
1945 | for (i = 0; i < zero; i++) {
|
---|
1946 | if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
|
---|
1947 | quanta[i] += incr;
|
---|
1948 | return;
|
---|
1949 | }
|
---|
1950 | }
|
---|
1951 | } else {
|
---|
1952 | for (i = zero + 1; i < VBDTCAST(int)DTRACE_QUANTIZE_NBUCKETS; i++) {
|
---|
1953 | if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
|
---|
1954 | quanta[i - 1] += incr;
|
---|
1955 | return;
|
---|
1956 | }
|
---|
1957 | }
|
---|
1958 |
|
---|
1959 | quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
|
---|
1960 | return;
|
---|
1961 | }
|
---|
1962 |
|
---|
1963 | #ifndef VBOX
|
---|
1964 | ASSERT(0);
|
---|
1965 | #else
|
---|
1966 | AssertFatalFailed();
|
---|
1967 | #endif
|
---|
1968 | }
|
---|
1969 |
|
---|
/*
 * Aggregating function for lquantize().  The first word of the aggregation
 * data holds the encoded lquantize() argument, from which the base, step and
 * number of levels are decoded; the counters follow it:
 *
 *	lquanta[0]		underflow (value below base)
 *	lquanta[1 .. levels]	one counter per level
 *	lquanta[levels + 1]	overflow
 *
 * (Indices are relative to the word following the encoded argument.)  The
 * value is truncated to 32 bits before it is bucketed, and incr is added to
 * the selected counter.
 */
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval;
	int64_t level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	/*
	 * The distance from the base is computed in 64-bit arithmetic:  with
	 * a negative base and a large positive value, (val - base) does not
	 * fit in 32 bits, and a wrapped (negative) level would slip past the
	 * range check below and index outside of the counter array.
	 */
	level = ((int64_t)val - (int64_t)base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
|
---|
2002 |
|
---|
/*
 * Aggregating function for avg():  data[0] counts the values seen and
 * data[1] accumulates their sum.  The third argument is not used.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
	uint64_t *count = &data[0];
	uint64_t *total = &data[1];

	RT_NOREF_PV(arg);

	*count += 1;
	*total += nval;
}
|
---|
2011 |
|
---|
/*
 * Aggregating function for stddev():  data[0] counts the values seen,
 * data[1] accumulates their sum, and the two words starting at data[2]
 * accumulate the 128-bit sum of their squares.  The third argument is not
 * used.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
	uint64_t magnitude = nval;
	uint64_t tmp[2];
	RT_NOREF_PV(arg);

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 *	data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 *
	 * The square only depends on the magnitude of the (signed) value.
	 * The magnitude is taken with unsigned negation, which is well
	 * defined for every input; negating the most negative 64-bit value
	 * as a signed quantity would overflow.
	 */
	if ((int64_t)nval < 0)
		magnitude = (uint64_t)0 - nval;

	dtrace_multiply_128(magnitude, magnitude, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}
|
---|
2037 |
|
---|
/*
 * Aggregating function for count():  bump the stored counter by one.
 * Neither the new value nor the third argument is used.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	RT_NOREF_PV(arg);
	RT_NOREF_PV(nval);

	(*oval)++;
}
|
---|
2046 |
|
---|
/*
 * Aggregating function for sum():  add the new value to the stored total.
 * The third argument is not used.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	uint64_t total = *oval;

	RT_NOREF_PV(arg);

	total += nval;
	*oval = total;
}
|
---|
2054 |
|
---|
2055 | /*
|
---|
2056 | * Aggregate given the tuple in the principal data buffer, and the aggregating
|
---|
2057 | * action denoted by the specified dtrace_aggregation_t. The aggregation
|
---|
2058 | * buffer is specified as the buf parameter. This routine does not return
|
---|
2059 | * failure; if there is no space in the aggregation buffer, the data will be
|
---|
2060 | * dropped, and a corresponding counter incremented.
|
---|
2061 | */
|
---|
2062 | static void
|
---|
2063 | dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
|
---|
2064 | intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
|
---|
2065 | {
|
---|
2066 | dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
|
---|
2067 | uint32_t i, ndx, size, fsize;
|
---|
2068 | uint32_t align = sizeof (uint64_t) - 1;
|
---|
2069 | dtrace_aggbuffer_t *agb;
|
---|
2070 | dtrace_aggkey_t *key;
|
---|
2071 | uint32_t hashval = 0, limit, isstr;
|
---|
2072 | caddr_t tomax, data, kdata;
|
---|
2073 | dtrace_actkind_t action;
|
---|
2074 | dtrace_action_t *act;
|
---|
2075 | uintptr_t offs;
|
---|
2076 |
|
---|
2077 | if (buf == NULL)
|
---|
2078 | return;
|
---|
2079 |
|
---|
2080 | if (!agg->dtag_hasarg) {
|
---|
2081 | /*
|
---|
2082 | * Currently, only quantize() and lquantize() take additional
|
---|
2083 | * arguments, and they have the same semantics: an increment
|
---|
2084 | * value that defaults to 1 when not present. If additional
|
---|
2085 | * aggregating actions take arguments, the setting of the
|
---|
2086 | * default argument value will presumably have to become more
|
---|
2087 | * sophisticated...
|
---|
2088 | */
|
---|
2089 | arg = 1;
|
---|
2090 | }
|
---|
2091 |
|
---|
2092 | action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
|
---|
2093 | size = rec->dtrd_offset - agg->dtag_base;
|
---|
2094 | fsize = size + rec->dtrd_size;
|
---|
2095 |
|
---|
2096 | ASSERT(dbuf->dtb_tomax != NULL);
|
---|
2097 | data = dbuf->dtb_tomax + offset + agg->dtag_base;
|
---|
2098 |
|
---|
2099 | if ((tomax = buf->dtb_tomax) == NULL) {
|
---|
2100 | dtrace_buffer_drop(buf);
|
---|
2101 | return;
|
---|
2102 | }
|
---|
2103 |
|
---|
2104 | /*
|
---|
2105 | * The metastructure is always at the bottom of the buffer.
|
---|
2106 | */
|
---|
2107 | agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
|
---|
2108 | sizeof (dtrace_aggbuffer_t));
|
---|
2109 |
|
---|
2110 | if (buf->dtb_offset == 0) {
|
---|
2111 | /*
|
---|
2112 | * We just kludge up approximately 1/8th of the size to be
|
---|
2113 | * buckets. If this guess ends up being routinely
|
---|
2114 | * off-the-mark, we may need to dynamically readjust this
|
---|
2115 | * based on past performance.
|
---|
2116 | */
|
---|
2117 | uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);
|
---|
2118 |
|
---|
2119 | if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
|
---|
2120 | (uintptr_t)tomax || hashsize == 0) {
|
---|
2121 | /*
|
---|
2122 | * We've been given a ludicrously small buffer;
|
---|
2123 | * increment our drop count and leave.
|
---|
2124 | */
|
---|
2125 | dtrace_buffer_drop(buf);
|
---|
2126 | return;
|
---|
2127 | }
|
---|
2128 |
|
---|
2129 | /*
|
---|
2130 | * And now, a pathetic attempt to try to get a an odd (or
|
---|
2131 | * perchance, a prime) hash size for better hash distribution.
|
---|
2132 | */
|
---|
2133 | if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
|
---|
2134 | hashsize -= DTRACE_AGGHASHSIZE_SLEW;
|
---|
2135 |
|
---|
2136 | agb->dtagb_hashsize = hashsize;
|
---|
2137 | agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
|
---|
2138 | agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
|
---|
2139 | agb->dtagb_free = (uintptr_t)agb->dtagb_hash;
|
---|
2140 |
|
---|
2141 | for (i = 0; i < agb->dtagb_hashsize; i++)
|
---|
2142 | agb->dtagb_hash[i] = NULL;
|
---|
2143 | }
|
---|
2144 |
|
---|
2145 | ASSERT(agg->dtag_first != NULL);
|
---|
2146 | ASSERT(agg->dtag_first->dta_intuple);
|
---|
2147 |
|
---|
2148 | /*
|
---|
2149 | * Calculate the hash value based on the key. Note that we _don't_
|
---|
2150 | * include the aggid in the hashing (but we will store it as part of
|
---|
2151 | * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
|
---|
2152 | * algorithm: a simple, quick algorithm that has no known funnels, and
|
---|
2153 | * gets good distribution in practice. The efficacy of the hashing
|
---|
2154 | * algorithm (and a comparison with other algorithms) may be found by
|
---|
2155 | * running the ::dtrace_aggstat MDB dcmd.
|
---|
2156 | */
|
---|
2157 | for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
|
---|
2158 | i = act->dta_rec.dtrd_offset - agg->dtag_base;
|
---|
2159 | limit = i + act->dta_rec.dtrd_size;
|
---|
2160 | ASSERT(limit <= size);
|
---|
2161 | isstr = DTRACEACT_ISSTRING(act);
|
---|
2162 |
|
---|
2163 | for (; i < limit; i++) {
|
---|
2164 | hashval += data[i];
|
---|
2165 | hashval += (hashval << 10);
|
---|
2166 | hashval ^= (hashval >> 6);
|
---|
2167 |
|
---|
2168 | if (isstr && data[i] == '\0')
|
---|
2169 | break;
|
---|
2170 | }
|
---|
2171 | }
|
---|
2172 |
|
---|
2173 | hashval += (hashval << 3);
|
---|
2174 | hashval ^= (hashval >> 11);
|
---|
2175 | hashval += (hashval << 15);
|
---|
2176 |
|
---|
2177 | /*
|
---|
2178 | * Yes, the divide here is expensive -- but it's generally the least
|
---|
2179 | * of the performance issues given the amount of data that we iterate
|
---|
2180 | * over to compute hash values, compare data, etc.
|
---|
2181 | */
|
---|
2182 | ndx = hashval % agb->dtagb_hashsize;
|
---|
2183 |
|
---|
2184 | for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
|
---|
2185 | ASSERT((caddr_t)key >= tomax);
|
---|
2186 | ASSERT((caddr_t)key < tomax + buf->dtb_size);
|
---|
2187 |
|
---|
2188 | if (hashval != key->dtak_hashval || key->dtak_size != size)
|
---|
2189 | continue;
|
---|
2190 |
|
---|
2191 | kdata = key->dtak_data;
|
---|
2192 | ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);
|
---|
2193 |
|
---|
2194 | for (act = agg->dtag_first; act->dta_intuple;
|
---|
2195 | act = act->dta_next) {
|
---|
2196 | i = act->dta_rec.dtrd_offset - agg->dtag_base;
|
---|
2197 | limit = i + act->dta_rec.dtrd_size;
|
---|
2198 | ASSERT(limit <= size);
|
---|
2199 | isstr = DTRACEACT_ISSTRING(act);
|
---|
2200 |
|
---|
2201 | for (; i < limit; i++) {
|
---|
2202 | if (kdata[i] != data[i])
|
---|
2203 | goto next;
|
---|
2204 |
|
---|
2205 | if (isstr && data[i] == '\0')
|
---|
2206 | break;
|
---|
2207 | }
|
---|
2208 | }
|
---|
2209 |
|
---|
2210 | if (action != key->dtak_action) {
|
---|
2211 | /*
|
---|
2212 | * We are aggregating on the same value in the same
|
---|
2213 | * aggregation with two different aggregating actions.
|
---|
2214 | * (This should have been picked up in the compiler,
|
---|
2215 | * so we may be dealing with errant or devious DIF.)
|
---|
2216 | * This is an error condition; we indicate as much,
|
---|
2217 | * and return.
|
---|
2218 | */
|
---|
2219 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
2220 | return;
|
---|
2221 | }
|
---|
2222 |
|
---|
2223 | /*
|
---|
2224 | * This is a hit: we need to apply the aggregator to
|
---|
2225 | * the value at this key.
|
---|
2226 | */
|
---|
2227 | agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
|
---|
2228 | return;
|
---|
2229 | next:
|
---|
2230 | continue;
|
---|
2231 | }
|
---|
2232 |
|
---|
2233 | /*
|
---|
2234 | * We didn't find it. We need to allocate some zero-filled space,
|
---|
2235 | * link it into the hash table appropriately, and apply the aggregator
|
---|
2236 | * to the (zero-filled) value.
|
---|
2237 | */
|
---|
2238 | offs = buf->dtb_offset;
|
---|
2239 | while (offs & (align - 1))
|
---|
2240 | offs += sizeof (uint32_t);
|
---|
2241 |
|
---|
2242 | /*
|
---|
2243 | * If we don't have enough room to both allocate a new key _and_
|
---|
2244 | * its associated data, increment the drop count and return.
|
---|
2245 | */
|
---|
2246 | if ((uintptr_t)tomax + offs + fsize >
|
---|
2247 | agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
|
---|
2248 | dtrace_buffer_drop(buf);
|
---|
2249 | return;
|
---|
2250 | }
|
---|
2251 |
|
---|
2252 | /*CONSTCOND*/
|
---|
2253 | ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
|
---|
2254 | key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
|
---|
2255 | agb->dtagb_free -= sizeof (dtrace_aggkey_t);
|
---|
2256 |
|
---|
2257 | key->dtak_data = kdata = tomax + offs;
|
---|
2258 | buf->dtb_offset = offs + fsize;
|
---|
2259 |
|
---|
2260 | /*
|
---|
2261 | * Now copy the data across.
|
---|
2262 | */
|
---|
2263 | *((dtrace_aggid_t *)kdata) = agg->dtag_id;
|
---|
2264 |
|
---|
2265 | for (i = sizeof (dtrace_aggid_t); i < size; i++)
|
---|
2266 | kdata[i] = data[i];
|
---|
2267 |
|
---|
2268 | /*
|
---|
2269 | * Because strings are not zeroed out by default, we need to iterate
|
---|
2270 | * looking for actions that store strings, and we need to explicitly
|
---|
2271 | * pad these strings out with zeroes.
|
---|
2272 | */
|
---|
2273 | for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
|
---|
2274 | int nul;
|
---|
2275 |
|
---|
2276 | if (!DTRACEACT_ISSTRING(act))
|
---|
2277 | continue;
|
---|
2278 |
|
---|
2279 | i = act->dta_rec.dtrd_offset - agg->dtag_base;
|
---|
2280 | limit = i + act->dta_rec.dtrd_size;
|
---|
2281 | ASSERT(limit <= size);
|
---|
2282 |
|
---|
2283 | for (nul = 0; i < limit; i++) {
|
---|
2284 | if (nul) {
|
---|
2285 | kdata[i] = '\0';
|
---|
2286 | continue;
|
---|
2287 | }
|
---|
2288 |
|
---|
2289 | if (data[i] != '\0')
|
---|
2290 | continue;
|
---|
2291 |
|
---|
2292 | nul = 1;
|
---|
2293 | }
|
---|
2294 | }
|
---|
2295 |
|
---|
2296 | for (i = size; i < fsize; i++)
|
---|
2297 | kdata[i] = 0;
|
---|
2298 |
|
---|
2299 | key->dtak_hashval = hashval;
|
---|
2300 | key->dtak_size = size;
|
---|
2301 | key->dtak_action = action;
|
---|
2302 | key->dtak_next = agb->dtagb_hash[ndx];
|
---|
2303 | agb->dtagb_hash[ndx] = key;
|
---|
2304 |
|
---|
2305 | /*
|
---|
2306 | * Finally, apply the aggregator.
|
---|
2307 | */
|
---|
2308 | *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
|
---|
2309 | agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
|
---|
2310 | }
|
---|
2311 |
|
---|
2312 | /*
|
---|
2313 | * Given consumer state, this routine finds a speculation in the INACTIVE
|
---|
2314 | * state and transitions it into the ACTIVE state. If there is no speculation
|
---|
2315 | * in the INACTIVE state, 0 is returned. In this case, no error counter is
|
---|
2316 | * incremented -- it is up to the caller to take appropriate action.
|
---|
2317 | */
|
---|
2318 | static int
|
---|
2319 | dtrace_speculation(dtrace_state_t *state)
|
---|
2320 | {
|
---|
2321 | int i = 0;
|
---|
2322 | dtrace_speculation_state_t current;
|
---|
2323 | uint32_t *stat = &state->dts_speculations_unavail, count;
|
---|
2324 |
|
---|
2325 | while (i < state->dts_nspeculations) {
|
---|
2326 | dtrace_speculation_t *spec = &state->dts_speculations[i];
|
---|
2327 |
|
---|
2328 | current = spec->dtsp_state;
|
---|
2329 |
|
---|
2330 | if (current != DTRACESPEC_INACTIVE) {
|
---|
2331 | if (current == DTRACESPEC_COMMITTINGMANY ||
|
---|
2332 | current == DTRACESPEC_COMMITTING ||
|
---|
2333 | current == DTRACESPEC_DISCARDING)
|
---|
2334 | stat = &state->dts_speculations_busy;
|
---|
2335 | i++;
|
---|
2336 | continue;
|
---|
2337 | }
|
---|
2338 |
|
---|
2339 | if ( (dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, DTRACESPEC_ACTIVE)
|
---|
2340 | == current)
|
---|
2341 | return (i + 1);
|
---|
2342 | }
|
---|
2343 |
|
---|
2344 | /*
|
---|
2345 | * We couldn't find a speculation. If we found as much as a single
|
---|
2346 | * busy speculation buffer, we'll attribute this failure as "busy"
|
---|
2347 | * instead of "unavail".
|
---|
2348 | */
|
---|
2349 | do {
|
---|
2350 | count = *stat;
|
---|
2351 | } while (dtrace_cas32(stat, count, count + 1) != count);
|
---|
2352 |
|
---|
2353 | return (0);
|
---|
2354 | }
|
---|
2355 |
|
---|
2356 | /*
|
---|
2357 | * This routine commits an active speculation. If the specified speculation
|
---|
2358 | * is not in a valid state to perform a commit(), this routine will silently do
|
---|
2359 | * nothing. The state of the specified speculation is transitioned according
|
---|
2360 | * to the state transition diagram outlined in <sys/dtrace_impl.h>
|
---|
2361 | */
|
---|
2362 | static void
|
---|
2363 | dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
|
---|
2364 | dtrace_specid_t which)
|
---|
2365 | {
|
---|
2366 | dtrace_speculation_t *spec;
|
---|
2367 | dtrace_buffer_t *src, *dest;
|
---|
2368 | uintptr_t daddr, saddr, dlimit;
|
---|
2369 | dtrace_speculation_state_t current, new VBDTUNASS(-1);
|
---|
2370 | intptr_t offs;
|
---|
2371 |
|
---|
2372 | if (which == 0)
|
---|
2373 | return;
|
---|
2374 |
|
---|
2375 | if (which > VBDTCAST(unsigned)state->dts_nspeculations) {
|
---|
2376 | cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
2377 | return;
|
---|
2378 | }
|
---|
2379 |
|
---|
2380 | spec = &state->dts_speculations[which - 1];
|
---|
2381 | src = &spec->dtsp_buffer[cpu];
|
---|
2382 | dest = &state->dts_buffer[cpu];
|
---|
2383 |
|
---|
2384 | do {
|
---|
2385 | current = spec->dtsp_state;
|
---|
2386 |
|
---|
2387 | if (current == DTRACESPEC_COMMITTINGMANY)
|
---|
2388 | break;
|
---|
2389 |
|
---|
2390 | switch (current) {
|
---|
2391 | case DTRACESPEC_INACTIVE:
|
---|
2392 | case DTRACESPEC_DISCARDING:
|
---|
2393 | return;
|
---|
2394 |
|
---|
2395 | case DTRACESPEC_COMMITTING:
|
---|
2396 | /*
|
---|
2397 | * This is only possible if we are (a) commit()'ing
|
---|
2398 | * without having done a prior speculate() on this CPU
|
---|
2399 | * and (b) racing with another commit() on a different
|
---|
2400 | * CPU. There's nothing to do -- we just assert that
|
---|
2401 | * our offset is 0.
|
---|
2402 | */
|
---|
2403 | ASSERT(src->dtb_offset == 0);
|
---|
2404 | return;
|
---|
2405 |
|
---|
2406 | case DTRACESPEC_ACTIVE:
|
---|
2407 | new = DTRACESPEC_COMMITTING;
|
---|
2408 | break;
|
---|
2409 |
|
---|
2410 | case DTRACESPEC_ACTIVEONE:
|
---|
2411 | /*
|
---|
2412 | * This speculation is active on one CPU. If our
|
---|
2413 | * buffer offset is non-zero, we know that the one CPU
|
---|
2414 | * must be us. Otherwise, we are committing on a
|
---|
2415 | * different CPU from the speculate(), and we must
|
---|
2416 | * rely on being asynchronously cleaned.
|
---|
2417 | */
|
---|
2418 | if (src->dtb_offset != 0) {
|
---|
2419 | new = DTRACESPEC_COMMITTING;
|
---|
2420 | break;
|
---|
2421 | }
|
---|
2422 | RT_FALL_THRU();
|
---|
2423 |
|
---|
2424 | case DTRACESPEC_ACTIVEMANY:
|
---|
2425 | new = DTRACESPEC_COMMITTINGMANY;
|
---|
2426 | break;
|
---|
2427 |
|
---|
2428 | default:
|
---|
2429 | #ifndef VBOX
|
---|
2430 | ASSERT(0);
|
---|
2431 | #else
|
---|
2432 | AssertFatalMsgFailed(("%d\n", current));
|
---|
2433 | #endif
|
---|
2434 | }
|
---|
2435 | } while ((dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current);
|
---|
2436 |
|
---|
2437 | /*
|
---|
2438 | * We have set the state to indicate that we are committing this
|
---|
2439 | * speculation. Now reserve the necessary space in the destination
|
---|
2440 | * buffer.
|
---|
2441 | */
|
---|
2442 | if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
|
---|
2443 | sizeof (uint64_t), state, NULL)) < 0) {
|
---|
2444 | dtrace_buffer_drop(dest);
|
---|
2445 | goto out;
|
---|
2446 | }
|
---|
2447 |
|
---|
2448 | /*
|
---|
2449 | * We have the space; copy the buffer across. (Note that this is a
|
---|
2450 | * highly subobtimal bcopy(); in the unlikely event that this becomes
|
---|
2451 | * a serious performance issue, a high-performance DTrace-specific
|
---|
2452 | * bcopy() should obviously be invented.)
|
---|
2453 | */
|
---|
2454 | daddr = (uintptr_t)dest->dtb_tomax + offs;
|
---|
2455 | dlimit = daddr + src->dtb_offset;
|
---|
2456 | saddr = (uintptr_t)src->dtb_tomax;
|
---|
2457 |
|
---|
2458 | /*
|
---|
2459 | * First, the aligned portion.
|
---|
2460 | */
|
---|
2461 | while (dlimit - daddr >= sizeof (uint64_t)) {
|
---|
2462 | *((uint64_t *)daddr) = *((uint64_t *)saddr);
|
---|
2463 |
|
---|
2464 | daddr += sizeof (uint64_t);
|
---|
2465 | saddr += sizeof (uint64_t);
|
---|
2466 | }
|
---|
2467 |
|
---|
2468 | /*
|
---|
2469 | * Now any left-over bit...
|
---|
2470 | */
|
---|
2471 | while (dlimit - daddr)
|
---|
2472 | *((uint8_t *)daddr++) = *((uint8_t *)saddr++);
|
---|
2473 |
|
---|
2474 | /*
|
---|
2475 | * Finally, commit the reserved space in the destination buffer.
|
---|
2476 | */
|
---|
2477 | dest->dtb_offset = offs + src->dtb_offset;
|
---|
2478 |
|
---|
2479 | out:
|
---|
2480 | /*
|
---|
2481 | * If we're lucky enough to be the only active CPU on this speculation
|
---|
2482 | * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
|
---|
2483 | */
|
---|
2484 | if (current == DTRACESPEC_ACTIVE ||
|
---|
2485 | (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
|
---|
2486 | uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
|
---|
2487 | DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
|
---|
2488 |
|
---|
2489 | ASSERT(rval == DTRACESPEC_COMMITTING); NOREF(rval);
|
---|
2490 | }
|
---|
2491 |
|
---|
2492 | src->dtb_offset = 0;
|
---|
2493 | src->dtb_xamot_drops += src->dtb_drops;
|
---|
2494 | src->dtb_drops = 0;
|
---|
2495 | }
|
---|
2496 |
|
---|
2497 | /*
|
---|
2498 | * This routine discards an active speculation. If the specified speculation
|
---|
2499 | * is not in a valid state to perform a discard(), this routine will silently
|
---|
2500 | * do nothing. The state of the specified speculation is transitioned
|
---|
2501 | * according to the state transition diagram outlined in <sys/dtrace_impl.h>
|
---|
2502 | */
|
---|
2503 | static void
|
---|
2504 | dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
|
---|
2505 | dtrace_specid_t which)
|
---|
2506 | {
|
---|
2507 | dtrace_speculation_t *spec;
|
---|
2508 | dtrace_speculation_state_t current, new;
|
---|
2509 | dtrace_buffer_t *buf;
|
---|
2510 |
|
---|
2511 | if (which == 0)
|
---|
2512 | return;
|
---|
2513 |
|
---|
2514 | if (which > VBDTCAST(unsigned)state->dts_nspeculations) {
|
---|
2515 | cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
2516 | return;
|
---|
2517 | }
|
---|
2518 |
|
---|
2519 | spec = &state->dts_speculations[which - 1];
|
---|
2520 | buf = &spec->dtsp_buffer[cpu];
|
---|
2521 |
|
---|
2522 | do {
|
---|
2523 | current = spec->dtsp_state;
|
---|
2524 |
|
---|
2525 | switch (current) {
|
---|
2526 | case DTRACESPEC_INACTIVE:
|
---|
2527 | case DTRACESPEC_COMMITTINGMANY:
|
---|
2528 | case DTRACESPEC_COMMITTING:
|
---|
2529 | case DTRACESPEC_DISCARDING:
|
---|
2530 | return;
|
---|
2531 |
|
---|
2532 | case DTRACESPEC_ACTIVE:
|
---|
2533 | case DTRACESPEC_ACTIVEMANY:
|
---|
2534 | new = DTRACESPEC_DISCARDING;
|
---|
2535 | break;
|
---|
2536 |
|
---|
2537 | case DTRACESPEC_ACTIVEONE:
|
---|
2538 | if (buf->dtb_offset != 0) {
|
---|
2539 | new = DTRACESPEC_INACTIVE;
|
---|
2540 | } else {
|
---|
2541 | new = DTRACESPEC_DISCARDING;
|
---|
2542 | }
|
---|
2543 | break;
|
---|
2544 |
|
---|
2545 | default:
|
---|
2546 | #ifndef VBOX
|
---|
2547 | ASSERT(0);
|
---|
2548 | #else
|
---|
2549 | AssertFatalMsgFailed(("%d\n", current));
|
---|
2550 | #endif
|
---|
2551 | }
|
---|
2552 | } while ((dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current);
|
---|
2553 |
|
---|
2554 | buf->dtb_offset = 0;
|
---|
2555 | buf->dtb_drops = 0;
|
---|
2556 | }
|
---|
2557 |
|
---|
2558 | /*
|
---|
2559 | * Note: not called from probe context. This function is called
|
---|
2560 | * asynchronously from cross call context to clean any speculations that are
|
---|
2561 | * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
|
---|
2562 | * transitioned back to the INACTIVE state until all CPUs have cleaned the
|
---|
2563 | * speculation.
|
---|
2564 | */
|
---|
2565 | static void
|
---|
2566 | dtrace_speculation_clean_here(dtrace_state_t *state)
|
---|
2567 | {
|
---|
2568 | dtrace_icookie_t cookie;
|
---|
2569 | processorid_t cpu = VBDT_GET_CPUID();
|
---|
2570 | dtrace_buffer_t *dest = &state->dts_buffer[cpu];
|
---|
2571 | dtrace_specid_t i;
|
---|
2572 |
|
---|
2573 | cookie = dtrace_interrupt_disable();
|
---|
2574 |
|
---|
2575 | if (dest->dtb_tomax == NULL) {
|
---|
2576 | dtrace_interrupt_enable(cookie);
|
---|
2577 | return;
|
---|
2578 | }
|
---|
2579 |
|
---|
2580 | for (i = 0; i < VBDTCAST(unsigned)state->dts_nspeculations; i++) {
|
---|
2581 | dtrace_speculation_t *spec = &state->dts_speculations[i];
|
---|
2582 | dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];
|
---|
2583 |
|
---|
2584 | if (src->dtb_tomax == NULL)
|
---|
2585 | continue;
|
---|
2586 |
|
---|
2587 | if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
|
---|
2588 | src->dtb_offset = 0;
|
---|
2589 | continue;
|
---|
2590 | }
|
---|
2591 |
|
---|
2592 | if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
|
---|
2593 | continue;
|
---|
2594 |
|
---|
2595 | if (src->dtb_offset == 0)
|
---|
2596 | continue;
|
---|
2597 |
|
---|
2598 | dtrace_speculation_commit(state, cpu, i + 1);
|
---|
2599 | }
|
---|
2600 |
|
---|
2601 | dtrace_interrupt_enable(cookie);
|
---|
2602 | }
|
---|
2603 |
|
---|
#ifdef VBOX
/**
 * Callback adapter that forwards to dtrace_speculation_clean_here().
 *
 * @param   idCpu       Ignored.
 * @param   pvUser1     The dtrace_state_t to clean.
 * @param   pvUser2     Ignored.
 */
static DECLCALLBACK(void) dtrace_speculation_clean_here_wrapper(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
	dtrace_state_t *state = (dtrace_state_t *)pvUser1;

	dtrace_speculation_clean_here(state);
	NOREF(idCpu);
	NOREF(pvUser2);
}
#endif
2612 |
|
---|
2613 | /*
|
---|
2614 | * Note: not called from probe context. This function is called
|
---|
2615 | * asynchronously (and at a regular interval) to clean any speculations that
|
---|
2616 | * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there
|
---|
2617 | * is work to be done, it cross calls all CPUs to perform that work;
|
---|
2618 | * COMMITMANY and DISCARDING speculations may not be transitioned back to the
|
---|
2619 | * INACTIVE state until they have been cleaned by all CPUs.
|
---|
2620 | */
|
---|
2621 | static void
|
---|
2622 | dtrace_speculation_clean(dtrace_state_t *state)
|
---|
2623 | {
|
---|
2624 | int work = 0, rv;
|
---|
2625 | dtrace_specid_t i;
|
---|
2626 |
|
---|
2627 | for (i = 0; i < VBDTCAST(unsigned)state->dts_nspeculations; i++) {
|
---|
2628 | dtrace_speculation_t *spec = &state->dts_speculations[i];
|
---|
2629 |
|
---|
2630 | ASSERT(!spec->dtsp_cleaning);
|
---|
2631 |
|
---|
2632 | if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
|
---|
2633 | spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
|
---|
2634 | continue;
|
---|
2635 |
|
---|
2636 | work++;
|
---|
2637 | spec->dtsp_cleaning = 1;
|
---|
2638 | }
|
---|
2639 |
|
---|
2640 | if (!work)
|
---|
2641 | return;
|
---|
2642 |
|
---|
2643 | #ifndef VBOX
|
---|
2644 | dtrace_xcall(DTRACE_CPUALL,
|
---|
2645 | (dtrace_xcall_t)dtrace_speculation_clean_here, state);
|
---|
2646 | #else
|
---|
2647 | RTMpOnAll(dtrace_speculation_clean_here_wrapper, state, NULL);
|
---|
2648 | #endif
|
---|
2649 |
|
---|
2650 | /*
|
---|
2651 | * We now know that all CPUs have committed or discarded their
|
---|
2652 | * speculation buffers, as appropriate. We can now set the state
|
---|
2653 | * to inactive.
|
---|
2654 | */
|
---|
2655 | for (i = 0; i < VBDTCAST(unsigned)state->dts_nspeculations; i++) {
|
---|
2656 | dtrace_speculation_t *spec = &state->dts_speculations[i];
|
---|
2657 | dtrace_speculation_state_t current, new;
|
---|
2658 |
|
---|
2659 | if (!spec->dtsp_cleaning)
|
---|
2660 | continue;
|
---|
2661 |
|
---|
2662 | current = spec->dtsp_state;
|
---|
2663 | ASSERT(current == DTRACESPEC_DISCARDING ||
|
---|
2664 | current == DTRACESPEC_COMMITTINGMANY);
|
---|
2665 |
|
---|
2666 | new = DTRACESPEC_INACTIVE;
|
---|
2667 |
|
---|
2668 | rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
|
---|
2669 | ASSERT(VBDTCAST(dtrace_speculation_state_t)rv == current);
|
---|
2670 | spec->dtsp_cleaning = 0;
|
---|
2671 | }
|
---|
2672 | }
|
---|
2673 |
|
---|
2674 | /*
|
---|
2675 | * Called as part of a speculate() to get the speculative buffer associated
|
---|
2676 | * with a given speculation. Returns NULL if the specified speculation is not
|
---|
2677 | * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and
|
---|
2678 | * the active CPU is not the specified CPU -- the speculation will be
|
---|
2679 | * atomically transitioned into the ACTIVEMANY state.
|
---|
2680 | */
|
---|
2681 | static dtrace_buffer_t *
|
---|
2682 | dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
|
---|
2683 | dtrace_specid_t which)
|
---|
2684 | {
|
---|
2685 | dtrace_speculation_t *spec;
|
---|
2686 | dtrace_speculation_state_t current, new VBDTUNASS(-1);
|
---|
2687 | dtrace_buffer_t *buf;
|
---|
2688 |
|
---|
2689 | if (which == 0)
|
---|
2690 | return (NULL);
|
---|
2691 |
|
---|
2692 | if (which > VBDTCAST(unsigned)state->dts_nspeculations) {
|
---|
2693 | cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
2694 | return (NULL);
|
---|
2695 | }
|
---|
2696 |
|
---|
2697 | spec = &state->dts_speculations[which - 1];
|
---|
2698 | buf = &spec->dtsp_buffer[cpuid];
|
---|
2699 |
|
---|
2700 | do {
|
---|
2701 | current = spec->dtsp_state;
|
---|
2702 |
|
---|
2703 | switch (current) {
|
---|
2704 | case DTRACESPEC_INACTIVE:
|
---|
2705 | case DTRACESPEC_COMMITTINGMANY:
|
---|
2706 | case DTRACESPEC_DISCARDING:
|
---|
2707 | return (NULL);
|
---|
2708 |
|
---|
2709 | case DTRACESPEC_COMMITTING:
|
---|
2710 | ASSERT(buf->dtb_offset == 0);
|
---|
2711 | return (NULL);
|
---|
2712 |
|
---|
2713 | case DTRACESPEC_ACTIVEONE:
|
---|
2714 | /*
|
---|
2715 | * This speculation is currently active on one CPU.
|
---|
2716 | * Check the offset in the buffer; if it's non-zero,
|
---|
2717 | * that CPU must be us (and we leave the state alone).
|
---|
2718 | * If it's zero, assume that we're starting on a new
|
---|
2719 | * CPU -- and change the state to indicate that the
|
---|
2720 | * speculation is active on more than one CPU.
|
---|
2721 | */
|
---|
2722 | if (buf->dtb_offset != 0)
|
---|
2723 | return (buf);
|
---|
2724 |
|
---|
2725 | new = DTRACESPEC_ACTIVEMANY;
|
---|
2726 | break;
|
---|
2727 |
|
---|
2728 | case DTRACESPEC_ACTIVEMANY:
|
---|
2729 | return (buf);
|
---|
2730 |
|
---|
2731 | case DTRACESPEC_ACTIVE:
|
---|
2732 | new = DTRACESPEC_ACTIVEONE;
|
---|
2733 | break;
|
---|
2734 |
|
---|
2735 | default:
|
---|
2736 | #ifndef VBOX
|
---|
2737 | ASSERT(0);
|
---|
2738 | #else
|
---|
2739 | AssertFatalMsgFailed(("%d\n", current));
|
---|
2740 | #endif
|
---|
2741 | }
|
---|
2742 | } while ((dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current);
|
---|
2743 |
|
---|
2744 | ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
|
---|
2745 | return (buf);
|
---|
2746 | }
|
---|
2747 |
|
---|
2748 | /*
|
---|
2749 | * Return a string. In the event that the user lacks the privilege to access
|
---|
2750 | * arbitrary kernel memory, we copy the string out to scratch memory so that we
|
---|
2751 | * don't fail access checking.
|
---|
2752 | *
|
---|
2753 | * dtrace_dif_variable() uses this routine as a helper for various
|
---|
2754 | * builtin values such as 'execname' and 'probefunc.'
|
---|
2755 | */
|
---|
2756 | VBDTSTATIC uintptr_t
|
---|
2757 | dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
|
---|
2758 | dtrace_mstate_t *mstate)
|
---|
2759 | {
|
---|
2760 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
2761 | uintptr_t ret;
|
---|
2762 | size_t strsz;
|
---|
2763 |
|
---|
2764 | /*
|
---|
2765 | * The easy case: this probe is allowed to read all of memory, so
|
---|
2766 | * we can just return this as a vanilla pointer.
|
---|
2767 | */
|
---|
2768 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
|
---|
2769 | return (addr);
|
---|
2770 |
|
---|
2771 | /*
|
---|
2772 | * This is the tougher case: we copy the string in question from
|
---|
2773 | * kernel memory into scratch memory and return it that way: this
|
---|
2774 | * ensures that we won't trip up when access checking tests the
|
---|
2775 | * BYREF return value.
|
---|
2776 | */
|
---|
2777 | strsz = dtrace_strlen((char *)addr, size) + 1;
|
---|
2778 |
|
---|
2779 | if (mstate->dtms_scratch_ptr + strsz >
|
---|
2780 | mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
|
---|
2781 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
2782 | return (NULL);
|
---|
2783 | }
|
---|
2784 |
|
---|
2785 | dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
|
---|
2786 | strsz);
|
---|
2787 | ret = mstate->dtms_scratch_ptr;
|
---|
2788 | mstate->dtms_scratch_ptr += strsz;
|
---|
2789 | return (ret);
|
---|
2790 | }
|
---|
2791 |
|
---|
2792 | /*
|
---|
2793 | * This function implements the DIF emulator's variable lookups. The emulator
|
---|
2794 | * passes a reserved variable identifier and optional built-in array index.
|
---|
2795 | */
|
---|
2796 | static uint64_t
|
---|
2797 | dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
|
---|
2798 | uint64_t ndx)
|
---|
2799 | {
|
---|
2800 | /*
|
---|
2801 | * If we're accessing one of the uncached arguments, we'll turn this
|
---|
2802 | * into a reference in the args array.
|
---|
2803 | */
|
---|
2804 | if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
|
---|
2805 | ndx = v - DIF_VAR_ARG0;
|
---|
2806 | v = DIF_VAR_ARGS;
|
---|
2807 | }
|
---|
2808 |
|
---|
2809 | switch (v) {
|
---|
2810 | case DIF_VAR_ARGS:
|
---|
2811 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
|
---|
2812 | if (ndx >= sizeof (mstate->dtms_arg) /
|
---|
2813 | sizeof (mstate->dtms_arg[0])) {
|
---|
2814 | int aframes = mstate->dtms_probe->dtpr_aframes + 2;
|
---|
2815 | dtrace_provider_t *pv;
|
---|
2816 | uint64_t val;
|
---|
2817 |
|
---|
2818 | pv = mstate->dtms_probe->dtpr_provider;
|
---|
2819 | if (pv->dtpv_pops.dtps_getargval != NULL)
|
---|
2820 | val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
|
---|
2821 | mstate->dtms_probe->dtpr_id,
|
---|
2822 | mstate->dtms_probe->dtpr_arg, ndx, aframes);
|
---|
2823 | else
|
---|
2824 | val = dtrace_getarg(ndx, aframes);
|
---|
2825 |
|
---|
2826 | /*
|
---|
2827 | * This is regrettably required to keep the compiler
|
---|
2828 | * from tail-optimizing the call to dtrace_getarg().
|
---|
2829 | * The condition always evaluates to true, but the
|
---|
2830 | * compiler has no way of figuring that out a priori.
|
---|
2831 | * (None of this would be necessary if the compiler
|
---|
2832 | * could be relied upon to _always_ tail-optimize
|
---|
2833 | * the call to dtrace_getarg() -- but it can't.)
|
---|
2834 | */
|
---|
2835 | if (mstate->dtms_probe != NULL)
|
---|
2836 | return (val);
|
---|
2837 |
|
---|
2838 | #ifndef VBOX
|
---|
2839 | ASSERT(0);
|
---|
2840 | #else
|
---|
2841 | AssertFatalFailed();
|
---|
2842 | #endif
|
---|
2843 | }
|
---|
2844 |
|
---|
2845 | return (mstate->dtms_arg[ndx]);
|
---|
2846 |
|
---|
2847 | case DIF_VAR_UREGS: {
|
---|
2848 | #ifndef VBOX
|
---|
2849 | klwp_t *lwp;
|
---|
2850 |
|
---|
2851 | if (!dtrace_priv_proc(state))
|
---|
2852 | return (0);
|
---|
2853 |
|
---|
2854 | if ((lwp = curthread->t_lwp) == NULL) {
|
---|
2855 | DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
|
---|
2856 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = NULL;
|
---|
2857 | return (0);
|
---|
2858 | }
|
---|
2859 |
|
---|
2860 | return (dtrace_getreg(lwp->lwp_regs, ndx));
|
---|
2861 | #else
|
---|
2862 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
2863 | return (0);
|
---|
2864 | #endif
|
---|
2865 | }
|
---|
2866 |
|
---|
2867 | case DIF_VAR_CURTHREAD:
|
---|
2868 | if (!dtrace_priv_kernel(state))
|
---|
2869 | return (0);
|
---|
2870 | #ifndef VBOX
|
---|
2871 | return ((uint64_t)(uintptr_t)curthread);
|
---|
2872 | #else
|
---|
2873 | return ((uintptr_t)RTThreadNativeSelf());
|
---|
2874 | #endif
|
---|
2875 |
|
---|
2876 | case DIF_VAR_TIMESTAMP:
|
---|
2877 | if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
|
---|
2878 | mstate->dtms_timestamp = dtrace_gethrtime();
|
---|
2879 | mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
|
---|
2880 | }
|
---|
2881 | return (mstate->dtms_timestamp);
|
---|
2882 |
|
---|
2883 | case DIF_VAR_VTIMESTAMP:
|
---|
2884 | #ifndef VBOX
|
---|
2885 | ASSERT(dtrace_vtime_references != 0);
|
---|
2886 | return (curthread->t_dtrace_vtime);
|
---|
2887 | #else
|
---|
2888 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
2889 | return (0);
|
---|
2890 | #endif
|
---|
2891 |
|
---|
2892 | case DIF_VAR_WALLTIMESTAMP:
|
---|
2893 | if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
|
---|
2894 | mstate->dtms_walltimestamp = dtrace_gethrestime();
|
---|
2895 | mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
|
---|
2896 | }
|
---|
2897 | return (mstate->dtms_walltimestamp);
|
---|
2898 |
|
---|
2899 | case DIF_VAR_IPL:
|
---|
2900 | if (!dtrace_priv_kernel(state))
|
---|
2901 | return (0);
|
---|
2902 | if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
|
---|
2903 | mstate->dtms_ipl = dtrace_getipl();
|
---|
2904 | mstate->dtms_present |= DTRACE_MSTATE_IPL;
|
---|
2905 | }
|
---|
2906 | return (mstate->dtms_ipl);
|
---|
2907 |
|
---|
2908 | case DIF_VAR_EPID:
|
---|
2909 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
|
---|
2910 | return (mstate->dtms_epid);
|
---|
2911 |
|
---|
2912 | case DIF_VAR_ID:
|
---|
2913 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
2914 | return (mstate->dtms_probe->dtpr_id);
|
---|
2915 |
|
---|
2916 | case DIF_VAR_STACKDEPTH:
|
---|
2917 | if (!dtrace_priv_kernel(state))
|
---|
2918 | return (0);
|
---|
2919 | if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
|
---|
2920 | int aframes = mstate->dtms_probe->dtpr_aframes + 2;
|
---|
2921 |
|
---|
2922 | mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
|
---|
2923 | mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
|
---|
2924 | }
|
---|
2925 | return (mstate->dtms_stackdepth);
|
---|
2926 |
|
---|
2927 | case DIF_VAR_USTACKDEPTH:
|
---|
2928 | if (!dtrace_priv_proc(state))
|
---|
2929 | return (0);
|
---|
2930 | if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
|
---|
2931 | /*
|
---|
2932 | * See comment in DIF_VAR_PID.
|
---|
2933 | */
|
---|
2934 | if (DTRACE_ANCHORED(mstate->dtms_probe) &&
|
---|
2935 | CPU_ON_INTR(CPU)) {
|
---|
2936 | mstate->dtms_ustackdepth = 0;
|
---|
2937 | } else {
|
---|
2938 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
2939 | mstate->dtms_ustackdepth =
|
---|
2940 | dtrace_getustackdepth();
|
---|
2941 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
2942 | }
|
---|
2943 | mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
|
---|
2944 | }
|
---|
2945 | return (mstate->dtms_ustackdepth);
|
---|
2946 |
|
---|
2947 | case DIF_VAR_CALLER:
|
---|
2948 | if (!dtrace_priv_kernel(state))
|
---|
2949 | return (0);
|
---|
2950 | if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
|
---|
2951 | int aframes = mstate->dtms_probe->dtpr_aframes + 2;
|
---|
2952 |
|
---|
2953 | if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
|
---|
2954 | /*
|
---|
2955 | * If this is an unanchored probe, we are
|
---|
2956 | * required to go through the slow path:
|
---|
2957 | * dtrace_caller() only guarantees correct
|
---|
2958 | * results for anchored probes.
|
---|
2959 | */
|
---|
2960 | pc_t caller[2];
|
---|
2961 |
|
---|
2962 | dtrace_getpcstack(caller, 2, aframes,
|
---|
2963 | (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
|
---|
2964 | mstate->dtms_caller = caller[1];
|
---|
2965 | } else if ((mstate->dtms_caller =
|
---|
2966 | dtrace_caller(aframes)) == VBDTCAST(uintptr_t)-1) {
|
---|
2967 | /*
|
---|
2968 | * We have failed to do this the quick way;
|
---|
2969 | * we must resort to the slower approach of
|
---|
2970 | * calling dtrace_getpcstack().
|
---|
2971 | */
|
---|
2972 | pc_t caller;
|
---|
2973 |
|
---|
2974 | dtrace_getpcstack(&caller, 1, aframes, NULL);
|
---|
2975 | mstate->dtms_caller = caller;
|
---|
2976 | }
|
---|
2977 |
|
---|
2978 | mstate->dtms_present |= DTRACE_MSTATE_CALLER;
|
---|
2979 | }
|
---|
2980 | return (mstate->dtms_caller);
|
---|
2981 |
|
---|
2982 | case DIF_VAR_UCALLER:
|
---|
2983 | if (!dtrace_priv_proc(state))
|
---|
2984 | return (0);
|
---|
2985 |
|
---|
2986 | if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
|
---|
2987 | uint64_t ustack[3];
|
---|
2988 |
|
---|
2989 | /*
|
---|
2990 | * dtrace_getupcstack() fills in the first uint64_t
|
---|
2991 | * with the current PID. The second uint64_t will
|
---|
2992 | * be the program counter at user-level. The third
|
---|
2993 | * uint64_t will contain the caller, which is what
|
---|
2994 | * we're after.
|
---|
2995 | */
|
---|
2996 | ustack[2] = NULL;
|
---|
2997 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
2998 | dtrace_getupcstack(ustack, 3);
|
---|
2999 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3000 | mstate->dtms_ucaller = ustack[2];
|
---|
3001 | mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
|
---|
3002 | }
|
---|
3003 |
|
---|
3004 | return (mstate->dtms_ucaller);
|
---|
3005 |
|
---|
3006 | case DIF_VAR_PROBEPROV:
|
---|
3007 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3008 | return (dtrace_dif_varstr(
|
---|
3009 | (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
|
---|
3010 | state, mstate));
|
---|
3011 |
|
---|
3012 | case DIF_VAR_PROBEMOD:
|
---|
3013 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3014 | return (dtrace_dif_varstr(
|
---|
3015 | (uintptr_t)mstate->dtms_probe->dtpr_mod,
|
---|
3016 | state, mstate));
|
---|
3017 |
|
---|
3018 | case DIF_VAR_PROBEFUNC:
|
---|
3019 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3020 | return (dtrace_dif_varstr(
|
---|
3021 | (uintptr_t)mstate->dtms_probe->dtpr_func,
|
---|
3022 | state, mstate));
|
---|
3023 |
|
---|
3024 | case DIF_VAR_PROBENAME:
|
---|
3025 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3026 | return (dtrace_dif_varstr(
|
---|
3027 | (uintptr_t)mstate->dtms_probe->dtpr_name,
|
---|
3028 | state, mstate));
|
---|
3029 |
|
---|
3030 | case DIF_VAR_PID:
|
---|
3031 | if (!dtrace_priv_proc(state))
|
---|
3032 | return (0);
|
---|
3033 |
|
---|
3034 | #ifndef VBOX
|
---|
3035 | /*
|
---|
3036 | * Note that we are assuming that an unanchored probe is
|
---|
3037 | * always due to a high-level interrupt. (And we're assuming
|
---|
3038 | * that there is only a single high level interrupt.)
|
---|
3039 | */
|
---|
3040 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3041 | return (pid0.pid_id);
|
---|
3042 |
|
---|
3043 | /*
|
---|
3044 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3045 | * it always points to a valid, allocated proc structure.
|
---|
3046 | * Further, it is always safe to dereference the p_pidp member
|
---|
3047 | * of one's own proc structure. (These are truisms becuase
|
---|
3048 | * threads and processes don't clean up their own state --
|
---|
3049 | * they leave that task to whomever reaps them.)
|
---|
3050 | */
|
---|
3051 | return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
|
---|
3052 | #else
|
---|
3053 | return (RTProcSelf());
|
---|
3054 | #endif
|
---|
3055 |
|
---|
3056 | case DIF_VAR_PPID:
|
---|
3057 | if (!dtrace_priv_proc(state))
|
---|
3058 | return (0);
|
---|
3059 |
|
---|
3060 | #ifndef VBOX
|
---|
3061 | /*
|
---|
3062 | * See comment in DIF_VAR_PID.
|
---|
3063 | */
|
---|
3064 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3065 | return (pid0.pid_id);
|
---|
3066 |
|
---|
3067 | /*
|
---|
3068 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3069 | * it always points to a valid, allocated proc structure.
|
---|
3070 | * (This is true because threads don't clean up their own
|
---|
3071 | * state -- they leave that task to whomever reaps them.)
|
---|
3072 | */
|
---|
3073 | return ((uint64_t)curthread->t_procp->p_ppid);
|
---|
3074 | #else
|
---|
3075 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3076 | return (0); /** @todo parent pid? */
|
---|
3077 | #endif
|
---|
3078 |
|
---|
3079 | case DIF_VAR_TID:
|
---|
3080 | #ifndef VBOX
|
---|
3081 | /*
|
---|
3082 | * See comment in DIF_VAR_PID.
|
---|
3083 | */
|
---|
3084 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3085 | return (0);
|
---|
3086 |
|
---|
3087 | return ((uint64_t)curthread->t_tid);
|
---|
3088 | #else
|
---|
3089 | return (RTThreadNativeSelf()); /** @todo proper tid? */
|
---|
3090 | #endif
|
---|
3091 |
|
---|
3092 | case DIF_VAR_EXECNAME:
|
---|
3093 | if (!dtrace_priv_proc(state))
|
---|
3094 | return (0);
|
---|
3095 |
|
---|
3096 | #ifndef VBOX
|
---|
3097 | /*
|
---|
3098 | * See comment in DIF_VAR_PID.
|
---|
3099 | */
|
---|
3100 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3101 | return ((uint64_t)(uintptr_t)p0.p_user.u_comm);
|
---|
3102 |
|
---|
3103 | /*
|
---|
3104 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3105 | * it always points to a valid, allocated proc structure.
|
---|
3106 | * (This is true because threads don't clean up their own
|
---|
3107 | * state -- they leave that task to whomever reaps them.)
|
---|
3108 | */
|
---|
3109 | return (dtrace_dif_varstr(
|
---|
3110 | (uintptr_t)curthread->t_procp->p_user.u_comm,
|
---|
3111 | state, mstate));
|
---|
3112 | #else
|
---|
3113 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3114 | return (0); /** @todo execname */
|
---|
3115 | #endif
|
---|
3116 |
|
---|
3117 | case DIF_VAR_ZONENAME:
|
---|
3118 | if (!dtrace_priv_proc(state))
|
---|
3119 | return (0);
|
---|
3120 |
|
---|
3121 | #ifndef VBOX
|
---|
3122 | /*
|
---|
3123 | * See comment in DIF_VAR_PID.
|
---|
3124 | */
|
---|
3125 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3126 | return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);
|
---|
3127 |
|
---|
3128 | /*
|
---|
3129 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3130 | * it always points to a valid, allocated proc structure.
|
---|
3131 | * (This is true because threads don't clean up their own
|
---|
3132 | * state -- they leave that task to whomever reaps them.)
|
---|
3133 | */
|
---|
3134 | return (dtrace_dif_varstr(
|
---|
3135 | (uintptr_t)curthread->t_procp->p_zone->zone_name,
|
---|
3136 | state, mstate));
|
---|
3137 | #else
|
---|
3138 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3139 | return (0);
|
---|
3140 | #endif
|
---|
3141 |
|
---|
3142 | case DIF_VAR_UID:
|
---|
3143 | if (!dtrace_priv_proc(state))
|
---|
3144 | return (0);
|
---|
3145 |
|
---|
3146 | #ifndef VBOX
|
---|
3147 | /*
|
---|
3148 | * See comment in DIF_VAR_PID.
|
---|
3149 | */
|
---|
3150 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3151 | return ((uint64_t)p0.p_cred->cr_uid);
|
---|
3152 |
|
---|
3153 | /*
|
---|
3154 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3155 | * it always points to a valid, allocated proc structure.
|
---|
3156 | * (This is true because threads don't clean up their own
|
---|
3157 | * state -- they leave that task to whomever reaps them.)
|
---|
3158 | *
|
---|
3159 | * Additionally, it is safe to dereference one's own process
|
---|
3160 | * credential, since this is never NULL after process birth.
|
---|
3161 | */
|
---|
3162 | return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
|
---|
3163 | #else
|
---|
3164 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3165 | return (0);
|
---|
3166 | #endif
|
---|
3167 |
|
---|
3168 | case DIF_VAR_GID:
|
---|
3169 | if (!dtrace_priv_proc(state))
|
---|
3170 | return (0);
|
---|
3171 |
|
---|
3172 | #ifndef VBOX
|
---|
3173 | /*
|
---|
3174 | * See comment in DIF_VAR_PID.
|
---|
3175 | */
|
---|
3176 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3177 | return ((uint64_t)p0.p_cred->cr_gid);
|
---|
3178 |
|
---|
3179 | /*
|
---|
3180 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3181 | * it always points to a valid, allocated proc structure.
|
---|
3182 | * (This is true because threads don't clean up their own
|
---|
3183 | * state -- they leave that task to whomever reaps them.)
|
---|
3184 | *
|
---|
3185 | * Additionally, it is safe to dereference one's own process
|
---|
3186 | * credential, since this is never NULL after process birth.
|
---|
3187 | */
|
---|
3188 | return ((uint64_t)curthread->t_procp->p_cred->cr_gid);
|
---|
3189 | #else
|
---|
3190 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3191 | return (0);
|
---|
3192 | #endif
|
---|
3193 |
|
---|
3194 | case DIF_VAR_ERRNO: {
|
---|
3195 | #ifndef VBOX
|
---|
3196 | klwp_t *lwp;
|
---|
3197 | #endif
|
---|
3198 | if (!dtrace_priv_proc(state))
|
---|
3199 | return (0);
|
---|
3200 |
|
---|
3201 | #ifndef VBOX
|
---|
3202 | /*
|
---|
3203 | * See comment in DIF_VAR_PID.
|
---|
3204 | */
|
---|
3205 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3206 | return (0);
|
---|
3207 |
|
---|
3208 | /*
|
---|
3209 | * It is always safe to dereference one's own t_lwp pointer in
|
---|
3210 | * the event that this pointer is non-NULL. (This is true
|
---|
3211 | * because threads and lwps don't clean up their own state --
|
---|
3212 | * they leave that task to whomever reaps them.)
|
---|
3213 | */
|
---|
3214 | if ((lwp = curthread->t_lwp) == NULL)
|
---|
3215 | return (0);
|
---|
3216 |
|
---|
3217 | return ((uint64_t)lwp->lwp_errno);
|
---|
3218 | #else
|
---|
3219 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3220 | return (0);
|
---|
3221 | #endif
|
---|
3222 | }
|
---|
3223 | default:
|
---|
3224 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3225 | return (0);
|
---|
3226 | }
|
---|
3227 | }
|
---|
3228 |
|
---|
3229 | /*
|
---|
3230 | * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
|
---|
3231 | * Notice that we don't bother validating the proper number of arguments or
|
---|
3232 | * their types in the tuple stack. This isn't needed because all argument
|
---|
3233 | * interpretation is safe thanks to our load safety -- the worst that can
|
---|
3234 | * happen is that a bogus program can obtain bogus results.
|
---|
3235 | */
|
---|
3236 | static void
|
---|
3237 | dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
|
---|
3238 | dtrace_key_t *tupregs, int nargs,
|
---|
3239 | dtrace_mstate_t *mstate, dtrace_state_t *state)
|
---|
3240 | {
|
---|
3241 | volatile uint16_t *flags = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
3242 | volatile uintptr_t *illval = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval;
|
---|
3243 | dtrace_vstate_t *vstate = &state->dts_vstate;
|
---|
3244 |
|
---|
3245 | #ifndef VBOX
|
---|
3246 | union {
|
---|
3247 | mutex_impl_t mi;
|
---|
3248 | uint64_t mx;
|
---|
3249 | } m;
|
---|
3250 |
|
---|
3251 | union {
|
---|
3252 | krwlock_t ri;
|
---|
3253 | uintptr_t rw;
|
---|
3254 | } r;
|
---|
3255 | #endif
|
---|
3256 |
|
---|
3257 | switch (subr) {
|
---|
3258 | case DIF_SUBR_RAND:
|
---|
3259 | regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
|
---|
3260 | break;
|
---|
3261 |
|
---|
3262 | case DIF_SUBR_MUTEX_OWNED:
|
---|
3263 | #ifndef VBOX
|
---|
3264 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3265 | mstate, vstate)) {
|
---|
3266 | regs[rd] = NULL;
|
---|
3267 | break;
|
---|
3268 | }
|
---|
3269 |
|
---|
3270 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3271 | if (MUTEX_TYPE_ADAPTIVE(&m.mi))
|
---|
3272 | regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
|
---|
3273 | else
|
---|
3274 | regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
|
---|
3275 | #else
|
---|
3276 | regs[rd] = 0;
|
---|
3277 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3278 | #endif
|
---|
3279 | break;
|
---|
3280 |
|
---|
3281 | case DIF_SUBR_MUTEX_OWNER:
|
---|
3282 | #ifndef VBOX
|
---|
3283 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3284 | mstate, vstate)) {
|
---|
3285 | regs[rd] = NULL;
|
---|
3286 | break;
|
---|
3287 | }
|
---|
3288 |
|
---|
3289 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3290 | if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
|
---|
3291 | MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
|
---|
3292 | regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
|
---|
3293 | else
|
---|
3294 | regs[rd] = 0;
|
---|
3295 | #else
|
---|
3296 | regs[rd] = 0;
|
---|
3297 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3298 | #endif
|
---|
3299 | break;
|
---|
3300 |
|
---|
3301 | case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
|
---|
3302 | #ifndef VBOX
|
---|
3303 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3304 | mstate, vstate)) {
|
---|
3305 | regs[rd] = NULL;
|
---|
3306 | break;
|
---|
3307 | }
|
---|
3308 |
|
---|
3309 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3310 | regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
|
---|
3311 | #else
|
---|
3312 | regs[rd] = 0;
|
---|
3313 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3314 | #endif
|
---|
3315 | break;
|
---|
3316 |
|
---|
3317 | case DIF_SUBR_MUTEX_TYPE_SPIN:
|
---|
3318 | #ifndef VBOX
|
---|
3319 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3320 | mstate, vstate)) {
|
---|
3321 | regs[rd] = NULL;
|
---|
3322 | break;
|
---|
3323 | }
|
---|
3324 |
|
---|
3325 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3326 | regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
|
---|
3327 | #else
|
---|
3328 | regs[rd] = 0;
|
---|
3329 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3330 | #endif
|
---|
3331 | break;
|
---|
3332 |
|
---|
3333 | case DIF_SUBR_RW_READ_HELD: {
|
---|
3334 | #ifndef VBOX
|
---|
3335 | uintptr_t tmp;
|
---|
3336 |
|
---|
3337 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
|
---|
3338 | mstate, vstate)) {
|
---|
3339 | regs[rd] = NULL;
|
---|
3340 | break;
|
---|
3341 | }
|
---|
3342 |
|
---|
3343 | r.rw = dtrace_loadptr(tupregs[0].dttk_value);
|
---|
3344 | regs[rd] = _RW_READ_HELD(&r.ri, tmp);
|
---|
3345 | #else
|
---|
3346 | regs[rd] = 0;
|
---|
3347 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3348 | #endif
|
---|
3349 | break;
|
---|
3350 | }
|
---|
3351 |
|
---|
3352 | case DIF_SUBR_RW_WRITE_HELD:
|
---|
3353 | #ifndef VBOX
|
---|
3354 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
|
---|
3355 | mstate, vstate)) {
|
---|
3356 | regs[rd] = NULL;
|
---|
3357 | break;
|
---|
3358 | }
|
---|
3359 |
|
---|
3360 | r.rw = dtrace_loadptr(tupregs[0].dttk_value);
|
---|
3361 | regs[rd] = _RW_WRITE_HELD(&r.ri);
|
---|
3362 | #else
|
---|
3363 | regs[rd] = 0;
|
---|
3364 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3365 | #endif
|
---|
3366 | break;
|
---|
3367 |
|
---|
3368 | case DIF_SUBR_RW_ISWRITER:
|
---|
3369 | #ifndef VBOX
|
---|
3370 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
|
---|
3371 | mstate, vstate)) {
|
---|
3372 | regs[rd] = NULL;
|
---|
3373 | break;
|
---|
3374 | }
|
---|
3375 |
|
---|
3376 | r.rw = dtrace_loadptr(tupregs[0].dttk_value);
|
---|
3377 | regs[rd] = _RW_ISWRITER(&r.ri);
|
---|
3378 | #else
|
---|
3379 | regs[rd] = 0;
|
---|
3380 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3381 | #endif
|
---|
3382 | break;
|
---|
3383 |
|
---|
3384 | case DIF_SUBR_BCOPY: {
|
---|
3385 | /*
|
---|
3386 | * We need to be sure that the destination is in the scratch
|
---|
3387 | * region -- no other region is allowed.
|
---|
3388 | */
|
---|
3389 | uintptr_t src = tupregs[0].dttk_value;
|
---|
3390 | uintptr_t dest = tupregs[1].dttk_value;
|
---|
3391 | size_t size = tupregs[2].dttk_value;
|
---|
3392 |
|
---|
3393 | if (!dtrace_inscratch(dest, size, mstate)) {
|
---|
3394 | *flags |= CPU_DTRACE_BADADDR;
|
---|
3395 | *illval = regs[rd];
|
---|
3396 | break;
|
---|
3397 | }
|
---|
3398 |
|
---|
3399 | if (!dtrace_canload(src, size, mstate, vstate)) {
|
---|
3400 | regs[rd] = NULL;
|
---|
3401 | break;
|
---|
3402 | }
|
---|
3403 |
|
---|
3404 | dtrace_bcopy((void *)src, (void *)dest, size);
|
---|
3405 | break;
|
---|
3406 | }
|
---|
3407 |
|
---|
3408 | case DIF_SUBR_ALLOCA:
|
---|
3409 | case DIF_SUBR_COPYIN: {
|
---|
3410 | uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
|
---|
3411 | uint64_t size =
|
---|
3412 | tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
|
---|
3413 | size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
|
---|
3414 |
|
---|
3415 | /*
|
---|
3416 | * This action doesn't require any credential checks since
|
---|
3417 | * probes will not activate in user contexts to which the
|
---|
3418 | * enabling user does not have permissions.
|
---|
3419 | */
|
---|
3420 |
|
---|
3421 | /*
|
---|
3422 | * Rounding up the user allocation size could have overflowed
|
---|
3423 | * a large, bogus allocation (like -1ULL) to 0.
|
---|
3424 | */
|
---|
3425 | if (scratch_size < size ||
|
---|
3426 | !DTRACE_INSCRATCH(mstate, scratch_size)) {
|
---|
3427 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3428 | regs[rd] = NULL;
|
---|
3429 | break;
|
---|
3430 | }
|
---|
3431 |
|
---|
3432 | if (subr == DIF_SUBR_COPYIN) {
|
---|
3433 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3434 | dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
|
---|
3435 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3436 | }
|
---|
3437 |
|
---|
3438 | mstate->dtms_scratch_ptr += scratch_size;
|
---|
3439 | regs[rd] = dest;
|
---|
3440 | break;
|
---|
3441 | }
|
---|
3442 |
|
---|
3443 | case DIF_SUBR_COPYINTO: {
|
---|
3444 | uint64_t size = tupregs[1].dttk_value;
|
---|
3445 | uintptr_t dest = tupregs[2].dttk_value;
|
---|
3446 |
|
---|
3447 | /*
|
---|
3448 | * This action doesn't require any credential checks since
|
---|
3449 | * probes will not activate in user contexts to which the
|
---|
3450 | * enabling user does not have permissions.
|
---|
3451 | */
|
---|
3452 | if (!dtrace_inscratch(dest, size, mstate)) {
|
---|
3453 | *flags |= CPU_DTRACE_BADADDR;
|
---|
3454 | *illval = regs[rd];
|
---|
3455 | break;
|
---|
3456 | }
|
---|
3457 |
|
---|
3458 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3459 | dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
|
---|
3460 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3461 | break;
|
---|
3462 | }
|
---|
3463 |
|
---|
3464 | case DIF_SUBR_COPYINSTR: {
|
---|
3465 | uintptr_t dest = mstate->dtms_scratch_ptr;
|
---|
3466 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3467 |
|
---|
3468 | if (nargs > 1 && tupregs[1].dttk_value < size)
|
---|
3469 | size = tupregs[1].dttk_value + 1;
|
---|
3470 |
|
---|
3471 | /*
|
---|
3472 | * This action doesn't require any credential checks since
|
---|
3473 | * probes will not activate in user contexts to which the
|
---|
3474 | * enabling user does not have permissions.
|
---|
3475 | */
|
---|
3476 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
3477 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3478 | regs[rd] = NULL;
|
---|
3479 | break;
|
---|
3480 | }
|
---|
3481 |
|
---|
3482 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3483 | dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
|
---|
3484 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3485 |
|
---|
3486 | ((char *)dest)[size - 1] = '\0';
|
---|
3487 | mstate->dtms_scratch_ptr += size;
|
---|
3488 | regs[rd] = dest;
|
---|
3489 | break;
|
---|
3490 | }
|
---|
3491 |
|
---|
3492 | case DIF_SUBR_MSGSIZE:
|
---|
3493 | case DIF_SUBR_MSGDSIZE: {
|
---|
3494 | #ifndef VBOX
|
---|
3495 | uintptr_t baddr = tupregs[0].dttk_value, daddr;
|
---|
3496 | uintptr_t wptr, rptr;
|
---|
3497 | size_t count = 0;
|
---|
3498 | int cont = 0;
|
---|
3499 |
|
---|
3500 | while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
3501 |
|
---|
3502 | if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
|
---|
3503 | vstate)) {
|
---|
3504 | regs[rd] = NULL;
|
---|
3505 | break;
|
---|
3506 | }
|
---|
3507 |
|
---|
3508 | wptr = dtrace_loadptr(baddr +
|
---|
3509 | offsetof(mblk_t, b_wptr));
|
---|
3510 |
|
---|
3511 | rptr = dtrace_loadptr(baddr +
|
---|
3512 | offsetof(mblk_t, b_rptr));
|
---|
3513 |
|
---|
3514 | if (wptr < rptr) {
|
---|
3515 | *flags |= CPU_DTRACE_BADADDR;
|
---|
3516 | *illval = tupregs[0].dttk_value;
|
---|
3517 | break;
|
---|
3518 | }
|
---|
3519 |
|
---|
3520 | daddr = dtrace_loadptr(baddr +
|
---|
3521 | offsetof(mblk_t, b_datap));
|
---|
3522 |
|
---|
3523 | baddr = dtrace_loadptr(baddr +
|
---|
3524 | offsetof(mblk_t, b_cont));
|
---|
3525 |
|
---|
3526 | /*
|
---|
3527 | * We want to prevent against denial-of-service here,
|
---|
3528 | * so we're only going to search the list for
|
---|
3529 | * dtrace_msgdsize_max mblks.
|
---|
3530 | */
|
---|
3531 | if (cont++ > dtrace_msgdsize_max) {
|
---|
3532 | *flags |= CPU_DTRACE_ILLOP;
|
---|
3533 | break;
|
---|
3534 | }
|
---|
3535 |
|
---|
3536 | if (subr == DIF_SUBR_MSGDSIZE) {
|
---|
3537 | if (dtrace_load8(daddr +
|
---|
3538 | offsetof(dblk_t, db_type)) != M_DATA)
|
---|
3539 | continue;
|
---|
3540 | }
|
---|
3541 |
|
---|
3542 | count += wptr - rptr;
|
---|
3543 | }
|
---|
3544 |
|
---|
3545 | if (!(*flags & CPU_DTRACE_FAULT))
|
---|
3546 | regs[rd] = count;
|
---|
3547 |
|
---|
3548 | #else
|
---|
3549 | regs[rd] = 0;
|
---|
3550 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3551 | #endif
|
---|
3552 | break;
|
---|
3553 | }
|
---|
3554 |
|
---|
3555 | case DIF_SUBR_PROGENYOF: {
|
---|
3556 | #ifndef VBOX
|
---|
3557 | pid_t pid = tupregs[0].dttk_value;
|
---|
3558 | proc_t *p;
|
---|
3559 | int rval = 0;
|
---|
3560 |
|
---|
3561 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3562 |
|
---|
3563 | for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
|
---|
3564 | if (p->p_pidp->pid_id == pid) {
|
---|
3565 | rval = 1;
|
---|
3566 | break;
|
---|
3567 | }
|
---|
3568 | }
|
---|
3569 |
|
---|
3570 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3571 |
|
---|
3572 | regs[rd] = rval;
|
---|
3573 | #else
|
---|
3574 | regs[rd] = 0;
|
---|
3575 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3576 | #endif
|
---|
3577 | break;
|
---|
3578 | }
|
---|
3579 |
|
---|
3580 | case DIF_SUBR_SPECULATION:
|
---|
3581 | regs[rd] = dtrace_speculation(state);
|
---|
3582 | break;
|
---|
3583 |
|
---|
3584 | case DIF_SUBR_COPYOUT: {
|
---|
3585 | uintptr_t kaddr = tupregs[0].dttk_value;
|
---|
3586 | uintptr_t uaddr = tupregs[1].dttk_value;
|
---|
3587 | uint64_t size = tupregs[2].dttk_value;
|
---|
3588 |
|
---|
3589 | if (!dtrace_destructive_disallow &&
|
---|
3590 | dtrace_priv_proc_control(state) &&
|
---|
3591 | !dtrace_istoxic(kaddr, size)) {
|
---|
3592 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3593 | dtrace_copyout(kaddr, uaddr, size, flags);
|
---|
3594 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3595 | }
|
---|
3596 | break;
|
---|
3597 | }
|
---|
3598 |
|
---|
3599 | case DIF_SUBR_COPYOUTSTR: {
|
---|
3600 | uintptr_t kaddr = tupregs[0].dttk_value;
|
---|
3601 | uintptr_t uaddr = tupregs[1].dttk_value;
|
---|
3602 | uint64_t size = tupregs[2].dttk_value;
|
---|
3603 |
|
---|
3604 | if (!dtrace_destructive_disallow &&
|
---|
3605 | dtrace_priv_proc_control(state) &&
|
---|
3606 | !dtrace_istoxic(kaddr, size)) {
|
---|
3607 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3608 | dtrace_copyoutstr(kaddr, uaddr, size, flags);
|
---|
3609 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3610 | }
|
---|
3611 | break;
|
---|
3612 | }
|
---|
3613 |
|
---|
3614 | case DIF_SUBR_STRLEN: {
|
---|
3615 | size_t sz;
|
---|
3616 | uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
|
---|
3617 | sz = dtrace_strlen((char *)addr,
|
---|
3618 | state->dts_options[DTRACEOPT_STRSIZE]);
|
---|
3619 |
|
---|
3620 | if (!dtrace_canload(addr, sz + 1, mstate, vstate)) {
|
---|
3621 | regs[rd] = NULL;
|
---|
3622 | break;
|
---|
3623 | }
|
---|
3624 |
|
---|
3625 | regs[rd] = sz;
|
---|
3626 |
|
---|
3627 | break;
|
---|
3628 | }
|
---|
3629 |
|
---|
3630 | case DIF_SUBR_STRCHR:
|
---|
3631 | case DIF_SUBR_STRRCHR: {
|
---|
3632 | /*
|
---|
3633 | * We're going to iterate over the string looking for the
|
---|
3634 | * specified character. We will iterate until we have reached
|
---|
3635 | * the string length or we have found the character. If this
|
---|
3636 | * is DIF_SUBR_STRRCHR, we will look for the last occurrence
|
---|
3637 | * of the specified character instead of the first.
|
---|
3638 | */
|
---|
3639 | uintptr_t saddr = tupregs[0].dttk_value;
|
---|
3640 | uintptr_t addr = tupregs[0].dttk_value;
|
---|
3641 | uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3642 | char c, target = (char)tupregs[1].dttk_value;
|
---|
3643 |
|
---|
3644 | for (regs[rd] = NULL; addr < limit; addr++) {
|
---|
3645 | if ((c = dtrace_load8(addr)) == target) {
|
---|
3646 | regs[rd] = addr;
|
---|
3647 |
|
---|
3648 | if (subr == DIF_SUBR_STRCHR)
|
---|
3649 | break;
|
---|
3650 | }
|
---|
3651 |
|
---|
3652 | if (c == '\0')
|
---|
3653 | break;
|
---|
3654 | }
|
---|
3655 |
|
---|
3656 | if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) {
|
---|
3657 | regs[rd] = NULL;
|
---|
3658 | break;
|
---|
3659 | }
|
---|
3660 |
|
---|
3661 | break;
|
---|
3662 | }
|
---|
3663 |
|
---|
3664 | case DIF_SUBR_STRSTR:
|
---|
3665 | case DIF_SUBR_INDEX:
|
---|
3666 | case DIF_SUBR_RINDEX: {
|
---|
3667 | /*
|
---|
3668 | * We're going to iterate over the string looking for the
|
---|
3669 | * specified string. We will iterate until we have reached
|
---|
3670 | * the string length or we have found the string. (Yes, this
|
---|
3671 | * is done in the most naive way possible -- but considering
|
---|
3672 | * that the string we're searching for is likely to be
|
---|
3673 | * relatively short, the complexity of Rabin-Karp or similar
|
---|
3674 | * hardly seems merited.)
|
---|
3675 | */
|
---|
3676 | char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
|
---|
3677 | char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
|
---|
3678 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3679 | size_t len = dtrace_strlen(addr, size);
|
---|
3680 | size_t sublen = dtrace_strlen(substr, size);
|
---|
3681 | char *limit = addr + len, *orig = addr;
|
---|
3682 | int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
|
---|
3683 | int inc = 1;
|
---|
3684 |
|
---|
3685 | regs[rd] = notfound;
|
---|
3686 |
|
---|
3687 | if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
|
---|
3688 | regs[rd] = NULL;
|
---|
3689 | break;
|
---|
3690 | }
|
---|
3691 |
|
---|
3692 | if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
|
---|
3693 | vstate)) {
|
---|
3694 | regs[rd] = NULL;
|
---|
3695 | break;
|
---|
3696 | }
|
---|
3697 |
|
---|
3698 | /*
|
---|
3699 | * strstr() and index()/rindex() have similar semantics if
|
---|
3700 | * both strings are the empty string: strstr() returns a
|
---|
3701 | * pointer to the (empty) string, and index() and rindex()
|
---|
3702 | * both return index 0 (regardless of any position argument).
|
---|
3703 | */
|
---|
3704 | if (sublen == 0 && len == 0) {
|
---|
3705 | if (subr == DIF_SUBR_STRSTR)
|
---|
3706 | regs[rd] = (uintptr_t)addr;
|
---|
3707 | else
|
---|
3708 | regs[rd] = 0;
|
---|
3709 | break;
|
---|
3710 | }
|
---|
3711 |
|
---|
3712 | if (subr != DIF_SUBR_STRSTR) {
|
---|
3713 | if (subr == DIF_SUBR_RINDEX) {
|
---|
3714 | limit = orig - 1;
|
---|
3715 | addr += len;
|
---|
3716 | inc = -1;
|
---|
3717 | }
|
---|
3718 |
|
---|
3719 | /*
|
---|
3720 | * Both index() and rindex() take an optional position
|
---|
3721 | * argument that denotes the starting position.
|
---|
3722 | */
|
---|
3723 | if (nargs == 3) {
|
---|
3724 | int64_t pos = (int64_t)tupregs[2].dttk_value;
|
---|
3725 |
|
---|
3726 | /*
|
---|
3727 | * If the position argument to index() is
|
---|
3728 | * negative, Perl implicitly clamps it at
|
---|
3729 | * zero. This semantic is a little surprising
|
---|
3730 | * given the special meaning of negative
|
---|
3731 | * positions to similar Perl functions like
|
---|
3732 | * substr(), but it appears to reflect a
|
---|
3733 | * notion that index() can start from a
|
---|
3734 | * negative index and increment its way up to
|
---|
3735 | * the string. Given this notion, Perl's
|
---|
3736 | * rindex() is at least self-consistent in
|
---|
3737 | * that it implicitly clamps positions greater
|
---|
3738 | * than the string length to be the string
|
---|
3739 | * length. Where Perl completely loses
|
---|
3740 | * coherence, however, is when the specified
|
---|
3741 | * substring is the empty string (""). In
|
---|
3742 | * this case, even if the position is
|
---|
3743 | * negative, rindex() returns 0 -- and even if
|
---|
3744 | * the position is greater than the length,
|
---|
3745 | * index() returns the string length. These
|
---|
3746 | * semantics violate the notion that index()
|
---|
3747 | * should never return a value less than the
|
---|
3748 | * specified position and that rindex() should
|
---|
3749 | * never return a value greater than the
|
---|
3750 | * specified position. (One assumes that
|
---|
3751 | * these semantics are artifacts of Perl's
|
---|
3752 | * implementation and not the results of
|
---|
3753 | * deliberate design -- it beggars belief that
|
---|
3754 | * even Larry Wall could desire such oddness.)
|
---|
3755 | * While in the abstract one would wish for
|
---|
3756 | * consistent position semantics across
|
---|
3757 | * substr(), index() and rindex() -- or at the
|
---|
3758 | * very least self-consistent position
|
---|
3759 | * semantics for index() and rindex() -- we
|
---|
3760 | * instead opt to keep with the extant Perl
|
---|
3761 | * semantics, in all their broken glory. (Do
|
---|
3762 | * we have more desire to maintain Perl's
|
---|
3763 | * semantics than Perl does? Probably.)
|
---|
3764 | */
|
---|
3765 | if (subr == DIF_SUBR_RINDEX) {
|
---|
3766 | if (pos < 0) {
|
---|
3767 | if (sublen == 0)
|
---|
3768 | regs[rd] = 0;
|
---|
3769 | break;
|
---|
3770 | }
|
---|
3771 |
|
---|
3772 | if (VBDTCAST(uint64_t)pos > len)
|
---|
3773 | pos = len;
|
---|
3774 | } else {
|
---|
3775 | if (pos < 0)
|
---|
3776 | pos = 0;
|
---|
3777 |
|
---|
3778 | if (VBDTCAST(uint64_t)pos >= len) {
|
---|
3779 | if (sublen == 0)
|
---|
3780 | regs[rd] = len;
|
---|
3781 | break;
|
---|
3782 | }
|
---|
3783 | }
|
---|
3784 |
|
---|
3785 | addr = orig + pos;
|
---|
3786 | }
|
---|
3787 | }
|
---|
3788 |
|
---|
3789 | for (regs[rd] = notfound; addr != limit; addr += inc) {
|
---|
3790 | if (dtrace_strncmp(addr, substr, sublen) == 0) {
|
---|
3791 | if (subr != DIF_SUBR_STRSTR) {
|
---|
3792 | /*
|
---|
3793 | * As D index() and rindex() are
|
---|
3794 | * modeled on Perl (and not on awk),
|
---|
3795 | * we return a zero-based (and not a
|
---|
3796 | * one-based) index. (For you Perl
|
---|
3797 | * weenies: no, we're not going to add
|
---|
3798 | * $[ -- and shouldn't you be at a con
|
---|
3799 | * or something?)
|
---|
3800 | */
|
---|
3801 | regs[rd] = (uintptr_t)(addr - orig);
|
---|
3802 | break;
|
---|
3803 | }
|
---|
3804 |
|
---|
3805 | ASSERT(subr == DIF_SUBR_STRSTR);
|
---|
3806 | regs[rd] = (uintptr_t)addr;
|
---|
3807 | break;
|
---|
3808 | }
|
---|
3809 | }
|
---|
3810 |
|
---|
3811 | break;
|
---|
3812 | }
|
---|
3813 |
|
---|
3814 | case DIF_SUBR_STRTOK: {
|
---|
3815 | uintptr_t addr = tupregs[0].dttk_value;
|
---|
3816 | uintptr_t tokaddr = tupregs[1].dttk_value;
|
---|
3817 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3818 | uintptr_t limit, toklimit = tokaddr + size;
|
---|
3819 | uint8_t c VBDTUNASS(0), tokmap[32]; /* 256 / 8 */
|
---|
3820 | char *dest = (char *)mstate->dtms_scratch_ptr;
|
---|
3821 | VBDTTYPE(unsigned,int) i;
|
---|
3822 |
|
---|
3823 | /*
|
---|
3824 | * Check both the token buffer and (later) the input buffer,
|
---|
3825 | * since both could be non-scratch addresses.
|
---|
3826 | */
|
---|
3827 | if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) {
|
---|
3828 | regs[rd] = NULL;
|
---|
3829 | break;
|
---|
3830 | }
|
---|
3831 |
|
---|
3832 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
3833 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3834 | regs[rd] = NULL;
|
---|
3835 | break;
|
---|
3836 | }
|
---|
3837 |
|
---|
3838 | if (addr == NULL) {
|
---|
3839 | /*
|
---|
3840 | * If the address specified is NULL, we use our saved
|
---|
3841 | * strtok pointer from the mstate. Note that this
|
---|
3842 | * means that the saved strtok pointer is _only_
|
---|
3843 | * valid within multiple enablings of the same probe --
|
---|
3844 | * it behaves like an implicit clause-local variable.
|
---|
3845 | */
|
---|
3846 | addr = mstate->dtms_strtok;
|
---|
3847 | } else {
|
---|
3848 | /*
|
---|
3849 | * If the user-specified address is non-NULL we must
|
---|
3850 | * access check it. This is the only time we have
|
---|
3851 | * a chance to do so, since this address may reside
|
---|
3852 | * in the string table of this clause-- future calls
|
---|
3853 | * (when we fetch addr from mstate->dtms_strtok)
|
---|
3854 | * would fail this access check.
|
---|
3855 | */
|
---|
3856 | if (!dtrace_strcanload(addr, size, mstate, vstate)) {
|
---|
3857 | regs[rd] = NULL;
|
---|
3858 | break;
|
---|
3859 | }
|
---|
3860 | }
|
---|
3861 |
|
---|
3862 | /*
|
---|
3863 | * First, zero the token map, and then process the token
|
---|
3864 | * string -- setting a bit in the map for every character
|
---|
3865 | * found in the token string.
|
---|
3866 | */
|
---|
3867 | for (i = 0; i < sizeof (tokmap); i++)
|
---|
3868 | tokmap[i] = 0;
|
---|
3869 |
|
---|
3870 | for (; tokaddr < toklimit; tokaddr++) {
|
---|
3871 | if ((c = dtrace_load8(tokaddr)) == '\0')
|
---|
3872 | break;
|
---|
3873 |
|
---|
3874 | ASSERT((c >> 3) < sizeof (tokmap));
|
---|
3875 | tokmap[c >> 3] |= (1 << (c & 0x7));
|
---|
3876 | }
|
---|
3877 |
|
---|
3878 | for (limit = addr + size; addr < limit; addr++) {
|
---|
3879 | /*
|
---|
3880 | * We're looking for a character that is _not_ contained
|
---|
3881 | * in the token string.
|
---|
3882 | */
|
---|
3883 | if ((c = dtrace_load8(addr)) == '\0')
|
---|
3884 | break;
|
---|
3885 |
|
---|
3886 | if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
|
---|
3887 | break;
|
---|
3888 | }
|
---|
3889 |
|
---|
3890 | if (c == '\0') {
|
---|
3891 | /*
|
---|
3892 | * We reached the end of the string without finding
|
---|
3893 | * any character that was not in the token string.
|
---|
3894 | * We return NULL in this case, and we set the saved
|
---|
3895 | * address to NULL as well.
|
---|
3896 | */
|
---|
3897 | regs[rd] = NULL;
|
---|
3898 | mstate->dtms_strtok = NULL;
|
---|
3899 | break;
|
---|
3900 | }
|
---|
3901 |
|
---|
3902 | /*
|
---|
3903 | * From here on, we're copying into the destination string.
|
---|
3904 | */
|
---|
3905 | for (i = 0; addr < limit && i < size - 1; addr++) {
|
---|
3906 | if ((c = dtrace_load8(addr)) == '\0')
|
---|
3907 | break;
|
---|
3908 |
|
---|
3909 | if (tokmap[c >> 3] & (1 << (c & 0x7)))
|
---|
3910 | break;
|
---|
3911 |
|
---|
3912 | ASSERT(i < size);
|
---|
3913 | dest[i++] = c;
|
---|
3914 | }
|
---|
3915 |
|
---|
3916 | ASSERT(i < size);
|
---|
3917 | dest[i] = '\0';
|
---|
3918 | regs[rd] = (uintptr_t)dest;
|
---|
3919 | mstate->dtms_scratch_ptr += size;
|
---|
3920 | mstate->dtms_strtok = addr;
|
---|
3921 | break;
|
---|
3922 | }
|
---|
3923 |
|
---|
3924 | case DIF_SUBR_SUBSTR: {
|
---|
3925 | uintptr_t s = tupregs[0].dttk_value;
|
---|
3926 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3927 | char *d = (char *)mstate->dtms_scratch_ptr;
|
---|
3928 | int64_t index = (int64_t)tupregs[1].dttk_value;
|
---|
3929 | int64_t remaining = (int64_t)tupregs[2].dttk_value;
|
---|
3930 | size_t len = dtrace_strlen((char *)s, size);
|
---|
3931 | int64_t i;
|
---|
3932 |
|
---|
3933 | if (!dtrace_canload(s, len + 1, mstate, vstate)) {
|
---|
3934 | regs[rd] = NULL;
|
---|
3935 | break;
|
---|
3936 | }
|
---|
3937 |
|
---|
3938 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
3939 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3940 | regs[rd] = NULL;
|
---|
3941 | break;
|
---|
3942 | }
|
---|
3943 |
|
---|
3944 | if (nargs <= 2)
|
---|
3945 | remaining = (int64_t)size;
|
---|
3946 |
|
---|
3947 | if (index < 0) {
|
---|
3948 | index += len;
|
---|
3949 |
|
---|
3950 | if (index < 0 && index + remaining > 0) {
|
---|
3951 | remaining += index;
|
---|
3952 | index = 0;
|
---|
3953 | }
|
---|
3954 | }
|
---|
3955 |
|
---|
3956 | if (VBDTCAST(uint64_t)index >= len || index < 0) {
|
---|
3957 | remaining = 0;
|
---|
3958 | } else if (remaining < 0) {
|
---|
3959 | remaining += len - index;
|
---|
3960 | } else if (VBDTCAST(uint64_t)index + remaining > size) {
|
---|
3961 | remaining = size - index;
|
---|
3962 | }
|
---|
3963 |
|
---|
3964 | for (i = 0; i < remaining; i++) {
|
---|
3965 | if ((d[i] = dtrace_load8(s + index + i)) == '\0')
|
---|
3966 | break;
|
---|
3967 | }
|
---|
3968 |
|
---|
3969 | d[i] = '\0';
|
---|
3970 |
|
---|
3971 | mstate->dtms_scratch_ptr += size;
|
---|
3972 | regs[rd] = (uintptr_t)d;
|
---|
3973 | break;
|
---|
3974 | }
|
---|
3975 |
|
---|
3976 | case DIF_SUBR_GETMAJOR:
|
---|
3977 | #ifndef VBOX
|
---|
3978 | #ifdef _LP64
|
---|
3979 | regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
|
---|
3980 | #else
|
---|
3981 | regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
|
---|
3982 | #endif
|
---|
3983 | #else
|
---|
3984 | regs[rd] = 0;
|
---|
3985 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3986 | #endif
|
---|
3987 | break;
|
---|
3988 |
|
---|
3989 | case DIF_SUBR_GETMINOR:
|
---|
3990 | #ifndef VBOX
|
---|
3991 | #ifdef _LP64
|
---|
3992 | regs[rd] = tupregs[0].dttk_value & MAXMIN64;
|
---|
3993 | #else
|
---|
3994 | regs[rd] = tupregs[0].dttk_value & MAXMIN;
|
---|
3995 | #endif
|
---|
3996 | #else
|
---|
3997 | regs[rd] = 0;
|
---|
3998 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3999 | #endif
|
---|
4000 | break;
|
---|
4001 |
|
---|
4002 | case DIF_SUBR_DDI_PATHNAME: {
|
---|
4003 | #ifndef VBOX
|
---|
4004 | /*
|
---|
4005 | * This one is a galactic mess. We are going to roughly
|
---|
4006 | * emulate ddi_pathname(), but it's made more complicated
|
---|
4007 | * by the fact that we (a) want to include the minor name and
|
---|
4008 | * (b) must proceed iteratively instead of recursively.
|
---|
4009 | */
|
---|
4010 | uintptr_t dest = mstate->dtms_scratch_ptr;
|
---|
4011 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4012 | char *start = (char *)dest, *end = start + size - 1;
|
---|
4013 | uintptr_t daddr = tupregs[0].dttk_value;
|
---|
4014 | int64_t minor = (int64_t)tupregs[1].dttk_value;
|
---|
4015 | char *s;
|
---|
4016 | int i, len, depth = 0;
|
---|
4017 |
|
---|
4018 | /*
|
---|
4019 | * Due to all the pointer jumping we do and context we must
|
---|
4020 | * rely upon, we just mandate that the user must have kernel
|
---|
4021 | * read privileges to use this routine.
|
---|
4022 | */
|
---|
4023 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
|
---|
4024 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4025 | *illval = daddr;
|
---|
4026 | regs[rd] = NULL;
|
---|
4027 | }
|
---|
4028 |
|
---|
4029 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4030 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4031 | regs[rd] = NULL;
|
---|
4032 | break;
|
---|
4033 | }
|
---|
4034 |
|
---|
4035 | *end = '\0';
|
---|
4036 |
|
---|
4037 | /*
|
---|
4038 | * We want to have a name for the minor. In order to do this,
|
---|
4039 | * we need to walk the minor list from the devinfo. We want
|
---|
4040 | * to be sure that we don't infinitely walk a circular list,
|
---|
4041 | * so we check for circularity by sending a scout pointer
|
---|
4042 | * ahead two elements for every element that we iterate over;
|
---|
4043 | * if the list is circular, these will ultimately point to the
|
---|
4044 | * same element. You may recognize this little trick as the
|
---|
4045 | * answer to a stupid interview question -- one that always
|
---|
4046 | * seems to be asked by those who had to have it laboriously
|
---|
4047 | * explained to them, and who can't even concisely describe
|
---|
4048 | * the conditions under which one would be forced to resort to
|
---|
4049 | * this technique. Needless to say, those conditions are
|
---|
4050 | * found here -- and probably only here. Is this the only use
|
---|
4051 | * of this infamous trick in shipping, production code? If it
|
---|
4052 | * isn't, it probably should be...
|
---|
4053 | */
|
---|
4054 | if (minor != -1) {
|
---|
4055 | uintptr_t maddr = dtrace_loadptr(daddr +
|
---|
4056 | offsetof(struct dev_info, devi_minor));
|
---|
4057 |
|
---|
4058 | uintptr_t next = offsetof(struct ddi_minor_data, next);
|
---|
4059 | uintptr_t name = offsetof(struct ddi_minor_data,
|
---|
4060 | d_minor) + offsetof(struct ddi_minor, name);
|
---|
4061 | uintptr_t dev = offsetof(struct ddi_minor_data,
|
---|
4062 | d_minor) + offsetof(struct ddi_minor, dev);
|
---|
4063 | uintptr_t scout;
|
---|
4064 |
|
---|
4065 | if (maddr != NULL)
|
---|
4066 | scout = dtrace_loadptr(maddr + next);
|
---|
4067 |
|
---|
4068 | while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
4069 | uint64_t m;
|
---|
4070 | #ifdef _LP64
|
---|
4071 | m = dtrace_load64(maddr + dev) & MAXMIN64;
|
---|
4072 | #else
|
---|
4073 | m = dtrace_load32(maddr + dev) & MAXMIN;
|
---|
4074 | #endif
|
---|
4075 | if (m != minor) {
|
---|
4076 | maddr = dtrace_loadptr(maddr + next);
|
---|
4077 |
|
---|
4078 | if (scout == NULL)
|
---|
4079 | continue;
|
---|
4080 |
|
---|
4081 | scout = dtrace_loadptr(scout + next);
|
---|
4082 |
|
---|
4083 | if (scout == NULL)
|
---|
4084 | continue;
|
---|
4085 |
|
---|
4086 | scout = dtrace_loadptr(scout + next);
|
---|
4087 |
|
---|
4088 | if (scout == NULL)
|
---|
4089 | continue;
|
---|
4090 |
|
---|
4091 | if (scout == maddr) {
|
---|
4092 | *flags |= CPU_DTRACE_ILLOP;
|
---|
4093 | break;
|
---|
4094 | }
|
---|
4095 |
|
---|
4096 | continue;
|
---|
4097 | }
|
---|
4098 |
|
---|
4099 | /*
|
---|
4100 | * We have the minor data. Now we need to
|
---|
4101 | * copy the minor's name into the end of the
|
---|
4102 | * pathname.
|
---|
4103 | */
|
---|
4104 | s = (char *)dtrace_loadptr(maddr + name);
|
---|
4105 | len = dtrace_strlen(s, size);
|
---|
4106 |
|
---|
4107 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4108 | break;
|
---|
4109 |
|
---|
4110 | if (len != 0) {
|
---|
4111 | if ((end -= (len + 1)) < start)
|
---|
4112 | break;
|
---|
4113 |
|
---|
4114 | *end = ':';
|
---|
4115 | }
|
---|
4116 |
|
---|
4117 | for (i = 1; i <= len; i++)
|
---|
4118 | end[i] = dtrace_load8((uintptr_t)s++);
|
---|
4119 | break;
|
---|
4120 | }
|
---|
4121 | }
|
---|
4122 |
|
---|
4123 | while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
4124 | ddi_node_state_t devi_state;
|
---|
4125 |
|
---|
4126 | devi_state = dtrace_load32(daddr +
|
---|
4127 | offsetof(struct dev_info, devi_node_state));
|
---|
4128 |
|
---|
4129 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4130 | break;
|
---|
4131 |
|
---|
4132 | if (devi_state >= DS_INITIALIZED) {
|
---|
4133 | s = (char *)dtrace_loadptr(daddr +
|
---|
4134 | offsetof(struct dev_info, devi_addr));
|
---|
4135 | len = dtrace_strlen(s, size);
|
---|
4136 |
|
---|
4137 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4138 | break;
|
---|
4139 |
|
---|
4140 | if (len != 0) {
|
---|
4141 | if ((end -= (len + 1)) < start)
|
---|
4142 | break;
|
---|
4143 |
|
---|
4144 | *end = '@';
|
---|
4145 | }
|
---|
4146 |
|
---|
4147 | for (i = 1; i <= len; i++)
|
---|
4148 | end[i] = dtrace_load8((uintptr_t)s++);
|
---|
4149 | }
|
---|
4150 |
|
---|
4151 | /*
|
---|
4152 | * Now for the node name...
|
---|
4153 | */
|
---|
4154 | s = (char *)dtrace_loadptr(daddr +
|
---|
4155 | offsetof(struct dev_info, devi_node_name));
|
---|
4156 |
|
---|
4157 | daddr = dtrace_loadptr(daddr +
|
---|
4158 | offsetof(struct dev_info, devi_parent));
|
---|
4159 |
|
---|
4160 | /*
|
---|
4161 | * If our parent is NULL (that is, if we're the root
|
---|
4162 | * node), we're going to use the special path
|
---|
4163 | * "devices".
|
---|
4164 | */
|
---|
4165 | if (daddr == NULL)
|
---|
4166 | s = "devices";
|
---|
4167 |
|
---|
4168 | len = dtrace_strlen(s, size);
|
---|
4169 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4170 | break;
|
---|
4171 |
|
---|
4172 | if ((end -= (len + 1)) < start)
|
---|
4173 | break;
|
---|
4174 |
|
---|
4175 | for (i = 1; i <= len; i++)
|
---|
4176 | end[i] = dtrace_load8((uintptr_t)s++);
|
---|
4177 | *end = '/';
|
---|
4178 |
|
---|
4179 | if (depth++ > dtrace_devdepth_max) {
|
---|
4180 | *flags |= CPU_DTRACE_ILLOP;
|
---|
4181 | break;
|
---|
4182 | }
|
---|
4183 | }
|
---|
4184 |
|
---|
4185 | if (end < start)
|
---|
4186 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4187 |
|
---|
4188 | if (daddr == NULL) {
|
---|
4189 | regs[rd] = (uintptr_t)end;
|
---|
4190 | mstate->dtms_scratch_ptr += size;
|
---|
4191 | }
|
---|
4192 |
|
---|
4193 | #else
|
---|
4194 | regs[rd] = 0;
|
---|
4195 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
4196 | #endif
|
---|
4197 | break;
|
---|
4198 | }
|
---|
4199 |
|
---|
4200 | case DIF_SUBR_STRJOIN: {
|
---|
4201 | char *d = (char *)mstate->dtms_scratch_ptr;
|
---|
4202 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4203 | uintptr_t s1 = tupregs[0].dttk_value;
|
---|
4204 | uintptr_t s2 = tupregs[1].dttk_value;
|
---|
4205 | VBDTTYPE(unsigned,int) i = 0;
|
---|
4206 |
|
---|
4207 | if (!dtrace_strcanload(s1, size, mstate, vstate) ||
|
---|
4208 | !dtrace_strcanload(s2, size, mstate, vstate)) {
|
---|
4209 | regs[rd] = NULL;
|
---|
4210 | break;
|
---|
4211 | }
|
---|
4212 |
|
---|
4213 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4214 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4215 | regs[rd] = NULL;
|
---|
4216 | break;
|
---|
4217 | }
|
---|
4218 |
|
---|
4219 | for (;;) {
|
---|
4220 | if (i >= size) {
|
---|
4221 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4222 | regs[rd] = NULL;
|
---|
4223 | break;
|
---|
4224 | }
|
---|
4225 |
|
---|
4226 | if ((d[i++] = dtrace_load8(s1++)) == '\0') {
|
---|
4227 | i--;
|
---|
4228 | break;
|
---|
4229 | }
|
---|
4230 | }
|
---|
4231 |
|
---|
4232 | for (;;) {
|
---|
4233 | if (i >= size) {
|
---|
4234 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4235 | regs[rd] = NULL;
|
---|
4236 | break;
|
---|
4237 | }
|
---|
4238 |
|
---|
4239 | if ((d[i++] = dtrace_load8(s2++)) == '\0')
|
---|
4240 | break;
|
---|
4241 | }
|
---|
4242 |
|
---|
4243 | if (i < size) {
|
---|
4244 | mstate->dtms_scratch_ptr += i;
|
---|
4245 | regs[rd] = (uintptr_t)d;
|
---|
4246 | }
|
---|
4247 |
|
---|
4248 | break;
|
---|
4249 | }
|
---|
4250 |
|
---|
4251 | case DIF_SUBR_LLTOSTR: {
|
---|
4252 | int64_t i = (int64_t)tupregs[0].dttk_value;
|
---|
4253 | int64_t val = i < 0 ? i * -1 : i;
|
---|
4254 | uint64_t size = 22; /* enough room for 2^64 in decimal */
|
---|
4255 | char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
|
---|
4256 |
|
---|
4257 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4258 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4259 | regs[rd] = NULL;
|
---|
4260 | break;
|
---|
4261 | }
|
---|
4262 |
|
---|
4263 | for (*end-- = '\0'; val; val /= 10)
|
---|
4264 | *end-- = '0' + (val % 10);
|
---|
4265 |
|
---|
4266 | if (i == 0)
|
---|
4267 | *end-- = '0';
|
---|
4268 |
|
---|
4269 | if (i < 0)
|
---|
4270 | *end-- = '-';
|
---|
4271 |
|
---|
4272 | regs[rd] = (uintptr_t)end + 1;
|
---|
4273 | mstate->dtms_scratch_ptr += size;
|
---|
4274 | break;
|
---|
4275 | }
|
---|
4276 |
|
---|
4277 | case DIF_SUBR_HTONS:
|
---|
4278 | case DIF_SUBR_NTOHS:
|
---|
4279 | #ifdef _BIG_ENDIAN
|
---|
4280 | regs[rd] = (uint16_t)tupregs[0].dttk_value;
|
---|
4281 | #else
|
---|
4282 | regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
|
---|
4283 | #endif
|
---|
4284 | break;
|
---|
4285 |
|
---|
4286 |
|
---|
4287 | case DIF_SUBR_HTONL:
|
---|
4288 | case DIF_SUBR_NTOHL:
|
---|
4289 | #ifdef _BIG_ENDIAN
|
---|
4290 | regs[rd] = (uint32_t)tupregs[0].dttk_value;
|
---|
4291 | #else
|
---|
4292 | regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
|
---|
4293 | #endif
|
---|
4294 | break;
|
---|
4295 |
|
---|
4296 |
|
---|
4297 | case DIF_SUBR_HTONLL:
|
---|
4298 | case DIF_SUBR_NTOHLL:
|
---|
4299 | #ifdef _BIG_ENDIAN
|
---|
4300 | regs[rd] = (uint64_t)tupregs[0].dttk_value;
|
---|
4301 | #else
|
---|
4302 | regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
|
---|
4303 | #endif
|
---|
4304 | break;
|
---|
4305 |
|
---|
4306 |
|
---|
4307 | case DIF_SUBR_DIRNAME:
|
---|
4308 | case DIF_SUBR_BASENAME: {
|
---|
4309 | char *dest = (char *)mstate->dtms_scratch_ptr;
|
---|
4310 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4311 | uintptr_t src = tupregs[0].dttk_value;
|
---|
4312 | int i, j, len = VBDTCAST(int)dtrace_strlen((char *)src, size);
|
---|
4313 | int lastbase = -1, firstbase = -1, lastdir = -1;
|
---|
4314 | int start, end;
|
---|
4315 |
|
---|
4316 | if (!dtrace_canload(src, len + 1, mstate, vstate)) {
|
---|
4317 | regs[rd] = NULL;
|
---|
4318 | break;
|
---|
4319 | }
|
---|
4320 |
|
---|
4321 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4322 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4323 | regs[rd] = NULL;
|
---|
4324 | break;
|
---|
4325 | }
|
---|
4326 |
|
---|
4327 | /*
|
---|
4328 | * The basename and dirname for a zero-length string is
|
---|
4329 | * defined to be "."
|
---|
4330 | */
|
---|
4331 | if (len == 0) {
|
---|
4332 | len = 1;
|
---|
4333 | src = (uintptr_t)".";
|
---|
4334 | }
|
---|
4335 |
|
---|
4336 | /*
|
---|
4337 | * Start from the back of the string, moving back toward the
|
---|
4338 | * front until we see a character that isn't a slash. That
|
---|
4339 | * character is the last character in the basename.
|
---|
4340 | */
|
---|
4341 | for (i = len - 1; i >= 0; i--) {
|
---|
4342 | if (dtrace_load8(src + i) != '/')
|
---|
4343 | break;
|
---|
4344 | }
|
---|
4345 |
|
---|
4346 | if (i >= 0)
|
---|
4347 | lastbase = i;
|
---|
4348 |
|
---|
4349 | /*
|
---|
4350 | * Starting from the last character in the basename, move
|
---|
4351 | * towards the front until we find a slash. The character
|
---|
4352 | * that we processed immediately before that is the first
|
---|
4353 | * character in the basename.
|
---|
4354 | */
|
---|
4355 | for (; i >= 0; i--) {
|
---|
4356 | if (dtrace_load8(src + i) == '/')
|
---|
4357 | break;
|
---|
4358 | }
|
---|
4359 |
|
---|
4360 | if (i >= 0)
|
---|
4361 | firstbase = i + 1;
|
---|
4362 |
|
---|
4363 | /*
|
---|
4364 | * Now keep going until we find a non-slash character. That
|
---|
4365 | * character is the last character in the dirname.
|
---|
4366 | */
|
---|
4367 | for (; i >= 0; i--) {
|
---|
4368 | if (dtrace_load8(src + i) != '/')
|
---|
4369 | break;
|
---|
4370 | }
|
---|
4371 |
|
---|
4372 | if (i >= 0)
|
---|
4373 | lastdir = i;
|
---|
4374 |
|
---|
4375 | ASSERT(!(lastbase == -1 && firstbase != -1));
|
---|
4376 | ASSERT(!(firstbase == -1 && lastdir != -1));
|
---|
4377 |
|
---|
4378 | if (lastbase == -1) {
|
---|
4379 | /*
|
---|
4380 | * We didn't find a non-slash character. We know that
|
---|
4381 | * the length is non-zero, so the whole string must be
|
---|
4382 | * slashes. In either the dirname or the basename
|
---|
4383 | * case, we return '/'.
|
---|
4384 | */
|
---|
4385 | ASSERT(firstbase == -1);
|
---|
4386 | firstbase = lastbase = lastdir = 0;
|
---|
4387 | }
|
---|
4388 |
|
---|
4389 | if (firstbase == -1) {
|
---|
4390 | /*
|
---|
4391 | * The entire string consists only of a basename
|
---|
4392 | * component. If we're looking for dirname, we need
|
---|
4393 | * to change our string to be just "."; if we're
|
---|
4394 | * looking for a basename, we'll just set the first
|
---|
4395 | * character of the basename to be 0.
|
---|
4396 | */
|
---|
4397 | if (subr == DIF_SUBR_DIRNAME) {
|
---|
4398 | ASSERT(lastdir == -1);
|
---|
4399 | src = (uintptr_t)".";
|
---|
4400 | lastdir = 0;
|
---|
4401 | } else {
|
---|
4402 | firstbase = 0;
|
---|
4403 | }
|
---|
4404 | }
|
---|
4405 |
|
---|
4406 | if (subr == DIF_SUBR_DIRNAME) {
|
---|
4407 | if (lastdir == -1) {
|
---|
4408 | /*
|
---|
4409 | * We know that we have a slash in the name --
|
---|
4410 | * or lastdir would be set to 0, above. And
|
---|
4411 | * because lastdir is -1, we know that this
|
---|
4412 | * slash must be the first character. (That
|
---|
4413 | * is, the full string must be of the form
|
---|
4414 | * "/basename".) In this case, the last
|
---|
4415 | * character of the directory name is 0.
|
---|
4416 | */
|
---|
4417 | lastdir = 0;
|
---|
4418 | }
|
---|
4419 |
|
---|
4420 | start = 0;
|
---|
4421 | end = lastdir;
|
---|
4422 | } else {
|
---|
4423 | ASSERT(subr == DIF_SUBR_BASENAME);
|
---|
4424 | ASSERT(firstbase != -1 && lastbase != -1);
|
---|
4425 | start = firstbase;
|
---|
4426 | end = lastbase;
|
---|
4427 | }
|
---|
4428 |
|
---|
4429 | for (i = start, j = 0; i <= end && VBDTCAST(unsigned)j < size - 1; i++, j++)
|
---|
4430 | dest[j] = dtrace_load8(src + i);
|
---|
4431 |
|
---|
4432 | dest[j] = '\0';
|
---|
4433 | regs[rd] = (uintptr_t)dest;
|
---|
4434 | mstate->dtms_scratch_ptr += size;
|
---|
4435 | break;
|
---|
4436 | }
|
---|
4437 |
|
---|
4438 | case DIF_SUBR_CLEANPATH: {
|
---|
4439 | char *dest = (char *)mstate->dtms_scratch_ptr, c;
|
---|
4440 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4441 | uintptr_t src = tupregs[0].dttk_value;
|
---|
4442 | int i = 0, j = 0;
|
---|
4443 |
|
---|
4444 | if (!dtrace_strcanload(src, size, mstate, vstate)) {
|
---|
4445 | regs[rd] = NULL;
|
---|
4446 | break;
|
---|
4447 | }
|
---|
4448 |
|
---|
4449 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4450 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4451 | regs[rd] = NULL;
|
---|
4452 | break;
|
---|
4453 | }
|
---|
4454 |
|
---|
4455 | /*
|
---|
4456 | * Move forward, loading each character.
|
---|
4457 | */
|
---|
4458 | do {
|
---|
4459 | c = dtrace_load8(src + i++);
|
---|
4460 | next:
|
---|
4461 | if (j + 5 >= VBDTCAST(int64_t)size) /* 5 = strlen("/..c\0") */
|
---|
4462 | break;
|
---|
4463 |
|
---|
4464 | if (c != '/') {
|
---|
4465 | dest[j++] = c;
|
---|
4466 | continue;
|
---|
4467 | }
|
---|
4468 |
|
---|
4469 | c = dtrace_load8(src + i++);
|
---|
4470 |
|
---|
4471 | if (c == '/') {
|
---|
4472 | /*
|
---|
4473 | * We have two slashes -- we can just advance
|
---|
4474 | * to the next character.
|
---|
4475 | */
|
---|
4476 | goto next;
|
---|
4477 | }
|
---|
4478 |
|
---|
4479 | if (c != '.') {
|
---|
4480 | /*
|
---|
4481 | * This is not "." and it's not ".." -- we can
|
---|
4482 | * just store the "/" and this character and
|
---|
4483 | * drive on.
|
---|
4484 | */
|
---|
4485 | dest[j++] = '/';
|
---|
4486 | dest[j++] = c;
|
---|
4487 | continue;
|
---|
4488 | }
|
---|
4489 |
|
---|
4490 | c = dtrace_load8(src + i++);
|
---|
4491 |
|
---|
4492 | if (c == '/') {
|
---|
4493 | /*
|
---|
4494 | * This is a "/./" component. We're not going
|
---|
4495 | * to store anything in the destination buffer;
|
---|
4496 | * we're just going to go to the next component.
|
---|
4497 | */
|
---|
4498 | goto next;
|
---|
4499 | }
|
---|
4500 |
|
---|
4501 | if (c != '.') {
|
---|
4502 | /*
|
---|
4503 | * This is not ".." -- we can just store the
|
---|
4504 | * "/." and this character and continue
|
---|
4505 | * processing.
|
---|
4506 | */
|
---|
4507 | dest[j++] = '/';
|
---|
4508 | dest[j++] = '.';
|
---|
4509 | dest[j++] = c;
|
---|
4510 | continue;
|
---|
4511 | }
|
---|
4512 |
|
---|
4513 | c = dtrace_load8(src + i++);
|
---|
4514 |
|
---|
4515 | if (c != '/' && c != '\0') {
|
---|
4516 | /*
|
---|
4517 | * This is not ".." -- it's "..[mumble]".
|
---|
4518 | * We'll store the "/.." and this character
|
---|
4519 | * and continue processing.
|
---|
4520 | */
|
---|
4521 | dest[j++] = '/';
|
---|
4522 | dest[j++] = '.';
|
---|
4523 | dest[j++] = '.';
|
---|
4524 | dest[j++] = c;
|
---|
4525 | continue;
|
---|
4526 | }
|
---|
4527 |
|
---|
4528 | /*
|
---|
4529 | * This is "/../" or "/..\0". We need to back up
|
---|
4530 | * our destination pointer until we find a "/".
|
---|
4531 | */
|
---|
4532 | i--;
|
---|
4533 | while (j != 0 && dest[--j] != '/')
|
---|
4534 | continue;
|
---|
4535 |
|
---|
4536 | if (c == '\0')
|
---|
4537 | dest[++j] = '/';
|
---|
4538 | } while (c != '\0');
|
---|
4539 |
|
---|
4540 | dest[j] = '\0';
|
---|
4541 | regs[rd] = (uintptr_t)dest;
|
---|
4542 | mstate->dtms_scratch_ptr += size;
|
---|
4543 | break;
|
---|
4544 | }
|
---|
4545 |
|
---|
4546 | case DIF_SUBR_INET_NTOA:
|
---|
4547 | case DIF_SUBR_INET_NTOA6:
|
---|
4548 | case DIF_SUBR_INET_NTOP: {
|
---|
4549 | #ifndef VBOX
|
---|
4550 | size_t size;
|
---|
4551 | int af, argi, i;
|
---|
4552 | char *base, *end;
|
---|
4553 |
|
---|
4554 | if (subr == DIF_SUBR_INET_NTOP) {
|
---|
4555 | af = (int)tupregs[0].dttk_value;
|
---|
4556 | argi = 1;
|
---|
4557 | } else {
|
---|
4558 | af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
|
---|
4559 | argi = 0;
|
---|
4560 | }
|
---|
4561 |
|
---|
4562 | if (af == AF_INET) {
|
---|
4563 | ipaddr_t ip4;
|
---|
4564 | uint8_t *ptr8, val;
|
---|
4565 |
|
---|
4566 | /*
|
---|
4567 | * Safely load the IPv4 address.
|
---|
4568 | */
|
---|
4569 | ip4 = dtrace_load32(tupregs[argi].dttk_value);
|
---|
4570 |
|
---|
4571 | /*
|
---|
4572 | * Check an IPv4 string will fit in scratch.
|
---|
4573 | */
|
---|
4574 | size = INET_ADDRSTRLEN;
|
---|
4575 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4576 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4577 | regs[rd] = NULL;
|
---|
4578 | break;
|
---|
4579 | }
|
---|
4580 | base = (char *)mstate->dtms_scratch_ptr;
|
---|
4581 | end = (char *)mstate->dtms_scratch_ptr + size - 1;
|
---|
4582 |
|
---|
4583 | /*
|
---|
4584 | * Stringify as a dotted decimal quad.
|
---|
4585 | */
|
---|
4586 | *end-- = '\0';
|
---|
4587 | ptr8 = (uint8_t *)&ip4;
|
---|
4588 | for (i = 3; i >= 0; i--) {
|
---|
4589 | val = ptr8[i];
|
---|
4590 |
|
---|
4591 | if (val == 0) {
|
---|
4592 | *end-- = '0';
|
---|
4593 | } else {
|
---|
4594 | for (; val; val /= 10) {
|
---|
4595 | *end-- = '0' + (val % 10);
|
---|
4596 | }
|
---|
4597 | }
|
---|
4598 |
|
---|
4599 | if (i > 0)
|
---|
4600 | *end-- = '.';
|
---|
4601 | }
|
---|
4602 | ASSERT(end + 1 >= base);
|
---|
4603 |
|
---|
4604 | } else if (af == AF_INET6) {
|
---|
4605 | struct in6_addr ip6;
|
---|
4606 | int firstzero, tryzero, numzero, v6end;
|
---|
4607 | uint16_t val;
|
---|
4608 | const char digits[] = "0123456789abcdef";
|
---|
4609 |
|
---|
4610 | /*
|
---|
4611 | * Stringify using RFC 1884 convention 2 - 16 bit
|
---|
4612 | * hexadecimal values with a zero-run compression.
|
---|
4613 | * Lower case hexadecimal digits are used.
|
---|
4614 | * eg, fe80::214:4fff:fe0b:76c8.
|
---|
4615 | * The IPv4 embedded form is returned for inet_ntop,
|
---|
4616 | * just the IPv4 string is returned for inet_ntoa6.
|
---|
4617 | */
|
---|
4618 |
|
---|
4619 | /*
|
---|
4620 | * Safely load the IPv6 address.
|
---|
4621 | */
|
---|
4622 | dtrace_bcopy(
|
---|
4623 | (void *)(uintptr_t)tupregs[argi].dttk_value,
|
---|
4624 | (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
|
---|
4625 |
|
---|
4626 | /*
|
---|
4627 | * Check an IPv6 string will fit in scratch.
|
---|
4628 | */
|
---|
4629 | size = INET6_ADDRSTRLEN;
|
---|
4630 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4631 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4632 | regs[rd] = NULL;
|
---|
4633 | break;
|
---|
4634 | }
|
---|
4635 | base = (char *)mstate->dtms_scratch_ptr;
|
---|
4636 | end = (char *)mstate->dtms_scratch_ptr + size - 1;
|
---|
4637 | *end-- = '\0';
|
---|
4638 |
|
---|
4639 | /*
|
---|
4640 | * Find the longest run of 16 bit zero values
|
---|
4641 | * for the single allowed zero compression - "::".
|
---|
4642 | */
|
---|
4643 | firstzero = -1;
|
---|
4644 | tryzero = -1;
|
---|
4645 | numzero = 1;
|
---|
4646 | for (i = 0; i < sizeof (struct in6_addr); i++) {
|
---|
4647 | if (ip6._S6_un._S6_u8[i] == 0 &&
|
---|
4648 | tryzero == -1 && i % 2 == 0) {
|
---|
4649 | tryzero = i;
|
---|
4650 | continue;
|
---|
4651 | }
|
---|
4652 |
|
---|
4653 | if (tryzero != -1 &&
|
---|
4654 | (ip6._S6_un._S6_u8[i] != 0 ||
|
---|
4655 | i == sizeof (struct in6_addr) - 1)) {
|
---|
4656 |
|
---|
4657 | if (i - tryzero <= numzero) {
|
---|
4658 | tryzero = -1;
|
---|
4659 | continue;
|
---|
4660 | }
|
---|
4661 |
|
---|
4662 | firstzero = tryzero;
|
---|
4663 | numzero = i - i % 2 - tryzero;
|
---|
4664 | tryzero = -1;
|
---|
4665 |
|
---|
4666 | if (ip6._S6_un._S6_u8[i] == 0 &&
|
---|
4667 | i == sizeof (struct in6_addr) - 1)
|
---|
4668 | numzero += 2;
|
---|
4669 | }
|
---|
4670 | }
|
---|
4671 | ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
|
---|
4672 |
|
---|
4673 | /*
|
---|
4674 | * Check for an IPv4 embedded address.
|
---|
4675 | */
|
---|
4676 | v6end = sizeof (struct in6_addr) - 2;
|
---|
4677 | if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
|
---|
4678 | IN6_IS_ADDR_V4COMPAT(&ip6)) {
|
---|
4679 | for (i = sizeof (struct in6_addr) - 1;
|
---|
4680 | i >= DTRACE_V4MAPPED_OFFSET; i--) {
|
---|
4681 | ASSERT(end >= base);
|
---|
4682 |
|
---|
4683 | val = ip6._S6_un._S6_u8[i];
|
---|
4684 |
|
---|
4685 | if (val == 0) {
|
---|
4686 | *end-- = '0';
|
---|
4687 | } else {
|
---|
4688 | for (; val; val /= 10) {
|
---|
4689 | *end-- = '0' + val % 10;
|
---|
4690 | }
|
---|
4691 | }
|
---|
4692 |
|
---|
4693 | if (i > DTRACE_V4MAPPED_OFFSET)
|
---|
4694 | *end-- = '.';
|
---|
4695 | }
|
---|
4696 |
|
---|
4697 | if (subr == DIF_SUBR_INET_NTOA6)
|
---|
4698 | goto inetout;
|
---|
4699 |
|
---|
4700 | /*
|
---|
4701 | * Set v6end to skip the IPv4 address that
|
---|
4702 | * we have already stringified.
|
---|
4703 | */
|
---|
4704 | v6end = 10;
|
---|
4705 | }
|
---|
4706 |
|
---|
4707 | /*
|
---|
4708 | * Build the IPv6 string by working through the
|
---|
4709 | * address in reverse.
|
---|
4710 | */
|
---|
4711 | for (i = v6end; i >= 0; i -= 2) {
|
---|
4712 | ASSERT(end >= base);
|
---|
4713 |
|
---|
4714 | if (i == firstzero + numzero - 2) {
|
---|
4715 | *end-- = ':';
|
---|
4716 | *end-- = ':';
|
---|
4717 | i -= numzero - 2;
|
---|
4718 | continue;
|
---|
4719 | }
|
---|
4720 |
|
---|
4721 | if (i < 14 && i != firstzero - 2)
|
---|
4722 | *end-- = ':';
|
---|
4723 |
|
---|
4724 | val = (ip6._S6_un._S6_u8[i] << 8) +
|
---|
4725 | ip6._S6_un._S6_u8[i + 1];
|
---|
4726 |
|
---|
4727 | if (val == 0) {
|
---|
4728 | *end-- = '0';
|
---|
4729 | } else {
|
---|
4730 | for (; val; val /= 16) {
|
---|
4731 | *end-- = digits[val % 16];
|
---|
4732 | }
|
---|
4733 | }
|
---|
4734 | }
|
---|
4735 | ASSERT(end + 1 >= base);
|
---|
4736 |
|
---|
4737 | } else {
|
---|
4738 | /*
|
---|
4739 | * The user didn't use AF_INET or AF_INET6.
|
---|
4740 | */
|
---|
4741 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
4742 | regs[rd] = NULL;
|
---|
4743 | break;
|
---|
4744 | }
|
---|
4745 |
|
---|
4746 | inetout: regs[rd] = (uintptr_t)end + 1;
|
---|
4747 | mstate->dtms_scratch_ptr += size;
|
---|
4748 | #else /* VBOX */
|
---|
4749 | regs[rd] = 0;
|
---|
4750 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
4751 | #endif /* VBOX */
|
---|
4752 | break;
|
---|
4753 | }
|
---|
4754 |
|
---|
4755 | }
|
---|
4756 | }
|
---|
4757 |
|
---|
4758 | /*
|
---|
4759 | * Emulate the execution of DTrace IR instructions specified by the given
|
---|
4760 | * DIF object. This function is deliberately void of assertions as all of
|
---|
4761 | * the necessary checks are handled by a call to dtrace_difo_validate().
|
---|
4762 | */
|
---|
4763 | static uint64_t
|
---|
4764 | dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
|
---|
4765 | dtrace_vstate_t *vstate, dtrace_state_t *state)
|
---|
4766 | {
|
---|
4767 | const dif_instr_t *text = difo->dtdo_buf;
|
---|
4768 | const uint_t textlen = difo->dtdo_len;
|
---|
4769 | const char *strtab = difo->dtdo_strtab;
|
---|
4770 | const uint64_t *inttab = difo->dtdo_inttab;
|
---|
4771 |
|
---|
4772 | uint64_t rval = 0;
|
---|
4773 | dtrace_statvar_t *svar;
|
---|
4774 | dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
|
---|
4775 | dtrace_difv_t *v;
|
---|
4776 | volatile uint16_t *flags = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
4777 | volatile uintptr_t *illval = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval;
|
---|
4778 |
|
---|
4779 | dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
|
---|
4780 | uint64_t regs[DIF_DIR_NREGS];
|
---|
4781 | uint64_t *tmp;
|
---|
4782 |
|
---|
4783 | uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
|
---|
4784 | int64_t cc_r;
|
---|
4785 | uint_t pc = 0, id, opc VBDTUNASS(0);
|
---|
4786 | uint8_t ttop = 0;
|
---|
4787 | dif_instr_t instr;
|
---|
4788 | uint_t r1, r2, rd;
|
---|
4789 |
|
---|
4790 | /*
|
---|
4791 | * We stash the current DIF object into the machine state: we need it
|
---|
4792 | * for subsequent access checking.
|
---|
4793 | */
|
---|
4794 | mstate->dtms_difo = difo;
|
---|
4795 |
|
---|
4796 | regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
|
---|
4797 |
|
---|
4798 | while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
4799 | opc = pc;
|
---|
4800 |
|
---|
4801 | instr = text[pc++];
|
---|
4802 | r1 = DIF_INSTR_R1(instr);
|
---|
4803 | r2 = DIF_INSTR_R2(instr);
|
---|
4804 | rd = DIF_INSTR_RD(instr);
|
---|
4805 |
|
---|
4806 | switch (DIF_INSTR_OP(instr)) {
|
---|
4807 | case DIF_OP_OR:
|
---|
4808 | regs[rd] = regs[r1] | regs[r2];
|
---|
4809 | break;
|
---|
4810 | case DIF_OP_XOR:
|
---|
4811 | regs[rd] = regs[r1] ^ regs[r2];
|
---|
4812 | break;
|
---|
4813 | case DIF_OP_AND:
|
---|
4814 | regs[rd] = regs[r1] & regs[r2];
|
---|
4815 | break;
|
---|
4816 | case DIF_OP_SLL:
|
---|
4817 | regs[rd] = regs[r1] << regs[r2];
|
---|
4818 | break;
|
---|
4819 | case DIF_OP_SRL:
|
---|
4820 | regs[rd] = regs[r1] >> regs[r2];
|
---|
4821 | break;
|
---|
4822 | case DIF_OP_SUB:
|
---|
4823 | regs[rd] = regs[r1] - regs[r2];
|
---|
4824 | break;
|
---|
4825 | case DIF_OP_ADD:
|
---|
4826 | regs[rd] = regs[r1] + regs[r2];
|
---|
4827 | break;
|
---|
4828 | case DIF_OP_MUL:
|
---|
4829 | regs[rd] = regs[r1] * regs[r2];
|
---|
4830 | break;
|
---|
4831 | case DIF_OP_SDIV:
|
---|
4832 | if (regs[r2] == 0) {
|
---|
4833 | regs[rd] = 0;
|
---|
4834 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4835 | } else {
|
---|
4836 | regs[rd] = (int64_t)regs[r1] /
|
---|
4837 | (int64_t)regs[r2];
|
---|
4838 | }
|
---|
4839 | break;
|
---|
4840 |
|
---|
4841 | case DIF_OP_UDIV:
|
---|
4842 | if (regs[r2] == 0) {
|
---|
4843 | regs[rd] = 0;
|
---|
4844 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4845 | } else {
|
---|
4846 | regs[rd] = regs[r1] / regs[r2];
|
---|
4847 | }
|
---|
4848 | break;
|
---|
4849 |
|
---|
4850 | case DIF_OP_SREM:
|
---|
4851 | if (regs[r2] == 0) {
|
---|
4852 | regs[rd] = 0;
|
---|
4853 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4854 | } else {
|
---|
4855 | regs[rd] = (int64_t)regs[r1] %
|
---|
4856 | (int64_t)regs[r2];
|
---|
4857 | }
|
---|
4858 | break;
|
---|
4859 |
|
---|
4860 | case DIF_OP_UREM:
|
---|
4861 | if (regs[r2] == 0) {
|
---|
4862 | regs[rd] = 0;
|
---|
4863 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4864 | } else {
|
---|
4865 | regs[rd] = regs[r1] % regs[r2];
|
---|
4866 | }
|
---|
4867 | break;
|
---|
4868 |
|
---|
4869 | case DIF_OP_NOT:
|
---|
4870 | regs[rd] = ~regs[r1];
|
---|
4871 | break;
|
---|
4872 | case DIF_OP_MOV:
|
---|
4873 | regs[rd] = regs[r1];
|
---|
4874 | break;
|
---|
4875 | case DIF_OP_CMP:
|
---|
4876 | cc_r = regs[r1] - regs[r2];
|
---|
4877 | cc_n = cc_r < 0;
|
---|
4878 | cc_z = cc_r == 0;
|
---|
4879 | cc_v = 0;
|
---|
4880 | cc_c = regs[r1] < regs[r2];
|
---|
4881 | break;
|
---|
4882 | case DIF_OP_TST:
|
---|
4883 | cc_n = cc_v = cc_c = 0;
|
---|
4884 | cc_z = regs[r1] == 0;
|
---|
4885 | break;
|
---|
4886 | case DIF_OP_BA:
|
---|
4887 | pc = DIF_INSTR_LABEL(instr);
|
---|
4888 | break;
|
---|
4889 | case DIF_OP_BE:
|
---|
4890 | if (cc_z)
|
---|
4891 | pc = DIF_INSTR_LABEL(instr);
|
---|
4892 | break;
|
---|
4893 | case DIF_OP_BNE:
|
---|
4894 | if (cc_z == 0)
|
---|
4895 | pc = DIF_INSTR_LABEL(instr);
|
---|
4896 | break;
|
---|
4897 | case DIF_OP_BG:
|
---|
4898 | if ((cc_z | (cc_n ^ cc_v)) == 0)
|
---|
4899 | pc = DIF_INSTR_LABEL(instr);
|
---|
4900 | break;
|
---|
4901 | case DIF_OP_BGU:
|
---|
4902 | if ((cc_c | cc_z) == 0)
|
---|
4903 | pc = DIF_INSTR_LABEL(instr);
|
---|
4904 | break;
|
---|
4905 | case DIF_OP_BGE:
|
---|
4906 | if ((cc_n ^ cc_v) == 0)
|
---|
4907 | pc = DIF_INSTR_LABEL(instr);
|
---|
4908 | break;
|
---|
4909 | case DIF_OP_BGEU:
|
---|
4910 | if (cc_c == 0)
|
---|
4911 | pc = DIF_INSTR_LABEL(instr);
|
---|
4912 | break;
|
---|
4913 | case DIF_OP_BL:
|
---|
4914 | if (cc_n ^ cc_v)
|
---|
4915 | pc = DIF_INSTR_LABEL(instr);
|
---|
4916 | break;
|
---|
4917 | case DIF_OP_BLU:
|
---|
4918 | if (cc_c)
|
---|
4919 | pc = DIF_INSTR_LABEL(instr);
|
---|
4920 | break;
|
---|
4921 | case DIF_OP_BLE:
|
---|
4922 | if (cc_z | (cc_n ^ cc_v))
|
---|
4923 | pc = DIF_INSTR_LABEL(instr);
|
---|
4924 | break;
|
---|
4925 | case DIF_OP_BLEU:
|
---|
4926 | if (cc_c | cc_z)
|
---|
4927 | pc = DIF_INSTR_LABEL(instr);
|
---|
4928 | break;
|
---|
4929 | case DIF_OP_RLDSB:
|
---|
4930 | if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
|
---|
4931 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4932 | *illval = regs[r1];
|
---|
4933 | break;
|
---|
4934 | }
|
---|
4935 | RT_FALL_THRU();
|
---|
4936 | case DIF_OP_LDSB:
|
---|
4937 | regs[rd] = (int8_t)dtrace_load8(regs[r1]);
|
---|
4938 | break;
|
---|
4939 | case DIF_OP_RLDSH:
|
---|
4940 | if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
|
---|
4941 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4942 | *illval = regs[r1];
|
---|
4943 | break;
|
---|
4944 | }
|
---|
4945 | RT_FALL_THRU();
|
---|
4946 | case DIF_OP_LDSH:
|
---|
4947 | regs[rd] = (int16_t)dtrace_load16(regs[r1]);
|
---|
4948 | break;
|
---|
4949 | case DIF_OP_RLDSW:
|
---|
4950 | if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
|
---|
4951 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4952 | *illval = regs[r1];
|
---|
4953 | break;
|
---|
4954 | }
|
---|
4955 | RT_FALL_THRU();
|
---|
4956 | case DIF_OP_LDSW:
|
---|
4957 | regs[rd] = (int32_t)dtrace_load32(regs[r1]);
|
---|
4958 | break;
|
---|
4959 | case DIF_OP_RLDUB:
|
---|
4960 | if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
|
---|
4961 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4962 | *illval = regs[r1];
|
---|
4963 | break;
|
---|
4964 | }
|
---|
4965 | RT_FALL_THRU();
|
---|
4966 | case DIF_OP_LDUB:
|
---|
4967 | regs[rd] = dtrace_load8(regs[r1]);
|
---|
4968 | break;
|
---|
4969 | case DIF_OP_RLDUH:
|
---|
4970 | if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
|
---|
4971 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4972 | *illval = regs[r1];
|
---|
4973 | break;
|
---|
4974 | }
|
---|
4975 | RT_FALL_THRU();
|
---|
4976 | case DIF_OP_LDUH:
|
---|
4977 | regs[rd] = dtrace_load16(regs[r1]);
|
---|
4978 | break;
|
---|
4979 | case DIF_OP_RLDUW:
|
---|
4980 | if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
|
---|
4981 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4982 | *illval = regs[r1];
|
---|
4983 | break;
|
---|
4984 | }
|
---|
4985 | RT_FALL_THRU();
|
---|
4986 | case DIF_OP_LDUW:
|
---|
4987 | regs[rd] = dtrace_load32(regs[r1]);
|
---|
4988 | break;
|
---|
4989 | case DIF_OP_RLDX:
|
---|
4990 | if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
|
---|
4991 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4992 | *illval = regs[r1];
|
---|
4993 | break;
|
---|
4994 | }
|
---|
4995 | RT_FALL_THRU();
|
---|
4996 | case DIF_OP_LDX:
|
---|
4997 | regs[rd] = dtrace_load64(regs[r1]);
|
---|
4998 | break;
|
---|
4999 | case DIF_OP_ULDSB:
|
---|
5000 | regs[rd] = (int8_t)
|
---|
5001 | dtrace_fuword8((void *)(uintptr_t)regs[r1]);
|
---|
5002 | break;
|
---|
5003 | case DIF_OP_ULDSH:
|
---|
5004 | regs[rd] = (int16_t)
|
---|
5005 | dtrace_fuword16((void *)(uintptr_t)regs[r1]);
|
---|
5006 | break;
|
---|
5007 | case DIF_OP_ULDSW:
|
---|
5008 | regs[rd] = (int32_t)
|
---|
5009 | dtrace_fuword32((void *)(uintptr_t)regs[r1]);
|
---|
5010 | break;
|
---|
5011 | case DIF_OP_ULDUB:
|
---|
5012 | regs[rd] =
|
---|
5013 | dtrace_fuword8((void *)(uintptr_t)regs[r1]);
|
---|
5014 | break;
|
---|
5015 | case DIF_OP_ULDUH:
|
---|
5016 | regs[rd] =
|
---|
5017 | dtrace_fuword16((void *)(uintptr_t)regs[r1]);
|
---|
5018 | break;
|
---|
5019 | case DIF_OP_ULDUW:
|
---|
5020 | regs[rd] =
|
---|
5021 | dtrace_fuword32((void *)(uintptr_t)regs[r1]);
|
---|
5022 | break;
|
---|
5023 | case DIF_OP_ULDX:
|
---|
5024 | regs[rd] =
|
---|
5025 | dtrace_fuword64((void *)(uintptr_t)regs[r1]);
|
---|
5026 | break;
|
---|
5027 | case DIF_OP_RET:
|
---|
5028 | rval = regs[rd];
|
---|
5029 | pc = textlen;
|
---|
5030 | break;
|
---|
5031 | case DIF_OP_NOP:
|
---|
5032 | break;
|
---|
5033 | case DIF_OP_SETX:
|
---|
5034 | regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
|
---|
5035 | break;
|
---|
5036 | case DIF_OP_SETS:
|
---|
5037 | regs[rd] = (uint64_t)(uintptr_t)
|
---|
5038 | (strtab + DIF_INSTR_STRING(instr));
|
---|
5039 | break;
|
---|
5040 | case DIF_OP_SCMP: {
|
---|
5041 | size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
5042 | uintptr_t s1 = regs[r1];
|
---|
5043 | uintptr_t s2 = regs[r2];
|
---|
5044 |
|
---|
5045 | if (s1 != NULL &&
|
---|
5046 | !dtrace_strcanload(s1, sz, mstate, vstate))
|
---|
5047 | break;
|
---|
5048 | if (s2 != NULL &&
|
---|
5049 | !dtrace_strcanload(s2, sz, mstate, vstate))
|
---|
5050 | break;
|
---|
5051 |
|
---|
5052 | cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);
|
---|
5053 |
|
---|
5054 | cc_n = cc_r < 0;
|
---|
5055 | cc_z = cc_r == 0;
|
---|
5056 | cc_v = cc_c = 0;
|
---|
5057 | break;
|
---|
5058 | }
|
---|
5059 | case DIF_OP_LDGA:
|
---|
5060 | regs[rd] = dtrace_dif_variable(mstate, state,
|
---|
5061 | r1, regs[r2]);
|
---|
5062 | break;
|
---|
5063 | case DIF_OP_LDGS:
|
---|
5064 | id = DIF_INSTR_VAR(instr);
|
---|
5065 |
|
---|
5066 | if (id >= DIF_VAR_OTHER_UBASE) {
|
---|
5067 | uintptr_t a;
|
---|
5068 |
|
---|
5069 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5070 | svar = vstate->dtvs_globals[id];
|
---|
5071 | ASSERT(svar != NULL);
|
---|
5072 | v = &svar->dtsv_var;
|
---|
5073 |
|
---|
5074 | if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
|
---|
5075 | regs[rd] = svar->dtsv_data;
|
---|
5076 | break;
|
---|
5077 | }
|
---|
5078 |
|
---|
5079 | a = (uintptr_t)svar->dtsv_data;
|
---|
5080 |
|
---|
5081 | if (*(uint8_t *)a == UINT8_MAX) {
|
---|
5082 | /*
|
---|
5083 | * If the 0th byte is set to UINT8_MAX
|
---|
5084 | * then this is to be treated as a
|
---|
5085 | * reference to a NULL variable.
|
---|
5086 | */
|
---|
5087 | regs[rd] = NULL;
|
---|
5088 | } else {
|
---|
5089 | regs[rd] = a + sizeof (uint64_t);
|
---|
5090 | }
|
---|
5091 |
|
---|
5092 | break;
|
---|
5093 | }
|
---|
5094 |
|
---|
5095 | regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
|
---|
5096 | break;
|
---|
5097 |
|
---|
5098 | case DIF_OP_STGS:
|
---|
5099 | id = DIF_INSTR_VAR(instr);
|
---|
5100 |
|
---|
5101 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5102 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5103 |
|
---|
5104 | svar = vstate->dtvs_globals[id];
|
---|
5105 | ASSERT(svar != NULL);
|
---|
5106 | v = &svar->dtsv_var;
|
---|
5107 |
|
---|
5108 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5109 | uintptr_t a = (uintptr_t)svar->dtsv_data;
|
---|
5110 |
|
---|
5111 | ASSERT(a != NULL);
|
---|
5112 | ASSERT(svar->dtsv_size != 0);
|
---|
5113 |
|
---|
5114 | if (regs[rd] == NULL) {
|
---|
5115 | *(uint8_t *)a = UINT8_MAX;
|
---|
5116 | break;
|
---|
5117 | } else {
|
---|
5118 | *(uint8_t *)a = 0;
|
---|
5119 | a += sizeof (uint64_t);
|
---|
5120 | }
|
---|
5121 | if (!dtrace_vcanload(
|
---|
5122 | (void *)(uintptr_t)regs[rd], &v->dtdv_type,
|
---|
5123 | mstate, vstate))
|
---|
5124 | break;
|
---|
5125 |
|
---|
5126 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5127 | (void *)a, &v->dtdv_type);
|
---|
5128 | break;
|
---|
5129 | }
|
---|
5130 |
|
---|
5131 | svar->dtsv_data = regs[rd];
|
---|
5132 | break;
|
---|
5133 |
|
---|
5134 | case DIF_OP_LDTA:
|
---|
5135 | /*
|
---|
5136 | * There are no DTrace built-in thread-local arrays at
|
---|
5137 | * present. This opcode is saved for future work.
|
---|
5138 | */
|
---|
5139 | *flags |= CPU_DTRACE_ILLOP;
|
---|
5140 | regs[rd] = 0;
|
---|
5141 | break;
|
---|
5142 |
|
---|
5143 | case DIF_OP_LDLS:
|
---|
5144 | id = DIF_INSTR_VAR(instr);
|
---|
5145 |
|
---|
5146 | if (id < DIF_VAR_OTHER_UBASE) {
|
---|
5147 | /*
|
---|
5148 | * For now, this has no meaning.
|
---|
5149 | */
|
---|
5150 | regs[rd] = 0;
|
---|
5151 | break;
|
---|
5152 | }
|
---|
5153 |
|
---|
5154 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5155 |
|
---|
5156 | ASSERT(VBDTCAST(int64_t)id < vstate->dtvs_nlocals);
|
---|
5157 | ASSERT(vstate->dtvs_locals != NULL);
|
---|
5158 |
|
---|
5159 | svar = vstate->dtvs_locals[id];
|
---|
5160 | ASSERT(svar != NULL);
|
---|
5161 | v = &svar->dtsv_var;
|
---|
5162 |
|
---|
5163 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5164 | uintptr_t a = (uintptr_t)svar->dtsv_data;
|
---|
5165 | size_t sz = v->dtdv_type.dtdt_size;
|
---|
5166 |
|
---|
5167 | sz += sizeof (uint64_t);
|
---|
5168 | ASSERT(svar->dtsv_size == NCPU * sz);
|
---|
5169 | a += VBDT_GET_CPUID() * sz;
|
---|
5170 |
|
---|
5171 | if (*(uint8_t *)a == UINT8_MAX) {
|
---|
5172 | /*
|
---|
5173 | * If the 0th byte is set to UINT8_MAX
|
---|
5174 | * then this is to be treated as a
|
---|
5175 | * reference to a NULL variable.
|
---|
5176 | */
|
---|
5177 | regs[rd] = NULL;
|
---|
5178 | } else {
|
---|
5179 | regs[rd] = a + sizeof (uint64_t);
|
---|
5180 | }
|
---|
5181 |
|
---|
5182 | break;
|
---|
5183 | }
|
---|
5184 |
|
---|
5185 | ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
|
---|
5186 | tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
|
---|
5187 | regs[rd] = tmp[VBDT_GET_CPUID()];
|
---|
5188 | break;
|
---|
5189 |
|
---|
5190 | case DIF_OP_STLS:
|
---|
5191 | id = DIF_INSTR_VAR(instr);
|
---|
5192 |
|
---|
5193 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5194 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5195 | ASSERT(VBDTCAST(int64_t)id < vstate->dtvs_nlocals);
|
---|
5196 |
|
---|
5197 | ASSERT(vstate->dtvs_locals != NULL);
|
---|
5198 | svar = vstate->dtvs_locals[id];
|
---|
5199 | ASSERT(svar != NULL);
|
---|
5200 | v = &svar->dtsv_var;
|
---|
5201 |
|
---|
5202 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5203 | uintptr_t a = (uintptr_t)svar->dtsv_data;
|
---|
5204 | size_t sz = v->dtdv_type.dtdt_size;
|
---|
5205 |
|
---|
5206 | sz += sizeof (uint64_t);
|
---|
5207 | ASSERT(svar->dtsv_size == NCPU * sz);
|
---|
5208 | a += VBDT_GET_CPUID() * sz;
|
---|
5209 |
|
---|
5210 | if (regs[rd] == NULL) {
|
---|
5211 | *(uint8_t *)a = UINT8_MAX;
|
---|
5212 | break;
|
---|
5213 | } else {
|
---|
5214 | *(uint8_t *)a = 0;
|
---|
5215 | a += sizeof (uint64_t);
|
---|
5216 | }
|
---|
5217 |
|
---|
5218 | if (!dtrace_vcanload(
|
---|
5219 | (void *)(uintptr_t)regs[rd], &v->dtdv_type,
|
---|
5220 | mstate, vstate))
|
---|
5221 | break;
|
---|
5222 |
|
---|
5223 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5224 | (void *)a, &v->dtdv_type);
|
---|
5225 | break;
|
---|
5226 | }
|
---|
5227 |
|
---|
5228 | ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
|
---|
5229 | tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
|
---|
5230 | tmp[VBDT_GET_CPUID()] = regs[rd];
|
---|
5231 | break;
|
---|
5232 |
|
---|
5233 | case DIF_OP_LDTS: {
|
---|
5234 | dtrace_dynvar_t *dvar;
|
---|
5235 | dtrace_key_t *key;
|
---|
5236 |
|
---|
5237 | id = DIF_INSTR_VAR(instr);
|
---|
5238 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5239 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5240 | v = &vstate->dtvs_tlocals[id];
|
---|
5241 |
|
---|
5242 | key = &tupregs[DIF_DTR_NREGS];
|
---|
5243 | key[0].dttk_value = (uint64_t)id;
|
---|
5244 | key[0].dttk_size = 0;
|
---|
5245 | DTRACE_TLS_THRKEY(key[1].dttk_value);
|
---|
5246 | key[1].dttk_size = 0;
|
---|
5247 |
|
---|
5248 | dvar = dtrace_dynvar(dstate, 2, key,
|
---|
5249 | sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
|
---|
5250 | mstate, vstate);
|
---|
5251 |
|
---|
5252 | if (dvar == NULL) {
|
---|
5253 | regs[rd] = 0;
|
---|
5254 | break;
|
---|
5255 | }
|
---|
5256 |
|
---|
5257 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5258 | regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
|
---|
5259 | } else {
|
---|
5260 | regs[rd] = *((uint64_t *)dvar->dtdv_data);
|
---|
5261 | }
|
---|
5262 |
|
---|
5263 | break;
|
---|
5264 | }
|
---|
5265 |
|
---|
5266 | case DIF_OP_STTS: {
|
---|
5267 | dtrace_dynvar_t *dvar;
|
---|
5268 | dtrace_key_t *key;
|
---|
5269 |
|
---|
5270 | id = DIF_INSTR_VAR(instr);
|
---|
5271 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5272 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5273 |
|
---|
5274 | key = &tupregs[DIF_DTR_NREGS];
|
---|
5275 | key[0].dttk_value = (uint64_t)id;
|
---|
5276 | key[0].dttk_size = 0;
|
---|
5277 | DTRACE_TLS_THRKEY(key[1].dttk_value);
|
---|
5278 | key[1].dttk_size = 0;
|
---|
5279 | v = &vstate->dtvs_tlocals[id];
|
---|
5280 |
|
---|
5281 | dvar = dtrace_dynvar(dstate, 2, key,
|
---|
5282 | v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
|
---|
5283 | v->dtdv_type.dtdt_size : sizeof (uint64_t),
|
---|
5284 | regs[rd] ? DTRACE_DYNVAR_ALLOC :
|
---|
5285 | DTRACE_DYNVAR_DEALLOC, mstate, vstate);
|
---|
5286 |
|
---|
5287 | /*
|
---|
5288 | * Given that we're storing to thread-local data,
|
---|
5289 | * we need to flush our predicate cache.
|
---|
5290 | */
|
---|
5291 | curthread->t_predcache = NULL;
|
---|
5292 |
|
---|
5293 | if (dvar == NULL)
|
---|
5294 | break;
|
---|
5295 |
|
---|
5296 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5297 | if (!dtrace_vcanload(
|
---|
5298 | (void *)(uintptr_t)regs[rd],
|
---|
5299 | &v->dtdv_type, mstate, vstate))
|
---|
5300 | break;
|
---|
5301 |
|
---|
5302 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5303 | dvar->dtdv_data, &v->dtdv_type);
|
---|
5304 | } else {
|
---|
5305 | *((uint64_t *)dvar->dtdv_data) = regs[rd];
|
---|
5306 | }
|
---|
5307 |
|
---|
5308 | break;
|
---|
5309 | }
|
---|
5310 |
|
---|
5311 | case DIF_OP_SRA:
|
---|
5312 | regs[rd] = (int64_t)regs[r1] >> regs[r2];
|
---|
5313 | break;
|
---|
5314 |
|
---|
5315 | case DIF_OP_CALL:
|
---|
5316 | dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
|
---|
5317 | regs, tupregs, ttop, mstate, state);
|
---|
5318 | break;
|
---|
5319 |
|
---|
5320 | case DIF_OP_PUSHTR:
|
---|
5321 | if (ttop == DIF_DTR_NREGS) {
|
---|
5322 | *flags |= CPU_DTRACE_TUPOFLOW;
|
---|
5323 | break;
|
---|
5324 | }
|
---|
5325 |
|
---|
5326 | if (r1 == DIF_TYPE_STRING) {
|
---|
5327 | /*
|
---|
5328 | * If this is a string type and the size is 0,
|
---|
5329 | * we'll use the system-wide default string
|
---|
5330 | * size. Note that we are _not_ looking at
|
---|
5331 | * the value of the DTRACEOPT_STRSIZE option;
|
---|
5332 | * had this been set, we would expect to have
|
---|
5333 | * a non-zero size value in the "pushtr".
|
---|
5334 | */
|
---|
5335 | tupregs[ttop].dttk_size =
|
---|
5336 | dtrace_strlen((char *)(uintptr_t)regs[rd],
|
---|
5337 | regs[r2] ? regs[r2] :
|
---|
5338 | dtrace_strsize_default) + 1;
|
---|
5339 | } else {
|
---|
5340 | tupregs[ttop].dttk_size = regs[r2];
|
---|
5341 | }
|
---|
5342 |
|
---|
5343 | tupregs[ttop++].dttk_value = regs[rd];
|
---|
5344 | break;
|
---|
5345 |
|
---|
5346 | case DIF_OP_PUSHTV:
|
---|
5347 | if (ttop == DIF_DTR_NREGS) {
|
---|
5348 | *flags |= CPU_DTRACE_TUPOFLOW;
|
---|
5349 | break;
|
---|
5350 | }
|
---|
5351 |
|
---|
5352 | tupregs[ttop].dttk_value = regs[rd];
|
---|
5353 | tupregs[ttop++].dttk_size = 0;
|
---|
5354 | break;
|
---|
5355 |
|
---|
5356 | case DIF_OP_POPTS:
|
---|
5357 | if (ttop != 0)
|
---|
5358 | ttop--;
|
---|
5359 | break;
|
---|
5360 |
|
---|
5361 | case DIF_OP_FLUSHTS:
|
---|
5362 | ttop = 0;
|
---|
5363 | break;
|
---|
5364 |
|
---|
5365 | case DIF_OP_LDGAA:
|
---|
5366 | case DIF_OP_LDTAA: {
|
---|
5367 | dtrace_dynvar_t *dvar;
|
---|
5368 | dtrace_key_t *key = tupregs;
|
---|
5369 | uint_t nkeys = ttop;
|
---|
5370 |
|
---|
5371 | id = DIF_INSTR_VAR(instr);
|
---|
5372 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5373 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5374 |
|
---|
5375 | key[nkeys].dttk_value = (uint64_t)id;
|
---|
5376 | key[nkeys++].dttk_size = 0;
|
---|
5377 |
|
---|
5378 | if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
|
---|
5379 | DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
|
---|
5380 | key[nkeys++].dttk_size = 0;
|
---|
5381 | v = &vstate->dtvs_tlocals[id];
|
---|
5382 | } else {
|
---|
5383 | v = &vstate->dtvs_globals[id]->dtsv_var;
|
---|
5384 | }
|
---|
5385 |
|
---|
5386 | dvar = dtrace_dynvar(dstate, nkeys, key,
|
---|
5387 | v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
|
---|
5388 | v->dtdv_type.dtdt_size : sizeof (uint64_t),
|
---|
5389 | DTRACE_DYNVAR_NOALLOC, mstate, vstate);
|
---|
5390 |
|
---|
5391 | if (dvar == NULL) {
|
---|
5392 | regs[rd] = 0;
|
---|
5393 | break;
|
---|
5394 | }
|
---|
5395 |
|
---|
5396 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5397 | regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
|
---|
5398 | } else {
|
---|
5399 | regs[rd] = *((uint64_t *)dvar->dtdv_data);
|
---|
5400 | }
|
---|
5401 |
|
---|
5402 | break;
|
---|
5403 | }
|
---|
5404 |
|
---|
5405 | case DIF_OP_STGAA:
|
---|
5406 | case DIF_OP_STTAA: {
|
---|
5407 | dtrace_dynvar_t *dvar;
|
---|
5408 | dtrace_key_t *key = tupregs;
|
---|
5409 | uint_t nkeys = ttop;
|
---|
5410 |
|
---|
5411 | id = DIF_INSTR_VAR(instr);
|
---|
5412 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5413 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5414 |
|
---|
5415 | key[nkeys].dttk_value = (uint64_t)id;
|
---|
5416 | key[nkeys++].dttk_size = 0;
|
---|
5417 |
|
---|
5418 | if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
|
---|
5419 | DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
|
---|
5420 | key[nkeys++].dttk_size = 0;
|
---|
5421 | v = &vstate->dtvs_tlocals[id];
|
---|
5422 | } else {
|
---|
5423 | v = &vstate->dtvs_globals[id]->dtsv_var;
|
---|
5424 | }
|
---|
5425 |
|
---|
5426 | dvar = dtrace_dynvar(dstate, nkeys, key,
|
---|
5427 | v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
|
---|
5428 | v->dtdv_type.dtdt_size : sizeof (uint64_t),
|
---|
5429 | regs[rd] ? DTRACE_DYNVAR_ALLOC :
|
---|
5430 | DTRACE_DYNVAR_DEALLOC, mstate, vstate);
|
---|
5431 |
|
---|
5432 | if (dvar == NULL)
|
---|
5433 | break;
|
---|
5434 |
|
---|
5435 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5436 | if (!dtrace_vcanload(
|
---|
5437 | (void *)(uintptr_t)regs[rd], &v->dtdv_type,
|
---|
5438 | mstate, vstate))
|
---|
5439 | break;
|
---|
5440 |
|
---|
5441 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5442 | dvar->dtdv_data, &v->dtdv_type);
|
---|
5443 | } else {
|
---|
5444 | *((uint64_t *)dvar->dtdv_data) = regs[rd];
|
---|
5445 | }
|
---|
5446 |
|
---|
5447 | break;
|
---|
5448 | }
|
---|
5449 |
|
---|
5450 | case DIF_OP_ALLOCS: {
|
---|
5451 | uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
|
---|
5452 | size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
|
---|
5453 |
|
---|
5454 | /*
|
---|
5455 | * Rounding up the user allocation size could have
|
---|
5456 | * overflowed large, bogus allocations (like -1ULL) to
|
---|
5457 | * 0.
|
---|
5458 | */
|
---|
5459 | if (size < regs[r1] ||
|
---|
5460 | !DTRACE_INSCRATCH(mstate, size)) {
|
---|
5461 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
5462 | regs[rd] = NULL;
|
---|
5463 | break;
|
---|
5464 | }
|
---|
5465 |
|
---|
5466 | dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
|
---|
5467 | mstate->dtms_scratch_ptr += size;
|
---|
5468 | regs[rd] = ptr;
|
---|
5469 | break;
|
---|
5470 | }
|
---|
5471 |
|
---|
5472 | case DIF_OP_COPYS:
|
---|
5473 | if (!dtrace_canstore(regs[rd], regs[r2],
|
---|
5474 | mstate, vstate)) {
|
---|
5475 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5476 | *illval = regs[rd];
|
---|
5477 | break;
|
---|
5478 | }
|
---|
5479 |
|
---|
5480 | if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
|
---|
5481 | break;
|
---|
5482 |
|
---|
5483 | dtrace_bcopy((void *)(uintptr_t)regs[r1],
|
---|
5484 | (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
|
---|
5485 | break;
|
---|
5486 |
|
---|
5487 | case DIF_OP_STB:
|
---|
5488 | if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
|
---|
5489 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5490 | *illval = regs[rd];
|
---|
5491 | break;
|
---|
5492 | }
|
---|
5493 | *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
|
---|
5494 | break;
|
---|
5495 |
|
---|
5496 | case DIF_OP_STH:
|
---|
5497 | if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
|
---|
5498 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5499 | *illval = regs[rd];
|
---|
5500 | break;
|
---|
5501 | }
|
---|
5502 | if (regs[rd] & 1) {
|
---|
5503 | *flags |= CPU_DTRACE_BADALIGN;
|
---|
5504 | *illval = regs[rd];
|
---|
5505 | break;
|
---|
5506 | }
|
---|
5507 | *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
|
---|
5508 | break;
|
---|
5509 |
|
---|
5510 | case DIF_OP_STW:
|
---|
5511 | if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
|
---|
5512 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5513 | *illval = regs[rd];
|
---|
5514 | break;
|
---|
5515 | }
|
---|
5516 | if (regs[rd] & 3) {
|
---|
5517 | *flags |= CPU_DTRACE_BADALIGN;
|
---|
5518 | *illval = regs[rd];
|
---|
5519 | break;
|
---|
5520 | }
|
---|
5521 | *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
|
---|
5522 | break;
|
---|
5523 |
|
---|
5524 | case DIF_OP_STX:
|
---|
5525 | if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
|
---|
5526 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5527 | *illval = regs[rd];
|
---|
5528 | break;
|
---|
5529 | }
|
---|
5530 | if (regs[rd] & 7) {
|
---|
5531 | *flags |= CPU_DTRACE_BADALIGN;
|
---|
5532 | *illval = regs[rd];
|
---|
5533 | break;
|
---|
5534 | }
|
---|
5535 | *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
|
---|
5536 | break;
|
---|
5537 | }
|
---|
5538 | }
|
---|
5539 |
|
---|
5540 | if (!(*flags & CPU_DTRACE_FAULT))
|
---|
5541 | return (rval);
|
---|
5542 |
|
---|
5543 | mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
|
---|
5544 | mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
|
---|
5545 |
|
---|
5546 | return (0);
|
---|
5547 | }
|
---|
5548 |
|
---|
5549 | #ifndef VBOX /* no destructive stuff */
|
---|
5550 |
|
---|
5551 | static void
|
---|
5552 | dtrace_action_breakpoint(dtrace_ecb_t *ecb)
|
---|
5553 | {
|
---|
5554 | dtrace_probe_t *probe = ecb->dte_probe;
|
---|
5555 | dtrace_provider_t *prov = probe->dtpr_provider;
|
---|
5556 | char c[DTRACE_FULLNAMELEN + 80], *str;
|
---|
5557 | char *msg = "dtrace: breakpoint action at probe ";
|
---|
5558 | char *ecbmsg = " (ecb ";
|
---|
5559 | uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
|
---|
5560 | uintptr_t val = (uintptr_t)ecb;
|
---|
5561 | int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
|
---|
5562 |
|
---|
5563 | if (dtrace_destructive_disallow)
|
---|
5564 | return;
|
---|
5565 |
|
---|
5566 | /*
|
---|
5567 | * It's impossible to be taking action on the NULL probe.
|
---|
5568 | */
|
---|
5569 | ASSERT(probe != NULL);
|
---|
5570 |
|
---|
5571 | /*
|
---|
5572 | * This is a poor man's (destitute man's?) sprintf(): we want to
|
---|
5573 | * print the provider name, module name, function name and name of
|
---|
5574 | * the probe, along with the hex address of the ECB with the breakpoint
|
---|
5575 | * action -- all of which we must place in the character buffer by
|
---|
5576 | * hand.
|
---|
5577 | */
|
---|
5578 | while (*msg != '\0')
|
---|
5579 | c[i++] = *msg++;
|
---|
5580 |
|
---|
5581 | for (str = prov->dtpv_name; *str != '\0'; str++)
|
---|
5582 | c[i++] = *str;
|
---|
5583 | c[i++] = ':';
|
---|
5584 |
|
---|
5585 | for (str = probe->dtpr_mod; *str != '\0'; str++)
|
---|
5586 | c[i++] = *str;
|
---|
5587 | c[i++] = ':';
|
---|
5588 |
|
---|
5589 | for (str = probe->dtpr_func; *str != '\0'; str++)
|
---|
5590 | c[i++] = *str;
|
---|
5591 | c[i++] = ':';
|
---|
5592 |
|
---|
5593 | for (str = probe->dtpr_name; *str != '\0'; str++)
|
---|
5594 | c[i++] = *str;
|
---|
5595 |
|
---|
5596 | while (*ecbmsg != '\0')
|
---|
5597 | c[i++] = *ecbmsg++;
|
---|
5598 |
|
---|
5599 | while (shift >= 0) {
|
---|
5600 | mask = (uintptr_t)0xf << shift;
|
---|
5601 |
|
---|
5602 | if (val >= ((uintptr_t)1 << shift))
|
---|
5603 | c[i++] = "0123456789abcdef"[(val & mask) >> shift];
|
---|
5604 | shift -= 4;
|
---|
5605 | }
|
---|
5606 |
|
---|
5607 | c[i++] = ')';
|
---|
5608 | c[i] = '\0';
|
---|
5609 |
|
---|
5610 | debug_enter(c);
|
---|
5611 | }
|
---|
5612 |
|
---|
5613 | static void
|
---|
5614 | dtrace_action_panic(dtrace_ecb_t *ecb)
|
---|
5615 | {
|
---|
5616 | dtrace_probe_t *probe = ecb->dte_probe;
|
---|
5617 |
|
---|
5618 | /*
|
---|
5619 | * It's impossible to be taking action on the NULL probe.
|
---|
5620 | */
|
---|
5621 | ASSERT(probe != NULL);
|
---|
5622 |
|
---|
5623 | if (dtrace_destructive_disallow)
|
---|
5624 | return;
|
---|
5625 |
|
---|
5626 | if (dtrace_panicked != NULL)
|
---|
5627 | return;
|
---|
5628 |
|
---|
5629 | if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
|
---|
5630 | return;
|
---|
5631 |
|
---|
5632 | /*
|
---|
5633 | * We won the right to panic. (We want to be sure that only one
|
---|
5634 | * thread calls panic() from dtrace_probe(), and that panic() is
|
---|
5635 | * called exactly once.)
|
---|
5636 | */
|
---|
5637 | dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
|
---|
5638 | probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
|
---|
5639 | probe->dtpr_func, probe->dtpr_name, (void *)ecb);
|
---|
5640 | }
|
---|
5641 |
|
---|
5642 | static void
|
---|
5643 | dtrace_action_raise(uint64_t sig)
|
---|
5644 | {
|
---|
5645 | if (dtrace_destructive_disallow)
|
---|
5646 | return;
|
---|
5647 |
|
---|
5648 | if (sig >= NSIG) {
|
---|
5649 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
5650 | return;
|
---|
5651 | }
|
---|
5652 |
|
---|
5653 | /*
|
---|
5654 | * raise() has a queue depth of 1 -- we ignore all subsequent
|
---|
5655 | * invocations of the raise() action.
|
---|
5656 | */
|
---|
5657 | if (curthread->t_dtrace_sig == 0)
|
---|
5658 | curthread->t_dtrace_sig = (uint8_t)sig;
|
---|
5659 |
|
---|
5660 | curthread->t_sig_check = 1;
|
---|
5661 | aston(curthread);
|
---|
5662 | }
|
---|
5663 |
|
---|
5664 | static void
|
---|
5665 | dtrace_action_stop(void)
|
---|
5666 | {
|
---|
5667 | if (dtrace_destructive_disallow)
|
---|
5668 | return;
|
---|
5669 |
|
---|
5670 | if (!curthread->t_dtrace_stop) {
|
---|
5671 | curthread->t_dtrace_stop = 1;
|
---|
5672 | curthread->t_sig_check = 1;
|
---|
5673 | aston(curthread);
|
---|
5674 | }
|
---|
5675 | }
|
---|
5676 |
|
---|
5677 | static void
|
---|
5678 | dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
|
---|
5679 | {
|
---|
5680 | hrtime_t now;
|
---|
5681 | volatile uint16_t *flags;
|
---|
5682 | cpu_t *cpu = CPU;
|
---|
5683 |
|
---|
5684 | if (dtrace_destructive_disallow)
|
---|
5685 | return;
|
---|
5686 |
|
---|
5687 | flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
|
---|
5688 |
|
---|
5689 | now = dtrace_gethrtime();
|
---|
5690 |
|
---|
5691 | if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
|
---|
5692 | /*
|
---|
5693 | * We need to advance the mark to the current time.
|
---|
5694 | */
|
---|
5695 | cpu->cpu_dtrace_chillmark = now;
|
---|
5696 | cpu->cpu_dtrace_chilled = 0;
|
---|
5697 | }
|
---|
5698 |
|
---|
5699 | /*
|
---|
5700 | * Now check to see if the requested chill time would take us over
|
---|
5701 | * the maximum amount of time allowed in the chill interval. (Or
|
---|
5702 | * worse, if the calculation itself induces overflow.)
|
---|
5703 | */
|
---|
5704 | if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
|
---|
5705 | cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
|
---|
5706 | *flags |= CPU_DTRACE_ILLOP;
|
---|
5707 | return;
|
---|
5708 | }
|
---|
5709 |
|
---|
5710 | while (dtrace_gethrtime() - now < val)
|
---|
5711 | continue;
|
---|
5712 |
|
---|
5713 | /*
|
---|
5714 | * Normally, we assure that the value of the variable "timestamp" does
|
---|
5715 | * not change within an ECB. The presence of chill() represents an
|
---|
5716 | * exception to this rule, however.
|
---|
5717 | */
|
---|
5718 | mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
|
---|
5719 | cpu->cpu_dtrace_chilled += val;
|
---|
5720 | }
|
---|
5721 |
|
---|
5722 | #endif /* !VBOX */
|
---|
5723 |
|
---|
5724 | #ifndef VBOX
|
---|
5725 | static void
|
---|
5726 | dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
|
---|
5727 | uint64_t *buf, uint64_t arg)
|
---|
5728 | {
|
---|
5729 | int nframes = DTRACE_USTACK_NFRAMES(arg);
|
---|
5730 | int strsize = DTRACE_USTACK_STRSIZE(arg);
|
---|
5731 | uint64_t *pcs = &buf[1], *fps;
|
---|
5732 | char *str = (char *)&pcs[nframes];
|
---|
5733 | int size, offs = 0, i, j;
|
---|
5734 | uintptr_t old = mstate->dtms_scratch_ptr, saved;
|
---|
5735 | #ifndef VBOX
|
---|
5736 | uint16_t *flags = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
5737 | #else
|
---|
5738 | uint16_t volatile *flags = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
5739 | #endif
|
---|
5740 | char *sym;
|
---|
5741 |
|
---|
5742 | /*
|
---|
5743 | * Should be taking a faster path if string space has not been
|
---|
5744 | * allocated.
|
---|
5745 | */
|
---|
5746 | ASSERT(strsize != 0);
|
---|
5747 |
|
---|
5748 | /*
|
---|
5749 | * We will first allocate some temporary space for the frame pointers.
|
---|
5750 | */
|
---|
5751 | fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
|
---|
5752 | size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
|
---|
5753 | (nframes * sizeof (uint64_t));
|
---|
5754 |
|
---|
5755 | if (!DTRACE_INSCRATCH(mstate, VBDTCAST(unsigned)size)) {
|
---|
5756 | /*
|
---|
5757 | * Not enough room for our frame pointers -- need to indicate
|
---|
5758 | * that we ran out of scratch space.
|
---|
5759 | */
|
---|
5760 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
5761 | return;
|
---|
5762 | }
|
---|
5763 |
|
---|
5764 | mstate->dtms_scratch_ptr += size;
|
---|
5765 | saved = mstate->dtms_scratch_ptr;
|
---|
5766 |
|
---|
5767 | /*
|
---|
5768 | * Now get a stack with both program counters and frame pointers.
|
---|
5769 | */
|
---|
5770 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
5771 | dtrace_getufpstack(buf, fps, nframes + 1);
|
---|
5772 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
5773 |
|
---|
5774 | /*
|
---|
5775 | * If that faulted, we're cooked.
|
---|
5776 | */
|
---|
5777 | if (*flags & CPU_DTRACE_FAULT)
|
---|
5778 | goto out;
|
---|
5779 |
|
---|
5780 | /*
|
---|
5781 | * Now we want to walk up the stack, calling the USTACK helper. For
|
---|
5782 | * each iteration, we restore the scratch pointer.
|
---|
5783 | */
|
---|
5784 | for (i = 0; i < nframes; i++) {
|
---|
5785 | mstate->dtms_scratch_ptr = saved;
|
---|
5786 |
|
---|
5787 | if (offs >= strsize)
|
---|
5788 | break;
|
---|
5789 |
|
---|
5790 | #ifndef VBOX
|
---|
5791 | sym = (char *)(uintptr_t)dtrace_helper(
|
---|
5792 | DTRACE_HELPER_ACTION_USTACK,
|
---|
5793 | mstate, state, pcs[i], fps[i]);
|
---|
5794 | #else
|
---|
5795 | sym = NULL;
|
---|
5796 | #endif
|
---|
5797 |
|
---|
5798 | /*
|
---|
5799 | * If we faulted while running the helper, we're going to
|
---|
5800 | * clear the fault and null out the corresponding string.
|
---|
5801 | */
|
---|
5802 | if (*flags & CPU_DTRACE_FAULT) {
|
---|
5803 | *flags &= ~CPU_DTRACE_FAULT;
|
---|
5804 | str[offs++] = '\0';
|
---|
5805 | continue;
|
---|
5806 | }
|
---|
5807 |
|
---|
5808 | if (sym == NULL) {
|
---|
5809 | str[offs++] = '\0';
|
---|
5810 | continue;
|
---|
5811 | }
|
---|
5812 |
|
---|
5813 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
5814 |
|
---|
5815 | /*
|
---|
5816 | * Now copy in the string that the helper returned to us.
|
---|
5817 | */
|
---|
5818 | for (j = 0; offs + j < strsize; j++) {
|
---|
5819 | if ((str[offs + j] = sym[j]) == '\0')
|
---|
5820 | break;
|
---|
5821 | }
|
---|
5822 |
|
---|
5823 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
5824 |
|
---|
5825 | offs += j + 1;
|
---|
5826 | }
|
---|
5827 |
|
---|
5828 | if (offs >= strsize) {
|
---|
5829 | /*
|
---|
5830 | * If we didn't have room for all of the strings, we don't
|
---|
5831 | * abort processing -- this needn't be a fatal error -- but we
|
---|
5832 | * still want to increment a counter (dts_stkstroverflows) to
|
---|
5833 | * allow this condition to be warned about. (If this is from
|
---|
5834 | * a jstack() action, it is easily tuned via jstackstrsize.)
|
---|
5835 | */
|
---|
5836 | dtrace_error(&state->dts_stkstroverflows);
|
---|
5837 | }
|
---|
5838 |
|
---|
5839 | while (offs < strsize)
|
---|
5840 | str[offs++] = '\0';
|
---|
5841 |
|
---|
5842 | out:
|
---|
5843 | mstate->dtms_scratch_ptr = old;
|
---|
5844 | }
|
---|
5845 | #endif /* !VBOX */
|
---|
5846 |
|
---|
#ifdef VBOX
/*
 * In the VBOX build, dtrace_probe_error() is a macro that fires the error
 * probe (dtrace_probeid_error) through dtrace_probe6().  The first argument
 * is cast to uintptr_t; the remaining five are passed through unchanged.
 * Every macro parameter is parenthesized so that expression arguments bind
 * correctly (in particular, so that the cast applies to all of a1).
 */
extern void dtrace_probe6(dtrace_id_t, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
# define dtrace_probe_error(a1, a2, a3, a4, a5, a6) \
	dtrace_probe6(dtrace_probeid_error, (uintptr_t)(a1), (a2), (a3), \
	    (a4), (a5), (a6))
#endif
|
---|
5853 |
|
---|
5854 | /*
|
---|
5855 | * If you're looking for the epicenter of DTrace, you just found it. This
|
---|
5856 | * is the function called by the provider to fire a probe -- from which all
|
---|
5857 | * subsequent probe-context DTrace activity emanates.
|
---|
5858 | */
|
---|
5859 | void
|
---|
5860 | dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
|
---|
5861 | uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
|
---|
5862 | {
|
---|
5863 | processorid_t cpuid;
|
---|
5864 | dtrace_icookie_t cookie;
|
---|
5865 | dtrace_probe_t *probe;
|
---|
5866 | dtrace_mstate_t mstate;
|
---|
5867 | dtrace_ecb_t *ecb;
|
---|
5868 | dtrace_action_t *act;
|
---|
5869 | intptr_t offs;
|
---|
5870 | size_t size;
|
---|
5871 | int vtime, onintr;
|
---|
5872 | volatile uint16_t *flags;
|
---|
5873 | hrtime_t now;
|
---|
5874 |
|
---|
5875 | #ifndef VBOX
|
---|
5876 | /*
|
---|
5877 | * Kick out immediately if this CPU is still being born (in which case
|
---|
5878 | * curthread will be set to -1) or the current thread can't allow
|
---|
5879 | * probes in its current context.
|
---|
5880 | */
|
---|
5881 | if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
|
---|
5882 | return;
|
---|
5883 | #endif
|
---|
5884 |
|
---|
5885 | cookie = dtrace_interrupt_disable();
|
---|
5886 | probe = dtrace_probes[id - 1];
|
---|
5887 | cpuid = VBDT_GET_CPUID();
|
---|
5888 | onintr = CPU_ON_INTR(CPU);
|
---|
5889 |
|
---|
5890 | if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
|
---|
5891 | probe->dtpr_predcache == curthread->t_predcache) {
|
---|
5892 | /*
|
---|
5893 | * We have hit in the predicate cache; we know that
|
---|
5894 | * this predicate would evaluate to be false.
|
---|
5895 | */
|
---|
5896 | dtrace_interrupt_enable(cookie);
|
---|
5897 | return;
|
---|
5898 | }
|
---|
5899 |
|
---|
5900 | #ifndef VBOX
|
---|
5901 | if (panic_quiesce) {
|
---|
5902 | /*
|
---|
5903 | * We don't trace anything if we're panicking.
|
---|
5904 | */
|
---|
5905 | dtrace_interrupt_enable(cookie);
|
---|
5906 | return;
|
---|
5907 | }
|
---|
5908 | #endif
|
---|
5909 |
|
---|
5910 | now = dtrace_gethrtime();
|
---|
5911 | vtime = dtrace_vtime_references != 0;
|
---|
5912 |
|
---|
5913 | if (vtime && curthread->t_dtrace_start)
|
---|
5914 | curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
|
---|
5915 |
|
---|
5916 | mstate.dtms_difo = NULL;
|
---|
5917 | mstate.dtms_probe = probe;
|
---|
5918 | mstate.dtms_strtok = NULL;
|
---|
5919 | mstate.dtms_arg[0] = arg0;
|
---|
5920 | mstate.dtms_arg[1] = arg1;
|
---|
5921 | mstate.dtms_arg[2] = arg2;
|
---|
5922 | mstate.dtms_arg[3] = arg3;
|
---|
5923 | mstate.dtms_arg[4] = arg4;
|
---|
5924 |
|
---|
5925 | flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
|
---|
5926 |
|
---|
5927 | for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
|
---|
5928 | dtrace_predicate_t *pred = ecb->dte_predicate;
|
---|
5929 | dtrace_state_t *state = ecb->dte_state;
|
---|
5930 | dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
|
---|
5931 | dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
|
---|
5932 | dtrace_vstate_t *vstate = &state->dts_vstate;
|
---|
5933 | dtrace_provider_t *prov = probe->dtpr_provider;
|
---|
5934 | int committed = 0;
|
---|
5935 | caddr_t tomax;
|
---|
5936 |
|
---|
5937 | /*
|
---|
5938 | * A little subtlety with the following (seemingly innocuous)
|
---|
5939 | * declaration of the automatic 'val': by looking at the
|
---|
5940 | * code, you might think that it could be declared in the
|
---|
5941 | * action processing loop, below. (That is, it's only used in
|
---|
5942 | * the action processing loop.) However, it must be declared
|
---|
5943 | * out of that scope because in the case of DIF expression
|
---|
5944 | * arguments to aggregating actions, one iteration of the
|
---|
5945 | * action loop will use the last iteration's value.
|
---|
5946 | */
|
---|
5947 | #ifdef lint
|
---|
5948 | uint64_t val = 0;
|
---|
5949 | #else
|
---|
5950 | uint64_t val VBDTUNASS(0);
|
---|
5951 | #endif
|
---|
5952 |
|
---|
5953 | mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
|
---|
5954 | *flags &= ~CPU_DTRACE_ERROR;
|
---|
5955 |
|
---|
5956 | if (prov == dtrace_provider) {
|
---|
5957 | /*
|
---|
5958 | * If dtrace itself is the provider of this probe,
|
---|
5959 | * we're only going to continue processing the ECB if
|
---|
5960 | * arg0 (the dtrace_state_t) is equal to the ECB's
|
---|
5961 | * creating state. (This prevents disjoint consumers
|
---|
5962 | * from seeing one another's metaprobes.)
|
---|
5963 | */
|
---|
5964 | if (arg0 != (uint64_t)(uintptr_t)state)
|
---|
5965 | continue;
|
---|
5966 | }
|
---|
5967 |
|
---|
5968 | if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
|
---|
5969 | /*
|
---|
5970 | * We're not currently active. If our provider isn't
|
---|
5971 | * the dtrace pseudo provider, we're not interested.
|
---|
5972 | */
|
---|
5973 | if (prov != dtrace_provider)
|
---|
5974 | continue;
|
---|
5975 |
|
---|
5976 | /*
|
---|
5977 | * Now we must further check if we are in the BEGIN
|
---|
5978 | * probe. If we are, we will only continue processing
|
---|
5979 | * if we're still in WARMUP -- if one BEGIN enabling
|
---|
5980 | * has invoked the exit() action, we don't want to
|
---|
5981 | * evaluate subsequent BEGIN enablings.
|
---|
5982 | */
|
---|
5983 | if (probe->dtpr_id == dtrace_probeid_begin &&
|
---|
5984 | state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
|
---|
5985 | ASSERT(state->dts_activity ==
|
---|
5986 | DTRACE_ACTIVITY_DRAINING);
|
---|
5987 | continue;
|
---|
5988 | }
|
---|
5989 | }
|
---|
5990 |
|
---|
5991 | if (ecb->dte_cond) {
|
---|
5992 | /*
|
---|
5993 | * If the dte_cond bits indicate that this
|
---|
5994 | * consumer is only allowed to see user-mode firings
|
---|
5995 | * of this probe, call the provider's dtps_usermode()
|
---|
5996 | * entry point to check that the probe was fired
|
---|
5997 | * while in a user context. Skip this ECB if that's
|
---|
5998 | * not the case.
|
---|
5999 | */
|
---|
6000 | if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
|
---|
6001 | prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
|
---|
6002 | probe->dtpr_id, probe->dtpr_arg) == 0)
|
---|
6003 | continue;
|
---|
6004 |
|
---|
6005 | /*
|
---|
6006 | * This is more subtle than it looks. We have to be
|
---|
6007 | * absolutely certain that CRED() isn't going to
|
---|
6008 | * change out from under us so it's only legit to
|
---|
6009 | * examine that structure if we're in constrained
|
---|
6010 | * situations. Currently, the only time we'll do this
|
---|
6011 | * check is if a non-super-user has enabled the
|
---|
6012 | * profile or syscall providers -- providers that
|
---|
6013 | * allow visibility of all processes. For the
|
---|
6014 | * profile case, the check above will ensure that
|
---|
6015 | * we're examining a user context.
|
---|
6016 | */
|
---|
6017 | if (ecb->dte_cond & DTRACE_COND_OWNER) {
|
---|
6018 | cred_t *cr;
|
---|
6019 | cred_t *s_cr =
|
---|
6020 | ecb->dte_state->dts_cred.dcr_cred;
|
---|
6021 | #ifndef VBOX
|
---|
6022 | proc_t *proc;
|
---|
6023 | #endif
|
---|
6024 |
|
---|
6025 | ASSERT(s_cr != NULL);
|
---|
6026 |
|
---|
6027 | if ((cr = CRED()) == NULL ||
|
---|
6028 | s_cr->cr_uid != cr->cr_uid ||
|
---|
6029 | s_cr->cr_uid != cr->cr_ruid ||
|
---|
6030 | s_cr->cr_uid != cr->cr_suid ||
|
---|
6031 | s_cr->cr_gid != cr->cr_gid ||
|
---|
6032 | s_cr->cr_gid != cr->cr_rgid ||
|
---|
6033 | s_cr->cr_gid != cr->cr_sgid ||
|
---|
6034 | #ifndef VBOX
|
---|
6035 | (proc = VBDT_GET_PROC()) == NULL ||
|
---|
6036 | (proc->p_flag & SNOCD))
|
---|
6037 | #else
|
---|
6038 | 0)
|
---|
6039 |
|
---|
6040 | #endif
|
---|
6041 | continue;
|
---|
6042 | }
|
---|
6043 |
|
---|
6044 | #ifndef VBOX
|
---|
6045 | if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
|
---|
6046 | cred_t *cr;
|
---|
6047 | cred_t *s_cr =
|
---|
6048 | ecb->dte_state->dts_cred.dcr_cred;
|
---|
6049 |
|
---|
6050 | ASSERT(s_cr != NULL);
|
---|
6051 |
|
---|
6052 | if ((cr = CRED()) == NULL ||
|
---|
6053 | s_cr->cr_zone->zone_id !=
|
---|
6054 | cr->cr_zone->zone_id)
|
---|
6055 | continue;
|
---|
6056 | }
|
---|
6057 | #endif
|
---|
6058 | }
|
---|
6059 |
|
---|
6060 | if (now - state->dts_alive > dtrace_deadman_timeout) {
|
---|
6061 | /*
|
---|
6062 | * We seem to be dead. Unless we (a) have kernel
|
---|
6063 | * destructive permissions (b) have explicitly enabled
|
---|
6064 | * destructive actions and (c) destructive actions have
|
---|
6065 | * not been disabled, we're going to transition into
|
---|
6066 | * the KILLED state, from which no further processing
|
---|
6067 | * on this state will be performed.
|
---|
6068 | */
|
---|
6069 | if (!dtrace_priv_kernel_destructive(state) ||
|
---|
6070 | !state->dts_cred.dcr_destructive ||
|
---|
6071 | dtrace_destructive_disallow) {
|
---|
6072 | void *activity = &state->dts_activity;
|
---|
6073 | dtrace_activity_t current;
|
---|
6074 |
|
---|
6075 | do {
|
---|
6076 | current = state->dts_activity;
|
---|
6077 | } while ( (dtrace_activity_t)dtrace_cas32(activity, current, DTRACE_ACTIVITY_KILLED)
|
---|
6078 | != current);
|
---|
6079 |
|
---|
6080 | continue;
|
---|
6081 | }
|
---|
6082 | }
|
---|
6083 |
|
---|
6084 | if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
|
---|
6085 | ecb->dte_alignment, state, &mstate)) < 0)
|
---|
6086 | continue;
|
---|
6087 |
|
---|
6088 | tomax = buf->dtb_tomax;
|
---|
6089 | ASSERT(tomax != NULL);
|
---|
6090 |
|
---|
6091 | if (ecb->dte_size != 0)
|
---|
6092 | DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);
|
---|
6093 |
|
---|
6094 | mstate.dtms_epid = ecb->dte_epid;
|
---|
6095 | mstate.dtms_present |= DTRACE_MSTATE_EPID;
|
---|
6096 |
|
---|
6097 | if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
|
---|
6098 | mstate.dtms_access = DTRACE_ACCESS_KERNEL;
|
---|
6099 | else
|
---|
6100 | mstate.dtms_access = 0;
|
---|
6101 |
|
---|
6102 | if (pred != NULL) {
|
---|
6103 | dtrace_difo_t *dp = pred->dtp_difo;
|
---|
6104 | int rval;
|
---|
6105 |
|
---|
6106 | rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
|
---|
6107 |
|
---|
6108 | if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
|
---|
6109 | dtrace_cacheid_t cid = probe->dtpr_predcache;
|
---|
6110 |
|
---|
6111 | if (cid != DTRACE_CACHEIDNONE && !onintr) {
|
---|
6112 | /*
|
---|
6113 | * Update the predicate cache...
|
---|
6114 | */
|
---|
6115 | ASSERT(cid == pred->dtp_cacheid);
|
---|
6116 | curthread->t_predcache = cid;
|
---|
6117 | }
|
---|
6118 |
|
---|
6119 | continue;
|
---|
6120 | }
|
---|
6121 | }
|
---|
6122 |
|
---|
6123 | for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
|
---|
6124 | act != NULL; act = act->dta_next) {
|
---|
6125 | size_t valoffs;
|
---|
6126 | dtrace_difo_t *dp;
|
---|
6127 | dtrace_recdesc_t *rec = &act->dta_rec;
|
---|
6128 |
|
---|
6129 | size = rec->dtrd_size;
|
---|
6130 | valoffs = offs + rec->dtrd_offset;
|
---|
6131 |
|
---|
6132 | if (DTRACEACT_ISAGG(act->dta_kind)) {
|
---|
6133 | uint64_t v = 0xbad;
|
---|
6134 | dtrace_aggregation_t *agg;
|
---|
6135 |
|
---|
6136 | agg = (dtrace_aggregation_t *)act;
|
---|
6137 |
|
---|
6138 | if ((dp = act->dta_difo) != NULL)
|
---|
6139 | v = dtrace_dif_emulate(dp,
|
---|
6140 | &mstate, vstate, state);
|
---|
6141 |
|
---|
6142 | if (*flags & CPU_DTRACE_ERROR)
|
---|
6143 | continue;
|
---|
6144 |
|
---|
6145 | /*
|
---|
6146 | * Note that we always pass the expression
|
---|
6147 | * value from the previous iteration of the
|
---|
6148 | * action loop. This value will only be used
|
---|
6149 | * if there is an expression argument to the
|
---|
6150 | * aggregating action, denoted by the
|
---|
6151 | * dtag_hasarg field.
|
---|
6152 | */
|
---|
6153 | dtrace_aggregate(agg, buf,
|
---|
6154 | offs, aggbuf, v, val);
|
---|
6155 | continue;
|
---|
6156 | }
|
---|
6157 |
|
---|
6158 | switch (act->dta_kind) {
|
---|
6159 | case DTRACEACT_STOP:
|
---|
6160 | #ifndef VBOX
|
---|
6161 | if (dtrace_priv_proc_destructive(state))
|
---|
6162 | dtrace_action_stop();
|
---|
6163 | #else
|
---|
6164 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
6165 | #endif
|
---|
6166 | continue;
|
---|
6167 |
|
---|
6168 | case DTRACEACT_BREAKPOINT:
|
---|
6169 | #ifndef VBOX
|
---|
6170 | if (dtrace_priv_kernel_destructive(state))
|
---|
6171 | dtrace_action_breakpoint(ecb);
|
---|
6172 | #else
|
---|
6173 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
6174 | #endif
|
---|
6175 | continue;
|
---|
6176 |
|
---|
6177 | case DTRACEACT_PANIC:
|
---|
6178 | #ifndef VBOX
|
---|
6179 | if (dtrace_priv_kernel_destructive(state))
|
---|
6180 | dtrace_action_panic(ecb);
|
---|
6181 | #endif
|
---|
6182 | continue;
|
---|
6183 |
|
---|
6184 | case DTRACEACT_STACK:
|
---|
6185 | if (!dtrace_priv_kernel(state))
|
---|
6186 | continue;
|
---|
6187 |
|
---|
6188 | dtrace_getpcstack((pc_t *)(tomax + valoffs),
|
---|
6189 | VBDTCAST(int)(size / sizeof (pc_t)), probe->dtpr_aframes,
|
---|
6190 | DTRACE_ANCHORED(probe) ? NULL :
|
---|
6191 | (uint32_t *)arg0);
|
---|
6192 |
|
---|
6193 | continue;
|
---|
6194 |
|
---|
6195 | case DTRACEACT_JSTACK:
|
---|
6196 | case DTRACEACT_USTACK:
|
---|
6197 | if (!dtrace_priv_proc(state))
|
---|
6198 | continue;
|
---|
6199 |
|
---|
6200 | /*
|
---|
6201 | * See comment in DIF_VAR_PID.
|
---|
6202 | */
|
---|
6203 | if (DTRACE_ANCHORED(mstate.dtms_probe) &&
|
---|
6204 | CPU_ON_INTR(CPU)) {
|
---|
6205 | int depth = DTRACE_USTACK_NFRAMES(
|
---|
6206 | rec->dtrd_arg) + 1;
|
---|
6207 |
|
---|
6208 | dtrace_bzero((void *)(tomax + valoffs),
|
---|
6209 | DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
|
---|
6210 | + depth * sizeof (uint64_t));
|
---|
6211 |
|
---|
6212 | continue;
|
---|
6213 | }
|
---|
6214 |
|
---|
6215 | #ifndef VBOX /* no helpers */
|
---|
6216 | if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
|
---|
6217 | curproc->p_dtrace_helpers != NULL) {
|
---|
6218 | /*
|
---|
6219 | * This is the slow path -- we have
|
---|
6220 | * allocated string space, and we're
|
---|
6221 | * getting the stack of a process that
|
---|
6222 | * has helpers. Call into a separate
|
---|
6223 | * routine to perform this processing.
|
---|
6224 | */
|
---|
6225 | dtrace_action_ustack(&mstate, state,
|
---|
6226 | (uint64_t *)(tomax + valoffs),
|
---|
6227 | rec->dtrd_arg);
|
---|
6228 | continue;
|
---|
6229 | }
|
---|
6230 | #endif
|
---|
6231 |
|
---|
6232 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
6233 | dtrace_getupcstack((uint64_t *)
|
---|
6234 | (tomax + valoffs),
|
---|
6235 | DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
|
---|
6236 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
6237 | continue;
|
---|
6238 |
|
---|
6239 | default:
|
---|
6240 | break;
|
---|
6241 | }
|
---|
6242 |
|
---|
6243 | dp = act->dta_difo;
|
---|
6244 | ASSERT(dp != NULL);
|
---|
6245 |
|
---|
6246 | val = dtrace_dif_emulate(dp, &mstate, vstate, state);
|
---|
6247 |
|
---|
6248 | if (*flags & CPU_DTRACE_ERROR)
|
---|
6249 | continue;
|
---|
6250 |
|
---|
6251 | switch (act->dta_kind) {
|
---|
6252 | case DTRACEACT_SPECULATE:
|
---|
6253 | ASSERT(buf == &state->dts_buffer[cpuid]);
|
---|
6254 | buf = dtrace_speculation_buffer(state,
|
---|
6255 | cpuid, val);
|
---|
6256 |
|
---|
6257 | if (buf == NULL) {
|
---|
6258 | *flags |= CPU_DTRACE_DROP;
|
---|
6259 | continue;
|
---|
6260 | }
|
---|
6261 |
|
---|
6262 | offs = dtrace_buffer_reserve(buf,
|
---|
6263 | ecb->dte_needed, ecb->dte_alignment,
|
---|
6264 | state, NULL);
|
---|
6265 |
|
---|
6266 | if (offs < 0) {
|
---|
6267 | *flags |= CPU_DTRACE_DROP;
|
---|
6268 | continue;
|
---|
6269 | }
|
---|
6270 |
|
---|
6271 | tomax = buf->dtb_tomax;
|
---|
6272 | ASSERT(tomax != NULL);
|
---|
6273 |
|
---|
6274 | if (ecb->dte_size != 0)
|
---|
6275 | DTRACE_STORE(uint32_t, tomax, offs,
|
---|
6276 | ecb->dte_epid);
|
---|
6277 | continue;
|
---|
6278 |
|
---|
6279 | case DTRACEACT_CHILL:
|
---|
6280 | #ifndef VBOX
|
---|
6281 | if (dtrace_priv_kernel_destructive(state))
|
---|
6282 | dtrace_action_chill(&mstate, val);
|
---|
6283 | #else
|
---|
6284 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
6285 | #endif
|
---|
6286 | continue;
|
---|
6287 |
|
---|
6288 | case DTRACEACT_RAISE:
|
---|
6289 | #ifndef VBOX
|
---|
6290 | if (dtrace_priv_proc_destructive(state))
|
---|
6291 | dtrace_action_raise(val);
|
---|
6292 | #else
|
---|
6293 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
6294 | #endif
|
---|
6295 | continue;
|
---|
6296 |
|
---|
6297 | case DTRACEACT_COMMIT:
|
---|
6298 | ASSERT(!committed);
|
---|
6299 |
|
---|
6300 | /*
|
---|
6301 | * We need to commit our buffer state.
|
---|
6302 | */
|
---|
6303 | if (ecb->dte_size)
|
---|
6304 | buf->dtb_offset = offs + ecb->dte_size;
|
---|
6305 | buf = &state->dts_buffer[cpuid];
|
---|
6306 | dtrace_speculation_commit(state, cpuid, val);
|
---|
6307 | committed = 1;
|
---|
6308 | continue;
|
---|
6309 |
|
---|
6310 | case DTRACEACT_DISCARD:
|
---|
6311 | dtrace_speculation_discard(state, cpuid, val);
|
---|
6312 | continue;
|
---|
6313 |
|
---|
6314 | case DTRACEACT_DIFEXPR:
|
---|
6315 | case DTRACEACT_LIBACT:
|
---|
6316 | case DTRACEACT_PRINTF:
|
---|
6317 | case DTRACEACT_PRINTA:
|
---|
6318 | case DTRACEACT_SYSTEM:
|
---|
6319 | case DTRACEACT_FREOPEN:
|
---|
6320 | break;
|
---|
6321 |
|
---|
6322 | case DTRACEACT_SYM:
|
---|
6323 | case DTRACEACT_MOD:
|
---|
6324 | if (!dtrace_priv_kernel(state))
|
---|
6325 | continue;
|
---|
6326 | break;
|
---|
6327 |
|
---|
6328 | case DTRACEACT_USYM:
|
---|
6329 | case DTRACEACT_UMOD:
|
---|
6330 | case DTRACEACT_UADDR: {
|
---|
6331 | #ifndef VBOX
|
---|
6332 | struct pid *pid = curthread->t_procp->p_pidp;
|
---|
6333 |
|
---|
6334 | if (!dtrace_priv_proc(state))
|
---|
6335 | continue;
|
---|
6336 |
|
---|
6337 | DTRACE_STORE(uint64_t, tomax,
|
---|
6338 | valoffs, (uint64_t)pid->pid_id);
|
---|
6339 | DTRACE_STORE(uint64_t, tomax,
|
---|
6340 | valoffs + sizeof (uint64_t), val);
|
---|
6341 | #else
|
---|
6342 | DTRACE_CPUFLAG_SET(CPU_DTRACE_UPRIV);
|
---|
6343 | #endif
|
---|
6344 | continue;
|
---|
6345 | }
|
---|
6346 |
|
---|
6347 | case DTRACEACT_EXIT: {
|
---|
6348 | /*
|
---|
6349 | * For the exit action, we are going to attempt
|
---|
6350 | * to atomically set our activity to be
|
---|
6351 | * draining. If this fails (either because
|
---|
6352 | * another CPU has beat us to the exit action,
|
---|
6353 | * or because our current activity is something
|
---|
6354 | * other than ACTIVE or WARMUP), we will
|
---|
6355 | * continue. This assures that the exit action
|
---|
6356 | * can be successfully recorded at most once
|
---|
6357 | * when we're in the ACTIVE state. If we're
|
---|
6358 | * encountering the exit() action while in
|
---|
6359 | * COOLDOWN, however, we want to honor the new
|
---|
6360 | * status code. (We know that we're the only
|
---|
6361 | * thread in COOLDOWN, so there is no race.)
|
---|
6362 | */
|
---|
6363 | void *activity = &state->dts_activity;
|
---|
6364 | dtrace_activity_t current = state->dts_activity;
|
---|
6365 |
|
---|
6366 | if (current == DTRACE_ACTIVITY_COOLDOWN)
|
---|
6367 | break;
|
---|
6368 |
|
---|
6369 | if (current != DTRACE_ACTIVITY_WARMUP)
|
---|
6370 | current = DTRACE_ACTIVITY_ACTIVE;
|
---|
6371 |
|
---|
6372 | if ( (dtrace_activity_t)dtrace_cas32(activity, current, DTRACE_ACTIVITY_DRAINING)
|
---|
6373 | != current) {
|
---|
6374 | *flags |= CPU_DTRACE_DROP;
|
---|
6375 | continue;
|
---|
6376 | }
|
---|
6377 |
|
---|
6378 | break;
|
---|
6379 | }
|
---|
6380 |
|
---|
6381 | default:
|
---|
6382 | #ifndef VBOX
|
---|
6383 | ASSERT(0);
|
---|
6384 | #else
|
---|
6385 | AssertFatalMsgFailed(("%d\n", act->dta_kind));
|
---|
6386 | #endif
|
---|
6387 | }
|
---|
6388 |
|
---|
6389 | if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
|
---|
6390 | uintptr_t end = valoffs + size;
|
---|
6391 |
|
---|
6392 | if (!dtrace_vcanload((void *)(uintptr_t)val,
|
---|
6393 | &dp->dtdo_rtype, &mstate, vstate))
|
---|
6394 | continue;
|
---|
6395 |
|
---|
6396 | /*
|
---|
6397 | * If this is a string, we're going to only
|
---|
6398 | * load until we find the zero byte -- after
|
---|
6399 | * which we'll store zero bytes.
|
---|
6400 | */
|
---|
6401 | if (dp->dtdo_rtype.dtdt_kind ==
|
---|
6402 | DIF_TYPE_STRING) {
|
---|
6403 | char c = '\0' + 1;
|
---|
6404 | int intuple = act->dta_intuple;
|
---|
6405 | size_t s;
|
---|
6406 |
|
---|
6407 | for (s = 0; s < size; s++) {
|
---|
6408 | if (c != '\0')
|
---|
6409 | c = dtrace_load8(val++);
|
---|
6410 |
|
---|
6411 | DTRACE_STORE(uint8_t, tomax,
|
---|
6412 | valoffs++, c);
|
---|
6413 |
|
---|
6414 | if (c == '\0' && intuple)
|
---|
6415 | break;
|
---|
6416 | }
|
---|
6417 |
|
---|
6418 | continue;
|
---|
6419 | }
|
---|
6420 |
|
---|
6421 | while (valoffs < end) {
|
---|
6422 | DTRACE_STORE(uint8_t, tomax, valoffs++,
|
---|
6423 | dtrace_load8(val++));
|
---|
6424 | }
|
---|
6425 |
|
---|
6426 | continue;
|
---|
6427 | }
|
---|
6428 |
|
---|
6429 | switch (size) {
|
---|
6430 | case 0:
|
---|
6431 | break;
|
---|
6432 |
|
---|
6433 | case sizeof (uint8_t):
|
---|
6434 | DTRACE_STORE(uint8_t, tomax, valoffs, val);
|
---|
6435 | break;
|
---|
6436 | case sizeof (uint16_t):
|
---|
6437 | DTRACE_STORE(uint16_t, tomax, valoffs, val);
|
---|
6438 | break;
|
---|
6439 | case sizeof (uint32_t):
|
---|
6440 | DTRACE_STORE(uint32_t, tomax, valoffs, val);
|
---|
6441 | break;
|
---|
6442 | case sizeof (uint64_t):
|
---|
6443 | DTRACE_STORE(uint64_t, tomax, valoffs, val);
|
---|
6444 | break;
|
---|
6445 | default:
|
---|
6446 | /*
|
---|
6447 | * Any other size should have been returned by
|
---|
6448 | * reference, not by value.
|
---|
6449 | */
|
---|
6450 | #ifndef VBOX
|
---|
6451 | ASSERT(0);
|
---|
6452 | #else
|
---|
6453 | AssertFatalMsgFailed(("%zu\n", size));
|
---|
6454 | #endif
|
---|
6455 | break;
|
---|
6456 | }
|
---|
6457 | }
|
---|
6458 |
|
---|
6459 | if (*flags & CPU_DTRACE_DROP)
|
---|
6460 | continue;
|
---|
6461 |
|
---|
6462 | if (*flags & CPU_DTRACE_FAULT) {
|
---|
6463 | int ndx;
|
---|
6464 | dtrace_action_t *err;
|
---|
6465 |
|
---|
6466 | buf->dtb_errors++;
|
---|
6467 |
|
---|
6468 | if (probe->dtpr_id == dtrace_probeid_error) {
|
---|
6469 | /*
|
---|
6470 | * There's nothing we can do -- we had an
|
---|
6471 | * error on the error probe. We bump an
|
---|
6472 | * error counter to at least indicate that
|
---|
6473 | * this condition happened.
|
---|
6474 | */
|
---|
6475 | dtrace_error(&state->dts_dblerrors);
|
---|
6476 | continue;
|
---|
6477 | }
|
---|
6478 |
|
---|
6479 | if (vtime) {
|
---|
6480 | /*
|
---|
6481 | * Before recursing on dtrace_probe(), we
|
---|
6482 | * need to explicitly clear out our start
|
---|
6483 | * time to prevent it from being accumulated
|
---|
6484 | * into t_dtrace_vtime.
|
---|
6485 | */
|
---|
6486 | curthread->t_dtrace_start = 0;
|
---|
6487 | }
|
---|
6488 |
|
---|
6489 | /*
|
---|
6490 | * Iterate over the actions to figure out which action
|
---|
6491 | * we were processing when we experienced the error.
|
---|
6492 | * Note that act points _past_ the faulting action; if
|
---|
6493 | * act is ecb->dte_action, the fault was in the
|
---|
6494 | * predicate, if it's ecb->dte_action->dta_next it's
|
---|
6495 | * in action #1, and so on.
|
---|
6496 | */
|
---|
6497 | for (err = ecb->dte_action, ndx = 0;
|
---|
6498 | err != act; err = err->dta_next, ndx++)
|
---|
6499 | continue;
|
---|
6500 |
|
---|
6501 | dtrace_probe_error(state, ecb->dte_epid, ndx,
|
---|
6502 | (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
|
---|
6503 | mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
|
---|
6504 | cpu_core[cpuid].cpuc_dtrace_illval);
|
---|
6505 |
|
---|
6506 | continue;
|
---|
6507 | }
|
---|
6508 |
|
---|
6509 | if (!committed)
|
---|
6510 | buf->dtb_offset = offs + ecb->dte_size;
|
---|
6511 | }
|
---|
6512 |
|
---|
6513 | if (vtime)
|
---|
6514 | curthread->t_dtrace_start = dtrace_gethrtime();
|
---|
6515 |
|
---|
6516 | dtrace_interrupt_enable(cookie);
|
---|
6517 | }
|
---|
6518 |
|
---|
6519 | /*
|
---|
6520 | * DTrace Probe Hashing Functions
|
---|
6521 | *
|
---|
6522 | * The functions in this section (and indeed, the functions in remaining
|
---|
6523 | * sections) are not _called_ from probe context. (Any exceptions to this are
|
---|
6524 | * marked with a "Note:".) Rather, they are called from elsewhere in the
|
---|
6525 | * DTrace framework to look-up probes in, add probes to and remove probes from
|
---|
6526 | * the DTrace probe hashes. (Each probe is hashed by each element of the
|
---|
6527 | * probe tuple -- allowing for fast lookups, regardless of what was
|
---|
6528 | * specified.)
|
---|
6529 | */
|
---|
6530 | static uint_t
|
---|
6531 | dtrace_hash_str(char *p)
|
---|
6532 | {
|
---|
6533 | unsigned int g;
|
---|
6534 | uint_t hval = 0;
|
---|
6535 |
|
---|
6536 | while (*p) {
|
---|
6537 | hval = (hval << 4) + *p++;
|
---|
6538 | if ((g = (hval & 0xf0000000)) != 0)
|
---|
6539 | hval ^= g >> 24;
|
---|
6540 | hval &= ~g;
|
---|
6541 | }
|
---|
6542 | return (hval);
|
---|
6543 | }
|
---|
6544 |
|
---|
6545 | static dtrace_hash_t *
|
---|
6546 | dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
|
---|
6547 | {
|
---|
6548 | dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
|
---|
6549 |
|
---|
6550 | hash->dth_stroffs = stroffs;
|
---|
6551 | hash->dth_nextoffs = nextoffs;
|
---|
6552 | hash->dth_prevoffs = prevoffs;
|
---|
6553 |
|
---|
6554 | hash->dth_size = 1;
|
---|
6555 | hash->dth_mask = hash->dth_size - 1;
|
---|
6556 |
|
---|
6557 | hash->dth_tab = kmem_zalloc(hash->dth_size *
|
---|
6558 | sizeof (dtrace_hashbucket_t *), KM_SLEEP);
|
---|
6559 |
|
---|
6560 | return (hash);
|
---|
6561 | }
|
---|
6562 |
|
---|
6563 | static void
|
---|
6564 | dtrace_hash_destroy(dtrace_hash_t *hash)
|
---|
6565 | {
|
---|
6566 | #ifdef DEBUG
|
---|
6567 | int i;
|
---|
6568 |
|
---|
6569 | for (i = 0; i < hash->dth_size; i++)
|
---|
6570 | ASSERT(hash->dth_tab[i] == NULL);
|
---|
6571 | #endif
|
---|
6572 |
|
---|
6573 | kmem_free(hash->dth_tab,
|
---|
6574 | hash->dth_size * sizeof (dtrace_hashbucket_t *));
|
---|
6575 | kmem_free(hash, sizeof (dtrace_hash_t));
|
---|
6576 | }
|
---|
6577 |
|
---|
6578 | static void
|
---|
6579 | dtrace_hash_resize(dtrace_hash_t *hash)
|
---|
6580 | {
|
---|
6581 | int size = hash->dth_size, i, ndx;
|
---|
6582 | int new_size = hash->dth_size << 1;
|
---|
6583 | int new_mask = new_size - 1;
|
---|
6584 | dtrace_hashbucket_t **new_tab, *bucket, *next;
|
---|
6585 |
|
---|
6586 | ASSERT((new_size & new_mask) == 0);
|
---|
6587 |
|
---|
6588 | new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
|
---|
6589 |
|
---|
6590 | for (i = 0; i < size; i++) {
|
---|
6591 | for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
|
---|
6592 | dtrace_probe_t *probe = bucket->dthb_chain;
|
---|
6593 |
|
---|
6594 | ASSERT(probe != NULL);
|
---|
6595 | ndx = DTRACE_HASHSTR(hash, probe) & new_mask;
|
---|
6596 |
|
---|
6597 | next = bucket->dthb_next;
|
---|
6598 | bucket->dthb_next = new_tab[ndx];
|
---|
6599 | new_tab[ndx] = bucket;
|
---|
6600 | }
|
---|
6601 | }
|
---|
6602 |
|
---|
6603 | kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
|
---|
6604 | hash->dth_tab = new_tab;
|
---|
6605 | hash->dth_size = new_size;
|
---|
6606 | hash->dth_mask = new_mask;
|
---|
6607 | }
|
---|
6608 |
|
---|
6609 | static void
|
---|
6610 | dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
|
---|
6611 | {
|
---|
6612 | int hashval = DTRACE_HASHSTR(hash, new);
|
---|
6613 | int ndx = hashval & hash->dth_mask;
|
---|
6614 | dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
|
---|
6615 | dtrace_probe_t **nextp, **prevp;
|
---|
6616 |
|
---|
6617 | for (; bucket != NULL; bucket = bucket->dthb_next) {
|
---|
6618 | if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
|
---|
6619 | goto add;
|
---|
6620 | }
|
---|
6621 |
|
---|
6622 | if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
|
---|
6623 | dtrace_hash_resize(hash);
|
---|
6624 | dtrace_hash_add(hash, new);
|
---|
6625 | return;
|
---|
6626 | }
|
---|
6627 |
|
---|
6628 | bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
|
---|
6629 | bucket->dthb_next = hash->dth_tab[ndx];
|
---|
6630 | hash->dth_tab[ndx] = bucket;
|
---|
6631 | hash->dth_nbuckets++;
|
---|
6632 |
|
---|
6633 | add:
|
---|
6634 | nextp = DTRACE_HASHNEXT(hash, new);
|
---|
6635 | ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
|
---|
6636 | *nextp = bucket->dthb_chain;
|
---|
6637 |
|
---|
6638 | if (bucket->dthb_chain != NULL) {
|
---|
6639 | prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
|
---|
6640 | ASSERT(*prevp == NULL);
|
---|
6641 | *prevp = new;
|
---|
6642 | }
|
---|
6643 |
|
---|
6644 | bucket->dthb_chain = new;
|
---|
6645 | bucket->dthb_len++;
|
---|
6646 | }
|
---|
6647 |
|
---|
6648 | static dtrace_probe_t *
|
---|
6649 | dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
|
---|
6650 | {
|
---|
6651 | int hashval = DTRACE_HASHSTR(hash, template);
|
---|
6652 | int ndx = hashval & hash->dth_mask;
|
---|
6653 | dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
|
---|
6654 |
|
---|
6655 | for (; bucket != NULL; bucket = bucket->dthb_next) {
|
---|
6656 | if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
|
---|
6657 | return (bucket->dthb_chain);
|
---|
6658 | }
|
---|
6659 |
|
---|
6660 | return (NULL);
|
---|
6661 | }
|
---|
6662 |
|
---|
6663 | static int
|
---|
6664 | dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
|
---|
6665 | {
|
---|
6666 | int hashval = DTRACE_HASHSTR(hash, template);
|
---|
6667 | int ndx = hashval & hash->dth_mask;
|
---|
6668 | dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
|
---|
6669 |
|
---|
6670 | for (; bucket != NULL; bucket = bucket->dthb_next) {
|
---|
6671 | if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
|
---|
6672 | return (bucket->dthb_len);
|
---|
6673 | }
|
---|
6674 |
|
---|
6675 | return (NULL);
|
---|
6676 | }
|
---|
6677 |
|
---|
6678 | static void
|
---|
6679 | dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
|
---|
6680 | {
|
---|
6681 | int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
|
---|
6682 | dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
|
---|
6683 |
|
---|
6684 | dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
|
---|
6685 | dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);
|
---|
6686 |
|
---|
6687 | /*
|
---|
6688 | * Find the bucket that we're removing this probe from.
|
---|
6689 | */
|
---|
6690 | for (; bucket != NULL; bucket = bucket->dthb_next) {
|
---|
6691 | if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
|
---|
6692 | break;
|
---|
6693 | }
|
---|
6694 |
|
---|
6695 | ASSERT(bucket != NULL);
|
---|
6696 |
|
---|
6697 | if (*prevp == NULL) {
|
---|
6698 | if (*nextp == NULL) {
|
---|
6699 | /*
|
---|
6700 | * The removed probe was the only probe on this
|
---|
6701 | * bucket; we need to remove the bucket.
|
---|
6702 | */
|
---|
6703 | dtrace_hashbucket_t *b = hash->dth_tab[ndx];
|
---|
6704 |
|
---|
6705 | ASSERT(bucket->dthb_chain == probe);
|
---|
6706 | ASSERT(b != NULL);
|
---|
6707 |
|
---|
6708 | if (b == bucket) {
|
---|
6709 | hash->dth_tab[ndx] = bucket->dthb_next;
|
---|
6710 | } else {
|
---|
6711 | while (b->dthb_next != bucket)
|
---|
6712 | b = b->dthb_next;
|
---|
6713 | b->dthb_next = bucket->dthb_next;
|
---|
6714 | }
|
---|
6715 |
|
---|
6716 | ASSERT(hash->dth_nbuckets > 0);
|
---|
6717 | hash->dth_nbuckets--;
|
---|
6718 | kmem_free(bucket, sizeof (dtrace_hashbucket_t));
|
---|
6719 | return;
|
---|
6720 | }
|
---|
6721 |
|
---|
6722 | bucket->dthb_chain = *nextp;
|
---|
6723 | } else {
|
---|
6724 | *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
|
---|
6725 | }
|
---|
6726 |
|
---|
6727 | if (*nextp != NULL)
|
---|
6728 | *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
|
---|
6729 | }
|
---|
6730 |
|
---|
6731 | /*
|
---|
6732 | * DTrace Utility Functions
|
---|
6733 | *
|
---|
6734 | * These are random utility functions that are _not_ called from probe context.
|
---|
6735 | */
|
---|
6736 | static int
|
---|
6737 | dtrace_badattr(const dtrace_attribute_t *a)
|
---|
6738 | {
|
---|
6739 | return (a->dtat_name > DTRACE_STABILITY_MAX ||
|
---|
6740 | a->dtat_data > DTRACE_STABILITY_MAX ||
|
---|
6741 | a->dtat_class > DTRACE_CLASS_MAX);
|
---|
6742 | }
|
---|
6743 |
|
---|
6744 | /*
|
---|
6745 | * Return a duplicate copy of a string. If the specified string is NULL,
|
---|
6746 | * this function returns a zero-length string.
|
---|
6747 | */
|
---|
6748 | static char *
|
---|
6749 | dtrace_strdup(const char *str)
|
---|
6750 | {
|
---|
6751 | char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);
|
---|
6752 |
|
---|
6753 | if (str != NULL)
|
---|
6754 | (void) strcpy(new, str);
|
---|
6755 |
|
---|
6756 | return (new);
|
---|
6757 | }
|
---|
6758 |
|
---|
#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Check whether 's' is unacceptable as a name.
 *
 * Returns 0 (acceptable) for a NULL or empty string.  Otherwise the first
 * character must be an ASCII letter, '-', '_' or '.', and every following
 * character must be an ASCII letter, a decimal digit, '-', '_', '.' or '`'.
 * Returns 1 as soon as a character violating these rules is found, and 0 if
 * none is.
 */
static int
dtrace_badname(const char *s)
{
	const char *cp;

	if (s == NULL || s[0] == '\0')
		return (0);

	/* Leading character: no digits and no backquote allowed. */
	if (!(DTRACE_ISALPHA(s[0]) || s[0] == '-' || s[0] == '_' ||
	    s[0] == '.'))
		return (1);

	for (cp = s + 1; *cp != '\0'; cp++) {
		char ch = *cp;

		if (DTRACE_ISALPHA(ch) || (ch >= '0' && ch <= '9'))
			continue;

		if (ch == '-' || ch == '_' || ch == '.' || ch == '`')
			continue;

		return (1);
	}

	return (0);
}
|
---|
6781 |
|
---|
6782 | static void
|
---|
6783 | dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
|
---|
6784 | {
|
---|
6785 | uint32_t priv;
|
---|
6786 |
|
---|
6787 | if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
|
---|
6788 | /*
|
---|
6789 | * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
|
---|
6790 | */
|
---|
6791 | priv = DTRACE_PRIV_ALL;
|
---|
6792 | #ifdef VBOX
|
---|
6793 | *uidp = UINT32_MAX;
|
---|
6794 | *zoneidp = 0;
|
---|
6795 | #endif
|
---|
6796 | } else {
|
---|
6797 | *uidp = crgetuid(cr);
|
---|
6798 | *zoneidp = crgetzoneid(cr);
|
---|
6799 |
|
---|
6800 | priv = 0;
|
---|
6801 | if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
|
---|
6802 | priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
|
---|
6803 | else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
|
---|
6804 | priv |= DTRACE_PRIV_USER;
|
---|
6805 | if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
|
---|
6806 | priv |= DTRACE_PRIV_PROC;
|
---|
6807 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
|
---|
6808 | priv |= DTRACE_PRIV_OWNER;
|
---|
6809 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
|
---|
6810 | priv |= DTRACE_PRIV_ZONEOWNER;
|
---|
6811 | }
|
---|
6812 |
|
---|
6813 | *privp = priv;
|
---|
6814 | }
|
---|
6815 |
|
---|
#ifdef DTRACE_ERRDEBUG
/*
 * Record an occurrence of the error message 'str' in dtrace_errhash, an
 * open-addressed table keyed by the message pointer and probed linearly.
 * The most recent message and thread are also remembered.  Panics if every
 * slot is occupied by some other message.
 */
static void
dtrace_errdebug(const char *str)
{
	int slot = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
	int tries;

	mutex_enter(&dtrace_errlock);
	dtrace_errlast = str;
	dtrace_errthread = curthread;

	for (tries = 0; tries < DTRACE_ERRHASHSZ; tries++) {
		if (dtrace_errhash[slot].dter_msg == str) {
			/* Seen before: bump its counter. */
			dtrace_errhash[slot].dter_count++;
			break;
		}

		if (dtrace_errhash[slot].dter_msg == NULL) {
			/* Free slot: claim it for this message. */
			dtrace_errhash[slot].dter_msg = str;
			dtrace_errhash[slot].dter_count = 1;
			break;
		}

		slot = (slot + 1) % DTRACE_ERRHASHSZ;
	}

	if (tries >= DTRACE_ERRHASHSZ)
		panic("dtrace: undersized error hash");

	mutex_exit(&dtrace_errlock);
}
#endif
6848 |
|
---|
6849 | /*
|
---|
6850 | * DTrace Matching Functions
|
---|
6851 | *
|
---|
6852 | * These functions are used to match groups of probes, given some elements of
|
---|
6853 | * a probe tuple, or some globbed expressions for elements of a probe tuple.
|
---|
6854 | */
|
---|
6855 | static int
|
---|
6856 | dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
|
---|
6857 | zoneid_t zoneid)
|
---|
6858 | {
|
---|
6859 | if (priv != DTRACE_PRIV_ALL) {
|
---|
6860 | uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
|
---|
6861 | uint32_t match = priv & ppriv;
|
---|
6862 |
|
---|
6863 | /*
|
---|
6864 | * No PRIV_DTRACE_* privileges...
|
---|
6865 | */
|
---|
6866 | if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
|
---|
6867 | DTRACE_PRIV_KERNEL)) == 0)
|
---|
6868 | return (0);
|
---|
6869 |
|
---|
6870 | /*
|
---|
6871 | * No matching bits, but there were bits to match...
|
---|
6872 | */
|
---|
6873 | if (match == 0 && ppriv != 0)
|
---|
6874 | return (0);
|
---|
6875 |
|
---|
6876 | /*
|
---|
6877 | * Need to have permissions to the process, but don't...
|
---|
6878 | */
|
---|
6879 | if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
|
---|
6880 | uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
|
---|
6881 | return (0);
|
---|
6882 | }
|
---|
6883 |
|
---|
6884 | /*
|
---|
6885 | * Need to be in the same zone unless we possess the
|
---|
6886 | * privilege to examine all zones.
|
---|
6887 | */
|
---|
6888 | if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
|
---|
6889 | zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
|
---|
6890 | return (0);
|
---|
6891 | }
|
---|
6892 | }
|
---|
6893 |
|
---|
6894 | return (1);
|
---|
6895 | }
|
---|
6896 |
|
---|
6897 | /*
|
---|
6898 | * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
|
---|
6899 | * consists of input pattern strings and an ops-vector to evaluate them.
|
---|
6900 | * This function returns >0 for match, 0 for no match, and <0 for error.
|
---|
6901 | */
|
---|
6902 | static int
|
---|
6903 | dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
|
---|
6904 | uint32_t priv, uid_t uid, zoneid_t zoneid)
|
---|
6905 | {
|
---|
6906 | dtrace_provider_t *pvp = prp->dtpr_provider;
|
---|
6907 | int rv;
|
---|
6908 |
|
---|
6909 | if (pvp->dtpv_defunct)
|
---|
6910 | return (0);
|
---|
6911 |
|
---|
6912 | if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
|
---|
6913 | return (rv);
|
---|
6914 |
|
---|
6915 | if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
|
---|
6916 | return (rv);
|
---|
6917 |
|
---|
6918 | if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
|
---|
6919 | return (rv);
|
---|
6920 |
|
---|
6921 | if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
|
---|
6922 | return (rv);
|
---|
6923 |
|
---|
6924 | if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
|
---|
6925 | return (0);
|
---|
6926 |
|
---|
6927 | return (rv);
|
---|
6928 | }
|
---|
6929 |
|
---|
6930 | /*
|
---|
6931 | * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
|
---|
6932 | * interface for matching a glob pattern 'p' to an input string 's'. Unlike
|
---|
6933 | * libc's version, the kernel version only applies to 8-bit ASCII strings.
|
---|
6934 | * In addition, all of the recursion cases except for '*' matching have been
|
---|
6935 | * unwound. For '*', we still implement recursive evaluation, but a depth
|
---|
6936 | * counter is maintained and matching is aborted if we recurse too deep.
|
---|
6937 | * The function returns 0 if no match, >0 if match, and <0 if recursion error.
|
---|
6938 | */
|
---|
6939 | static int
|
---|
6940 | dtrace_match_glob(const char *s, const char *p, int depth)
|
---|
6941 | {
|
---|
6942 | const char *olds;
|
---|
6943 | char s1, c;
|
---|
6944 | int gs;
|
---|
6945 |
|
---|
6946 | if (depth > DTRACE_PROBEKEY_MAXDEPTH)
|
---|
6947 | return (-1);
|
---|
6948 |
|
---|
6949 | if (s == NULL)
|
---|
6950 | s = ""; /* treat NULL as empty string */
|
---|
6951 |
|
---|
6952 | top:
|
---|
6953 | olds = s;
|
---|
6954 | s1 = *s++;
|
---|
6955 |
|
---|
6956 | if (p == NULL)
|
---|
6957 | return (0);
|
---|
6958 |
|
---|
6959 | if ((c = *p++) == '\0')
|
---|
6960 | return (s1 == '\0');
|
---|
6961 |
|
---|
6962 | switch (c) {
|
---|
6963 | case '[': {
|
---|
6964 | int ok = 0, notflag = 0;
|
---|
6965 | char lc = '\0';
|
---|
6966 |
|
---|
6967 | if (s1 == '\0')
|
---|
6968 | return (0);
|
---|
6969 |
|
---|
6970 | if (*p == '!') {
|
---|
6971 | notflag = 1;
|
---|
6972 | p++;
|
---|
6973 | }
|
---|
6974 |
|
---|
6975 | if ((c = *p++) == '\0')
|
---|
6976 | return (0);
|
---|
6977 |
|
---|
6978 | do {
|
---|
6979 | if (c == '-' && lc != '\0' && *p != ']') {
|
---|
6980 | if ((c = *p++) == '\0')
|
---|
6981 | return (0);
|
---|
6982 | if (c == '\\' && (c = *p++) == '\0')
|
---|
6983 | return (0);
|
---|
6984 |
|
---|
6985 | if (notflag) {
|
---|
6986 | if (s1 < lc || s1 > c)
|
---|
6987 | ok++;
|
---|
6988 | else
|
---|
6989 | return (0);
|
---|
6990 | } else if (lc <= s1 && s1 <= c)
|
---|
6991 | ok++;
|
---|
6992 |
|
---|
6993 | } else if (c == '\\' && (c = *p++) == '\0')
|
---|
6994 | return (0);
|
---|
6995 |
|
---|
6996 | lc = c; /* save left-hand 'c' for next iteration */
|
---|
6997 |
|
---|
6998 | if (notflag) {
|
---|
6999 | if (s1 != c)
|
---|
7000 | ok++;
|
---|
7001 | else
|
---|
7002 | return (0);
|
---|
7003 | } else if (s1 == c)
|
---|
7004 | ok++;
|
---|
7005 |
|
---|
7006 | if ((c = *p++) == '\0')
|
---|
7007 | return (0);
|
---|
7008 |
|
---|
7009 | } while (c != ']');
|
---|
7010 |
|
---|
7011 | if (ok)
|
---|
7012 | goto top;
|
---|
7013 |
|
---|
7014 | return (0);
|
---|
7015 | }
|
---|
7016 |
|
---|
7017 | case '\\':
|
---|
7018 | if ((c = *p++) == '\0')
|
---|
7019 | return (0);
|
---|
7020 | RT_FALL_THRU();
|
---|
7021 |
|
---|
7022 | default:
|
---|
7023 | if (c != s1)
|
---|
7024 | return (0);
|
---|
7025 | RT_FALL_THRU();
|
---|
7026 |
|
---|
7027 | case '?':
|
---|
7028 | if (s1 != '\0')
|
---|
7029 | goto top;
|
---|
7030 | return (0);
|
---|
7031 |
|
---|
7032 | case '*':
|
---|
7033 | while (*p == '*')
|
---|
7034 | p++; /* consecutive *'s are identical to a single one */
|
---|
7035 |
|
---|
7036 | if (*p == '\0')
|
---|
7037 | return (1);
|
---|
7038 |
|
---|
7039 | for (s = olds; *s != '\0'; s++) {
|
---|
7040 | if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
|
---|
7041 | return (gs);
|
---|
7042 | }
|
---|
7043 |
|
---|
7044 | return (0);
|
---|
7045 | }
|
---|
7046 | }
|
---|
7047 |
|
---|
/*
 * Exact string comparison: returns 1 when 's' is non-NULL and equal to 'p',
 * 0 otherwise.  'depth' is unused.
 */
/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	(void) depth;

	if (s == NULL)
		return (0);

	return (strcmp(s, p) == 0 ? 1 : 0);
}
7055 |
|
---|
/*
 * Matcher for the empty pattern: it matches unconditionally, so none of the
 * arguments is examined.
 */
/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	(void) s;
	(void) p;
	(void) depth;

	return (1);
}
7063 |
|
---|
/*
 * Returns 1 when 's' is a non-NULL, non-empty string and 0 otherwise.  The
 * pattern and depth are ignored.
 */
/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	(void) p;
	(void) depth;

	if (s == NULL)
		return (0);

	return (s[0] != '\0');
}
7071 |
|
---|
7072 | static int
|
---|
7073 | dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
|
---|
7074 | zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
|
---|
7075 | {
|
---|
7076 | dtrace_probe_t template, *probe;
|
---|
7077 | dtrace_hash_t *hash = NULL;
|
---|
7078 | int len, rc, best = INT_MAX, nmatched = 0;
|
---|
7079 | dtrace_id_t i;
|
---|
7080 |
|
---|
7081 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
7082 |
|
---|
7083 | /*
|
---|
7084 | * If the probe ID is specified in the key, just lookup by ID and
|
---|
7085 | * invoke the match callback once if a matching probe is found.
|
---|
7086 | */
|
---|
7087 | if (pkp->dtpk_id != DTRACE_IDNONE) {
|
---|
7088 | if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
|
---|
7089 | dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
|
---|
7090 | if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
|
---|
7091 | return (DTRACE_MATCH_FAIL);
|
---|
7092 | nmatched++;
|
---|
7093 | }
|
---|
7094 | return (nmatched);
|
---|
7095 | }
|
---|
7096 |
|
---|
7097 | template.dtpr_mod = (char *)pkp->dtpk_mod;
|
---|
7098 | template.dtpr_func = (char *)pkp->dtpk_func;
|
---|
7099 | template.dtpr_name = (char *)pkp->dtpk_name;
|
---|
7100 |
|
---|
7101 | /*
|
---|
7102 | * We want to find the most distinct of the module name, function
|
---|
7103 | * name, and name. So for each one that is not a glob pattern or
|
---|
7104 | * empty string, we perform a lookup in the corresponding hash and
|
---|
7105 | * use the hash table with the fewest collisions to do our search.
|
---|
7106 | */
|
---|
7107 | if (pkp->dtpk_mmatch == &dtrace_match_string &&
|
---|
7108 | (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
|
---|
7109 | best = len;
|
---|
7110 | hash = dtrace_bymod;
|
---|
7111 | }
|
---|
7112 |
|
---|
7113 | if (pkp->dtpk_fmatch == &dtrace_match_string &&
|
---|
7114 | (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
|
---|
7115 | best = len;
|
---|
7116 | hash = dtrace_byfunc;
|
---|
7117 | }
|
---|
7118 |
|
---|
7119 | if (pkp->dtpk_nmatch == &dtrace_match_string &&
|
---|
7120 | (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
|
---|
7121 | best = len;
|
---|
7122 | hash = dtrace_byname;
|
---|
7123 | }
|
---|
7124 |
|
---|
7125 | /*
|
---|
7126 | * If we did not select a hash table, iterate over every probe and
|
---|
7127 | * invoke our callback for each one that matches our input probe key.
|
---|
7128 | */
|
---|
7129 | if (hash == NULL) {
|
---|
7130 | for (i = 0; i < VBDTCAST(dtrace_id_t)dtrace_nprobes; i++) {
|
---|
7131 | if ((probe = dtrace_probes[i]) == NULL ||
|
---|
7132 | dtrace_match_probe(probe, pkp, priv, uid,
|
---|
7133 | zoneid) <= 0)
|
---|
7134 | continue;
|
---|
7135 |
|
---|
7136 | nmatched++;
|
---|
7137 |
|
---|
7138 | if ((rc = (*matched)(probe, arg)) !=
|
---|
7139 | DTRACE_MATCH_NEXT) {
|
---|
7140 | if (rc == DTRACE_MATCH_FAIL)
|
---|
7141 | return (DTRACE_MATCH_FAIL);
|
---|
7142 | break;
|
---|
7143 | }
|
---|
7144 | }
|
---|
7145 |
|
---|
7146 | return (nmatched);
|
---|
7147 | }
|
---|
7148 |
|
---|
7149 | /*
|
---|
7150 | * If we selected a hash table, iterate over each probe of the same key
|
---|
7151 | * name and invoke the callback for every probe that matches the other
|
---|
7152 | * attributes of our input probe key.
|
---|
7153 | */
|
---|
7154 | for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
|
---|
7155 | probe = *(DTRACE_HASHNEXT(hash, probe))) {
|
---|
7156 |
|
---|
7157 | if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
|
---|
7158 | continue;
|
---|
7159 |
|
---|
7160 | nmatched++;
|
---|
7161 |
|
---|
7162 | if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
|
---|
7163 | if (rc == DTRACE_MATCH_FAIL)
|
---|
7164 | return (DTRACE_MATCH_FAIL);
|
---|
7165 | break;
|
---|
7166 | }
|
---|
7167 | }
|
---|
7168 |
|
---|
7169 | return (nmatched);
|
---|
7170 | }
|
---|
7171 |
|
---|
7172 | /*
|
---|
7173 | * Return the function pointer dtrace_probecmp() should use to compare the
|
---|
7174 | * specified pattern with a string. For NULL or empty patterns, we select
|
---|
7175 | * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
|
---|
7176 | * For non-empty non-glob strings, we use dtrace_match_string().
|
---|
7177 | */
|
---|
7178 | static dtrace_probekey_f *
|
---|
7179 | dtrace_probekey_func(const char *p)
|
---|
7180 | {
|
---|
7181 | char c;
|
---|
7182 |
|
---|
7183 | if (p == NULL || *p == '\0')
|
---|
7184 | return (&dtrace_match_nul);
|
---|
7185 |
|
---|
7186 | while ((c = *p++) != '\0') {
|
---|
7187 | if (c == '[' || c == '?' || c == '*' || c == '\\')
|
---|
7188 | return (&dtrace_match_glob);
|
---|
7189 | }
|
---|
7190 |
|
---|
7191 | return (&dtrace_match_string);
|
---|
7192 | }
|
---|
7193 |
|
---|
7194 | /*
|
---|
7195 | * Build a probe comparison key for use with dtrace_match_probe() from the
|
---|
7196 | * given probe description. By convention, a null key only matches anchored
|
---|
7197 | * probes: if each field is the empty string, reset dtpk_fmatch to
|
---|
7198 | * dtrace_match_nonzero().
|
---|
7199 | */
|
---|
7200 | static void
|
---|
7201 | dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
|
---|
7202 | {
|
---|
7203 | pkp->dtpk_prov = pdp->dtpd_provider;
|
---|
7204 | pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
|
---|
7205 |
|
---|
7206 | pkp->dtpk_mod = pdp->dtpd_mod;
|
---|
7207 | pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);
|
---|
7208 |
|
---|
7209 | pkp->dtpk_func = pdp->dtpd_func;
|
---|
7210 | pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
|
---|
7211 |
|
---|
7212 | pkp->dtpk_name = pdp->dtpd_name;
|
---|
7213 | pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
|
---|
7214 |
|
---|
7215 | pkp->dtpk_id = pdp->dtpd_id;
|
---|
7216 |
|
---|
7217 | if (pkp->dtpk_id == DTRACE_IDNONE &&
|
---|
7218 | pkp->dtpk_pmatch == &dtrace_match_nul &&
|
---|
7219 | pkp->dtpk_mmatch == &dtrace_match_nul &&
|
---|
7220 | pkp->dtpk_fmatch == &dtrace_match_nul &&
|
---|
7221 | pkp->dtpk_nmatch == &dtrace_match_nul)
|
---|
7222 | pkp->dtpk_fmatch = &dtrace_match_nonzero;
|
---|
7223 | }
|
---|
7224 |
|
---|
7225 | /*
|
---|
7226 | * DTrace Provider-to-Framework API Functions
|
---|
7227 | *
|
---|
7228 | * These functions implement much of the Provider-to-Framework API, as
|
---|
7229 | * described in <sys/dtrace.h>. The parts of the API not in this section are
|
---|
7230 | * the functions in the API for probe management (found below), and
|
---|
7231 | * dtrace_probe() itself (found above).
|
---|
7232 | */
|
---|
7233 |
|
---|
7234 | /*
|
---|
7235 | * Register the calling provider with the DTrace framework. This should
|
---|
7236 | * generally be called by DTrace providers in their attach(9E) entry point.
|
---|
7237 | */
|
---|
7238 | int
|
---|
7239 | dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
|
---|
7240 | cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
|
---|
7241 | {
|
---|
7242 | dtrace_provider_t *provider;
|
---|
7243 |
|
---|
7244 | if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
|
---|
7245 | cmn_err(CE_WARN, "failed to register provider '%s': invalid "
|
---|
7246 | "arguments", name ? name : "<NULL>");
|
---|
7247 | return (EINVAL);
|
---|
7248 | }
|
---|
7249 |
|
---|
7250 | if (name[0] == '\0' || dtrace_badname(name)) {
|
---|
7251 | cmn_err(CE_WARN, "failed to register provider '%s': invalid "
|
---|
7252 | "provider name", name);
|
---|
7253 | return (EINVAL);
|
---|
7254 | }
|
---|
7255 |
|
---|
7256 | if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
|
---|
7257 | pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
|
---|
7258 | pops->dtps_destroy == NULL ||
|
---|
7259 | ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
|
---|
7260 | cmn_err(CE_WARN, "failed to register provider '%s': invalid "
|
---|
7261 | "provider ops", name);
|
---|
7262 | return (EINVAL);
|
---|
7263 | }
|
---|
7264 |
|
---|
7265 | if (dtrace_badattr(&pap->dtpa_provider) ||
|
---|
7266 | dtrace_badattr(&pap->dtpa_mod) ||
|
---|
7267 | dtrace_badattr(&pap->dtpa_func) ||
|
---|
7268 | dtrace_badattr(&pap->dtpa_name) ||
|
---|
7269 | dtrace_badattr(&pap->dtpa_args)) {
|
---|
7270 | cmn_err(CE_WARN, "failed to register provider '%s': invalid "
|
---|
7271 | "provider attributes", name);
|
---|
7272 | return (EINVAL);
|
---|
7273 | }
|
---|
7274 |
|
---|
7275 | if (priv & ~DTRACE_PRIV_ALL) {
|
---|
7276 | cmn_err(CE_WARN, "failed to register provider '%s': invalid "
|
---|
7277 | "privilege attributes", name);
|
---|
7278 | return (EINVAL);
|
---|
7279 | }
|
---|
7280 |
|
---|
7281 | if ((priv & DTRACE_PRIV_KERNEL) &&
|
---|
7282 | (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
|
---|
7283 | pops->dtps_usermode == NULL) {
|
---|
7284 | cmn_err(CE_WARN, "failed to register provider '%s': need "
|
---|
7285 | "dtps_usermode() op for given privilege attributes", name);
|
---|
7286 | return (EINVAL);
|
---|
7287 | }
|
---|
7288 |
|
---|
7289 | provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
|
---|
7290 | provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
|
---|
7291 | (void) strcpy(provider->dtpv_name, name);
|
---|
7292 |
|
---|
7293 | provider->dtpv_attr = *pap;
|
---|
7294 | provider->dtpv_priv.dtpp_flags = priv;
|
---|
7295 | if (cr != NULL) {
|
---|
7296 | provider->dtpv_priv.dtpp_uid = crgetuid(cr);
|
---|
7297 | provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
|
---|
7298 | }
|
---|
7299 | provider->dtpv_pops = *pops;
|
---|
7300 |
|
---|
7301 | if (pops->dtps_provide == NULL) {
|
---|
7302 | ASSERT(pops->dtps_provide_module != NULL);
|
---|
7303 | provider->dtpv_pops.dtps_provide =
|
---|
7304 | (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
|
---|
7305 | }
|
---|
7306 |
|
---|
7307 | if (pops->dtps_provide_module == NULL) {
|
---|
7308 | ASSERT(pops->dtps_provide != NULL);
|
---|
7309 | provider->dtpv_pops.dtps_provide_module =
|
---|
7310 | (void (*)(void *, struct modctl *))dtrace_nullop;
|
---|
7311 | }
|
---|
7312 |
|
---|
7313 | if (pops->dtps_suspend == NULL) {
|
---|
7314 | ASSERT(pops->dtps_resume == NULL);
|
---|
7315 | provider->dtpv_pops.dtps_suspend =
|
---|
7316 | (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
|
---|
7317 | provider->dtpv_pops.dtps_resume =
|
---|
7318 | (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
|
---|
7319 | }
|
---|
7320 |
|
---|
7321 | provider->dtpv_arg = arg;
|
---|
7322 | *idp = (dtrace_provider_id_t)provider;
|
---|
7323 |
|
---|
7324 | if (pops == &dtrace_provider_ops) {
|
---|
7325 | ASSERT(MUTEX_HELD(&dtrace_provider_lock));
|
---|
7326 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
7327 | ASSERT(dtrace_anon.dta_enabling == NULL);
|
---|
7328 |
|
---|
7329 | /*
|
---|
7330 | * We make sure that the DTrace provider is at the head of
|
---|
7331 | * the provider chain.
|
---|
7332 | */
|
---|
7333 | provider->dtpv_next = dtrace_provider;
|
---|
7334 | dtrace_provider = provider;
|
---|
7335 | return (0);
|
---|
7336 | }
|
---|
7337 |
|
---|
7338 | mutex_enter(&dtrace_provider_lock);
|
---|
7339 | mutex_enter(&dtrace_lock);
|
---|
7340 |
|
---|
7341 | /*
|
---|
7342 | * If there is at least one provider registered, we'll add this
|
---|
7343 | * provider after the first provider.
|
---|
7344 | */
|
---|
7345 | if (dtrace_provider != NULL) {
|
---|
7346 | provider->dtpv_next = dtrace_provider->dtpv_next;
|
---|
7347 | dtrace_provider->dtpv_next = provider;
|
---|
7348 | } else {
|
---|
7349 | dtrace_provider = provider;
|
---|
7350 | }
|
---|
7351 |
|
---|
7352 | if (dtrace_retained != NULL) {
|
---|
7353 | dtrace_enabling_provide(provider);
|
---|
7354 |
|
---|
7355 | /*
|
---|
7356 | * Now we need to call dtrace_enabling_matchall() -- which
|
---|
7357 | * will acquire cpu_lock and dtrace_lock. We therefore need
|
---|
7358 | * to drop all of our locks before calling into it...
|
---|
7359 | */
|
---|
7360 | mutex_exit(&dtrace_lock);
|
---|
7361 | mutex_exit(&dtrace_provider_lock);
|
---|
7362 | dtrace_enabling_matchall();
|
---|
7363 |
|
---|
7364 | return (0);
|
---|
7365 | }
|
---|
7366 |
|
---|
7367 | mutex_exit(&dtrace_lock);
|
---|
7368 | mutex_exit(&dtrace_provider_lock);
|
---|
7369 |
|
---|
7370 | return (0);
|
---|
7371 | }
|
---|
7372 |
|
---|
7373 | /*
|
---|
7374 | * Unregister the specified provider from the DTrace framework. This should
|
---|
7375 | * generally be called by DTrace providers in their detach(9E) entry point.
|
---|
7376 | */
|
---|
7377 | int
|
---|
7378 | dtrace_unregister(dtrace_provider_id_t id)
|
---|
7379 | {
|
---|
7380 | dtrace_provider_t *old = (dtrace_provider_t *)id;
|
---|
7381 | dtrace_provider_t *prev = NULL;
|
---|
7382 | VBDTTYPE(uint32_t,int) i, self = 0;
|
---|
7383 | dtrace_probe_t *probe, *first = NULL;
|
---|
7384 |
|
---|
7385 | if (old->dtpv_pops.dtps_enable ==
|
---|
7386 | (int (*)(void *, dtrace_id_t, void *))(uintptr_t)dtrace_enable_nullop) {
|
---|
7387 | /*
|
---|
7388 | * If DTrace itself is the provider, we're called with locks
|
---|
7389 | * already held.
|
---|
7390 | */
|
---|
7391 | ASSERT(old == dtrace_provider);
|
---|
7392 | #ifndef VBOX
|
---|
7393 | ASSERT(dtrace_devi != NULL);
|
---|
7394 | #endif
|
---|
7395 | ASSERT(MUTEX_HELD(&dtrace_provider_lock));
|
---|
7396 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
7397 | self = 1;
|
---|
7398 |
|
---|
7399 | if (dtrace_provider->dtpv_next != NULL) {
|
---|
7400 | /*
|
---|
7401 | * There's another provider here; return failure.
|
---|
7402 | */
|
---|
7403 | return (EBUSY);
|
---|
7404 | }
|
---|
7405 | } else {
|
---|
7406 | mutex_enter(&dtrace_provider_lock);
|
---|
7407 | mutex_enter(&mod_lock);
|
---|
7408 | mutex_enter(&dtrace_lock);
|
---|
7409 | }
|
---|
7410 |
|
---|
7411 | /*
|
---|
7412 | * If anyone has /dev/dtrace open, or if there are anonymous enabled
|
---|
7413 | * probes, we refuse to let providers slither away, unless this
|
---|
7414 | * provider has already been explicitly invalidated.
|
---|
7415 | */
|
---|
7416 | if (!old->dtpv_defunct &&
|
---|
7417 | (dtrace_opens || (dtrace_anon.dta_state != NULL &&
|
---|
7418 | dtrace_anon.dta_state->dts_necbs > 0))) {
|
---|
7419 | if (!self) {
|
---|
7420 | mutex_exit(&dtrace_lock);
|
---|
7421 | mutex_exit(&mod_lock);
|
---|
7422 | mutex_exit(&dtrace_provider_lock);
|
---|
7423 | }
|
---|
7424 | return (EBUSY);
|
---|
7425 | }
|
---|
7426 |
|
---|
7427 | /*
|
---|
7428 | * Attempt to destroy the probes associated with this provider.
|
---|
7429 | */
|
---|
7430 | for (i = 0; i < dtrace_nprobes; i++) {
|
---|
7431 | if ((probe = dtrace_probes[i]) == NULL)
|
---|
7432 | continue;
|
---|
7433 |
|
---|
7434 | if (probe->dtpr_provider != old)
|
---|
7435 | continue;
|
---|
7436 |
|
---|
7437 | if (probe->dtpr_ecb == NULL)
|
---|
7438 | continue;
|
---|
7439 |
|
---|
7440 | /*
|
---|
7441 | * We have at least one ECB; we can't remove this provider.
|
---|
7442 | */
|
---|
7443 | if (!self) {
|
---|
7444 | mutex_exit(&dtrace_lock);
|
---|
7445 | mutex_exit(&mod_lock);
|
---|
7446 | mutex_exit(&dtrace_provider_lock);
|
---|
7447 | }
|
---|
7448 | return (EBUSY);
|
---|
7449 | }
|
---|
7450 |
|
---|
7451 | /*
|
---|
7452 | * All of the probes for this provider are disabled; we can safely
|
---|
7453 | * remove all of them from their hash chains and from the probe array.
|
---|
7454 | */
|
---|
7455 | for (i = 0; i < dtrace_nprobes; i++) {
|
---|
7456 | if ((probe = dtrace_probes[i]) == NULL)
|
---|
7457 | continue;
|
---|
7458 |
|
---|
7459 | if (probe->dtpr_provider != old)
|
---|
7460 | continue;
|
---|
7461 |
|
---|
7462 | dtrace_probes[i] = NULL;
|
---|
7463 |
|
---|
7464 | dtrace_hash_remove(dtrace_bymod, probe);
|
---|
7465 | dtrace_hash_remove(dtrace_byfunc, probe);
|
---|
7466 | dtrace_hash_remove(dtrace_byname, probe);
|
---|
7467 |
|
---|
7468 | if (first == NULL) {
|
---|
7469 | first = probe;
|
---|
7470 | probe->dtpr_nextmod = NULL;
|
---|
7471 | } else {
|
---|
7472 | probe->dtpr_nextmod = first;
|
---|
7473 | first = probe;
|
---|
7474 | }
|
---|
7475 | }
|
---|
7476 |
|
---|
7477 | /*
|
---|
7478 | * The provider's probes have been removed from the hash chains and
|
---|
7479 | * from the probe array. Now issue a dtrace_sync() to be sure that
|
---|
7480 | * everyone has cleared out from any probe array processing.
|
---|
7481 | */
|
---|
7482 | dtrace_sync();
|
---|
7483 |
|
---|
7484 | for (probe = first; probe != NULL; probe = first) {
|
---|
7485 | first = probe->dtpr_nextmod;
|
---|
7486 |
|
---|
7487 | old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
|
---|
7488 | probe->dtpr_arg);
|
---|
7489 | kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
|
---|
7490 | kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
|
---|
7491 | kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
|
---|
7492 | vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
|
---|
7493 | kmem_free(probe, sizeof (dtrace_probe_t));
|
---|
7494 | }
|
---|
7495 |
|
---|
7496 | if ((prev = dtrace_provider) == old) {
|
---|
7497 | #ifndef VBOX
|
---|
7498 | ASSERT(self || dtrace_devi == NULL);
|
---|
7499 | ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
|
---|
7500 | #endif
|
---|
7501 | dtrace_provider = old->dtpv_next;
|
---|
7502 | } else {
|
---|
7503 | while (prev != NULL && prev->dtpv_next != old)
|
---|
7504 | prev = prev->dtpv_next;
|
---|
7505 |
|
---|
7506 | if (prev == NULL) {
|
---|
7507 | panic("attempt to unregister non-existent "
|
---|
7508 | "dtrace provider %p\n", (void *)id);
|
---|
7509 | }
|
---|
7510 |
|
---|
7511 | prev->dtpv_next = old->dtpv_next;
|
---|
7512 | }
|
---|
7513 |
|
---|
7514 | if (!self) {
|
---|
7515 | mutex_exit(&dtrace_lock);
|
---|
7516 | mutex_exit(&mod_lock);
|
---|
7517 | mutex_exit(&dtrace_provider_lock);
|
---|
7518 | }
|
---|
7519 |
|
---|
7520 | kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
|
---|
7521 | kmem_free(old, sizeof (dtrace_provider_t));
|
---|
7522 |
|
---|
7523 | return (0);
|
---|
7524 | }
|
---|
7525 |
|
---|
7526 | /*
|
---|
7527 | * Invalidate the specified provider. All subsequent probe lookups for the
|
---|
7528 | * specified provider will fail, but its probes will not be removed.
|
---|
7529 | */
|
---|
7530 | void
|
---|
7531 | dtrace_invalidate(dtrace_provider_id_t id)
|
---|
7532 | {
|
---|
7533 | dtrace_provider_t *pvp = (dtrace_provider_t *)id;
|
---|
7534 |
|
---|
7535 | ASSERT(pvp->dtpv_pops.dtps_enable !=
|
---|
7536 | (int (*)(void *, dtrace_id_t, void *))(uintptr_t)dtrace_enable_nullop);
|
---|
7537 |
|
---|
7538 | mutex_enter(&dtrace_provider_lock);
|
---|
7539 | mutex_enter(&dtrace_lock);
|
---|
7540 |
|
---|
7541 | pvp->dtpv_defunct = 1;
|
---|
7542 |
|
---|
7543 | mutex_exit(&dtrace_lock);
|
---|
7544 | mutex_exit(&dtrace_provider_lock);
|
---|
7545 | }
|
---|
7546 |
|
---|
7547 | /*
|
---|
7548 | * Indicate whether or not DTrace has attached.
|
---|
7549 | */
|
---|
7550 | int
|
---|
7551 | dtrace_attached(void)
|
---|
7552 | {
|
---|
7553 | /*
|
---|
7554 | * dtrace_provider will be non-NULL iff the DTrace driver has
|
---|
7555 | * attached. (It's non-NULL because DTrace is always itself a
|
---|
7556 | * provider.)
|
---|
7557 | */
|
---|
7558 | return (dtrace_provider != NULL);
|
---|
7559 | }
|
---|
7560 |
|
---|
7561 | /*
|
---|
7562 | * Remove all the unenabled probes for the given provider. This function is
|
---|
7563 | * not unlike dtrace_unregister(), except that it doesn't remove the provider
|
---|
7564 | * -- just as many of its associated probes as it can.
|
---|
7565 | */
|
---|
7566 | int
|
---|
7567 | dtrace_condense(dtrace_provider_id_t id)
|
---|
7568 | {
|
---|
7569 | dtrace_provider_t *prov = (dtrace_provider_t *)id;
|
---|
7570 | VBDTTYPE(uint32_t,int) i;
|
---|
7571 | dtrace_probe_t *probe;
|
---|
7572 |
|
---|
7573 | /*
|
---|
7574 | * Make sure this isn't the dtrace provider itself.
|
---|
7575 | */
|
---|
7576 | ASSERT(prov->dtpv_pops.dtps_enable !=
|
---|
7577 | (int (*)(void *, dtrace_id_t, void *))(uintptr_t)dtrace_enable_nullop);
|
---|
7578 |
|
---|
7579 | mutex_enter(&dtrace_provider_lock);
|
---|
7580 | mutex_enter(&dtrace_lock);
|
---|
7581 |
|
---|
7582 | /*
|
---|
7583 | * Attempt to destroy the probes associated with this provider.
|
---|
7584 | */
|
---|
7585 | for (i = 0; i < dtrace_nprobes; i++) {
|
---|
7586 | if ((probe = dtrace_probes[i]) == NULL)
|
---|
7587 | continue;
|
---|
7588 |
|
---|
7589 | if (probe->dtpr_provider != prov)
|
---|
7590 | continue;
|
---|
7591 |
|
---|
7592 | if (probe->dtpr_ecb != NULL)
|
---|
7593 | continue;
|
---|
7594 |
|
---|
7595 | dtrace_probes[i] = NULL;
|
---|
7596 |
|
---|
7597 | dtrace_hash_remove(dtrace_bymod, probe);
|
---|
7598 | dtrace_hash_remove(dtrace_byfunc, probe);
|
---|
7599 | dtrace_hash_remove(dtrace_byname, probe);
|
---|
7600 |
|
---|
7601 | prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
|
---|
7602 | probe->dtpr_arg);
|
---|
7603 | kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
|
---|
7604 | kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
|
---|
7605 | kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
|
---|
7606 | kmem_free(probe, sizeof (dtrace_probe_t));
|
---|
7607 | vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
|
---|
7608 | }
|
---|
7609 |
|
---|
7610 | mutex_exit(&dtrace_lock);
|
---|
7611 | mutex_exit(&dtrace_provider_lock);
|
---|
7612 |
|
---|
7613 | return (0);
|
---|
7614 | }
|
---|
7615 |
|
---|
7616 | /*
|
---|
7617 | * DTrace Probe Management Functions
|
---|
7618 | *
|
---|
7619 | * The functions in this section perform the DTrace probe management,
|
---|
7620 | * including functions to create probes, look-up probes, and call into the
|
---|
7621 | * providers to request that probes be provided. Some of these functions are
|
---|
7622 | * in the Provider-to-Framework API; these functions can be identified by the
|
---|
7623 | * fact that they are not declared "static".
|
---|
7624 | */
|
---|
7625 |
|
---|
7626 | /*
|
---|
7627 | * Create a probe with the specified module name, function name, and name.
|
---|
7628 | */
|
---|
7629 | dtrace_id_t
|
---|
7630 | dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
|
---|
7631 | const char *func, const char *name, int aframes, void *arg)
|
---|
7632 | {
|
---|
7633 | dtrace_probe_t *probe, **probes;
|
---|
7634 | dtrace_provider_t *provider = (dtrace_provider_t *)prov;
|
---|
7635 | dtrace_id_t id;
|
---|
7636 |
|
---|
7637 | if (provider == dtrace_provider) {
|
---|
7638 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
7639 | } else {
|
---|
7640 | mutex_enter(&dtrace_lock);
|
---|
7641 | }
|
---|
7642 |
|
---|
7643 | id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
|
---|
7644 | VM_BESTFIT | VM_SLEEP);
|
---|
7645 | probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
|
---|
7646 |
|
---|
7647 | probe->dtpr_id = id;
|
---|
7648 | probe->dtpr_gen = dtrace_probegen++;
|
---|
7649 | probe->dtpr_mod = dtrace_strdup(mod);
|
---|
7650 | probe->dtpr_func = dtrace_strdup(func);
|
---|
7651 | probe->dtpr_name = dtrace_strdup(name);
|
---|
7652 | probe->dtpr_arg = arg;
|
---|
7653 | probe->dtpr_aframes = aframes;
|
---|
7654 | probe->dtpr_provider = provider;
|
---|
7655 |
|
---|
7656 | dtrace_hash_add(dtrace_bymod, probe);
|
---|
7657 | dtrace_hash_add(dtrace_byfunc, probe);
|
---|
7658 | dtrace_hash_add(dtrace_byname, probe);
|
---|
7659 |
|
---|
7660 | if (id - 1 >= dtrace_nprobes) {
|
---|
7661 | size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
|
---|
7662 | size_t nsize = osize << 1;
|
---|
7663 |
|
---|
7664 | if (nsize == 0) {
|
---|
7665 | ASSERT(osize == 0);
|
---|
7666 | ASSERT(dtrace_probes == NULL);
|
---|
7667 | nsize = sizeof (dtrace_probe_t *);
|
---|
7668 | }
|
---|
7669 |
|
---|
7670 | probes = kmem_zalloc(nsize, KM_SLEEP);
|
---|
7671 |
|
---|
7672 | if (dtrace_probes == NULL) {
|
---|
7673 | ASSERT(osize == 0);
|
---|
7674 | dtrace_probes = probes;
|
---|
7675 | dtrace_nprobes = 1;
|
---|
7676 | } else {
|
---|
7677 | dtrace_probe_t **oprobes = dtrace_probes;
|
---|
7678 |
|
---|
7679 | bcopy(oprobes, probes, osize);
|
---|
7680 | dtrace_membar_producer();
|
---|
7681 | dtrace_probes = probes;
|
---|
7682 |
|
---|
7683 | dtrace_sync();
|
---|
7684 |
|
---|
7685 | /*
|
---|
7686 | * All CPUs are now seeing the new probes array; we can
|
---|
7687 | * safely free the old array.
|
---|
7688 | */
|
---|
7689 | kmem_free(oprobes, osize);
|
---|
7690 | dtrace_nprobes <<= 1;
|
---|
7691 | }
|
---|
7692 |
|
---|
7693 | ASSERT(id - 1 < dtrace_nprobes);
|
---|
7694 | }
|
---|
7695 |
|
---|
7696 | ASSERT(dtrace_probes[id - 1] == NULL);
|
---|
7697 | dtrace_probes[id - 1] = probe;
|
---|
7698 |
|
---|
7699 | if (provider != dtrace_provider)
|
---|
7700 | mutex_exit(&dtrace_lock);
|
---|
7701 |
|
---|
7702 | return (id);
|
---|
7703 | }
|
---|
7704 |
|
---|
7705 | static dtrace_probe_t *
|
---|
7706 | dtrace_probe_lookup_id(dtrace_id_t id)
|
---|
7707 | {
|
---|
7708 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
7709 |
|
---|
7710 | if (id == 0 || id > dtrace_nprobes)
|
---|
7711 | return (NULL);
|
---|
7712 |
|
---|
7713 | return (dtrace_probes[id - 1]);
|
---|
7714 | }
|
---|
7715 |
|
---|
7716 | static int
|
---|
7717 | dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
|
---|
7718 | {
|
---|
7719 | *((dtrace_id_t *)arg) = probe->dtpr_id;
|
---|
7720 |
|
---|
7721 | return (DTRACE_MATCH_DONE);
|
---|
7722 | }
|
---|
7723 |
|
---|
7724 | /*
|
---|
7725 | * Look up a probe based on provider and one or more of module name, function
|
---|
7726 | * name and probe name.
|
---|
7727 | */
|
---|
7728 | dtrace_id_t
|
---|
7729 | dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
|
---|
7730 | const char *func, const char *name)
|
---|
7731 | {
|
---|
7732 | dtrace_probekey_t pkey;
|
---|
7733 | dtrace_id_t id;
|
---|
7734 | int match;
|
---|
7735 |
|
---|
7736 | pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
|
---|
7737 | pkey.dtpk_pmatch = &dtrace_match_string;
|
---|
7738 | pkey.dtpk_mod = mod;
|
---|
7739 | pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
|
---|
7740 | pkey.dtpk_func = func;
|
---|
7741 | pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
|
---|
7742 | pkey.dtpk_name = name;
|
---|
7743 | pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
|
---|
7744 | pkey.dtpk_id = DTRACE_IDNONE;
|
---|
7745 |
|
---|
7746 | mutex_enter(&dtrace_lock);
|
---|
7747 | match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
|
---|
7748 | dtrace_probe_lookup_match, &id);
|
---|
7749 | mutex_exit(&dtrace_lock);
|
---|
7750 |
|
---|
7751 | ASSERT(match == 1 || match == 0);
|
---|
7752 | return (match ? id : 0);
|
---|
7753 | }
|
---|
7754 |
|
---|
7755 | /*
|
---|
7756 | * Returns the probe argument associated with the specified probe.
|
---|
7757 | */
|
---|
7758 | void *
|
---|
7759 | dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
|
---|
7760 | {
|
---|
7761 | dtrace_probe_t *probe;
|
---|
7762 | void *rval = NULL;
|
---|
7763 |
|
---|
7764 | mutex_enter(&dtrace_lock);
|
---|
7765 |
|
---|
7766 | if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
|
---|
7767 | probe->dtpr_provider == (dtrace_provider_t *)id)
|
---|
7768 | rval = probe->dtpr_arg;
|
---|
7769 |
|
---|
7770 | mutex_exit(&dtrace_lock);
|
---|
7771 |
|
---|
7772 | return (rval);
|
---|
7773 | }
|
---|
7774 |
|
---|
7775 | /*
|
---|
7776 | * Copy a probe into a probe description.
|
---|
7777 | */
|
---|
7778 | static void
|
---|
7779 | dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
|
---|
7780 | {
|
---|
7781 | bzero(pdp, sizeof (dtrace_probedesc_t));
|
---|
7782 | pdp->dtpd_id = prp->dtpr_id;
|
---|
7783 |
|
---|
7784 | (void) strncpy(pdp->dtpd_provider,
|
---|
7785 | prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1);
|
---|
7786 |
|
---|
7787 | (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1);
|
---|
7788 | (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1);
|
---|
7789 | (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1);
|
---|
7790 | }
|
---|
7791 |
|
---|
7792 | /*
|
---|
7793 | * Called to indicate that a probe -- or probes -- should be provided by a
|
---|
7794 | * specfied provider. If the specified description is NULL, the provider will
|
---|
7795 | * be told to provide all of its probes. (This is done whenever a new
|
---|
7796 | * consumer comes along, or whenever a retained enabling is to be matched.) If
|
---|
7797 | * the specified description is non-NULL, the provider is given the
|
---|
7798 | * opportunity to dynamically provide the specified probe, allowing providers
|
---|
7799 | * to support the creation of probes on-the-fly. (So-called _autocreated_
|
---|
7800 | * probes.) If the provider is NULL, the operations will be applied to all
|
---|
7801 | * providers; if the provider is non-NULL the operations will only be applied
|
---|
7802 | * to the specified provider. The dtrace_provider_lock must be held, and the
|
---|
7803 | * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
|
---|
7804 | * will need to grab the dtrace_lock when it reenters the framework through
|
---|
7805 | * dtrace_probe_lookup(), dtrace_probe_create(), etc.
|
---|
7806 | */
|
---|
7807 | static void
|
---|
7808 | dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
|
---|
7809 | {
|
---|
7810 | #ifndef VBOX
|
---|
7811 | struct modctl *ctl;
|
---|
7812 | #endif
|
---|
7813 | int all = 0;
|
---|
7814 |
|
---|
7815 | ASSERT(MUTEX_HELD(&dtrace_provider_lock));
|
---|
7816 |
|
---|
7817 | if (prv == NULL) {
|
---|
7818 | all = 1;
|
---|
7819 | prv = dtrace_provider;
|
---|
7820 | }
|
---|
7821 |
|
---|
7822 | do {
|
---|
7823 | /*
|
---|
7824 | * First, call the blanket provide operation.
|
---|
7825 | */
|
---|
7826 | prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
|
---|
7827 |
|
---|
7828 | #ifndef VBOX
|
---|
7829 | /*
|
---|
7830 | * Now call the per-module provide operation. We will grab
|
---|
7831 | * mod_lock to prevent the list from being modified. Note
|
---|
7832 | * that this also prevents the mod_busy bits from changing.
|
---|
7833 | * (mod_busy can only be changed with mod_lock held.)
|
---|
7834 | */
|
---|
7835 | mutex_enter(&mod_lock);
|
---|
7836 |
|
---|
7837 | ctl = &modules;
|
---|
7838 | do {
|
---|
7839 | if (ctl->mod_busy || ctl->mod_mp == NULL)
|
---|
7840 | continue;
|
---|
7841 |
|
---|
7842 | prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
|
---|
7843 |
|
---|
7844 | } while ((ctl = ctl->mod_next) != &modules);
|
---|
7845 |
|
---|
7846 | mutex_exit(&mod_lock);
|
---|
7847 | #endif
|
---|
7848 | } while (all && (prv = prv->dtpv_next) != NULL);
|
---|
7849 | }
|
---|
7850 |
|
---|
7851 | #ifndef VBOX
|
---|
7852 | /*
|
---|
7853 | * Iterate over each probe, and call the Framework-to-Provider API function
|
---|
7854 | * denoted by offs.
|
---|
7855 | */
|
---|
7856 | static void
|
---|
7857 | dtrace_probe_foreach(uintptr_t offs)
|
---|
7858 | {
|
---|
7859 | dtrace_provider_t *prov;
|
---|
7860 | void (*func)(void *, dtrace_id_t, void *);
|
---|
7861 | dtrace_probe_t *probe;
|
---|
7862 | dtrace_icookie_t cookie;
|
---|
7863 | VBDTTYPE(uint32_t,int) i;
|
---|
7864 |
|
---|
7865 | /*
|
---|
7866 | * We disable interrupts to walk through the probe array. This is
|
---|
7867 | * safe -- the dtrace_sync() in dtrace_unregister() assures that we
|
---|
7868 | * won't see stale data.
|
---|
7869 | */
|
---|
7870 | cookie = dtrace_interrupt_disable();
|
---|
7871 |
|
---|
7872 | for (i = 0; i < dtrace_nprobes; i++) {
|
---|
7873 | if ((probe = dtrace_probes[i]) == NULL)
|
---|
7874 | continue;
|
---|
7875 |
|
---|
7876 | if (probe->dtpr_ecb == NULL) {
|
---|
7877 | /*
|
---|
7878 | * This probe isn't enabled -- don't call the function.
|
---|
7879 | */
|
---|
7880 | continue;
|
---|
7881 | }
|
---|
7882 |
|
---|
7883 | prov = probe->dtpr_provider;
|
---|
7884 | func = *((void(**)(void *, dtrace_id_t, void *))
|
---|
7885 | ((uintptr_t)&prov->dtpv_pops + offs));
|
---|
7886 |
|
---|
7887 | func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
|
---|
7888 | }
|
---|
7889 |
|
---|
7890 | dtrace_interrupt_enable(cookie);
|
---|
7891 | }
|
---|
7892 | #endif /* !VBOX */
|
---|
7893 |
|
---|
7894 | static int
|
---|
7895 | dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
|
---|
7896 | {
|
---|
7897 | dtrace_probekey_t pkey;
|
---|
7898 | uint32_t priv;
|
---|
7899 | uid_t uid;
|
---|
7900 | zoneid_t zoneid;
|
---|
7901 |
|
---|
7902 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
7903 | dtrace_ecb_create_cache = NULL;
|
---|
7904 |
|
---|
7905 | if (desc == NULL) {
|
---|
7906 | /*
|
---|
7907 | * If we're passed a NULL description, we're being asked to
|
---|
7908 | * create an ECB with a NULL probe.
|
---|
7909 | */
|
---|
7910 | (void) dtrace_ecb_create_enable(NULL, enab);
|
---|
7911 | return (0);
|
---|
7912 | }
|
---|
7913 |
|
---|
7914 | dtrace_probekey(desc, &pkey);
|
---|
7915 | dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
|
---|
7916 | &priv, &uid, &zoneid);
|
---|
7917 |
|
---|
7918 | return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
|
---|
7919 | enab));
|
---|
7920 | }
|
---|
7921 |
|
---|
7922 | /*
|
---|
7923 | * DTrace Helper Provider Functions
|
---|
7924 | */
|
---|
7925 | static void
|
---|
7926 | dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
|
---|
7927 | {
|
---|
7928 | attr->dtat_name = DOF_ATTR_NAME(dofattr);
|
---|
7929 | attr->dtat_data = DOF_ATTR_DATA(dofattr);
|
---|
7930 | attr->dtat_class = DOF_ATTR_CLASS(dofattr);
|
---|
7931 | }
|
---|
7932 |
|
---|
7933 | static void
|
---|
7934 | dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
|
---|
7935 | const dof_provider_t *dofprov, char *strtab)
|
---|
7936 | {
|
---|
7937 | hprov->dthpv_provname = strtab + dofprov->dofpv_name;
|
---|
7938 | dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
|
---|
7939 | dofprov->dofpv_provattr);
|
---|
7940 | dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
|
---|
7941 | dofprov->dofpv_modattr);
|
---|
7942 | dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
|
---|
7943 | dofprov->dofpv_funcattr);
|
---|
7944 | dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
|
---|
7945 | dofprov->dofpv_nameattr);
|
---|
7946 | dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
|
---|
7947 | dofprov->dofpv_argsattr);
|
---|
7948 | }
|
---|
7949 |
|
---|
7950 | static void
|
---|
7951 | dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
|
---|
7952 | {
|
---|
7953 | uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
|
---|
7954 | dof_hdr_t *dof = (dof_hdr_t *)daddr;
|
---|
7955 | dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
|
---|
7956 | dof_provider_t *provider;
|
---|
7957 | dof_probe_t *probe;
|
---|
7958 | uint32_t *off, *enoff;
|
---|
7959 | uint8_t *arg;
|
---|
7960 | char *strtab;
|
---|
7961 | uint_t i, nprobes;
|
---|
7962 | dtrace_helper_provdesc_t dhpv;
|
---|
7963 | dtrace_helper_probedesc_t dhpb;
|
---|
7964 | dtrace_meta_t *meta = dtrace_meta_pid;
|
---|
7965 | dtrace_mops_t *mops = &meta->dtm_mops;
|
---|
7966 | void *parg;
|
---|
7967 |
|
---|
7968 | provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
|
---|
7969 | str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
|
---|
7970 | provider->dofpv_strtab * dof->dofh_secsize);
|
---|
7971 | prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
|
---|
7972 | provider->dofpv_probes * dof->dofh_secsize);
|
---|
7973 | arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
|
---|
7974 | provider->dofpv_prargs * dof->dofh_secsize);
|
---|
7975 | off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
|
---|
7976 | provider->dofpv_proffs * dof->dofh_secsize);
|
---|
7977 |
|
---|
7978 | strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
|
---|
7979 | off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
|
---|
7980 | arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
|
---|
7981 | enoff = NULL;
|
---|
7982 |
|
---|
7983 | /*
|
---|
7984 | * See dtrace_helper_provider_validate().
|
---|
7985 | */
|
---|
7986 | if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
|
---|
7987 | provider->dofpv_prenoffs != DOF_SECT_NONE) {
|
---|
7988 | enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
|
---|
7989 | provider->dofpv_prenoffs * dof->dofh_secsize);
|
---|
7990 | enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
|
---|
7991 | }
|
---|
7992 |
|
---|
7993 | nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
|
---|
7994 |
|
---|
7995 | /*
|
---|
7996 | * Create the provider.
|
---|
7997 | */
|
---|
7998 | dtrace_dofprov2hprov(&dhpv, provider, strtab);
|
---|
7999 |
|
---|
8000 | if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
|
---|
8001 | return;
|
---|
8002 |
|
---|
8003 | meta->dtm_count++;
|
---|
8004 |
|
---|
8005 | /*
|
---|
8006 | * Create the probes.
|
---|
8007 | */
|
---|
8008 | for (i = 0; i < nprobes; i++) {
|
---|
8009 | probe = (dof_probe_t *)(uintptr_t)(daddr +
|
---|
8010 | prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
|
---|
8011 |
|
---|
8012 | dhpb.dthpb_mod = dhp->dofhp_mod;
|
---|
8013 | dhpb.dthpb_func = strtab + probe->dofpr_func;
|
---|
8014 | dhpb.dthpb_name = strtab + probe->dofpr_name;
|
---|
8015 | dhpb.dthpb_base = probe->dofpr_addr;
|
---|
8016 | dhpb.dthpb_offs = off + probe->dofpr_offidx;
|
---|
8017 | dhpb.dthpb_noffs = probe->dofpr_noffs;
|
---|
8018 | if (enoff != NULL) {
|
---|
8019 | dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
|
---|
8020 | dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
|
---|
8021 | } else {
|
---|
8022 | dhpb.dthpb_enoffs = NULL;
|
---|
8023 | dhpb.dthpb_nenoffs = 0;
|
---|
8024 | }
|
---|
8025 | dhpb.dthpb_args = arg + probe->dofpr_argidx;
|
---|
8026 | dhpb.dthpb_nargc = probe->dofpr_nargc;
|
---|
8027 | dhpb.dthpb_xargc = probe->dofpr_xargc;
|
---|
8028 | dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
|
---|
8029 | dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
|
---|
8030 |
|
---|
8031 | mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
|
---|
8032 | }
|
---|
8033 | }
|
---|
8034 |
|
---|
8035 | static void
|
---|
8036 | dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
|
---|
8037 | {
|
---|
8038 | uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
|
---|
8039 | dof_hdr_t *dof = (dof_hdr_t *)daddr;
|
---|
8040 | VBDTTYPE(uint32_t,int) i;
|
---|
8041 |
|
---|
8042 | ASSERT(MUTEX_HELD(&dtrace_meta_lock));
|
---|
8043 |
|
---|
8044 | for (i = 0; i < dof->dofh_secnum; i++) {
|
---|
8045 | dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
|
---|
8046 | dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
8047 |
|
---|
8048 | if (sec->dofs_type != DOF_SECT_PROVIDER)
|
---|
8049 | continue;
|
---|
8050 |
|
---|
8051 | dtrace_helper_provide_one(dhp, sec, pid);
|
---|
8052 | }
|
---|
8053 |
|
---|
8054 | /*
|
---|
8055 | * We may have just created probes, so we must now rematch against
|
---|
8056 | * any retained enablings. Note that this call will acquire both
|
---|
8057 | * cpu_lock and dtrace_lock; the fact that we are holding
|
---|
8058 | * dtrace_meta_lock now is what defines the ordering with respect to
|
---|
8059 | * these three locks.
|
---|
8060 | */
|
---|
8061 | dtrace_enabling_matchall();
|
---|
8062 | }
|
---|
8063 |
|
---|
8064 | #ifndef VBOX
|
---|
8065 |
|
---|
8066 | static void
|
---|
8067 | dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
|
---|
8068 | {
|
---|
8069 | uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
|
---|
8070 | dof_hdr_t *dof = (dof_hdr_t *)daddr;
|
---|
8071 | dof_sec_t *str_sec;
|
---|
8072 | dof_provider_t *provider;
|
---|
8073 | char *strtab;
|
---|
8074 | dtrace_helper_provdesc_t dhpv;
|
---|
8075 | dtrace_meta_t *meta = dtrace_meta_pid;
|
---|
8076 | dtrace_mops_t *mops = &meta->dtm_mops;
|
---|
8077 |
|
---|
8078 | provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
|
---|
8079 | str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
|
---|
8080 | provider->dofpv_strtab * dof->dofh_secsize);
|
---|
8081 |
|
---|
8082 | strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
|
---|
8083 |
|
---|
8084 | /*
|
---|
8085 | * Create the provider.
|
---|
8086 | */
|
---|
8087 | dtrace_dofprov2hprov(&dhpv, provider, strtab);
|
---|
8088 |
|
---|
8089 | mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);
|
---|
8090 |
|
---|
8091 | meta->dtm_count--;
|
---|
8092 | }
|
---|
8093 |
|
---|
8094 | static void
|
---|
8095 | dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
|
---|
8096 | {
|
---|
8097 | uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
|
---|
8098 | dof_hdr_t *dof = (dof_hdr_t *)daddr;
|
---|
8099 | VBDTTYPE(uint32_t,int) i;
|
---|
8100 |
|
---|
8101 | ASSERT(MUTEX_HELD(&dtrace_meta_lock));
|
---|
8102 |
|
---|
8103 | for (i = 0; i < dof->dofh_secnum; i++) {
|
---|
8104 | dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
|
---|
8105 | dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
8106 |
|
---|
8107 | if (sec->dofs_type != DOF_SECT_PROVIDER)
|
---|
8108 | continue;
|
---|
8109 |
|
---|
8110 | dtrace_helper_provider_remove_one(dhp, sec, pid);
|
---|
8111 | }
|
---|
8112 | }
|
---|
8113 |
|
---|
8114 | #endif /* !VBOX */
|
---|
8115 |
|
---|
8116 | /*
|
---|
8117 | * DTrace Meta Provider-to-Framework API Functions
|
---|
8118 | *
|
---|
8119 | * These functions implement the Meta Provider-to-Framework API, as described
|
---|
8120 | * in <sys/dtrace.h>.
|
---|
8121 | */
|
---|
8122 | int
|
---|
8123 | dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
|
---|
8124 | dtrace_meta_provider_id_t *idp)
|
---|
8125 | {
|
---|
8126 | dtrace_meta_t *meta;
|
---|
8127 | dtrace_helpers_t *help, *next;
|
---|
8128 | VBDTTYPE(uint32_t,int) i;
|
---|
8129 |
|
---|
8130 | *idp = DTRACE_METAPROVNONE;
|
---|
8131 |
|
---|
8132 | /*
|
---|
8133 | * We strictly don't need the name, but we hold onto it for
|
---|
8134 | * debuggability. All hail error queues!
|
---|
8135 | */
|
---|
8136 | if (name == NULL) {
|
---|
8137 | cmn_err(CE_WARN, "failed to register meta-provider: "
|
---|
8138 | "invalid name");
|
---|
8139 | return (EINVAL);
|
---|
8140 | }
|
---|
8141 |
|
---|
8142 | if (mops == NULL ||
|
---|
8143 | mops->dtms_create_probe == NULL ||
|
---|
8144 | mops->dtms_provide_pid == NULL ||
|
---|
8145 | mops->dtms_remove_pid == NULL) {
|
---|
8146 | cmn_err(CE_WARN, "failed to register meta-register %s: "
|
---|
8147 | "invalid ops", name);
|
---|
8148 | return (EINVAL);
|
---|
8149 | }
|
---|
8150 |
|
---|
8151 | meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
|
---|
8152 | meta->dtm_mops = *mops;
|
---|
8153 | meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
|
---|
8154 | (void) strcpy(meta->dtm_name, name);
|
---|
8155 | meta->dtm_arg = arg;
|
---|
8156 |
|
---|
8157 | mutex_enter(&dtrace_meta_lock);
|
---|
8158 | mutex_enter(&dtrace_lock);
|
---|
8159 |
|
---|
8160 | if (dtrace_meta_pid != NULL) {
|
---|
8161 | mutex_exit(&dtrace_lock);
|
---|
8162 | mutex_exit(&dtrace_meta_lock);
|
---|
8163 | cmn_err(CE_WARN, "failed to register meta-register %s: "
|
---|
8164 | "user-land meta-provider exists", name);
|
---|
8165 | kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
|
---|
8166 | kmem_free(meta, sizeof (dtrace_meta_t));
|
---|
8167 | return (EINVAL);
|
---|
8168 | }
|
---|
8169 |
|
---|
8170 | dtrace_meta_pid = meta;
|
---|
8171 | *idp = (dtrace_meta_provider_id_t)meta;
|
---|
8172 |
|
---|
8173 | /*
|
---|
8174 | * If there are providers and probes ready to go, pass them
|
---|
8175 | * off to the new meta provider now.
|
---|
8176 | */
|
---|
8177 |
|
---|
8178 | help = dtrace_deferred_pid;
|
---|
8179 | dtrace_deferred_pid = NULL;
|
---|
8180 |
|
---|
8181 | mutex_exit(&dtrace_lock);
|
---|
8182 |
|
---|
8183 | while (help != NULL) {
|
---|
8184 | for (i = 0; i < help->dthps_nprovs; i++) {
|
---|
8185 | dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
|
---|
8186 | help->dthps_pid);
|
---|
8187 | }
|
---|
8188 |
|
---|
8189 | next = help->dthps_next;
|
---|
8190 | help->dthps_next = NULL;
|
---|
8191 | help->dthps_prev = NULL;
|
---|
8192 | help->dthps_deferred = 0;
|
---|
8193 | help = next;
|
---|
8194 | }
|
---|
8195 |
|
---|
8196 | mutex_exit(&dtrace_meta_lock);
|
---|
8197 |
|
---|
8198 | return (0);
|
---|
8199 | }
|
---|
8200 |
|
---|
8201 | int
|
---|
8202 | dtrace_meta_unregister(dtrace_meta_provider_id_t id)
|
---|
8203 | {
|
---|
8204 | dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;
|
---|
8205 |
|
---|
8206 | mutex_enter(&dtrace_meta_lock);
|
---|
8207 | mutex_enter(&dtrace_lock);
|
---|
8208 |
|
---|
8209 | if (old == dtrace_meta_pid) {
|
---|
8210 | pp = &dtrace_meta_pid;
|
---|
8211 | } else {
|
---|
8212 | panic("attempt to unregister non-existent "
|
---|
8213 | "dtrace meta-provider %p\n", (void *)old);
|
---|
8214 | #ifdef VBOX
|
---|
8215 | return EINVAL;
|
---|
8216 | #endif
|
---|
8217 | }
|
---|
8218 |
|
---|
8219 | if (old->dtm_count != 0) {
|
---|
8220 | mutex_exit(&dtrace_lock);
|
---|
8221 | mutex_exit(&dtrace_meta_lock);
|
---|
8222 | return (EBUSY);
|
---|
8223 | }
|
---|
8224 |
|
---|
8225 | *pp = NULL;
|
---|
8226 |
|
---|
8227 | mutex_exit(&dtrace_lock);
|
---|
8228 | mutex_exit(&dtrace_meta_lock);
|
---|
8229 |
|
---|
8230 | kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
|
---|
8231 | kmem_free(old, sizeof (dtrace_meta_t));
|
---|
8232 |
|
---|
8233 | return (0);
|
---|
8234 | }
|
---|
8235 |
|
---|
8236 |
|
---|
8237 | /*
|
---|
8238 | * DTrace DIF Object Functions
|
---|
8239 | */
|
---|
8240 | static int
|
---|
8241 | dtrace_difo_err(uint_t pc, const char *format, ...)
|
---|
8242 | {
|
---|
8243 | if (dtrace_err_verbose) {
|
---|
8244 | va_list alist;
|
---|
8245 |
|
---|
8246 | (void) uprintf("dtrace DIF object error: [%u]: ", pc);
|
---|
8247 | va_start(alist, format);
|
---|
8248 | (void) vuprintf(format, alist);
|
---|
8249 | va_end(alist);
|
---|
8250 | }
|
---|
8251 |
|
---|
8252 | #ifdef DTRACE_ERRDEBUG
|
---|
8253 | dtrace_errdebug(format);
|
---|
8254 | #endif
|
---|
8255 | return (1);
|
---|
8256 | }
|
---|
8257 |
|
---|
8258 | /*
|
---|
8259 | * Validate a DTrace DIF object by checking the IR instructions. The following
|
---|
8260 | * rules are currently enforced by dtrace_difo_validate():
|
---|
8261 | *
|
---|
8262 | * 1. Each instruction must have a valid opcode
|
---|
8263 | * 2. Each register, string, variable, or subroutine reference must be valid
|
---|
8264 | * 3. No instruction can modify register %r0 (must be zero)
|
---|
8265 | * 4. All instruction reserved bits must be set to zero
|
---|
8266 | * 5. The last instruction must be a "ret" instruction
|
---|
8267 | * 6. All branch targets must reference a valid instruction _after_ the branch
|
---|
8268 | */
|
---|
8269 | static int
|
---|
8270 | dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
|
---|
8271 | cred_t *cr)
|
---|
8272 | {
|
---|
8273 | #ifndef VBOX
|
---|
8274 | int err = 0, i;
|
---|
8275 | #else
|
---|
8276 | int err = 0;
|
---|
8277 | uint_t i;
|
---|
8278 | #endif
|
---|
8279 | int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
|
---|
8280 | int kcheckload;
|
---|
8281 | uint_t pc;
|
---|
8282 |
|
---|
8283 | kcheckload = cr == NULL ||
|
---|
8284 | (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
|
---|
8285 |
|
---|
8286 | dp->dtdo_destructive = 0;
|
---|
8287 |
|
---|
8288 | for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
|
---|
8289 | dif_instr_t instr = dp->dtdo_buf[pc];
|
---|
8290 |
|
---|
8291 | uint_t r1 = DIF_INSTR_R1(instr);
|
---|
8292 | uint_t r2 = DIF_INSTR_R2(instr);
|
---|
8293 | uint_t rd = DIF_INSTR_RD(instr);
|
---|
8294 | uint_t rs = DIF_INSTR_RS(instr);
|
---|
8295 | uint_t label = DIF_INSTR_LABEL(instr);
|
---|
8296 | uint_t v = DIF_INSTR_VAR(instr);
|
---|
8297 | uint_t subr = DIF_INSTR_SUBR(instr);
|
---|
8298 | uint_t type = DIF_INSTR_TYPE(instr);
|
---|
8299 | uint_t op = DIF_INSTR_OP(instr);
|
---|
8300 |
|
---|
8301 | switch (op) {
|
---|
8302 | case DIF_OP_OR:
|
---|
8303 | case DIF_OP_XOR:
|
---|
8304 | case DIF_OP_AND:
|
---|
8305 | case DIF_OP_SLL:
|
---|
8306 | case DIF_OP_SRL:
|
---|
8307 | case DIF_OP_SRA:
|
---|
8308 | case DIF_OP_SUB:
|
---|
8309 | case DIF_OP_ADD:
|
---|
8310 | case DIF_OP_MUL:
|
---|
8311 | case DIF_OP_SDIV:
|
---|
8312 | case DIF_OP_UDIV:
|
---|
8313 | case DIF_OP_SREM:
|
---|
8314 | case DIF_OP_UREM:
|
---|
8315 | case DIF_OP_COPYS:
|
---|
8316 | if (r1 >= nregs)
|
---|
8317 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8318 | if (r2 >= nregs)
|
---|
8319 | err += efunc(pc, "invalid register %u\n", r2);
|
---|
8320 | if (rd >= nregs)
|
---|
8321 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8322 | if (rd == 0)
|
---|
8323 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8324 | break;
|
---|
8325 | case DIF_OP_NOT:
|
---|
8326 | case DIF_OP_MOV:
|
---|
8327 | case DIF_OP_ALLOCS:
|
---|
8328 | if (r1 >= nregs)
|
---|
8329 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8330 | if (r2 != 0)
|
---|
8331 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8332 | if (rd >= nregs)
|
---|
8333 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8334 | if (rd == 0)
|
---|
8335 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8336 | break;
|
---|
8337 | case DIF_OP_LDSB:
|
---|
8338 | case DIF_OP_LDSH:
|
---|
8339 | case DIF_OP_LDSW:
|
---|
8340 | case DIF_OP_LDUB:
|
---|
8341 | case DIF_OP_LDUH:
|
---|
8342 | case DIF_OP_LDUW:
|
---|
8343 | case DIF_OP_LDX:
|
---|
8344 | if (r1 >= nregs)
|
---|
8345 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8346 | if (r2 != 0)
|
---|
8347 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8348 | if (rd >= nregs)
|
---|
8349 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8350 | if (rd == 0)
|
---|
8351 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8352 | if (kcheckload)
|
---|
8353 | dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
|
---|
8354 | DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
|
---|
8355 | break;
|
---|
8356 | case DIF_OP_RLDSB:
|
---|
8357 | case DIF_OP_RLDSH:
|
---|
8358 | case DIF_OP_RLDSW:
|
---|
8359 | case DIF_OP_RLDUB:
|
---|
8360 | case DIF_OP_RLDUH:
|
---|
8361 | case DIF_OP_RLDUW:
|
---|
8362 | case DIF_OP_RLDX:
|
---|
8363 | if (r1 >= nregs)
|
---|
8364 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8365 | if (r2 != 0)
|
---|
8366 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8367 | if (rd >= nregs)
|
---|
8368 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8369 | if (rd == 0)
|
---|
8370 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8371 | break;
|
---|
8372 | case DIF_OP_ULDSB:
|
---|
8373 | case DIF_OP_ULDSH:
|
---|
8374 | case DIF_OP_ULDSW:
|
---|
8375 | case DIF_OP_ULDUB:
|
---|
8376 | case DIF_OP_ULDUH:
|
---|
8377 | case DIF_OP_ULDUW:
|
---|
8378 | case DIF_OP_ULDX:
|
---|
8379 | if (r1 >= nregs)
|
---|
8380 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8381 | if (r2 != 0)
|
---|
8382 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8383 | if (rd >= nregs)
|
---|
8384 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8385 | if (rd == 0)
|
---|
8386 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8387 | break;
|
---|
8388 | case DIF_OP_STB:
|
---|
8389 | case DIF_OP_STH:
|
---|
8390 | case DIF_OP_STW:
|
---|
8391 | case DIF_OP_STX:
|
---|
8392 | if (r1 >= nregs)
|
---|
8393 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8394 | if (r2 != 0)
|
---|
8395 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8396 | if (rd >= nregs)
|
---|
8397 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8398 | if (rd == 0)
|
---|
8399 | err += efunc(pc, "cannot write to 0 address\n");
|
---|
8400 | break;
|
---|
8401 | case DIF_OP_CMP:
|
---|
8402 | case DIF_OP_SCMP:
|
---|
8403 | if (r1 >= nregs)
|
---|
8404 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8405 | if (r2 >= nregs)
|
---|
8406 | err += efunc(pc, "invalid register %u\n", r2);
|
---|
8407 | if (rd != 0)
|
---|
8408 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8409 | break;
|
---|
8410 | case DIF_OP_TST:
|
---|
8411 | if (r1 >= nregs)
|
---|
8412 | err += efunc(pc, "invalid register %u\n", r1);
|
---|
8413 | if (r2 != 0 || rd != 0)
|
---|
8414 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8415 | break;
|
---|
8416 | case DIF_OP_BA:
|
---|
8417 | case DIF_OP_BE:
|
---|
8418 | case DIF_OP_BNE:
|
---|
8419 | case DIF_OP_BG:
|
---|
8420 | case DIF_OP_BGU:
|
---|
8421 | case DIF_OP_BGE:
|
---|
8422 | case DIF_OP_BGEU:
|
---|
8423 | case DIF_OP_BL:
|
---|
8424 | case DIF_OP_BLU:
|
---|
8425 | case DIF_OP_BLE:
|
---|
8426 | case DIF_OP_BLEU:
|
---|
8427 | if (label >= dp->dtdo_len) {
|
---|
8428 | err += efunc(pc, "invalid branch target %u\n",
|
---|
8429 | label);
|
---|
8430 | }
|
---|
8431 | if (label <= pc) {
|
---|
8432 | err += efunc(pc, "backward branch to %u\n",
|
---|
8433 | label);
|
---|
8434 | }
|
---|
8435 | break;
|
---|
8436 | case DIF_OP_RET:
|
---|
8437 | if (r1 != 0 || r2 != 0)
|
---|
8438 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8439 | if (rd >= nregs)
|
---|
8440 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8441 | break;
|
---|
8442 | case DIF_OP_NOP:
|
---|
8443 | case DIF_OP_POPTS:
|
---|
8444 | case DIF_OP_FLUSHTS:
|
---|
8445 | if (r1 != 0 || r2 != 0 || rd != 0)
|
---|
8446 | err += efunc(pc, "non-zero reserved bits\n");
|
---|
8447 | break;
|
---|
8448 | case DIF_OP_SETX:
|
---|
8449 | if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
|
---|
8450 | err += efunc(pc, "invalid integer ref %u\n",
|
---|
8451 | DIF_INSTR_INTEGER(instr));
|
---|
8452 | }
|
---|
8453 | if (rd >= nregs)
|
---|
8454 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8455 | if (rd == 0)
|
---|
8456 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8457 | break;
|
---|
8458 | case DIF_OP_SETS:
|
---|
8459 | if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
|
---|
8460 | err += efunc(pc, "invalid string ref %u\n",
|
---|
8461 | DIF_INSTR_STRING(instr));
|
---|
8462 | }
|
---|
8463 | if (rd >= nregs)
|
---|
8464 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8465 | if (rd == 0)
|
---|
8466 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8467 | break;
|
---|
8468 | case DIF_OP_LDGA:
|
---|
8469 | case DIF_OP_LDTA:
|
---|
8470 | if (r1 > DIF_VAR_ARRAY_MAX)
|
---|
8471 | err += efunc(pc, "invalid array %u\n", r1);
|
---|
8472 | if (r2 >= nregs)
|
---|
8473 | err += efunc(pc, "invalid register %u\n", r2);
|
---|
8474 | if (rd >= nregs)
|
---|
8475 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8476 | if (rd == 0)
|
---|
8477 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8478 | break;
|
---|
8479 | case DIF_OP_LDGS:
|
---|
8480 | case DIF_OP_LDTS:
|
---|
8481 | case DIF_OP_LDLS:
|
---|
8482 | case DIF_OP_LDGAA:
|
---|
8483 | case DIF_OP_LDTAA:
|
---|
8484 | if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
|
---|
8485 | err += efunc(pc, "invalid variable %u\n", v);
|
---|
8486 | if (rd >= nregs)
|
---|
8487 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8488 | if (rd == 0)
|
---|
8489 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8490 | break;
|
---|
8491 | case DIF_OP_STGS:
|
---|
8492 | case DIF_OP_STTS:
|
---|
8493 | case DIF_OP_STLS:
|
---|
8494 | case DIF_OP_STGAA:
|
---|
8495 | case DIF_OP_STTAA:
|
---|
8496 | if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
|
---|
8497 | err += efunc(pc, "invalid variable %u\n", v);
|
---|
8498 | if (rs >= nregs)
|
---|
8499 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8500 | break;
|
---|
8501 | case DIF_OP_CALL:
|
---|
8502 | if (subr > DIF_SUBR_MAX)
|
---|
8503 | err += efunc(pc, "invalid subr %u\n", subr);
|
---|
8504 | if (rd >= nregs)
|
---|
8505 | err += efunc(pc, "invalid register %u\n", rd);
|
---|
8506 | if (rd == 0)
|
---|
8507 | err += efunc(pc, "cannot write to %r0\n");
|
---|
8508 |
|
---|
8509 | if (subr == DIF_SUBR_COPYOUT ||
|
---|
8510 | subr == DIF_SUBR_COPYOUTSTR) {
|
---|
8511 | dp->dtdo_destructive = 1;
|
---|
8512 | }
|
---|
8513 | break;
|
---|
8514 | case DIF_OP_PUSHTR:
|
---|
8515 | if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
|
---|
8516 | err += efunc(pc, "invalid ref type %u\n", type);
|
---|
8517 | if (r2 >= nregs)
|
---|
8518 | err += efunc(pc, "invalid register %u\n", r2);
|
---|
8519 | if (rs >= nregs)
|
---|
8520 | err += efunc(pc, "invalid register %u\n", rs);
|
---|
8521 | break;
|
---|
8522 | case DIF_OP_PUSHTV:
|
---|
8523 | if (type != DIF_TYPE_CTF)
|
---|
8524 | err += efunc(pc, "invalid val type %u\n", type);
|
---|
8525 | if (r2 >= nregs)
|
---|
8526 | err += efunc(pc, "invalid register %u\n", r2);
|
---|
8527 | if (rs >= nregs)
|
---|
8528 | err += efunc(pc, "invalid register %u\n", rs);
|
---|
8529 | break;
|
---|
8530 | default:
|
---|
8531 | err += efunc(pc, "invalid opcode %u\n",
|
---|
8532 | DIF_INSTR_OP(instr));
|
---|
8533 | }
|
---|
8534 | }
|
---|
8535 |
|
---|
8536 | if (dp->dtdo_len != 0 &&
|
---|
8537 | DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
|
---|
8538 | err += efunc(dp->dtdo_len - 1,
|
---|
8539 | "expected 'ret' as last DIF instruction\n");
|
---|
8540 | }
|
---|
8541 |
|
---|
8542 | if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) {
|
---|
8543 | /*
|
---|
8544 | * If we're not returning by reference, the size must be either
|
---|
8545 | * 0 or the size of one of the base types.
|
---|
8546 | */
|
---|
8547 | switch (dp->dtdo_rtype.dtdt_size) {
|
---|
8548 | case 0:
|
---|
8549 | case sizeof (uint8_t):
|
---|
8550 | case sizeof (uint16_t):
|
---|
8551 | case sizeof (uint32_t):
|
---|
8552 | case sizeof (uint64_t):
|
---|
8553 | break;
|
---|
8554 |
|
---|
8555 | default:
|
---|
8556 | err += efunc(dp->dtdo_len - 1, "bad return size\n");
|
---|
8557 | }
|
---|
8558 | }
|
---|
8559 |
|
---|
8560 | for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
|
---|
8561 | dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
|
---|
8562 | dtrace_diftype_t *vt, *et;
|
---|
8563 | uint_t id, ndx;
|
---|
8564 |
|
---|
8565 | if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
|
---|
8566 | v->dtdv_scope != DIFV_SCOPE_THREAD &&
|
---|
8567 | v->dtdv_scope != DIFV_SCOPE_LOCAL) {
|
---|
8568 | err += efunc(i, "unrecognized variable scope %d\n",
|
---|
8569 | v->dtdv_scope);
|
---|
8570 | break;
|
---|
8571 | }
|
---|
8572 |
|
---|
8573 | if (v->dtdv_kind != DIFV_KIND_ARRAY &&
|
---|
8574 | v->dtdv_kind != DIFV_KIND_SCALAR) {
|
---|
8575 | err += efunc(i, "unrecognized variable type %d\n",
|
---|
8576 | v->dtdv_kind);
|
---|
8577 | break;
|
---|
8578 | }
|
---|
8579 |
|
---|
8580 | if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
|
---|
8581 | err += efunc(i, "%d exceeds variable id limit\n", id);
|
---|
8582 | break;
|
---|
8583 | }
|
---|
8584 |
|
---|
8585 | if (id < DIF_VAR_OTHER_UBASE)
|
---|
8586 | continue;
|
---|
8587 |
|
---|
8588 | /*
|
---|
8589 | * For user-defined variables, we need to check that this
|
---|
8590 | * definition is identical to any previous definition that we
|
---|
8591 | * encountered.
|
---|
8592 | */
|
---|
8593 | ndx = id - DIF_VAR_OTHER_UBASE;
|
---|
8594 |
|
---|
8595 | switch (v->dtdv_scope) {
|
---|
8596 | case DIFV_SCOPE_GLOBAL:
|
---|
8597 | if (VBDTCAST(int64_t)ndx < vstate->dtvs_nglobals) {
|
---|
8598 | dtrace_statvar_t *svar;
|
---|
8599 |
|
---|
8600 | if ((svar = vstate->dtvs_globals[ndx]) != NULL)
|
---|
8601 | existing = &svar->dtsv_var;
|
---|
8602 | }
|
---|
8603 |
|
---|
8604 | break;
|
---|
8605 |
|
---|
8606 | case DIFV_SCOPE_THREAD:
|
---|
8607 | if (VBDTCAST(int64_t)ndx < vstate->dtvs_ntlocals)
|
---|
8608 | existing = &vstate->dtvs_tlocals[ndx];
|
---|
8609 | break;
|
---|
8610 |
|
---|
8611 | case DIFV_SCOPE_LOCAL:
|
---|
8612 | if (VBDTCAST(int64_t)ndx < vstate->dtvs_nlocals) {
|
---|
8613 | dtrace_statvar_t *svar;
|
---|
8614 |
|
---|
8615 | if ((svar = vstate->dtvs_locals[ndx]) != NULL)
|
---|
8616 | existing = &svar->dtsv_var;
|
---|
8617 | }
|
---|
8618 |
|
---|
8619 | break;
|
---|
8620 | }
|
---|
8621 |
|
---|
8622 | vt = &v->dtdv_type;
|
---|
8623 |
|
---|
8624 | if (vt->dtdt_flags & DIF_TF_BYREF) {
|
---|
8625 | if (vt->dtdt_size == 0) {
|
---|
8626 | err += efunc(i, "zero-sized variable\n");
|
---|
8627 | break;
|
---|
8628 | }
|
---|
8629 |
|
---|
8630 | if (v->dtdv_scope == DIFV_SCOPE_GLOBAL &&
|
---|
8631 | vt->dtdt_size > dtrace_global_maxsize) {
|
---|
8632 | err += efunc(i, "oversized by-ref global\n");
|
---|
8633 | break;
|
---|
8634 | }
|
---|
8635 | }
|
---|
8636 |
|
---|
8637 | if (existing == NULL || existing->dtdv_id == 0)
|
---|
8638 | continue;
|
---|
8639 |
|
---|
8640 | ASSERT(existing->dtdv_id == v->dtdv_id);
|
---|
8641 | ASSERT(existing->dtdv_scope == v->dtdv_scope);
|
---|
8642 |
|
---|
8643 | if (existing->dtdv_kind != v->dtdv_kind)
|
---|
8644 | err += efunc(i, "%d changed variable kind\n", id);
|
---|
8645 |
|
---|
8646 | et = &existing->dtdv_type;
|
---|
8647 |
|
---|
8648 | if (vt->dtdt_flags != et->dtdt_flags) {
|
---|
8649 | err += efunc(i, "%d changed variable type flags\n", id);
|
---|
8650 | break;
|
---|
8651 | }
|
---|
8652 |
|
---|
8653 | if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
|
---|
8654 | err += efunc(i, "%d changed variable type size\n", id);
|
---|
8655 | break;
|
---|
8656 | }
|
---|
8657 | }
|
---|
8658 |
|
---|
8659 | return (err);
|
---|
8660 | }
|
---|
8661 |
|
---|
8662 | #ifndef VBOX
|
---|
8663 | /*
|
---|
8664 | * Validate a DTrace DIF object that it is to be used as a helper. Helpers
|
---|
8665 | * are much more constrained than normal DIFOs. Specifically, they may
|
---|
8666 | * not:
|
---|
8667 | *
|
---|
8668 | * 1. Make calls to subroutines other than copyin(), copyinstr() or
|
---|
8669 | * miscellaneous string routines
|
---|
8670 | * 2. Access DTrace variables other than the args[] array, and the
|
---|
8671 | * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
|
---|
8672 | * 3. Have thread-local variables.
|
---|
8673 | * 4. Have dynamic variables.
|
---|
8674 | */
|
---|
8675 | static int
|
---|
8676 | dtrace_difo_validate_helper(dtrace_difo_t *dp)
|
---|
8677 | {
|
---|
8678 | int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
|
---|
8679 | int err = 0;
|
---|
8680 | uint_t pc;
|
---|
8681 |
|
---|
8682 | for (pc = 0; pc < dp->dtdo_len; pc++) {
|
---|
8683 | dif_instr_t instr = dp->dtdo_buf[pc];
|
---|
8684 |
|
---|
8685 | uint_t v = DIF_INSTR_VAR(instr);
|
---|
8686 | uint_t subr = DIF_INSTR_SUBR(instr);
|
---|
8687 | uint_t op = DIF_INSTR_OP(instr);
|
---|
8688 |
|
---|
8689 | switch (op) {
|
---|
8690 | case DIF_OP_OR:
|
---|
8691 | case DIF_OP_XOR:
|
---|
8692 | case DIF_OP_AND:
|
---|
8693 | case DIF_OP_SLL:
|
---|
8694 | case DIF_OP_SRL:
|
---|
8695 | case DIF_OP_SRA:
|
---|
8696 | case DIF_OP_SUB:
|
---|
8697 | case DIF_OP_ADD:
|
---|
8698 | case DIF_OP_MUL:
|
---|
8699 | case DIF_OP_SDIV:
|
---|
8700 | case DIF_OP_UDIV:
|
---|
8701 | case DIF_OP_SREM:
|
---|
8702 | case DIF_OP_UREM:
|
---|
8703 | case DIF_OP_COPYS:
|
---|
8704 | case DIF_OP_NOT:
|
---|
8705 | case DIF_OP_MOV:
|
---|
8706 | case DIF_OP_RLDSB:
|
---|
8707 | case DIF_OP_RLDSH:
|
---|
8708 | case DIF_OP_RLDSW:
|
---|
8709 | case DIF_OP_RLDUB:
|
---|
8710 | case DIF_OP_RLDUH:
|
---|
8711 | case DIF_OP_RLDUW:
|
---|
8712 | case DIF_OP_RLDX:
|
---|
8713 | case DIF_OP_ULDSB:
|
---|
8714 | case DIF_OP_ULDSH:
|
---|
8715 | case DIF_OP_ULDSW:
|
---|
8716 | case DIF_OP_ULDUB:
|
---|
8717 | case DIF_OP_ULDUH:
|
---|
8718 | case DIF_OP_ULDUW:
|
---|
8719 | case DIF_OP_ULDX:
|
---|
8720 | case DIF_OP_STB:
|
---|
8721 | case DIF_OP_STH:
|
---|
8722 | case DIF_OP_STW:
|
---|
8723 | case DIF_OP_STX:
|
---|
8724 | case DIF_OP_ALLOCS:
|
---|
8725 | case DIF_OP_CMP:
|
---|
8726 | case DIF_OP_SCMP:
|
---|
8727 | case DIF_OP_TST:
|
---|
8728 | case DIF_OP_BA:
|
---|
8729 | case DIF_OP_BE:
|
---|
8730 | case DIF_OP_BNE:
|
---|
8731 | case DIF_OP_BG:
|
---|
8732 | case DIF_OP_BGU:
|
---|
8733 | case DIF_OP_BGE:
|
---|
8734 | case DIF_OP_BGEU:
|
---|
8735 | case DIF_OP_BL:
|
---|
8736 | case DIF_OP_BLU:
|
---|
8737 | case DIF_OP_BLE:
|
---|
8738 | case DIF_OP_BLEU:
|
---|
8739 | case DIF_OP_RET:
|
---|
8740 | case DIF_OP_NOP:
|
---|
8741 | case DIF_OP_POPTS:
|
---|
8742 | case DIF_OP_FLUSHTS:
|
---|
8743 | case DIF_OP_SETX:
|
---|
8744 | case DIF_OP_SETS:
|
---|
8745 | case DIF_OP_LDGA:
|
---|
8746 | case DIF_OP_LDLS:
|
---|
8747 | case DIF_OP_STGS:
|
---|
8748 | case DIF_OP_STLS:
|
---|
8749 | case DIF_OP_PUSHTR:
|
---|
8750 | case DIF_OP_PUSHTV:
|
---|
8751 | break;
|
---|
8752 |
|
---|
8753 | case DIF_OP_LDGS:
|
---|
8754 | if (v >= DIF_VAR_OTHER_UBASE)
|
---|
8755 | break;
|
---|
8756 |
|
---|
8757 | if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
|
---|
8758 | break;
|
---|
8759 |
|
---|
8760 | if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
|
---|
8761 | v == DIF_VAR_PPID || v == DIF_VAR_TID ||
|
---|
8762 | v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
|
---|
8763 | v == DIF_VAR_UID || v == DIF_VAR_GID)
|
---|
8764 | break;
|
---|
8765 |
|
---|
8766 | err += efunc(pc, "illegal variable %u\n", v);
|
---|
8767 | break;
|
---|
8768 |
|
---|
8769 | case DIF_OP_LDTA:
|
---|
8770 | case DIF_OP_LDTS:
|
---|
8771 | case DIF_OP_LDGAA:
|
---|
8772 | case DIF_OP_LDTAA:
|
---|
8773 | err += efunc(pc, "illegal dynamic variable load\n");
|
---|
8774 | break;
|
---|
8775 |
|
---|
8776 | case DIF_OP_STTS:
|
---|
8777 | case DIF_OP_STGAA:
|
---|
8778 | case DIF_OP_STTAA:
|
---|
8779 | err += efunc(pc, "illegal dynamic variable store\n");
|
---|
8780 | break;
|
---|
8781 |
|
---|
8782 | case DIF_OP_CALL:
|
---|
8783 | if (subr == DIF_SUBR_ALLOCA ||
|
---|
8784 | subr == DIF_SUBR_BCOPY ||
|
---|
8785 | subr == DIF_SUBR_COPYIN ||
|
---|
8786 | subr == DIF_SUBR_COPYINTO ||
|
---|
8787 | subr == DIF_SUBR_COPYINSTR ||
|
---|
8788 | subr == DIF_SUBR_INDEX ||
|
---|
8789 | subr == DIF_SUBR_INET_NTOA ||
|
---|
8790 | subr == DIF_SUBR_INET_NTOA6 ||
|
---|
8791 | subr == DIF_SUBR_INET_NTOP ||
|
---|
8792 | subr == DIF_SUBR_LLTOSTR ||
|
---|
8793 | subr == DIF_SUBR_RINDEX ||
|
---|
8794 | subr == DIF_SUBR_STRCHR ||
|
---|
8795 | subr == DIF_SUBR_STRJOIN ||
|
---|
8796 | subr == DIF_SUBR_STRRCHR ||
|
---|
8797 | subr == DIF_SUBR_STRSTR ||
|
---|
8798 | subr == DIF_SUBR_HTONS ||
|
---|
8799 | subr == DIF_SUBR_HTONL ||
|
---|
8800 | subr == DIF_SUBR_HTONLL ||
|
---|
8801 | subr == DIF_SUBR_NTOHS ||
|
---|
8802 | subr == DIF_SUBR_NTOHL ||
|
---|
8803 | subr == DIF_SUBR_NTOHLL)
|
---|
8804 | break;
|
---|
8805 |
|
---|
8806 | err += efunc(pc, "invalid subr %u\n", subr);
|
---|
8807 | break;
|
---|
8808 |
|
---|
8809 | default:
|
---|
8810 | err += efunc(pc, "invalid opcode %u\n",
|
---|
8811 | DIF_INSTR_OP(instr));
|
---|
8812 | }
|
---|
8813 | }
|
---|
8814 |
|
---|
8815 | return (err);
|
---|
8816 | }
|
---|
8817 | #endif /* !VBOX */
|
---|
8818 |
|
---|
8819 | /*
|
---|
8820 | * Returns 1 if the expression in the DIF object can be cached on a per-thread
|
---|
8821 | * basis; 0 if not.
|
---|
8822 | */
|
---|
8823 | static int
|
---|
8824 | dtrace_difo_cacheable(dtrace_difo_t *dp)
|
---|
8825 | {
|
---|
8826 | VBDTTYPE(uint_t,int) i;
|
---|
8827 |
|
---|
8828 | if (dp == NULL)
|
---|
8829 | return (0);
|
---|
8830 |
|
---|
8831 | for (i = 0; i < dp->dtdo_varlen; i++) {
|
---|
8832 | dtrace_difv_t *v = &dp->dtdo_vartab[i];
|
---|
8833 |
|
---|
8834 | if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
|
---|
8835 | continue;
|
---|
8836 |
|
---|
8837 | switch (v->dtdv_id) {
|
---|
8838 | case DIF_VAR_CURTHREAD:
|
---|
8839 | case DIF_VAR_PID:
|
---|
8840 | case DIF_VAR_TID:
|
---|
8841 | case DIF_VAR_EXECNAME:
|
---|
8842 | case DIF_VAR_ZONENAME:
|
---|
8843 | break;
|
---|
8844 |
|
---|
8845 | default:
|
---|
8846 | return (0);
|
---|
8847 | }
|
---|
8848 | }
|
---|
8849 |
|
---|
8850 | /*
|
---|
8851 | * This DIF object may be cacheable. Now we need to look for any
|
---|
8852 | * array loading instructions, any memory loading instructions, or
|
---|
8853 | * any stores to thread-local variables.
|
---|
8854 | */
|
---|
8855 | for (i = 0; i < dp->dtdo_len; i++) {
|
---|
8856 | uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
|
---|
8857 |
|
---|
8858 | if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
|
---|
8859 | (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
|
---|
8860 | (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
|
---|
8861 | op == DIF_OP_LDGA || op == DIF_OP_STTS)
|
---|
8862 | return (0);
|
---|
8863 | }
|
---|
8864 |
|
---|
8865 | return (1);
|
---|
8866 | }
|
---|
8867 |
|
---|
8868 | static void
|
---|
8869 | dtrace_difo_hold(dtrace_difo_t *dp)
|
---|
8870 | {
|
---|
8871 | #ifndef VBOX
|
---|
8872 | VBDTTYPE(uint_t,int) i;
|
---|
8873 | #endif
|
---|
8874 |
|
---|
8875 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
8876 |
|
---|
8877 | dp->dtdo_refcnt++;
|
---|
8878 | ASSERT(dp->dtdo_refcnt != 0);
|
---|
8879 |
|
---|
8880 | #ifndef VBOX
|
---|
8881 | /*
|
---|
8882 | * We need to check this DIF object for references to the variable
|
---|
8883 | * DIF_VAR_VTIMESTAMP.
|
---|
8884 | */
|
---|
8885 | for (i = 0; i < dp->dtdo_varlen; i++) {
|
---|
8886 | dtrace_difv_t *v = &dp->dtdo_vartab[i];
|
---|
8887 |
|
---|
8888 | if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
|
---|
8889 | continue;
|
---|
8890 |
|
---|
8891 | if (dtrace_vtime_references++ == 0)
|
---|
8892 | dtrace_vtime_enable();
|
---|
8893 | }
|
---|
8894 | #endif
|
---|
8895 | }
|
---|
8896 |
|
---|
8897 | /*
|
---|
8898 | * This routine calculates the dynamic variable chunksize for a given DIF
|
---|
8899 | * object. The calculation is not fool-proof, and can probably be tricked by
|
---|
8900 | * malicious DIF -- but it works for all compiler-generated DIF. Because this
|
---|
8901 | * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
|
---|
8902 | * if a dynamic variable size exceeds the chunksize.
|
---|
8903 | */
|
---|
8904 | static void
|
---|
8905 | dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
|
---|
8906 | {
|
---|
8907 | uint64_t sval VBDTGCC(0);
|
---|
8908 | dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
|
---|
8909 | const dif_instr_t *text = dp->dtdo_buf;
|
---|
8910 | uint_t pc, srd = 0;
|
---|
8911 | uint_t ttop = 0;
|
---|
8912 | size_t size, ksize;
|
---|
8913 | uint_t id, i;
|
---|
8914 |
|
---|
8915 | for (pc = 0; pc < dp->dtdo_len; pc++) {
|
---|
8916 | dif_instr_t instr = text[pc];
|
---|
8917 | uint_t op = DIF_INSTR_OP(instr);
|
---|
8918 | uint_t rd = DIF_INSTR_RD(instr);
|
---|
8919 | uint_t r1 = DIF_INSTR_R1(instr);
|
---|
8920 | uint_t nkeys = 0;
|
---|
8921 | uchar_t scope VBDTGCC(0);
|
---|
8922 |
|
---|
8923 | dtrace_key_t *key = tupregs;
|
---|
8924 |
|
---|
8925 | switch (op) {
|
---|
8926 | case DIF_OP_SETX:
|
---|
8927 | sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
|
---|
8928 | srd = rd;
|
---|
8929 | continue;
|
---|
8930 |
|
---|
8931 | case DIF_OP_STTS:
|
---|
8932 | key = &tupregs[DIF_DTR_NREGS];
|
---|
8933 | key[0].dttk_size = 0;
|
---|
8934 | key[1].dttk_size = 0;
|
---|
8935 | nkeys = 2;
|
---|
8936 | scope = DIFV_SCOPE_THREAD;
|
---|
8937 | break;
|
---|
8938 |
|
---|
8939 | case DIF_OP_STGAA:
|
---|
8940 | case DIF_OP_STTAA:
|
---|
8941 | nkeys = ttop;
|
---|
8942 |
|
---|
8943 | if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
|
---|
8944 | key[nkeys++].dttk_size = 0;
|
---|
8945 |
|
---|
8946 | key[nkeys++].dttk_size = 0;
|
---|
8947 |
|
---|
8948 | if (op == DIF_OP_STTAA) {
|
---|
8949 | scope = DIFV_SCOPE_THREAD;
|
---|
8950 | } else {
|
---|
8951 | scope = DIFV_SCOPE_GLOBAL;
|
---|
8952 | }
|
---|
8953 |
|
---|
8954 | break;
|
---|
8955 |
|
---|
8956 | case DIF_OP_PUSHTR:
|
---|
8957 | if (ttop == DIF_DTR_NREGS)
|
---|
8958 | return;
|
---|
8959 |
|
---|
8960 | if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
|
---|
8961 | /*
|
---|
8962 | * If the register for the size of the "pushtr"
|
---|
8963 | * is %r0 (or the value is 0) and the type is
|
---|
8964 | * a string, we'll use the system-wide default
|
---|
8965 | * string size.
|
---|
8966 | */
|
---|
8967 | tupregs[ttop++].dttk_size =
|
---|
8968 | dtrace_strsize_default;
|
---|
8969 | } else {
|
---|
8970 | if (srd == 0)
|
---|
8971 | return;
|
---|
8972 |
|
---|
8973 | tupregs[ttop++].dttk_size = sval;
|
---|
8974 | }
|
---|
8975 |
|
---|
8976 | break;
|
---|
8977 |
|
---|
8978 | case DIF_OP_PUSHTV:
|
---|
8979 | if (ttop == DIF_DTR_NREGS)
|
---|
8980 | return;
|
---|
8981 |
|
---|
8982 | tupregs[ttop++].dttk_size = 0;
|
---|
8983 | break;
|
---|
8984 |
|
---|
8985 | case DIF_OP_FLUSHTS:
|
---|
8986 | ttop = 0;
|
---|
8987 | break;
|
---|
8988 |
|
---|
8989 | case DIF_OP_POPTS:
|
---|
8990 | if (ttop != 0)
|
---|
8991 | ttop--;
|
---|
8992 | break;
|
---|
8993 | }
|
---|
8994 |
|
---|
8995 | sval = 0;
|
---|
8996 | srd = 0;
|
---|
8997 |
|
---|
8998 | if (nkeys == 0)
|
---|
8999 | continue;
|
---|
9000 |
|
---|
9001 | /*
|
---|
9002 | * We have a dynamic variable allocation; calculate its size.
|
---|
9003 | */
|
---|
9004 | for (ksize = 0, i = 0; i < nkeys; i++)
|
---|
9005 | ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
|
---|
9006 |
|
---|
9007 | size = sizeof (dtrace_dynvar_t);
|
---|
9008 | size += sizeof (dtrace_key_t) * (nkeys - 1);
|
---|
9009 | size += ksize;
|
---|
9010 |
|
---|
9011 | /*
|
---|
9012 | * Now we need to determine the size of the stored data.
|
---|
9013 | */
|
---|
9014 | id = DIF_INSTR_VAR(instr);
|
---|
9015 |
|
---|
9016 | for (i = 0; i < dp->dtdo_varlen; i++) {
|
---|
9017 | dtrace_difv_t *v = &dp->dtdo_vartab[i];
|
---|
9018 |
|
---|
9019 | if (v->dtdv_id == id && v->dtdv_scope == scope) {
|
---|
9020 | size += v->dtdv_type.dtdt_size;
|
---|
9021 | break;
|
---|
9022 | }
|
---|
9023 | }
|
---|
9024 |
|
---|
9025 | if (i == dp->dtdo_varlen)
|
---|
9026 | return;
|
---|
9027 |
|
---|
9028 | /*
|
---|
9029 | * We have the size. If this is larger than the chunk size
|
---|
9030 | * for our dynamic variable state, reset the chunk size.
|
---|
9031 | */
|
---|
9032 | size = P2ROUNDUP(size, sizeof (uint64_t));
|
---|
9033 |
|
---|
9034 | if (size > vstate->dtvs_dynvars.dtds_chunksize)
|
---|
9035 | vstate->dtvs_dynvars.dtds_chunksize = size;
|
---|
9036 | }
|
---|
9037 | }
|
---|
9038 |
|
---|
9039 | static void
|
---|
9040 | dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
|
---|
9041 | {
|
---|
9042 | #ifndef VBOX
|
---|
9043 | int i, oldsvars, osz, nsz, otlocals, ntlocals;
|
---|
9044 | #else
|
---|
9045 | int oldsvars, osz, nsz, otlocals, ntlocals;
|
---|
9046 | uint_t i;
|
---|
9047 | #endif
|
---|
9048 | uint_t id;
|
---|
9049 |
|
---|
9050 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9051 | ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
|
---|
9052 |
|
---|
9053 | for (i = 0; i < dp->dtdo_varlen; i++) {
|
---|
9054 | dtrace_difv_t *v = &dp->dtdo_vartab[i];
|
---|
9055 | dtrace_statvar_t *svar, ***svarp;
|
---|
9056 | size_t dsize = 0;
|
---|
9057 | uint8_t scope = v->dtdv_scope;
|
---|
9058 | int *np;
|
---|
9059 |
|
---|
9060 | if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
|
---|
9061 | continue;
|
---|
9062 |
|
---|
9063 | id -= DIF_VAR_OTHER_UBASE;
|
---|
9064 |
|
---|
9065 | switch (scope) {
|
---|
9066 | case DIFV_SCOPE_THREAD:
|
---|
9067 | while (VBDTCAST(int64_t)id >= (otlocals = vstate->dtvs_ntlocals)) {
|
---|
9068 | dtrace_difv_t *tlocals;
|
---|
9069 |
|
---|
9070 | if ((ntlocals = (otlocals << 1)) == 0)
|
---|
9071 | ntlocals = 1;
|
---|
9072 |
|
---|
9073 | osz = otlocals * sizeof (dtrace_difv_t);
|
---|
9074 | nsz = ntlocals * sizeof (dtrace_difv_t);
|
---|
9075 |
|
---|
9076 | tlocals = kmem_zalloc(nsz, KM_SLEEP);
|
---|
9077 |
|
---|
9078 | if (osz != 0) {
|
---|
9079 | bcopy(vstate->dtvs_tlocals,
|
---|
9080 | tlocals, osz);
|
---|
9081 | kmem_free(vstate->dtvs_tlocals, osz);
|
---|
9082 | }
|
---|
9083 |
|
---|
9084 | vstate->dtvs_tlocals = tlocals;
|
---|
9085 | vstate->dtvs_ntlocals = ntlocals;
|
---|
9086 | }
|
---|
9087 |
|
---|
9088 | vstate->dtvs_tlocals[id] = *v;
|
---|
9089 | continue;
|
---|
9090 |
|
---|
9091 | case DIFV_SCOPE_LOCAL:
|
---|
9092 | np = &vstate->dtvs_nlocals;
|
---|
9093 | svarp = &vstate->dtvs_locals;
|
---|
9094 |
|
---|
9095 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
|
---|
9096 | dsize = NCPU * (v->dtdv_type.dtdt_size +
|
---|
9097 | sizeof (uint64_t));
|
---|
9098 | else
|
---|
9099 | dsize = NCPU * sizeof (uint64_t);
|
---|
9100 |
|
---|
9101 | break;
|
---|
9102 |
|
---|
9103 | case DIFV_SCOPE_GLOBAL:
|
---|
9104 | np = &vstate->dtvs_nglobals;
|
---|
9105 | svarp = &vstate->dtvs_globals;
|
---|
9106 |
|
---|
9107 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
|
---|
9108 | dsize = v->dtdv_type.dtdt_size +
|
---|
9109 | sizeof (uint64_t);
|
---|
9110 |
|
---|
9111 | break;
|
---|
9112 |
|
---|
9113 | default:
|
---|
9114 | #ifndef VBOX
|
---|
9115 | ASSERT(0);
|
---|
9116 | #else
|
---|
9117 | AssertFatalMsgFailed(("%d\n", scope));
|
---|
9118 | #endif
|
---|
9119 | }
|
---|
9120 |
|
---|
9121 | while (VBDTCAST(int64_t)id >= (oldsvars = *np)) {
|
---|
9122 | dtrace_statvar_t **statics;
|
---|
9123 | int newsvars, oldsize, newsize;
|
---|
9124 |
|
---|
9125 | if ((newsvars = (oldsvars << 1)) == 0)
|
---|
9126 | newsvars = 1;
|
---|
9127 |
|
---|
9128 | oldsize = oldsvars * sizeof (dtrace_statvar_t *);
|
---|
9129 | newsize = newsvars * sizeof (dtrace_statvar_t *);
|
---|
9130 |
|
---|
9131 | statics = kmem_zalloc(newsize, KM_SLEEP);
|
---|
9132 |
|
---|
9133 | if (oldsize != 0) {
|
---|
9134 | bcopy(*svarp, statics, oldsize);
|
---|
9135 | kmem_free(*svarp, oldsize);
|
---|
9136 | }
|
---|
9137 |
|
---|
9138 | *svarp = statics;
|
---|
9139 | *np = newsvars;
|
---|
9140 | }
|
---|
9141 |
|
---|
9142 | if ((svar = (*svarp)[id]) == NULL) {
|
---|
9143 | svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
|
---|
9144 | svar->dtsv_var = *v;
|
---|
9145 |
|
---|
9146 | if ((svar->dtsv_size = dsize) != 0) {
|
---|
9147 | svar->dtsv_data = (uint64_t)(uintptr_t)
|
---|
9148 | kmem_zalloc(dsize, KM_SLEEP);
|
---|
9149 | }
|
---|
9150 |
|
---|
9151 | (*svarp)[id] = svar;
|
---|
9152 | }
|
---|
9153 |
|
---|
9154 | svar->dtsv_refcnt++;
|
---|
9155 | }
|
---|
9156 |
|
---|
9157 | dtrace_difo_chunksize(dp, vstate);
|
---|
9158 | dtrace_difo_hold(dp);
|
---|
9159 | }
|
---|
9160 |
|
---|
9161 | #ifndef VBOX
|
---|
9162 | static dtrace_difo_t *
|
---|
9163 | dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
|
---|
9164 | {
|
---|
9165 | dtrace_difo_t *new;
|
---|
9166 | size_t sz;
|
---|
9167 |
|
---|
9168 | ASSERT(dp->dtdo_buf != NULL);
|
---|
9169 | ASSERT(dp->dtdo_refcnt != 0);
|
---|
9170 |
|
---|
9171 | new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
|
---|
9172 |
|
---|
9173 | ASSERT(dp->dtdo_buf != NULL);
|
---|
9174 | sz = dp->dtdo_len * sizeof (dif_instr_t);
|
---|
9175 | new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
|
---|
9176 | bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
|
---|
9177 | new->dtdo_len = dp->dtdo_len;
|
---|
9178 |
|
---|
9179 | if (dp->dtdo_strtab != NULL) {
|
---|
9180 | ASSERT(dp->dtdo_strlen != 0);
|
---|
9181 | new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
|
---|
9182 | bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
|
---|
9183 | new->dtdo_strlen = dp->dtdo_strlen;
|
---|
9184 | }
|
---|
9185 |
|
---|
9186 | if (dp->dtdo_inttab != NULL) {
|
---|
9187 | ASSERT(dp->dtdo_intlen != 0);
|
---|
9188 | sz = dp->dtdo_intlen * sizeof (uint64_t);
|
---|
9189 | new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
|
---|
9190 | bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
|
---|
9191 | new->dtdo_intlen = dp->dtdo_intlen;
|
---|
9192 | }
|
---|
9193 |
|
---|
9194 | if (dp->dtdo_vartab != NULL) {
|
---|
9195 | ASSERT(dp->dtdo_varlen != 0);
|
---|
9196 | sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
|
---|
9197 | new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
|
---|
9198 | bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
|
---|
9199 | new->dtdo_varlen = dp->dtdo_varlen;
|
---|
9200 | }
|
---|
9201 |
|
---|
9202 | dtrace_difo_init(new, vstate);
|
---|
9203 | return (new);
|
---|
9204 | }
|
---|
9205 | #endif
|
---|
9206 |
|
---|
9207 | static void
|
---|
9208 | dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
|
---|
9209 | {
|
---|
9210 | VBDTTYPE(uint_t,int) i;
|
---|
9211 |
|
---|
9212 | ASSERT(dp->dtdo_refcnt == 0);
|
---|
9213 |
|
---|
9214 | for (i = 0; i < dp->dtdo_varlen; i++) {
|
---|
9215 | dtrace_difv_t *v = &dp->dtdo_vartab[i];
|
---|
9216 | dtrace_statvar_t *svar, **svarp;
|
---|
9217 | uint_t id;
|
---|
9218 | uint8_t scope = v->dtdv_scope;
|
---|
9219 | int *np;
|
---|
9220 |
|
---|
9221 | switch (scope) {
|
---|
9222 | case DIFV_SCOPE_THREAD:
|
---|
9223 | continue;
|
---|
9224 |
|
---|
9225 | case DIFV_SCOPE_LOCAL:
|
---|
9226 | np = &vstate->dtvs_nlocals;
|
---|
9227 | svarp = vstate->dtvs_locals;
|
---|
9228 | break;
|
---|
9229 |
|
---|
9230 | case DIFV_SCOPE_GLOBAL:
|
---|
9231 | np = &vstate->dtvs_nglobals;
|
---|
9232 | svarp = vstate->dtvs_globals;
|
---|
9233 | break;
|
---|
9234 |
|
---|
9235 | default:
|
---|
9236 | #ifndef VBOX
|
---|
9237 | ASSERT(0);
|
---|
9238 | #else
|
---|
9239 | AssertFatalMsgFailed(("%d\n", scope));
|
---|
9240 | #endif
|
---|
9241 | }
|
---|
9242 |
|
---|
9243 | if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
|
---|
9244 | continue;
|
---|
9245 |
|
---|
9246 | id -= DIF_VAR_OTHER_UBASE;
|
---|
9247 | ASSERT(VBDTCAST(int64_t)id < *np);
|
---|
9248 |
|
---|
9249 | svar = svarp[id];
|
---|
9250 | ASSERT(svar != NULL);
|
---|
9251 | ASSERT(svar->dtsv_refcnt > 0);
|
---|
9252 |
|
---|
9253 | if (--svar->dtsv_refcnt > 0)
|
---|
9254 | continue;
|
---|
9255 |
|
---|
9256 | if (svar->dtsv_size != 0) {
|
---|
9257 | ASSERT(svar->dtsv_data != NULL);
|
---|
9258 | kmem_free((void *)(uintptr_t)svar->dtsv_data,
|
---|
9259 | svar->dtsv_size);
|
---|
9260 | }
|
---|
9261 |
|
---|
9262 | kmem_free(svar, sizeof (dtrace_statvar_t));
|
---|
9263 | svarp[id] = NULL;
|
---|
9264 | }
|
---|
9265 |
|
---|
9266 | kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
|
---|
9267 | kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
|
---|
9268 | kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
|
---|
9269 | kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
|
---|
9270 |
|
---|
9271 | kmem_free(dp, sizeof (dtrace_difo_t));
|
---|
9272 | }
|
---|
9273 |
|
---|
9274 | static void
|
---|
9275 | dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
|
---|
9276 | {
|
---|
9277 | #ifndef VBOX
|
---|
9278 | VBDTTYPE(uint_t,int) i;
|
---|
9279 | #endif
|
---|
9280 |
|
---|
9281 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9282 | ASSERT(dp->dtdo_refcnt != 0);
|
---|
9283 |
|
---|
9284 | #ifndef VBOX
|
---|
9285 | for (i = 0; i < dp->dtdo_varlen; i++) {
|
---|
9286 | dtrace_difv_t *v = &dp->dtdo_vartab[i];
|
---|
9287 |
|
---|
9288 | if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
|
---|
9289 | continue;
|
---|
9290 |
|
---|
9291 | ASSERT(dtrace_vtime_references > 0);
|
---|
9292 | if (--dtrace_vtime_references == 0)
|
---|
9293 | dtrace_vtime_disable();
|
---|
9294 | }
|
---|
9295 | #endif
|
---|
9296 |
|
---|
9297 | if (--dp->dtdo_refcnt == 0)
|
---|
9298 | dtrace_difo_destroy(dp, vstate);
|
---|
9299 | }
|
---|
9300 |
|
---|
9301 | /*
|
---|
9302 | * DTrace Format Functions
|
---|
9303 | */
|
---|
9304 | static uint16_t
|
---|
9305 | dtrace_format_add(dtrace_state_t *state, char *str)
|
---|
9306 | {
|
---|
9307 | char *fmt, **new;
|
---|
9308 | uint16_t ndx, len = VBDTCAST(uint16_t)strlen(str) + 1;
|
---|
9309 |
|
---|
9310 | fmt = kmem_zalloc(len, KM_SLEEP);
|
---|
9311 | bcopy(str, fmt, len);
|
---|
9312 |
|
---|
9313 | for (ndx = 0; ndx < state->dts_nformats; ndx++) {
|
---|
9314 | if (state->dts_formats[ndx] == NULL) {
|
---|
9315 | state->dts_formats[ndx] = fmt;
|
---|
9316 | return (ndx + 1);
|
---|
9317 | }
|
---|
9318 | }
|
---|
9319 |
|
---|
9320 | if (state->dts_nformats == USHRT_MAX) {
|
---|
9321 | /*
|
---|
9322 | * This is only likely if a denial-of-service attack is being
|
---|
9323 | * attempted. As such, it's okay to fail silently here.
|
---|
9324 | */
|
---|
9325 | kmem_free(fmt, len);
|
---|
9326 | return (0);
|
---|
9327 | }
|
---|
9328 |
|
---|
9329 | /*
|
---|
9330 | * For simplicity, we always resize the formats array to be exactly the
|
---|
9331 | * number of formats.
|
---|
9332 | */
|
---|
9333 | ndx = state->dts_nformats++;
|
---|
9334 | new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
|
---|
9335 |
|
---|
9336 | if (state->dts_formats != NULL) {
|
---|
9337 | ASSERT(ndx != 0);
|
---|
9338 | bcopy(state->dts_formats, new, ndx * sizeof (char *));
|
---|
9339 | kmem_free(state->dts_formats, ndx * sizeof (char *));
|
---|
9340 | }
|
---|
9341 |
|
---|
9342 | state->dts_formats = new;
|
---|
9343 | state->dts_formats[ndx] = fmt;
|
---|
9344 |
|
---|
9345 | return (ndx + 1);
|
---|
9346 | }
|
---|
9347 |
|
---|
9348 | static void
|
---|
9349 | dtrace_format_remove(dtrace_state_t *state, uint16_t format)
|
---|
9350 | {
|
---|
9351 | char *fmt;
|
---|
9352 |
|
---|
9353 | ASSERT(state->dts_formats != NULL);
|
---|
9354 | ASSERT(format <= state->dts_nformats);
|
---|
9355 | ASSERT(state->dts_formats[format - 1] != NULL);
|
---|
9356 |
|
---|
9357 | fmt = state->dts_formats[format - 1];
|
---|
9358 | kmem_free(fmt, strlen(fmt) + 1);
|
---|
9359 | state->dts_formats[format - 1] = NULL;
|
---|
9360 | }
|
---|
9361 |
|
---|
9362 | static void
|
---|
9363 | dtrace_format_destroy(dtrace_state_t *state)
|
---|
9364 | {
|
---|
9365 | int i;
|
---|
9366 |
|
---|
9367 | if (state->dts_nformats == 0) {
|
---|
9368 | ASSERT(state->dts_formats == NULL);
|
---|
9369 | return;
|
---|
9370 | }
|
---|
9371 |
|
---|
9372 | ASSERT(state->dts_formats != NULL);
|
---|
9373 |
|
---|
9374 | for (i = 0; i < state->dts_nformats; i++) {
|
---|
9375 | char *fmt = state->dts_formats[i];
|
---|
9376 |
|
---|
9377 | if (fmt == NULL)
|
---|
9378 | continue;
|
---|
9379 |
|
---|
9380 | kmem_free(fmt, strlen(fmt) + 1);
|
---|
9381 | }
|
---|
9382 |
|
---|
9383 | kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
|
---|
9384 | state->dts_nformats = 0;
|
---|
9385 | state->dts_formats = NULL;
|
---|
9386 | }
|
---|
9387 |
|
---|
9388 | /*
|
---|
9389 | * DTrace Predicate Functions
|
---|
9390 | */
|
---|
9391 | static dtrace_predicate_t *
|
---|
9392 | dtrace_predicate_create(dtrace_difo_t *dp)
|
---|
9393 | {
|
---|
9394 | dtrace_predicate_t *pred;
|
---|
9395 |
|
---|
9396 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9397 | ASSERT(dp->dtdo_refcnt != 0);
|
---|
9398 |
|
---|
9399 | pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
|
---|
9400 | pred->dtp_difo = dp;
|
---|
9401 | pred->dtp_refcnt = 1;
|
---|
9402 |
|
---|
9403 | if (!dtrace_difo_cacheable(dp))
|
---|
9404 | return (pred);
|
---|
9405 |
|
---|
9406 | if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
|
---|
9407 | /*
|
---|
9408 | * This is only theoretically possible -- we have had 2^32
|
---|
9409 | * cacheable predicates on this machine. We cannot allow any
|
---|
9410 | * more predicates to become cacheable: as unlikely as it is,
|
---|
9411 | * there may be a thread caching a (now stale) predicate cache
|
---|
9412 | * ID. (N.B.: the temptation is being successfully resisted to
|
---|
9413 | * have this cmn_err() "Holy shit -- we executed this code!")
|
---|
9414 | */
|
---|
9415 | return (pred);
|
---|
9416 | }
|
---|
9417 |
|
---|
9418 | pred->dtp_cacheid = dtrace_predcache_id++;
|
---|
9419 |
|
---|
9420 | return (pred);
|
---|
9421 | }
|
---|
9422 |
|
---|
9423 | static void
|
---|
9424 | dtrace_predicate_hold(dtrace_predicate_t *pred)
|
---|
9425 | {
|
---|
9426 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9427 | ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
|
---|
9428 | ASSERT(pred->dtp_refcnt > 0);
|
---|
9429 |
|
---|
9430 | pred->dtp_refcnt++;
|
---|
9431 | }
|
---|
9432 |
|
---|
9433 | static void
|
---|
9434 | dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
|
---|
9435 | {
|
---|
9436 | #ifdef VBOX_STRICT
|
---|
9437 | dtrace_difo_t *dp = pred->dtp_difo;
|
---|
9438 | #endif
|
---|
9439 |
|
---|
9440 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9441 | ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
|
---|
9442 | ASSERT(pred->dtp_refcnt > 0);
|
---|
9443 |
|
---|
9444 | if (--pred->dtp_refcnt == 0) {
|
---|
9445 | dtrace_difo_release(pred->dtp_difo, vstate);
|
---|
9446 | kmem_free(pred, sizeof (dtrace_predicate_t));
|
---|
9447 | }
|
---|
9448 | }
|
---|
9449 |
|
---|
9450 | /*
|
---|
9451 | * DTrace Action Description Functions
|
---|
9452 | */
|
---|
9453 | static dtrace_actdesc_t *
|
---|
9454 | dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
|
---|
9455 | uint64_t uarg, uint64_t arg)
|
---|
9456 | {
|
---|
9457 | dtrace_actdesc_t *act;
|
---|
9458 |
|
---|
9459 | ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
|
---|
9460 | VBDT_IS_VALID_KRNL_ADDR(arg)) || (arg == NULL && kind == DTRACEACT_PRINTA));
|
---|
9461 |
|
---|
9462 | act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
|
---|
9463 | act->dtad_kind = kind;
|
---|
9464 | act->dtad_ntuple = ntuple;
|
---|
9465 | act->dtad_uarg = uarg;
|
---|
9466 | act->dtad_arg = arg;
|
---|
9467 | act->dtad_refcnt = 1;
|
---|
9468 |
|
---|
9469 | return (act);
|
---|
9470 | }
|
---|
9471 |
|
---|
9472 | static void
|
---|
9473 | dtrace_actdesc_hold(dtrace_actdesc_t *act)
|
---|
9474 | {
|
---|
9475 | ASSERT(act->dtad_refcnt >= 1);
|
---|
9476 | act->dtad_refcnt++;
|
---|
9477 | }
|
---|
9478 |
|
---|
9479 | static void
|
---|
9480 | dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
|
---|
9481 | {
|
---|
9482 | dtrace_actkind_t kind = act->dtad_kind;
|
---|
9483 | dtrace_difo_t *dp;
|
---|
9484 |
|
---|
9485 | ASSERT(act->dtad_refcnt >= 1);
|
---|
9486 |
|
---|
9487 | if (--act->dtad_refcnt != 0)
|
---|
9488 | return;
|
---|
9489 |
|
---|
9490 | if ((dp = act->dtad_difo) != NULL)
|
---|
9491 | dtrace_difo_release(dp, vstate);
|
---|
9492 |
|
---|
9493 | if (DTRACEACT_ISPRINTFLIKE(kind)) {
|
---|
9494 | char *str = (char *)(uintptr_t)act->dtad_arg;
|
---|
9495 |
|
---|
9496 | ASSERT((str != NULL && VBDT_IS_VALID_KRNL_ADDR((uintptr_t)str)) ||
|
---|
9497 | (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
|
---|
9498 |
|
---|
9499 | if (str != NULL)
|
---|
9500 | kmem_free(str, strlen(str) + 1);
|
---|
9501 | }
|
---|
9502 |
|
---|
9503 | kmem_free(act, sizeof (dtrace_actdesc_t));
|
---|
9504 | }
|
---|
9505 |
|
---|
9506 | /*
|
---|
9507 | * DTrace ECB Functions
|
---|
9508 | */
|
---|
9509 | static dtrace_ecb_t *
|
---|
9510 | dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
|
---|
9511 | {
|
---|
9512 | dtrace_ecb_t *ecb;
|
---|
9513 | dtrace_epid_t epid;
|
---|
9514 |
|
---|
9515 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9516 |
|
---|
9517 | ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
|
---|
9518 | ecb->dte_predicate = NULL;
|
---|
9519 | ecb->dte_probe = probe;
|
---|
9520 |
|
---|
9521 | /*
|
---|
9522 | * The default size is the size of the default action: recording
|
---|
9523 | * the epid.
|
---|
9524 | */
|
---|
9525 | ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
|
---|
9526 | ecb->dte_alignment = sizeof (dtrace_epid_t);
|
---|
9527 |
|
---|
9528 | epid = state->dts_epid++;
|
---|
9529 |
|
---|
9530 | if (VBDTCAST(int64_t)epid - 1 >= state->dts_necbs) {
|
---|
9531 | dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
|
---|
9532 | int necbs = state->dts_necbs << 1;
|
---|
9533 |
|
---|
9534 | ASSERT(epid == VBDTCAST(dtrace_epid_t)state->dts_necbs + 1);
|
---|
9535 |
|
---|
9536 | if (necbs == 0) {
|
---|
9537 | ASSERT(oecbs == NULL);
|
---|
9538 | necbs = 1;
|
---|
9539 | }
|
---|
9540 |
|
---|
9541 | ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
|
---|
9542 |
|
---|
9543 | if (oecbs != NULL)
|
---|
9544 | bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
|
---|
9545 |
|
---|
9546 | dtrace_membar_producer();
|
---|
9547 | state->dts_ecbs = ecbs;
|
---|
9548 |
|
---|
9549 | if (oecbs != NULL) {
|
---|
9550 | /*
|
---|
9551 | * If this state is active, we must dtrace_sync()
|
---|
9552 | * before we can free the old dts_ecbs array: we're
|
---|
9553 | * coming in hot, and there may be active ring
|
---|
9554 | * buffer processing (which indexes into the dts_ecbs
|
---|
9555 | * array) on another CPU.
|
---|
9556 | */
|
---|
9557 | if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
|
---|
9558 | dtrace_sync();
|
---|
9559 |
|
---|
9560 | kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
|
---|
9561 | }
|
---|
9562 |
|
---|
9563 | dtrace_membar_producer();
|
---|
9564 | state->dts_necbs = necbs;
|
---|
9565 | }
|
---|
9566 |
|
---|
9567 | ecb->dte_state = state;
|
---|
9568 |
|
---|
9569 | ASSERT(state->dts_ecbs[epid - 1] == NULL);
|
---|
9570 | dtrace_membar_producer();
|
---|
9571 | state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
|
---|
9572 |
|
---|
9573 | return (ecb);
|
---|
9574 | }
|
---|
9575 |
|
---|
9576 | static int
|
---|
9577 | dtrace_ecb_enable(dtrace_ecb_t *ecb)
|
---|
9578 | {
|
---|
9579 | dtrace_probe_t *probe = ecb->dte_probe;
|
---|
9580 |
|
---|
9581 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
9582 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9583 | ASSERT(ecb->dte_next == NULL);
|
---|
9584 |
|
---|
9585 | if (probe == NULL) {
|
---|
9586 | /*
|
---|
9587 | * This is the NULL probe -- there's nothing to do.
|
---|
9588 | */
|
---|
9589 | return (0);
|
---|
9590 | }
|
---|
9591 |
|
---|
9592 | if (probe->dtpr_ecb == NULL) {
|
---|
9593 | dtrace_provider_t *prov = probe->dtpr_provider;
|
---|
9594 |
|
---|
9595 | /*
|
---|
9596 | * We're the first ECB on this probe.
|
---|
9597 | */
|
---|
9598 | probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
|
---|
9599 |
|
---|
9600 | if (ecb->dte_predicate != NULL)
|
---|
9601 | probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
|
---|
9602 |
|
---|
9603 | return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
|
---|
9604 | probe->dtpr_id, probe->dtpr_arg));
|
---|
9605 | } else {
|
---|
9606 | /*
|
---|
9607 | * This probe is already active. Swing the last pointer to
|
---|
9608 | * point to the new ECB, and issue a dtrace_sync() to assure
|
---|
9609 | * that all CPUs have seen the change.
|
---|
9610 | */
|
---|
9611 | ASSERT(probe->dtpr_ecb_last != NULL);
|
---|
9612 | probe->dtpr_ecb_last->dte_next = ecb;
|
---|
9613 | probe->dtpr_ecb_last = ecb;
|
---|
9614 | probe->dtpr_predcache = 0;
|
---|
9615 |
|
---|
9616 | dtrace_sync();
|
---|
9617 | return (0);
|
---|
9618 | }
|
---|
9619 | }
|
---|
9620 |
|
---|
9621 | static void
|
---|
9622 | dtrace_ecb_resize(dtrace_ecb_t *ecb)
|
---|
9623 | {
|
---|
9624 | uint32_t maxalign = sizeof (dtrace_epid_t);
|
---|
9625 | uint32_t align = sizeof (uint8_t), offs, diff;
|
---|
9626 | dtrace_action_t *act;
|
---|
9627 | int wastuple = 0;
|
---|
9628 | uint32_t aggbase = UINT32_MAX;
|
---|
9629 | dtrace_state_t *state = ecb->dte_state;
|
---|
9630 |
|
---|
9631 | /*
|
---|
9632 | * If we record anything, we always record the epid. (And we always
|
---|
9633 | * record it first.)
|
---|
9634 | */
|
---|
9635 | offs = sizeof (dtrace_epid_t);
|
---|
9636 | ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
|
---|
9637 |
|
---|
9638 | for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
|
---|
9639 | dtrace_recdesc_t *rec = &act->dta_rec;
|
---|
9640 |
|
---|
9641 | if ((align = rec->dtrd_alignment) > maxalign)
|
---|
9642 | maxalign = align;
|
---|
9643 |
|
---|
9644 | if (!wastuple && act->dta_intuple) {
|
---|
9645 | /*
|
---|
9646 | * This is the first record in a tuple. Align the
|
---|
9647 | * offset to be at offset 4 in an 8-byte aligned
|
---|
9648 | * block.
|
---|
9649 | */
|
---|
9650 | diff = offs + sizeof (dtrace_aggid_t);
|
---|
9651 |
|
---|
9652 | if ((diff = (diff & (sizeof (uint64_t) - 1))))
|
---|
9653 | offs += sizeof (uint64_t) - diff;
|
---|
9654 |
|
---|
9655 | aggbase = offs - sizeof (dtrace_aggid_t);
|
---|
9656 | ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
|
---|
9657 | }
|
---|
9658 |
|
---|
9659 | /*LINTED*/
|
---|
9660 | if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
|
---|
9661 | /*
|
---|
9662 | * The current offset is not properly aligned; align it.
|
---|
9663 | */
|
---|
9664 | offs += align - diff;
|
---|
9665 | }
|
---|
9666 |
|
---|
9667 | rec->dtrd_offset = offs;
|
---|
9668 |
|
---|
9669 | if (offs + rec->dtrd_size > ecb->dte_needed) {
|
---|
9670 | ecb->dte_needed = offs + rec->dtrd_size;
|
---|
9671 |
|
---|
9672 | if (ecb->dte_needed > state->dts_needed)
|
---|
9673 | state->dts_needed = ecb->dte_needed;
|
---|
9674 | }
|
---|
9675 |
|
---|
9676 | if (DTRACEACT_ISAGG(act->dta_kind)) {
|
---|
9677 | dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
|
---|
9678 | dtrace_action_t *first = agg->dtag_first, *prev;
|
---|
9679 |
|
---|
9680 | ASSERT(rec->dtrd_size != 0 && first != NULL);
|
---|
9681 | ASSERT(wastuple);
|
---|
9682 | ASSERT(aggbase != UINT32_MAX);
|
---|
9683 |
|
---|
9684 | agg->dtag_base = aggbase;
|
---|
9685 |
|
---|
9686 | while ((prev = first->dta_prev) != NULL &&
|
---|
9687 | DTRACEACT_ISAGG(prev->dta_kind)) {
|
---|
9688 | agg = (dtrace_aggregation_t *)prev;
|
---|
9689 | first = agg->dtag_first;
|
---|
9690 | }
|
---|
9691 |
|
---|
9692 | if (prev != NULL) {
|
---|
9693 | offs = prev->dta_rec.dtrd_offset +
|
---|
9694 | prev->dta_rec.dtrd_size;
|
---|
9695 | } else {
|
---|
9696 | offs = sizeof (dtrace_epid_t);
|
---|
9697 | }
|
---|
9698 | wastuple = 0;
|
---|
9699 | } else {
|
---|
9700 | if (!act->dta_intuple)
|
---|
9701 | ecb->dte_size = offs + rec->dtrd_size;
|
---|
9702 |
|
---|
9703 | offs += rec->dtrd_size;
|
---|
9704 | }
|
---|
9705 |
|
---|
9706 | wastuple = act->dta_intuple;
|
---|
9707 | }
|
---|
9708 |
|
---|
9709 | if ((act = ecb->dte_action) != NULL &&
|
---|
9710 | !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
|
---|
9711 | ecb->dte_size == sizeof (dtrace_epid_t)) {
|
---|
9712 | /*
|
---|
9713 | * If the size is still sizeof (dtrace_epid_t), then all
|
---|
9714 | * actions store no data; set the size to 0.
|
---|
9715 | */
|
---|
9716 | ecb->dte_alignment = maxalign;
|
---|
9717 | ecb->dte_size = 0;
|
---|
9718 |
|
---|
9719 | /*
|
---|
9720 | * If the needed space is still sizeof (dtrace_epid_t), then
|
---|
9721 | * all actions need no additional space; set the needed
|
---|
9722 | * size to 0.
|
---|
9723 | */
|
---|
9724 | if (ecb->dte_needed == sizeof (dtrace_epid_t))
|
---|
9725 | ecb->dte_needed = 0;
|
---|
9726 |
|
---|
9727 | return;
|
---|
9728 | }
|
---|
9729 |
|
---|
9730 | /*
|
---|
9731 | * Set our alignment, and make sure that the dte_size and dte_needed
|
---|
9732 | * are aligned to the size of an EPID.
|
---|
9733 | */
|
---|
9734 | ecb->dte_alignment = maxalign;
|
---|
9735 | ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
|
---|
9736 | ~(sizeof (dtrace_epid_t) - 1);
|
---|
9737 | ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
|
---|
9738 | ~(sizeof (dtrace_epid_t) - 1);
|
---|
9739 | ASSERT(ecb->dte_size <= ecb->dte_needed);
|
---|
9740 | }
|
---|
9741 |
|
---|
9742 | static dtrace_action_t *
|
---|
9743 | dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
|
---|
9744 | {
|
---|
9745 | dtrace_aggregation_t *agg;
|
---|
9746 | size_t size = sizeof (uint64_t);
|
---|
9747 | int ntuple = desc->dtad_ntuple;
|
---|
9748 | dtrace_action_t *act;
|
---|
9749 | dtrace_recdesc_t *frec;
|
---|
9750 | dtrace_aggid_t aggid;
|
---|
9751 | dtrace_state_t *state = ecb->dte_state;
|
---|
9752 |
|
---|
9753 | agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
|
---|
9754 | agg->dtag_ecb = ecb;
|
---|
9755 |
|
---|
9756 | ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
|
---|
9757 |
|
---|
9758 | switch (desc->dtad_kind) {
|
---|
9759 | case DTRACEAGG_MIN:
|
---|
9760 | agg->dtag_initial = INT64_MAX;
|
---|
9761 | agg->dtag_aggregate = dtrace_aggregate_min;
|
---|
9762 | break;
|
---|
9763 |
|
---|
9764 | case DTRACEAGG_MAX:
|
---|
9765 | agg->dtag_initial = (uint64_t)INT64_MIN;
|
---|
9766 | agg->dtag_aggregate = dtrace_aggregate_max;
|
---|
9767 | break;
|
---|
9768 |
|
---|
9769 | case DTRACEAGG_COUNT:
|
---|
9770 | agg->dtag_aggregate = dtrace_aggregate_count;
|
---|
9771 | break;
|
---|
9772 |
|
---|
9773 | case DTRACEAGG_QUANTIZE:
|
---|
9774 | agg->dtag_aggregate = dtrace_aggregate_quantize;
|
---|
9775 | size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
|
---|
9776 | sizeof (uint64_t);
|
---|
9777 | break;
|
---|
9778 |
|
---|
9779 | case DTRACEAGG_LQUANTIZE: {
|
---|
9780 | uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
|
---|
9781 | uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
|
---|
9782 |
|
---|
9783 | agg->dtag_initial = desc->dtad_arg;
|
---|
9784 | agg->dtag_aggregate = dtrace_aggregate_lquantize;
|
---|
9785 |
|
---|
9786 | if (step == 0 || levels == 0)
|
---|
9787 | goto err;
|
---|
9788 |
|
---|
9789 | size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
|
---|
9790 | break;
|
---|
9791 | }
|
---|
9792 |
|
---|
9793 | case DTRACEAGG_AVG:
|
---|
9794 | agg->dtag_aggregate = dtrace_aggregate_avg;
|
---|
9795 | size = sizeof (uint64_t) * 2;
|
---|
9796 | break;
|
---|
9797 |
|
---|
9798 | case DTRACEAGG_STDDEV:
|
---|
9799 | agg->dtag_aggregate = dtrace_aggregate_stddev;
|
---|
9800 | size = sizeof (uint64_t) * 4;
|
---|
9801 | break;
|
---|
9802 |
|
---|
9803 | case DTRACEAGG_SUM:
|
---|
9804 | agg->dtag_aggregate = dtrace_aggregate_sum;
|
---|
9805 | break;
|
---|
9806 |
|
---|
9807 | default:
|
---|
9808 | goto err;
|
---|
9809 | }
|
---|
9810 |
|
---|
9811 | agg->dtag_action.dta_rec.dtrd_size = VBDTCAST(uint32_t)size;
|
---|
9812 |
|
---|
9813 | if (ntuple == 0)
|
---|
9814 | goto err;
|
---|
9815 |
|
---|
9816 | /*
|
---|
9817 | * We must make sure that we have enough actions for the n-tuple.
|
---|
9818 | */
|
---|
9819 | for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
|
---|
9820 | if (DTRACEACT_ISAGG(act->dta_kind))
|
---|
9821 | break;
|
---|
9822 |
|
---|
9823 | if (--ntuple == 0) {
|
---|
9824 | /*
|
---|
9825 | * This is the action with which our n-tuple begins.
|
---|
9826 | */
|
---|
9827 | agg->dtag_first = act;
|
---|
9828 | goto success;
|
---|
9829 | }
|
---|
9830 | }
|
---|
9831 |
|
---|
9832 | /*
|
---|
9833 | * This n-tuple is short by ntuple elements. Return failure.
|
---|
9834 | */
|
---|
9835 | ASSERT(ntuple != 0);
|
---|
9836 | err:
|
---|
9837 | kmem_free(agg, sizeof (dtrace_aggregation_t));
|
---|
9838 | return (NULL);
|
---|
9839 |
|
---|
9840 | success:
|
---|
9841 | /*
|
---|
9842 | * If the last action in the tuple has a size of zero, it's actually
|
---|
9843 | * an expression argument for the aggregating action.
|
---|
9844 | */
|
---|
9845 | ASSERT(ecb->dte_action_last != NULL);
|
---|
9846 | act = ecb->dte_action_last;
|
---|
9847 |
|
---|
9848 | if (act->dta_kind == DTRACEACT_DIFEXPR) {
|
---|
9849 | ASSERT(act->dta_difo != NULL);
|
---|
9850 |
|
---|
9851 | if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
|
---|
9852 | agg->dtag_hasarg = 1;
|
---|
9853 | }
|
---|
9854 |
|
---|
9855 | /*
|
---|
9856 | * We need to allocate an id for this aggregation.
|
---|
9857 | */
|
---|
9858 | aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
|
---|
9859 | VM_BESTFIT | VM_SLEEP);
|
---|
9860 |
|
---|
9861 | if (VBDTCAST(int64_t)aggid - 1 >= state->dts_naggregations) {
|
---|
9862 | dtrace_aggregation_t **oaggs = state->dts_aggregations;
|
---|
9863 | dtrace_aggregation_t **aggs;
|
---|
9864 | int naggs = state->dts_naggregations << 1;
|
---|
9865 | int onaggs = state->dts_naggregations;
|
---|
9866 |
|
---|
9867 | ASSERT(aggid == VBDTCAST(dtrace_aggid_t)state->dts_naggregations + 1);
|
---|
9868 |
|
---|
9869 | if (naggs == 0) {
|
---|
9870 | ASSERT(oaggs == NULL);
|
---|
9871 | naggs = 1;
|
---|
9872 | }
|
---|
9873 |
|
---|
9874 | aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
|
---|
9875 |
|
---|
9876 | if (oaggs != NULL) {
|
---|
9877 | bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
|
---|
9878 | kmem_free(oaggs, onaggs * sizeof (*aggs));
|
---|
9879 | }
|
---|
9880 |
|
---|
9881 | state->dts_aggregations = aggs;
|
---|
9882 | state->dts_naggregations = naggs;
|
---|
9883 | }
|
---|
9884 |
|
---|
9885 | ASSERT(state->dts_aggregations[aggid - 1] == NULL);
|
---|
9886 | state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
|
---|
9887 |
|
---|
9888 | frec = &agg->dtag_first->dta_rec;
|
---|
9889 | if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
|
---|
9890 | frec->dtrd_alignment = sizeof (dtrace_aggid_t);
|
---|
9891 |
|
---|
9892 | for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
|
---|
9893 | ASSERT(!act->dta_intuple);
|
---|
9894 | act->dta_intuple = 1;
|
---|
9895 | }
|
---|
9896 |
|
---|
9897 | return (&agg->dtag_action);
|
---|
9898 | }
|
---|
9899 |
|
---|
9900 | static void
|
---|
9901 | dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
|
---|
9902 | {
|
---|
9903 | dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
|
---|
9904 | dtrace_state_t *state = ecb->dte_state;
|
---|
9905 | dtrace_aggid_t aggid = agg->dtag_id;
|
---|
9906 |
|
---|
9907 | ASSERT(DTRACEACT_ISAGG(act->dta_kind));
|
---|
9908 | vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
|
---|
9909 |
|
---|
9910 | ASSERT(state->dts_aggregations[aggid - 1] == agg);
|
---|
9911 | state->dts_aggregations[aggid - 1] = NULL;
|
---|
9912 |
|
---|
9913 | kmem_free(agg, sizeof (dtrace_aggregation_t));
|
---|
9914 | }
|
---|
9915 |
|
---|
9916 | static int
|
---|
9917 | dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
|
---|
9918 | {
|
---|
9919 | dtrace_action_t *action, *last;
|
---|
9920 | dtrace_difo_t *dp = desc->dtad_difo;
|
---|
9921 | uint32_t size = 0, align = sizeof (uint8_t), mask;
|
---|
9922 | uint16_t format = 0;
|
---|
9923 | dtrace_recdesc_t *rec;
|
---|
9924 | dtrace_state_t *state = ecb->dte_state;
|
---|
9925 | dtrace_optval_t *opt = state->dts_options, nframes VBDTUNASS(0), strsize;
|
---|
9926 | uint64_t arg = desc->dtad_arg;
|
---|
9927 |
|
---|
9928 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
9929 | ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
|
---|
9930 |
|
---|
9931 | if (DTRACEACT_ISAGG(desc->dtad_kind)) {
|
---|
9932 | /*
|
---|
9933 | * If this is an aggregating action, there must be neither
|
---|
9934 | * a speculate nor a commit on the action chain.
|
---|
9935 | */
|
---|
9936 | dtrace_action_t *act;
|
---|
9937 |
|
---|
9938 | for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
|
---|
9939 | if (act->dta_kind == DTRACEACT_COMMIT)
|
---|
9940 | return (EINVAL);
|
---|
9941 |
|
---|
9942 | if (act->dta_kind == DTRACEACT_SPECULATE)
|
---|
9943 | return (EINVAL);
|
---|
9944 | }
|
---|
9945 |
|
---|
9946 | action = dtrace_ecb_aggregation_create(ecb, desc);
|
---|
9947 |
|
---|
9948 | if (action == NULL)
|
---|
9949 | return (EINVAL);
|
---|
9950 | } else {
|
---|
9951 | if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
|
---|
9952 | (desc->dtad_kind == DTRACEACT_DIFEXPR &&
|
---|
9953 | dp != NULL && dp->dtdo_destructive)) {
|
---|
9954 | state->dts_destructive = 1;
|
---|
9955 | }
|
---|
9956 |
|
---|
9957 | switch (desc->dtad_kind) {
|
---|
9958 | case DTRACEACT_PRINTF:
|
---|
9959 | case DTRACEACT_PRINTA:
|
---|
9960 | case DTRACEACT_SYSTEM:
|
---|
9961 | case DTRACEACT_FREOPEN:
|
---|
9962 | /*
|
---|
9963 | * We know that our arg is a string -- turn it into a
|
---|
9964 | * format.
|
---|
9965 | */
|
---|
9966 | if (arg == NULL) {
|
---|
9967 | ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
|
---|
9968 | format = 0;
|
---|
9969 | } else {
|
---|
9970 | ASSERT(arg != NULL);
|
---|
9971 | ASSERT(VBDT_IS_VALID_KRNL_ADDR(arg));
|
---|
9972 | format = dtrace_format_add(state,
|
---|
9973 | (char *)(uintptr_t)arg);
|
---|
9974 | }
|
---|
9975 |
|
---|
9976 | RT_FALL_THRU();
|
---|
9977 | case DTRACEACT_LIBACT:
|
---|
9978 | case DTRACEACT_DIFEXPR:
|
---|
9979 | if (dp == NULL)
|
---|
9980 | return (EINVAL);
|
---|
9981 |
|
---|
9982 | if ((size = dp->dtdo_rtype.dtdt_size) != 0)
|
---|
9983 | break;
|
---|
9984 |
|
---|
9985 | if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
|
---|
9986 | if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
|
---|
9987 | return (EINVAL);
|
---|
9988 |
|
---|
9989 | size = opt[DTRACEOPT_STRSIZE];
|
---|
9990 | }
|
---|
9991 |
|
---|
9992 | break;
|
---|
9993 |
|
---|
9994 | case DTRACEACT_STACK:
|
---|
9995 | if ((nframes = arg) == 0) {
|
---|
9996 | nframes = opt[DTRACEOPT_STACKFRAMES];
|
---|
9997 | ASSERT(nframes > 0);
|
---|
9998 | arg = nframes;
|
---|
9999 | }
|
---|
10000 |
|
---|
10001 | size = VBDTCAST(uint32_t)(nframes * sizeof (pc_t));
|
---|
10002 | break;
|
---|
10003 |
|
---|
10004 | case DTRACEACT_JSTACK:
|
---|
10005 | if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
|
---|
10006 | strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
|
---|
10007 |
|
---|
10008 | if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
|
---|
10009 | nframes = opt[DTRACEOPT_JSTACKFRAMES];
|
---|
10010 |
|
---|
10011 | arg = DTRACE_USTACK_ARG(nframes, strsize);
|
---|
10012 |
|
---|
10013 | RT_FALL_THRU();
|
---|
10014 | case DTRACEACT_USTACK:
|
---|
10015 | if (desc->dtad_kind != DTRACEACT_JSTACK &&
|
---|
10016 | (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
|
---|
10017 | strsize = DTRACE_USTACK_STRSIZE(arg);
|
---|
10018 | nframes = opt[DTRACEOPT_USTACKFRAMES];
|
---|
10019 | ASSERT(nframes > 0);
|
---|
10020 | arg = DTRACE_USTACK_ARG(nframes, strsize);
|
---|
10021 | }
|
---|
10022 |
|
---|
10023 | /*
|
---|
10024 | * Save a slot for the pid.
|
---|
10025 | */
|
---|
10026 | size = VBDTCAST(uint32_t)((nframes + 1) * sizeof (uint64_t));
|
---|
10027 | size += DTRACE_USTACK_STRSIZE(arg);
|
---|
10028 | size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
|
---|
10029 |
|
---|
10030 | break;
|
---|
10031 |
|
---|
10032 | case DTRACEACT_SYM:
|
---|
10033 | case DTRACEACT_MOD:
|
---|
10034 | if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
|
---|
10035 | sizeof (uint64_t)) ||
|
---|
10036 | (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
|
---|
10037 | return (EINVAL);
|
---|
10038 | break;
|
---|
10039 |
|
---|
10040 | case DTRACEACT_USYM:
|
---|
10041 | case DTRACEACT_UMOD:
|
---|
10042 | case DTRACEACT_UADDR:
|
---|
10043 | if (dp == NULL ||
|
---|
10044 | (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
|
---|
10045 | (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
|
---|
10046 | return (EINVAL);
|
---|
10047 |
|
---|
10048 | /*
|
---|
10049 | * We have a slot for the pid, plus a slot for the
|
---|
10050 | * argument. To keep things simple (aligned with
|
---|
10051 | * bitness-neutral sizing), we store each as a 64-bit
|
---|
10052 | * quantity.
|
---|
10053 | */
|
---|
10054 | size = 2 * sizeof (uint64_t);
|
---|
10055 | break;
|
---|
10056 |
|
---|
10057 | case DTRACEACT_STOP:
|
---|
10058 | case DTRACEACT_BREAKPOINT:
|
---|
10059 | case DTRACEACT_PANIC:
|
---|
10060 | break;
|
---|
10061 |
|
---|
10062 | case DTRACEACT_CHILL:
|
---|
10063 | case DTRACEACT_DISCARD:
|
---|
10064 | case DTRACEACT_RAISE:
|
---|
10065 | if (dp == NULL)
|
---|
10066 | return (EINVAL);
|
---|
10067 | break;
|
---|
10068 |
|
---|
10069 | case DTRACEACT_EXIT:
|
---|
10070 | if (dp == NULL ||
|
---|
10071 | (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
|
---|
10072 | (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
|
---|
10073 | return (EINVAL);
|
---|
10074 | break;
|
---|
10075 |
|
---|
10076 | case DTRACEACT_SPECULATE:
|
---|
10077 | if (ecb->dte_size > sizeof (dtrace_epid_t))
|
---|
10078 | return (EINVAL);
|
---|
10079 |
|
---|
10080 | if (dp == NULL)
|
---|
10081 | return (EINVAL);
|
---|
10082 |
|
---|
10083 | state->dts_speculates = 1;
|
---|
10084 | break;
|
---|
10085 |
|
---|
10086 | case DTRACEACT_COMMIT: {
|
---|
10087 | dtrace_action_t *act = ecb->dte_action;
|
---|
10088 |
|
---|
10089 | for (; act != NULL; act = act->dta_next) {
|
---|
10090 | if (act->dta_kind == DTRACEACT_COMMIT)
|
---|
10091 | return (EINVAL);
|
---|
10092 | }
|
---|
10093 |
|
---|
10094 | if (dp == NULL)
|
---|
10095 | return (EINVAL);
|
---|
10096 | break;
|
---|
10097 | }
|
---|
10098 |
|
---|
10099 | default:
|
---|
10100 | return (EINVAL);
|
---|
10101 | }
|
---|
10102 |
|
---|
10103 | if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
|
---|
10104 | /*
|
---|
10105 | * If this is a data-storing action or a speculate,
|
---|
10106 | * we must be sure that there isn't a commit on the
|
---|
10107 | * action chain.
|
---|
10108 | */
|
---|
10109 | dtrace_action_t *act = ecb->dte_action;
|
---|
10110 |
|
---|
10111 | for (; act != NULL; act = act->dta_next) {
|
---|
10112 | if (act->dta_kind == DTRACEACT_COMMIT)
|
---|
10113 | return (EINVAL);
|
---|
10114 | }
|
---|
10115 | }
|
---|
10116 |
|
---|
10117 | action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
|
---|
10118 | action->dta_rec.dtrd_size = size;
|
---|
10119 | }
|
---|
10120 |
|
---|
10121 | action->dta_refcnt = 1;
|
---|
10122 | rec = &action->dta_rec;
|
---|
10123 | size = rec->dtrd_size;
|
---|
10124 |
|
---|
10125 | for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
|
---|
10126 | if (!(size & mask)) {
|
---|
10127 | align = mask + 1;
|
---|
10128 | break;
|
---|
10129 | }
|
---|
10130 | }
|
---|
10131 |
|
---|
10132 | action->dta_kind = desc->dtad_kind;
|
---|
10133 |
|
---|
10134 | if ((action->dta_difo = dp) != NULL)
|
---|
10135 | dtrace_difo_hold(dp);
|
---|
10136 |
|
---|
10137 | rec->dtrd_action = action->dta_kind;
|
---|
10138 | rec->dtrd_arg = arg;
|
---|
10139 | rec->dtrd_uarg = desc->dtad_uarg;
|
---|
10140 | rec->dtrd_alignment = (uint16_t)align;
|
---|
10141 | rec->dtrd_format = format;
|
---|
10142 |
|
---|
10143 | if ((last = ecb->dte_action_last) != NULL) {
|
---|
10144 | ASSERT(ecb->dte_action != NULL);
|
---|
10145 | action->dta_prev = last;
|
---|
10146 | last->dta_next = action;
|
---|
10147 | } else {
|
---|
10148 | ASSERT(ecb->dte_action == NULL);
|
---|
10149 | ecb->dte_action = action;
|
---|
10150 | }
|
---|
10151 |
|
---|
10152 | ecb->dte_action_last = action;
|
---|
10153 |
|
---|
10154 | return (0);
|
---|
10155 | }
|
---|
10156 |
|
---|
10157 | static void
|
---|
10158 | dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
|
---|
10159 | {
|
---|
10160 | dtrace_action_t *act = ecb->dte_action, *next;
|
---|
10161 | dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
|
---|
10162 | dtrace_difo_t *dp;
|
---|
10163 | uint16_t format;
|
---|
10164 |
|
---|
10165 | if (act != NULL && act->dta_refcnt > 1) {
|
---|
10166 | ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
|
---|
10167 | act->dta_refcnt--;
|
---|
10168 | } else {
|
---|
10169 | for (; act != NULL; act = next) {
|
---|
10170 | next = act->dta_next;
|
---|
10171 | ASSERT(next != NULL || act == ecb->dte_action_last);
|
---|
10172 | ASSERT(act->dta_refcnt == 1);
|
---|
10173 |
|
---|
10174 | if ((format = act->dta_rec.dtrd_format) != 0)
|
---|
10175 | dtrace_format_remove(ecb->dte_state, format);
|
---|
10176 |
|
---|
10177 | if ((dp = act->dta_difo) != NULL)
|
---|
10178 | dtrace_difo_release(dp, vstate);
|
---|
10179 |
|
---|
10180 | if (DTRACEACT_ISAGG(act->dta_kind)) {
|
---|
10181 | dtrace_ecb_aggregation_destroy(ecb, act);
|
---|
10182 | } else {
|
---|
10183 | kmem_free(act, sizeof (dtrace_action_t));
|
---|
10184 | }
|
---|
10185 | }
|
---|
10186 | }
|
---|
10187 |
|
---|
10188 | ecb->dte_action = NULL;
|
---|
10189 | ecb->dte_action_last = NULL;
|
---|
10190 | ecb->dte_size = sizeof (dtrace_epid_t);
|
---|
10191 | }
|
---|
10192 |
|
---|
10193 | static void
|
---|
10194 | dtrace_ecb_disable(dtrace_ecb_t *ecb)
|
---|
10195 | {
|
---|
10196 | /*
|
---|
10197 | * We disable the ECB by removing it from its probe.
|
---|
10198 | */
|
---|
10199 | dtrace_ecb_t *pecb, *prev = NULL;
|
---|
10200 | dtrace_probe_t *probe = ecb->dte_probe;
|
---|
10201 |
|
---|
10202 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
10203 |
|
---|
10204 | if (probe == NULL) {
|
---|
10205 | /*
|
---|
10206 | * This is the NULL probe; there is nothing to disable.
|
---|
10207 | */
|
---|
10208 | return;
|
---|
10209 | }
|
---|
10210 |
|
---|
10211 | for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
|
---|
10212 | if (pecb == ecb)
|
---|
10213 | break;
|
---|
10214 | prev = pecb;
|
---|
10215 | }
|
---|
10216 |
|
---|
10217 | ASSERT(pecb != NULL);
|
---|
10218 |
|
---|
10219 | if (prev == NULL) {
|
---|
10220 | probe->dtpr_ecb = ecb->dte_next;
|
---|
10221 | } else {
|
---|
10222 | prev->dte_next = ecb->dte_next;
|
---|
10223 | }
|
---|
10224 |
|
---|
10225 | if (ecb == probe->dtpr_ecb_last) {
|
---|
10226 | ASSERT(ecb->dte_next == NULL);
|
---|
10227 | probe->dtpr_ecb_last = prev;
|
---|
10228 | }
|
---|
10229 |
|
---|
10230 | /*
|
---|
10231 | * The ECB has been disconnected from the probe; now sync to assure
|
---|
10232 | * that all CPUs have seen the change before returning.
|
---|
10233 | */
|
---|
10234 | dtrace_sync();
|
---|
10235 |
|
---|
10236 | if (probe->dtpr_ecb == NULL) {
|
---|
10237 | /*
|
---|
10238 | * That was the last ECB on the probe; clear the predicate
|
---|
10239 | * cache ID for the probe, disable it and sync one more time
|
---|
10240 | * to assure that we'll never hit it again.
|
---|
10241 | */
|
---|
10242 | dtrace_provider_t *prov = probe->dtpr_provider;
|
---|
10243 |
|
---|
10244 | ASSERT(ecb->dte_next == NULL);
|
---|
10245 | ASSERT(probe->dtpr_ecb_last == NULL);
|
---|
10246 | probe->dtpr_predcache = DTRACE_CACHEIDNONE;
|
---|
10247 | prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
|
---|
10248 | probe->dtpr_id, probe->dtpr_arg);
|
---|
10249 | dtrace_sync();
|
---|
10250 | } else {
|
---|
10251 | /*
|
---|
10252 | * There is at least one ECB remaining on the probe. If there
|
---|
10253 | * is _exactly_ one, set the probe's predicate cache ID to be
|
---|
10254 | * the predicate cache ID of the remaining ECB.
|
---|
10255 | */
|
---|
10256 | ASSERT(probe->dtpr_ecb_last != NULL);
|
---|
10257 | ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
|
---|
10258 |
|
---|
10259 | if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
|
---|
10260 | dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
|
---|
10261 |
|
---|
10262 | ASSERT(probe->dtpr_ecb->dte_next == NULL);
|
---|
10263 |
|
---|
10264 | if (p != NULL)
|
---|
10265 | probe->dtpr_predcache = p->dtp_cacheid;
|
---|
10266 | }
|
---|
10267 |
|
---|
10268 | ecb->dte_next = NULL;
|
---|
10269 | }
|
---|
10270 | }
|
---|
10271 |
|
---|
10272 | static void
|
---|
10273 | dtrace_ecb_destroy(dtrace_ecb_t *ecb)
|
---|
10274 | {
|
---|
10275 | dtrace_state_t *state = ecb->dte_state;
|
---|
10276 | dtrace_vstate_t *vstate = &state->dts_vstate;
|
---|
10277 | dtrace_predicate_t *pred;
|
---|
10278 | dtrace_epid_t epid = ecb->dte_epid;
|
---|
10279 |
|
---|
10280 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
10281 | ASSERT(ecb->dte_next == NULL);
|
---|
10282 | ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
|
---|
10283 |
|
---|
10284 | if ((pred = ecb->dte_predicate) != NULL)
|
---|
10285 | dtrace_predicate_release(pred, vstate);
|
---|
10286 |
|
---|
10287 | dtrace_ecb_action_remove(ecb);
|
---|
10288 |
|
---|
10289 | ASSERT(state->dts_ecbs[epid - 1] == ecb);
|
---|
10290 | state->dts_ecbs[epid - 1] = NULL;
|
---|
10291 |
|
---|
10292 | kmem_free(ecb, sizeof (dtrace_ecb_t));
|
---|
10293 | }
|
---|
10294 |
|
---|
10295 | static dtrace_ecb_t *
|
---|
10296 | dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
|
---|
10297 | dtrace_enabling_t *enab)
|
---|
10298 | {
|
---|
10299 | dtrace_ecb_t *ecb;
|
---|
10300 | dtrace_predicate_t *pred;
|
---|
10301 | dtrace_actdesc_t *act;
|
---|
10302 | dtrace_provider_t *prov;
|
---|
10303 | dtrace_ecbdesc_t *desc = enab->dten_current;
|
---|
10304 |
|
---|
10305 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
10306 | ASSERT(state != NULL);
|
---|
10307 |
|
---|
10308 | ecb = dtrace_ecb_add(state, probe);
|
---|
10309 | ecb->dte_uarg = desc->dted_uarg;
|
---|
10310 |
|
---|
10311 | if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
|
---|
10312 | dtrace_predicate_hold(pred);
|
---|
10313 | ecb->dte_predicate = pred;
|
---|
10314 | }
|
---|
10315 |
|
---|
10316 | if (probe != NULL) {
|
---|
10317 | /*
|
---|
10318 | * If the provider shows more leg than the consumer is old
|
---|
10319 | * enough to see, we need to enable the appropriate implicit
|
---|
10320 | * predicate bits to prevent the ecb from activating at
|
---|
10321 | * revealing times.
|
---|
10322 | *
|
---|
10323 | * Providers specifying DTRACE_PRIV_USER at register time
|
---|
10324 | * are stating that they need the /proc-style privilege
|
---|
10325 | * model to be enforced, and this is what DTRACE_COND_OWNER
|
---|
10326 | * and DTRACE_COND_ZONEOWNER will then do at probe time.
|
---|
10327 | */
|
---|
10328 | prov = probe->dtpr_provider;
|
---|
10329 | if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
|
---|
10330 | (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
|
---|
10331 | ecb->dte_cond |= DTRACE_COND_OWNER;
|
---|
10332 |
|
---|
10333 | if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
|
---|
10334 | (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
|
---|
10335 | ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
|
---|
10336 |
|
---|
10337 | /*
|
---|
10338 | * If the provider shows us kernel innards and the user
|
---|
10339 | * is lacking sufficient privilege, enable the
|
---|
10340 | * DTRACE_COND_USERMODE implicit predicate.
|
---|
10341 | */
|
---|
10342 | if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
|
---|
10343 | (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
|
---|
10344 | ecb->dte_cond |= DTRACE_COND_USERMODE;
|
---|
10345 | }
|
---|
10346 |
|
---|
10347 | if (dtrace_ecb_create_cache != NULL) {
|
---|
10348 | /*
|
---|
10349 | * If we have a cached ecb, we'll use its action list instead
|
---|
10350 | * of creating our own (saving both time and space).
|
---|
10351 | */
|
---|
10352 | dtrace_ecb_t *cached = dtrace_ecb_create_cache;
|
---|
10353 | dtrace_action_t *act2 = cached->dte_action;
|
---|
10354 |
|
---|
10355 | if (act2 != NULL) {
|
---|
10356 | ASSERT(act2->dta_refcnt > 0);
|
---|
10357 | act2->dta_refcnt++;
|
---|
10358 | ecb->dte_action = act2;
|
---|
10359 | ecb->dte_action_last = cached->dte_action_last;
|
---|
10360 | ecb->dte_needed = cached->dte_needed;
|
---|
10361 | ecb->dte_size = cached->dte_size;
|
---|
10362 | ecb->dte_alignment = cached->dte_alignment;
|
---|
10363 | }
|
---|
10364 |
|
---|
10365 | return (ecb);
|
---|
10366 | }
|
---|
10367 |
|
---|
10368 | for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
|
---|
10369 | if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
|
---|
10370 | dtrace_ecb_destroy(ecb);
|
---|
10371 | return (NULL);
|
---|
10372 | }
|
---|
10373 | }
|
---|
10374 |
|
---|
10375 | dtrace_ecb_resize(ecb);
|
---|
10376 |
|
---|
10377 | return (dtrace_ecb_create_cache = ecb);
|
---|
10378 | }
|
---|
10379 |
|
---|
10380 | static int
|
---|
10381 | dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
|
---|
10382 | {
|
---|
10383 | dtrace_ecb_t *ecb;
|
---|
10384 | dtrace_enabling_t *enab = arg;
|
---|
10385 | dtrace_state_t *state = enab->dten_vstate->dtvs_state;
|
---|
10386 |
|
---|
10387 | ASSERT(state != NULL);
|
---|
10388 |
|
---|
10389 | if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
|
---|
10390 | /*
|
---|
10391 | * This probe was created in a generation for which this
|
---|
10392 | * enabling has previously created ECBs; we don't want to
|
---|
10393 | * enable it again, so just kick out.
|
---|
10394 | */
|
---|
10395 | return (DTRACE_MATCH_NEXT);
|
---|
10396 | }
|
---|
10397 |
|
---|
10398 | if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
|
---|
10399 | return (DTRACE_MATCH_DONE);
|
---|
10400 |
|
---|
10401 | if (dtrace_ecb_enable(ecb) < 0)
|
---|
10402 | return (DTRACE_MATCH_FAIL);
|
---|
10403 |
|
---|
10404 | return (DTRACE_MATCH_NEXT);
|
---|
10405 | }
|
---|
10406 |
|
---|
10407 | static dtrace_ecb_t *
|
---|
10408 | dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
|
---|
10409 | {
|
---|
10410 | dtrace_ecb_t *ecb; NOREF(ecb);
|
---|
10411 |
|
---|
10412 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
10413 |
|
---|
10414 | if (id == 0 || VBDTCAST(int64_t)id > state->dts_necbs)
|
---|
10415 | return (NULL);
|
---|
10416 |
|
---|
10417 | ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
|
---|
10418 | ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
|
---|
10419 |
|
---|
10420 | return (state->dts_ecbs[id - 1]);
|
---|
10421 | }
|
---|
10422 |
|
---|
10423 | static dtrace_aggregation_t *
|
---|
10424 | dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
|
---|
10425 | {
|
---|
10426 | dtrace_aggregation_t *agg; NOREF(agg);
|
---|
10427 |
|
---|
10428 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
10429 |
|
---|
10430 | if (id == 0 || VBDTCAST(int64_t)id > state->dts_naggregations)
|
---|
10431 | return (NULL);
|
---|
10432 |
|
---|
10433 | ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
|
---|
10434 | ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
|
---|
10435 | agg->dtag_id == id);
|
---|
10436 |
|
---|
10437 | return (state->dts_aggregations[id - 1]);
|
---|
10438 | }
|
---|
10439 |
|
---|
10440 | /*
|
---|
10441 | * DTrace Buffer Functions
|
---|
10442 | *
|
---|
10443 | * The following functions manipulate DTrace buffers. Most of these functions
|
---|
10444 | * are called in the context of establishing or processing consumer state;
|
---|
10445 | * exceptions are explicitly noted.
|
---|
10446 | */
|
---|
10447 |
|
---|
10448 | /*
|
---|
10449 | * Note: called from cross call context. This function switches the two
|
---|
10450 | * buffers on a given CPU. The atomicity of this operation is assured by
|
---|
10451 | * disabling interrupts while the actual switch takes place; the disabling of
|
---|
10452 | * interrupts serializes the execution with any execution of dtrace_probe() on
|
---|
10453 | * the same CPU.
|
---|
10454 | */
|
---|
10455 | static void
|
---|
10456 | dtrace_buffer_switch(dtrace_buffer_t *buf)
|
---|
10457 | {
|
---|
10458 | caddr_t tomax = buf->dtb_tomax;
|
---|
10459 | caddr_t xamot = buf->dtb_xamot;
|
---|
10460 | dtrace_icookie_t cookie;
|
---|
10461 |
|
---|
10462 | ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
|
---|
10463 | ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
|
---|
10464 |
|
---|
10465 | cookie = dtrace_interrupt_disable();
|
---|
10466 | buf->dtb_tomax = xamot;
|
---|
10467 | buf->dtb_xamot = tomax;
|
---|
10468 | buf->dtb_xamot_drops = buf->dtb_drops;
|
---|
10469 | buf->dtb_xamot_offset = buf->dtb_offset;
|
---|
10470 | buf->dtb_xamot_errors = buf->dtb_errors;
|
---|
10471 | buf->dtb_xamot_flags = buf->dtb_flags;
|
---|
10472 | buf->dtb_offset = 0;
|
---|
10473 | buf->dtb_drops = 0;
|
---|
10474 | buf->dtb_errors = 0;
|
---|
10475 | buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
|
---|
10476 | dtrace_interrupt_enable(cookie);
|
---|
10477 | }
|
---|
10478 |
|
---|
#ifdef VBOX
/*
 * Callback adapter for dtrace_buffer_switch(): pvUser1 carries the
 * dtrace_buffer_t to switch; idCpu and pvUser2 are unused.
 */
static DECLCALLBACK(void) dtrace_buffer_switch_wrapper(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
	dtrace_buffer_t *pBuf = (dtrace_buffer_t *)pvUser1;

	NOREF(idCpu);
	NOREF(pvUser2);

	dtrace_buffer_switch(pBuf);
}
#endif
10486 |
|
---|
10487 | /*
|
---|
10488 | * Note: called from cross call context. This function activates a buffer
|
---|
10489 | * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation
|
---|
10490 | * is guaranteed by the disabling of interrupts.
|
---|
10491 | */
|
---|
10492 | static void
|
---|
10493 | dtrace_buffer_activate(dtrace_state_t *state)
|
---|
10494 | {
|
---|
10495 | dtrace_buffer_t *buf;
|
---|
10496 | dtrace_icookie_t cookie = dtrace_interrupt_disable();
|
---|
10497 |
|
---|
10498 | buf = &state->dts_buffer[VBDT_GET_CPUID()];
|
---|
10499 |
|
---|
10500 | if (buf->dtb_tomax != NULL) {
|
---|
10501 | /*
|
---|
10502 | * We might like to assert that the buffer is marked inactive,
|
---|
10503 | * but this isn't necessarily true: the buffer for the CPU
|
---|
10504 | * that processes the BEGIN probe has its buffer activated
|
---|
10505 | * manually. In this case, we take the (harmless) action
|
---|
10506 | * re-clearing the bit INACTIVE bit.
|
---|
10507 | */
|
---|
10508 | buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
|
---|
10509 | }
|
---|
10510 |
|
---|
10511 | dtrace_interrupt_enable(cookie);
|
---|
10512 | }
|
---|
10513 |
|
---|
#ifdef VBOX
/*
 * Callback adapter for dtrace_buffer_activate(): pvUser1 carries the
 * dtrace_state_t whose buffer is to be activated; idCpu and pvUser2 are
 * unused.
 */
static DECLCALLBACK(void) dtrace_buffer_activate_wrapper(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
	dtrace_state_t *pState = (dtrace_state_t *)pvUser1;

	NOREF(idCpu);
	NOREF(pvUser2);

	dtrace_buffer_activate(pState);
}
#endif
10521 |
|
---|
/*
 * Allocate the data areas for an array of per-CPU buffers.
 *
 *   bufs   array of per-CPU buffers, indexed by CPU id
 *   size   size in bytes of each data area
 *   flags  DTRACEBUF_* flags stored into each newly allocated buffer
 *   cpu    a specific CPU id, or DTRACE_CPUALL for every CPU
 *
 * Each selected CPU gets an active area (dtb_tomax) and, unless
 * DTRACEBUF_NOSWITCH is set in flags, an inactive area (dtb_xamot).
 *
 * Returns 0 on success, EFBIG if the size exceeds dtrace_nonroot_maxsize
 * (and, in non-VBOX builds, the caller lacks full privilege), or ENOMEM if
 * an allocation fails.  Both cpu_lock and dtrace_lock must be held.
 */
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu)
{
#ifndef VBOX
	cpu_t *cp;
#else
	RTCPUSET CpuSet;
	unsigned iCpu;
#endif
	dtrace_buffer_t *buf;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&dtrace_lock));

	/*
	 * In VBOX builds the privilege check is compiled out, so the size
	 * limit applies unconditionally.
	 */
	if (VBDTCAST(int64_t)size > dtrace_nonroot_maxsize
#ifndef VBOX
	    && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)
#endif
	    )
		return (EFBIG);

#ifndef VBOX
	cp = cpu_list;
#else
	RTMpGetSet(&CpuSet);
#endif

	/*
	 * Walk the selected CPUs.  The two build variants differ only in the
	 * loop header and in how the CPU id is obtained; the loop body below
	 * the #endif is shared.
	 */
#ifndef VBOX
	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];
#else
	for (iCpu = 0; iCpu < RTCPUSET_MAX_CPUS; iCpu++) {
		if (   !RTCpuSetIsMember(&CpuSet, iCpu)
		    || (cpu != (processorid_t)DTRACE_CPUALL && cpu != iCpu))
			continue;

		buf = &bufs[iCpu];
#endif

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		/* KM_NOSLEEP: the allocation may fail and return NULL. */
		if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		/* A no-switch buffer has no inactive side. */
		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;
#ifndef VBOX
	} while ((cp = cp->cpu_next) != cpu_list);
#else
	}
#endif

	return (0);

err:
	/*
	 * An allocation failed: free both data areas of every selected CPU
	 * and reset the buffer to the unallocated state.
	 * NOTE(review): this loop frees any non-NULL area it finds, so it
	 * appears to also release buffers that existed before this call
	 * (the DR case above) -- confirm that this is intended.
	 */
#ifndef VBOX
	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];
#else
	for (iCpu = 0; iCpu < RTCPUSET_MAX_CPUS; iCpu++) {
		if (   !RTCpuSetIsMember(&CpuSet, iCpu)
		    || (cpu != (processorid_t)DTRACE_CPUALL && cpu != iCpu))
			continue;

		buf = &bufs[iCpu];
#endif

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;
#ifndef VBOX
	} while ((cp = cp->cpu_next) != cpu_list);
#else
	}
#endif

	return (ENOMEM);
}
10638 |
|
---|
10639 | /*
|
---|
10640 | * Note: called from probe context. This function just increments the drop
|
---|
10641 | * count on a buffer. It has been made a function to allow for the
|
---|
10642 | * possibility of understanding the source of mysterious drop counts. (A
|
---|
10643 | * problem for which one may be particularly disappointed that DTrace cannot
|
---|
10644 | * be used to understand DTrace.)
|
---|
10645 | */
|
---|
10646 | static void
|
---|
10647 | dtrace_buffer_drop(dtrace_buffer_t *buf)
|
---|
10648 | {
|
---|
10649 | buf->dtb_drops++;
|
---|
10650 | }
|
---|
10651 |
|
---|
/*
 * Note: called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 *
 *   buf     the buffer to reserve space in
 *   needed  number of bytes to reserve
 *   align   required alignment of the returned offset; the code masks with
 *           (align - 1), so it is treated as a power of two
 *   state   consumer state, consulted for fill and ring buffers
 *   mstate  machine state to receive the scratch region, or NULL
 *
 * Three buffer policies are handled: plain switching buffers (neither
 * DTRACEBUF_RING nor DTRACEBUF_FILL set), fill buffers and ring buffers.
 * Note that buf->dtb_offset is not advanced here except in the ring-buffer
 * edge case below where it is explicitly reset to 0.
 */
static intptr_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	intptr_t offs = buf->dtb_offset, soffs;
	intptr_t woffs;
	caddr_t tomax;
	size_t total;

	/* An inactive buffer refuses the reservation without counting a drop. */
	if (buf->dtb_flags & DTRACEBUF_INACTIVE)
		return (-1);

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return (-1);
	}

	/*
	 * Plain switching buffer: pad up to the alignment, check that the
	 * record fits, and use the remainder of the buffer as scratch space.
	 */
	if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
		while (offs & (align - 1)) {
			/*
			 * Assert that our alignment is off by a number which
			 * is itself sizeof (uint32_t) aligned.
			 */
			ASSERT(!((align - (offs & (align - 1))) &
			    (sizeof (uint32_t) - 1)));
			DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
			offs += sizeof (uint32_t);
		}

		if (VBDTCAST(uintptr_t)(soffs = offs + needed) > buf->dtb_size) {
			dtrace_buffer_drop(buf);
			return (-1);
		}

		if (mstate == NULL)
			return (offs);

		mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
		mstate->dtms_scratch_size = buf->dtb_size - soffs;
		mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

		return (offs);
	}

	/*
	 * Fill buffer: once marked full it accepts nothing more, unless the
	 * state is in the COOLDOWN activity.
	 */
	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
		    (buf->dtb_flags & DTRACEBUF_FULL))
			return (-1);
		goto out;
	}

	total = needed + (offs & (align - 1));

	/*
	 * For a ring buffer, life is quite a bit more complicated.  Before
	 * we can store any padding, we need to adjust our wrapping offset.
	 * (If we've never before wrapped or we're not about to, no adjustment
	 * is required.)
	 */
	if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
	    offs + total > buf->dtb_size) {
		/* For ring buffers dtb_xamot_offset holds the wrapped offset. */
		woffs = buf->dtb_xamot_offset;

		if (offs + total > buf->dtb_size) {
			/*
			 * We can't fit in the end of the buffer.  First, a
			 * sanity check that we can fit in the buffer at all.
			 */
			if (total > buf->dtb_size) {
				dtrace_buffer_drop(buf);
				return (-1);
			}

			/*
			 * We're going to be storing at the top of the buffer,
			 * so now we need to deal with the wrapped offset.  We
			 * only reset our wrapped offset to 0 if it is
			 * currently greater than the current offset.  If it
			 * is less than the current offset, it is because a
			 * previous allocation induced a wrap -- but the
			 * allocation didn't subsequently take the space due
			 * to an error or false predicate evaluation.  In this
			 * case, we'll just leave the wrapped offset alone: if
			 * the wrapped offset hasn't been advanced far enough
			 * for this allocation, it will be adjusted in the
			 * lower loop.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				if (woffs >= offs)
					woffs = 0;
			} else {
				woffs = 0;
			}

			/*
			 * Now we know that we're going to be storing to the
			 * top of the buffer and that there is room for us
			 * there.  We need to clear the buffer from the current
			 * offset to the end (there may be old gunk there).
			 */
			while (VBDTCAST(uintptr_t)offs < buf->dtb_size)
				tomax[offs++] = 0;

			/*
			 * We need to set our offset to zero.  And because we
			 * are wrapping, we need to set the bit indicating as
			 * much.  We can also adjust our needed space back
			 * down to the space required by the ECB -- we know
			 * that the top of the buffer is aligned.
			 */
			offs = 0;
			total = needed;
			buf->dtb_flags |= DTRACEBUF_WRAPPED;
		} else {
			/*
			 * There is room for us in the buffer, so we simply
			 * need to check the wrapped offset.
			 */
			if (woffs < offs) {
				/*
				 * The wrapped offset is less than the offset.
				 * This can happen if we allocated buffer space
				 * that induced a wrap, but then we didn't
				 * subsequently take the space due to an error
				 * or false predicate evaluation.  This is
				 * okay; we know that _this_ allocation isn't
				 * going to induce a wrap.  We still can't
				 * reset the wrapped offset to be zero,
				 * however: the space may have been trashed in
				 * the previous failed probe attempt.  But at
				 * least the wrapped offset doesn't need to
				 * be adjusted at all...
				 */
				goto out;
			}
		}

		/*
		 * Advance the wrapped offset, one existing record at a time,
		 * until it lies at or beyond the end of the space being
		 * reserved.  Each record starts with its EPID; the record
		 * size comes from the corresponding ECB, or is a single
		 * uint32_t for a padding word (DTRACE_EPIDNONE).
		 */
		while (VBDTCAST(uintptr_t)offs + total > VBDTCAST(uintptr_t)woffs) {
			dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
			size_t size;

			if (epid == DTRACE_EPIDNONE) {
				size = sizeof (uint32_t);
			} else {
				ASSERT(VBDTCAST(int64_t)epid <= state->dts_necbs);
				ASSERT(state->dts_ecbs[epid - 1] != NULL);

				size = state->dts_ecbs[epid - 1]->dte_size;
			}

			ASSERT(woffs + size <= buf->dtb_size);
			ASSERT(size != 0);

			if (woffs + size == buf->dtb_size) {
				/*
				 * We've reached the end of the buffer; we want
				 * to set the wrapped offset to 0 and break
				 * out.  However, if the offs is 0, then we're
				 * in a strange edge-condition:  the amount of
				 * space that we want to reserve plus the size
				 * of the record that we're overwriting is
				 * greater than the size of the buffer.  This
				 * is problematic because if we reserve the
				 * space but subsequently don't consume it (due
				 * to a failed predicate or error) the wrapped
				 * offset will be 0 -- yet the EPID at offset 0
				 * will not be committed.  This situation is
				 * relatively easy to deal with:  if we're in
				 * this case, the buffer is indistinguishable
				 * from one that hasn't wrapped; we need only
				 * finish the job by clearing the wrapped bit,
				 * explicitly setting the offset to be 0, and
				 * zero'ing out the old data in the buffer.
				 */
				if (offs == 0) {
					buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
					buf->dtb_offset = 0;
					woffs = total;

					while (VBDTCAST(uintptr_t)woffs < buf->dtb_size)
						tomax[woffs++] = 0;
				}

				woffs = 0;
				break;
			}

			woffs += size;
		}

		/*
		 * We have a wrapped offset.  It may be that the wrapped offset
		 * has become zero -- that's okay.
		 */
		buf->dtb_xamot_offset = woffs;
	}

out:
	/*
	 * Now we can plow the buffer with any necessary padding.
	 */
	while (offs & (align - 1)) {
		/*
		 * Assert that our alignment is off by a number which
		 * is itself sizeof (uint32_t) aligned.
		 */
		ASSERT(!((align - (offs & (align - 1))) &
		    (sizeof (uint32_t) - 1)));
		DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
		offs += sizeof (uint32_t);
	}

	/*
	 * A fill buffer keeps dts_reserve bytes back at its end; a record
	 * that would intrude on them marks the buffer full and is refused.
	 */
	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (offs + needed > buf->dtb_size - state->dts_reserve) {
			buf->dtb_flags |= DTRACEBUF_FULL;
			return (-1);
		}
	}

	if (mstate == NULL)
		return (offs);

	/*
	 * For ring buffers and fill buffers, the scratch space is always
	 * the inactive buffer.
	 */
	mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
	mstate->dtms_scratch_size = buf->dtb_size;
	mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

	return (offs);
}
10890 |
|
---|
10891 | static void
|
---|
10892 | dtrace_buffer_polish(dtrace_buffer_t *buf)
|
---|
10893 | {
|
---|
10894 | ASSERT(buf->dtb_flags & DTRACEBUF_RING);
|
---|
10895 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
10896 |
|
---|
10897 | if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
|
---|
10898 | return;
|
---|
10899 |
|
---|
10900 | /*
|
---|
10901 | * We need to polish the ring buffer. There are three cases:
|
---|
10902 | *
|
---|
10903 | * - The first (and presumably most common) is that there is no gap
|
---|
10904 | * between the buffer offset and the wrapped offset. In this case,
|
---|
10905 | * there is nothing in the buffer that isn't valid data; we can
|
---|
10906 | * mark the buffer as polished and return.
|
---|
10907 | *
|
---|
10908 | * - The second (less common than the first but still more common
|
---|
10909 | * than the third) is that there is a gap between the buffer offset
|
---|
10910 | * and the wrapped offset, and the wrapped offset is larger than the
|
---|
10911 | * buffer offset. This can happen because of an alignment issue, or
|
---|
10912 | * can happen because of a call to dtrace_buffer_reserve() that
|
---|
10913 | * didn't subsequently consume the buffer space. In this case,
|
---|
10914 | * we need to zero the data from the buffer offset to the wrapped
|
---|
10915 | * offset.
|
---|
10916 | *
|
---|
10917 | * - The third (and least common) is that there is a gap between the
|
---|
10918 | * buffer offset and the wrapped offset, but the wrapped offset is
|
---|
10919 | * _less_ than the buffer offset. This can only happen because a
|
---|
10920 | * call to dtrace_buffer_reserve() induced a wrap, but the space
|
---|
10921 | * was not subsequently consumed. In this case, we need to zero the
|
---|
10922 | * space from the offset to the end of the buffer _and_ from the
|
---|
10923 | * top of the buffer to the wrapped offset.
|
---|
10924 | */
|
---|
10925 | if (buf->dtb_offset < buf->dtb_xamot_offset) {
|
---|
10926 | bzero(buf->dtb_tomax + buf->dtb_offset,
|
---|
10927 | buf->dtb_xamot_offset - buf->dtb_offset);
|
---|
10928 | }
|
---|
10929 |
|
---|
10930 | if (buf->dtb_offset > buf->dtb_xamot_offset) {
|
---|
10931 | bzero(buf->dtb_tomax + buf->dtb_offset,
|
---|
10932 | buf->dtb_size - buf->dtb_offset);
|
---|
10933 | bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
|
---|
10934 | }
|
---|
10935 | }
|
---|
10936 |
|
---|
10937 | static void
|
---|
10938 | dtrace_buffer_free(dtrace_buffer_t *bufs)
|
---|
10939 | {
|
---|
10940 | int i;
|
---|
10941 |
|
---|
10942 | for (i = 0; i < NCPU; i++) {
|
---|
10943 | dtrace_buffer_t *buf = &bufs[i];
|
---|
10944 |
|
---|
10945 | if (buf->dtb_tomax == NULL) {
|
---|
10946 | ASSERT(buf->dtb_xamot == NULL);
|
---|
10947 | ASSERT(buf->dtb_size == 0);
|
---|
10948 | continue;
|
---|
10949 | }
|
---|
10950 |
|
---|
10951 | if (buf->dtb_xamot != NULL) {
|
---|
10952 | ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
|
---|
10953 | kmem_free(buf->dtb_xamot, buf->dtb_size);
|
---|
10954 | }
|
---|
10955 |
|
---|
10956 | kmem_free(buf->dtb_tomax, buf->dtb_size);
|
---|
10957 | buf->dtb_size = 0;
|
---|
10958 | buf->dtb_tomax = NULL;
|
---|
10959 | buf->dtb_xamot = NULL;
|
---|
10960 | }
|
---|
10961 | }
|
---|
10962 |
|
---|
10963 | /*
|
---|
10964 | * DTrace Enabling Functions
|
---|
10965 | */
|
---|
10966 | static dtrace_enabling_t *
|
---|
10967 | dtrace_enabling_create(dtrace_vstate_t *vstate)
|
---|
10968 | {
|
---|
10969 | dtrace_enabling_t *enab;
|
---|
10970 |
|
---|
10971 | enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
|
---|
10972 | enab->dten_vstate = vstate;
|
---|
10973 |
|
---|
10974 | return (enab);
|
---|
10975 | }
|
---|
10976 |
|
---|
10977 | static void
|
---|
10978 | dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
|
---|
10979 | {
|
---|
10980 | dtrace_ecbdesc_t **ndesc;
|
---|
10981 | size_t osize, nsize;
|
---|
10982 |
|
---|
10983 | /*
|
---|
10984 | * We can't add to enablings after we've enabled them, or after we've
|
---|
10985 | * retained them.
|
---|
10986 | */
|
---|
10987 | ASSERT(enab->dten_probegen == 0);
|
---|
10988 | ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
|
---|
10989 |
|
---|
10990 | if (enab->dten_ndesc < enab->dten_maxdesc) {
|
---|
10991 | enab->dten_desc[enab->dten_ndesc++] = ecb;
|
---|
10992 | return;
|
---|
10993 | }
|
---|
10994 |
|
---|
10995 | osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
|
---|
10996 |
|
---|
10997 | if (enab->dten_maxdesc == 0) {
|
---|
10998 | enab->dten_maxdesc = 1;
|
---|
10999 | } else {
|
---|
11000 | enab->dten_maxdesc <<= 1;
|
---|
11001 | }
|
---|
11002 |
|
---|
11003 | ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
|
---|
11004 |
|
---|
11005 | nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
|
---|
11006 | ndesc = kmem_zalloc(nsize, KM_SLEEP);
|
---|
11007 | bcopy(enab->dten_desc, ndesc, osize);
|
---|
11008 | kmem_free(enab->dten_desc, osize);
|
---|
11009 |
|
---|
11010 | enab->dten_desc = ndesc;
|
---|
11011 | enab->dten_desc[enab->dten_ndesc++] = ecb;
|
---|
11012 | }
|
---|
11013 |
|
---|
11014 | static void
|
---|
11015 | dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
|
---|
11016 | dtrace_probedesc_t *pd)
|
---|
11017 | {
|
---|
11018 | dtrace_ecbdesc_t *new;
|
---|
11019 | dtrace_predicate_t *pred;
|
---|
11020 | dtrace_actdesc_t *act;
|
---|
11021 |
|
---|
11022 | /*
|
---|
11023 | * We're going to create a new ECB description that matches the
|
---|
11024 | * specified ECB in every way, but has the specified probe description.
|
---|
11025 | */
|
---|
11026 | new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
|
---|
11027 |
|
---|
11028 | if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
|
---|
11029 | dtrace_predicate_hold(pred);
|
---|
11030 |
|
---|
11031 | for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
|
---|
11032 | dtrace_actdesc_hold(act);
|
---|
11033 |
|
---|
11034 | new->dted_action = ecb->dted_action;
|
---|
11035 | new->dted_pred = ecb->dted_pred;
|
---|
11036 | new->dted_probe = *pd;
|
---|
11037 | new->dted_uarg = ecb->dted_uarg;
|
---|
11038 |
|
---|
11039 | dtrace_enabling_add(enab, new);
|
---|
11040 | }
|
---|
11041 |
|
---|
11042 | #ifndef VBOX
|
---|
11043 | static void
|
---|
11044 | dtrace_enabling_dump(dtrace_enabling_t *enab)
|
---|
11045 | {
|
---|
11046 | int i;
|
---|
11047 |
|
---|
11048 | for (i = 0; i < enab->dten_ndesc; i++) {
|
---|
11049 | dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
|
---|
11050 |
|
---|
11051 | cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
|
---|
11052 | desc->dtpd_provider, desc->dtpd_mod,
|
---|
11053 | desc->dtpd_func, desc->dtpd_name);
|
---|
11054 | }
|
---|
11055 | }
|
---|
11056 | #endif /* !VBOX */
|
---|
11057 |
|
---|
11058 | static void
|
---|
11059 | dtrace_enabling_destroy(dtrace_enabling_t *enab)
|
---|
11060 | {
|
---|
11061 | int i;
|
---|
11062 | dtrace_ecbdesc_t *ep;
|
---|
11063 | dtrace_vstate_t *vstate = enab->dten_vstate;
|
---|
11064 |
|
---|
11065 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
11066 |
|
---|
11067 | for (i = 0; i < enab->dten_ndesc; i++) {
|
---|
11068 | dtrace_actdesc_t *act, *next;
|
---|
11069 | dtrace_predicate_t *pred;
|
---|
11070 |
|
---|
11071 | ep = enab->dten_desc[i];
|
---|
11072 |
|
---|
11073 | if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
|
---|
11074 | dtrace_predicate_release(pred, vstate);
|
---|
11075 |
|
---|
11076 | for (act = ep->dted_action; act != NULL; act = next) {
|
---|
11077 | next = act->dtad_next;
|
---|
11078 | dtrace_actdesc_release(act, vstate);
|
---|
11079 | }
|
---|
11080 |
|
---|
11081 | kmem_free(ep, sizeof (dtrace_ecbdesc_t));
|
---|
11082 | }
|
---|
11083 |
|
---|
11084 | kmem_free(enab->dten_desc,
|
---|
11085 | enab->dten_maxdesc * sizeof (dtrace_enabling_t *));
|
---|
11086 |
|
---|
11087 | /*
|
---|
11088 | * If this was a retained enabling, decrement the dts_nretained count
|
---|
11089 | * and take it off of the dtrace_retained list.
|
---|
11090 | */
|
---|
11091 | if (enab->dten_prev != NULL || enab->dten_next != NULL ||
|
---|
11092 | dtrace_retained == enab) {
|
---|
11093 | ASSERT(enab->dten_vstate->dtvs_state != NULL);
|
---|
11094 | ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
|
---|
11095 | enab->dten_vstate->dtvs_state->dts_nretained--;
|
---|
11096 | dtrace_retained_gen++;
|
---|
11097 | }
|
---|
11098 |
|
---|
11099 | if (enab->dten_prev == NULL) {
|
---|
11100 | if (dtrace_retained == enab) {
|
---|
11101 | dtrace_retained = enab->dten_next;
|
---|
11102 |
|
---|
11103 | if (dtrace_retained != NULL)
|
---|
11104 | dtrace_retained->dten_prev = NULL;
|
---|
11105 | }
|
---|
11106 | } else {
|
---|
11107 | ASSERT(enab != dtrace_retained);
|
---|
11108 | ASSERT(dtrace_retained != NULL);
|
---|
11109 | enab->dten_prev->dten_next = enab->dten_next;
|
---|
11110 | }
|
---|
11111 |
|
---|
11112 | if (enab->dten_next != NULL) {
|
---|
11113 | ASSERT(dtrace_retained != NULL);
|
---|
11114 | enab->dten_next->dten_prev = enab->dten_prev;
|
---|
11115 | }
|
---|
11116 |
|
---|
11117 | kmem_free(enab, sizeof (dtrace_enabling_t));
|
---|
11118 | }
|
---|
11119 |
|
---|
11120 | static int
|
---|
11121 | dtrace_enabling_retain(dtrace_enabling_t *enab)
|
---|
11122 | {
|
---|
11123 | dtrace_state_t *state;
|
---|
11124 |
|
---|
11125 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
11126 | ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
|
---|
11127 | ASSERT(enab->dten_vstate != NULL);
|
---|
11128 |
|
---|
11129 | state = enab->dten_vstate->dtvs_state;
|
---|
11130 | ASSERT(state != NULL);
|
---|
11131 |
|
---|
11132 | /*
|
---|
11133 | * We only allow each state to retain dtrace_retain_max enablings.
|
---|
11134 | */
|
---|
11135 | if (state->dts_nretained >= dtrace_retain_max)
|
---|
11136 | return (ENOSPC);
|
---|
11137 |
|
---|
11138 | state->dts_nretained++;
|
---|
11139 | dtrace_retained_gen++;
|
---|
11140 |
|
---|
11141 | if (dtrace_retained == NULL) {
|
---|
11142 | dtrace_retained = enab;
|
---|
11143 | return (0);
|
---|
11144 | }
|
---|
11145 |
|
---|
11146 | enab->dten_next = dtrace_retained;
|
---|
11147 | dtrace_retained->dten_prev = enab;
|
---|
11148 | dtrace_retained = enab;
|
---|
11149 |
|
---|
11150 | return (0);
|
---|
11151 | }
|
---|
11152 |
|
---|
11153 | static int
|
---|
11154 | dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
|
---|
11155 | dtrace_probedesc_t *create)
|
---|
11156 | {
|
---|
11157 | dtrace_enabling_t *new, *enab;
|
---|
11158 | int found = 0, err = ENOENT;
|
---|
11159 |
|
---|
11160 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
11161 | ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
|
---|
11162 | ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
|
---|
11163 | ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
|
---|
11164 | ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
|
---|
11165 |
|
---|
11166 | new = dtrace_enabling_create(&state->dts_vstate);
|
---|
11167 |
|
---|
11168 | /*
|
---|
11169 | * Iterate over all retained enablings, looking for enablings that
|
---|
11170 | * match the specified state.
|
---|
11171 | */
|
---|
11172 | for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
|
---|
11173 | int i;
|
---|
11174 |
|
---|
11175 | /*
|
---|
11176 | * dtvs_state can only be NULL for helper enablings -- and
|
---|
11177 | * helper enablings can't be retained.
|
---|
11178 | */
|
---|
11179 | ASSERT(enab->dten_vstate->dtvs_state != NULL);
|
---|
11180 |
|
---|
11181 | if (enab->dten_vstate->dtvs_state != state)
|
---|
11182 | continue;
|
---|
11183 |
|
---|
11184 | /*
|
---|
11185 | * Now iterate over each probe description; we're looking for
|
---|
11186 | * an exact match to the specified probe description.
|
---|
11187 | */
|
---|
11188 | for (i = 0; i < enab->dten_ndesc; i++) {
|
---|
11189 | dtrace_ecbdesc_t *ep = enab->dten_desc[i];
|
---|
11190 | dtrace_probedesc_t *pd = &ep->dted_probe;
|
---|
11191 |
|
---|
11192 | if (strcmp(pd->dtpd_provider, match->dtpd_provider))
|
---|
11193 | continue;
|
---|
11194 |
|
---|
11195 | if (strcmp(pd->dtpd_mod, match->dtpd_mod))
|
---|
11196 | continue;
|
---|
11197 |
|
---|
11198 | if (strcmp(pd->dtpd_func, match->dtpd_func))
|
---|
11199 | continue;
|
---|
11200 |
|
---|
11201 | if (strcmp(pd->dtpd_name, match->dtpd_name))
|
---|
11202 | continue;
|
---|
11203 |
|
---|
11204 | /*
|
---|
11205 | * We have a winning probe! Add it to our growing
|
---|
11206 | * enabling.
|
---|
11207 | */
|
---|
11208 | found = 1;
|
---|
11209 | dtrace_enabling_addlike(new, ep, create);
|
---|
11210 | }
|
---|
11211 | }
|
---|
11212 |
|
---|
11213 | if (!found || (err = dtrace_enabling_retain(new)) != 0) {
|
---|
11214 | dtrace_enabling_destroy(new);
|
---|
11215 | return (err);
|
---|
11216 | }
|
---|
11217 |
|
---|
11218 | return (0);
|
---|
11219 | }
|
---|
11220 |
|
---|
11221 | static void
|
---|
11222 | dtrace_enabling_retract(dtrace_state_t *state)
|
---|
11223 | {
|
---|
11224 | dtrace_enabling_t *enab, *next;
|
---|
11225 |
|
---|
11226 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
11227 |
|
---|
11228 | /*
|
---|
11229 | * Iterate over all retained enablings, destroy the enablings retained
|
---|
11230 | * for the specified state.
|
---|
11231 | */
|
---|
11232 | for (enab = dtrace_retained; enab != NULL; enab = next) {
|
---|
11233 | next = enab->dten_next;
|
---|
11234 |
|
---|
11235 | /*
|
---|
11236 | * dtvs_state can only be NULL for helper enablings -- and
|
---|
11237 | * helper enablings can't be retained.
|
---|
11238 | */
|
---|
11239 | ASSERT(enab->dten_vstate->dtvs_state != NULL);
|
---|
11240 |
|
---|
11241 | if (enab->dten_vstate->dtvs_state == state) {
|
---|
11242 | ASSERT(state->dts_nretained > 0);
|
---|
11243 | dtrace_enabling_destroy(enab);
|
---|
11244 | }
|
---|
11245 | }
|
---|
11246 |
|
---|
11247 | ASSERT(state->dts_nretained == 0);
|
---|
11248 | }
|
---|
11249 |
|
---|
11250 | static int
|
---|
11251 | dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
|
---|
11252 | {
|
---|
11253 | int i = 0;
|
---|
11254 | int total_matched = 0, matched = 0;
|
---|
11255 |
|
---|
11256 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
11257 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
11258 |
|
---|
11259 | for (i = 0; i < enab->dten_ndesc; i++) {
|
---|
11260 | dtrace_ecbdesc_t *ep = enab->dten_desc[i];
|
---|
11261 |
|
---|
11262 | enab->dten_current = ep;
|
---|
11263 | enab->dten_error = 0;
|
---|
11264 |
|
---|
11265 | /*
|
---|
11266 | * If a provider failed to enable a probe then get out and
|
---|
11267 | * let the consumer know we failed.
|
---|
11268 | */
|
---|
11269 | if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
|
---|
11270 | return (EBUSY);
|
---|
11271 |
|
---|
11272 | total_matched += matched;
|
---|
11273 |
|
---|
11274 | if (enab->dten_error != 0) {
|
---|
11275 | /*
|
---|
11276 | * If we get an error half-way through enabling the
|
---|
11277 | * probes, we kick out -- perhaps with some number of
|
---|
11278 | * them enabled. Leaving enabled probes enabled may
|
---|
11279 | * be slightly confusing for user-level, but we expect
|
---|
11280 | * that no one will attempt to actually drive on in
|
---|
11281 | * the face of such errors. If this is an anonymous
|
---|
11282 | * enabling (indicated with a NULL nmatched pointer),
|
---|
11283 | * we cmn_err() a message. We aren't expecting to
|
---|
11284 | * get such an error -- such as it can exist at all,
|
---|
11285 | * it would be a result of corrupted DOF in the driver
|
---|
11286 | * properties.
|
---|
11287 | */
|
---|
11288 | if (nmatched == NULL) {
|
---|
11289 | cmn_err(CE_WARN, "dtrace_enabling_match() "
|
---|
11290 | "error on %p: %d", (void *)ep,
|
---|
11291 | enab->dten_error);
|
---|
11292 | }
|
---|
11293 |
|
---|
11294 | return (enab->dten_error);
|
---|
11295 | }
|
---|
11296 | }
|
---|
11297 |
|
---|
11298 | enab->dten_probegen = dtrace_probegen;
|
---|
11299 | if (nmatched != NULL)
|
---|
11300 | *nmatched = total_matched;
|
---|
11301 |
|
---|
11302 | return (0);
|
---|
11303 | }
|
---|
11304 |
|
---|
11305 | static void
|
---|
11306 | dtrace_enabling_matchall(void)
|
---|
11307 | {
|
---|
11308 | dtrace_enabling_t *enab;
|
---|
11309 |
|
---|
11310 | mutex_enter(&cpu_lock);
|
---|
11311 | mutex_enter(&dtrace_lock);
|
---|
11312 |
|
---|
11313 | /*
|
---|
11314 | * Iterate over all retained enablings to see if any probes match
|
---|
11315 | * against them. We only perform this operation on enablings for which
|
---|
11316 | * we have sufficient permissions by virtue of being in the global zone
|
---|
11317 | * or in the same zone as the DTrace client. Because we can be called
|
---|
11318 | * after dtrace_detach() has been called, we cannot assert that there
|
---|
11319 | * are retained enablings. We can safely load from dtrace_retained,
|
---|
11320 | * however: the taskq_destroy() at the end of dtrace_detach() will
|
---|
11321 | * block pending our completion.
|
---|
11322 | */
|
---|
11323 | for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
|
---|
11324 | #ifndef VBOX
|
---|
11325 | cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred;
|
---|
11326 |
|
---|
11327 | if (INGLOBALZONE(curproc) ||
|
---|
11328 | cr != NULL && getzoneid() == crgetzoneid(cr))
|
---|
11329 | #endif
|
---|
11330 | (void) dtrace_enabling_match(enab, NULL);
|
---|
11331 | }
|
---|
11332 |
|
---|
11333 | mutex_exit(&dtrace_lock);
|
---|
11334 | mutex_exit(&cpu_lock);
|
---|
11335 | }
|
---|
11336 |
|
---|
11337 | /*
|
---|
11338 | * If an enabling is to be enabled without having matched probes (that is, if
|
---|
11339 | * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
|
---|
11340 | * enabling must be _primed_ by creating an ECB for every ECB description.
|
---|
11341 | * This must be done to assure that we know the number of speculations, the
|
---|
11342 | * number of aggregations, the minimum buffer size needed, etc. before we
|
---|
11343 | * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
|
---|
11344 | * enabling any probes, we create ECBs for every ECB decription, but with a
|
---|
11345 | * NULL probe -- which is exactly what this function does.
|
---|
11346 | */
|
---|
11347 | static void
|
---|
11348 | dtrace_enabling_prime(dtrace_state_t *state)
|
---|
11349 | {
|
---|
11350 | dtrace_enabling_t *enab;
|
---|
11351 | int i;
|
---|
11352 |
|
---|
11353 | for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
|
---|
11354 | ASSERT(enab->dten_vstate->dtvs_state != NULL);
|
---|
11355 |
|
---|
11356 | if (enab->dten_vstate->dtvs_state != state)
|
---|
11357 | continue;
|
---|
11358 |
|
---|
11359 | /*
|
---|
11360 | * We don't want to prime an enabling more than once, lest
|
---|
11361 | * we allow a malicious user to induce resource exhaustion.
|
---|
11362 | * (The ECBs that result from priming an enabling aren't
|
---|
11363 | * leaked -- but they also aren't deallocated until the
|
---|
11364 | * consumer state is destroyed.)
|
---|
11365 | */
|
---|
11366 | if (enab->dten_primed)
|
---|
11367 | continue;
|
---|
11368 |
|
---|
11369 | for (i = 0; i < enab->dten_ndesc; i++) {
|
---|
11370 | enab->dten_current = enab->dten_desc[i];
|
---|
11371 | (void) dtrace_probe_enable(NULL, enab);
|
---|
11372 | }
|
---|
11373 |
|
---|
11374 | enab->dten_primed = 1;
|
---|
11375 | }
|
---|
11376 | }
|
---|
11377 |
|
---|
11378 | /*
|
---|
11379 | * Called to indicate that probes should be provided due to retained
|
---|
11380 | * enablings. This is implemented in terms of dtrace_probe_provide(), but it
|
---|
11381 | * must take an initial lap through the enabling calling the dtps_provide()
|
---|
11382 | * entry point explicitly to allow for autocreated probes.
|
---|
11383 | */
|
---|
11384 | static void
|
---|
11385 | dtrace_enabling_provide(dtrace_provider_t *prv)
|
---|
11386 | {
|
---|
11387 | int i, all = 0;
|
---|
11388 | dtrace_probedesc_t desc;
|
---|
11389 | dtrace_genid_t gen;
|
---|
11390 |
|
---|
11391 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
11392 | ASSERT(MUTEX_HELD(&dtrace_provider_lock));
|
---|
11393 |
|
---|
11394 | if (prv == NULL) {
|
---|
11395 | all = 1;
|
---|
11396 | prv = dtrace_provider;
|
---|
11397 | }
|
---|
11398 |
|
---|
11399 | do {
|
---|
11400 | dtrace_enabling_t *enab;
|
---|
11401 | void *parg = prv->dtpv_arg;
|
---|
11402 |
|
---|
11403 | retry:
|
---|
11404 | gen = dtrace_retained_gen;
|
---|
11405 | for (enab = dtrace_retained; enab != NULL;
|
---|
11406 | enab = enab->dten_next) {
|
---|
11407 | for (i = 0; i < enab->dten_ndesc; i++) {
|
---|
11408 | desc = enab->dten_desc[i]->dted_probe;
|
---|
11409 | mutex_exit(&dtrace_lock);
|
---|
11410 | prv->dtpv_pops.dtps_provide(parg, &desc);
|
---|
11411 | mutex_enter(&dtrace_lock);
|
---|
11412 | /*
|
---|
11413 | * Process the retained enablings again if
|
---|
11414 | * they have changed while we weren't holding
|
---|
11415 | * dtrace_lock.
|
---|
11416 | */
|
---|
11417 | if (gen != dtrace_retained_gen)
|
---|
11418 | goto retry;
|
---|
11419 | }
|
---|
11420 | }
|
---|
11421 | } while (all && (prv = prv->dtpv_next) != NULL);
|
---|
11422 |
|
---|
11423 | mutex_exit(&dtrace_lock);
|
---|
11424 | dtrace_probe_provide(NULL, all ? NULL : prv);
|
---|
11425 | mutex_enter(&dtrace_lock);
|
---|
11426 | }
|
---|
11427 |
|
---|
11428 | /*
|
---|
11429 | * DTrace DOF Functions
|
---|
11430 | */
|
---|
11431 | /*ARGSUSED*/
|
---|
11432 | static void
|
---|
11433 | dtrace_dof_error(dof_hdr_t *dof, const char *str)
|
---|
11434 | {
|
---|
11435 | RT_NOREF_PV(dof);
|
---|
11436 |
|
---|
11437 | if (dtrace_err_verbose)
|
---|
11438 | cmn_err(CE_WARN, "failed to process DOF: %s", str);
|
---|
11439 |
|
---|
11440 | #ifdef DTRACE_ERRDEBUG
|
---|
11441 | dtrace_errdebug(str);
|
---|
11442 | #endif
|
---|
11443 | }
|
---|
11444 |
|
---|
11445 | /*
|
---|
11446 | * Create DOF out of a currently enabled state. Right now, we only create
|
---|
11447 | * DOF containing the run-time options -- but this could be expanded to create
|
---|
11448 | * complete DOF representing the enabled state.
|
---|
11449 | */
|
---|
11450 | static dof_hdr_t *
|
---|
11451 | dtrace_dof_create(dtrace_state_t *state)
|
---|
11452 | {
|
---|
11453 | dof_hdr_t *dof;
|
---|
11454 | dof_sec_t *sec;
|
---|
11455 | dof_optdesc_t *opt;
|
---|
11456 | int i, len = sizeof (dof_hdr_t) +
|
---|
11457 | roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
|
---|
11458 | sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
|
---|
11459 |
|
---|
11460 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
11461 |
|
---|
11462 | dof = kmem_zalloc(len, KM_SLEEP);
|
---|
11463 | dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
|
---|
11464 | dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
|
---|
11465 | dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
|
---|
11466 | dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
|
---|
11467 |
|
---|
11468 | dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
|
---|
11469 | dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
|
---|
11470 | dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
|
---|
11471 | dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
|
---|
11472 | dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
|
---|
11473 | dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
|
---|
11474 |
|
---|
11475 | dof->dofh_flags = 0;
|
---|
11476 | dof->dofh_hdrsize = sizeof (dof_hdr_t);
|
---|
11477 | dof->dofh_secsize = sizeof (dof_sec_t);
|
---|
11478 | dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */
|
---|
11479 | dof->dofh_secoff = sizeof (dof_hdr_t);
|
---|
11480 | dof->dofh_loadsz = len;
|
---|
11481 | dof->dofh_filesz = len;
|
---|
11482 | dof->dofh_pad = 0;
|
---|
11483 |
|
---|
11484 | /*
|
---|
11485 | * Fill in the option section header...
|
---|
11486 | */
|
---|
11487 | sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
|
---|
11488 | sec->dofs_type = DOF_SECT_OPTDESC;
|
---|
11489 | sec->dofs_align = sizeof (uint64_t);
|
---|
11490 | sec->dofs_flags = DOF_SECF_LOAD;
|
---|
11491 | sec->dofs_entsize = sizeof (dof_optdesc_t);
|
---|
11492 |
|
---|
11493 | opt = (dof_optdesc_t *)((uintptr_t)sec +
|
---|
11494 | roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
|
---|
11495 |
|
---|
11496 | sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
|
---|
11497 | sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
|
---|
11498 |
|
---|
11499 | for (i = 0; i < DTRACEOPT_MAX; i++) {
|
---|
11500 | opt[i].dofo_option = i;
|
---|
11501 | opt[i].dofo_strtab = DOF_SECIDX_NONE;
|
---|
11502 | opt[i].dofo_value = state->dts_options[i];
|
---|
11503 | }
|
---|
11504 |
|
---|
11505 | return (dof);
|
---|
11506 | }
|
---|
11507 |
|
---|
11508 | static dof_hdr_t *
|
---|
11509 | dtrace_dof_copyin(uintptr_t uarg, int *errp)
|
---|
11510 | {
|
---|
11511 | dof_hdr_t hdr, *dof;
|
---|
11512 |
|
---|
11513 | ASSERT(!MUTEX_HELD(&dtrace_lock));
|
---|
11514 |
|
---|
11515 | /*
|
---|
11516 | * First, we're going to copyin() the sizeof (dof_hdr_t).
|
---|
11517 | */
|
---|
11518 | if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
|
---|
11519 | dtrace_dof_error(NULL, "failed to copyin DOF header");
|
---|
11520 | *errp = EFAULT;
|
---|
11521 | return (NULL);
|
---|
11522 | }
|
---|
11523 |
|
---|
11524 | /*
|
---|
11525 | * Now we'll allocate the entire DOF and copy it in -- provided
|
---|
11526 | * that the length isn't outrageous.
|
---|
11527 | */
|
---|
11528 | if (hdr.dofh_loadsz >= VBDTCAST(uint64_t)dtrace_dof_maxsize) {
|
---|
11529 | dtrace_dof_error(&hdr, "load size exceeds maximum");
|
---|
11530 | *errp = E2BIG;
|
---|
11531 | return (NULL);
|
---|
11532 | }
|
---|
11533 |
|
---|
11534 | if (hdr.dofh_loadsz < sizeof (hdr)) {
|
---|
11535 | dtrace_dof_error(&hdr, "invalid load size");
|
---|
11536 | *errp = EINVAL;
|
---|
11537 | return (NULL);
|
---|
11538 | }
|
---|
11539 |
|
---|
11540 | dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
|
---|
11541 |
|
---|
11542 | if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
|
---|
11543 | dof->dofh_loadsz != hdr.dofh_loadsz) {
|
---|
11544 | kmem_free(dof, hdr.dofh_loadsz);
|
---|
11545 | *errp = EFAULT;
|
---|
11546 | return (NULL);
|
---|
11547 | }
|
---|
11548 |
|
---|
11549 | return (dof);
|
---|
11550 | }
|
---|
11551 |
|
---|
11552 | #ifndef VBOX
|
---|
11553 | static dof_hdr_t *
|
---|
11554 | dtrace_dof_property(const char *name)
|
---|
11555 | {
|
---|
11556 | #ifndef VBOX
|
---|
11557 | uchar_t *buf;
|
---|
11558 | uint64_t loadsz;
|
---|
11559 | unsigned int len, i;
|
---|
11560 | dof_hdr_t *dof;
|
---|
11561 |
|
---|
11562 | /*
|
---|
11563 | * Unfortunately, array of values in .conf files are always (and
|
---|
11564 | * only) interpreted to be integer arrays. We must read our DOF
|
---|
11565 | * as an integer array, and then squeeze it into a byte array.
|
---|
11566 | */
|
---|
11567 | if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
|
---|
11568 | (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
|
---|
11569 | return (NULL);
|
---|
11570 |
|
---|
11571 | for (i = 0; i < len; i++)
|
---|
11572 | buf[i] = (uchar_t)(((int *)buf)[i]);
|
---|
11573 |
|
---|
11574 | if (len < sizeof (dof_hdr_t)) {
|
---|
11575 | ddi_prop_free(buf);
|
---|
11576 | dtrace_dof_error(NULL, "truncated header");
|
---|
11577 | return (NULL);
|
---|
11578 | }
|
---|
11579 |
|
---|
11580 | if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
|
---|
11581 | ddi_prop_free(buf);
|
---|
11582 | dtrace_dof_error(NULL, "truncated DOF");
|
---|
11583 | return (NULL);
|
---|
11584 | }
|
---|
11585 |
|
---|
11586 | if (loadsz >= dtrace_dof_maxsize) {
|
---|
11587 | ddi_prop_free(buf);
|
---|
11588 | dtrace_dof_error(NULL, "oversized DOF");
|
---|
11589 | return (NULL);
|
---|
11590 | }
|
---|
11591 |
|
---|
11592 | dof = kmem_alloc(loadsz, KM_SLEEP);
|
---|
11593 | bcopy(buf, dof, loadsz);
|
---|
11594 | ddi_prop_free(buf);
|
---|
11595 |
|
---|
11596 | return (dof);
|
---|
11597 | #else /* VBOX */
|
---|
11598 | RT_NOREF_PV(name);
|
---|
11599 | return (NULL);
|
---|
11600 | #endif /* VBOX */
|
---|
11601 | }
|
---|
11602 | #endif /* !VBOX */
|
---|
11603 |
|
---|
11604 | static void
|
---|
11605 | dtrace_dof_destroy(dof_hdr_t *dof)
|
---|
11606 | {
|
---|
11607 | kmem_free(dof, dof->dofh_loadsz);
|
---|
11608 | }
|
---|
11609 |
|
---|
11610 | /*
|
---|
11611 | * Return the dof_sec_t pointer corresponding to a given section index. If the
|
---|
11612 | * index is not valid, dtrace_dof_error() is called and NULL is returned. If
|
---|
11613 | * a type other than DOF_SECT_NONE is specified, the header is checked against
|
---|
11614 | * this type and NULL is returned if the types do not match.
|
---|
11615 | */
|
---|
11616 | static dof_sec_t *
|
---|
11617 | dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
|
---|
11618 | {
|
---|
11619 | dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
|
---|
11620 | ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
11621 |
|
---|
11622 | if (i >= dof->dofh_secnum) {
|
---|
11623 | dtrace_dof_error(dof, "referenced section index is invalid");
|
---|
11624 | return (NULL);
|
---|
11625 | }
|
---|
11626 |
|
---|
11627 | if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
|
---|
11628 | dtrace_dof_error(dof, "referenced section is not loadable");
|
---|
11629 | return (NULL);
|
---|
11630 | }
|
---|
11631 |
|
---|
11632 | if (type != DOF_SECT_NONE && type != sec->dofs_type) {
|
---|
11633 | dtrace_dof_error(dof, "referenced section is the wrong type");
|
---|
11634 | return (NULL);
|
---|
11635 | }
|
---|
11636 |
|
---|
11637 | return (sec);
|
---|
11638 | }
|
---|
11639 |
|
---|
11640 | static dtrace_probedesc_t *
|
---|
11641 | dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
|
---|
11642 | {
|
---|
11643 | dof_probedesc_t *probe;
|
---|
11644 | dof_sec_t *strtab;
|
---|
11645 | uintptr_t daddr = (uintptr_t)dof;
|
---|
11646 | uintptr_t str;
|
---|
11647 | size_t size;
|
---|
11648 |
|
---|
11649 | if (sec->dofs_type != DOF_SECT_PROBEDESC) {
|
---|
11650 | dtrace_dof_error(dof, "invalid probe section");
|
---|
11651 | return (NULL);
|
---|
11652 | }
|
---|
11653 |
|
---|
11654 | if (sec->dofs_align != sizeof (dof_secidx_t)) {
|
---|
11655 | dtrace_dof_error(dof, "bad alignment in probe description");
|
---|
11656 | return (NULL);
|
---|
11657 | }
|
---|
11658 |
|
---|
11659 | if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
|
---|
11660 | dtrace_dof_error(dof, "truncated probe description");
|
---|
11661 | return (NULL);
|
---|
11662 | }
|
---|
11663 |
|
---|
11664 | probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
|
---|
11665 | strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
|
---|
11666 |
|
---|
11667 | if (strtab == NULL)
|
---|
11668 | return (NULL);
|
---|
11669 |
|
---|
11670 | str = daddr + strtab->dofs_offset;
|
---|
11671 | size = strtab->dofs_size;
|
---|
11672 |
|
---|
11673 | if (probe->dofp_provider >= strtab->dofs_size) {
|
---|
11674 | dtrace_dof_error(dof, "corrupt probe provider");
|
---|
11675 | return (NULL);
|
---|
11676 | }
|
---|
11677 |
|
---|
11678 | (void) strncpy(desc->dtpd_provider,
|
---|
11679 | (char *)(str + probe->dofp_provider),
|
---|
11680 | MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
|
---|
11681 |
|
---|
11682 | if (probe->dofp_mod >= strtab->dofs_size) {
|
---|
11683 | dtrace_dof_error(dof, "corrupt probe module");
|
---|
11684 | return (NULL);
|
---|
11685 | }
|
---|
11686 |
|
---|
11687 | (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
|
---|
11688 | MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
|
---|
11689 |
|
---|
11690 | if (probe->dofp_func >= strtab->dofs_size) {
|
---|
11691 | dtrace_dof_error(dof, "corrupt probe function");
|
---|
11692 | return (NULL);
|
---|
11693 | }
|
---|
11694 |
|
---|
11695 | (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
|
---|
11696 | MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
|
---|
11697 |
|
---|
11698 | if (probe->dofp_name >= strtab->dofs_size) {
|
---|
11699 | dtrace_dof_error(dof, "corrupt probe name");
|
---|
11700 | return (NULL);
|
---|
11701 | }
|
---|
11702 |
|
---|
11703 | (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
|
---|
11704 | MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
|
---|
11705 |
|
---|
11706 | return (desc);
|
---|
11707 | }
|
---|
11708 |
|
---|
11709 | static dtrace_difo_t *
|
---|
11710 | dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
|
---|
11711 | cred_t *cr)
|
---|
11712 | {
|
---|
11713 | dtrace_difo_t *dp;
|
---|
11714 | size_t ttl = 0;
|
---|
11715 | dof_difohdr_t *dofd;
|
---|
11716 | uintptr_t daddr = (uintptr_t)dof;
|
---|
11717 | size_t max = dtrace_difo_maxsize;
|
---|
11718 | int i, l, n;
|
---|
11719 |
|
---|
11720 | static const struct {
|
---|
11721 | int section;
|
---|
11722 | int bufoffs;
|
---|
11723 | int lenoffs;
|
---|
11724 | int entsize;
|
---|
11725 | int align;
|
---|
11726 | const char *msg;
|
---|
11727 | } difo[] = {
|
---|
11728 | { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
|
---|
11729 | offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
|
---|
11730 | sizeof (dif_instr_t), "multiple DIF sections" },
|
---|
11731 |
|
---|
11732 | { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
|
---|
11733 | offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
|
---|
11734 | sizeof (uint64_t), "multiple integer tables" },
|
---|
11735 |
|
---|
11736 | { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
|
---|
11737 | offsetof(dtrace_difo_t, dtdo_strlen), 0,
|
---|
11738 | sizeof (char), "multiple string tables" },
|
---|
11739 |
|
---|
11740 | { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
|
---|
11741 | offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
|
---|
11742 | sizeof (uint_t), "multiple variable tables" },
|
---|
11743 |
|
---|
11744 | { DOF_SECT_NONE, 0, 0, 0, NULL }
|
---|
11745 | };
|
---|
11746 |
|
---|
11747 | if (sec->dofs_type != DOF_SECT_DIFOHDR) {
|
---|
11748 | dtrace_dof_error(dof, "invalid DIFO header section");
|
---|
11749 | return (NULL);
|
---|
11750 | }
|
---|
11751 |
|
---|
11752 | if (sec->dofs_align != sizeof (dof_secidx_t)) {
|
---|
11753 | dtrace_dof_error(dof, "bad alignment in DIFO header");
|
---|
11754 | return (NULL);
|
---|
11755 | }
|
---|
11756 |
|
---|
11757 | if (sec->dofs_size < sizeof (dof_difohdr_t) ||
|
---|
11758 | sec->dofs_size % sizeof (dof_secidx_t)) {
|
---|
11759 | dtrace_dof_error(dof, "bad size in DIFO header");
|
---|
11760 | return (NULL);
|
---|
11761 | }
|
---|
11762 |
|
---|
11763 | dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
|
---|
11764 | n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;
|
---|
11765 |
|
---|
11766 | dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
|
---|
11767 | dp->dtdo_rtype = dofd->dofd_rtype;
|
---|
11768 |
|
---|
11769 | for (l = 0; l < n; l++) {
|
---|
11770 | dof_sec_t *subsec;
|
---|
11771 | void **bufp;
|
---|
11772 | uint32_t *lenp;
|
---|
11773 |
|
---|
11774 | if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
|
---|
11775 | dofd->dofd_links[l])) == NULL)
|
---|
11776 | goto err; /* invalid section link */
|
---|
11777 |
|
---|
11778 | if (ttl + subsec->dofs_size > max) {
|
---|
11779 | dtrace_dof_error(dof, "exceeds maximum size");
|
---|
11780 | goto err;
|
---|
11781 | }
|
---|
11782 |
|
---|
11783 | ttl += subsec->dofs_size;
|
---|
11784 |
|
---|
11785 | for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
|
---|
11786 | if (subsec->dofs_type != VBDTCAST(uint32_t)difo[i].section)
|
---|
11787 | continue;
|
---|
11788 |
|
---|
11789 | if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
|
---|
11790 | dtrace_dof_error(dof, "section not loaded");
|
---|
11791 | goto err;
|
---|
11792 | }
|
---|
11793 |
|
---|
11794 | if (subsec->dofs_align != VBDTCAST(uint32_t)difo[i].align) {
|
---|
11795 | dtrace_dof_error(dof, "bad alignment");
|
---|
11796 | goto err;
|
---|
11797 | }
|
---|
11798 |
|
---|
11799 | bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
|
---|
11800 | lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);
|
---|
11801 |
|
---|
11802 | if (*bufp != NULL) {
|
---|
11803 | dtrace_dof_error(dof, difo[i].msg);
|
---|
11804 | goto err;
|
---|
11805 | }
|
---|
11806 |
|
---|
11807 | if (VBDTCAST(uint32_t)difo[i].entsize != subsec->dofs_entsize) {
|
---|
11808 | dtrace_dof_error(dof, "entry size mismatch");
|
---|
11809 | goto err;
|
---|
11810 | }
|
---|
11811 |
|
---|
11812 | if (subsec->dofs_entsize != 0 &&
|
---|
11813 | (subsec->dofs_size % subsec->dofs_entsize) != 0) {
|
---|
11814 | dtrace_dof_error(dof, "corrupt entry size");
|
---|
11815 | goto err;
|
---|
11816 | }
|
---|
11817 |
|
---|
11818 | *lenp = subsec->dofs_size;
|
---|
11819 | *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
|
---|
11820 | bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
|
---|
11821 | *bufp, subsec->dofs_size);
|
---|
11822 |
|
---|
11823 | if (subsec->dofs_entsize != 0)
|
---|
11824 | *lenp /= subsec->dofs_entsize;
|
---|
11825 |
|
---|
11826 | break;
|
---|
11827 | }
|
---|
11828 |
|
---|
11829 | /*
|
---|
11830 | * If we encounter a loadable DIFO sub-section that is not
|
---|
11831 | * known to us, assume this is a broken program and fail.
|
---|
11832 | */
|
---|
11833 | if (difo[i].section == DOF_SECT_NONE &&
|
---|
11834 | (subsec->dofs_flags & DOF_SECF_LOAD)) {
|
---|
11835 | dtrace_dof_error(dof, "unrecognized DIFO subsection");
|
---|
11836 | goto err;
|
---|
11837 | }
|
---|
11838 | }
|
---|
11839 |
|
---|
11840 | if (dp->dtdo_buf == NULL) {
|
---|
11841 | /*
|
---|
11842 | * We can't have a DIF object without DIF text.
|
---|
11843 | */
|
---|
11844 | dtrace_dof_error(dof, "missing DIF text");
|
---|
11845 | goto err;
|
---|
11846 | }
|
---|
11847 |
|
---|
11848 | /*
|
---|
11849 | * Before we validate the DIF object, run through the variable table
|
---|
11850 | * looking for the strings -- if any of their size are under, we'll set
|
---|
11851 | * their size to be the system-wide default string size. Note that
|
---|
11852 | * this should _not_ happen if the "strsize" option has been set --
|
---|
11853 | * in this case, the compiler should have set the size to reflect the
|
---|
11854 | * setting of the option.
|
---|
11855 | */
|
---|
11856 | for (i = 0; VBDTCAST(unsigned)i < dp->dtdo_varlen; i++) {
|
---|
11857 | dtrace_difv_t *v = &dp->dtdo_vartab[i];
|
---|
11858 | dtrace_diftype_t *t = &v->dtdv_type;
|
---|
11859 |
|
---|
11860 | if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
|
---|
11861 | continue;
|
---|
11862 |
|
---|
11863 | if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
|
---|
11864 | t->dtdt_size = VBDTCAST(uint32_t)dtrace_strsize_default;
|
---|
11865 | }
|
---|
11866 |
|
---|
11867 | if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
|
---|
11868 | goto err;
|
---|
11869 |
|
---|
11870 | dtrace_difo_init(dp, vstate);
|
---|
11871 | return (dp);
|
---|
11872 |
|
---|
11873 | err:
|
---|
11874 | kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
|
---|
11875 | kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
|
---|
11876 | kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
|
---|
11877 | kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
|
---|
11878 |
|
---|
11879 | kmem_free(dp, sizeof (dtrace_difo_t));
|
---|
11880 | return (NULL);
|
---|
11881 | }
|
---|
11882 |
|
---|
11883 | static dtrace_predicate_t *
|
---|
11884 | dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
|
---|
11885 | cred_t *cr)
|
---|
11886 | {
|
---|
11887 | dtrace_difo_t *dp;
|
---|
11888 |
|
---|
11889 | if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
|
---|
11890 | return (NULL);
|
---|
11891 |
|
---|
11892 | return (dtrace_predicate_create(dp));
|
---|
11893 | }
|
---|
11894 |
|
---|
11895 | static dtrace_actdesc_t *
|
---|
11896 | dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
|
---|
11897 | cred_t *cr)
|
---|
11898 | {
|
---|
11899 | dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
|
---|
11900 | dof_actdesc_t *desc;
|
---|
11901 | dof_sec_t *difosec;
|
---|
11902 | size_t offs;
|
---|
11903 | uintptr_t daddr = (uintptr_t)dof;
|
---|
11904 | uint64_t arg;
|
---|
11905 | dtrace_actkind_t kind;
|
---|
11906 |
|
---|
11907 | if (sec->dofs_type != DOF_SECT_ACTDESC) {
|
---|
11908 | dtrace_dof_error(dof, "invalid action section");
|
---|
11909 | return (NULL);
|
---|
11910 | }
|
---|
11911 |
|
---|
11912 | if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
|
---|
11913 | dtrace_dof_error(dof, "truncated action description");
|
---|
11914 | return (NULL);
|
---|
11915 | }
|
---|
11916 |
|
---|
11917 | if (sec->dofs_align != sizeof (uint64_t)) {
|
---|
11918 | dtrace_dof_error(dof, "bad alignment in action description");
|
---|
11919 | return (NULL);
|
---|
11920 | }
|
---|
11921 |
|
---|
11922 | if (sec->dofs_size < sec->dofs_entsize) {
|
---|
11923 | dtrace_dof_error(dof, "section entry size exceeds total size");
|
---|
11924 | return (NULL);
|
---|
11925 | }
|
---|
11926 |
|
---|
11927 | if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
|
---|
11928 | dtrace_dof_error(dof, "bad entry size in action description");
|
---|
11929 | return (NULL);
|
---|
11930 | }
|
---|
11931 |
|
---|
11932 | if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
|
---|
11933 | dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
|
---|
11934 | return (NULL);
|
---|
11935 | }
|
---|
11936 |
|
---|
11937 | for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
|
---|
11938 | desc = (dof_actdesc_t *)(daddr +
|
---|
11939 | (uintptr_t)sec->dofs_offset + offs);
|
---|
11940 | kind = (dtrace_actkind_t)desc->dofa_kind;
|
---|
11941 |
|
---|
11942 | if (DTRACEACT_ISPRINTFLIKE(kind) &&
|
---|
11943 | (kind != DTRACEACT_PRINTA ||
|
---|
11944 | desc->dofa_strtab != DOF_SECIDX_NONE)) {
|
---|
11945 | dof_sec_t *strtab;
|
---|
11946 | char *str, *fmt;
|
---|
11947 | uint64_t i;
|
---|
11948 |
|
---|
11949 | /*
|
---|
11950 | * printf()-like actions must have a format string.
|
---|
11951 | */
|
---|
11952 | if ((strtab = dtrace_dof_sect(dof,
|
---|
11953 | DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
|
---|
11954 | goto err;
|
---|
11955 |
|
---|
11956 | str = (char *)((uintptr_t)dof +
|
---|
11957 | (uintptr_t)strtab->dofs_offset);
|
---|
11958 |
|
---|
11959 | for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
|
---|
11960 | if (str[i] == '\0')
|
---|
11961 | break;
|
---|
11962 | }
|
---|
11963 |
|
---|
11964 | if (i >= strtab->dofs_size) {
|
---|
11965 | dtrace_dof_error(dof, "bogus format string");
|
---|
11966 | goto err;
|
---|
11967 | }
|
---|
11968 |
|
---|
11969 | if (i == desc->dofa_arg) {
|
---|
11970 | dtrace_dof_error(dof, "empty format string");
|
---|
11971 | goto err;
|
---|
11972 | }
|
---|
11973 |
|
---|
11974 | i -= desc->dofa_arg;
|
---|
11975 | fmt = kmem_alloc(i + 1, KM_SLEEP);
|
---|
11976 | bcopy(&str[desc->dofa_arg], fmt, i + 1);
|
---|
11977 | arg = (uint64_t)(uintptr_t)fmt;
|
---|
11978 | } else {
|
---|
11979 | if (kind == DTRACEACT_PRINTA) {
|
---|
11980 | ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
|
---|
11981 | arg = 0;
|
---|
11982 | } else {
|
---|
11983 | arg = desc->dofa_arg;
|
---|
11984 | }
|
---|
11985 | }
|
---|
11986 |
|
---|
11987 | act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
|
---|
11988 | desc->dofa_uarg, arg);
|
---|
11989 |
|
---|
11990 | if (last != NULL) {
|
---|
11991 | last->dtad_next = act;
|
---|
11992 | } else {
|
---|
11993 | first = act;
|
---|
11994 | }
|
---|
11995 |
|
---|
11996 | last = act;
|
---|
11997 |
|
---|
11998 | if (desc->dofa_difo == DOF_SECIDX_NONE)
|
---|
11999 | continue;
|
---|
12000 |
|
---|
12001 | if ((difosec = dtrace_dof_sect(dof,
|
---|
12002 | DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
|
---|
12003 | goto err;
|
---|
12004 |
|
---|
12005 | act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);
|
---|
12006 |
|
---|
12007 | if (act->dtad_difo == NULL)
|
---|
12008 | goto err;
|
---|
12009 | }
|
---|
12010 |
|
---|
12011 | ASSERT(first != NULL);
|
---|
12012 | return (first);
|
---|
12013 |
|
---|
12014 | err:
|
---|
12015 | for (act = first; act != NULL; act = next) {
|
---|
12016 | next = act->dtad_next;
|
---|
12017 | dtrace_actdesc_release(act, vstate);
|
---|
12018 | }
|
---|
12019 |
|
---|
12020 | return (NULL);
|
---|
12021 | }
|
---|
12022 |
|
---|
12023 | static dtrace_ecbdesc_t *
|
---|
12024 | dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
|
---|
12025 | cred_t *cr)
|
---|
12026 | {
|
---|
12027 | dtrace_ecbdesc_t *ep;
|
---|
12028 | dof_ecbdesc_t *ecb;
|
---|
12029 | dtrace_probedesc_t *desc;
|
---|
12030 | dtrace_predicate_t *pred = NULL;
|
---|
12031 |
|
---|
12032 | if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
|
---|
12033 | dtrace_dof_error(dof, "truncated ECB description");
|
---|
12034 | return (NULL);
|
---|
12035 | }
|
---|
12036 |
|
---|
12037 | if (sec->dofs_align != sizeof (uint64_t)) {
|
---|
12038 | dtrace_dof_error(dof, "bad alignment in ECB description");
|
---|
12039 | return (NULL);
|
---|
12040 | }
|
---|
12041 |
|
---|
12042 | ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
|
---|
12043 | sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);
|
---|
12044 |
|
---|
12045 | if (sec == NULL)
|
---|
12046 | return (NULL);
|
---|
12047 |
|
---|
12048 | ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
|
---|
12049 | ep->dted_uarg = ecb->dofe_uarg;
|
---|
12050 | desc = &ep->dted_probe;
|
---|
12051 |
|
---|
12052 | if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
|
---|
12053 | goto err;
|
---|
12054 |
|
---|
12055 | if (ecb->dofe_pred != DOF_SECIDX_NONE) {
|
---|
12056 | if ((sec = dtrace_dof_sect(dof,
|
---|
12057 | DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
|
---|
12058 | goto err;
|
---|
12059 |
|
---|
12060 | if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
|
---|
12061 | goto err;
|
---|
12062 |
|
---|
12063 | ep->dted_pred.dtpdd_predicate = pred;
|
---|
12064 | }
|
---|
12065 |
|
---|
12066 | if (ecb->dofe_actions != DOF_SECIDX_NONE) {
|
---|
12067 | if ((sec = dtrace_dof_sect(dof,
|
---|
12068 | DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
|
---|
12069 | goto err;
|
---|
12070 |
|
---|
12071 | ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);
|
---|
12072 |
|
---|
12073 | if (ep->dted_action == NULL)
|
---|
12074 | goto err;
|
---|
12075 | }
|
---|
12076 |
|
---|
12077 | return (ep);
|
---|
12078 |
|
---|
12079 | err:
|
---|
12080 | if (pred != NULL)
|
---|
12081 | dtrace_predicate_release(pred, vstate);
|
---|
12082 | kmem_free(ep, sizeof (dtrace_ecbdesc_t));
|
---|
12083 | return (NULL);
|
---|
12084 | }
|
---|
12085 |
|
---|
12086 | /*
|
---|
12087 | * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
|
---|
12088 | * specified DOF. At present, this amounts to simply adding 'ubase' to the
|
---|
12089 | * site of any user SETX relocations to account for load object base address.
|
---|
12090 | * In the future, if we need other relocations, this function can be extended.
|
---|
12091 | */
|
---|
12092 | static int
|
---|
12093 | dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
|
---|
12094 | {
|
---|
12095 | uintptr_t daddr = (uintptr_t)dof;
|
---|
12096 | dof_relohdr_t *dofr =
|
---|
12097 | (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
|
---|
12098 | dof_sec_t *ss, *rs, *ts;
|
---|
12099 | dof_relodesc_t *r;
|
---|
12100 | uint_t i, n;
|
---|
12101 |
|
---|
12102 | if (sec->dofs_size < sizeof (dof_relohdr_t) ||
|
---|
12103 | sec->dofs_align != sizeof (dof_secidx_t)) {
|
---|
12104 | dtrace_dof_error(dof, "invalid relocation header");
|
---|
12105 | return (-1);
|
---|
12106 | }
|
---|
12107 |
|
---|
12108 | ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
|
---|
12109 | rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
|
---|
12110 | ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);
|
---|
12111 |
|
---|
12112 | if (ss == NULL || rs == NULL || ts == NULL)
|
---|
12113 | return (-1); /* dtrace_dof_error() has been called already */
|
---|
12114 |
|
---|
12115 | if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
|
---|
12116 | rs->dofs_align != sizeof (uint64_t)) {
|
---|
12117 | dtrace_dof_error(dof, "invalid relocation section");
|
---|
12118 | return (-1);
|
---|
12119 | }
|
---|
12120 |
|
---|
12121 | r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
|
---|
12122 | n = rs->dofs_size / rs->dofs_entsize;
|
---|
12123 |
|
---|
12124 | for (i = 0; i < n; i++) {
|
---|
12125 | uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;
|
---|
12126 |
|
---|
12127 | switch (r->dofr_type) {
|
---|
12128 | case DOF_RELO_NONE:
|
---|
12129 | break;
|
---|
12130 | case DOF_RELO_SETX:
|
---|
12131 | if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
|
---|
12132 | sizeof (uint64_t) > ts->dofs_size) {
|
---|
12133 | dtrace_dof_error(dof, "bad relocation offset");
|
---|
12134 | return (-1);
|
---|
12135 | }
|
---|
12136 |
|
---|
12137 | if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
|
---|
12138 | dtrace_dof_error(dof, "misaligned setx relo");
|
---|
12139 | return (-1);
|
---|
12140 | }
|
---|
12141 |
|
---|
12142 | *(uint64_t *)taddr += ubase;
|
---|
12143 | break;
|
---|
12144 | default:
|
---|
12145 | dtrace_dof_error(dof, "invalid relocation type");
|
---|
12146 | return (-1);
|
---|
12147 | }
|
---|
12148 |
|
---|
12149 | r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
|
---|
12150 | }
|
---|
12151 |
|
---|
12152 | return (0);
|
---|
12153 | }
|
---|
12154 |
|
---|
12155 | /*
|
---|
12156 | * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
|
---|
12157 | * header: it should be at the front of a memory region that is at least
|
---|
12158 | * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
|
---|
12159 | * size. It need not be validated in any other way.
|
---|
12160 | */
|
---|
12161 | static int
|
---|
12162 | dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
|
---|
12163 | dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
|
---|
12164 | {
|
---|
12165 | uint64_t len = dof->dofh_loadsz, seclen;
|
---|
12166 | uintptr_t daddr = (uintptr_t)dof;
|
---|
12167 | dtrace_ecbdesc_t *ep;
|
---|
12168 | dtrace_enabling_t *enab;
|
---|
12169 | uint_t i;
|
---|
12170 |
|
---|
12171 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
12172 | ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
|
---|
12173 |
|
---|
12174 | /*
|
---|
12175 | * Check the DOF header identification bytes. In addition to checking
|
---|
12176 | * valid settings, we also verify that unused bits/bytes are zeroed so
|
---|
12177 | * we can use them later without fear of regressing existing binaries.
|
---|
12178 | */
|
---|
12179 | if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
|
---|
12180 | DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
|
---|
12181 | dtrace_dof_error(dof, "DOF magic string mismatch");
|
---|
12182 | return (-1);
|
---|
12183 | }
|
---|
12184 |
|
---|
12185 | if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
|
---|
12186 | dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
|
---|
12187 | dtrace_dof_error(dof, "DOF has invalid data model");
|
---|
12188 | return (-1);
|
---|
12189 | }
|
---|
12190 |
|
---|
12191 | if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
|
---|
12192 | dtrace_dof_error(dof, "DOF encoding mismatch");
|
---|
12193 | return (-1);
|
---|
12194 | }
|
---|
12195 |
|
---|
12196 | if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
|
---|
12197 | dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) {
|
---|
12198 | dtrace_dof_error(dof, "DOF version mismatch");
|
---|
12199 | return (-1);
|
---|
12200 | }
|
---|
12201 |
|
---|
12202 | if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
|
---|
12203 | dtrace_dof_error(dof, "DOF uses unsupported instruction set");
|
---|
12204 | return (-1);
|
---|
12205 | }
|
---|
12206 |
|
---|
12207 | if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
|
---|
12208 | dtrace_dof_error(dof, "DOF uses too many integer registers");
|
---|
12209 | return (-1);
|
---|
12210 | }
|
---|
12211 |
|
---|
12212 | if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
|
---|
12213 | dtrace_dof_error(dof, "DOF uses too many tuple registers");
|
---|
12214 | return (-1);
|
---|
12215 | }
|
---|
12216 |
|
---|
12217 | for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
|
---|
12218 | if (dof->dofh_ident[i] != 0) {
|
---|
12219 | dtrace_dof_error(dof, "DOF has invalid ident byte set");
|
---|
12220 | return (-1);
|
---|
12221 | }
|
---|
12222 | }
|
---|
12223 |
|
---|
12224 | if (dof->dofh_flags & ~DOF_FL_VALID) {
|
---|
12225 | dtrace_dof_error(dof, "DOF has invalid flag bits set");
|
---|
12226 | return (-1);
|
---|
12227 | }
|
---|
12228 |
|
---|
12229 | if (dof->dofh_secsize == 0) {
|
---|
12230 | dtrace_dof_error(dof, "zero section header size");
|
---|
12231 | return (-1);
|
---|
12232 | }
|
---|
12233 |
|
---|
12234 | /*
|
---|
12235 | * Check that the section headers don't exceed the amount of DOF
|
---|
12236 | * data. Note that we cast the section size and number of sections
|
---|
12237 | * to uint64_t's to prevent possible overflow in the multiplication.
|
---|
12238 | */
|
---|
12239 | seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;
|
---|
12240 |
|
---|
12241 | if (dof->dofh_secoff > len || seclen > len ||
|
---|
12242 | dof->dofh_secoff + seclen > len) {
|
---|
12243 | dtrace_dof_error(dof, "truncated section headers");
|
---|
12244 | return (-1);
|
---|
12245 | }
|
---|
12246 |
|
---|
12247 | if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
|
---|
12248 | dtrace_dof_error(dof, "misaligned section headers");
|
---|
12249 | return (-1);
|
---|
12250 | }
|
---|
12251 |
|
---|
12252 | if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
|
---|
12253 | dtrace_dof_error(dof, "misaligned section size");
|
---|
12254 | return (-1);
|
---|
12255 | }
|
---|
12256 |
|
---|
12257 | /*
|
---|
12258 | * Take an initial pass through the section headers to be sure that
|
---|
12259 | * the headers don't have stray offsets. If the 'noprobes' flag is
|
---|
12260 | * set, do not permit sections relating to providers, probes, or args.
|
---|
12261 | */
|
---|
12262 | for (i = 0; i < dof->dofh_secnum; i++) {
|
---|
12263 | dof_sec_t *sec = (dof_sec_t *)(daddr +
|
---|
12264 | (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
12265 |
|
---|
12266 | if (noprobes) {
|
---|
12267 | switch (sec->dofs_type) {
|
---|
12268 | case DOF_SECT_PROVIDER:
|
---|
12269 | case DOF_SECT_PROBES:
|
---|
12270 | case DOF_SECT_PRARGS:
|
---|
12271 | case DOF_SECT_PROFFS:
|
---|
12272 | dtrace_dof_error(dof, "illegal sections "
|
---|
12273 | "for enabling");
|
---|
12274 | return (-1);
|
---|
12275 | }
|
---|
12276 | }
|
---|
12277 |
|
---|
12278 | if (DOF_SEC_ISLOADABLE(sec->dofs_type) &&
|
---|
12279 | !(sec->dofs_flags & DOF_SECF_LOAD)) {
|
---|
12280 | dtrace_dof_error(dof, "loadable section with load "
|
---|
12281 | "flag unset");
|
---|
12282 | return (-1);
|
---|
12283 | }
|
---|
12284 |
|
---|
12285 | if (!(sec->dofs_flags & DOF_SECF_LOAD))
|
---|
12286 | continue; /* just ignore non-loadable sections */
|
---|
12287 |
|
---|
12288 | if (sec->dofs_align & (sec->dofs_align - 1)) {
|
---|
12289 | dtrace_dof_error(dof, "bad section alignment");
|
---|
12290 | return (-1);
|
---|
12291 | }
|
---|
12292 |
|
---|
12293 | if (sec->dofs_offset & (sec->dofs_align - 1)) {
|
---|
12294 | dtrace_dof_error(dof, "misaligned section");
|
---|
12295 | return (-1);
|
---|
12296 | }
|
---|
12297 |
|
---|
12298 | if (sec->dofs_offset > len || sec->dofs_size > len ||
|
---|
12299 | sec->dofs_offset + sec->dofs_size > len) {
|
---|
12300 | dtrace_dof_error(dof, "corrupt section header");
|
---|
12301 | return (-1);
|
---|
12302 | }
|
---|
12303 |
|
---|
12304 | if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
|
---|
12305 | sec->dofs_offset + sec->dofs_size - 1) != '\0') {
|
---|
12306 | dtrace_dof_error(dof, "non-terminating string table");
|
---|
12307 | return (-1);
|
---|
12308 | }
|
---|
12309 | }
|
---|
12310 |
|
---|
12311 | /*
|
---|
12312 | * Take a second pass through the sections and locate and perform any
|
---|
12313 | * relocations that are present. We do this after the first pass to
|
---|
12314 | * be sure that all sections have had their headers validated.
|
---|
12315 | */
|
---|
12316 | for (i = 0; i < dof->dofh_secnum; i++) {
|
---|
12317 | dof_sec_t *sec = (dof_sec_t *)(daddr +
|
---|
12318 | (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
12319 |
|
---|
12320 | if (!(sec->dofs_flags & DOF_SECF_LOAD))
|
---|
12321 | continue; /* skip sections that are not loadable */
|
---|
12322 |
|
---|
12323 | switch (sec->dofs_type) {
|
---|
12324 | case DOF_SECT_URELHDR:
|
---|
12325 | if (dtrace_dof_relocate(dof, sec, ubase) != 0)
|
---|
12326 | return (-1);
|
---|
12327 | break;
|
---|
12328 | }
|
---|
12329 | }
|
---|
12330 |
|
---|
12331 | if ((enab = *enabp) == NULL)
|
---|
12332 | enab = *enabp = dtrace_enabling_create(vstate);
|
---|
12333 |
|
---|
12334 | for (i = 0; i < dof->dofh_secnum; i++) {
|
---|
12335 | dof_sec_t *sec = (dof_sec_t *)(daddr +
|
---|
12336 | (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
12337 |
|
---|
12338 | if (sec->dofs_type != DOF_SECT_ECBDESC)
|
---|
12339 | continue;
|
---|
12340 |
|
---|
12341 | if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
|
---|
12342 | dtrace_enabling_destroy(enab);
|
---|
12343 | *enabp = NULL;
|
---|
12344 | return (-1);
|
---|
12345 | }
|
---|
12346 |
|
---|
12347 | dtrace_enabling_add(enab, ep);
|
---|
12348 | }
|
---|
12349 |
|
---|
12350 | return (0);
|
---|
12351 | }
|
---|
12352 |
|
---|
12353 | /*
|
---|
12354 | * Process DOF for any options. This routine assumes that the DOF has been
|
---|
12355 | * at least processed by dtrace_dof_slurp().
|
---|
12356 | */
|
---|
12357 | static int
|
---|
12358 | dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
|
---|
12359 | {
|
---|
12360 | int i, rval;
|
---|
12361 | uint32_t entsize;
|
---|
12362 | size_t offs;
|
---|
12363 | dof_optdesc_t *desc;
|
---|
12364 |
|
---|
12365 | for (i = 0; VBDTCAST(unsigned)i < dof->dofh_secnum; i++) {
|
---|
12366 | dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
|
---|
12367 | (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
12368 |
|
---|
12369 | if (sec->dofs_type != DOF_SECT_OPTDESC)
|
---|
12370 | continue;
|
---|
12371 |
|
---|
12372 | if (sec->dofs_align != sizeof (uint64_t)) {
|
---|
12373 | dtrace_dof_error(dof, "bad alignment in "
|
---|
12374 | "option description");
|
---|
12375 | return (EINVAL);
|
---|
12376 | }
|
---|
12377 |
|
---|
12378 | if ((entsize = sec->dofs_entsize) == 0) {
|
---|
12379 | dtrace_dof_error(dof, "zeroed option entry size");
|
---|
12380 | return (EINVAL);
|
---|
12381 | }
|
---|
12382 |
|
---|
12383 | if (entsize < sizeof (dof_optdesc_t)) {
|
---|
12384 | dtrace_dof_error(dof, "bad option entry size");
|
---|
12385 | return (EINVAL);
|
---|
12386 | }
|
---|
12387 |
|
---|
12388 | for (offs = 0; offs < sec->dofs_size; offs += entsize) {
|
---|
12389 | desc = (dof_optdesc_t *)((uintptr_t)dof +
|
---|
12390 | (uintptr_t)sec->dofs_offset + offs);
|
---|
12391 |
|
---|
12392 | if (desc->dofo_strtab != DOF_SECIDX_NONE) {
|
---|
12393 | dtrace_dof_error(dof, "non-zero option string");
|
---|
12394 | return (EINVAL);
|
---|
12395 | }
|
---|
12396 |
|
---|
12397 | if (desc->dofo_value == VBDTCAST(uint64_t)DTRACEOPT_UNSET) {
|
---|
12398 | dtrace_dof_error(dof, "unset option");
|
---|
12399 | return (EINVAL);
|
---|
12400 | }
|
---|
12401 |
|
---|
12402 | if ((rval = dtrace_state_option(state,
|
---|
12403 | desc->dofo_option, desc->dofo_value)) != 0) {
|
---|
12404 | dtrace_dof_error(dof, "rejected option");
|
---|
12405 | return (rval);
|
---|
12406 | }
|
---|
12407 | }
|
---|
12408 | }
|
---|
12409 |
|
---|
12410 | return (0);
|
---|
12411 | }
|
---|
12412 |
|
---|
12413 | /*
|
---|
12414 | * DTrace Consumer State Functions
|
---|
12415 | */
|
---|
12416 | VBDTSTATIC int
|
---|
12417 | dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
|
---|
12418 | {
|
---|
12419 | size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
|
---|
12420 | void *base;
|
---|
12421 | uintptr_t limit;
|
---|
12422 | dtrace_dynvar_t *dvar, *next, *start;
|
---|
12423 | VBDTTYPE(size_t,int) i;
|
---|
12424 |
|
---|
12425 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
12426 | ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);
|
---|
12427 |
|
---|
12428 | bzero(dstate, sizeof (dtrace_dstate_t));
|
---|
12429 |
|
---|
12430 | if ((dstate->dtds_chunksize = chunksize) == 0)
|
---|
12431 | dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
|
---|
12432 |
|
---|
12433 | if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
|
---|
12434 | size = min;
|
---|
12435 |
|
---|
12436 | if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
|
---|
12437 | return (ENOMEM);
|
---|
12438 |
|
---|
12439 | dstate->dtds_size = size;
|
---|
12440 | dstate->dtds_base = base;
|
---|
12441 | dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
|
---|
12442 | bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));
|
---|
12443 |
|
---|
12444 | hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
|
---|
12445 |
|
---|
12446 | if (hashsize != 1 && (hashsize & 1))
|
---|
12447 | hashsize--;
|
---|
12448 |
|
---|
12449 | dstate->dtds_hashsize = hashsize;
|
---|
12450 | dstate->dtds_hash = dstate->dtds_base;
|
---|
12451 |
|
---|
12452 | /*
|
---|
12453 | * Set all of our hash buckets to point to the single sink, and (if
|
---|
12454 | * it hasn't already been set), set the sink's hash value to be the
|
---|
12455 | * sink sentinel value. The sink is needed for dynamic variable
|
---|
12456 | * lookups to know that they have iterated over an entire, valid hash
|
---|
12457 | * chain.
|
---|
12458 | */
|
---|
12459 | for (i = 0; i < hashsize; i++)
|
---|
12460 | dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;
|
---|
12461 |
|
---|
12462 | if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
|
---|
12463 | dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;
|
---|
12464 |
|
---|
12465 | /*
|
---|
12466 | * Determine number of active CPUs. Divide free list evenly among
|
---|
12467 | * active CPUs.
|
---|
12468 | */
|
---|
12469 | start = (dtrace_dynvar_t *)
|
---|
12470 | ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
|
---|
12471 | limit = (uintptr_t)base + size;
|
---|
12472 |
|
---|
12473 | maxper = (limit - (uintptr_t)start) / NCPU;
|
---|
12474 | maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
|
---|
12475 |
|
---|
12476 | for (i = 0; i < NCPU; i++) {
|
---|
12477 | dstate->dtds_percpu[i].dtdsc_free = dvar = start;
|
---|
12478 |
|
---|
12479 | /*
|
---|
12480 | * If we don't even have enough chunks to make it once through
|
---|
12481 | * NCPUs, we're just going to allocate everything to the first
|
---|
12482 | * CPU. And if we're on the last CPU, we're going to allocate
|
---|
12483 | * whatever is left over. In either case, we set the limit to
|
---|
12484 | * be the limit of the dynamic variable space.
|
---|
12485 | */
|
---|
12486 | if (maxper == 0 || i == NCPU - 1) {
|
---|
12487 | limit = (uintptr_t)base + size;
|
---|
12488 | start = NULL;
|
---|
12489 | } else {
|
---|
12490 | limit = (uintptr_t)start + maxper;
|
---|
12491 | start = (dtrace_dynvar_t *)limit;
|
---|
12492 | }
|
---|
12493 |
|
---|
12494 | ASSERT(limit <= (uintptr_t)base + size);
|
---|
12495 |
|
---|
12496 | for (;;) {
|
---|
12497 | next = (dtrace_dynvar_t *)((uintptr_t)dvar +
|
---|
12498 | dstate->dtds_chunksize);
|
---|
12499 |
|
---|
12500 | if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
|
---|
12501 | break;
|
---|
12502 |
|
---|
12503 | dvar->dtdv_next = next;
|
---|
12504 | dvar = next;
|
---|
12505 | }
|
---|
12506 |
|
---|
12507 | if (maxper == 0)
|
---|
12508 | break;
|
---|
12509 | }
|
---|
12510 |
|
---|
12511 | return (0);
|
---|
12512 | }
|
---|
12513 |
|
---|
12514 | VBDTSTATIC void
|
---|
12515 | dtrace_dstate_fini(dtrace_dstate_t *dstate)
|
---|
12516 | {
|
---|
12517 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
12518 |
|
---|
12519 | if (dstate->dtds_base == NULL)
|
---|
12520 | return;
|
---|
12521 |
|
---|
12522 | kmem_free(dstate->dtds_base, dstate->dtds_size);
|
---|
12523 | kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
|
---|
12524 | }
|
---|
12525 |
|
---|
12526 | static void
|
---|
12527 | dtrace_vstate_fini(dtrace_vstate_t *vstate)
|
---|
12528 | {
|
---|
12529 | /*
|
---|
12530 | * Logical XOR, where are you?
|
---|
12531 | */
|
---|
12532 | ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));
|
---|
12533 |
|
---|
12534 | if (vstate->dtvs_nglobals > 0) {
|
---|
12535 | kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
|
---|
12536 | sizeof (dtrace_statvar_t *));
|
---|
12537 | }
|
---|
12538 |
|
---|
12539 | if (vstate->dtvs_ntlocals > 0) {
|
---|
12540 | kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
|
---|
12541 | sizeof (dtrace_difv_t));
|
---|
12542 | }
|
---|
12543 |
|
---|
12544 | ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));
|
---|
12545 |
|
---|
12546 | if (vstate->dtvs_nlocals > 0) {
|
---|
12547 | kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
|
---|
12548 | sizeof (dtrace_statvar_t *));
|
---|
12549 | }
|
---|
12550 | }
|
---|
12551 |
|
---|
12552 | static void
|
---|
12553 | dtrace_state_clean(dtrace_state_t *state)
|
---|
12554 | {
|
---|
12555 | if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
|
---|
12556 | return;
|
---|
12557 |
|
---|
12558 | dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
|
---|
12559 | dtrace_speculation_clean(state);
|
---|
12560 | }
|
---|
#ifdef VBOX
/*
 * Callback adapter:  invokes dtrace_state_clean() on the state passed via
 * pvUser.  The timer handle and tick count are not used.
 */
static DECLCALLBACK(void) dtrace_state_clean_timer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
	NOREF(pTimer);
	NOREF(iTick);
	dtrace_state_clean((dtrace_state_t *)pvUser);
}
#endif
12568 |
|
---|
12569 | static void
|
---|
12570 | dtrace_state_deadman(dtrace_state_t *state)
|
---|
12571 | {
|
---|
12572 | hrtime_t now;
|
---|
12573 |
|
---|
12574 | dtrace_sync();
|
---|
12575 |
|
---|
12576 | now = dtrace_gethrtime();
|
---|
12577 |
|
---|
12578 | if (state != dtrace_anon.dta_state &&
|
---|
12579 | now - state->dts_laststatus >= dtrace_deadman_user)
|
---|
12580 | return;
|
---|
12581 |
|
---|
12582 | /*
|
---|
12583 | * We must be sure that dts_alive never appears to be less than the
|
---|
12584 | * value upon entry to dtrace_state_deadman(), and because we lack a
|
---|
12585 | * dtrace_cas64(), we cannot store to it atomically. We thus instead
|
---|
12586 | * store INT64_MAX to it, followed by a memory barrier, followed by
|
---|
12587 | * the new value. This assures that dts_alive never appears to be
|
---|
12588 | * less than its true value, regardless of the order in which the
|
---|
12589 | * stores to the underlying storage are issued.
|
---|
12590 | */
|
---|
12591 | state->dts_alive = INT64_MAX;
|
---|
12592 | dtrace_membar_producer();
|
---|
12593 | state->dts_alive = now;
|
---|
12594 | }
|
---|
12595 |
|
---|
#ifdef VBOX
/*
 * Callback adapter:  invokes dtrace_state_deadman() on the state passed via
 * pvUser.  The timer handle and tick count are not used.
 */
static DECLCALLBACK(void) dtrace_state_deadman_timer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
	NOREF(pTimer);
	NOREF(iTick);
	dtrace_state_deadman((dtrace_state_t *)pvUser);
}
#endif
12603 |
|
---|
12604 | VBDTSTATIC dtrace_state_t *
|
---|
12605 | #ifdef VBOX
|
---|
12606 | dtrace_state_create(cred_t *cr)
|
---|
12607 | #else
|
---|
12608 | dtrace_state_create(dev_t *devp, cred_t *cr)
|
---|
12609 | #endif
|
---|
12610 | {
|
---|
12611 | #ifndef VBOX
|
---|
12612 | minor_t minor;
|
---|
12613 | major_t major;
|
---|
12614 | #endif
|
---|
12615 | char c[30];
|
---|
12616 | dtrace_state_t *state;
|
---|
12617 | dtrace_optval_t *opt;
|
---|
12618 | int bufsize = NCPU * sizeof (dtrace_buffer_t), i;
|
---|
12619 |
|
---|
12620 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
12621 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
12622 |
|
---|
12623 | #ifndef VBOX
|
---|
12624 | minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
|
---|
12625 | VM_BESTFIT | VM_SLEEP);
|
---|
12626 |
|
---|
12627 | if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
|
---|
12628 | vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
|
---|
12629 | return (NULL);
|
---|
12630 | }
|
---|
12631 |
|
---|
12632 | state = ddi_get_soft_state(dtrace_softstate, minor);
|
---|
12633 | #else
|
---|
12634 | state = kmem_zalloc(sizeof (*state), KM_SLEEP);
|
---|
12635 | if (!state) {
|
---|
12636 | return (NULL);
|
---|
12637 | }
|
---|
12638 | #endif
|
---|
12639 | state->dts_epid = DTRACE_EPIDNONE + 1;
|
---|
12640 |
|
---|
12641 | #ifndef VBOX
|
---|
12642 | (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
|
---|
12643 | #else
|
---|
12644 | (void) snprintf(c, sizeof (c), "dtrace_aggid_%p", state);
|
---|
12645 | #endif
|
---|
12646 | #ifndef VBOX /* Avoid idProbe = UINT32_MAX as it is used as invalid value by VTG. */
|
---|
12647 | state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
|
---|
12648 | NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
|
---|
12649 | #else
|
---|
12650 | state->dts_aggid_arena = vmem_create(c, (void *)(uintptr_t)1, _1G, 1,
|
---|
12651 | NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
|
---|
12652 | #endif
|
---|
12653 |
|
---|
12654 | #ifndef VBOX
|
---|
12655 | if (devp != NULL) {
|
---|
12656 | major = getemajor(*devp);
|
---|
12657 | } else {
|
---|
12658 | major = ddi_driver_major(dtrace_devi);
|
---|
12659 | }
|
---|
12660 |
|
---|
12661 | state->dts_dev = makedevice(major, minor);
|
---|
12662 |
|
---|
12663 | if (devp != NULL)
|
---|
12664 | *devp = state->dts_dev;
|
---|
12665 | #endif
|
---|
12666 |
|
---|
12667 | /*
|
---|
12668 | * We allocate NCPU buffers. On the one hand, this can be quite
|
---|
12669 | * a bit of memory per instance (nearly 36K on a Starcat). On the
|
---|
12670 | * other hand, it saves an additional memory reference in the probe
|
---|
12671 | * path.
|
---|
12672 | */
|
---|
12673 | state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
|
---|
12674 | state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
|
---|
12675 | state->dts_cleaner = CYCLIC_NONE;
|
---|
12676 | state->dts_deadman = CYCLIC_NONE;
|
---|
12677 | state->dts_vstate.dtvs_state = state;
|
---|
12678 |
|
---|
12679 | for (i = 0; i < DTRACEOPT_MAX; i++)
|
---|
12680 | state->dts_options[i] = DTRACEOPT_UNSET;
|
---|
12681 |
|
---|
12682 | /*
|
---|
12683 | * Set the default options.
|
---|
12684 | */
|
---|
12685 | opt = state->dts_options;
|
---|
12686 | opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
|
---|
12687 | opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
|
---|
12688 | opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
|
---|
12689 | opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
|
---|
12690 | opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
|
---|
12691 | opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
|
---|
12692 | opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
|
---|
12693 | opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
|
---|
12694 | opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
|
---|
12695 | opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
|
---|
12696 | opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
|
---|
12697 | opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
|
---|
12698 | opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
|
---|
12699 | opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
|
---|
12700 |
|
---|
12701 | state->dts_activity = DTRACE_ACTIVITY_INACTIVE;
|
---|
12702 |
|
---|
12703 | /*
|
---|
12704 | * Depending on the user credentials, we set flag bits which alter probe
|
---|
12705 | * visibility or the amount of destructiveness allowed. In the case of
|
---|
12706 | * actual anonymous tracing, or the possession of all privileges, all of
|
---|
12707 | * the normal checks are bypassed.
|
---|
12708 | */
|
---|
12709 | if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
|
---|
12710 | state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
|
---|
12711 | state->dts_cred.dcr_action = DTRACE_CRA_ALL;
|
---|
12712 | } else {
|
---|
12713 | /*
|
---|
12714 | * Set up the credentials for this instantiation. We take a
|
---|
12715 | * hold on the credential to prevent it from disappearing on
|
---|
12716 | * us; this in turn prevents the zone_t referenced by this
|
---|
12717 | * credential from disappearing. This means that we can
|
---|
12718 | * examine the credential and the zone from probe context.
|
---|
12719 | */
|
---|
12720 | crhold(cr);
|
---|
12721 | state->dts_cred.dcr_cred = cr;
|
---|
12722 |
|
---|
12723 | /*
|
---|
12724 | * CRA_PROC means "we have *some* privilege for dtrace" and
|
---|
12725 | * unlocks the use of variables like pid, zonename, etc.
|
---|
12726 | */
|
---|
12727 | if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
|
---|
12728 | PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
|
---|
12729 | state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
|
---|
12730 | }
|
---|
12731 |
|
---|
12732 | /*
|
---|
12733 | * dtrace_user allows use of syscall and profile providers.
|
---|
12734 | * If the user also has proc_owner and/or proc_zone, we
|
---|
12735 | * extend the scope to include additional visibility and
|
---|
12736 | * destructive power.
|
---|
12737 | */
|
---|
12738 | if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
|
---|
12739 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
|
---|
12740 | state->dts_cred.dcr_visible |=
|
---|
12741 | DTRACE_CRV_ALLPROC;
|
---|
12742 |
|
---|
12743 | state->dts_cred.dcr_action |=
|
---|
12744 | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
|
---|
12745 | }
|
---|
12746 |
|
---|
12747 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
|
---|
12748 | state->dts_cred.dcr_visible |=
|
---|
12749 | DTRACE_CRV_ALLZONE;
|
---|
12750 |
|
---|
12751 | state->dts_cred.dcr_action |=
|
---|
12752 | DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
|
---|
12753 | }
|
---|
12754 |
|
---|
12755 | /*
|
---|
12756 | * If we have all privs in whatever zone this is,
|
---|
12757 | * we can do destructive things to processes which
|
---|
12758 | * have altered credentials.
|
---|
12759 | */
|
---|
12760 | if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
|
---|
12761 | cr->cr_zone->zone_privset)) {
|
---|
12762 | state->dts_cred.dcr_action |=
|
---|
12763 | DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
|
---|
12764 | }
|
---|
12765 | }
|
---|
12766 |
|
---|
12767 | /*
|
---|
12768 | * Holding the dtrace_kernel privilege also implies that
|
---|
12769 | * the user has the dtrace_user privilege from a visibility
|
---|
12770 | * perspective. But without further privileges, some
|
---|
12771 | * destructive actions are not available.
|
---|
12772 | */
|
---|
12773 | if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
|
---|
12774 | /*
|
---|
12775 | * Make all probes in all zones visible. However,
|
---|
12776 | * this doesn't mean that all actions become available
|
---|
12777 | * to all zones.
|
---|
12778 | */
|
---|
12779 | state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
|
---|
12780 | DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
|
---|
12781 |
|
---|
12782 | state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
|
---|
12783 | DTRACE_CRA_PROC;
|
---|
12784 | /*
|
---|
12785 | * Holding proc_owner means that destructive actions
|
---|
12786 | * for *this* zone are allowed.
|
---|
12787 | */
|
---|
12788 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
|
---|
12789 | state->dts_cred.dcr_action |=
|
---|
12790 | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
|
---|
12791 |
|
---|
12792 | /*
|
---|
12793 | * Holding proc_zone means that destructive actions
|
---|
12794 | * for this user/group ID in all zones is allowed.
|
---|
12795 | */
|
---|
12796 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
|
---|
12797 | state->dts_cred.dcr_action |=
|
---|
12798 | DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
|
---|
12799 |
|
---|
12800 | /*
|
---|
12801 | * If we have all privs in whatever zone this is,
|
---|
12802 | * we can do destructive things to processes which
|
---|
12803 | * have altered credentials.
|
---|
12804 | */
|
---|
12805 | if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
|
---|
12806 | cr->cr_zone->zone_privset)) {
|
---|
12807 | state->dts_cred.dcr_action |=
|
---|
12808 | DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
|
---|
12809 | }
|
---|
12810 | }
|
---|
12811 |
|
---|
12812 | /*
|
---|
12813 | * Holding the dtrace_proc privilege gives control over fasttrap
|
---|
12814 | * and pid providers. We need to grant wider destructive
|
---|
12815 | * privileges in the event that the user has proc_owner and/or
|
---|
12816 | * proc_zone.
|
---|
12817 | */
|
---|
12818 | if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
|
---|
12819 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
|
---|
12820 | state->dts_cred.dcr_action |=
|
---|
12821 | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
|
---|
12822 |
|
---|
12823 | if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
|
---|
12824 | state->dts_cred.dcr_action |=
|
---|
12825 | DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
|
---|
12826 | }
|
---|
12827 | }
|
---|
12828 |
|
---|
12829 | return (state);
|
---|
12830 | }
|
---|
12831 |
|
---|
12832 | static int
|
---|
12833 | dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
|
---|
12834 | {
|
---|
12835 | dtrace_optval_t *opt = state->dts_options, size;
|
---|
12836 | processorid_t cpu VBDTUNASS((processorid_t)DTRACE_CPUALL);
|
---|
12837 | int flags = 0, rval;
|
---|
12838 |
|
---|
12839 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
12840 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
12841 | ASSERT(which < DTRACEOPT_MAX);
|
---|
12842 | ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
|
---|
12843 | (state == dtrace_anon.dta_state &&
|
---|
12844 | state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
|
---|
12845 |
|
---|
12846 | if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
|
---|
12847 | return (0);
|
---|
12848 |
|
---|
12849 | if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
|
---|
12850 | cpu = opt[DTRACEOPT_CPU];
|
---|
12851 |
|
---|
12852 | if (which == DTRACEOPT_SPECSIZE)
|
---|
12853 | flags |= DTRACEBUF_NOSWITCH;
|
---|
12854 |
|
---|
12855 | if (which == DTRACEOPT_BUFSIZE) {
|
---|
12856 | if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
|
---|
12857 | flags |= DTRACEBUF_RING;
|
---|
12858 |
|
---|
12859 | if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
|
---|
12860 | flags |= DTRACEBUF_FILL;
|
---|
12861 |
|
---|
12862 | if (state != dtrace_anon.dta_state ||
|
---|
12863 | state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
|
---|
12864 | flags |= DTRACEBUF_INACTIVE;
|
---|
12865 | }
|
---|
12866 |
|
---|
12867 | for (size = opt[which]; size >= VBDTCAST(dtrace_optval_t)sizeof (uint64_t); size >>= 1) {
|
---|
12868 | /*
|
---|
12869 | * The size must be 8-byte aligned. If the size is not 8-byte
|
---|
12870 | * aligned, drop it down by the difference.
|
---|
12871 | */
|
---|
12872 | if (size & (sizeof (uint64_t) - 1))
|
---|
12873 | size -= size & (sizeof (uint64_t) - 1);
|
---|
12874 |
|
---|
12875 | if (size < state->dts_reserve) {
|
---|
12876 | /*
|
---|
12877 | * Buffers always must be large enough to accommodate
|
---|
12878 | * their prereserved space. We return E2BIG instead
|
---|
12879 | * of ENOMEM in this case to allow for user-level
|
---|
12880 | * software to differentiate the cases.
|
---|
12881 | */
|
---|
12882 | return (E2BIG);
|
---|
12883 | }
|
---|
12884 |
|
---|
12885 | rval = dtrace_buffer_alloc(buf, size, flags, cpu);
|
---|
12886 |
|
---|
12887 | if (rval != ENOMEM) {
|
---|
12888 | opt[which] = size;
|
---|
12889 | return (rval);
|
---|
12890 | }
|
---|
12891 |
|
---|
12892 | if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
|
---|
12893 | return (rval);
|
---|
12894 | }
|
---|
12895 |
|
---|
12896 | return (ENOMEM);
|
---|
12897 | }
|
---|
12898 |
|
---|
12899 | static int
|
---|
12900 | dtrace_state_buffers(dtrace_state_t *state)
|
---|
12901 | {
|
---|
12902 | dtrace_speculation_t *spec = state->dts_speculations;
|
---|
12903 | int rval, i;
|
---|
12904 |
|
---|
12905 | if ((rval = dtrace_state_buffer(state, state->dts_buffer,
|
---|
12906 | DTRACEOPT_BUFSIZE)) != 0)
|
---|
12907 | return (rval);
|
---|
12908 |
|
---|
12909 | if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
|
---|
12910 | DTRACEOPT_AGGSIZE)) != 0)
|
---|
12911 | return (rval);
|
---|
12912 |
|
---|
12913 | for (i = 0; i < state->dts_nspeculations; i++) {
|
---|
12914 | if ((rval = dtrace_state_buffer(state,
|
---|
12915 | spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
|
---|
12916 | return (rval);
|
---|
12917 | }
|
---|
12918 |
|
---|
12919 | return (0);
|
---|
12920 | }
|
---|
12921 |
|
---|
12922 | static void
|
---|
12923 | dtrace_state_prereserve(dtrace_state_t *state)
|
---|
12924 | {
|
---|
12925 | dtrace_ecb_t *ecb;
|
---|
12926 | dtrace_probe_t *probe;
|
---|
12927 |
|
---|
12928 | state->dts_reserve = 0;
|
---|
12929 |
|
---|
12930 | if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
|
---|
12931 | return;
|
---|
12932 |
|
---|
12933 | /*
|
---|
12934 | * If our buffer policy is a "fill" buffer policy, we need to set the
|
---|
12935 | * prereserved space to be the space required by the END probes.
|
---|
12936 | */
|
---|
12937 | probe = dtrace_probes[dtrace_probeid_end - 1];
|
---|
12938 | ASSERT(probe != NULL);
|
---|
12939 |
|
---|
12940 | for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
|
---|
12941 | if (ecb->dte_state != state)
|
---|
12942 | continue;
|
---|
12943 |
|
---|
12944 | state->dts_reserve += VBDTCAST(uint32_t)ecb->dte_needed + ecb->dte_alignment;
|
---|
12945 | }
|
---|
12946 | }
|
---|
12947 |
|
---|
12948 | static int
|
---|
12949 | dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
|
---|
12950 | {
|
---|
12951 | dtrace_optval_t *opt = state->dts_options, sz, nspec;
|
---|
12952 | dtrace_speculation_t *spec;
|
---|
12953 | dtrace_buffer_t *buf;
|
---|
12954 | #ifndef VBOX
|
---|
12955 | cyc_handler_t hdlr;
|
---|
12956 | cyc_time_t when;
|
---|
12957 | #endif
|
---|
12958 | int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
|
---|
12959 | dtrace_icookie_t cookie;
|
---|
12960 |
|
---|
12961 | mutex_enter(&cpu_lock);
|
---|
12962 | mutex_enter(&dtrace_lock);
|
---|
12963 |
|
---|
12964 | if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
|
---|
12965 | rval = EBUSY;
|
---|
12966 | goto out;
|
---|
12967 | }
|
---|
12968 |
|
---|
12969 | /*
|
---|
12970 | * Before we can perform any checks, we must prime all of the
|
---|
12971 | * retained enablings that correspond to this state.
|
---|
12972 | */
|
---|
12973 | dtrace_enabling_prime(state);
|
---|
12974 |
|
---|
12975 | if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
|
---|
12976 | rval = EACCES;
|
---|
12977 | goto out;
|
---|
12978 | }
|
---|
12979 |
|
---|
12980 | dtrace_state_prereserve(state);
|
---|
12981 |
|
---|
12982 | /*
|
---|
12983 | * Now we want to do is try to allocate our speculations.
|
---|
12984 | * We do not automatically resize the number of speculations; if
|
---|
12985 | * this fails, we will fail the operation.
|
---|
12986 | */
|
---|
12987 | nspec = opt[DTRACEOPT_NSPEC];
|
---|
12988 | ASSERT(nspec != DTRACEOPT_UNSET);
|
---|
12989 |
|
---|
12990 | if (nspec > INT_MAX) {
|
---|
12991 | rval = ENOMEM;
|
---|
12992 | goto out;
|
---|
12993 | }
|
---|
12994 |
|
---|
12995 | spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);
|
---|
12996 |
|
---|
12997 | if (spec == NULL) {
|
---|
12998 | rval = ENOMEM;
|
---|
12999 | goto out;
|
---|
13000 | }
|
---|
13001 |
|
---|
13002 | state->dts_speculations = spec;
|
---|
13003 | state->dts_nspeculations = (int)nspec;
|
---|
13004 |
|
---|
13005 | for (i = 0; i < nspec; i++) {
|
---|
13006 | if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
|
---|
13007 | rval = ENOMEM;
|
---|
13008 | goto err;
|
---|
13009 | }
|
---|
13010 |
|
---|
13011 | spec[i].dtsp_buffer = buf;
|
---|
13012 | }
|
---|
13013 |
|
---|
13014 | if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
|
---|
13015 | if (dtrace_anon.dta_state == NULL) {
|
---|
13016 | rval = ENOENT;
|
---|
13017 | goto out;
|
---|
13018 | }
|
---|
13019 |
|
---|
13020 | if (state->dts_necbs != 0) {
|
---|
13021 | rval = EALREADY;
|
---|
13022 | goto out;
|
---|
13023 | }
|
---|
13024 |
|
---|
13025 | state->dts_anon = dtrace_anon_grab();
|
---|
13026 | ASSERT(state->dts_anon != NULL);
|
---|
13027 | state = state->dts_anon;
|
---|
13028 |
|
---|
13029 | /*
|
---|
13030 | * We want "grabanon" to be set in the grabbed state, so we'll
|
---|
13031 | * copy that option value from the grabbing state into the
|
---|
13032 | * grabbed state.
|
---|
13033 | */
|
---|
13034 | state->dts_options[DTRACEOPT_GRABANON] =
|
---|
13035 | opt[DTRACEOPT_GRABANON];
|
---|
13036 |
|
---|
13037 | *cpu = dtrace_anon.dta_beganon;
|
---|
13038 |
|
---|
13039 | /*
|
---|
13040 | * If the anonymous state is active (as it almost certainly
|
---|
13041 | * is if the anonymous enabling ultimately matched anything),
|
---|
13042 | * we don't allow any further option processing -- but we
|
---|
13043 | * don't return failure.
|
---|
13044 | */
|
---|
13045 | if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
|
---|
13046 | goto out;
|
---|
13047 | }
|
---|
13048 |
|
---|
13049 | if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
|
---|
13050 | opt[DTRACEOPT_AGGSIZE] != 0) {
|
---|
13051 | if (state->dts_aggregations == NULL) {
|
---|
13052 | /*
|
---|
13053 | * We're not going to create an aggregation buffer
|
---|
13054 | * because we don't have any ECBs that contain
|
---|
13055 | * aggregations -- set this option to 0.
|
---|
13056 | */
|
---|
13057 | opt[DTRACEOPT_AGGSIZE] = 0;
|
---|
13058 | } else {
|
---|
13059 | /*
|
---|
13060 | * If we have an aggregation buffer, we must also have
|
---|
13061 | * a buffer to use as scratch.
|
---|
13062 | */
|
---|
13063 | if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
|
---|
13064 | opt[DTRACEOPT_BUFSIZE] < VBDTCAST(dtrace_optval_t)state->dts_needed) {
|
---|
13065 | opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
|
---|
13066 | }
|
---|
13067 | }
|
---|
13068 | }
|
---|
13069 |
|
---|
13070 | if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
|
---|
13071 | opt[DTRACEOPT_SPECSIZE] != 0) {
|
---|
13072 | if (!state->dts_speculates) {
|
---|
13073 | /*
|
---|
13074 | * We're not going to create speculation buffers
|
---|
13075 | * because we don't have any ECBs that actually
|
---|
13076 | * speculate -- set the speculation size to 0.
|
---|
13077 | */
|
---|
13078 | opt[DTRACEOPT_SPECSIZE] = 0;
|
---|
13079 | }
|
---|
13080 | }
|
---|
13081 |
|
---|
13082 | /*
|
---|
13083 | * The bare minimum size for any buffer that we're actually going to
|
---|
13084 | * do anything to is sizeof (uint64_t).
|
---|
13085 | */
|
---|
13086 | sz = sizeof (uint64_t);
|
---|
13087 |
|
---|
13088 | if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
|
---|
13089 | (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
|
---|
13090 | (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
|
---|
13091 | /*
|
---|
13092 | * A buffer size has been explicitly set to 0 (or to a size
|
---|
13093 | * that will be adjusted to 0) and we need the space -- we
|
---|
13094 | * need to return failure. We return ENOSPC to differentiate
|
---|
13095 | * it from failing to allocate a buffer due to failure to meet
|
---|
13096 | * the reserve (for which we return E2BIG).
|
---|
13097 | */
|
---|
13098 | rval = ENOSPC;
|
---|
13099 | goto out;
|
---|
13100 | }
|
---|
13101 |
|
---|
13102 | if ((rval = dtrace_state_buffers(state)) != 0)
|
---|
13103 | goto err;
|
---|
13104 |
|
---|
13105 | if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
|
---|
13106 | sz = dtrace_dstate_defsize;
|
---|
13107 |
|
---|
13108 | do {
|
---|
13109 | rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);
|
---|
13110 |
|
---|
13111 | if (rval == 0)
|
---|
13112 | break;
|
---|
13113 |
|
---|
13114 | if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
|
---|
13115 | goto err;
|
---|
13116 | } while (sz >>= 1);
|
---|
13117 |
|
---|
13118 | opt[DTRACEOPT_DYNVARSIZE] = sz;
|
---|
13119 |
|
---|
13120 | if (rval != 0)
|
---|
13121 | goto err;
|
---|
13122 |
|
---|
13123 | if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
|
---|
13124 | opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;
|
---|
13125 |
|
---|
13126 | if (opt[DTRACEOPT_CLEANRATE] == 0)
|
---|
13127 | opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
|
---|
13128 |
|
---|
13129 | if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
|
---|
13130 | opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;
|
---|
13131 |
|
---|
13132 | if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
|
---|
13133 | opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
|
---|
13134 |
|
---|
13135 | #ifndef VBOX
|
---|
13136 | hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
|
---|
13137 | hdlr.cyh_arg = state;
|
---|
13138 | hdlr.cyh_level = CY_LOW_LEVEL;
|
---|
13139 |
|
---|
13140 | when.cyt_when = 0;
|
---|
13141 | when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
|
---|
13142 |
|
---|
13143 | state->dts_cleaner = cyclic_add(&hdlr, &when);
|
---|
13144 |
|
---|
13145 | hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
|
---|
13146 | hdlr.cyh_arg = state;
|
---|
13147 | hdlr.cyh_level = CY_LOW_LEVEL;
|
---|
13148 |
|
---|
13149 | when.cyt_when = 0;
|
---|
13150 | when.cyt_interval = dtrace_deadman_interval;
|
---|
13151 |
|
---|
13152 | state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
|
---|
13153 | state->dts_deadman = cyclic_add(&hdlr, &when);
|
---|
13154 | #else /* VBOX */
|
---|
13155 |
|
---|
13156 | rval = RTTimerCreateEx(&state->dts_cleaner, opt[DTRACEOPT_CLEANRATE],
|
---|
13157 | RTTIMER_FLAGS_CPU_ANY, dtrace_state_clean_timer, state);
|
---|
13158 | if (RT_FAILURE(rval)) {
|
---|
13159 | rval = RTErrConvertToErrno(rval);
|
---|
13160 | goto err;
|
---|
13161 | }
|
---|
13162 |
|
---|
13163 | state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
|
---|
13164 | rval = RTTimerCreateEx(&state->dts_deadman, dtrace_deadman_interval,
|
---|
13165 | RTTIMER_FLAGS_CPU_ANY, dtrace_state_deadman_timer, state);
|
---|
13166 | if (RT_FAILURE(rval)) {
|
---|
13167 | RTTimerDestroy(state->dts_cleaner);
|
---|
13168 | state->dts_cleaner = CYCLIC_NONE;
|
---|
13169 | state->dts_deadman = CYCLIC_NONE;
|
---|
13170 | rval = RTErrConvertToErrno(rval);
|
---|
13171 | goto err;
|
---|
13172 | }
|
---|
13173 |
|
---|
13174 | rval = RTTimerStart(state->dts_cleaner, 0);
|
---|
13175 | if (RT_SUCCESS(rval))
|
---|
13176 | rval = RTTimerStart(state->dts_deadman, 0);
|
---|
13177 | if (RT_FAILURE(rval)) {
|
---|
13178 | rval = RTErrConvertToErrno(rval);
|
---|
13179 | goto err;
|
---|
13180 | }
|
---|
13181 | #endif /* VBOX */
|
---|
13182 |
|
---|
13183 | state->dts_activity = DTRACE_ACTIVITY_WARMUP;
|
---|
13184 |
|
---|
13185 | /*
|
---|
13186 | * Now it's time to actually fire the BEGIN probe. We need to disable
|
---|
13187 | * interrupts here both to record the CPU on which we fired the BEGIN
|
---|
13188 | * probe (the data from this CPU will be processed first at user
|
---|
13189 | * level) and to manually activate the buffer for this CPU.
|
---|
13190 | */
|
---|
13191 | cookie = dtrace_interrupt_disable();
|
---|
13192 | *cpu = VBDT_GET_CPUID();
|
---|
13193 | ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
|
---|
13194 | state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
|
---|
13195 |
|
---|
13196 | dtrace_probe(dtrace_probeid_begin,
|
---|
13197 | (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
|
---|
13198 | dtrace_interrupt_enable(cookie);
|
---|
13199 | /*
|
---|
13200 | * We may have had an exit action from a BEGIN probe; only change our
|
---|
13201 | * state to ACTIVE if we're still in WARMUP.
|
---|
13202 | */
|
---|
13203 | ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
|
---|
13204 | state->dts_activity == DTRACE_ACTIVITY_DRAINING);
|
---|
13205 |
|
---|
13206 | if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
|
---|
13207 | state->dts_activity = DTRACE_ACTIVITY_ACTIVE;
|
---|
13208 |
|
---|
13209 | /*
|
---|
13210 | * Regardless of whether or not now we're in ACTIVE or DRAINING, we
|
---|
13211 | * want each CPU to transition its principal buffer out of the
|
---|
13212 | * INACTIVE state. Doing this assures that no CPU will suddenly begin
|
---|
13213 | * processing an ECB halfway down a probe's ECB chain; all CPUs will
|
---|
13214 | * atomically transition from processing none of a state's ECBs to
|
---|
13215 | * processing all of them.
|
---|
13216 | */
|
---|
13217 | #ifndef VBOX
|
---|
13218 | dtrace_xcall(DTRACE_CPUALL,
|
---|
13219 | (dtrace_xcall_t)dtrace_buffer_activate, state);
|
---|
13220 | #else
|
---|
13221 | RTMpOnAll(dtrace_buffer_activate_wrapper, state, NULL);
|
---|
13222 | #endif
|
---|
13223 | goto out;
|
---|
13224 |
|
---|
13225 | err:
|
---|
13226 | dtrace_buffer_free(state->dts_buffer);
|
---|
13227 | dtrace_buffer_free(state->dts_aggbuffer);
|
---|
13228 |
|
---|
13229 | if ((nspec = state->dts_nspeculations) == 0) {
|
---|
13230 | ASSERT(state->dts_speculations == NULL);
|
---|
13231 | goto out;
|
---|
13232 | }
|
---|
13233 |
|
---|
13234 | spec = state->dts_speculations;
|
---|
13235 | ASSERT(spec != NULL);
|
---|
13236 |
|
---|
13237 | for (i = 0; i < state->dts_nspeculations; i++) {
|
---|
13238 | if ((buf = spec[i].dtsp_buffer) == NULL)
|
---|
13239 | break;
|
---|
13240 |
|
---|
13241 | dtrace_buffer_free(buf);
|
---|
13242 | kmem_free(buf, bufsize);
|
---|
13243 | }
|
---|
13244 |
|
---|
13245 | kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
|
---|
13246 | state->dts_nspeculations = 0;
|
---|
13247 | state->dts_speculations = NULL;
|
---|
13248 |
|
---|
13249 | out:
|
---|
13250 | mutex_exit(&dtrace_lock);
|
---|
13251 | mutex_exit(&cpu_lock);
|
---|
13252 |
|
---|
13253 | return (rval);
|
---|
13254 | }
|
---|
13255 |
|
---|
13256 | static int
|
---|
13257 | dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
|
---|
13258 | {
|
---|
13259 | dtrace_icookie_t cookie;
|
---|
13260 |
|
---|
13261 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
13262 |
|
---|
13263 | if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
|
---|
13264 | state->dts_activity != DTRACE_ACTIVITY_DRAINING)
|
---|
13265 | return (EINVAL);
|
---|
13266 |
|
---|
13267 | /*
|
---|
13268 | * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
|
---|
13269 | * to be sure that every CPU has seen it. See below for the details
|
---|
13270 | * on why this is done.
|
---|
13271 | */
|
---|
13272 | state->dts_activity = DTRACE_ACTIVITY_DRAINING;
|
---|
13273 | dtrace_sync();
|
---|
13274 |
|
---|
13275 | /*
|
---|
13276 | * By this point, it is impossible for any CPU to be still processing
|
---|
13277 | * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
|
---|
13278 | * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
|
---|
13279 | * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
|
---|
13280 | * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
|
---|
13281 | * iff we're in the END probe.
|
---|
13282 | */
|
---|
13283 | state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
|
---|
13284 | dtrace_sync();
|
---|
13285 | ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
|
---|
13286 |
|
---|
13287 | /*
|
---|
13288 | * Finally, we can release the reserve and call the END probe. We
|
---|
13289 | * disable interrupts across calling the END probe to allow us to
|
---|
13290 | * return the CPU on which we actually called the END probe. This
|
---|
13291 | * allows user-land to be sure that this CPU's principal buffer is
|
---|
13292 | * processed last.
|
---|
13293 | */
|
---|
13294 | state->dts_reserve = 0;
|
---|
13295 |
|
---|
13296 | cookie = dtrace_interrupt_disable();
|
---|
13297 | *cpu = VBDT_GET_CPUID();
|
---|
13298 | dtrace_probe(dtrace_probeid_end,
|
---|
13299 | (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
|
---|
13300 | dtrace_interrupt_enable(cookie);
|
---|
13301 |
|
---|
13302 | state->dts_activity = DTRACE_ACTIVITY_STOPPED;
|
---|
13303 | dtrace_sync();
|
---|
13304 |
|
---|
13305 | return (0);
|
---|
13306 | }
|
---|
13307 |
|
---|
13308 | static int
|
---|
13309 | dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
|
---|
13310 | dtrace_optval_t val)
|
---|
13311 | {
|
---|
13312 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
13313 |
|
---|
13314 | if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
|
---|
13315 | return (EBUSY);
|
---|
13316 |
|
---|
13317 | if (option >= DTRACEOPT_MAX)
|
---|
13318 | return (EINVAL);
|
---|
13319 |
|
---|
13320 | if (option != DTRACEOPT_CPU && val < 0)
|
---|
13321 | return (EINVAL);
|
---|
13322 |
|
---|
13323 | switch (option) {
|
---|
13324 | case DTRACEOPT_DESTRUCTIVE:
|
---|
13325 | if (dtrace_destructive_disallow)
|
---|
13326 | return (EACCES);
|
---|
13327 |
|
---|
13328 | state->dts_cred.dcr_destructive = 1;
|
---|
13329 | break;
|
---|
13330 |
|
---|
13331 | case DTRACEOPT_BUFSIZE:
|
---|
13332 | case DTRACEOPT_DYNVARSIZE:
|
---|
13333 | case DTRACEOPT_AGGSIZE:
|
---|
13334 | case DTRACEOPT_SPECSIZE:
|
---|
13335 | case DTRACEOPT_STRSIZE:
|
---|
13336 | if (val < 0)
|
---|
13337 | return (EINVAL);
|
---|
13338 |
|
---|
13339 | if (val >= LONG_MAX) {
|
---|
13340 | /*
|
---|
13341 | * If this is an otherwise negative value, set it to
|
---|
13342 | * the highest multiple of 128m less than LONG_MAX.
|
---|
13343 | * Technically, we're adjusting the size without
|
---|
13344 | * regard to the buffer resizing policy, but in fact,
|
---|
13345 | * this has no effect -- if we set the buffer size to
|
---|
13346 | * ~LONG_MAX and the buffer policy is ultimately set to
|
---|
13347 | * be "manual", the buffer allocation is guaranteed to
|
---|
13348 | * fail, if only because the allocation requires two
|
---|
13349 | * buffers. (We set the the size to the highest
|
---|
13350 | * multiple of 128m because it ensures that the size
|
---|
13351 | * will remain a multiple of a megabyte when
|
---|
13352 | * repeatedly halved -- all the way down to 15m.)
|
---|
13353 | */
|
---|
13354 | val = LONG_MAX - (1 << 27) + 1;
|
---|
13355 | }
|
---|
13356 | }
|
---|
13357 |
|
---|
13358 | state->dts_options[option] = val;
|
---|
13359 |
|
---|
13360 | return (0);
|
---|
13361 | }
|
---|
13362 |
|
---|
13363 | static void
|
---|
13364 | dtrace_state_destroy(dtrace_state_t *state)
|
---|
13365 | {
|
---|
13366 | dtrace_ecb_t *ecb;
|
---|
13367 | dtrace_vstate_t *vstate = &state->dts_vstate;
|
---|
13368 | #ifndef VBOX
|
---|
13369 | minor_t minor = getminor(state->dts_dev);
|
---|
13370 | #endif
|
---|
13371 | int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
|
---|
13372 | dtrace_speculation_t *spec = state->dts_speculations;
|
---|
13373 | int nspec = state->dts_nspeculations;
|
---|
13374 | uint32_t match;
|
---|
13375 |
|
---|
13376 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
13377 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
13378 |
|
---|
13379 | /*
|
---|
13380 | * First, retract any retained enablings for this state.
|
---|
13381 | */
|
---|
13382 | dtrace_enabling_retract(state);
|
---|
13383 | ASSERT(state->dts_nretained == 0);
|
---|
13384 |
|
---|
13385 | if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
|
---|
13386 | state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
|
---|
13387 | /*
|
---|
13388 | * We have managed to come into dtrace_state_destroy() on a
|
---|
13389 | * hot enabling -- almost certainly because of a disorderly
|
---|
13390 | * shutdown of a consumer. (That is, a consumer that is
|
---|
13391 | * exiting without having called dtrace_stop().) In this case,
|
---|
13392 | * we're going to set our activity to be KILLED, and then
|
---|
13393 | * issue a sync to be sure that everyone is out of probe
|
---|
13394 | * context before we start blowing away ECBs.
|
---|
13395 | */
|
---|
13396 | state->dts_activity = DTRACE_ACTIVITY_KILLED;
|
---|
13397 | dtrace_sync();
|
---|
13398 | }
|
---|
13399 |
|
---|
13400 | /*
|
---|
13401 | * Release the credential hold we took in dtrace_state_create().
|
---|
13402 | */
|
---|
13403 | if (state->dts_cred.dcr_cred != NULL)
|
---|
13404 | crfree(state->dts_cred.dcr_cred);
|
---|
13405 |
|
---|
13406 | /*
|
---|
13407 | * Now we can safely disable and destroy any enabled probes. Because
|
---|
13408 | * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
|
---|
13409 | * (especially if they're all enabled), we take two passes through the
|
---|
13410 | * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
|
---|
13411 | * in the second we disable whatever is left over.
|
---|
13412 | */
|
---|
13413 | for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
|
---|
13414 | for (i = 0; i < state->dts_necbs; i++) {
|
---|
13415 | if ((ecb = state->dts_ecbs[i]) == NULL)
|
---|
13416 | continue;
|
---|
13417 |
|
---|
13418 | if (match && ecb->dte_probe != NULL) {
|
---|
13419 | dtrace_probe_t *probe = ecb->dte_probe;
|
---|
13420 | dtrace_provider_t *prov = probe->dtpr_provider;
|
---|
13421 |
|
---|
13422 | if (!(prov->dtpv_priv.dtpp_flags & match))
|
---|
13423 | continue;
|
---|
13424 | }
|
---|
13425 |
|
---|
13426 | dtrace_ecb_disable(ecb);
|
---|
13427 | dtrace_ecb_destroy(ecb);
|
---|
13428 | }
|
---|
13429 |
|
---|
13430 | if (!match)
|
---|
13431 | break;
|
---|
13432 | }
|
---|
13433 |
|
---|
13434 | /*
|
---|
13435 | * Before we free the buffers, perform one more sync to assure that
|
---|
13436 | * every CPU is out of probe context.
|
---|
13437 | */
|
---|
13438 | dtrace_sync();
|
---|
13439 |
|
---|
13440 | dtrace_buffer_free(state->dts_buffer);
|
---|
13441 | dtrace_buffer_free(state->dts_aggbuffer);
|
---|
13442 |
|
---|
13443 | for (i = 0; i < nspec; i++)
|
---|
13444 | dtrace_buffer_free(spec[i].dtsp_buffer);
|
---|
13445 |
|
---|
13446 | if (state->dts_cleaner != CYCLIC_NONE)
|
---|
13447 | cyclic_remove(state->dts_cleaner);
|
---|
13448 |
|
---|
13449 | if (state->dts_deadman != CYCLIC_NONE)
|
---|
13450 | cyclic_remove(state->dts_deadman);
|
---|
13451 |
|
---|
13452 | dtrace_dstate_fini(&vstate->dtvs_dynvars);
|
---|
13453 | dtrace_vstate_fini(vstate);
|
---|
13454 | kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
|
---|
13455 |
|
---|
13456 | if (state->dts_aggregations != NULL) {
|
---|
13457 | #ifdef DEBUG
|
---|
13458 | for (i = 0; i < state->dts_naggregations; i++)
|
---|
13459 | ASSERT(state->dts_aggregations[i] == NULL);
|
---|
13460 | #endif
|
---|
13461 | ASSERT(state->dts_naggregations > 0);
|
---|
13462 | kmem_free(state->dts_aggregations,
|
---|
13463 | state->dts_naggregations * sizeof (dtrace_aggregation_t *));
|
---|
13464 | }
|
---|
13465 |
|
---|
13466 | kmem_free(state->dts_buffer, bufsize);
|
---|
13467 | kmem_free(state->dts_aggbuffer, bufsize);
|
---|
13468 |
|
---|
13469 | for (i = 0; i < nspec; i++)
|
---|
13470 | kmem_free(spec[i].dtsp_buffer, bufsize);
|
---|
13471 |
|
---|
13472 | kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
|
---|
13473 |
|
---|
13474 | dtrace_format_destroy(state);
|
---|
13475 |
|
---|
13476 | vmem_destroy(state->dts_aggid_arena);
|
---|
13477 | #ifndef VBOX
|
---|
13478 | ddi_soft_state_free(dtrace_softstate, minor);
|
---|
13479 | vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
|
---|
13480 | #else
|
---|
13481 | kmem_free(state, sizeof (*state));
|
---|
13482 | #endif
|
---|
13483 | }
|
---|
13484 |
|
---|
13485 | /*
|
---|
13486 | * DTrace Anonymous Enabling Functions
|
---|
13487 | */
|
---|
13488 | static dtrace_state_t *
|
---|
13489 | dtrace_anon_grab(void)
|
---|
13490 | {
|
---|
13491 | dtrace_state_t *state;
|
---|
13492 |
|
---|
13493 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
13494 |
|
---|
13495 | if ((state = dtrace_anon.dta_state) == NULL) {
|
---|
13496 | ASSERT(dtrace_anon.dta_enabling == NULL);
|
---|
13497 | return (NULL);
|
---|
13498 | }
|
---|
13499 |
|
---|
13500 | ASSERT(dtrace_anon.dta_enabling != NULL);
|
---|
13501 | ASSERT(dtrace_retained != NULL);
|
---|
13502 |
|
---|
13503 | dtrace_enabling_destroy(dtrace_anon.dta_enabling);
|
---|
13504 | dtrace_anon.dta_enabling = NULL;
|
---|
13505 | dtrace_anon.dta_state = NULL;
|
---|
13506 |
|
---|
13507 | return (state);
|
---|
13508 | }
|
---|
13509 |
|
---|
13510 | #ifndef VBOX
|
---|
13511 | static void
|
---|
13512 | dtrace_anon_property(void)
|
---|
13513 | {
|
---|
13514 | int i, rv;
|
---|
13515 | dtrace_state_t *state;
|
---|
13516 | dof_hdr_t *dof;
|
---|
13517 | char c[32]; /* enough for "dof-data-" + digits */
|
---|
13518 |
|
---|
13519 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
13520 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
13521 |
|
---|
13522 | for (i = 0; ; i++) {
|
---|
13523 | (void) snprintf(c, sizeof (c), "dof-data-%d", i);
|
---|
13524 |
|
---|
13525 | dtrace_err_verbose = 1;
|
---|
13526 |
|
---|
13527 | if ((dof = dtrace_dof_property(c)) == NULL) {
|
---|
13528 | dtrace_err_verbose = 0;
|
---|
13529 | break;
|
---|
13530 | }
|
---|
13531 |
|
---|
13532 | #ifndef VBOX
|
---|
13533 | /*
|
---|
13534 | * We want to create anonymous state, so we need to transition
|
---|
13535 | * the kernel debugger to indicate that DTrace is active. If
|
---|
13536 | * this fails (e.g. because the debugger has modified text in
|
---|
13537 | * some way), we won't continue with the processing.
|
---|
13538 | */
|
---|
13539 | if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
|
---|
13540 | cmn_err(CE_NOTE, "kernel debugger active; anonymous "
|
---|
13541 | "enabling ignored.");
|
---|
13542 | dtrace_dof_destroy(dof);
|
---|
13543 | break;
|
---|
13544 | }
|
---|
13545 | #endif
|
---|
13546 |
|
---|
13547 | /*
|
---|
13548 | * If we haven't allocated an anonymous state, we'll do so now.
|
---|
13549 | */
|
---|
13550 | if ((state = dtrace_anon.dta_state) == NULL) {
|
---|
13551 | state = dtrace_state_create(NULL, NULL);
|
---|
13552 | dtrace_anon.dta_state = state;
|
---|
13553 |
|
---|
13554 | if (state == NULL) {
|
---|
13555 | /*
|
---|
13556 | * This basically shouldn't happen: the only
|
---|
13557 | * failure mode from dtrace_state_create() is a
|
---|
13558 | * failure of ddi_soft_state_zalloc() that
|
---|
13559 | * itself should never happen. Still, the
|
---|
13560 | * interface allows for a failure mode, and
|
---|
13561 | * we want to fail as gracefully as possible:
|
---|
13562 | * we'll emit an error message and cease
|
---|
13563 | * processing anonymous state in this case.
|
---|
13564 | */
|
---|
13565 | cmn_err(CE_WARN, "failed to create "
|
---|
13566 | "anonymous state");
|
---|
13567 | dtrace_dof_destroy(dof);
|
---|
13568 | break;
|
---|
13569 | }
|
---|
13570 | }
|
---|
13571 |
|
---|
13572 | rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
|
---|
13573 | &dtrace_anon.dta_enabling, 0, B_TRUE);
|
---|
13574 |
|
---|
13575 | if (rv == 0)
|
---|
13576 | rv = dtrace_dof_options(dof, state);
|
---|
13577 |
|
---|
13578 | dtrace_err_verbose = 0;
|
---|
13579 | dtrace_dof_destroy(dof);
|
---|
13580 |
|
---|
13581 | if (rv != 0) {
|
---|
13582 | /*
|
---|
13583 | * This is malformed DOF; chuck any anonymous state
|
---|
13584 | * that we created.
|
---|
13585 | */
|
---|
13586 | ASSERT(dtrace_anon.dta_enabling == NULL);
|
---|
13587 | dtrace_state_destroy(state);
|
---|
13588 | dtrace_anon.dta_state = NULL;
|
---|
13589 | break;
|
---|
13590 | }
|
---|
13591 |
|
---|
13592 | ASSERT(dtrace_anon.dta_enabling != NULL);
|
---|
13593 | }
|
---|
13594 |
|
---|
13595 | if (dtrace_anon.dta_enabling != NULL) {
|
---|
13596 | int rval;
|
---|
13597 |
|
---|
13598 | /*
|
---|
13599 | * dtrace_enabling_retain() can only fail because we are
|
---|
13600 | * trying to retain more enablings than are allowed -- but
|
---|
13601 | * we only have one anonymous enabling, and we are guaranteed
|
---|
13602 | * to be allowed at least one retained enabling; we assert
|
---|
13603 | * that dtrace_enabling_retain() returns success.
|
---|
13604 | */
|
---|
13605 | rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
|
---|
13606 | ASSERT(rval == 0);
|
---|
13607 |
|
---|
13608 | dtrace_enabling_dump(dtrace_anon.dta_enabling);
|
---|
13609 | }
|
---|
13610 | }
|
---|
13611 | #endif /* !VBOX */
|
---|
13612 |
|
---|
13613 | /*
|
---|
13614 | * DTrace Helper Functions
|
---|
13615 | */
|
---|
13616 | #ifndef VBOX /* No helper stuff */
|
---|
13617 | static void
|
---|
13618 | dtrace_helper_trace(dtrace_helper_action_t *helper,
|
---|
13619 | dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
|
---|
13620 | {
|
---|
13621 | uint32_t size, next, nnext, i;
|
---|
13622 | dtrace_helptrace_t *ent;
|
---|
13623 | uint16_t flags = cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
13624 |
|
---|
13625 | if (!dtrace_helptrace_enabled)
|
---|
13626 | return;
|
---|
13627 |
|
---|
13628 | ASSERT(vstate->dtvs_nlocals <= VBDTCAST(int32_t)dtrace_helptrace_nlocals);
|
---|
13629 |
|
---|
13630 | /*
|
---|
13631 | * What would a tracing framework be without its own tracing
|
---|
13632 | * framework? (Well, a hell of a lot simpler, for starters...)
|
---|
13633 | */
|
---|
13634 | size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
|
---|
13635 | sizeof (uint64_t) - sizeof (uint64_t);
|
---|
13636 |
|
---|
13637 | /*
|
---|
13638 | * Iterate until we can allocate a slot in the trace buffer.
|
---|
13639 | */
|
---|
13640 | do {
|
---|
13641 | next = dtrace_helptrace_next;
|
---|
13642 |
|
---|
13643 | if (next + size < VBDTCAST(unsigned)dtrace_helptrace_bufsize) {
|
---|
13644 | nnext = next + size;
|
---|
13645 | } else {
|
---|
13646 | nnext = size;
|
---|
13647 | }
|
---|
13648 | } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
|
---|
13649 |
|
---|
13650 | /*
|
---|
13651 | * We have our slot; fill it in.
|
---|
13652 | */
|
---|
13653 | if (nnext == size)
|
---|
13654 | next = 0;
|
---|
13655 |
|
---|
13656 | ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
|
---|
13657 | ent->dtht_helper = helper;
|
---|
13658 | ent->dtht_where = where;
|
---|
13659 | ent->dtht_nlocals = vstate->dtvs_nlocals;
|
---|
13660 |
|
---|
13661 | ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
|
---|
13662 | mstate->dtms_fltoffs : -1;
|
---|
13663 | ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
|
---|
13664 | ent->dtht_illval = cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval;
|
---|
13665 |
|
---|
13666 | for (i = 0; VBDTCAST(int32_t)i < vstate->dtvs_nlocals; i++) {
|
---|
13667 | dtrace_statvar_t *svar;
|
---|
13668 |
|
---|
13669 | if ((svar = vstate->dtvs_locals[i]) == NULL)
|
---|
13670 | continue;
|
---|
13671 |
|
---|
13672 | ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
|
---|
13673 | ent->dtht_locals[i] =
|
---|
13674 | ((uint64_t *)(uintptr_t)svar->dtsv_data)[VBDT_GET_CPUID()];
|
---|
13675 | }
|
---|
13676 | }
|
---|
13677 |
|
---|
13678 | static uint64_t
|
---|
13679 | dtrace_helper(int which, dtrace_mstate_t *mstate,
|
---|
13680 | dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
|
---|
13681 | {
|
---|
13682 | VBDTTYPE(uint16_t volatile *, uint16_t *)flags = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
13683 | uint64_t sarg0 = mstate->dtms_arg[0];
|
---|
13684 | uint64_t sarg1 = mstate->dtms_arg[1];
|
---|
13685 | uint64_t rval VBDTUNASS(666);
|
---|
13686 | dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
|
---|
13687 | dtrace_helper_action_t *helper;
|
---|
13688 | dtrace_vstate_t *vstate;
|
---|
13689 | dtrace_difo_t *pred;
|
---|
13690 | int i, trace = dtrace_helptrace_enabled;
|
---|
13691 |
|
---|
13692 | ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
|
---|
13693 |
|
---|
13694 | if (helpers == NULL)
|
---|
13695 | return (0);
|
---|
13696 |
|
---|
13697 | if ((helper = helpers->dthps_actions[which]) == NULL)
|
---|
13698 | return (0);
|
---|
13699 |
|
---|
13700 | vstate = &helpers->dthps_vstate;
|
---|
13701 | mstate->dtms_arg[0] = arg0;
|
---|
13702 | mstate->dtms_arg[1] = arg1;
|
---|
13703 |
|
---|
13704 | /*
|
---|
13705 | * Now iterate over each helper. If its predicate evaluates to 'true',
|
---|
13706 | * we'll call the corresponding actions. Note that the below calls
|
---|
13707 | * to dtrace_dif_emulate() may set faults in machine state. This is
|
---|
13708 | * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
|
---|
13709 | * the stored DIF offset with its own (which is the desired behavior).
|
---|
13710 | * Also, note the calls to dtrace_dif_emulate() may allocate scratch
|
---|
13711 | * from machine state; this is okay, too.
|
---|
13712 | */
|
---|
13713 | for (; helper != NULL; helper = helper->dtha_next) {
|
---|
13714 | if ((pred = helper->dtha_predicate) != NULL) {
|
---|
13715 | if (trace)
|
---|
13716 | dtrace_helper_trace(helper, mstate, vstate, 0);
|
---|
13717 |
|
---|
13718 | if (!dtrace_dif_emulate(pred, mstate, vstate, state))
|
---|
13719 | goto next;
|
---|
13720 |
|
---|
13721 | if (*flags & CPU_DTRACE_FAULT)
|
---|
13722 | goto err;
|
---|
13723 | }
|
---|
13724 |
|
---|
13725 | for (i = 0; i < helper->dtha_nactions; i++) {
|
---|
13726 | if (trace)
|
---|
13727 | dtrace_helper_trace(helper,
|
---|
13728 | mstate, vstate, i + 1);
|
---|
13729 |
|
---|
13730 | rval = dtrace_dif_emulate(helper->dtha_actions[i],
|
---|
13731 | mstate, vstate, state);
|
---|
13732 |
|
---|
13733 | if (*flags & CPU_DTRACE_FAULT)
|
---|
13734 | goto err;
|
---|
13735 | }
|
---|
13736 |
|
---|
13737 | next:
|
---|
13738 | if (trace)
|
---|
13739 | dtrace_helper_trace(helper, mstate, vstate,
|
---|
13740 | DTRACE_HELPTRACE_NEXT);
|
---|
13741 | }
|
---|
13742 |
|
---|
13743 | if (trace)
|
---|
13744 | dtrace_helper_trace(helper, mstate, vstate,
|
---|
13745 | DTRACE_HELPTRACE_DONE);
|
---|
13746 |
|
---|
13747 | /*
|
---|
13748 | * Restore the arg0 that we saved upon entry.
|
---|
13749 | */
|
---|
13750 | mstate->dtms_arg[0] = sarg0;
|
---|
13751 | mstate->dtms_arg[1] = sarg1;
|
---|
13752 |
|
---|
13753 | return (rval);
|
---|
13754 |
|
---|
13755 | err:
|
---|
13756 | if (trace)
|
---|
13757 | dtrace_helper_trace(helper, mstate, vstate,
|
---|
13758 | DTRACE_HELPTRACE_ERR);
|
---|
13759 |
|
---|
13760 | /*
|
---|
13761 | * Restore the arg0 that we saved upon entry.
|
---|
13762 | */
|
---|
13763 | mstate->dtms_arg[0] = sarg0;
|
---|
13764 | mstate->dtms_arg[1] = sarg1;
|
---|
13765 |
|
---|
13766 | return (NULL);
|
---|
13767 | }
|
---|
13768 |
|
---|
13769 | static void
|
---|
13770 | dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
|
---|
13771 | dtrace_vstate_t *vstate)
|
---|
13772 | {
|
---|
13773 | int i;
|
---|
13774 |
|
---|
13775 | if (helper->dtha_predicate != NULL)
|
---|
13776 | dtrace_difo_release(helper->dtha_predicate, vstate);
|
---|
13777 |
|
---|
13778 | for (i = 0; i < helper->dtha_nactions; i++) {
|
---|
13779 | ASSERT(helper->dtha_actions[i] != NULL);
|
---|
13780 | dtrace_difo_release(helper->dtha_actions[i], vstate);
|
---|
13781 | }
|
---|
13782 |
|
---|
13783 | kmem_free(helper->dtha_actions,
|
---|
13784 | helper->dtha_nactions * sizeof (dtrace_difo_t *));
|
---|
13785 | kmem_free(helper, sizeof (dtrace_helper_action_t));
|
---|
13786 | }
|
---|
13787 |
|
---|
13788 | static int
|
---|
13789 | dtrace_helper_destroygen(int gen)
|
---|
13790 | {
|
---|
13791 | proc_t *p = curproc;
|
---|
13792 | dtrace_helpers_t *help = p->p_dtrace_helpers;
|
---|
13793 | dtrace_vstate_t *vstate;
|
---|
13794 | VBDTTYPE(uint_t,int) i;
|
---|
13795 |
|
---|
13796 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
13797 |
|
---|
13798 | if (help == NULL || gen > help->dthps_generation)
|
---|
13799 | return (EINVAL);
|
---|
13800 |
|
---|
13801 | vstate = &help->dthps_vstate;
|
---|
13802 |
|
---|
13803 | for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
|
---|
13804 | dtrace_helper_action_t *last = NULL, *h, *next;
|
---|
13805 |
|
---|
13806 | for (h = help->dthps_actions[i]; h != NULL; h = next) {
|
---|
13807 | next = h->dtha_next;
|
---|
13808 |
|
---|
13809 | if (h->dtha_generation == gen) {
|
---|
13810 | if (last != NULL) {
|
---|
13811 | last->dtha_next = next;
|
---|
13812 | } else {
|
---|
13813 | help->dthps_actions[i] = next;
|
---|
13814 | }
|
---|
13815 |
|
---|
13816 | dtrace_helper_action_destroy(h, vstate);
|
---|
13817 | } else {
|
---|
13818 | last = h;
|
---|
13819 | }
|
---|
13820 | }
|
---|
13821 | }
|
---|
13822 |
|
---|
13823 | /*
|
---|
13824 | * Interate until we've cleared out all helper providers with the
|
---|
13825 | * given generation number.
|
---|
13826 | */
|
---|
13827 | for (;;) {
|
---|
13828 | dtrace_helper_provider_t *prov VBDTGCC(NULL);
|
---|
13829 |
|
---|
13830 | /*
|
---|
13831 | * Look for a helper provider with the right generation. We
|
---|
13832 | * have to start back at the beginning of the list each time
|
---|
13833 | * because we drop dtrace_lock. It's unlikely that we'll make
|
---|
13834 | * more than two passes.
|
---|
13835 | */
|
---|
13836 | for (i = 0; i < help->dthps_nprovs; i++) {
|
---|
13837 | prov = help->dthps_provs[i];
|
---|
13838 |
|
---|
13839 | if (prov->dthp_generation == gen)
|
---|
13840 | break;
|
---|
13841 | }
|
---|
13842 |
|
---|
13843 | /*
|
---|
13844 | * If there were no matches, we're done.
|
---|
13845 | */
|
---|
13846 | if (i == help->dthps_nprovs)
|
---|
13847 | break;
|
---|
13848 |
|
---|
13849 | /*
|
---|
13850 | * Move the last helper provider into this slot.
|
---|
13851 | */
|
---|
13852 | help->dthps_nprovs--;
|
---|
13853 | help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
|
---|
13854 | help->dthps_provs[help->dthps_nprovs] = NULL;
|
---|
13855 |
|
---|
13856 | mutex_exit(&dtrace_lock);
|
---|
13857 |
|
---|
13858 | /*
|
---|
13859 | * If we have a meta provider, remove this helper provider.
|
---|
13860 | */
|
---|
13861 | mutex_enter(&dtrace_meta_lock);
|
---|
13862 | if (dtrace_meta_pid != NULL) {
|
---|
13863 | ASSERT(dtrace_deferred_pid == NULL);
|
---|
13864 | dtrace_helper_provider_remove(&prov->dthp_prov,
|
---|
13865 | p->p_pid);
|
---|
13866 | }
|
---|
13867 | mutex_exit(&dtrace_meta_lock);
|
---|
13868 |
|
---|
13869 | dtrace_helper_provider_destroy(prov);
|
---|
13870 |
|
---|
13871 | mutex_enter(&dtrace_lock);
|
---|
13872 | }
|
---|
13873 |
|
---|
13874 | return (0);
|
---|
13875 | }
|
---|
13876 |
|
---|
13877 | static int
|
---|
13878 | dtrace_helper_validate(dtrace_helper_action_t *helper)
|
---|
13879 | {
|
---|
13880 | int err = 0, i;
|
---|
13881 | dtrace_difo_t *dp;
|
---|
13882 |
|
---|
13883 | if ((dp = helper->dtha_predicate) != NULL)
|
---|
13884 | err += dtrace_difo_validate_helper(dp);
|
---|
13885 |
|
---|
13886 | for (i = 0; i < helper->dtha_nactions; i++)
|
---|
13887 | err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
|
---|
13888 |
|
---|
13889 | return (err == 0);
|
---|
13890 | }
|
---|
13891 |
|
---|
13892 | static int
|
---|
13893 | dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
|
---|
13894 | {
|
---|
13895 | dtrace_helpers_t *help;
|
---|
13896 | dtrace_helper_action_t *helper, *last;
|
---|
13897 | dtrace_actdesc_t *act;
|
---|
13898 | dtrace_vstate_t *vstate;
|
---|
13899 | dtrace_predicate_t *pred;
|
---|
13900 | int count = 0, nactions = 0, i;
|
---|
13901 |
|
---|
13902 | if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
|
---|
13903 | return (EINVAL);
|
---|
13904 |
|
---|
13905 | help = curproc->p_dtrace_helpers;
|
---|
13906 | last = help->dthps_actions[which];
|
---|
13907 | vstate = &help->dthps_vstate;
|
---|
13908 |
|
---|
13909 | for (count = 0; last != NULL; last = last->dtha_next) {
|
---|
13910 | count++;
|
---|
13911 | if (last->dtha_next == NULL)
|
---|
13912 | break;
|
---|
13913 | }
|
---|
13914 |
|
---|
13915 | /*
|
---|
13916 | * If we already have dtrace_helper_actions_max helper actions for this
|
---|
13917 | * helper action type, we'll refuse to add a new one.
|
---|
13918 | */
|
---|
13919 | if (count >= dtrace_helper_actions_max)
|
---|
13920 | return (ENOSPC);
|
---|
13921 |
|
---|
13922 | helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
|
---|
13923 | helper->dtha_generation = help->dthps_generation;
|
---|
13924 |
|
---|
13925 | if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
|
---|
13926 | ASSERT(pred->dtp_difo != NULL);
|
---|
13927 | dtrace_difo_hold(pred->dtp_difo);
|
---|
13928 | helper->dtha_predicate = pred->dtp_difo;
|
---|
13929 | }
|
---|
13930 |
|
---|
13931 | for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
|
---|
13932 | if (act->dtad_kind != DTRACEACT_DIFEXPR)
|
---|
13933 | goto err;
|
---|
13934 |
|
---|
13935 | if (act->dtad_difo == NULL)
|
---|
13936 | goto err;
|
---|
13937 |
|
---|
13938 | nactions++;
|
---|
13939 | }
|
---|
13940 |
|
---|
13941 | helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
|
---|
13942 | (helper->dtha_nactions = nactions), KM_SLEEP);
|
---|
13943 |
|
---|
13944 | for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
|
---|
13945 | dtrace_difo_hold(act->dtad_difo);
|
---|
13946 | helper->dtha_actions[i++] = act->dtad_difo;
|
---|
13947 | }
|
---|
13948 |
|
---|
13949 | if (!dtrace_helper_validate(helper))
|
---|
13950 | goto err;
|
---|
13951 |
|
---|
13952 | if (last == NULL) {
|
---|
13953 | help->dthps_actions[which] = helper;
|
---|
13954 | } else {
|
---|
13955 | last->dtha_next = helper;
|
---|
13956 | }
|
---|
13957 |
|
---|
13958 | if (vstate->dtvs_nlocals > VBDTCAST(int32_t)dtrace_helptrace_nlocals) {
|
---|
13959 | dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
|
---|
13960 | dtrace_helptrace_next = 0;
|
---|
13961 | }
|
---|
13962 |
|
---|
13963 | return (0);
|
---|
13964 | err:
|
---|
13965 | dtrace_helper_action_destroy(helper, vstate);
|
---|
13966 | return (EINVAL);
|
---|
13967 | }
|
---|
13968 |
|
---|
13969 | static void
|
---|
13970 | dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
|
---|
13971 | dof_helper_t *dofhp)
|
---|
13972 | {
|
---|
13973 | ASSERT(MUTEX_NOT_HELD(&dtrace_lock));
|
---|
13974 |
|
---|
13975 | mutex_enter(&dtrace_meta_lock);
|
---|
13976 | mutex_enter(&dtrace_lock);
|
---|
13977 |
|
---|
13978 | if (!dtrace_attached() || dtrace_meta_pid == NULL) {
|
---|
13979 | /*
|
---|
13980 | * If the dtrace module is loaded but not attached, or if
|
---|
13981 | * there aren't isn't a meta provider registered to deal with
|
---|
13982 | * these provider descriptions, we need to postpone creating
|
---|
13983 | * the actual providers until later.
|
---|
13984 | */
|
---|
13985 |
|
---|
13986 | if (help->dthps_next == NULL && help->dthps_prev == NULL &&
|
---|
13987 | dtrace_deferred_pid != help) {
|
---|
13988 | help->dthps_deferred = 1;
|
---|
13989 | help->dthps_pid = p->p_pid;
|
---|
13990 | help->dthps_next = dtrace_deferred_pid;
|
---|
13991 | help->dthps_prev = NULL;
|
---|
13992 | if (dtrace_deferred_pid != NULL)
|
---|
13993 | dtrace_deferred_pid->dthps_prev = help;
|
---|
13994 | dtrace_deferred_pid = help;
|
---|
13995 | }
|
---|
13996 |
|
---|
13997 | mutex_exit(&dtrace_lock);
|
---|
13998 |
|
---|
13999 | } else if (dofhp != NULL) {
|
---|
14000 | /*
|
---|
14001 | * If the dtrace module is loaded and we have a particular
|
---|
14002 | * helper provider description, pass that off to the
|
---|
14003 | * meta provider.
|
---|
14004 | */
|
---|
14005 |
|
---|
14006 | mutex_exit(&dtrace_lock);
|
---|
14007 |
|
---|
14008 | dtrace_helper_provide(dofhp, p->p_pid);
|
---|
14009 |
|
---|
14010 | } else {
|
---|
14011 | /*
|
---|
14012 | * Otherwise, just pass all the helper provider descriptions
|
---|
14013 | * off to the meta provider.
|
---|
14014 | */
|
---|
14015 |
|
---|
14016 | VBDTTYPE(uint_t,int) i;
|
---|
14017 | mutex_exit(&dtrace_lock);
|
---|
14018 |
|
---|
14019 | for (i = 0; i < help->dthps_nprovs; i++) {
|
---|
14020 | dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
|
---|
14021 | p->p_pid);
|
---|
14022 | }
|
---|
14023 | }
|
---|
14024 |
|
---|
14025 | mutex_exit(&dtrace_meta_lock);
|
---|
14026 | }
|
---|
14027 |
|
---|
14028 | static int
|
---|
14029 | dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
|
---|
14030 | {
|
---|
14031 | dtrace_helpers_t *help;
|
---|
14032 | dtrace_helper_provider_t *hprov, **tmp_provs;
|
---|
14033 | uint_t tmp_maxprovs, i;
|
---|
14034 |
|
---|
14035 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
14036 |
|
---|
14037 | help = curproc->p_dtrace_helpers;
|
---|
14038 | ASSERT(help != NULL);
|
---|
14039 |
|
---|
14040 | /*
|
---|
14041 | * If we already have dtrace_helper_providers_max helper providers,
|
---|
14042 | * we're refuse to add a new one.
|
---|
14043 | */
|
---|
14044 | if (help->dthps_nprovs >= dtrace_helper_providers_max)
|
---|
14045 | return (ENOSPC);
|
---|
14046 |
|
---|
14047 | /*
|
---|
14048 | * Check to make sure this isn't a duplicate.
|
---|
14049 | */
|
---|
14050 | for (i = 0; i < help->dthps_nprovs; i++) {
|
---|
14051 | if (dofhp->dofhp_addr ==
|
---|
14052 | help->dthps_provs[i]->dthp_prov.dofhp_addr)
|
---|
14053 | return (EALREADY);
|
---|
14054 | }
|
---|
14055 |
|
---|
14056 | hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
|
---|
14057 | hprov->dthp_prov = *dofhp;
|
---|
14058 | hprov->dthp_ref = 1;
|
---|
14059 | hprov->dthp_generation = gen;
|
---|
14060 |
|
---|
14061 | /*
|
---|
14062 | * Allocate a bigger table for helper providers if it's already full.
|
---|
14063 | */
|
---|
14064 | if (help->dthps_maxprovs == help->dthps_nprovs) {
|
---|
14065 | tmp_maxprovs = help->dthps_maxprovs;
|
---|
14066 | tmp_provs = help->dthps_provs;
|
---|
14067 |
|
---|
14068 | if (help->dthps_maxprovs == 0)
|
---|
14069 | help->dthps_maxprovs = 2;
|
---|
14070 | else
|
---|
14071 | help->dthps_maxprovs *= 2;
|
---|
14072 | if (help->dthps_maxprovs > dtrace_helper_providers_max)
|
---|
14073 | help->dthps_maxprovs = dtrace_helper_providers_max;
|
---|
14074 |
|
---|
14075 | ASSERT(tmp_maxprovs < help->dthps_maxprovs);
|
---|
14076 |
|
---|
14077 | help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
|
---|
14078 | sizeof (dtrace_helper_provider_t *), KM_SLEEP);
|
---|
14079 |
|
---|
14080 | if (tmp_provs != NULL) {
|
---|
14081 | bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
|
---|
14082 | sizeof (dtrace_helper_provider_t *));
|
---|
14083 | kmem_free(tmp_provs, tmp_maxprovs *
|
---|
14084 | sizeof (dtrace_helper_provider_t *));
|
---|
14085 | }
|
---|
14086 | }
|
---|
14087 |
|
---|
14088 | help->dthps_provs[help->dthps_nprovs] = hprov;
|
---|
14089 | help->dthps_nprovs++;
|
---|
14090 |
|
---|
14091 | return (0);
|
---|
14092 | }
|
---|
14093 |
|
---|
14094 | static void
|
---|
14095 | dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
|
---|
14096 | {
|
---|
14097 | mutex_enter(&dtrace_lock);
|
---|
14098 |
|
---|
14099 | if (--hprov->dthp_ref == 0) {
|
---|
14100 | dof_hdr_t *dof;
|
---|
14101 | mutex_exit(&dtrace_lock);
|
---|
14102 | dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
|
---|
14103 | dtrace_dof_destroy(dof);
|
---|
14104 | kmem_free(hprov, sizeof (dtrace_helper_provider_t));
|
---|
14105 | } else {
|
---|
14106 | mutex_exit(&dtrace_lock);
|
---|
14107 | }
|
---|
14108 | }
|
---|
14109 |
|
---|
14110 | static int
|
---|
14111 | dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
|
---|
14112 | {
|
---|
14113 | uintptr_t daddr = (uintptr_t)dof;
|
---|
14114 | dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
|
---|
14115 | dof_provider_t *provider;
|
---|
14116 | dof_probe_t *probe;
|
---|
14117 | uint8_t *arg;
|
---|
14118 | char *strtab, *typestr;
|
---|
14119 | dof_stridx_t typeidx;
|
---|
14120 | size_t typesz;
|
---|
14121 | uint_t nprobes, j, k;
|
---|
14122 |
|
---|
14123 | ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
|
---|
14124 |
|
---|
14125 | if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
|
---|
14126 | dtrace_dof_error(dof, "misaligned section offset");
|
---|
14127 | return (-1);
|
---|
14128 | }
|
---|
14129 |
|
---|
14130 | /*
|
---|
14131 | * The section needs to be large enough to contain the DOF provider
|
---|
14132 | * structure appropriate for the given version.
|
---|
14133 | */
|
---|
14134 | if (sec->dofs_size <
|
---|
14135 | ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
|
---|
14136 | offsetof(dof_provider_t, dofpv_prenoffs) :
|
---|
14137 | sizeof (dof_provider_t))) {
|
---|
14138 | dtrace_dof_error(dof, "provider section too small");
|
---|
14139 | return (-1);
|
---|
14140 | }
|
---|
14141 |
|
---|
14142 | provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
|
---|
14143 | str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
|
---|
14144 | prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
|
---|
14145 | arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
|
---|
14146 | off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
|
---|
14147 |
|
---|
14148 | if (str_sec == NULL || prb_sec == NULL ||
|
---|
14149 | arg_sec == NULL || off_sec == NULL)
|
---|
14150 | return (-1);
|
---|
14151 |
|
---|
14152 | enoff_sec = NULL;
|
---|
14153 |
|
---|
14154 | if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
|
---|
14155 | provider->dofpv_prenoffs != DOF_SECT_NONE &&
|
---|
14156 | (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
|
---|
14157 | provider->dofpv_prenoffs)) == NULL)
|
---|
14158 | return (-1);
|
---|
14159 |
|
---|
14160 | strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
|
---|
14161 |
|
---|
14162 | if (provider->dofpv_name >= str_sec->dofs_size ||
|
---|
14163 | strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
|
---|
14164 | dtrace_dof_error(dof, "invalid provider name");
|
---|
14165 | return (-1);
|
---|
14166 | }
|
---|
14167 |
|
---|
14168 | if (prb_sec->dofs_entsize == 0 ||
|
---|
14169 | prb_sec->dofs_entsize > prb_sec->dofs_size) {
|
---|
14170 | dtrace_dof_error(dof, "invalid entry size");
|
---|
14171 | return (-1);
|
---|
14172 | }
|
---|
14173 |
|
---|
14174 | if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
|
---|
14175 | dtrace_dof_error(dof, "misaligned entry size");
|
---|
14176 | return (-1);
|
---|
14177 | }
|
---|
14178 |
|
---|
14179 | if (off_sec->dofs_entsize != sizeof (uint32_t)) {
|
---|
14180 | dtrace_dof_error(dof, "invalid entry size");
|
---|
14181 | return (-1);
|
---|
14182 | }
|
---|
14183 |
|
---|
14184 | if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
|
---|
14185 | dtrace_dof_error(dof, "misaligned section offset");
|
---|
14186 | return (-1);
|
---|
14187 | }
|
---|
14188 |
|
---|
14189 | if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
|
---|
14190 | dtrace_dof_error(dof, "invalid entry size");
|
---|
14191 | return (-1);
|
---|
14192 | }
|
---|
14193 |
|
---|
14194 | arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
|
---|
14195 |
|
---|
14196 | nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
|
---|
14197 |
|
---|
14198 | /*
|
---|
14199 | * Take a pass through the probes to check for errors.
|
---|
14200 | */
|
---|
14201 | for (j = 0; j < nprobes; j++) {
|
---|
14202 | probe = (dof_probe_t *)(uintptr_t)(daddr +
|
---|
14203 | prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
|
---|
14204 |
|
---|
14205 | if (probe->dofpr_func >= str_sec->dofs_size) {
|
---|
14206 | dtrace_dof_error(dof, "invalid function name");
|
---|
14207 | return (-1);
|
---|
14208 | }
|
---|
14209 |
|
---|
14210 | if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
|
---|
14211 | dtrace_dof_error(dof, "function name too long");
|
---|
14212 | return (-1);
|
---|
14213 | }
|
---|
14214 |
|
---|
14215 | if (probe->dofpr_name >= str_sec->dofs_size ||
|
---|
14216 | strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
|
---|
14217 | dtrace_dof_error(dof, "invalid probe name");
|
---|
14218 | return (-1);
|
---|
14219 | }
|
---|
14220 |
|
---|
14221 | /*
|
---|
14222 | * The offset count must not wrap the index, and the offsets
|
---|
14223 | * must also not overflow the section's data.
|
---|
14224 | */
|
---|
14225 | if (probe->dofpr_offidx + probe->dofpr_noffs <
|
---|
14226 | probe->dofpr_offidx ||
|
---|
14227 | (probe->dofpr_offidx + probe->dofpr_noffs) *
|
---|
14228 | off_sec->dofs_entsize > off_sec->dofs_size) {
|
---|
14229 | dtrace_dof_error(dof, "invalid probe offset");
|
---|
14230 | return (-1);
|
---|
14231 | }
|
---|
14232 |
|
---|
14233 | if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
|
---|
14234 | /*
|
---|
14235 | * If there's no is-enabled offset section, make sure
|
---|
14236 | * there aren't any is-enabled offsets. Otherwise
|
---|
14237 | * perform the same checks as for probe offsets
|
---|
14238 | * (immediately above).
|
---|
14239 | */
|
---|
14240 | if (enoff_sec == NULL) {
|
---|
14241 | if (probe->dofpr_enoffidx != 0 ||
|
---|
14242 | probe->dofpr_nenoffs != 0) {
|
---|
14243 | dtrace_dof_error(dof, "is-enabled "
|
---|
14244 | "offsets with null section");
|
---|
14245 | return (-1);
|
---|
14246 | }
|
---|
14247 | } else if (probe->dofpr_enoffidx +
|
---|
14248 | probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
|
---|
14249 | (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
|
---|
14250 | enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
|
---|
14251 | dtrace_dof_error(dof, "invalid is-enabled "
|
---|
14252 | "offset");
|
---|
14253 | return (-1);
|
---|
14254 | }
|
---|
14255 |
|
---|
14256 | if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
|
---|
14257 | dtrace_dof_error(dof, "zero probe and "
|
---|
14258 | "is-enabled offsets");
|
---|
14259 | return (-1);
|
---|
14260 | }
|
---|
14261 | } else if (probe->dofpr_noffs == 0) {
|
---|
14262 | dtrace_dof_error(dof, "zero probe offsets");
|
---|
14263 | return (-1);
|
---|
14264 | }
|
---|
14265 |
|
---|
14266 | if (probe->dofpr_argidx + probe->dofpr_xargc <
|
---|
14267 | probe->dofpr_argidx ||
|
---|
14268 | (probe->dofpr_argidx + probe->dofpr_xargc) *
|
---|
14269 | arg_sec->dofs_entsize > arg_sec->dofs_size) {
|
---|
14270 | dtrace_dof_error(dof, "invalid args");
|
---|
14271 | return (-1);
|
---|
14272 | }
|
---|
14273 |
|
---|
14274 | typeidx = probe->dofpr_nargv;
|
---|
14275 | typestr = strtab + probe->dofpr_nargv;
|
---|
14276 | for (k = 0; k < probe->dofpr_nargc; k++) {
|
---|
14277 | if (typeidx >= str_sec->dofs_size) {
|
---|
14278 | dtrace_dof_error(dof, "bad "
|
---|
14279 | "native argument type");
|
---|
14280 | return (-1);
|
---|
14281 | }
|
---|
14282 |
|
---|
14283 | typesz = strlen(typestr) + 1;
|
---|
14284 | if (typesz > DTRACE_ARGTYPELEN) {
|
---|
14285 | dtrace_dof_error(dof, "native "
|
---|
14286 | "argument type too long");
|
---|
14287 | return (-1);
|
---|
14288 | }
|
---|
14289 | typeidx += VBDTCAST(dof_stridx_t)typesz;
|
---|
14290 | typestr += typesz;
|
---|
14291 | }
|
---|
14292 |
|
---|
14293 | typeidx = probe->dofpr_xargv;
|
---|
14294 | typestr = strtab + probe->dofpr_xargv;
|
---|
14295 | for (k = 0; k < probe->dofpr_xargc; k++) {
|
---|
14296 | if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
|
---|
14297 | dtrace_dof_error(dof, "bad "
|
---|
14298 | "native argument index");
|
---|
14299 | return (-1);
|
---|
14300 | }
|
---|
14301 |
|
---|
14302 | if (typeidx >= str_sec->dofs_size) {
|
---|
14303 | dtrace_dof_error(dof, "bad "
|
---|
14304 | "translated argument type");
|
---|
14305 | return (-1);
|
---|
14306 | }
|
---|
14307 |
|
---|
14308 | typesz = strlen(typestr) + 1;
|
---|
14309 | if (typesz > DTRACE_ARGTYPELEN) {
|
---|
14310 | dtrace_dof_error(dof, "translated argument "
|
---|
14311 | "type too long");
|
---|
14312 | return (-1);
|
---|
14313 | }
|
---|
14314 |
|
---|
14315 | typeidx += VBDTCAST(dof_stridx_t)typesz;
|
---|
14316 | typestr += typesz;
|
---|
14317 | }
|
---|
14318 | }
|
---|
14319 |
|
---|
14320 | return (0);
|
---|
14321 | }
|
---|
14322 |
|
---|
14323 | static int
|
---|
14324 | dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
|
---|
14325 | {
|
---|
14326 | dtrace_helpers_t *help;
|
---|
14327 | dtrace_vstate_t *vstate;
|
---|
14328 | dtrace_enabling_t *enab = NULL;
|
---|
14329 | int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
|
---|
14330 | uintptr_t daddr = (uintptr_t)dof;
|
---|
14331 |
|
---|
14332 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
14333 |
|
---|
14334 | if ((help = curproc->p_dtrace_helpers) == NULL)
|
---|
14335 | help = dtrace_helpers_create(curproc);
|
---|
14336 |
|
---|
14337 | vstate = &help->dthps_vstate;
|
---|
14338 |
|
---|
14339 | if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
|
---|
14340 | dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
|
---|
14341 | dtrace_dof_destroy(dof);
|
---|
14342 | return (rv);
|
---|
14343 | }
|
---|
14344 |
|
---|
14345 | /*
|
---|
14346 | * Look for helper providers and validate their descriptions.
|
---|
14347 | */
|
---|
14348 | if (dhp != NULL) {
|
---|
14349 | for (i = 0; i < VBDTCAST(int)dof->dofh_secnum; i++) {
|
---|
14350 | dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
|
---|
14351 | dof->dofh_secoff + i * dof->dofh_secsize);
|
---|
14352 |
|
---|
14353 | if (sec->dofs_type != DOF_SECT_PROVIDER)
|
---|
14354 | continue;
|
---|
14355 |
|
---|
14356 | if (dtrace_helper_provider_validate(dof, sec) != 0) {
|
---|
14357 | dtrace_enabling_destroy(enab);
|
---|
14358 | dtrace_dof_destroy(dof);
|
---|
14359 | return (-1);
|
---|
14360 | }
|
---|
14361 |
|
---|
14362 | nprovs++;
|
---|
14363 | }
|
---|
14364 | }
|
---|
14365 |
|
---|
14366 | /*
|
---|
14367 | * Now we need to walk through the ECB descriptions in the enabling.
|
---|
14368 | */
|
---|
14369 | for (i = 0; i < enab->dten_ndesc; i++) {
|
---|
14370 | dtrace_ecbdesc_t *ep = enab->dten_desc[i];
|
---|
14371 | dtrace_probedesc_t *desc = &ep->dted_probe;
|
---|
14372 |
|
---|
14373 | if (strcmp(desc->dtpd_provider, "dtrace") != 0)
|
---|
14374 | continue;
|
---|
14375 |
|
---|
14376 | if (strcmp(desc->dtpd_mod, "helper") != 0)
|
---|
14377 | continue;
|
---|
14378 |
|
---|
14379 | if (strcmp(desc->dtpd_func, "ustack") != 0)
|
---|
14380 | continue;
|
---|
14381 |
|
---|
14382 | if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK,
|
---|
14383 | ep)) != 0) {
|
---|
14384 | /*
|
---|
14385 | * Adding this helper action failed -- we are now going
|
---|
14386 | * to rip out the entire generation and return failure.
|
---|
14387 | */
|
---|
14388 | (void) dtrace_helper_destroygen(help->dthps_generation);
|
---|
14389 | dtrace_enabling_destroy(enab);
|
---|
14390 | dtrace_dof_destroy(dof);
|
---|
14391 | return (-1);
|
---|
14392 | }
|
---|
14393 |
|
---|
14394 | nhelpers++;
|
---|
14395 | }
|
---|
14396 |
|
---|
14397 | if (nhelpers < enab->dten_ndesc)
|
---|
14398 | dtrace_dof_error(dof, "unmatched helpers");
|
---|
14399 |
|
---|
14400 | gen = help->dthps_generation++;
|
---|
14401 | dtrace_enabling_destroy(enab);
|
---|
14402 |
|
---|
14403 | if (dhp != NULL && nprovs > 0) {
|
---|
14404 | dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
|
---|
14405 | if (dtrace_helper_provider_add(dhp, gen) == 0) {
|
---|
14406 | mutex_exit(&dtrace_lock);
|
---|
14407 | dtrace_helper_provider_register(curproc, help, dhp);
|
---|
14408 | mutex_enter(&dtrace_lock);
|
---|
14409 |
|
---|
14410 | destroy = 0;
|
---|
14411 | }
|
---|
14412 | }
|
---|
14413 |
|
---|
14414 | if (destroy)
|
---|
14415 | dtrace_dof_destroy(dof);
|
---|
14416 |
|
---|
14417 | return (gen);
|
---|
14418 | }
|
---|
14419 |
|
---|
14420 | static dtrace_helpers_t *
|
---|
14421 | dtrace_helpers_create(proc_t *p)
|
---|
14422 | {
|
---|
14423 | dtrace_helpers_t *help;
|
---|
14424 |
|
---|
14425 | ASSERT(MUTEX_HELD(&dtrace_lock));
|
---|
14426 | ASSERT(p->p_dtrace_helpers == NULL);
|
---|
14427 |
|
---|
14428 | help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
|
---|
14429 | help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
|
---|
14430 | DTRACE_NHELPER_ACTIONS, KM_SLEEP);
|
---|
14431 |
|
---|
14432 | p->p_dtrace_helpers = help;
|
---|
14433 | dtrace_helpers++;
|
---|
14434 |
|
---|
14435 | return (help);
|
---|
14436 | }
|
---|
14437 |
|
---|
14438 | static void
|
---|
14439 | dtrace_helpers_destroy(void)
|
---|
14440 | {
|
---|
14441 | dtrace_helpers_t *help;
|
---|
14442 | dtrace_vstate_t *vstate;
|
---|
14443 | proc_t *p = curproc;
|
---|
14444 | VBDTTYPE(uint_t, int) i;
|
---|
14445 |
|
---|
14446 | mutex_enter(&dtrace_lock);
|
---|
14447 |
|
---|
14448 | ASSERT(p->p_dtrace_helpers != NULL);
|
---|
14449 | ASSERT(dtrace_helpers > 0);
|
---|
14450 |
|
---|
14451 | help = p->p_dtrace_helpers;
|
---|
14452 | vstate = &help->dthps_vstate;
|
---|
14453 |
|
---|
14454 | /*
|
---|
14455 | * We're now going to lose the help from this process.
|
---|
14456 | */
|
---|
14457 | p->p_dtrace_helpers = NULL;
|
---|
14458 | dtrace_sync();
|
---|
14459 |
|
---|
14460 | /*
|
---|
14461 | * Destory the helper actions.
|
---|
14462 | */
|
---|
14463 | for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
|
---|
14464 | dtrace_helper_action_t *h, *next;
|
---|
14465 |
|
---|
14466 | for (h = help->dthps_actions[i]; h != NULL; h = next) {
|
---|
14467 | next = h->dtha_next;
|
---|
14468 | dtrace_helper_action_destroy(h, vstate);
|
---|
14469 | h = next;
|
---|
14470 | }
|
---|
14471 | }
|
---|
14472 |
|
---|
14473 | mutex_exit(&dtrace_lock);
|
---|
14474 |
|
---|
14475 | /*
|
---|
14476 | * Destroy the helper providers.
|
---|
14477 | */
|
---|
14478 | if (help->dthps_maxprovs > 0) {
|
---|
14479 | mutex_enter(&dtrace_meta_lock);
|
---|
14480 | if (dtrace_meta_pid != NULL) {
|
---|
14481 | ASSERT(dtrace_deferred_pid == NULL);
|
---|
14482 |
|
---|
14483 | for (i = 0; i < help->dthps_nprovs; i++) {
|
---|
14484 | dtrace_helper_provider_remove(
|
---|
14485 | &help->dthps_provs[i]->dthp_prov, p->p_pid);
|
---|
14486 | }
|
---|
14487 | } else {
|
---|
14488 | mutex_enter(&dtrace_lock);
|
---|
14489 | ASSERT(help->dthps_deferred == 0 ||
|
---|
14490 | help->dthps_next != NULL ||
|
---|
14491 | help->dthps_prev != NULL ||
|
---|
14492 | help == dtrace_deferred_pid);
|
---|
14493 |
|
---|
14494 | /*
|
---|
14495 | * Remove the helper from the deferred list.
|
---|
14496 | */
|
---|
14497 | if (help->dthps_next != NULL)
|
---|
14498 | help->dthps_next->dthps_prev = help->dthps_prev;
|
---|
14499 | if (help->dthps_prev != NULL)
|
---|
14500 | help->dthps_prev->dthps_next = help->dthps_next;
|
---|
14501 | if (dtrace_deferred_pid == help) {
|
---|
14502 | dtrace_deferred_pid = help->dthps_next;
|
---|
14503 | ASSERT(help->dthps_prev == NULL);
|
---|
14504 | }
|
---|
14505 |
|
---|
14506 | mutex_exit(&dtrace_lock);
|
---|
14507 | }
|
---|
14508 |
|
---|
14509 | mutex_exit(&dtrace_meta_lock);
|
---|
14510 |
|
---|
14511 | for (i = 0; i < help->dthps_nprovs; i++) {
|
---|
14512 | dtrace_helper_provider_destroy(help->dthps_provs[i]);
|
---|
14513 | }
|
---|
14514 |
|
---|
14515 | kmem_free(help->dthps_provs, help->dthps_maxprovs *
|
---|
14516 | sizeof (dtrace_helper_provider_t *));
|
---|
14517 | }
|
---|
14518 |
|
---|
14519 | mutex_enter(&dtrace_lock);
|
---|
14520 |
|
---|
14521 | dtrace_vstate_fini(&help->dthps_vstate);
|
---|
14522 | kmem_free(help->dthps_actions,
|
---|
14523 | sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
|
---|
14524 | kmem_free(help, sizeof (dtrace_helpers_t));
|
---|
14525 |
|
---|
14526 | --dtrace_helpers;
|
---|
14527 | mutex_exit(&dtrace_lock);
|
---|
14528 | }
|
---|
14529 |
|
---|
14530 | static void
|
---|
14531 | dtrace_helpers_duplicate(proc_t *from, proc_t *to)
|
---|
14532 | {
|
---|
14533 | dtrace_helpers_t *help, *newhelp;
|
---|
14534 | dtrace_helper_action_t *helper, *new, *last;
|
---|
14535 | dtrace_difo_t *dp;
|
---|
14536 | dtrace_vstate_t *vstate;
|
---|
14537 | int i, j, sz, hasprovs = 0;
|
---|
14538 |
|
---|
14539 | mutex_enter(&dtrace_lock);
|
---|
14540 | ASSERT(from->p_dtrace_helpers != NULL);
|
---|
14541 | ASSERT(dtrace_helpers > 0);
|
---|
14542 |
|
---|
14543 | help = from->p_dtrace_helpers;
|
---|
14544 | newhelp = dtrace_helpers_create(to);
|
---|
14545 | ASSERT(to->p_dtrace_helpers != NULL);
|
---|
14546 |
|
---|
14547 | newhelp->dthps_generation = help->dthps_generation;
|
---|
14548 | vstate = &newhelp->dthps_vstate;
|
---|
14549 |
|
---|
14550 | /*
|
---|
14551 | * Duplicate the helper actions.
|
---|
14552 | */
|
---|
14553 | for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
|
---|
14554 | if ((helper = help->dthps_actions[i]) == NULL)
|
---|
14555 | continue;
|
---|
14556 |
|
---|
14557 | for (last = NULL; helper != NULL; helper = helper->dtha_next) {
|
---|
14558 | new = kmem_zalloc(sizeof (dtrace_helper_action_t),
|
---|
14559 | KM_SLEEP);
|
---|
14560 | new->dtha_generation = helper->dtha_generation;
|
---|
14561 |
|
---|
14562 | if ((dp = helper->dtha_predicate) != NULL) {
|
---|
14563 | dp = dtrace_difo_duplicate(dp, vstate);
|
---|
14564 | new->dtha_predicate = dp;
|
---|
14565 | }
|
---|
14566 |
|
---|
14567 | new->dtha_nactions = helper->dtha_nactions;
|
---|
14568 | sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
|
---|
14569 | new->dtha_actions = kmem_alloc(sz, KM_SLEEP);
|
---|
14570 |
|
---|
14571 | for (j = 0; j < new->dtha_nactions; j++) {
|
---|
14572 | dtrace_difo_t *dp2 = helper->dtha_actions[j];
|
---|
14573 |
|
---|
14574 | ASSERT(dp2 != NULL);
|
---|
14575 | dp2 = dtrace_difo_duplicate(dp2, vstate);
|
---|
14576 | new->dtha_actions[j] = dp2;
|
---|
14577 | }
|
---|
14578 |
|
---|
14579 | if (last != NULL) {
|
---|
14580 | last->dtha_next = new;
|
---|
14581 | } else {
|
---|
14582 | newhelp->dthps_actions[i] = new;
|
---|
14583 | }
|
---|
14584 |
|
---|
14585 | last = new;
|
---|
14586 | }
|
---|
14587 | }
|
---|
14588 |
|
---|
14589 | /*
|
---|
14590 | * Duplicate the helper providers and register them with the
|
---|
14591 | * DTrace framework.
|
---|
14592 | */
|
---|
14593 | if (help->dthps_nprovs > 0) {
|
---|
14594 | newhelp->dthps_nprovs = help->dthps_nprovs;
|
---|
14595 | newhelp->dthps_maxprovs = help->dthps_nprovs;
|
---|
14596 | newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
|
---|
14597 | sizeof (dtrace_helper_provider_t *), KM_SLEEP);
|
---|
14598 | for (i = 0; i < VBDTCAST(int)newhelp->dthps_nprovs; i++) {
|
---|
14599 | newhelp->dthps_provs[i] = help->dthps_provs[i];
|
---|
14600 | newhelp->dthps_provs[i]->dthp_ref++;
|
---|
14601 | }
|
---|
14602 |
|
---|
14603 | hasprovs = 1;
|
---|
14604 | }
|
---|
14605 |
|
---|
14606 | mutex_exit(&dtrace_lock);
|
---|
14607 |
|
---|
14608 | if (hasprovs)
|
---|
14609 | dtrace_helper_provider_register(to, newhelp, NULL);
|
---|
14610 | }
|
---|
14611 |
|
---|
14612 | /*
|
---|
14613 | * DTrace Hook Functions
|
---|
14614 | */
|
---|
14615 | static void
|
---|
14616 | dtrace_module_loaded(struct modctl *ctl)
|
---|
14617 | {
|
---|
14618 | dtrace_provider_t *prv;
|
---|
14619 |
|
---|
14620 | mutex_enter(&dtrace_provider_lock);
|
---|
14621 | mutex_enter(&mod_lock);
|
---|
14622 |
|
---|
14623 | ASSERT(ctl->mod_busy);
|
---|
14624 |
|
---|
14625 | /*
|
---|
14626 | * We're going to call each providers per-module provide operation
|
---|
14627 | * specifying only this module.
|
---|
14628 | */
|
---|
14629 | for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
|
---|
14630 | prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
|
---|
14631 |
|
---|
14632 | mutex_exit(&mod_lock);
|
---|
14633 | mutex_exit(&dtrace_provider_lock);
|
---|
14634 |
|
---|
14635 | /*
|
---|
14636 | * If we have any retained enablings, we need to match against them.
|
---|
14637 | * Enabling probes requires that cpu_lock be held, and we cannot hold
|
---|
14638 | * cpu_lock here -- it is legal for cpu_lock to be held when loading a
|
---|
14639 | * module. (In particular, this happens when loading scheduling
|
---|
14640 | * classes.) So if we have any retained enablings, we need to dispatch
|
---|
14641 | * our task queue to do the match for us.
|
---|
14642 | */
|
---|
14643 | mutex_enter(&dtrace_lock);
|
---|
14644 |
|
---|
14645 | if (dtrace_retained == NULL) {
|
---|
14646 | mutex_exit(&dtrace_lock);
|
---|
14647 | return;
|
---|
14648 | }
|
---|
14649 |
|
---|
14650 | (void) taskq_dispatch(dtrace_taskq,
|
---|
14651 | (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
|
---|
14652 |
|
---|
14653 | mutex_exit(&dtrace_lock);
|
---|
14654 |
|
---|
14655 | /*
|
---|
14656 | * And now, for a little heuristic sleaze: in general, we want to
|
---|
14657 | * match modules as soon as they load. However, we cannot guarantee
|
---|
14658 | * this, because it would lead us to the lock ordering violation
|
---|
14659 | * outlined above. The common case, of course, is that cpu_lock is
|
---|
14660 | * _not_ held -- so we delay here for a clock tick, hoping that that's
|
---|
14661 | * long enough for the task queue to do its work. If it's not, it's
|
---|
14662 | * not a serious problem -- it just means that the module that we
|
---|
14663 | * just loaded may not be immediately instrumentable.
|
---|
14664 | */
|
---|
14665 | delay(1);
|
---|
14666 | }
|
---|
14667 |
|
---|
14668 | static void
|
---|
14669 | dtrace_module_unloaded(struct modctl *ctl)
|
---|
14670 | {
|
---|
14671 | dtrace_probe_t template, *probe, *first, *next;
|
---|
14672 | dtrace_provider_t *prov;
|
---|
14673 |
|
---|
14674 | template.dtpr_mod = ctl->mod_modname;
|
---|
14675 |
|
---|
14676 | mutex_enter(&dtrace_provider_lock);
|
---|
14677 | mutex_enter(&mod_lock);
|
---|
14678 | mutex_enter(&dtrace_lock);
|
---|
14679 |
|
---|
14680 | if (dtrace_bymod == NULL) {
|
---|
14681 | /*
|
---|
14682 | * The DTrace module is loaded (obviously) but not attached;
|
---|
14683 | * we don't have any work to do.
|
---|
14684 | */
|
---|
14685 | mutex_exit(&dtrace_provider_lock);
|
---|
14686 | mutex_exit(&mod_lock);
|
---|
14687 | mutex_exit(&dtrace_lock);
|
---|
14688 | return;
|
---|
14689 | }
|
---|
14690 |
|
---|
14691 | for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
|
---|
14692 | probe != NULL; probe = probe->dtpr_nextmod) {
|
---|
14693 | if (probe->dtpr_ecb != NULL) {
|
---|
14694 | mutex_exit(&dtrace_provider_lock);
|
---|
14695 | mutex_exit(&mod_lock);
|
---|
14696 | mutex_exit(&dtrace_lock);
|
---|
14697 |
|
---|
14698 | /*
|
---|
14699 | * This shouldn't _actually_ be possible -- we're
|
---|
14700 | * unloading a module that has an enabled probe in it.
|
---|
14701 | * (It's normally up to the provider to make sure that
|
---|
14702 | * this can't happen.) However, because dtps_enable()
|
---|
14703 | * doesn't have a failure mode, there can be an
|
---|
14704 | * enable/unload race. Upshot: we don't want to
|
---|
14705 | * assert, but we're not going to disable the
|
---|
14706 | * probe, either.
|
---|
14707 | */
|
---|
14708 | if (dtrace_err_verbose) {
|
---|
14709 | cmn_err(CE_WARN, "unloaded module '%s' had "
|
---|
14710 | "enabled probes", ctl->mod_modname);
|
---|
14711 | }
|
---|
14712 |
|
---|
14713 | return;
|
---|
14714 | }
|
---|
14715 | }
|
---|
14716 |
|
---|
14717 | probe = first;
|
---|
14718 |
|
---|
14719 | for (first = NULL; probe != NULL; probe = next) {
|
---|
14720 | ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
|
---|
14721 |
|
---|
14722 | dtrace_probes[probe->dtpr_id - 1] = NULL;
|
---|
14723 |
|
---|
14724 | next = probe->dtpr_nextmod;
|
---|
14725 | dtrace_hash_remove(dtrace_bymod, probe);
|
---|
14726 | dtrace_hash_remove(dtrace_byfunc, probe);
|
---|
14727 | dtrace_hash_remove(dtrace_byname, probe);
|
---|
14728 |
|
---|
14729 | if (first == NULL) {
|
---|
14730 | first = probe;
|
---|
14731 | probe->dtpr_nextmod = NULL;
|
---|
14732 | } else {
|
---|
14733 | probe->dtpr_nextmod = first;
|
---|
14734 | first = probe;
|
---|
14735 | }
|
---|
14736 | }
|
---|
14737 |
|
---|
14738 | /*
|
---|
14739 | * We've removed all of the module's probes from the hash chains and
|
---|
14740 | * from the probe array. Now issue a dtrace_sync() to be sure that
|
---|
14741 | * everyone has cleared out from any probe array processing.
|
---|
14742 | */
|
---|
14743 | dtrace_sync();
|
---|
14744 |
|
---|
14745 | for (probe = first; probe != NULL; probe = first) {
|
---|
14746 | first = probe->dtpr_nextmod;
|
---|
14747 | prov = probe->dtpr_provider;
|
---|
14748 | prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
|
---|
14749 | probe->dtpr_arg);
|
---|
14750 | kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
|
---|
14751 | kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
|
---|
14752 | kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
|
---|
14753 | vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
|
---|
14754 | kmem_free(probe, sizeof (dtrace_probe_t));
|
---|
14755 | }
|
---|
14756 |
|
---|
14757 | mutex_exit(&dtrace_lock);
|
---|
14758 | mutex_exit(&mod_lock);
|
---|
14759 | mutex_exit(&dtrace_provider_lock);
|
---|
14760 | }
|
---|
14761 |
|
---|
14762 | VBDTSTATIC void
|
---|
14763 | dtrace_suspend(void)
|
---|
14764 | {
|
---|
14765 | dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
|
---|
14766 | }
|
---|
14767 |
|
---|
14768 | VBDTSTATIC void
|
---|
14769 | dtrace_resume(void)
|
---|
14770 | {
|
---|
14771 | dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
|
---|
14772 | }
|
---|
14773 |
|
---|
14774 | #endif /* !VBOX */
|
---|
14775 |
|
---|
14776 | #ifdef VBOX
|
---|
/*
 * CPU configuration event codes.  The numbering is the default
 * enumeration order, spelled out explicitly.
 */
typedef enum {
	CPU_INVALID = 0,
	CPU_CONFIG = 1,
	CPU_UNCONFIG = 2
} cpu_setup_t;
|
---|
14782 | #endif
|
---|
14783 |
|
---|
14784 | #ifndef VBOX
|
---|
14785 |
|
---|
14786 | static int
|
---|
14787 | dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
|
---|
14788 | {
|
---|
14789 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
14790 | mutex_enter(&dtrace_lock);
|
---|
14791 |
|
---|
14792 | switch (what) {
|
---|
14793 | case CPU_CONFIG: {
|
---|
14794 | dtrace_state_t *state;
|
---|
14795 | dtrace_optval_t *opt, rs, c;
|
---|
14796 |
|
---|
14797 | /*
|
---|
14798 | * For now, we only allocate a new buffer for anonymous state.
|
---|
14799 | */
|
---|
14800 | if ((state = dtrace_anon.dta_state) == NULL)
|
---|
14801 | break;
|
---|
14802 |
|
---|
14803 | if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
|
---|
14804 | break;
|
---|
14805 |
|
---|
14806 | opt = state->dts_options;
|
---|
14807 | c = opt[DTRACEOPT_CPU];
|
---|
14808 |
|
---|
14809 | if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
|
---|
14810 | break;
|
---|
14811 |
|
---|
14812 | /*
|
---|
14813 | * Regardless of what the actual policy is, we're going to
|
---|
14814 | * temporarily set our resize policy to be manual. We're
|
---|
14815 | * also going to temporarily set our CPU option to denote
|
---|
14816 | * the newly configured CPU.
|
---|
14817 | */
|
---|
14818 | rs = opt[DTRACEOPT_BUFRESIZE];
|
---|
14819 | opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
|
---|
14820 | opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;
|
---|
14821 |
|
---|
14822 | (void) dtrace_state_buffers(state);
|
---|
14823 |
|
---|
14824 | opt[DTRACEOPT_BUFRESIZE] = rs;
|
---|
14825 | opt[DTRACEOPT_CPU] = c;
|
---|
14826 |
|
---|
14827 | break;
|
---|
14828 | }
|
---|
14829 |
|
---|
14830 | case CPU_UNCONFIG:
|
---|
14831 | /*
|
---|
14832 | * We don't free the buffer in the CPU_UNCONFIG case. (The
|
---|
14833 | * buffer will be freed when the consumer exits.)
|
---|
14834 | */
|
---|
14835 | break;
|
---|
14836 |
|
---|
14837 | default:
|
---|
14838 | break;
|
---|
14839 | }
|
---|
14840 |
|
---|
14841 | mutex_exit(&dtrace_lock);
|
---|
14842 | return (0);
|
---|
14843 | }
|
---|
14844 |
|
---|
14845 | static void
|
---|
14846 | dtrace_cpu_setup_initial(processorid_t cpu)
|
---|
14847 | {
|
---|
14848 | (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
|
---|
14849 | }
|
---|
14850 |
|
---|
14851 | #endif /* !VBOX */
|
---|
14852 |
|
---|
14853 | static void
|
---|
14854 | dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
|
---|
14855 | {
|
---|
14856 | if (dtrace_toxranges >= dtrace_toxranges_max) {
|
---|
14857 | int osize, nsize;
|
---|
14858 | dtrace_toxrange_t *range;
|
---|
14859 |
|
---|
14860 | osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
|
---|
14861 |
|
---|
14862 | if (osize == 0) {
|
---|
14863 | ASSERT(dtrace_toxrange == NULL);
|
---|
14864 | ASSERT(dtrace_toxranges_max == 0);
|
---|
14865 | dtrace_toxranges_max = 1;
|
---|
14866 | } else {
|
---|
14867 | dtrace_toxranges_max <<= 1;
|
---|
14868 | }
|
---|
14869 |
|
---|
14870 | nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
|
---|
14871 | range = kmem_zalloc(nsize, KM_SLEEP);
|
---|
14872 |
|
---|
14873 | if (dtrace_toxrange != NULL) {
|
---|
14874 | ASSERT(osize != 0);
|
---|
14875 | bcopy(dtrace_toxrange, range, osize);
|
---|
14876 | kmem_free(dtrace_toxrange, osize);
|
---|
14877 | }
|
---|
14878 |
|
---|
14879 | dtrace_toxrange = range;
|
---|
14880 | }
|
---|
14881 |
|
---|
14882 | ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
|
---|
14883 | ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);
|
---|
14884 |
|
---|
14885 | dtrace_toxrange[dtrace_toxranges].dtt_base = base;
|
---|
14886 | dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
|
---|
14887 | dtrace_toxranges++;
|
---|
14888 | }
|
---|
14889 |
|
---|
14890 | /*
|
---|
14891 | * DTrace Driver Cookbook Functions
|
---|
14892 | */
|
---|
14893 | #ifdef VBOX
|
---|
14894 | int dtrace_attach(void)
|
---|
14895 | #else
|
---|
14896 | /*ARGSUSED*/
|
---|
14897 | static int
|
---|
14898 | dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
|
---|
14899 | #endif
|
---|
14900 | {
|
---|
14901 | dtrace_provider_id_t id;
|
---|
14902 | dtrace_state_t *state = NULL;
|
---|
14903 | dtrace_enabling_t *enab;
|
---|
14904 |
|
---|
14905 | #ifdef VBOX
|
---|
14906 | if ( VBoxDtMutexInit(&dtrace_lock)
|
---|
14907 | || VBoxDtMutexInit(&dtrace_provider_lock)
|
---|
14908 | || VBoxDtMutexInit(&dtrace_meta_lock)
|
---|
14909 | # ifdef DEBUG
|
---|
14910 | || VBoxDtMutexInit(&dtrace_errlock)
|
---|
14911 | # endif
|
---|
14912 | )
|
---|
14913 | return (DDI_FAILURE);
|
---|
14914 | #endif
|
---|
14915 |
|
---|
14916 | mutex_enter(&cpu_lock);
|
---|
14917 | mutex_enter(&dtrace_provider_lock);
|
---|
14918 | mutex_enter(&dtrace_lock);
|
---|
14919 |
|
---|
14920 | #ifndef VBOX
|
---|
14921 | if (ddi_soft_state_init(&dtrace_softstate,
|
---|
14922 | sizeof (dtrace_state_t), 0) != 0) {
|
---|
14923 | cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
|
---|
14924 | mutex_exit(&cpu_lock);
|
---|
14925 | mutex_exit(&dtrace_provider_lock);
|
---|
14926 | mutex_exit(&dtrace_lock);
|
---|
14927 | return (DDI_FAILURE);
|
---|
14928 | }
|
---|
14929 |
|
---|
14930 | if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
|
---|
14931 | DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
|
---|
14932 | ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
|
---|
14933 | DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
|
---|
14934 | cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
|
---|
14935 | ddi_remove_minor_node(devi, NULL);
|
---|
14936 | ddi_soft_state_fini(&dtrace_softstate);
|
---|
14937 | mutex_exit(&cpu_lock);
|
---|
14938 | mutex_exit(&dtrace_provider_lock);
|
---|
14939 | mutex_exit(&dtrace_lock);
|
---|
14940 | return (DDI_FAILURE);
|
---|
14941 | }
|
---|
14942 |
|
---|
14943 | ddi_report_dev(devi);
|
---|
14944 | dtrace_devi = devi;
|
---|
14945 |
|
---|
14946 | dtrace_modload = dtrace_module_loaded;
|
---|
14947 | dtrace_modunload = dtrace_module_unloaded;
|
---|
14948 | dtrace_cpu_init = dtrace_cpu_setup_initial;
|
---|
14949 | dtrace_helpers_cleanup = dtrace_helpers_destroy;
|
---|
14950 | dtrace_helpers_fork = dtrace_helpers_duplicate;
|
---|
14951 | dtrace_cpustart_init = dtrace_suspend;
|
---|
14952 | dtrace_cpustart_fini = dtrace_resume;
|
---|
14953 | dtrace_debugger_init = dtrace_suspend;
|
---|
14954 | dtrace_debugger_fini = dtrace_resume;
|
---|
14955 |
|
---|
14956 | register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
|
---|
14957 | #else
|
---|
14958 | /** @todo some of these hooks needs checking out! */
|
---|
14959 | #endif
|
---|
14960 |
|
---|
14961 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
14962 |
|
---|
14963 | #ifndef VBOX /* Reduce the area a bit just to be sure our vmem fake doesn't blow up. */
|
---|
14964 | dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
|
---|
14965 | NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
|
---|
14966 | #else
|
---|
14967 | dtrace_arena = vmem_create("dtrace", (void *)(uintptr_t)1, UINT32_MAX - 16, 1,
|
---|
14968 | NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
|
---|
14969 | #endif
|
---|
14970 | #ifndef VBOX
|
---|
14971 | dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
|
---|
14972 | UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
|
---|
14973 | VM_SLEEP | VMC_IDENTIFIER);
|
---|
14974 | dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
|
---|
14975 | 1, INT_MAX, 0);
|
---|
14976 | #endif
|
---|
14977 |
|
---|
14978 | dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
|
---|
14979 | sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
|
---|
14980 | NULL, NULL, NULL, NULL, NULL, 0);
|
---|
14981 |
|
---|
14982 | ASSERT(MUTEX_HELD(&cpu_lock));
|
---|
14983 | dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
|
---|
14984 | offsetof(dtrace_probe_t, dtpr_nextmod),
|
---|
14985 | offsetof(dtrace_probe_t, dtpr_prevmod));
|
---|
14986 |
|
---|
14987 | dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
|
---|
14988 | offsetof(dtrace_probe_t, dtpr_nextfunc),
|
---|
14989 | offsetof(dtrace_probe_t, dtpr_prevfunc));
|
---|
14990 |
|
---|
14991 | dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
|
---|
14992 | offsetof(dtrace_probe_t, dtpr_nextname),
|
---|
14993 | offsetof(dtrace_probe_t, dtpr_prevname));
|
---|
14994 |
|
---|
14995 | if (dtrace_retain_max < 1) {
|
---|
14996 | cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
|
---|
14997 | "setting to 1", dtrace_retain_max);
|
---|
14998 | dtrace_retain_max = 1;
|
---|
14999 | }
|
---|
15000 |
|
---|
15001 | /*
|
---|
15002 | * Now discover our toxic ranges.
|
---|
15003 | */
|
---|
15004 | dtrace_toxic_ranges(dtrace_toxrange_add);
|
---|
15005 |
|
---|
15006 | /*
|
---|
15007 | * Before we register ourselves as a provider to our own framework,
|
---|
15008 | * we would like to assert that dtrace_provider is NULL -- but that's
|
---|
15009 | * not true if we were loaded as a dependency of a DTrace provider.
|
---|
15010 | * Once we've registered, we can assert that dtrace_provider is our
|
---|
15011 | * pseudo provider.
|
---|
15012 | */
|
---|
15013 | (void) dtrace_register("dtrace", &dtrace_provider_attr,
|
---|
15014 | DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);
|
---|
15015 |
|
---|
15016 | ASSERT(dtrace_provider != NULL);
|
---|
15017 | ASSERT((dtrace_provider_id_t)dtrace_provider == id);
|
---|
15018 |
|
---|
15019 | dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
|
---|
15020 | dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
|
---|
15021 | dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
|
---|
15022 | dtrace_provider, NULL, NULL, "END", 0, NULL);
|
---|
15023 | dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
|
---|
15024 | dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
|
---|
15025 |
|
---|
15026 | #ifndef VBOX
|
---|
15027 | dtrace_anon_property();
|
---|
15028 | #endif
|
---|
15029 | mutex_exit(&cpu_lock);
|
---|
15030 |
|
---|
15031 | /*
|
---|
15032 | * If DTrace helper tracing is enabled, we need to allocate the
|
---|
15033 | * trace buffer and initialize the values.
|
---|
15034 | */
|
---|
15035 | if (dtrace_helptrace_enabled) {
|
---|
15036 | ASSERT(dtrace_helptrace_buffer == NULL);
|
---|
15037 | dtrace_helptrace_buffer =
|
---|
15038 | kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
|
---|
15039 | dtrace_helptrace_next = 0;
|
---|
15040 | }
|
---|
15041 |
|
---|
15042 | /*
|
---|
15043 | * If there are already providers, we must ask them to provide their
|
---|
15044 | * probes, and then match any anonymous enabling against them. Note
|
---|
15045 | * that there should be no other retained enablings at this time:
|
---|
15046 | * the only retained enablings at this time should be the anonymous
|
---|
15047 | * enabling.
|
---|
15048 | */
|
---|
15049 | if (dtrace_anon.dta_enabling != NULL) {
|
---|
15050 | ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
|
---|
15051 |
|
---|
15052 | dtrace_enabling_provide(NULL);
|
---|
15053 | state = dtrace_anon.dta_state;
|
---|
15054 |
|
---|
15055 | /*
|
---|
15056 | * We couldn't hold cpu_lock across the above call to
|
---|
15057 | * dtrace_enabling_provide(), but we must hold it to actually
|
---|
15058 | * enable the probes. We have to drop all of our locks, pick
|
---|
15059 | * up cpu_lock, and regain our locks before matching the
|
---|
15060 | * retained anonymous enabling.
|
---|
15061 | */
|
---|
15062 | mutex_exit(&dtrace_lock);
|
---|
15063 | mutex_exit(&dtrace_provider_lock);
|
---|
15064 |
|
---|
15065 | mutex_enter(&cpu_lock);
|
---|
15066 | mutex_enter(&dtrace_provider_lock);
|
---|
15067 | mutex_enter(&dtrace_lock);
|
---|
15068 |
|
---|
15069 | if ((enab = dtrace_anon.dta_enabling) != NULL)
|
---|
15070 | (void) dtrace_enabling_match(enab, NULL);
|
---|
15071 |
|
---|
15072 | mutex_exit(&cpu_lock);
|
---|
15073 | }
|
---|
15074 |
|
---|
15075 | mutex_exit(&dtrace_lock);
|
---|
15076 | mutex_exit(&dtrace_provider_lock);
|
---|
15077 |
|
---|
15078 | if (state != NULL) {
|
---|
15079 | /*
|
---|
15080 | * If we created any anonymous state, set it going now.
|
---|
15081 | */
|
---|
15082 | (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
|
---|
15083 | }
|
---|
15084 |
|
---|
15085 | return (DDI_SUCCESS);
|
---|
15086 | }
|
---|
15087 |
|
---|
15088 | #ifdef VBOX
|
---|
15089 | int dtrace_open(dtrace_state_t **ppState, cred_t *cred_p)
|
---|
15090 | #else
|
---|
15091 | /*ARGSUSED*/
|
---|
15092 | static int
|
---|
15093 | dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
|
---|
15094 | #endif
|
---|
15095 | {
|
---|
15096 | dtrace_state_t *state;
|
---|
15097 | uint32_t priv;
|
---|
15098 | uid_t uid;
|
---|
15099 | zoneid_t zoneid;
|
---|
15100 |
|
---|
15101 | #ifndef VBOX
|
---|
15102 | if (getminor(*devp) == DTRACEMNRN_HELPER)
|
---|
15103 | return (0);
|
---|
15104 |
|
---|
15105 | /*
|
---|
15106 | * If this wasn't an open with the "helper" minor, then it must be
|
---|
15107 | * the "dtrace" minor.
|
---|
15108 | */
|
---|
15109 | if (getminor(*devp) != DTRACEMNRN_DTRACE)
|
---|
15110 | return (ENXIO);
|
---|
15111 | #endif /* !VBOX */
|
---|
15112 |
|
---|
15113 | /*
|
---|
15114 | * If no DTRACE_PRIV_* bits are set in the credential, then the
|
---|
15115 | * caller lacks sufficient permission to do anything with DTrace.
|
---|
15116 | */
|
---|
15117 | dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
|
---|
15118 | if (priv == DTRACE_PRIV_NONE)
|
---|
15119 | return (EACCES);
|
---|
15120 |
|
---|
15121 | /*
|
---|
15122 | * Ask all providers to provide all their probes.
|
---|
15123 | */
|
---|
15124 | mutex_enter(&dtrace_provider_lock);
|
---|
15125 | dtrace_probe_provide(NULL, NULL);
|
---|
15126 | mutex_exit(&dtrace_provider_lock);
|
---|
15127 |
|
---|
15128 | mutex_enter(&cpu_lock);
|
---|
15129 | mutex_enter(&dtrace_lock);
|
---|
15130 | dtrace_opens++;
|
---|
15131 | dtrace_membar_producer();
|
---|
15132 |
|
---|
15133 | #ifndef VBOX
|
---|
15134 | /*
|
---|
15135 | * If the kernel debugger is active (that is, if the kernel debugger
|
---|
15136 | * modified text in some way), we won't allow the open.
|
---|
15137 | */
|
---|
15138 | if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
|
---|
15139 | dtrace_opens--;
|
---|
15140 | mutex_exit(&cpu_lock);
|
---|
15141 | mutex_exit(&dtrace_lock);
|
---|
15142 | return (EBUSY);
|
---|
15143 | }
|
---|
15144 | #endif
|
---|
15145 |
|
---|
15146 | #ifndef VBOX
|
---|
15147 | state = dtrace_state_create(devp, cred_p);
|
---|
15148 | #else
|
---|
15149 | state = dtrace_state_create(cred_p);
|
---|
15150 | #endif
|
---|
15151 | mutex_exit(&cpu_lock);
|
---|
15152 |
|
---|
15153 | if (state == NULL) {
|
---|
15154 | #ifndef VBOX
|
---|
15155 | if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
|
---|
15156 | (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
|
---|
15157 | #else
|
---|
15158 | dtrace_opens--;
|
---|
15159 | #endif
|
---|
15160 | mutex_exit(&dtrace_lock);
|
---|
15161 | return (EAGAIN);
|
---|
15162 | }
|
---|
15163 |
|
---|
15164 | mutex_exit(&dtrace_lock);
|
---|
15165 |
|
---|
15166 | #ifdef VBOX
|
---|
15167 | *ppState = state;
|
---|
15168 | #endif
|
---|
15169 | return (0);
|
---|
15170 | }
|
---|
15171 |
|
---|
15172 | #ifdef VBOX
|
---|
15173 | int dtrace_close(dtrace_state_t *state)
|
---|
15174 | #else
|
---|
15175 | /*ARGSUSED*/
|
---|
15176 | static int
|
---|
15177 | dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
|
---|
15178 | #endif
|
---|
15179 | {
|
---|
15180 | #ifndef VBOX
|
---|
15181 | minor_t minor = getminor(dev);
|
---|
15182 | dtrace_state_t *state;
|
---|
15183 |
|
---|
15184 | if (minor == DTRACEMNRN_HELPER)
|
---|
15185 | return (0);
|
---|
15186 |
|
---|
15187 | state = ddi_get_soft_state(dtrace_softstate, minor);
|
---|
15188 | #endif
|
---|
15189 |
|
---|
15190 | mutex_enter(&cpu_lock);
|
---|
15191 | mutex_enter(&dtrace_lock);
|
---|
15192 |
|
---|
15193 | if (state->dts_anon) {
|
---|
15194 | /*
|
---|
15195 | * There is anonymous state. Destroy that first.
|
---|
15196 | */
|
---|
15197 | ASSERT(dtrace_anon.dta_state == NULL);
|
---|
15198 | dtrace_state_destroy(state->dts_anon);
|
---|
15199 | }
|
---|
15200 |
|
---|
15201 | dtrace_state_destroy(state);
|
---|
15202 | ASSERT(dtrace_opens > 0);
|
---|
15203 |
|
---|
15204 | #ifndef VBOX
|
---|
15205 | /*
|
---|
15206 | * Only relinquish control of the kernel debugger interface when there
|
---|
15207 | * are no consumers and no anonymous enablings.
|
---|
15208 | */
|
---|
15209 | if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
|
---|
15210 | (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
|
---|
15211 | #else
|
---|
15212 | dtrace_opens--;
|
---|
15213 | #endif
|
---|
15214 |
|
---|
15215 | mutex_exit(&dtrace_lock);
|
---|
15216 | mutex_exit(&cpu_lock);
|
---|
15217 |
|
---|
15218 | return (0);
|
---|
15219 | }
|
---|
15220 |
|
---|
15221 | #ifndef VBOX
|
---|
15222 | /*ARGSUSED*/
|
---|
15223 | static int
|
---|
15224 | dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
|
---|
15225 | {
|
---|
15226 | int rval;
|
---|
15227 | dof_helper_t help, *dhp = NULL;
|
---|
15228 |
|
---|
15229 | switch (cmd) {
|
---|
15230 | case DTRACEHIOC_ADDDOF:
|
---|
15231 | if (copyin((void *)arg, &help, sizeof (help)) != 0) {
|
---|
15232 | dtrace_dof_error(NULL, "failed to copyin DOF helper");
|
---|
15233 | return (EFAULT);
|
---|
15234 | }
|
---|
15235 |
|
---|
15236 | dhp = &help;
|
---|
15237 | arg = (intptr_t)help.dofhp_dof;
|
---|
15238 | RT_FALL_THRU();
|
---|
15239 |
|
---|
15240 | case DTRACEHIOC_ADD: {
|
---|
15241 | dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);
|
---|
15242 |
|
---|
15243 | if (dof == NULL)
|
---|
15244 | return (rval);
|
---|
15245 |
|
---|
15246 | mutex_enter(&dtrace_lock);
|
---|
15247 |
|
---|
15248 | /*
|
---|
15249 | * dtrace_helper_slurp() takes responsibility for the dof --
|
---|
15250 | * it may free it now or it may save it and free it later.
|
---|
15251 | */
|
---|
15252 | if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
|
---|
15253 | *rv = rval;
|
---|
15254 | rval = 0;
|
---|
15255 | } else {
|
---|
15256 | rval = EINVAL;
|
---|
15257 | }
|
---|
15258 |
|
---|
15259 | mutex_exit(&dtrace_lock);
|
---|
15260 | return (rval);
|
---|
15261 | }
|
---|
15262 |
|
---|
15263 | case DTRACEHIOC_REMOVE: {
|
---|
15264 | mutex_enter(&dtrace_lock);
|
---|
15265 | rval = dtrace_helper_destroygen(arg);
|
---|
15266 | mutex_exit(&dtrace_lock);
|
---|
15267 |
|
---|
15268 | return (rval);
|
---|
15269 | }
|
---|
15270 |
|
---|
15271 | default:
|
---|
15272 | break;
|
---|
15273 | }
|
---|
15274 |
|
---|
15275 | return (ENOTTY);
|
---|
15276 | }
|
---|
15277 | #endif /* !VBOX */
|
---|
15278 |
|
---|
/*
 * ioctl entry point for DTrace consumers.
 *
 * In the non-VBOX build the consumer state is looked up from the soft state
 * by minor number, and requests on the helper minor are forwarded to
 * dtrace_ioctl_helper().  In the VBOX build the state is passed in directly.
 * If the state has anonymous state attached (dts_anon), every command
 * operates on that anonymous state instead.
 *
 * arg is interpreted per command: for most commands it is the user address
 * of a descriptor that is copied in, filled out and copied back.  rv is
 * written by DTRACEIOC_ENABLE only.
 *
 * Returns 0 on success or an errno value; unrecognized commands yield ENOTTY.
 */
#ifdef VBOX
int dtrace_ioctl(dtrace_state_t *state, int cmd, intptr_t arg, int32_t *rv)
#else
/*ARGSUSED*/
static int
dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
#endif
{
#ifndef VBOX
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
#endif
	int rval;

#ifndef VBOX
	if (minor == DTRACEMNRN_HELPER)
		return (dtrace_ioctl_helper(cmd, arg, rv));

	state = ddi_get_soft_state(dtrace_softstate, minor);
#endif

	/*
	 * A consumer with anonymous state attached operates on that state.
	 */
	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	/*
	 * Look up a provider by name and return its privilege and attribute
	 * descriptions.  ESRCH if no provider has the given name.
	 */
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t pvd;
		dtrace_provider_t *pvp;

		if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
			return (EFAULT);

		/* Never trust the caller to have terminated the name. */
		pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		mutex_enter(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
				break;
		}

		mutex_exit(&dtrace_provider_lock);

		if (pvp == NULL)
			return (ESRCH);

		/*
		 * NOTE(review): pvp is read here after dtrace_provider_lock
		 * has been dropped -- confirm the provider cannot go away.
		 */
		bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
		if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Describe the enabled probe (ECB) identified by dtepd_epid, together
	 * with up to the caller-supplied number of record descriptions.
	 */
	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		if (ecb->dte_probe == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
		epdesc.dtepd_uarg = ecb->dte_uarg;
		epdesc.dtepd_size = VBDTCAST(uint32_t)ecb->dte_size;

		/*
		 * Remember how many records the caller asked for, then
		 * recompute dtepd_nrecs as the number of actions that are
		 * neither aggregating actions nor tuple members.
		 */
		nrecs = epdesc.dtepd_nrecs;
		epdesc.dtepd_nrecs = 0;
		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			epdesc.dtepd_nrecs++;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_eprobedesc_t) +
		    (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		/* Header first; records are then laid down from dtepd_rec[0]. */
		bcopy(&epdesc, (void *)dest, sizeof (epdesc));
		dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			/* Stop once the caller's requested count is used up. */
			if (nrecs-- == 0)
				break;

			bcopy(&act->dta_rec, (void *)dest,
			    sizeof (dtrace_recdesc_t));
			dest += sizeof (dtrace_recdesc_t);
		}

		mutex_exit(&dtrace_lock);

		/* Only the bytes actually filled in are copied out. */
		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}

	/*
	 * Describe the aggregation identified by dtagd_id, together with up
	 * to the caller-supplied number of record descriptions.
	 */
	case DTRACEIOC_AGGDESC: {
		dtrace_aggdesc_t aggdesc;
		dtrace_action_t *act;
		dtrace_aggregation_t *agg;
		int nrecs;
		uint32_t offs;
		dtrace_recdesc_t *lrec;
		void *buf;
		size_t size;
		uintptr_t dest;

		if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

		/* Caller's requested record count; dtagd_nrecs is recomputed. */
		nrecs = aggdesc.dtagd_nrecs;
		aggdesc.dtagd_nrecs = 0;

		/*
		 * The reported size runs from the aggregation's base offset
		 * to the end of the aggregating action's own record.
		 */
		offs = agg->dtag_base;
		lrec = &agg->dtag_action.dta_rec;
		aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;

		/*
		 * Count the records from the first action of the aggregation
		 * through the aggregating action itself.
		 */
		for (act = agg->dtag_first; ; act = act->dta_next) {
			ASSERT(act->dta_intuple ||
			    DTRACEACT_ISAGG(act->dta_kind));

			/*
			 * If this action has a record size of zero, it
			 * denotes an argument to the aggregating action.
			 * Because the presence of this record doesn't (or
			 * shouldn't) affect the way the data is interpreted,
			 * we don't copy it out to save user-level the
			 * confusion of dealing with a zero-length record.
			 */
			if (act->dta_rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			aggdesc.dtagd_nrecs++;

			if (act == &agg->dtag_action)
				break;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_aggdesc_t) +
		    (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		/* Header first; records are then laid down from dtagd_rec[0]. */
		bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
		dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);

		for (act = agg->dtag_first; ; act = act->dta_next) {
			dtrace_recdesc_t rec = act->dta_rec;

			/*
			 * See the comment in the above loop for why we pass
			 * over zero-length records.
			 */
			if (rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			/* Stop once the caller's requested count is used up. */
			if (nrecs-- == 0)
				break;

			/* Report offsets relative to the aggregation's base. */
			rec.dtrd_offset -= offs;
			bcopy(&rec, (void *)dest, sizeof (rec));
			dest += sizeof (dtrace_recdesc_t);

			if (act == &agg->dtag_action)
				break;
		}

		mutex_exit(&dtrace_lock);

		/* Only the bytes actually filled in are copied out. */
		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}

	/*
	 * Load the DOF at arg and create, match and retain an enabling from
	 * it.  *rv is zeroed up front and is also handed to
	 * dtrace_enabling_match().
	 */
	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;
#ifdef VBOX
		/* The VBOX entry point has no cr parameter; use CRED(). */
		cred_t *cr = CRED();
#endif

		*rv = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == NULL) {
			dtrace_enabling_matchall();

			return (0);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		mutex_enter(&cpu_lock);
		mutex_enter(&dtrace_lock);
		vstate = &state->dts_vstate;

		/* Enablings may only be added while the state is inactive. */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		/* From here on the enabling must be destroyed on failure. */
		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			mutex_exit(&dtrace_lock);
			mutex_exit(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		/* Retain the enabling only if matching succeeded. */
		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		mutex_exit(&cpu_lock);
		mutex_exit(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}

	/*
	 * Replicate enablings: the "match" and "create" probe descriptions
	 * are passed to dtrace_enabling_replicate() under dtrace_lock.
	 */
	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		/* Force termination of every user-supplied name. */
		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		mutex_enter(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		mutex_exit(&dtrace_lock);

		return (err);
	}

	/*
	 * Find the next probe, starting at dtpd_id, that the caller may see.
	 * PROBEMATCH additionally requires the probe to match the supplied
	 * description; PROBES only applies the privilege check.  ESRCH when
	 * no further probe qualifies.
	 */
	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;
#ifdef VBOX
		/* The VBOX entry point has no cr parameter; use CRED(). */
		cred_t *cr = CRED();
#endif

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		/* Force termination of every user-supplied name. */
		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.  The id is
		 * then bumped past DTRACE_IDNONE so the scan below starts at
		 * the first real probe id.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			mutex_enter(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			mutex_exit(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		mutex_enter(&dtrace_lock);

		/* Probe ids are 1-based; dtrace_probes[] is indexed by id - 1. */
		if (cmd == DTRACEIOC_PROBEMATCH) {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			/* A negative match result is reported as EINVAL. */
			if (m < 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}

		} else {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			mutex_exit(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		mutex_exit(&dtrace_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Describe one argument (dtargd_ndx) of the probe dtargd_id by asking
	 * the probe's provider, if it implements dtps_getargdesc.
	 */
	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		mutex_enter(&dtrace_provider_lock);
		mutex_enter(&mod_lock);
		mutex_enter(&dtrace_lock);

		if (desc.dtargd_id > dtrace_nprobes) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			mutex_exit(&dtrace_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_provider_lock);
			return (EINVAL);
		}

		/*
		 * dtrace_lock is dropped before calling into the provider;
		 * dtrace_provider_lock and mod_lock remain held.
		 */
		mutex_exit(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			/* Preset the fields before the provider fills them. */
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_provider_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Start tracing; the CPU id filled in by dtrace_state_go() is copied
	 * out to the caller.
	 */
	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Stop tracing; the CPU id filled in by dtrace_state_stop() is copied
	 * out to the caller.
	 */
	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		mutex_enter(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		mutex_exit(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Generate DOF for this state and copy out as much of it as fits in
	 * the size given by the caller's header (dofh_loadsz).
	 */
	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);
		dof = dtrace_dof_create(state);
		mutex_exit(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, (void *)arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}

	/*
	 * Snapshot one CPU's principal buffer (BUFSNAP) or aggregation buffer
	 * (AGGSNAP) into the user buffer named by the descriptor, then copy
	 * the updated descriptor back out.
	 */
	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (/*VBox value is unsigned: desc.dtbd_cpu < 0 ||*/ desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		mutex_enter(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		/*
		 * Ring and fill buffers are read in place (no buffer switch)
		 * and only once the state has stopped.
		 */
		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				mutex_exit(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				mutex_exit(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, (void *)arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
				mutex_exit(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			mutex_exit(&dtrace_lock);

			if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
				return (EFAULT);

			/*
			 * NOTE(review): the CONSUMED flag is set after
			 * dtrace_lock has been dropped -- confirm that this
			 * is intentional.
			 */
			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		/*
		 * Remember the current active buffer so that we can tell
		 * afterwards whether the switch actually happened.
		 */
		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

#ifndef VBOX
		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);
#else
		/*
		 * VBox invokes the buffer switch through RTMpOnAll() or
		 * RTMpOnSpecific() with a wrapper instead of dtrace_xcall().
		 */
		if (desc.dtbd_cpu == DTRACE_CPUALL)
			RTMpOnAll(dtrace_buffer_switch_wrapper, buf, NULL);
		else
			RTMpOnSpecific(desc.dtbd_cpu, dtrace_buffer_switch_wrapper, buf, NULL);
#endif

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			mutex_exit(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		mutex_exit(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Report the compile-time DIF configuration (version, register
	 * counts) and the native CTF data model.
	 */
	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Report drop and error statistics for this state, and refresh the
	 * state's last-status timestamp.
	 */
	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		mutex_enter(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			mutex_exit(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		/*
		 * Sum the per-CPU counters: dynamic variable drops, full
		 * principal buffers, buffer errors and speculation drops.
		 */
		for (i = 0; i < NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		mutex_exit(&dtrace_lock);

		if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}

	/*
	 * Fetch the format string with 1-based index dtfd_format.  If the
	 * caller's buffer (dtfd_length) is too small, only the descriptor is
	 * copied back with dtfd_length set to the required length; otherwise
	 * the string itself is copied to dtfd_string.
	 */
	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		mutex_enter(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			mutex_exit(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		/* Length including the terminating NUL. */
		len = VBDTCAST(int)strlen(str) + 1;

		/*
		 * NOTE(review): unlike the other commands, a failed copyout()
		 * here is reported as EINVAL rather than EFAULT.
		 */
		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, fmt.dtfd_string, len) != 0) {
				mutex_exit(&dtrace_lock);
				return (EINVAL);
			}
		}

		mutex_exit(&dtrace_lock);
		return (0);
	}

	default:
		break;
	}

	/* Unrecognized command. */
	return (ENOTTY);
}
16056 |
|
---|
16057 | #ifdef VBOX
|
---|
16058 | int dtrace_detach(void)
|
---|
16059 | #else
|
---|
16060 | /*ARGSUSED*/
|
---|
16061 | static int
|
---|
16062 | dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
|
---|
16063 | #endif
|
---|
16064 | {
|
---|
16065 | dtrace_state_t *state;
|
---|
16066 |
|
---|
16067 | #ifndef VBOX
|
---|
16068 | switch (cmd) {
|
---|
16069 | case DDI_DETACH:
|
---|
16070 | break;
|
---|
16071 |
|
---|
16072 | case DDI_SUSPEND:
|
---|
16073 | return (DDI_SUCCESS);
|
---|
16074 |
|
---|
16075 | default:
|
---|
16076 | return (DDI_FAILURE);
|
---|
16077 | }
|
---|
16078 | #endif
|
---|
16079 |
|
---|
16080 | mutex_enter(&cpu_lock);
|
---|
16081 | mutex_enter(&dtrace_provider_lock);
|
---|
16082 | mutex_enter(&dtrace_lock);
|
---|
16083 |
|
---|
16084 | ASSERT(dtrace_opens == 0);
|
---|
16085 |
|
---|
16086 | if (dtrace_helpers > 0) {
|
---|
16087 | mutex_exit(&dtrace_provider_lock);
|
---|
16088 | mutex_exit(&dtrace_lock);
|
---|
16089 | mutex_exit(&cpu_lock);
|
---|
16090 | return (DDI_FAILURE);
|
---|
16091 | }
|
---|
16092 |
|
---|
16093 | if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
|
---|
16094 | mutex_exit(&dtrace_provider_lock);
|
---|
16095 | mutex_exit(&dtrace_lock);
|
---|
16096 | mutex_exit(&cpu_lock);
|
---|
16097 | return (DDI_FAILURE);
|
---|
16098 | }
|
---|
16099 |
|
---|
16100 | dtrace_provider = NULL;
|
---|
16101 |
|
---|
16102 | if ((state = dtrace_anon_grab()) != NULL) {
|
---|
16103 | /*
|
---|
16104 | * If there were ECBs on this state, the provider should
|
---|
16105 | * have not been allowed to detach; assert that there is
|
---|
16106 | * none.
|
---|
16107 | */
|
---|
16108 | ASSERT(state->dts_necbs == 0);
|
---|
16109 | dtrace_state_destroy(state);
|
---|
16110 |
|
---|
16111 | #ifndef VBOX
|
---|
16112 | /*
|
---|
16113 | * If we're being detached with anonymous state, we need to
|
---|
16114 | * indicate to the kernel debugger that DTrace is now inactive.
|
---|
16115 | */
|
---|
16116 | (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
|
---|
16117 | #endif
|
---|
16118 | }
|
---|
16119 |
|
---|
16120 | bzero(&dtrace_anon, sizeof (dtrace_anon_t));
|
---|
16121 | #ifndef VBOX /** @todo CPU hooks */
|
---|
16122 | unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
|
---|
16123 | dtrace_cpu_init = NULL;
|
---|
16124 | dtrace_helpers_cleanup = NULL;
|
---|
16125 | dtrace_helpers_fork = NULL;
|
---|
16126 | dtrace_cpustart_init = NULL;
|
---|
16127 | dtrace_cpustart_fini = NULL;
|
---|
16128 | dtrace_debugger_init = NULL;
|
---|
16129 | dtrace_debugger_fini = NULL;
|
---|
16130 | dtrace_modload = NULL;
|
---|
16131 | dtrace_modunload = NULL;
|
---|
16132 | #endif
|
---|
16133 |
|
---|
16134 | mutex_exit(&cpu_lock);
|
---|
16135 |
|
---|
16136 | if (dtrace_helptrace_enabled) {
|
---|
16137 | kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
|
---|
16138 | dtrace_helptrace_buffer = NULL;
|
---|
16139 | }
|
---|
16140 |
|
---|
16141 | kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
|
---|
16142 | dtrace_probes = NULL;
|
---|
16143 | dtrace_nprobes = 0;
|
---|
16144 |
|
---|
16145 | dtrace_hash_destroy(dtrace_bymod);
|
---|
16146 | dtrace_hash_destroy(dtrace_byfunc);
|
---|
16147 | dtrace_hash_destroy(dtrace_byname);
|
---|
16148 | dtrace_bymod = NULL;
|
---|
16149 | dtrace_byfunc = NULL;
|
---|
16150 | dtrace_byname = NULL;
|
---|
16151 |
|
---|
16152 | kmem_cache_destroy(dtrace_state_cache);
|
---|
16153 | #ifndef VBOX
|
---|
16154 | vmem_destroy(dtrace_minor);
|
---|
16155 | #endif
|
---|
16156 | vmem_destroy(dtrace_arena);
|
---|
16157 |
|
---|
16158 | if (dtrace_toxrange != NULL) {
|
---|
16159 | kmem_free(dtrace_toxrange,
|
---|
16160 | dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
|
---|
16161 | dtrace_toxrange = NULL;
|
---|
16162 | dtrace_toxranges = 0;
|
---|
16163 | dtrace_toxranges_max = 0;
|
---|
16164 | }
|
---|
16165 |
|
---|
16166 | #ifndef VBOX
|
---|
16167 | ddi_remove_minor_node(dtrace_devi, NULL);
|
---|
16168 | dtrace_devi = NULL;
|
---|
16169 |
|
---|
16170 | ddi_soft_state_fini(&dtrace_softstate);
|
---|
16171 | #endif
|
---|
16172 |
|
---|
16173 | ASSERT(dtrace_vtime_references == 0);
|
---|
16174 | ASSERT(dtrace_opens == 0);
|
---|
16175 | ASSERT(dtrace_retained == NULL);
|
---|
16176 |
|
---|
16177 | mutex_exit(&dtrace_lock);
|
---|
16178 | mutex_exit(&dtrace_provider_lock);
|
---|
16179 | #ifdef VBOX
|
---|
16180 | VBoxDtMutexDelete(&dtrace_lock);
|
---|
16181 | VBoxDtMutexDelete(&dtrace_provider_lock);
|
---|
16182 | VBoxDtMutexDelete(&dtrace_meta_lock);
|
---|
16183 | # ifdef DEBUG
|
---|
16184 | VBoxDtMutexDelete(&dtrace_errlock);
|
---|
16185 | # endif
|
---|
16186 | #endif
|
---|
16187 |
|
---|
16188 | /*
|
---|
16189 | * We don't destroy the task queue until after we have dropped our
|
---|
16190 | * locks (taskq_destroy() may block on running tasks). To prevent
|
---|
16191 | * attempting to do work after we have effectively detached but before
|
---|
16192 | * the task queue has been destroyed, all tasks dispatched via the
|
---|
16193 | * task queue must check that DTrace is still attached before
|
---|
16194 | * performing any operation.
|
---|
16195 | */
|
---|
16196 | #ifndef VBOX
|
---|
16197 | taskq_destroy(dtrace_taskq);
|
---|
16198 | dtrace_taskq = NULL;
|
---|
16199 | #endif
|
---|
16200 |
|
---|
16201 | return (DDI_SUCCESS);
|
---|
16202 | }
|
---|
16203 |
|
---|
16204 | #ifndef VBOX
|
---|
16205 | /*ARGSUSED*/
|
---|
16206 | static int
|
---|
16207 | dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
|
---|
16208 | {
|
---|
16209 | int error;
|
---|
16210 |
|
---|
16211 | switch (infocmd) {
|
---|
16212 | case DDI_INFO_DEVT2DEVINFO:
|
---|
16213 | *result = (void *)dtrace_devi;
|
---|
16214 | error = DDI_SUCCESS;
|
---|
16215 | break;
|
---|
16216 | case DDI_INFO_DEVT2INSTANCE:
|
---|
16217 | *result = (void *)0;
|
---|
16218 | error = DDI_SUCCESS;
|
---|
16219 | break;
|
---|
16220 | default:
|
---|
16221 | error = DDI_FAILURE;
|
---|
16222 | }
|
---|
16223 | return (error);
|
---|
16224 | }
|
---|
16225 |
|
---|
16226 | static struct cb_ops dtrace_cb_ops = {
|
---|
16227 | dtrace_open, /* open */
|
---|
16228 | dtrace_close, /* close */
|
---|
16229 | nulldev, /* strategy */
|
---|
16230 | nulldev, /* print */
|
---|
16231 | nodev, /* dump */
|
---|
16232 | nodev, /* read */
|
---|
16233 | nodev, /* write */
|
---|
16234 | dtrace_ioctl, /* ioctl */
|
---|
16235 | nodev, /* devmap */
|
---|
16236 | nodev, /* mmap */
|
---|
16237 | nodev, /* segmap */
|
---|
16238 | nochpoll, /* poll */
|
---|
16239 | ddi_prop_op, /* cb_prop_op */
|
---|
16240 | 0, /* streamtab */
|
---|
16241 | D_NEW | D_MP /* Driver compatibility flag */
|
---|
16242 | };
|
---|
16243 |
|
---|
16244 | static struct dev_ops dtrace_ops = {
|
---|
16245 | DEVO_REV, /* devo_rev */
|
---|
16246 | 0, /* refcnt */
|
---|
16247 | dtrace_info, /* get_dev_info */
|
---|
16248 | nulldev, /* identify */
|
---|
16249 | nulldev, /* probe */
|
---|
16250 | dtrace_attach, /* attach */
|
---|
16251 | dtrace_detach, /* detach */
|
---|
16252 | nodev, /* reset */
|
---|
16253 | &dtrace_cb_ops, /* driver operations */
|
---|
16254 | NULL, /* bus operations */
|
---|
16255 | nodev, /* dev power */
|
---|
16256 | ddi_quiesce_not_needed, /* quiesce */
|
---|
16257 | };
|
---|
16258 |
|
---|
16259 | static struct modldrv modldrv = {
|
---|
16260 | &mod_driverops, /* module type (this is a pseudo driver) */
|
---|
16261 | "Dynamic Tracing", /* name of module */
|
---|
16262 | &dtrace_ops, /* driver ops */
|
---|
16263 | };
|
---|
16264 |
|
---|
16265 | static struct modlinkage modlinkage = {
|
---|
16266 | MODREV_1,
|
---|
16267 | (void *)&modldrv,
|
---|
16268 | NULL
|
---|
16269 | };
|
---|
16270 |
|
---|
16271 | int
|
---|
16272 | _init(void)
|
---|
16273 | {
|
---|
16274 | return (mod_install(&modlinkage));
|
---|
16275 | }
|
---|
16276 |
|
---|
16277 | int
|
---|
16278 | _info(struct modinfo *modinfop)
|
---|
16279 | {
|
---|
16280 | return (mod_info(&modlinkage, modinfop));
|
---|
16281 | }
|
---|
16282 |
|
---|
16283 | int
|
---|
16284 | _fini(void)
|
---|
16285 | {
|
---|
16286 | return (mod_remove(&modlinkage));
|
---|
16287 | }
|
---|
16288 |
|
---|
16289 | #endif /* !VBOX */
|
---|