xref: /netbsd-src/external/cddl/osnet/dev/profile/profile.c (revision 4c6ec8e8ebf33afa71c68d29c1a448d331ddf710)
1 /*	$NetBSD: profile.c,v 1.11 2020/05/15 23:57:17 ad Exp $	*/
2 
3 /*
4  * CDDL HEADER START
5  *
6  * The contents of this file are subject to the terms of the
7  * Common Development and Distribution License (the "License").
8  * You may not use this file except in compliance with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
24  *
25  * $FreeBSD: head/sys/cddl/dev/profile/profile.c 300618 2016-05-24 16:41:37Z br $
26  *
27  */
28 
29 /*
30  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
31  * Use is subject to license terms.
32  */
33 
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/conf.h>
38 #include <sys/cpuvar.h>
39 #include <sys/fcntl.h>
40 #include <sys/filio.h>
41 #ifdef __FreeBSD__
42 #include <sys/kdb.h>
43 #endif
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/kthread.h>
47 #ifdef __FreeBSD__
48 #include <sys/limits.h>
49 #endif
50 #include <sys/linker.h>
51 #include <sys/lock.h>
52 #include <sys/malloc.h>
53 #include <sys/module.h>
54 #include <sys/mutex.h>
55 #include <sys/poll.h>
56 #include <sys/proc.h>
57 #include <sys/selinfo.h>
58 #ifdef __FreeBSD__
59 #include <sys/smp.h>
60 #include <sys/sysctl.h>
61 #endif
62 #include <sys/uio.h>
63 #include <sys/unistd.h>
64 #ifdef __FreeBSD__
65 #include <machine/cpu.h>
66 #include <machine/stdarg.h>
67 #endif
68 
69 #ifdef __NetBSD__
70 #include <sys/syslimits.h>
71 #include <sys/atomic.h>
72 #include <sys/cpu.h>
73 #include <sys/cyclic.h>
74 #endif
75 
76 #include <sys/dtrace.h>
77 #include <sys/dtrace_bsd.h>
78 
79 #define	PROF_NAMELEN		15
80 
81 #define	PROF_PROFILE		0
82 #define	PROF_TICK		1
83 #define	PROF_PREFIX_PROFILE	"profile-"
84 #define	PROF_PREFIX_TICK	"tick-"
85 
86 /*
87  * Regardless of platform, there are five artificial frames in the case of the
88  * profile provider:
89  *
90  *	profile_fire
91  *	cyclic_expire
92  *	cyclic_fire
93  *	[ cbe ]
94  *	[ locore ]
95  *
96  * On amd64, there are two frames associated with locore:  one in locore, and
97  * another in common interrupt dispatch code.  (i386 has not been modified to
98  * use this common layer.)  Further, on i386, the interrupted instruction
99  * appears as its own stack frame.  All of this means that we need to add one
100  * frame for amd64, and then take one away for both amd64 and i386.
101  *
102  * On SPARC, the picture is further complicated because the compiler
103  * optimizes away tail-calls -- so the following frames are optimized away:
104  *
105  * 	profile_fire
106  *	cyclic_expire
107  *
108  * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
109  * frame cannot be tail-call eliminated, yielding four frames in this case.
110  *
111  * All of the above constraints lead to the mess below.  Yes, the profile
112  * provider should ideally figure this out on-the-fly by hitting one of its own
113  * probes and then walking its own stack trace.  This is complicated, however,
114  * and the static definition doesn't seem to be overly brittle.  Still, we
115  * allow for a manual override in case we get it completely wrong.
116  */
117 #ifdef __FreeBSD__
118 #ifdef __amd64
119 #define	PROF_ARTIFICIAL_FRAMES	10
120 #else
121 #ifdef __i386
122 #define	PROF_ARTIFICIAL_FRAMES	6
123 #else
124 #ifdef __sparc
125 #ifdef DEBUG
126 #define	PROF_ARTIFICIAL_FRAMES	4
127 #else
128 #define	PROF_ARTIFICIAL_FRAMES	3
129 #endif
130 #endif
131 #endif
132 #endif
133 
134 #ifdef __mips
135 /*
136  * This value is bogus just to make module compilable on mips
137  */
138 #define	PROF_ARTIFICIAL_FRAMES	3
139 #endif
140 
141 #ifdef __powerpc__
142 /*
143  * This value is bogus just to make module compilable on powerpc
144  */
145 #define	PROF_ARTIFICIAL_FRAMES	3
146 #endif
147 
148 struct profile_probe_percpu;
149 
150 #ifdef __mips
151 /* bogus */
152 #define	PROF_ARTIFICIAL_FRAMES	3
153 #endif
154 
155 #ifdef __arm__
156 #define	PROF_ARTIFICIAL_FRAMES	3
157 #endif
158 
159 #ifdef __aarch64__
160 /* TODO: verify */
161 #define	PROF_ARTIFICIAL_FRAMES	10
162 #endif
163 
164 #ifdef __riscv__
165 /* TODO: verify */
166 #define	PROF_ARTIFICIAL_FRAMES	10
167 #endif
168 
169 #endif /* __FreeBSD__ */
170 
171 #ifdef __NetBSD__
172 #define	PROF_ARTIFICIAL_FRAMES	4
173 #endif
174 
175 typedef struct profile_probe {
176 	char		prof_name[PROF_NAMELEN];
177 	dtrace_id_t	prof_id;
178 	int		prof_kind;
179 #if defined(illumos) || defined(__NetBSD__)
180 	hrtime_t	prof_interval;
181 	cyclic_id_t	prof_cyclic;
182 #endif
183 #ifdef __FreeBSD__
184 	sbintime_t	prof_interval;
185 	struct callout	prof_cyclic;
186 	sbintime_t	prof_expected;
187 	struct profile_probe_percpu **prof_pcpus;
188 #endif
189 } profile_probe_t;
190 
191 typedef struct profile_probe_percpu {
192 	hrtime_t	profc_expected;
193 	hrtime_t	profc_interval;
194 	profile_probe_t	*profc_probe;
195 #ifdef __FreeBSD__
196 	struct callout	profc_cyclic;
197 #endif
198 } profile_probe_percpu_t;
199 
200 #ifdef __FreeBSD__
201 static d_open_t	profile_open;
202 #endif
203 static int	profile_unload(void);
204 static void	profile_create(hrtime_t, char *, int);
205 static void	profile_destroy(void *, dtrace_id_t, void *);
206 static int	profile_enable(void *, dtrace_id_t, void *);
207 static void	profile_disable(void *, dtrace_id_t, void *);
208 static void	profile_load(void *);
209 static void	profile_provide(void *, dtrace_probedesc_t *);
210 
211 static int profile_rates[] = {
212     97, 199, 499, 997, 1999,
213     4001, 4999, 0, 0, 0,
214     0, 0, 0, 0, 0,
215     0, 0, 0, 0, 0
216 };
217 
218 static int profile_ticks[] = {
219     1, 10, 100, 500, 1000,
220     5000, 0, 0, 0, 0,
221     0, 0, 0, 0, 0
222 };
223 
224 /*
225  * profile_max defines the upper bound on the number of profile probes that
226  * can exist (this is to prevent malicious or clumsy users from exhausting
227  * system resources by creating a slew of profile probes). At mod load time,
228  * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
229  * present in the profile.conf file.
230  */
231 #define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */
232 static uint32_t profile_max = PROFILE_MAX_DEFAULT;
233 					/* maximum number of profile probes */
234 static uint32_t profile_total;		/* current number of profile probes */
235 
236 #ifdef __FreeBSD__
237 static struct cdevsw profile_cdevsw = {
238 	.d_version	= D_VERSION,
239 	.d_open		= profile_open,
240 	.d_name		= "profile",
241 };
242 #endif
243 
244 static dtrace_pattr_t profile_attr = {
245 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
246 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
247 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
248 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
249 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
250 };
251 
252 static dtrace_pops_t profile_pops = {
253 	profile_provide,
254 	NULL,
255 	profile_enable,
256 	profile_disable,
257 	NULL,
258 	NULL,
259 	NULL,
260 	NULL,
261 	NULL,
262 	profile_destroy
263 };
264 
265 #ifdef __FreeBSD__
266 static struct cdev		*profile_cdev;
267 #endif
268 static dtrace_provider_id_t	profile_id;
269 static hrtime_t			profile_interval_min = NANOSEC / 5000;	/* 5000 hz */
270 static int			profile_aframes = PROF_ARTIFICIAL_FRAMES;
271 
272 #ifdef __FreeBSD__
273 SYSCTL_DECL(_kern_dtrace);
274 SYSCTL_NODE(_kern_dtrace, OID_AUTO, profile, CTLFLAG_RD, 0, "DTrace profile parameters");
275 SYSCTL_INT(_kern_dtrace_profile, OID_AUTO, aframes, CTLFLAG_RW, &profile_aframes,
276     0, "Skipped frames for profile provider");
277 
278 static sbintime_t
nsec_to_sbt(hrtime_t nsec)279 nsec_to_sbt(hrtime_t nsec)
280 {
281 	time_t sec;
282 
283 	/*
284 	 * We need to calculate nsec * 2^32 / 10^9
285 	 * Seconds and nanoseconds are split to avoid overflow.
286 	 */
287 	sec = nsec / NANOSEC;
288 	nsec = nsec % NANOSEC;
289 	return (((sbintime_t)sec << 32) | ((sbintime_t)nsec << 32) / NANOSEC);
290 }
291 
292 static hrtime_t
sbt_to_nsec(sbintime_t sbt)293 sbt_to_nsec(sbintime_t sbt)
294 {
295 
296 	return ((sbt >> 32) * NANOSEC +
297 	    (((uint32_t)sbt * (hrtime_t)NANOSEC) >> 32));
298 }
299 
300 static void
profile_fire(void * arg)301 profile_fire(void *arg)
302 {
303 	profile_probe_percpu_t *pcpu = arg;
304 	profile_probe_t *prof = pcpu->profc_probe;
305 	hrtime_t late;
306 	struct trapframe *frame;
307 	uintfptr_t pc, upc;
308 
309 #ifdef illumos
310 	late = gethrtime() - pcpu->profc_expected;
311 #else
312 	late = sbt_to_nsec(sbinuptime() - pcpu->profc_expected);
313 #endif
314 
315 	pc = 0;
316 	upc = 0;
317 
318 	/*
319 	 * td_intr_frame can be unset if this is a catch up event
320 	 * after waking up from idle sleep.
321 	 * This can only happen on a CPU idle thread.
322 	 */
323 	frame = curthread->td_intr_frame;
324 	if (frame != NULL) {
325 		if (TRAPF_USERMODE(frame))
326 			upc = TRAPF_PC(frame);
327 		else
328 			pc = TRAPF_PC(frame);
329 	}
330 	dtrace_probe(prof->prof_id, pc, upc, late, 0, 0);
331 
332 	pcpu->profc_expected += pcpu->profc_interval;
333 	callout_schedule_sbt_curcpu(&pcpu->profc_cyclic,
334 	    pcpu->profc_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE);
335 }
336 
337 static void
profile_tick(void * arg)338 profile_tick(void *arg)
339 {
340 	profile_probe_t *prof = arg;
341 	struct trapframe *frame;
342 	uintfptr_t pc, upc;
343 
344 	pc = 0;
345 	upc = 0;
346 
347 	/*
348 	 * td_intr_frame can be unset if this is a catch up event
349 	 * after waking up from idle sleep.
350 	 * This can only happen on a CPU idle thread.
351 	 */
352 	frame = curthread->td_intr_frame;
353 	if (frame != NULL) {
354 		if (TRAPF_USERMODE(frame))
355 			upc = TRAPF_PC(frame);
356 		else
357 			pc = TRAPF_PC(frame);
358 	}
359 	dtrace_probe(prof->prof_id, pc, upc, 0, 0, 0);
360 
361 	prof->prof_expected += prof->prof_interval;
362 	callout_schedule_sbt(&prof->prof_cyclic,
363 	    prof->prof_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE);
364 }
365 
366 #endif
367 
368 #ifdef __NetBSD__
369 static void
profile_fire(void * arg)370 profile_fire(void *arg)
371 {
372 	profile_probe_percpu_t *pcpu = arg;
373 	profile_probe_t *prof = pcpu->profc_probe;
374 	hrtime_t late;
375 	solaris_cpu_t *c = &solaris_cpu[cpu_number()];
376 
377 	late = gethrtime() - pcpu->profc_expected;
378 	pcpu->profc_expected += pcpu->profc_interval;
379 
380 	dtrace_probe(prof->prof_id, c->cpu_profile_pc,
381 	    c->cpu_profile_upc, late, 0, 0);
382 }
383 
384 static void
profile_tick(void * arg)385 profile_tick(void *arg)
386 {
387 	profile_probe_t *prof = arg;
388 	solaris_cpu_t *c = &solaris_cpu[cpu_number()];
389 
390 	dtrace_probe(prof->prof_id, c->cpu_profile_pc,
391 	    c->cpu_profile_upc, 0, 0, 0);
392 }
393 
394 #endif
395 
396 static void
profile_create(hrtime_t interval,char * name,int kind)397 profile_create(hrtime_t interval, char *name, int kind)
398 {
399 	profile_probe_t *prof;
400 
401 	if (interval < profile_interval_min)
402 		return;
403 
404 	if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
405 		return;
406 
407 	atomic_add_32(&profile_total, 1);
408 	if (profile_total > profile_max) {
409 		atomic_add_32(&profile_total, -1);
410 		return;
411 	}
412 
413 	prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
414 	(void) strcpy(prof->prof_name, name);
415 #ifdef __FreeBSD__
416 	prof->prof_interval = nsec_to_sbt(interval);
417 	callout_init(&prof->prof_cyclic, 1);
418 #else
419 	prof->prof_interval = interval;
420 	prof->prof_cyclic = CYCLIC_NONE;
421 #endif
422 	prof->prof_kind = kind;
423 	prof->prof_id = dtrace_probe_create(profile_id,
424 	    NULL, NULL, name,
425 	    profile_aframes, prof);
426 }
427 
428 /*ARGSUSED*/
429 static void
profile_provide(void * arg,dtrace_probedesc_t * desc)430 profile_provide(void *arg, dtrace_probedesc_t *desc)
431 {
432 	int i, j, rate, kind;
433 	hrtime_t val = 0, mult = 1, len = 0;
434 	char *name, *suffix = NULL;
435 
436 	const struct {
437 		char *prefix;
438 		int kind;
439 	} types[] = {
440 		{ PROF_PREFIX_PROFILE, PROF_PROFILE },
441 		{ PROF_PREFIX_TICK, PROF_TICK },
442 		{ 0, 0 }
443 	};
444 
445 	const struct {
446 		char *name;
447 		hrtime_t mult;
448 	} suffixes[] = {
449 		{ "ns", 	NANOSEC / NANOSEC },
450 		{ "nsec",	NANOSEC / NANOSEC },
451 		{ "us",		NANOSEC / MICROSEC },
452 		{ "usec",	NANOSEC / MICROSEC },
453 		{ "ms",		NANOSEC / MILLISEC },
454 		{ "msec",	NANOSEC / MILLISEC },
455 		{ "s",		NANOSEC / SEC },
456 		{ "sec",	NANOSEC / SEC },
457 		{ "m",		NANOSEC * (hrtime_t)60 },
458 		{ "min",	NANOSEC * (hrtime_t)60 },
459 		{ "h",		NANOSEC * (hrtime_t)(60 * 60) },
460 		{ "hour",	NANOSEC * (hrtime_t)(60 * 60) },
461 		{ "d",		NANOSEC * (hrtime_t)(24 * 60 * 60) },
462 		{ "day",	NANOSEC * (hrtime_t)(24 * 60 * 60) },
463 		{ "hz",		0 },
464 		{ NULL,		0 }
465 	};
466 
467 	if (desc == NULL) {
468 		char n[PROF_NAMELEN];
469 
470 		/*
471 		 * If no description was provided, provide all of our probes.
472 		 */
473 		for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
474 			if ((rate = profile_rates[i]) == 0)
475 				continue;
476 
477 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
478 			    PROF_PREFIX_PROFILE, rate);
479 			profile_create(NANOSEC / rate, n, PROF_PROFILE);
480 		}
481 
482 		for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
483 			if ((rate = profile_ticks[i]) == 0)
484 				continue;
485 
486 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
487 			    PROF_PREFIX_TICK, rate);
488 			profile_create(NANOSEC / rate, n, PROF_TICK);
489 		}
490 
491 		return;
492 	}
493 
494 	name = desc->dtpd_name;
495 
496 	for (i = 0; types[i].prefix != NULL; i++) {
497 		len = strlen(types[i].prefix);
498 
499 		if (strncmp(name, types[i].prefix, len) != 0)
500 			continue;
501 		break;
502 	}
503 
504 	if (types[i].prefix == NULL)
505 		return;
506 
507 	kind = types[i].kind;
508 	j = strlen(name) - len;
509 
510 	/*
511 	 * We need to start before any time suffix.
512 	 */
513 	for (j = strlen(name); j >= len; j--) {
514 		if (name[j] >= '0' && name[j] <= '9')
515 			break;
516 		suffix = &name[j];
517 	}
518 
519 	ASSERT(suffix != NULL);
520 
521 	/*
522 	 * Now determine the numerical value present in the probe name.
523 	 */
524 	for (; j >= len; j--) {
525 		if (name[j] < '0' || name[j] > '9')
526 			return;
527 
528 		val += (name[j] - '0') * mult;
529 		mult *= (hrtime_t)10;
530 	}
531 
532 	if (val == 0)
533 		return;
534 
535 	/*
536 	 * Look-up the suffix to determine the multiplier.
537 	 */
538 	for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
539 		if (strcasecmp(suffixes[i].name, suffix) == 0) {
540 			mult = suffixes[i].mult;
541 			break;
542 		}
543 	}
544 
545 	if (suffixes[i].name == NULL && *suffix != '\0')
546 		return;
547 
548 	if (mult == 0) {
549 		/*
550 		 * The default is frequency-per-second.
551 		 */
552 		val = NANOSEC / val;
553 	} else {
554 		val *= mult;
555 	}
556 
557 	profile_create(val, name, kind);
558 }
559 
560 /* ARGSUSED */
561 static void
profile_destroy(void * arg,dtrace_id_t id,void * parg)562 profile_destroy(void *arg, dtrace_id_t id, void *parg)
563 {
564 	profile_probe_t *prof = parg;
565 
566 #ifdef __FreeBSD__
567 	ASSERT(!callout_active(&prof->prof_cyclic) && prof->prof_pcpus == NULL);
568 #else
569 	ASSERT(prof->prof_cyclic == CYCLIC_NONE);
570 #endif
571 	kmem_free(prof, sizeof (profile_probe_t));
572 
573 	ASSERT(profile_total >= 1);
574 	atomic_add_32(&profile_total, -1);
575 }
576 
577 #ifndef __FreeBSD__
578 
579 /*ARGSUSED*/
580 static void
profile_online(void * arg,cpu_t * cpu,cyc_handler_t * hdlr,cyc_time_t * when)581 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
582 {
583 	profile_probe_t *prof = arg;
584 	profile_probe_percpu_t *pcpu;
585 
586 	pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
587 	pcpu->profc_probe = prof;
588 
589 	hdlr->cyh_func = profile_fire;
590 	hdlr->cyh_arg = pcpu;
591 
592 	when->cyt_interval = prof->prof_interval;
593 	when->cyt_when = gethrtime() + when->cyt_interval;
594 
595 	pcpu->profc_expected = when->cyt_when;
596 	pcpu->profc_interval = when->cyt_interval;
597 }
598 
599 /*ARGSUSED*/
600 static void
profile_offline(void * arg,cpu_t * cpu,void * oarg)601 profile_offline(void *arg, cpu_t *cpu, void *oarg)
602 {
603 	profile_probe_percpu_t *pcpu = oarg;
604 
605 	ASSERT(pcpu->profc_probe == arg);
606 	kmem_free(pcpu, sizeof (profile_probe_percpu_t));
607 }
608 
609 /* ARGSUSED */
610 static int
profile_enable(void * arg,dtrace_id_t id,void * parg)611 profile_enable(void *arg, dtrace_id_t id, void *parg)
612 {
613 	profile_probe_t *prof = parg;
614 	cyc_omni_handler_t omni;
615 	cyc_handler_t hdlr;
616 	cyc_time_t when;
617 
618 	ASSERT(prof->prof_interval != 0);
619 	ASSERT(MUTEX_HELD(&cpu_lock));
620 
621 	if (prof->prof_kind == PROF_TICK) {
622 		hdlr.cyh_func = profile_tick;
623 		hdlr.cyh_arg = prof;
624 
625 		when.cyt_interval = prof->prof_interval;
626 		when.cyt_when = gethrtime() + when.cyt_interval;
627 	} else {
628 		ASSERT(prof->prof_kind == PROF_PROFILE);
629 		omni.cyo_online = profile_online;
630 		omni.cyo_offline = profile_offline;
631 		omni.cyo_arg = prof;
632 	}
633 
634 	if (prof->prof_kind == PROF_TICK) {
635 		prof->prof_cyclic = cyclic_add(&hdlr, &when);
636 	} else {
637 		prof->prof_cyclic = cyclic_add_omni(&omni);
638 	}
639 	return 0;
640 }
641 
642 /* ARGSUSED */
643 static void
profile_disable(void * arg,dtrace_id_t id,void * parg)644 profile_disable(void *arg, dtrace_id_t id, void *parg)
645 {
646 	profile_probe_t *prof = parg;
647 
648 	ASSERT(prof->prof_cyclic != CYCLIC_NONE);
649 	ASSERT(MUTEX_HELD(&cpu_lock));
650 
651 	cyclic_remove(prof->prof_cyclic);
652 	prof->prof_cyclic = CYCLIC_NONE;
653 }
654 
655 #else
656 
657 static void
profile_enable_omni(profile_probe_t * prof)658 profile_enable_omni(profile_probe_t *prof)
659 {
660 	profile_probe_percpu_t *pcpu;
661 	int cpu;
662 
663 	prof->prof_pcpus = kmem_zalloc((mp_maxid + 1) * sizeof(pcpu), KM_SLEEP);
664 	CPU_FOREACH(cpu) {
665 		pcpu = kmem_zalloc(sizeof(profile_probe_percpu_t), KM_SLEEP);
666 		prof->prof_pcpus[cpu] = pcpu;
667 		pcpu->profc_probe = prof;
668 		pcpu->profc_expected = sbinuptime() + prof->prof_interval;
669 		pcpu->profc_interval = prof->prof_interval;
670 		callout_init(&pcpu->profc_cyclic, 1);
671 		callout_reset_sbt_on(&pcpu->profc_cyclic,
672 		    pcpu->profc_expected, 0, profile_fire, pcpu,
673 		    cpu, C_DIRECT_EXEC | C_ABSOLUTE);
674 	}
675 }
676 
677 static void
profile_disable_omni(profile_probe_t * prof)678 profile_disable_omni(profile_probe_t *prof)
679 {
680 	profile_probe_percpu_t *pcpu;
681 	int cpu;
682 
683 	ASSERT(prof->prof_pcpus != NULL);
684 	CPU_FOREACH(cpu) {
685 		pcpu = prof->prof_pcpus[cpu];
686 		ASSERT(pcpu->profc_probe == prof);
687 		ASSERT(callout_active(&pcpu->profc_cyclic));
688 		callout_stop(&pcpu->profc_cyclic);
689 		callout_drain(&pcpu->profc_cyclic);
690 		kmem_free(pcpu, sizeof(profile_probe_percpu_t));
691 	}
692 	kmem_free(prof->prof_pcpus, (mp_maxid + 1) * sizeof(pcpu));
693 	prof->prof_pcpus = NULL;
694 }
695 
696 /* ARGSUSED */
697 static void
profile_enable(void * arg,dtrace_id_t id,void * parg)698 profile_enable(void *arg, dtrace_id_t id, void *parg)
699 {
700 	profile_probe_t *prof = parg;
701 
702 	if (prof->prof_kind == PROF_TICK) {
703 		prof->prof_expected = sbinuptime() + prof->prof_interval;
704 		callout_reset_sbt(&prof->prof_cyclic,
705 		    prof->prof_expected, 0, profile_tick, prof,
706 		    C_DIRECT_EXEC | C_ABSOLUTE);
707 	} else {
708 		ASSERT(prof->prof_kind == PROF_PROFILE);
709 		profile_enable_omni(prof);
710 	}
711 }
712 
713 /* ARGSUSED */
714 static void
profile_disable(void * arg,dtrace_id_t id,void * parg)715 profile_disable(void *arg, dtrace_id_t id, void *parg)
716 {
717 	profile_probe_t *prof = parg;
718 
719 	if (prof->prof_kind == PROF_TICK) {
720 		ASSERT(callout_active(&prof->prof_cyclic));
721 		callout_stop(&prof->prof_cyclic);
722 		callout_drain(&prof->prof_cyclic);
723 	} else {
724 		ASSERT(prof->prof_kind == PROF_PROFILE);
725 		profile_disable_omni(prof);
726 	}
727 }
728 #endif
729 
730 static void
profile_load(void * dummy)731 profile_load(void *dummy)
732 {
733 #ifdef __FreeBSD__
734 	/* Create the /dev/dtrace/profile entry. */
735 	profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
736 	    "dtrace/profile");
737 #endif
738 
739 	if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER,
740 	    NULL, &profile_pops, NULL, &profile_id) != 0)
741 		return;
742 }
743 
744 
745 static int
profile_unload()746 profile_unload()
747 {
748 	int error = 0;
749 
750 	if ((error = dtrace_unregister(profile_id)) != 0)
751 		return (error);
752 
753 #ifdef __FreeBSD__
754 	destroy_dev(profile_cdev);
755 #endif
756 
757 	return (error);
758 }
759 
760 #ifdef __FreeBSD__
761 
762 /* ARGSUSED */
763 static int
profile_modevent(module_t mod __unused,int type,void * data __unused)764 profile_modevent(module_t mod __unused, int type, void *data __unused)
765 {
766 	int error = 0;
767 
768 	switch (type) {
769 	case MOD_LOAD:
770 		break;
771 
772 	case MOD_UNLOAD:
773 		break;
774 
775 	case MOD_SHUTDOWN:
776 		break;
777 
778 	default:
779 		error = EOPNOTSUPP;
780 		break;
781 
782 	}
783 	return (error);
784 }
785 
786 /* ARGSUSED */
787 static int
profile_open(struct cdev * dev __unused,int oflags __unused,int devtype __unused,struct thread * td __unused)788 profile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
789 {
790 	return (0);
791 }
792 
793 SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL);
794 SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL);
795 
796 DEV_MODULE(profile, profile_modevent, NULL);
797 MODULE_VERSION(profile, 1);
798 MODULE_DEPEND(profile, dtrace, 1, 1, 1);
799 MODULE_DEPEND(profile, cyclic, 1, 1, 1);
800 MODULE_DEPEND(profile, opensolaris, 1, 1, 1);
801 
802 #endif
803 
804 #ifdef __NetBSD__
805 
806 static int
dtrace_profile_modcmd(modcmd_t cmd,void * data)807 dtrace_profile_modcmd(modcmd_t cmd, void *data)
808 {
809 	switch (cmd) {
810 	case MODULE_CMD_INIT:
811 		profile_load(NULL);
812 		return 0;
813 
814 	case MODULE_CMD_FINI:
815 		profile_unload();
816 		return 0;
817 
818 	case MODULE_CMD_AUTOUNLOAD:
819 		if (profile_total)
820 			return EBUSY;
821 		return 0;
822 
823 	default:
824 		return ENOTTY;
825 	}
826 }
827 
828 MODULE(MODULE_CLASS_MISC, dtrace_profile, "dtrace,cyclic");
829 
830 #endif
831