xref: /netbsd-src/external/cddl/osnet/dev/profile/profile.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: profile.c,v 1.4 2012/12/07 03:11:17 chs Exp $	*/
2 
3 /*
4  * CDDL HEADER START
5  *
6  * The contents of this file are subject to the terms of the
7  * Common Development and Distribution License (the "License").
8  * You may not use this file except in compliance with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
24  *
25  * $FreeBSD: src/sys/cddl/dev/profile/profile.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $
26  *
27  */
28 
29 /*
30  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
31  * Use is subject to license terms.
32  */
33 
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/conf.h>
38 #include <sys/cpuvar.h>
39 #include <sys/fcntl.h>
40 #include <sys/filio.h>
41 #ifdef __FreeBSD__
42 #include <sys/kdb.h>
43 #endif
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/kthread.h>
47 #include <sys/limits.h>
48 #include <sys/linker.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/poll.h>
54 #include <sys/proc.h>
55 #include <sys/selinfo.h>
56 #ifdef __FreeBSD__
57 #include <sys/smp.h>
58 #endif
59 #include <sys/uio.h>
60 #include <sys/unistd.h>
61 
62 #ifdef __NetBSD__
63 #include <sys/atomic.h>
64 #include <sys/cpu.h>
65 #define ASSERT(x) KASSERT(x)
66 #endif
67 
68 #include <sys/cyclic.h>
69 #include <sys/dtrace.h>
70 #include <sys/dtrace_bsd.h>
71 
/* Size, in bytes, of the prof_name buffer in profile_probe_t (includes the NUL). */
72 #define	PROF_NAMELEN		15
73 
/*
 * Probe kinds stored in profile_probe_t's prof_kind, and the probe-name
 * prefixes that profile_provide() maps onto them.  profile_enable() arms a
 * PROF_TICK probe with cyclic_add() and a PROF_PROFILE probe with
 * cyclic_add_omni().
 */
74 #define	PROF_PROFILE		0
75 #define	PROF_TICK		1
76 #define	PROF_PREFIX_PROFILE	"profile-"
77 #define	PROF_PREFIX_TICK	"tick-"
78 
79 /*
80  * Regardless of platform, there are five artificial frames in the case of the
81  * profile provider:
82  *
83  *	profile_fire
84  *	cyclic_expire
85  *	cyclic_fire
86  *	[ cbe ]
87  *	[ locore ]
88  *
89  * On amd64, there are two frames associated with locore:  one in locore, and
90  * another in common interrupt dispatch code.  (i386 has not been modified to
91  * use this common layer.)  Further, on i386, the interrupted instruction
92  * appears as its own stack frame.  All of this means that we need to add one
93  * frame for amd64, and then take one away for both amd64 and i386.
94  *
95  * On SPARC, the picture is further complicated because the compiler
96  * optimizes away tail-calls -- so the following frames are optimized away:
97  *
98  * 	profile_fire
99  *	cyclic_expire
100  *
101  * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
102  * frame cannot be tail-call eliminated, yielding four frames in this case.
103  *
104  * All of the above constraints lead to the mess below.  Yes, the profile
105  * provider should ideally figure this out on-the-fly by hitting one of its own
106  * probes and then walking its own stack trace.  This is complicated, however,
107  * and the static definition doesn't seem to be overly brittle.  Still, we
108  * allow for a manual override in case we get it completely wrong.
109  */
/*
 * Default number of artificial stack frames passed to dtrace_probe_create()
 * by profile_create() (profile_aframes, when non-zero, overrides it).  The
 * value is chosen per OS and, on FreeBSD, per architecture; on a FreeBSD
 * architecture not listed here the macro is left undefined.
 */
#ifdef __FreeBSD__
#if defined(__amd64)
#define	PROF_ARTIFICIAL_FRAMES	7
#elif defined(__i386)
#define	PROF_ARTIFICIAL_FRAMES	6
#elif defined(__sparc) && defined(DEBUG)
#define	PROF_ARTIFICIAL_FRAMES	4
#elif defined(__sparc)
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
#endif	/* __FreeBSD__ */

#ifdef __NetBSD__
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
131 
/*
 * Per-probe state.  One is allocated by profile_create() for every probe
 * handed to dtrace_probe_create() and released by profile_destroy().
 */
132 typedef struct profile_probe {
133 	char		prof_name[PROF_NAMELEN];	/* copy of the probe name */
134 	dtrace_id_t	prof_id;	/* id returned by dtrace_probe_create() */
135 	int		prof_kind;	/* PROF_PROFILE or PROF_TICK */
136 	hrtime_t	prof_interval;	/* firing period, in gethrtime() units */
137 	cyclic_id_t	prof_cyclic;	/* CYCLIC_NONE unless currently enabled */
138 } profile_probe_t;
139 
/*
 * Per-CPU state for a PROF_PROFILE probe.  Allocated in profile_online(),
 * handed to profile_fire() as its argument, and freed in profile_offline().
 */
140 typedef struct profile_probe_percpu {
141 	hrtime_t	profc_expected;	/* time the next firing is due */
142 	hrtime_t	profc_interval;	/* period added to profc_expected per firing */
143 	profile_probe_t	*profc_probe;	/* owning probe */
144 } profile_probe_percpu_t;
145 
/*
 * Forward declarations for the functions referenced by the tables and
 * module glue in this file before their definitions appear.
 */
146 #ifdef __FreeBSD__
147 static d_open_t	profile_open;
148 #endif
149 static int	profile_unload(void);
150 static void	profile_create(hrtime_t, char *, int);
151 static void	profile_destroy(void *, dtrace_id_t, void *);
152 static int	profile_enable(void *, dtrace_id_t, void *);
153 static void	profile_disable(void *, dtrace_id_t, void *);
154 static void	profile_load(void *);
155 static void	profile_provide(void *, const dtrace_probedesc_t *);
156 
/*
 * Rates (firings per second) of the "profile-<rate>" probes that
 * profile_provide() creates when called without a probe description.  Each
 * non-zero entry becomes a probe with interval NANOSEC / rate; zero entries
 * are skipped.
 */
157 static int profile_rates[] = {
158     97, 199, 499, 997, 1999,
159     4001, 4999, 0, 0, 0,
160     0, 0, 0, 0, 0,
161     0, 0, 0, 0, 0
162 };
163 
/*
 * Rates (firings per second) of the "tick-<rate>" probes that
 * profile_provide() creates when called without a probe description.  Each
 * non-zero entry becomes a probe with interval NANOSEC / rate; zero entries
 * are skipped.
 */
164 static int profile_ticks[] = {
165     1, 10, 100, 500, 1000,
166     5000, 0, 0, 0, 0,
167     0, 0, 0, 0, 0
168 };
169 
170 /*
171  * profile_max defines the upper bound on the number of profile probes that
172  * can exist (this is to prevent malicious or clumsy users from exhausting
173  * system resources by creating a slew of profile probes). At mod load time,
174  * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's
175  * present in the profile.conf file.
176  */
/*
 * profile_create() bumps profile_total before allocating and backs the
 * increment out if the result exceeds profile_max; profile_destroy() drops
 * it again when a probe is freed.
 */
177 #define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */
178 static uint32_t profile_max = PROFILE_MAX_DEFAULT;
179 					/* maximum number of profile probes */
180 static uint32_t profile_total;		/* current number of profile probes */
181 
/*
 * FreeBSD only: character-device switch used by profile_load() to create
 * "dtrace/profile".  The only entry point supplied is profile_open().
 */
182 #ifdef __FreeBSD__
183 static struct cdevsw profile_cdevsw = {
184 	.d_version	= D_VERSION,
185 	.d_open		= profile_open,
186 	.d_name		= "profile",
187 };
188 #endif
189 
/*
 * Stability attributes handed to dtrace_register() by profile_load().
 * NOTE(review): presumably the five rows are, in order, the provider,
 * module, function, name and argument attributes of dtrace_pattr_t --
 * confirm against <sys/dtrace.h>.
 */
190 static dtrace_pattr_t profile_attr = {
191 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
192 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
193 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
194 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
195 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
196 };
197 
/*
 * Provider operations vector handed to dtrace_register() by profile_load().
 * The initializer is positional: only the provide, enable, disable and
 * destroy callbacks are supplied and every other slot is NULL.
 * NOTE(review): the slot order must match dtrace_pops_t in <sys/dtrace.h>;
 * verify when that structure changes.
 */
198 static dtrace_pops_t profile_pops = {
199 	profile_provide,
200 	NULL,
201 	profile_enable,
202 	profile_disable,
203 	NULL,
204 	NULL,
205 	NULL,
206 	NULL,
207 	NULL,
208 	profile_destroy
209 };
210 
/*
 * File-scope provider state.
 *
 * profile_cdev		device node created in profile_load() and destroyed
 *			in profile_unload() (FreeBSD only)
 * profile_id		provider id filled in by dtrace_register()
 * profile_interval_min	profile_create() refuses any interval below this
 * profile_aframes	when non-zero, replaces PROF_ARTIFICIAL_FRAMES in
 *			profile_create()
 */
211 #ifdef __FreeBSD__
212 static struct cdev		*profile_cdev;
213 #endif
214 static dtrace_provider_id_t	profile_id;
215 static hrtime_t			profile_interval_min = NANOSEC / 5000;	/* 5000 hz */
216 static int			profile_aframes = 0;			/* override */
217 
218 static void
219 profile_fire(void *arg)
220 {
221 	profile_probe_percpu_t *pcpu = arg;
222 	profile_probe_t *prof = pcpu->profc_probe;
223 	hrtime_t late;
224 	solaris_cpu_t *c = &solaris_cpu[cpu_number()];
225 
226 	late = gethrtime() - pcpu->profc_expected;
227 	pcpu->profc_expected += pcpu->profc_interval;
228 
229 	dtrace_probe(prof->prof_id, c->cpu_profile_pc,
230 	    c->cpu_profile_upc, late, 0, 0);
231 }
232 
233 static void
234 profile_tick(void *arg)
235 {
236 	profile_probe_t *prof = arg;
237 	solaris_cpu_t *c = &solaris_cpu[cpu_number()];
238 
239 	dtrace_probe(prof->prof_id, c->cpu_profile_pc,
240 	    c->cpu_profile_upc, 0, 0, 0);
241 }
242 
243 static void
244 profile_create(hrtime_t interval, char *name, int kind)
245 {
246 	profile_probe_t *prof;
247 
248 	if (interval < profile_interval_min)
249 		return;
250 
251 	if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
252 		return;
253 
254 	atomic_add_32(&profile_total, 1);
255 	if (profile_total > profile_max) {
256 		atomic_add_32(&profile_total, -1);
257 		return;
258 	}
259 
260 	prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
261 	(void) strcpy(prof->prof_name, name);
262 	prof->prof_interval = interval;
263 	prof->prof_cyclic = CYCLIC_NONE;
264 	prof->prof_kind = kind;
265 	prof->prof_id = dtrace_probe_create(profile_id,
266 	    NULL, NULL, name,
267 	    profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof);
268 }
269 
270 /*ARGSUSED*/
271 static void
272 profile_provide(void *arg, const dtrace_probedesc_t *desc)
273 {
274 	int i, j, rate, kind;
275 	hrtime_t val = 0, mult = 1, len = 0;
276 	char *name, *suffix = NULL;
277 
278 	const struct {
279 		const char *prefix;
280 		int kind;
281 	} types[] = {
282 		{ PROF_PREFIX_PROFILE, PROF_PROFILE },
283 		{ PROF_PREFIX_TICK, PROF_TICK },
284 		{ 0, 0 }
285 	};
286 
287 	const struct {
288 		const char *name;
289 		hrtime_t mult;
290 	} suffixes[] = {
291 		{ "ns", 	NANOSEC / NANOSEC },
292 		{ "nsec",	NANOSEC / NANOSEC },
293 		{ "us",		NANOSEC / MICROSEC },
294 		{ "usec",	NANOSEC / MICROSEC },
295 		{ "ms",		NANOSEC / MILLISEC },
296 		{ "msec",	NANOSEC / MILLISEC },
297 		{ "s",		NANOSEC / SEC },
298 		{ "sec",	NANOSEC / SEC },
299 		{ "m",		NANOSEC * (hrtime_t)60 },
300 		{ "min",	NANOSEC * (hrtime_t)60 },
301 		{ "h",		NANOSEC * (hrtime_t)(60 * 60) },
302 		{ "hour",	NANOSEC * (hrtime_t)(60 * 60) },
303 		{ "d",		NANOSEC * (hrtime_t)(24 * 60 * 60) },
304 		{ "day",	NANOSEC * (hrtime_t)(24 * 60 * 60) },
305 		{ "hz",		0 },
306 		{ NULL,		0 }
307 	};
308 
309 	if (desc == NULL) {
310 		char n[PROF_NAMELEN];
311 
312 		/*
313 		 * If no description was provided, provide all of our probes.
314 		 */
315 		for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
316 			if ((rate = profile_rates[i]) == 0)
317 				continue;
318 
319 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
320 			    PROF_PREFIX_PROFILE, rate);
321 			profile_create(NANOSEC / rate, n, PROF_PROFILE);
322 		}
323 
324 		for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
325 			if ((rate = profile_ticks[i]) == 0)
326 				continue;
327 
328 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
329 			    PROF_PREFIX_TICK, rate);
330 			profile_create(NANOSEC / rate, n, PROF_TICK);
331 		}
332 
333 		return;
334 	}
335 
336 	name = (char *)desc->dtpd_name;
337 
338 	for (i = 0; types[i].prefix != NULL; i++) {
339 		len = strlen(types[i].prefix);
340 
341 		if (strncmp(name, types[i].prefix, len) != 0)
342 			continue;
343 		break;
344 	}
345 
346 	if (types[i].prefix == NULL)
347 		return;
348 
349 	kind = types[i].kind;
350 	j = strlen(name) - len;
351 
352 	/*
353 	 * We need to start before any time suffix.
354 	 */
355 	for (j = strlen(name); j >= len; j--) {
356 		if (name[j] >= '0' && name[j] <= '9')
357 			break;
358 		suffix = &name[j];
359 	}
360 
361 	ASSERT(suffix != NULL);
362 
363 	/*
364 	 * Now determine the numerical value present in the probe name.
365 	 */
366 	for (; j >= len; j--) {
367 		if (name[j] < '0' || name[j] > '9')
368 			return;
369 
370 		val += (name[j] - '0') * mult;
371 		mult *= (hrtime_t)10;
372 	}
373 
374 	if (val == 0)
375 		return;
376 
377 	/*
378 	 * Look-up the suffix to determine the multiplier.
379 	 */
380 	for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
381 		if (strcasecmp(suffixes[i].name, suffix) == 0) {
382 			mult = suffixes[i].mult;
383 			break;
384 		}
385 	}
386 
387 	if (suffixes[i].name == NULL && *suffix != '\0')
388 		return;
389 
390 	if (mult == 0) {
391 		/*
392 		 * The default is frequency-per-second.
393 		 */
394 		val = NANOSEC / val;
395 	} else {
396 		val *= mult;
397 	}
398 
399 	profile_create(val, name, kind);
400 }
401 
402 /* ARGSUSED */
403 static void
404 profile_destroy(void *arg, dtrace_id_t id, void *parg)
405 {
406 	profile_probe_t *prof = parg;
407 
408 	ASSERT(prof->prof_cyclic == CYCLIC_NONE);
409 	kmem_free(prof, sizeof (profile_probe_t));
410 
411 	ASSERT(profile_total >= 1);
412 	atomic_add_32(&profile_total, -1);
413 }
414 
415 /*ARGSUSED*/
416 static void
417 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
418 {
419 	profile_probe_t *prof = arg;
420 	profile_probe_percpu_t *pcpu;
421 
422 	pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
423 	pcpu->profc_probe = prof;
424 
425 	hdlr->cyh_func = profile_fire;
426 	hdlr->cyh_arg = pcpu;
427 
428 	when->cyt_interval = prof->prof_interval;
429 	when->cyt_when = gethrtime() + when->cyt_interval;
430 
431 	pcpu->profc_expected = when->cyt_when;
432 	pcpu->profc_interval = when->cyt_interval;
433 }
434 
435 /*ARGSUSED*/
436 static void
437 profile_offline(void *arg, cpu_t *cpu, void *oarg)
438 {
439 	profile_probe_percpu_t *pcpu = oarg;
440 
441 	ASSERT(pcpu->profc_probe == arg);
442 	kmem_free(pcpu, sizeof (profile_probe_percpu_t));
443 }
444 
445 /* ARGSUSED */
446 static int
447 profile_enable(void *arg, dtrace_id_t id, void *parg)
448 {
449 	profile_probe_t *prof = parg;
450 	cyc_omni_handler_t omni;
451 	cyc_handler_t hdlr;
452 	cyc_time_t when;
453 
454 	ASSERT(prof->prof_interval != 0);
455 	ASSERT(MUTEX_HELD(&cpu_lock));
456 
457 	if (prof->prof_kind == PROF_TICK) {
458 		hdlr.cyh_func = profile_tick;
459 		hdlr.cyh_arg = prof;
460 
461 		when.cyt_interval = prof->prof_interval;
462 		when.cyt_when = gethrtime() + when.cyt_interval;
463 	} else {
464 		ASSERT(prof->prof_kind == PROF_PROFILE);
465 		omni.cyo_online = profile_online;
466 		omni.cyo_offline = profile_offline;
467 		omni.cyo_arg = prof;
468 	}
469 
470 	if (prof->prof_kind == PROF_TICK) {
471 		prof->prof_cyclic = cyclic_add(&hdlr, &when);
472 	} else {
473 		prof->prof_cyclic = cyclic_add_omni(&omni);
474 	}
475 	return 0;
476 }
477 
478 /* ARGSUSED */
479 static void
480 profile_disable(void *arg, dtrace_id_t id, void *parg)
481 {
482 	profile_probe_t *prof = parg;
483 
484 	ASSERT(prof->prof_cyclic != CYCLIC_NONE);
485 	ASSERT(MUTEX_HELD(&cpu_lock));
486 
487 	cyclic_remove(prof->prof_cyclic);
488 	prof->prof_cyclic = CYCLIC_NONE;
489 }
490 
491 static void
492 profile_load(void *dummy)
493 {
494 #ifdef __FreeBSD__
495 	/* Create the /dev/dtrace/profile entry. */
496 	profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
497 	    "dtrace/profile");
498 #endif
499 
500 	if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER,
501 	    NULL, &profile_pops, NULL, &profile_id) != 0)
502 		return;
503 }
504 
505 
506 static int
507 profile_unload()
508 {
509 	int error = 0;
510 
511 	if ((error = dtrace_unregister(profile_id)) != 0)
512 		return (error);
513 
514 #ifdef __FreeBSD__
515 	destroy_dev(profile_cdev);
516 #endif
517 
518 	return (error);
519 }
520 
521 #ifdef __FreeBSD__
522 
523 /* ARGSUSED */
524 static int
525 profile_modevent(module_t mod __unused, int type, void *data __unused)
526 {
527 	int error = 0;
528 
529 	switch (type) {
530 	case MOD_LOAD:
531 		break;
532 
533 	case MOD_UNLOAD:
534 		break;
535 
536 	case MOD_SHUTDOWN:
537 		break;
538 
539 	default:
540 		error = EOPNOTSUPP;
541 		break;
542 
543 	}
544 	return (error);
545 }
546 
547 /* ARGSUSED */
548 static int
549 profile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
550 {
551 	return (0);
552 }
553 
/*
 * FreeBSD: hook profile_load() and profile_unload() into the
 * SI_SUB_DTRACE_PROVIDER stage with a NULL argument.
 */
554 SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL);
555 SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL);
556 
/*
 * FreeBSD: declare the "profile" module with profile_modevent() as its
 * event handler, give it version 1, and record its dependencies on the
 * dtrace, cyclic and opensolaris modules.
 */
557 DEV_MODULE(profile, profile_modevent, NULL);
558 MODULE_VERSION(profile, 1);
559 MODULE_DEPEND(profile, dtrace, 1, 1, 1);
560 MODULE_DEPEND(profile, cyclic, 1, 1, 1);
561 MODULE_DEPEND(profile, opensolaris, 1, 1, 1);
562 
563 #endif
564 
565 #ifdef __NetBSD__
566 
567 static int
568 profile_modcmd(modcmd_t cmd, void *data)
569 {
570 	switch (cmd) {
571 	case MODULE_CMD_INIT:
572 		profile_load(NULL);
573 		return 0;
574 
575 	case MODULE_CMD_FINI:
576 		profile_unload();
577 		return 0;
578 
579 	default:
580 		return ENOTTY;
581 	}
582 }
583 
/*
 * NetBSD: declare the "profile" module in class MODULE_CLASS_MISC with
 * "dtrace,cyclic" as its required-modules string.
 */
584 MODULE(MODULE_CLASS_MISC, profile, "dtrace,cyclic");
585 
586 #endif
587