xref: /netbsd-src/external/cddl/osnet/dev/profile/profile.c (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /*	$NetBSD: profile.c,v 1.3 2011/07/17 20:54:33 joerg Exp $	*/
2 
3 /*
4  * CDDL HEADER START
5  *
6  * The contents of this file are subject to the terms of the
7  * Common Development and Distribution License (the "License").
8  * You may not use this file except in compliance with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  *
23  * Portions Copyright 2006-2008 John Birrell jb@freebsd.org
24  *
25  * $FreeBSD: src/sys/cddl/dev/profile/profile.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $
26  *
27  */
28 
29 /*
30  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
31  * Use is subject to license terms.
32  */
33 
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/conf.h>
38 #include <sys/cpuvar.h>
39 #include <sys/fcntl.h>
40 #include <sys/filio.h>
41 #include <sys/kdb.h>
42 #include <sys/kernel.h>
43 #include <sys/kmem.h>
44 #include <sys/kthread.h>
45 #include <sys/limits.h>
46 #include <sys/linker.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/module.h>
50 #include <sys/mutex.h>
51 #include <sys/poll.h>
52 #include <sys/proc.h>
53 #include <sys/selinfo.h>
54 #include <sys/smp.h>
55 #include <sys/uio.h>
56 #include <sys/unistd.h>
57 
58 #include <sys/cyclic.h>
59 #include <sys/dtrace.h>
60 #include <sys/dtrace_bsd.h>
61 
/* Size in bytes of a probe-name buffer, including the terminating NUL. */
#define	PROF_NAMELEN		15

/* Probe kinds, stored in profile_probe_t.prof_kind. */
#define	PROF_PROFILE		0
#define	PROF_TICK		1

/* Probe-name prefix that selects each kind. */
#define	PROF_PREFIX_PROFILE	"profile-"
#define	PROF_PREFIX_TICK	"tick-"
68 
/*
 * Regardless of platform, there are five artificial frames in the case of the
 * profile provider:
 *
 *	profile_fire
 *	cyclic_expire
 *	cyclic_fire
 *	[ cbe ]
 *	[ locore ]
 *
 * On amd64, there are two frames associated with locore:  one in locore, and
 * another in common interrupt dispatch code.  (i386 has not been modified to
 * use this common layer.)  Further, on i386, the interrupted instruction
 * appears as its own stack frame.  All of this means that we need to add one
 * frame for amd64, and then take one away for both amd64 and i386.
 *
 * On SPARC, the picture is further complicated because the compiler
 * optimizes away tail-calls -- so the following frames are optimized away:
 *
 *	profile_fire
 *	cyclic_expire
 *
 * This gives three frames.  However, on DEBUG kernels, the cyclic_expire
 * frame cannot be tail-call eliminated, yielding four frames in this case.
 *
 * All of the above constraints lead to the mess below.  Yes, the profile
 * provider should ideally figure this out on-the-fly by hitting one of its
 * own probes and then walking its own stack trace.  This is complicated,
 * however, and the static definition doesn't seem to be overly brittle.
 * Still, we allow for a manual override (profile_aframes) in case we get it
 * completely wrong.
 *
 * NOTE(review): the amd64/i386 values below (7 and 6) are larger than the
 * arithmetic described above would give; presumably they were adjusted for
 * this port -- confirm.  Also note that no value is defined for any other
 * architecture.
 */
#if defined(__amd64)
#define	PROF_ARTIFICIAL_FRAMES	7
#elif defined(__i386)
#define	PROF_ARTIFICIAL_FRAMES	6
#elif defined(__sparc)
#ifdef DEBUG
#define	PROF_ARTIFICIAL_FRAMES	4
#else
#define	PROF_ARTIFICIAL_FRAMES	3
#endif
#endif
115 
116 typedef struct profile_probe {
117 	char		prof_name[PROF_NAMELEN];
118 	dtrace_id_t	prof_id;
119 	int		prof_kind;
120 	hrtime_t	prof_interval;
121 	cyclic_id_t	prof_cyclic;
122 } profile_probe_t;
123 
124 typedef struct profile_probe_percpu {
125 	hrtime_t	profc_expected;
126 	hrtime_t	profc_interval;
127 	profile_probe_t	*profc_probe;
128 } profile_probe_percpu_t;
129 
130 static d_open_t	profile_open;
131 static int	profile_unload(void);
132 static void	profile_create(hrtime_t, char *, int);
133 static void	profile_destroy(void *, dtrace_id_t, void *);
134 static void	profile_enable(void *, dtrace_id_t, void *);
135 static void	profile_disable(void *, dtrace_id_t, void *);
136 static void	profile_load(void *);
137 static void	profile_provide(void *, dtrace_probedesc_t *);
138 
/*
 * Rates, in firings per second, of the "profile-<rate>" probes created when
 * profile_provide() is called without a description.  The table has twenty
 * slots; the unlisted ones are zero, and zero entries are skipped.
 */
static int profile_rates[20] = {
	97,
	199,
	499,
	997,
	1999,
	4001,
	4999
};
145 
/*
 * Rates, in firings per second, of the "tick-<rate>" probes created when
 * profile_provide() is called without a description.  The table has fifteen
 * slots; the unlisted ones are zero, and zero entries are skipped.
 */
static int profile_ticks[15] = {
	1,
	10,
	100,
	500,
	1000,
	5000
};
151 
/*
 * profile_max defines the upper bound on the number of profile probes that
 * can exist (this is to prevent malicious or clumsy users from exhausting
 * system resources by creating a slew of profile probes).  It starts out as
 * PROFILE_MAX_DEFAULT.
 *
 * NOTE(review): the original comment says the value may also be taken from
 * profile-max-probes in profile.conf at module load time; nothing in this
 * file reads that setting -- confirm whether it applies to this port.
 */
#define	PROFILE_MAX_DEFAULT	1000	/* default max. number of probes */

/* Maximum number of profile probes. */
static uint32_t profile_max = PROFILE_MAX_DEFAULT;

/* Current number of profile probes. */
static uint32_t profile_total;
163 
164 static struct cdevsw profile_cdevsw = {
165 	.d_version	= D_VERSION,
166 	.d_open		= profile_open,
167 	.d_name		= "profile",
168 };
169 
170 static dtrace_pattr_t profile_attr = {
171 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
172 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
173 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
174 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
175 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
176 };
177 
178 static dtrace_pops_t profile_pops = {
179 	profile_provide,
180 	NULL,
181 	profile_enable,
182 	profile_disable,
183 	NULL,
184 	NULL,
185 	NULL,
186 	NULL,
187 	NULL,
188 	profile_destroy
189 };
190 
191 static struct cdev		*profile_cdev;
192 static dtrace_provider_id_t	profile_id;
193 static hrtime_t			profile_interval_min = NANOSEC / 5000;	/* 5000 hz */
194 static int			profile_aframes = 0;			/* override */
195 
196 static void
197 profile_fire(void *arg)
198 {
199 	profile_probe_percpu_t *pcpu = arg;
200 	profile_probe_t *prof = pcpu->profc_probe;
201 	hrtime_t late;
202 	solaris_cpu_t *c = &solaris_cpu[curcpu];
203 
204 	late = gethrtime() - pcpu->profc_expected;
205 	pcpu->profc_expected += pcpu->profc_interval;
206 
207 	dtrace_probe(prof->prof_id, c->cpu_profile_pc,
208 	    c->cpu_profile_upc, late, 0, 0);
209 }
210 
211 static void
212 profile_tick(void *arg)
213 {
214 	profile_probe_t *prof = arg;
215 	solaris_cpu_t *c = &solaris_cpu[curcpu];
216 
217 	dtrace_probe(prof->prof_id, c->cpu_profile_pc,
218 	    c->cpu_profile_upc, 0, 0, 0);
219 }
220 
221 static void
222 profile_create(hrtime_t interval, char *name, int kind)
223 {
224 	profile_probe_t *prof;
225 
226 	if (interval < profile_interval_min)
227 		return;
228 
229 	if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0)
230 		return;
231 
232 	atomic_add_32(&profile_total, 1);
233 	if (profile_total > profile_max) {
234 		atomic_add_32(&profile_total, -1);
235 		return;
236 	}
237 
238 	prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP);
239 	(void) strcpy(prof->prof_name, name);
240 	prof->prof_interval = interval;
241 	prof->prof_cyclic = CYCLIC_NONE;
242 	prof->prof_kind = kind;
243 	prof->prof_id = dtrace_probe_create(profile_id,
244 	    NULL, NULL, name,
245 	    profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof);
246 }
247 
248 /*ARGSUSED*/
249 static void
250 profile_provide(void *arg, dtrace_probedesc_t *desc)
251 {
252 	int i, j, rate, kind;
253 	hrtime_t val = 0, mult = 1, len = 0;
254 	char *name, *suffix = NULL;
255 
256 	const struct {
257 		char *prefix;
258 		int kind;
259 	} types[] = {
260 		{ PROF_PREFIX_PROFILE, PROF_PROFILE },
261 		{ PROF_PREFIX_TICK, PROF_TICK },
262 		{ 0, 0 }
263 	};
264 
265 	const struct {
266 		char *name;
267 		hrtime_t mult;
268 	} suffixes[] = {
269 		{ "ns", 	NANOSEC / NANOSEC },
270 		{ "nsec",	NANOSEC / NANOSEC },
271 		{ "us",		NANOSEC / MICROSEC },
272 		{ "usec",	NANOSEC / MICROSEC },
273 		{ "ms",		NANOSEC / MILLISEC },
274 		{ "msec",	NANOSEC / MILLISEC },
275 		{ "s",		NANOSEC / SEC },
276 		{ "sec",	NANOSEC / SEC },
277 		{ "m",		NANOSEC * (hrtime_t)60 },
278 		{ "min",	NANOSEC * (hrtime_t)60 },
279 		{ "h",		NANOSEC * (hrtime_t)(60 * 60) },
280 		{ "hour",	NANOSEC * (hrtime_t)(60 * 60) },
281 		{ "d",		NANOSEC * (hrtime_t)(24 * 60 * 60) },
282 		{ "day",	NANOSEC * (hrtime_t)(24 * 60 * 60) },
283 		{ "hz",		0 },
284 		{ NULL }
285 	};
286 
287 	if (desc == NULL) {
288 		char n[PROF_NAMELEN];
289 
290 		/*
291 		 * If no description was provided, provide all of our probes.
292 		 */
293 		for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) {
294 			if ((rate = profile_rates[i]) == 0)
295 				continue;
296 
297 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
298 			    PROF_PREFIX_PROFILE, rate);
299 			profile_create(NANOSEC / rate, n, PROF_PROFILE);
300 		}
301 
302 		for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) {
303 			if ((rate = profile_ticks[i]) == 0)
304 				continue;
305 
306 			(void) snprintf(n, PROF_NAMELEN, "%s%d",
307 			    PROF_PREFIX_TICK, rate);
308 			profile_create(NANOSEC / rate, n, PROF_TICK);
309 		}
310 
311 		return;
312 	}
313 
314 	name = desc->dtpd_name;
315 
316 	for (i = 0; types[i].prefix != NULL; i++) {
317 		len = strlen(types[i].prefix);
318 
319 		if (strncmp(name, types[i].prefix, len) != 0)
320 			continue;
321 		break;
322 	}
323 
324 	if (types[i].prefix == NULL)
325 		return;
326 
327 	kind = types[i].kind;
328 	j = strlen(name) - len;
329 
330 	/*
331 	 * We need to start before any time suffix.
332 	 */
333 	for (j = strlen(name); j >= len; j--) {
334 		if (name[j] >= '0' && name[j] <= '9')
335 			break;
336 		suffix = &name[j];
337 	}
338 
339 	ASSERT(suffix != NULL);
340 
341 	/*
342 	 * Now determine the numerical value present in the probe name.
343 	 */
344 	for (; j >= len; j--) {
345 		if (name[j] < '0' || name[j] > '9')
346 			return;
347 
348 		val += (name[j] - '0') * mult;
349 		mult *= (hrtime_t)10;
350 	}
351 
352 	if (val == 0)
353 		return;
354 
355 	/*
356 	 * Look-up the suffix to determine the multiplier.
357 	 */
358 	for (i = 0, mult = 0; suffixes[i].name != NULL; i++) {
359 		if (strcasecmp(suffixes[i].name, suffix) == 0) {
360 			mult = suffixes[i].mult;
361 			break;
362 		}
363 	}
364 
365 	if (suffixes[i].name == NULL && *suffix != '\0')
366 		return;
367 
368 	if (mult == 0) {
369 		/*
370 		 * The default is frequency-per-second.
371 		 */
372 		val = NANOSEC / val;
373 	} else {
374 		val *= mult;
375 	}
376 
377 	profile_create(val, name, kind);
378 }
379 
380 /* ARGSUSED */
381 static void
382 profile_destroy(void *arg, dtrace_id_t id, void *parg)
383 {
384 	profile_probe_t *prof = parg;
385 
386 	ASSERT(prof->prof_cyclic == CYCLIC_NONE);
387 	kmem_free(prof, sizeof (profile_probe_t));
388 
389 	ASSERT(profile_total >= 1);
390 	atomic_add_32(&profile_total, -1);
391 }
392 
393 /*ARGSUSED*/
394 static void
395 profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
396 {
397 	profile_probe_t *prof = arg;
398 	profile_probe_percpu_t *pcpu;
399 
400 	pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP);
401 	pcpu->profc_probe = prof;
402 
403 	hdlr->cyh_func = profile_fire;
404 	hdlr->cyh_arg = pcpu;
405 
406 	when->cyt_interval = prof->prof_interval;
407 	when->cyt_when = gethrtime() + when->cyt_interval;
408 
409 	pcpu->profc_expected = when->cyt_when;
410 	pcpu->profc_interval = when->cyt_interval;
411 }
412 
413 /*ARGSUSED*/
414 static void
415 profile_offline(void *arg, cpu_t *cpu, void *oarg)
416 {
417 	profile_probe_percpu_t *pcpu = oarg;
418 
419 	ASSERT(pcpu->profc_probe == arg);
420 	kmem_free(pcpu, sizeof (profile_probe_percpu_t));
421 }
422 
423 /* ARGSUSED */
424 static void
425 profile_enable(void *arg, dtrace_id_t id, void *parg)
426 {
427 	profile_probe_t *prof = parg;
428 	cyc_omni_handler_t omni;
429 	cyc_handler_t hdlr;
430 	cyc_time_t when;
431 
432 	ASSERT(prof->prof_interval != 0);
433 	ASSERT(MUTEX_HELD(&cpu_lock));
434 
435 	if (prof->prof_kind == PROF_TICK) {
436 		hdlr.cyh_func = profile_tick;
437 		hdlr.cyh_arg = prof;
438 
439 		when.cyt_interval = prof->prof_interval;
440 		when.cyt_when = gethrtime() + when.cyt_interval;
441 	} else {
442 		ASSERT(prof->prof_kind == PROF_PROFILE);
443 		omni.cyo_online = profile_online;
444 		omni.cyo_offline = profile_offline;
445 		omni.cyo_arg = prof;
446 	}
447 
448 	if (prof->prof_kind == PROF_TICK) {
449 		prof->prof_cyclic = cyclic_add(&hdlr, &when);
450 	} else {
451 		prof->prof_cyclic = cyclic_add_omni(&omni);
452 	}
453 }
454 
455 /* ARGSUSED */
456 static void
457 profile_disable(void *arg, dtrace_id_t id, void *parg)
458 {
459 	profile_probe_t *prof = parg;
460 
461 	ASSERT(prof->prof_cyclic != CYCLIC_NONE);
462 	ASSERT(MUTEX_HELD(&cpu_lock));
463 
464 	cyclic_remove(prof->prof_cyclic);
465 	prof->prof_cyclic = CYCLIC_NONE;
466 }
467 
468 static void
469 profile_load(void *dummy)
470 {
471 	/* Create the /dev/dtrace/profile entry. */
472 	profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
473 	    "dtrace/profile");
474 
475 	if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER,
476 	    NULL, &profile_pops, NULL, &profile_id) != 0)
477 		return;
478 }
479 
480 
481 static int
482 profile_unload()
483 {
484 	int error = 0;
485 
486 	if ((error = dtrace_unregister(profile_id)) != 0)
487 		return (error);
488 
489 	destroy_dev(profile_cdev);
490 
491 	return (error);
492 }
493 
494 /* ARGSUSED */
495 static int
496 profile_modevent(module_t mod __unused, int type, void *data __unused)
497 {
498 	int error = 0;
499 
500 	switch (type) {
501 	case MOD_LOAD:
502 		break;
503 
504 	case MOD_UNLOAD:
505 		break;
506 
507 	case MOD_SHUTDOWN:
508 		break;
509 
510 	default:
511 		error = EOPNOTSUPP;
512 		break;
513 
514 	}
515 	return (error);
516 }
517 
518 /* ARGSUSED */
519 static int
520 profile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused)
521 {
522 	return (0);
523 }
524 
525 SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL);
526 SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL);
527 
528 DEV_MODULE(profile, profile_modevent, NULL);
529 MODULE_VERSION(profile, 1);
530 MODULE_DEPEND(profile, dtrace, 1, 1, 1);
531 MODULE_DEPEND(profile, cyclic, 1, 1, 1);
532 MODULE_DEPEND(profile, opensolaris, 1, 1, 1);
533