/*	$NetBSD: tprof.c,v 1.2 2008/05/07 08:48:11 yamt Exp $	*/

/*-
 * Copyright (c)2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.2 2008/05/07 08:48:11 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include <machine/db_machdep.h> /* PC_REGS */

typedef struct {
	uintptr_t s_pc;	/* program counter */
} tprof_sample_t;

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100
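/*
 * Buffer sizing: TPROF_BUF_BYTESIZE(sz) is the buffer header plus sz
 * samples.  At the TPROF_MAX_SAMPLES_PER_BUF cap of 10000 pointer-sized
 * samples this is roughly 80 kB per buffer on an LP64 machine (a handful
 * of header bytes plus 10000 * 8 bytes of samples).
 */

/*
 * Per-CPU state.  The structure is cache-line aligned so that CPUs
 * updating their own state do not false-share cache lines with each
 * other.
 */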
typedef struct {
	tprof_buf_t *c_buf;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;
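/*
 * Global state.
 *
 * Lock order, as used in this file:
 *	tprof_startstop_lock -> tprof_lock
 *	tprof_reader_lock -> tprof_lock
 *
 * tprof_lock protects the buffer list, the statistics and the
 * running/worker state; tprof_reader_lock serializes readers and
 * protects tprof_reader_offset; tprof_startstop_lock serializes start,
 * stop and close.
 */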
static kmutex_t tprof_lock;
static bool tprof_running;
static u_int tprof_nworker;
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;
static u_int tprof_nbuf_on_list;
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;
static off_t tprof_reader_offset;

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;

static struct tprof_stat tprof_stat;

static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}
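/*
 * tprof_buf_alloc: allocate a sample buffer sized for
 * tprof_samples_per_buf samples.  KM_SLEEP allocation; must not be
 * called from interrupt context.
 */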
static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}
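/*
 * tprof_worker: per-CPU worker, run from the workqueue roughly once per
 * second.  Swap in a fresh per-CPU buffer, queue the filled one on the
 * global list for read(2) (or drop it if the list is already full), and
 * reschedule the callout unless profiling has been stopped.
 */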
static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * take the current per-cpu buffer, replacing it with a fresh one.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz);
	}
}
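/*
 * tprof_kick: per-CPU callout handler; enqueue the work so that
 * tprof_worker runs on this CPU.
 */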
static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}
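/*
 * tprof_stop1: release per-CPU resources; free the per-CPU buffers,
 * destroy the callouts and the workqueue.  Called with
 * tprof_startstop_lock held, after the workers have drained or when
 * tprof_start() fails part way.
 */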
static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}
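/*
 * tprof_start: start profiling.
 *
 * Size the per-CPU buffers for roughly two seconds worth of samples
 * (capped at TPROF_MAX_SAMPLES_PER_BUF), create the per-CPU workqueue,
 * give each CPU a buffer and a callout, start the backend, and kick one
 * worker per CPU.  Fails with EBUSY if profiling is already running.
 */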
static int
tprof_start(const struct tprof_param *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	freq = tprof_backend_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tprof_backend_start();
	if (error != 0) {
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}
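/*
 * tprof_stop: stop profiling.
 *
 * Stop the backend, clear tprof_running, then wait until every per-CPU
 * worker has noticed and exited (tprof_nworker reaches zero) before
 * tearing down the resources with tprof_stop1().
 */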
static void
tprof_stop(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tprof_backend_stop();

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	mutex_exit(&tprof_lock);

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(&tprof_lock);
		while (tprof_nworker > 0) {
			cv_wait(&tprof_cv, &tprof_lock);
		}
		mutex_exit(&tprof_lock);
	}

	tprof_stop1();
done:
	;
}
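/*
 * tprof_clear: discard any buffers still queued for read(2), reset the
 * reader offset and zero the statistics.  Called on close.
 */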
static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);
		tprof_buf_free(buf);
		mutex_enter(&tprof_lock);
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}
/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample on the per-cpu buffer.
 *
 * be careful; can be called in NMI context.
 * we are assuming that curcpu() is safe.
 */

void
tprof_sample(const struct trapframe *tf)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	const uintptr_t pc = PC_REGS(tf);
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	buf->b_data[idx].s_pc = pc;
	buf->b_used = idx + 1;
}

/* -------------------- cdevsw interfaces */

void tprofattach(int);
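/*
 * tprof_open: exclusive open; only one owner at a time, and only minor
 * number 0 exists.  A second open fails with EBUSY.
 */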
static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}
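/*
 * tprof_read: copy samples out to userland as an array of tprof_sample_t
 * records.  Buffers are taken off the global list one at a time;
 * tprof_reader_offset tracks how far into the current buffer the reader
 * has got, so a buffer only partially consumed by a short read is put
 * back at the head of the list.  Waits (interruptibly) while profiling
 * is running and no buffer is available, and returns EOF once profiling
 * has stopped and the list is empty.
 */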
static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * if we didn't consume the whole buffer,
		 * put it back to the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}
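/*
 * tprof_ioctl: control interface; report the ABI version, start and
 * stop profiling, and export the statistics.
 */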
static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const struct tprof_param *param;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETVERSION:
		*(int *)data = TPROF_VERSION;
		break;
	case TPROF_IOC_START:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop();
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}
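/*
 * Character device entry points.  D_MPSAFE: the driver does its own
 * locking, so these entry points are called without the kernel lock.
 */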
const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_OTHER | D_MPSAFE,
};
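/*
 * Illustrative userland usage (a sketch only, not part of the driver;
 * it assumes the device node is /dev/tprof and omits error handling).
 * The start parameters are simply zeroed here because this revision of
 * the driver does not interpret them; the raw samples are dumped to
 * stdout after profiling is stopped, at which point read(2) drains the
 * queued buffers and then returns EOF:
 *
 *	#include <sys/ioctl.h>
 *	#include <dev/tprof/tprof_ioctl.h>
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct tprof_param param;
 *		char buf[65536];
 *		ssize_t n;
 *		int fd, version;
 *
 *		fd = open("/dev/tprof", O_RDWR);
 *		ioctl(fd, TPROF_IOC_GETVERSION, &version);
 *		memset(&param, 0, sizeof(param));
 *		ioctl(fd, TPROF_IOC_START, &param);
 *		sleep(10);
 *		ioctl(fd, TPROF_IOC_STOP);
 *		while ((n = read(fd, buf, sizeof(buf))) > 0) {
 *			fwrite(buf, 1, (size_t)n, stdout);
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */

/*
 * tprofattach: pseudo-device attach routine; initialize the locks, the
 * condition variables and the buffer list.
 */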
void
tprofattach(int nunits)
{

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprofread");
	STAILQ_INIT(&tprof_list);
}