/*	$NetBSD: tprof.c,v 1.2 2008/05/07 08:48:11 yamt Exp $	*/

/*-
 * Copyright (c)2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.2 2008/05/07 08:48:11 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include <machine/db_machdep.h>	/* PC_REGS */

typedef struct {
	uintptr_t s_pc;		/* program counter */
} tprof_sample_t;

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100

typedef struct {
	tprof_buf_t *c_buf;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

/*
 * tprof_lock protects the run state, the worker count, the device owner,
 * the list of completed buffers and the statistics below.
 */
static kmutex_t tprof_lock;
static bool tprof_running;
static u_int tprof_nworker;
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;
static u_int tprof_nbuf_on_list;
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

/* tprof_reader_lock serializes read(2) and protects the read offset. */
static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;
static off_t tprof_reader_offset;

/* tprof_startstop_lock serializes start, stop and close. */
static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;

static struct tprof_stat tprof_stat;

static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

/*
 * tprof_buf_switch: install a new per-cpu buffer and return the old one.
 */
static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

/*
 * tprof_buf_refresh: allocate a fresh buffer for the current cpu and
 * return the buffer it replaces.
 */
static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * get a per cpu buffer.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz);
	}
}

/*
 * tprof_kick: per-cpu callout handler; enqueue the worker on this cpu.
 */
static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

/*
 * tprof_stop1: free the per-cpu buffers and callouts and destroy the
 * workqueue.
 */
static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

static int
tprof_start(const struct tprof_param *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	freq = tprof_backend_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tprof_backend_start();
	if (error != 0) {
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}

static void
tprof_stop(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tprof_backend_stop();

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	mutex_exit(&tprof_lock);

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(&tprof_lock);
		while (tprof_nworker > 0) {
			cv_wait(&tprof_cv, &tprof_lock);
		}
		mutex_exit(&tprof_lock);
	}

	tprof_stop1();
done:
	;
}

/*
 * tprof_clear: discard any completed buffers still on the list and
 * reset the statistics and the read offset.
 */
static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);
		tprof_buf_free(buf);
		mutex_enter(&tprof_lock);
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample on the per-cpu buffer.
 *
 * be careful; can be called in NMI context.
 * we are assuming that curcpu() is safe.
 */

void
tprof_sample(const struct trapframe *tf)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	const uintptr_t pc = PC_REGS(tf);
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	buf->b_data[idx].s_pc = pc;
	buf->b_used = idx + 1;
}

/* -------------------- cdevsw interfaces */

void tprofattach(int);

/*
 * tprof_open: allow only a single owner lwp at a time.
 */
static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
417 */ 418 mutex_enter(&tprof_lock); 419 buf = STAILQ_FIRST(&tprof_list); 420 if (buf == NULL) { 421 if (tprof_nworker == 0) { 422 mutex_exit(&tprof_lock); 423 error = 0; 424 break; 425 } 426 mutex_exit(&tprof_reader_lock); 427 error = cv_wait_sig(&tprof_reader_cv, &tprof_lock); 428 mutex_exit(&tprof_lock); 429 mutex_enter(&tprof_reader_lock); 430 continue; 431 } 432 STAILQ_REMOVE_HEAD(&tprof_list, b_list); 433 KASSERT(tprof_nbuf_on_list > 0); 434 tprof_nbuf_on_list--; 435 mutex_exit(&tprof_lock); 436 437 /* 438 * copy it out. 439 */ 440 bytes = MIN(buf->b_used * sizeof(tprof_sample_t) - 441 tprof_reader_offset, uio->uio_resid); 442 resid = uio->uio_resid; 443 error = uiomove((char *)buf->b_data + tprof_reader_offset, 444 bytes, uio); 445 done = resid - uio->uio_resid; 446 tprof_reader_offset += done; 447 448 /* 449 * if we didn't consume the whole buffer, 450 * put it back to the list. 451 */ 452 if (tprof_reader_offset < 453 buf->b_used * sizeof(tprof_sample_t)) { 454 mutex_enter(&tprof_lock); 455 STAILQ_INSERT_HEAD(&tprof_list, buf, b_list); 456 tprof_nbuf_on_list++; 457 cv_broadcast(&tprof_reader_cv); 458 mutex_exit(&tprof_lock); 459 } else { 460 tprof_buf_free(buf); 461 tprof_reader_offset = 0; 462 } 463 } 464 mutex_exit(&tprof_reader_lock); 465 466 return error; 467 } 468 469 static int 470 tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l) 471 { 472 const struct tprof_param *param; 473 int error = 0; 474 475 KASSERT(minor(dev) == 0); 476 477 switch (cmd) { 478 case TPROF_IOC_GETVERSION: 479 *(int *)data = TPROF_VERSION; 480 break; 481 case TPROF_IOC_START: 482 param = data; 483 mutex_enter(&tprof_startstop_lock); 484 error = tprof_start(param); 485 mutex_exit(&tprof_startstop_lock); 486 break; 487 case TPROF_IOC_STOP: 488 mutex_enter(&tprof_startstop_lock); 489 tprof_stop(); 490 mutex_exit(&tprof_startstop_lock); 491 break; 492 case TPROF_IOC_GETSTAT: 493 mutex_enter(&tprof_lock); 494 memcpy(data, &tprof_stat, sizeof(tprof_stat)); 495 mutex_exit(&tprof_lock); 496 break; 497 default: 498 error = EINVAL; 499 break; 500 } 501 502 return error; 503 } 504 505 const struct cdevsw tprof_cdevsw = { 506 .d_open = tprof_open, 507 .d_close = tprof_close, 508 .d_read = tprof_read, 509 .d_write = nowrite, 510 .d_ioctl = tprof_ioctl, 511 .d_stop = nostop, 512 .d_tty = notty, 513 .d_poll = nopoll, 514 .d_mmap = nommap, 515 .d_kqfilter = nokqfilter, 516 .d_flag = D_OTHER | D_MPSAFE, 517 }; 518 519 void 520 tprofattach(int nunits) 521 { 522 523 mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE); 524 mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE); 525 mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE); 526 cv_init(&tprof_cv, "tprof"); 527 cv_init(&tprof_reader_cv, "tprofread"); 528 STAILQ_INIT(&tprof_list); 529 } 530