1 /* $NetBSD: tprof.c,v 1.13 2015/08/20 14:40:18 christos Exp $ */ 2 3 /*- 4 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.13 2015/08/20 14:40:18 christos Exp $"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 36 #include <sys/cpu.h> 37 #include <sys/conf.h> 38 #include <sys/callout.h> 39 #include <sys/kmem.h> 40 #include <sys/module.h> 41 #include <sys/proc.h> 42 #include <sys/workqueue.h> 43 #include <sys/queue.h> 44 45 #include <dev/tprof/tprof.h> 46 #include <dev/tprof/tprof_ioctl.h> 47 48 #include "ioconf.h" 49 50 /* 51 * locking order: 52 * tprof_reader_lock -> tprof_lock 53 * tprof_startstop_lock -> tprof_lock 54 */ 55 56 /* 57 * protected by: 58 * L: tprof_lock 59 * R: tprof_reader_lock 60 * S: tprof_startstop_lock 61 * s: writer should hold tprof_startstop_lock and tprof_lock 62 * reader should hold tprof_startstop_lock or tprof_lock 63 */ 64 65 typedef struct tprof_buf { 66 u_int b_used; 67 u_int b_size; 68 u_int b_overflow; 69 u_int b_unused; 70 STAILQ_ENTRY(tprof_buf) b_list; 71 tprof_sample_t b_data[]; 72 } tprof_buf_t; 73 #define TPROF_BUF_BYTESIZE(sz) \ 74 (sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t)) 75 #define TPROF_MAX_SAMPLES_PER_BUF 10000 76 77 #define TPROF_MAX_BUF 100 78 79 typedef struct { 80 tprof_buf_t *c_buf; 81 uint32_t c_cpuid; 82 struct work c_work; 83 callout_t c_callout; 84 } __aligned(CACHE_LINE_SIZE) tprof_cpu_t; 85 86 typedef struct tprof_backend { 87 const char *tb_name; 88 const tprof_backend_ops_t *tb_ops; 89 LIST_ENTRY(tprof_backend) tb_list; 90 int tb_usecount; /* S: */ 91 } tprof_backend_t; 92 93 static kmutex_t tprof_lock; 94 static bool tprof_running; /* s: */ 95 static u_int tprof_nworker; /* L: # of running worker LWPs */ 96 static lwp_t *tprof_owner; 97 static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */ 98 static u_int tprof_nbuf_on_list; /* L: # of buffers on tprof_list */ 99 static struct workqueue *tprof_wq; 100 static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE); 101 static u_int tprof_samples_per_buf; 102 103 static tprof_backend_t *tprof_backend; /* S: */ 104 static LIST_HEAD(, tprof_backend) tprof_backends = 105 LIST_HEAD_INITIALIZER(tprof_backend); /* S: */ 106 107 static kmutex_t tprof_reader_lock; 108 static kcondvar_t tprof_reader_cv; /* L: */ 109 static off_t tprof_reader_offset; /* R: */ 110 111 static kmutex_t tprof_startstop_lock; 112 static kcondvar_t tprof_cv; /* L: */ 113 114 static struct tprof_stat tprof_stat; /* L: */ 115 116 static tprof_cpu_t * 117 tprof_cpu(struct cpu_info *ci) 118 { 119 120 return &tprof_cpus[cpu_index(ci)]; 121 } 122 123 static tprof_cpu_t * 124 tprof_curcpu(void) 125 { 126 127 return tprof_cpu(curcpu()); 128 } 129 130 static tprof_buf_t * 131 tprof_buf_alloc(void) 132 { 133 tprof_buf_t *new; 134 u_int size = tprof_samples_per_buf; 135 136 new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP); 137 new->b_used = 0; 138 new->b_size = size; 139 new->b_overflow = 0; 140 return new; 141 } 142 143 static void 144 tprof_buf_free(tprof_buf_t *buf) 145 { 146 147 kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size)); 148 } 149 150 static tprof_buf_t * 151 tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new) 152 { 153 tprof_buf_t *old; 154 155 old = c->c_buf; 156 c->c_buf = new; 157 return old; 158 } 159 160 static tprof_buf_t * 161 tprof_buf_refresh(void) 162 { 163 tprof_cpu_t * const c = tprof_curcpu(); 164 tprof_buf_t *new; 165 166 new = tprof_buf_alloc(); 167 return tprof_buf_switch(c, new); 168 } 169 170 static void 171 tprof_worker(struct work *wk, void *dummy) 172 { 173 tprof_cpu_t * const c = tprof_curcpu(); 174 tprof_buf_t *buf; 175 bool shouldstop; 176 177 KASSERT(wk == &c->c_work); 178 KASSERT(dummy == NULL); 179 180 /* 181 * get a per cpu buffer. 182 */ 183 buf = tprof_buf_refresh(); 184 185 /* 186 * and put it on the global list for read(2). 187 */ 188 mutex_enter(&tprof_lock); 189 shouldstop = !tprof_running; 190 if (shouldstop) { 191 KASSERT(tprof_nworker > 0); 192 tprof_nworker--; 193 cv_broadcast(&tprof_cv); 194 cv_broadcast(&tprof_reader_cv); 195 } 196 if (buf->b_used == 0) { 197 tprof_stat.ts_emptybuf++; 198 } else if (tprof_nbuf_on_list < TPROF_MAX_BUF) { 199 tprof_stat.ts_sample += buf->b_used; 200 tprof_stat.ts_overflow += buf->b_overflow; 201 tprof_stat.ts_buf++; 202 STAILQ_INSERT_TAIL(&tprof_list, buf, b_list); 203 tprof_nbuf_on_list++; 204 buf = NULL; 205 cv_broadcast(&tprof_reader_cv); 206 } else { 207 tprof_stat.ts_dropbuf_sample += buf->b_used; 208 tprof_stat.ts_dropbuf++; 209 } 210 mutex_exit(&tprof_lock); 211 if (buf) { 212 tprof_buf_free(buf); 213 } 214 if (!shouldstop) { 215 callout_schedule(&c->c_callout, hz); 216 } 217 } 218 219 static void 220 tprof_kick(void *vp) 221 { 222 struct cpu_info * const ci = vp; 223 tprof_cpu_t * const c = tprof_cpu(ci); 224 225 workqueue_enqueue(tprof_wq, &c->c_work, ci); 226 } 227 228 static void 229 tprof_stop1(void) 230 { 231 CPU_INFO_ITERATOR cii; 232 struct cpu_info *ci; 233 234 KASSERT(mutex_owned(&tprof_startstop_lock)); 235 KASSERT(tprof_nworker == 0); 236 237 for (CPU_INFO_FOREACH(cii, ci)) { 238 tprof_cpu_t * const c = tprof_cpu(ci); 239 tprof_buf_t *old; 240 241 old = tprof_buf_switch(c, NULL); 242 if (old != NULL) { 243 tprof_buf_free(old); 244 } 245 callout_destroy(&c->c_callout); 246 } 247 workqueue_destroy(tprof_wq); 248 } 249 250 static int 251 tprof_start(const struct tprof_param *param) 252 { 253 CPU_INFO_ITERATOR cii; 254 struct cpu_info *ci; 255 int error; 256 uint64_t freq; 257 tprof_backend_t *tb; 258 259 KASSERT(mutex_owned(&tprof_startstop_lock)); 260 if (tprof_running) { 261 error = EBUSY; 262 goto done; 263 } 264 265 tb = tprof_backend; 266 if (tb == NULL) { 267 error = ENOENT; 268 goto done; 269 } 270 if (tb->tb_usecount > 0) { 271 error = EBUSY; 272 goto done; 273 } 274 275 tb->tb_usecount++; 276 freq = tb->tb_ops->tbo_estimate_freq(); 277 tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF); 278 279 error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL, 280 PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU); 281 if (error != 0) { 282 goto done; 283 } 284 285 for (CPU_INFO_FOREACH(cii, ci)) { 286 tprof_cpu_t * const c = tprof_cpu(ci); 287 tprof_buf_t *new; 288 tprof_buf_t *old; 289 290 new = tprof_buf_alloc(); 291 old = tprof_buf_switch(c, new); 292 if (old != NULL) { 293 tprof_buf_free(old); 294 } 295 callout_init(&c->c_callout, CALLOUT_MPSAFE); 296 callout_setfunc(&c->c_callout, tprof_kick, ci); 297 } 298 299 error = tb->tb_ops->tbo_start(NULL); 300 if (error != 0) { 301 KASSERT(tb->tb_usecount > 0); 302 tb->tb_usecount--; 303 tprof_stop1(); 304 goto done; 305 } 306 307 mutex_enter(&tprof_lock); 308 tprof_running = true; 309 mutex_exit(&tprof_lock); 310 for (CPU_INFO_FOREACH(cii, ci)) { 311 tprof_cpu_t * const c = tprof_cpu(ci); 312 313 mutex_enter(&tprof_lock); 314 tprof_nworker++; 315 mutex_exit(&tprof_lock); 316 workqueue_enqueue(tprof_wq, &c->c_work, ci); 317 } 318 done: 319 return error; 320 } 321 322 static void 323 tprof_stop(void) 324 { 325 tprof_backend_t *tb; 326 327 KASSERT(mutex_owned(&tprof_startstop_lock)); 328 if (!tprof_running) { 329 goto done; 330 } 331 332 tb = tprof_backend; 333 KASSERT(tb->tb_usecount > 0); 334 tb->tb_ops->tbo_stop(NULL); 335 tb->tb_usecount--; 336 337 mutex_enter(&tprof_lock); 338 tprof_running = false; 339 cv_broadcast(&tprof_reader_cv); 340 while (tprof_nworker > 0) { 341 cv_wait(&tprof_cv, &tprof_lock); 342 } 343 mutex_exit(&tprof_lock); 344 345 tprof_stop1(); 346 done: 347 ; 348 } 349 350 /* 351 * tprof_clear: drain unread samples. 352 */ 353 354 static void 355 tprof_clear(void) 356 { 357 tprof_buf_t *buf; 358 359 mutex_enter(&tprof_reader_lock); 360 mutex_enter(&tprof_lock); 361 while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) { 362 if (buf != NULL) { 363 STAILQ_REMOVE_HEAD(&tprof_list, b_list); 364 KASSERT(tprof_nbuf_on_list > 0); 365 tprof_nbuf_on_list--; 366 mutex_exit(&tprof_lock); 367 tprof_buf_free(buf); 368 mutex_enter(&tprof_lock); 369 } 370 } 371 KASSERT(tprof_nbuf_on_list == 0); 372 mutex_exit(&tprof_lock); 373 tprof_reader_offset = 0; 374 mutex_exit(&tprof_reader_lock); 375 376 memset(&tprof_stat, 0, sizeof(tprof_stat)); 377 } 378 379 static tprof_backend_t * 380 tprof_backend_lookup(const char *name) 381 { 382 tprof_backend_t *tb; 383 384 KASSERT(mutex_owned(&tprof_startstop_lock)); 385 386 LIST_FOREACH(tb, &tprof_backends, tb_list) { 387 if (!strcmp(tb->tb_name, name)) { 388 return tb; 389 } 390 } 391 return NULL; 392 } 393 394 /* -------------------- backend interfaces */ 395 396 /* 397 * tprof_sample: record a sample on the per-cpu buffer. 398 * 399 * be careful; can be called in NMI context. 400 * we are bluntly assuming the followings are safe. 401 * curcpu() 402 * curlwp->l_lid 403 * curlwp->l_proc->p_pid 404 */ 405 406 void 407 tprof_sample(tprof_backend_cookie_t *cookie, const tprof_frame_info_t *tfi) 408 { 409 tprof_cpu_t * const c = tprof_curcpu(); 410 tprof_buf_t * const buf = c->c_buf; 411 tprof_sample_t *sp; 412 const uintptr_t pc = tfi->tfi_pc; 413 const lwp_t * const l = curlwp; 414 u_int idx; 415 416 idx = buf->b_used; 417 if (__predict_false(idx >= buf->b_size)) { 418 buf->b_overflow++; 419 return; 420 } 421 sp = &buf->b_data[idx]; 422 sp->s_pid = l->l_proc->p_pid; 423 sp->s_lwpid = l->l_lid; 424 sp->s_cpuid = c->c_cpuid; 425 sp->s_flags = (tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0; 426 sp->s_pc = pc; 427 buf->b_used = idx + 1; 428 } 429 430 /* 431 * tprof_backend_register: 432 */ 433 434 int 435 tprof_backend_register(const char *name, const tprof_backend_ops_t *ops, 436 int vers) 437 { 438 tprof_backend_t *tb; 439 440 if (vers != TPROF_BACKEND_VERSION) { 441 return EINVAL; 442 } 443 444 mutex_enter(&tprof_startstop_lock); 445 tb = tprof_backend_lookup(name); 446 if (tb != NULL) { 447 mutex_exit(&tprof_startstop_lock); 448 return EEXIST; 449 } 450 #if 1 /* XXX for now */ 451 if (!LIST_EMPTY(&tprof_backends)) { 452 mutex_exit(&tprof_startstop_lock); 453 return ENOTSUP; 454 } 455 #endif 456 tb = kmem_alloc(sizeof(*tb), KM_SLEEP); 457 tb->tb_name = name; 458 tb->tb_ops = ops; 459 tb->tb_usecount = 0; 460 LIST_INSERT_HEAD(&tprof_backends, tb, tb_list); 461 #if 1 /* XXX for now */ 462 if (tprof_backend == NULL) { 463 tprof_backend = tb; 464 } 465 #endif 466 mutex_exit(&tprof_startstop_lock); 467 468 return 0; 469 } 470 471 /* 472 * tprof_backend_unregister: 473 */ 474 475 int 476 tprof_backend_unregister(const char *name) 477 { 478 tprof_backend_t *tb; 479 480 mutex_enter(&tprof_startstop_lock); 481 tb = tprof_backend_lookup(name); 482 #if defined(DIAGNOSTIC) 483 if (tb == NULL) { 484 mutex_exit(&tprof_startstop_lock); 485 panic("%s: not found '%s'", __func__, name); 486 } 487 #endif /* defined(DIAGNOSTIC) */ 488 if (tb->tb_usecount > 0) { 489 mutex_exit(&tprof_startstop_lock); 490 return EBUSY; 491 } 492 #if 1 /* XXX for now */ 493 if (tprof_backend == tb) { 494 tprof_backend = NULL; 495 } 496 #endif 497 LIST_REMOVE(tb, tb_list); 498 mutex_exit(&tprof_startstop_lock); 499 500 kmem_free(tb, sizeof(*tb)); 501 502 return 0; 503 } 504 505 /* -------------------- cdevsw interfaces */ 506 507 static int 508 tprof_open(dev_t dev, int flags, int type, struct lwp *l) 509 { 510 511 if (minor(dev) != 0) { 512 return EXDEV; 513 } 514 mutex_enter(&tprof_lock); 515 if (tprof_owner != NULL) { 516 mutex_exit(&tprof_lock); 517 return EBUSY; 518 } 519 tprof_owner = curlwp; 520 mutex_exit(&tprof_lock); 521 522 return 0; 523 } 524 525 static int 526 tprof_close(dev_t dev, int flags, int type, struct lwp *l) 527 { 528 529 KASSERT(minor(dev) == 0); 530 531 mutex_enter(&tprof_startstop_lock); 532 mutex_enter(&tprof_lock); 533 tprof_owner = NULL; 534 mutex_exit(&tprof_lock); 535 tprof_stop(); 536 tprof_clear(); 537 mutex_exit(&tprof_startstop_lock); 538 539 return 0; 540 } 541 542 static int 543 tprof_read(dev_t dev, struct uio *uio, int flags) 544 { 545 tprof_buf_t *buf; 546 size_t bytes; 547 size_t resid; 548 size_t done; 549 int error = 0; 550 551 KASSERT(minor(dev) == 0); 552 mutex_enter(&tprof_reader_lock); 553 while (uio->uio_resid > 0 && error == 0) { 554 /* 555 * take the first buffer from the list. 556 */ 557 mutex_enter(&tprof_lock); 558 buf = STAILQ_FIRST(&tprof_list); 559 if (buf == NULL) { 560 if (tprof_nworker == 0) { 561 mutex_exit(&tprof_lock); 562 error = 0; 563 break; 564 } 565 mutex_exit(&tprof_reader_lock); 566 error = cv_wait_sig(&tprof_reader_cv, &tprof_lock); 567 mutex_exit(&tprof_lock); 568 mutex_enter(&tprof_reader_lock); 569 continue; 570 } 571 STAILQ_REMOVE_HEAD(&tprof_list, b_list); 572 KASSERT(tprof_nbuf_on_list > 0); 573 tprof_nbuf_on_list--; 574 mutex_exit(&tprof_lock); 575 576 /* 577 * copy it out. 578 */ 579 bytes = MIN(buf->b_used * sizeof(tprof_sample_t) - 580 tprof_reader_offset, uio->uio_resid); 581 resid = uio->uio_resid; 582 error = uiomove((char *)buf->b_data + tprof_reader_offset, 583 bytes, uio); 584 done = resid - uio->uio_resid; 585 tprof_reader_offset += done; 586 587 /* 588 * if we didn't consume the whole buffer, 589 * put it back to the list. 590 */ 591 if (tprof_reader_offset < 592 buf->b_used * sizeof(tprof_sample_t)) { 593 mutex_enter(&tprof_lock); 594 STAILQ_INSERT_HEAD(&tprof_list, buf, b_list); 595 tprof_nbuf_on_list++; 596 cv_broadcast(&tprof_reader_cv); 597 mutex_exit(&tprof_lock); 598 } else { 599 tprof_buf_free(buf); 600 tprof_reader_offset = 0; 601 } 602 } 603 mutex_exit(&tprof_reader_lock); 604 605 return error; 606 } 607 608 static int 609 tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l) 610 { 611 const struct tprof_param *param; 612 int error = 0; 613 614 KASSERT(minor(dev) == 0); 615 616 switch (cmd) { 617 case TPROF_IOC_GETVERSION: 618 *(int *)data = TPROF_VERSION; 619 break; 620 case TPROF_IOC_START: 621 param = data; 622 mutex_enter(&tprof_startstop_lock); 623 error = tprof_start(param); 624 mutex_exit(&tprof_startstop_lock); 625 break; 626 case TPROF_IOC_STOP: 627 mutex_enter(&tprof_startstop_lock); 628 tprof_stop(); 629 mutex_exit(&tprof_startstop_lock); 630 break; 631 case TPROF_IOC_GETSTAT: 632 mutex_enter(&tprof_lock); 633 memcpy(data, &tprof_stat, sizeof(tprof_stat)); 634 mutex_exit(&tprof_lock); 635 break; 636 default: 637 error = EINVAL; 638 break; 639 } 640 641 return error; 642 } 643 644 const struct cdevsw tprof_cdevsw = { 645 .d_open = tprof_open, 646 .d_close = tprof_close, 647 .d_read = tprof_read, 648 .d_write = nowrite, 649 .d_ioctl = tprof_ioctl, 650 .d_stop = nostop, 651 .d_tty = notty, 652 .d_poll = nopoll, 653 .d_mmap = nommap, 654 .d_kqfilter = nokqfilter, 655 .d_discard = nodiscard, 656 .d_flag = D_OTHER | D_MPSAFE 657 }; 658 659 void 660 tprofattach(int nunits) 661 { 662 663 /* nothing */ 664 } 665 666 MODULE(MODULE_CLASS_DRIVER, tprof, NULL); 667 668 static void 669 tprof_driver_init(void) 670 { 671 unsigned int i; 672 673 mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE); 674 mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE); 675 mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE); 676 cv_init(&tprof_cv, "tprof"); 677 cv_init(&tprof_reader_cv, "tprof_rd"); 678 STAILQ_INIT(&tprof_list); 679 for (i = 0; i < __arraycount(tprof_cpus); i++) { 680 tprof_cpu_t * const c = &tprof_cpus[i]; 681 682 c->c_buf = NULL; 683 c->c_cpuid = i; 684 } 685 } 686 687 static void 688 tprof_driver_fini(void) 689 { 690 691 mutex_destroy(&tprof_lock); 692 mutex_destroy(&tprof_reader_lock); 693 mutex_destroy(&tprof_startstop_lock); 694 cv_destroy(&tprof_cv); 695 cv_destroy(&tprof_reader_cv); 696 } 697 698 static int 699 tprof_modcmd(modcmd_t cmd, void *arg) 700 { 701 702 switch (cmd) { 703 case MODULE_CMD_INIT: 704 tprof_driver_init(); 705 #if defined(_MODULE) 706 { 707 devmajor_t bmajor = NODEVMAJOR; 708 devmajor_t cmajor = NODEVMAJOR; 709 int error; 710 711 error = devsw_attach("tprof", NULL, &bmajor, 712 &tprof_cdevsw, &cmajor); 713 if (error) { 714 tprof_driver_fini(); 715 return error; 716 } 717 } 718 #endif /* defined(_MODULE) */ 719 return 0; 720 721 case MODULE_CMD_FINI: 722 #if defined(_MODULE) 723 { 724 int error; 725 error = devsw_detach(NULL, &tprof_cdevsw); 726 if (error) { 727 return error; 728 } 729 } 730 #endif /* defined(_MODULE) */ 731 tprof_driver_fini(); 732 return 0; 733 734 default: 735 return ENOTTY; 736 } 737 } 738