/*	$NetBSD: tprof.c,v 1.22 2022/12/16 17:38:56 ryo Exp $	*/

/*-
 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.22 2022/12/16 17:38:56 ryo Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/percpu.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/select.h>
#include <sys/workqueue.h>
#include <sys/xcall.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include "ioconf.h"

#ifndef TPROF_HZ
#define TPROF_HZ	10000
#endif

/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

/*
 * protected by:
 *	L: tprof_lock
 *	R: tprof_reader_lock
 *	S: tprof_startstop_lock
 *	s: writer should hold tprof_startstop_lock and tprof_lock
 *	   reader should hold tprof_startstop_lock or tprof_lock
 */

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	TPROF_HZ

typedef struct {
	tprof_buf_t *c_buf;
	uint32_t c_cpuid;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

typedef struct tprof_backend {
	/*
	 * tprof_backend_softc_t should be passed as the argument to the
	 * interrupt handler, but that is difficult to arrange on armv7/v8,
	 * so the whole tprof_backend is exposed instead.  For this to work,
	 * the softc must be placed at the beginning of struct tprof_backend.
	 */
	tprof_backend_softc_t tb_softc;

	const char *tb_name;
	const tprof_backend_ops_t *tb_ops;
	LIST_ENTRY(tprof_backend) tb_list;
} tprof_backend_t;

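/*
 * Illustration of the layout requirement above (not relied upon elsewhere
 * in this file): since tb_softc is the first member, a pointer to the
 * backend and a pointer to its softc coincide, e.g.
 *
 *	tprof_backend_t *tb;
 *	KASSERT((void *)&tb->tb_softc == (void *)tb);
 *
 * so exposing the whole tprof_backend still gives the interrupt path a
 * usable tprof_backend_softc_t.
 */
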
static kmutex_t tprof_lock;
static u_int tprof_nworker;		/* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;	/* L: global buffer list */
static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
static struct workqueue *tprof_wq;
static struct percpu *tprof_cpus __read_mostly;	/* tprof_cpu_t * */
static u_int tprof_samples_per_buf;
static u_int tprof_max_buf;

tprof_backend_t *tprof_backend;	/* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
    LIST_HEAD_INITIALIZER(tprof_backend);	/* S: */

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;	/* L: */
static off_t tprof_reader_offset;	/* R: */

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;		/* L: */
static struct selinfo tprof_selp;	/* L: */

static struct tprof_stat tprof_stat;	/* L: */

static tprof_cpu_t *
tprof_cpu_direct(struct cpu_info *ci)
{
	tprof_cpu_t **cp;

	cp = percpu_getptr_remote(tprof_cpus, ci);
	return *cp;
}

static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{
	tprof_cpu_t *c;

	/*
	 * As long as xcalls are blocked -- e.g., by kpreempt_disable
	 * -- the percpu object will not be swapped and destroyed.  We
	 * can't write to it, because the data may have already been
	 * moved to a new buffer, but we can safely read from it.
	 */
	kpreempt_disable();
	c = tprof_cpu_direct(ci);
	kpreempt_enable();

	return c;
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

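/*
 * Overview of the buffer pipeline implemented below: the backend's
 * sampling interrupt calls tprof_sample(), which appends tprof_sample_t
 * records to the current per-CPU buffer.  tprof_worker() runs periodically
 * on each CPU (rescheduled via c_callout), swaps in a fresh buffer and
 * moves the filled one onto tprof_list, where tprof_read() hands it to
 * userland.  If more than tprof_max_buf buffers are already queued, the
 * buffer is dropped and accounted in tprof_stat instead.
 */
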
static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	tprof_backend_t *tb;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * get a per cpu buffer.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	tb = tprof_backend;
	shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0);
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < tprof_max_buf) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		selnotify(&tprof_selp, 0, NOTE_SUBMIT);
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz / 8);
	}
}

static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	KASSERT(tprof_nworker == 0);

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

static void
tprof_getinfo(struct tprof_info *info)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	memset(info, 0, sizeof(*info));
	info->ti_version = TPROF_VERSION;
	if ((tb = tprof_backend) != NULL) {
		info->ti_ident = tb->tb_ops->tbo_ident();
	}
}

static int
tprof_getncounters(u_int *ncounters)
{
	tprof_backend_t *tb;

	tb = tprof_backend;
	if (tb == NULL)
		return ENOENT;

	*ncounters = tb->tb_ops->tbo_ncounters();
	return 0;
}

static void
tprof_start_cpu(void *arg1, void *arg2)
{
	tprof_backend_t *tb = arg1;
	tprof_countermask_t runmask = (uintptr_t)arg2;

	tb->tb_ops->tbo_start(runmask);
}

static void
tprof_stop_cpu(void *arg1, void *arg2)
{
	tprof_backend_t *tb = arg1;
	tprof_countermask_t stopmask = (uintptr_t)arg2;

	tb->tb_ops->tbo_stop(stopmask);
}

static int
tprof_start(tprof_countermask_t runmask)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	tprof_backend_t *tb;
	uint64_t xc;
	int error;
	bool firstrun;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	tb = tprof_backend;
	if (tb == NULL) {
		error = ENOENT;
		goto done;
	}

	runmask &= ~tb->tb_softc.sc_ctr_running_mask;
	runmask &= tb->tb_softc.sc_ctr_configured_mask;
	if (runmask == 0) {
		/*
		 * targets are already running.
		 * unconfigured counters are ignored.
		 */
		error = 0;
		goto done;
	}

	firstrun = (tb->tb_softc.sc_ctr_running_mask == 0);
	if (firstrun) {
		if (tb->tb_ops->tbo_establish != NULL) {
			error = tb->tb_ops->tbo_establish(&tb->tb_softc);
			if (error != 0)
				goto done;
		}

		tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF;
		tprof_max_buf = ncpu * 3;
		error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker,
		    NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
		if (error != 0) {
			if (tb->tb_ops->tbo_disestablish != NULL)
				tb->tb_ops->tbo_disestablish(&tb->tb_softc);
			goto done;
		}

		for (CPU_INFO_FOREACH(cii, ci)) {
			tprof_cpu_t * const c = tprof_cpu(ci);
			tprof_buf_t *new;
			tprof_buf_t *old;

			new = tprof_buf_alloc();
			old = tprof_buf_switch(c, new);
			if (old != NULL) {
				tprof_buf_free(old);
			}
			callout_init(&c->c_callout, CALLOUT_MPSAFE);
			callout_setfunc(&c->c_callout, tprof_kick, ci);
		}
	}

	runmask &= tb->tb_softc.sc_ctr_configured_mask;
	xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask);
	xc_wait(xc);
	mutex_enter(&tprof_lock);
	tb->tb_softc.sc_ctr_running_mask |= runmask;
	mutex_exit(&tprof_lock);

	if (firstrun) {
		for (CPU_INFO_FOREACH(cii, ci)) {
			tprof_cpu_t * const c = tprof_cpu(ci);

			mutex_enter(&tprof_lock);
			tprof_nworker++;
			mutex_exit(&tprof_lock);
			workqueue_enqueue(tprof_wq, &c->c_work, ci);
		}
	}
	error = 0;

done:
	return error;
}

static void
tprof_stop(tprof_countermask_t stopmask)
{
	tprof_backend_t *tb;
	uint64_t xc;

	tb = tprof_backend;
	if (tb == NULL)
		return;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	stopmask &= tb->tb_softc.sc_ctr_running_mask;
	if (stopmask == 0) {
		/* targets are not running */
		goto done;
	}

	xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask);
	xc_wait(xc);
	mutex_enter(&tprof_lock);
	tb->tb_softc.sc_ctr_running_mask &= ~stopmask;
	mutex_exit(&tprof_lock);

	/* have all counters stopped? */
	if (tb->tb_softc.sc_ctr_running_mask == 0) {
		mutex_enter(&tprof_lock);
		cv_broadcast(&tprof_reader_cv);
		while (tprof_nworker > 0) {
			cv_wait(&tprof_cv, &tprof_lock);
		}
		mutex_exit(&tprof_lock);

		tprof_stop1();
		if (tb->tb_ops->tbo_disestablish != NULL)
			tb->tb_ops->tbo_disestablish(&tb->tb_softc);
	}
done:
	;
}

static void
tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci)
{
	uint64_t *counters_offset = vp;
	u_int counter = (uintptr_t)vp2;

	tprof_backend_t *tb = tprof_backend;
	tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;
	counters_offset[counter] = param->p_value;
}

static void
tprof_configure_event_cpu(void *arg1, void *arg2)
{
	tprof_backend_t *tb = arg1;
	u_int counter = (uintptr_t)arg2;
	tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param;

	tb->tb_ops->tbo_configure_event(counter, param);
}

static int
tprof_configure_event(const tprof_param_t *param)
{
	tprof_backend_t *tb;
	tprof_backend_softc_t *sc;
	tprof_param_t *sc_param;
	uint64_t xc;
	int c, error;

	if ((param->p_flags & (TPROF_PARAM_USER | TPROF_PARAM_KERN)) == 0) {
		error = EINVAL;
		goto done;
	}

	tb = tprof_backend;
	if (tb == NULL) {
		error = ENOENT;
		goto done;
	}
	sc = &tb->tb_softc;

	c = param->p_counter;
	if (c >= tb->tb_softc.sc_ncounters) {
		error = EINVAL;
		goto done;
	}

	if (tb->tb_ops->tbo_valid_event != NULL) {
		error = tb->tb_ops->tbo_valid_event(param->p_counter, param);
		if (error != 0)
			goto done;
	}

	/* if already running, stop the counter */
	if (ISSET(c, tb->tb_softc.sc_ctr_running_mask))
		tprof_stop(__BIT(c));

	sc->sc_count[c].ctr_bitwidth =
	    tb->tb_ops->tbo_counter_bitwidth(param->p_counter);

	sc_param = &sc->sc_count[c].ctr_param;
	memcpy(sc_param, param, sizeof(*sc_param));	/* save copy of param */

	if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
		uint64_t freq, inum, dnum;

		freq = tb->tb_ops->tbo_counter_estimate_freq(c);
		sc->sc_count[c].ctr_counter_val = freq / TPROF_HZ;
		if (sc->sc_count[c].ctr_counter_val == 0) {
			printf("%s: counter#%d frequency (%"PRIu64") is"
			    " very low relative to TPROF_HZ (%u)\n", __func__,
			    c, freq, TPROF_HZ);
			sc->sc_count[c].ctr_counter_val =
			    4000000000ULL / TPROF_HZ;
		}

		switch (param->p_flags & TPROF_PARAM_VALUE2_MASK) {
		case TPROF_PARAM_VALUE2_SCALE:
			if (sc_param->p_value2 == 0)
				break;
			/*
			 * p_value2 is 64-bit fixed-point:
			 * the upper 32 bits are the integer part and
			 * the lower 32 bits are the fractional part.
			 */
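			/*
			 * For example (values purely illustrative): to
			 * stretch the sampling interval by 1.5x, userland
			 * would pass
			 *	p_value2 = (1ULL << 32) | 0x80000000;
			 * giving inum = 1 and dnum = 0x80000000, so that
			 * ctr_counter_val becomes
			 *	val * 1 + ((val * 0x80000000) >> 32)
			 * i.e. roughly 1.5 * val.
			 */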
			inum = sc_param->p_value2 >> 32;
			dnum = sc_param->p_value2 & __BITS(31, 0);
			sc->sc_count[c].ctr_counter_val =
			    sc->sc_count[c].ctr_counter_val * inum +
			    (sc->sc_count[c].ctr_counter_val * dnum >> 32);
			if (sc->sc_count[c].ctr_counter_val == 0)
				sc->sc_count[c].ctr_counter_val = 1;
			break;
		case TPROF_PARAM_VALUE2_TRIGGERCOUNT:
			if (sc_param->p_value2 == 0)
				sc_param->p_value2 = 1;
			if (sc_param->p_value2 >
			    __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)) {
				sc_param->p_value2 =
				    __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
			}
			sc->sc_count[c].ctr_counter_val = sc_param->p_value2;
			break;
		default:
			break;
		}
		sc->sc_count[c].ctr_counter_reset_val =
		    -sc->sc_count[c].ctr_counter_val;
		sc->sc_count[c].ctr_counter_reset_val &=
		    __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0);
	} else {
		sc->sc_count[c].ctr_counter_val = 0;
		sc->sc_count[c].ctr_counter_reset_val = 0;
	}

	/* At this point, p_value is used as an initial value */
	percpu_foreach(tb->tb_softc.sc_ctr_offset_percpu,
	    tprof_init_percpu_counters_offset, (void *)(uintptr_t)c);
	/* On the backend side, p_value is used as the reset value */
	sc_param->p_value = tb->tb_softc.sc_count[c].ctr_counter_reset_val;

	xc = xc_broadcast(0, tprof_configure_event_cpu,
	    tb, (void *)(uintptr_t)c);
	xc_wait(xc);

	mutex_enter(&tprof_lock);
	/* update counters bitmasks */
	SET(tb->tb_softc.sc_ctr_configured_mask, __BIT(c));
	CLR(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
	CLR(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
	/* a profiled counter requires overflow handling */
	if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) {
		SET(tb->tb_softc.sc_ctr_prof_mask, __BIT(c));
		SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
	}
	/* counters narrower than 64 bits also require overflow handling */
	if (sc->sc_count[c].ctr_bitwidth != 64)
		SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c));
	mutex_exit(&tprof_lock);

	error = 0;

done:
	return error;
}

static void
tprof_getcounts_cpu(void *arg1, void *arg2)
{
	tprof_backend_t *tb = arg1;
	tprof_backend_softc_t *sc = &tb->tb_softc;
	uint64_t *counters = arg2;
	uint64_t *counters_offset;
	unsigned int c;

	tprof_countermask_t configmask = sc->sc_ctr_configured_mask;
	counters_offset = percpu_getref(sc->sc_ctr_offset_percpu);
	for (c = 0; c < sc->sc_ncounters; c++) {
		if (ISSET(configmask, __BIT(c))) {
			uint64_t ctr = tb->tb_ops->tbo_counter_read(c);
			counters[c] = counters_offset[c] +
			    ((ctr - sc->sc_count[c].ctr_counter_reset_val) &
			    __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0));
		} else {
			counters[c] = 0;
		}
	}
	percpu_putref(sc->sc_ctr_offset_percpu);
}

static int
tprof_getcounts(tprof_counts_t *counts)
{
	struct cpu_info *ci;
	tprof_backend_t *tb;
	uint64_t xc;

	tb = tprof_backend;
	if (tb == NULL)
		return ENOENT;

	if (counts->c_cpu >= ncpu)
		return ESRCH;
	ci = cpu_lookup(counts->c_cpu);
	if (ci == NULL)
		return ESRCH;

	xc = xc_unicast(0, tprof_getcounts_cpu, tb, counts->c_count, ci);
	xc_wait(xc);

	counts->c_ncounters = tb->tb_softc.sc_ncounters;
	counts->c_runningmask = tb->tb_softc.sc_ctr_running_mask;
	return 0;
}

/*
 * tprof_clear: drain unread samples.
 */

static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		if (buf != NULL) {
			STAILQ_REMOVE_HEAD(&tprof_list, b_list);
			KASSERT(tprof_nbuf_on_list > 0);
			tprof_nbuf_on_list--;
			mutex_exit(&tprof_lock);
			tprof_buf_free(buf);
			mutex_enter(&tprof_lock);
		}
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

static tprof_backend_t *
tprof_backend_lookup(const char *name)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	LIST_FOREACH(tb, &tprof_backends, tb_list) {
		if (!strcmp(tb->tb_name, name)) {
			return tb;
		}
	}
	return NULL;
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample on the per-cpu buffer.
 *
 * be careful; can be called in NMI context.
 * we bluntly assume that the following are safe to use there:
 *	curcpu()
 *	curlwp->l_lid
 *	curlwp->l_proc->p_pid
 */

void
tprof_sample(void *unused, const tprof_frame_info_t *tfi)
{
	tprof_cpu_t * const c = tprof_cpu_direct(curcpu());
	tprof_buf_t * const buf = c->c_buf;
	tprof_sample_t *sp;
	const uintptr_t pc = tfi->tfi_pc;
	const lwp_t * const l = curlwp;
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	sp = &buf->b_data[idx];
	sp->s_pid = l->l_proc->p_pid;
	sp->s_lwpid = l->l_lid;
	sp->s_cpuid = c->c_cpuid;
	sp->s_flags = ((tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0) |
	    __SHIFTIN(tfi->tfi_counter, TPROF_SAMPLE_COUNTER_MASK);
	sp->s_pc = pc;
	buf->b_used = idx + 1;
}

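/*
 * Rough sketch of how a backend is expected to hook in (illustrative only;
 * "foo" and the set of ops shown are hypothetical -- a real backend fills
 * in whichever tprof_backend_ops_t callbacks it supports):
 *
 *	static const tprof_backend_ops_t tprof_foo_ops = {
 *		.tbo_ident		= tprof_foo_ident,
 *		.tbo_ncounters		= tprof_foo_ncounters,
 *		.tbo_counter_bitwidth	= tprof_foo_counter_bitwidth,
 *		.tbo_counter_read	= tprof_foo_counter_read,
 *		.tbo_counter_estimate_freq = tprof_foo_estimate_freq,
 *		.tbo_configure_event	= tprof_foo_configure_event,
 *		.tbo_start		= tprof_foo_start,
 *		.tbo_stop		= tprof_foo_stop,
 *	};
 *
 *	error = tprof_backend_register("tprof_foo", &tprof_foo_ops,
 *	    TPROF_BACKEND_VERSION);
 *
 * and then to call tprof_sample() from its counter-overflow interrupt.
 */
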
/*
 * tprof_backend_register:
 */

int
tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
    int vers)
{
	tprof_backend_t *tb;

	if (vers != TPROF_BACKEND_VERSION) {
		return EINVAL;
	}

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
	if (tb != NULL) {
		mutex_exit(&tprof_startstop_lock);
		return EEXIST;
	}
#if 1 /* XXX for now */
	if (!LIST_EMPTY(&tprof_backends)) {
		mutex_exit(&tprof_startstop_lock);
		return ENOTSUP;
	}
#endif
	tb = kmem_zalloc(sizeof(*tb), KM_SLEEP);
	tb->tb_name = name;
	tb->tb_ops = ops;
	LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
#if 1 /* XXX for now */
	if (tprof_backend == NULL) {
		tprof_backend = tb;
	}
#endif
	mutex_exit(&tprof_startstop_lock);

	/* init backend softc */
	tb->tb_softc.sc_ncounters = tb->tb_ops->tbo_ncounters();
	tb->tb_softc.sc_ctr_offset_percpu_size =
	    sizeof(uint64_t) * tb->tb_softc.sc_ncounters;
	tb->tb_softc.sc_ctr_offset_percpu =
	    percpu_alloc(tb->tb_softc.sc_ctr_offset_percpu_size);

	return 0;
}

/*
 * tprof_backend_unregister:
 */

int
tprof_backend_unregister(const char *name)
{
	tprof_backend_t *tb;

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
#if defined(DIAGNOSTIC)
	if (tb == NULL) {
		mutex_exit(&tprof_startstop_lock);
		panic("%s: not found '%s'", __func__, name);
	}
#endif /* defined(DIAGNOSTIC) */
	if (tb->tb_softc.sc_ctr_running_mask != 0) {
		mutex_exit(&tprof_startstop_lock);
		return EBUSY;
	}
#if 1 /* XXX for now */
	if (tprof_backend == tb) {
		tprof_backend = NULL;
	}
#endif
	LIST_REMOVE(tb, tb_list);
	mutex_exit(&tprof_startstop_lock);

	/* fini backend softc */
	percpu_free(tb->tb_softc.sc_ctr_offset_percpu,
	    tb->tb_softc.sc_ctr_offset_percpu_size);

	/* free backend */
	kmem_free(tb, sizeof(*tb));

	return 0;
}

/* -------------------- cdevsw interfaces */

static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop(TPROF_COUNTERMASK_ALL);
	tprof_clear();

	tprof_backend_t *tb = tprof_backend;
	if (tb != NULL) {
		KASSERT(tb->tb_softc.sc_ctr_running_mask == 0);
		tb->tb_softc.sc_ctr_configured_mask = 0;
		tb->tb_softc.sc_ctr_prof_mask = 0;
		tb->tb_softc.sc_ctr_ovf_mask = 0;
	}

	mutex_exit(&tprof_startstop_lock);

	return 0;
}

static int
tprof_poll(dev_t dev, int events, struct lwp *l)
{
	int revents;

	revents = events & (POLLIN | POLLRDNORM);
	if (revents == 0)
		return 0;

	mutex_enter(&tprof_lock);
	if (STAILQ_EMPTY(&tprof_list)) {
		revents = 0;
		selrecord(l, &tprof_selp);
	}
	mutex_exit(&tprof_lock);

	return revents;
}

static void
filt_tprof_read_detach(struct knote *kn)
{
	mutex_enter(&tprof_lock);
	selremove_knote(&tprof_selp, kn);
	mutex_exit(&tprof_lock);
}

static int
filt_tprof_read_event(struct knote *kn, long hint)
{
	int rv = 0;

	if ((hint & NOTE_SUBMIT) == 0)
		mutex_enter(&tprof_lock);

	if (!STAILQ_EMPTY(&tprof_list)) {
		tprof_buf_t *buf;
		int64_t n = 0;

		STAILQ_FOREACH(buf, &tprof_list, b_list) {
			n += buf->b_used;
		}
		kn->kn_data = n * sizeof(tprof_sample_t);

		rv = 1;
	}

	if ((hint & NOTE_SUBMIT) == 0)
		mutex_exit(&tprof_lock);

	return rv;
}

static const struct filterops tprof_read_filtops = {
	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach = NULL,
	.f_detach = filt_tprof_read_detach,
	.f_event = filt_tprof_read_event,
};

static int
tprof_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &tprof_read_filtops;
		mutex_enter(&tprof_lock);
		selrecord_knote(&tprof_selp, kn);
		mutex_exit(&tprof_lock);
		break;
	default:
		return EINVAL;
	}

	return 0;
}

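/*
 * Consumer protocol (summary; the usual consumer is the tprof(8) userland
 * tool, device node typically /dev/tprof): configure counters with
 * TPROF_IOC_CONFIGURE_EVENT, start them with TPROF_IOC_START, then read(2)
 * a stream of tprof_sample_t records.  poll(2)/kqueue(2) report readability
 * once a completed buffer is on tprof_list.  A buffer that is not fully
 * consumed is kept on the list and the position within it is remembered in
 * tprof_reader_offset.
 */
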
static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done = 0;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0 || done != 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * if we didn't consume the whole buffer,
		 * put it back to the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}

static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const tprof_param_t *param;
	tprof_counts_t *counts;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETINFO:
		mutex_enter(&tprof_startstop_lock);
		tprof_getinfo(data);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETNCOUNTERS:
		mutex_enter(&tprof_lock);
		error = tprof_getncounters((u_int *)data);
		mutex_exit(&tprof_lock);
		break;
	case TPROF_IOC_START:
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(*(tprof_countermask_t *)data);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop(*(tprof_countermask_t *)data);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	case TPROF_IOC_CONFIGURE_EVENT:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_configure_event(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETCOUNTS:
		counts = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_getcounts(counts);
		mutex_exit(&tprof_startstop_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = tprof_poll,
	.d_mmap = nommap,
	.d_kqfilter = tprof_kqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

void
tprofattach(int nunits)
{

	/* nothing */
}

MODULE(MODULE_CLASS_DRIVER, tprof, NULL);

static void
tprof_cpu_init(void *vcp, void *vcookie, struct cpu_info *ci)
{
	tprof_cpu_t **cp = vcp, *c;

	c = kmem_zalloc(sizeof(*c), KM_SLEEP);
	c->c_buf = NULL;
	c->c_cpuid = cpu_index(ci);
	*cp = c;
}

static void
tprof_cpu_fini(void *vcp, void *vcookie, struct cpu_info *ci)
{
	tprof_cpu_t **cp = vcp, *c;

	c = *cp;
	KASSERT(c->c_cpuid == cpu_index(ci));
	KASSERT(c->c_buf == NULL);
	kmem_free(c, sizeof(*c));
	*cp = NULL;
}

static void
tprof_driver_init(void)
{

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	selinit(&tprof_selp);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprof_rd");
	STAILQ_INIT(&tprof_list);
	tprof_cpus = percpu_create(sizeof(tprof_cpu_t *),
	    tprof_cpu_init, tprof_cpu_fini, NULL);
}

static void
tprof_driver_fini(void)
{

	percpu_free(tprof_cpus, sizeof(tprof_cpu_t *));
	mutex_destroy(&tprof_lock);
	mutex_destroy(&tprof_reader_lock);
	mutex_destroy(&tprof_startstop_lock);
	seldestroy(&tprof_selp);
	cv_destroy(&tprof_cv);
	cv_destroy(&tprof_reader_cv);
}

static int
tprof_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		tprof_driver_init();
#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = NODEVMAJOR;
			int error;

			error = devsw_attach("tprof", NULL, &bmajor,
			    &tprof_cdevsw, &cmajor);
			if (error) {
				tprof_driver_fini();
				return error;
			}
		}
#endif /* defined(_MODULE) */
		return 0;

	case MODULE_CMD_FINI:
#if defined(_MODULE)
		devsw_detach(NULL, &tprof_cdevsw);
#endif /* defined(_MODULE) */
		tprof_driver_fini();
		return 0;

	default:
		return ENOTTY;
	}
}