1 /* $OpenBSD: dt_dev.c,v 1.42 2024/12/04 09:37:33 mpi Exp $ */ 2 3 /* 4 * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 #include <sys/systm.h> 21 #include <sys/param.h> 22 #include <sys/clockintr.h> 23 #include <sys/device.h> 24 #include <sys/exec_elf.h> 25 #include <sys/malloc.h> 26 #include <sys/proc.h> 27 #include <sys/ptrace.h> 28 29 #include <machine/intr.h> 30 31 #include <dev/dt/dtvar.h> 32 33 /* 34 * Number of frames to skip in stack traces. 35 * 36 * The number of frames required to execute dt(4) profiling code 37 * depends on the probe, context, architecture and possibly the 38 * compiler. 39 * 40 * Static probes (tracepoints) are executed in the context of the 41 * current thread and only need to skip frames up to the recording 42 * function. For example the syscall provider: 43 * 44 * dt_prov_syscall_entry+0x141 45 * syscall+0x205 <--- start here 46 * Xsyscall+0x128 47 * 48 * Probes executed in their own context, like the profile provider, 49 * need to skip the frames of that context which are different for 50 * every architecture. 
For example the profile provider executed
 * from hardclock(9) on amd64:
 *
 *	dt_prov_profile_enter+0x6e
 *	hardclock+0x1a9
 *	lapic_clockintr+0x3f
 *	Xresume_lapic_ltimer+0x26
 *	acpicpu_idle+0x1d2	<---- start here.
 *	sched_idle+0x225
 *	proc_trampoline+0x1c
 */
#if defined(__amd64__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__i386__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__macppc__)
#define DT_FA_PROFILE	5
#define DT_FA_STATIC	2
#elif defined(__octeon__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__powerpc64__)
#define DT_FA_PROFILE	6
#define DT_FA_STATIC	2
#elif defined(__sparc64__)
#define DT_FA_PROFILE	7
#define DT_FA_STATIC	1
#else
/* Unknown architecture: do not skip any frame. */
#define DT_FA_STATIC	0
#define DT_FA_PROFILE	0
#endif

#define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */

#define DPRINTF(x...)	/* nothing */

/*
 * Per-CPU Event States
 *
 * Each open device keeps one ring of event slots per CPU.  The CPU
 * executing a probe writes at `dc_cons' and the thread doing read(2)
 * consumes from `dc_prod'; see dt_pcb_ring_get() and dt_ring_copy().
 *
 * Locks used to protect struct members:
 *	r	owned by thread doing read(2)
 *	c	owned by CPU
 *	s	sliced ownership, based on read/write indexes
 *	p	written by CPU, read by thread doing read(2)
 */
struct dt_cpubuf {
	unsigned int		 dc_prod;	/* [r] read index */
	unsigned int		 dc_cons;	/* [c] write index */
	struct dt_evt		*dc_ring;	/* [s] ring of event states */
	unsigned int		 dc_inevt;	/* [c] in event already? */

	/* Counters */
	unsigned int		 dc_dropevt;	/* [p] # of events dropped */
	unsigned int		 dc_skiptick;	/* [p] # of ticks skipped */
	unsigned int		 dc_recurevt;	/* [p] # of recursive events */
	unsigned int		 dc_readevt;	/* [r] # of events read */
};

/*
 * Descriptor associated with each program opening /dev/dt.  It is used
 * to keep track of enabled PCBs.
 *
 * Locks used to protect struct members in this file:
 *	a	atomic
 *	K	kernel lock
 *	r	owned by thread doing read(2)
 *	I	invariant after initialization
 */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */
	void			*ds_si;		/* [I] to defer wakeup(9) */

	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
	int			 ds_recording;	/* [K] currently recording? */
	unsigned int		 ds_evtcnt;	/* [a] # of readable evts */

	struct dt_cpubuf	 ds_cpu[MAXCPUS]; /* [I] Per-cpu event states */
	unsigned int		 ds_lastcpu;	/* [r] last CPU ring read(2). */
};

SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */

/*
 * Probes are created during dt_attach() and never modified/freed during
 * the lifetime of the system. That's why we consider them as [I]mmutable.
 */
unsigned int			dt_nprobes;	/* [I] # of probes available */
SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */

struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */

int allowdt;	/* [a] set via the kern.allowdt sysctl */

void	dtattach(struct device *, struct device *, void *);
int	dtopen(dev_t, int, int, struct proc *);
int	dtclose(dev_t, int, int, struct proc *);
int	dtread(dev_t, struct uio *, int);
int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);

struct	dt_softc *dtlookup(int);
struct	dt_softc *dtalloc(void);
void	dtfree(struct dt_softc *);

int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
int	dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *);
int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
int	dt_ioctl_record_start(struct dt_softc *);
void	dt_ioctl_record_stop(struct dt_softc *);
int	dt_ioctl_probe_enable(struct
    dt_softc *, struct dtioc_req *);
int	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
int	dt_ioctl_get_auxbase(struct dt_softc *, struct dtioc_getaux *);

int	dt_ring_copy(struct dt_cpubuf *, struct uio *, size_t, size_t *);

void	dt_wakeup(struct dt_softc *);
void	dt_deferred_wakeup(void *);

/*
 * Autoconf attach glue: initialize the global lists and register every
 * probe provider.  Probes are only created here and never freed.
 */
void
dtattach(struct device *parent, struct device *self, void *aux)
{
	SLIST_INIT(&dtdev_list);
	SIMPLEQ_INIT(&dt_probe_list);

	/* Init providers */
	dt_nprobes += dt_prov_profile_init();
	dt_nprobes += dt_prov_syscall_init();
	dt_nprobes += dt_prov_static_init();
#ifdef DDBPROF
	dt_nprobes += dt_prov_kprobe_init();
#endif
}

/*
 * open(2): allocate a per-open descriptor for this D_CLONE unit.
 * Fails with EPERM unless tracing has been allowed via kern.allowdt.
 */
int
dtopen(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);

	if (atomic_load_int(&allowdt) == 0)
		return EPERM;

	/* Allocate first: dtalloc() may sleep. */
	sc = dtalloc();
	if (sc == NULL)
		return ENOMEM;

	/* no sleep after this point */
	if (dtlookup(unit) != NULL) {
		dtfree(sc);
		return EBUSY;
	}

	sc->ds_unit = unit;
	sc->ds_pid = p->p_p->ps_pid;
	TAILQ_INIT(&sc->ds_pcbs);
	sc->ds_lastcpu = 0;
	sc->ds_evtcnt = 0;

	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);

	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);

	return 0;
}

/*
 * close(2): stop recording if needed and release the descriptor
 * together with all its enabled PCBs.
 */
int
dtclose(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);

	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
	dt_ioctl_record_stop(sc);
	dt_pcb_purge(&sc->ds_pcbs);
	dtfree(sc);

	return 0;
}

/*
 * read(2): sleep until at least one event is available, then copy out
 * whole events from the per-CPU rings.  Rings are drained round-robin
 * starting after the last CPU serviced by the previous read(2) so a
 * busy CPU cannot starve the others.
 */
int
dtread(dev_t dev, struct uio *uio, int flags)
{
	struct dt_softc *sc;
	struct dt_cpubuf *dc;
	int i, error = 0, unit = minor(dev);
	size_t count, max, read = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	/* Only full struct dt_evt records are ever transferred. */
	max = howmany(uio->uio_resid, sizeof(struct dt_evt));
	if (max < 1)
		return (EMSGSIZE);

	while (!atomic_load_int(&sc->ds_evtcnt)) {
		sleep_setup(sc, PWAIT | PCATCH, "dtread");
		error = sleep_finish(0, !atomic_load_int(&sc->ds_evtcnt));
		if (error == EINTR || error == ERESTART)
			break;
	}
	if (error)
		return error;

	KERNEL_ASSERT_LOCKED();
	for (i = 0; i < ncpusfound; i++) {
		count = 0;
		dc = &sc->ds_cpu[(sc->ds_lastcpu + i) % ncpusfound];
		error = dt_ring_copy(dc, uio, max, &count);
		if (error && count == 0)
			break;

		read += count;
		max -= count;
		if (max == 0)
			break;
	}
	/* Remember where to resume draining on the next read(2). */
	sc->ds_lastcpu += i % ncpusfound;

	atomic_sub_int(&sc->ds_evtcnt, read);

	return error;
}

/*
 * ioctl(2): query ioctls are available to any opener; ioctls that
 * change tracing state additionally require root (suser).
 */
int
dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);
	int on, error = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	switch (cmd) {
	case DTIOCGPLIST:
		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
	case DTIOCGARGS:
		return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr);
	case DTIOCGSTATS:
		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
	case DTIOCRECORD:
	case DTIOCPRBENABLE:
	case DTIOCPRBDISABLE:
	case DTIOCGETAUXBASE:
		/* root only ioctl(2) */
		break;
	default:
		return ENOTTY;
	}

	if ((error = suser(p)) != 0)
		return error;

	switch (cmd) {
	case DTIOCRECORD:
		on = *(int *)addr;
		if (on)
			error = dt_ioctl_record_start(sc);
		else
			dt_ioctl_record_stop(sc);
		break;
	case DTIOCPRBENABLE:
		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCPRBDISABLE:
		error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
		break;
	case DTIOCGETAUXBASE:
		error = dt_ioctl_get_auxbase(sc, (struct dtioc_getaux *)addr);
break; 329 default: 330 KASSERT(0); 331 } 332 333 return error; 334 } 335 336 struct dt_softc * 337 dtlookup(int unit) 338 { 339 struct dt_softc *sc; 340 341 KERNEL_ASSERT_LOCKED(); 342 343 SLIST_FOREACH(sc, &dtdev_list, ds_next) { 344 if (sc->ds_unit == unit) 345 break; 346 } 347 348 return sc; 349 } 350 351 struct dt_softc * 352 dtalloc(void) 353 { 354 struct dt_softc *sc; 355 struct dt_evt *dtev; 356 int i; 357 358 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 359 if (sc == NULL) 360 return NULL; 361 362 for (i = 0; i < ncpusfound; i++) { 363 dtev = mallocarray(DT_EVTRING_SIZE, sizeof(*dtev), M_DEVBUF, 364 M_WAITOK|M_CANFAIL|M_ZERO); 365 if (dtev == NULL) 366 break; 367 sc->ds_cpu[i].dc_ring = dtev; 368 } 369 if (i < ncpusfound) { 370 dtfree(sc); 371 return NULL; 372 } 373 374 sc->ds_si = softintr_establish(IPL_SOFTCLOCK, dt_deferred_wakeup, sc); 375 if (sc->ds_si == NULL) { 376 dtfree(sc); 377 return NULL; 378 } 379 380 return sc; 381 } 382 383 void 384 dtfree(struct dt_softc *sc) 385 { 386 struct dt_evt *dtev; 387 int i; 388 389 if (sc->ds_si != NULL) 390 softintr_disestablish(sc->ds_si); 391 392 for (i = 0; i < ncpusfound; i++) { 393 dtev = sc->ds_cpu[i].dc_ring; 394 free(dtev, M_DEVBUF, DT_EVTRING_SIZE * sizeof(*dtev)); 395 } 396 free(sc, M_DEVBUF, sizeof(*sc)); 397 } 398 399 int 400 dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr) 401 { 402 struct dtioc_probe_info info, *dtpi; 403 struct dt_probe *dtp; 404 size_t size; 405 int error = 0; 406 407 size = dtpr->dtpr_size; 408 dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi); 409 if (size == 0) 410 return 0; 411 412 dtpi = dtpr->dtpr_probes; 413 SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { 414 if (size < sizeof(*dtpi)) { 415 error = ENOSPC; 416 break; 417 } 418 memset(&info, 0, sizeof(info)); 419 info.dtpi_pbn = dtp->dtp_pbn; 420 info.dtpi_nargs = dtp->dtp_nargs; 421 strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name, 422 sizeof(info.dtpi_prov)); 423 strlcpy(info.dtpi_func, 
dtp->dtp_func, sizeof(info.dtpi_func)); 424 strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name)); 425 error = copyout(&info, dtpi, sizeof(*dtpi)); 426 if (error) 427 break; 428 size -= sizeof(*dtpi); 429 dtpi++; 430 } 431 432 return error; 433 } 434 435 int 436 dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar) 437 { 438 struct dtioc_arg_info info, *dtai; 439 struct dt_probe *dtp; 440 size_t size, n, t; 441 uint32_t pbn; 442 int error = 0; 443 444 pbn = dtar->dtar_pbn; 445 if (pbn == 0 || pbn > dt_nprobes) 446 return EINVAL; 447 448 SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { 449 if (pbn == dtp->dtp_pbn) 450 break; 451 } 452 if (dtp == NULL) 453 return EINVAL; 454 455 if (dtp->dtp_sysnum != 0) { 456 /* currently not supported for system calls */ 457 dtar->dtar_size = 0; 458 return 0; 459 } 460 461 size = dtar->dtar_size; 462 dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtar); 463 if (size == 0) 464 return 0; 465 466 t = 0; 467 dtai = dtar->dtar_args; 468 for (n = 0; n < dtp->dtp_nargs; n++) { 469 if (size < sizeof(*dtai)) { 470 error = ENOSPC; 471 break; 472 } 473 if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL) 474 continue; 475 memset(&info, 0, sizeof(info)); 476 info.dtai_pbn = dtp->dtp_pbn; 477 info.dtai_argn = t++; 478 strlcpy(info.dtai_argtype, dtp->dtp_argtype[n], 479 sizeof(info.dtai_argtype)); 480 error = copyout(&info, dtai, sizeof(*dtai)); 481 if (error) 482 break; 483 size -= sizeof(*dtai); 484 dtai++; 485 } 486 dtar->dtar_size = t * sizeof(*dtar); 487 488 return error; 489 } 490 491 int 492 dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst) 493 { 494 struct dt_cpubuf *dc; 495 uint64_t readevt, dropevt, skiptick, recurevt; 496 int i; 497 498 readevt = dropevt = skiptick = 0; 499 for (i = 0; i < ncpusfound; i++) { 500 dc = &sc->ds_cpu[i]; 501 502 membar_consumer(); 503 dropevt += dc->dc_dropevt; 504 skiptick = dc->dc_skiptick; 505 recurevt = dc->dc_recurevt; 506 readevt += dc->dc_readevt; 507 } 508 509 
	dtst->dtst_readevt = readevt;
	dtst->dtst_dropevt = dropevt;
	dtst->dtst_skiptick = skiptick;
	dtst->dtst_recurevt = recurevt;
	return 0;
}

/*
 * DTIOCRECORD(on): start recording.  Every enabled PCB is published
 * on its probe's SMR list so tracepoints can see it, and periodic
 * PCBs (dp_nsecs != 0, e.g. the profile provider) get their clock
 * interrupt bound and scheduled.
 */
int
dt_ioctl_record_start(struct dt_softc *sc)
{
	uint64_t now;
	struct dt_pcb *dp;

	if (sc->ds_recording)
		return EBUSY;

	KERNEL_ASSERT_LOCKED();
	if (TAILQ_EMPTY(&sc->ds_pcbs))
		return ENOENT;

	rw_enter_write(&dt_lock);
	now = nsecuptime();
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
		dtp->dtp_recording++;
		dtp->dtp_prov->dtpv_recording++;

		if (dp->dp_nsecs != 0) {
			clockintr_bind(&dp->dp_clockintr, dp->dp_cpu, dt_clock,
			    dp);
			clockintr_schedule(&dp->dp_clockintr,
			    now + dp->dp_nsecs);
		}
	}
	rw_exit_write(&dt_lock);

	sc->ds_recording = 1;
	dt_tracing++;

	return 0;
}

/*
 * DTIOCRECORD(off): stop recording.  Unbinds clock interrupts,
 * unpublishes every PCB and waits for SMR readers before returning,
 * after which the PCBs may safely be freed.  No-op when not recording.
 */
void
dt_ioctl_record_stop(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (!sc->ds_recording)
		return;

	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);

	dt_tracing--;
	sc->ds_recording = 0;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		/*
		 * Set an execution barrier to ensure the shared
		 * reference to dp is inactive.
		 */
		if (dp->dp_nsecs != 0)
			clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);

		dtp->dtp_recording--;
		dtp->dtp_prov->dtpv_recording--;
		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
	}
	rw_exit_write(&dt_lock);

	/* Wait until readers cannot access the PCBs. */
	smr_barrier();
}

/*
 * DTIOCPRBENABLE: let the probe's provider allocate PCB(s) for this
 * descriptor.  At most one enabled PCB per probe number is allowed.
 */
int
dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_pcb_list plist;
	struct dt_probe *dtp;
	struct dt_pcb *dp;
	int error;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	/* Only allow one probe of each type. */
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		if (dp->dp_dtp->dtp_pbn == dtrq->dtrq_pbn)
			return EEXIST;
	}

	TAILQ_INIT(&plist);
	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
	if (error)
		return error;

	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);

	/* Append all PCBs to this instance */
	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);

	return 0;
}

/*
 * DTIOCPRBDISABLE: let the probe's provider release what it allocated
 * for this descriptor, when it implements deallocation.
 */
int
dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_probe *dtp;
	int error;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	if (dtp->dtp_prov->dtpv_dealloc) {
		error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq);
		if (error)
			return error;
	}

	/*
	 * NOTE(review): format string has two conversions but three
	 * arguments are passed; harmless while DPRINTF is a no-op, but
	 * worth fixing if it is ever enabled.
	 */
	DPRINTF("dt%d: pid %d dealloc\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn);

	return 0;
}

/*
 * DTIOCGETAUXBASE: read the traced process' ELF auxiliary vector out
 * of its address space and report the AUX_base entry, or 0 when it is
 * not found.
 */
int
dt_ioctl_get_auxbase(struct dt_softc *sc, struct dtioc_getaux *dtga)
{
	struct uio uio;
	struct iovec iov;
	struct process *pr;
	struct proc *p = curproc;
	AuxInfo auxv[ELF_AUX_ENTRIES];
	int i, error;

	dtga->dtga_auxbase = 0;

	if ((pr = prfind(dtga->dtga_pid)) == NULL)
		return ESRCH;

	iov.iov_base = auxv;
	iov.iov_len = sizeof(auxv);
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = pr->ps_auxinfo;
	uio.uio_resid = sizeof(auxv);
	uio.uio_segflg
	    = UIO_SYSSPACE;
	uio.uio_procp = p;
	uio.uio_rw = UIO_READ;

	error = process_domem(p, pr, &uio, PT_READ_D);
	if (error)
		return error;

	for (i = 0; i < ELF_AUX_ENTRIES; i++)
		if (auxv[i].au_id == AUX_base)
			dtga->dtga_auxbase = auxv[i].au_v;

	return 0;
}

/*
 * Allocate a new probe.  Returns NULL if out of memory.  Probes are
 * registered with dt_dev_register_probe() and never freed.
 */
struct dt_probe *
dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
{
	struct dt_probe *dtp;

	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
	if (dtp == NULL)
		return NULL;

	SMR_SLIST_INIT(&dtp->dtp_pcbs);
	dtp->dtp_prov = dtpv;
	dtp->dtp_func = func;
	dtp->dtp_name = name;
	/* -1: probe not associated with a system call. */
	dtp->dtp_sysnum = -1;
	dtp->dtp_ref = 0;

	return dtp;
}

/*
 * Assign a unique, non-zero probe number and publish the probe on the
 * global list.
 */
void
dt_dev_register_probe(struct dt_probe *dtp)
{
	static uint64_t probe_nb;

	dtp->dtp_pbn = ++probe_nb;
	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
}

/*
 * Allocate a PCB tying a probe to a descriptor.  May sleep; returns
 * NULL if out of memory.
 */
struct dt_pcb *
dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
{
	struct dt_pcb *dp;

	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
	if (dp == NULL)
		return NULL;

	dp->dp_sc = sc;
	dp->dp_dtp = dtp;
	return dp;
}

void
dt_pcb_free(struct dt_pcb *dp)
{
	free(dp, M_DT, sizeof(*dp));
}

/*
 * Free every PCB on the given list.
 */
void
dt_pcb_purge(struct dt_pcb_list *plist)
{
	struct dt_pcb *dp;

	while ((dp = TAILQ_FIRST(plist)) != NULL) {
		TAILQ_REMOVE(plist, dp, dp_snext);
		dt_pcb_free(dp);
	}
}

/*
 * Account for clock ticks that fired without recording an event.
 */
void
dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip)
{
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	dc->dc_skiptick += skip;
	/* Make the counter visible to the thread doing read(2). */
	membar_producer();
}

/*
 * Get a reference to the next free event state from the ring.
 *
 * Returns NULL, and accounts for the event in the per-CPU counters,
 * when a probe fires recursively on this CPU or when the ring is full
 * because read(2) has not caught up yet.  On success the slot stays
 * owned by the caller until dt_pcb_ring_consume() publishes it.
 */
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
{
	struct proc *p = curproc;
	struct dt_evt *dtev;
	int prod, cons, distance;
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	/* Recursion guard: a probe firing from inside a probe. */
	if (dc->dc_inevt == 1) {
		dc->dc_recurevt++;
		membar_producer();
		return NULL;
	}

	dc->dc_inevt = 1;

	/* Pairs with membar_producer() in dt_ring_copy(). */
	membar_consumer();
	prod = dc->dc_prod;
	cons = dc->dc_cons;
	distance = prod - cons;
	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
		/* read(2) isn't finished */
		dc->dc_dropevt++;
		membar_producer();

		dc->dc_inevt = 0;
		return NULL;
	}

	/*
	 * Save states in next free event slot.
	 */
	dtev = &dc->dc_ring[cons];
	memset(dtev, 0, sizeof(*dtev));

	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
	dtev->dtev_cpu = cpu_number();
	dtev->dtev_pid = p->p_p->ps_pid;
	dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
	nanotime(&dtev->dtev_tsp);

	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
		strlcpy(dtev->dtev_comm, p->p_p->ps_comm, sizeof(dtev->dtev_comm));

	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) {
		/* Skip the frames of the tracing machinery itself. */
		if (profiling)
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
		else
			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
	}
	if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
		stacktrace_save_utrace(&dtev->dtev_ustack);

	return dtev;
}

/*
 * Publish the event slot obtained from dt_pcb_ring_get() and notify
 * the reader via the deferred wakeup soft interrupt.
 */
void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
	struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()];

	KASSERT(dtev == &dc->dc_ring[dc->dc_cons]);

	dc->dc_cons = (dc->dc_cons + 1) % DT_EVTRING_SIZE;
	/* Publish the slot before dropping the recursion guard. */
	membar_producer();

	atomic_inc_int(&dp->dp_sc->ds_evtcnt);
	dc->dc_inevt = 0;

	dt_wakeup(dp->dp_sc);
}

/*
 * Copy at most `max' events from `dc', producing the same amount
 * of free slots.  Copies in at most two chunks: up to the end of the
 * ring, then, if it wrapped, from slot 0 up to the write index.
 * Returns 0 or the uiomove(9) error; `*rcvd' is the number of events
 * copied out either way.
 */
int
dt_ring_copy(struct dt_cpubuf *dc, struct uio *uio, size_t max, size_t *rcvd)
{
	size_t count, copied = 0;
	unsigned int cons, prod;
	int error = 0;

	KASSERT(max > 0);

	/* Pairs with membar_producer() in dt_pcb_ring_consume(). */
	membar_consumer();
	cons = dc->dc_cons;
	prod = dc->dc_prod;

	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	if (count == 0)
		return 0;

	count = MIN(count, max);
	error = uiomove(&dc->dc_ring[prod], count * sizeof(struct dt_evt), uio);
	if (error)
		return error;
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the ring didn't wrap, stop here. */
	if (max == copied || prod != 0 || cons == 0)
		goto out;

	/* Copy the wrapped-around part of the ring. */
	count = MIN(cons, (max - copied));
	error = uiomove(&dc->dc_ring[0], count * sizeof(struct dt_evt), uio);
	if (error)
		goto out;

	copied += count;
	prod += count;

out:
	dc->dc_readevt += copied;
	dc->dc_prod = prod;
	membar_producer();

	*rcvd = copied;
	return error;
}

void
dt_wakeup(struct dt_softc *sc)
{
	/*
	 * It is not always safe or possible to call wakeup(9) and grab
	 * the SCHED_LOCK() from a given tracepoint. This is true for
	 * any tracepoint that might trigger inside the scheduler or at
	 * any IPL higher than IPL_SCHED. For this reason use a soft-
	 * interrupt to defer the wakeup.
	 */
	softintr_schedule(sc->ds_si);
}

/* Soft interrupt handler: wake up the thread sleeping in dtread(). */
void
dt_deferred_wakeup(void *arg)
{
	struct dt_softc *sc = arg;

	wakeup(sc);
}