/*	$OpenBSD: dt_dev.c,v 1.4 2020/02/04 10:56:15 mpi Exp $ */

/*
 * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <dev/dt/dtvar.h>

/*
 * How many frames are used by the profiling code?  For example
 * on amd64:
 *
 * From syscall provider:
 *
 *	dt_prov_syscall_entry+0x141
 *	syscall+0x205		<--- start here
 *	Xsyscall+0x128
 *
 * From profile provider:
 *
 *	dt_prov_profile_enter+0x6e
 *	hardclock+0x12c
 *	clockintr+0x59
 *	intr_handler+0x6e
 *	Xresume_legacy0+0x1d3
 *	cpu_idle_cycle+0x1b	<---- start here.
 *	proc_trampoline+0x1c
 */
#if notyet
#define DT_HOOK_FRAME_ADDRESS	__builtin_frame_address(4)
#else
#define DT_HOOK_FRAME_ADDRESS	__builtin_frame_address(0)
#endif

#define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */

#define DPRINTF(x...)	/* nothing */

/*
 * Descriptor associated with each program opening /dev/dt.  It is used
 * to keep track of enabled PCBs.
 *
 * Locks used to protect struct members in this file:
 *	m	per-softc mutex
 *	k	kernel lock
 */
struct dt_softc {
	SLIST_ENTRY(dt_softc)	 ds_next;	/* [k] descriptor list */
	int			 ds_unit;	/* [I] D_CLONE unique unit */
	pid_t			 ds_pid;	/* [I] PID of tracing program */

	struct mutex		 ds_mtx;

	struct dt_pcb_list	 ds_pcbs;	/* [k] list of enabled PCBs */
	struct dt_evt		*ds_bufqueue;	/* [k] copy evts to userland */
	size_t			 ds_bufqlen;	/* [k] length of the queue */
	int			 ds_recording;	/* [k] currently recording? */
	int			 ds_evtcnt;	/* [m] # of readable evts */

	/* Counters */
	uint64_t		 ds_readevt;	/* [m] # of events read */
	uint64_t		 ds_dropevt;	/* [m] # of events dropped */
};

SLIST_HEAD(, dt_softc) dtdev_list;	/* [k] list of open /dev/dt nodes */

/*
 * Probes are created during dt_attach() and never modified/freed during
 * the lifetime of the system.  That's why we consider them as [I]mmutable.
 */
unsigned int		dt_nprobes;	/* [I] # of probes available */
SIMPLEQ_HEAD(, dt_probe) dt_probe_list; /* [I] list of probes */

struct rwlock		dt_lock = RWLOCK_INITIALIZER("dtlk");
volatile uint32_t	dt_tracing = 0;	/* [k] # of processes tracing */

void	dtattach(struct device *, struct device *, void *);
int	dtopen(dev_t, int, int, struct proc *);
int	dtclose(dev_t, int, int, struct proc *);
int	dtread(dev_t, struct uio *, int);
int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);

struct	dt_softc *dtlookup(int);

int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
int	dt_ioctl_record_start(struct dt_softc *);
void	dt_ioctl_record_stop(struct dt_softc *);
int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
void	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);

int	dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);

void
dtattach(struct device *parent, struct device *self, void *aux)
{
	SLIST_INIT(&dtdev_list);
	SIMPLEQ_INIT(&dt_probe_list);

	/* Init providers */
	dt_nprobes += dt_prov_profile_init();
	dt_nprobes += dt_prov_syscall_init();
	dt_nprobes += dt_prov_static_init();

	printf("dt: %u probes\n", dt_nprobes);
}

int
dtopen(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);
	extern int allowdt;

	if (!allowdt)
		return EPERM;

	KASSERT(dtlookup(unit) == NULL);

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
	if (sc == NULL)
		return ENOMEM;

	/*
	 * Enough space to empty 2 full rings of events in a single read.
	 */
	sc->ds_bufqlen = 2 * DT_EVTRING_SIZE;
	sc->ds_bufqueue = mallocarray(sc->ds_bufqlen, sizeof(*sc->ds_bufqueue),
	    M_DEVBUF, M_WAITOK|M_CANFAIL);
	if (sc->ds_bufqueue == NULL)
		goto bad;

	sc->ds_unit = unit;
	sc->ds_pid = p->p_p->ps_pid;
	TAILQ_INIT(&sc->ds_pcbs);
	mtx_init(&sc->ds_mtx, IPL_HIGH);
	sc->ds_evtcnt = 0;
	sc->ds_readevt = 0;
	sc->ds_dropevt = 0;

	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);

	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);

	return 0;

bad:
	free(sc, M_DEVBUF, sizeof(*sc));
	return ENOMEM;
}

int
dtclose(dev_t dev, int flags, int mode, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);

	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
	dt_ioctl_record_stop(sc);
	dt_pcb_purge(&sc->ds_pcbs);

	free(sc->ds_bufqueue, M_DEVBUF,
	    sc->ds_bufqlen * sizeof(*sc->ds_bufqueue));
	free(sc, M_DEVBUF, sizeof(*sc));

	return 0;
}

int
dtread(dev_t dev, struct uio *uio, int flags)
{
	struct sleep_state sls;
	struct dt_softc *sc;
	struct dt_evt *estq;
	struct dt_pcb *dp;
	int error = 0, unit = minor(dev);
	size_t qlen, count, read = 0;
	uint64_t dropped = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	count = howmany(uio->uio_resid, sizeof(struct dt_evt));
	if (count < 1)
		return (EMSGSIZE);

	while (!sc->ds_evtcnt) {
		sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
		sleep_setup_signal(&sls);
		sleep_finish(&sls, !sc->ds_evtcnt);
		error = sleep_finish_signal(&sls);
		if (error == EINTR || error == ERESTART)
			break;
	}
	if (error)
		return error;

	estq = sc->ds_bufqueue;
	qlen = MIN(sc->ds_bufqlen, count);

	KERNEL_ASSERT_LOCKED();
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
		read += count;
		estq += count;	/* pointer arithmetic */
		qlen -= count;
		if (qlen == 0)
			break;
	}
	if (read > 0)
		uiomove(sc->ds_bufqueue, read * sizeof(struct dt_evt), uio);

	mtx_enter(&sc->ds_mtx);
	sc->ds_evtcnt -= read;
	sc->ds_readevt += read;
	sc->ds_dropevt += dropped;
	mtx_leave(&sc->ds_mtx);

	return 0;
}

int
dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct dt_softc *sc;
	int unit = minor(dev);
	int on, error = 0;

	sc = dtlookup(unit);
	KASSERT(sc != NULL);

	switch (cmd) {
	case DTIOCGPLIST:
		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
	case DTIOCGSTATS:
		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
	case DTIOCRECORD:
	case DTIOCPRBENABLE:
		/* root only ioctl(2) */
		break;
	default:
		return ENOTTY;
	}

	if ((error = suser(p)) != 0)
		return error;

	switch (cmd) {
	case DTIOCRECORD:
		on = *(int *)addr;
		if (on)
			error = dt_ioctl_record_start(sc);
		else
			dt_ioctl_record_stop(sc);
		break;
	case DTIOCPRBENABLE:
		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
		break;
	default:
		KASSERT(0);
	}

	return error;
}

struct dt_softc *
dtlookup(int unit)
{
	struct dt_softc *sc;

	KERNEL_ASSERT_LOCKED();

	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
		if (sc->ds_unit == unit)
			break;
	}

	return sc;
}

int
dtioc_req_isvalid(struct dtioc_req *dtrq)
{
	switch (dtrq->dtrq_filter.dtf_operand) {
	case DT_OP_NONE:
	case DT_OP_EQ:
	case DT_OP_NE:
		break;
	default:
		return 0;
	}

	switch (dtrq->dtrq_filter.dtf_variable) {
	case DT_FV_NONE:
	case DT_FV_PID:
	case DT_FV_TID:
		break;
	default:
		return 0;
	}

	return 1;
}

int
dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
{
	struct dtioc_probe_info info, *dtpi;
	struct dt_probe *dtp;
	size_t size;
	int error = 0;

	if (dtpr->dtpr_size == 0) {
		dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi);
		return 0;
	}

	size = dtpr->dtpr_size;
	dtpi = dtpr->dtpr_probes;
	memset(&info, 0, sizeof(info));
	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (size < sizeof(*dtpi)) {
			error = ENOSPC;
			break;
		}
		info.dtpi_pbn = dtp->dtp_pbn;
		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
		    sizeof(info.dtpi_prov));
		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
		error = copyout(&info, dtpi, sizeof(*dtpi));
		if (error)
			break;
		size -= sizeof(*dtpi);
		dtpi++;
	}

	return error;
}

int
dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
{
	mtx_enter(&sc->ds_mtx);
	dtst->dtst_readevt = sc->ds_readevt;
	dtst->dtst_dropevt = sc->ds_dropevt;
	mtx_leave(&sc->ds_mtx);

	return 0;
}

int
dt_ioctl_record_start(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	if (sc->ds_recording)
		return EBUSY;

	KERNEL_ASSERT_LOCKED();
	if (TAILQ_EMPTY(&sc->ds_pcbs))
		return ENOENT;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
		dtp->dtp_recording++;
		dtp->dtp_prov->dtpv_recording++;
	}
	rw_exit_write(&dt_lock);

	sc->ds_recording = 1;
	dt_tracing++;

	return 0;
}

void
dt_ioctl_record_stop(struct dt_softc *sc)
{
	struct dt_pcb *dp;

	KASSERT(suser(curproc) == 0);

	if (!sc->ds_recording)
		return;

	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);

	dt_tracing--;
	sc->ds_recording = 0;

	rw_enter_write(&dt_lock);
	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
		struct dt_probe *dtp = dp->dp_dtp;

		dtp->dtp_recording--;
		dtp->dtp_prov->dtpv_recording--;
		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
	}
	rw_exit_write(&dt_lock);

	/* Wait until readers cannot access the PCBs. */
	smr_barrier();
}

int
dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
{
	struct dt_pcb_list plist;
	struct dt_probe *dtp;
	struct dt_pcb *dp;
	int error;

	KASSERT(suser(curproc) == 0);

	if (!dtioc_req_isvalid(dtrq))
		return EINVAL;

	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
			break;
	}
	if (dtp == NULL)
		return ENOENT;

	TAILQ_INIT(&plist);
	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
	if (error)
		return error;

	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);

	/* Append all PCBs to this instance */
	while ((dp = TAILQ_FIRST(&plist)) != NULL) {
		TAILQ_REMOVE(&plist, dp, dp_snext);
		TAILQ_INSERT_HEAD(&sc->ds_pcbs, dp, dp_snext);
	}

	return 0;
}

struct dt_probe *
dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
{
	struct dt_probe *dtp;

	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
	if (dtp == NULL)
		return NULL;

	SMR_SLIST_INIT(&dtp->dtp_pcbs);
	dtp->dtp_prov = dtpv;
	dtp->dtp_func = func;
	dtp->dtp_name = name;
	dtp->dtp_sysnum = -1;

	return dtp;
}

void
dt_dev_register_probe(struct dt_probe *dtp)
{
	static uint64_t probe_nb;

	dtp->dtp_pbn = ++probe_nb;
	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
}

struct dt_pcb *
dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
{
	struct dt_pcb *dp;

	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
	if (dp == NULL)
		goto bad;

	dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (dp->dp_ring == NULL)
		goto bad;

	mtx_init(&dp->dp_mtx, IPL_HIGH);
	dp->dp_sc = sc;
	dp->dp_dtp = dtp;
	return dp;
bad:
	dt_pcb_free(dp);
	return NULL;
}

void
dt_pcb_free(struct dt_pcb *dp)
{
	if (dp == NULL)
		return;
	free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
	free(dp, M_DT, sizeof(*dp));
}

void
dt_pcb_purge(struct dt_pcb_list *plist)
{
	struct dt_pcb *dp;

	while ((dp = TAILQ_FIRST(plist)) != NULL) {
		TAILQ_REMOVE(plist, dp, dp_snext);
		dt_pcb_free(dp);
	}
}

int
dt_pcb_filter(struct dt_pcb *dp)
{
	struct dt_filter *dtf = &dp->dp_filter;
	struct proc *p = curproc;
	unsigned int var = 0;
	int match = 1;

	/* Filter out tracing program. */
	if (dp->dp_sc->ds_pid == p->p_p->ps_pid)
		return 1;

	switch (dtf->dtf_variable) {
	case DT_FV_PID:
		var = p->p_p->ps_pid;
		break;
	case DT_FV_TID:
		var = p->p_tid;
		break;
	case DT_FV_NONE:
		break;
	default:
		KASSERT(0);
	}

	switch (dtf->dtf_operand) {
	case DT_OP_EQ:
		match = !!(var == dtf->dtf_value);
		break;
	case DT_OP_NE:
		match = !!(var != dtf->dtf_value);
		break;
	case DT_OP_NONE:
		break;
	default:
		KASSERT(0);
	}

	return !match;
}

/*
 * Get a reference to the next free event state from the ring.
 */
struct dt_evt *
dt_pcb_ring_get(struct dt_pcb *dp)
{
	struct proc *p = curproc;
	struct dt_evt *dtev;
	int distance;

	if (dt_pcb_filter(dp))
		return NULL;

	mtx_enter(&dp->dp_mtx);
	/*
	 * Events are written at dp_cons and read(2) drains from dp_prod.
	 * Keep one slot free: if the next write would catch up with the
	 * reading index, drop the event instead of overwriting it.
	 */
	distance = dp->dp_prod - dp->dp_cons;
	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
		/* read(2) isn't finished */
		dp->dp_dropevt++;
		mtx_leave(&dp->dp_mtx);
		return NULL;
	}

	/*
	 * Save state in the next free event slot.
	 */
	dtev = &dp->dp_ring[dp->dp_cons];
	memset(dtev, 0, sizeof(*dtev));

	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
	dtev->dtev_cpu = cpu_number();
	dtev->dtev_pid = p->p_p->ps_pid;
	dtev->dtev_tid = p->p_tid;
	nanotime(&dtev->dtev_tsp);

	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
		memcpy(dtev->dtev_comm, p->p_p->ps_comm, DTMAXCOMLEN - 1);

	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK|DTEVT_USTACK)) {
#if notyet
		stacktrace_save_at(&dtev->dtev_kstack, DT_HOOK_FRAME_ADDRESS);
#else
		stacktrace_save(&dtev->dtev_kstack);
#endif
	}

	return dtev;
}

void
dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
{
	MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
	KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);

	dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
	mtx_leave(&dp->dp_mtx);

	mtx_enter(&dp->dp_sc->ds_mtx);
	dp->dp_sc->ds_evtcnt++;
	mtx_leave(&dp->dp_sc->ds_mtx);
	wakeup(dp->dp_sc);
}

/*
 * Copy at most `qlen' events from `dp', producing the same amount
 * of free slots.
 */
int
dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
    uint64_t *dropped)
{
	size_t count, copied = 0;
	unsigned int cons, prod;

	KASSERT(qlen > 0);

	mtx_enter(&dp->dp_mtx);
	cons = dp->dp_cons;
	prod = dp->dp_prod;

	if (cons < prod)
		count = DT_EVTRING_SIZE - prod;
	else
		count = cons - prod;

	if (count == 0)
		goto out;

	*dropped += dp->dp_dropevt;
	dp->dp_dropevt = 0;

	count = MIN(count, qlen);

	memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
	copied += count;

	/* Produce */
	prod = (prod + count) % DT_EVTRING_SIZE;

	/* If the queue is full or the ring didn't wrap, stop here. */
	if (qlen == copied || prod != 0 || cons == 0)
		goto out;

	count = MIN(cons, (qlen - copied));
	memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
	copied += count;
	prod += count;

out:
	dp->dp_prod = prod;
	mtx_leave(&dp->dp_mtx);
	return copied;
}