/*	$OpenBSD: kern_ktrace.c,v 1.106 2022/02/22 17:14:14 deraadt Exp $	*/
/*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/fcntl.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/pledge.h>

#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

void	ktrinitheaderraw(struct ktr_header *, uint, pid_t, pid_t);
void	ktrinitheader(struct ktr_header *, struct proc *, int);
int	ktrstart(struct proc *, struct vnode *, struct ucred *);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, const void *, size_t);
int	ktrwrite2(struct proc *, struct ktr_header *, const void *, size_t,
	    const void *, size_t);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, struct iovec *);
int	ktrcanset(struct proc *, struct process *);

/*
 * Clear the trace settings in a correct way (to avoid races).
 */
void
ktrcleartrace(struct process *pr)
{
	struct vnode *vp;
	struct ucred *cred;

	if (pr->ps_tracevp != NULL) {
		vp = pr->ps_tracevp;
		cred = pr->ps_tracecred;

		pr->ps_traceflag = 0;
		pr->ps_tracevp = NULL;
		pr->ps_tracecred = NULL;

		vp->v_writecount--;
		vrele(vp);
		crfree(cred);
	}
}

/*
 * Change the trace setting in a correct way (to avoid races).
 */
void
ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
    struct ucred *newcred)
{
	struct vnode *oldvp;
	struct ucred *oldcred;

	KASSERT(newvp != NULL);
	KASSERT(newcred != NULL);

	pr->ps_traceflag |= facs;

	/* nothing to change about where the trace goes? */
	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
		return;

	vref(newvp);
	crhold(newcred);
	newvp->v_writecount++;

	oldvp = pr->ps_tracevp;
	oldcred = pr->ps_tracecred;

	pr->ps_tracevp = newvp;
	pr->ps_tracecred = newcred;

	if (oldvp != NULL) {
		oldvp->v_writecount--;
		vrele(oldvp);
		crfree(oldcred);
	}
}

void
ktrinitheaderraw(struct ktr_header *kth, uint type, pid_t pid, pid_t tid)
{
	memset(kth, 0, sizeof(struct ktr_header));
	kth->ktr_type = type;
	nanotime(&kth->ktr_time);
	kth->ktr_pid = pid;
	kth->ktr_tid = tid;
}

void
ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
{
	struct process *pr = p->p_p;

	ktrinitheaderraw(kth, type, pr->ps_pid, p->p_tid + THREAD_PID_OFFSET);
	memcpy(kth->ktr_comm, pr->ps_comm, sizeof(kth->ktr_comm));
}

int
ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
{
	struct ktr_header kth;

	ktrinitheaderraw(&kth, htobe32(KTR_START), -1, -1);
	return (ktrwriteraw(p, vp, cred, &kth, NULL));
}

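/*
 * Record a system call entry: the code, the raw arguments and, for
 * sysctl(2), the user's mib[] array as well.
 */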
void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct ktr_header kth;
	struct ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	int i;

	if (code == SYS_sysctl) {
		/*
		 * The sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = lmin(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int)))
		memset(argp, 0, nargs * sizeof(int));
	ktrwrite(p, &kth, ktp, len);
	free(ktp, M_TEMP, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrsysret(struct proc *p, register_t code, int error,
    const register_t retval[2])
{
	struct ktr_header kth;
	struct ktr_sysret ktp;
	int len;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSRET);
	ktp.ktr_code = code;
	ktp.ktr_error = error;
	if (error)
		len = 0;
	else if (code == SYS_lseek)
		/* the one exception: lseek on ILP32 needs more */
		len = sizeof(long long);
#if 1
	else if (code == SYS_pad_lseek)
		len = sizeof(long long);
#endif
	else
		len = sizeof(register_t);
	ktrwrite2(p, &kth, &ktp, sizeof(ktp), retval, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrnamei(struct proc *p, char *path)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_NAMEI);
	ktrwrite(p, &kth, path, strlen(path));
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

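/*
 * Record generic I/O on a file descriptor.  The user data is copied in
 * and written out at most a page at a time, yielding the cpu between
 * chunks so a huge transfer cannot hog the processor.
 */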
void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
    ssize_t len)
{
	struct ktr_header kth;
	struct ktr_genio ktp;
	caddr_t cp;
	int count, error;
	int buflen;

	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len + sizeof(struct ktr_genio);

	ktrinitheader(&kth, p, KTR_GENIO);
	ktp.ktr_fd = fd;
	ktp.ktr_rw = rw;

	cp = malloc(buflen, M_TEMP, M_WAITOK);
	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		sched_pause(preempt);

		count = lmin(iov->iov_len, buflen);
		if (count > len)
			count = len;
		if (copyin(iov->iov_base, cp, count))
			break;

		KERNEL_LOCK();
		error = ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count);
		KERNEL_UNLOCK();
		if (error != 0)
			break;

		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		if (iov->iov_len == 0)
			iov++;

		len -= count;
	}

	free(cp, M_TEMP, buflen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
    siginfo_t *si)
{
	struct ktr_header kth;
	struct ktr_psig kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PSIG);
	kp.signo = (char)sig;
	kp.action = action;
	kp.mask = mask;
	kp.code = code;
	kp.si = *si;

	KERNEL_LOCK();
	ktrwrite(p, &kth, &kp, sizeof(kp));
	KERNEL_UNLOCK();
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_STRUCT);

	if (data == NULL)
		datalen = 0;
	KERNEL_LOCK();
	ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen);
	KERNEL_UNLOCK();
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

int
ktruser(struct proc *p, const char *id, const void *addr, size_t len)
{
	struct ktr_header kth;
	struct ktr_user ktp;
	int error;
	void *memp;
#define	STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if (!KTRPOINT(p, KTR_USER))
		return (0);
	if (len > KTR_USER_MAXLEN)
		return (EINVAL);

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_USER);
	memset(ktp.ktr_id, 0, KTR_USER_MAXIDLEN);
	error = copyinstr(id, ktp.ktr_id, KTR_USER_MAXIDLEN, NULL);
	if (error == 0) {
		if (len > sizeof(stkbuf))
			memp = malloc(len, M_TEMP, M_WAITOK);
		else
			memp = stkbuf;
		error = copyin(addr, memp, len);
		if (error == 0)
			ktrwrite2(p, &kth, &ktp, sizeof(ktp), memp, len);
		if (memp != stkbuf)
			free(memp, M_TEMP, len);
	}
	atomic_clearbits_int(&p->p_flag, P_INKTR);
	return (error);
}

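/*
 * Record exec arguments or environment strings (KTR_EXECARGS or
 * KTR_EXECENV), written out at most a page at a time.
 */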
void
ktrexec(struct proc *p, int type, const char *data, ssize_t len)
{
	struct ktr_header kth;
	int count;
	int buflen;

	assert(type == KTR_EXECARGS || type == KTR_EXECENV);
	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len;

	ktrinitheader(&kth, p, type);

	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		sched_pause(preempt);

		count = lmin(len, buflen);
		if (ktrwrite(p, &kth, data, count) != 0)
			break;

		len -= count;
		data += count;
	}

	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrpledge(struct proc *p, int error, uint64_t code, int syscall)
{
	struct ktr_header kth;
	struct ktr_pledge kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PLEDGE);
	kp.error = error;
	kp.code = code;
	kp.syscall = syscall;

	KERNEL_LOCK();
	ktrwrite(p, &kth, &kp, sizeof(kp));
	KERNEL_UNLOCK();
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/* Interface and common routines */

int
doktrace(struct vnode *vp, int ops, int facs, pid_t pid, struct proc *p)
{
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	int descend = ops & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;

	facs = facs & ~((unsigned)KTRFAC_ROOT);
	ops = KTROP(ops);

	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		cred = p->p_ucred;
		if (!vp) {
			error = EINVAL;
			goto done;
		}
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(pr, &allprocess, ps_list) {
			if (pr->ps_tracevp == vp) {
				if (ktrcanset(p, pr))
					ktrcleartrace(pr);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		if (suser(p) == 0)
			facs |= KTRFAC_ROOT;
		error = ktrstart(p, vp, cred);
		if (error != 0)
			goto done;
	}
	/*
	 * do it
	 */
	if (pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-pid);
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(p, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(p, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(pid);
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(p, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(p, pr, ops, facs, vp, cred);
	}
	if (!ret)
		error = EPERM;
done:
	return (error);
}

/*
 * ktrace system call
 */
int
sys_ktrace(struct proc *p, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */	*uap = v;
	struct vnode *vp = NULL;
	const char *fname = SCARG(uap, fname);
	struct ucred *cred = NULL;
	int error;

	if (fname) {
		struct nameidata nd;

		cred = p->p_ucred;
		NDINIT(&nd, 0, 0, UIO_USERSPACE, fname, p);
		nd.ni_pledge = PLEDGE_CPATH | PLEDGE_WPATH;
		nd.ni_unveil = UNVEIL_CREATE | UNVEIL_WRITE;
		if ((error = vn_open(&nd, FWRITE|O_NOFOLLOW, 0)) != 0)
			return error;
		vp = nd.ni_vp;

		VOP_UNLOCK(vp);
	}

	error = doktrace(vp, SCARG(uap, ops), SCARG(uap, facs),
	    SCARG(uap, pid), p);
	if (vp != NULL)
		(void)vn_close(vp, FWRITE, cred, p);

	return error;
}

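/*
 * Apply one set/clear operation to a single process.  Returns 1 if the
 * caller was allowed to change the tracing state, 0 otherwise.
 */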
int
ktrops(struct proc *curp, struct process *pr, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	if (!ktrcanset(curp, pr))
		return (0);
	if (ops == KTROP_SET)
		ktrsettrace(pr, facs, vp, cred);
	else {
		/* KTROP_CLEAR */
		pr->ps_traceflag &= ~facs;
		if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
			/* cleared all the facility bits, so stop completely */
			ktrcleartrace(pr);
		}
	}

	return (1);
}

int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct process *pr;
	int ret = 0;

	pr = top;
	for (;;) {
		ret |= ktrops(curp, pr, ops, facs, vp, cred);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}

int
ktrwrite(struct proc *p, struct ktr_header *kth, const void *aux, size_t len)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	struct iovec data[2];
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	data[0].iov_base = (void *)aux;
	data[0].iov_len = len;
	data[1].iov_len = 0;
	kth->ktr_len = len;
	error = ktrwriteraw(p, vp, cred, kth, data);
	crfree(cred);
	return (error);
}

int
ktrwrite2(struct proc *p, struct ktr_header *kth, const void *aux1,
    size_t len1, const void *aux2, size_t len2)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	struct iovec data[2];
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	data[0].iov_base = (void *)aux1;
	data[0].iov_len = len1;
	data[1].iov_base = (void *)aux2;
	data[1].iov_len = len2;
	kth->ktr_len = len1 + len2;
	error = ktrwriteraw(p, vp, cred, kth, data);
	crfree(cred);
	return (error);
}

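/*
 * Append the header and up to two payload iovecs to the trace vnode.
 * On a write error, tracing to this vnode is disabled for every
 * process using it.
 */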
int
ktrwriteraw(struct proc *curp, struct vnode *vp, struct ucred *cred,
    struct ktr_header *kth, struct iovec *data)
{
	struct uio auio;
	struct iovec aiov[3];
	struct process *pr;
	int error;

	KERNEL_ASSERT_LOCKED();

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = curp;
	if (kth->ktr_len > 0) {
		aiov[1] = data[0];
		aiov[2] = data[1];
		auio.uio_iovcnt++;
		if (aiov[2].iov_len > 0)
			auio.uio_iovcnt++;
		auio.uio_resid += kth->ktr_len;
	}
	error = vget(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error)
		goto bad;
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
	vput(vp);
	if (error)
		goto bad;

	return (0);

bad:
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (pr == curp->p_p)
			continue;
		if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
			ktrcleartrace(pr);
	}
	ktrcleartrace(curp->p_p);
	return (error);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
 */
int
ktrcanset(struct proc *callp, struct process *targetpr)
{
	struct ucred *caller = callp->p_ucred;
	struct ucred *target = targetpr->ps_ucred;

	if ((caller->cr_uid == target->cr_ruid &&
	    target->cr_ruid == target->cr_svuid &&
	    caller->cr_rgid == target->cr_rgid &&	/* XXX */
	    target->cr_rgid == target->cr_svgid &&
	    (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 &&
	    !ISSET(targetpr->ps_flags, PS_SUGID)) ||
	    caller->cr_uid == 0)
		return (1);

	return (0);
}