/*	$OpenBSD: kern_ktrace.c,v 1.109 2022/12/05 23:18:37 deraadt Exp $	*/
/*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
31 * 32 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/proc.h> 38 #include <sys/sched.h> 39 #include <sys/fcntl.h> 40 #include <sys/namei.h> 41 #include <sys/vnode.h> 42 #include <sys/lock.h> 43 #include <sys/ktrace.h> 44 #include <sys/malloc.h> 45 #include <sys/syslog.h> 46 #include <sys/sysctl.h> 47 #include <sys/pledge.h> 48 49 #include <sys/mount.h> 50 #include <sys/syscall.h> 51 #include <sys/syscallargs.h> 52 53 void ktrinitheaderraw(struct ktr_header *, uint, pid_t, pid_t); 54 void ktrinitheader(struct ktr_header *, struct proc *, int); 55 int ktrstart(struct proc *, struct vnode *, struct ucred *); 56 int ktrops(struct proc *, struct process *, int, int, struct vnode *, 57 struct ucred *); 58 int ktrsetchildren(struct proc *, struct process *, int, int, 59 struct vnode *, struct ucred *); 60 int ktrwrite(struct proc *, struct ktr_header *, const void *, size_t); 61 int ktrwrite2(struct proc *, struct ktr_header *, const void *, size_t, 62 const void *, size_t); 63 int ktrwriteraw(struct proc *, struct vnode *, struct ucred *, 64 struct ktr_header *, struct iovec *); 65 int ktrcanset(struct proc *, struct process *); 66 67 /* 68 * Clear the trace settings in a correct way (to avoid races). 69 */ 70 void 71 ktrcleartrace(struct process *pr) 72 { 73 struct vnode *vp; 74 struct ucred *cred; 75 76 if (pr->ps_tracevp != NULL) { 77 vp = pr->ps_tracevp; 78 cred = pr->ps_tracecred; 79 80 pr->ps_traceflag = 0; 81 pr->ps_tracevp = NULL; 82 pr->ps_tracecred = NULL; 83 84 vp->v_writecount--; 85 vrele(vp); 86 crfree(cred); 87 } 88 } 89 90 /* 91 * Change the trace setting in a correct way (to avoid races). 
92 */ 93 void 94 ktrsettrace(struct process *pr, int facs, struct vnode *newvp, 95 struct ucred *newcred) 96 { 97 struct vnode *oldvp; 98 struct ucred *oldcred; 99 100 KASSERT(newvp != NULL); 101 KASSERT(newcred != NULL); 102 103 pr->ps_traceflag |= facs; 104 105 /* nothing to change about where the trace goes? */ 106 if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred) 107 return; 108 109 vref(newvp); 110 crhold(newcred); 111 newvp->v_writecount++; 112 113 oldvp = pr->ps_tracevp; 114 oldcred = pr->ps_tracecred; 115 116 pr->ps_tracevp = newvp; 117 pr->ps_tracecred = newcred; 118 119 if (oldvp != NULL) { 120 oldvp->v_writecount--; 121 vrele(oldvp); 122 crfree(oldcred); 123 } 124 } 125 126 void 127 ktrinitheaderraw(struct ktr_header *kth, uint type, pid_t pid, pid_t tid) 128 { 129 memset(kth, 0, sizeof(struct ktr_header)); 130 kth->ktr_type = type; 131 nanotime(&kth->ktr_time); 132 kth->ktr_pid = pid; 133 kth->ktr_tid = tid; 134 } 135 136 void 137 ktrinitheader(struct ktr_header *kth, struct proc *p, int type) 138 { 139 struct process *pr = p->p_p; 140 141 ktrinitheaderraw(kth, type, pr->ps_pid, p->p_tid + THREAD_PID_OFFSET); 142 memcpy(kth->ktr_comm, pr->ps_comm, sizeof(kth->ktr_comm)); 143 } 144 145 int 146 ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred) 147 { 148 struct ktr_header kth; 149 150 ktrinitheaderraw(&kth, htobe32(KTR_START), -1, -1); 151 return (ktrwriteraw(p, vp, cred, &kth, NULL)); 152 } 153 154 void 155 ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[]) 156 { 157 struct ktr_header kth; 158 struct ktr_syscall *ktp; 159 size_t len = sizeof(struct ktr_syscall) + argsize; 160 register_t *argp; 161 u_int nargs = 0; 162 int i; 163 164 if (code == SYS_sysctl) { 165 /* 166 * The sysctl encoding stores the mib[] 167 * array because it is interesting. 
168 */ 169 if (args[1] > 0) 170 nargs = lmin(args[1], CTL_MAXNAME); 171 len += nargs * sizeof(int); 172 } 173 atomic_setbits_int(&p->p_flag, P_INKTR); 174 ktrinitheader(&kth, p, KTR_SYSCALL); 175 ktp = malloc(len, M_TEMP, M_WAITOK); 176 ktp->ktr_code = code; 177 ktp->ktr_argsize = argsize; 178 argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall)); 179 for (i = 0; i < (argsize / sizeof *argp); i++) 180 *argp++ = args[i]; 181 if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int))) 182 memset(argp, 0, nargs * sizeof(int)); 183 ktrwrite(p, &kth, ktp, len); 184 free(ktp, M_TEMP, len); 185 atomic_clearbits_int(&p->p_flag, P_INKTR); 186 } 187 188 void 189 ktrsysret(struct proc *p, register_t code, int error, 190 const register_t retval[2]) 191 { 192 struct ktr_header kth; 193 struct ktr_sysret ktp; 194 int len; 195 196 atomic_setbits_int(&p->p_flag, P_INKTR); 197 ktrinitheader(&kth, p, KTR_SYSRET); 198 ktp.ktr_code = code; 199 ktp.ktr_error = error; 200 if (error) 201 len = 0; 202 else if (code == SYS_lseek) 203 /* the one exception: lseek on ILP32 needs more */ 204 len = sizeof(long long); 205 else 206 len = sizeof(register_t); 207 ktrwrite2(p, &kth, &ktp, sizeof(ktp), retval, len); 208 atomic_clearbits_int(&p->p_flag, P_INKTR); 209 } 210 211 void 212 ktrnamei(struct proc *p, char *path) 213 { 214 struct ktr_header kth; 215 216 atomic_setbits_int(&p->p_flag, P_INKTR); 217 ktrinitheader(&kth, p, KTR_NAMEI); 218 ktrwrite(p, &kth, path, strlen(path)); 219 atomic_clearbits_int(&p->p_flag, P_INKTR); 220 } 221 222 void 223 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov, 224 ssize_t len) 225 { 226 struct ktr_header kth; 227 struct ktr_genio ktp; 228 caddr_t cp; 229 int count, error; 230 int buflen; 231 232 atomic_setbits_int(&p->p_flag, P_INKTR); 233 234 /* beware overflow */ 235 if (len > PAGE_SIZE) 236 buflen = PAGE_SIZE; 237 else 238 buflen = len + sizeof(struct ktr_genio); 239 240 ktrinitheader(&kth, p, KTR_GENIO); 241 ktp.ktr_fd = fd; 
242 ktp.ktr_rw = rw; 243 244 cp = malloc(buflen, M_TEMP, M_WAITOK); 245 while (len > 0) { 246 /* 247 * Don't allow this process to hog the cpu when doing 248 * huge I/O. 249 */ 250 sched_pause(preempt); 251 252 count = lmin(iov->iov_len, buflen); 253 if (count > len) 254 count = len; 255 if (copyin(iov->iov_base, cp, count)) 256 break; 257 258 KERNEL_LOCK(); 259 error = ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count); 260 KERNEL_UNLOCK(); 261 if (error != 0) 262 break; 263 264 iov->iov_len -= count; 265 iov->iov_base = (caddr_t)iov->iov_base + count; 266 267 if (iov->iov_len == 0) 268 iov++; 269 270 len -= count; 271 } 272 273 free(cp, M_TEMP, buflen); 274 atomic_clearbits_int(&p->p_flag, P_INKTR); 275 } 276 277 void 278 ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code, 279 siginfo_t *si) 280 { 281 struct ktr_header kth; 282 struct ktr_psig kp; 283 284 atomic_setbits_int(&p->p_flag, P_INKTR); 285 ktrinitheader(&kth, p, KTR_PSIG); 286 kp.signo = (char)sig; 287 kp.action = action; 288 kp.mask = mask; 289 kp.code = code; 290 kp.si = *si; 291 292 KERNEL_LOCK(); 293 ktrwrite(p, &kth, &kp, sizeof(kp)); 294 KERNEL_UNLOCK(); 295 atomic_clearbits_int(&p->p_flag, P_INKTR); 296 } 297 298 void 299 ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen) 300 { 301 struct ktr_header kth; 302 303 atomic_setbits_int(&p->p_flag, P_INKTR); 304 ktrinitheader(&kth, p, KTR_STRUCT); 305 306 if (data == NULL) 307 datalen = 0; 308 KERNEL_LOCK(); 309 ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen); 310 KERNEL_UNLOCK(); 311 atomic_clearbits_int(&p->p_flag, P_INKTR); 312 } 313 314 int 315 ktruser(struct proc *p, const char *id, const void *addr, size_t len) 316 { 317 struct ktr_header kth; 318 struct ktr_user ktp; 319 int error; 320 void *memp; 321 #define STK_PARAMS 128 322 long long stkbuf[STK_PARAMS / sizeof(long long)]; 323 324 if (!KTRPOINT(p, KTR_USER)) 325 return (0); 326 if (len > KTR_USER_MAXLEN) 327 return (EINVAL); 328 329 
atomic_setbits_int(&p->p_flag, P_INKTR); 330 ktrinitheader(&kth, p, KTR_USER); 331 memset(ktp.ktr_id, 0, KTR_USER_MAXIDLEN); 332 error = copyinstr(id, ktp.ktr_id, KTR_USER_MAXIDLEN, NULL); 333 if (error == 0) { 334 if (len > sizeof(stkbuf)) 335 memp = malloc(len, M_TEMP, M_WAITOK); 336 else 337 memp = stkbuf; 338 error = copyin(addr, memp, len); 339 if (error == 0) 340 ktrwrite2(p, &kth, &ktp, sizeof(ktp), memp, len); 341 if (memp != stkbuf) 342 free(memp, M_TEMP, len); 343 } 344 atomic_clearbits_int(&p->p_flag, P_INKTR); 345 return (error); 346 } 347 348 void 349 ktrexec(struct proc *p, int type, const char *data, ssize_t len) 350 { 351 struct ktr_header kth; 352 int count; 353 int buflen; 354 355 assert(type == KTR_EXECARGS || type == KTR_EXECENV); 356 atomic_setbits_int(&p->p_flag, P_INKTR); 357 358 /* beware overflow */ 359 if (len > PAGE_SIZE) 360 buflen = PAGE_SIZE; 361 else 362 buflen = len; 363 364 ktrinitheader(&kth, p, type); 365 366 while (len > 0) { 367 /* 368 * Don't allow this process to hog the cpu when doing 369 * huge I/O. 
370 */ 371 sched_pause(preempt); 372 373 count = lmin(len, buflen); 374 if (ktrwrite(p, &kth, data, count) != 0) 375 break; 376 377 len -= count; 378 data += count; 379 } 380 381 atomic_clearbits_int(&p->p_flag, P_INKTR); 382 } 383 384 void 385 ktrpledge(struct proc *p, int error, uint64_t code, int syscall) 386 { 387 struct ktr_header kth; 388 struct ktr_pledge kp; 389 390 atomic_setbits_int(&p->p_flag, P_INKTR); 391 ktrinitheader(&kth, p, KTR_PLEDGE); 392 kp.error = error; 393 kp.code = code; 394 kp.syscall = syscall; 395 396 KERNEL_LOCK(); 397 ktrwrite(p, &kth, &kp, sizeof(kp)); 398 KERNEL_UNLOCK(); 399 atomic_clearbits_int(&p->p_flag, P_INKTR); 400 } 401 402 /* Interface and common routines */ 403 404 int 405 doktrace(struct vnode *vp, int ops, int facs, pid_t pid, struct proc *p) 406 { 407 struct process *pr = NULL; 408 struct ucred *cred = NULL; 409 struct pgrp *pg; 410 int descend = ops & KTRFLAG_DESCEND; 411 int ret = 0; 412 int error = 0; 413 414 facs = facs & ~((unsigned)KTRFAC_ROOT); 415 ops = KTROP(ops); 416 417 if (ops != KTROP_CLEAR) { 418 /* 419 * an operation which requires a file argument. 420 */ 421 cred = p->p_ucred; 422 if (!vp) { 423 error = EINVAL; 424 goto done; 425 } 426 if (vp->v_type != VREG) { 427 error = EACCES; 428 goto done; 429 } 430 } 431 /* 432 * Clear all uses of the tracefile 433 */ 434 if (ops == KTROP_CLEARFILE) { 435 LIST_FOREACH(pr, &allprocess, ps_list) { 436 if (pr->ps_tracevp == vp) { 437 if (ktrcanset(p, pr)) 438 ktrcleartrace(pr); 439 else 440 error = EPERM; 441 } 442 } 443 goto done; 444 } 445 /* 446 * need something to (un)trace (XXX - why is this here?) 
447 */ 448 if (!facs) { 449 error = EINVAL; 450 goto done; 451 } 452 if (ops == KTROP_SET) { 453 if (suser(p) == 0) 454 facs |= KTRFAC_ROOT; 455 error = ktrstart(p, vp, cred); 456 if (error != 0) 457 goto done; 458 } 459 /* 460 * do it 461 */ 462 if (pid < 0) { 463 /* 464 * by process group 465 */ 466 pg = pgfind(-pid); 467 if (pg == NULL) { 468 error = ESRCH; 469 goto done; 470 } 471 LIST_FOREACH(pr, &pg->pg_members, ps_pglist) { 472 if (descend) 473 ret |= ktrsetchildren(p, pr, ops, facs, vp, 474 cred); 475 else 476 ret |= ktrops(p, pr, ops, facs, vp, cred); 477 } 478 } else { 479 /* 480 * by pid 481 */ 482 pr = prfind(pid); 483 if (pr == NULL) { 484 error = ESRCH; 485 goto done; 486 } 487 if (descend) 488 ret |= ktrsetchildren(p, pr, ops, facs, vp, cred); 489 else 490 ret |= ktrops(p, pr, ops, facs, vp, cred); 491 } 492 if (!ret) 493 error = EPERM; 494 done: 495 return (error); 496 } 497 498 /* 499 * ktrace system call 500 */ 501 int 502 sys_ktrace(struct proc *p, void *v, register_t *retval) 503 { 504 struct sys_ktrace_args /* { 505 syscallarg(const char *) fname; 506 syscallarg(int) ops; 507 syscallarg(int) facs; 508 syscallarg(pid_t) pid; 509 } */ *uap = v; 510 struct vnode *vp = NULL; 511 const char *fname = SCARG(uap, fname); 512 struct ucred *cred = NULL; 513 int error; 514 515 if (fname) { 516 struct nameidata nd; 517 518 cred = p->p_ucred; 519 NDINIT(&nd, 0, 0, UIO_USERSPACE, fname, p); 520 nd.ni_pledge = PLEDGE_CPATH | PLEDGE_WPATH; 521 nd.ni_unveil = UNVEIL_CREATE | UNVEIL_WRITE; 522 if ((error = vn_open(&nd, FWRITE|O_NOFOLLOW, 0)) != 0) 523 return error; 524 vp = nd.ni_vp; 525 526 VOP_UNLOCK(vp); 527 } 528 529 error = doktrace(vp, SCARG(uap, ops), SCARG(uap, facs), 530 SCARG(uap, pid), p); 531 if (vp != NULL) 532 (void)vn_close(vp, FWRITE, cred, p); 533 534 return error; 535 } 536 537 int 538 ktrops(struct proc *curp, struct process *pr, int ops, int facs, 539 struct vnode *vp, struct ucred *cred) 540 { 541 if (!ktrcanset(curp, pr)) 542 return (0); 
543 if (ops == KTROP_SET) 544 ktrsettrace(pr, facs, vp, cred); 545 else { 546 /* KTROP_CLEAR */ 547 pr->ps_traceflag &= ~facs; 548 if ((pr->ps_traceflag & KTRFAC_MASK) == 0) { 549 /* cleared all the facility bits, so stop completely */ 550 ktrcleartrace(pr); 551 } 552 } 553 554 return (1); 555 } 556 557 int 558 ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs, 559 struct vnode *vp, struct ucred *cred) 560 { 561 struct process *pr; 562 int ret = 0; 563 564 pr = top; 565 for (;;) { 566 ret |= ktrops(curp, pr, ops, facs, vp, cred); 567 /* 568 * If this process has children, descend to them next, 569 * otherwise do any siblings, and if done with this level, 570 * follow back up the tree (but not past top). 571 */ 572 if (!LIST_EMPTY(&pr->ps_children)) 573 pr = LIST_FIRST(&pr->ps_children); 574 else for (;;) { 575 if (pr == top) 576 return (ret); 577 if (LIST_NEXT(pr, ps_sibling) != NULL) { 578 pr = LIST_NEXT(pr, ps_sibling); 579 break; 580 } 581 pr = pr->ps_pptr; 582 } 583 } 584 /*NOTREACHED*/ 585 } 586 587 int 588 ktrwrite(struct proc *p, struct ktr_header *kth, const void *aux, size_t len) 589 { 590 struct vnode *vp = p->p_p->ps_tracevp; 591 struct ucred *cred = p->p_p->ps_tracecred; 592 struct iovec data[2]; 593 int error; 594 595 if (vp == NULL) 596 return 0; 597 crhold(cred); 598 data[0].iov_base = (void *)aux; 599 data[0].iov_len = len; 600 data[1].iov_len = 0; 601 kth->ktr_len = len; 602 error = ktrwriteraw(p, vp, cred, kth, data); 603 crfree(cred); 604 return (error); 605 } 606 607 int 608 ktrwrite2(struct proc *p, struct ktr_header *kth, const void *aux1, 609 size_t len1, const void *aux2, size_t len2) 610 { 611 struct vnode *vp = p->p_p->ps_tracevp; 612 struct ucred *cred = p->p_p->ps_tracecred; 613 struct iovec data[2]; 614 int error; 615 616 if (vp == NULL) 617 return 0; 618 crhold(cred); 619 data[0].iov_base = (void *)aux1; 620 data[0].iov_len = len1; 621 data[1].iov_base = (void *)aux2; 622 data[1].iov_len = len2; 623 kth->ktr_len 
= len1 + len2; 624 error = ktrwriteraw(p, vp, cred, kth, data); 625 crfree(cred); 626 return (error); 627 } 628 629 int 630 ktrwriteraw(struct proc *curp, struct vnode *vp, struct ucred *cred, 631 struct ktr_header *kth, struct iovec *data) 632 { 633 struct uio auio; 634 struct iovec aiov[3]; 635 struct process *pr; 636 int error; 637 638 KERNEL_ASSERT_LOCKED(); 639 640 auio.uio_iov = &aiov[0]; 641 auio.uio_offset = 0; 642 auio.uio_segflg = UIO_SYSSPACE; 643 auio.uio_rw = UIO_WRITE; 644 aiov[0].iov_base = (caddr_t)kth; 645 aiov[0].iov_len = sizeof(struct ktr_header); 646 auio.uio_resid = sizeof(struct ktr_header); 647 auio.uio_iovcnt = 1; 648 auio.uio_procp = curp; 649 if (kth->ktr_len > 0) { 650 aiov[1] = data[0]; 651 aiov[2] = data[1]; 652 auio.uio_iovcnt++; 653 if (aiov[2].iov_len > 0) 654 auio.uio_iovcnt++; 655 auio.uio_resid += kth->ktr_len; 656 } 657 error = vget(vp, LK_EXCLUSIVE | LK_RETRY); 658 if (error) 659 goto bad; 660 error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred); 661 vput(vp); 662 if (error) 663 goto bad; 664 665 return (0); 666 667 bad: 668 /* 669 * If error encountered, give up tracing on this vnode. 670 */ 671 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 672 error); 673 LIST_FOREACH(pr, &allprocess, ps_list) { 674 if (pr == curp->p_p) 675 continue; 676 if (pr->ps_tracevp == vp && pr->ps_tracecred == cred) 677 ktrcleartrace(pr); 678 } 679 ktrcleartrace(curp->p_p); 680 return (error); 681 } 682 683 /* 684 * Return true if caller has permission to set the ktracing state 685 * of target. Essentially, the target can't possess any 686 * more permissions than the caller. KTRFAC_ROOT signifies that 687 * root previously set the tracing status on the target process, and 688 * so, only root may further change it. 689 * 690 * TODO: check groups. use caller effective gid. 
691 */ 692 int 693 ktrcanset(struct proc *callp, struct process *targetpr) 694 { 695 struct ucred *caller = callp->p_ucred; 696 struct ucred *target = targetpr->ps_ucred; 697 698 if ((caller->cr_uid == target->cr_ruid && 699 target->cr_ruid == target->cr_svuid && 700 caller->cr_rgid == target->cr_rgid && /* XXX */ 701 target->cr_rgid == target->cr_svgid && 702 (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 && 703 !ISSET(targetpr->ps_flags, PS_SUGID)) || 704 caller->cr_uid == 0) 705 return (1); 706 707 return (0); 708 } 709