1 /* $OpenBSD: kern_ktrace.c,v 1.100 2019/10/06 16:24:14 beck Exp $ */ 2 /* $NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/fcntl.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/lock.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/pledge.h>

#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

/* Internal routines; definitions below. */
void	ktrinitheaderraw(struct ktr_header *, uint, pid_t, pid_t);
void	ktrinitheader(struct ktr_header *, struct proc *, int);
void	ktrstart(struct proc *, struct vnode *, struct ucred *);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, const void *, size_t);
int	ktrwrite2(struct proc *, struct ktr_header *, const void *, size_t,
	    const void *, size_t);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, struct iovec *);
int	ktrcanset(struct proc *, struct process *);

/*
 * Clear the trace settings in a correct way (to avoid races).
 */
void
ktrcleartrace(struct process *pr)
{
	struct vnode *vp;
	struct ucred *cred;

	if (pr->ps_tracevp != NULL) {
		/* Take local copies so the references can be dropped last. */
		vp = pr->ps_tracevp;
		cred = pr->ps_tracecred;

		/*
		 * Detach tracing from the process before releasing the
		 * vnode/ucred references, so the process is never seen
		 * pointing at objects whose references we already gave up.
		 */
		pr->ps_traceflag = 0;
		pr->ps_tracevp = NULL;
		pr->ps_tracecred = NULL;

		vrele(vp);
		crfree(cred);
	}
}

/*
 * Change the trace setting in a correct way (to avoid races).
 */
void
ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
    struct ucred *newcred)
{
	struct vnode *oldvp;
	struct ucred *oldcred;

	KASSERT(newvp != NULL);
	KASSERT(newcred != NULL);

	/* Enable the requested trace facilities. */
	pr->ps_traceflag |= facs;

	/* nothing to change about where the trace goes? */
	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
		return;

	/*
	 * Take the new references before installing the new pointers and
	 * before dropping the old references, so the process never points
	 * at an unreferenced vnode or ucred.
	 */
	vref(newvp);
	crhold(newcred);

	oldvp = pr->ps_tracevp;
	oldcred = pr->ps_tracecred;

	pr->ps_tracevp = newvp;
	pr->ps_tracecred = newcred;

	if (oldvp != NULL) {
		/* oldcred is only meaningful when an old vnode was set. */
		vrele(oldvp);
		crfree(oldcred);
	}
}

/*
 * Fill in a trace record header from explicit values and timestamp it.
 */
void
ktrinitheaderraw(struct ktr_header *kth, uint type, pid_t pid, pid_t tid)
{
	memset(kth, 0, sizeof(struct ktr_header));
	kth->ktr_type = type;
	nanotime(&kth->ktr_time);
	kth->ktr_pid = pid;
	kth->ktr_tid = tid;
}

/*
 * Fill in a trace record header for the given thread and record type.
 */
void
ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
{
	struct process *pr = p->p_p;

	/* Thread ids are recorded offset by THREAD_PID_OFFSET. */
	ktrinitheaderraw(kth, type, pr->ps_pid, p->p_tid + THREAD_PID_OFFSET);
	memcpy(kth->ktr_comm, pr->ps_comm, MAXCOMLEN);
}

/*
 * Write the start-of-trace marker record.  It is written with pid/tid
 * of -1 and a byte-swapped type tag (presumably so readers can detect
 * the byte order of the trace file -- not verified here).
 */
void
ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
{
	struct ktr_header kth;

	ktrinitheaderraw(&kth, htobe32(KTR_START), -1, -1);
	ktrwriteraw(p, vp, cred, &kth, NULL);
}

/*
 * Record entry to a system call: the syscall number plus its
 * register arguments.
 */
void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct ktr_header kth;
	struct ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	int i;

	if (code == SYS_sysctl) {
		/*
		 * The sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = lmin(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	/* Copy the register arguments in behind the fixed header. */
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	/* For sysctl(2), append the user's mib[]; zero-fill if copyin fails. */
	if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int)))
		memset(argp, 0, nargs * sizeof(int));
	ktrwrite(p, &kth, ktp, len);
	free(ktp, M_TEMP, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Record return from a system call: the syscall number, the error,
 * and (on success) the return value.
 */
void
ktrsysret(struct proc *p, register_t code, int error,
    const register_t retval[2])
{
	struct ktr_header kth;
	struct ktr_sysret ktp;
	int len;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSRET);
	ktp.ktr_code = code;
	ktp.ktr_error = error;
	if (error)
		len = 0;	/* no return value recorded on failure */
	else if (code == SYS_lseek)
		/* the one exception: lseek on ILP32 needs more */
		len = sizeof(long long);
	else
		len = sizeof(register_t);
	ktrwrite2(p, &kth, &ktp, sizeof(ktp), retval, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Record a pathname resolved by namei.
 */
void
ktrnamei(struct proc *p, char *path)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_NAMEI);
	ktrwrite(p, &kth, path, strlen(path));
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Record the data moved by a read/write on a file descriptor,
 * copied from userspace in chunks of at most PAGE_SIZE.
 */
void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
    ssize_t len)
{
	struct ktr_header kth;
	struct ktr_genio ktp;
	caddr_t cp;
	int count, error;
	int buflen;

	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len + sizeof(struct ktr_genio);

	ktrinitheader(&kth, p, KTR_GENIO);
	ktp.ktr_fd = fd;
	ktp.ktr_rw = rw;

	cp = malloc(buflen, M_TEMP, M_WAITOK);
	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		sched_pause(preempt);

		/* Copy at most one buffer's worth from the current iovec. */
		count = lmin(iov->iov_len, buflen);
		if (count > len)
			count = len;
		if (copyin(iov->iov_base, cp, count))
			break;

		KERNEL_LOCK();
		error = ktrwrite2(p, &kth, &ktp, sizeof(ktp), cp, count);
		KERNEL_UNLOCK();
		if (error != 0)
			break;

		/* Advance within the iovec; step to the next when drained. */
		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		if (iov->iov_len == 0)
			iov++;

		len -= count;
	}

	free(cp, M_TEMP, buflen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Record delivery of a signal: number, handler, mask, code, and the
 * full siginfo.
 */
void
ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
    siginfo_t *si)
{
	struct ktr_header kth;
	struct ktr_psig kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PSIG);
	kp.signo = (char)sig;
	kp.action = action;
	kp.mask = mask;
	kp.code = code;
	kp.si = *si;

	ktrwrite(p, &kth, &kp, sizeof(kp));
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Record a named structure snapshot: the NUL-terminated name followed
 * by the structure's bytes.
 */
void
ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_STRUCT);

	if (data == NULL)
		datalen = 0;
	KERNEL_LOCK();
	ktrwrite2(p, &kth, name, strlen(name) + 1, data, datalen);
	KERNEL_UNLOCK();
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Record a user-supplied trace record: an identifier string plus up to
 * KTR_USER_MAXLEN bytes of user data.  Returns 0 on success (or when
 * KTR_USER tracing is off), EINVAL for oversized data, or a
 * copyinstr/copyin error.
 */
int
ktruser(struct proc *p, const char *id, const void *addr, size_t len)
{
	struct ktr_header kth;
	struct ktr_user ktp;
	int error;
	void *memp;
#define	STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if (!KTRPOINT(p, KTR_USER))
		return (0);
	if (len > KTR_USER_MAXLEN)
		return (EINVAL);

	atomic_setbits_int(&p->p_flag, P_INKTR);

	ktrinitheader(&kth, p, KTR_USER);
	memset(ktp.ktr_id, 0, KTR_USER_MAXIDLEN);
	error = copyinstr(id, ktp.ktr_id, KTR_USER_MAXIDLEN, NULL);
	if (error == 0) {
		/* Small payloads use the stack buffer; larger ones malloc. */
		if (len > sizeof(stkbuf))
			memp = malloc(len, M_TEMP, M_WAITOK);
		else
			memp = stkbuf;
		error = copyin(addr, memp, len);
		if (error == 0)
			ktrwrite2(p, &kth, &ktp, sizeof(ktp), memp, len);
		if (memp != stkbuf)
			free(memp, M_TEMP, len);
	}
	atomic_clearbits_int(&p->p_flag, P_INKTR);
	return (error);
}

/*
 * Record exec arguments or environment strings, emitted in chunks of
 * at most PAGE_SIZE bytes.
 */
void
ktrexec(struct proc *p, int type, const char *data, ssize_t len)
{
	struct ktr_header kth;
	int count;
	int buflen;

	assert(type == KTR_EXECARGS || type == KTR_EXECENV);
	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE)
		buflen = PAGE_SIZE;
	else
		buflen = len;

	ktrinitheader(&kth, p, type);

	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		sched_pause(preempt);

		count = lmin(len, buflen);
		if (ktrwrite(p, &kth, data, count) != 0)
			break;

		len -= count;
		data += count;
	}

	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Record a pledge violation: the errno reported, the pledge bits
 * involved, and the syscall number that tripped it.
 */
void
ktrpledge(struct proc *p, int error, uint64_t code, int syscall)
{
	struct ktr_header kth;
	struct ktr_pledge kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PLEDGE);
	kp.error = error;
	kp.code = code;
	kp.syscall = syscall;

	KERNEL_LOCK();
	ktrwrite(p, &kth, &kp, sizeof(kp));
	KERNEL_UNLOCK();
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/* Interface and common routines */

/*
 * Back end of the ktrace(2) syscall: apply the requested operation
 * (set/clear facilities, or clear all users of a trace file) to the
 * target pid or process group.  Returns 0 or an errno.
 */
int
doktrace(struct vnode *vp, int ops, int facs, pid_t pid, struct proc *p)
{
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	int descend = ops & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;

	/* Userland may not request KTRFAC_ROOT; it is set below for root. */
	facs = facs & ~((unsigned)KTRFAC_ROOT);
	ops = KTROP(ops);

	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		cred = p->p_ucred;
		if (!vp) {
			error = EINVAL;
			goto done;
		}
		/* Trace output must go to a regular file. */
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(pr, &allprocess, ps_list) {
			if (pr->ps_tracevp == vp) {
				if (ktrcanset(p, pr))
					ktrcleartrace(pr);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		/* Mark root-initiated tracing; only root may change it later. */
		if (suser(p) == 0)
			facs |= KTRFAC_ROOT;
		ktrstart(p, vp, cred);
	}
	/*
	 * do it
	 */
	if (pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-pid);
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(p, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(p, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(pid);
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(p, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(p, pr, ops, facs, vp, cred);
	}
	/* EPERM if no target process could be (un)traced. */
	if (!ret)
		error = EPERM;
done:
	return (error);
}

/*
 * ktrace system call
 */
int
sys_ktrace(struct proc *p, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	const char *fname = SCARG(uap, fname);
	struct ucred *cred = NULL;
	int error;

	if (fname) {
		struct nameidata nd;

		cred = p->p_ucred;
		NDINIT(&nd, 0, 0, UIO_USERSPACE, fname, p);
		nd.ni_pledge = PLEDGE_CPATH | PLEDGE_WPATH;
		nd.ni_unveil = UNVEIL_CREATE | UNVEIL_WRITE;
		if ((error = vn_open(&nd, FWRITE|O_NOFOLLOW, 0)) != 0)
			return error;
		vp = nd.ni_vp;

		/* Keep the reference from vn_open but release the lock. */
		VOP_UNLOCK(vp);
	}

	error = doktrace(vp, SCARG(uap, ops), SCARG(uap, facs),
	    SCARG(uap, pid), p);
	if (vp != NULL)
		(void)vn_close(vp, FWRITE, cred, p);

	return error;
}

/*
 * Apply a set/clear trace operation to one process.  Returns 1 if the
 * caller was permitted to change the target's tracing state, else 0.
 */
int
ktrops(struct proc *curp, struct process *pr, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	if (!ktrcanset(curp, pr))
		return (0);
	if (ops == KTROP_SET)
		ktrsettrace(pr,
		    facs, vp, cred);
	else {
		/* KTROP_CLEAR */
		pr->ps_traceflag &= ~facs;
		if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
			/* cleared all the facility bits, so stop completely */
			ktrcleartrace(pr);
		}
	}

	return (1);
}

/*
 * Apply a trace operation to a process and all of its descendants.
 * Returns non-zero if at least one process could be changed.
 */
int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct process *pr;
	int ret = 0;

	pr = top;
	/* Iterative preorder walk of the process tree rooted at top. */
	for (;;) {
		ret |= ktrops(curp, pr, ops, facs, vp, cred);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}

/*
 * Emit a single-payload trace record to the process's trace vnode.
 * Returns 0 when tracing is inactive, else the ktrwriteraw result.
 */
int
ktrwrite(struct proc *p, struct ktr_header *kth, const void *aux, size_t len)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	struct iovec data[2];
	int error;

	if (vp == NULL)
		return 0;
	/* Hold the cred across the write in case tracing is torn down. */
	crhold(cred);
	data[0].iov_base = (void *)aux;
	data[0].iov_len = len;
	data[1].iov_len = 0;
	kth->ktr_len = len;
	error = ktrwriteraw(p, vp, cred, kth, data);
	crfree(cred);
	return (error);
}

/*
 * Emit a two-payload trace record; the payloads are written back to
 * back after the header.
 */
int
ktrwrite2(struct proc *p, struct ktr_header *kth, const void *aux1,
    size_t len1, const void *aux2, size_t len2)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	struct iovec data[2];
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	data[0].iov_base = (void *)aux1;
	data[0].iov_len = len1;
	data[1].iov_base = (void *)aux2;
	data[1].iov_len = len2;
	kth->ktr_len = len1 + len2;
	error = ktrwriteraw(p, vp,
	    cred, kth, data);
	crfree(cred);
	return (error);
}

/*
 * Write one record (header plus up to two payload iovecs) to the trace
 * vnode.  On a write error, tracing is shut down for every process
 * still using this vnode/cred pair.
 */
int
ktrwriteraw(struct proc *curp, struct vnode *vp, struct ucred *cred,
    struct ktr_header *kth, struct iovec *data)
{
	struct uio auio;
	struct iovec aiov[3];
	struct process *pr;
	int error;

	KERNEL_ASSERT_LOCKED();

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = curp;
	if (kth->ktr_len > 0) {
		/* Append the payload iovec(s); the second may be empty. */
		aiov[1] = data[0];
		aiov[2] = data[1];
		auio.uio_iovcnt++;
		if (aiov[2].iov_len > 0)
			auio.uio_iovcnt++;
		auio.uio_resid += kth->ktr_len;
	}
	vget(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
	if (!error) {
		vput(vp);
		return (0);
	}
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	LIST_FOREACH(pr, &allprocess, ps_list)
		if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
			ktrcleartrace(pr);

	vput(vp);
	return (error);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
679 */ 680 int 681 ktrcanset(struct proc *callp, struct process *targetpr) 682 { 683 struct ucred *caller = callp->p_ucred; 684 struct ucred *target = targetpr->ps_ucred; 685 686 if ((caller->cr_uid == target->cr_ruid && 687 target->cr_ruid == target->cr_svuid && 688 caller->cr_rgid == target->cr_rgid && /* XXX */ 689 target->cr_rgid == target->cr_svgid && 690 (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 && 691 !ISSET(targetpr->ps_flags, PS_SUGID)) || 692 caller->cr_uid == 0) 693 return (1); 694 695 return (0); 696 } 697