1 /* $NetBSD: kern_ktrace.c,v 1.58 2002/06/28 01:59:36 itojun Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)kern_ktrace.c 8.5 (Berkeley) 5/14/95 36 */ 37 38 #include <sys/cdefs.h> 39 __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.58 2002/06/28 01:59:36 itojun Exp $"); 40 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/namei.h> 48 #include <sys/vnode.h> 49 #include <sys/ktrace.h> 50 #include <sys/malloc.h> 51 #include <sys/syslog.h> 52 #include <sys/filedesc.h> 53 #include <sys/ioctl.h> 54 55 #include <sys/mount.h> 56 #include <sys/syscallargs.h> 57 58 #ifdef KTRACE 59 60 int ktrace_common(struct proc *, int, int, int, struct file *); 61 void ktrinitheader(struct ktr_header *, struct proc *, int); 62 int ktrops(struct proc *, struct proc *, int, int, struct file *); 63 int ktrsetchildren(struct proc *, struct proc *, int, int, 64 struct file *); 65 int ktrwrite(struct proc *, struct ktr_header *); 66 int ktrcanset(struct proc *, struct proc *); 67 int ktrsamefile(struct file *, struct file *); 68 69 /* 70 * "deep" compare of two files for the purposes of clearing a trace. 71 * Returns true if they're the same open file, or if they point at the 72 * same underlying vnode/socket. 73 */ 74 75 int 76 ktrsamefile(struct file *f1, struct file *f2) 77 { 78 return ((f1 == f2) || 79 ((f1 != NULL) && (f2 != NULL) && 80 (f1->f_type == f2->f_type) && 81 (f1->f_data == f2->f_data))); 82 } 83 84 void 85 ktrderef(struct proc *p) 86 { 87 struct file *fp = p->p_tracep; 88 p->p_traceflag = 0; 89 if (fp == NULL) 90 return; 91 FILE_USE(fp); 92 closef(fp, NULL); 93 94 p->p_tracep = NULL; 95 } 96 97 void 98 ktradref(struct proc *p) 99 { 100 struct file *fp = p->p_tracep; 101 102 fp->f_count++; 103 } 104 105 void 106 ktrinitheader(struct ktr_header *kth, struct proc *p, int type) 107 { 108 109 memset(kth, 0, sizeof(*kth)); 110 kth->ktr_type = type; 111 microtime(&kth->ktr_time); 112 kth->ktr_pid = p->p_pid; 113 memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN); 114 } 115 116 void 117 ktrsyscall(struct proc *p, register_t code, register_t args[]) 118 { 119 struct ktr_header kth; 120 struct ktr_syscall *ktp; 121 register_t *argp; 122 int argsize; 123 size_t len; 124 int i; 125 126 argsize = p->p_emul->e_sysent[code].sy_narg * sizeof (register_t); 127 len = sizeof(struct ktr_syscall) + argsize; 128 129 p->p_traceflag |= KTRFAC_ACTIVE; 130 ktrinitheader(&kth, p, KTR_SYSCALL); 131 ktp = malloc(len, M_TEMP, M_WAITOK); 132 ktp->ktr_code = code; 133 ktp->ktr_argsize = argsize; 134 argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall)); 135 for (i = 0; i < (argsize / sizeof(*argp)); i++) 136 *argp++ = args[i]; 137 kth.ktr_buf = (caddr_t)ktp; 138 kth.ktr_len = len; 139 (void) ktrwrite(p, &kth); 140 free(ktp, M_TEMP); 141 p->p_traceflag &= ~KTRFAC_ACTIVE; 142 } 143 144 void 145 ktrsysret(struct proc *p, register_t code, int error, register_t retval) 146 { 147 struct ktr_header kth; 148 struct ktr_sysret ktp; 149 150 p->p_traceflag |= KTRFAC_ACTIVE; 151 ktrinitheader(&kth, p, KTR_SYSRET); 152 ktp.ktr_code = code; 153 ktp.ktr_eosys = 0; /* XXX unused */ 154 ktp.ktr_error = error; 155 ktp.ktr_retval = retval; /* what about val2 ? */ 156 157 kth.ktr_buf = (caddr_t)&ktp; 158 kth.ktr_len = sizeof(struct ktr_sysret); 159 160 (void) ktrwrite(p, &kth); 161 p->p_traceflag &= ~KTRFAC_ACTIVE; 162 } 163 164 void 165 ktrnamei(struct proc *p, char *path) 166 { 167 struct ktr_header kth; 168 169 p->p_traceflag |= KTRFAC_ACTIVE; 170 ktrinitheader(&kth, p, KTR_NAMEI); 171 kth.ktr_len = strlen(path); 172 kth.ktr_buf = path; 173 174 (void) ktrwrite(p, &kth); 175 p->p_traceflag &= ~KTRFAC_ACTIVE; 176 } 177 178 void 179 ktremul(struct proc *p) 180 { 181 struct ktr_header kth; 182 const char *emul = p->p_emul->e_name; 183 184 p->p_traceflag |= KTRFAC_ACTIVE; 185 ktrinitheader(&kth, p, KTR_EMUL); 186 kth.ktr_len = strlen(emul); 187 kth.ktr_buf = (caddr_t)emul; 188 189 (void) ktrwrite(p, &kth); 190 p->p_traceflag &= ~KTRFAC_ACTIVE; 191 } 192 193 void 194 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov, 195 int len, int error) 196 { 197 struct ktr_header kth; 198 struct ktr_genio *ktp; 199 caddr_t cp; 200 int resid = len, cnt; 201 int buflen; 202 203 if (error) 204 return; 205 206 p->p_traceflag |= KTRFAC_ACTIVE; 207 208 buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio)); 209 210 ktrinitheader(&kth, p, KTR_GENIO); 211 ktp = malloc(buflen, M_TEMP, M_WAITOK); 212 ktp->ktr_fd = fd; 213 ktp->ktr_rw = rw; 214 215 kth.ktr_buf = (caddr_t)ktp; 216 217 cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio)); 218 buflen -= sizeof(struct ktr_genio); 219 220 while (resid > 0) { 221 KDASSERT(p->p_cpu != NULL); 222 KDASSERT(p->p_cpu == curcpu()); 223 if (p->p_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) 224 preempt(NULL); 225 226 cnt = min(iov->iov_len, buflen); 227 if (cnt > resid) 228 cnt = resid; 229 if (copyin(iov->iov_base, cp, cnt)) 230 break; 231 232 kth.ktr_len = cnt + sizeof(struct ktr_genio); 233 234 if (__predict_false(ktrwrite(p, &kth) != 0)) 235 break; 236 237 iov->iov_base = (caddr_t)iov->iov_base + cnt; 238 iov->iov_len -= cnt; 239 240 if (iov->iov_len == 0) 241 iov++; 242 243 resid -= cnt; 244 } 245 246 free(ktp, M_TEMP); 247 p->p_traceflag &= ~KTRFAC_ACTIVE; 248 } 249 250 void 251 ktrpsig(struct proc *p, int sig, sig_t action, sigset_t *mask, int code) 252 { 253 struct ktr_header kth; 254 struct ktr_psig kp; 255 256 p->p_traceflag |= KTRFAC_ACTIVE; 257 ktrinitheader(&kth, p, KTR_PSIG); 258 kp.signo = (char)sig; 259 kp.action = action; 260 kp.mask = *mask; 261 kp.code = code; 262 kth.ktr_buf = (caddr_t)&kp; 263 kth.ktr_len = sizeof(struct ktr_psig); 264 265 (void) ktrwrite(p, &kth); 266 p->p_traceflag &= ~KTRFAC_ACTIVE; 267 } 268 269 void 270 ktrcsw(struct proc *p, int out, int user) 271 { 272 struct ktr_header kth; 273 struct ktr_csw kc; 274 275 p->p_traceflag |= KTRFAC_ACTIVE; 276 ktrinitheader(&kth, p, KTR_CSW); 277 kc.out = out; 278 kc.user = user; 279 kth.ktr_buf = (caddr_t)&kc; 280 kth.ktr_len = sizeof(struct ktr_csw); 281 282 (void) ktrwrite(p, &kth); 283 p->p_traceflag &= ~KTRFAC_ACTIVE; 284 } 285 286 void 287 ktruser(p, id, addr, len, ustr) 288 struct proc *p; 289 const char *id; 290 void *addr; 291 size_t len; 292 int ustr; 293 { 294 struct ktr_header kth; 295 struct ktr_user *ktp; 296 caddr_t user_dta; 297 298 p->p_traceflag |= KTRFAC_ACTIVE; 299 ktrinitheader(&kth, p, KTR_USER); 300 ktp = malloc(sizeof(struct ktr_user) + len, M_TEMP, M_WAITOK); 301 if (ustr) { 302 if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0) 303 ktp->ktr_id[0] = '\0'; 304 } else 305 strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN); 306 ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0'; 307 308 user_dta = (caddr_t) ((char *)ktp + sizeof(struct ktr_user)); 309 if (copyin(addr, (void *) user_dta, len) != 0) 310 len = 0; 311 312 kth.ktr_buf = (void *)ktp; 313 kth.ktr_len = sizeof(struct ktr_user) + len; 314 (void) ktrwrite(p, &kth); 315 316 free(ktp, M_TEMP); 317 p->p_traceflag &= ~KTRFAC_ACTIVE; 318 319 } 320 321 /* Interface and common routines */ 322 323 int 324 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp) 325 { 326 int ret = 0; 327 int error = 0; 328 int one = 1; 329 int descend; 330 struct proc *p; 331 struct pgrp *pg; 332 333 curp->p_traceflag |= KTRFAC_ACTIVE; 334 descend = ops & KTRFLAG_DESCEND; 335 facs = facs & ~((unsigned) KTRFAC_ROOT); 336 337 /* 338 * Clear all uses of the tracefile 339 */ 340 if (KTROP(ops) == KTROP_CLEARFILE) { 341 proclist_lock_read(); 342 for (p = LIST_FIRST(&allproc); p != NULL; 343 p = LIST_NEXT(p, p_list)) { 344 if (ktrsamefile(p->p_tracep, fp)) { 345 if (ktrcanset(curp, p)) 346 ktrderef(p); 347 else 348 error = EPERM; 349 } 350 } 351 proclist_unlock_read(); 352 goto done; 353 } 354 355 /* 356 * Mark fp non-blocking, to avoid problems from possible deadlocks. 357 */ 358 359 if (fp != NULL) { 360 fp->f_flag |= FNONBLOCK; 361 (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&one, curp); 362 } 363 364 /* 365 * need something to (un)trace (XXX - why is this here?) 366 */ 367 if (!facs) { 368 error = EINVAL; 369 goto done; 370 } 371 /* 372 * do it 373 */ 374 if (pid < 0) { 375 /* 376 * by process group 377 */ 378 pg = pgfind(-pid); 379 if (pg == NULL) { 380 error = ESRCH; 381 goto done; 382 } 383 for (p = LIST_FIRST(&pg->pg_members); p != NULL; 384 p = LIST_NEXT(p, p_pglist)) { 385 if (descend) 386 ret |= ktrsetchildren(curp, p, ops, facs, fp); 387 else 388 ret |= ktrops(curp, p, ops, facs, fp); 389 } 390 391 } else { 392 /* 393 * by pid 394 */ 395 p = pfind(pid); 396 if (p == NULL) { 397 error = ESRCH; 398 goto done; 399 } 400 if (descend) 401 ret |= ktrsetchildren(curp, p, ops, facs, fp); 402 else 403 ret |= ktrops(curp, p, ops, facs, fp); 404 } 405 if (!ret) 406 error = EPERM; 407 done: 408 curp->p_traceflag &= ~KTRFAC_ACTIVE; 409 return (error); 410 } 411 412 /* 413 * ktrace system call 414 */ 415 /* ARGSUSED */ 416 int 417 sys_fktrace(struct proc *curp, void *v, register_t *retval) 418 { 419 struct sys_fktrace_args /* { 420 syscallarg(int) fd; 421 syscallarg(int) ops; 422 syscallarg(int) facs; 423 syscallarg(int) pid; 424 } */ *uap = v; 425 struct file *fp = NULL; 426 struct filedesc *fdp = curp->p_fd; 427 428 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 429 return (EBADF); 430 431 if ((fp->f_flag & FWRITE) == 0) 432 return (EBADF); 433 434 return ktrace_common(curp, SCARG(uap, ops), 435 SCARG(uap, facs), SCARG(uap, pid), fp); 436 } 437 438 /* 439 * ktrace system call 440 */ 441 /* ARGSUSED */ 442 int 443 sys_ktrace(struct proc *curp, void *v, register_t *retval) 444 { 445 struct sys_ktrace_args /* { 446 syscallarg(const char *) fname; 447 syscallarg(int) ops; 448 syscallarg(int) facs; 449 syscallarg(int) pid; 450 } */ *uap = v; 451 struct vnode *vp = NULL; 452 struct file *fp = NULL; 453 int fd; 454 int ops = SCARG(uap, ops); 455 int error = 0; 456 struct nameidata nd; 457 458 ops = KTROP(ops) | (ops & KTRFLAG_DESCEND); 459 460 curp->p_traceflag |= KTRFAC_ACTIVE; 461 if (ops != KTROP_CLEAR) { 462 /* 463 * an operation which requires a file argument. 464 */ 465 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname), 466 curp); 467 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 468 curp->p_traceflag &= ~KTRFAC_ACTIVE; 469 return (error); 470 } 471 vp = nd.ni_vp; 472 VOP_UNLOCK(vp, 0); 473 if (vp->v_type != VREG) { 474 (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); 475 curp->p_traceflag &= ~KTRFAC_ACTIVE; 476 return (EACCES); 477 } 478 /* 479 * XXX This uses up a file descriptor slot in the 480 * tracing process for the duration of this syscall. 481 * This is not expected to be a problem. If 482 * falloc(NULL, ...) DTRT we could skip that part, but 483 * that would require changing its interface to allow 484 * the caller to pass in a ucred.. 485 * 486 * This will FILE_USE the fp it returns, if any. 487 * Keep it in use until we return. 488 */ 489 if ((error = falloc(curp, &fp, &fd)) != 0) 490 goto done; 491 492 fp->f_flag = FWRITE|FAPPEND; 493 fp->f_type = DTYPE_VNODE; 494 fp->f_ops = &vnops; 495 fp->f_data = (caddr_t)vp; 496 FILE_SET_MATURE(fp); 497 vp = NULL; 498 } 499 error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs), 500 SCARG(uap, pid), fp); 501 done: 502 if (vp != NULL) 503 (void) vn_close(vp, FWRITE, curp->p_ucred, curp); 504 if (fp != NULL) { 505 FILE_UNUSE(fp, curp); /* release file */ 506 fdrelease(curp, fd); /* release fd table slot */ 507 } 508 return (error); 509 } 510 511 int 512 ktrops(struct proc *curp, struct proc *p, int ops, int facs, struct file *fp) 513 { 514 515 if (!ktrcanset(curp, p)) 516 return (0); 517 if (KTROP(ops) == KTROP_SET) { 518 if (p->p_tracep != fp) { 519 /* 520 * if trace file already in use, relinquish 521 */ 522 ktrderef(p); 523 p->p_tracep = fp; 524 ktradref(p); 525 } 526 p->p_traceflag |= facs; 527 if (curp->p_ucred->cr_uid == 0) 528 p->p_traceflag |= KTRFAC_ROOT; 529 } else { 530 /* KTROP_CLEAR */ 531 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 532 /* no more tracing */ 533 ktrderef(p); 534 } 535 } 536 537 /* 538 * Emit an emulation record, every time there is a ktrace 539 * change/attach request. 540 */ 541 if (KTRPOINT(p, KTR_EMUL)) 542 ktremul(p); 543 #ifdef __HAVE_SYSCALL_INTERN 544 (*p->p_emul->e_syscall_intern)(p); 545 #endif 546 547 return (1); 548 } 549 550 int 551 ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs, 552 struct file *fp) 553 { 554 struct proc *p; 555 int ret = 0; 556 557 p = top; 558 for (;;) { 559 ret |= ktrops(curp, p, ops, facs, fp); 560 /* 561 * If this process has children, descend to them next, 562 * otherwise do any siblings, and if done with this level, 563 * follow back up the tree (but not past top). 564 */ 565 if (LIST_FIRST(&p->p_children) != NULL) 566 p = LIST_FIRST(&p->p_children); 567 else for (;;) { 568 if (p == top) 569 return (ret); 570 if (LIST_NEXT(p, p_sibling) != NULL) { 571 p = LIST_NEXT(p, p_sibling); 572 break; 573 } 574 p = p->p_pptr; 575 } 576 } 577 /*NOTREACHED*/ 578 } 579 580 int 581 ktrwrite(struct proc *p, struct ktr_header *kth) 582 { 583 struct uio auio; 584 struct iovec aiov[2]; 585 int error, tries; 586 struct file *fp = p->p_tracep; 587 588 if (fp == NULL) 589 return 0; 590 591 auio.uio_iov = &aiov[0]; 592 auio.uio_offset = 0; 593 auio.uio_segflg = UIO_SYSSPACE; 594 auio.uio_rw = UIO_WRITE; 595 aiov[0].iov_base = (caddr_t)kth; 596 aiov[0].iov_len = sizeof(struct ktr_header); 597 auio.uio_resid = sizeof(struct ktr_header); 598 auio.uio_iovcnt = 1; 599 auio.uio_procp = (struct proc *)0; 600 if (kth->ktr_len > 0) { 601 auio.uio_iovcnt++; 602 aiov[1].iov_base = kth->ktr_buf; 603 aiov[1].iov_len = kth->ktr_len; 604 auio.uio_resid += kth->ktr_len; 605 } 606 607 FILE_USE(fp); 608 609 tries = 0; 610 do { 611 error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio, 612 fp->f_cred, FOF_UPDATE_OFFSET); 613 tries++; 614 if (error == EWOULDBLOCK) 615 yield(); 616 } while ((error == EWOULDBLOCK) && (tries < 3)); 617 FILE_UNUSE(fp, NULL); 618 619 if (__predict_true(error == 0)) 620 return (0); 621 /* 622 * If error encountered, give up tracing on this vnode. Don't report 623 * EPIPE as this can easily happen with fktrace()/ktruss. 624 */ 625 if (error != EPIPE) 626 log(LOG_NOTICE, 627 "ktrace write failed, errno %d, tracing stopped\n", 628 error); 629 proclist_lock_read(); 630 for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) { 631 if (ktrsamefile(p->p_tracep, fp)) 632 ktrderef(p); 633 } 634 proclist_unlock_read(); 635 636 return (error); 637 } 638 639 /* 640 * Return true if caller has permission to set the ktracing state 641 * of target. Essentially, the target can't possess any 642 * more permissions than the caller. KTRFAC_ROOT signifies that 643 * root previously set the tracing status on the target process, and 644 * so, only root may further change it. 645 * 646 * TODO: check groups. use caller effective gid. 647 */ 648 int 649 ktrcanset(struct proc *callp, struct proc *targetp) 650 { 651 struct pcred *caller = callp->p_cred; 652 struct pcred *target = targetp->p_cred; 653 654 if ((caller->pc_ucred->cr_uid == target->p_ruid && 655 target->p_ruid == target->p_svuid && 656 caller->p_rgid == target->p_rgid && /* XXX */ 657 target->p_rgid == target->p_svgid && 658 (targetp->p_traceflag & KTRFAC_ROOT) == 0 && 659 (targetp->p_flag & P_SUGID) == 0) || 660 caller->pc_ucred->cr_uid == 0) 661 return (1); 662 663 return (0); 664 } 665 #endif /* KTRACE */ 666 667 /* 668 * Put user defined entry to ktrace records. 669 */ 670 int 671 sys_utrace(p, v, retval) 672 struct proc *p; 673 void *v; 674 register_t *retval; 675 { 676 #ifdef KTRACE 677 struct sys_utrace_args /* { 678 syscallarg(const char *) label; 679 syscallarg(void *) addr; 680 syscallarg(size_t) len; 681 } */ *uap = v; 682 683 if (!KTRPOINT(p, KTR_USER)) 684 return (0); 685 686 if (SCARG(uap, len) > KTR_USER_MAXLEN) 687 return (EINVAL); 688 689 ktruser(p, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1); 690 691 return (0); 692 #else /* !KTRACE */ 693 return ENOSYS; 694 #endif /* KTRACE */ 695 } 696