/*	$OpenBSD: kern_ktrace.c,v 1.58 2012/04/10 20:39:37 mikeb Exp $	*/
/*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#ifdef KTRACE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

void	ktrinitheader(struct ktr_header *, struct proc *, int);
void	ktrstart(struct proc *, struct vnode *, struct ucred *);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, void *);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, void *);
int	ktrcanset(struct proc *, struct process *);
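
/*
 * ktrcleartrace() and ktrsettrace() below manage a process's trace
 * target: ps_traceflag plus one reference each on the trace vnode
 * (ps_tracevp) and credentials (ps_tracecred).  Both routines swap the
 * pointers out of the process before dropping the old references, so
 * another thread never picks up a vnode/cred pair that is about to be
 * released.
 */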

/*
 * Clear the trace settings in a correct way (to avoid races).
 */
void
ktrcleartrace(struct process *pr)
{
	struct vnode *vp;
	struct ucred *cred;

	if (pr->ps_tracevp != NULL) {
		vp = pr->ps_tracevp;
		cred = pr->ps_tracecred;

		pr->ps_traceflag = 0;
		pr->ps_tracevp = NULL;
		pr->ps_tracecred = NULL;

		vrele(vp);
		crfree(cred);
	}
}

/*
 * Change the trace setting in a correct way (to avoid races).
 */
void
ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
    struct ucred *newcred)
{
	struct vnode *oldvp;
	struct ucred *oldcred;

	KASSERT(newvp != NULL);
	KASSERT(newcred != NULL);

	pr->ps_traceflag |= facs;

	/* nothing to change about where the trace goes? */
	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
		return;

	vref(newvp);
	crhold(newcred);

	oldvp = pr->ps_tracevp;
	oldcred = pr->ps_tracecred;

	pr->ps_tracevp = newvp;
	pr->ps_tracecred = newcred;

	if (oldvp != NULL) {
		vrele(oldvp);
		crfree(oldcred);
	}
}

void
ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
{
	bzero(kth, sizeof (struct ktr_header));
	kth->ktr_type = type;
	nanotime(&kth->ktr_time);
	kth->ktr_pid = p->p_p->ps_pid;
	kth->ktr_tid = p->p_pid + THREAD_PID_OFFSET;
	bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
}

void
ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
{
	struct ktr_header kth;

	bzero(&kth, sizeof (kth));
	kth.ktr_type = htobe32(KTR_START);
	nanotime(&kth.ktr_time);
	kth.ktr_pid = (pid_t)-1;
	kth.ktr_tid = (pid_t)-1;
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrwriteraw(p, vp, cred, &kth, NULL);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}
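
/*
 * The record emitters below (ktrsyscall() through ktrstruct()) share a
 * common shape: set P_INKTR so trace points reached while the record
 * is being written do not emit further records, build a header with
 * ktrinitheader(), hand the header and payload to ktrwrite(), then
 * clear P_INKTR again.
 */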

void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct ktr_header kth;
	struct ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	int i;

	if (code == SYS___sysctl && (p->p_emul->e_flags & EMUL_NATIVE)) {
		/*
		 * The native sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = min(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	if (code == SYS___sysctl && (p->p_emul->e_flags & EMUL_NATIVE) &&
	    nargs &&
	    copyin((void *)args[0], argp, nargs * sizeof(int)))
		bzero(argp, nargs * sizeof(int));
	kth.ktr_len = len;
	ktrwrite(p, &kth, ktp);
	free(ktp, M_TEMP);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrsysret(struct proc *p, register_t code, int error, register_t retval)
{
	struct ktr_header kth;
	struct ktr_sysret ktp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSRET);
	ktp.ktr_code = code;
	ktp.ktr_error = error;
	ktp.ktr_retval = error == 0 ? retval : 0;

	kth.ktr_len = sizeof(struct ktr_sysret);

	ktrwrite(p, &kth, &ktp);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrnamei(struct proc *p, char *path)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_NAMEI);
	kth.ktr_len = strlen(path);

	ktrwrite(p, &kth, path);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktremul(struct proc *p, char *emul)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_EMUL);
	kth.ktr_len = strlen(emul);

	ktrwrite(p, &kth, emul);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov, int len,
    int error)
{
	struct ktr_header kth;
	struct ktr_genio *ktp;
	caddr_t cp;
	int resid = len, count;
	int buflen;

	if (error)
		return;

	atomic_setbits_int(&p->p_flag, P_INKTR);

	buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));

	ktrinitheader(&kth, p, KTR_GENIO);
	ktp = malloc(buflen, M_TEMP, M_WAITOK);
	ktp->ktr_fd = fd;
	ktp->ktr_rw = rw;

	cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio));
	buflen -= sizeof(struct ktr_genio);

	while (resid > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
			preempt(NULL);

		count = min(iov->iov_len, buflen);
		if (count > resid)
			count = resid;
		if (copyin(iov->iov_base, cp, count))
			break;

		kth.ktr_len = count + sizeof(struct ktr_genio);

		if (ktrwrite(p, &kth, ktp) != 0)
			break;

		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		if (iov->iov_len == 0)
			iov++;

		resid -= count;
	}

	free(ktp, M_TEMP);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
    siginfo_t *si)
{
	struct ktr_header kth;
	struct ktr_psig kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PSIG);
	kp.signo = (char)sig;
	kp.action = action;
	kp.mask = mask;
	kp.code = code;
	kp.si = *si;
	kth.ktr_len = sizeof(struct ktr_psig);

	ktrwrite(p, &kth, &kp);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

void
ktrcsw(struct proc *p, int out, int user)
{
	struct ktr_header kth;
	struct ktr_csw kc;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_CSW);
	kc.out = out;
	kc.user = user;
	kth.ktr_len = sizeof(struct ktr_csw);

	ktrwrite(p, &kth, &kc);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}
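
/*
 * A KTR_STRUCT payload is the structure's name as a NUL-terminated
 * string immediately followed by the raw structure bytes, so that
 * consumers such as kdump(1) can tell how to decode the data.
 */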

void
ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
{
	struct ktr_header kth;
	void *buf;
	size_t buflen;

#ifdef MULTIPROCESSOR
	KASSERT(__mp_lock_held(&kernel_lock) > 0);
#endif
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_STRUCT);

	if (data == NULL)
		datalen = 0;
	buflen = strlen(name) + 1 + datalen;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	strlcpy(buf, name, buflen);
	bcopy(data, buf + strlen(name) + 1, datalen);
	kth.ktr_len = buflen;

	ktrwrite(p, &kth, buf);
	free(buf, M_TEMP);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/* Interface and common routines */

/*
 * ktrace system call
 */
/* ARGSUSED */
int
sys_ktrace(struct proc *curp, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	struct proc *p = NULL;
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	int facs = SCARG(uap, facs) & ~((unsigned) KTRFAC_ROOT);
	int ops = KTROP(SCARG(uap, ops));
	int descend = SCARG(uap, ops) & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;
	struct nameidata nd;

	atomic_setbits_int(&curp->p_flag, P_INKTR);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		cred = curp->p_ucred;
		crhold(cred);
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    curp);
		if ((error = vn_open(&nd, FREAD|FWRITE|O_NOFOLLOW, 0)) != 0)
			goto done;
		vp = nd.ni_vp;

		VOP_UNLOCK(vp, 0, curp);
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(p, &allproc, p_list) {
			if (p->p_p->ps_tracevp == vp) {
				if (ktrcanset(curp, p->p_p))
					ktrcleartrace(p->p_p);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		if (suser(curp, 0) == 0)
			facs |= KTRFAC_ROOT;
		ktrstart(curp, vp, cred);
	}
	/*
	 * do it
	 */
	if (SCARG(uap, pid) < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-SCARG(uap, pid));
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(curp, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(curp, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(SCARG(uap, pid));
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(curp, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(curp, pr, ops, facs, vp, cred);
	}
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FREAD|FWRITE, cred, curp);
	if (cred != NULL)
		crfree(cred);
	atomic_clearbits_int(&curp->p_flag, P_INKTR);
	return (error);
}
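
/*
 * Userland sketch (not part of this file): tracing the system calls of
 * a pid and its future children is a single ktrace(2) call, e.g.
 *
 *	ktrace("ktrace.out", KTROP_SET | KTRFLAG_DESCEND,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, pid);
 *
 * which reaches sys_ktrace() above with ops == KTROP_SET, descend set,
 * and facs == (KTRFAC_SYSCALL | KTRFAC_SYSRET).
 */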

int
ktrops(struct proc *curp, struct process *pr, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct proc *p;

	if (!ktrcanset(curp, pr))
		return (0);
	if (ops == KTROP_SET)
		ktrsettrace(pr, facs, vp, cred);
	else {
		/* KTROP_CLEAR */
		pr->ps_traceflag &= ~facs;
		if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
			/* cleared all the facility bits, so stop completely */
			ktrcleartrace(pr);
		}
	}

	/*
	 * Emit an emulation record, every time there is a ktrace
	 * change/attach request.
	 * XXX an EMUL record for each thread?  Perhaps should have
	 * XXX a record type to say "this pid is really a thread of this
	 * XXX other pid" and only generate an EMUL record for the main pid
	 */
	TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link)
		if (KTRPOINT(p, KTR_EMUL))
			ktremul(p, p->p_emul->e_name);

	return (1);
}

int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct process *pr;
	int ret = 0;

	pr = top;
	for (;;) {
		ret |= ktrops(curp, pr, ops, facs, vp, cred);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}

int
ktrwrite(struct proc *p, struct ktr_header *kth, void *aux)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	error = ktrwriteraw(p, vp, cred, kth, aux);
	crfree(cred);
	return (error);
}

int
ktrwriteraw(struct proc *p, struct vnode *vp, struct ucred *cred,
    struct ktr_header *kth, void *aux)
{
	struct uio auio;
	struct iovec aiov[2];
	int error;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = p;
	if (kth->ktr_len > 0) {
		auio.uio_iovcnt++;
		aiov[1].iov_base = aux;
		aiov[1].iov_len = kth->ktr_len;
		auio.uio_resid += kth->ktr_len;
	}
	vget(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
	if (!error) {
		vput(vp);
		return (0);
	}
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	LIST_FOREACH(p, &allproc, p_list)
		if (p->p_p->ps_tracevp == vp && p->p_p->ps_tracecred == cred)
			ktrcleartrace(p->p_p);

	vput(vp);
	return (error);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
 */
int
ktrcanset(struct proc *callp, struct process *targetpr)
{
	struct pcred *caller = callp->p_cred;
	struct pcred *target = targetpr->ps_cred;

	if ((caller->pc_ucred->cr_uid == target->p_ruid &&
	    target->p_ruid == target->p_svuid &&
	    caller->p_rgid == target->p_rgid &&	/* XXX */
	    target->p_rgid == target->p_svgid &&
	    (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 &&
	    !ISSET(targetpr->ps_flags, PS_SUGID)) ||
	    caller->pc_ucred->cr_uid == 0)
		return (1);

	return (0);
}

#endif