/*	$OpenBSD: kern_ktrace.c,v 1.69 2014/07/13 15:46:21 uebayasi Exp $	*/
/*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

/*
 * Kernel-side implementation of ktrace(2): per-process system-call,
 * namei, generic-I/O, signal, context-switch, structure, emulation and
 * user-record tracing.  Trace records are appended to a regular file
 * through the vnode stored in the traced process's struct process
 * (ps_tracevp), using the credentials captured at attach time
 * (ps_tracecred).
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>

void	ktrinitheaderraw(struct ktr_header *, uint, pid_t, pid_t);
void	ktrinitheader(struct ktr_header *, struct proc *, int);
void	ktrstart(struct proc *, struct vnode *, struct ucred *);
void	ktremulraw(struct proc *, struct process *, pid_t);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, void *);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, void *);
int	ktrcanset(struct proc *, struct process *);

/*
 * Clear the trace settings in a correct way (to avoid races).
 *
 * The flag is zeroed and the vnode/cred pointers are detached from the
 * process before the references are dropped, so nothing can observe a
 * live ps_traceflag with a stale vnode.
 */
void
ktrcleartrace(struct process *pr)
{
	struct vnode *vp;
	struct ucred *cred;

	if (pr->ps_tracevp != NULL) {
		vp = pr->ps_tracevp;
		cred = pr->ps_tracecred;

		pr->ps_traceflag = 0;
		pr->ps_tracevp = NULL;
		pr->ps_tracecred = NULL;

		vrele(vp);
		crfree(cred);
	}
}

/*
 * Change the trace setting in a correct way (to avoid races).
 *
 * Adds the facilities in `facs' to the process's trace flags and, if the
 * destination vnode or credentials differ from what is currently set,
 * swaps them in: new references are taken before the old ones are
 * released, so the process always holds valid references.
 */
void
ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
    struct ucred *newcred)
{
	struct vnode *oldvp;
	struct ucred *oldcred;

	KASSERT(newvp != NULL);
	KASSERT(newcred != NULL);

	pr->ps_traceflag |= facs;

	/* nothing to change about where the trace goes? */
	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
		return;

	vref(newvp);
	crhold(newcred);

	oldvp = pr->ps_tracevp;
	oldcred = pr->ps_tracecred;

	pr->ps_tracevp = newvp;
	pr->ps_tracecred = newcred;

	if (oldvp != NULL) {
		vrele(oldvp);
		crfree(oldcred);
	}
}

/*
 * Initialize a trace record header with an explicit type/pid/tid,
 * timestamped with the current time.  Used directly for records that
 * are not attributed to the calling thread (e.g. KTR_START, or emul
 * records emitted on behalf of another process).
 */
void
ktrinitheaderraw(struct ktr_header *kth, uint type, pid_t pid, pid_t tid)
{
	memset(kth, 0, sizeof(struct ktr_header));
	kth->ktr_type = type;
	nanotime(&kth->ktr_time);
	kth->ktr_pid = pid;
	kth->ktr_tid = tid;
}

/*
 * Initialize a trace record header attributed to thread `p': fills in
 * the process id, the thread id (kernel tid biased by THREAD_PID_OFFSET
 * for userland visibility) and the command name.
 */
void
ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
{
	ktrinitheaderraw(kth, type, p->p_p->ps_pid,
	    p->p_pid + THREAD_PID_OFFSET);
	bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
}

/*
 * Write the file-start marker record.  The type is stored big-endian
 * and pid/tid are -1 so that kdump(1) can detect the byte order of the
 * trace file from this first record.
 */
void
ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
{
	struct ktr_header kth;

	ktrinitheaderraw(&kth, htobe32(KTR_START), -1, -1);
	ktrwriteraw(p, vp, cred, &kth, NULL);
}

/*
 * Emit a KTR_SYSCALL record: syscall code, argument size, and the
 * argument words themselves.  For native __sysctl calls the mib[]
 * array pointed to by args[0] is appended as well (up to CTL_MAXNAME
 * ints); if the copyin of the mib fails, zeros are recorded instead.
 */
void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct ktr_header kth;
	struct ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	int i;

	if (code == SYS___sysctl && (p->p_p->ps_emul->e_flags & EMUL_NATIVE)) {
		/*
		 * The native sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = lmin(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	/* P_INKTR prevents recursive tracing while we trace. */
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int)))
		memset(argp, 0, nargs * sizeof(int));
	kth.ktr_len = len;
	ktrwrite(p, &kth, ktp);
	free(ktp, M_TEMP, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit a KTR_SYSRET record: syscall code, error status, and the return
 * value (recorded as 0 when the call failed).
 */
void
ktrsysret(struct proc *p, register_t code, int error, register_t retval)
{
	struct ktr_header kth;
	struct ktr_sysret ktp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSRET);
	ktp.ktr_code = code;
	ktp.ktr_error = error;
	ktp.ktr_retval = error == 0 ? retval : 0;

	kth.ktr_len = sizeof(struct ktr_sysret);

	ktrwrite(p, &kth, &ktp);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit a KTR_NAMEI record containing the pathname being looked up
 * (without NUL terminator; the record length carries the size).
 */
void
ktrnamei(struct proc *p, char *path)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_NAMEI);
	kth.ktr_len = strlen(path);

	ktrwrite(p, &kth, path);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit a KTR_EMUL record for process `pr' (emulation name string),
 * written by thread `curp' using pr's own trace vnode/credentials.
 * `tid' may be -1 when the record is not tied to a particular thread
 * (e.g. when emitted from ktrops() on attach).
 */
void
ktremulraw(struct proc *curp, struct process *pr, pid_t tid)
{
	struct ktr_header kth;
	char *emul = pr->ps_emul->e_name;

	ktrinitheaderraw(&kth, KTR_EMUL, pr->ps_pid, tid);
	kth.ktr_len = strlen(emul);

	ktrwriteraw(curp, pr->ps_tracevp, pr->ps_tracecred, &kth, emul);
}

/*
 * Emit a KTR_EMUL record for the calling thread's own process.
 */
void
ktremul(struct proc *p)
{
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktremulraw(p, p->p_p, p->p_pid + THREAD_PID_OFFSET);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit KTR_GENIO records for a read/write of `len' bytes described by
 * the iovec array.  Data is copied from userland in chunks of at most
 * one page (minus the record header) and one record is written per
 * chunk; tracing of the transfer stops early on copyin or write error.
 * The iovec array is consumed (iov_base/iov_len advanced) as a side
 * effect.
 */
void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
    ssize_t len)
{
	struct ktr_header kth;
	struct ktr_genio *ktp;
	caddr_t cp;
	int count;
	int mlen, buflen;

	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE - sizeof(struct ktr_genio))
		buflen = PAGE_SIZE;
	else
		buflen = len + sizeof(struct ktr_genio);

	ktrinitheader(&kth, p, KTR_GENIO);
	mlen = buflen;		/* remember allocation size for free() */
	ktp = malloc(mlen, M_TEMP, M_WAITOK);
	ktp->ktr_fd = fd;
	ktp->ktr_rw = rw;

	cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio));
	buflen -= sizeof(struct ktr_genio);

	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
			preempt(NULL);

		count = lmin(iov->iov_len, buflen);
		if (count > len)
			count = len;
		if (copyin(iov->iov_base, cp, count))
			break;

		kth.ktr_len = count + sizeof(struct ktr_genio);

		if (ktrwrite(p, &kth, ktp) != 0)
			break;

		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		if (iov->iov_len == 0)
			iov++;

		len -= count;
	}

	free(ktp, M_TEMP, mlen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit a KTR_PSIG record describing signal delivery: signal number,
 * handler, blocked mask, code and full siginfo.
 */
void
ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
    siginfo_t *si)
{
	struct ktr_header kth;
	struct ktr_psig kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PSIG);
	kp.signo = (char)sig;
	kp.action = action;
	kp.mask = mask;
	kp.code = code;
	kp.si = *si;
	kth.ktr_len = sizeof(struct ktr_psig);

	ktrwrite(p, &kth, &kp);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit a KTR_CSW (context switch) record: whether the thread is
 * switching out and whether it was running in user mode.
 */
void
ktrcsw(struct proc *p, int out, int user)
{
	struct ktr_header kth;
	struct ktr_csw kc;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_CSW);
	kc.out = out;
	kc.user = user;
	kth.ktr_len = sizeof(struct ktr_csw);

	ktrwrite(p, &kth, &kc);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit a KTR_STRUCT record: a NUL-terminated structure name followed by
 * `datalen' bytes of structure contents.  A NULL `data' records just
 * the name.  Must be called with the kernel lock held.
 */
void
ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
{
	struct ktr_header kth;
	void *buf;
	size_t buflen;

	KERNEL_ASSERT_LOCKED();
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_STRUCT);

	if (data == NULL)
		datalen = 0;
	buflen = strlen(name) + 1 + datalen;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	strlcpy(buf, name, buflen);
	bcopy(data, buf + strlen(name) + 1, datalen);
	kth.ktr_len = buflen;

	ktrwrite(p, &kth, buf);
	free(buf, M_TEMP, buflen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

/*
 * Emit a KTR_USER record on behalf of utrace(2): an identifier string
 * (copied in from userland, at most KTR_USER_MAXIDLEN) followed by up
 * to KTR_USER_MAXLEN bytes of user data.  Small records are staged on
 * the stack; larger ones are malloc'd.  Returns 0 on success or an
 * errno (EINVAL for oversized requests, or a copyin error).
 */
int
ktruser(struct proc *p, const char *id, const void *addr, size_t len)
{
	struct ktr_header kth;
	struct ktr_user *ktp;
	int error;
	void *memp;
	size_t size;
#define	STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if (!KTRPOINT(p, KTR_USER))
		return (0);
	if (len > KTR_USER_MAXLEN)
		return EINVAL;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_USER);
	size = sizeof(*ktp) + len;
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = malloc(sizeof(*ktp) + len, M_TEMP, M_WAITOK);
		ktp = (struct ktr_user *)memp;
	} else
		ktp = (struct ktr_user *)stkbuf;
	memset(ktp->ktr_id, 0, KTR_USER_MAXIDLEN);
	error = copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL);
	if (error)
		goto out;

	error = copyin(addr, (void *)(ktp + 1), len);
	if (error)
		goto out;
	kth.ktr_len = sizeof(*ktp) + len;
	ktrwrite(p, &kth, ktp);
out:
	if (memp != NULL)
		free(memp, M_TEMP, sizeof(*ktp) + len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
	return (error);
}


/* Interface and common routines */

/*
 * ktrace system call
 *
 * Dispatches on the requested operation: opens the trace file (unless
 * clearing), handles KTROP_CLEARFILE by detaching every process traced
 * to that vnode, and otherwise applies the set/clear to the target pid
 * or process group, optionally descending to children.  Only root may
 * set KTRFAC_ROOT-protected tracing.
 */
/* ARGSUSED */
int
sys_ktrace(struct proc *curp, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	int facs = SCARG(uap, facs) & ~((unsigned) KTRFAC_ROOT);
	int ops = KTROP(SCARG(uap, ops));
	int descend = SCARG(uap, ops) & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;
	struct nameidata nd;

	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		cred = curp->p_ucred;
		crhold(cred);
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    curp);
		if ((error = vn_open(&nd, FREAD|FWRITE|O_NOFOLLOW, 0)) != 0)
			goto done;
		vp = nd.ni_vp;

		VOP_UNLOCK(vp, 0, curp);
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(pr, &allprocess, ps_list) {
			if (pr->ps_tracevp == vp) {
				if (ktrcanset(curp, pr))
					ktrcleartrace(pr);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		if (suser(curp, 0) == 0)
			facs |= KTRFAC_ROOT;
		ktrstart(curp, vp, cred);
	}
	/*
	 * do it
	 */
	if (SCARG(uap, pid) < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-SCARG(uap, pid));
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(curp, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(curp, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(SCARG(uap, pid));
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(curp, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(curp, pr, ops, facs, vp, cred);
	}
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FREAD|FWRITE, cred, curp);
	if (cred != NULL)
		crfree(cred);
	return (error);
}

/*
 * Apply a set/clear trace operation to a single process.  Returns 1 on
 * success, 0 if the caller lacks permission (see ktrcanset()).  After
 * any change an emulation record is emitted if KTR_EMUL tracing is on.
 */
int
ktrops(struct proc *curp, struct process *pr, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	if (!ktrcanset(curp, pr))
		return (0);
	if (ops == KTROP_SET)
		ktrsettrace(pr, facs, vp, cred);
	else {
		/* KTROP_CLEAR */
		pr->ps_traceflag &= ~facs;
		if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
			/* cleared all the facility bits, so stop completely */
			ktrcleartrace(pr);
		}
	}

	/*
	 * Emit an emulation record every time there is a ktrace
	 * change/attach request.
	 */
	if (pr->ps_traceflag & KTRFAC_EMUL)
		ktremulraw(curp, pr, -1);

	return (1);
}

/*
 * Apply a trace operation to `top' and all of its descendants via an
 * iterative pre-order walk of the process tree (children first, then
 * siblings, climbing back up but never past `top').  Returns the OR of
 * the per-process ktrops() results.
 */
int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct process *pr;
	int ret = 0;

	pr = top;
	for (;;) {
		ret |= ktrops(curp, pr, ops, facs, vp, cred);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}

/*
 * Write a trace record for thread `p' using its process's current trace
 * vnode and credentials.  Quietly succeeds (returns 0) when tracing has
 * been turned off; the cred is held across the write so it cannot be
 * freed mid-write by a concurrent ktrcleartrace().
 */
int
ktrwrite(struct proc *p, struct ktr_header *kth, void *aux)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	error = ktrwriteraw(p, vp, cred, kth, aux);
	crfree(cred);
	return (error);
}

/*
 * Low-level record writer: appends the header plus optional payload
 * (`aux', kth->ktr_len bytes) to the trace vnode in a single atomic
 * write.  On write error, tracing is disabled for every process using
 * this vnode/cred pair, and the error is returned.
 */
int
ktrwriteraw(struct proc *curp, struct vnode *vp, struct ucred *cred,
    struct ktr_header *kth, void *aux)
{
	struct uio auio;
	struct iovec aiov[2];
	struct process *pr;
	int error;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = curp;
	if (kth->ktr_len > 0) {
		auio.uio_iovcnt++;
		aiov[1].iov_base = aux;
		aiov[1].iov_len = kth->ktr_len;
		auio.uio_resid += kth->ktr_len;
	}
	vget(vp, LK_EXCLUSIVE | LK_RETRY, curp);
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
	if (!error) {
		vput(vp);
		return (0);
	}
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	LIST_FOREACH(pr, &allprocess, ps_list)
		if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
			ktrcleartrace(pr);

	vput(vp);
	return (error);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
 */
int
ktrcanset(struct proc *callp, struct process *targetpr)
{
	struct ucred *caller = callp->p_ucred;
	struct ucred *target = targetpr->ps_ucred;

	if ((caller->cr_uid == target->cr_ruid &&
	    target->cr_ruid == target->cr_svuid &&
	    caller->cr_rgid == target->cr_rgid &&	/* XXX */
	    target->cr_rgid == target->cr_svgid &&
	    (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 &&
	    !ISSET(targetpr->ps_flags, PS_SUGID)) ||
	    caller->cr_uid == 0)
		return (1);

	return (0);
}