/*	$OpenBSD: kern_ktrace.c,v 1.54 2011/07/11 15:40:47 guenther Exp $	*/
/*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#ifdef KTRACE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

void ktrinitheader(struct ktr_header *, struct proc *, int);
int ktrops(struct proc *, struct proc *, int, int, struct vnode *);
int ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *);
int ktrwrite(struct proc *, struct ktr_header *);
int ktrcanset(struct proc *, struct proc *);

/*
 * Change the trace vnode in a correct way (to avoid races).
 *
 * The reference on the new vnode is taken before it is installed in
 * p->p_tracep, and the reference on the previous vnode is dropped only
 * after it has been replaced.  Passing newvp == NULL detaches the
 * thread from its trace file.
 */
void
ktrsettracevnode(struct proc *p, struct vnode *newvp)
{
	struct vnode *vp;

	if (p->p_tracep == newvp)	/* avoid work */
		return;

	if (newvp != NULL)
		vref(newvp);

	vp = p->p_tracep;
	p->p_tracep = newvp;

	if (vp != NULL)
		vrele(vp);
}

/*
 * Fill in the fields of a trace record header that are common to all
 * record types: the record type, the current time, and the pid and
 * command name of p.  The header is zeroed first, so ktr_buf and
 * ktr_len start out as NULL/0 and must be set by the caller.
 */
void
ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
{
	bzero(kth, sizeof (struct ktr_header));
	kth->ktr_type = type;
	microtime(&kth->ktr_time);
	kth->ktr_pid = p->p_pid;
	/*
	 * Only MAXCOMLEN bytes are copied; the rest of ktr_comm keeps the
	 * zeroes from the bzero() above (presumably ktr_comm holds
	 * MAXCOMLEN + 1 bytes, so the name stays terminated -- confirm in
	 * <sys/ktrace.h>).
	 */
	bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
}

/*
 * Emit a KTR_SYSCALL record for system call `code'.
 *
 * The record body is a struct ktr_syscall followed by the argsize bytes
 * of register arguments in args[].  For a native __sysctl call, up to
 * CTL_MAXNAME ints of the user-space mib[] array (address in args[0],
 * element count in args[1]) are appended after the arguments.
 */
void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct ktr_header kth;
	struct ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	size_t i;

	if (code == SYS___sysctl && (p->p_emul->e_flags & EMUL_NATIVE)) {
		/*
		 * The native sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = min(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	p->p_traceflag |= KTRFAC_ACTIVE;
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	/* The argument words start immediately after the fixed part. */
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	/*
	 * argp now points just past the copied arguments.  If the mib[]
	 * cannot be read from user space, record zeroes in its place
	 * rather than leaving that part of the buffer uninitialized.
	 */
	if (code == SYS___sysctl && (p->p_emul->e_flags & EMUL_NATIVE) &&
	    nargs &&
	    copyin((void *)args[0], argp, nargs * sizeof(int)))
		bzero(argp, nargs * sizeof(int));
	kth.ktr_buf = (caddr_t)ktp;
	kth.ktr_len = len;
	ktrwrite(p, &kth);
	free(ktp, M_TEMP);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/*
 * Emit a KTR_SYSRET record carrying the system call number, its error
 * status and its return value.
 */
void
ktrsysret(struct proc *p, register_t code, int error, register_t retval)
{
	struct ktr_header kth;
	struct ktr_sysret ktp;

	p->p_traceflag |= KTRFAC_ACTIVE;
	ktrinitheader(&kth, p, KTR_SYSRET);
	/*
	 * The whole structure is written to the trace file, so clear it
	 * first: any padding or member not assigned below must not carry
	 * stale kernel stack contents out of the kernel.
	 */
	bzero(&ktp, sizeof(ktp));
	ktp.ktr_code = code;
	ktp.ktr_error = error;
	ktp.ktr_retval = retval;

	kth.ktr_buf = (caddr_t)&ktp;
	kth.ktr_len = sizeof(struct ktr_sysret);

	ktrwrite(p, &kth);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/*
 * Emit a KTR_NAMEI record whose body is the path string, without its
 * terminating NUL (ktr_len is strlen(path)).
 */
void
ktrnamei(struct proc *p, char *path)
{
	struct ktr_header kth;

	p->p_traceflag |= KTRFAC_ACTIVE;
	ktrinitheader(&kth, p, KTR_NAMEI);
	kth.ktr_len = strlen(path);
	kth.ktr_buf = path;

	ktrwrite(p, &kth);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/*
 * Emit a KTR_EMUL record whose body is the emulation name, without its
 * terminating NUL (ktr_len is strlen(emul)).
 */
void
ktremul(struct proc *p, char *emul)
{
	struct ktr_header kth;

	p->p_traceflag |= KTRFAC_ACTIVE;
	ktrinitheader(&kth, p, KTR_EMUL);
	kth.ktr_len = strlen(emul);
	kth.ktr_buf = emul;

	ktrwrite(p, &kth);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/*
 * Emit KTR_GENIO records for `len' bytes of I/O described by the
 * user-space iovec array `iov' on descriptor `fd'.
 *
 * Nothing is recorded if the I/O failed (error != 0).  The data is
 * copied in from user space and written out in chunks, each chunk
 * being its own record, using a bounce buffer of at most PAGE_SIZE
 * bytes including the struct ktr_genio prefix.  Recording stops early
 * if a copyin() or a trace write fails.
 *
 * Note that the iov_base/iov_len members of the caller's iovec array
 * are advanced in place as the data is consumed.
 */
void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov, int len,
    int error)
{
	struct ktr_header kth;
	struct ktr_genio *ktp;
	caddr_t cp;
	int resid = len, count;
	int buflen;

	if (error)
		return;

	p->p_traceflag |= KTRFAC_ACTIVE;

	buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));

	ktrinitheader(&kth, p, KTR_GENIO);
	ktp = malloc(buflen, M_TEMP, M_WAITOK);
	ktp->ktr_fd = fd;
	ktp->ktr_rw = rw;

	kth.ktr_buf = (caddr_t)ktp;

	/* From here on buflen is the room left for payload data only. */
	cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio));
	buflen -= sizeof(struct ktr_genio);

	while (resid > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
			preempt(NULL);

		count = min(iov->iov_len, buflen);
		if (count > resid)
			count = resid;
		if (copyin(iov->iov_base, cp, count))
			break;

		kth.ktr_len = count + sizeof(struct ktr_genio);

		if (ktrwrite(p, &kth) != 0)
			break;

		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		/* Current segment exhausted: move on to the next one. */
		if (iov->iov_len == 0)
			iov++;

		resid -= count;
	}

	free(ktp, M_TEMP);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/*
 * Emit a KTR_PSIG record describing the delivery of signal `sig':
 * its handler, signal mask, code and a copy of the siginfo at *si.
 */
void
ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
    siginfo_t *si)
{
	struct ktr_header kth;
	struct ktr_psig kp;

	p->p_traceflag |= KTRFAC_ACTIVE;
	ktrinitheader(&kth, p, KTR_PSIG);
	/*
	 * The whole structure is written to the trace file, so clear it
	 * first: any padding between members must not carry stale kernel
	 * stack contents out of the kernel.
	 */
	bzero(&kp, sizeof(kp));
	kp.signo = (char)sig;
	kp.action = action;
	kp.mask = mask;
	kp.code = code;
	kp.si = *si;
	kth.ktr_buf = (caddr_t)&kp;
	kth.ktr_len = sizeof(struct ktr_psig);

	ktrwrite(p, &kth);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/*
 * Emit a KTR_CSW (context switch) record carrying the caller-supplied
 * `out' and `user' values.
 */
void
ktrcsw(struct proc *p, int out, int user)
{
	struct ktr_header kth;
	struct ktr_csw kc;

	p->p_traceflag |= KTRFAC_ACTIVE;
	ktrinitheader(&kth, p, KTR_CSW);
	/* Cleared for the same reason as the other on-stack records. */
	bzero(&kc, sizeof(kc));
	kc.out = out;
	kc.user = user;
	kth.ktr_buf = (caddr_t)&kc;
	kth.ktr_len = sizeof(struct ktr_csw);

	ktrwrite(p, &kth);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/*
 * Emit a KTR_STRUCT record.  The body is the NUL-terminated structure
 * name immediately followed by datalen bytes copied from `data'.
 * A NULL `data' pointer produces a record holding only the name.
 */
void
ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
{
	struct ktr_header kth;
	char *buf;
	size_t namelen, buflen;

	p->p_traceflag |= KTRFAC_ACTIVE;
	ktrinitheader(&kth, p, KTR_STRUCT);

	if (data == NULL)
		datalen = 0;
	namelen = strlen(name);
	buflen = namelen + 1 + datalen;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	/* buflen >= namelen + 1, so the name is never truncated. */
	strlcpy(buf, name, buflen);
	if (datalen != 0)
		bcopy(data, buf + namelen + 1, datalen);
	kth.ktr_buf = buf;
	kth.ktr_len = buflen;

	ktrwrite(p, &kth);
	free(buf, M_TEMP);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/* Interface and common routines */

/*
 * ktrace system call
 *
 * Every operation except KTROP_CLEAR needs a trace file: `fname' is
 * opened for reading and writing without following a final symlink,
 * and must be a regular file (EACCES otherwise).
 *
 * KTROP_CLEARFILE stops tracing on every thread currently tracing to
 * that file which the caller is permitted to modify; EPERM is returned
 * if at least one thread had to be skipped.
 *
 * For the other operations `facs' (with KTRFAC_ROOT masked out) must be
 * non-zero, else EINVAL.  A negative pid selects the process group
 * -pid, otherwise the single process pid; ESRCH is returned if it does
 * not exist.  With KTRFLAG_DESCEND set, all descendants of each
 * selected process are handled as well.  EPERM is returned if the
 * caller was not allowed to change the trace state of any thread.
 */
/* ARGSUSED */
int
sys_ktrace(struct proc *curp, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	struct proc *p = NULL;
	struct process *pr = NULL;
	struct pgrp *pg;
	int facs = SCARG(uap, facs) & ~((unsigned) KTRFAC_ROOT);
	int ops = KTROP(SCARG(uap, ops));
	int descend = SCARG(uap, ops) & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;
	struct nameidata nd;

	curp->p_traceflag |= KTRFAC_ACTIVE;
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    curp);
		if ((error = vn_open(&nd, FREAD|FWRITE|O_NOFOLLOW, 0)) != 0) {
			curp->p_traceflag &= ~KTRFAC_ACTIVE;
			return (error);
		}
		vp = nd.ni_vp;

		VOP_UNLOCK(vp, 0, curp);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
			curp->p_traceflag &= ~KTRFAC_ACTIVE;
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(p, &allproc, p_list) {
			if (p->p_tracep == vp) {
				if (ktrcanset(curp, p)) {
					p->p_traceflag = 0;
					ktrsettracevnode(p, NULL);
				} else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	/*
	 * do it
	 */
	if (SCARG(uap, pid) < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-SCARG(uap, pid));
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(curp, pr, ops, facs, vp);
			else
				TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link)
					ret |= ktrops(curp, p, ops, facs, vp);
		}

	} else {
		/*
		 * by pid
		 */
		pr = prfind(SCARG(uap, pid));
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(curp, pr, ops, facs, vp);
		else
			TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
				ret |= ktrops(curp, p, ops, facs, vp);
			}
	}
	/* ret stays 0 only if no thread at all could be modified. */
	if (!ret)
		error = EPERM;
done:
	/*
	 * Threads that were attached hold their own reference on vp (taken
	 * in ktrsettracevnode()), so the reference from vn_open() can go.
	 *
	 * NOTE(review): the file was opened with FREAD|FWRITE but is closed
	 * here with FWRITE only, unlike the error path above -- confirm
	 * this asymmetry is intentional.
	 */
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, curp->p_ucred, curp);
	curp->p_traceflag &= ~KTRFAC_ACTIVE;
	return (error);
}

/*
 * Apply a set or clear operation to a single thread.
 *
 * Returns 0 without touching p if curp lacks permission (see
 * ktrcanset()), 1 otherwise.  KTROP_SET attaches p to vp and adds
 * `facs' to its trace flags, marking it KTRFAC_ROOT when the caller is
 * the superuser.  Any other operation removes `facs' and, once no
 * facility in KTRFAC_MASK remains, stops tracing on p altogether.
 */
int
ktrops(struct proc *curp, struct proc *p, int ops, int facs, struct vnode *vp)
{

	if (!ktrcanset(curp, p))
		return (0);
	if (ops == KTROP_SET) {
		ktrsettracevnode(p, vp);
		p->p_traceflag |= facs;
		if (suser(curp, 0) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			p->p_traceflag = 0;
			ktrsettracevnode(p, NULL);
		}
	}

	/*
	 * Emit an emulation record, every time there is a ktrace
	 * change/attach request.
	 */
	if (KTRPOINT(p, KTR_EMUL))
		ktremul(p, p->p_emul->e_name);

	return (1);
}

/*
 * Apply ktrops() to every thread of process `top' and of all of its
 * descendants, walking the process tree iteratively.
 *
 * Returns non-zero if the operation succeeded on at least one thread.
 */
int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp)
{
	struct process *pr;
	struct proc *p;
	int ret = 0;

	pr = top;
	for (;;) {
		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link)
			ret |= ktrops(curp, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}

/*
 * Append one trace record (header plus optional body) to the trace
 * file of p.
 *
 * Returns 0 on success or if p has no trace vnode.  If the write
 * fails, the error is logged, tracing is switched off for every thread
 * using this vnode, and the error is returned.
 */
int
ktrwrite(struct proc *p, struct ktr_header *kth)
{
	struct uio auio;
	struct iovec aiov[2];
	int error;
	struct vnode *vp = p->p_tracep;

	if (vp == NULL)
		return 0;
	/* Segment 0 is the header; segment 1, if any, is the body. */
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = p;
	if (kth->ktr_len > 0) {
		auio.uio_iovcnt++;
		aiov[1].iov_base = kth->ktr_buf;
		aiov[1].iov_len = kth->ktr_len;
		auio.uio_resid += kth->ktr_len;
	}
	/*
	 * NOTE(review): the return value of vget() is not checked --
	 * confirm it cannot fail here.
	 */
	vget(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred);
	if (!error) {
		vput(vp);
		return (0);
	}
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	/* p is reused as the iterator; the original thread is not needed. */
	LIST_FOREACH(p, &allproc, p_list) {
		if (p->p_tracep == vp) {
			p->p_traceflag = 0;
			ktrsettracevnode(p, NULL);
		}
	}

	vput(vp);
	return (error);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
 */
int
ktrcanset(struct proc *callp, struct proc *targetp)
{
	struct pcred *caller = callp->p_cred;
	struct pcred *target = targetp->p_cred;

	if ((caller->pc_ucred->cr_uid == target->p_ruid &&
	    target->p_ruid == target->p_svuid &&
	    caller->p_rgid == target->p_rgid &&	/* XXX */
	    target->p_rgid == target->p_svgid &&
	    (targetp->p_traceflag & KTRFAC_ROOT) == 0 &&
	    !ISSET(targetp->p_p->ps_flags, PS_SUGID)) ||
	    caller->pc_ucred->cr_uid == 0)
		return (1);

	return (0);
}

#endif