1 /* $NetBSD: kern_ktrace.c,v 1.95 2004/10/26 06:58:05 skrll Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 *
 *	@(#)kern_ktrace.c	8.5 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.95 2004/10/26 06:58:05 skrll Exp $");

#include "opt_ktrace.h"
#include "opt_compat_mach.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/callout.h>

#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>

#ifdef KTRACE

/*
 * XXX:
 *	- need better error reporting?
 *	- p->p_tracep access lock.  lock p_lock, lock ktd if !NULL, inc ref.
 *	- userland utility to sort ktrace.out by timestamp.
 *	- keep minimum information in ktrace_entry when rest of alloc failed.
 *	- enlarge ktrace_entry so that small entry won't require additional
 *	  alloc?
 *	- per trace control of configurable parameters.
 */

/*
 * One queued trace record.  Entries come from kte_pool, are linked on
 * ktr_desc.ktd_queue by ktraddentry() and are released by ktefree().
 * ktrwrite() emits kte_kth followed by kte_kth.ktr_len bytes of kte_buf.
 */
struct ktrace_entry {
	TAILQ_ENTRY(ktrace_entry) kte_list;	/* ktd_queue linkage */
	struct ktr_header kte_kth;		/* record header */
	void *kte_buf;				/* ktr_buf */
};

/*
 * Per trace file state.  Descriptors live on the global ktdq list and
 * are matched against a struct file by ktd_lookup(); each one is
 * serviced by its own kernel thread (ktrace_thread()).
 */
struct ktr_desc {
	TAILQ_ENTRY(ktr_desc) ktd_list;	/* ktdq linkage */
	int ktd_flags;
#define	KTDF_WAIT		0x0001	/* a producer sleeps on &ktd_flags */
#define	KTDF_DONE		0x0002	/* set when the last reference is dropped */
#define	KTDF_BLOCKING		0x0004	/* a sync wait timed out; skip syncing */
#define	KTDF_INTERACTIVE	0x0008	/* output is a pipe; use ktd_intrwakdl */
	int ktd_error;			/* mask of KTDE_* seen since last drain */
#define	KTDE_ENOMEM		0x0001
#define	KTDE_ENOSPC		0x0002
	int ktd_errcnt;			/* # of errors since last drain */
	int ktd_ref;			/* # of reference */
	int ktd_qcount;			/* # of entry in the queue */

	/*
	 * Params to control behaviour.
	 */
	int ktd_delayqcnt;		/* # of entry allowed to delay */
	int ktd_wakedelay;		/* delay of wakeup in *tick* */
	int ktd_intrwakdl;		/* ditto, but when interactive */

	struct file *ktd_fp;		/* trace output file */
	struct proc *ktd_proc;		/* our kernel thread */
	TAILQ_HEAD(, ktrace_entry) ktd_queue;	/* records awaiting write */
	struct callout ktd_wakch;	/* delayed wakeup */
	struct simplelock ktd_slock;	/* taken around flag/ref/queue updates */
};

static void	ktrinitheader(struct ktr_header *, struct proc *, int);
static void	ktrwrite(struct ktr_desc *, struct ktrace_entry *);
static int	ktrace_common(struct proc *, int, int, int, struct file *);
static int	ktrops(struct proc *, struct proc *, int, int,
		    struct ktr_desc *);
static int	ktrsetchildren(struct proc *, struct proc *, int, int,
		    struct ktr_desc *);
static int	ktrcanset(struct proc *, struct proc *);
static int	ktrsamefile(struct file *, struct file *);

static struct ktr_desc *
		ktd_lookup(struct file *);
static void	ktdrel(struct ktr_desc *);
static void	ktdref(struct ktr_desc *);
static void	ktraddentry(struct proc *, struct ktrace_entry *, int);
/* Flags for ktraddentry (3rd arg) */
#define	KTA_NOWAIT		0x0000
#define	KTA_WAITOK		0x0001	/* caller may sleep to sync with writer */
#define	KTA_LARGE		0x0002	/* passed by ktrgenio(); test is disabled */
static void	ktefree(struct ktrace_entry *);
static void	ktd_logerrl(struct ktr_desc *, int);
static void	ktd_logerr(struct proc *, int);
static void	ktrace_thread(void *);

/*
 * Default values.
 */
#define	KTD_MAXENTRY		1000	/* XXX: tune */
#define	KTD_TIMEOUT		5	/* XXX: tune */
#define	KTD_DELAYQCNT		100	/* XXX: tune */
#define	KTD_WAKEDELAY		5000	/* XXX: tune */
#define	KTD_INTRWAKDL		100	/* XXX: tune */

/*
 * Patchable variables.
141 */ 142 int ktd_maxentry = KTD_MAXENTRY; /* max # of entry in the queue */ 143 int ktd_timeout = KTD_TIMEOUT; /* timeout in seconds */ 144 int ktd_delayqcnt = KTD_DELAYQCNT; /* # of entry allowed to delay */ 145 int ktd_wakedelay = KTD_WAKEDELAY; /* delay of wakeup in *ms* */ 146 int ktd_intrwakdl = KTD_INTRWAKDL; /* ditto, but when interactive */ 147 148 static struct simplelock ktdq_slock = SIMPLELOCK_INITIALIZER; 149 static TAILQ_HEAD(, ktr_desc) ktdq = TAILQ_HEAD_INITIALIZER(ktdq); 150 151 MALLOC_DEFINE(M_KTRACE, "ktrace", "ktrace data buffer"); 152 POOL_INIT(kte_pool, sizeof(struct ktrace_entry), 0, 0, 0, 153 "ktepl", &pool_allocator_nointr); 154 155 static __inline void 156 ktd_wakeup(struct ktr_desc *ktd) 157 { 158 159 callout_stop(&ktd->ktd_wakch); 160 wakeup(ktd); 161 } 162 163 static void 164 ktd_logerrl(struct ktr_desc *ktd, int error) 165 { 166 167 ktd->ktd_error |= error; 168 ktd->ktd_errcnt++; 169 } 170 171 static void 172 ktd_logerr(struct proc *p, int error) 173 { 174 struct ktr_desc *ktd = p->p_tracep; 175 176 if (ktd == NULL) 177 return; 178 179 simple_lock(&ktd->ktd_slock); 180 ktd_logerrl(ktd, error); 181 simple_unlock(&ktd->ktd_slock); 182 } 183 184 /* 185 * Release a reference. Called with ktd_slock held. 
186 */ 187 void 188 ktdrel(struct ktr_desc *ktd) 189 { 190 191 KDASSERT(ktd->ktd_ref != 0); 192 KASSERT(ktd->ktd_ref > 0); 193 if (--ktd->ktd_ref <= 0) { 194 ktd->ktd_flags |= KTDF_DONE; 195 wakeup(ktd); 196 } 197 simple_unlock(&ktd->ktd_slock); 198 } 199 200 void 201 ktdref(struct ktr_desc *ktd) 202 { 203 204 simple_lock(&ktd->ktd_slock); 205 ktd->ktd_ref++; 206 simple_unlock(&ktd->ktd_slock); 207 } 208 209 struct ktr_desc * 210 ktd_lookup(struct file *fp) 211 { 212 struct ktr_desc *ktd; 213 214 simple_lock(&ktdq_slock); 215 for (ktd = TAILQ_FIRST(&ktdq); ktd != NULL; 216 ktd = TAILQ_NEXT(ktd, ktd_list)) { 217 simple_lock(&ktd->ktd_slock); 218 if (ktrsamefile(ktd->ktd_fp, fp)) { 219 ktd->ktd_ref++; 220 simple_unlock(&ktd->ktd_slock); 221 break; 222 } 223 simple_unlock(&ktd->ktd_slock); 224 } 225 simple_unlock(&ktdq_slock); 226 return (ktd); 227 } 228 229 void 230 ktraddentry(struct proc *p, struct ktrace_entry *kte, int flags) 231 { 232 struct ktr_desc *ktd; 233 #ifdef DEBUG 234 struct timeval t; 235 int s; 236 #endif 237 238 if (p->p_traceflag & KTRFAC_TRC_EMUL) { 239 /* Add emulation trace before first entry for this process */ 240 p->p_traceflag &= ~KTRFAC_TRC_EMUL; 241 ktremul(p); 242 } 243 244 /* 245 * Tracing may be canceled while we were sleeping waiting for 246 * memory. 247 */ 248 ktd = p->p_tracep; 249 if (ktd == NULL) 250 goto freekte; 251 252 /* 253 * Bump reference count so that the object will remain while 254 * we are here. Note that the trace is controlled by other 255 * process. 
256 */ 257 ktdref(ktd); 258 259 simple_lock(&ktd->ktd_slock); 260 if (ktd->ktd_flags & KTDF_DONE) 261 goto relktd; 262 263 if (ktd->ktd_qcount > ktd_maxentry) { 264 ktd_logerrl(ktd, KTDE_ENOSPC); 265 goto relktd; 266 } 267 TAILQ_INSERT_TAIL(&ktd->ktd_queue, kte, kte_list); 268 ktd->ktd_qcount++; 269 if (ktd->ktd_flags & KTDF_BLOCKING) 270 goto skip_sync; 271 272 if (flags & KTA_WAITOK && 273 (/* flags & KTA_LARGE */0 || ktd->ktd_flags & KTDF_WAIT || 274 ktd->ktd_qcount > ktd_maxentry >> 1)) 275 /* 276 * Sync with writer thread since we're requesting rather 277 * big one or many requests are pending. 278 */ 279 do { 280 ktd->ktd_flags |= KTDF_WAIT; 281 ktd_wakeup(ktd); 282 #ifdef DEBUG 283 s = splclock(); 284 t = mono_time; 285 splx(s); 286 #endif 287 if (ltsleep(&ktd->ktd_flags, PWAIT, "ktrsync", 288 ktd_timeout * hz, &ktd->ktd_slock) != 0) { 289 ktd->ktd_flags |= KTDF_BLOCKING; 290 /* 291 * Maybe the writer thread is blocking 292 * completely for some reason, but 293 * don't stop target process forever. 294 */ 295 log(LOG_NOTICE, "ktrace timeout\n"); 296 break; 297 } 298 #ifdef DEBUG 299 s = splclock(); 300 timersub(&mono_time, &t, &t); 301 splx(s); 302 if (t.tv_sec > 0) 303 log(LOG_NOTICE, 304 "ktrace long wait: %ld.%06ld\n", 305 t.tv_sec, t.tv_usec); 306 #endif 307 } while (p->p_tracep == ktd && 308 (ktd->ktd_flags & (KTDF_WAIT | KTDF_DONE)) == KTDF_WAIT); 309 else { 310 /* Schedule delayed wakeup */ 311 if (ktd->ktd_qcount > ktd->ktd_delayqcnt) 312 ktd_wakeup(ktd); /* Wakeup now */ 313 else if (!callout_pending(&ktd->ktd_wakch)) 314 callout_reset(&ktd->ktd_wakch, 315 ktd->ktd_flags & KTDF_INTERACTIVE ? 
316 ktd->ktd_intrwakdl : ktd->ktd_wakedelay, 317 (void (*)(void *))wakeup, ktd); 318 } 319 320 skip_sync: 321 ktdrel(ktd); 322 return; 323 324 relktd: 325 ktdrel(ktd); 326 327 freekte: 328 ktefree(kte); 329 } 330 331 void 332 ktefree(struct ktrace_entry *kte) 333 { 334 struct ktr_header *kth = &kte->kte_kth; 335 336 if (kth->ktr_len > 0) 337 free(kte->kte_buf, M_KTRACE); 338 pool_put(&kte_pool, kte); 339 } 340 341 /* 342 * "deep" compare of two files for the purposes of clearing a trace. 343 * Returns true if they're the same open file, or if they point at the 344 * same underlying vnode/socket. 345 */ 346 347 int 348 ktrsamefile(struct file *f1, struct file *f2) 349 { 350 351 return ((f1 == f2) || 352 ((f1 != NULL) && (f2 != NULL) && 353 (f1->f_type == f2->f_type) && 354 (f1->f_data == f2->f_data))); 355 } 356 357 void 358 ktrderef(struct proc *p) 359 { 360 struct ktr_desc *ktd = p->p_tracep; 361 362 p->p_traceflag = 0; 363 if (ktd == NULL) 364 return; 365 p->p_tracep = NULL; 366 367 simple_lock(&ktd->ktd_slock); 368 wakeup(&ktd->ktd_flags); 369 ktdrel(ktd); 370 } 371 372 void 373 ktradref(struct proc *p) 374 { 375 struct ktr_desc *ktd = p->p_tracep; 376 377 ktdref(ktd); 378 } 379 380 void 381 ktrinitheader(struct ktr_header *kth, struct proc *p, int type) 382 { 383 384 (void)memset(kth, 0, sizeof(*kth)); 385 kth->ktr_type = type; 386 microtime(&kth->ktr_time); 387 kth->ktr_pid = p->p_pid; 388 memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN); 389 } 390 391 void 392 ktrsyscall(struct proc *p, register_t code, register_t realcode, 393 const struct sysent *callp, register_t args[]) 394 { 395 struct ktrace_entry *kte; 396 struct ktr_header *kth; 397 struct ktr_syscall *ktp; 398 register_t *argp; 399 int argsize; 400 size_t len; 401 u_int i; 402 403 if (callp == NULL) 404 callp = p->p_emul->e_sysent; 405 406 argsize = callp[code].sy_argsize; 407 #ifdef _LP64 408 if (p->p_flag & P_32) 409 argsize = argsize << 1; 410 #endif 411 len = sizeof(struct ktr_syscall) + argsize; 412 
413 p->p_traceflag |= KTRFAC_ACTIVE; 414 kte = pool_get(&kte_pool, PR_WAITOK); 415 kth = &kte->kte_kth; 416 ktrinitheader(kth, p, KTR_SYSCALL); 417 418 ktp = malloc(len, M_KTRACE, M_WAITOK); 419 ktp->ktr_code = realcode; 420 ktp->ktr_argsize = argsize; 421 argp = (register_t *)(ktp + 1); 422 for (i = 0; i < (argsize / sizeof(*argp)); i++) 423 *argp++ = args[i]; 424 kth->ktr_len = len; 425 kte->kte_buf = ktp; 426 427 ktraddentry(p, kte, KTA_WAITOK); 428 p->p_traceflag &= ~KTRFAC_ACTIVE; 429 } 430 431 void 432 ktrsysret(struct proc *p, register_t code, int error, register_t *retval) 433 { 434 struct ktrace_entry *kte; 435 struct ktr_header *kth; 436 struct ktr_sysret *ktp; 437 438 p->p_traceflag |= KTRFAC_ACTIVE; 439 kte = pool_get(&kte_pool, PR_WAITOK); 440 kth = &kte->kte_kth; 441 ktrinitheader(kth, p, KTR_SYSRET); 442 443 ktp = malloc(sizeof(struct ktr_sysret), M_KTRACE, M_WAITOK); 444 ktp->ktr_code = code; 445 ktp->ktr_eosys = 0; /* XXX unused */ 446 ktp->ktr_error = error; 447 ktp->ktr_retval = retval ? retval[0] : 0; 448 ktp->ktr_retval_1 = retval ? retval[1] : 0; 449 450 kth->ktr_len = sizeof(struct ktr_sysret); 451 kte->kte_buf = ktp; 452 453 ktraddentry(p, kte, KTA_WAITOK); 454 p->p_traceflag &= ~KTRFAC_ACTIVE; 455 } 456 457 /* 458 * XXX: ndp->ni_pathlen should be passed. 
459 */ 460 void 461 ktrnamei(struct proc *p, char *path) 462 { 463 464 ktrkmem(p, KTR_NAMEI, path, strlen(path)); 465 } 466 467 void 468 ktremul(struct proc *p) 469 { 470 const char *emul = p->p_emul->e_name; 471 472 ktrkmem(p, KTR_EMUL, emul, strlen(emul)); 473 } 474 475 void 476 ktrkmem(struct proc *p, int type, const void *buf, size_t len) 477 { 478 struct ktrace_entry *kte; 479 struct ktr_header *kth; 480 481 p->p_traceflag |= KTRFAC_ACTIVE; 482 kte = pool_get(&kte_pool, PR_WAITOK); 483 kth = &kte->kte_kth; 484 ktrinitheader(kth, p, type); 485 486 kth->ktr_len = len; 487 kte->kte_buf = malloc(len, M_KTRACE, M_WAITOK); 488 memcpy(kte->kte_buf, buf, len); 489 490 ktraddentry(p, kte, KTA_WAITOK); 491 p->p_traceflag &= ~KTRFAC_ACTIVE; 492 } 493 494 void 495 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov, 496 int len, int error) 497 { 498 struct ktrace_entry *kte; 499 struct ktr_header *kth; 500 struct ktr_genio *ktp; 501 caddr_t cp; 502 int resid = len, cnt; 503 int buflen; 504 505 if (error) 506 return; 507 508 p->p_traceflag |= KTRFAC_ACTIVE; 509 510 next: 511 buflen = min(PAGE_SIZE, resid + sizeof(struct ktr_genio)); 512 513 kte = pool_get(&kte_pool, PR_WAITOK); 514 kth = &kte->kte_kth; 515 ktrinitheader(kth, p, KTR_GENIO); 516 517 ktp = malloc(buflen, M_KTRACE, M_WAITOK); 518 ktp->ktr_fd = fd; 519 ktp->ktr_rw = rw; 520 521 kte->kte_buf = ktp; 522 523 cp = (caddr_t)(ktp + 1); 524 buflen -= sizeof(struct ktr_genio); 525 kth->ktr_len = sizeof(struct ktr_genio); 526 527 while (buflen > 0) { 528 cnt = min(iov->iov_len, buflen); 529 if (copyin(iov->iov_base, cp, cnt) != 0) 530 goto out; 531 kth->ktr_len += cnt; 532 buflen -= cnt; 533 resid -= cnt; 534 iov->iov_len -= cnt; 535 if (iov->iov_len == 0) 536 iov++; 537 else 538 iov->iov_base = (caddr_t)iov->iov_base + cnt; 539 } 540 541 /* 542 * Don't push so many entry at once. It will cause kmem map 543 * shortage. 
544 */ 545 ktraddentry(p, kte, KTA_WAITOK | KTA_LARGE); 546 if (resid > 0) { 547 #if 0 /* XXX NJWLWP */ 548 KDASSERT(p->p_cpu != NULL); 549 KDASSERT(p->p_cpu == curcpu()); 550 #endif 551 /* XXX NJWLWP */ 552 if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) 553 preempt(1); 554 555 goto next; 556 } 557 558 p->p_traceflag &= ~KTRFAC_ACTIVE; 559 return; 560 561 out: 562 ktefree(kte); 563 p->p_traceflag &= ~KTRFAC_ACTIVE; 564 } 565 566 void 567 ktrpsig(struct proc *p, int sig, sig_t action, const sigset_t *mask, 568 const ksiginfo_t *ksi) 569 { 570 struct ktrace_entry *kte; 571 struct ktr_header *kth; 572 struct { 573 struct ktr_psig kp; 574 siginfo_t si; 575 } *kbuf; 576 577 p->p_traceflag |= KTRFAC_ACTIVE; 578 kte = pool_get(&kte_pool, PR_WAITOK); 579 kth = &kte->kte_kth; 580 ktrinitheader(kth, p, KTR_PSIG); 581 582 kbuf = malloc(sizeof(*kbuf), M_KTRACE, M_WAITOK); 583 kbuf->kp.signo = (char)sig; 584 kbuf->kp.action = action; 585 kbuf->kp.mask = *mask; 586 kte->kte_buf = kbuf; 587 if (ksi) { 588 kbuf->kp.code = KSI_TRAPCODE(ksi); 589 (void)memset(&kbuf->si, 0, sizeof(kbuf->si)); 590 kbuf->si._info = ksi->ksi_info; 591 kth->ktr_len = sizeof(*kbuf); 592 } else { 593 kbuf->kp.code = 0; 594 kth->ktr_len = sizeof(struct ktr_psig); 595 } 596 597 ktraddentry(p, kte, KTA_WAITOK); 598 p->p_traceflag &= ~KTRFAC_ACTIVE; 599 } 600 601 void 602 ktrcsw(struct proc *p, int out, int user) 603 { 604 struct ktrace_entry *kte; 605 struct ktr_header *kth; 606 struct ktr_csw *kc; 607 608 p->p_traceflag |= KTRFAC_ACTIVE; 609 610 /* 611 * We can't sleep if we're already going to sleep (if original 612 * condition is met during sleep, we hang up). 613 */ 614 kte = pool_get(&kte_pool, out ? PR_NOWAIT : PR_WAITOK); 615 if (kte == NULL) { 616 ktd_logerr(p, KTDE_ENOMEM); 617 goto out; 618 } 619 kth = &kte->kte_kth; 620 ktrinitheader(kth, p, KTR_CSW); 621 622 kc = malloc(sizeof(struct ktr_csw), M_KTRACE, 623 out ? 
M_NOWAIT : M_WAITOK); 624 if (kc == NULL) { 625 ktd_logerr(p, KTDE_ENOMEM); 626 goto free_kte; 627 } 628 kc->out = out; 629 kc->user = user; 630 kth->ktr_len = sizeof(struct ktr_csw); 631 kte->kte_buf = kc; 632 633 ktraddentry(p, kte, out ? KTA_NOWAIT : KTA_WAITOK); 634 p->p_traceflag &= ~KTRFAC_ACTIVE; 635 return; 636 637 free_kte: 638 pool_put(&kte_pool, kte); 639 out: 640 p->p_traceflag &= ~KTRFAC_ACTIVE; 641 } 642 643 void 644 ktruser(struct proc *p, const char *id, void *addr, size_t len, int ustr) 645 { 646 struct ktrace_entry *kte; 647 struct ktr_header *kth; 648 struct ktr_user *ktp; 649 caddr_t user_dta; 650 651 p->p_traceflag |= KTRFAC_ACTIVE; 652 kte = pool_get(&kte_pool, PR_WAITOK); 653 kth = &kte->kte_kth; 654 ktrinitheader(kth, p, KTR_USER); 655 656 ktp = malloc(sizeof(struct ktr_user) + len, M_KTRACE, M_WAITOK); 657 if (ustr) { 658 if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0) 659 ktp->ktr_id[0] = '\0'; 660 } else 661 strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN); 662 ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0'; 663 664 user_dta = (caddr_t)(ktp + 1); 665 if (copyin(addr, (void *)user_dta, len) != 0) 666 len = 0; 667 668 kth->ktr_len = sizeof(struct ktr_user) + len; 669 kte->kte_buf = ktp; 670 671 ktraddentry(p, kte, KTA_WAITOK); 672 p->p_traceflag &= ~KTRFAC_ACTIVE; 673 } 674 675 void 676 ktrmmsg(struct proc *p, const void *msgh, size_t size) 677 { 678 679 ktrkmem(p, KTR_MMSG, msgh, size); 680 } 681 682 void 683 ktrmool(struct proc *p, const void *kaddr, size_t size, const void *uaddr) 684 { 685 struct ktrace_entry *kte; 686 struct ktr_header *kth; 687 struct ktr_mool *kp; 688 struct ktr_mool *buf; 689 690 p->p_traceflag |= KTRFAC_ACTIVE; 691 kte = pool_get(&kte_pool, PR_WAITOK); 692 kth = &kte->kte_kth; 693 ktrinitheader(kth, p, KTR_MOOL); 694 695 kp = malloc(size + sizeof(*kp), M_KTRACE, M_WAITOK); 696 kp->uaddr = uaddr; 697 kp->size = size; 698 buf = kp + 1; /* Skip uaddr and size */ 699 (void)memcpy(buf, kaddr, size); 700 701 
kth->ktr_len = size + sizeof(*kp); 702 kte->kte_buf = kp; 703 704 ktraddentry(p, kte, KTA_WAITOK); 705 p->p_traceflag &= ~KTRFAC_ACTIVE; 706 } 707 708 709 /* Interface and common routines */ 710 711 int 712 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp) 713 { 714 struct proc *p; 715 struct pgrp *pg; 716 struct ktr_desc *ktd = NULL; 717 int ret = 0; 718 int error = 0; 719 int descend; 720 721 curp->p_traceflag |= KTRFAC_ACTIVE; 722 descend = ops & KTRFLAG_DESCEND; 723 facs = facs & ~((unsigned) KTRFAC_ROOT); 724 725 switch (KTROP(ops)) { 726 727 case KTROP_CLEARFILE: 728 /* 729 * Clear all uses of the tracefile 730 */ 731 732 ktd = ktd_lookup(fp); 733 if (ktd == NULL) 734 goto done; 735 736 proclist_lock_read(); 737 PROCLIST_FOREACH(p, &allproc) { 738 if (p->p_tracep == ktd) { 739 if (ktrcanset(curp, p)) 740 ktrderef(p); 741 else 742 error = EPERM; 743 } 744 } 745 proclist_unlock_read(); 746 goto done; 747 748 case KTROP_SET: 749 ktd = ktd_lookup(fp); 750 if (ktd == NULL) { 751 ktd = malloc(sizeof(struct ktr_desc), 752 M_KTRACE, M_WAITOK); 753 TAILQ_INIT(&ktd->ktd_queue); 754 simple_lock_init(&ktd->ktd_slock); 755 callout_init(&ktd->ktd_wakch); 756 ktd->ktd_flags = ktd->ktd_qcount = 757 ktd->ktd_error = ktd->ktd_errcnt = 0; 758 ktd->ktd_ref = 1; 759 ktd->ktd_delayqcnt = ktd_delayqcnt; 760 ktd->ktd_wakedelay = mstohz(ktd_wakedelay); 761 ktd->ktd_intrwakdl = mstohz(ktd_intrwakdl); 762 /* 763 * XXX: not correct. needs an way to detect 764 * whether ktruss or ktrace. 
765 */ 766 if (fp->f_type == DTYPE_PIPE) 767 ktd->ktd_flags |= KTDF_INTERACTIVE; 768 769 error = kthread_create1(ktrace_thread, ktd, 770 &ktd->ktd_proc, "ktr %p", ktd); 771 if (error != 0) { 772 free(ktd, M_KTRACE); 773 goto done; 774 } 775 776 simple_lock(&fp->f_slock); 777 fp->f_count++; 778 simple_unlock(&fp->f_slock); 779 ktd->ktd_fp = fp; 780 781 simple_lock(&ktdq_slock); 782 TAILQ_INSERT_TAIL(&ktdq, ktd, ktd_list); 783 simple_unlock(&ktdq_slock); 784 } 785 break; 786 787 case KTROP_CLEAR: 788 break; 789 } 790 791 /* 792 * need something to (un)trace (XXX - why is this here?) 793 */ 794 if (!facs) { 795 error = EINVAL; 796 goto done; 797 } 798 799 /* 800 * do it 801 */ 802 if (pid < 0) { 803 /* 804 * by process group 805 */ 806 pg = pg_find(-pid, PFIND_UNLOCK_FAIL); 807 if (pg == NULL) { 808 error = ESRCH; 809 goto done; 810 } 811 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 812 if (descend) 813 ret |= ktrsetchildren(curp, p, ops, facs, ktd); 814 else 815 ret |= ktrops(curp, p, ops, facs, ktd); 816 } 817 818 } else { 819 /* 820 * by pid 821 */ 822 p = p_find(pid, PFIND_UNLOCK_FAIL); 823 if (p == NULL) { 824 error = ESRCH; 825 goto done; 826 } 827 if (descend) 828 ret |= ktrsetchildren(curp, p, ops, facs, ktd); 829 else 830 ret |= ktrops(curp, p, ops, facs, ktd); 831 } 832 proclist_unlock_read(); /* taken by p{g}_find */ 833 if (!ret) 834 error = EPERM; 835 done: 836 if (error != 0 && ktd != NULL) 837 /* 838 * Wakup the thread so that it can be die if we 839 * can't trace any process. 
840 */ 841 ktd_wakeup(ktd); 842 if (KTROP(ops) == KTROP_SET || 843 KTROP(ops) == KTROP_CLEARFILE) { 844 simple_lock(&ktd->ktd_slock); 845 ktdrel(ktd); 846 } 847 curp->p_traceflag &= ~KTRFAC_ACTIVE; 848 return (error); 849 } 850 851 /* 852 * fktrace system call 853 */ 854 /* ARGSUSED */ 855 int 856 sys_fktrace(struct lwp *l, void *v, register_t *retval) 857 { 858 struct sys_fktrace_args /* { 859 syscallarg(int) fd; 860 syscallarg(int) ops; 861 syscallarg(int) facs; 862 syscallarg(int) pid; 863 } */ *uap = v; 864 struct proc *curp = l->l_proc; 865 struct file *fp = NULL; 866 struct filedesc *fdp = curp->p_fd; 867 int error; 868 869 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 870 return (EBADF); 871 872 FILE_USE(fp); 873 874 if ((fp->f_flag & FWRITE) == 0) 875 error = EBADF; 876 else 877 error = ktrace_common(curp, SCARG(uap, ops), 878 SCARG(uap, facs), SCARG(uap, pid), fp); 879 880 FILE_UNUSE(fp, curp); 881 882 return error; 883 } 884 885 /* 886 * ktrace system call 887 */ 888 /* ARGSUSED */ 889 int 890 sys_ktrace(struct lwp *l, void *v, register_t *retval) 891 { 892 struct sys_ktrace_args /* { 893 syscallarg(const char *) fname; 894 syscallarg(int) ops; 895 syscallarg(int) facs; 896 syscallarg(int) pid; 897 } */ *uap = v; 898 struct proc *curp = l->l_proc; 899 struct vnode *vp = NULL; 900 struct file *fp = NULL; 901 int fd; 902 int ops = SCARG(uap, ops); 903 int error = 0; 904 struct nameidata nd; 905 906 ops = KTROP(ops) | (ops & KTRFLAG_DESCEND); 907 908 curp->p_traceflag |= KTRFAC_ACTIVE; 909 if ((ops & KTROP_CLEAR) == 0) { 910 /* 911 * an operation which requires a file argument. 
912 */ 913 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname), 914 curp); 915 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 916 curp->p_traceflag &= ~KTRFAC_ACTIVE; 917 return (error); 918 } 919 vp = nd.ni_vp; 920 VOP_UNLOCK(vp, 0); 921 if (vp->v_type != VREG) { 922 (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); 923 curp->p_traceflag &= ~KTRFAC_ACTIVE; 924 return (EACCES); 925 } 926 /* 927 * XXX This uses up a file descriptor slot in the 928 * tracing process for the duration of this syscall. 929 * This is not expected to be a problem. If 930 * falloc(NULL, ...) DTRT we could skip that part, but 931 * that would require changing its interface to allow 932 * the caller to pass in a ucred.. 933 * 934 * This will FILE_USE the fp it returns, if any. 935 * Keep it in use until we return. 936 */ 937 if ((error = falloc(curp, &fp, &fd)) != 0) 938 goto done; 939 940 fp->f_flag = FWRITE; 941 fp->f_type = DTYPE_VNODE; 942 fp->f_ops = &vnops; 943 fp->f_data = (caddr_t)vp; 944 FILE_SET_MATURE(fp); 945 vp = NULL; 946 } 947 error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs), 948 SCARG(uap, pid), fp); 949 done: 950 if (vp != NULL) 951 (void) vn_close(vp, FWRITE, curp->p_ucred, curp); 952 if (fp != NULL) { 953 FILE_UNUSE(fp, curp); /* release file */ 954 fdrelease(curp, fd); /* release fd table slot */ 955 } 956 return (error); 957 } 958 959 int 960 ktrops(struct proc *curp, struct proc *p, int ops, int facs, 961 struct ktr_desc *ktd) 962 { 963 964 if (!ktrcanset(curp, p)) 965 return (0); 966 if (KTROP(ops) == KTROP_SET) { 967 if (p->p_tracep != ktd) { 968 /* 969 * if trace file already in use, relinquish 970 */ 971 ktrderef(p); 972 p->p_tracep = ktd; 973 ktradref(p); 974 } 975 p->p_traceflag |= facs; 976 if (curp->p_ucred->cr_uid == 0) 977 p->p_traceflag |= KTRFAC_ROOT; 978 } else { 979 /* KTROP_CLEAR */ 980 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 981 /* no more tracing */ 982 ktrderef(p); 983 } 984 } 985 986 /* 987 * Emit an 
emulation record, every time there is a ktrace 988 * change/attach request. 989 */ 990 if (KTRPOINT(p, KTR_EMUL)) 991 p->p_traceflag |= KTRFAC_TRC_EMUL; 992 #ifdef __HAVE_SYSCALL_INTERN 993 (*p->p_emul->e_syscall_intern)(p); 994 #endif 995 996 return (1); 997 } 998 999 int 1000 ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs, 1001 struct ktr_desc *ktd) 1002 { 1003 struct proc *p; 1004 int ret = 0; 1005 1006 p = top; 1007 for (;;) { 1008 ret |= ktrops(curp, p, ops, facs, ktd); 1009 /* 1010 * If this process has children, descend to them next, 1011 * otherwise do any siblings, and if done with this level, 1012 * follow back up the tree (but not past top). 1013 */ 1014 if (LIST_FIRST(&p->p_children) != NULL) { 1015 p = LIST_FIRST(&p->p_children); 1016 continue; 1017 } 1018 for (;;) { 1019 if (p == top) 1020 return (ret); 1021 if (LIST_NEXT(p, p_sibling) != NULL) { 1022 p = LIST_NEXT(p, p_sibling); 1023 break; 1024 } 1025 p = p->p_pptr; 1026 } 1027 } 1028 /*NOTREACHED*/ 1029 } 1030 1031 void 1032 ktrwrite(struct ktr_desc *ktd, struct ktrace_entry *kte) 1033 { 1034 struct uio auio; 1035 struct iovec aiov[64], *iov; 1036 struct ktrace_entry *top = kte; 1037 struct ktr_header *kth; 1038 struct file *fp = ktd->ktd_fp; 1039 struct proc *p; 1040 int error; 1041 1042 next: 1043 auio.uio_iov = iov = &aiov[0]; 1044 auio.uio_offset = 0; 1045 auio.uio_segflg = UIO_SYSSPACE; 1046 auio.uio_rw = UIO_WRITE; 1047 auio.uio_resid = 0; 1048 auio.uio_iovcnt = 0; 1049 auio.uio_procp = NULL; 1050 do { 1051 kth = &kte->kte_kth; 1052 iov->iov_base = (caddr_t)kth; 1053 iov++->iov_len = sizeof(struct ktr_header); 1054 auio.uio_resid += sizeof(struct ktr_header); 1055 auio.uio_iovcnt++; 1056 if (kth->ktr_len > 0) { 1057 iov->iov_base = kte->kte_buf; 1058 iov++->iov_len = kth->ktr_len; 1059 auio.uio_resid += kth->ktr_len; 1060 auio.uio_iovcnt++; 1061 } 1062 } while ((kte = TAILQ_NEXT(kte, kte_list)) != NULL && 1063 auio.uio_iovcnt < sizeof(aiov) / sizeof(aiov[0]) - 1); 1064 
1065 again: 1066 simple_lock(&fp->f_slock); 1067 FILE_USE(fp); 1068 error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio, 1069 fp->f_cred, FOF_UPDATE_OFFSET); 1070 FILE_UNUSE(fp, NULL); 1071 switch (error) { 1072 1073 case 0: 1074 if (auio.uio_resid > 0) 1075 goto again; 1076 if (kte != NULL) 1077 goto next; 1078 break; 1079 1080 case EWOULDBLOCK: 1081 preempt(1); 1082 goto again; 1083 1084 default: 1085 /* 1086 * If error encountered, give up tracing on this 1087 * vnode. Don't report EPIPE as this can easily 1088 * happen with fktrace()/ktruss. 1089 */ 1090 #ifndef DEBUG 1091 if (error != EPIPE) 1092 #endif 1093 log(LOG_NOTICE, 1094 "ktrace write failed, errno %d, tracing stopped\n", 1095 error); 1096 proclist_lock_read(); 1097 PROCLIST_FOREACH(p, &allproc) { 1098 if (p->p_tracep == ktd) 1099 ktrderef(p); 1100 } 1101 proclist_unlock_read(); 1102 } 1103 1104 while ((kte = top) != NULL) { 1105 top = TAILQ_NEXT(top, kte_list); 1106 ktefree(kte); 1107 } 1108 } 1109 1110 void 1111 ktrace_thread(void *arg) 1112 { 1113 struct ktr_desc *ktd = arg; 1114 struct file *fp = ktd->ktd_fp; 1115 struct ktrace_entry *kte; 1116 int ktrerr, errcnt; 1117 1118 for (;;) { 1119 simple_lock(&ktd->ktd_slock); 1120 kte = TAILQ_FIRST(&ktd->ktd_queue); 1121 if (kte == NULL) { 1122 if (ktd->ktd_flags & KTDF_WAIT) { 1123 ktd->ktd_flags &= ~(KTDF_WAIT | KTDF_BLOCKING); 1124 wakeup(&ktd->ktd_flags); 1125 } 1126 if (ktd->ktd_ref == 0) 1127 break; 1128 ltsleep(ktd, PWAIT | PNORELOCK, "ktrwait", 0, 1129 &ktd->ktd_slock); 1130 continue; 1131 } 1132 TAILQ_INIT(&ktd->ktd_queue); 1133 ktd->ktd_qcount = 0; 1134 ktrerr = ktd->ktd_error; 1135 errcnt = ktd->ktd_errcnt; 1136 ktd->ktd_error = ktd->ktd_errcnt = 0; 1137 simple_unlock(&ktd->ktd_slock); 1138 1139 if (ktrerr) { 1140 log(LOG_NOTICE, 1141 "ktrace failed, fp %p, error 0x%x, total %d\n", 1142 fp, ktrerr, errcnt); 1143 } 1144 ktrwrite(ktd, kte); 1145 } 1146 simple_unlock(&ktd->ktd_slock); 1147 1148 simple_lock(&ktdq_slock); 1149 
TAILQ_REMOVE(&ktdq, ktd, ktd_list); 1150 simple_unlock(&ktdq_slock); 1151 1152 simple_lock(&fp->f_slock); 1153 FILE_USE(fp); 1154 1155 /* 1156 * ktrace file descriptor can't be watched (are not visible to 1157 * userspace), so no kqueue stuff here 1158 * XXX: The above comment is wrong, because the fktrace file 1159 * descriptor is available in userland. 1160 */ 1161 closef(fp, NULL); 1162 1163 callout_stop(&ktd->ktd_wakch); 1164 free(ktd, M_KTRACE); 1165 1166 kthread_exit(0); 1167 } 1168 1169 /* 1170 * Return true if caller has permission to set the ktracing state 1171 * of target. Essentially, the target can't possess any 1172 * more permissions than the caller. KTRFAC_ROOT signifies that 1173 * root previously set the tracing status on the target process, and 1174 * so, only root may further change it. 1175 * 1176 * TODO: check groups. use caller effective gid. 1177 */ 1178 int 1179 ktrcanset(struct proc *callp, struct proc *targetp) 1180 { 1181 struct pcred *caller = callp->p_cred; 1182 struct pcred *target = targetp->p_cred; 1183 1184 if ((caller->pc_ucred->cr_uid == target->p_ruid && 1185 target->p_ruid == target->p_svuid && 1186 caller->p_rgid == target->p_rgid && /* XXX */ 1187 target->p_rgid == target->p_svgid && 1188 (targetp->p_traceflag & KTRFAC_ROOT) == 0 && 1189 (targetp->p_flag & P_SUGID) == 0) || 1190 caller->pc_ucred->cr_uid == 0) 1191 return (1); 1192 1193 return (0); 1194 } 1195 #endif /* KTRACE */ 1196 1197 /* 1198 * Put user defined entry to ktrace records. 
1199 */ 1200 int 1201 sys_utrace(struct lwp *l, void *v, register_t *retval) 1202 { 1203 #ifdef KTRACE 1204 struct sys_utrace_args /* { 1205 syscallarg(const char *) label; 1206 syscallarg(void *) addr; 1207 syscallarg(size_t) len; 1208 } */ *uap = v; 1209 struct proc *p = l->l_proc; 1210 1211 if (!KTRPOINT(p, KTR_USER)) 1212 return (0); 1213 1214 if (SCARG(uap, len) > KTR_USER_MAXLEN) 1215 return (EINVAL); 1216 1217 ktruser(p, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1); 1218 1219 return (0); 1220 #else /* !KTRACE */ 1221 return ENOSYS; 1222 #endif /* KTRACE */ 1223 } 1224