/*	$NetBSD: kern_ktrace.c,v 1.99 2005/12/13 13:12:18 reinoud Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.5 (Berkeley) 5/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.99 2005/12/13 13:12:18 reinoud Exp $");

#include "opt_ktrace.h"
#include "opt_compat_mach.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/callout.h>

#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>

#ifdef KTRACE

/*
 * XXX:
 *	- need better error reporting?
 *	- p->p_tracep access lock.  lock p_lock, lock ktd if !NULL, inc ref.
 *	- userland utility to sort ktrace.out by timestamp.
 *	- keep minimum information in ktrace_entry when rest of alloc failed.
 *	- enlarge ktrace_entry so that small entries won't require an
 *	  additional alloc?
 *	- per-trace control of configurable parameters.
 */

struct ktrace_entry {
	TAILQ_ENTRY(ktrace_entry) kte_list;
	struct ktr_header kte_kth;
	void *kte_buf;
};
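
/*
 * One ktr_desc exists per trace output file.  Traced processes append
 * ktrace_entry records to ktd_queue via ktraddentry() below, and a
 * dedicated kernel thread, ktrace_thread(), drains the queue and writes
 * the records to ktd_fp.
 */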

struct ktr_desc {
	TAILQ_ENTRY(ktr_desc) ktd_list;
	int ktd_flags;
#define	KTDF_WAIT		0x0001
#define	KTDF_DONE		0x0002
#define	KTDF_BLOCKING		0x0004
#define	KTDF_INTERACTIVE	0x0008
	int ktd_error;
#define	KTDE_ENOMEM		0x0001
#define	KTDE_ENOSPC		0x0002
	int ktd_errcnt;
	int ktd_ref;			/* # of references */
	int ktd_qcount;			/* # of entries in the queue */

	/*
	 * Parameters to control behaviour.
	 */
	int ktd_delayqcnt;		/* # of entries allowed to delay */
	int ktd_wakedelay;		/* delay of wakeup in *ticks* */
	int ktd_intrwakdl;		/* ditto, but when interactive */

	struct file *ktd_fp;		/* trace output file */
	struct proc *ktd_proc;		/* our kernel thread */
	TAILQ_HEAD(, ktrace_entry) ktd_queue;
	struct callout ktd_wakch;	/* delayed wakeup */
	struct simplelock ktd_slock;
};

static void	ktrinitheader(struct ktr_header *, struct lwp *, int);
static void	ktrwrite(struct ktr_desc *, struct ktrace_entry *);
static int	ktrace_common(struct proc *, int, int, int, struct file *);
static int	ktrops(struct proc *, struct proc *, int, int,
		    struct ktr_desc *);
static int	ktrsetchildren(struct proc *, struct proc *, int, int,
		    struct ktr_desc *);
static int	ktrcanset(struct proc *, struct proc *);
static int	ktrsamefile(struct file *, struct file *);

static struct ktr_desc *
		ktd_lookup(struct file *);
static void	ktdrel(struct ktr_desc *);
static void	ktdref(struct ktr_desc *);
static void	ktraddentry(struct lwp *, struct ktrace_entry *, int);
/* Flags for ktraddentry (3rd arg) */
#define	KTA_NOWAIT		0x0000
#define	KTA_WAITOK		0x0001
#define	KTA_LARGE		0x0002
static void	ktefree(struct ktrace_entry *);
static void	ktd_logerrl(struct ktr_desc *, int);
static void	ktd_logerr(struct proc *, int);
static void	ktrace_thread(void *);

/*
 * Default values.
 */
#define	KTD_MAXENTRY		1000	/* XXX: tune */
#define	KTD_TIMEOUT		5	/* XXX: tune */
#define	KTD_DELAYQCNT		100	/* XXX: tune */
#define	KTD_WAKEDELAY		5000	/* XXX: tune */
#define	KTD_INTRWAKDL		100	/* XXX: tune */

/*
 * Patchable variables.
 */
int ktd_maxentry = KTD_MAXENTRY;	/* max # of entries in the queue */
int ktd_timeout = KTD_TIMEOUT;		/* timeout in seconds */
int ktd_delayqcnt = KTD_DELAYQCNT;	/* # of entries allowed to delay */
int ktd_wakedelay = KTD_WAKEDELAY;	/* delay of wakeup in *ms* */
int ktd_intrwakdl = KTD_INTRWAKDL;	/* ditto, but when interactive */

static struct simplelock ktdq_slock = SIMPLELOCK_INITIALIZER;
static TAILQ_HEAD(, ktr_desc) ktdq = TAILQ_HEAD_INITIALIZER(ktdq);

MALLOC_DEFINE(M_KTRACE, "ktrace", "ktrace data buffer");
POOL_INIT(kte_pool, sizeof(struct ktrace_entry), 0, 0, 0,
    "ktepl", &pool_allocator_nointr);

static __inline void
ktd_wakeup(struct ktr_desc *ktd)
{

	callout_stop(&ktd->ktd_wakch);
	wakeup(ktd);
}

static void
ktd_logerrl(struct ktr_desc *ktd, int error)
{

	ktd->ktd_error |= error;
	ktd->ktd_errcnt++;
}

static void
ktd_logerr(struct proc *p, int error)
{
	struct ktr_desc *ktd = p->p_tracep;

	if (ktd == NULL)
		return;

	simple_lock(&ktd->ktd_slock);
	ktd_logerrl(ktd, error);
	simple_unlock(&ktd->ktd_slock);
}
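
/*
 * Descriptor reference counting, in brief (illustrative; the callers
 * below are authoritative):
 *
 *	ktdref(ktd);			take a reference; the descriptor
 *					stays valid while we use it
 *	simple_lock(&ktd->ktd_slock);
 *	ktdrel(ktd);			drop it; note that ktdrel() is
 *					entered with ktd_slock held and
 *					releases it
 *
 * When the last reference goes away, ktdrel() marks the descriptor
 * KTDF_DONE and wakes the writer thread so it can tear everything down.
 */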

/*
 * Release a reference.  Called with ktd_slock held.
 */
void
ktdrel(struct ktr_desc *ktd)
{

	KDASSERT(ktd->ktd_ref != 0);
	KASSERT(ktd->ktd_ref > 0);
	if (--ktd->ktd_ref <= 0) {
		ktd->ktd_flags |= KTDF_DONE;
		wakeup(ktd);
	}
	simple_unlock(&ktd->ktd_slock);
}

void
ktdref(struct ktr_desc *ktd)
{

	simple_lock(&ktd->ktd_slock);
	ktd->ktd_ref++;
	simple_unlock(&ktd->ktd_slock);
}

struct ktr_desc *
ktd_lookup(struct file *fp)
{
	struct ktr_desc *ktd;

	simple_lock(&ktdq_slock);
	for (ktd = TAILQ_FIRST(&ktdq); ktd != NULL;
	    ktd = TAILQ_NEXT(ktd, ktd_list)) {
		simple_lock(&ktd->ktd_slock);
		if (ktrsamefile(ktd->ktd_fp, fp)) {
			ktd->ktd_ref++;
			simple_unlock(&ktd->ktd_slock);
			break;
		}
		simple_unlock(&ktd->ktd_slock);
	}
	simple_unlock(&ktdq_slock);
	return (ktd);
}
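
/*
 * Queue a record for the writer thread.  Roughly (see the code below
 * for the authoritative details):
 *
 *	- if the queue already holds more than ktd_maxentry records,
 *	  drop the new one and log KTDE_ENOSPC;
 *	- if the caller may sleep (KTA_WAITOK) and the queue is more
 *	  than half full, wake the writer and wait for it to drain,
 *	  bounded by ktd_timeout seconds;
 *	- otherwise just schedule a delayed wakeup of the writer.
 */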
void
ktraddentry(struct lwp *l, struct ktrace_entry *kte, int flags)
{
	struct proc *p = l->l_proc;
	struct ktr_desc *ktd;
#ifdef DEBUG
	struct timeval t;
	int s;
#endif

	if (p->p_traceflag & KTRFAC_TRC_EMUL) {
		/* Add emulation trace before first entry for this process */
		p->p_traceflag &= ~KTRFAC_TRC_EMUL;
		ktremul(l);
	}

	/*
	 * Tracing may have been canceled while we were sleeping waiting
	 * for memory.
	 */
	ktd = p->p_tracep;
	if (ktd == NULL)
		goto freekte;

	/*
	 * Bump the reference count so that the object will remain while
	 * we are here.  Note that the trace is controlled by another
	 * process.
	 */
	ktdref(ktd);

	simple_lock(&ktd->ktd_slock);
	if (ktd->ktd_flags & KTDF_DONE)
		goto relktd;

	if (ktd->ktd_qcount > ktd_maxentry) {
		ktd_logerrl(ktd, KTDE_ENOSPC);
		goto relktd;
	}
	TAILQ_INSERT_TAIL(&ktd->ktd_queue, kte, kte_list);
	ktd->ktd_qcount++;
	if (ktd->ktd_flags & KTDF_BLOCKING)
		goto skip_sync;

	if (flags & KTA_WAITOK &&
	    (/* flags & KTA_LARGE */0 || ktd->ktd_flags & KTDF_WAIT ||
	    ktd->ktd_qcount > ktd_maxentry >> 1))
		/*
		 * Sync with the writer thread since we're requesting a
		 * rather big one or many requests are pending.
		 */
		do {
			ktd->ktd_flags |= KTDF_WAIT;
			ktd_wakeup(ktd);
#ifdef DEBUG
			s = splclock();
			t = mono_time;
			splx(s);
#endif
			if (ltsleep(&ktd->ktd_flags, PWAIT, "ktrsync",
			    ktd_timeout * hz, &ktd->ktd_slock) != 0) {
				ktd->ktd_flags |= KTDF_BLOCKING;
				/*
				 * Maybe the writer thread is blocked
				 * completely for some reason, but
				 * don't stop the target process forever.
				 */
				log(LOG_NOTICE, "ktrace timeout\n");
				break;
			}
#ifdef DEBUG
			s = splclock();
			timersub(&mono_time, &t, &t);
			splx(s);
			if (t.tv_sec > 0)
				log(LOG_NOTICE,
				    "ktrace long wait: %ld.%06ld\n",
				    t.tv_sec, t.tv_usec);
#endif
		} while (p->p_tracep == ktd &&
		    (ktd->ktd_flags & (KTDF_WAIT | KTDF_DONE)) == KTDF_WAIT);
	else {
		/* Schedule delayed wakeup */
		if (ktd->ktd_qcount > ktd->ktd_delayqcnt)
			ktd_wakeup(ktd);	/* Wakeup now */
		else if (!callout_pending(&ktd->ktd_wakch))
			callout_reset(&ktd->ktd_wakch,
			    ktd->ktd_flags & KTDF_INTERACTIVE ?
			    ktd->ktd_intrwakdl : ktd->ktd_wakedelay,
			    (void (*)(void *))wakeup, ktd);
	}

skip_sync:
	ktdrel(ktd);
	return;

relktd:
	ktdrel(ktd);

freekte:
	ktefree(kte);
}

void
ktefree(struct ktrace_entry *kte)
{

	if (kte->kte_buf != NULL)
		free(kte->kte_buf, M_KTRACE);
	pool_put(&kte_pool, kte);
}

/*
 * "Deep" compare of two files for the purposes of clearing a trace.
 * Returns true if they're the same open file, or if they point at the
 * same underlying vnode/socket.
 */
int
ktrsamefile(struct file *f1, struct file *f2)
{

	return ((f1 == f2) ||
	    ((f1 != NULL) && (f2 != NULL) &&
	    (f1->f_type == f2->f_type) &&
	    (f1->f_data == f2->f_data)));
}

void
ktrderef(struct proc *p)
{
	struct ktr_desc *ktd = p->p_tracep;

	p->p_traceflag = 0;
	if (ktd == NULL)
		return;
	p->p_tracep = NULL;

	simple_lock(&ktd->ktd_slock);
	wakeup(&ktd->ktd_flags);
	ktdrel(ktd);
}

void
ktradref(struct proc *p)
{
	struct ktr_desc *ktd = p->p_tracep;

	ktdref(ktd);
}

void
ktrinitheader(struct ktr_header *kth, struct lwp *l, int type)
{
	struct proc *p = l->l_proc;

	(void)memset(kth, 0, sizeof(*kth));
	kth->ktr_type = type;
	kth->ktr_pid = p->p_pid;
	memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);

	kth->ktr_version = KTRFAC_VERSION(p->p_traceflag);

	switch (KTRFAC_VERSION(p->p_traceflag)) {
	case 0:
		/* This is the original format */
		microtime(&kth->ktr_tv);
		break;
	case 1:
		kth->ktr_lid = l->l_lid;
		nanotime(&kth->ktr_time);
		break;
	default:
		break;
	}
}

void
ktrsyscall(struct lwp *l, register_t code, register_t realcode,
    const struct sysent *callp, register_t args[])
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_syscall *ktp;
	register_t *argp;
	int argsize;
	size_t len;
	u_int i;

	if (callp == NULL)
		callp = p->p_emul->e_sysent;

	argsize = callp[code].sy_argsize;
#ifdef _LP64
	if (p->p_flag & P_32)
		argsize = argsize << 1;
#endif
	len = sizeof(struct ktr_syscall) + argsize;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_SYSCALL);

	ktp = malloc(len, M_KTRACE, M_WAITOK);
	ktp->ktr_code = realcode;
	ktp->ktr_argsize = argsize;
	argp = (register_t *)(ktp + 1);
	for (i = 0; i < (argsize / sizeof(*argp)); i++)
		*argp++ = args[i];
	kth->ktr_len = len;
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

void
ktrsysret(struct lwp *l, register_t code, int error, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_sysret *ktp;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_SYSRET);

	ktp = malloc(sizeof(struct ktr_sysret), M_KTRACE, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_eosys = 0;			/* XXX unused */
	ktp->ktr_error = error;
	ktp->ktr_retval = retval ? retval[0] : 0;
	ktp->ktr_retval_1 = retval ? retval[1] : 0;

	kth->ktr_len = sizeof(struct ktr_sysret);
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
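
/*
 * On disk, every record is a struct ktr_header immediately followed by
 * kth->ktr_len bytes of payload.  For example, a KTR_SYSCALL record for
 * write(2) would be laid out as (illustrative):
 *
 *	struct ktr_header	ktr_type = KTR_SYSCALL, ktr_len = ...
 *	struct ktr_syscall	ktr_code = SYS_write, ktr_argsize = ...
 *	register_t args[3]	fd, buf, nbytes
 */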

/*
 * XXX: ndp->ni_pathlen should be passed.
 */
void
ktrnamei(struct lwp *l, char *path)
{

	ktrkmem(l, KTR_NAMEI, path, strlen(path));
}

void
ktremul(struct lwp *l)
{
	const char *emul = l->l_proc->p_emul->e_name;

	ktrkmem(l, KTR_EMUL, emul, strlen(emul));
}

void
ktrkmem(struct lwp *l, int type, const void *bf, size_t len)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, type);

	kth->ktr_len = len;
	kte->kte_buf = malloc(len, M_KTRACE, M_WAITOK);
	memcpy(kte->kte_buf, bf, len);

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

void
ktrgenio(struct lwp *l, int fd, enum uio_rw rw, struct iovec *iov,
    int len, int error)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_genio *ktp;
	int resid = len, cnt;
	caddr_t cp;
	int buflen;

	if (error)
		return;

	p->p_traceflag |= KTRFAC_ACTIVE;

next:
	buflen = min(PAGE_SIZE, resid + sizeof(struct ktr_genio));

	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_GENIO);

	ktp = malloc(buflen, M_KTRACE, M_WAITOK);
	ktp->ktr_fd = fd;
	ktp->ktr_rw = rw;

	kte->kte_buf = ktp;

	cp = (caddr_t)(ktp + 1);
	buflen -= sizeof(struct ktr_genio);
	kth->ktr_len = sizeof(struct ktr_genio);

	while (buflen > 0) {
		cnt = min(iov->iov_len, buflen);
		if (copyin(iov->iov_base, cp, cnt) != 0)
			goto out;
		kth->ktr_len += cnt;
		cp += cnt;
		buflen -= cnt;
		resid -= cnt;
		iov->iov_len -= cnt;
		if (iov->iov_len == 0)
			iov++;
		else
			iov->iov_base = (caddr_t)iov->iov_base + cnt;
	}

	/*
	 * Don't push too many entries at once.  It will cause kmem map
	 * shortage.
	 */
	ktraddentry(l, kte, KTA_WAITOK | KTA_LARGE);
	if (resid > 0) {
#if 0 /* XXX NJWLWP */
		KDASSERT(p->p_cpu != NULL);
		KDASSERT(p->p_cpu == curcpu());
#endif
		/* XXX NJWLWP */
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt(1);

		goto next;
	}

	p->p_traceflag &= ~KTRFAC_ACTIVE;
	return;

out:
	ktefree(kte);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
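
/*
 * ktrgenio() above emits one KTR_GENIO record per PAGE_SIZE of data:
 * a 10KB write on a machine with 4KB pages, for instance, yields three
 * records, with a voluntary preemption point between chunks so that a
 * large I/O can't monopolize the CPU while being traced.
 */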

void
ktrpsig(struct lwp *l, int sig, sig_t action, const sigset_t *mask,
    const ksiginfo_t *ksi)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct {
		struct ktr_psig	kp;
		siginfo_t	si;
	} *kbuf;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_PSIG);

	kbuf = malloc(sizeof(*kbuf), M_KTRACE, M_WAITOK);
	kbuf->kp.signo = (char)sig;
	kbuf->kp.action = action;
	kbuf->kp.mask = *mask;
	kte->kte_buf = kbuf;
	if (ksi) {
		kbuf->kp.code = KSI_TRAPCODE(ksi);
		(void)memset(&kbuf->si, 0, sizeof(kbuf->si));
		kbuf->si._info = ksi->ksi_info;
		kth->ktr_len = sizeof(*kbuf);
	} else {
		kbuf->kp.code = 0;
		kth->ktr_len = sizeof(struct ktr_psig);
	}

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

void
ktrcsw(struct lwp *l, int out, int user)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_csw *kc;

	p->p_traceflag |= KTRFAC_ACTIVE;

	/*
	 * We can't sleep if we're already going to sleep (if the
	 * original sleep condition is satisfied while we sleep here,
	 * we would hang).
	 */
	kte = pool_get(&kte_pool, out ? PR_NOWAIT : PR_WAITOK);
	if (kte == NULL) {
		ktd_logerr(p, KTDE_ENOMEM);
		goto out;
	}
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_CSW);

	kc = malloc(sizeof(struct ktr_csw), M_KTRACE,
	    out ? M_NOWAIT : M_WAITOK);
	if (kc == NULL) {
		ktd_logerr(p, KTDE_ENOMEM);
		goto free_kte;
	}
	kc->out = out;
	kc->user = user;
	kth->ktr_len = sizeof(struct ktr_csw);
	kte->kte_buf = kc;

	ktraddentry(l, kte, out ? KTA_NOWAIT : KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
	return;

free_kte:
	pool_put(&kte_pool, kte);
out:
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
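
/*
 * ktruser() backs the utrace(2) system call (see sys_utrace() at the
 * bottom of this file): a traced process can inject its own records
 * into the stream.  An illustrative userland call:
 *
 *	utrace("mylabel", &mystate, sizeof(mystate));
 *
 * shows up as a KTR_USER record carrying the label and a copy of
 * mystate.  ("mylabel" and mystate are hypothetical.)
 */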
void
ktruser(struct lwp *l, const char *id, void *addr, size_t len, int ustr)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_user *ktp;
	caddr_t user_dta;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_USER);

	ktp = malloc(sizeof(struct ktr_user) + len, M_KTRACE, M_WAITOK);
	if (ustr) {
		if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0)
			ktp->ktr_id[0] = '\0';
	} else
		strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
	ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0';

	user_dta = (caddr_t)(ktp + 1);
	if (copyin(addr, (void *)user_dta, len) != 0)
		len = 0;

	kth->ktr_len = sizeof(struct ktr_user) + len;
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

void
ktrmmsg(struct lwp *l, const void *msgh, size_t size)
{

	ktrkmem(l, KTR_MMSG, msgh, size);
}

void
ktrmool(struct lwp *l, const void *kaddr, size_t size, const void *uaddr)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_mool *kp;
	struct ktr_mool *bf;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_MOOL);

	kp = malloc(size + sizeof(*kp), M_KTRACE, M_WAITOK);
	kp->uaddr = uaddr;
	kp->size = size;
	bf = kp + 1;		/* Skip uaddr and size */
	(void)memcpy(bf, kaddr, size);

	kth->ktr_len = size + sizeof(*kp);
	kte->kte_buf = kp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

void
ktrsaupcall(struct lwp *l, int type, int nevent, int nint, void *sas,
    void *ap)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_saupcall *ktp;
	size_t len;
	struct sa_t **sapp;
	int i;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_SAUPCALL);

	len = sizeof(struct ktr_saupcall);
	ktp = malloc(len + sizeof(struct sa_t) * (nevent + nint + 1), M_KTRACE,
	    M_WAITOK);

	ktp->ktr_type = type;
	ktp->ktr_nevent = nevent;
	ktp->ktr_nint = nint;
	ktp->ktr_sas = sas;
	ktp->ktr_ap = ap;
	/*
	 * Copy the sa_t's
	 */
	sapp = (struct sa_t **) sas;

	for (i = nevent + nint; i >= 0; i--) {
		if (copyin(*sapp, (char *)ktp + len, sizeof(struct sa_t)) == 0)
			len += sizeof(struct sa_t);
		sapp++;
	}

	kth->ktr_len = len;
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}

/* Interface and common routines */
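
/*
 * ktrace_common() is the shared back end for ktrace(2) and fktrace(2).
 * "ops" packs a KTROP operation, the KTRFLAG_DESCEND modifier and the
 * requested trace format version; "facs" is the mask of KTRFAC_* trace
 * points.  For example, sys_ktrace() would hand us (illustrative):
 *
 *	ktrace_common(curp, KTROP_SET | KTRFLAG_DESCEND,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, pid, fp);
 *
 * to start tracing syscalls of pid and its descendants to fp.
 */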
int
ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp)
{
	struct proc *p;
	struct pgrp *pg;
	struct ktr_desc *ktd = NULL;
	int ret = 0;
	int error = 0;
	int descend;

	curp->p_traceflag |= KTRFAC_ACTIVE;
	descend = ops & KTRFLAG_DESCEND;
	facs = facs & ~((unsigned) KTRFAC_ROOT);

	switch (KTROP(ops)) {

	case KTROP_CLEARFILE:
		/*
		 * Clear all uses of the tracefile.
		 */
		ktd = ktd_lookup(fp);
		if (ktd == NULL)
			goto done;

		proclist_lock_read();
		PROCLIST_FOREACH(p, &allproc) {
			if (p->p_tracep == ktd) {
				if (ktrcanset(curp, p))
					ktrderef(p);
				else
					error = EPERM;
			}
		}
		proclist_unlock_read();
		goto done;

	case KTROP_SET:
		ktd = ktd_lookup(fp);
		if (ktd == NULL) {
			ktd = malloc(sizeof(struct ktr_desc),
			    M_KTRACE, M_WAITOK);
			TAILQ_INIT(&ktd->ktd_queue);
			simple_lock_init(&ktd->ktd_slock);
			callout_init(&ktd->ktd_wakch);
			ktd->ktd_flags = ktd->ktd_qcount =
			    ktd->ktd_error = ktd->ktd_errcnt = 0;
			ktd->ktd_ref = 1;
			ktd->ktd_delayqcnt = ktd_delayqcnt;
			ktd->ktd_wakedelay = mstohz(ktd_wakedelay);
			ktd->ktd_intrwakdl = mstohz(ktd_intrwakdl);
			/*
			 * XXX: not correct.  needs a way to detect
			 * whether ktruss or ktrace.
			 */
			if (fp->f_type == DTYPE_PIPE)
				ktd->ktd_flags |= KTDF_INTERACTIVE;

			error = kthread_create1(ktrace_thread, ktd,
			    &ktd->ktd_proc, "ktr %p", ktd);
			if (error != 0) {
				free(ktd, M_KTRACE);
				goto done;
			}

			simple_lock(&fp->f_slock);
			fp->f_count++;
			simple_unlock(&fp->f_slock);
			ktd->ktd_fp = fp;

			simple_lock(&ktdq_slock);
			TAILQ_INSERT_TAIL(&ktdq, ktd, ktd_list);
			simple_unlock(&ktdq_slock);
		}
		break;

	case KTROP_CLEAR:
		break;
	}

	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}

	/*
	 * do it
	 */
	if (pid < 0) {
		/*
		 * by process group
		 */
		pg = pg_find(-pid, PFIND_UNLOCK_FAIL);
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (descend)
				ret |= ktrsetchildren(curp, p, ops, facs, ktd);
			else
				ret |= ktrops(curp, p, ops, facs, ktd);
		}

	} else {
		/*
		 * by pid
		 */
		p = p_find(pid, PFIND_UNLOCK_FAIL);
		if (p == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(curp, p, ops, facs, ktd);
		else
			ret |= ktrops(curp, p, ops, facs, ktd);
	}
	proclist_unlock_read();	/* taken by p{g}_find */
	if (!ret)
		error = EPERM;
done:
	if (ktd != NULL) {
		if (error != 0) {
			/*
			 * Wake up the thread so that it can die if we
			 * can't trace any process.
			 */
			ktd_wakeup(ktd);
		}
		if (KTROP(ops) == KTROP_SET || KTROP(ops) == KTROP_CLEARFILE) {
			simple_lock(&ktd->ktd_slock);
			ktdrel(ktd);
		}
	}
	curp->p_traceflag &= ~KTRFAC_ACTIVE;
	return (error);
}
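
/*
 * The two user-visible entry points follow.  From userland they look
 * like (illustrative):
 *
 *	ktrace("ktrace.out", KTROP_SET, KTRFAC_SYSCALL | KTRFAC_SYSRET, pid);
 *	fktrace(fd, KTROP_SET, KTRFAC_SYSCALL | KTRFAC_SYSRET, pid);
 *
 * fktrace(2) traces to an already-open descriptor (which must be
 * writable), while ktrace(2) opens the named file itself.
 */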

/*
 * fktrace system call
 */
/* ARGSUSED */
int
sys_fktrace(struct lwp *l, void *v, register_t *retval)
{
	struct sys_fktrace_args /* {
		syscallarg(int) fd;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(int) pid;
	} */ *uap = v;
	struct proc *curp = l->l_proc;
	struct filedesc *fdp = curp->p_fd;
	struct file *fp;
	int error;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & FWRITE) == 0)
		error = EBADF;
	else
		error = ktrace_common(curp, SCARG(uap, ops),
		    SCARG(uap, facs), SCARG(uap, pid), fp);

	FILE_UNUSE(fp, l);

	return error;
}

/*
 * ktrace system call
 */
/* ARGSUSED */
int
sys_ktrace(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(int) pid;
	} */ *uap = v;
	struct proc *curp = l->l_proc;
	struct vnode *vp = NULL;
	struct file *fp = NULL;
	int ops = SCARG(uap, ops);
	struct nameidata nd;
	int error = 0;
	int fd;

	ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);

	curp->p_traceflag |= KTRFAC_ACTIVE;
	if ((ops & KTROP_CLEAR) == 0) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    l);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			curp->p_traceflag &= ~KTRFAC_ACTIVE;
			return (error);
		}
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, l);
			curp->p_traceflag &= ~KTRFAC_ACTIVE;
			return (EACCES);
		}
		/*
		 * XXX This uses up a file descriptor slot in the
		 * tracing process for the duration of this syscall.
		 * This is not expected to be a problem.  If
		 * falloc(NULL, ...) DTRT we could skip that part, but
		 * that would require changing its interface to allow
		 * the caller to pass in a ucred..
		 *
		 * This will FILE_USE the fp it returns, if any.
		 * Keep it in use until we return.
		 */
		if ((error = falloc(curp, &fp, &fd)) != 0)
			goto done;

		fp->f_flag = FWRITE;
		fp->f_type = DTYPE_VNODE;
		fp->f_ops = &vnops;
		fp->f_data = (caddr_t)vp;
		FILE_SET_MATURE(fp);
		vp = NULL;
	}
	error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
	    SCARG(uap, pid), fp);
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, curp->p_ucred, l);
	if (fp != NULL) {
		FILE_UNUSE(fp, l);	/* release file */
		fdrelease(l, fd);	/* release fd table slot */
	}
	return (error);
}

int
ktrops(struct proc *curp, struct proc *p, int ops, int facs,
    struct ktr_desc *ktd)
{
	int vers = ops & KTRFAC_VER_MASK;

	if (!ktrcanset(curp, p))
		return (0);

	switch (vers) {
	case KTRFACv0:
	case KTRFACv1:
		break;
	default:
		return EINVAL;
	}

	if (KTROP(ops) == KTROP_SET) {
		if (p->p_tracep != ktd) {
			/*
			 * if trace file already in use, relinquish
			 */
			ktrderef(p);
			p->p_tracep = ktd;
			ktradref(p);
		}
		p->p_traceflag |= facs;
		if (curp->p_ucred->cr_uid == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			ktrderef(p);
		}
	}

	if (p->p_traceflag)
		p->p_traceflag |= vers;
	/*
	 * Emit an emulation record, every time there is a ktrace
	 * change/attach request.
	 */
	if (KTRPOINT(p, KTR_EMUL))
		p->p_traceflag |= KTRFAC_TRC_EMUL;
#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif

	return (1);
}
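
/*
 * Apply ktrops() to "top" and every process below it in the process
 * tree.  The walk is an iterative preorder traversal: visit a process,
 * descend to its first child if it has one, otherwise move to the next
 * sibling, backing up toward "top" (but never past it) when a subtree
 * is exhausted.
 */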
int
ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs,
    struct ktr_desc *ktd)
{
	struct proc *p;
	int ret = 0;

	p = top;
	for (;;) {
		ret |= ktrops(curp, p, ops, facs, ktd);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (LIST_FIRST(&p->p_children) != NULL) {
			p = LIST_FIRST(&p->p_children);
			continue;
		}
		for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling) != NULL) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
	/*NOTREACHED*/
}

void
ktrwrite(struct ktr_desc *ktd, struct ktrace_entry *kte)
{
	struct uio auio;
	struct iovec aiov[64], *iov;
	struct ktrace_entry *top = kte;
	struct ktr_header *kth;
	struct file *fp = ktd->ktd_fp;
	struct proc *p;
	int error;
next:
	auio.uio_iov = iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = 0;
	auio.uio_iovcnt = 0;
	auio.uio_lwp = curlwp;
	do {
		kth = &kte->kte_kth;

		if (kth->ktr_version == 0) {
			/*
			 * Convert back to the old format fields.
			 */
			TIMESPEC_TO_TIMEVAL(&kth->ktr_tv, &kth->ktr_time);
			kth->ktr_unused = NULL;
		}
		iov->iov_base = (caddr_t)kth;
		iov++->iov_len = sizeof(struct ktr_header);
		auio.uio_resid += sizeof(struct ktr_header);
		auio.uio_iovcnt++;
		if (kth->ktr_len > 0) {
			iov->iov_base = kte->kte_buf;
			iov++->iov_len = kth->ktr_len;
			auio.uio_resid += kth->ktr_len;
			auio.uio_iovcnt++;
		}
	} while ((kte = TAILQ_NEXT(kte, kte_list)) != NULL &&
	    auio.uio_iovcnt < sizeof(aiov) / sizeof(aiov[0]) - 1);

again:
	simple_lock(&fp->f_slock);
	FILE_USE(fp);
	error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
	    fp->f_cred, FOF_UPDATE_OFFSET);
	FILE_UNUSE(fp, NULL);
	switch (error) {

	case 0:
		if (auio.uio_resid > 0)
			goto again;
		if (kte != NULL)
			goto next;
		break;

	case EWOULDBLOCK:
		preempt(1);
		goto again;

	default:
		/*
		 * If an error was encountered, give up tracing on this
		 * vnode.  Don't report EPIPE as this can easily
		 * happen with fktrace()/ktruss.
		 */
#ifndef DEBUG
		if (error != EPIPE)
#endif
			log(LOG_NOTICE,
			    "ktrace write failed, errno %d, tracing stopped\n",
			    error);
		proclist_lock_read();
		PROCLIST_FOREACH(p, &allproc) {
			if (p->p_tracep == ktd)
				ktrderef(p);
		}
		proclist_unlock_read();
	}

	while ((kte = top) != NULL) {
		top = TAILQ_NEXT(top, kte_list);
		ktefree(kte);
	}
}
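
/*
 * Per-descriptor writer thread.  It sleeps until ktraddentry() (or the
 * delayed-wakeup callout) signals it, grabs the whole queue in one go,
 * reports any accumulated errors, and hands the batch to ktrwrite().
 * When the last reference to the descriptor is dropped, it closes the
 * output file, frees the descriptor and exits.
 */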
void
ktrace_thread(void *arg)
{
	struct ktr_desc *ktd = arg;
	struct file *fp = ktd->ktd_fp;
	struct ktrace_entry *kte;
	int ktrerr, errcnt;

	for (;;) {
		simple_lock(&ktd->ktd_slock);
		kte = TAILQ_FIRST(&ktd->ktd_queue);
		if (kte == NULL) {
			if (ktd->ktd_flags & KTDF_WAIT) {
				ktd->ktd_flags &= ~(KTDF_WAIT | KTDF_BLOCKING);
				wakeup(&ktd->ktd_flags);
			}
			if (ktd->ktd_ref == 0)
				break;
			ltsleep(ktd, PWAIT | PNORELOCK, "ktrwait", 0,
			    &ktd->ktd_slock);
			continue;
		}
		TAILQ_INIT(&ktd->ktd_queue);
		ktd->ktd_qcount = 0;
		ktrerr = ktd->ktd_error;
		errcnt = ktd->ktd_errcnt;
		ktd->ktd_error = ktd->ktd_errcnt = 0;
		simple_unlock(&ktd->ktd_slock);

		if (ktrerr) {
			log(LOG_NOTICE,
			    "ktrace failed, fp %p, error 0x%x, total %d\n",
			    fp, ktrerr, errcnt);
		}
		ktrwrite(ktd, kte);
	}
	simple_unlock(&ktd->ktd_slock);

	simple_lock(&ktdq_slock);
	TAILQ_REMOVE(&ktdq, ktd, ktd_list);
	simple_unlock(&ktdq_slock);

	simple_lock(&fp->f_slock);
	FILE_USE(fp);

	/*
	 * ktrace file descriptors can't be watched (they are not visible
	 * to userspace), so no kqueue stuff here.
	 * XXX: The above comment is wrong, because the fktrace file
	 * descriptor is available in userland.
	 */
	closef(fp, NULL);

	callout_stop(&ktd->ktd_wakch);
	free(ktd, M_KTRACE);

	kthread_exit(0);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
 */
int
ktrcanset(struct proc *callp, struct proc *targetp)
{
	struct pcred *caller = callp->p_cred;
	struct pcred *target = targetp->p_cred;

	if ((caller->pc_ucred->cr_uid == target->p_ruid &&
	    target->p_ruid == target->p_svuid &&
	    caller->p_rgid == target->p_rgid &&	/* XXX */
	    target->p_rgid == target->p_svgid &&
	    (targetp->p_traceflag & KTRFAC_ROOT) == 0 &&
	    (targetp->p_flag & P_SUGID) == 0) ||
	    caller->pc_ucred->cr_uid == 0)
		return (1);

	return (0);
}
#endif /* KTRACE */

/*
 * Add a user-defined entry to the ktrace records.
 */
int
sys_utrace(struct lwp *l, void *v, register_t *retval)
{
#ifdef KTRACE
	struct sys_utrace_args /* {
		syscallarg(const char *) label;
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	struct proc *p = l->l_proc;

	if (!KTRPOINT(p, KTR_USER))
		return (0);

	if (SCARG(uap, len) > KTR_USER_MAXLEN)
		return (EINVAL);

	ktruser(l, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1);

	return (0);
#else /* !KTRACE */
	return ENOSYS;
#endif /* KTRACE */
}