1 /* $NetBSD: kern_ktrace.c,v 1.97 2005/05/29 22:24:15 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.5 (Berkeley) 5/14/95 32 */ 33 34 #include <sys/cdefs.h> 35 __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.97 2005/05/29 22:24:15 christos Exp $"); 36 37 #include "opt_ktrace.h" 38 #include "opt_compat_mach.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/proc.h> 43 #include <sys/file.h> 44 #include <sys/namei.h> 45 #include <sys/vnode.h> 46 #include <sys/kernel.h> 47 #include <sys/kthread.h> 48 #include <sys/ktrace.h> 49 #include <sys/malloc.h> 50 #include <sys/syslog.h> 51 #include <sys/filedesc.h> 52 #include <sys/ioctl.h> 53 #include <sys/callout.h> 54 55 #include <sys/mount.h> 56 #include <sys/sa.h> 57 #include <sys/syscallargs.h> 58 59 #ifdef KTRACE 60 61 /* 62 * XXX: 63 * - need better error reporting? 64 * - p->p_tracep access lock. lock p_lock, lock ktd if !NULL, inc ref. 65 * - userland utility to sort ktrace.out by timestamp. 66 * - keep minimum information in ktrace_entry when rest of alloc failed. 67 * - enlarge ktrace_entry so that small entry won't require additional 68 * alloc? 69 * - per trace control of configurable parameters. 70 */ 71 72 struct ktrace_entry { 73 TAILQ_ENTRY(ktrace_entry) kte_list; 74 struct ktr_header kte_kth; 75 void *kte_buf; /* ktr_buf */ 76 }; 77 78 struct ktr_desc { 79 TAILQ_ENTRY(ktr_desc) ktd_list; 80 int ktd_flags; 81 #define KTDF_WAIT 0x0001 82 #define KTDF_DONE 0x0002 83 #define KTDF_BLOCKING 0x0004 84 #define KTDF_INTERACTIVE 0x0008 85 int ktd_error; 86 #define KTDE_ENOMEM 0x0001 87 #define KTDE_ENOSPC 0x0002 88 int ktd_errcnt; 89 int ktd_ref; /* # of reference */ 90 int ktd_qcount; /* # of entry in the queue */ 91 92 /* 93 * Params to control behaviour. 94 */ 95 int ktd_delayqcnt; /* # of entry allowed to delay */ 96 int ktd_wakedelay; /* delay of wakeup in *tick* */ 97 int ktd_intrwakdl; /* ditto, but when interactive */ 98 99 struct file *ktd_fp; /* trace output file */ 100 struct proc *ktd_proc; /* our kernel thread */ 101 TAILQ_HEAD(, ktrace_entry) ktd_queue; 102 struct callout ktd_wakch; /* delayed wakeup */ 103 struct simplelock ktd_slock; 104 }; 105 106 static void ktrinitheader(struct ktr_header *, struct proc *, int); 107 static void ktrwrite(struct ktr_desc *, struct ktrace_entry *); 108 static int ktrace_common(struct proc *, int, int, int, struct file *); 109 static int ktrops(struct proc *, struct proc *, int, int, 110 struct ktr_desc *); 111 static int ktrsetchildren(struct proc *, struct proc *, int, int, 112 struct ktr_desc *); 113 static int ktrcanset(struct proc *, struct proc *); 114 static int ktrsamefile(struct file *, struct file *); 115 116 static struct ktr_desc * 117 ktd_lookup(struct file *); 118 static void ktdrel(struct ktr_desc *); 119 static void ktdref(struct ktr_desc *); 120 static void ktraddentry(struct proc *, struct ktrace_entry *, int); 121 /* Flags for ktraddentry (3rd arg) */ 122 #define KTA_NOWAIT 0x0000 123 #define KTA_WAITOK 0x0001 124 #define KTA_LARGE 0x0002 125 static void ktefree(struct ktrace_entry *); 126 static void ktd_logerrl(struct ktr_desc *, int); 127 static void ktd_logerr(struct proc *, int); 128 static void ktrace_thread(void *); 129 130 /* 131 * Default vaules. 132 */ 133 #define KTD_MAXENTRY 1000 /* XXX: tune */ 134 #define KTD_TIMEOUT 5 /* XXX: tune */ 135 #define KTD_DELAYQCNT 100 /* XXX: tune */ 136 #define KTD_WAKEDELAY 5000 /* XXX: tune */ 137 #define KTD_INTRWAKDL 100 /* XXX: tune */ 138 139 /* 140 * Patchable variables. 141 */ 142 int ktd_maxentry = KTD_MAXENTRY; /* max # of entry in the queue */ 143 int ktd_timeout = KTD_TIMEOUT; /* timeout in seconds */ 144 int ktd_delayqcnt = KTD_DELAYQCNT; /* # of entry allowed to delay */ 145 int ktd_wakedelay = KTD_WAKEDELAY; /* delay of wakeup in *ms* */ 146 int ktd_intrwakdl = KTD_INTRWAKDL; /* ditto, but when interactive */ 147 148 static struct simplelock ktdq_slock = SIMPLELOCK_INITIALIZER; 149 static TAILQ_HEAD(, ktr_desc) ktdq = TAILQ_HEAD_INITIALIZER(ktdq); 150 151 MALLOC_DEFINE(M_KTRACE, "ktrace", "ktrace data buffer"); 152 POOL_INIT(kte_pool, sizeof(struct ktrace_entry), 0, 0, 0, 153 "ktepl", &pool_allocator_nointr); 154 155 static __inline void 156 ktd_wakeup(struct ktr_desc *ktd) 157 { 158 159 callout_stop(&ktd->ktd_wakch); 160 wakeup(ktd); 161 } 162 163 static void 164 ktd_logerrl(struct ktr_desc *ktd, int error) 165 { 166 167 ktd->ktd_error |= error; 168 ktd->ktd_errcnt++; 169 } 170 171 static void 172 ktd_logerr(struct proc *p, int error) 173 { 174 struct ktr_desc *ktd = p->p_tracep; 175 176 if (ktd == NULL) 177 return; 178 179 simple_lock(&ktd->ktd_slock); 180 ktd_logerrl(ktd, error); 181 simple_unlock(&ktd->ktd_slock); 182 } 183 184 /* 185 * Release a reference. Called with ktd_slock held. 186 */ 187 void 188 ktdrel(struct ktr_desc *ktd) 189 { 190 191 KDASSERT(ktd->ktd_ref != 0); 192 KASSERT(ktd->ktd_ref > 0); 193 if (--ktd->ktd_ref <= 0) { 194 ktd->ktd_flags |= KTDF_DONE; 195 wakeup(ktd); 196 } 197 simple_unlock(&ktd->ktd_slock); 198 } 199 200 void 201 ktdref(struct ktr_desc *ktd) 202 { 203 204 simple_lock(&ktd->ktd_slock); 205 ktd->ktd_ref++; 206 simple_unlock(&ktd->ktd_slock); 207 } 208 209 struct ktr_desc * 210 ktd_lookup(struct file *fp) 211 { 212 struct ktr_desc *ktd; 213 214 simple_lock(&ktdq_slock); 215 for (ktd = TAILQ_FIRST(&ktdq); ktd != NULL; 216 ktd = TAILQ_NEXT(ktd, ktd_list)) { 217 simple_lock(&ktd->ktd_slock); 218 if (ktrsamefile(ktd->ktd_fp, fp)) { 219 ktd->ktd_ref++; 220 simple_unlock(&ktd->ktd_slock); 221 break; 222 } 223 simple_unlock(&ktd->ktd_slock); 224 } 225 simple_unlock(&ktdq_slock); 226 return (ktd); 227 } 228 229 void 230 ktraddentry(struct proc *p, struct ktrace_entry *kte, int flags) 231 { 232 struct ktr_desc *ktd; 233 #ifdef DEBUG 234 struct timeval t; 235 int s; 236 #endif 237 238 if (p->p_traceflag & KTRFAC_TRC_EMUL) { 239 /* Add emulation trace before first entry for this process */ 240 p->p_traceflag &= ~KTRFAC_TRC_EMUL; 241 ktremul(p); 242 } 243 244 /* 245 * Tracing may be canceled while we were sleeping waiting for 246 * memory. 247 */ 248 ktd = p->p_tracep; 249 if (ktd == NULL) 250 goto freekte; 251 252 /* 253 * Bump reference count so that the object will remain while 254 * we are here. Note that the trace is controlled by other 255 * process. 256 */ 257 ktdref(ktd); 258 259 simple_lock(&ktd->ktd_slock); 260 if (ktd->ktd_flags & KTDF_DONE) 261 goto relktd; 262 263 if (ktd->ktd_qcount > ktd_maxentry) { 264 ktd_logerrl(ktd, KTDE_ENOSPC); 265 goto relktd; 266 } 267 TAILQ_INSERT_TAIL(&ktd->ktd_queue, kte, kte_list); 268 ktd->ktd_qcount++; 269 if (ktd->ktd_flags & KTDF_BLOCKING) 270 goto skip_sync; 271 272 if (flags & KTA_WAITOK && 273 (/* flags & KTA_LARGE */0 || ktd->ktd_flags & KTDF_WAIT || 274 ktd->ktd_qcount > ktd_maxentry >> 1)) 275 /* 276 * Sync with writer thread since we're requesting rather 277 * big one or many requests are pending. 278 */ 279 do { 280 ktd->ktd_flags |= KTDF_WAIT; 281 ktd_wakeup(ktd); 282 #ifdef DEBUG 283 s = splclock(); 284 t = mono_time; 285 splx(s); 286 #endif 287 if (ltsleep(&ktd->ktd_flags, PWAIT, "ktrsync", 288 ktd_timeout * hz, &ktd->ktd_slock) != 0) { 289 ktd->ktd_flags |= KTDF_BLOCKING; 290 /* 291 * Maybe the writer thread is blocking 292 * completely for some reason, but 293 * don't stop target process forever. 294 */ 295 log(LOG_NOTICE, "ktrace timeout\n"); 296 break; 297 } 298 #ifdef DEBUG 299 s = splclock(); 300 timersub(&mono_time, &t, &t); 301 splx(s); 302 if (t.tv_sec > 0) 303 log(LOG_NOTICE, 304 "ktrace long wait: %ld.%06ld\n", 305 t.tv_sec, t.tv_usec); 306 #endif 307 } while (p->p_tracep == ktd && 308 (ktd->ktd_flags & (KTDF_WAIT | KTDF_DONE)) == KTDF_WAIT); 309 else { 310 /* Schedule delayed wakeup */ 311 if (ktd->ktd_qcount > ktd->ktd_delayqcnt) 312 ktd_wakeup(ktd); /* Wakeup now */ 313 else if (!callout_pending(&ktd->ktd_wakch)) 314 callout_reset(&ktd->ktd_wakch, 315 ktd->ktd_flags & KTDF_INTERACTIVE ? 316 ktd->ktd_intrwakdl : ktd->ktd_wakedelay, 317 (void (*)(void *))wakeup, ktd); 318 } 319 320 skip_sync: 321 ktdrel(ktd); 322 return; 323 324 relktd: 325 ktdrel(ktd); 326 327 freekte: 328 ktefree(kte); 329 } 330 331 void 332 ktefree(struct ktrace_entry *kte) 333 { 334 struct ktr_header *kth = &kte->kte_kth; 335 336 if (kth->ktr_len > 0) 337 free(kte->kte_buf, M_KTRACE); 338 pool_put(&kte_pool, kte); 339 } 340 341 /* 342 * "deep" compare of two files for the purposes of clearing a trace. 343 * Returns true if they're the same open file, or if they point at the 344 * same underlying vnode/socket. 345 */ 346 347 int 348 ktrsamefile(struct file *f1, struct file *f2) 349 { 350 351 return ((f1 == f2) || 352 ((f1 != NULL) && (f2 != NULL) && 353 (f1->f_type == f2->f_type) && 354 (f1->f_data == f2->f_data))); 355 } 356 357 void 358 ktrderef(struct proc *p) 359 { 360 struct ktr_desc *ktd = p->p_tracep; 361 362 p->p_traceflag = 0; 363 if (ktd == NULL) 364 return; 365 p->p_tracep = NULL; 366 367 simple_lock(&ktd->ktd_slock); 368 wakeup(&ktd->ktd_flags); 369 ktdrel(ktd); 370 } 371 372 void 373 ktradref(struct proc *p) 374 { 375 struct ktr_desc *ktd = p->p_tracep; 376 377 ktdref(ktd); 378 } 379 380 void 381 ktrinitheader(struct ktr_header *kth, struct proc *p, int type) 382 { 383 384 (void)memset(kth, 0, sizeof(*kth)); 385 kth->ktr_type = type; 386 microtime(&kth->ktr_time); 387 kth->ktr_pid = p->p_pid; 388 memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN); 389 } 390 391 void 392 ktrsyscall(struct proc *p, register_t code, register_t realcode, 393 const struct sysent *callp, register_t args[]) 394 { 395 struct ktrace_entry *kte; 396 struct ktr_header *kth; 397 struct ktr_syscall *ktp; 398 register_t *argp; 399 int argsize; 400 size_t len; 401 u_int i; 402 403 if (callp == NULL) 404 callp = p->p_emul->e_sysent; 405 406 argsize = callp[code].sy_argsize; 407 #ifdef _LP64 408 if (p->p_flag & P_32) 409 argsize = argsize << 1; 410 #endif 411 len = sizeof(struct ktr_syscall) + argsize; 412 413 p->p_traceflag |= KTRFAC_ACTIVE; 414 kte = pool_get(&kte_pool, PR_WAITOK); 415 kth = &kte->kte_kth; 416 ktrinitheader(kth, p, KTR_SYSCALL); 417 418 ktp = malloc(len, M_KTRACE, M_WAITOK); 419 ktp->ktr_code = realcode; 420 ktp->ktr_argsize = argsize; 421 argp = (register_t *)(ktp + 1); 422 for (i = 0; i < (argsize / sizeof(*argp)); i++) 423 *argp++ = args[i]; 424 kth->ktr_len = len; 425 kte->kte_buf = ktp; 426 427 ktraddentry(p, kte, KTA_WAITOK); 428 p->p_traceflag &= ~KTRFAC_ACTIVE; 429 } 430 431 void 432 ktrsysret(struct proc *p, register_t code, int error, register_t *retval) 433 { 434 struct ktrace_entry *kte; 435 struct ktr_header *kth; 436 struct ktr_sysret *ktp; 437 438 p->p_traceflag |= KTRFAC_ACTIVE; 439 kte = pool_get(&kte_pool, PR_WAITOK); 440 kth = &kte->kte_kth; 441 ktrinitheader(kth, p, KTR_SYSRET); 442 443 ktp = malloc(sizeof(struct ktr_sysret), M_KTRACE, M_WAITOK); 444 ktp->ktr_code = code; 445 ktp->ktr_eosys = 0; /* XXX unused */ 446 ktp->ktr_error = error; 447 ktp->ktr_retval = retval ? retval[0] : 0; 448 ktp->ktr_retval_1 = retval ? retval[1] : 0; 449 450 kth->ktr_len = sizeof(struct ktr_sysret); 451 kte->kte_buf = ktp; 452 453 ktraddentry(p, kte, KTA_WAITOK); 454 p->p_traceflag &= ~KTRFAC_ACTIVE; 455 } 456 457 /* 458 * XXX: ndp->ni_pathlen should be passed. 459 */ 460 void 461 ktrnamei(struct proc *p, char *path) 462 { 463 464 ktrkmem(p, KTR_NAMEI, path, strlen(path)); 465 } 466 467 void 468 ktremul(struct proc *p) 469 { 470 const char *emul = p->p_emul->e_name; 471 472 ktrkmem(p, KTR_EMUL, emul, strlen(emul)); 473 } 474 475 void 476 ktrkmem(struct proc *p, int type, const void *bf, size_t len) 477 { 478 struct ktrace_entry *kte; 479 struct ktr_header *kth; 480 481 p->p_traceflag |= KTRFAC_ACTIVE; 482 kte = pool_get(&kte_pool, PR_WAITOK); 483 kth = &kte->kte_kth; 484 ktrinitheader(kth, p, type); 485 486 kth->ktr_len = len; 487 kte->kte_buf = malloc(len, M_KTRACE, M_WAITOK); 488 memcpy(kte->kte_buf, bf, len); 489 490 ktraddentry(p, kte, KTA_WAITOK); 491 p->p_traceflag &= ~KTRFAC_ACTIVE; 492 } 493 494 void 495 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov, 496 int len, int error) 497 { 498 struct ktrace_entry *kte; 499 struct ktr_header *kth; 500 struct ktr_genio *ktp; 501 caddr_t cp; 502 int resid = len, cnt; 503 int buflen; 504 505 if (error) 506 return; 507 508 p->p_traceflag |= KTRFAC_ACTIVE; 509 510 next: 511 buflen = min(PAGE_SIZE, resid + sizeof(struct ktr_genio)); 512 513 kte = pool_get(&kte_pool, PR_WAITOK); 514 kth = &kte->kte_kth; 515 ktrinitheader(kth, p, KTR_GENIO); 516 517 ktp = malloc(buflen, M_KTRACE, M_WAITOK); 518 ktp->ktr_fd = fd; 519 ktp->ktr_rw = rw; 520 521 kte->kte_buf = ktp; 522 523 cp = (caddr_t)(ktp + 1); 524 buflen -= sizeof(struct ktr_genio); 525 kth->ktr_len = sizeof(struct ktr_genio); 526 527 while (buflen > 0) { 528 cnt = min(iov->iov_len, buflen); 529 if (copyin(iov->iov_base, cp, cnt) != 0) 530 goto out; 531 kth->ktr_len += cnt; 532 buflen -= cnt; 533 resid -= cnt; 534 iov->iov_len -= cnt; 535 if (iov->iov_len == 0) 536 iov++; 537 else 538 iov->iov_base = (caddr_t)iov->iov_base + cnt; 539 } 540 541 /* 542 * Don't push so many entry at once. It will cause kmem map 543 * shortage. 544 */ 545 ktraddentry(p, kte, KTA_WAITOK | KTA_LARGE); 546 if (resid > 0) { 547 #if 0 /* XXX NJWLWP */ 548 KDASSERT(p->p_cpu != NULL); 549 KDASSERT(p->p_cpu == curcpu()); 550 #endif 551 /* XXX NJWLWP */ 552 if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) 553 preempt(1); 554 555 goto next; 556 } 557 558 p->p_traceflag &= ~KTRFAC_ACTIVE; 559 return; 560 561 out: 562 ktefree(kte); 563 p->p_traceflag &= ~KTRFAC_ACTIVE; 564 } 565 566 void 567 ktrpsig(struct proc *p, int sig, sig_t action, const sigset_t *mask, 568 const ksiginfo_t *ksi) 569 { 570 struct ktrace_entry *kte; 571 struct ktr_header *kth; 572 struct { 573 struct ktr_psig kp; 574 siginfo_t si; 575 } *kbuf; 576 577 p->p_traceflag |= KTRFAC_ACTIVE; 578 kte = pool_get(&kte_pool, PR_WAITOK); 579 kth = &kte->kte_kth; 580 ktrinitheader(kth, p, KTR_PSIG); 581 582 kbuf = malloc(sizeof(*kbuf), M_KTRACE, M_WAITOK); 583 kbuf->kp.signo = (char)sig; 584 kbuf->kp.action = action; 585 kbuf->kp.mask = *mask; 586 kte->kte_buf = kbuf; 587 if (ksi) { 588 kbuf->kp.code = KSI_TRAPCODE(ksi); 589 (void)memset(&kbuf->si, 0, sizeof(kbuf->si)); 590 kbuf->si._info = ksi->ksi_info; 591 kth->ktr_len = sizeof(*kbuf); 592 } else { 593 kbuf->kp.code = 0; 594 kth->ktr_len = sizeof(struct ktr_psig); 595 } 596 597 ktraddentry(p, kte, KTA_WAITOK); 598 p->p_traceflag &= ~KTRFAC_ACTIVE; 599 } 600 601 void 602 ktrcsw(struct proc *p, int out, int user) 603 { 604 struct ktrace_entry *kte; 605 struct ktr_header *kth; 606 struct ktr_csw *kc; 607 608 p->p_traceflag |= KTRFAC_ACTIVE; 609 610 /* 611 * We can't sleep if we're already going to sleep (if original 612 * condition is met during sleep, we hang up). 613 */ 614 kte = pool_get(&kte_pool, out ? PR_NOWAIT : PR_WAITOK); 615 if (kte == NULL) { 616 ktd_logerr(p, KTDE_ENOMEM); 617 goto out; 618 } 619 kth = &kte->kte_kth; 620 ktrinitheader(kth, p, KTR_CSW); 621 622 kc = malloc(sizeof(struct ktr_csw), M_KTRACE, 623 out ? M_NOWAIT : M_WAITOK); 624 if (kc == NULL) { 625 ktd_logerr(p, KTDE_ENOMEM); 626 goto free_kte; 627 } 628 kc->out = out; 629 kc->user = user; 630 kth->ktr_len = sizeof(struct ktr_csw); 631 kte->kte_buf = kc; 632 633 ktraddentry(p, kte, out ? KTA_NOWAIT : KTA_WAITOK); 634 p->p_traceflag &= ~KTRFAC_ACTIVE; 635 return; 636 637 free_kte: 638 pool_put(&kte_pool, kte); 639 out: 640 p->p_traceflag &= ~KTRFAC_ACTIVE; 641 } 642 643 void 644 ktruser(struct proc *p, const char *id, void *addr, size_t len, int ustr) 645 { 646 struct ktrace_entry *kte; 647 struct ktr_header *kth; 648 struct ktr_user *ktp; 649 caddr_t user_dta; 650 651 p->p_traceflag |= KTRFAC_ACTIVE; 652 kte = pool_get(&kte_pool, PR_WAITOK); 653 kth = &kte->kte_kth; 654 ktrinitheader(kth, p, KTR_USER); 655 656 ktp = malloc(sizeof(struct ktr_user) + len, M_KTRACE, M_WAITOK); 657 if (ustr) { 658 if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0) 659 ktp->ktr_id[0] = '\0'; 660 } else 661 strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN); 662 ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0'; 663 664 user_dta = (caddr_t)(ktp + 1); 665 if (copyin(addr, (void *)user_dta, len) != 0) 666 len = 0; 667 668 kth->ktr_len = sizeof(struct ktr_user) + len; 669 kte->kte_buf = ktp; 670 671 ktraddentry(p, kte, KTA_WAITOK); 672 p->p_traceflag &= ~KTRFAC_ACTIVE; 673 } 674 675 void 676 ktrmmsg(struct proc *p, const void *msgh, size_t size) 677 { 678 679 ktrkmem(p, KTR_MMSG, msgh, size); 680 } 681 682 void 683 ktrmool(struct proc *p, const void *kaddr, size_t size, const void *uaddr) 684 { 685 struct ktrace_entry *kte; 686 struct ktr_header *kth; 687 struct ktr_mool *kp; 688 struct ktr_mool *bf; 689 690 p->p_traceflag |= KTRFAC_ACTIVE; 691 kte = pool_get(&kte_pool, PR_WAITOK); 692 kth = &kte->kte_kth; 693 ktrinitheader(kth, p, KTR_MOOL); 694 695 kp = malloc(size + sizeof(*kp), M_KTRACE, M_WAITOK); 696 kp->uaddr = uaddr; 697 kp->size = size; 698 bf = kp + 1; /* Skip uaddr and size */ 699 (void)memcpy(bf, kaddr, size); 700 701 kth->ktr_len = size + sizeof(*kp); 702 kte->kte_buf = kp; 703 704 ktraddentry(p, kte, KTA_WAITOK); 705 p->p_traceflag &= ~KTRFAC_ACTIVE; 706 } 707 708 709 /* Interface and common routines */ 710 711 int 712 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp) 713 { 714 struct proc *p; 715 struct pgrp *pg; 716 struct ktr_desc *ktd = NULL; 717 int ret = 0; 718 int error = 0; 719 int descend; 720 721 curp->p_traceflag |= KTRFAC_ACTIVE; 722 descend = ops & KTRFLAG_DESCEND; 723 facs = facs & ~((unsigned) KTRFAC_ROOT); 724 725 switch (KTROP(ops)) { 726 727 case KTROP_CLEARFILE: 728 /* 729 * Clear all uses of the tracefile 730 */ 731 732 ktd = ktd_lookup(fp); 733 if (ktd == NULL) 734 goto done; 735 736 proclist_lock_read(); 737 PROCLIST_FOREACH(p, &allproc) { 738 if (p->p_tracep == ktd) { 739 if (ktrcanset(curp, p)) 740 ktrderef(p); 741 else 742 error = EPERM; 743 } 744 } 745 proclist_unlock_read(); 746 goto done; 747 748 case KTROP_SET: 749 ktd = ktd_lookup(fp); 750 if (ktd == NULL) { 751 ktd = malloc(sizeof(struct ktr_desc), 752 M_KTRACE, M_WAITOK); 753 TAILQ_INIT(&ktd->ktd_queue); 754 simple_lock_init(&ktd->ktd_slock); 755 callout_init(&ktd->ktd_wakch); 756 ktd->ktd_flags = ktd->ktd_qcount = 757 ktd->ktd_error = ktd->ktd_errcnt = 0; 758 ktd->ktd_ref = 1; 759 ktd->ktd_delayqcnt = ktd_delayqcnt; 760 ktd->ktd_wakedelay = mstohz(ktd_wakedelay); 761 ktd->ktd_intrwakdl = mstohz(ktd_intrwakdl); 762 /* 763 * XXX: not correct. needs an way to detect 764 * whether ktruss or ktrace. 765 */ 766 if (fp->f_type == DTYPE_PIPE) 767 ktd->ktd_flags |= KTDF_INTERACTIVE; 768 769 error = kthread_create1(ktrace_thread, ktd, 770 &ktd->ktd_proc, "ktr %p", ktd); 771 if (error != 0) { 772 free(ktd, M_KTRACE); 773 goto done; 774 } 775 776 simple_lock(&fp->f_slock); 777 fp->f_count++; 778 simple_unlock(&fp->f_slock); 779 ktd->ktd_fp = fp; 780 781 simple_lock(&ktdq_slock); 782 TAILQ_INSERT_TAIL(&ktdq, ktd, ktd_list); 783 simple_unlock(&ktdq_slock); 784 } 785 break; 786 787 case KTROP_CLEAR: 788 break; 789 } 790 791 /* 792 * need something to (un)trace (XXX - why is this here?) 793 */ 794 if (!facs) { 795 error = EINVAL; 796 goto done; 797 } 798 799 /* 800 * do it 801 */ 802 if (pid < 0) { 803 /* 804 * by process group 805 */ 806 pg = pg_find(-pid, PFIND_UNLOCK_FAIL); 807 if (pg == NULL) { 808 error = ESRCH; 809 goto done; 810 } 811 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 812 if (descend) 813 ret |= ktrsetchildren(curp, p, ops, facs, ktd); 814 else 815 ret |= ktrops(curp, p, ops, facs, ktd); 816 } 817 818 } else { 819 /* 820 * by pid 821 */ 822 p = p_find(pid, PFIND_UNLOCK_FAIL); 823 if (p == NULL) { 824 error = ESRCH; 825 goto done; 826 } 827 if (descend) 828 ret |= ktrsetchildren(curp, p, ops, facs, ktd); 829 else 830 ret |= ktrops(curp, p, ops, facs, ktd); 831 } 832 proclist_unlock_read(); /* taken by p{g}_find */ 833 if (!ret) 834 error = EPERM; 835 done: 836 if (ktd != NULL) { 837 if (error != 0) { 838 /* 839 * Wakeup the thread so that it can be die if we 840 * can't trace any process. 841 */ 842 ktd_wakeup(ktd); 843 } 844 if (KTROP(ops) == KTROP_SET || KTROP(ops) == KTROP_CLEARFILE) { 845 simple_lock(&ktd->ktd_slock); 846 ktdrel(ktd); 847 } 848 } 849 curp->p_traceflag &= ~KTRFAC_ACTIVE; 850 return (error); 851 } 852 853 /* 854 * fktrace system call 855 */ 856 /* ARGSUSED */ 857 int 858 sys_fktrace(struct lwp *l, void *v, register_t *retval) 859 { 860 struct sys_fktrace_args /* { 861 syscallarg(int) fd; 862 syscallarg(int) ops; 863 syscallarg(int) facs; 864 syscallarg(int) pid; 865 } */ *uap = v; 866 struct proc *curp = l->l_proc; 867 struct file *fp = NULL; 868 struct filedesc *fdp = curp->p_fd; 869 int error; 870 871 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 872 return (EBADF); 873 874 FILE_USE(fp); 875 876 if ((fp->f_flag & FWRITE) == 0) 877 error = EBADF; 878 else 879 error = ktrace_common(curp, SCARG(uap, ops), 880 SCARG(uap, facs), SCARG(uap, pid), fp); 881 882 FILE_UNUSE(fp, curp); 883 884 return error; 885 } 886 887 /* 888 * ktrace system call 889 */ 890 /* ARGSUSED */ 891 int 892 sys_ktrace(struct lwp *l, void *v, register_t *retval) 893 { 894 struct sys_ktrace_args /* { 895 syscallarg(const char *) fname; 896 syscallarg(int) ops; 897 syscallarg(int) facs; 898 syscallarg(int) pid; 899 } */ *uap = v; 900 struct proc *curp = l->l_proc; 901 struct vnode *vp = NULL; 902 struct file *fp = NULL; 903 int fd; 904 int ops = SCARG(uap, ops); 905 int error = 0; 906 struct nameidata nd; 907 908 ops = KTROP(ops) | (ops & KTRFLAG_DESCEND); 909 910 curp->p_traceflag |= KTRFAC_ACTIVE; 911 if ((ops & KTROP_CLEAR) == 0) { 912 /* 913 * an operation which requires a file argument. 914 */ 915 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname), 916 curp); 917 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 918 curp->p_traceflag &= ~KTRFAC_ACTIVE; 919 return (error); 920 } 921 vp = nd.ni_vp; 922 VOP_UNLOCK(vp, 0); 923 if (vp->v_type != VREG) { 924 (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); 925 curp->p_traceflag &= ~KTRFAC_ACTIVE; 926 return (EACCES); 927 } 928 /* 929 * XXX This uses up a file descriptor slot in the 930 * tracing process for the duration of this syscall. 931 * This is not expected to be a problem. If 932 * falloc(NULL, ...) DTRT we could skip that part, but 933 * that would require changing its interface to allow 934 * the caller to pass in a ucred.. 935 * 936 * This will FILE_USE the fp it returns, if any. 937 * Keep it in use until we return. 938 */ 939 if ((error = falloc(curp, &fp, &fd)) != 0) 940 goto done; 941 942 fp->f_flag = FWRITE; 943 fp->f_type = DTYPE_VNODE; 944 fp->f_ops = &vnops; 945 fp->f_data = (caddr_t)vp; 946 FILE_SET_MATURE(fp); 947 vp = NULL; 948 } 949 error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs), 950 SCARG(uap, pid), fp); 951 done: 952 if (vp != NULL) 953 (void) vn_close(vp, FWRITE, curp->p_ucred, curp); 954 if (fp != NULL) { 955 FILE_UNUSE(fp, curp); /* release file */ 956 fdrelease(curp, fd); /* release fd table slot */ 957 } 958 return (error); 959 } 960 961 int 962 ktrops(struct proc *curp, struct proc *p, int ops, int facs, 963 struct ktr_desc *ktd) 964 { 965 966 if (!ktrcanset(curp, p)) 967 return (0); 968 if (KTROP(ops) == KTROP_SET) { 969 if (p->p_tracep != ktd) { 970 /* 971 * if trace file already in use, relinquish 972 */ 973 ktrderef(p); 974 p->p_tracep = ktd; 975 ktradref(p); 976 } 977 p->p_traceflag |= facs; 978 if (curp->p_ucred->cr_uid == 0) 979 p->p_traceflag |= KTRFAC_ROOT; 980 } else { 981 /* KTROP_CLEAR */ 982 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) { 983 /* no more tracing */ 984 ktrderef(p); 985 } 986 } 987 988 /* 989 * Emit an emulation record, every time there is a ktrace 990 * change/attach request. 991 */ 992 if (KTRPOINT(p, KTR_EMUL)) 993 p->p_traceflag |= KTRFAC_TRC_EMUL; 994 #ifdef __HAVE_SYSCALL_INTERN 995 (*p->p_emul->e_syscall_intern)(p); 996 #endif 997 998 return (1); 999 } 1000 1001 int 1002 ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs, 1003 struct ktr_desc *ktd) 1004 { 1005 struct proc *p; 1006 int ret = 0; 1007 1008 p = top; 1009 for (;;) { 1010 ret |= ktrops(curp, p, ops, facs, ktd); 1011 /* 1012 * If this process has children, descend to them next, 1013 * otherwise do any siblings, and if done with this level, 1014 * follow back up the tree (but not past top). 1015 */ 1016 if (LIST_FIRST(&p->p_children) != NULL) { 1017 p = LIST_FIRST(&p->p_children); 1018 continue; 1019 } 1020 for (;;) { 1021 if (p == top) 1022 return (ret); 1023 if (LIST_NEXT(p, p_sibling) != NULL) { 1024 p = LIST_NEXT(p, p_sibling); 1025 break; 1026 } 1027 p = p->p_pptr; 1028 } 1029 } 1030 /*NOTREACHED*/ 1031 } 1032 1033 void 1034 ktrwrite(struct ktr_desc *ktd, struct ktrace_entry *kte) 1035 { 1036 struct uio auio; 1037 struct iovec aiov[64], *iov; 1038 struct ktrace_entry *top = kte; 1039 struct ktr_header *kth; 1040 struct file *fp = ktd->ktd_fp; 1041 struct proc *p; 1042 int error; 1043 1044 next: 1045 auio.uio_iov = iov = &aiov[0]; 1046 auio.uio_offset = 0; 1047 auio.uio_segflg = UIO_SYSSPACE; 1048 auio.uio_rw = UIO_WRITE; 1049 auio.uio_resid = 0; 1050 auio.uio_iovcnt = 0; 1051 auio.uio_procp = NULL; 1052 do { 1053 kth = &kte->kte_kth; 1054 iov->iov_base = (caddr_t)kth; 1055 iov++->iov_len = sizeof(struct ktr_header); 1056 auio.uio_resid += sizeof(struct ktr_header); 1057 auio.uio_iovcnt++; 1058 if (kth->ktr_len > 0) { 1059 iov->iov_base = kte->kte_buf; 1060 iov++->iov_len = kth->ktr_len; 1061 auio.uio_resid += kth->ktr_len; 1062 auio.uio_iovcnt++; 1063 } 1064 } while ((kte = TAILQ_NEXT(kte, kte_list)) != NULL && 1065 auio.uio_iovcnt < sizeof(aiov) / sizeof(aiov[0]) - 1); 1066 1067 again: 1068 simple_lock(&fp->f_slock); 1069 FILE_USE(fp); 1070 error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio, 1071 fp->f_cred, FOF_UPDATE_OFFSET); 1072 FILE_UNUSE(fp, NULL); 1073 switch (error) { 1074 1075 case 0: 1076 if (auio.uio_resid > 0) 1077 goto again; 1078 if (kte != NULL) 1079 goto next; 1080 break; 1081 1082 case EWOULDBLOCK: 1083 preempt(1); 1084 goto again; 1085 1086 default: 1087 /* 1088 * If error encountered, give up tracing on this 1089 * vnode. Don't report EPIPE as this can easily 1090 * happen with fktrace()/ktruss. 1091 */ 1092 #ifndef DEBUG 1093 if (error != EPIPE) 1094 #endif 1095 log(LOG_NOTICE, 1096 "ktrace write failed, errno %d, tracing stopped\n", 1097 error); 1098 proclist_lock_read(); 1099 PROCLIST_FOREACH(p, &allproc) { 1100 if (p->p_tracep == ktd) 1101 ktrderef(p); 1102 } 1103 proclist_unlock_read(); 1104 } 1105 1106 while ((kte = top) != NULL) { 1107 top = TAILQ_NEXT(top, kte_list); 1108 ktefree(kte); 1109 } 1110 } 1111 1112 void 1113 ktrace_thread(void *arg) 1114 { 1115 struct ktr_desc *ktd = arg; 1116 struct file *fp = ktd->ktd_fp; 1117 struct ktrace_entry *kte; 1118 int ktrerr, errcnt; 1119 1120 for (;;) { 1121 simple_lock(&ktd->ktd_slock); 1122 kte = TAILQ_FIRST(&ktd->ktd_queue); 1123 if (kte == NULL) { 1124 if (ktd->ktd_flags & KTDF_WAIT) { 1125 ktd->ktd_flags &= ~(KTDF_WAIT | KTDF_BLOCKING); 1126 wakeup(&ktd->ktd_flags); 1127 } 1128 if (ktd->ktd_ref == 0) 1129 break; 1130 ltsleep(ktd, PWAIT | PNORELOCK, "ktrwait", 0, 1131 &ktd->ktd_slock); 1132 continue; 1133 } 1134 TAILQ_INIT(&ktd->ktd_queue); 1135 ktd->ktd_qcount = 0; 1136 ktrerr = ktd->ktd_error; 1137 errcnt = ktd->ktd_errcnt; 1138 ktd->ktd_error = ktd->ktd_errcnt = 0; 1139 simple_unlock(&ktd->ktd_slock); 1140 1141 if (ktrerr) { 1142 log(LOG_NOTICE, 1143 "ktrace failed, fp %p, error 0x%x, total %d\n", 1144 fp, ktrerr, errcnt); 1145 } 1146 ktrwrite(ktd, kte); 1147 } 1148 simple_unlock(&ktd->ktd_slock); 1149 1150 simple_lock(&ktdq_slock); 1151 TAILQ_REMOVE(&ktdq, ktd, ktd_list); 1152 simple_unlock(&ktdq_slock); 1153 1154 simple_lock(&fp->f_slock); 1155 FILE_USE(fp); 1156 1157 /* 1158 * ktrace file descriptor can't be watched (are not visible to 1159 * userspace), so no kqueue stuff here 1160 * XXX: The above comment is wrong, because the fktrace file 1161 * descriptor is available in userland. 1162 */ 1163 closef(fp, NULL); 1164 1165 callout_stop(&ktd->ktd_wakch); 1166 free(ktd, M_KTRACE); 1167 1168 kthread_exit(0); 1169 } 1170 1171 /* 1172 * Return true if caller has permission to set the ktracing state 1173 * of target. Essentially, the target can't possess any 1174 * more permissions than the caller. KTRFAC_ROOT signifies that 1175 * root previously set the tracing status on the target process, and 1176 * so, only root may further change it. 1177 * 1178 * TODO: check groups. use caller effective gid. 1179 */ 1180 int 1181 ktrcanset(struct proc *callp, struct proc *targetp) 1182 { 1183 struct pcred *caller = callp->p_cred; 1184 struct pcred *target = targetp->p_cred; 1185 1186 if ((caller->pc_ucred->cr_uid == target->p_ruid && 1187 target->p_ruid == target->p_svuid && 1188 caller->p_rgid == target->p_rgid && /* XXX */ 1189 target->p_rgid == target->p_svgid && 1190 (targetp->p_traceflag & KTRFAC_ROOT) == 0 && 1191 (targetp->p_flag & P_SUGID) == 0) || 1192 caller->pc_ucred->cr_uid == 0) 1193 return (1); 1194 1195 return (0); 1196 } 1197 #endif /* KTRACE */ 1198 1199 /* 1200 * Put user defined entry to ktrace records. 1201 */ 1202 int 1203 sys_utrace(struct lwp *l, void *v, register_t *retval) 1204 { 1205 #ifdef KTRACE 1206 struct sys_utrace_args /* { 1207 syscallarg(const char *) label; 1208 syscallarg(void *) addr; 1209 syscallarg(size_t) len; 1210 } */ *uap = v; 1211 struct proc *p = l->l_proc; 1212 1213 if (!KTRPOINT(p, KTR_USER)) 1214 return (0); 1215 1216 if (SCARG(uap, len) > KTR_USER_MAXLEN) 1217 return (EINVAL); 1218 1219 ktruser(p, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1); 1220 1221 return (0); 1222 #else /* !KTRACE */ 1223 return ENOSYS; 1224 #endif /* KTRACE */ 1225 } 1226