xref: /netbsd-src/sys/kern/kern_ktrace.c (revision 4472dbe5e3bd91ef2540bada7a7ca7384627ff9b)
1 /*	$NetBSD: kern_ktrace.c,v 1.46 2000/05/31 05:02:32 thorpej Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)kern_ktrace.c	8.5 (Berkeley) 5/14/95
36  */
37 
38 #include "opt_ktrace.h"
39 
40 #ifdef KTRACE
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/file.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/ktrace.h>
49 #include <sys/malloc.h>
50 #include <sys/syslog.h>
51 #include <sys/filedesc.h>
52 #include <sys/ioctl.h>
53 
54 #include <sys/mount.h>
55 #include <sys/syscallargs.h>
56 
57 int	ktrace_common __P((struct proc *, int, int, int, struct file *));
58 void	ktrinitheader __P((struct ktr_header *, struct proc *, int));
59 int	ktrops __P((struct proc *, struct proc *, int, int, struct file *));
60 int	ktrsetchildren __P((struct proc *, struct proc *, int, int,
61     struct file *));
62 int	ktrwrite __P((struct proc *, struct ktr_header *));
63 int	ktrcanset __P((struct proc *, struct proc *));
64 int	ktrsamefile __P((struct file *, struct file *));
65 
66 /*
67  * "deep" compare of two files for the purposes of clearing a trace.
68  * Returns true if they're the same open file, or if they point at the
69  * same underlying vnode/socket.
70  */
71 
72 int
73 ktrsamefile (f1, f2)
74 	struct file *f1, *f2;
75 {
76 	return ((f1 == f2) ||
77 	    ((f1 != NULL) && (f2 != NULL) &&
78 		(f1->f_type == f2->f_type) &&
79 		(f1->f_data == f2->f_data)));
80 }
81 
82 void
83 ktrderef(p)
84 	struct proc *p;
85 {
86 	struct file *fp = p->p_tracep;
87 	p->p_traceflag = 0;
88 	if (fp == NULL)
89 		return;
90 	FILE_USE(fp);
91 	closef(fp, NULL);
92 
93 	p->p_tracep = NULL;
94 }
95 
96 void
97 ktradref(p)
98 	struct proc *p;
99 {
100 	struct file *fp = p->p_tracep;
101 
102 	fp->f_count++;
103 }
104 
105 void
106 ktrinitheader(kth, p, type)
107 	struct ktr_header *kth;
108 	struct proc *p;
109 	int type;
110 {
111 
112 	memset(kth, 0, sizeof(*kth));
113 	kth->ktr_type = type;
114 	microtime(&kth->ktr_time);
115 	kth->ktr_pid = p->p_pid;
116 	memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
117 }
118 
119 void
120 ktrsyscall(p, code, argsize, args)
121 	struct proc *p;
122 	register_t code;
123 	size_t argsize;
124 	register_t args[];
125 {
126 	struct ktr_header kth;
127 	struct ktr_syscall *ktp;
128 	register_t *argp;
129 	size_t len = sizeof(struct ktr_syscall) + argsize;
130 	int i;
131 
132 	p->p_traceflag |= KTRFAC_ACTIVE;
133 	ktrinitheader(&kth, p, KTR_SYSCALL);
134 	ktp = malloc(len, M_TEMP, M_WAITOK);
135 	ktp->ktr_code = code;
136 	ktp->ktr_argsize = argsize;
137 	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
138 	for (i = 0; i < (argsize / sizeof(*argp)); i++)
139 		*argp++ = args[i];
140 	kth.ktr_buf = (caddr_t)ktp;
141 	kth.ktr_len = len;
142 	(void) ktrwrite(p, &kth);
143 	free(ktp, M_TEMP);
144 	p->p_traceflag &= ~KTRFAC_ACTIVE;
145 }
146 
147 void
148 ktrsysret(p, code, error, retval)
149 	struct proc *p;
150 	register_t code;
151 	int error;
152 	register_t retval;
153 {
154 	struct ktr_header kth;
155 	struct ktr_sysret ktp;
156 
157 	p->p_traceflag |= KTRFAC_ACTIVE;
158 	ktrinitheader(&kth, p, KTR_SYSRET);
159 	ktp.ktr_code = code;
160 	ktp.ktr_eosys = 0;			/* XXX unused */
161 	ktp.ktr_error = error;
162 	ktp.ktr_retval = retval;		/* what about val2 ? */
163 
164 	kth.ktr_buf = (caddr_t)&ktp;
165 	kth.ktr_len = sizeof(struct ktr_sysret);
166 
167 	(void) ktrwrite(p, &kth);
168 	p->p_traceflag &= ~KTRFAC_ACTIVE;
169 }
170 
171 void
172 ktrnamei(p, path)
173 	struct proc *p;
174 	char *path;
175 {
176 	struct ktr_header kth;
177 
178 	p->p_traceflag |= KTRFAC_ACTIVE;
179 	ktrinitheader(&kth, p, KTR_NAMEI);
180 	kth.ktr_len = strlen(path);
181 	kth.ktr_buf = path;
182 
183 	(void) ktrwrite(p, &kth);
184 	p->p_traceflag &= ~KTRFAC_ACTIVE;
185 }
186 
187 void
188 ktremul(p)
189 	struct proc *p;
190 {
191 	struct ktr_header kth;
192 	char *emul = p->p_emul->e_name;
193 
194 	p->p_traceflag |= KTRFAC_ACTIVE;
195 	ktrinitheader(&kth, p, KTR_EMUL);
196 	kth.ktr_len = strlen(emul);
197 	kth.ktr_buf = emul;
198 
199 	(void) ktrwrite(p, &kth);
200 	p->p_traceflag &= ~KTRFAC_ACTIVE;
201 }
202 
203 void
204 ktrgenio(p, fd, rw, iov, len, error)
205 	struct proc *p;
206 	int fd;
207 	enum uio_rw rw;
208 	struct iovec *iov;
209 	int len, error;
210 {
211 	struct ktr_header kth;
212 	struct ktr_genio *ktp;
213 	caddr_t cp;
214 	int resid = len, cnt;
215 	int buflen;
216 
217 	if (error)
218 		return;
219 
220 	p->p_traceflag |= KTRFAC_ACTIVE;
221 
222 	buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));
223 
224 	ktrinitheader(&kth, p, KTR_GENIO);
225 	ktp = malloc(buflen, M_TEMP, M_WAITOK);
226 	ktp->ktr_fd = fd;
227 	ktp->ktr_rw = rw;
228 
229 	kth.ktr_buf = (caddr_t)ktp;
230 
231 	cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio));
232 	buflen -= sizeof(struct ktr_genio);
233 
234 	while (resid > 0) {
235 		KDASSERT(p->p_cpu != NULL);
236 		KDASSERT(p->p_cpu == curcpu());
237 		if (p->p_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
238 			preempt(NULL);
239 
240 		cnt = min(iov->iov_len, buflen);
241 		if (cnt > resid)
242 			cnt = resid;
243 		if (copyin(iov->iov_base, cp, cnt))
244 			break;
245 
246 		kth.ktr_len = cnt + sizeof(struct ktr_genio);
247 
248 		if (__predict_false(ktrwrite(p, &kth) != 0))
249 			break;
250 
251 		iov->iov_base = (caddr_t)iov->iov_base + cnt;
252 		iov->iov_len -= cnt;
253 
254 		if (iov->iov_len == 0)
255 			iov++;
256 
257 		resid -= cnt;
258 	}
259 
260 	free(ktp, M_TEMP);
261 	p->p_traceflag &= ~KTRFAC_ACTIVE;
262 }
263 
264 void
265 ktrpsig(p, sig, action, mask, code)
266 	struct proc *p;
267 	int sig;
268 	sig_t action;
269 	sigset_t *mask;
270 	int code;
271 {
272 	struct ktr_header kth;
273 	struct ktr_psig	kp;
274 
275 	p->p_traceflag |= KTRFAC_ACTIVE;
276 	ktrinitheader(&kth, p, KTR_PSIG);
277 	kp.signo = (char)sig;
278 	kp.action = action;
279 	kp.mask = *mask;
280 	kp.code = code;
281 	kth.ktr_buf = (caddr_t)&kp;
282 	kth.ktr_len = sizeof(struct ktr_psig);
283 
284 	(void) ktrwrite(p, &kth);
285 	p->p_traceflag &= ~KTRFAC_ACTIVE;
286 }
287 
288 void
289 ktrcsw(p, out, user)
290 	struct proc *p;
291 	int out, user;
292 {
293 	struct ktr_header kth;
294 	struct ktr_csw kc;
295 
296 	p->p_traceflag |= KTRFAC_ACTIVE;
297 	ktrinitheader(&kth, p, KTR_CSW);
298 	kc.out = out;
299 	kc.user = user;
300 	kth.ktr_buf = (caddr_t)&kc;
301 	kth.ktr_len = sizeof(struct ktr_csw);
302 
303 	(void) ktrwrite(p, &kth);
304 	p->p_traceflag &= ~KTRFAC_ACTIVE;
305 }
306 
307 /* Interface and common routines */
308 
309 int
310 ktrace_common (curp, ops, facs, pid, fp)
311 	struct proc *curp;
312 	int ops, facs, pid;
313 	struct file *fp;
314 {
315 	int ret = 0;
316 	int error = 0;
317 	int one = 1;
318 	int descend;
319 	struct proc *p;
320 	struct pgrp *pg;
321 
322 	curp->p_traceflag |= KTRFAC_ACTIVE;
323 	descend = ops & KTRFLAG_DESCEND;
324 	facs = facs & ~((unsigned) KTRFAC_ROOT);
325 
326 	/*
327 	 * Clear all uses of the tracefile
328 	 */
329 	if (KTROP(ops) == KTROP_CLEARFILE) {
330 		proclist_lock_read();
331 		for (p = LIST_FIRST(&allproc); p != NULL;
332 		     p = LIST_NEXT(p, p_list)) {
333 			if (ktrsamefile(p->p_tracep, fp)) {
334 				if (ktrcanset(curp, p))
335 					ktrderef(p);
336 				else
337 					error = EPERM;
338 			}
339 		}
340 		proclist_unlock_read();
341 		goto done;
342 	}
343 
344 	/*
345 	 * Mark fp non-blocking, to avoid problems from possible deadlocks.
346 	 */
347 
348 	if (fp != NULL) {
349 		fp->f_flag |= FNONBLOCK;
350 		(*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&one, curp);
351 	}
352 
353 	/*
354 	 * need something to (un)trace (XXX - why is this here?)
355 	 */
356 	if (!facs) {
357 		error = EINVAL;
358 		goto done;
359 	}
360 	/*
361 	 * do it
362 	 */
363 	if (pid < 0) {
364 		/*
365 		 * by process group
366 		 */
367 		pg = pgfind(-pid);
368 		if (pg == NULL) {
369 			error = ESRCH;
370 			goto done;
371 		}
372 		for (p = LIST_FIRST(&pg->pg_members); p != NULL;
373 		     p = LIST_NEXT(p, p_pglist)) {
374 			if (descend)
375 				ret |= ktrsetchildren(curp, p, ops, facs, fp);
376 			else
377 				ret |= ktrops(curp, p, ops, facs, fp);
378 		}
379 
380 	} else {
381 		/*
382 		 * by pid
383 		 */
384 		p = pfind(pid);
385 		if (p == NULL) {
386 			error = ESRCH;
387 			goto done;
388 		}
389 		if (descend)
390 			ret |= ktrsetchildren(curp, p, ops, facs, fp);
391 		else
392 			ret |= ktrops(curp, p, ops, facs, fp);
393 	}
394 	if (!ret)
395 		error = EPERM;
396 done:
397 	curp->p_traceflag &= ~KTRFAC_ACTIVE;
398 	return (error);
399 }
400 
401 /*
402  * ktrace system call
403  */
404 /* ARGSUSED */
405 int
406 sys_fktrace(curp, v, retval)
407 	struct proc *curp;
408 	void *v;
409 	register_t *retval;
410 {
411 	struct sys_fktrace_args /* {
412 		syscallarg(int) fd;
413 		syscallarg(int) ops;
414 		syscallarg(int) facs;
415 		syscallarg(int) pid;
416 	} */ *uap = v;
417 	struct file *fp = NULL;
418 	struct filedesc *fdp = curp->p_fd;
419 
420 	if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
421 	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
422 	    (fp->f_flag & FWRITE) == 0)
423 		return (EBADF);
424 
425 	return ktrace_common(curp, SCARG(uap, ops),
426 	    SCARG(uap, facs), SCARG(uap, pid), fp);
427 }
428 
429 /*
430  * ktrace system call
431  */
432 /* ARGSUSED */
433 int
434 sys_ktrace(curp, v, retval)
435 	struct proc *curp;
436 	void *v;
437 	register_t *retval;
438 {
439 	struct sys_ktrace_args /* {
440 		syscallarg(const char *) fname;
441 		syscallarg(int) ops;
442 		syscallarg(int) facs;
443 		syscallarg(int) pid;
444 	} */ *uap = v;
445 	struct vnode *vp = NULL;
446 	struct file *fp = NULL;
447 	int fd;
448 	int ops = SCARG(uap, ops);
449 	int error = 0;
450 	struct nameidata nd;
451 
452 	ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);
453 
454 	curp->p_traceflag |= KTRFAC_ACTIVE;
455 	if (ops != KTROP_CLEAR) {
456 		/*
457 		 * an operation which requires a file argument.
458 		 */
459 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
460 		    curp);
461 		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
462 			curp->p_traceflag &= ~KTRFAC_ACTIVE;
463 			return (error);
464 		}
465 		vp = nd.ni_vp;
466 		VOP_UNLOCK(vp, 0);
467 		if (vp->v_type != VREG) {
468 			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
469 			curp->p_traceflag &= ~KTRFAC_ACTIVE;
470 			return (EACCES);
471 		}
472 		/*
473 		 * XXX This uses up a file descriptor slot in the
474 		 * tracing process for the duration of this syscall.
475 		 * This is not expected to be a problem.  If
476 		 * falloc(NULL, ...) DTRT we could skip that part, but
477 		 * that would require changing its interface to allow
478 		 * the caller to pass in a ucred..
479 		 *
480 		 * This will FILE_USE the fp it returns, if any.
481 		 * Keep it in use until we return.
482 		 */
483 		if ((error = falloc(curp, &fp, &fd)) != 0)
484 			goto done;
485 
486 		fp->f_flag = FWRITE|FAPPEND;
487 		fp->f_type = DTYPE_VNODE;
488 		fp->f_ops = &vnops;
489 		fp->f_data = (caddr_t)vp;
490 		vp = NULL;
491 	}
492 	error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
493 	    SCARG(uap, pid), fp);
494 done:
495 	if (vp != NULL)
496 		(void) vn_close(vp, FWRITE, curp->p_ucred, curp);
497 	if (fp != NULL) {
498 		FILE_UNUSE(fp, curp);	/* release file */
499 		fdrelease(curp, fd); 	/* release fd table slot */
500 	}
501 	return (error);
502 }
503 
504 int
505 ktrops(curp, p, ops, facs, fp)
506 	struct proc *p, *curp;
507 	int ops, facs;
508 	struct file *fp;
509 {
510 
511 	if (!ktrcanset(curp, p))
512 		return (0);
513 	if (KTROP(ops) == KTROP_SET) {
514 		if (p->p_tracep != fp) {
515 			/*
516 			 * if trace file already in use, relinquish
517 			 */
518 			ktrderef(p);
519 			p->p_tracep = fp;
520 			ktradref(p);
521 		}
522 		p->p_traceflag |= facs;
523 		if (curp->p_ucred->cr_uid == 0)
524 			p->p_traceflag |= KTRFAC_ROOT;
525 	} else {
526 		/* KTROP_CLEAR */
527 		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
528 			/* no more tracing */
529 			ktrderef(p);
530 		}
531 	}
532 
533 	/*
534 	 * Emit an emulation record, every time there is a ktrace
535 	 * change/attach request.
536 	 */
537 	if (KTRPOINT(p, KTR_EMUL))
538 		ktremul(p);
539 
540 	return (1);
541 }
542 
543 int
544 ktrsetchildren(curp, top, ops, facs, fp)
545 	struct proc *curp, *top;
546 	int ops, facs;
547 	struct file *fp;
548 {
549 	struct proc *p;
550 	int ret = 0;
551 
552 	p = top;
553 	for (;;) {
554 		ret |= ktrops(curp, p, ops, facs, fp);
555 		/*
556 		 * If this process has children, descend to them next,
557 		 * otherwise do any siblings, and if done with this level,
558 		 * follow back up the tree (but not past top).
559 		 */
560 		if (LIST_FIRST(&p->p_children) != NULL)
561 			p = LIST_FIRST(&p->p_children);
562 		else for (;;) {
563 			if (p == top)
564 				return (ret);
565 			if (LIST_NEXT(p, p_sibling) != NULL) {
566 				p = LIST_NEXT(p, p_sibling);
567 				break;
568 			}
569 			p = p->p_pptr;
570 		}
571 	}
572 	/*NOTREACHED*/
573 }
574 
575 int
576 ktrwrite(p, kth)
577 	struct proc *p;
578 	struct ktr_header *kth;
579 {
580 	struct uio auio;
581 	struct iovec aiov[2];
582 	int error, tries;
583 	struct file *fp = p->p_tracep;
584 
585 	if (fp == NULL)
586 		return 0;
587 
588 	auio.uio_iov = &aiov[0];
589 	auio.uio_offset = 0;
590 	auio.uio_segflg = UIO_SYSSPACE;
591 	auio.uio_rw = UIO_WRITE;
592 	aiov[0].iov_base = (caddr_t)kth;
593 	aiov[0].iov_len = sizeof(struct ktr_header);
594 	auio.uio_resid = sizeof(struct ktr_header);
595 	auio.uio_iovcnt = 1;
596 	auio.uio_procp = (struct proc *)0;
597 	if (kth->ktr_len > 0) {
598 		auio.uio_iovcnt++;
599 		aiov[1].iov_base = kth->ktr_buf;
600 		aiov[1].iov_len = kth->ktr_len;
601 		auio.uio_resid += kth->ktr_len;
602 	}
603 
604 	FILE_USE(fp);
605 
606 	tries = 0;
607 	do {
608 		error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
609 		    fp->f_cred, FOF_UPDATE_OFFSET);
610 		tries++;
611 		if (error == EWOULDBLOCK)
612 		  	yield();
613 	} while ((error == EWOULDBLOCK) && (tries < 3));
614 	FILE_UNUSE(fp, NULL);
615 
616 	if (__predict_true(error == 0))
617 		return (0);
618 	/*
619 	 * If error encountered, give up tracing on this vnode.  Don't report
620 	 * EPIPE as this can easily happen with fktrace()/ktruss.
621 	 */
622 	if (error != EPIPE)
623 		log(LOG_NOTICE,
624 		    "ktrace write failed, errno %d, tracing stopped\n",
625 		    error);
626 	proclist_lock_read();
627 	for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) {
628 		if (ktrsamefile(p->p_tracep, fp))
629 			ktrderef(p);
630 	}
631 	proclist_unlock_read();
632 
633 	return (error);
634 }
635 
636 /*
637  * Return true if caller has permission to set the ktracing state
638  * of target.  Essentially, the target can't possess any
639  * more permissions than the caller.  KTRFAC_ROOT signifies that
640  * root previously set the tracing status on the target process, and
641  * so, only root may further change it.
642  *
643  * TODO: check groups.  use caller effective gid.
644  */
645 int
646 ktrcanset(callp, targetp)
647 	struct proc *callp, *targetp;
648 {
649 	struct pcred *caller = callp->p_cred;
650 	struct pcred *target = targetp->p_cred;
651 
652 	if ((caller->pc_ucred->cr_uid == target->p_ruid &&
653 	     target->p_ruid == target->p_svuid &&
654 	     caller->p_rgid == target->p_rgid &&	/* XXX */
655 	     target->p_rgid == target->p_svgid &&
656 	     (targetp->p_traceflag & KTRFAC_ROOT) == 0) ||
657 	     caller->pc_ucred->cr_uid == 0)
658 		return (1);
659 
660 	return (0);
661 }
662 
663 #endif
664