xref: /openbsd-src/sys/kern/kern_ktrace.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: kern_ktrace.c,v 1.58 2012/04/10 20:39:37 mikeb Exp $	*/
2 /*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
33  */
34 
35 #ifdef KTRACE
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/file.h>
42 #include <sys/namei.h>
43 #include <sys/vnode.h>
44 #include <sys/ktrace.h>
45 #include <sys/malloc.h>
46 #include <sys/syslog.h>
47 #include <sys/sysctl.h>
48 
49 #include <sys/mount.h>
50 #include <sys/syscall.h>
51 #include <sys/syscallargs.h>
52 
53 #include <uvm/uvm_extern.h>
54 
/*
 * Prototypes for helper routines that are defined further down in
 * this file.
 */
void	ktrinitheader(struct ktr_header *, struct proc *, int);
void	ktrstart(struct proc *, struct vnode *, struct ucred *);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, void *);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, void *);
int	ktrcanset(struct proc *, struct process *);
65 
66 /*
67  * Clear the trace settings in a correct way (to avoid races).
68  */
69 void
70 ktrcleartrace(struct process *pr)
71 {
72 	struct vnode *vp;
73 	struct ucred *cred;
74 
75 	if (pr->ps_tracevp != NULL) {
76 		vp = pr->ps_tracevp;
77 		cred = pr->ps_tracecred;
78 
79 		pr->ps_traceflag = 0;
80 		pr->ps_tracevp = NULL;
81 		pr->ps_tracecred = NULL;
82 
83 		vrele(vp);
84 		crfree(cred);
85 	}
86 }
87 
88 /*
89  * Change the trace setting in a correct way (to avoid races).
90  */
91 void
92 ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
93     struct ucred *newcred)
94 {
95 	struct vnode *oldvp;
96 	struct ucred *oldcred;
97 
98 	KASSERT(newvp != NULL);
99 	KASSERT(newcred != NULL);
100 
101 	pr->ps_traceflag |= facs;
102 
103 	/* nothing to change about where the trace goes? */
104 	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
105 		return;
106 
107 	vref(newvp);
108 	crhold(newcred);
109 
110 	oldvp = pr->ps_tracevp;
111 	oldcred = pr->ps_tracecred;
112 
113 	pr->ps_tracevp = newvp;
114 	pr->ps_tracecred = newcred;
115 
116 	if (oldvp != NULL) {
117 		vrele(oldvp);
118 		crfree(oldcred);
119 	}
120 }
121 
122 void
123 ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
124 {
125 	bzero(kth, sizeof (struct ktr_header));
126 	kth->ktr_type = type;
127 	nanotime(&kth->ktr_time);
128 	kth->ktr_pid = p->p_p->ps_pid;
129 	kth->ktr_tid = p->p_pid + THREAD_PID_OFFSET;
130 	bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
131 }
132 
133 void
134 ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
135 {
136 	struct ktr_header kth;
137 
138 	bzero(&kth, sizeof (kth));
139 	kth.ktr_type = htobe32(KTR_START);
140 	nanotime(&kth.ktr_time);
141 	kth.ktr_pid = (pid_t)-1;
142 	kth.ktr_tid = (pid_t)-1;
143 	atomic_setbits_int(&p->p_flag, P_INKTR);
144 	ktrwriteraw(p, vp, cred, &kth, NULL);
145 	atomic_clearbits_int(&p->p_flag, P_INKTR);
146 }
147 
148 void
149 ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
150 {
151 	struct	ktr_header kth;
152 	struct	ktr_syscall *ktp;
153 	size_t len = sizeof(struct ktr_syscall) + argsize;
154 	register_t *argp;
155 	u_int nargs = 0;
156 	int i;
157 
158 	if (code == SYS___sysctl && (p->p_emul->e_flags & EMUL_NATIVE)) {
159 		/*
160 		 * The native sysctl encoding stores the mib[]
161 		 * array because it is interesting.
162 		 */
163 		if (args[1] > 0)
164 			nargs = min(args[1], CTL_MAXNAME);
165 		len += nargs * sizeof(int);
166 	}
167 	atomic_setbits_int(&p->p_flag, P_INKTR);
168 	ktrinitheader(&kth, p, KTR_SYSCALL);
169 	ktp = malloc(len, M_TEMP, M_WAITOK);
170 	ktp->ktr_code = code;
171 	ktp->ktr_argsize = argsize;
172 	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
173 	for (i = 0; i < (argsize / sizeof *argp); i++)
174 		*argp++ = args[i];
175 	if (code == SYS___sysctl && (p->p_emul->e_flags & EMUL_NATIVE) &&
176 	    nargs &&
177 	    copyin((void *)args[0], argp, nargs * sizeof(int)))
178 		bzero(argp, nargs * sizeof(int));
179 	kth.ktr_len = len;
180 	ktrwrite(p, &kth, ktp);
181 	free(ktp, M_TEMP);
182 	atomic_clearbits_int(&p->p_flag, P_INKTR);
183 }
184 
185 void
186 ktrsysret(struct proc *p, register_t code, int error, register_t retval)
187 {
188 	struct ktr_header kth;
189 	struct ktr_sysret ktp;
190 
191 	atomic_setbits_int(&p->p_flag, P_INKTR);
192 	ktrinitheader(&kth, p, KTR_SYSRET);
193 	ktp.ktr_code = code;
194 	ktp.ktr_error = error;
195 	ktp.ktr_retval = error == 0 ? retval : 0;
196 
197 	kth.ktr_len = sizeof(struct ktr_sysret);
198 
199 	ktrwrite(p, &kth, &ktp);
200 	atomic_clearbits_int(&p->p_flag, P_INKTR);
201 }
202 
203 void
204 ktrnamei(struct proc *p, char *path)
205 {
206 	struct ktr_header kth;
207 
208 	atomic_setbits_int(&p->p_flag, P_INKTR);
209 	ktrinitheader(&kth, p, KTR_NAMEI);
210 	kth.ktr_len = strlen(path);
211 
212 	ktrwrite(p, &kth, path);
213 	atomic_clearbits_int(&p->p_flag, P_INKTR);
214 }
215 
216 void
217 ktremul(struct proc *p, char *emul)
218 {
219 	struct ktr_header kth;
220 
221 	atomic_setbits_int(&p->p_flag, P_INKTR);
222 	ktrinitheader(&kth, p, KTR_EMUL);
223 	kth.ktr_len = strlen(emul);
224 
225 	ktrwrite(p, &kth, emul);
226 	atomic_clearbits_int(&p->p_flag, P_INKTR);
227 }
228 
229 void
230 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov, int len,
231     int error)
232 {
233 	struct ktr_header kth;
234 	struct ktr_genio *ktp;
235 	caddr_t cp;
236 	int resid = len, count;
237 	int buflen;
238 
239 	if (error)
240 		return;
241 
242 	atomic_setbits_int(&p->p_flag, P_INKTR);
243 
244 	buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));
245 
246 	ktrinitheader(&kth, p, KTR_GENIO);
247 	ktp = malloc(buflen, M_TEMP, M_WAITOK);
248 	ktp->ktr_fd = fd;
249 	ktp->ktr_rw = rw;
250 
251 	cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio));
252 	buflen -= sizeof(struct ktr_genio);
253 
254 	while (resid > 0) {
255 		/*
256 		 * Don't allow this process to hog the cpu when doing
257 		 * huge I/O.
258 		 */
259 		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
260 			preempt(NULL);
261 
262 		count = min(iov->iov_len, buflen);
263 		if (count > resid)
264 			count = resid;
265 		if (copyin(iov->iov_base, cp, count))
266 			break;
267 
268 		kth.ktr_len = count + sizeof(struct ktr_genio);
269 
270 		if (ktrwrite(p, &kth, ktp) != 0)
271 			break;
272 
273 		iov->iov_len -= count;
274 		iov->iov_base = (caddr_t)iov->iov_base + count;
275 
276 		if (iov->iov_len == 0)
277 			iov++;
278 
279 		resid -= count;
280 	}
281 
282 	free(ktp, M_TEMP);
283 	atomic_clearbits_int(&p->p_flag, P_INKTR);
284 }
285 
286 void
287 ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
288     siginfo_t *si)
289 {
290 	struct ktr_header kth;
291 	struct ktr_psig kp;
292 
293 	atomic_setbits_int(&p->p_flag, P_INKTR);
294 	ktrinitheader(&kth, p, KTR_PSIG);
295 	kp.signo = (char)sig;
296 	kp.action = action;
297 	kp.mask = mask;
298 	kp.code = code;
299 	kp.si = *si;
300 	kth.ktr_len = sizeof(struct ktr_psig);
301 
302 	ktrwrite(p, &kth, &kp);
303 	atomic_clearbits_int(&p->p_flag, P_INKTR);
304 }
305 
306 void
307 ktrcsw(struct proc *p, int out, int user)
308 {
309 	struct ktr_header kth;
310 	struct	ktr_csw kc;
311 
312 	atomic_setbits_int(&p->p_flag, P_INKTR);
313 	ktrinitheader(&kth, p, KTR_CSW);
314 	kc.out = out;
315 	kc.user = user;
316 	kth.ktr_len = sizeof(struct ktr_csw);
317 
318 	ktrwrite(p, &kth, &kc);
319 	atomic_clearbits_int(&p->p_flag, P_INKTR);
320 }
321 
322 void
323 ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
324 {
325 	struct ktr_header kth;
326 	void *buf;
327 	size_t buflen;
328 
329 #ifdef MULTIPROCESSOR
330 	KASSERT(__mp_lock_held(&kernel_lock) > 0);
331 #endif
332 	atomic_setbits_int(&p->p_flag, P_INKTR);
333 	ktrinitheader(&kth, p, KTR_STRUCT);
334 
335 	if (data == NULL)
336 		datalen = 0;
337 	buflen = strlen(name) + 1 + datalen;
338 	buf = malloc(buflen, M_TEMP, M_WAITOK);
339 	strlcpy(buf, name, buflen);
340 	bcopy(data, buf + strlen(name) + 1, datalen);
341 	kth.ktr_len = buflen;
342 
343 	ktrwrite(p, &kth, buf);
344 	free(buf, M_TEMP);
345 	atomic_clearbits_int(&p->p_flag, P_INKTR);
346 }
347 
348 /* Interface and common routines */
349 
/*
 * ktrace system call
 *
 * Arguments (see the struct below): fname is the trace file, ops holds
 * the operation plus the KTRFLAG_DESCEND flag, facs the facilities to
 * (un)trace and pid the target; a negative pid selects the process
 * group -pid.
 *
 * Returns 0 on success or an errno value: whatever vn_open() reports,
 * EACCES if the file is not a regular file, EINVAL if no facilities
 * were given, ESRCH if the target does not exist, and EPERM if the
 * caller was not allowed to change any of the targets.
 *
 * P_INKTR is set on the calling thread for the duration of the call.
 */
/* ARGSUSED */
int
sys_ktrace(struct proc *curp, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	struct proc *p = NULL;
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	/* KTRFAC_ROOT is never taken from the caller; it is added below */
	int facs = SCARG(uap, facs) & ~((unsigned) KTRFAC_ROOT);
	int ops = KTROP(SCARG(uap, ops));
	int descend = SCARG(uap, ops) & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;
	struct nameidata nd;

	atomic_setbits_int(&curp->p_flag, P_INKTR);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 * Hold the caller's credentials; they are used for the
		 * start record, handed to the targets, and released at
		 * "done".
		 */
		cred = curp->p_ucred;
		crhold(cred);
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    curp);
		if ((error = vn_open(&nd, FREAD|FWRITE|O_NOFOLLOW, 0)) != 0)
			goto done;
		vp = nd.ni_vp;

		/* only the reference is kept; the vnode lock is dropped */
		VOP_UNLOCK(vp, 0, curp);
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		/*
		 * Walk allproc and stop tracing in every process that
		 * traces to this vnode, as far as the caller is allowed
		 * to; a single refusal makes the call return EPERM.
		 */
		LIST_FOREACH(p, &allproc, p_list) {
			if (p->p_p->ps_tracevp == vp) {
				if (ktrcanset(curp, p->p_p))
					ktrcleartrace(p->p_p);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		/*
		 * suser() returning 0 (presumably: caller is the
		 * superuser) marks the trace with KTRFAC_ROOT.
		 */
		if (suser(curp, 0) == 0)
			facs |= KTRFAC_ROOT;
		/* write the start record to the trace file */
		ktrstart(curp, vp, cred);
	}
	/*
	 * do it
	 */
	if (SCARG(uap, pid) < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-SCARG(uap, pid));
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(curp, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(curp, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(SCARG(uap, pid));
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(curp, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(curp, pr, ops, facs, vp, cred);
	}
	/* ret stays 0 when no target could be changed */
	if (!ret)
		error = EPERM;
done:
	/*
	 * Common exit: drop what this call itself acquired.  Processes
	 * that were set up for tracing took their own references in
	 * ktrsettrace().
	 */
	if (vp != NULL)
		(void) vn_close(vp, FREAD|FWRITE, cred, curp);
	if (cred != NULL)
		crfree(cred);
	atomic_clearbits_int(&curp->p_flag, P_INKTR);
	return (error);
}
463 
464 int
465 ktrops(struct proc *curp, struct process *pr, int ops, int facs,
466     struct vnode *vp, struct ucred *cred)
467 {
468 	struct proc *p;
469 
470 	if (!ktrcanset(curp, pr))
471 		return (0);
472 	if (ops == KTROP_SET)
473 		ktrsettrace(pr, facs, vp, cred);
474 	else {
475 		/* KTROP_CLEAR */
476 		pr->ps_traceflag &= ~facs;
477 		if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
478 			/* cleared all the facility bits, so stop completely */
479 			ktrcleartrace(pr);
480 		}
481 	}
482 
483 	/*
484 	 * Emit an emulation record, every time there is a ktrace
485 	 * change/attach request.
486 	 * XXX an EMUL record for each thread?  Perhaps should have
487 	 * XXX a record type to say "this pid is really a thread of this
488 	 * XXX other pid" and only generate an EMUL record for the main pid
489 	 */
490 	TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link)
491 		if (KTRPOINT(p, KTR_EMUL))
492 			ktremul(p, p->p_emul->e_name);
493 
494 	return (1);
495 }
496 
497 int
498 ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
499     struct vnode *vp, struct ucred *cred)
500 {
501 	struct process *pr;
502 	int ret = 0;
503 
504 	pr = top;
505 	for (;;) {
506 		ret |= ktrops(curp, pr, ops, facs, vp, cred);
507 		/*
508 		 * If this process has children, descend to them next,
509 		 * otherwise do any siblings, and if done with this level,
510 		 * follow back up the tree (but not past top).
511 		 */
512 		if (!LIST_EMPTY(&pr->ps_children))
513 			pr = LIST_FIRST(&pr->ps_children);
514 		else for (;;) {
515 			if (pr == top)
516 				return (ret);
517 			if (LIST_NEXT(pr, ps_sibling) != NULL) {
518 				pr = LIST_NEXT(pr, ps_sibling);
519 				break;
520 			}
521 			pr = pr->ps_pptr;
522 		}
523 	}
524 	/*NOTREACHED*/
525 }
526 
527 int
528 ktrwrite(struct proc *p, struct ktr_header *kth, void *aux)
529 {
530 	struct vnode *vp = p->p_p->ps_tracevp;
531 	struct ucred *cred = p->p_p->ps_tracecred;
532 	int error;
533 
534 	if (vp == NULL)
535 		return 0;
536 	crhold(cred);
537 	error = ktrwriteraw(p, vp, cred, kth, aux);
538 	crfree(cred);
539 	return (error);
540 }
541 
542 int
543 ktrwriteraw(struct proc *p, struct vnode *vp, struct ucred *cred,
544     struct ktr_header *kth, void *aux)
545 {
546 	struct uio auio;
547 	struct iovec aiov[2];
548 	int error;
549 
550 	auio.uio_iov = &aiov[0];
551 	auio.uio_offset = 0;
552 	auio.uio_segflg = UIO_SYSSPACE;
553 	auio.uio_rw = UIO_WRITE;
554 	aiov[0].iov_base = (caddr_t)kth;
555 	aiov[0].iov_len = sizeof(struct ktr_header);
556 	auio.uio_resid = sizeof(struct ktr_header);
557 	auio.uio_iovcnt = 1;
558 	auio.uio_procp = p;
559 	if (kth->ktr_len > 0) {
560 		auio.uio_iovcnt++;
561 		aiov[1].iov_base = aux;
562 		aiov[1].iov_len = kth->ktr_len;
563 		auio.uio_resid += kth->ktr_len;
564 	}
565 	vget(vp, LK_EXCLUSIVE | LK_RETRY, p);
566 	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
567 	if (!error) {
568 		vput(vp);
569 		return (0);
570 	}
571 	/*
572 	 * If error encountered, give up tracing on this vnode.
573 	 */
574 	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
575 	    error);
576 	LIST_FOREACH(p, &allproc, p_list)
577 		if (p->p_p->ps_tracevp == vp && p->p_p->ps_tracecred == cred)
578 			ktrcleartrace(p->p_p);
579 
580 	vput(vp);
581 	return (error);
582 }
583 
584 /*
585  * Return true if caller has permission to set the ktracing state
586  * of target.  Essentially, the target can't possess any
587  * more permissions than the caller.  KTRFAC_ROOT signifies that
588  * root previously set the tracing status on the target process, and
589  * so, only root may further change it.
590  *
591  * TODO: check groups.  use caller effective gid.
592  */
593 int
594 ktrcanset(struct proc *callp, struct process *targetpr)
595 {
596 	struct pcred *caller = callp->p_cred;
597 	struct pcred *target = targetpr->ps_cred;
598 
599 	if ((caller->pc_ucred->cr_uid == target->p_ruid &&
600 	    target->p_ruid == target->p_svuid &&
601 	    caller->p_rgid == target->p_rgid &&	/* XXX */
602 	    target->p_rgid == target->p_svgid &&
603 	    (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 &&
604 	    !ISSET(targetpr->ps_flags, PS_SUGID)) ||
605 	    caller->pc_ucred->cr_uid == 0)
606 		return (1);
607 
608 	return (0);
609 }
610 
611 #endif
612