xref: /openbsd-src/sys/kern/kern_ktrace.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
/*	$OpenBSD: kern_ktrace.c,v 1.69 2014/07/13 15:46:21 uebayasi Exp $	*/
/*	$NetBSD: kern_ktrace.c,v 1.23 1996/02/09 18:59:36 christos Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/file.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <sys/mount.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>

void	ktrinitheaderraw(struct ktr_header *, uint, pid_t, pid_t);
void	ktrinitheader(struct ktr_header *, struct proc *, int);
void	ktrstart(struct proc *, struct vnode *, struct ucred *);
void	ktremulraw(struct proc *, struct process *, pid_t);
int	ktrops(struct proc *, struct process *, int, int, struct vnode *,
	    struct ucred *);
int	ktrsetchildren(struct proc *, struct process *, int, int,
	    struct vnode *, struct ucred *);
int	ktrwrite(struct proc *, struct ktr_header *, void *);
int	ktrwriteraw(struct proc *, struct vnode *, struct ucred *,
	    struct ktr_header *, void *);
int	ktrcanset(struct proc *, struct process *);

/*
 * Clear the trace settings in a correct way (to avoid races).
 */
void
ktrcleartrace(struct process *pr)
{
	struct vnode *vp;
	struct ucred *cred;

	if (pr->ps_tracevp != NULL) {
		vp = pr->ps_tracevp;
		cred = pr->ps_tracecred;

		pr->ps_traceflag = 0;
		pr->ps_tracevp = NULL;
		pr->ps_tracecred = NULL;

		vrele(vp);
		crfree(cred);
	}
}

/*
 * Change the trace setting in a correct way (to avoid races).
 */
void
ktrsettrace(struct process *pr, int facs, struct vnode *newvp,
    struct ucred *newcred)
{
	struct vnode *oldvp;
	struct ucred *oldcred;

	KASSERT(newvp != NULL);
	KASSERT(newcred != NULL);

	pr->ps_traceflag |= facs;

	/* nothing to change about where the trace goes? */
	if (pr->ps_tracevp == newvp && pr->ps_tracecred == newcred)
		return;

	vref(newvp);
	crhold(newcred);

	oldvp = pr->ps_tracevp;
	oldcred = pr->ps_tracecred;

	pr->ps_tracevp = newvp;
	pr->ps_tracecred = newcred;

	if (oldvp != NULL) {
		vrele(oldvp);
		crfree(oldcred);
	}
}

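/*
 * Initialize a trace record header: zero it, set the record type,
 * timestamp it, and fill in the given process and thread IDs.
 */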
void
ktrinitheaderraw(struct ktr_header *kth, uint type, pid_t pid, pid_t tid)
{
	memset(kth, 0, sizeof(struct ktr_header));
	kth->ktr_type = type;
	nanotime(&kth->ktr_time);
	kth->ktr_pid = pid;
	kth->ktr_tid = tid;
}

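/*
 * Initialize a record header for the given thread, deriving the
 * process ID, thread ID, and command name from the thread itself.
 */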
void
ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
{
	ktrinitheaderraw(kth, type, p->p_p->ps_pid,
	    p->p_pid + THREAD_PID_OFFSET);
	bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN);
}

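/*
 * Write a trace-start marker to the trace vnode: a header-only record
 * whose type is KTR_START stored big-endian, with pid and tid of -1.
 */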
void
ktrstart(struct proc *p, struct vnode *vp, struct ucred *cred)
{
	struct ktr_header kth;

	ktrinitheaderraw(&kth, htobe32(KTR_START), -1, -1);
	ktrwriteraw(p, vp, cred, &kth, NULL);
}

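/*
 * Record a system call and its arguments (KTR_SYSCALL).  For native
 * sysctl(2) calls, the mib[] array pointed to by the first argument
 * is copied in and appended to the record as well.
 */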
void
ktrsyscall(struct proc *p, register_t code, size_t argsize, register_t args[])
{
	struct	ktr_header kth;
	struct	ktr_syscall *ktp;
	size_t len = sizeof(struct ktr_syscall) + argsize;
	register_t *argp;
	u_int nargs = 0;
	int i;

	if (code == SYS___sysctl && (p->p_p->ps_emul->e_flags & EMUL_NATIVE)) {
		/*
		 * The native sysctl encoding stores the mib[]
		 * array because it is interesting.
		 */
		if (args[1] > 0)
			nargs = lmin(args[1], CTL_MAXNAME);
		len += nargs * sizeof(int);
	}
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSCALL);
	ktp = malloc(len, M_TEMP, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_argsize = argsize;
	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
	for (i = 0; i < (argsize / sizeof *argp); i++)
		*argp++ = args[i];
	if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int)))
		memset(argp, 0, nargs * sizeof(int));
	kth.ktr_len = len;
	ktrwrite(p, &kth, ktp);
	free(ktp, M_TEMP, len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

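/*
 * Record the return from a system call (KTR_SYSRET): the syscall code,
 * the error, and the return value (zero when the call failed).
 */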
void
ktrsysret(struct proc *p, register_t code, int error, register_t retval)
{
	struct ktr_header kth;
	struct ktr_sysret ktp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_SYSRET);
	ktp.ktr_code = code;
	ktp.ktr_error = error;
	ktp.ktr_retval = error == 0 ? retval : 0;

	kth.ktr_len = sizeof(struct ktr_sysret);

	ktrwrite(p, &kth, &ktp);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

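/*
 * Record a pathname translation (KTR_NAMEI); the payload is the path itself.
 */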
void
ktrnamei(struct proc *p, char *path)
{
	struct ktr_header kth;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_NAMEI);
	kth.ktr_len = strlen(path);

	ktrwrite(p, &kth, path);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

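/*
 * Write an emulation-name record (KTR_EMUL) for the given process
 * directly to its trace vnode, on behalf of curp.
 */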
void
ktremulraw(struct proc *curp, struct process *pr, pid_t tid)
{
	struct ktr_header kth;
	char *emul = pr->ps_emul->e_name;

	ktrinitheaderraw(&kth, KTR_EMUL, pr->ps_pid, tid);
	kth.ktr_len = strlen(emul);

	ktrwriteraw(curp, pr->ps_tracevp, pr->ps_tracecred, &kth, emul);
}

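/*
 * Record the emulation the current thread's process is running under.
 */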
void
ktremul(struct proc *p)
{
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktremulraw(p, p->p_p, p->p_pid + THREAD_PID_OFFSET);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

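/*
 * Record generic file descriptor I/O (KTR_GENIO).  The user buffers
 * described by iov are copied in and written out in chunks of at most
 * one page, yielding the CPU between chunks if the scheduler asks.
 */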
void
ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
    ssize_t len)
{
	struct ktr_header kth;
	struct ktr_genio *ktp;
	caddr_t cp;
	int count;
	int mlen, buflen;

	atomic_setbits_int(&p->p_flag, P_INKTR);

	/* beware overflow */
	if (len > PAGE_SIZE - sizeof(struct ktr_genio))
		buflen = PAGE_SIZE;
	else
		buflen = len + sizeof(struct ktr_genio);

	ktrinitheader(&kth, p, KTR_GENIO);
	mlen = buflen;
	ktp = malloc(mlen, M_TEMP, M_WAITOK);
	ktp->ktr_fd = fd;
	ktp->ktr_rw = rw;

	cp = (caddr_t)((char *)ktp + sizeof (struct ktr_genio));
	buflen -= sizeof(struct ktr_genio);

	while (len > 0) {
		/*
		 * Don't allow this process to hog the cpu when doing
		 * huge I/O.
		 */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
			preempt(NULL);

		count = lmin(iov->iov_len, buflen);
		if (count > len)
			count = len;
		if (copyin(iov->iov_base, cp, count))
			break;

		kth.ktr_len = count + sizeof(struct ktr_genio);

		if (ktrwrite(p, &kth, ktp) != 0)
			break;

		iov->iov_len -= count;
		iov->iov_base = (caddr_t)iov->iov_base + count;

		if (iov->iov_len == 0)
			iov++;

		len -= count;
	}

	free(ktp, M_TEMP, mlen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

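/*
 * Record a posted signal (KTR_PSIG): the signal number, handler,
 * blocked mask, code, and siginfo.
 */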
void
ktrpsig(struct proc *p, int sig, sig_t action, int mask, int code,
    siginfo_t *si)
{
	struct ktr_header kth;
	struct ktr_psig kp;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_PSIG);
	kp.signo = (char)sig;
	kp.action = action;
	kp.mask = mask;
	kp.code = code;
	kp.si = *si;
	kth.ktr_len = sizeof(struct ktr_psig);

	ktrwrite(p, &kth, &kp);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

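/*
 * Record a context-switch event (KTR_CSW): `out' is nonzero when the
 * process is being switched out, `user' when it happened in user context.
 */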
void
ktrcsw(struct proc *p, int out, int user)
{
	struct ktr_header kth;
	struct	ktr_csw kc;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_CSW);
	kc.out = out;
	kc.user = user;
	kth.ktr_len = sizeof(struct ktr_csw);

	ktrwrite(p, &kth, &kc);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

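/*
 * Record a named kernel structure (KTR_STRUCT).  The payload is the
 * NUL-terminated name followed by a copy of the structure contents.
 */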
void
ktrstruct(struct proc *p, const char *name, const void *data, size_t datalen)
{
	struct ktr_header kth;
	void *buf;
	size_t buflen;

	KERNEL_ASSERT_LOCKED();
	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_STRUCT);

	if (data == NULL)
		datalen = 0;
	buflen = strlen(name) + 1 + datalen;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	strlcpy(buf, name, buflen);
	bcopy(data, buf + strlen(name) + 1, datalen);
	kth.ktr_len = buflen;

	ktrwrite(p, &kth, buf);
	free(buf, M_TEMP, buflen);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
}

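/*
 * Record data handed in from userland (KTR_USER): copy in the identifier
 * string and up to KTR_USER_MAXLEN bytes of payload, using a small stack
 * buffer when it fits and malloc(9) otherwise.
 */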
int
ktruser(struct proc *p, const char *id, const void *addr, size_t len)
{
	struct ktr_header kth;
	struct ktr_user *ktp;
	int error;
	void *memp;
	size_t size;
#define	STK_PARAMS	128
	long long stkbuf[STK_PARAMS / sizeof(long long)];

	if (!KTRPOINT(p, KTR_USER))
		return (0);
	if (len > KTR_USER_MAXLEN)
		return EINVAL;

	atomic_setbits_int(&p->p_flag, P_INKTR);
	ktrinitheader(&kth, p, KTR_USER);
	size = sizeof(*ktp) + len;
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = malloc(sizeof(*ktp) + len, M_TEMP, M_WAITOK);
		ktp = (struct ktr_user *)memp;
	} else
		ktp = (struct ktr_user *)stkbuf;
	memset(ktp->ktr_id, 0, KTR_USER_MAXIDLEN);
	error = copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL);
	if (error)
		goto out;

	error = copyin(addr, (void *)(ktp + 1), len);
	if (error)
		goto out;
	kth.ktr_len = sizeof(*ktp) + len;
	ktrwrite(p, &kth, ktp);
out:
	if (memp != NULL)
		free(memp, M_TEMP, sizeof(*ktp) + len);
	atomic_clearbits_int(&p->p_flag, P_INKTR);
	return (error);
}


/* Interface and common routines */

/*
 * ktrace system call
 */
/* ARGSUSED */
int
sys_ktrace(struct proc *curp, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(pid_t) pid;
	} */ *uap = v;
	struct vnode *vp = NULL;
	struct process *pr = NULL;
	struct ucred *cred = NULL;
	struct pgrp *pg;
	int facs = SCARG(uap, facs) & ~((unsigned) KTRFAC_ROOT);
	int ops = KTROP(SCARG(uap, ops));
	int descend = SCARG(uap, ops) & KTRFLAG_DESCEND;
	int ret = 0;
	int error = 0;
	struct nameidata nd;

	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		cred = curp->p_ucred;
		crhold(cred);
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    curp);
		if ((error = vn_open(&nd, FREAD|FWRITE|O_NOFOLLOW, 0)) != 0)
			goto done;
		vp = nd.ni_vp;

		VOP_UNLOCK(vp, 0, curp);
		if (vp->v_type != VREG) {
			error = EACCES;
			goto done;
		}
	}
	/*
	 * Clear all uses of the tracefile
	 */
	if (ops == KTROP_CLEARFILE) {
		LIST_FOREACH(pr, &allprocess, ps_list) {
			if (pr->ps_tracevp == vp) {
				if (ktrcanset(curp, pr))
					ktrcleartrace(pr);
				else
					error = EPERM;
			}
		}
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	if (ops == KTROP_SET) {
		if (suser(curp, 0) == 0)
			facs |= KTRFAC_ROOT;
		ktrstart(curp, vp, cred);
	}
	/*
	 * do it
	 */
	if (SCARG(uap, pid) < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-SCARG(uap, pid));
		if (pg == NULL) {
			error = ESRCH;
			goto done;
		}
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			if (descend)
				ret |= ktrsetchildren(curp, pr, ops, facs, vp,
				    cred);
			else
				ret |= ktrops(curp, pr, ops, facs, vp, cred);
		}
	} else {
		/*
		 * by pid
		 */
		pr = prfind(SCARG(uap, pid));
		if (pr == NULL) {
			error = ESRCH;
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(curp, pr, ops, facs, vp, cred);
		else
			ret |= ktrops(curp, pr, ops, facs, vp, cred);
	}
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FREAD|FWRITE, cred, curp);
	if (cred != NULL)
		crfree(cred);
	return (error);
}

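/*
 * Apply a set or clear operation to a single process, provided the
 * caller may change its trace state.  Returns 1 on success, 0 if the
 * caller lacks permission.
 */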
int
ktrops(struct proc *curp, struct process *pr, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	if (!ktrcanset(curp, pr))
		return (0);
	if (ops == KTROP_SET)
		ktrsettrace(pr, facs, vp, cred);
	else {
		/* KTROP_CLEAR */
		pr->ps_traceflag &= ~facs;
		if ((pr->ps_traceflag & KTRFAC_MASK) == 0) {
			/* cleared all the facility bits, so stop completely */
			ktrcleartrace(pr);
		}
	}

	/*
	 * Emit an emulation record every time there is a ktrace
	 * change/attach request.
	 */
	if (pr->ps_traceflag & KTRFAC_EMUL)
		ktremulraw(curp, pr, -1);

	return (1);
}

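/*
 * Apply ktrops() to the given process and all of its descendants,
 * walking the process tree iteratively without recursion.
 */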
int
ktrsetchildren(struct proc *curp, struct process *top, int ops, int facs,
    struct vnode *vp, struct ucred *cred)
{
	struct process *pr;
	int ret = 0;

	pr = top;
	for (;;) {
		ret |= ktrops(curp, pr, ops, facs, vp, cred);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&pr->ps_children))
			pr = LIST_FIRST(&pr->ps_children);
		else for (;;) {
			if (pr == top)
				return (ret);
			if (LIST_NEXT(pr, ps_sibling) != NULL) {
				pr = LIST_NEXT(pr, ps_sibling);
				break;
			}
			pr = pr->ps_pptr;
		}
	}
	/*NOTREACHED*/
}

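/*
 * Write a trace record to the current process's trace vnode, if any,
 * under a held reference to the trace credentials.
 */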
int
ktrwrite(struct proc *p, struct ktr_header *kth, void *aux)
{
	struct vnode *vp = p->p_p->ps_tracevp;
	struct ucred *cred = p->p_p->ps_tracecred;
	int error;

	if (vp == NULL)
		return 0;
	crhold(cred);
	error = ktrwriteraw(p, vp, cred, kth, aux);
	crfree(cred);
	return (error);
}

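/*
 * Write a record (header plus optional payload) to the given trace
 * vnode.  On a write error, log a notice and disable tracing to that
 * vnode for every process still using it.
 */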
int
ktrwriteraw(struct proc *curp, struct vnode *vp, struct ucred *cred,
    struct ktr_header *kth, void *aux)
{
	struct uio auio;
	struct iovec aiov[2];
	struct process *pr;
	int error;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_procp = curp;
	if (kth->ktr_len > 0) {
		auio.uio_iovcnt++;
		aiov[1].iov_base = aux;
		aiov[1].iov_len = kth->ktr_len;
		auio.uio_resid += kth->ktr_len;
	}
	vget(vp, LK_EXCLUSIVE | LK_RETRY, curp);
	error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, cred);
	if (!error) {
		vput(vp);
		return (0);
	}
	/*
	 * If error encountered, give up tracing on this vnode.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	LIST_FOREACH(pr, &allprocess, ps_list)
		if (pr->ps_tracevp == vp && pr->ps_tracecred == cred)
			ktrcleartrace(pr);

	vput(vp);
	return (error);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 *
 * TODO: check groups.  use caller effective gid.
 */
int
ktrcanset(struct proc *callp, struct process *targetpr)
{
	struct ucred *caller = callp->p_ucred;
	struct ucred *target = targetpr->ps_ucred;

	if ((caller->cr_uid == target->cr_ruid &&
	    target->cr_ruid == target->cr_svuid &&
	    caller->cr_rgid == target->cr_rgid &&	/* XXX */
	    target->cr_rgid == target->cr_svgid &&
	    (targetpr->ps_traceflag & KTRFAC_ROOT) == 0 &&
	    !ISSET(targetpr->ps_flags, PS_SUGID)) ||
	    caller->cr_uid == 0)
		return (1);

	return (0);
}