xref: /netbsd-src/sys/kern/kern_ktrace.c (revision de1dfb1250df962f1ff3a011772cf58e605aed11)
1 /*	$NetBSD: kern_ktrace.c,v 1.92 2004/09/04 07:09:35 skrll Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)kern_ktrace.c	8.5 (Berkeley) 5/14/95
32  */
33 
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.92 2004/09/04 07:09:35 skrll Exp $");
36 
37 #include "opt_ktrace.h"
38 #include "opt_compat_mach.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/namei.h>
45 #include <sys/vnode.h>
46 #include <sys/ktrace.h>
47 #include <sys/malloc.h>
48 #include <sys/syslog.h>
49 #include <sys/filedesc.h>
50 #include <sys/ioctl.h>
51 
52 #include <sys/mount.h>
53 #include <sys/sa.h>
54 #include <sys/syscallargs.h>
55 
56 #ifdef KTRACE
57 
58 void	ktrinitheader(struct ktr_header *, struct proc *, int);
59 int	ktrwrite(struct proc *, struct ktr_header *);
60 int	ktrace_common(struct proc *, int, int, int, struct file *);
61 int	ktrops(struct proc *, struct proc *, int, int, struct file *);
62 int	ktrsetchildren(struct proc *, struct proc *, int, int,
63 	    struct file *);
64 int	ktrcanset(struct proc *, struct proc *);
65 int	ktrsamefile(struct file *, struct file *);
66 
67 /*
68  * "deep" compare of two files for the purposes of clearing a trace.
69  * Returns true if they're the same open file, or if they point at the
70  * same underlying vnode/socket.
71  */
72 
73 int
74 ktrsamefile(struct file *f1, struct file *f2)
75 {
76 
77 	return ((f1 == f2) ||
78 	    ((f1 != NULL) && (f2 != NULL) &&
79 		(f1->f_type == f2->f_type) &&
80 		(f1->f_data == f2->f_data)));
81 }
82 
83 void
84 ktrderef(struct proc *p)
85 {
86 	struct file *fp = p->p_tracep;
87 	p->p_traceflag = 0;
88 	if (fp == NULL)
89 		return;
90 	p->p_tracep = NULL;
91 
92 	simple_lock(&fp->f_slock);
93 	FILE_USE(fp);
94 
95 	/*
96 	 * ktrace file descriptor can't be watched (are not visible to
97 	 * userspace), so no kqueue stuff here.
98 	 * XXX: The above comment is wrong, because the fktrace file
99 	 * descriptor is available in userland.
100 	 */
101 	closef(fp, NULL);
102 }
103 
104 void
105 ktradref(struct proc *p)
106 {
107 	struct file *fp = p->p_tracep;
108 
109 	fp->f_count++;
110 }
111 
112 void
113 ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
114 {
115 
116 	(void)memset(kth, 0, sizeof(*kth));
117 	kth->ktr_type = type;
118 	microtime(&kth->ktr_time);
119 	kth->ktr_pid = p->p_pid;
120 	memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
121 }
122 
123 int
124 ktrsyscall(struct proc *p, register_t code, register_t realcode,
125     const struct sysent *callp, register_t args[])
126 {
127 	struct ktr_header kth;
128 	struct ktr_syscall *ktp;
129 	register_t *argp;
130 	int argsize, error;
131 	size_t len;
132 	u_int i;
133 
134 	if (callp == NULL)
135 		callp = p->p_emul->e_sysent;
136 
137 	argsize = callp[code].sy_argsize;
138 #ifdef _LP64
139 	if (p->p_flag & P_32)
140 		argsize = argsize << 1;
141 #endif
142 	len = sizeof(struct ktr_syscall) + argsize;
143 
144 	p->p_traceflag |= KTRFAC_ACTIVE;
145 	ktrinitheader(&kth, p, KTR_SYSCALL);
146 	ktp = malloc(len, M_TEMP, M_WAITOK);
147 	ktp->ktr_code = realcode;
148 	ktp->ktr_argsize = argsize;
149 	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
150 	for (i = 0; i < (argsize / sizeof(*argp)); i++)
151 		*argp++ = args[i];
152 	kth.ktr_buf = (caddr_t)ktp;
153 	kth.ktr_len = len;
154 	error = ktrwrite(p, &kth);
155 	free(ktp, M_TEMP);
156 	p->p_traceflag &= ~KTRFAC_ACTIVE;
157 	return error;
158 }
159 
160 int
161 ktrsysret(struct proc *p, register_t code, int error, register_t *retval)
162 {
163 	struct ktr_header kth;
164 	struct ktr_sysret ktp;
165 
166 	p->p_traceflag |= KTRFAC_ACTIVE;
167 	ktrinitheader(&kth, p, KTR_SYSRET);
168 	ktp.ktr_code = code;
169 	ktp.ktr_eosys = 0;			/* XXX unused */
170 	ktp.ktr_error = error;
171 	ktp.ktr_retval = retval ? retval[0] : 0;
172 	ktp.ktr_retval_1 = retval ? retval[1] : 0;
173 
174 	kth.ktr_buf = (caddr_t)&ktp;
175 	kth.ktr_len = sizeof(struct ktr_sysret);
176 
177 	error = ktrwrite(p, &kth);
178 	p->p_traceflag &= ~KTRFAC_ACTIVE;
179 	return error;
180 }
181 
182 int
183 ktrnamei(struct proc *p, char *path)
184 {
185 	struct ktr_header kth;
186 	int error;
187 
188 	p->p_traceflag |= KTRFAC_ACTIVE;
189 	ktrinitheader(&kth, p, KTR_NAMEI);
190 	kth.ktr_len = strlen(path);
191 	kth.ktr_buf = path;
192 
193 	error = ktrwrite(p, &kth);
194 	p->p_traceflag &= ~KTRFAC_ACTIVE;
195 	return error;
196 }
197 
198 int
199 ktremul(struct proc *p)
200 {
201 	struct ktr_header kth;
202 	const char *emul = p->p_emul->e_name;
203 	int error;
204 
205 	p->p_traceflag |= KTRFAC_ACTIVE;
206 	ktrinitheader(&kth, p, KTR_EMUL);
207 	kth.ktr_len = strlen(emul);
208 	kth.ktr_buf = (caddr_t)emul;
209 
210 	error = ktrwrite(p, &kth);
211 	p->p_traceflag &= ~KTRFAC_ACTIVE;
212 	return error;
213 }
214 
215 int
216 ktrkmem(struct proc *p, int ktr, const void *buf, size_t len)
217 {
218 	struct ktr_header kth;
219 	int error;
220 
221 	p->p_traceflag |= KTRFAC_ACTIVE;
222 	ktrinitheader(&kth, p, ktr);
223 	kth.ktr_len = len;
224 	kth.ktr_buf = buf;
225 
226 	error = ktrwrite(p, &kth);
227 	p->p_traceflag &= ~KTRFAC_ACTIVE;
228 	return error;
229 }
230 
231 int
232 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
233     int len, int error)
234 {
235 	struct ktr_header kth;
236 	struct ktr_genio *ktp;
237 	caddr_t cp;
238 	int resid = len, cnt;
239 	int buflen;
240 
241 	if (error)
242 		return error;
243 
244 	p->p_traceflag |= KTRFAC_ACTIVE;
245 
246 	buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));
247 
248 	ktrinitheader(&kth, p, KTR_GENIO);
249 	ktp = malloc(buflen, M_TEMP, M_WAITOK);
250 	ktp->ktr_fd = fd;
251 	ktp->ktr_rw = rw;
252 
253 	kth.ktr_buf = (caddr_t)ktp;
254 
255 	cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio));
256 	buflen -= sizeof(struct ktr_genio);
257 
258 	while (resid > 0) {
259 #if 0 /* XXX NJWLWP */
260 		KDASSERT(p->p_cpu != NULL);
261 		KDASSERT(p->p_cpu == curcpu());
262 #endif
263 		/* XXX NJWLWP */
264 		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
265 			preempt(1);
266 
267 		cnt = min(iov->iov_len, buflen);
268 		if (cnt > resid)
269 			cnt = resid;
270 		if ((error = copyin(iov->iov_base, cp, cnt)) != 0)
271 			break;
272 
273 		kth.ktr_len = cnt + sizeof(struct ktr_genio);
274 
275 		error = ktrwrite(p, &kth);
276 		if (__predict_false(error != 0))
277 			break;
278 
279 		iov->iov_base = (caddr_t)iov->iov_base + cnt;
280 		iov->iov_len -= cnt;
281 
282 		if (iov->iov_len == 0)
283 			iov++;
284 
285 		resid -= cnt;
286 	}
287 
288 	free(ktp, M_TEMP);
289 	p->p_traceflag &= ~KTRFAC_ACTIVE;
290 	return error;
291 }
292 
293 int
294 ktrpsig(struct proc *p, int sig, sig_t action, const sigset_t *mask,
295     const ksiginfo_t *ksi)
296 {
297 	int error;
298 
299 	struct ktr_header kth;
300 	struct {
301 		struct ktr_psig	kp;
302 		siginfo_t	si;
303 	} kbuf;
304 
305 	p->p_traceflag |= KTRFAC_ACTIVE;
306 	ktrinitheader(&kth, p, KTR_PSIG);
307 	kbuf.kp.signo = (char)sig;
308 	kbuf.kp.action = action;
309 	kbuf.kp.mask = *mask;
310 	kth.ktr_buf = (caddr_t)&kbuf;
311 	if (ksi) {
312 		kbuf.kp.code = KSI_TRAPCODE(ksi);
313 		(void)memset(&kbuf.si, 0, sizeof(kbuf.si));
314 		kbuf.si._info = ksi->ksi_info;
315 		kth.ktr_len = sizeof(kbuf);
316 	} else {
317 		kbuf.kp.code = 0;
318 		kth.ktr_len = sizeof(struct ktr_psig);
319 	}
320 	error = ktrwrite(p, &kth);
321 	p->p_traceflag &= ~KTRFAC_ACTIVE;
322 	return error;
323 }
324 
325 int
326 ktrcsw(struct proc *p, int out, int user)
327 {
328 	struct ktr_header kth;
329 	struct ktr_csw kc;
330 	int error;
331 
332 	p->p_traceflag |= KTRFAC_ACTIVE;
333 	ktrinitheader(&kth, p, KTR_CSW);
334 	kc.out = out;
335 	kc.user = user;
336 	kth.ktr_buf = (caddr_t)&kc;
337 	kth.ktr_len = sizeof(struct ktr_csw);
338 
339 	error = ktrwrite(p, &kth);
340 	p->p_traceflag &= ~KTRFAC_ACTIVE;
341 	return error;
342 }
343 
344 int
345 ktruser(struct proc *p, const char *id, void *addr, size_t len, int ustr)
346 {
347 	struct ktr_header kth;
348 	struct ktr_user *ktp;
349 	caddr_t user_dta;
350 	int error;
351 
352 	p->p_traceflag |= KTRFAC_ACTIVE;
353 	ktrinitheader(&kth, p, KTR_USER);
354 	ktp = malloc(sizeof(struct ktr_user) + len, M_TEMP, M_WAITOK);
355 	if (ustr) {
356 		if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0)
357 			ktp->ktr_id[0] = '\0';
358 	} else
359 		strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
360 	ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0';
361 
362 	user_dta = (caddr_t) ((char *)ktp + sizeof(struct ktr_user));
363 	if (copyin(addr, (void *) user_dta, len) != 0)
364 		len = 0;
365 
366 	kth.ktr_buf = (void *)ktp;
367 	kth.ktr_len = sizeof(struct ktr_user) + len;
368 	error = ktrwrite(p, &kth);
369 
370 	free(ktp, M_TEMP);
371 	p->p_traceflag &= ~KTRFAC_ACTIVE;
372 	return error;
373 
374 }
375 
376 int
377 ktrmmsg(struct proc *p, const void *msgh, size_t size)
378 {
379 	struct ktr_header kth;
380 	struct ktr_mmsg	*kp;
381 	int error;
382 
383 	p->p_traceflag |= KTRFAC_ACTIVE;
384 	ktrinitheader(&kth, p, KTR_MMSG);
385 
386 	kp = (struct ktr_mmsg *)msgh;
387 	kth.ktr_buf = (caddr_t)kp;
388 	kth.ktr_len = size;
389 	error = ktrwrite(p, &kth);
390 	p->p_traceflag &= ~KTRFAC_ACTIVE;
391 	return error;
392 }
393 
394 int
395 ktrmool(struct proc *p, const void *kaddr, size_t size, const void *uaddr)
396 {
397 	struct ktr_header kth;
398 	struct ktr_mool *kp;
399 	struct ktr_mool *buf;
400 	int error;
401 
402 	p->p_traceflag |= KTRFAC_ACTIVE;
403 	ktrinitheader(&kth, p, KTR_MOOL);
404 
405 	kp = malloc(size + sizeof(*kp), M_TEMP, M_WAITOK);
406 	kp->uaddr = uaddr;
407 	kp->size = size;
408 	buf = kp + 1; /* Skip uaddr and size */
409 	(void)memcpy(buf, kaddr, size);
410 
411 	kth.ktr_buf = (caddr_t)kp;
412 	kth.ktr_len = size + sizeof(*kp);
413 	error = ktrwrite(p, &kth);
414 	free(kp, M_TEMP);
415 
416 	p->p_traceflag &= ~KTRFAC_ACTIVE;
417 	return error;
418 }
419 
420 
421 /* Interface and common routines */
422 
423 int
424 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp)
425 {
426 	int ret = 0;
427 	int error = 0;
428 	int one = 1;
429 	int descend;
430 	struct proc *p;
431 	struct pgrp *pg;
432 
433 	curp->p_traceflag |= KTRFAC_ACTIVE;
434 	descend = ops & KTRFLAG_DESCEND;
435 	facs = facs & ~((unsigned) KTRFAC_ROOT);
436 
437 	/*
438 	 * Clear all uses of the tracefile
439 	 */
440 	if (KTROP(ops) == KTROP_CLEARFILE) {
441 		proclist_lock_read();
442 		LIST_FOREACH(p, &allproc, p_list) {
443 			if (ktrsamefile(p->p_tracep, fp)) {
444 				if (ktrcanset(curp, p))
445 					ktrderef(p);
446 				else
447 					error = EPERM;
448 			}
449 		}
450 		proclist_unlock_read();
451 		goto done;
452 	}
453 
454 	/*
455 	 * Mark fp non-blocking, to avoid problems from possible deadlocks.
456 	 */
457 
458 	if (fp != NULL) {
459 		fp->f_flag |= FNONBLOCK;
460 		(*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&one, curp);
461 	}
462 
463 	/*
464 	 * need something to (un)trace (XXX - why is this here?)
465 	 */
466 	if (!facs) {
467 		error = EINVAL;
468 		goto done;
469 	}
470 	/*
471 	 * do it
472 	 */
473 	if (pid < 0) {
474 		/*
475 		 * by process group
476 		 */
477 		pg = pg_find(-pid, PFIND_UNLOCK_FAIL);
478 		if (pg == NULL) {
479 			error = ESRCH;
480 			goto done;
481 		}
482 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
483 			if (descend)
484 				ret |= ktrsetchildren(curp, p, ops, facs, fp);
485 			else
486 				ret |= ktrops(curp, p, ops, facs, fp);
487 		}
488 
489 	} else {
490 		/*
491 		 * by pid
492 		 */
493 		p = p_find(pid, PFIND_UNLOCK_FAIL);
494 		if (p == NULL) {
495 			error = ESRCH;
496 			goto done;
497 		}
498 		if (descend)
499 			ret |= ktrsetchildren(curp, p, ops, facs, fp);
500 		else
501 			ret |= ktrops(curp, p, ops, facs, fp);
502 	}
503 	proclist_unlock_read();	/* taken by p{g}_find */
504 	if (!ret)
505 		error = EPERM;
506 done:
507 	curp->p_traceflag &= ~KTRFAC_ACTIVE;
508 	return (error);
509 }
510 
511 /*
512  * ktrace system call
513  */
514 /* ARGSUSED */
515 int
516 sys_fktrace(struct lwp *l, void *v, register_t *retval)
517 {
518 	struct sys_fktrace_args /* {
519 		syscallarg(int) fd;
520 		syscallarg(int) ops;
521 		syscallarg(int) facs;
522 		syscallarg(int) pid;
523 	} */ *uap = v;
524 	struct proc *curp = l->l_proc;
525 	struct file *fp = NULL;
526 	struct filedesc *fdp = curp->p_fd;
527 	int error;
528 
529 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
530 		return (EBADF);
531 
532 	FILE_USE(fp);
533 
534 	if ((fp->f_flag & FWRITE) == 0)
535 		error = EBADF;
536 	else
537 		error = ktrace_common(curp, SCARG(uap, ops),
538 		    SCARG(uap, facs), SCARG(uap, pid), fp);
539 
540 	FILE_UNUSE(fp, curp);
541 
542 	return error;
543 }
544 
545 /*
546  * ktrace system call
547  */
548 /* ARGSUSED */
549 int
550 sys_ktrace(struct lwp *l, void *v, register_t *retval)
551 {
552 	struct sys_ktrace_args /* {
553 		syscallarg(const char *) fname;
554 		syscallarg(int) ops;
555 		syscallarg(int) facs;
556 		syscallarg(int) pid;
557 	} */ *uap = v;
558 	struct proc *curp = l->l_proc;
559 	struct vnode *vp = NULL;
560 	struct file *fp = NULL;
561 	int fd;
562 	int ops = SCARG(uap, ops);
563 	int error = 0;
564 	struct nameidata nd;
565 
566 	ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);
567 
568 	curp->p_traceflag |= KTRFAC_ACTIVE;
569 	if ((ops & KTROP_CLEAR) == 0) {
570 		/*
571 		 * an operation which requires a file argument.
572 		 */
573 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
574 		    curp);
575 		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
576 			curp->p_traceflag &= ~KTRFAC_ACTIVE;
577 			return (error);
578 		}
579 		vp = nd.ni_vp;
580 		VOP_UNLOCK(vp, 0);
581 		if (vp->v_type != VREG) {
582 			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
583 			curp->p_traceflag &= ~KTRFAC_ACTIVE;
584 			return (EACCES);
585 		}
586 		/*
587 		 * XXX This uses up a file descriptor slot in the
588 		 * tracing process for the duration of this syscall.
589 		 * This is not expected to be a problem.  If
590 		 * falloc(NULL, ...) DTRT we could skip that part, but
591 		 * that would require changing its interface to allow
592 		 * the caller to pass in a ucred..
593 		 *
594 		 * This will FILE_USE the fp it returns, if any.
595 		 * Keep it in use until we return.
596 		 */
597 		if ((error = falloc(curp, &fp, &fd)) != 0)
598 			goto done;
599 
600 		fp->f_flag = FWRITE|FAPPEND;
601 		fp->f_type = DTYPE_VNODE;
602 		fp->f_ops = &vnops;
603 		fp->f_data = (caddr_t)vp;
604 		FILE_SET_MATURE(fp);
605 		vp = NULL;
606 	}
607 	error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
608 	    SCARG(uap, pid), fp);
609 done:
610 	if (vp != NULL)
611 		(void) vn_close(vp, FWRITE, curp->p_ucred, curp);
612 	if (fp != NULL) {
613 		FILE_UNUSE(fp, curp);	/* release file */
614 		fdrelease(curp, fd); 	/* release fd table slot */
615 	}
616 	return (error);
617 }
618 
619 int
620 ktrops(struct proc *curp, struct proc *p, int ops, int facs,
621     struct file *fp)
622 {
623 
624 	if (!ktrcanset(curp, p))
625 		return (0);
626 	if (KTROP(ops) == KTROP_SET) {
627 		if (p->p_tracep != fp) {
628 			/*
629 			 * if trace file already in use, relinquish
630 			 */
631 			ktrderef(p);
632 			p->p_tracep = fp;
633 			ktradref(p);
634 		}
635 		p->p_traceflag |= facs;
636 		if (curp->p_ucred->cr_uid == 0)
637 			p->p_traceflag |= KTRFAC_ROOT;
638 	} else {
639 		/* KTROP_CLEAR */
640 		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
641 			/* no more tracing */
642 			ktrderef(p);
643 		}
644 	}
645 
646 	/*
647 	 * Emit an emulation record, every time there is a ktrace
648 	 * change/attach request.
649 	 */
650 	if (KTRPOINT(p, KTR_EMUL))
651 		p->p_traceflag |= KTRFAC_TRC_EMUL;
652 #ifdef __HAVE_SYSCALL_INTERN
653 	(*p->p_emul->e_syscall_intern)(p);
654 #endif
655 
656 	return (1);
657 }
658 
659 int
660 ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs,
661     struct file *fp)
662 {
663 	struct proc *p;
664 	int ret = 0;
665 
666 	p = top;
667 	for (;;) {
668 		ret |= ktrops(curp, p, ops, facs, fp);
669 		/*
670 		 * If this process has children, descend to them next,
671 		 * otherwise do any siblings, and if done with this level,
672 		 * follow back up the tree (but not past top).
673 		 */
674 		if (LIST_FIRST(&p->p_children) != NULL) {
675 			p = LIST_FIRST(&p->p_children);
676 			continue;
677 		}
678 		for (;;) {
679 			if (p == top)
680 				return (ret);
681 			if (LIST_NEXT(p, p_sibling) != NULL) {
682 				p = LIST_NEXT(p, p_sibling);
683 				break;
684 			}
685 			p = p->p_pptr;
686 		}
687 	}
688 	/*NOTREACHED*/
689 }
690 
691 int
692 ktrwrite(struct proc *p, struct ktr_header *kth)
693 {
694 	struct uio auio;
695 	struct iovec aiov[2];
696 	int error, tries;
697 	struct file *fp = p->p_tracep;
698 
699 	if (fp == NULL)
700 		return 0;
701 
702 	if (p->p_traceflag & KTRFAC_TRC_EMUL) {
703 		/* Add emulation trace before first entry for this process */
704 		p->p_traceflag &= ~KTRFAC_TRC_EMUL;
705 		if ((error = ktremul(p)) != 0)
706 			return error;
707 	}
708 
709 	auio.uio_iov = &aiov[0];
710 	auio.uio_offset = 0;
711 	auio.uio_segflg = UIO_SYSSPACE;
712 	auio.uio_rw = UIO_WRITE;
713 	aiov[0].iov_base = (caddr_t)kth;
714 	aiov[0].iov_len = sizeof(struct ktr_header);
715 	auio.uio_resid = sizeof(struct ktr_header);
716 	auio.uio_iovcnt = 1;
717 	auio.uio_procp = NULL;
718 	if (kth->ktr_len > 0) {
719 		auio.uio_iovcnt++;
720 		aiov[1].iov_base = (void *)kth->ktr_buf;
721 		aiov[1].iov_len = kth->ktr_len;
722 		auio.uio_resid += kth->ktr_len;
723 	}
724 
725 	simple_lock(&fp->f_slock);
726 	FILE_USE(fp);
727 
728 	tries = 0;
729 	do {
730 		error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
731 		    fp->f_cred, FOF_UPDATE_OFFSET);
732 		tries++;
733 		if (error == EWOULDBLOCK)
734 			preempt(1);
735 	} while ((error == EWOULDBLOCK) && (tries < 3));
736 	FILE_UNUSE(fp, NULL);
737 
738 	if (__predict_true(error == 0))
739 		return (0);
740 	/*
741 	 * If error encountered, give up tracing on this vnode.  Don't report
742 	 * EPIPE as this can easily happen with fktrace()/ktruss.
743 	 */
744 	if (error != EPIPE)
745 		log(LOG_NOTICE,
746 		    "ktrace write failed, errno %d, tracing stopped\n",
747 		    error);
748 	proclist_lock_read();
749 	LIST_FOREACH(p, &allproc, p_list) {
750 		if (ktrsamefile(p->p_tracep, fp))
751 			ktrderef(p);
752 	}
753 	proclist_unlock_read();
754 
755 	return (error);
756 }
757 
758 /*
759  * Return true if caller has permission to set the ktracing state
760  * of target.  Essentially, the target can't possess any
761  * more permissions than the caller.  KTRFAC_ROOT signifies that
762  * root previously set the tracing status on the target process, and
763  * so, only root may further change it.
764  *
765  * TODO: check groups.  use caller effective gid.
766  */
767 int
768 ktrcanset(struct proc *callp, struct proc *targetp)
769 {
770 	struct pcred *caller = callp->p_cred;
771 	struct pcred *target = targetp->p_cred;
772 
773 	if ((caller->pc_ucred->cr_uid == target->p_ruid &&
774 	    target->p_ruid == target->p_svuid &&
775 	    caller->p_rgid == target->p_rgid &&	/* XXX */
776 	    target->p_rgid == target->p_svgid &&
777 	    (targetp->p_traceflag & KTRFAC_ROOT) == 0 &&
778 	    (targetp->p_flag & P_SUGID) == 0) ||
779 	    caller->pc_ucred->cr_uid == 0)
780 		return (1);
781 
782 	return (0);
783 }
784 #endif /* KTRACE */
785 
786 /*
787  * Put user defined entry to ktrace records.
788  */
789 int
790 sys_utrace(struct lwp *l, void *v, register_t *retval)
791 {
792 #ifdef KTRACE
793 	struct sys_utrace_args /* {
794 		syscallarg(const char *) label;
795 		syscallarg(void *) addr;
796 		syscallarg(size_t) len;
797 	} */ *uap = v;
798 	struct proc *p = l->l_proc;
799 
800 	if (!KTRPOINT(p, KTR_USER))
801 		return (0);
802 
803 	if (SCARG(uap, len) > KTR_USER_MAXLEN)
804 		return (EINVAL);
805 
806 	ktruser(p, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1);
807 
808 	return (0);
809 #else /* !KTRACE */
810 	return ENOSYS;
811 #endif /* KTRACE */
812 }
813