xref: /openbsd-src/sys/kern/sys_process.c (revision deef986e6c665c0bdd5fa41cac39567525a53523)
1 /*	$OpenBSD: sys_process.c,v 1.103 2024/11/27 05:25:57 anton Exp $	*/
2 /*	$NetBSD: sys_process.c,v 1.55 1996/05/15 06:17:47 tls Exp $	*/
3 
4 /*-
5  * Copyright (c) 1994 Christopher G. Demetriou.  All rights reserved.
6  * Copyright (c) 1982, 1986, 1989, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * (c) UNIX System Laboratories, Inc.
9  * All or some portions of this file are derived from material licensed
10  * to the University of California by American Telephone and Telegraph
11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12  * the permission of UNIX System Laboratories, Inc.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	from: @(#)sys_process.c	8.1 (Berkeley) 6/10/93
39  */
40 
41 /*
42  * References:
43  *	(1) Bach's "The Design of the UNIX Operating System",
44  *	(2) sys/miscfs/procfs from UCB's 4.4BSD-Lite distribution,
45  *	(3) the "4.4BSD Programmer's Reference Manual" published
46  *		by USENIX and O'Reilly & Associates.
47  * The 4.4BSD PRM does a reasonably good job of documenting what the various
48  * ptrace() requests should actually do, and its text is quoted several times
49  * in this file.
50  */
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/exec.h>
55 #include <sys/proc.h>
56 #include <sys/signalvar.h>
57 #include <sys/errno.h>
58 #include <sys/malloc.h>
59 #include <sys/ptrace.h>
60 #include <sys/uio.h>
61 #include <sys/sched.h>
62 #include <sys/exec_elf.h>
63 
64 #include <sys/mount.h>
65 #include <sys/syscallargs.h>
66 
67 #include <uvm/uvm_extern.h>
68 
69 #include <machine/fpu.h>
70 #include <machine/reg.h>
71 
72 #ifdef PTRACE
73 
/*
 * File-local helpers (defined near the end of the PTRACE section):
 * process_checktracestate() verifies that a target is traced by the
 * caller and in a state where it may be manipulated;
 * process_tprfind() resolves an ID that may name a process or a thread.
 */
static inline int	process_checktracestate(struct process *_curpr,
			    struct process *_tr, struct proc *_t);
static inline struct process *process_tprfind(pid_t _tpid, struct proc **_tp);

/*
 * Per-category workers that sys_ptrace() dispatches to: control
 * requests, user-state (memory/register) requests and kernel-state
 * (thread list/event mask/process state) requests.
 */
int	ptrace_ctrl(struct proc *, int, pid_t, caddr_t, int);
int	ptrace_ustate(struct proc *, int, pid_t, void *, int, register_t *);
int	ptrace_kstate(struct proc *, int, pid_t, void *);

/*
 * When zero, PT_ATTACH to a target for which inferior(target, caller)
 * is false additionally requires suser() to succeed; when nonzero that
 * extra check is skipped.
 */
int	global_ptrace;	/* permit tracing of not children */
83 
84 
85 /*
86  * Process debugging system call.
87  */
88 int
89 sys_ptrace(struct proc *p, void *v, register_t *retval)
90 {
91 	struct sys_ptrace_args /* {
92 		syscallarg(int) req;
93 		syscallarg(pid_t) pid;
94 		syscallarg(caddr_t) addr;
95 		syscallarg(int) data;
96 	} */ *uap = v;
97 	int req = SCARG(uap, req);
98 	pid_t pid = SCARG(uap, pid);
99 	caddr_t uaddr = SCARG(uap, addr);	/* userspace */
100 	void *kaddr = NULL;			/* kernelspace */
101 	int data = SCARG(uap, data);
102 	union {
103 		struct ptrace_thread_state u_pts;
104 		struct ptrace_io_desc u_piod;
105 		struct ptrace_event u_pe;
106 		struct ptrace_state u_ps;
107 		register_t u_wcookie;
108 		register_t u_pacmask[2];
109 	} u;
110 	int size = 0;
111 	enum { NONE, IN, IN_ALLOC, OUT, OUT_ALLOC, IN_OUT } mode;
112 	int kstate = 0;
113 	int error;
114 
115 	*retval = 0;
116 
117 	/* Figure out what sort of copyin/out operations we'll do */
118 	switch (req) {
119 	case PT_TRACE_ME:
120 	case PT_CONTINUE:
121 	case PT_KILL:
122 	case PT_ATTACH:
123 	case PT_DETACH:
124 #ifdef PT_STEP
125 	case PT_STEP:
126 #endif
127 		/* control operations do no copyin/out; dispatch directly */
128 		return ptrace_ctrl(p, req, pid, uaddr, data);
129 
130 	case PT_READ_I:
131 	case PT_READ_D:
132 	case PT_WRITE_I:
133 	case PT_WRITE_D:
134 		mode = NONE;
135 		break;
136 	case PT_IO:
137 		mode = IN_OUT;
138 		size = sizeof u.u_piod;
139 		data = size;	/* suppress the data == size check */
140 		break;
141 	case PT_GET_THREAD_FIRST:
142 		mode = OUT;
143 		size = sizeof u.u_pts;
144 		kstate = 1;
145 		break;
146 	case PT_GET_THREAD_NEXT:
147 		mode = IN_OUT;
148 		size = sizeof u.u_pts;
149 		kstate = 1;
150 		break;
151 	case PT_GET_EVENT_MASK:
152 		mode = OUT;
153 		size = sizeof u.u_pe;
154 		kstate = 1;
155 		break;
156 	case PT_SET_EVENT_MASK:
157 		mode = IN;
158 		size = sizeof u.u_pe;
159 		kstate = 1;
160 		break;
161 	case PT_GET_PROCESS_STATE:
162 		mode = OUT;
163 		size = sizeof u.u_ps;
164 		kstate = 1;
165 		break;
166 	case PT_GETREGS:
167 		mode = OUT_ALLOC;
168 		size = sizeof(struct reg);
169 		break;
170 	case PT_SETREGS:
171 		mode = IN_ALLOC;
172 		size = sizeof(struct reg);
173 		break;
174 #ifdef PT_GETFPREGS
175 	case PT_GETFPREGS:
176 		mode = OUT_ALLOC;
177 		size = sizeof(struct fpreg);
178 		break;
179 #endif
180 #ifdef PT_SETFPREGS
181 	case PT_SETFPREGS:
182 		mode = IN_ALLOC;
183 		size = sizeof(struct fpreg);
184 		break;
185 #endif
186 #ifdef PT_GETXMMREGS
187 	case PT_GETXMMREGS:
188 		mode = OUT_ALLOC;
189 		size = sizeof(struct xmmregs);
190 		break;
191 #endif
192 #ifdef PT_SETXMMREGS
193 	case PT_SETXMMREGS:
194 		mode = IN_ALLOC;
195 		size = sizeof(struct xmmregs);
196 		break;
197 #endif
198 #ifdef PT_WCOOKIE
199 	case PT_WCOOKIE:
200 		mode = OUT;
201 		size = sizeof u.u_wcookie;
202 		data = size;	/* suppress the data == size check */
203 		break;
204 #endif
205 #ifdef PT_PACMASK
206 	case PT_PACMASK:
207 		mode = OUT;
208 		size = sizeof u.u_pacmask;
209 		break;
210 #endif
211 #ifdef PT_GETXSTATE_INFO
212 	case PT_GETXSTATE_INFO:
213 		mode = OUT_ALLOC;
214 		size = sizeof(struct ptrace_xstate_info);
215 		break;
216 #endif
217 #ifdef PT_GETXSTATE
218 	case PT_GETXSTATE:
219 		mode = OUT_ALLOC;
220 		size = fpu_save_len;
221 		break;
222 #endif
223 #ifdef PT_SETXSTATE
224 	case PT_SETXSTATE:
225 		mode = IN_ALLOC;
226 		size = fpu_save_len;
227 		break;
228 #endif
229 	default:
230 		return EINVAL;
231 	}
232 
233 
234 	/* Now do any copyin()s and allocations in a consistent manner */
235 	switch (mode) {
236 	case NONE:
237 		kaddr = uaddr;
238 		break;
239 	case IN:
240 	case IN_OUT:
241 	case OUT:
242 		KASSERT(size <= sizeof u);
243 		if (data != size)
244 			return EINVAL;
245 		if (mode == OUT)
246 			memset(&u, 0, size);
247 		else { /* IN or IN_OUT */
248 			if ((error = copyin(uaddr, &u, size)))
249 				return error;
250 		}
251 		kaddr = &u;
252 		break;
253 	case IN_ALLOC:
254 		kaddr = malloc(size, M_TEMP, M_WAITOK);
255 		if ((error = copyin(uaddr, kaddr, size))) {
256 			free(kaddr, M_TEMP, size);
257 			return error;
258 		}
259 		break;
260 	case OUT_ALLOC:
261 		kaddr = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
262 		break;
263 	}
264 
265 	if (kstate)
266 		error = ptrace_kstate(p, req, pid, kaddr);
267 	else
268 		error = ptrace_ustate(p, req, pid, kaddr, data, retval);
269 
270 	/* Do any copyout()s and frees */
271 	if (error == 0) {
272 		switch (mode) {
273 		case NONE:
274 		case IN:
275 		case IN_ALLOC:
276 			break;
277 		case IN_OUT:
278 		case OUT:
279 			error = copyout(&u, uaddr, size);
280 			if (req == PT_IO) {
281 				/* historically, errors here are ignored */
282 				error = 0;
283 			}
284 			break;
285 		case OUT_ALLOC:
286 			error = copyout(kaddr, uaddr, size);
287 			break;
288 		}
289 	}
290 
291 	if (mode == IN_ALLOC || mode == OUT_ALLOC)
292 		free(kaddr, M_TEMP, size);
293 	return error;
294 }
295 
/*
 * ptrace control requests: attach, detach, continue, kill, single-step, etc
 *
 *	p	the calling (tracing) thread
 *	req	PT_TRACE_ME, PT_ATTACH, PT_DETACH, PT_KILL, PT_CONTINUE
 *		or, where defined, PT_STEP
 *	pid	the target; PT_CONTINUE and PT_STEP accept either a
 *		process ID or a thread ID, the others a process ID.
 *		Not used by PT_TRACE_ME, which acts on the caller.
 *	addr	for PT_CONTINUE/PT_STEP the address to resume at, or
 *		(int *)1 to leave the pc untouched
 *	data	signal number to hand to the target on resume.  It is
 *		range-checked for PT_CONTINUE, PT_STEP and PT_DETACH;
 *		PT_KILL and PT_ATTACH overwrite it with SIGKILL and
 *		SIGSTOP respectively.
 *
 * Returns 0 on success or an errno value.
 */
int
ptrace_ctrl(struct proc *p, int req, pid_t pid, caddr_t addr, int data)
{
	struct proc *t;				/* target thread */
	struct process *tr;			/* target process */
	int error = 0;

	/*
	 * Phase 1: locate the target process and thread.  PT_TRACE_ME
	 * is handled completely here since it acts on the caller itself.
	 */
	switch (req) {
	case PT_TRACE_ME:
		/* Just set the trace flag. */
		tr = p->p_p;
		mtx_enter(&tr->ps_mtx);
		if (ISSET(tr->ps_flags, PS_TRACED)) {
			/* already traced: refuse */
			mtx_leave(&tr->ps_mtx);
			return EBUSY;
		}
		atomic_setbits_int(&tr->ps_flags, PS_TRACED);
		/* remember the current parent as the original parent */
		tr->ps_opptr = tr->ps_pptr;
		mtx_leave(&tr->ps_mtx);
		/* allocate the ptrace state on first use, then clear it */
		if (tr->ps_ptstat == NULL)
			tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat),
			    M_SUBPROC, M_WAITOK);
		memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat));
		return 0;

	/* calls that only operate on the PID */
	case PT_KILL:
	case PT_ATTACH:
	case PT_DETACH:
		/* Find the process we're supposed to be operating on. */
		if ((tr = prfind(pid)) == NULL) {
			error = ESRCH;
			goto fail;
		}
		/* default to the first thread on the process's list */
		t = TAILQ_FIRST(&tr->ps_threads);
		break;

	/* calls that accept a PID or a thread ID */
	case PT_CONTINUE:
#ifdef PT_STEP
	case PT_STEP:
#endif
		if ((tr = process_tprfind(pid, &t)) == NULL) {
			error = ESRCH;
			goto fail;
		}
		break;
	}

	/*
	 * Phase 2: check permissions/state.  Everything except
	 * PT_ATTACH requires an existing trace relationship; PT_ATTACH
	 * requires that there is none and that the caller is allowed
	 * to create one.
	 */
	if (req != PT_ATTACH) {
		/* Check that the data is a valid signal number or zero. */
		if (req != PT_KILL && (data < 0 || data >= NSIG)) {
			error = EINVAL;
			goto fail;
		}

		/* Most operations require the target to already be traced */
		if ((error = process_checktracestate(p->p_p, tr, t)))
			goto fail;

		/* Do single-step fixup if needed. */
		FIX_SSTEP(t);
	} else {
		/*
		 * PT_ATTACH is the opposite; you can't attach to a process if:
		 *	(1) it's the process that's doing the attaching,
		 */
		if (tr == p->p_p) {
			error = EINVAL;
			goto fail;
		}

		/*
		 *	(2) it's a system process
		 */
		if (ISSET(tr->ps_flags, PS_SYSTEM)) {
			error = EPERM;
			goto fail;
		}

		/*
		 *	(3) it's already being traced, or
		 */
		if (ISSET(tr->ps_flags, PS_TRACED)) {
			error = EBUSY;
			goto fail;
		}

		/*
		 *	(4) it's in the middle of execve(2)
		 */
		if (ISSET(tr->ps_flags, PS_INEXEC)) {
			error = EAGAIN;
			goto fail;
		}

		/*
		 *	(5) it's not owned by you, or the last exec
		 *	    gave us setuid/setgid privs (unless
		 *	    you're root), or...
		 *
		 *      [Note: once PS_SUGID or PS_SUGIDEXEC gets set in
		 *	execve(), they stay set until the process does
		 *	another execve().  Hence this prevents a setuid
		 *	process which revokes its special privileges using
		 *	setuid() from being traced.  This is good security.]
		 */
		if ((tr->ps_ucred->cr_ruid != p->p_ucred->cr_ruid ||
		    ISSET(tr->ps_flags, PS_SUGIDEXEC | PS_SUGID)) &&
		    (error = suser(p)) != 0)
			goto fail;

		/*
		 * 	(5.5) it's not a child of the tracing process.
		 *	      (only enforced while global_ptrace is zero,
		 *	      and suser() overrides it)
		 */
		if (global_ptrace == 0 && !inferior(tr, p->p_p) &&
		    (error = suser(p)) != 0)
			goto fail;

		/*
		 *	(6) ...it's init, which controls the security level
		 *	    of the entire system, and the system was not
		 *          compiled with permanently insecure mode turned
		 *	    on.
		 */
		if ((tr->ps_pid == 1) && (securelevel > -1)) {
			error = EPERM;
			goto fail;
		}

		/*
		 *	(7) it's an ancestor of the current process and
		 *	    not init (because that would create a loop in
		 *	    the process graph).
		 */
		if (tr->ps_pid != 1 && inferior(p->p_p, tr)) {
			error = EINVAL;
			goto fail;
		}
	}

	/*
	 * Phase 3: carry out the request.  All four request families
	 * end up at the "sendsig" label inside the PT_DETACH case.
	 */
	switch (req) {

#ifdef PT_STEP
	case PT_STEP:
		/*
		 * From the 4.4BSD PRM:
		 * "Execution continues as in request PT_CONTINUE; however
		 * as soon as possible after execution of at least one
		 * instruction, execution stops again. [ ... ]"
		 */
#endif
	case PT_CONTINUE:
		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */

		/*
		 * A request that named the process (pid below
		 * THREAD_PID_OFFSET) is redirected to ps_single when
		 * that is set.  Otherwise the selected thread must be
		 * ps_single itself, in which case it is flagged
		 * P_TRACESINGLE; any other thread is rejected.
		 */
		if (pid < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;
		else if (t == tr->ps_single)
			atomic_setbits_int(&t->p_flag, P_TRACESINGLE);
		else {
			error = EINVAL;
			goto fail;
		}


		/* If the address parameter is not (int *)1, set the pc. */
		if ((int *)addr != (int *)1)
			if ((error = process_set_pc(t, addr)) != 0)
				goto fail;

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 * (For PT_CONTINUE the second argument is 0, the same
		 * value PT_DETACH passes to stop single stepping.)
		 */
		error = process_sstep(t, req == PT_STEP);
		if (error)
			goto fail;
#endif
		goto sendsig;

	case PT_DETACH:
		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 *
		 * Note that this implementation does not look at addr
		 * for PT_DETACH; the pc is left as it is.
		 */

		/* prefer ps_single when the request named the process */
		if (pid < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

#ifdef PT_STEP
		/*
		 * Stop single stepping.
		 */
		error = process_sstep(t, 0);
		if (error)
			goto fail;
#endif

		/* drop the trace relationship and the "waited" mark */
		mtx_enter(&tr->ps_mtx);
		process_untrace(tr);
		atomic_clearbits_int(&tr->ps_flags, PS_WAITED);
		mtx_leave(&tr->ps_mtx);

	sendsig:
		/*
		 * Common tail, reached by falling through from PT_DETACH
		 * and by goto from PT_CONTINUE/PT_STEP, PT_KILL and
		 * PT_ATTACH.  The saved ptrace state is cleared first.
		 */
		memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat));

		/* Finally, deliver the requested signal (or none). */
		if (t->p_stat == SSTOP) {
			/*
			 * Stopped thread: record the signal in ps_xsig
			 * and make the thread runnable again.
			 */
			tr->ps_xsig = data;
			SCHED_LOCK();
			unsleep(t);
			setrunnable(t);
			SCHED_UNLOCK();
		} else {
			/* not stopped: post the signal, if there is one */
			if (data != 0)
				psignal(t, data);
		}
		break;

	case PT_KILL:
		/* prefer ps_single when the request named the process */
		if (pid < THREAD_PID_OFFSET && tr->ps_single)
			t = tr->ps_single;

		/* just send the process a KILL signal. */
		data = SIGKILL;
		goto sendsig;	/* in PT_DETACH, above. */

	case PT_ATTACH:
		/*
		 * As was done in procfs:
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()).
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		mtx_enter(&tr->ps_mtx);
		atomic_setbits_int(&tr->ps_flags, PS_TRACED);
		tr->ps_opptr = tr->ps_pptr;
		process_reparent(tr, p->p_p);
		mtx_leave(&tr->ps_mtx);
		/* not zeroed here: the memset at sendsig clears it */
		if (tr->ps_ptstat == NULL)
			tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat),
			    M_SUBPROC, M_WAITOK);
		data = SIGSTOP;
		goto sendsig;
	default:
		KASSERTMSG(0, "%s: unhandled request %d", __func__, req);
		break;
	}

fail:
	/* also the normal exit: error is still 0 on success */
	return error;
}
572 
573 /*
574  * ptrace kernel-state requests: thread list, event mask, process state
575  */
576 int
577 ptrace_kstate(struct proc *p, int req, pid_t pid, void *addr)
578 {
579 	struct process *tr;			/* target process */
580 	struct ptrace_event *pe = addr;
581 	int error;
582 
583 	KASSERT((p->p_flag & P_SYSTEM) == 0);
584 
585 	/* Find the process we're supposed to be operating on. */
586 	if ((tr = prfind(pid)) == NULL)
587 		return ESRCH;
588 
589 	if ((error = process_checktracestate(p->p_p, tr, NULL)))
590 		return error;
591 
592 	switch (req) {
593 	case PT_GET_THREAD_FIRST:
594 	case PT_GET_THREAD_NEXT:
595 	      {
596 		struct ptrace_thread_state *pts = addr;
597 		struct proc *t;
598 
599 		if (req == PT_GET_THREAD_NEXT) {
600 			t = tfind_user(pts->pts_tid, tr);
601 			if (t == NULL || ISSET(t->p_flag, P_WEXIT))
602 				return ESRCH;
603 			t = TAILQ_NEXT(t, p_thr_link);
604 		} else {
605 			t = TAILQ_FIRST(&tr->ps_threads);
606 		}
607 
608 		if (t == NULL)
609 			pts->pts_tid = -1;
610 		else
611 			pts->pts_tid = t->p_tid + THREAD_PID_OFFSET;
612 		return 0;
613 	      }
614 	}
615 
616 	switch (req) {
617 	case PT_GET_EVENT_MASK:
618 		pe->pe_set_event = tr->ps_ptmask;
619 		break;
620 	case PT_SET_EVENT_MASK:
621 		tr->ps_ptmask = pe->pe_set_event;
622 		break;
623 	case PT_GET_PROCESS_STATE:
624 		if (tr->ps_single)
625 			tr->ps_ptstat->pe_tid =
626 			    tr->ps_single->p_tid + THREAD_PID_OFFSET;
627 		memcpy(addr, tr->ps_ptstat, sizeof *tr->ps_ptstat);
628 		break;
629 	default:
630 		KASSERTMSG(0, "%s: unhandled request %d", __func__, req);
631 		break;
632 	}
633 
634 	return 0;
635 }
636 
637 /*
638  * ptrace user-state requests: memory access, registers, stack cookie
639  */
640 int
641 ptrace_ustate(struct proc *p, int req, pid_t pid, void *addr, int data,
642     register_t *retval)
643 {
644 	struct proc *t;				/* target thread */
645 	struct process *tr;			/* target process */
646 	struct uio uio;
647 	struct iovec iov;
648 	int error, write;
649 	int temp = 0;
650 
651 	KASSERT((p->p_flag & P_SYSTEM) == 0);
652 
653 	/* Accept either PID or TID */
654 	if ((tr = process_tprfind(pid, &t)) == NULL)
655 		return ESRCH;
656 
657 	if ((error = process_checktracestate(p->p_p, tr, t)))
658 		return error;
659 
660 	FIX_SSTEP(t);
661 
662 	/* Now do the operation. */
663 	write = 0;
664 
665 	if ((error = process_checkioperm(p, tr)) != 0)
666 		return error;
667 
668 	switch (req) {
669 	case PT_WRITE_I:		/* XXX no separate I and D spaces */
670 	case PT_WRITE_D:
671 		write = 1;
672 		temp = data;
673 	case PT_READ_I:		/* XXX no separate I and D spaces */
674 	case PT_READ_D:
675 		/* write = 0 done above. */
676 		iov.iov_base = (caddr_t)&temp;
677 		iov.iov_len = sizeof(int);
678 		uio.uio_iov = &iov;
679 		uio.uio_iovcnt = 1;
680 		uio.uio_offset = (off_t)(vaddr_t)addr;
681 		uio.uio_resid = sizeof(int);
682 		uio.uio_segflg = UIO_SYSSPACE;
683 		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
684 		uio.uio_procp = p;
685 		error = process_domem(p, tr, &uio, write ? PT_WRITE_I :
686 				PT_READ_I);
687 		if (write == 0)
688 			*retval = temp;
689 		return error;
690 
691 	case PT_IO:
692 	      {
693 		struct ptrace_io_desc *piod = addr;
694 
695 		iov.iov_base = piod->piod_addr;
696 		iov.iov_len = piod->piod_len;
697 		uio.uio_iov = &iov;
698 		uio.uio_iovcnt = 1;
699 		uio.uio_offset = (off_t)(vaddr_t)piod->piod_offs;
700 		uio.uio_resid = piod->piod_len;
701 		uio.uio_segflg = UIO_USERSPACE;
702 		uio.uio_procp = p;
703 		switch (piod->piod_op) {
704 		case PIOD_READ_I:
705 			req = PT_READ_I;
706 			uio.uio_rw = UIO_READ;
707 			break;
708 		case PIOD_READ_D:
709 			req = PT_READ_D;
710 			uio.uio_rw = UIO_READ;
711 			break;
712 		case PIOD_WRITE_I:
713 			req = PT_WRITE_I;
714 			uio.uio_rw = UIO_WRITE;
715 			break;
716 		case PIOD_WRITE_D:
717 			req = PT_WRITE_D;
718 			uio.uio_rw = UIO_WRITE;
719 			break;
720 		case PIOD_READ_AUXV:
721 			req = PT_READ_D;
722 			uio.uio_rw = UIO_READ;
723 			temp = ELF_AUX_WORDS * sizeof(char *);
724 			if (uio.uio_offset > temp)
725 				return EIO;
726 			if (uio.uio_resid > temp - uio.uio_offset)
727 				uio.uio_resid = temp - uio.uio_offset;
728 			piod->piod_len = iov.iov_len = uio.uio_resid;
729 			uio.uio_offset += tr->ps_auxinfo;
730 #ifdef MACHINE_STACK_GROWS_UP
731 			if (uio.uio_offset < (off_t)tr->ps_strings)
732 				return EIO;
733 #else
734 			if (uio.uio_offset > (off_t)tr->ps_strings)
735 				return EIO;
736 			if ((uio.uio_offset + uio.uio_resid) >
737 			    (off_t)tr->ps_strings)
738 				uio.uio_resid = (off_t)tr->ps_strings -
739 				    uio.uio_offset;
740 #endif
741 			break;
742 		default:
743 			return EINVAL;
744 		}
745 		error = process_domem(p, tr, &uio, req);
746 		piod->piod_len -= uio.uio_resid;
747 		return error;
748 	      }
749 
750 	case PT_SETREGS:
751 		return process_write_regs(t, addr);
752 	case PT_GETREGS:
753 		return process_read_regs(t, addr);
754 
755 #ifdef PT_SETFPREGS
756 	case PT_SETFPREGS:
757 		return process_write_fpregs(t, addr);
758 #endif
759 #ifdef PT_SETFPREGS
760 	case PT_GETFPREGS:
761 		return process_read_fpregs(t, addr);
762 #endif
763 #ifdef PT_SETXMMREGS
764 	case PT_SETXMMREGS:
765 		return process_write_xmmregs(t, addr);
766 #endif
767 #ifdef PT_SETXMMREGS
768 	case PT_GETXMMREGS:
769 		return process_read_xmmregs(t, addr);
770 #endif
771 #ifdef PT_WCOOKIE
772 	case PT_WCOOKIE:
773 		*(register_t *)addr = process_get_wcookie(t);
774 		return 0;
775 #endif
776 #ifdef PT_PACMASK
777 	case PT_PACMASK:
778 		((register_t *)addr)[0] = process_get_pacmask(t);
779 		((register_t *)addr)[1] = process_get_pacmask(t);
780 		return 0;
781 #endif
782 #ifdef PT_GETXSTATE_INFO
783 	case PT_GETXSTATE_INFO:
784 		return process_read_xstate_info(t, addr);
785 #endif
786 #ifdef PT_GETXSTATE
787 	case PT_GETXSTATE:
788 		return process_read_xstate(t, addr);
789 #endif
790 #ifdef PT_SETXSTATE
791 	case PT_SETXSTATE:
792 		return process_write_xstate(t, addr);
793 #endif
794 	default:
795 		KASSERTMSG(0, "%s: unhandled request %d", __func__, req);
796 		break;
797 	}
798 
799 	return 0;
800 }
801 
802 
803 /*
804  * Helper for doing "it could be a PID or TID" lookup.  On failure
805  * returns NULL; on success returns the selected process and sets *tp
806  * to an appropriate thread in that process.
807  */
808 static inline struct process *
809 process_tprfind(pid_t tpid, struct proc **tp)
810 {
811 	if (tpid > THREAD_PID_OFFSET) {
812 		struct proc *t = tfind(tpid - THREAD_PID_OFFSET);
813 
814 		if (t == NULL)
815 			return NULL;
816 		*tp = t;
817 		return t->p_p;
818 	} else {
819 		struct process *tr = prfind(tpid);
820 
821 		if (tr == NULL)
822 			return NULL;
823 		*tp = TAILQ_FIRST(&tr->ps_threads);
824 		return tr;
825 	}
826 }
827 
828 
829 /*
830  * Check whether 'tr' is currently traced by 'curpr' and in a state
831  * to be manipulated.  If 't' is supplied then it must be stopped and
832  * waited for.
833  */
834 static inline int
835 process_checktracestate(struct process *curpr, struct process *tr,
836     struct proc *t)
837 {
838 	/*
839 	 * You can't do what you want to the process if:
840 	 *	(1) It's not being traced at all,
841 	 */
842 	if (!ISSET(tr->ps_flags, PS_TRACED))
843 		return EPERM;
844 
845 	/*
846 	 *	(2) it's not being traced by _you_, or
847 	 */
848 	if (tr->ps_pptr != curpr)
849 		return EBUSY;
850 
851 	/*
852 	 *	(3) it's in the middle of execve(2)
853 	 */
854 	if (ISSET(tr->ps_flags, PS_INEXEC))
855 		return EAGAIN;
856 
857 	/*
858 	 *	(4) if a thread was specified and it's not currently stopped.
859 	 */
860 	if (t != NULL &&
861 	    (t->p_stat != SSTOP || !ISSET(tr->ps_flags, PS_WAITED)))
862 		return EBUSY;
863 
864 	return 0;
865 }
866 
867 #endif /* PTRACE */
868 
869 /*
870  * Check if a process is allowed to fiddle with the memory of another.
871  *
872  * p = tracer
873  * tr = tracee
874  *
875  * 1.  You can't attach to a process not owned by you or one that has raised
876  *     its privileges.
877  * 1a. ...unless you are root.
878  *
879  * 2.  init is always off-limits because it can control the securelevel.
880  * 2a. ...unless securelevel is permanently set to insecure.
881  *
882  * 3.  Processes that are in the process of doing an exec() are always
883  *     off-limits because of the can of worms they are. Just wait a
884  *     second.
885  */
886 int
887 process_checkioperm(struct proc *p, struct process *tr)
888 {
889 	int error;
890 
891 	if ((tr->ps_ucred->cr_ruid != p->p_ucred->cr_ruid ||
892 	    ISSET(tr->ps_flags, PS_SUGIDEXEC | PS_SUGID)) &&
893 	    (error = suser(p)) != 0)
894 		return (error);
895 
896 	if ((tr->ps_pid == 1) && (securelevel > -1))
897 		return (EPERM);
898 
899 	if (ISSET(tr->ps_flags, PS_INEXEC))
900 		return (EAGAIN);
901 
902 	return (0);
903 }
904 
905 int
906 process_domem(struct proc *curp, struct process *tr, struct uio *uio, int req)
907 {
908 	struct vmspace *vm;
909 	int error;
910 	vaddr_t addr;
911 	vsize_t len;
912 
913 	len = uio->uio_resid;
914 	if (len == 0)
915 		return 0;
916 
917 	if ((error = process_checkioperm(curp, tr)) != 0)
918 		return error;
919 
920 	vm = tr->ps_vmspace;
921 	if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
922 		return EFAULT;
923 	addr = uio->uio_offset;
924 
925 	uvmspace_addref(vm);
926 
927 	error = uvm_io(&vm->vm_map, uio, UVM_IO_FIXPROT);
928 
929 	uvmspace_free(vm);
930 
931 	if (error == 0 && req == PT_WRITE_I)
932 		pmap_proc_iflush(tr, addr, len);
933 
934 	return error;
935 }
936