/*
 * Origin: /minix3/minix/usr.bin/trace/kernel.c
 * (revision 521fa314e2aaec3c192c15f2aaa4c677a544e62a)
 *
 * This file, and only this file, should contain all the ugliness needed to
 * obtain values from the kernel.  It has to be recompiled every time the
 * layout of the kernel "struct proc" and/or "struct priv" structures changes.
 * In addition, this file contains the platform-dependent code related to
 * interpreting the registers exposed by the kernel.
 *
 * As a quick note, some functions return TRUE/FALSE, and some return 0/-1.
 * The former convention is used for functions that return a boolean value;
 * the latter is used for functions that set errno in all cases of failure,
 * and where the caller may conceivably use errno as a result.
 *
 * On a related note, relevant here and elsewhere: we define _MINIX_SYSTEM but
 * not _SYSTEM, which means that we should not get negative error numbers.
 */

17*521fa314SDavid van Moolenbroek #include "inc.h"
18*521fa314SDavid van Moolenbroek 
19*521fa314SDavid van Moolenbroek #include <machine/archtypes.h>
20*521fa314SDavid van Moolenbroek #include <minix/timers.h>
21*521fa314SDavid van Moolenbroek #include "kernel/proc.h"
22*521fa314SDavid van Moolenbroek #include "kernel/priv.h"
23*521fa314SDavid van Moolenbroek #if defined(__i386__)
24*521fa314SDavid van Moolenbroek #include "kernel/arch/i386/include/archconst.h" /* for the KTS_ constants */
25*521fa314SDavid van Moolenbroek #endif
26*521fa314SDavid van Moolenbroek 
27*521fa314SDavid van Moolenbroek #include <minix/param.h>
28*521fa314SDavid van Moolenbroek 
29*521fa314SDavid van Moolenbroek extern struct minix_kerninfo *_minix_kerninfo;
30*521fa314SDavid van Moolenbroek 
31*521fa314SDavid van Moolenbroek /*
32*521fa314SDavid van Moolenbroek  * Working area.  By obtaining values from the kernel into these local process
33*521fa314SDavid van Moolenbroek  * structures, and then returning them, we gain a little robustness against
34*521fa314SDavid van Moolenbroek  * changes in data types of the fields we need.
35*521fa314SDavid van Moolenbroek  */
36*521fa314SDavid van Moolenbroek static struct proc kernel_proc;
37*521fa314SDavid van Moolenbroek static struct priv kernel_priv;
38*521fa314SDavid van Moolenbroek 
39*521fa314SDavid van Moolenbroek /*
40*521fa314SDavid van Moolenbroek  * Check whether our notion of the kernel process structure layout matches that
41*521fa314SDavid van Moolenbroek  * of the kernel, by comparing magic values.  This can be done only once we
42*521fa314SDavid van Moolenbroek  * have attached to a process.  Return TRUE if everything seems alright; FALSE
43*521fa314SDavid van Moolenbroek  * otherwise.
44*521fa314SDavid van Moolenbroek  */
45*521fa314SDavid van Moolenbroek int
46*521fa314SDavid van Moolenbroek kernel_check(pid_t pid)
47*521fa314SDavid van Moolenbroek {
48*521fa314SDavid van Moolenbroek 
49*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, offsetof(struct proc, p_magic),
50*521fa314SDavid van Moolenbroek 	    &kernel_proc.p_magic, sizeof(kernel_proc.p_magic)) < 0)
51*521fa314SDavid van Moolenbroek 		return FALSE;
52*521fa314SDavid van Moolenbroek 
53*521fa314SDavid van Moolenbroek 	return (kernel_proc.p_magic == PMAGIC);
54*521fa314SDavid van Moolenbroek }
55*521fa314SDavid van Moolenbroek 
56*521fa314SDavid van Moolenbroek /*
57*521fa314SDavid van Moolenbroek  * Obtain the kernel name for the given (stopped) process.  Return 0 on
58*521fa314SDavid van Moolenbroek  * success, with the (possibly truncated) name stored in the 'name' buffer
59*521fa314SDavid van Moolenbroek  * which is of 'size' bytes; the name will be null-terminated.  Note that the
60*521fa314SDavid van Moolenbroek  * name may contain any suffixes as set by the kernel.  Return -1 on failure,
61*521fa314SDavid van Moolenbroek  * with errno set as appropriate.
62*521fa314SDavid van Moolenbroek  */
63*521fa314SDavid van Moolenbroek int
64*521fa314SDavid van Moolenbroek kernel_get_name(pid_t pid, char * name, size_t size)
65*521fa314SDavid van Moolenbroek {
66*521fa314SDavid van Moolenbroek 
67*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, offsetof(struct proc, p_name),
68*521fa314SDavid van Moolenbroek 	    kernel_proc.p_name, sizeof(kernel_proc.p_name)) < 0)
69*521fa314SDavid van Moolenbroek 		return -1;
70*521fa314SDavid van Moolenbroek 
71*521fa314SDavid van Moolenbroek 	strlcpy(name, kernel_proc.p_name, size);
72*521fa314SDavid van Moolenbroek 	return 0;
73*521fa314SDavid van Moolenbroek }
74*521fa314SDavid van Moolenbroek 
75*521fa314SDavid van Moolenbroek /*
76*521fa314SDavid van Moolenbroek  * Check whether the given process, which we have just attached to, is a system
77*521fa314SDavid van Moolenbroek  * service.  PM does not prevent us from attaching to most system services,
78*521fa314SDavid van Moolenbroek  * even though this utility only supports tracing user programs.  Unlike a few
79*521fa314SDavid van Moolenbroek  * other routines in this file, this function can not use ProcFS to obtain its
80*521fa314SDavid van Moolenbroek  * result, because the given process may actually be VFS or ProcFS itself!
81*521fa314SDavid van Moolenbroek  * Return TRUE if the given process is a system service; FALSE if not.
82*521fa314SDavid van Moolenbroek  */
83*521fa314SDavid van Moolenbroek int
84*521fa314SDavid van Moolenbroek kernel_is_service(pid_t pid)
85*521fa314SDavid van Moolenbroek {
86*521fa314SDavid van Moolenbroek 	size_t align, off;
87*521fa314SDavid van Moolenbroek 
88*521fa314SDavid van Moolenbroek 	/*
89*521fa314SDavid van Moolenbroek 	 * For T_GETUSER, the priv structure follows the proc structure, but
90*521fa314SDavid van Moolenbroek 	 * possibly with padding in between so as to align the priv structure
91*521fa314SDavid van Moolenbroek 	 * to long boundary.
92*521fa314SDavid van Moolenbroek 	 */
93*521fa314SDavid van Moolenbroek 	align = sizeof(long) - 1;
94*521fa314SDavid van Moolenbroek 	off = (sizeof(struct proc) + align) & ~align;
95*521fa314SDavid van Moolenbroek 
96*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, off + offsetof(struct priv, s_id),
97*521fa314SDavid van Moolenbroek 	    &kernel_priv.s_id, sizeof(kernel_priv.s_id)) < 0)
98*521fa314SDavid van Moolenbroek 		return FALSE; /* process may have disappeared, so no danger */
99*521fa314SDavid van Moolenbroek 
100*521fa314SDavid van Moolenbroek 	return (kernel_priv.s_id != USER_PRIV_ID);
101*521fa314SDavid van Moolenbroek }
102*521fa314SDavid van Moolenbroek 
103*521fa314SDavid van Moolenbroek /*
104*521fa314SDavid van Moolenbroek  * For the given process, which must be stopped on entering a system call,
105*521fa314SDavid van Moolenbroek  * retrieve the three register values describing the system call.  Return 0 on
106*521fa314SDavid van Moolenbroek  * success, or -1 on failure with errno set as appropriate.
107*521fa314SDavid van Moolenbroek  */
108*521fa314SDavid van Moolenbroek int
109*521fa314SDavid van Moolenbroek kernel_get_syscall(pid_t pid, reg_t reg[3])
110*521fa314SDavid van Moolenbroek {
111*521fa314SDavid van Moolenbroek 
112*521fa314SDavid van Moolenbroek 	assert(sizeof(kernel_proc.p_defer) == sizeof(reg_t) * 3);
113*521fa314SDavid van Moolenbroek 
114*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, offsetof(struct proc, p_defer),
115*521fa314SDavid van Moolenbroek 	    &kernel_proc.p_defer, sizeof(kernel_proc.p_defer)) < 0)
116*521fa314SDavid van Moolenbroek 		return -1;
117*521fa314SDavid van Moolenbroek 
118*521fa314SDavid van Moolenbroek 	reg[0] = kernel_proc.p_defer.r1;
119*521fa314SDavid van Moolenbroek 	reg[1] = kernel_proc.p_defer.r2;
120*521fa314SDavid van Moolenbroek 	reg[2] = kernel_proc.p_defer.r3;
121*521fa314SDavid van Moolenbroek 	return 0;
122*521fa314SDavid van Moolenbroek }
123*521fa314SDavid van Moolenbroek 
124*521fa314SDavid van Moolenbroek /*
125*521fa314SDavid van Moolenbroek  * Retrieve the value of the primary return register for the given process,
126*521fa314SDavid van Moolenbroek  * which must be stopped on leaving a system call.  This register contains the
127*521fa314SDavid van Moolenbroek  * IPC-level result of the system call.  Return 0 on success, or -1 on failure
128*521fa314SDavid van Moolenbroek  * with errno set as appropriate.
129*521fa314SDavid van Moolenbroek  */
130*521fa314SDavid van Moolenbroek int
131*521fa314SDavid van Moolenbroek kernel_get_retreg(pid_t pid, reg_t * retreg)
132*521fa314SDavid van Moolenbroek {
133*521fa314SDavid van Moolenbroek 	size_t off;
134*521fa314SDavid van Moolenbroek 
135*521fa314SDavid van Moolenbroek 	/*
136*521fa314SDavid van Moolenbroek 	 * Historically p_reg had to be the first field in the proc structure,
137*521fa314SDavid van Moolenbroek 	 * but since this is no longer a hard requirement, getting its actual
138*521fa314SDavid van Moolenbroek 	 * offset into the proc structure certainly doesn't hurt.
139*521fa314SDavid van Moolenbroek 	 */
140*521fa314SDavid van Moolenbroek 	off = offsetof(struct proc, p_reg);
141*521fa314SDavid van Moolenbroek 
142*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, off + offsetof(struct stackframe_s, retreg),
143*521fa314SDavid van Moolenbroek 	    &kernel_proc.p_reg.retreg, sizeof(kernel_proc.p_reg.retreg)) < 0)
144*521fa314SDavid van Moolenbroek 		return -1;
145*521fa314SDavid van Moolenbroek 
146*521fa314SDavid van Moolenbroek 	*retreg = kernel_proc.p_reg.retreg;
147*521fa314SDavid van Moolenbroek 	return 0;
148*521fa314SDavid van Moolenbroek }
149*521fa314SDavid van Moolenbroek 
150*521fa314SDavid van Moolenbroek /*
151*521fa314SDavid van Moolenbroek  * Return the stack top for user processes.  This is needed for execve(), since
152*521fa314SDavid van Moolenbroek  * the supplied frame contains pointers prepared for the new location of the
153*521fa314SDavid van Moolenbroek  * frame, which is at the stack top of the process after the execve().
154*521fa314SDavid van Moolenbroek  */
155*521fa314SDavid van Moolenbroek vir_bytes
156*521fa314SDavid van Moolenbroek kernel_get_stacktop(void)
157*521fa314SDavid van Moolenbroek {
158*521fa314SDavid van Moolenbroek 
159*521fa314SDavid van Moolenbroek 	return _minix_kerninfo->kinfo->user_sp;
160*521fa314SDavid van Moolenbroek }
161*521fa314SDavid van Moolenbroek 
162*521fa314SDavid van Moolenbroek /*
163*521fa314SDavid van Moolenbroek  * For the given stopped process, get its program counter (pc), stack pointer
164*521fa314SDavid van Moolenbroek  * (sp), and optionally its frame pointer (fp).  The given fp pointer may be
165*521fa314SDavid van Moolenbroek  * NULL, in which case the frame pointer is not obtained.  The given pc and sp
166*521fa314SDavid van Moolenbroek  * pointers must not be NULL, and this is intentional: obtaining fp may require
167*521fa314SDavid van Moolenbroek  * obtaining sp first.  Return 0 on success, or -1 on failure with errno set
168*521fa314SDavid van Moolenbroek  * as appropriate.  This functionality is not essential for tracing processes,
169*521fa314SDavid van Moolenbroek  * and may not be supported on all platforms, in part or full.  In particular,
170*521fa314SDavid van Moolenbroek  * on some platforms, a zero (= invalid) frame pointer may be returned on
171*521fa314SDavid van Moolenbroek  * success, indicating that obtaining frame pointers is not supported.
172*521fa314SDavid van Moolenbroek  */
173*521fa314SDavid van Moolenbroek int
174*521fa314SDavid van Moolenbroek kernel_get_context(pid_t pid, reg_t * pc, reg_t * sp, reg_t * fp)
175*521fa314SDavid van Moolenbroek {
176*521fa314SDavid van Moolenbroek 	size_t off;
177*521fa314SDavid van Moolenbroek 
178*521fa314SDavid van Moolenbroek 	off = offsetof(struct proc, p_reg); /* as above */
179*521fa314SDavid van Moolenbroek 
180*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, off + offsetof(struct stackframe_s, pc),
181*521fa314SDavid van Moolenbroek 	    &kernel_proc.p_reg.pc, sizeof(kernel_proc.p_reg.pc)) < 0)
182*521fa314SDavid van Moolenbroek 		return -1;
183*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, off + offsetof(struct stackframe_s, sp),
184*521fa314SDavid van Moolenbroek 	    &kernel_proc.p_reg.sp, sizeof(kernel_proc.p_reg.sp)) < 0)
185*521fa314SDavid van Moolenbroek 		return -1;
186*521fa314SDavid van Moolenbroek 
187*521fa314SDavid van Moolenbroek 	*pc = kernel_proc.p_reg.pc;
188*521fa314SDavid van Moolenbroek 	*sp = kernel_proc.p_reg.sp;
189*521fa314SDavid van Moolenbroek 
190*521fa314SDavid van Moolenbroek 	if (fp == NULL)
191*521fa314SDavid van Moolenbroek 		return 0;
192*521fa314SDavid van Moolenbroek 
193*521fa314SDavid van Moolenbroek #if defined(__i386__)
194*521fa314SDavid van Moolenbroek 	if (mem_get_user(pid, offsetof(struct proc, p_seg) +
195*521fa314SDavid van Moolenbroek 	    offsetof(struct segframe, p_kern_trap_style),
196*521fa314SDavid van Moolenbroek 	    &kernel_proc.p_seg.p_kern_trap_style,
197*521fa314SDavid van Moolenbroek 	    sizeof(kernel_proc.p_seg.p_kern_trap_style)) < 0)
198*521fa314SDavid van Moolenbroek 		return -1;
199*521fa314SDavid van Moolenbroek 
200*521fa314SDavid van Moolenbroek 	/* This is taken from the kernel i386 exception code. */
201*521fa314SDavid van Moolenbroek 	switch (kernel_proc.p_seg.p_kern_trap_style) {
202*521fa314SDavid van Moolenbroek 	case KTS_SYSENTER:
203*521fa314SDavid van Moolenbroek 	case KTS_SYSCALL:
204*521fa314SDavid van Moolenbroek 		if (mem_get_data(pid, *sp + 16, fp, sizeof(fp)) < 0)
205*521fa314SDavid van Moolenbroek 			return -1;
206*521fa314SDavid van Moolenbroek 		break;
207*521fa314SDavid van Moolenbroek 
208*521fa314SDavid van Moolenbroek 	default:
209*521fa314SDavid van Moolenbroek 		if (mem_get_user(pid, off + offsetof(struct stackframe_s, fp),
210*521fa314SDavid van Moolenbroek 		    &kernel_proc.p_reg.fp, sizeof(kernel_proc.p_reg.fp)) < 0)
211*521fa314SDavid van Moolenbroek 			return -1;
212*521fa314SDavid van Moolenbroek 
213*521fa314SDavid van Moolenbroek 		*fp = kernel_proc.p_reg.fp;
214*521fa314SDavid van Moolenbroek 	}
215*521fa314SDavid van Moolenbroek #else
216*521fa314SDavid van Moolenbroek 	*fp = 0; /* not supported; this is not a failure (*pc is valid) */
217*521fa314SDavid van Moolenbroek #endif
218*521fa314SDavid van Moolenbroek 	return 0;
219*521fa314SDavid van Moolenbroek }
220*521fa314SDavid van Moolenbroek 
221*521fa314SDavid van Moolenbroek /*
222*521fa314SDavid van Moolenbroek  * Given a frame pointer, obtain the next program counter and frame pointer.
223*521fa314SDavid van Moolenbroek  * Return 0 if successful, or -1 on failure with errno set appropriately.  The
224*521fa314SDavid van Moolenbroek  * functionality is not essential for tracing processes, and may not be
225*521fa314SDavid van Moolenbroek  * supported on all platforms.  Thus, on some platforms, this function may
226*521fa314SDavid van Moolenbroek  * always fail.
227*521fa314SDavid van Moolenbroek  */
228*521fa314SDavid van Moolenbroek static int
229*521fa314SDavid van Moolenbroek kernel_get_nextframe(pid_t pid, reg_t fp, reg_t * next_pc, reg_t * next_fp)
230*521fa314SDavid van Moolenbroek {
231*521fa314SDavid van Moolenbroek #if defined(__i386__)
232*521fa314SDavid van Moolenbroek 	void *p[2];
233*521fa314SDavid van Moolenbroek 
234*521fa314SDavid van Moolenbroek 	if (mem_get_data(pid, (vir_bytes)fp, &p, sizeof(p)) < 0)
235*521fa314SDavid van Moolenbroek 		return -1;
236*521fa314SDavid van Moolenbroek 
237*521fa314SDavid van Moolenbroek 	*next_pc = (reg_t)p[1];
238*521fa314SDavid van Moolenbroek 	*next_fp = (reg_t)p[0];
239*521fa314SDavid van Moolenbroek 	return 0;
240*521fa314SDavid van Moolenbroek #else
241*521fa314SDavid van Moolenbroek 	/* Not supported (yet). */
242*521fa314SDavid van Moolenbroek 	errno = ENOSYS;
243*521fa314SDavid van Moolenbroek 	return -1;
244*521fa314SDavid van Moolenbroek #endif
245*521fa314SDavid van Moolenbroek }
246*521fa314SDavid van Moolenbroek 
247*521fa314SDavid van Moolenbroek /*
248*521fa314SDavid van Moolenbroek  * Print a stack trace for the given process, which is known to be stopped on
249*521fa314SDavid van Moolenbroek  * entering a system call.  This function does not really belong here, but
250*521fa314SDavid van Moolenbroek  * without a doubt it is going to have to be fully rewritten to support
251*521fa314SDavid van Moolenbroek  * anything other than i386.
252*521fa314SDavid van Moolenbroek  *
253*521fa314SDavid van Moolenbroek  * Getting symbol names is currently an absolute nightmare.  Not just because
254*521fa314SDavid van Moolenbroek  * of shared libraries, but also since ProcFS does not offer a /proc/NNN/exe,
255*521fa314SDavid van Moolenbroek  * so that we cannot reliably determine the binary being executed: not for
256*521fa314SDavid van Moolenbroek  * processes being attached to, and not for exec calls using a relative path.
257*521fa314SDavid van Moolenbroek  */
258*521fa314SDavid van Moolenbroek void
259*521fa314SDavid van Moolenbroek kernel_put_stacktrace(struct trace_proc * proc)
260*521fa314SDavid van Moolenbroek {
261*521fa314SDavid van Moolenbroek 	unsigned int count, max;
262*521fa314SDavid van Moolenbroek 	reg_t pc, sp, fp, low, high;
263*521fa314SDavid van Moolenbroek 
264*521fa314SDavid van Moolenbroek 	if (kernel_get_context(proc->pid, &pc, &sp, &fp) < 0)
265*521fa314SDavid van Moolenbroek 		return;
266*521fa314SDavid van Moolenbroek 
267*521fa314SDavid van Moolenbroek 	/*
268*521fa314SDavid van Moolenbroek 	 * A low default limit such as 6 looks much prettier, but is simply not
269*521fa314SDavid van Moolenbroek 	 * useful enough for moderately-sized programs in practice.  Right now,
270*521fa314SDavid van Moolenbroek 	 * 15 is about two lines on a 80-column terminal.
271*521fa314SDavid van Moolenbroek 	 */
272*521fa314SDavid van Moolenbroek 	if (verbose == 0) max = 15;
273*521fa314SDavid van Moolenbroek 	else if (verbose == 1) max = 31;
274*521fa314SDavid van Moolenbroek 	else max = UINT_MAX;
275*521fa314SDavid van Moolenbroek 
276*521fa314SDavid van Moolenbroek 	/*
277*521fa314SDavid van Moolenbroek 	 * We keep formatting to an absolute minimum, to facilitate passing
278*521fa314SDavid van Moolenbroek 	 * the lines straight into tools such as addr2line.
279*521fa314SDavid van Moolenbroek 	 */
280*521fa314SDavid van Moolenbroek 	put_newline();
281*521fa314SDavid van Moolenbroek 	put_fmt(proc, "  0x%x", pc);
282*521fa314SDavid van Moolenbroek 
283*521fa314SDavid van Moolenbroek 	low = high = fp;
284*521fa314SDavid van Moolenbroek 
285*521fa314SDavid van Moolenbroek 	for (count = 1; count < max && fp != 0; count++) {
286*521fa314SDavid van Moolenbroek 		if (kernel_get_nextframe(proc->pid, fp, &pc, &fp) < 0)
287*521fa314SDavid van Moolenbroek 			break;
288*521fa314SDavid van Moolenbroek 
289*521fa314SDavid van Moolenbroek 		put_fmt(proc, " 0x%x", pc);
290*521fa314SDavid van Moolenbroek 
291*521fa314SDavid van Moolenbroek 		/*
292*521fa314SDavid van Moolenbroek 		 * Stop if we see a frame pointer that falls within the range
293*521fa314SDavid van Moolenbroek 		 * of the frame pointers we have seen so far.  This also
294*521fa314SDavid van Moolenbroek 		 * prevents getting stuck in a loop on the same frame pointer.
295*521fa314SDavid van Moolenbroek 		 */
296*521fa314SDavid van Moolenbroek 		if (fp >= low && fp <= high)
297*521fa314SDavid van Moolenbroek 			break;
298*521fa314SDavid van Moolenbroek 		if (low > fp)
299*521fa314SDavid van Moolenbroek 			low = fp;
300*521fa314SDavid van Moolenbroek 		if (high < fp)
301*521fa314SDavid van Moolenbroek 			high = fp;
302*521fa314SDavid van Moolenbroek 	}
303*521fa314SDavid van Moolenbroek 
304*521fa314SDavid van Moolenbroek 	if (fp != 0)
305*521fa314SDavid van Moolenbroek 		put_text(proc, " ..");
306*521fa314SDavid van Moolenbroek 	put_newline();
307*521fa314SDavid van Moolenbroek }
308