xref: /minix3/minix/usr.bin/trace/kernel.c (revision 594df55e53732746ac76b15ad87a3eac02ec1619)
1 /*
2  * This file, and only this file, should contain all the ugliness needed to
3  * obtain values from the kernel.  It has to be recompiled every time the
4  * layout of the kernel "struct proc" and/or "struct priv" structures changes.
5  * In addition, this file contains the platform-dependent code related to
6  * interpreting the registers exposed by the kernel.
7  *
8  * As a quick note, some functions return TRUE/FALSE, and some return 0/-1.
9  * The former convention is used for functions that return a boolean value;
10  * the latter is used for functions that set errno in all cases of failure,
11  * and where the caller may conceivably use errno as a result.
12  *
13  * On a related note, relevant here and elsewhere: we define _MINIX_SYSTEM but
14  * not _SYSTEM, which means that we should not get negative error numbers.
15  */
16 
17 #include "inc.h"
18 
19 #include <machine/archtypes.h>
20 #include <minix/timers.h>
21 #include "kernel/proc.h"
22 #include "kernel/priv.h"
23 #if defined(__i386__)
24 #include "kernel/arch/i386/include/archconst.h" /* for the KTS_ constants */
25 #endif
26 #include <lib.h>
27 #include <minix/param.h>
28 
29 /*
30  * Working area.  By obtaining values from the kernel into these local process
31  * structures, and then returning them, we gain a little robustness against
32  * changes in data types of the fields we need.
33  */
34 static struct proc kernel_proc;
35 static struct priv kernel_priv;
36 
37 /*
38  * Check whether our notion of the kernel process structure layout matches that
39  * of the kernel, by comparing magic values.  This can be done only once we
40  * have attached to a process.  Return TRUE if everything seems alright; FALSE
41  * otherwise.
42  */
43 int
44 kernel_check(pid_t pid)
45 {
46 
47 	if (mem_get_user(pid, offsetof(struct proc, p_magic),
48 	    &kernel_proc.p_magic, sizeof(kernel_proc.p_magic)) < 0)
49 		return FALSE;
50 
51 	return (kernel_proc.p_magic == PMAGIC);
52 }
53 
54 /*
55  * Obtain the kernel name for the given (stopped) process.  Return 0 on
56  * success, with the (possibly truncated) name stored in the 'name' buffer
57  * which is of 'size' bytes; the name will be null-terminated.  Note that the
58  * name may contain any suffixes as set by the kernel.  Return -1 on failure,
59  * with errno set as appropriate.
60  */
61 int
62 kernel_get_name(pid_t pid, char * name, size_t size)
63 {
64 
65 	if (mem_get_user(pid, offsetof(struct proc, p_name),
66 	    kernel_proc.p_name, sizeof(kernel_proc.p_name)) < 0)
67 		return -1;
68 
69 	strlcpy(name, kernel_proc.p_name, size);
70 	return 0;
71 }
72 
73 /*
74  * Check whether the given process, which we have just attached to, is a system
75  * service.  PM does not prevent us from attaching to most system services,
76  * even though this utility only supports tracing user programs.  Unlike a few
77  * other routines in this file, this function can not use ProcFS to obtain its
78  * result, because the given process may actually be VFS or ProcFS itself!
79  * Return TRUE if the given process is a system service; FALSE if not.
80  */
81 int
82 kernel_is_service(pid_t pid)
83 {
84 	size_t align, off;
85 
86 	/*
87 	 * For T_GETUSER, the priv structure follows the proc structure, but
88 	 * possibly with padding in between so as to align the priv structure
89 	 * to long boundary.
90 	 */
91 	align = sizeof(long) - 1;
92 	off = (sizeof(struct proc) + align) & ~align;
93 
94 	if (mem_get_user(pid, off + offsetof(struct priv, s_id),
95 	    &kernel_priv.s_id, sizeof(kernel_priv.s_id)) < 0)
96 		return FALSE; /* process may have disappeared, so no danger */
97 
98 	return (kernel_priv.s_id != USER_PRIV_ID);
99 }
100 
101 /*
102  * For the given process, which must be stopped on entering a system call,
103  * retrieve the three register values describing the system call.  Return 0 on
104  * success, or -1 on failure with errno set as appropriate.
105  */
106 int
107 kernel_get_syscall(pid_t pid, reg_t reg[3])
108 {
109 
110 	assert(sizeof(kernel_proc.p_defer) == sizeof(reg_t) * 3);
111 
112 	if (mem_get_user(pid, offsetof(struct proc, p_defer),
113 	    &kernel_proc.p_defer, sizeof(kernel_proc.p_defer)) < 0)
114 		return -1;
115 
116 	reg[0] = kernel_proc.p_defer.r1;
117 	reg[1] = kernel_proc.p_defer.r2;
118 	reg[2] = kernel_proc.p_defer.r3;
119 	return 0;
120 }
121 
122 /*
123  * Retrieve the value of the primary return register for the given process,
124  * which must be stopped on leaving a system call.  This register contains the
125  * IPC-level result of the system call.  Return 0 on success, or -1 on failure
126  * with errno set as appropriate.
127  */
128 int
129 kernel_get_retreg(pid_t pid, reg_t * retreg)
130 {
131 	size_t off;
132 
133 	/*
134 	 * Historically p_reg had to be the first field in the proc structure,
135 	 * but since this is no longer a hard requirement, getting its actual
136 	 * offset into the proc structure certainly doesn't hurt.
137 	 */
138 	off = offsetof(struct proc, p_reg);
139 
140 	if (mem_get_user(pid, off + offsetof(struct stackframe_s, retreg),
141 	    &kernel_proc.p_reg.retreg, sizeof(kernel_proc.p_reg.retreg)) < 0)
142 		return -1;
143 
144 	*retreg = kernel_proc.p_reg.retreg;
145 	return 0;
146 }
147 
148 /*
149  * Return the stack top for user processes.  This is needed for execve(), since
150  * the supplied frame contains pointers prepared for the new location of the
151  * frame, which is at the stack top of the process after the execve().
152  */
153 vir_bytes
154 kernel_get_stacktop(void)
155 {
156 
157 	return get_minix_kerninfo()->kinfo->user_sp;
158 }
159 
160 /*
161  * For the given stopped process, get its program counter (pc), stack pointer
162  * (sp), and optionally its frame pointer (fp).  The given fp pointer may be
163  * NULL, in which case the frame pointer is not obtained.  The given pc and sp
164  * pointers must not be NULL, and this is intentional: obtaining fp may require
165  * obtaining sp first.  Return 0 on success, or -1 on failure with errno set
166  * as appropriate.  This functionality is not essential for tracing processes,
167  * and may not be supported on all platforms, in part or full.  In particular,
168  * on some platforms, a zero (= invalid) frame pointer may be returned on
169  * success, indicating that obtaining frame pointers is not supported.
170  */
171 int
172 kernel_get_context(pid_t pid, reg_t * pc, reg_t * sp, reg_t * fp)
173 {
174 	size_t off;
175 
176 	off = offsetof(struct proc, p_reg); /* as above */
177 
178 	if (mem_get_user(pid, off + offsetof(struct stackframe_s, pc),
179 	    &kernel_proc.p_reg.pc, sizeof(kernel_proc.p_reg.pc)) < 0)
180 		return -1;
181 	if (mem_get_user(pid, off + offsetof(struct stackframe_s, sp),
182 	    &kernel_proc.p_reg.sp, sizeof(kernel_proc.p_reg.sp)) < 0)
183 		return -1;
184 
185 	*pc = kernel_proc.p_reg.pc;
186 	*sp = kernel_proc.p_reg.sp;
187 
188 	if (fp == NULL)
189 		return 0;
190 
191 #if defined(__i386__)
192 	if (mem_get_user(pid, offsetof(struct proc, p_seg) +
193 	    offsetof(struct segframe, p_kern_trap_style),
194 	    &kernel_proc.p_seg.p_kern_trap_style,
195 	    sizeof(kernel_proc.p_seg.p_kern_trap_style)) < 0)
196 		return -1;
197 
198 	/* This is taken from the kernel i386 exception code. */
199 	switch (kernel_proc.p_seg.p_kern_trap_style) {
200 	case KTS_SYSENTER:
201 	case KTS_SYSCALL:
202 		if (mem_get_data(pid, *sp + 16, fp, sizeof(fp)) < 0)
203 			return -1;
204 		break;
205 
206 	default:
207 		if (mem_get_user(pid, off + offsetof(struct stackframe_s, fp),
208 		    &kernel_proc.p_reg.fp, sizeof(kernel_proc.p_reg.fp)) < 0)
209 			return -1;
210 
211 		*fp = kernel_proc.p_reg.fp;
212 	}
213 #else
214 	*fp = 0; /* not supported; this is not a failure (*pc is valid) */
215 #endif
216 	return 0;
217 }
218 
219 /*
220  * Given a frame pointer, obtain the next program counter and frame pointer.
221  * Return 0 if successful, or -1 on failure with errno set appropriately.  The
222  * functionality is not essential for tracing processes, and may not be
223  * supported on all platforms.  Thus, on some platforms, this function may
224  * always fail.
225  */
226 static int
227 kernel_get_nextframe(pid_t pid, reg_t fp, reg_t * next_pc, reg_t * next_fp)
228 {
229 #if defined(__i386__)
230 	void *p[2];
231 
232 	if (mem_get_data(pid, (vir_bytes)fp, &p, sizeof(p)) < 0)
233 		return -1;
234 
235 	*next_pc = (reg_t)p[1];
236 	*next_fp = (reg_t)p[0];
237 	return 0;
238 #else
239 	/* Not supported (yet). */
240 	errno = ENOSYS;
241 	return -1;
242 #endif
243 }
244 
245 /*
246  * Print a stack trace for the given process, which is known to be stopped on
247  * entering a system call.  This function does not really belong here, but
248  * without a doubt it is going to have to be fully rewritten to support
249  * anything other than i386.
250  *
251  * Getting symbol names is currently an absolute nightmare.  Not just because
252  * of shared libraries, but also since ProcFS does not offer a /proc/NNN/exe,
253  * so that we cannot reliably determine the binary being executed: not for
254  * processes being attached to, and not for exec calls using a relative path.
255  */
256 void
257 kernel_put_stacktrace(struct trace_proc * proc)
258 {
259 	unsigned int count, max;
260 	reg_t pc, sp, fp, low, high;
261 
262 	if (kernel_get_context(proc->pid, &pc, &sp, &fp) < 0)
263 		return;
264 
265 	/*
266 	 * A low default limit such as 6 looks much prettier, but is simply not
267 	 * useful enough for moderately-sized programs in practice.  Right now,
268 	 * 15 is about two lines on a 80-column terminal.
269 	 */
270 	if (verbose == 0) max = 15;
271 	else if (verbose == 1) max = 31;
272 	else max = UINT_MAX;
273 
274 	/*
275 	 * We keep formatting to an absolute minimum, to facilitate passing
276 	 * the lines straight into tools such as addr2line.
277 	 */
278 	put_newline();
279 	put_fmt(proc, "  0x%x", pc);
280 
281 	low = high = fp;
282 
283 	for (count = 1; count < max && fp != 0; count++) {
284 		if (kernel_get_nextframe(proc->pid, fp, &pc, &fp) < 0)
285 			break;
286 
287 		put_fmt(proc, " 0x%x", pc);
288 
289 		/*
290 		 * Stop if we see a frame pointer that falls within the range
291 		 * of the frame pointers we have seen so far.  This also
292 		 * prevents getting stuck in a loop on the same frame pointer.
293 		 */
294 		if (fp >= low && fp <= high)
295 			break;
296 		if (low > fp)
297 			low = fp;
298 		if (high < fp)
299 			high = fp;
300 	}
301 
302 	if (fp != 0)
303 		put_text(proc, " ..");
304 	put_newline();
305 }
306