/*	$NetBSD: kern_syscall.c,v 1.18 2019/05/06 08:05:03 kamil Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.18 2019/05/06 08:05:03 kamil Exp $");

#ifdef _KERNEL_OPT
#include "opt_modular.h"
#include "opt_syscall_debug.h"
#include "opt_ktrace.h"
#include "opt_ptrace.h"
#include "opt_dtrace.h"
#endif

/* XXX To get syscall prototypes. */
#define SYSVSHM
#define SYSVSEM
#define SYSVMSG

#include <sys/param.h>
#include <sys/module.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/systm.h>
#include <sys/xcall.h>
#include <sys/ktrace.h>
#include <sys/ptrace.h>

int
sys_nomodule(struct lwp *l, const void *v, register_t *retval)
{
#ifdef MODULAR

	const struct sysent *sy;
	const struct emul *em;
	const struct sc_autoload *auto_list;
	u_int code;

	/*
	 * Restart the syscall if we interrupted a module unload that
	 * failed.  Acquiring kernconfig_lock delays us until any unload
	 * has been completed or rolled back.
	 */
	kernconfig_lock();
	sy = l->l_sysent;
	if (sy->sy_call != sys_nomodule) {
		kernconfig_unlock();
		return ERESTART;
	}
	/*
	 * Try to autoload a module to satisfy the request.  If it
	 * works, retry the request.
	 */
	em = l->l_proc->p_emul;
	code = sy - em->e_sysent;

	if ((auto_list = em->e_sc_autoload) != NULL)
		for (; auto_list->al_code > 0; auto_list++) {
			if (auto_list->al_code != code) {
				continue;
			}
			if (module_autoload(auto_list->al_module,
			    MODULE_CLASS_ANY) != 0 ||
			    sy->sy_call == sys_nomodule) {
				break;
			}
			kernconfig_unlock();
			return ERESTART;
		}
	kernconfig_unlock();
#endif	/* MODULAR */

	return sys_nosys(l, v, retval);
}
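
/*
 * An emulation opts in to this autoloading by pointing e_sc_autoload
 * at a zero-terminated table of (syscall code, module name) pairs.
 * A minimal sketch, using a hypothetical "mymod" module supplying a
 * hypothetical SYS_mycall:
 *
 *	static struct sc_autoload mymod_autoload[] = {
 *		{ SYS_mycall, "mymod" },
 *		{ 0, NULL }
 *	};
 *
 * sys_nomodule() above walks such a table to map the trapping syscall
 * number to the module that should provide it.
 */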

int
syscall_establish(const struct emul *em, const struct syscall_package *sp)
{
	struct sysent *sy;
	int i;

	KASSERT(kernconfig_is_held());

	if (em == NULL) {
		em = &emul_netbsd;
	}
	sy = em->e_sysent;

	/*
	 * Ensure that all preconditions are valid, since this is
	 * an all-or-nothing deal.  Once a system call is entered,
	 * it can become busy and we could be unable to remove it
	 * on error.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		if (sp[i].sp_code >= SYS_NSYSENT)
			return EINVAL;
		if (sy[sp[i].sp_code].sy_call != sys_nomodule &&
		    sy[sp[i].sp_code].sy_call != sys_nosys) {
#ifdef DIAGNOSTIC
			printf("syscall %d is busy\n", sp[i].sp_code);
#endif
			return EBUSY;
		}
	}
	/* Everything looks good, patch them in. */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
	}

	return 0;
}
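
/*
 * Typical use (an illustrative sketch; the "mymod" names are
 * hypothetical): a module's modcmd handler registers its calls on
 * MODULE_CMD_INIT with a zero-terminated package table:
 *
 *	static const struct syscall_package mymod_syscalls[] = {
 *		{ SYS_mycall, 0, (sy_call_t *)sys_mycall },
 *		{ 0, 0, NULL }
 *	};
 *
 *	error = syscall_establish(NULL, mymod_syscalls);
 *
 * Passing NULL for the emulation installs into the native emul_netbsd
 * table.
 */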

int
syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
{
	struct sysent *sy;
	const uint32_t *sb;
	uint64_t where;
	lwp_t *l;
	int i;

	KASSERT(kernconfig_is_held());

	if (em == NULL) {
		em = &emul_netbsd;
	}
	sy = em->e_sysent;
	sb = em->e_nomodbits;

	/*
	 * First, patch the system calls to sys_nomodule or sys_nosys
	 * to gate further activity.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
		sy[sp[i].sp_code].sy_call =
		    sb[sp[i].sp_code / 32] & (1U << (sp[i].sp_code % 32)) ?
		      sys_nomodule : sys_nosys;
	}

	/*
	 * Run a cross call to cycle through all CPUs.  This does two
	 * things: lock activity provides a barrier and makes our update
	 * of sy_call visible to all CPUs, and upon return we can be sure
	 * that we see pertinent values of l_sysent posted by remote CPUs.
	 */
	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(where);

	/*
	 * Now it's safe to check l_sysent.  Run through all LWPs and see
	 * if anyone is still using the system call.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		mutex_enter(proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_sysent == &sy[sp[i].sp_code]) {
				break;
			}
		}
		mutex_exit(proc_lock);
		if (l == NULL) {
			continue;
		}
		/*
		 * We lose: one or more calls are still in use.  Put back
		 * the old entrypoints and act like nothing happened.
		 * When we drop kernconfig_lock, any system calls held in
		 * sys_nomodule() will be restarted.
		 */
		for (i = 0; sp[i].sp_call != NULL; i++) {
			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
		}
		return EBUSY;
	}

	return 0;
}
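
/*
 * The counterpart to the registration example above (names again
 * hypothetical): a module's MODULE_CMD_FINI handler tears the calls
 * back down and must honour a failure:
 *
 *	error = syscall_disestablish(NULL, mymod_syscalls);
 *	if (error != 0)
 *		return error;
 *
 * EBUSY means some LWP was still inside one of the calls; returning
 * the error vetoes the unload, and once kernconfig_lock is dropped any
 * callers parked in sys_nomodule() are restarted against the restored
 * entrypoints.
 */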

/*
 * Return true if system call tracing is enabled for the specified process.
 */
bool
trace_is_enabled(struct proc *p)
{
#ifdef SYSCALL_DEBUG
	return (true);
#endif
#ifdef KTRACE
	if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET)))
		return (true);
#endif
#ifdef PTRACE
	if (ISSET(p->p_slflag, PSL_SYSCALL))
		return (true);
#endif

	return (false);
}
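
/*
 * The result is intended to be cached rather than recomputed on every
 * syscall: code that changes a process's tracing state is expected to
 * refresh the per-proc flag (a sketch of the pattern):
 *
 *	p->p_trace_enabled = trace_is_enabled(p);
 *
 * so the hot MD dispatch path only has to test p_trace_enabled.
 */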

/*
 * Begin tracing of a particular system call.  If the process is being
 * traced, this routine is called by the MD syscall dispatch code just
 * before the system call is actually executed.
 */
int
trace_enter(register_t code, const struct sysent *sy, const void *args)
{
	int error = 0;

#ifdef KDTRACE_HOOKS
	if (sy->sy_entry) {
		struct emul *e = curlwp->l_proc->p_emul;
		(*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0);
	}
#endif

#ifdef SYSCALL_DEBUG
	scdebug_call(code, args);
#endif /* SYSCALL_DEBUG */

	ktrsyscall(code, args, sy->sy_narg);

#ifdef PTRACE
	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
	    (PSL_SYSCALL|PSL_TRACED)) {
		proc_stoptrace(TRAP_SCE, code, args, NULL, 0);
		if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
			/* tracer will emulate syscall for us */
			error = EJUSTRETURN;
		}
	}
#endif
	return error;
}

/*
 * End tracing of a particular system call.  If the process is being
 * traced, this routine is called by the MD syscall dispatch code just
 * after the system call finishes.
 * The MD caller guarantees that the passed 'code' is within the
 * supported system call number range for the emulation the process
 * runs under.
 */
void
trace_exit(register_t code, const struct sysent *sy, const void *args,
    register_t rval[], int error)
{
#if defined(PTRACE) || defined(KDTRACE_HOOKS)
	struct proc *p = curlwp->l_proc;
#endif

#ifdef KDTRACE_HOOKS
	if (sy->sy_return) {
		(*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args,
		    rval, error);
	}
#endif

#ifdef SYSCALL_DEBUG
	scdebug_ret(code, error, rval);
#endif /* SYSCALL_DEBUG */

	ktrsysret(code, error, rval);

#ifdef PTRACE
	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
	    (PSL_SYSCALL|PSL_TRACED)) {
		proc_stoptrace(TRAP_SCX, code, args, rval, error);
	}
	CLR(p->p_slflag, PSL_SYSCALLEMU);
#endif
}
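
/*
 * A simplified sketch of how the MD dispatch path brackets the handler
 * with the two hooks above (in the spirit of the sy_invoke() helper in
 * <sys/syscallvar.h>; "sy", "uap" and "rval" stand for the
 * dispatcher's usual locals, and SYCALL_INDIRECT handling is omitted):
 *
 *	int error = 0;
 *
 *	if (__predict_false(l->l_proc->p_trace_enabled))
 *		error = trace_enter(code, sy, uap);
 *	if (error == 0)
 *		error = sy_call(sy, l, uap, rval);
 *	if (__predict_false(l->l_proc->p_trace_enabled))
 *		trace_exit(code, sy, uap, rval, error);
 *
 * A tracer that set PSL_SYSCALLEMU makes trace_enter() return
 * EJUSTRETURN, which skips the handler entirely; trace_exit() then
 * sees PSL_SYSCALLEMU, suppresses the syscall-exit stop and clears
 * the flag.
 */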
304