/*	$NetBSD: kern_syscall.c,v 1.16 2017/03/24 17:40:44 christos Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.16 2017/03/24 17:40:44 christos Exp $");

#ifdef _KERNEL_OPT
#include "opt_modular.h"
#include "opt_syscall_debug.h"
#include "opt_ktrace.h"
#include "opt_ptrace.h"
#include "opt_dtrace.h"
#endif

/* XXX To get syscall prototypes. */
#define SYSVSHM
#define SYSVSEM
#define SYSVMSG

#include <sys/param.h>
#include <sys/module.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/systm.h>
#include <sys/xcall.h>
#include <sys/ktrace.h>
#include <sys/ptrace.h>

int
sys_nomodule(struct lwp *l, const void *v, register_t *retval)
{
#ifdef MODULAR

	const struct sysent *sy;
	const struct emul *em;
	const struct sc_autoload *auto_list;
	u_int code;

	/*
	 * Restart the syscall if we interrupted a module unload that
	 * failed.  Acquiring kernconfig_lock delays us until any unload
	 * has been completed or rolled back.
	 */
	kernconfig_lock();
	sy = l->l_sysent;
	if (sy->sy_call != sys_nomodule) {
		kernconfig_unlock();
		return ERESTART;
	}
	/*
	 * Try to autoload a module to satisfy the request.  If it
	 * works, retry the request.
	 */
	em = l->l_proc->p_emul;
	code = sy - em->e_sysent;

	if ((auto_list = em->e_sc_autoload) != NULL)
		for (; auto_list->al_code > 0; auto_list++) {
			if (auto_list->al_code != code) {
				continue;
			}
			if (module_autoload(auto_list->al_module,
			    MODULE_CLASS_ANY) != 0 ||
			    sy->sy_call == sys_nomodule) {
				break;
			}
			kernconfig_unlock();
			return ERESTART;
		}
	kernconfig_unlock();
#endif	/* MODULAR */

	return sys_nosys(l, v, retval);
}
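
/*
 * Illustrative sketch, not part of this file: the per-emulation autoload
 * table walked above is an array of { syscall code, module name } pairs
 * terminated by an entry with a zero code.  An emulation points its
 * e_sc_autoload member at such a table so that sys_nomodule() can pull in
 * the matching module on first use.  The entries below are examples only.
 *
 *	static const struct sc_autoload example_syscalls_autoload[] = {
 *		{ SYS_aio_read,		"aio" },
 *		{ SYS_mq_open,		"mqueue" },
 *		{ 0, NULL }
 *	};
 */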

int
syscall_establish(const struct emul *em, const struct syscall_package *sp)
{
	struct sysent *sy;
	int i;

	KASSERT(kernconfig_is_held());

	if (em == NULL) {
		em = &emul_netbsd;
	}
	sy = em->e_sysent;

	/*
	 * Ensure that all preconditions are valid, since this is
	 * an all or nothing deal.  Once a system call is entered,
	 * it can become busy and we could be unable to remove it
	 * on error.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		if (sy[sp[i].sp_code].sy_call != sys_nomodule) {
#ifdef DIAGNOSTIC
			printf("syscall %d is busy\n", sp[i].sp_code);
#endif
			return EBUSY;
		}
	}
	/* Everything looks good, patch them in. */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
	}

	return 0;
}
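
/*
 * Illustrative sketch, not part of this file: a loadable module that
 * provides system calls typically describes them with a syscall_package
 * array terminated by a NULL sp_call and registers it from its modcmd
 * routine; the module framework holds kernconfig_lock across modcmd calls,
 * which satisfies the assertion above.  Passing NULL for the emulation
 * installs the calls into the native emul_netbsd table.  The syscall
 * number and handler names below are hypothetical.
 *
 *	static const struct syscall_package example_syscall_package[] = {
 *		{ SYS_example, 0, (sy_call_t *)sys_example },
 *		{ 0, 0, NULL }
 *	};
 *
 *	case MODULE_CMD_INIT:
 *		error = syscall_establish(NULL, example_syscall_package);
 *		break;
 */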

int
syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
{
	struct sysent *sy;
	uint64_t where;
	lwp_t *l;
	int i;

	KASSERT(kernconfig_is_held());

	if (em == NULL) {
		em = &emul_netbsd;
	}
	sy = em->e_sysent;

	/*
	 * First, patch the system calls to sys_nomodule to gate further
	 * activity.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
		sy[sp[i].sp_code].sy_call = sys_nomodule;
	}

	/*
	 * Run a cross call to cycle through all CPUs.  This does two
	 * things: lock activity provides a barrier and makes our update
	 * of sy_call visible to all CPUs, and upon return we can be sure
	 * that we see pertinent values of l_sysent posted by remote CPUs.
	 */
	where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
	xc_wait(where);

	/*
	 * Now it's safe to check l_sysent.  Run through all LWPs and see
	 * if anyone is still using the system call.
	 */
	for (i = 0; sp[i].sp_call != NULL; i++) {
		mutex_enter(proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_sysent == &sy[sp[i].sp_code]) {
				break;
			}
		}
		mutex_exit(proc_lock);
		if (l == NULL) {
			continue;
		}
		/*
		 * We lose: one or more calls are still in use.  Put back
		 * the old entrypoints and act like nothing happened.
		 * When we drop kernconfig_lock, any system calls held in
		 * sys_nomodule() will be restarted.
		 */
		for (i = 0; sp[i].sp_call != NULL; i++) {
			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
		}
		return EBUSY;
	}

	return 0;
}
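
/*
 * Illustrative sketch, continuing the hypothetical module above (not part
 * of this file): the matching MODULE_CMD_FINI path removes the package
 * again and must be prepared for EBUSY, which indicates that some LWP is
 * still executing one of the calls and the module cannot be unloaded yet.
 *
 *	case MODULE_CMD_FINI:
 *		error = syscall_disestablish(NULL, example_syscall_package);
 *		break;
 */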

/*
 * Return true if system call tracing is enabled for the specified process.
 */
bool
trace_is_enabled(struct proc *p)
{
#ifdef SYSCALL_DEBUG
	return (true);
#endif
#ifdef KTRACE
	if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET)))
		return (true);
#endif
#ifdef PTRACE
	if (ISSET(p->p_slflag, PSL_SYSCALL))
		return (true);
#endif

	return (false);
}
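
/*
 * A minimal usage sketch, assuming the usual caching scheme: callers do
 * not evaluate trace_is_enabled() on every system call.  Instead the
 * result is cached in the proc (p_trace_enabled) whenever the tracing
 * state changes, and the syscall fast path only consults that flag:
 *
 *	p->p_trace_enabled = trace_is_enabled(p);
 */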

/*
 * Start a trace of a particular system call.  If the process is being
 * traced, this routine is called by MD syscall dispatch code just before
 * the system call is actually executed.
 */
int
trace_enter(register_t code, const struct sysent *sy, const void *args)
{
	int error = 0;

#ifdef KDTRACE_HOOKS
	if (sy->sy_entry) {
		struct emul *e = curlwp->l_proc->p_emul;
		(*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, NULL, 0);
	}
#endif

#ifdef SYSCALL_DEBUG
	scdebug_call(code, args);
#endif /* SYSCALL_DEBUG */

	ktrsyscall(code, args, sy->sy_narg);

#ifdef PTRACE
	if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
	    (PSL_SYSCALL|PSL_TRACED)) {
		proc_stoptrace(TRAP_SCE);
		if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
			/* The tracer will emulate the syscall for us. */
			error = EJUSTRETURN;
		}
	}
#endif
	return error;
}

/*
 * End a trace of a particular system call.  If the process is being
 * traced, this routine is called by MD syscall dispatch code just after
 * the system call finishes.  The MD caller guarantees that the passed
 * 'code' is within the supported system call number range for the
 * emulation the process runs under.
 */
void
trace_exit(register_t code, const struct sysent *sy, const void *args,
    register_t rval[], int error)
{
#if defined(PTRACE) || defined(KDTRACE_HOOKS)
	struct proc *p = curlwp->l_proc;
#endif

#ifdef KDTRACE_HOOKS
	if (sy->sy_return) {
		(*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, args,
		    rval, error);
	}
#endif

#ifdef SYSCALL_DEBUG
	scdebug_ret(code, error, rval);
#endif /* SYSCALL_DEBUG */

	ktrsysret(code, error, rval);

#ifdef PTRACE
	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
	    (PSL_SYSCALL|PSL_TRACED)) {
		proc_stoptrace(TRAP_SCX);
	}
	CLR(p->p_slflag, PSL_SYSCALLEMU);
#endif
}
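
/*
 * Illustrative sketch, not part of this file: machine-dependent syscall
 * dispatch brackets the actual handler with trace_enter()/trace_exit()
 * roughly as follows (in practice this logic lives in the sy_invoke()
 * inline used by the MD trap code).  The local variable names here are
 * assumptions.
 *
 *	if (!traced || (error = trace_enter(code, callp, args)) == 0) {
 *		rval[0] = 0;
 *		error = sy_call(callp, l, args, rval);
 *	}
 *	if (traced)
 *		trace_exit(code, callp, args, rval, error);
 */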
297