1 /* $NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software developed for The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $"); 34 35 #ifdef _KERNEL_OPT 36 #include "opt_modular.h" 37 #include "opt_syscall_debug.h" 38 #include "opt_ktrace.h" 39 #include "opt_ptrace.h" 40 #include "opt_dtrace.h" 41 #endif 42 43 /* XXX To get syscall prototypes. */ 44 #define SYSVSHM 45 #define SYSVSEM 46 #define SYSVMSG 47 48 #include <sys/param.h> 49 #include <sys/module.h> 50 #include <sys/sched.h> 51 #include <sys/syscall.h> 52 #include <sys/syscallargs.h> 53 #include <sys/syscallvar.h> 54 #include <sys/systm.h> 55 #include <sys/xcall.h> 56 #include <sys/ktrace.h> 57 #include <sys/ptrace.h> 58 59 int 60 sys_nomodule(struct lwp *l, const void *v, register_t *retval) 61 { 62 #ifdef MODULAR 63 64 const struct sysent *sy; 65 const struct emul *em; 66 const struct sc_autoload *auto_list; 67 u_int code; 68 69 /* 70 * Restart the syscall if we interrupted a module unload that 71 * failed. Acquiring kernconfig_lock delays us until any unload 72 * has been completed or rolled back. 73 */ 74 kernconfig_lock(); 75 sy = l->l_sysent; 76 if (sy->sy_call != sys_nomodule) { 77 kernconfig_unlock(); 78 return ERESTART; 79 } 80 /* 81 * Try to autoload a module to satisfy the request. If it 82 * works, retry the request. 83 */ 84 em = l->l_proc->p_emul; 85 code = sy - em->e_sysent; 86 87 if ((auto_list = em->e_sc_autoload) != NULL) 88 for (; auto_list->al_code > 0; auto_list++) { 89 if (auto_list->al_code != code) { 90 continue; 91 } 92 if (module_autoload(auto_list->al_module, 93 MODULE_CLASS_ANY) != 0 || 94 sy->sy_call == sys_nomodule) { 95 break; 96 } 97 kernconfig_unlock(); 98 return ERESTART; 99 } 100 kernconfig_unlock(); 101 #endif /* MODULAR */ 102 103 return sys_nosys(l, v, retval); 104 } 105 106 int 107 syscall_establish(const struct emul *em, const struct syscall_package *sp) 108 { 109 struct sysent *sy; 110 int i; 111 112 KASSERT(kernconfig_is_held()); 113 114 if (em == NULL) { 115 em = &emul_netbsd; 116 } 117 sy = em->e_sysent; 118 119 /* 120 * Ensure that all preconditions are valid, since this is 121 * an all or nothing deal. Once a system call is entered, 122 * it can become busy and we could be unable to remove it 123 * on error. 124 */ 125 for (i = 0; sp[i].sp_call != NULL; i++) { 126 if (sp[i].sp_code >= SYS_NSYSENT) 127 return EINVAL; 128 if (sy[sp[i].sp_code].sy_call != sys_nomodule && 129 sy[sp[i].sp_code].sy_call != sys_nosys) { 130 #ifdef DIAGNOSTIC 131 printf("syscall %d is busy\n", sp[i].sp_code); 132 #endif 133 return EBUSY; 134 } 135 } 136 /* Everything looks good, patch them in. */ 137 for (i = 0; sp[i].sp_call != NULL; i++) { 138 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 139 } 140 141 return 0; 142 } 143 144 int 145 syscall_disestablish(const struct emul *em, const struct syscall_package *sp) 146 { 147 struct sysent *sy; 148 const uint32_t *sb; 149 lwp_t *l; 150 int i; 151 152 KASSERT(kernconfig_is_held()); 153 154 if (em == NULL) { 155 em = &emul_netbsd; 156 } 157 sy = em->e_sysent; 158 sb = em->e_nomodbits; 159 160 /* 161 * First, patch the system calls to sys_nomodule or sys_nosys 162 * to gate further activity. 163 */ 164 for (i = 0; sp[i].sp_call != NULL; i++) { 165 KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); 166 sy[sp[i].sp_code].sy_call = 167 sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ? 168 sys_nomodule : sys_nosys; 169 } 170 171 /* 172 * Run a cross call to cycle through all CPUs. This does two 173 * things: lock activity provides a barrier and makes our update 174 * of sy_call visible to all CPUs, and upon return we can be sure 175 * that we see pertinent values of l_sysent posted by remote CPUs. 176 */ 177 xc_barrier(0); 178 179 /* 180 * Now it's safe to check l_sysent. Run through all LWPs and see 181 * if anyone is still using the system call. 182 */ 183 for (i = 0; sp[i].sp_call != NULL; i++) { 184 mutex_enter(&proc_lock); 185 LIST_FOREACH(l, &alllwp, l_list) { 186 if (l->l_sysent == &sy[sp[i].sp_code]) { 187 break; 188 } 189 } 190 mutex_exit(&proc_lock); 191 if (l == NULL) { 192 continue; 193 } 194 /* 195 * We lose: one or more calls are still in use. Put back 196 * the old entrypoints and act like nothing happened. 197 * When we drop kernconfig_lock, any system calls held in 198 * sys_nomodule() will be restarted. 199 */ 200 for (i = 0; sp[i].sp_call != NULL; i++) { 201 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 202 } 203 return EBUSY; 204 } 205 206 return 0; 207 } 208 209 /* 210 * Return true if system call tracing is enabled for the specified process. 211 */ 212 bool 213 trace_is_enabled(struct proc *p) 214 { 215 #ifdef SYSCALL_DEBUG 216 return (true); 217 #endif 218 #ifdef KTRACE 219 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 220 return (true); 221 #endif 222 #ifdef PTRACE 223 if (ISSET(p->p_slflag, PSL_SYSCALL)) 224 return (true); 225 #endif 226 227 return (false); 228 } 229 230 /* 231 * Start trace of particular system call. If process is being traced, 232 * this routine is called by MD syscall dispatch code just before 233 * a system call is actually executed. 234 */ 235 int 236 trace_enter(register_t code, const struct sysent *sy, const void *args) 237 { 238 int error = 0; 239 #if defined(PTRACE) || defined(KDTRACE_HOOKS) 240 struct proc *p = curlwp->l_proc; 241 #endif 242 243 #ifdef KDTRACE_HOOKS 244 if (sy->sy_entry) { 245 struct emul *e = p->p_emul; 246 if (e->e_dtrace_syscall) 247 (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args, 248 NULL, 0); 249 } 250 #endif 251 252 #ifdef SYSCALL_DEBUG 253 scdebug_call(code, args); 254 #endif /* SYSCALL_DEBUG */ 255 256 ktrsyscall(code, args, sy->sy_narg); 257 258 #ifdef PTRACE 259 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 260 (PSL_SYSCALL|PSL_TRACED)) { 261 proc_stoptrace(TRAP_SCE, code, args, NULL, 0); 262 if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) { 263 /* tracer will emulate syscall for us */ 264 error = EJUSTRETURN; 265 } 266 } 267 #endif 268 return error; 269 } 270 271 /* 272 * End trace of particular system call. If process is being traced, 273 * this routine is called by MD syscall dispatch code just after 274 * a system call finishes. 275 * MD caller guarantees the passed 'code' is within the supported 276 * system call number range for emulation the process runs under. 277 */ 278 void 279 trace_exit(register_t code, const struct sysent *sy, const void *args, 280 register_t rval[], int error) 281 { 282 #if defined(PTRACE) || defined(KDTRACE_HOOKS) 283 struct proc *p = curlwp->l_proc; 284 #endif 285 286 #ifdef KDTRACE_HOOKS 287 if (sy->sy_return) { 288 struct emul *e = p->p_emul; 289 if (e->e_dtrace_syscall) 290 (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy, 291 args, rval, error); 292 } 293 #endif 294 295 #ifdef SYSCALL_DEBUG 296 scdebug_ret(code, error, rval); 297 #endif /* SYSCALL_DEBUG */ 298 299 ktrsysret(code, error, rval); 300 301 #ifdef PTRACE 302 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) == 303 (PSL_SYSCALL|PSL_TRACED)) { 304 proc_stoptrace(TRAP_SCX, code, args, rval, error); 305 } 306 CLR(p->p_slflag, PSL_SYSCALLEMU); 307 #endif 308 } 309