1 /* $NetBSD: kern_syscall.c,v 1.9 2013/12/14 06:27:57 pgoyette Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software developed for The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.9 2013/12/14 06:27:57 pgoyette Exp $"); 34 35 #ifdef _KERNEL_OPT 36 #include "opt_modular.h" 37 #include "opt_syscall_debug.h" 38 #include "opt_ktrace.h" 39 #include "opt_ptrace.h" 40 #endif 41 42 /* XXX To get syscall prototypes. */ 43 #define SYSVSHM 44 #define SYSVSEM 45 #define SYSVMSG 46 47 #include <sys/param.h> 48 #include <sys/module.h> 49 #include <sys/sched.h> 50 #include <sys/syscall.h> 51 #include <sys/syscallargs.h> 52 #include <sys/syscallvar.h> 53 #include <sys/systm.h> 54 #include <sys/xcall.h> 55 #include <sys/ktrace.h> 56 #include <sys/ptrace.h> 57 58 int 59 sys_nomodule(struct lwp *l, const void *v, register_t *retval) 60 { 61 #ifdef MODULAR 62 static struct { 63 u_int al_code; 64 const char *al_module; 65 } const autoload[] = { 66 { SYS_aio_cancel, "aio" }, 67 { SYS_aio_error, "aio" }, 68 { SYS_aio_fsync, "aio" }, 69 { SYS_aio_read, "aio" }, 70 { SYS_aio_return, "aio" }, 71 { SYS___aio_suspend50, "aio" }, 72 { SYS_aio_write, "aio" }, 73 { SYS_lio_listio, "aio" }, 74 { SYS_mq_open, "mqueue" }, 75 { SYS_mq_close, "mqueue" }, 76 { SYS_mq_unlink, "mqueue" }, 77 { SYS_mq_getattr, "mqueue" }, 78 { SYS_mq_setattr, "mqueue" }, 79 { SYS_mq_notify, "mqueue" }, 80 { SYS_mq_send, "mqueue" }, 81 { SYS_mq_receive, "mqueue" }, 82 { SYS___mq_timedsend50, "mqueue" }, 83 { SYS___mq_timedreceive50, "mqueue" }, 84 { SYS_compat_43_fstat43, "compat" }, 85 { SYS_compat_43_lstat43, "compat" }, 86 { SYS_compat_43_oaccept, "compat" }, 87 { SYS_compat_43_ocreat, "compat" }, 88 { SYS_compat_43_oftruncate, "compat" }, 89 { SYS_compat_43_ogetdirentries, "compat" }, 90 { SYS_compat_43_ogetdtablesize, "compat" }, 91 { SYS_compat_43_ogethostid, "compat" }, 92 { SYS_compat_43_ogethostname, "compat" }, 93 { SYS_compat_43_ogetkerninfo, "compat" }, 94 { SYS_compat_43_ogetpagesize, "compat" }, 95 { SYS_compat_43_ogetpeername, "compat" }, 96 { SYS_compat_43_ogetrlimit, "compat" }, 97 { SYS_compat_43_ogetsockname, "compat" }, 98 { SYS_compat_43_okillpg, "compat" }, 99 { SYS_compat_43_olseek, "compat" }, 100 { SYS_compat_43_ommap, "compat" }, 101 { SYS_compat_43_oquota, "compat" }, 102 { SYS_compat_43_orecv, "compat" }, 103 { SYS_compat_43_orecvfrom, "compat" }, 104 { SYS_compat_43_orecvmsg, "compat" }, 105 { SYS_compat_43_osend, "compat" }, 106 { SYS_compat_43_osendmsg, "compat" }, 107 { SYS_compat_43_osethostid, "compat" }, 108 { SYS_compat_43_osethostname, "compat" }, 109 { SYS_compat_43_osetrlimit, "compat" }, 110 { SYS_compat_43_osigblock, "compat" }, 111 { SYS_compat_43_osigsetmask, "compat" }, 112 { SYS_compat_43_osigstack, "compat" }, 113 { SYS_compat_43_osigvec, "compat" }, 114 { SYS_compat_43_otruncate, "compat" }, 115 { SYS_compat_43_owait, "compat" }, 116 { SYS_compat_43_stat43, "compat" }, 117 { SYS_compat_09_ogetdomainname, "compat" }, 118 { SYS_compat_09_osetdomainname, "compat" }, 119 { SYS_compat_09_ouname, "compat" }, 120 #ifndef _LP64 121 { SYS_compat_10_omsgsys, "compat" }, 122 { SYS_compat_10_osemsys, "compat" }, 123 { SYS_compat_10_oshmsys, "compat" }, 124 #endif 125 { SYS_compat_12_fstat12, "compat" }, 126 { SYS_compat_12_getdirentries, "compat" }, 127 { SYS_compat_12_lstat12, "compat" }, 128 { SYS_compat_12_msync, "compat" }, 129 { SYS_compat_12_oreboot, "compat" }, 130 { SYS_compat_12_oswapon, "compat" }, 131 { SYS_compat_12_stat12, "compat" }, 132 { SYS_compat_13_sigaction13, "compat" }, 133 { SYS_compat_13_sigaltstack13, "compat" }, 134 { SYS_compat_13_sigpending13, "compat" }, 135 { SYS_compat_13_sigprocmask13, "compat" }, 136 { SYS_compat_13_sigreturn13, "compat" }, 137 { SYS_compat_13_sigsuspend13, "compat" }, 138 { SYS_compat_14___semctl, "compat" }, 139 { SYS_compat_14_msgctl, "compat" }, 140 { SYS_compat_14_shmctl, "compat" }, 141 { SYS_compat_16___sigaction14, "compat" }, 142 { SYS_compat_16___sigreturn14, "compat" }, 143 { SYS_compat_20_fhstatfs, "compat" }, 144 { SYS_compat_20_fstatfs, "compat" }, 145 { SYS_compat_20_getfsstat, "compat" }, 146 { SYS_compat_20_statfs, "compat" }, 147 { SYS_compat_30___fhstat30, "compat" }, 148 { SYS_compat_30___fstat13, "compat" }, 149 { SYS_compat_30___lstat13, "compat" }, 150 { SYS_compat_30___stat13, "compat" }, 151 { SYS_compat_30_fhopen, "compat" }, 152 { SYS_compat_30_fhstat, "compat" }, 153 { SYS_compat_30_fhstatvfs1, "compat" }, 154 { SYS_compat_30_getdents, "compat" }, 155 { SYS_compat_30_getfh, "compat" }, 156 { SYS_compat_30_socket, "compat" }, 157 { SYS_compat_40_mount, "compat" }, 158 { SYS_compat_50_wait4, "compat" }, 159 { SYS_compat_50_mknod, "compat" }, 160 { SYS_compat_50_setitimer, "compat" }, 161 { SYS_compat_50_getitimer, "compat" }, 162 { SYS_compat_50_select, "compat" }, 163 { SYS_compat_50_gettimeofday, "compat" }, 164 { SYS_compat_50_getrusage, "compat" }, 165 { SYS_compat_50_settimeofday, "compat" }, 166 { SYS_compat_50_utimes, "compat" }, 167 { SYS_compat_50_adjtime, "compat" }, 168 { SYS_compat_50_lfs_segwait, "compat" }, 169 { SYS_compat_50_futimes, "compat" }, 170 { SYS_compat_50_clock_gettime, "compat" }, 171 { SYS_compat_50_clock_settime, "compat" }, 172 { SYS_compat_50_clock_getres, "compat" }, 173 { SYS_compat_50_timer_settime, "compat" }, 174 { SYS_compat_50_timer_gettime, "compat" }, 175 { SYS_compat_50_nanosleep, "compat" }, 176 { SYS_compat_50___sigtimedwait, "compat" }, 177 { SYS_compat_50_mq_timedsend, "compat" }, 178 { SYS_compat_50_mq_timedreceive, "compat" }, 179 { SYS_compat_50_lutimes, "compat" }, 180 { SYS_compat_50_____semctl13, "compat" }, 181 { SYS_compat_50___msgctl13, "compat" }, 182 { SYS_compat_50___shmctl13, "compat" }, 183 { SYS_compat_50__lwp_park, "compat" }, 184 { SYS_compat_50_kevent, "compat" }, 185 { SYS_compat_50_pselect, "compat" }, 186 { SYS_compat_50_pollts, "compat" }, 187 { SYS_compat_50___stat30, "compat" }, 188 { SYS_compat_50___fstat30, "compat" }, 189 { SYS_compat_50___lstat30, "compat" }, 190 { SYS_compat_50___ntp_gettime30, "compat" }, 191 { SYS_compat_50___fhstat40, "compat" }, 192 { SYS_compat_50_aio_suspend, "compat" }, 193 { SYS_compat_60__lwp_park, "compat" }, 194 { SYS__ksem_init, "ksem" }, 195 { SYS__ksem_open, "ksem" }, 196 { SYS__ksem_unlink, "ksem" }, 197 { SYS__ksem_close, "ksem" }, 198 { SYS__ksem_post, "ksem" }, 199 { SYS__ksem_wait, "ksem" }, 200 { SYS__ksem_trywait, "ksem" }, 201 { SYS__ksem_getvalue, "ksem" }, 202 { SYS__ksem_destroy, "ksem" }, 203 { SYS__ksem_timedwait, "ksem" }, 204 { SYS_nfssvc, "nfsserver" }, 205 { SYS_afssys, "openafs" }, 206 }; 207 const struct sysent *sy; 208 const struct emul *em; 209 int code, i; 210 211 /* 212 * Restart the syscall if we interrupted a module unload that 213 * failed. Acquiring kernconfig_lock delays us until any unload 214 * has been completed or rolled back. 215 */ 216 kernconfig_lock(); 217 sy = l->l_sysent; 218 if (sy->sy_call != sys_nomodule) { 219 kernconfig_unlock(); 220 return ERESTART; 221 } 222 /* 223 * Try to autoload a module to satisfy the request. If it 224 * works, retry the request. 225 */ 226 em = l->l_proc->p_emul; 227 if (em == &emul_netbsd) { 228 code = sy - em->e_sysent; 229 for (i = 0; i < __arraycount(autoload); i++) { 230 if (autoload[i].al_code != code) { 231 continue; 232 } 233 if (module_autoload(autoload[i].al_module, 234 MODULE_CLASS_ANY) != 0 || 235 sy->sy_call == sys_nomodule) { 236 break; 237 } 238 kernconfig_unlock(); 239 return ERESTART; 240 } 241 } 242 kernconfig_unlock(); 243 #endif /* MODULAR */ 244 245 return sys_nosys(l, v, retval); 246 } 247 248 int 249 syscall_establish(const struct emul *em, const struct syscall_package *sp) 250 { 251 struct sysent *sy; 252 int i; 253 254 KASSERT(kernconfig_is_held()); 255 256 if (em == NULL) { 257 em = &emul_netbsd; 258 } 259 sy = em->e_sysent; 260 261 /* 262 * Ensure that all preconditions are valid, since this is 263 * an all or nothing deal. Once a system call is entered, 264 * it can become busy and we could be unable to remove it 265 * on error. 266 */ 267 for (i = 0; sp[i].sp_call != NULL; i++) { 268 if (sy[sp[i].sp_code].sy_call != sys_nomodule) { 269 #ifdef DIAGNOSTIC 270 printf("syscall %d is busy\n", sp[i].sp_code); 271 #endif 272 return EBUSY; 273 } 274 } 275 /* Everything looks good, patch them in. */ 276 for (i = 0; sp[i].sp_call != NULL; i++) { 277 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 278 } 279 280 return 0; 281 } 282 283 int 284 syscall_disestablish(const struct emul *em, const struct syscall_package *sp) 285 { 286 struct sysent *sy; 287 uint64_t where; 288 lwp_t *l; 289 int i; 290 291 KASSERT(kernconfig_is_held()); 292 293 if (em == NULL) { 294 em = &emul_netbsd; 295 } 296 sy = em->e_sysent; 297 298 /* 299 * First, patch the system calls to sys_nomodule to gate further 300 * activity. 301 */ 302 for (i = 0; sp[i].sp_call != NULL; i++) { 303 KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call); 304 sy[sp[i].sp_code].sy_call = sys_nomodule; 305 } 306 307 /* 308 * Run a cross call to cycle through all CPUs. This does two 309 * things: lock activity provides a barrier and makes our update 310 * of sy_call visible to all CPUs, and upon return we can be sure 311 * that we see pertinent values of l_sysent posted by remote CPUs. 312 */ 313 where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 314 xc_wait(where); 315 316 /* 317 * Now it's safe to check l_sysent. Run through all LWPs and see 318 * if anyone is still using the system call. 319 */ 320 for (i = 0; sp[i].sp_call != NULL; i++) { 321 mutex_enter(proc_lock); 322 LIST_FOREACH(l, &alllwp, l_list) { 323 if (l->l_sysent == &sy[sp[i].sp_code]) { 324 break; 325 } 326 } 327 mutex_exit(proc_lock); 328 if (l == NULL) { 329 continue; 330 } 331 /* 332 * We lose: one or more calls are still in use. Put back 333 * the old entrypoints and act like nothing happened. 334 * When we drop kernconfig_lock, any system calls held in 335 * sys_nomodule() will be restarted. 336 */ 337 for (i = 0; sp[i].sp_call != NULL; i++) { 338 sy[sp[i].sp_code].sy_call = sp[i].sp_call; 339 } 340 return EBUSY; 341 } 342 343 return 0; 344 } 345 346 /* 347 * Return true if system call tracing is enabled for the specified process. 348 */ 349 bool 350 trace_is_enabled(struct proc *p) 351 { 352 #ifdef SYSCALL_DEBUG 353 return (true); 354 #endif 355 #ifdef KTRACE 356 if (ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET))) 357 return (true); 358 #endif 359 #ifdef PTRACE 360 if (ISSET(p->p_slflag, PSL_SYSCALL)) 361 return (true); 362 #endif 363 364 return (false); 365 } 366 367 /* 368 * Start trace of particular system call. If process is being traced, 369 * this routine is called by MD syscall dispatch code just before 370 * a system call is actually executed. 371 */ 372 int 373 trace_enter(register_t code, const register_t *args, int narg) 374 { 375 int error = 0; 376 377 #ifdef SYSCALL_DEBUG 378 scdebug_call(code, args); 379 #endif /* SYSCALL_DEBUG */ 380 381 ktrsyscall(code, args, narg); 382 383 #ifdef PTRACE 384 if ((curlwp->l_proc->p_slflag & (PSL_SYSCALL|PSL_TRACED)) == 385 (PSL_SYSCALL|PSL_TRACED)) { 386 process_stoptrace(); 387 if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) { 388 /* tracer will emulate syscall for us */ 389 error = EJUSTRETURN; 390 } 391 } 392 #endif 393 return error; 394 } 395 396 /* 397 * End trace of particular system call. If process is being traced, 398 * this routine is called by MD syscall dispatch code just after 399 * a system call finishes. 400 * MD caller guarantees the passed 'code' is within the supported 401 * system call number range for emulation the process runs under. 402 */ 403 void 404 trace_exit(register_t code, register_t rval[], int error) 405 { 406 #ifdef PTRACE 407 struct proc *p = curlwp->l_proc; 408 #endif 409 410 #ifdef SYSCALL_DEBUG 411 scdebug_ret(code, error, rval); 412 #endif /* SYSCALL_DEBUG */ 413 414 ktrsysret(code, error, rval); 415 416 #ifdef PTRACE 417 if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) == 418 (PSL_SYSCALL|PSL_TRACED)) 419 process_stoptrace(); 420 CLR(p->p_slflag, PSL_SYSCALLEMU); 421 #endif 422 } 423