1*67653226Schristos /* $NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $ */
27ea24651Spooka
37ea24651Spooka /*-
47ea24651Spooka * Copyright (c) 2008 The NetBSD Foundation, Inc.
57ea24651Spooka * All rights reserved.
67ea24651Spooka *
77ea24651Spooka * This code is derived from software developed for The NetBSD Foundation
87ea24651Spooka * by Andrew Doran.
97ea24651Spooka *
107ea24651Spooka * Redistribution and use in source and binary forms, with or without
117ea24651Spooka * modification, are permitted provided that the following conditions
127ea24651Spooka * are met:
137ea24651Spooka * 1. Redistributions of source code must retain the above copyright
147ea24651Spooka * notice, this list of conditions and the following disclaimer.
157ea24651Spooka * 2. Redistributions in binary form must reproduce the above copyright
167ea24651Spooka * notice, this list of conditions and the following disclaimer in the
177ea24651Spooka * documentation and/or other materials provided with the distribution.
187ea24651Spooka *
197ea24651Spooka * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
207ea24651Spooka * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
217ea24651Spooka * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
227ea24651Spooka * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
237ea24651Spooka * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
247ea24651Spooka * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
257ea24651Spooka * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
267ea24651Spooka * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
277ea24651Spooka * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
287ea24651Spooka * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
297ea24651Spooka * POSSIBILITY OF SUCH DAMAGE.
307ea24651Spooka */
317ea24651Spooka
327ea24651Spooka #include <sys/cdefs.h>
33*67653226Schristos __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $");
3464d6a27dSpooka
359737cfddSpooka #ifdef _KERNEL_OPT
3664d6a27dSpooka #include "opt_modular.h"
379737cfddSpooka #include "opt_syscall_debug.h"
389737cfddSpooka #include "opt_ktrace.h"
399737cfddSpooka #include "opt_ptrace.h"
407678e817Schristos #include "opt_dtrace.h"
419737cfddSpooka #endif
427ea24651Spooka
430d8b367eSpooka /* XXX To get syscall prototypes. */
440d8b367eSpooka #define SYSVSHM
450d8b367eSpooka #define SYSVSEM
460d8b367eSpooka #define SYSVMSG
470d8b367eSpooka
487ea24651Spooka #include <sys/param.h>
497ea24651Spooka #include <sys/module.h>
50cc69e456Spooka #include <sys/sched.h>
517ea24651Spooka #include <sys/syscall.h>
527ea24651Spooka #include <sys/syscallargs.h>
537ea24651Spooka #include <sys/syscallvar.h>
5423d5409eSpgoyette #include <sys/systm.h>
557ea24651Spooka #include <sys/xcall.h>
569737cfddSpooka #include <sys/ktrace.h>
579737cfddSpooka #include <sys/ptrace.h>
587ea24651Spooka
597ea24651Spooka int
sys_nomodule(struct lwp * l,const void * v,register_t * retval)607ea24651Spooka sys_nomodule(struct lwp *l, const void *v, register_t *retval)
617ea24651Spooka {
627ea24651Spooka #ifdef MODULAR
636318a135Spgoyette
647ea24651Spooka const struct sysent *sy;
657ea24651Spooka const struct emul *em;
6689c8da71Spgoyette const struct sc_autoload *auto_list;
6789c8da71Spgoyette u_int code;
687ea24651Spooka
697ea24651Spooka /*
707ea24651Spooka * Restart the syscall if we interrupted a module unload that
7123d5409eSpgoyette * failed. Acquiring kernconfig_lock delays us until any unload
727ea24651Spooka * has been completed or rolled back.
737ea24651Spooka */
7423d5409eSpgoyette kernconfig_lock();
757ea24651Spooka sy = l->l_sysent;
767ea24651Spooka if (sy->sy_call != sys_nomodule) {
7723d5409eSpgoyette kernconfig_unlock();
787ea24651Spooka return ERESTART;
797ea24651Spooka }
807ea24651Spooka /*
817ea24651Spooka * Try to autoload a module to satisfy the request. If it
827ea24651Spooka * works, retry the request.
837ea24651Spooka */
847ea24651Spooka em = l->l_proc->p_emul;
857ea24651Spooka code = sy - em->e_sysent;
860513b92cSpgoyette
870513b92cSpgoyette if ((auto_list = em->e_sc_autoload) != NULL)
880513b92cSpgoyette for (; auto_list->al_code > 0; auto_list++) {
890513b92cSpgoyette if (auto_list->al_code != code) {
907ea24651Spooka continue;
917ea24651Spooka }
920513b92cSpgoyette if (module_autoload(auto_list->al_module,
937ea24651Spooka MODULE_CLASS_ANY) != 0 ||
947ea24651Spooka sy->sy_call == sys_nomodule) {
957ea24651Spooka break;
967ea24651Spooka }
9723d5409eSpgoyette kernconfig_unlock();
987ea24651Spooka return ERESTART;
997ea24651Spooka }
10023d5409eSpgoyette kernconfig_unlock();
1017ea24651Spooka #endif /* MODULAR */
1027ea24651Spooka
1037ea24651Spooka return sys_nosys(l, v, retval);
1047ea24651Spooka }
1057ea24651Spooka
1067ea24651Spooka int
syscall_establish(const struct emul * em,const struct syscall_package * sp)1077ea24651Spooka syscall_establish(const struct emul *em, const struct syscall_package *sp)
1087ea24651Spooka {
1097ea24651Spooka struct sysent *sy;
1107ea24651Spooka int i;
1117ea24651Spooka
11223d5409eSpgoyette KASSERT(kernconfig_is_held());
1137ea24651Spooka
1147ea24651Spooka if (em == NULL) {
1157ea24651Spooka em = &emul_netbsd;
1167ea24651Spooka }
1177ea24651Spooka sy = em->e_sysent;
1187ea24651Spooka
1197ea24651Spooka /*
1207ea24651Spooka * Ensure that all preconditions are valid, since this is
1217ea24651Spooka * an all or nothing deal. Once a system call is entered,
1227ea24651Spooka * it can become busy and we could be unable to remove it
1237ea24651Spooka * on error.
1247ea24651Spooka */
1257ea24651Spooka for (i = 0; sp[i].sp_call != NULL; i++) {
1263cd7406aSpgoyette if (sp[i].sp_code >= SYS_NSYSENT)
1273cd7406aSpgoyette return EINVAL;
1283cd7406aSpgoyette if (sy[sp[i].sp_code].sy_call != sys_nomodule &&
1293cd7406aSpgoyette sy[sp[i].sp_code].sy_call != sys_nosys) {
1307ea24651Spooka #ifdef DIAGNOSTIC
1317ea24651Spooka printf("syscall %d is busy\n", sp[i].sp_code);
1327ea24651Spooka #endif
1337ea24651Spooka return EBUSY;
1347ea24651Spooka }
1357ea24651Spooka }
1367ea24651Spooka /* Everything looks good, patch them in. */
1377ea24651Spooka for (i = 0; sp[i].sp_call != NULL; i++) {
1387ea24651Spooka sy[sp[i].sp_code].sy_call = sp[i].sp_call;
1397ea24651Spooka }
1407ea24651Spooka
1417ea24651Spooka return 0;
1427ea24651Spooka }
1437ea24651Spooka
1447ea24651Spooka int
syscall_disestablish(const struct emul * em,const struct syscall_package * sp)1457ea24651Spooka syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
1467ea24651Spooka {
1477ea24651Spooka struct sysent *sy;
1483cd7406aSpgoyette const uint32_t *sb;
1497ea24651Spooka lwp_t *l;
1507ea24651Spooka int i;
1517ea24651Spooka
15223d5409eSpgoyette KASSERT(kernconfig_is_held());
1537ea24651Spooka
1547ea24651Spooka if (em == NULL) {
1557ea24651Spooka em = &emul_netbsd;
1567ea24651Spooka }
1577ea24651Spooka sy = em->e_sysent;
1583cd7406aSpgoyette sb = em->e_nomodbits;
1597ea24651Spooka
1607ea24651Spooka /*
1613cd7406aSpgoyette * First, patch the system calls to sys_nomodule or sys_nosys
1623cd7406aSpgoyette * to gate further activity.
1637ea24651Spooka */
1647ea24651Spooka for (i = 0; sp[i].sp_call != NULL; i++) {
1657ea24651Spooka KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
1663cd7406aSpgoyette sy[sp[i].sp_code].sy_call =
1673cd7406aSpgoyette sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ?
1683cd7406aSpgoyette sys_nomodule : sys_nosys;
1697ea24651Spooka }
1707ea24651Spooka
1717ea24651Spooka /*
1727ea24651Spooka * Run a cross call to cycle through all CPUs. This does two
1737ea24651Spooka * things: lock activity provides a barrier and makes our update
1747ea24651Spooka * of sy_call visible to all CPUs, and upon return we can be sure
1757ea24651Spooka * that we see pertinent values of l_sysent posted by remote CPUs.
1767ea24651Spooka */
177edcef67eSuwe xc_barrier(0);
1787ea24651Spooka
1797ea24651Spooka /*
1807ea24651Spooka * Now it's safe to check l_sysent. Run through all LWPs and see
1817ea24651Spooka * if anyone is still using the system call.
1827ea24651Spooka */
1837ea24651Spooka for (i = 0; sp[i].sp_call != NULL; i++) {
1840eaaa024Sad mutex_enter(&proc_lock);
1857ea24651Spooka LIST_FOREACH(l, &alllwp, l_list) {
1867ea24651Spooka if (l->l_sysent == &sy[sp[i].sp_code]) {
1877ea24651Spooka break;
1887ea24651Spooka }
1897ea24651Spooka }
1900eaaa024Sad mutex_exit(&proc_lock);
1917ea24651Spooka if (l == NULL) {
1927ea24651Spooka continue;
1937ea24651Spooka }
1947ea24651Spooka /*
1957ea24651Spooka * We lose: one or more calls are still in use. Put back
1967ea24651Spooka * the old entrypoints and act like nothing happened.
19723d5409eSpgoyette * When we drop kernconfig_lock, any system calls held in
1987ea24651Spooka * sys_nomodule() will be restarted.
1997ea24651Spooka */
2007ea24651Spooka for (i = 0; sp[i].sp_call != NULL; i++) {
2017ea24651Spooka sy[sp[i].sp_code].sy_call = sp[i].sp_call;
2027ea24651Spooka }
2037ea24651Spooka return EBUSY;
2047ea24651Spooka }
2057ea24651Spooka
2067ea24651Spooka return 0;
2077ea24651Spooka }
2089737cfddSpooka
/*
 * Report whether system call tracing applies to process 'p'.
 *
 * True when any of the following holds:
 *  - the kernel is built with SYSCALL_DEBUG;
 *  - KTRACE is configured and p_traceflag has KTRFAC_SYSCALL or
 *    KTRFAC_SYSRET set;
 *  - PTRACE is configured and p_slflag has PSL_SYSCALL set.
 */
bool
trace_is_enabled(struct proc *p)
{
	bool enabled = false;

#ifdef SYSCALL_DEBUG
	enabled = true;
#endif
#ifdef KTRACE
	/* Short-circuit: the flags are only read if nothing matched yet. */
	enabled = enabled ||
	    ISSET(p->p_traceflag, (KTRFAC_SYSCALL | KTRFAC_SYSRET));
#endif
#ifdef PTRACE
	enabled = enabled || ISSET(p->p_slflag, PSL_SYSCALL);
#endif

	return enabled;
}
2299737cfddSpooka
2309737cfddSpooka /*
2319737cfddSpooka * Start trace of particular system call. If process is being traced,
2329737cfddSpooka * this routine is called by MD syscall dispatch code just before
2339737cfddSpooka * a system call is actually executed.
2349737cfddSpooka */
2359737cfddSpooka int
trace_enter(register_t code,const struct sysent * sy,const void * args)2367678e817Schristos trace_enter(register_t code, const struct sysent *sy, const void *args)
2379737cfddSpooka {
2389737cfddSpooka int error = 0;
239*67653226Schristos #if defined(PTRACE) || defined(KDTRACE_HOOKS)
240*67653226Schristos struct proc *p = curlwp->l_proc;
241*67653226Schristos #endif
2429737cfddSpooka
2437678e817Schristos #ifdef KDTRACE_HOOKS
2447678e817Schristos if (sy->sy_entry) {
245*67653226Schristos struct emul *e = p->p_emul;
246*67653226Schristos if (e->e_dtrace_syscall)
247*67653226Schristos (*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args,
248*67653226Schristos NULL, 0);
2497678e817Schristos }
2507678e817Schristos #endif
2517678e817Schristos
2529737cfddSpooka #ifdef SYSCALL_DEBUG
2539737cfddSpooka scdebug_call(code, args);
2549737cfddSpooka #endif /* SYSCALL_DEBUG */
2559737cfddSpooka
2567678e817Schristos ktrsyscall(code, args, sy->sy_narg);
2579737cfddSpooka
2589737cfddSpooka #ifdef PTRACE
259*67653226Schristos if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
2609737cfddSpooka (PSL_SYSCALL|PSL_TRACED)) {
2617dee5622Skamil proc_stoptrace(TRAP_SCE, code, args, NULL, 0);
2629737cfddSpooka if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
2639737cfddSpooka /* tracer will emulate syscall for us */
2649737cfddSpooka error = EJUSTRETURN;
2659737cfddSpooka }
2669737cfddSpooka }
2679737cfddSpooka #endif
2689737cfddSpooka return error;
2699737cfddSpooka }
2709737cfddSpooka
2719737cfddSpooka /*
2729737cfddSpooka * End trace of particular system call. If process is being traced,
2739737cfddSpooka * this routine is called by MD syscall dispatch code just after
2749737cfddSpooka * a system call finishes.
2759737cfddSpooka * MD caller guarantees the passed 'code' is within the supported
2769737cfddSpooka * system call number range for emulation the process runs under.
2779737cfddSpooka */
2789737cfddSpooka void
trace_exit(register_t code,const struct sysent * sy,const void * args,register_t rval[],int error)2797678e817Schristos trace_exit(register_t code, const struct sysent *sy, const void *args,
2807678e817Schristos register_t rval[], int error)
2819737cfddSpooka {
2827678e817Schristos #if defined(PTRACE) || defined(KDTRACE_HOOKS)
2839737cfddSpooka struct proc *p = curlwp->l_proc;
2849737cfddSpooka #endif
2859737cfddSpooka
2867678e817Schristos #ifdef KDTRACE_HOOKS
2877678e817Schristos if (sy->sy_return) {
288*67653226Schristos struct emul *e = p->p_emul;
289*67653226Schristos if (e->e_dtrace_syscall)
290*67653226Schristos (*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy,
291*67653226Schristos args, rval, error);
2927678e817Schristos }
2937678e817Schristos #endif
2947678e817Schristos
2959737cfddSpooka #ifdef SYSCALL_DEBUG
2969737cfddSpooka scdebug_ret(code, error, rval);
2979737cfddSpooka #endif /* SYSCALL_DEBUG */
2989737cfddSpooka
2999737cfddSpooka ktrsysret(code, error, rval);
3009737cfddSpooka
3019737cfddSpooka #ifdef PTRACE
3029737cfddSpooka if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
30302a58fe8Schristos (PSL_SYSCALL|PSL_TRACED)) {
3047dee5622Skamil proc_stoptrace(TRAP_SCX, code, args, rval, error);
30502a58fe8Schristos }
3069737cfddSpooka CLR(p->p_slflag, PSL_SYSCALLEMU);
3079737cfddSpooka #endif
3089737cfddSpooka }
309