xref: /netbsd-src/sys/kern/kern_syscall.c (revision 67653226d176d2359c25aea3366702b794d52ec1)
1*67653226Schristos /*	$NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $	*/
27ea24651Spooka 
37ea24651Spooka /*-
47ea24651Spooka  * Copyright (c) 2008 The NetBSD Foundation, Inc.
57ea24651Spooka  * All rights reserved.
67ea24651Spooka  *
77ea24651Spooka  * This code is derived from software developed for The NetBSD Foundation
87ea24651Spooka  * by Andrew Doran.
97ea24651Spooka  *
107ea24651Spooka  * Redistribution and use in source and binary forms, with or without
117ea24651Spooka  * modification, are permitted provided that the following conditions
127ea24651Spooka  * are met:
137ea24651Spooka  * 1. Redistributions of source code must retain the above copyright
147ea24651Spooka  *    notice, this list of conditions and the following disclaimer.
157ea24651Spooka  * 2. Redistributions in binary form must reproduce the above copyright
167ea24651Spooka  *    notice, this list of conditions and the following disclaimer in the
177ea24651Spooka  *    documentation and/or other materials provided with the distribution.
187ea24651Spooka  *
197ea24651Spooka  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
207ea24651Spooka  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
217ea24651Spooka  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
227ea24651Spooka  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
237ea24651Spooka  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
247ea24651Spooka  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
257ea24651Spooka  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
267ea24651Spooka  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
277ea24651Spooka  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
287ea24651Spooka  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
297ea24651Spooka  * POSSIBILITY OF SUCH DAMAGE.
307ea24651Spooka  */
317ea24651Spooka 
327ea24651Spooka #include <sys/cdefs.h>
33*67653226Schristos __KERNEL_RCSID(0, "$NetBSD: kern_syscall.c,v 1.21 2020/08/31 19:51:30 christos Exp $");
3464d6a27dSpooka 
359737cfddSpooka #ifdef _KERNEL_OPT
3664d6a27dSpooka #include "opt_modular.h"
379737cfddSpooka #include "opt_syscall_debug.h"
389737cfddSpooka #include "opt_ktrace.h"
399737cfddSpooka #include "opt_ptrace.h"
407678e817Schristos #include "opt_dtrace.h"
419737cfddSpooka #endif
427ea24651Spooka 
430d8b367eSpooka /* XXX To get syscall prototypes. */
440d8b367eSpooka #define SYSVSHM
450d8b367eSpooka #define SYSVSEM
460d8b367eSpooka #define SYSVMSG
470d8b367eSpooka 
487ea24651Spooka #include <sys/param.h>
497ea24651Spooka #include <sys/module.h>
50cc69e456Spooka #include <sys/sched.h>
517ea24651Spooka #include <sys/syscall.h>
527ea24651Spooka #include <sys/syscallargs.h>
537ea24651Spooka #include <sys/syscallvar.h>
5423d5409eSpgoyette #include <sys/systm.h>
557ea24651Spooka #include <sys/xcall.h>
569737cfddSpooka #include <sys/ktrace.h>
579737cfddSpooka #include <sys/ptrace.h>
587ea24651Spooka 
597ea24651Spooka int
sys_nomodule(struct lwp * l,const void * v,register_t * retval)607ea24651Spooka sys_nomodule(struct lwp *l, const void *v, register_t *retval)
617ea24651Spooka {
627ea24651Spooka #ifdef MODULAR
636318a135Spgoyette 
647ea24651Spooka 	const struct sysent *sy;
657ea24651Spooka 	const struct emul *em;
6689c8da71Spgoyette 	const struct sc_autoload *auto_list;
6789c8da71Spgoyette 	u_int code;
687ea24651Spooka 
697ea24651Spooka 	/*
707ea24651Spooka 	 * Restart the syscall if we interrupted a module unload that
7123d5409eSpgoyette 	 * failed.  Acquiring kernconfig_lock delays us until any unload
727ea24651Spooka 	 * has been completed or rolled back.
737ea24651Spooka 	 */
7423d5409eSpgoyette 	kernconfig_lock();
757ea24651Spooka 	sy = l->l_sysent;
767ea24651Spooka 	if (sy->sy_call != sys_nomodule) {
7723d5409eSpgoyette 		kernconfig_unlock();
787ea24651Spooka 		return ERESTART;
797ea24651Spooka 	}
807ea24651Spooka 	/*
817ea24651Spooka 	 * Try to autoload a module to satisfy the request.  If it
827ea24651Spooka 	 * works, retry the request.
837ea24651Spooka 	 */
847ea24651Spooka 	em = l->l_proc->p_emul;
857ea24651Spooka 	code = sy - em->e_sysent;
860513b92cSpgoyette 
870513b92cSpgoyette 	if ((auto_list = em->e_sc_autoload) != NULL)
880513b92cSpgoyette 		for (; auto_list->al_code > 0; auto_list++) {
890513b92cSpgoyette 			if (auto_list->al_code != code) {
907ea24651Spooka 				continue;
917ea24651Spooka 			}
920513b92cSpgoyette 			if (module_autoload(auto_list->al_module,
937ea24651Spooka 			    MODULE_CLASS_ANY) != 0 ||
947ea24651Spooka 			    sy->sy_call == sys_nomodule) {
957ea24651Spooka 			    	break;
967ea24651Spooka 			}
9723d5409eSpgoyette 			kernconfig_unlock();
987ea24651Spooka 			return ERESTART;
997ea24651Spooka 		}
10023d5409eSpgoyette 	kernconfig_unlock();
1017ea24651Spooka #endif	/* MODULAR */
1027ea24651Spooka 
1037ea24651Spooka 	return sys_nosys(l, v, retval);
1047ea24651Spooka }
1057ea24651Spooka 
1067ea24651Spooka int
syscall_establish(const struct emul * em,const struct syscall_package * sp)1077ea24651Spooka syscall_establish(const struct emul *em, const struct syscall_package *sp)
1087ea24651Spooka {
1097ea24651Spooka 	struct sysent *sy;
1107ea24651Spooka 	int i;
1117ea24651Spooka 
11223d5409eSpgoyette 	KASSERT(kernconfig_is_held());
1137ea24651Spooka 
1147ea24651Spooka 	if (em == NULL) {
1157ea24651Spooka 		em = &emul_netbsd;
1167ea24651Spooka 	}
1177ea24651Spooka 	sy = em->e_sysent;
1187ea24651Spooka 
1197ea24651Spooka 	/*
1207ea24651Spooka 	 * Ensure that all preconditions are valid, since this is
1217ea24651Spooka 	 * an all or nothing deal.  Once a system call is entered,
1227ea24651Spooka 	 * it can become busy and we could be unable to remove it
1237ea24651Spooka 	 * on error.
1247ea24651Spooka 	 */
1257ea24651Spooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
1263cd7406aSpgoyette 		if (sp[i].sp_code >= SYS_NSYSENT)
1273cd7406aSpgoyette 			return EINVAL;
1283cd7406aSpgoyette 		if (sy[sp[i].sp_code].sy_call != sys_nomodule &&
1293cd7406aSpgoyette 		    sy[sp[i].sp_code].sy_call != sys_nosys) {
1307ea24651Spooka #ifdef DIAGNOSTIC
1317ea24651Spooka 			printf("syscall %d is busy\n", sp[i].sp_code);
1327ea24651Spooka #endif
1337ea24651Spooka 			return EBUSY;
1347ea24651Spooka 		}
1357ea24651Spooka 	}
1367ea24651Spooka 	/* Everything looks good, patch them in. */
1377ea24651Spooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
1387ea24651Spooka 		sy[sp[i].sp_code].sy_call = sp[i].sp_call;
1397ea24651Spooka 	}
1407ea24651Spooka 
1417ea24651Spooka 	return 0;
1427ea24651Spooka }
1437ea24651Spooka 
1447ea24651Spooka int
syscall_disestablish(const struct emul * em,const struct syscall_package * sp)1457ea24651Spooka syscall_disestablish(const struct emul *em, const struct syscall_package *sp)
1467ea24651Spooka {
1477ea24651Spooka 	struct sysent *sy;
1483cd7406aSpgoyette 	const uint32_t *sb;
1497ea24651Spooka 	lwp_t *l;
1507ea24651Spooka 	int i;
1517ea24651Spooka 
15223d5409eSpgoyette 	KASSERT(kernconfig_is_held());
1537ea24651Spooka 
1547ea24651Spooka 	if (em == NULL) {
1557ea24651Spooka 		em = &emul_netbsd;
1567ea24651Spooka 	}
1577ea24651Spooka 	sy = em->e_sysent;
1583cd7406aSpgoyette 	sb = em->e_nomodbits;
1597ea24651Spooka 
1607ea24651Spooka 	/*
1613cd7406aSpgoyette 	 * First, patch the system calls to sys_nomodule or sys_nosys
1623cd7406aSpgoyette 	 * to gate further activity.
1637ea24651Spooka 	 */
1647ea24651Spooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
1657ea24651Spooka 		KASSERT(sy[sp[i].sp_code].sy_call == sp[i].sp_call);
1663cd7406aSpgoyette 		sy[sp[i].sp_code].sy_call =
1673cd7406aSpgoyette 		    sb[sp[i].sp_code / 32] & (1 << (sp[i].sp_code % 32)) ?
1683cd7406aSpgoyette 		      sys_nomodule : sys_nosys;
1697ea24651Spooka 	}
1707ea24651Spooka 
1717ea24651Spooka 	/*
1727ea24651Spooka 	 * Run a cross call to cycle through all CPUs.  This does two
1737ea24651Spooka 	 * things: lock activity provides a barrier and makes our update
1747ea24651Spooka 	 * of sy_call visible to all CPUs, and upon return we can be sure
1757ea24651Spooka 	 * that we see pertinent values of l_sysent posted by remote CPUs.
1767ea24651Spooka 	 */
177edcef67eSuwe 	xc_barrier(0);
1787ea24651Spooka 
1797ea24651Spooka 	/*
1807ea24651Spooka 	 * Now it's safe to check l_sysent.  Run through all LWPs and see
1817ea24651Spooka 	 * if anyone is still using the system call.
1827ea24651Spooka 	 */
1837ea24651Spooka 	for (i = 0; sp[i].sp_call != NULL; i++) {
1840eaaa024Sad 		mutex_enter(&proc_lock);
1857ea24651Spooka 		LIST_FOREACH(l, &alllwp, l_list) {
1867ea24651Spooka 			if (l->l_sysent == &sy[sp[i].sp_code]) {
1877ea24651Spooka 				break;
1887ea24651Spooka 			}
1897ea24651Spooka 		}
1900eaaa024Sad 		mutex_exit(&proc_lock);
1917ea24651Spooka 		if (l == NULL) {
1927ea24651Spooka 			continue;
1937ea24651Spooka 		}
1947ea24651Spooka 		/*
1957ea24651Spooka 		 * We lose: one or more calls are still in use.  Put back
1967ea24651Spooka 		 * the old entrypoints and act like nothing happened.
19723d5409eSpgoyette 		 * When we drop kernconfig_lock, any system calls held in
1987ea24651Spooka 		 * sys_nomodule() will be restarted.
1997ea24651Spooka 		 */
2007ea24651Spooka 		for (i = 0; sp[i].sp_call != NULL; i++) {
2017ea24651Spooka 			sy[sp[i].sp_code].sy_call = sp[i].sp_call;
2027ea24651Spooka 		}
2037ea24651Spooka 		return EBUSY;
2047ea24651Spooka 	}
2057ea24651Spooka 
2067ea24651Spooka 	return 0;
2077ea24651Spooka }
2089737cfddSpooka 
/*
 * trace_is_enabled: report whether system call tracing applies to
 * process p.
 *
 * Always true on SYSCALL_DEBUG kernels.  Otherwise true when ktrace has
 * syscall entry or return tracing enabled for the process (KTRACE), or
 * when PSL_SYSCALL is set in p_slflag (PTRACE).
 */
bool
trace_is_enabled(struct proc *p)
{
#ifdef SYSCALL_DEBUG
	return true;
#endif
#ifdef KTRACE
	if ((p->p_traceflag & (KTRFAC_SYSCALL | KTRFAC_SYSRET)) != 0) {
		return true;
	}
#endif
#ifdef PTRACE
	if ((p->p_slflag & PSL_SYSCALL) != 0) {
		return true;
	}
#endif

	return false;
}
2299737cfddSpooka 
2309737cfddSpooka /*
2319737cfddSpooka  * Start trace of particular system call. If process is being traced,
2329737cfddSpooka  * this routine is called by MD syscall dispatch code just before
2339737cfddSpooka  * a system call is actually executed.
2349737cfddSpooka  */
2359737cfddSpooka int
trace_enter(register_t code,const struct sysent * sy,const void * args)2367678e817Schristos trace_enter(register_t code, const struct sysent *sy, const void *args)
2379737cfddSpooka {
2389737cfddSpooka 	int error = 0;
239*67653226Schristos #if defined(PTRACE) || defined(KDTRACE_HOOKS)
240*67653226Schristos 	struct proc *p = curlwp->l_proc;
241*67653226Schristos #endif
2429737cfddSpooka 
2437678e817Schristos #ifdef KDTRACE_HOOKS
2447678e817Schristos 	if (sy->sy_entry) {
245*67653226Schristos 		struct emul *e = p->p_emul;
246*67653226Schristos 		if (e->e_dtrace_syscall)
247*67653226Schristos 			(*e->e_dtrace_syscall)(sy->sy_entry, code, sy, args,
248*67653226Schristos 			    NULL, 0);
2497678e817Schristos 	}
2507678e817Schristos #endif
2517678e817Schristos 
2529737cfddSpooka #ifdef SYSCALL_DEBUG
2539737cfddSpooka 	scdebug_call(code, args);
2549737cfddSpooka #endif /* SYSCALL_DEBUG */
2559737cfddSpooka 
2567678e817Schristos 	ktrsyscall(code, args, sy->sy_narg);
2579737cfddSpooka 
2589737cfddSpooka #ifdef PTRACE
259*67653226Schristos 	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED)) ==
2609737cfddSpooka 	    (PSL_SYSCALL|PSL_TRACED)) {
2617dee5622Skamil 		proc_stoptrace(TRAP_SCE, code, args, NULL, 0);
2629737cfddSpooka 		if (curlwp->l_proc->p_slflag & PSL_SYSCALLEMU) {
2639737cfddSpooka 			/* tracer will emulate syscall for us */
2649737cfddSpooka 			error = EJUSTRETURN;
2659737cfddSpooka 		}
2669737cfddSpooka 	}
2679737cfddSpooka #endif
2689737cfddSpooka 	return error;
2699737cfddSpooka }
2709737cfddSpooka 
2719737cfddSpooka /*
2729737cfddSpooka  * End trace of particular system call. If process is being traced,
2739737cfddSpooka  * this routine is called by MD syscall dispatch code just after
2749737cfddSpooka  * a system call finishes.
2759737cfddSpooka  * MD caller guarantees the passed 'code' is within the supported
2769737cfddSpooka  * system call number range for emulation the process runs under.
2779737cfddSpooka  */
2789737cfddSpooka void
trace_exit(register_t code,const struct sysent * sy,const void * args,register_t rval[],int error)2797678e817Schristos trace_exit(register_t code, const struct sysent *sy, const void *args,
2807678e817Schristos     register_t rval[], int error)
2819737cfddSpooka {
2827678e817Schristos #if defined(PTRACE) || defined(KDTRACE_HOOKS)
2839737cfddSpooka 	struct proc *p = curlwp->l_proc;
2849737cfddSpooka #endif
2859737cfddSpooka 
2867678e817Schristos #ifdef KDTRACE_HOOKS
2877678e817Schristos 	if (sy->sy_return) {
288*67653226Schristos 		struct emul *e = p->p_emul;
289*67653226Schristos 		if (e->e_dtrace_syscall)
290*67653226Schristos 			(*p->p_emul->e_dtrace_syscall)(sy->sy_return, code, sy,
291*67653226Schristos 			    args, rval, error);
2927678e817Schristos 	}
2937678e817Schristos #endif
2947678e817Schristos 
2959737cfddSpooka #ifdef SYSCALL_DEBUG
2969737cfddSpooka 	scdebug_ret(code, error, rval);
2979737cfddSpooka #endif /* SYSCALL_DEBUG */
2989737cfddSpooka 
2999737cfddSpooka 	ktrsysret(code, error, rval);
3009737cfddSpooka 
3019737cfddSpooka #ifdef PTRACE
3029737cfddSpooka 	if ((p->p_slflag & (PSL_SYSCALL|PSL_TRACED|PSL_SYSCALLEMU)) ==
30302a58fe8Schristos 	    (PSL_SYSCALL|PSL_TRACED)) {
3047dee5622Skamil 		proc_stoptrace(TRAP_SCX, code, args, rval, error);
30502a58fe8Schristos 	}
3069737cfddSpooka 	CLR(p->p_slflag, PSL_SYSCALLEMU);
3079737cfddSpooka #endif
3089737cfddSpooka }
309