xref: /netbsd-src/sys/rump/kern/lib/libsysproxy/sysproxy.c (revision c6043c0ab520c1b39fe42a5ca8063bcf0e93c1bf)
/*	$NetBSD: sysproxy.c,v 1.10 2023/07/16 23:05:53 riastradh Exp $	*/

/*
 * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
270397f3a7Spooka 
280397f3a7Spooka #include <sys/cdefs.h>
29*c6043c0aSriastradh __KERNEL_RCSID(0, "$NetBSD: sysproxy.c,v 1.10 2023/07/16 23:05:53 riastradh Exp $");
300397f3a7Spooka 
310397f3a7Spooka #include <sys/param.h>
320397f3a7Spooka #include <sys/filedesc.h>
330397f3a7Spooka #include <sys/kmem.h>
340397f3a7Spooka #include <sys/syscall.h>
350397f3a7Spooka #include <sys/syscallvar.h>
360397f3a7Spooka #include <sys/systm.h>
370397f3a7Spooka #include <sys/xcall.h>
387a39609dSozaki-r #include <sys/lockdebug.h>
397fc219a5Sozaki-r #include <sys/psref.h>
400397f3a7Spooka 
411f04e3f8Sriastradh #if defined(__i386__) || defined(__x86_64__)
/*
 * This file abuses the pmap abstraction to create its own statically
 * allocated struct pmap object, even though it can't do anything
 * useful with such a thing from userland.  On x86 the struct pmap
 * definition is private, so we have to go to extra effort to abuse it
 * there.  This should be fixed -- all of the struct pmap definitions
 * should be private, and then rump can furnish its own fake struct
 * pmap without clashing with anything.
 */
511f04e3f8Sriastradh #include <machine/pmap_private.h>
521f04e3f8Sriastradh #endif
531f04e3f8Sriastradh 
540397f3a7Spooka #define _RUMP_SYSPROXY
550397f3a7Spooka #include <rump/rumpuser.h>
560397f3a7Spooka 
576bb51422Spooka #include <rump-sys/kern.h>
580397f3a7Spooka 
590397f3a7Spooka int
rump_init_server(const char * url)600397f3a7Spooka rump_init_server(const char *url)
610397f3a7Spooka {
620397f3a7Spooka 
630397f3a7Spooka 	return rumpuser_sp_init(url, ostype, osrelease, MACHINE);
640397f3a7Spooka }
650397f3a7Spooka 
660397f3a7Spooka static pid_t
hyp_getpid(void)670397f3a7Spooka hyp_getpid(void)
680397f3a7Spooka {
690397f3a7Spooka 
700397f3a7Spooka 	return curproc->p_pid;
710397f3a7Spooka }
720397f3a7Spooka 
730397f3a7Spooka static int
hyp_syscall(int num,void * arg,long * retval)740397f3a7Spooka hyp_syscall(int num, void *arg, long *retval)
750397f3a7Spooka {
760397f3a7Spooka 	register_t regrv[2] = {0, 0};
770397f3a7Spooka 	struct lwp *l;
780397f3a7Spooka 	struct sysent *callp;
790397f3a7Spooka 	int rv;
800397f3a7Spooka 
810397f3a7Spooka 	if (__predict_false(num >= SYS_NSYSENT))
820397f3a7Spooka 		return ENOSYS;
830397f3a7Spooka 
840397f3a7Spooka 	/* XXX: always uses native syscall vector */
850397f3a7Spooka 	callp = rump_sysent + num;
860397f3a7Spooka 	l = curlwp;
870397f3a7Spooka 	rv = sy_invoke(callp, l, (void *)arg, regrv, num);
880397f3a7Spooka 	retval[0] = regrv[0];
890397f3a7Spooka 	retval[1] = regrv[1];
900397f3a7Spooka 
917a39609dSozaki-r 	/* Sanity checks (from mi_userret) */
927a39609dSozaki-r 	LOCKDEBUG_BARRIER(NULL, 0);
937a39609dSozaki-r 	KASSERT(l->l_nopreempt == 0);
947fc219a5Sozaki-r 	PSREF_DEBUG_BARRIER();
953843688cSozaki-r 	KASSERT(l->l_psrefs == 0);
967a39609dSozaki-r 
970397f3a7Spooka 	return rv;
980397f3a7Spooka }
990397f3a7Spooka 
1006195daadSpooka static struct pmap remotepmap;
1016195daadSpooka 
1020397f3a7Spooka static int
hyp_rfork(void * priv,int flags,const char * comm)1030397f3a7Spooka hyp_rfork(void *priv, int flags, const char *comm)
1040397f3a7Spooka {
1056195daadSpooka 	struct rump_spctl *spctl;
10652af9983Spooka 	struct vmspace *vm;
1070397f3a7Spooka 	struct proc *p;
1080397f3a7Spooka 	struct lwp *l;
1090397f3a7Spooka 	int error;
1100397f3a7Spooka 	bool initfds;
1110397f3a7Spooka 
1120397f3a7Spooka 	/*
1130397f3a7Spooka 	 * If we are forking off of pid 1, initialize file descriptors.
1140397f3a7Spooka 	 */
1150397f3a7Spooka 	l = curlwp;
1160397f3a7Spooka 	if (l->l_proc->p_pid == 1) {
1170397f3a7Spooka 		KASSERT(flags == RUMP_RFFD_CLEAR);
1180397f3a7Spooka 		initfds = true;
1190397f3a7Spooka 	} else {
1200397f3a7Spooka 		initfds = false;
1210397f3a7Spooka 	}
1220397f3a7Spooka 
12352af9983Spooka 	/*
1246195daadSpooka 	 * Since it's a proxy proc, we create a vmspace for it.
12552af9983Spooka 	 */
1266195daadSpooka 	spctl = kmem_zalloc(sizeof(*spctl), KM_SLEEP);
1276195daadSpooka 	vm = &spctl->spctl_vm;
1286195daadSpooka 	uvmspace_init(vm, &remotepmap, 0, 0, false);
1296195daadSpooka 	spctl->spctl = priv;
13052af9983Spooka 
13152af9983Spooka 	if ((error = rump_lwproc_rfork_vmspace(vm, flags)) != 0) {
13252af9983Spooka 		kmem_free(vm, sizeof(*vm));
1330397f3a7Spooka 		return error;
13452af9983Spooka 	}
1350397f3a7Spooka 
1360397f3a7Spooka 	/*
1370397f3a7Spooka 	 * We forked in this routine, so cannot use curlwp (const)
1380397f3a7Spooka 	 */
1390397f3a7Spooka 	l = rump_lwproc_curlwp();
1400397f3a7Spooka 	p = l->l_proc;
1410397f3a7Spooka 
1420397f3a7Spooka 	if (comm)
1430397f3a7Spooka 		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
1440397f3a7Spooka 	if (initfds)
1450397f3a7Spooka 		rump_consdev_init();
1460397f3a7Spooka 
1470397f3a7Spooka 	return 0;
1480397f3a7Spooka }
1490397f3a7Spooka 
1500397f3a7Spooka /*
1510397f3a7Spooka  * Order all lwps in a process to exit.  does *not* wait for them to drain.
1520397f3a7Spooka  */
1530397f3a7Spooka static void
hyp_lwpexit(void)1540397f3a7Spooka hyp_lwpexit(void)
1550397f3a7Spooka {
1560397f3a7Spooka 	struct proc *p = curproc;
1570397f3a7Spooka 	struct lwp *l;
1580397f3a7Spooka 
1590397f3a7Spooka 	mutex_enter(p->p_lock);
1600397f3a7Spooka 	/*
1610397f3a7Spooka 	 * First pass: mark all lwps in the process with LW_RUMP_QEXIT
1620397f3a7Spooka 	 * so that they know they should exit.
1630397f3a7Spooka 	 */
1640397f3a7Spooka 	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1650397f3a7Spooka 		if (l == curlwp)
1660397f3a7Spooka 			continue;
1670397f3a7Spooka 		l->l_flag |= LW_RUMP_QEXIT;
1680397f3a7Spooka 	}
1690397f3a7Spooka 	mutex_exit(p->p_lock);
1700397f3a7Spooka 
1710397f3a7Spooka 	/*
1720397f3a7Spooka 	 * Next, make sure everyone on all CPUs sees our status
1730397f3a7Spooka 	 * update.  This keeps threads inside cv_wait() and makes
1740397f3a7Spooka 	 * sure we don't access a stale cv pointer later when
1750397f3a7Spooka 	 * we wake up the threads.
1760397f3a7Spooka 	 */
1770397f3a7Spooka 
178edcef67eSuwe 	xc_barrier(0);
1790397f3a7Spooka 
1800397f3a7Spooka 	/*
1810397f3a7Spooka 	 * Ok, all lwps are either:
1820397f3a7Spooka 	 *  1) not in the cv code
183*c6043c0aSriastradh 	 *  2) sleeping on l->l_sched.info
1840397f3a7Spooka 	 *  3) sleeping on p->p_waitcv
1850397f3a7Spooka 	 *
186*c6043c0aSriastradh 	 * Either way, l_sched.info is stable until we set
187*c6043c0aSriastradh 	 * PS_RUMP_LWPEXIT in p->p_sflag.
1880397f3a7Spooka 	 */
1890397f3a7Spooka 
1900397f3a7Spooka 	mutex_enter(p->p_lock);
1910397f3a7Spooka 	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
192*c6043c0aSriastradh 		if (l->l_sched.info)
193*c6043c0aSriastradh 			cv_broadcast(l->l_sched.info);
1940397f3a7Spooka 	}
1950397f3a7Spooka 	p->p_sflag |= PS_RUMP_LWPEXIT;
1960397f3a7Spooka 	cv_broadcast(&p->p_waitcv);
1970397f3a7Spooka 	mutex_exit(p->p_lock);
1980397f3a7Spooka }
1990397f3a7Spooka 
2000397f3a7Spooka /*
2010397f3a7Spooka  * Notify process that all threads have been drained and exec is complete.
2020397f3a7Spooka  */
2030397f3a7Spooka static void
hyp_execnotify(const char * comm)2040397f3a7Spooka hyp_execnotify(const char *comm)
2050397f3a7Spooka {
2060397f3a7Spooka 	struct proc *p = curproc;
2070397f3a7Spooka 
2080397f3a7Spooka 	fd_closeexec();
2090397f3a7Spooka 	mutex_enter(p->p_lock);
2100397f3a7Spooka 	KASSERT(p->p_nlwps == 1 && p->p_sflag & PS_RUMP_LWPEXIT);
2110397f3a7Spooka 	p->p_sflag &= ~PS_RUMP_LWPEXIT;
2120397f3a7Spooka 	mutex_exit(p->p_lock);
2130397f3a7Spooka 	strlcpy(p->p_comm, comm, sizeof(p->p_comm));
2140397f3a7Spooka }
2150397f3a7Spooka 
2160397f3a7Spooka /*
2170397f3a7Spooka  * Initialize interface pointers since component is present.
2180397f3a7Spooka  */
RUMP_COMPONENT(RUMP_COMPONENT_KERN)2190397f3a7Spooka RUMP_COMPONENT(RUMP_COMPONENT_KERN)
2200397f3a7Spooka {
2210397f3a7Spooka 
2220397f3a7Spooka 	rump_sysproxy_ops.rspo_copyin		= rumpuser_sp_copyin;
2230397f3a7Spooka 	rump_sysproxy_ops.rspo_copyinstr	= rumpuser_sp_copyinstr;
2240397f3a7Spooka 	rump_sysproxy_ops.rspo_copyout		= rumpuser_sp_copyout;
2250397f3a7Spooka 	rump_sysproxy_ops.rspo_copyoutstr	= rumpuser_sp_copyoutstr;
2260397f3a7Spooka 	rump_sysproxy_ops.rspo_anonmmap		= rumpuser_sp_anonmmap;
2270397f3a7Spooka 	rump_sysproxy_ops.rspo_raise		= rumpuser_sp_raise;
2280397f3a7Spooka 	rump_sysproxy_ops.rspo_fini		= rumpuser_sp_fini;
2290397f3a7Spooka 
2300397f3a7Spooka 	rump_sysproxy_ops.rspo_hyp_getpid	= hyp_getpid;
2310397f3a7Spooka 	rump_sysproxy_ops.rspo_hyp_syscall	= hyp_syscall;
2320397f3a7Spooka 	rump_sysproxy_ops.rspo_hyp_rfork	= hyp_rfork;
2330397f3a7Spooka 	rump_sysproxy_ops.rspo_hyp_lwpexit	= hyp_lwpexit;
2340397f3a7Spooka 	rump_sysproxy_ops.rspo_hyp_execnotify	= hyp_execnotify;
2350397f3a7Spooka }
236