xref: /netbsd-src/sys/rump/net/lib/libshmif/if_shmem.c (revision fb485dae673de70705a792d489eb14f9f1960d81)
1*fb485daeSrin /*	$NetBSD: if_shmem.c,v 1.89 2024/10/01 08:55:58 rin Exp $	*/
2a4aff0cbSpooka 
3a4aff0cbSpooka /*
423bbd0e0Spooka  * Copyright (c) 2009, 2010 Antti Kantee.  All Rights Reserved.
5a4aff0cbSpooka  *
6a4aff0cbSpooka  * Development of this software was supported by The Nokia Foundation.
7a4aff0cbSpooka  *
8a4aff0cbSpooka  * Redistribution and use in source and binary forms, with or without
9a4aff0cbSpooka  * modification, are permitted provided that the following conditions
10a4aff0cbSpooka  * are met:
11a4aff0cbSpooka  * 1. Redistributions of source code must retain the above copyright
12a4aff0cbSpooka  *    notice, this list of conditions and the following disclaimer.
13a4aff0cbSpooka  * 2. Redistributions in binary form must reproduce the above copyright
14a4aff0cbSpooka  *    notice, this list of conditions and the following disclaimer in the
15a4aff0cbSpooka  *    documentation and/or other materials provided with the distribution.
16a4aff0cbSpooka  *
17a4aff0cbSpooka  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18a4aff0cbSpooka  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19a4aff0cbSpooka  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20a4aff0cbSpooka  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21a4aff0cbSpooka  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22a4aff0cbSpooka  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23a4aff0cbSpooka  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24a4aff0cbSpooka  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25a4aff0cbSpooka  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26a4aff0cbSpooka  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27a4aff0cbSpooka  * SUCH DAMAGE.
28a4aff0cbSpooka  */
29a4aff0cbSpooka 
30a4aff0cbSpooka #include <sys/cdefs.h>
31*fb485daeSrin __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.89 2024/10/01 08:55:58 rin Exp $");
32a4aff0cbSpooka 
33a4aff0cbSpooka #include <sys/param.h>
34066824f4Spooka #include <sys/atomic.h>
35a4aff0cbSpooka #include <sys/fcntl.h>
36a4aff0cbSpooka #include <sys/kmem.h>
37a4aff0cbSpooka #include <sys/kthread.h>
38a4aff0cbSpooka #include <sys/lock.h>
39b4e3a317Spooka #include <sys/vmem.h>
403afd44cfStls #include <sys/cprng.h>
41a4aff0cbSpooka 
428fa23649Spooka #include <net/bpf.h>
43a4aff0cbSpooka #include <net/if.h>
4486a95d8eSpooka #include <net/if_dl.h>
45a4aff0cbSpooka #include <net/if_ether.h>
46d89b0a37Sozaki-r #include <net/if_media.h>
47f7201ab7Srin #include <net/ether_sw_offload.h>
48a4aff0cbSpooka 
49a4aff0cbSpooka #include <netinet/in.h>
50a4aff0cbSpooka #include <netinet/in_var.h>
51a4aff0cbSpooka 
526bb51422Spooka #include <rump-sys/kern.h>
536bb51422Spooka #include <rump-sys/net.h>
546bb51422Spooka 
55a4aff0cbSpooka #include <rump/rump.h>
56a4aff0cbSpooka #include <rump/rumpuser.h>
57a4aff0cbSpooka 
58d1665e5aSpooka #include "shmif_user.h"
59a4aff0cbSpooka 
609f725ef6Spooka static int shmif_clone(struct if_clone *, int);
619f725ef6Spooka static int shmif_unclone(struct ifnet *);
629f725ef6Spooka 
63d89b0a37Sozaki-r static int shmif_mediachange(struct ifnet *);
64d89b0a37Sozaki-r static void shmif_mediastatus(struct ifnet *, struct ifmediareq *);
65d89b0a37Sozaki-r 
669f725ef6Spooka struct if_clone shmif_cloner =
679f725ef6Spooka     IF_CLONE_INITIALIZER("shmif", shmif_clone, shmif_unclone);
689f725ef6Spooka 
69a4aff0cbSpooka /*
70942c7278Spooka  * Do r/w prefault for backend pages when attaching the interface.
719f725ef6Spooka  * At least logically thinking improves performance (although no
729f725ef6Spooka  * mlocking is done, so they might go away).
73942c7278Spooka  */
74942c7278Spooka #define PREFAULT_RW
75942c7278Spooka 
76942c7278Spooka /*
77a4aff0cbSpooka  * A virtual ethernet interface which uses shared memory from a
78a4aff0cbSpooka  * memory mapped file as the bus.
79a4aff0cbSpooka  */
80a4aff0cbSpooka 
81a4aff0cbSpooka static int	shmif_init(struct ifnet *);
82a4aff0cbSpooka static int	shmif_ioctl(struct ifnet *, u_long, void *);
83a4aff0cbSpooka static void	shmif_start(struct ifnet *);
84f7201ab7Srin static void	shmif_snd(struct ifnet *, struct mbuf *);
85a4aff0cbSpooka static void	shmif_stop(struct ifnet *, int);
86a4aff0cbSpooka 
8736e63e68Spooka #include "shmifvar.h"
8836e63e68Spooka 
89a4aff0cbSpooka struct shmif_sc {
90a4aff0cbSpooka 	struct ethercom sc_ec;
91d89b0a37Sozaki-r 	struct ifmedia sc_im;
9236e63e68Spooka 	struct shmif_mem *sc_busmem;
93a4aff0cbSpooka 	int sc_memfd;
94a4aff0cbSpooka 	int sc_kq;
95d9aab6f8Spooka 	int sc_unit;
96a4aff0cbSpooka 
979f725ef6Spooka 	char *sc_backfile;
989f725ef6Spooka 	size_t sc_backfilelen;
999f725ef6Spooka 
10011082373Spooka 	uint64_t sc_devgen;
101a4aff0cbSpooka 	uint32_t sc_nextpacket;
102d9aab6f8Spooka 
103d9aab6f8Spooka 	kmutex_t sc_mtx;
104d9aab6f8Spooka 	kcondvar_t sc_cv;
105d9aab6f8Spooka 
106d9aab6f8Spooka 	struct lwp *sc_rcvl;
107d9aab6f8Spooka 	bool sc_dying;
1085c34a715Sozaki-r 
109056381ceSozaki-r 	uint64_t sc_uid;
110a4aff0cbSpooka };
111a4aff0cbSpooka 
112a4aff0cbSpooka static void shmif_rcv(void *);
113a4aff0cbSpooka 
114b4e3a317Spooka vmem_t *shmif_units;
115b4e3a317Spooka 
1160dc3609eSpooka static void
1170dc3609eSpooka dowakeup(struct shmif_sc *sc)
1180dc3609eSpooka {
1190dc3609eSpooka 	struct rumpuser_iovec iov;
1200dc3609eSpooka 	uint32_t ver = SHMIF_VERSION;
12132a34307Spooka 	size_t n;
1220dc3609eSpooka 
1230dc3609eSpooka 	iov.iov_base = &ver;
1240dc3609eSpooka 	iov.iov_len = sizeof(ver);
12532a34307Spooka 	rumpuser_iovwrite(sc->sc_memfd, &iov, 1, IFMEM_WAKEUP, &n);
1260dc3609eSpooka }
1270dc3609eSpooka 
12850fa67ffSpooka /*
12950fa67ffSpooka  * This locking needs work and will misbehave severely if:
13050fa67ffSpooka  * 1) the backing memory has to be paged in
13150fa67ffSpooka  * 2) some lockholder exits while holding the lock
13250fa67ffSpooka  */
13350fa67ffSpooka static void
13450fa67ffSpooka shmif_lockbus(struct shmif_mem *busmem)
13550fa67ffSpooka {
13650fa67ffSpooka 	int i = 0;
13750fa67ffSpooka 
13850fa67ffSpooka 	while (__predict_false(atomic_cas_32(&busmem->shm_lock,
13950fa67ffSpooka 	    LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) {
14050fa67ffSpooka 		if (__predict_false(++i > LOCK_COOLDOWN)) {
141a53a2a53Spooka 			/* wait 1ms */
142c7fca9d6Spooka 			rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL,
143c7fca9d6Spooka 			    0, 1000*1000);
14450fa67ffSpooka 			i = 0;
14550fa67ffSpooka 		}
14650fa67ffSpooka 		continue;
14750fa67ffSpooka 	}
14836c7c52aSriastradh 	membar_acquire();
14950fa67ffSpooka }
15050fa67ffSpooka 
15150fa67ffSpooka static void
15250fa67ffSpooka shmif_unlockbus(struct shmif_mem *busmem)
15350fa67ffSpooka {
154129ca10eSjustin 	unsigned int old __diagused;
15550fa67ffSpooka 
15636c7c52aSriastradh 	membar_release();
15750fa67ffSpooka 	old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED);
15850fa67ffSpooka 	KASSERT(old == LOCK_LOCKED);
15950fa67ffSpooka }
16050fa67ffSpooka 
1619f725ef6Spooka static int
1629f725ef6Spooka allocif(int unit, struct shmif_sc **scp)
163a4aff0cbSpooka {
1649f725ef6Spooka 	uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 };
165a4aff0cbSpooka 	struct shmif_sc *sc;
166a4aff0cbSpooka 	struct ifnet *ifp;
16782bf8980Sozaki-r 	uint64_t randnum;
168076e3579Sriastradh 	int error = 0;
1699f725ef6Spooka 
17082bf8980Sozaki-r 	randnum = cprng_strong64();
17182bf8980Sozaki-r 	memcpy(&enaddr[2], &randnum, 4);
1729f725ef6Spooka 
1739f725ef6Spooka 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
1749f725ef6Spooka 	sc->sc_memfd = -1;
175d9aab6f8Spooka 	sc->sc_unit = unit;
17682bf8980Sozaki-r 	sc->sc_uid = randnum;
1779f725ef6Spooka 
1789f725ef6Spooka 	ifp = &sc->sc_ec.ec_if;
1799f725ef6Spooka 
180d89b0a37Sozaki-r 	ifmedia_init(&sc->sc_im, 0, shmif_mediachange, shmif_mediastatus);
181d89b0a37Sozaki-r 	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL);
182d89b0a37Sozaki-r 	ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_NONE, 0, NULL);
183d89b0a37Sozaki-r 	ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO);
184d89b0a37Sozaki-r 
185c1ae06abSchristos 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "shmif%d", unit);
1869f725ef6Spooka 	ifp->if_softc = sc;
1875c34a715Sozaki-r 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1889f725ef6Spooka 	ifp->if_init = shmif_init;
1899f725ef6Spooka 	ifp->if_ioctl = shmif_ioctl;
1909f725ef6Spooka 	ifp->if_start = shmif_start;
1919f725ef6Spooka 	ifp->if_stop = shmif_stop;
1929f725ef6Spooka 	ifp->if_mtu = ETHERMTU;
19386a95d8eSpooka 	ifp->if_dlt = DLT_EN10MB;
194f7201ab7Srin 	ifp->if_capabilities = IFCAP_TSOv4 | IFCAP_TSOv6 |
195f7201ab7Srin 	    IFCAP_CSUM_IPv4_Rx	| IFCAP_CSUM_IPv4_Tx |
196f7201ab7Srin 	    IFCAP_CSUM_TCPv4_Rx	| IFCAP_CSUM_TCPv4_Tx |
197f7201ab7Srin 	    IFCAP_CSUM_UDPv4_Rx	| IFCAP_CSUM_UDPv4_Tx |
198f7201ab7Srin 	    IFCAP_CSUM_TCPv6_Rx	| IFCAP_CSUM_TCPv6_Tx |
199f7201ab7Srin 	    IFCAP_CSUM_UDPv6_Rx	| IFCAP_CSUM_UDPv6_Tx;
200f62bf30aSozaki-r 	IFQ_SET_READY(&ifp->if_snd);
2019f725ef6Spooka 
202d9aab6f8Spooka 	mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE);
203d9aab6f8Spooka 	cv_init(&sc->sc_cv, "shmifcv");
204d9aab6f8Spooka 
205076e3579Sriastradh 	if_initialize(ifp);
206f7201ab7Srin #if 1
207f7201ab7Srin 	char buf[256];
208f7201ab7Srin 
209f7201ab7Srin 	if (rumpuser_getparam("RUMP_SHMIF_CAPENABLE", buf, sizeof(buf)) == 0) {
210f7201ab7Srin 		uint64_t capen = strtoul(buf, NULL, 0);
211f7201ab7Srin 
212f7201ab7Srin 		ifp->if_capenable = capen & ifp->if_capabilities;
213f7201ab7Srin 	}
214f7201ab7Srin #endif
215f7201ab7Srin 
216f62bf30aSozaki-r 	if_deferred_start_init(ifp, NULL);
2179f725ef6Spooka 	ether_ifattach(ifp, enaddr);
2189c4cd063Sozaki-r 	if_register(ifp);
2199f725ef6Spooka 
2209f725ef6Spooka 	aprint_verbose("shmif%d: Ethernet address %s\n",
221d9aab6f8Spooka 	    unit, ether_sprintf(enaddr));
2229f725ef6Spooka 
2239f725ef6Spooka 	if (scp)
2249f725ef6Spooka 		*scp = sc;
2259f725ef6Spooka 
226d9aab6f8Spooka 	if (rump_threads) {
227d9aab6f8Spooka 		error = kthread_create(PRI_NONE,
228a0ffc02aSrmind 		    KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, NULL,
229d9aab6f8Spooka 		    shmif_rcv, ifp, &sc->sc_rcvl, "shmif");
230d9aab6f8Spooka 	} else {
231d9aab6f8Spooka 		printf("WARNING: threads not enabled, shmif NOT working\n");
232d9aab6f8Spooka 	}
233d9aab6f8Spooka 
234d9aab6f8Spooka 	if (error) {
235d9aab6f8Spooka 		shmif_unclone(ifp);
236d9aab6f8Spooka 	}
237d9aab6f8Spooka 
238076e3579Sriastradh 	return 0;
2399f725ef6Spooka }
2409f725ef6Spooka 
2419f725ef6Spooka static int
2429f725ef6Spooka initbackend(struct shmif_sc *sc, int memfd)
2439f725ef6Spooka {
244942c7278Spooka 	volatile uint8_t v;
245942c7278Spooka 	volatile uint8_t *p;
24632a34307Spooka 	void *mem;
247a4aff0cbSpooka 	int error;
248a4aff0cbSpooka 
24932a34307Spooka 	error = rumpcomp_shmif_mmap(memfd, BUSMEM_SIZE, &mem);
250a4aff0cbSpooka 	if (error)
2519f725ef6Spooka 		return error;
25232a34307Spooka 	sc->sc_busmem = mem;
253a4aff0cbSpooka 
2549f725ef6Spooka 	if (sc->sc_busmem->shm_magic
2559f725ef6Spooka 	    && sc->sc_busmem->shm_magic != SHMIF_MAGIC) {
2569f725ef6Spooka 		printf("bus is not magical");
2579f725ef6Spooka 		rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE);
2589f725ef6Spooka 		return ENOEXEC;
2599f725ef6Spooka 	}
260942c7278Spooka 
261a6893ed0Spooka 	/*
262a6893ed0Spooka 	 * Prefault in pages to minimize runtime penalty with buslock.
263a6893ed0Spooka 	 * Use 512 instead of PAGE_SIZE to make sure we catch cases where
264a6893ed0Spooka 	 * rump kernel PAGE_SIZE > host page size.
265a6893ed0Spooka 	 */
266942c7278Spooka 	for (p = (uint8_t *)sc->sc_busmem;
267942c7278Spooka 	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
268a6893ed0Spooka 	    p += 512)
269942c7278Spooka 		v = *p;
270942c7278Spooka 
271ce68b7aeSpooka 	shmif_lockbus(sc->sc_busmem);
272ce68b7aeSpooka 	/* we're first?  initialize bus */
273ce68b7aeSpooka 	if (sc->sc_busmem->shm_magic == 0) {
274ce68b7aeSpooka 		sc->sc_busmem->shm_magic = SHMIF_MAGIC;
275ce68b7aeSpooka 		sc->sc_busmem->shm_first = BUSMEM_DATASIZE;
276ce68b7aeSpooka 	}
277ce68b7aeSpooka 
27836e63e68Spooka 	sc->sc_nextpacket = sc->sc_busmem->shm_last;
27911082373Spooka 	sc->sc_devgen = sc->sc_busmem->shm_gen;
280942c7278Spooka 
281942c7278Spooka #ifdef PREFAULT_RW
282942c7278Spooka 	for (p = (uint8_t *)sc->sc_busmem;
283942c7278Spooka 	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
284942c7278Spooka 	    p += PAGE_SIZE) {
285942c7278Spooka 		v = *p;
286942c7278Spooka 		*p = v;
287942c7278Spooka 	}
288942c7278Spooka #endif
289ce68b7aeSpooka 	shmif_unlockbus(sc->sc_busmem);
290a4aff0cbSpooka 
29132a34307Spooka 	sc->sc_kq = -1;
29232a34307Spooka 	error = rumpcomp_shmif_watchsetup(&sc->sc_kq, memfd);
29332a34307Spooka 	if (error) {
294d9aab6f8Spooka 		rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE);
2959f725ef6Spooka 		return error;
296d9aab6f8Spooka 	}
297a4aff0cbSpooka 
2989f725ef6Spooka 	sc->sc_memfd = memfd;
299d9aab6f8Spooka 
300d9aab6f8Spooka 	return error;
3019f725ef6Spooka }
302a4aff0cbSpooka 
3039f725ef6Spooka static void
3049f725ef6Spooka finibackend(struct shmif_sc *sc)
3059f725ef6Spooka {
306a4aff0cbSpooka 
307d9aab6f8Spooka 	if (sc->sc_backfile == NULL)
308d9aab6f8Spooka 		return;
309d9aab6f8Spooka 
310d9aab6f8Spooka 	if (sc->sc_backfile) {
3119f725ef6Spooka 		kmem_free(sc->sc_backfile, sc->sc_backfilelen);
3129f725ef6Spooka 		sc->sc_backfile = NULL;
3139f725ef6Spooka 		sc->sc_backfilelen = 0;
314d9aab6f8Spooka 	}
3159f725ef6Spooka 
3169f725ef6Spooka 	rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE);
31732a34307Spooka 	rumpuser_close(sc->sc_memfd);
31832a34307Spooka 	rumpuser_close(sc->sc_kq);
319d9aab6f8Spooka 
320d9aab6f8Spooka 	sc->sc_memfd = -1;
3219f725ef6Spooka }
3229f725ef6Spooka 
3239f725ef6Spooka int
3249f725ef6Spooka rump_shmif_create(const char *path, int *ifnum)
3259f725ef6Spooka {
3269f725ef6Spooka 	struct shmif_sc *sc;
32778b0e183Sdyoung 	vmem_addr_t t;
32853fd1876Spooka 	int unit, error;
32953fd1876Spooka 	int memfd = -1; /* XXXgcc */
3309f725ef6Spooka 
33153fd1876Spooka 	if (path) {
33232a34307Spooka 		error = rumpuser_open(path,
33332a34307Spooka 		    RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd);
33432a34307Spooka 		if (error)
3359f725ef6Spooka 			return error;
33653fd1876Spooka 	}
3379f725ef6Spooka 
33878b0e183Sdyoung 	error = vmem_xalloc(shmif_units, 1, 0, 0, 0,
33978b0e183Sdyoung 	    VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_INSTANTFIT | VM_SLEEP, &t);
34078b0e183Sdyoung 
34178b0e183Sdyoung 	if (error != 0) {
34278b0e183Sdyoung 		if (path)
34332a34307Spooka 			rumpuser_close(memfd);
34478b0e183Sdyoung 		return error;
34578b0e183Sdyoung 	}
34678b0e183Sdyoung 
34778b0e183Sdyoung 	unit = t - 1;
348b4e3a317Spooka 
349d9aab6f8Spooka 	if ((error = allocif(unit, &sc)) != 0) {
35053fd1876Spooka 		if (path)
35132a34307Spooka 			rumpuser_close(memfd);
3529f725ef6Spooka 		return error;
3539f725ef6Spooka 	}
35453fd1876Spooka 
35553fd1876Spooka 	if (!path)
35653fd1876Spooka 		goto out;
35753fd1876Spooka 
3589f725ef6Spooka 	error = initbackend(sc, memfd);
3599f725ef6Spooka 	if (error) {
360d9aab6f8Spooka 		shmif_unclone(&sc->sc_ec.ec_if);
3619f725ef6Spooka 		return error;
3629f725ef6Spooka 	}
3639f725ef6Spooka 
3649f725ef6Spooka 	sc->sc_backfilelen = strlen(path)+1;
3659f725ef6Spooka 	sc->sc_backfile = kmem_alloc(sc->sc_backfilelen, KM_SLEEP);
3669f725ef6Spooka 	strcpy(sc->sc_backfile, path);
367316bb4eaSpooka 
36853fd1876Spooka  out:
369e4b2f37aSpooka 	if (ifnum)
370d9aab6f8Spooka 		*ifnum = unit;
371a4aff0cbSpooka 
3729f725ef6Spooka 	return 0;
3739f725ef6Spooka }
3749f725ef6Spooka 
3759f725ef6Spooka static int
3769f725ef6Spooka shmif_clone(struct if_clone *ifc, int unit)
3779f725ef6Spooka {
378129ca10eSjustin 	int rc __diagused;
37978b0e183Sdyoung 	vmem_addr_t unit2;
3809f725ef6Spooka 
381b4e3a317Spooka 	/*
382b4e3a317Spooka 	 * Ok, we know the unit number, but we must still reserve it.
383b4e3a317Spooka 	 * Otherwise the wildcard-side of things might get the same one.
384b4e3a317Spooka 	 * This is slightly offset-happy due to vmem.  First, we offset
385b4e3a317Spooka 	 * the range of unit numbers by +1 since vmem cannot deal with
38664311e1fSdyoung 	 * ranges starting from 0.  Talk about uuuh.
387b4e3a317Spooka 	 */
38878b0e183Sdyoung 	rc = vmem_xalloc(shmif_units, 1, 0, 0, 0, unit+1, unit+1,
38978b0e183Sdyoung 	    VM_SLEEP | VM_INSTANTFIT, &unit2);
39078b0e183Sdyoung 	KASSERT(rc == 0 && unit2-1 == unit);
3919f725ef6Spooka 
3929f725ef6Spooka 	return allocif(unit, NULL);
3939f725ef6Spooka }
3949f725ef6Spooka 
3959f725ef6Spooka static int
3969f725ef6Spooka shmif_unclone(struct ifnet *ifp)
3979f725ef6Spooka {
398d9aab6f8Spooka 	struct shmif_sc *sc = ifp->if_softc;
3999f725ef6Spooka 
400d9aab6f8Spooka 	shmif_stop(ifp, 1);
401d9aab6f8Spooka 	if_down(ifp);
402d9aab6f8Spooka 
403d9aab6f8Spooka 	mutex_enter(&sc->sc_mtx);
404d9aab6f8Spooka 	sc->sc_dying = true;
405d9aab6f8Spooka 	cv_broadcast(&sc->sc_cv);
406d9aab6f8Spooka 	mutex_exit(&sc->sc_mtx);
407d9aab6f8Spooka 
408d9aab6f8Spooka 	if (sc->sc_rcvl)
409d9aab6f8Spooka 		kthread_join(sc->sc_rcvl);
410d9aab6f8Spooka 	sc->sc_rcvl = NULL;
411d9aab6f8Spooka 
41295d40beaSozaki-r 	/*
41395d40beaSozaki-r 	 * Need to be called after the kthread left, otherwise closing kqueue
41495d40beaSozaki-r 	 * (sc_kq) hangs sometimes perhaps because of a race condition between
41595d40beaSozaki-r 	 * close and kevent in the kthread on the kqueue.
41695d40beaSozaki-r 	 */
41795d40beaSozaki-r 	finibackend(sc);
41895d40beaSozaki-r 
419d9aab6f8Spooka 	vmem_xfree(shmif_units, sc->sc_unit+1, 1);
420d9aab6f8Spooka 
421d9aab6f8Spooka 	ether_ifdetach(ifp);
422d9aab6f8Spooka 	if_detach(ifp);
423d9aab6f8Spooka 
424d9aab6f8Spooka 	cv_destroy(&sc->sc_cv);
425d9aab6f8Spooka 	mutex_destroy(&sc->sc_mtx);
426d9aab6f8Spooka 
427d9aab6f8Spooka 	kmem_free(sc, sizeof(*sc));
428d9aab6f8Spooka 
429d9aab6f8Spooka 	return 0;
430a4aff0cbSpooka }
431a4aff0cbSpooka 
432a4aff0cbSpooka static int
433a4aff0cbSpooka shmif_init(struct ifnet *ifp)
434a4aff0cbSpooka {
4359f725ef6Spooka 	struct shmif_sc *sc = ifp->if_softc;
436d8327efbSpooka 	int error = 0;
437d8327efbSpooka 
4389f725ef6Spooka 	if (sc->sc_memfd == -1)
4399f725ef6Spooka 		return ENXIO;
440d9aab6f8Spooka 	KASSERT(sc->sc_busmem);
441a4aff0cbSpooka 
442a4aff0cbSpooka 	ifp->if_flags |= IFF_RUNNING;
443d9aab6f8Spooka 
444d9aab6f8Spooka 	mutex_enter(&sc->sc_mtx);
445d9aab6f8Spooka 	sc->sc_nextpacket = sc->sc_busmem->shm_last;
446d9aab6f8Spooka 	sc->sc_devgen = sc->sc_busmem->shm_gen;
447d9aab6f8Spooka 
448d9aab6f8Spooka 	cv_broadcast(&sc->sc_cv);
449d9aab6f8Spooka 	mutex_exit(&sc->sc_mtx);
450d9aab6f8Spooka 
451d8327efbSpooka 	return error;
452a4aff0cbSpooka }
453a4aff0cbSpooka 
454a4aff0cbSpooka static int
455d89b0a37Sozaki-r shmif_mediachange(struct ifnet *ifp)
456d89b0a37Sozaki-r {
457d89b0a37Sozaki-r 	struct shmif_sc *sc = ifp->if_softc;
458*fb485daeSrin 	int link_state;
459d89b0a37Sozaki-r 
460*fb485daeSrin 	if (IFM_SUBTYPE(sc->sc_im.ifm_cur->ifm_media) == IFM_NONE)
461*fb485daeSrin 		link_state = LINK_STATE_DOWN;
462*fb485daeSrin 	else
463*fb485daeSrin 		link_state = LINK_STATE_UP;
464*fb485daeSrin 
465*fb485daeSrin 	if_link_state_change(ifp, link_state);
466d89b0a37Sozaki-r 	return 0;
467d89b0a37Sozaki-r }
468d89b0a37Sozaki-r 
469d89b0a37Sozaki-r static void
470d89b0a37Sozaki-r shmif_mediastatus(struct ifnet *ifp, struct ifmediareq *imr)
471d89b0a37Sozaki-r {
472d89b0a37Sozaki-r 	struct shmif_sc *sc = ifp->if_softc;
473*fb485daeSrin 
474d89b0a37Sozaki-r 	imr->ifm_active = sc->sc_im.ifm_cur->ifm_media;
475*fb485daeSrin 
476*fb485daeSrin 	imr->ifm_status = IFM_AVALID;
477*fb485daeSrin 	if (IFM_SUBTYPE(imr->ifm_active) != IFM_NONE)
478*fb485daeSrin 		imr->ifm_status |= IFM_ACTIVE;
479d89b0a37Sozaki-r }
480d89b0a37Sozaki-r 
481d89b0a37Sozaki-r static int
482a4aff0cbSpooka shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
483a4aff0cbSpooka {
4849f725ef6Spooka 	struct shmif_sc *sc = ifp->if_softc;
4859f725ef6Spooka 	struct ifdrv *ifd;
4869f725ef6Spooka 	char *path;
487d9aab6f8Spooka 	int s, rv, memfd;
488a4aff0cbSpooka 
489a4aff0cbSpooka 	s = splnet();
4909f725ef6Spooka 	switch (cmd) {
4919f725ef6Spooka 	case SIOCGLINKSTR:
4929f725ef6Spooka 		ifd = data;
4939f725ef6Spooka 
4949f725ef6Spooka 		if (sc->sc_backfilelen == 0) {
4959f725ef6Spooka 			rv = ENOENT;
4969f725ef6Spooka 			break;
4979f725ef6Spooka 		}
4989f725ef6Spooka 
4999f725ef6Spooka 		ifd->ifd_len = sc->sc_backfilelen;
5009f725ef6Spooka 		if (ifd->ifd_cmd == IFLINKSTR_QUERYLEN) {
5019f725ef6Spooka 			rv = 0;
5029f725ef6Spooka 			break;
5039f725ef6Spooka 		}
5049f725ef6Spooka 
5059f725ef6Spooka 		if (ifd->ifd_cmd != 0) {
5069f725ef6Spooka 			rv = EINVAL;
5079f725ef6Spooka 			break;
5089f725ef6Spooka 		}
5099f725ef6Spooka 
5109f725ef6Spooka 		rv = copyoutstr(sc->sc_backfile, ifd->ifd_data,
5119f725ef6Spooka 		    MIN(sc->sc_backfilelen, ifd->ifd_len), NULL);
5129f725ef6Spooka 		break;
5139f725ef6Spooka 	case SIOCSLINKSTR:
5149f725ef6Spooka 		if (ifp->if_flags & IFF_UP) {
5159f725ef6Spooka 			rv = EBUSY;
5169f725ef6Spooka 			break;
5179f725ef6Spooka 		}
5189f725ef6Spooka 
5199f725ef6Spooka 		ifd = data;
5209f725ef6Spooka 		if (ifd->ifd_cmd == IFLINKSTR_UNSET) {
5219f725ef6Spooka 			finibackend(sc);
5224a952ad7Sozaki-r 			/* Back to the default just in case */
5234a952ad7Sozaki-r 			ifp->if_link_state = LINK_STATE_UNKNOWN;
5249f725ef6Spooka 			rv = 0;
5259f725ef6Spooka 			break;
5269f725ef6Spooka 		} else if (ifd->ifd_cmd != 0) {
5279f725ef6Spooka 			rv = EINVAL;
5289f725ef6Spooka 			break;
5299f725ef6Spooka 		} else if (sc->sc_backfile) {
5309f725ef6Spooka 			rv = EBUSY;
5319f725ef6Spooka 			break;
5329f725ef6Spooka 		}
5339f725ef6Spooka 
5349f725ef6Spooka 		if (ifd->ifd_len > MAXPATHLEN) {
5359f725ef6Spooka 			rv = E2BIG;
5369f725ef6Spooka 			break;
5379f725ef6Spooka 		} else if (ifd->ifd_len < 1) {
5389f725ef6Spooka 			rv = EINVAL;
5399f725ef6Spooka 			break;
5409f725ef6Spooka 		}
5419f725ef6Spooka 
5429f725ef6Spooka 		path = kmem_alloc(ifd->ifd_len, KM_SLEEP);
5439f725ef6Spooka 		rv = copyinstr(ifd->ifd_data, path, ifd->ifd_len, NULL);
5449f725ef6Spooka 		if (rv) {
5459f725ef6Spooka 			kmem_free(path, ifd->ifd_len);
5469f725ef6Spooka 			break;
5479f725ef6Spooka 		}
54832a34307Spooka 		rv = rumpuser_open(path,
54932a34307Spooka 		    RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd);
55032a34307Spooka 		if (rv) {
5519f725ef6Spooka 			kmem_free(path, ifd->ifd_len);
5529f725ef6Spooka 			break;
5539f725ef6Spooka 		}
5549f725ef6Spooka 		rv = initbackend(sc, memfd);
5559f725ef6Spooka 		if (rv) {
5569f725ef6Spooka 			kmem_free(path, ifd->ifd_len);
55732a34307Spooka 			rumpuser_close(memfd);
5589f725ef6Spooka 			break;
5599f725ef6Spooka 		}
5609f725ef6Spooka 		sc->sc_backfile = path;
5619f725ef6Spooka 		sc->sc_backfilelen = ifd->ifd_len;
5629f725ef6Spooka 
5634a952ad7Sozaki-r 		if_link_state_change(ifp, LINK_STATE_UP);
5649f725ef6Spooka 		break;
565d89b0a37Sozaki-r 
566d89b0a37Sozaki-r #ifdef OSIOCSIFMEDIA
567d89b0a37Sozaki-r 	case OSIOCSIFMEDIA:
568d89b0a37Sozaki-r #endif
569d89b0a37Sozaki-r 	case SIOCSIFMEDIA:
570d89b0a37Sozaki-r 	case SIOCGIFMEDIA:
571d89b0a37Sozaki-r 		rv = ifmedia_ioctl(ifp, data, &sc->sc_im, cmd);
572d89b0a37Sozaki-r 		break;
573d89b0a37Sozaki-r 
5749f725ef6Spooka 	default:
575a4aff0cbSpooka 		rv = ether_ioctl(ifp, cmd, data);
5769c2d055fSpooka 		if (rv == ENETRESET)
5779c2d055fSpooka 			rv = 0;
5789f725ef6Spooka 		break;
5799f725ef6Spooka 	}
580a4aff0cbSpooka 	splx(s);
581a4aff0cbSpooka 
582a4aff0cbSpooka 	return rv;
583a4aff0cbSpooka }
584a4aff0cbSpooka 
585a4aff0cbSpooka static void
586a4aff0cbSpooka shmif_start(struct ifnet *ifp)
587a4aff0cbSpooka {
588a4aff0cbSpooka 	struct shmif_sc *sc = ifp->if_softc;
589f7201ab7Srin 	struct mbuf *m, *n;
590a4aff0cbSpooka 	bool wrote = false;
591a4aff0cbSpooka 
59211082373Spooka 	ifp->if_flags |= IFF_OACTIVE;
59311082373Spooka 
594a4aff0cbSpooka 	for (;;) {
595f62bf30aSozaki-r 		IFQ_DEQUEUE(&ifp->if_snd, m);
596f7201ab7Srin 		if (m == NULL)
597f7201ab7Srin 			break;
598b97bdf94Spooka 
599f7201ab7Srin 		m = ether_sw_offload_tx(ifp, m);
600f7201ab7Srin 		if (m == NULL) {
601c85e2f36Sthorpej 			if_statinc(ifp, if_oerrors);
602a4aff0cbSpooka 			break;
603a4aff0cbSpooka 		}
604a4aff0cbSpooka 
605f7201ab7Srin 		do {
606f7201ab7Srin 			n = m->m_nextpkt;
607f7201ab7Srin 			shmif_snd(ifp, m);
608f7201ab7Srin 			m = n;
609f7201ab7Srin 		} while (m != NULL);
610f7201ab7Srin 
611f7201ab7Srin 		wrote = true;
612f7201ab7Srin 	}
613f7201ab7Srin 
614f7201ab7Srin 	ifp->if_flags &= ~IFF_OACTIVE;
615f7201ab7Srin 
616f7201ab7Srin 	/* wakeup? */
617f7201ab7Srin 	if (wrote) {
618f7201ab7Srin 		dowakeup(sc);
619f7201ab7Srin 	}
620f7201ab7Srin }
621f7201ab7Srin 
622f7201ab7Srin /* send everything in-context since it's just a matter of mem-to-mem copy */
623f7201ab7Srin static void
624f7201ab7Srin shmif_snd(struct ifnet *ifp, struct mbuf *m0)
625f7201ab7Srin {
626f7201ab7Srin 	struct shmif_sc *sc = ifp->if_softc;
627f7201ab7Srin 	struct shmif_mem *busmem = sc->sc_busmem;
628f7201ab7Srin 	struct shmif_pkthdr sp;
629f7201ab7Srin 	struct timeval tv;
630f7201ab7Srin 	struct mbuf *m;
631f7201ab7Srin 	uint32_t dataoff;
632f7201ab7Srin 	uint32_t pktsize, pktwrote;
633f7201ab7Srin 	bool wrap;
634f7201ab7Srin 
635d2a293bcSpooka 	pktsize = 0;
636a4aff0cbSpooka 	for (m = m0; m != NULL; m = m->m_next) {
637a4aff0cbSpooka 		pktsize += m->m_len;
638a4aff0cbSpooka 	}
63911082373Spooka 	KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN);
640ce68b7aeSpooka 
641b97bdf94Spooka 	getmicrouptime(&tv);
642b97bdf94Spooka 	sp.sp_len = pktsize;
643b97bdf94Spooka 	sp.sp_sec = tv.tv_sec;
644b97bdf94Spooka 	sp.sp_usec = tv.tv_usec;
645056381ceSozaki-r 	sp.sp_sender = sc->sc_uid;
646b97bdf94Spooka 
6473cd62456Smsaitoh 	bpf_mtap(ifp, m0, BPF_D_OUT);
6488fa23649Spooka 
6494a952ad7Sozaki-r 	/*
6504a952ad7Sozaki-r 	 * Compare with DOWN to allow UNKNOWN (the default value),
651dd766027Sriastradh 	 * which is required by some ATF tests using rump servers
652dd766027Sriastradh 	 * written in C.
6534a952ad7Sozaki-r 	 */
6544a952ad7Sozaki-r 	if (ifp->if_link_state == LINK_STATE_DOWN)
6554a952ad7Sozaki-r 		goto dontsend;
6564a952ad7Sozaki-r 
65711082373Spooka 	shmif_lockbus(busmem);
65811082373Spooka 	KASSERT(busmem->shm_magic == SHMIF_MAGIC);
65911082373Spooka 	busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last);
660b1559dbbSpooka 
661962bc2f8Spooka 	wrap = false;
662f7201ab7Srin 	dataoff =
663f7201ab7Srin 	    shmif_buswrite(busmem, busmem->shm_last, &sp, sizeof(sp), &wrap);
66411082373Spooka 	pktwrote = 0;
665ce68b7aeSpooka 	for (m = m0; m != NULL; m = m->m_next) {
66611082373Spooka 		pktwrote += m->m_len;
667f7201ab7Srin 		dataoff = shmif_buswrite(busmem, dataoff, mtod(m, void *),
668f7201ab7Srin 		    m->m_len, &wrap);
669ce68b7aeSpooka 	}
67011082373Spooka 	KASSERT(pktwrote == pktsize);
67186ea9bb1Spooka 	if (wrap) {
67211082373Spooka 		busmem->shm_gen++;
673f7201ab7Srin 		DPRINTF(("bus generation now %" PRIu64 "\n", busmem->shm_gen));
67486ea9bb1Spooka 	}
67511082373Spooka 	shmif_unlockbus(busmem);
676a4aff0cbSpooka 
6774a952ad7Sozaki-r dontsend:
678a4aff0cbSpooka 	m_freem(m0);
679c85e2f36Sthorpej 	if_statinc(ifp, if_opackets);
680a4aff0cbSpooka 
681f7201ab7Srin 	DPRINTF(("shmif_start: send %d bytes at off %d\n", pktsize,
682f7201ab7Srin 	    busmem->shm_last));
683a4aff0cbSpooka }
684a4aff0cbSpooka 
685a4aff0cbSpooka static void
686a4aff0cbSpooka shmif_stop(struct ifnet *ifp, int disable)
687a4aff0cbSpooka {
688d9aab6f8Spooka 	struct shmif_sc *sc = ifp->if_softc;
689a4aff0cbSpooka 
690d9aab6f8Spooka 	ifp->if_flags &= ~IFF_RUNNING;
691d9aab6f8Spooka 	membar_producer();
692d9aab6f8Spooka 
693d9aab6f8Spooka 	/*
694d9aab6f8Spooka 	 * wakeup thread.  this will of course wake up all bus
695d9aab6f8Spooka 	 * listeners, but that's life.
696d9aab6f8Spooka 	 */
6970dc3609eSpooka 	if (sc->sc_memfd != -1) {
6980dc3609eSpooka 		dowakeup(sc);
6990dc3609eSpooka 	}
700a4aff0cbSpooka }
701a4aff0cbSpooka 
70286ea9bb1Spooka 
70386ea9bb1Spooka /*
70486ea9bb1Spooka  * Check if we have been sleeping too long.  Basically,
70586ea9bb1Spooka  * our in-sc nextpkt must by first <= nextpkt <= last"+1".
70686ea9bb1Spooka  * We use the fact that first is guaranteed to never overlap
70786ea9bb1Spooka  * with the last frame in the ring.
70886ea9bb1Spooka  */
70986ea9bb1Spooka static __inline bool
71086ea9bb1Spooka stillvalid_p(struct shmif_sc *sc)
71186ea9bb1Spooka {
71286ea9bb1Spooka 	struct shmif_mem *busmem = sc->sc_busmem;
71386ea9bb1Spooka 	unsigned gendiff = busmem->shm_gen - sc->sc_devgen;
71486ea9bb1Spooka 	uint32_t lastoff, devoff;
71586ea9bb1Spooka 
71686ea9bb1Spooka 	KASSERT(busmem->shm_first != busmem->shm_last);
71786ea9bb1Spooka 
71886ea9bb1Spooka 	/* normalize onto a 2x busmem chunk */
71986ea9bb1Spooka 	devoff = sc->sc_nextpacket;
72086ea9bb1Spooka 	lastoff = shmif_nextpktoff(busmem, busmem->shm_last);
72186ea9bb1Spooka 
72286ea9bb1Spooka 	/* trivial case */
72386ea9bb1Spooka 	if (gendiff > 1)
72486ea9bb1Spooka 		return false;
72586ea9bb1Spooka 	KASSERT(gendiff <= 1);
72686ea9bb1Spooka 
72786ea9bb1Spooka 	/* Normalize onto 2x busmem chunk */
72886ea9bb1Spooka 	if (busmem->shm_first >= lastoff) {
72986ea9bb1Spooka 		lastoff += BUSMEM_DATASIZE;
73086ea9bb1Spooka 		if (gendiff == 0)
73186ea9bb1Spooka 			devoff += BUSMEM_DATASIZE;
73286ea9bb1Spooka 	} else {
73386ea9bb1Spooka 		if (gendiff)
73486ea9bb1Spooka 			return false;
73586ea9bb1Spooka 	}
73686ea9bb1Spooka 
73786ea9bb1Spooka 	return devoff >= busmem->shm_first && devoff <= lastoff;
73886ea9bb1Spooka }
73986ea9bb1Spooka 
740a4aff0cbSpooka static void
741a4aff0cbSpooka shmif_rcv(void *arg)
742a4aff0cbSpooka {
743a4aff0cbSpooka 	struct ifnet *ifp = arg;
744a4aff0cbSpooka 	struct shmif_sc *sc = ifp->if_softc;
745d9aab6f8Spooka 	struct shmif_mem *busmem;
746a4aff0cbSpooka 	struct mbuf *m = NULL;
747a4aff0cbSpooka 	struct ether_header *eth;
74886ea9bb1Spooka 	uint32_t nextpkt;
74986a95d8eSpooka 	bool wrap, passup;
750a4aff0cbSpooka 	int error;
751f371d8fbSpooka 	const int align
752f371d8fbSpooka 	    = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header);
753a4aff0cbSpooka 
754d9aab6f8Spooka  reup:
755d9aab6f8Spooka 	mutex_enter(&sc->sc_mtx);
756d9aab6f8Spooka 	while ((ifp->if_flags & IFF_RUNNING) == 0 && !sc->sc_dying)
757d9aab6f8Spooka 		cv_wait(&sc->sc_cv, &sc->sc_mtx);
758d9aab6f8Spooka 	mutex_exit(&sc->sc_mtx);
759d9aab6f8Spooka 
760d9aab6f8Spooka 	busmem = sc->sc_busmem;
761d9aab6f8Spooka 
762d9aab6f8Spooka 	while (ifp->if_flags & IFF_RUNNING) {
763b97bdf94Spooka 		struct shmif_pkthdr sp;
764b97bdf94Spooka 
765a4aff0cbSpooka 		if (m == NULL) {
766a4aff0cbSpooka 			m = m_gethdr(M_WAIT, MT_DATA);
767a4aff0cbSpooka 			MCLGET(m, M_WAIT);
768f371d8fbSpooka 			m->m_data += align;
769a4aff0cbSpooka 		}
770a4aff0cbSpooka 
771d95e8f23Spooka 		DPRINTF(("waiting %d/%" PRIu64 "\n",
772d95e8f23Spooka 		    sc->sc_nextpacket, sc->sc_devgen));
773a4aff0cbSpooka 		KASSERT(m->m_flags & M_EXT);
77411082373Spooka 
77511082373Spooka 		shmif_lockbus(busmem);
77611082373Spooka 		KASSERT(busmem->shm_magic == SHMIF_MAGIC);
77786ea9bb1Spooka 		KASSERT(busmem->shm_gen >= sc->sc_devgen);
778a4aff0cbSpooka 
779a4aff0cbSpooka 		/* need more data? */
78086ea9bb1Spooka 		if (sc->sc_devgen == busmem->shm_gen &&
78111082373Spooka 		    shmif_nextpktoff(busmem, busmem->shm_last)
78211082373Spooka 		     == sc->sc_nextpacket) {
78311082373Spooka 			shmif_unlockbus(busmem);
784bf6d49ecSozaki-r 			error = rumpcomp_shmif_watchwait(sc->sc_kq);
785a4aff0cbSpooka 			if (__predict_false(error))
786a4aff0cbSpooka 				printf("shmif_rcv: wait failed %d\n", error);
787d9aab6f8Spooka 			membar_consumer();
788a4aff0cbSpooka 			continue;
789a4aff0cbSpooka 		}
790a4aff0cbSpooka 
79186ea9bb1Spooka 		if (stillvalid_p(sc)) {
79286ea9bb1Spooka 			nextpkt = sc->sc_nextpacket;
79386ea9bb1Spooka 		} else {
79486ea9bb1Spooka 			KASSERT(busmem->shm_gen > 0);
79511082373Spooka 			nextpkt = busmem->shm_first;
79611082373Spooka 			if (busmem->shm_first > busmem->shm_last)
79786ea9bb1Spooka 				sc->sc_devgen = busmem->shm_gen - 1;
79811082373Spooka 			else
79986ea9bb1Spooka 				sc->sc_devgen = busmem->shm_gen;
800d95e8f23Spooka 			DPRINTF(("dev %p overrun, new data: %d/%" PRIu64 "\n",
80186ea9bb1Spooka 			    sc, nextpkt, sc->sc_devgen));
80211082373Spooka 		}
80311082373Spooka 
80411082373Spooka 		/*
80511082373Spooka 		 * If our read pointer is ahead of the bus's last write, our
80611082373Spooka 		 * generation must be one behind.
80711082373Spooka 		 */
80811082373Spooka 		KASSERT(!(nextpkt > busmem->shm_last
80986ea9bb1Spooka 		    && sc->sc_devgen == busmem->shm_gen));
81011082373Spooka 
811962bc2f8Spooka 		wrap = false;
81211082373Spooka 		nextpkt = shmif_busread(busmem, &sp,
81311082373Spooka 		    nextpkt, sizeof(sp), &wrap);
81411082373Spooka 		KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN);
81511082373Spooka 		nextpkt = shmif_busread(busmem, mtod(m, void *),
81611082373Spooka 		    nextpkt, sp.sp_len, &wrap);
817a4aff0cbSpooka 
818a4aff0cbSpooka 		DPRINTF(("shmif_rcv: read packet of length %d at %d\n",
819b97bdf94Spooka 		    sp.sp_len, nextpkt));
820a4aff0cbSpooka 
82111082373Spooka 		sc->sc_nextpacket = nextpkt;
822ce68b7aeSpooka 		shmif_unlockbus(sc->sc_busmem);
823a4aff0cbSpooka 
82486ea9bb1Spooka 		if (wrap) {
82511082373Spooka 			sc->sc_devgen++;
826d95e8f23Spooka 			DPRINTF(("dev %p generation now %" PRIu64 "\n",
82786ea9bb1Spooka 			    sc, sc->sc_devgen));
82886ea9bb1Spooka 		}
82911082373Spooka 
830ef255f82Spooka 		/*
831ef255f82Spooka 		 * Ignore packets too short to possibly be valid.
832ef255f82Spooka 		 * This is hit at least for the first frame on a new bus.
833ef255f82Spooka 		 */
8343dfa58adSpooka 		if (__predict_false(sp.sp_len < ETHER_HDR_LEN)) {
8353dfa58adSpooka 			DPRINTF(("shmif read packet len %d < ETHER_HDR_LEN\n",
8363dfa58adSpooka 			    sp.sp_len));
8373dfa58adSpooka 			continue;
8383dfa58adSpooka 		}
8393dfa58adSpooka 
840b97bdf94Spooka 		m->m_len = m->m_pkthdr.len = sp.sp_len;
841d938d837Sozaki-r 		m_set_rcvif(m, ifp);
842a4aff0cbSpooka 
84386a95d8eSpooka 		/*
84486a95d8eSpooka 		 * Test if we want to pass the packet upwards
84586a95d8eSpooka 		 */
846a4aff0cbSpooka 		eth = mtod(m, struct ether_header *);
8474a952ad7Sozaki-r 		/*
8484a952ad7Sozaki-r 		 * Compare with DOWN to allow UNKNOWN (the default value),
849dd766027Sriastradh 		 * which is required by some ATF tests using rump servers
850dd766027Sriastradh 		 * written in C.
8514a952ad7Sozaki-r 		 */
8524a952ad7Sozaki-r 		if (ifp->if_link_state == LINK_STATE_DOWN) {
8534a952ad7Sozaki-r 			passup = false;
8544a952ad7Sozaki-r 		} else if (sp.sp_sender == sc->sc_uid) {
8555c34a715Sozaki-r 			passup = false;
8565c34a715Sozaki-r 		} else if (memcmp(eth->ether_dhost, CLLADDR(ifp->if_sadl),
85786a95d8eSpooka 		    ETHER_ADDR_LEN) == 0) {
85886a95d8eSpooka 			passup = true;
859ccdd96e5Spooka 		} else if (ETHER_IS_MULTICAST(eth->ether_dhost)) {
86086a95d8eSpooka 			passup = true;
86186a95d8eSpooka 		} else if (ifp->if_flags & IFF_PROMISC) {
86286a95d8eSpooka 			m->m_flags |= M_PROMISC;
86386a95d8eSpooka 			passup = true;
8642750f1b5Spooka 		} else {
8652750f1b5Spooka 			passup = false;
86686a95d8eSpooka 		}
86786a95d8eSpooka 
86886a95d8eSpooka 		if (passup) {
869e1135cd9Sozaki-r 			int bound;
870f7201ab7Srin 
871f7201ab7Srin 			m = ether_sw_offload_rx(ifp, m);
872f7201ab7Srin 
8739ae53a34Spooka 			KERNEL_LOCK(1, NULL);
874220ff4e7Sozaki-r 			/* Prevent LWP migrations between CPUs for psref(9) */
875e1135cd9Sozaki-r 			bound = curlwp_bind();
8769c4cd063Sozaki-r 			if_input(ifp, m);
877e1135cd9Sozaki-r 			curlwp_bindx(bound);
8789ae53a34Spooka 			KERNEL_UNLOCK_ONE(NULL);
879f7201ab7Srin 
880a4aff0cbSpooka 			m = NULL;
881a4aff0cbSpooka 		}
88286a95d8eSpooka 		/* else: reuse mbuf for a future packet */
883a4aff0cbSpooka 	}
884d9aab6f8Spooka 	m_freem(m);
885d9aab6f8Spooka 	m = NULL;
886a4aff0cbSpooka 
887d9aab6f8Spooka 	if (!sc->sc_dying)
888d9aab6f8Spooka 		goto reup;
889d9aab6f8Spooka 
890d9aab6f8Spooka 	kthread_exit(0);
891a4aff0cbSpooka }
892