xref: /netbsd-src/sys/rump/net/lib/libshmif/if_shmem.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: if_shmem.c,v 1.28 2010/08/17 20:42:47 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by The Nokia Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.28 2010/08/17 20:42:47 pooka Exp $");
32 
33 #include <sys/param.h>
34 #include <sys/atomic.h>
35 #include <sys/fcntl.h>
36 #include <sys/kmem.h>
37 #include <sys/kthread.h>
38 #include <sys/lock.h>
39 #include <sys/atomic.h>
40 
41 #include <net/if.h>
42 #include <net/if_ether.h>
43 
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 
47 #include <rump/rump.h>
48 #include <rump/rumpuser.h>
49 
50 #include "rump_private.h"
51 #include "rump_net_private.h"
52 
53 /*
54  * Do r/w prefault for backend pages when attaching the interface.
 * This works around the most likely kernel/ffs/x86pmap bug described
56  * in http://mail-index.netbsd.org/tech-kern/2010/08/17/msg008749.html
57  *
58  * NOTE: read prefaulting is not enough (that's done always)!
59  */
60 
61 #define PREFAULT_RW
62 
63 /*
64  * A virtual ethernet interface which uses shared memory from a
65  * memory mapped file as the bus.
66  */
67 
68 static int	shmif_init(struct ifnet *);
69 static int	shmif_ioctl(struct ifnet *, u_long, void *);
70 static void	shmif_start(struct ifnet *);
71 static void	shmif_stop(struct ifnet *, int);
72 
73 #include "shmifvar.h"
74 
/* Per-interface softc for a shmif instance. */
struct shmif_sc {
	struct ethercom sc_ec;		/* generic ethernet state; must be first */
	uint8_t sc_myaddr[6];		/* our MAC address; used by the receive
					 * loop to discard our own transmissions */
	struct shmif_mem *sc_busmem;	/* mmapped shared-file "bus" */
	int sc_memfd;			/* fd of the backing bus file */
	int sc_kq;			/* writewatch handle used to sleep until
					 * someone else writes to the bus */

	uint64_t sc_devgen;		/* bus generation we have consumed up to */
	uint32_t sc_nextpacket;		/* bus offset of next packet to read */
};
85 
86 static const uint32_t busversion = SHMIF_VERSION;
87 
88 static void shmif_rcv(void *);
89 
90 static uint32_t numif;
91 
92 #define LOCK_UNLOCKED	0
93 #define LOCK_LOCKED	1
94 #define LOCK_COOLDOWN	1001
95 
96 /*
97  * This locking needs work and will misbehave severely if:
98  * 1) the backing memory has to be paged in
99  * 2) some lockholder exits while holding the lock
100  */
101 static void
102 shmif_lockbus(struct shmif_mem *busmem)
103 {
104 	int i = 0;
105 
106 	while (__predict_false(atomic_cas_32(&busmem->shm_lock,
107 	    LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) {
108 		if (__predict_false(++i > LOCK_COOLDOWN)) {
109 			uint64_t sec, nsec;
110 			int error;
111 
112 			sec = 0;
113 			nsec = 1000*1000; /* 1ms */
114 			rumpuser_nanosleep(&sec, &nsec, &error);
115 			i = 0;
116 		}
117 		continue;
118 	}
119 	membar_enter();
120 }
121 
122 static void
123 shmif_unlockbus(struct shmif_mem *busmem)
124 {
125 	unsigned int old;
126 
127 	membar_exit();
128 	old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED);
129 	KASSERT(old == LOCK_LOCKED);
130 }
131 
132 int
133 rump_shmif_create(const char *path, int *ifnum)
134 {
135 	struct shmif_sc *sc;
136 	struct ifnet *ifp;
137 	uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 };
138 	uint32_t randnum;
139 	unsigned mynum;
140 	volatile uint8_t v;
141 	volatile uint8_t *p;
142 	int error;
143 
144 	randnum = arc4random();
145 	memcpy(&enaddr[2], &randnum, sizeof(randnum));
146 	mynum = atomic_inc_uint_nv(&numif)-1;
147 
148 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
149 	ifp = &sc->sc_ec.ec_if;
150 	memcpy(sc->sc_myaddr, enaddr, sizeof(enaddr));
151 
152 	sc->sc_memfd = rumpuser_open(path, O_RDWR | O_CREAT, &error);
153 	if (sc->sc_memfd == -1)
154 		goto fail;
155 	sc->sc_busmem = rumpuser_filemmap(sc->sc_memfd, 0, BUSMEM_SIZE,
156 	    RUMPUSER_FILEMMAP_TRUNCATE | RUMPUSER_FILEMMAP_SHARED
157 	    | RUMPUSER_FILEMMAP_READ | RUMPUSER_FILEMMAP_WRITE, &error);
158 	if (error)
159 		goto fail;
160 
161 	if (sc->sc_busmem->shm_magic && sc->sc_busmem->shm_magic != SHMIF_MAGIC)
162 		panic("bus is not magical");
163 
164 
165 	/* Prefault in pages to minimize runtime penalty with buslock */
166 	for (p = (uint8_t *)sc->sc_busmem;
167 	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
168 	    p += PAGE_SIZE)
169 		v = *p;
170 
171 	shmif_lockbus(sc->sc_busmem);
172 	/* we're first?  initialize bus */
173 	if (sc->sc_busmem->shm_magic == 0) {
174 		sc->sc_busmem->shm_magic = SHMIF_MAGIC;
175 		sc->sc_busmem->shm_first = BUSMEM_DATASIZE;
176 	}
177 
178 	sc->sc_nextpacket = sc->sc_busmem->shm_last;
179 	sc->sc_devgen = sc->sc_busmem->shm_gen;
180 
181 #ifdef PREFAULT_RW
182 	for (p = (uint8_t *)sc->sc_busmem;
183 	    p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE;
184 	    p += PAGE_SIZE) {
185 		v = *p;
186 		*p = v;
187 	}
188 #endif
189 	shmif_unlockbus(sc->sc_busmem);
190 
191 	sc->sc_kq = rumpuser_writewatchfile_setup(-1, sc->sc_memfd, 0, &error);
192 	if (sc->sc_kq == -1)
193 		goto fail;
194 
195 	sprintf(ifp->if_xname, "shmif%d", mynum);
196 	ifp->if_softc = sc;
197 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
198 	ifp->if_init = shmif_init;
199 	ifp->if_ioctl = shmif_ioctl;
200 	ifp->if_start = shmif_start;
201 	ifp->if_stop = shmif_stop;
202 	ifp->if_mtu = ETHERMTU;
203 
204 	if_attach(ifp);
205 	ether_ifattach(ifp, enaddr);
206 
207 	aprint_verbose("shmif%d: bus %s\n", mynum, path);
208 	aprint_verbose("shmif%d: Ethernet address %s\n",
209 	    mynum, ether_sprintf(enaddr));
210 
211 	if (ifnum)
212 		*ifnum = mynum;
213 	return 0;
214 
215  fail:
216 	panic("rump_shmemif_create: fixme");
217 }
218 
219 static int
220 shmif_init(struct ifnet *ifp)
221 {
222 	int error = 0;
223 
224 	if (rump_threads) {
225 		error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
226 		    shmif_rcv, ifp, NULL, "shmif");
227 	} else {
228 		printf("WARNING: threads not enabled, shmif NOT working\n");
229 	}
230 
231 	ifp->if_flags |= IFF_RUNNING;
232 	return error;
233 }
234 
235 static int
236 shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
237 {
238 	int s, rv;
239 
240 	s = splnet();
241 	rv = ether_ioctl(ifp, cmd, data);
242 	if (rv == ENETRESET)
243 		rv = 0;
244 	splx(s);
245 
246 	return rv;
247 }
248 
/*
 * Transmit routine.  Sends everything in-context (no worker thread):
 * each queued packet is written onto the shared bus as a
 * shmif_pkthdr followed by the frame data, all under the bus lock.
 * Afterwards the backing file is poked so other nodes' writewatchers
 * wake up.
 */
static void
shmif_start(struct ifnet *ifp)
{
	struct shmif_sc *sc = ifp->if_softc;
	struct shmif_mem *busmem = sc->sc_busmem;
	struct mbuf *m, *m0;
	uint32_t dataoff;
	uint32_t pktsize, pktwrote;
	bool wrote = false;
	bool wrap;
	int error;

	ifp->if_flags |= IFF_OACTIVE;

	for (;;) {
		struct shmif_pkthdr sp;
		struct timeval tv;

		/* next outgoing packet; done when the queue is empty */
		IF_DEQUEUE(&ifp->if_snd, m0);
		if (m0 == NULL) {
			break;
		}

		/* total frame length over the whole mbuf chain */
		pktsize = 0;
		for (m = m0; m != NULL; m = m->m_next) {
			pktsize += m->m_len;
		}
		KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN);

		/* bus packet header: length + transmit timestamp */
		getmicrouptime(&tv);
		sp.sp_len = pktsize;
		sp.sp_sec = tv.tv_sec;
		sp.sp_usec = tv.tv_usec;

		shmif_lockbus(busmem);
		KASSERT(busmem->shm_magic == SHMIF_MAGIC);
		/* advance the bus tail to where this packet will start */
		busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last);

		/* write header, then payload mbuf by mbuf */
		wrap = false;
		dataoff = shmif_buswrite(busmem,
		    busmem->shm_last, &sp, sizeof(sp), &wrap);
		pktwrote = 0;
		for (m = m0; m != NULL; m = m->m_next) {
			pktwrote += m->m_len;
			dataoff = shmif_buswrite(busmem, dataoff,
			    mtod(m, void *), m->m_len, &wrap);
		}
		KASSERT(pktwrote == pktsize);
		/* ring wrapped around: bump the bus generation */
		if (wrap) {
			busmem->shm_gen++;
			DPRINTF(("bus generation now %d\n", busmem->shm_gen));
		}
		shmif_unlockbus(busmem);

		m_freem(m0);
		wrote = true;

		DPRINTF(("shmif_start: send %d bytes at off %d\n",
		    pktsize, busmem->shm_last));
	}

	ifp->if_flags &= ~IFF_OACTIVE;

	/* wakeup: write to the file so peers' writewatchers trigger */
	if (wrote)
		rumpuser_pwrite(sc->sc_memfd,
		    &busversion, sizeof(busversion), IFMEM_WAKEUP, &error);
}
318 
/*
 * Interface stop routine: unimplemented, panics if ever called.
 */
static void
shmif_stop(struct ifnet *ifp, int disable)
{

	panic("%s: unimpl", __func__);
}
325 
326 
327 /*
328  * Check if we have been sleeping too long.  Basically,
 * our in-sc nextpkt must satisfy first <= nextpkt <= last "+1".
330  * We use the fact that first is guaranteed to never overlap
331  * with the last frame in the ring.
332  */
/*
 * Return true if sc_nextpacket still points at valid, unconsumed
 * bus data, i.e. the reader has not been overrun by writers.
 */
static __inline bool
stillvalid_p(struct shmif_sc *sc)
{
	struct shmif_mem *busmem = sc->sc_busmem;
	/* generations behind the bus; unsigned wrap handles ordering */
	unsigned gendiff = busmem->shm_gen - sc->sc_devgen;
	uint32_t lastoff, devoff;

	KASSERT(busmem->shm_first != busmem->shm_last);

	/* normalize onto a 2x busmem chunk */
	devoff = sc->sc_nextpacket;
	lastoff = shmif_nextpktoff(busmem, busmem->shm_last);

	/* trivial case: more than one full generation behind -> overrun */
	if (gendiff > 1)
		return false;
	KASSERT(gendiff <= 1);

	/* Normalize onto 2x busmem chunk */
	if (busmem->shm_first >= lastoff) {
		/* write region wraps; unfold it past BUSMEM_DATASIZE */
		lastoff += BUSMEM_DATASIZE;
		if (gendiff == 0)
			devoff += BUSMEM_DATASIZE;
	} else {
		/* non-wrapped region: any generation lag means overrun */
		if (gendiff)
			return false;
	}

	/* valid iff our pointer lies within [first, last"+1"] */
	return devoff >= busmem->shm_first && devoff <= lastoff;
}
363 
/*
 * Receive worker thread (one per interface, created by shmif_init).
 * Loops forever: sleeps on the writewatch kqueue until the bus file
 * is written, then reads packets off the shared bus and passes them
 * up the stack, skipping frames we sent ourselves.
 */
static void
shmif_rcv(void *arg)
{
	struct ifnet *ifp = arg;
	struct shmif_sc *sc = ifp->if_softc;
	struct shmif_mem *busmem = sc->sc_busmem;
	struct mbuf *m = NULL;
	struct ether_header *eth;
	uint32_t nextpkt;
	bool wrap;
	int error;

	for (;;) {
		struct shmif_pkthdr sp;

		/* m is reused when the previous frame was our own */
		if (m == NULL) {
			m = m_gethdr(M_WAIT, MT_DATA);
			MCLGET(m, M_WAIT);
		}

		DPRINTF(("waiting %d/%d\n", sc->sc_nextpacket, sc->sc_devgen));
		KASSERT(m->m_flags & M_EXT);

		shmif_lockbus(busmem);
		KASSERT(busmem->shm_magic == SHMIF_MAGIC);
		KASSERT(busmem->shm_gen >= sc->sc_devgen);

		/* need more data? */
		if (sc->sc_devgen == busmem->shm_gen &&
		    shmif_nextpktoff(busmem, busmem->shm_last)
		     == sc->sc_nextpacket) {
			/* caught up: drop the lock and sleep until a write */
			shmif_unlockbus(busmem);
			error = 0;
			rumpuser_writewatchfile_wait(sc->sc_kq, NULL, &error);
			if (__predict_false(error))
				printf("shmif_rcv: wait failed %d\n", error);
			continue;
		}

		if (stillvalid_p(sc)) {
			nextpkt = sc->sc_nextpacket;
		} else {
			/* overrun: resync to the oldest packet on the bus */
			KASSERT(busmem->shm_gen > 0);
			nextpkt = busmem->shm_first;
			if (busmem->shm_first > busmem->shm_last)
				sc->sc_devgen = busmem->shm_gen - 1;
			else
				sc->sc_devgen = busmem->shm_gen;
			DPRINTF(("dev %p overrun, new data: %d/%d\n",
			    sc, nextpkt, sc->sc_devgen));
		}

		/*
		 * If our read pointer is ahead the bus last write, our
		 * generation must be one behind.
		 */
		KASSERT(!(nextpkt > busmem->shm_last
		    && sc->sc_devgen == busmem->shm_gen));

		/* read packet header, then the frame itself */
		wrap = false;
		nextpkt = shmif_busread(busmem, &sp,
		    nextpkt, sizeof(sp), &wrap);
		KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN);
		nextpkt = shmif_busread(busmem, mtod(m, void *),
		    nextpkt, sp.sp_len, &wrap);

		DPRINTF(("shmif_rcv: read packet of length %d at %d\n",
		    sp.sp_len, nextpkt));

		sc->sc_nextpacket = nextpkt;
		shmif_unlockbus(sc->sc_busmem);

		/* crossed the ring end: advance our generation to match */
		if (wrap) {
			sc->sc_devgen++;
			DPRINTF(("dev %p generation now %d\n",
			    sc, sc->sc_devgen));
		}

		m->m_len = m->m_pkthdr.len = sp.sp_len;
		m->m_pkthdr.rcvif = ifp;

		/* if it's from us, don't pass up and reuse storage space */
		eth = mtod(m, struct ether_header *);
		if (memcmp(eth->ether_shost, sc->sc_myaddr, 6) != 0) {
			KERNEL_LOCK(1, NULL);
			ifp->if_input(ifp, m);
			KERNEL_UNLOCK_ONE(NULL);
			m = NULL;	/* consumed by the stack */
		}
	}

	/* NOTREACHED: the loop above never exits */
	panic("shmif_worker is a lazy boy %d\n", error);
}
457