1 /* $NetBSD: if_shmem.c,v 1.28 2010/08/17 20:42:47 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Development of this software was supported by The Nokia Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.28 2010/08/17 20:42:47 pooka Exp $"); 32 33 #include <sys/param.h> 34 #include <sys/atomic.h> 35 #include <sys/fcntl.h> 36 #include <sys/kmem.h> 37 #include <sys/kthread.h> 38 #include <sys/lock.h> 39 #include <sys/atomic.h> 40 41 #include <net/if.h> 42 #include <net/if_ether.h> 43 44 #include <netinet/in.h> 45 #include <netinet/in_var.h> 46 47 #include <rump/rump.h> 48 #include <rump/rumpuser.h> 49 50 #include "rump_private.h" 51 #include "rump_net_private.h" 52 53 /* 54 * Do r/w prefault for backend pages when attaching the interface. 55 * This works aroud the most likely kernel/ffs/x86pmap bug described 56 * in http://mail-index.netbsd.org/tech-kern/2010/08/17/msg008749.html 57 * 58 * NOTE: read prefaulting is not enough (that's done always)! 59 */ 60 61 #define PREFAULT_RW 62 63 /* 64 * A virtual ethernet interface which uses shared memory from a 65 * memory mapped file as the bus. 66 */ 67 68 static int shmif_init(struct ifnet *); 69 static int shmif_ioctl(struct ifnet *, u_long, void *); 70 static void shmif_start(struct ifnet *); 71 static void shmif_stop(struct ifnet *, int); 72 73 #include "shmifvar.h" 74 75 struct shmif_sc { 76 struct ethercom sc_ec; 77 uint8_t sc_myaddr[6]; 78 struct shmif_mem *sc_busmem; 79 int sc_memfd; 80 int sc_kq; 81 82 uint64_t sc_devgen; 83 uint32_t sc_nextpacket; 84 }; 85 86 static const uint32_t busversion = SHMIF_VERSION; 87 88 static void shmif_rcv(void *); 89 90 static uint32_t numif; 91 92 #define LOCK_UNLOCKED 0 93 #define LOCK_LOCKED 1 94 #define LOCK_COOLDOWN 1001 95 96 /* 97 * This locking needs work and will misbehave severely if: 98 * 1) the backing memory has to be paged in 99 * 2) some lockholder exits while holding the lock 100 */ 101 static void 102 shmif_lockbus(struct shmif_mem *busmem) 103 { 104 int i = 0; 105 106 while (__predict_false(atomic_cas_32(&busmem->shm_lock, 107 LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) { 108 if (__predict_false(++i > LOCK_COOLDOWN)) { 109 uint64_t sec, nsec; 110 int error; 111 112 sec = 0; 113 nsec = 1000*1000; /* 1ms */ 114 rumpuser_nanosleep(&sec, &nsec, &error); 115 i = 0; 116 } 117 continue; 118 } 119 membar_enter(); 120 } 121 122 static void 123 shmif_unlockbus(struct shmif_mem *busmem) 124 { 125 unsigned int old; 126 127 membar_exit(); 128 old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED); 129 KASSERT(old == LOCK_LOCKED); 130 } 131 132 int 133 rump_shmif_create(const char *path, int *ifnum) 134 { 135 struct shmif_sc *sc; 136 struct ifnet *ifp; 137 uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 }; 138 uint32_t randnum; 139 unsigned mynum; 140 volatile uint8_t v; 141 volatile uint8_t *p; 142 int error; 143 144 randnum = arc4random(); 145 memcpy(&enaddr[2], &randnum, sizeof(randnum)); 146 mynum = atomic_inc_uint_nv(&numif)-1; 147 148 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 149 ifp = &sc->sc_ec.ec_if; 150 memcpy(sc->sc_myaddr, enaddr, sizeof(enaddr)); 151 152 sc->sc_memfd = rumpuser_open(path, O_RDWR | O_CREAT, &error); 153 if (sc->sc_memfd == -1) 154 goto fail; 155 sc->sc_busmem = rumpuser_filemmap(sc->sc_memfd, 0, BUSMEM_SIZE, 156 RUMPUSER_FILEMMAP_TRUNCATE | RUMPUSER_FILEMMAP_SHARED 157 | RUMPUSER_FILEMMAP_READ | RUMPUSER_FILEMMAP_WRITE, &error); 158 if (error) 159 goto fail; 160 161 if (sc->sc_busmem->shm_magic && sc->sc_busmem->shm_magic != SHMIF_MAGIC) 162 panic("bus is not magical"); 163 164 165 /* Prefault in pages to minimize runtime penalty with buslock */ 166 for (p = (uint8_t *)sc->sc_busmem; 167 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 168 p += PAGE_SIZE) 169 v = *p; 170 171 shmif_lockbus(sc->sc_busmem); 172 /* we're first? initialize bus */ 173 if (sc->sc_busmem->shm_magic == 0) { 174 sc->sc_busmem->shm_magic = SHMIF_MAGIC; 175 sc->sc_busmem->shm_first = BUSMEM_DATASIZE; 176 } 177 178 sc->sc_nextpacket = sc->sc_busmem->shm_last; 179 sc->sc_devgen = sc->sc_busmem->shm_gen; 180 181 #ifdef PREFAULT_RW 182 for (p = (uint8_t *)sc->sc_busmem; 183 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 184 p += PAGE_SIZE) { 185 v = *p; 186 *p = v; 187 } 188 #endif 189 shmif_unlockbus(sc->sc_busmem); 190 191 sc->sc_kq = rumpuser_writewatchfile_setup(-1, sc->sc_memfd, 0, &error); 192 if (sc->sc_kq == -1) 193 goto fail; 194 195 sprintf(ifp->if_xname, "shmif%d", mynum); 196 ifp->if_softc = sc; 197 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST; 198 ifp->if_init = shmif_init; 199 ifp->if_ioctl = shmif_ioctl; 200 ifp->if_start = shmif_start; 201 ifp->if_stop = shmif_stop; 202 ifp->if_mtu = ETHERMTU; 203 204 if_attach(ifp); 205 ether_ifattach(ifp, enaddr); 206 207 aprint_verbose("shmif%d: bus %s\n", mynum, path); 208 aprint_verbose("shmif%d: Ethernet address %s\n", 209 mynum, ether_sprintf(enaddr)); 210 211 if (ifnum) 212 *ifnum = mynum; 213 return 0; 214 215 fail: 216 panic("rump_shmemif_create: fixme"); 217 } 218 219 static int 220 shmif_init(struct ifnet *ifp) 221 { 222 int error = 0; 223 224 if (rump_threads) { 225 error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, 226 shmif_rcv, ifp, NULL, "shmif"); 227 } else { 228 printf("WARNING: threads not enabled, shmif NOT working\n"); 229 } 230 231 ifp->if_flags |= IFF_RUNNING; 232 return error; 233 } 234 235 static int 236 shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data) 237 { 238 int s, rv; 239 240 s = splnet(); 241 rv = ether_ioctl(ifp, cmd, data); 242 if (rv == ENETRESET) 243 rv = 0; 244 splx(s); 245 246 return rv; 247 } 248 249 /* send everything in-context */ 250 static void 251 shmif_start(struct ifnet *ifp) 252 { 253 struct shmif_sc *sc = ifp->if_softc; 254 struct shmif_mem *busmem = sc->sc_busmem; 255 struct mbuf *m, *m0; 256 uint32_t dataoff; 257 uint32_t pktsize, pktwrote; 258 bool wrote = false; 259 bool wrap; 260 int error; 261 262 ifp->if_flags |= IFF_OACTIVE; 263 264 for (;;) { 265 struct shmif_pkthdr sp; 266 struct timeval tv; 267 268 IF_DEQUEUE(&ifp->if_snd, m0); 269 if (m0 == NULL) { 270 break; 271 } 272 273 pktsize = 0; 274 for (m = m0; m != NULL; m = m->m_next) { 275 pktsize += m->m_len; 276 } 277 KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN); 278 279 getmicrouptime(&tv); 280 sp.sp_len = pktsize; 281 sp.sp_sec = tv.tv_sec; 282 sp.sp_usec = tv.tv_usec; 283 284 shmif_lockbus(busmem); 285 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 286 busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last); 287 288 wrap = false; 289 dataoff = shmif_buswrite(busmem, 290 busmem->shm_last, &sp, sizeof(sp), &wrap); 291 pktwrote = 0; 292 for (m = m0; m != NULL; m = m->m_next) { 293 pktwrote += m->m_len; 294 dataoff = shmif_buswrite(busmem, dataoff, 295 mtod(m, void *), m->m_len, &wrap); 296 } 297 KASSERT(pktwrote == pktsize); 298 if (wrap) { 299 busmem->shm_gen++; 300 DPRINTF(("bus generation now %d\n", busmem->shm_gen)); 301 } 302 shmif_unlockbus(busmem); 303 304 m_freem(m0); 305 wrote = true; 306 307 DPRINTF(("shmif_start: send %d bytes at off %d\n", 308 pktsize, busmem->shm_last)); 309 } 310 311 ifp->if_flags &= ~IFF_OACTIVE; 312 313 /* wakeup */ 314 if (wrote) 315 rumpuser_pwrite(sc->sc_memfd, 316 &busversion, sizeof(busversion), IFMEM_WAKEUP, &error); 317 } 318 319 static void 320 shmif_stop(struct ifnet *ifp, int disable) 321 { 322 323 panic("%s: unimpl", __func__); 324 } 325 326 327 /* 328 * Check if we have been sleeping too long. Basically, 329 * our in-sc nextpkt must by first <= nextpkt <= last"+1". 330 * We use the fact that first is guaranteed to never overlap 331 * with the last frame in the ring. 332 */ 333 static __inline bool 334 stillvalid_p(struct shmif_sc *sc) 335 { 336 struct shmif_mem *busmem = sc->sc_busmem; 337 unsigned gendiff = busmem->shm_gen - sc->sc_devgen; 338 uint32_t lastoff, devoff; 339 340 KASSERT(busmem->shm_first != busmem->shm_last); 341 342 /* normalize onto a 2x busmem chunk */ 343 devoff = sc->sc_nextpacket; 344 lastoff = shmif_nextpktoff(busmem, busmem->shm_last); 345 346 /* trivial case */ 347 if (gendiff > 1) 348 return false; 349 KASSERT(gendiff <= 1); 350 351 /* Normalize onto 2x busmem chunk */ 352 if (busmem->shm_first >= lastoff) { 353 lastoff += BUSMEM_DATASIZE; 354 if (gendiff == 0) 355 devoff += BUSMEM_DATASIZE; 356 } else { 357 if (gendiff) 358 return false; 359 } 360 361 return devoff >= busmem->shm_first && devoff <= lastoff; 362 } 363 364 static void 365 shmif_rcv(void *arg) 366 { 367 struct ifnet *ifp = arg; 368 struct shmif_sc *sc = ifp->if_softc; 369 struct shmif_mem *busmem = sc->sc_busmem; 370 struct mbuf *m = NULL; 371 struct ether_header *eth; 372 uint32_t nextpkt; 373 bool wrap; 374 int error; 375 376 for (;;) { 377 struct shmif_pkthdr sp; 378 379 if (m == NULL) { 380 m = m_gethdr(M_WAIT, MT_DATA); 381 MCLGET(m, M_WAIT); 382 } 383 384 DPRINTF(("waiting %d/%d\n", sc->sc_nextpacket, sc->sc_devgen)); 385 KASSERT(m->m_flags & M_EXT); 386 387 shmif_lockbus(busmem); 388 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 389 KASSERT(busmem->shm_gen >= sc->sc_devgen); 390 391 /* need more data? */ 392 if (sc->sc_devgen == busmem->shm_gen && 393 shmif_nextpktoff(busmem, busmem->shm_last) 394 == sc->sc_nextpacket) { 395 shmif_unlockbus(busmem); 396 error = 0; 397 rumpuser_writewatchfile_wait(sc->sc_kq, NULL, &error); 398 if (__predict_false(error)) 399 printf("shmif_rcv: wait failed %d\n", error); 400 continue; 401 } 402 403 if (stillvalid_p(sc)) { 404 nextpkt = sc->sc_nextpacket; 405 } else { 406 KASSERT(busmem->shm_gen > 0); 407 nextpkt = busmem->shm_first; 408 if (busmem->shm_first > busmem->shm_last) 409 sc->sc_devgen = busmem->shm_gen - 1; 410 else 411 sc->sc_devgen = busmem->shm_gen; 412 DPRINTF(("dev %p overrun, new data: %d/%d\n", 413 sc, nextpkt, sc->sc_devgen)); 414 } 415 416 /* 417 * If our read pointer is ahead the bus last write, our 418 * generation must be one behind. 419 */ 420 KASSERT(!(nextpkt > busmem->shm_last 421 && sc->sc_devgen == busmem->shm_gen)); 422 423 wrap = false; 424 nextpkt = shmif_busread(busmem, &sp, 425 nextpkt, sizeof(sp), &wrap); 426 KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN); 427 nextpkt = shmif_busread(busmem, mtod(m, void *), 428 nextpkt, sp.sp_len, &wrap); 429 430 DPRINTF(("shmif_rcv: read packet of length %d at %d\n", 431 sp.sp_len, nextpkt)); 432 433 sc->sc_nextpacket = nextpkt; 434 shmif_unlockbus(sc->sc_busmem); 435 436 if (wrap) { 437 sc->sc_devgen++; 438 DPRINTF(("dev %p generation now %d\n", 439 sc, sc->sc_devgen)); 440 } 441 442 m->m_len = m->m_pkthdr.len = sp.sp_len; 443 m->m_pkthdr.rcvif = ifp; 444 445 /* if it's from us, don't pass up and reuse storage space */ 446 eth = mtod(m, struct ether_header *); 447 if (memcmp(eth->ether_shost, sc->sc_myaddr, 6) != 0) { 448 KERNEL_LOCK(1, NULL); 449 ifp->if_input(ifp, m); 450 KERNEL_UNLOCK_ONE(NULL); 451 m = NULL; 452 } 453 } 454 455 panic("shmif_worker is a lazy boy %d\n", error); 456 } 457