1 /* $NetBSD: if_shmem.c,v 1.81 2020/02/25 03:26:18 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (c) 2009, 2010 Antti Kantee. All Rights Reserved. 5 * 6 * Development of this software was supported by The Nokia Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.81 2020/02/25 03:26:18 ozaki-r Exp $"); 32 33 #include <sys/param.h> 34 #include <sys/atomic.h> 35 #include <sys/fcntl.h> 36 #include <sys/kmem.h> 37 #include <sys/kthread.h> 38 #include <sys/lock.h> 39 #include <sys/vmem.h> 40 #include <sys/cprng.h> 41 42 #include <net/bpf.h> 43 #include <net/if.h> 44 #include <net/if_dl.h> 45 #include <net/if_ether.h> 46 #include <net/ether_sw_offload.h> 47 48 #include <netinet/in.h> 49 #include <netinet/in_var.h> 50 51 #include <rump-sys/kern.h> 52 #include <rump-sys/net.h> 53 54 #include <rump/rump.h> 55 #include <rump/rumpuser.h> 56 57 #include "shmif_user.h" 58 59 static int shmif_clone(struct if_clone *, int); 60 static int shmif_unclone(struct ifnet *); 61 62 struct if_clone shmif_cloner = 63 IF_CLONE_INITIALIZER("shmif", shmif_clone, shmif_unclone); 64 65 /* 66 * Do r/w prefault for backend pages when attaching the interface. 67 * At least logically thinking improves performance (although no 68 * mlocking is done, so they might go away). 69 */ 70 #define PREFAULT_RW 71 72 /* 73 * A virtual ethernet interface which uses shared memory from a 74 * memory mapped file as the bus. 75 */ 76 77 static int shmif_init(struct ifnet *); 78 static int shmif_ioctl(struct ifnet *, u_long, void *); 79 static void shmif_start(struct ifnet *); 80 static void shmif_snd(struct ifnet *, struct mbuf *); 81 static void shmif_stop(struct ifnet *, int); 82 83 #include "shmifvar.h" 84 85 struct shmif_sc { 86 struct ethercom sc_ec; 87 struct shmif_mem *sc_busmem; 88 int sc_memfd; 89 int sc_kq; 90 int sc_unit; 91 92 char *sc_backfile; 93 size_t sc_backfilelen; 94 95 uint64_t sc_devgen; 96 uint32_t sc_nextpacket; 97 98 kmutex_t sc_mtx; 99 kcondvar_t sc_cv; 100 101 struct lwp *sc_rcvl; 102 bool sc_dying; 103 104 uint64_t sc_uid; 105 }; 106 107 static void shmif_rcv(void *); 108 109 #define LOCK_UNLOCKED 0 110 #define LOCK_LOCKED 1 111 #define LOCK_COOLDOWN 1001 112 113 vmem_t *shmif_units; 114 115 static void 116 dowakeup(struct shmif_sc *sc) 117 { 118 struct rumpuser_iovec iov; 119 uint32_t ver = SHMIF_VERSION; 120 size_t n; 121 122 iov.iov_base = &ver; 123 iov.iov_len = sizeof(ver); 124 rumpuser_iovwrite(sc->sc_memfd, &iov, 1, IFMEM_WAKEUP, &n); 125 } 126 127 /* 128 * This locking needs work and will misbehave severely if: 129 * 1) the backing memory has to be paged in 130 * 2) some lockholder exits while holding the lock 131 */ 132 static void 133 shmif_lockbus(struct shmif_mem *busmem) 134 { 135 int i = 0; 136 137 while (__predict_false(atomic_cas_32(&busmem->shm_lock, 138 LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) { 139 if (__predict_false(++i > LOCK_COOLDOWN)) { 140 /* wait 1ms */ 141 rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 142 0, 1000*1000); 143 i = 0; 144 } 145 continue; 146 } 147 membar_enter(); 148 } 149 150 static void 151 shmif_unlockbus(struct shmif_mem *busmem) 152 { 153 unsigned int old __diagused; 154 155 membar_exit(); 156 old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED); 157 KASSERT(old == LOCK_LOCKED); 158 } 159 160 static int 161 allocif(int unit, struct shmif_sc **scp) 162 { 163 uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 }; 164 struct shmif_sc *sc; 165 struct ifnet *ifp; 166 uint64_t randnum; 167 int error; 168 169 randnum = cprng_strong64(); 170 memcpy(&enaddr[2], &randnum, 4); 171 172 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 173 sc->sc_memfd = -1; 174 sc->sc_unit = unit; 175 sc->sc_uid = randnum; 176 177 ifp = &sc->sc_ec.ec_if; 178 179 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "shmif%d", unit); 180 ifp->if_softc = sc; 181 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 182 ifp->if_init = shmif_init; 183 ifp->if_ioctl = shmif_ioctl; 184 ifp->if_start = shmif_start; 185 ifp->if_stop = shmif_stop; 186 ifp->if_mtu = ETHERMTU; 187 ifp->if_dlt = DLT_EN10MB; 188 ifp->if_capabilities = IFCAP_TSOv4 | IFCAP_TSOv6 | 189 IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx | 190 IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx | 191 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | 192 IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx | 193 IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx; 194 195 mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE); 196 cv_init(&sc->sc_cv, "shmifcv"); 197 198 error = if_initialize(ifp); 199 if (error != 0) { 200 aprint_error("shmif%d: if_initialize failed(%d)\n", unit, 201 error); 202 cv_destroy(&sc->sc_cv); 203 mutex_destroy(&sc->sc_mtx); 204 kmem_free(sc, sizeof(*sc)); 205 206 return error; 207 } 208 #if 1 209 char buf[256]; 210 211 if (rumpuser_getparam("RUMP_SHMIF_CAPENABLE", buf, sizeof(buf)) == 0) { 212 uint64_t capen = strtoul(buf, NULL, 0); 213 214 ifp->if_capenable = capen & ifp->if_capabilities; 215 } 216 #endif 217 218 ether_ifattach(ifp, enaddr); 219 if_register(ifp); 220 221 aprint_verbose("shmif%d: Ethernet address %s\n", 222 unit, ether_sprintf(enaddr)); 223 224 if (scp) 225 *scp = sc; 226 227 error = 0; 228 if (rump_threads) { 229 error = kthread_create(PRI_NONE, 230 KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, NULL, 231 shmif_rcv, ifp, &sc->sc_rcvl, "shmif"); 232 } else { 233 printf("WARNING: threads not enabled, shmif NOT working\n"); 234 } 235 236 if (error) { 237 shmif_unclone(ifp); 238 } 239 240 return error; 241 } 242 243 static int 244 initbackend(struct shmif_sc *sc, int memfd) 245 { 246 volatile uint8_t v; 247 volatile uint8_t *p; 248 void *mem; 249 int error; 250 251 error = rumpcomp_shmif_mmap(memfd, BUSMEM_SIZE, &mem); 252 if (error) 253 return error; 254 sc->sc_busmem = mem; 255 256 if (sc->sc_busmem->shm_magic 257 && sc->sc_busmem->shm_magic != SHMIF_MAGIC) { 258 printf("bus is not magical"); 259 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 260 return ENOEXEC; 261 } 262 263 /* 264 * Prefault in pages to minimize runtime penalty with buslock. 265 * Use 512 instead of PAGE_SIZE to make sure we catch cases where 266 * rump kernel PAGE_SIZE > host page size. 267 */ 268 for (p = (uint8_t *)sc->sc_busmem; 269 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 270 p += 512) 271 v = *p; 272 273 shmif_lockbus(sc->sc_busmem); 274 /* we're first? initialize bus */ 275 if (sc->sc_busmem->shm_magic == 0) { 276 sc->sc_busmem->shm_magic = SHMIF_MAGIC; 277 sc->sc_busmem->shm_first = BUSMEM_DATASIZE; 278 } 279 280 sc->sc_nextpacket = sc->sc_busmem->shm_last; 281 sc->sc_devgen = sc->sc_busmem->shm_gen; 282 283 #ifdef PREFAULT_RW 284 for (p = (uint8_t *)sc->sc_busmem; 285 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 286 p += PAGE_SIZE) { 287 v = *p; 288 *p = v; 289 } 290 #endif 291 shmif_unlockbus(sc->sc_busmem); 292 293 sc->sc_kq = -1; 294 error = rumpcomp_shmif_watchsetup(&sc->sc_kq, memfd); 295 if (error) { 296 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 297 return error; 298 } 299 300 sc->sc_memfd = memfd; 301 302 return error; 303 } 304 305 static void 306 finibackend(struct shmif_sc *sc) 307 { 308 309 if (sc->sc_backfile == NULL) 310 return; 311 312 if (sc->sc_backfile) { 313 kmem_free(sc->sc_backfile, sc->sc_backfilelen); 314 sc->sc_backfile = NULL; 315 sc->sc_backfilelen = 0; 316 } 317 318 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 319 rumpuser_close(sc->sc_memfd); 320 rumpuser_close(sc->sc_kq); 321 322 sc->sc_memfd = -1; 323 } 324 325 int 326 rump_shmif_create(const char *path, int *ifnum) 327 { 328 struct shmif_sc *sc; 329 vmem_addr_t t; 330 int unit, error; 331 int memfd = -1; /* XXXgcc */ 332 333 if (path) { 334 error = rumpuser_open(path, 335 RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd); 336 if (error) 337 return error; 338 } 339 340 error = vmem_xalloc(shmif_units, 1, 0, 0, 0, 341 VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_INSTANTFIT | VM_SLEEP, &t); 342 343 if (error != 0) { 344 if (path) 345 rumpuser_close(memfd); 346 return error; 347 } 348 349 unit = t - 1; 350 351 if ((error = allocif(unit, &sc)) != 0) { 352 if (path) 353 rumpuser_close(memfd); 354 return error; 355 } 356 357 if (!path) 358 goto out; 359 360 error = initbackend(sc, memfd); 361 if (error) { 362 shmif_unclone(&sc->sc_ec.ec_if); 363 return error; 364 } 365 366 sc->sc_backfilelen = strlen(path)+1; 367 sc->sc_backfile = kmem_alloc(sc->sc_backfilelen, KM_SLEEP); 368 strcpy(sc->sc_backfile, path); 369 370 out: 371 if (ifnum) 372 *ifnum = unit; 373 374 return 0; 375 } 376 377 static int 378 shmif_clone(struct if_clone *ifc, int unit) 379 { 380 int rc __diagused; 381 vmem_addr_t unit2; 382 383 /* 384 * Ok, we know the unit number, but we must still reserve it. 385 * Otherwise the wildcard-side of things might get the same one. 386 * This is slightly offset-happy due to vmem. First, we offset 387 * the range of unit numbers by +1 since vmem cannot deal with 388 * ranges starting from 0. Talk about uuuh. 389 */ 390 rc = vmem_xalloc(shmif_units, 1, 0, 0, 0, unit+1, unit+1, 391 VM_SLEEP | VM_INSTANTFIT, &unit2); 392 KASSERT(rc == 0 && unit2-1 == unit); 393 394 return allocif(unit, NULL); 395 } 396 397 static int 398 shmif_unclone(struct ifnet *ifp) 399 { 400 struct shmif_sc *sc = ifp->if_softc; 401 402 shmif_stop(ifp, 1); 403 if_down(ifp); 404 405 mutex_enter(&sc->sc_mtx); 406 sc->sc_dying = true; 407 cv_broadcast(&sc->sc_cv); 408 mutex_exit(&sc->sc_mtx); 409 410 if (sc->sc_rcvl) 411 kthread_join(sc->sc_rcvl); 412 sc->sc_rcvl = NULL; 413 414 /* 415 * Need to be called after the kthread left, otherwise closing kqueue 416 * (sc_kq) hangs sometimes perhaps because of a race condition between 417 * close and kevent in the kthread on the kqueue. 418 */ 419 finibackend(sc); 420 421 vmem_xfree(shmif_units, sc->sc_unit+1, 1); 422 423 ether_ifdetach(ifp); 424 if_detach(ifp); 425 426 cv_destroy(&sc->sc_cv); 427 mutex_destroy(&sc->sc_mtx); 428 429 kmem_free(sc, sizeof(*sc)); 430 431 return 0; 432 } 433 434 static int 435 shmif_init(struct ifnet *ifp) 436 { 437 struct shmif_sc *sc = ifp->if_softc; 438 int error = 0; 439 440 if (sc->sc_memfd == -1) 441 return ENXIO; 442 KASSERT(sc->sc_busmem); 443 444 ifp->if_flags |= IFF_RUNNING; 445 446 mutex_enter(&sc->sc_mtx); 447 sc->sc_nextpacket = sc->sc_busmem->shm_last; 448 sc->sc_devgen = sc->sc_busmem->shm_gen; 449 450 cv_broadcast(&sc->sc_cv); 451 mutex_exit(&sc->sc_mtx); 452 453 return error; 454 } 455 456 static int 457 shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data) 458 { 459 struct shmif_sc *sc = ifp->if_softc; 460 struct ifdrv *ifd; 461 char *path; 462 int s, rv, memfd; 463 464 s = splnet(); 465 switch (cmd) { 466 case SIOCGLINKSTR: 467 ifd = data; 468 469 if (sc->sc_backfilelen == 0) { 470 rv = ENOENT; 471 break; 472 } 473 474 ifd->ifd_len = sc->sc_backfilelen; 475 if (ifd->ifd_cmd == IFLINKSTR_QUERYLEN) { 476 rv = 0; 477 break; 478 } 479 480 if (ifd->ifd_cmd != 0) { 481 rv = EINVAL; 482 break; 483 } 484 485 rv = copyoutstr(sc->sc_backfile, ifd->ifd_data, 486 MIN(sc->sc_backfilelen, ifd->ifd_len), NULL); 487 break; 488 case SIOCSLINKSTR: 489 if (ifp->if_flags & IFF_UP) { 490 rv = EBUSY; 491 break; 492 } 493 494 ifd = data; 495 if (ifd->ifd_cmd == IFLINKSTR_UNSET) { 496 finibackend(sc); 497 rv = 0; 498 break; 499 } else if (ifd->ifd_cmd != 0) { 500 rv = EINVAL; 501 break; 502 } else if (sc->sc_backfile) { 503 rv = EBUSY; 504 break; 505 } 506 507 if (ifd->ifd_len > MAXPATHLEN) { 508 rv = E2BIG; 509 break; 510 } else if (ifd->ifd_len < 1) { 511 rv = EINVAL; 512 break; 513 } 514 515 path = kmem_alloc(ifd->ifd_len, KM_SLEEP); 516 rv = copyinstr(ifd->ifd_data, path, ifd->ifd_len, NULL); 517 if (rv) { 518 kmem_free(path, ifd->ifd_len); 519 break; 520 } 521 rv = rumpuser_open(path, 522 RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd); 523 if (rv) { 524 kmem_free(path, ifd->ifd_len); 525 break; 526 } 527 rv = initbackend(sc, memfd); 528 if (rv) { 529 kmem_free(path, ifd->ifd_len); 530 rumpuser_close(memfd); 531 break; 532 } 533 sc->sc_backfile = path; 534 sc->sc_backfilelen = ifd->ifd_len; 535 536 break; 537 default: 538 rv = ether_ioctl(ifp, cmd, data); 539 if (rv == ENETRESET) 540 rv = 0; 541 break; 542 } 543 splx(s); 544 545 return rv; 546 } 547 548 static void 549 shmif_start(struct ifnet *ifp) 550 { 551 struct shmif_sc *sc = ifp->if_softc; 552 struct mbuf *m, *n; 553 bool wrote = false; 554 555 ifp->if_flags |= IFF_OACTIVE; 556 557 for (;;) { 558 IF_DEQUEUE(&ifp->if_snd, m); 559 if (m == NULL) 560 break; 561 562 m = ether_sw_offload_tx(ifp, m); 563 if (m == NULL) { 564 if_statinc(ifp, if_oerrors); 565 break; 566 } 567 568 do { 569 n = m->m_nextpkt; 570 shmif_snd(ifp, m); 571 m = n; 572 } while (m != NULL); 573 574 wrote = true; 575 } 576 577 ifp->if_flags &= ~IFF_OACTIVE; 578 579 /* wakeup? */ 580 if (wrote) { 581 dowakeup(sc); 582 } 583 } 584 585 /* send everything in-context since it's just a matter of mem-to-mem copy */ 586 static void 587 shmif_snd(struct ifnet *ifp, struct mbuf *m0) 588 { 589 struct shmif_sc *sc = ifp->if_softc; 590 struct shmif_mem *busmem = sc->sc_busmem; 591 struct shmif_pkthdr sp; 592 struct timeval tv; 593 struct mbuf *m; 594 uint32_t dataoff; 595 uint32_t pktsize, pktwrote; 596 bool wrap; 597 598 pktsize = 0; 599 for (m = m0; m != NULL; m = m->m_next) { 600 pktsize += m->m_len; 601 } 602 KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN); 603 604 getmicrouptime(&tv); 605 sp.sp_len = pktsize; 606 sp.sp_sec = tv.tv_sec; 607 sp.sp_usec = tv.tv_usec; 608 sp.sp_sender = sc->sc_uid; 609 610 bpf_mtap(ifp, m0, BPF_D_OUT); 611 612 shmif_lockbus(busmem); 613 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 614 busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last); 615 616 wrap = false; 617 dataoff = 618 shmif_buswrite(busmem, busmem->shm_last, &sp, sizeof(sp), &wrap); 619 pktwrote = 0; 620 for (m = m0; m != NULL; m = m->m_next) { 621 pktwrote += m->m_len; 622 dataoff = shmif_buswrite(busmem, dataoff, mtod(m, void *), 623 m->m_len, &wrap); 624 } 625 KASSERT(pktwrote == pktsize); 626 if (wrap) { 627 busmem->shm_gen++; 628 DPRINTF(("bus generation now %" PRIu64 "\n", busmem->shm_gen)); 629 } 630 shmif_unlockbus(busmem); 631 632 m_freem(m0); 633 if_statinc(ifp, if_opackets); 634 635 DPRINTF(("shmif_start: send %d bytes at off %d\n", pktsize, 636 busmem->shm_last)); 637 } 638 639 static void 640 shmif_stop(struct ifnet *ifp, int disable) 641 { 642 struct shmif_sc *sc = ifp->if_softc; 643 644 ifp->if_flags &= ~IFF_RUNNING; 645 membar_producer(); 646 647 /* 648 * wakeup thread. this will of course wake up all bus 649 * listeners, but that's life. 650 */ 651 if (sc->sc_memfd != -1) { 652 dowakeup(sc); 653 } 654 } 655 656 657 /* 658 * Check if we have been sleeping too long. Basically, 659 * our in-sc nextpkt must by first <= nextpkt <= last"+1". 660 * We use the fact that first is guaranteed to never overlap 661 * with the last frame in the ring. 662 */ 663 static __inline bool 664 stillvalid_p(struct shmif_sc *sc) 665 { 666 struct shmif_mem *busmem = sc->sc_busmem; 667 unsigned gendiff = busmem->shm_gen - sc->sc_devgen; 668 uint32_t lastoff, devoff; 669 670 KASSERT(busmem->shm_first != busmem->shm_last); 671 672 /* normalize onto a 2x busmem chunk */ 673 devoff = sc->sc_nextpacket; 674 lastoff = shmif_nextpktoff(busmem, busmem->shm_last); 675 676 /* trivial case */ 677 if (gendiff > 1) 678 return false; 679 KASSERT(gendiff <= 1); 680 681 /* Normalize onto 2x busmem chunk */ 682 if (busmem->shm_first >= lastoff) { 683 lastoff += BUSMEM_DATASIZE; 684 if (gendiff == 0) 685 devoff += BUSMEM_DATASIZE; 686 } else { 687 if (gendiff) 688 return false; 689 } 690 691 return devoff >= busmem->shm_first && devoff <= lastoff; 692 } 693 694 static void 695 shmif_rcv(void *arg) 696 { 697 struct ifnet *ifp = arg; 698 struct shmif_sc *sc = ifp->if_softc; 699 struct shmif_mem *busmem; 700 struct mbuf *m = NULL; 701 struct ether_header *eth; 702 uint32_t nextpkt; 703 bool wrap, passup; 704 int error; 705 const int align 706 = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header); 707 708 reup: 709 mutex_enter(&sc->sc_mtx); 710 while ((ifp->if_flags & IFF_RUNNING) == 0 && !sc->sc_dying) 711 cv_wait(&sc->sc_cv, &sc->sc_mtx); 712 mutex_exit(&sc->sc_mtx); 713 714 busmem = sc->sc_busmem; 715 716 while (ifp->if_flags & IFF_RUNNING) { 717 struct shmif_pkthdr sp; 718 719 if (m == NULL) { 720 m = m_gethdr(M_WAIT, MT_DATA); 721 MCLGET(m, M_WAIT); 722 m->m_data += align; 723 } 724 725 DPRINTF(("waiting %d/%" PRIu64 "\n", 726 sc->sc_nextpacket, sc->sc_devgen)); 727 KASSERT(m->m_flags & M_EXT); 728 729 shmif_lockbus(busmem); 730 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 731 KASSERT(busmem->shm_gen >= sc->sc_devgen); 732 733 /* need more data? */ 734 if (sc->sc_devgen == busmem->shm_gen && 735 shmif_nextpktoff(busmem, busmem->shm_last) 736 == sc->sc_nextpacket) { 737 shmif_unlockbus(busmem); 738 error = rumpcomp_shmif_watchwait(sc->sc_kq); 739 if (__predict_false(error)) 740 printf("shmif_rcv: wait failed %d\n", error); 741 membar_consumer(); 742 continue; 743 } 744 745 if (stillvalid_p(sc)) { 746 nextpkt = sc->sc_nextpacket; 747 } else { 748 KASSERT(busmem->shm_gen > 0); 749 nextpkt = busmem->shm_first; 750 if (busmem->shm_first > busmem->shm_last) 751 sc->sc_devgen = busmem->shm_gen - 1; 752 else 753 sc->sc_devgen = busmem->shm_gen; 754 DPRINTF(("dev %p overrun, new data: %d/%" PRIu64 "\n", 755 sc, nextpkt, sc->sc_devgen)); 756 } 757 758 /* 759 * If our read pointer is ahead the bus last write, our 760 * generation must be one behind. 761 */ 762 KASSERT(!(nextpkt > busmem->shm_last 763 && sc->sc_devgen == busmem->shm_gen)); 764 765 wrap = false; 766 nextpkt = shmif_busread(busmem, &sp, 767 nextpkt, sizeof(sp), &wrap); 768 KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN); 769 nextpkt = shmif_busread(busmem, mtod(m, void *), 770 nextpkt, sp.sp_len, &wrap); 771 772 DPRINTF(("shmif_rcv: read packet of length %d at %d\n", 773 sp.sp_len, nextpkt)); 774 775 sc->sc_nextpacket = nextpkt; 776 shmif_unlockbus(sc->sc_busmem); 777 778 if (wrap) { 779 sc->sc_devgen++; 780 DPRINTF(("dev %p generation now %" PRIu64 "\n", 781 sc, sc->sc_devgen)); 782 } 783 784 /* 785 * Ignore packets too short to possibly be valid. 786 * This is hit at least for the first frame on a new bus. 787 */ 788 if (__predict_false(sp.sp_len < ETHER_HDR_LEN)) { 789 DPRINTF(("shmif read packet len %d < ETHER_HDR_LEN\n", 790 sp.sp_len)); 791 continue; 792 } 793 794 m->m_len = m->m_pkthdr.len = sp.sp_len; 795 m_set_rcvif(m, ifp); 796 797 /* 798 * Test if we want to pass the packet upwards 799 */ 800 eth = mtod(m, struct ether_header *); 801 if (sp.sp_sender == sc->sc_uid) { 802 passup = false; 803 } else if (memcmp(eth->ether_dhost, CLLADDR(ifp->if_sadl), 804 ETHER_ADDR_LEN) == 0) { 805 passup = true; 806 } else if (ETHER_IS_MULTICAST(eth->ether_dhost)) { 807 passup = true; 808 } else if (ifp->if_flags & IFF_PROMISC) { 809 m->m_flags |= M_PROMISC; 810 passup = true; 811 } else { 812 passup = false; 813 } 814 815 if (passup) { 816 int bound; 817 818 m = ether_sw_offload_rx(ifp, m); 819 820 KERNEL_LOCK(1, NULL); 821 /* Prevent LWP migrations between CPUs for psref(9) */ 822 bound = curlwp_bind(); 823 if_input(ifp, m); 824 curlwp_bindx(bound); 825 KERNEL_UNLOCK_ONE(NULL); 826 827 m = NULL; 828 } 829 /* else: reuse mbuf for a future packet */ 830 } 831 m_freem(m); 832 m = NULL; 833 834 if (!sc->sc_dying) 835 goto reup; 836 837 kthread_exit(0); 838 } 839