1 /* $NetBSD: if_shmem.c,v 1.89 2024/10/01 08:55:58 rin Exp $ */ 2 3 /* 4 * Copyright (c) 2009, 2010 Antti Kantee. All Rights Reserved. 5 * 6 * Development of this software was supported by The Nokia Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.89 2024/10/01 08:55:58 rin Exp $"); 32 33 #include <sys/param.h> 34 #include <sys/atomic.h> 35 #include <sys/fcntl.h> 36 #include <sys/kmem.h> 37 #include <sys/kthread.h> 38 #include <sys/lock.h> 39 #include <sys/vmem.h> 40 #include <sys/cprng.h> 41 42 #include <net/bpf.h> 43 #include <net/if.h> 44 #include <net/if_dl.h> 45 #include <net/if_ether.h> 46 #include <net/if_media.h> 47 #include <net/ether_sw_offload.h> 48 49 #include <netinet/in.h> 50 #include <netinet/in_var.h> 51 52 #include <rump-sys/kern.h> 53 #include <rump-sys/net.h> 54 55 #include <rump/rump.h> 56 #include <rump/rumpuser.h> 57 58 #include "shmif_user.h" 59 60 static int shmif_clone(struct if_clone *, int); 61 static int shmif_unclone(struct ifnet *); 62 63 static int shmif_mediachange(struct ifnet *); 64 static void shmif_mediastatus(struct ifnet *, struct ifmediareq *); 65 66 struct if_clone shmif_cloner = 67 IF_CLONE_INITIALIZER("shmif", shmif_clone, shmif_unclone); 68 69 /* 70 * Do r/w prefault for backend pages when attaching the interface. 71 * At least logically thinking improves performance (although no 72 * mlocking is done, so they might go away). 73 */ 74 #define PREFAULT_RW 75 76 /* 77 * A virtual ethernet interface which uses shared memory from a 78 * memory mapped file as the bus. 79 */ 80 81 static int shmif_init(struct ifnet *); 82 static int shmif_ioctl(struct ifnet *, u_long, void *); 83 static void shmif_start(struct ifnet *); 84 static void shmif_snd(struct ifnet *, struct mbuf *); 85 static void shmif_stop(struct ifnet *, int); 86 87 #include "shmifvar.h" 88 89 struct shmif_sc { 90 struct ethercom sc_ec; 91 struct ifmedia sc_im; 92 struct shmif_mem *sc_busmem; 93 int sc_memfd; 94 int sc_kq; 95 int sc_unit; 96 97 char *sc_backfile; 98 size_t sc_backfilelen; 99 100 uint64_t sc_devgen; 101 uint32_t sc_nextpacket; 102 103 kmutex_t sc_mtx; 104 kcondvar_t sc_cv; 105 106 struct lwp *sc_rcvl; 107 bool sc_dying; 108 109 uint64_t sc_uid; 110 }; 111 112 static void shmif_rcv(void *); 113 114 vmem_t *shmif_units; 115 116 static void 117 dowakeup(struct shmif_sc *sc) 118 { 119 struct rumpuser_iovec iov; 120 uint32_t ver = SHMIF_VERSION; 121 size_t n; 122 123 iov.iov_base = &ver; 124 iov.iov_len = sizeof(ver); 125 rumpuser_iovwrite(sc->sc_memfd, &iov, 1, IFMEM_WAKEUP, &n); 126 } 127 128 /* 129 * This locking needs work and will misbehave severely if: 130 * 1) the backing memory has to be paged in 131 * 2) some lockholder exits while holding the lock 132 */ 133 static void 134 shmif_lockbus(struct shmif_mem *busmem) 135 { 136 int i = 0; 137 138 while (__predict_false(atomic_cas_32(&busmem->shm_lock, 139 LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) { 140 if (__predict_false(++i > LOCK_COOLDOWN)) { 141 /* wait 1ms */ 142 rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 143 0, 1000*1000); 144 i = 0; 145 } 146 continue; 147 } 148 membar_acquire(); 149 } 150 151 static void 152 shmif_unlockbus(struct shmif_mem *busmem) 153 { 154 unsigned int old __diagused; 155 156 membar_release(); 157 old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED); 158 KASSERT(old == LOCK_LOCKED); 159 } 160 161 static int 162 allocif(int unit, struct shmif_sc **scp) 163 { 164 uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 }; 165 struct shmif_sc *sc; 166 struct ifnet *ifp; 167 uint64_t randnum; 168 int error = 0; 169 170 randnum = cprng_strong64(); 171 memcpy(&enaddr[2], &randnum, 4); 172 173 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 174 sc->sc_memfd = -1; 175 sc->sc_unit = unit; 176 sc->sc_uid = randnum; 177 178 ifp = &sc->sc_ec.ec_if; 179 180 ifmedia_init(&sc->sc_im, 0, shmif_mediachange, shmif_mediastatus); 181 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL); 182 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_NONE, 0, NULL); 183 ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO); 184 185 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "shmif%d", unit); 186 ifp->if_softc = sc; 187 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 188 ifp->if_init = shmif_init; 189 ifp->if_ioctl = shmif_ioctl; 190 ifp->if_start = shmif_start; 191 ifp->if_stop = shmif_stop; 192 ifp->if_mtu = ETHERMTU; 193 ifp->if_dlt = DLT_EN10MB; 194 ifp->if_capabilities = IFCAP_TSOv4 | IFCAP_TSOv6 | 195 IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx | 196 IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx | 197 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | 198 IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx | 199 IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx; 200 IFQ_SET_READY(&ifp->if_snd); 201 202 mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE); 203 cv_init(&sc->sc_cv, "shmifcv"); 204 205 if_initialize(ifp); 206 #if 1 207 char buf[256]; 208 209 if (rumpuser_getparam("RUMP_SHMIF_CAPENABLE", buf, sizeof(buf)) == 0) { 210 uint64_t capen = strtoul(buf, NULL, 0); 211 212 ifp->if_capenable = capen & ifp->if_capabilities; 213 } 214 #endif 215 216 if_deferred_start_init(ifp, NULL); 217 ether_ifattach(ifp, enaddr); 218 if_register(ifp); 219 220 aprint_verbose("shmif%d: Ethernet address %s\n", 221 unit, ether_sprintf(enaddr)); 222 223 if (scp) 224 *scp = sc; 225 226 if (rump_threads) { 227 error = kthread_create(PRI_NONE, 228 KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, NULL, 229 shmif_rcv, ifp, &sc->sc_rcvl, "shmif"); 230 } else { 231 printf("WARNING: threads not enabled, shmif NOT working\n"); 232 } 233 234 if (error) { 235 shmif_unclone(ifp); 236 } 237 238 return 0; 239 } 240 241 static int 242 initbackend(struct shmif_sc *sc, int memfd) 243 { 244 volatile uint8_t v; 245 volatile uint8_t *p; 246 void *mem; 247 int error; 248 249 error = rumpcomp_shmif_mmap(memfd, BUSMEM_SIZE, &mem); 250 if (error) 251 return error; 252 sc->sc_busmem = mem; 253 254 if (sc->sc_busmem->shm_magic 255 && sc->sc_busmem->shm_magic != SHMIF_MAGIC) { 256 printf("bus is not magical"); 257 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 258 return ENOEXEC; 259 } 260 261 /* 262 * Prefault in pages to minimize runtime penalty with buslock. 263 * Use 512 instead of PAGE_SIZE to make sure we catch cases where 264 * rump kernel PAGE_SIZE > host page size. 265 */ 266 for (p = (uint8_t *)sc->sc_busmem; 267 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 268 p += 512) 269 v = *p; 270 271 shmif_lockbus(sc->sc_busmem); 272 /* we're first? initialize bus */ 273 if (sc->sc_busmem->shm_magic == 0) { 274 sc->sc_busmem->shm_magic = SHMIF_MAGIC; 275 sc->sc_busmem->shm_first = BUSMEM_DATASIZE; 276 } 277 278 sc->sc_nextpacket = sc->sc_busmem->shm_last; 279 sc->sc_devgen = sc->sc_busmem->shm_gen; 280 281 #ifdef PREFAULT_RW 282 for (p = (uint8_t *)sc->sc_busmem; 283 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 284 p += PAGE_SIZE) { 285 v = *p; 286 *p = v; 287 } 288 #endif 289 shmif_unlockbus(sc->sc_busmem); 290 291 sc->sc_kq = -1; 292 error = rumpcomp_shmif_watchsetup(&sc->sc_kq, memfd); 293 if (error) { 294 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 295 return error; 296 } 297 298 sc->sc_memfd = memfd; 299 300 return error; 301 } 302 303 static void 304 finibackend(struct shmif_sc *sc) 305 { 306 307 if (sc->sc_backfile == NULL) 308 return; 309 310 if (sc->sc_backfile) { 311 kmem_free(sc->sc_backfile, sc->sc_backfilelen); 312 sc->sc_backfile = NULL; 313 sc->sc_backfilelen = 0; 314 } 315 316 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 317 rumpuser_close(sc->sc_memfd); 318 rumpuser_close(sc->sc_kq); 319 320 sc->sc_memfd = -1; 321 } 322 323 int 324 rump_shmif_create(const char *path, int *ifnum) 325 { 326 struct shmif_sc *sc; 327 vmem_addr_t t; 328 int unit, error; 329 int memfd = -1; /* XXXgcc */ 330 331 if (path) { 332 error = rumpuser_open(path, 333 RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd); 334 if (error) 335 return error; 336 } 337 338 error = vmem_xalloc(shmif_units, 1, 0, 0, 0, 339 VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_INSTANTFIT | VM_SLEEP, &t); 340 341 if (error != 0) { 342 if (path) 343 rumpuser_close(memfd); 344 return error; 345 } 346 347 unit = t - 1; 348 349 if ((error = allocif(unit, &sc)) != 0) { 350 if (path) 351 rumpuser_close(memfd); 352 return error; 353 } 354 355 if (!path) 356 goto out; 357 358 error = initbackend(sc, memfd); 359 if (error) { 360 shmif_unclone(&sc->sc_ec.ec_if); 361 return error; 362 } 363 364 sc->sc_backfilelen = strlen(path)+1; 365 sc->sc_backfile = kmem_alloc(sc->sc_backfilelen, KM_SLEEP); 366 strcpy(sc->sc_backfile, path); 367 368 out: 369 if (ifnum) 370 *ifnum = unit; 371 372 return 0; 373 } 374 375 static int 376 shmif_clone(struct if_clone *ifc, int unit) 377 { 378 int rc __diagused; 379 vmem_addr_t unit2; 380 381 /* 382 * Ok, we know the unit number, but we must still reserve it. 383 * Otherwise the wildcard-side of things might get the same one. 384 * This is slightly offset-happy due to vmem. First, we offset 385 * the range of unit numbers by +1 since vmem cannot deal with 386 * ranges starting from 0. Talk about uuuh. 387 */ 388 rc = vmem_xalloc(shmif_units, 1, 0, 0, 0, unit+1, unit+1, 389 VM_SLEEP | VM_INSTANTFIT, &unit2); 390 KASSERT(rc == 0 && unit2-1 == unit); 391 392 return allocif(unit, NULL); 393 } 394 395 static int 396 shmif_unclone(struct ifnet *ifp) 397 { 398 struct shmif_sc *sc = ifp->if_softc; 399 400 shmif_stop(ifp, 1); 401 if_down(ifp); 402 403 mutex_enter(&sc->sc_mtx); 404 sc->sc_dying = true; 405 cv_broadcast(&sc->sc_cv); 406 mutex_exit(&sc->sc_mtx); 407 408 if (sc->sc_rcvl) 409 kthread_join(sc->sc_rcvl); 410 sc->sc_rcvl = NULL; 411 412 /* 413 * Need to be called after the kthread left, otherwise closing kqueue 414 * (sc_kq) hangs sometimes perhaps because of a race condition between 415 * close and kevent in the kthread on the kqueue. 416 */ 417 finibackend(sc); 418 419 vmem_xfree(shmif_units, sc->sc_unit+1, 1); 420 421 ether_ifdetach(ifp); 422 if_detach(ifp); 423 424 cv_destroy(&sc->sc_cv); 425 mutex_destroy(&sc->sc_mtx); 426 427 kmem_free(sc, sizeof(*sc)); 428 429 return 0; 430 } 431 432 static int 433 shmif_init(struct ifnet *ifp) 434 { 435 struct shmif_sc *sc = ifp->if_softc; 436 int error = 0; 437 438 if (sc->sc_memfd == -1) 439 return ENXIO; 440 KASSERT(sc->sc_busmem); 441 442 ifp->if_flags |= IFF_RUNNING; 443 444 mutex_enter(&sc->sc_mtx); 445 sc->sc_nextpacket = sc->sc_busmem->shm_last; 446 sc->sc_devgen = sc->sc_busmem->shm_gen; 447 448 cv_broadcast(&sc->sc_cv); 449 mutex_exit(&sc->sc_mtx); 450 451 return error; 452 } 453 454 static int 455 shmif_mediachange(struct ifnet *ifp) 456 { 457 struct shmif_sc *sc = ifp->if_softc; 458 int link_state; 459 460 if (IFM_SUBTYPE(sc->sc_im.ifm_cur->ifm_media) == IFM_NONE) 461 link_state = LINK_STATE_DOWN; 462 else 463 link_state = LINK_STATE_UP; 464 465 if_link_state_change(ifp, link_state); 466 return 0; 467 } 468 469 static void 470 shmif_mediastatus(struct ifnet *ifp, struct ifmediareq *imr) 471 { 472 struct shmif_sc *sc = ifp->if_softc; 473 474 imr->ifm_active = sc->sc_im.ifm_cur->ifm_media; 475 476 imr->ifm_status = IFM_AVALID; 477 if (IFM_SUBTYPE(imr->ifm_active) != IFM_NONE) 478 imr->ifm_status |= IFM_ACTIVE; 479 } 480 481 static int 482 shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data) 483 { 484 struct shmif_sc *sc = ifp->if_softc; 485 struct ifdrv *ifd; 486 char *path; 487 int s, rv, memfd; 488 489 s = splnet(); 490 switch (cmd) { 491 case SIOCGLINKSTR: 492 ifd = data; 493 494 if (sc->sc_backfilelen == 0) { 495 rv = ENOENT; 496 break; 497 } 498 499 ifd->ifd_len = sc->sc_backfilelen; 500 if (ifd->ifd_cmd == IFLINKSTR_QUERYLEN) { 501 rv = 0; 502 break; 503 } 504 505 if (ifd->ifd_cmd != 0) { 506 rv = EINVAL; 507 break; 508 } 509 510 rv = copyoutstr(sc->sc_backfile, ifd->ifd_data, 511 MIN(sc->sc_backfilelen, ifd->ifd_len), NULL); 512 break; 513 case SIOCSLINKSTR: 514 if (ifp->if_flags & IFF_UP) { 515 rv = EBUSY; 516 break; 517 } 518 519 ifd = data; 520 if (ifd->ifd_cmd == IFLINKSTR_UNSET) { 521 finibackend(sc); 522 /* Back to the default just in case */ 523 ifp->if_link_state = LINK_STATE_UNKNOWN; 524 rv = 0; 525 break; 526 } else if (ifd->ifd_cmd != 0) { 527 rv = EINVAL; 528 break; 529 } else if (sc->sc_backfile) { 530 rv = EBUSY; 531 break; 532 } 533 534 if (ifd->ifd_len > MAXPATHLEN) { 535 rv = E2BIG; 536 break; 537 } else if (ifd->ifd_len < 1) { 538 rv = EINVAL; 539 break; 540 } 541 542 path = kmem_alloc(ifd->ifd_len, KM_SLEEP); 543 rv = copyinstr(ifd->ifd_data, path, ifd->ifd_len, NULL); 544 if (rv) { 545 kmem_free(path, ifd->ifd_len); 546 break; 547 } 548 rv = rumpuser_open(path, 549 RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd); 550 if (rv) { 551 kmem_free(path, ifd->ifd_len); 552 break; 553 } 554 rv = initbackend(sc, memfd); 555 if (rv) { 556 kmem_free(path, ifd->ifd_len); 557 rumpuser_close(memfd); 558 break; 559 } 560 sc->sc_backfile = path; 561 sc->sc_backfilelen = ifd->ifd_len; 562 563 if_link_state_change(ifp, LINK_STATE_UP); 564 break; 565 566 #ifdef OSIOCSIFMEDIA 567 case OSIOCSIFMEDIA: 568 #endif 569 case SIOCSIFMEDIA: 570 case SIOCGIFMEDIA: 571 rv = ifmedia_ioctl(ifp, data, &sc->sc_im, cmd); 572 break; 573 574 default: 575 rv = ether_ioctl(ifp, cmd, data); 576 if (rv == ENETRESET) 577 rv = 0; 578 break; 579 } 580 splx(s); 581 582 return rv; 583 } 584 585 static void 586 shmif_start(struct ifnet *ifp) 587 { 588 struct shmif_sc *sc = ifp->if_softc; 589 struct mbuf *m, *n; 590 bool wrote = false; 591 592 ifp->if_flags |= IFF_OACTIVE; 593 594 for (;;) { 595 IFQ_DEQUEUE(&ifp->if_snd, m); 596 if (m == NULL) 597 break; 598 599 m = ether_sw_offload_tx(ifp, m); 600 if (m == NULL) { 601 if_statinc(ifp, if_oerrors); 602 break; 603 } 604 605 do { 606 n = m->m_nextpkt; 607 shmif_snd(ifp, m); 608 m = n; 609 } while (m != NULL); 610 611 wrote = true; 612 } 613 614 ifp->if_flags &= ~IFF_OACTIVE; 615 616 /* wakeup? */ 617 if (wrote) { 618 dowakeup(sc); 619 } 620 } 621 622 /* send everything in-context since it's just a matter of mem-to-mem copy */ 623 static void 624 shmif_snd(struct ifnet *ifp, struct mbuf *m0) 625 { 626 struct shmif_sc *sc = ifp->if_softc; 627 struct shmif_mem *busmem = sc->sc_busmem; 628 struct shmif_pkthdr sp; 629 struct timeval tv; 630 struct mbuf *m; 631 uint32_t dataoff; 632 uint32_t pktsize, pktwrote; 633 bool wrap; 634 635 pktsize = 0; 636 for (m = m0; m != NULL; m = m->m_next) { 637 pktsize += m->m_len; 638 } 639 KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN); 640 641 getmicrouptime(&tv); 642 sp.sp_len = pktsize; 643 sp.sp_sec = tv.tv_sec; 644 sp.sp_usec = tv.tv_usec; 645 sp.sp_sender = sc->sc_uid; 646 647 bpf_mtap(ifp, m0, BPF_D_OUT); 648 649 /* 650 * Compare with DOWN to allow UNKNOWN (the default value), 651 * which is required by some ATF tests using rump servers 652 * written in C. 653 */ 654 if (ifp->if_link_state == LINK_STATE_DOWN) 655 goto dontsend; 656 657 shmif_lockbus(busmem); 658 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 659 busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last); 660 661 wrap = false; 662 dataoff = 663 shmif_buswrite(busmem, busmem->shm_last, &sp, sizeof(sp), &wrap); 664 pktwrote = 0; 665 for (m = m0; m != NULL; m = m->m_next) { 666 pktwrote += m->m_len; 667 dataoff = shmif_buswrite(busmem, dataoff, mtod(m, void *), 668 m->m_len, &wrap); 669 } 670 KASSERT(pktwrote == pktsize); 671 if (wrap) { 672 busmem->shm_gen++; 673 DPRINTF(("bus generation now %" PRIu64 "\n", busmem->shm_gen)); 674 } 675 shmif_unlockbus(busmem); 676 677 dontsend: 678 m_freem(m0); 679 if_statinc(ifp, if_opackets); 680 681 DPRINTF(("shmif_start: send %d bytes at off %d\n", pktsize, 682 busmem->shm_last)); 683 } 684 685 static void 686 shmif_stop(struct ifnet *ifp, int disable) 687 { 688 struct shmif_sc *sc = ifp->if_softc; 689 690 ifp->if_flags &= ~IFF_RUNNING; 691 membar_producer(); 692 693 /* 694 * wakeup thread. this will of course wake up all bus 695 * listeners, but that's life. 696 */ 697 if (sc->sc_memfd != -1) { 698 dowakeup(sc); 699 } 700 } 701 702 703 /* 704 * Check if we have been sleeping too long. Basically, 705 * our in-sc nextpkt must by first <= nextpkt <= last"+1". 706 * We use the fact that first is guaranteed to never overlap 707 * with the last frame in the ring. 708 */ 709 static __inline bool 710 stillvalid_p(struct shmif_sc *sc) 711 { 712 struct shmif_mem *busmem = sc->sc_busmem; 713 unsigned gendiff = busmem->shm_gen - sc->sc_devgen; 714 uint32_t lastoff, devoff; 715 716 KASSERT(busmem->shm_first != busmem->shm_last); 717 718 /* normalize onto a 2x busmem chunk */ 719 devoff = sc->sc_nextpacket; 720 lastoff = shmif_nextpktoff(busmem, busmem->shm_last); 721 722 /* trivial case */ 723 if (gendiff > 1) 724 return false; 725 KASSERT(gendiff <= 1); 726 727 /* Normalize onto 2x busmem chunk */ 728 if (busmem->shm_first >= lastoff) { 729 lastoff += BUSMEM_DATASIZE; 730 if (gendiff == 0) 731 devoff += BUSMEM_DATASIZE; 732 } else { 733 if (gendiff) 734 return false; 735 } 736 737 return devoff >= busmem->shm_first && devoff <= lastoff; 738 } 739 740 static void 741 shmif_rcv(void *arg) 742 { 743 struct ifnet *ifp = arg; 744 struct shmif_sc *sc = ifp->if_softc; 745 struct shmif_mem *busmem; 746 struct mbuf *m = NULL; 747 struct ether_header *eth; 748 uint32_t nextpkt; 749 bool wrap, passup; 750 int error; 751 const int align 752 = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header); 753 754 reup: 755 mutex_enter(&sc->sc_mtx); 756 while ((ifp->if_flags & IFF_RUNNING) == 0 && !sc->sc_dying) 757 cv_wait(&sc->sc_cv, &sc->sc_mtx); 758 mutex_exit(&sc->sc_mtx); 759 760 busmem = sc->sc_busmem; 761 762 while (ifp->if_flags & IFF_RUNNING) { 763 struct shmif_pkthdr sp; 764 765 if (m == NULL) { 766 m = m_gethdr(M_WAIT, MT_DATA); 767 MCLGET(m, M_WAIT); 768 m->m_data += align; 769 } 770 771 DPRINTF(("waiting %d/%" PRIu64 "\n", 772 sc->sc_nextpacket, sc->sc_devgen)); 773 KASSERT(m->m_flags & M_EXT); 774 775 shmif_lockbus(busmem); 776 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 777 KASSERT(busmem->shm_gen >= sc->sc_devgen); 778 779 /* need more data? */ 780 if (sc->sc_devgen == busmem->shm_gen && 781 shmif_nextpktoff(busmem, busmem->shm_last) 782 == sc->sc_nextpacket) { 783 shmif_unlockbus(busmem); 784 error = rumpcomp_shmif_watchwait(sc->sc_kq); 785 if (__predict_false(error)) 786 printf("shmif_rcv: wait failed %d\n", error); 787 membar_consumer(); 788 continue; 789 } 790 791 if (stillvalid_p(sc)) { 792 nextpkt = sc->sc_nextpacket; 793 } else { 794 KASSERT(busmem->shm_gen > 0); 795 nextpkt = busmem->shm_first; 796 if (busmem->shm_first > busmem->shm_last) 797 sc->sc_devgen = busmem->shm_gen - 1; 798 else 799 sc->sc_devgen = busmem->shm_gen; 800 DPRINTF(("dev %p overrun, new data: %d/%" PRIu64 "\n", 801 sc, nextpkt, sc->sc_devgen)); 802 } 803 804 /* 805 * If our read pointer is ahead the bus last write, our 806 * generation must be one behind. 807 */ 808 KASSERT(!(nextpkt > busmem->shm_last 809 && sc->sc_devgen == busmem->shm_gen)); 810 811 wrap = false; 812 nextpkt = shmif_busread(busmem, &sp, 813 nextpkt, sizeof(sp), &wrap); 814 KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN); 815 nextpkt = shmif_busread(busmem, mtod(m, void *), 816 nextpkt, sp.sp_len, &wrap); 817 818 DPRINTF(("shmif_rcv: read packet of length %d at %d\n", 819 sp.sp_len, nextpkt)); 820 821 sc->sc_nextpacket = nextpkt; 822 shmif_unlockbus(sc->sc_busmem); 823 824 if (wrap) { 825 sc->sc_devgen++; 826 DPRINTF(("dev %p generation now %" PRIu64 "\n", 827 sc, sc->sc_devgen)); 828 } 829 830 /* 831 * Ignore packets too short to possibly be valid. 832 * This is hit at least for the first frame on a new bus. 833 */ 834 if (__predict_false(sp.sp_len < ETHER_HDR_LEN)) { 835 DPRINTF(("shmif read packet len %d < ETHER_HDR_LEN\n", 836 sp.sp_len)); 837 continue; 838 } 839 840 m->m_len = m->m_pkthdr.len = sp.sp_len; 841 m_set_rcvif(m, ifp); 842 843 /* 844 * Test if we want to pass the packet upwards 845 */ 846 eth = mtod(m, struct ether_header *); 847 /* 848 * Compare with DOWN to allow UNKNOWN (the default value), 849 * which is required by some ATF tests using rump servers 850 * written in C. 851 */ 852 if (ifp->if_link_state == LINK_STATE_DOWN) { 853 passup = false; 854 } else if (sp.sp_sender == sc->sc_uid) { 855 passup = false; 856 } else if (memcmp(eth->ether_dhost, CLLADDR(ifp->if_sadl), 857 ETHER_ADDR_LEN) == 0) { 858 passup = true; 859 } else if (ETHER_IS_MULTICAST(eth->ether_dhost)) { 860 passup = true; 861 } else if (ifp->if_flags & IFF_PROMISC) { 862 m->m_flags |= M_PROMISC; 863 passup = true; 864 } else { 865 passup = false; 866 } 867 868 if (passup) { 869 int bound; 870 871 m = ether_sw_offload_rx(ifp, m); 872 873 KERNEL_LOCK(1, NULL); 874 /* Prevent LWP migrations between CPUs for psref(9) */ 875 bound = curlwp_bind(); 876 if_input(ifp, m); 877 curlwp_bindx(bound); 878 KERNEL_UNLOCK_ONE(NULL); 879 880 m = NULL; 881 } 882 /* else: reuse mbuf for a future packet */ 883 } 884 m_freem(m); 885 m = NULL; 886 887 if (!sc->sc_dying) 888 goto reup; 889 890 kthread_exit(0); 891 } 892