1 /* $NetBSD: if_shmem.c,v 1.88 2024/09/02 05:12:53 ozaki-r Exp $ */ 2 3 /* 4 * Copyright (c) 2009, 2010 Antti Kantee. All Rights Reserved. 5 * 6 * Development of this software was supported by The Nokia Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: if_shmem.c,v 1.88 2024/09/02 05:12:53 ozaki-r Exp $"); 32 33 #include <sys/param.h> 34 #include <sys/atomic.h> 35 #include <sys/fcntl.h> 36 #include <sys/kmem.h> 37 #include <sys/kthread.h> 38 #include <sys/lock.h> 39 #include <sys/vmem.h> 40 #include <sys/cprng.h> 41 42 #include <net/bpf.h> 43 #include <net/if.h> 44 #include <net/if_dl.h> 45 #include <net/if_ether.h> 46 #include <net/if_media.h> 47 #include <net/ether_sw_offload.h> 48 49 #include <netinet/in.h> 50 #include <netinet/in_var.h> 51 52 #include <rump-sys/kern.h> 53 #include <rump-sys/net.h> 54 55 #include <rump/rump.h> 56 #include <rump/rumpuser.h> 57 58 #include "shmif_user.h" 59 60 static int shmif_clone(struct if_clone *, int); 61 static int shmif_unclone(struct ifnet *); 62 63 static int shmif_mediachange(struct ifnet *); 64 static void shmif_mediastatus(struct ifnet *, struct ifmediareq *); 65 66 struct if_clone shmif_cloner = 67 IF_CLONE_INITIALIZER("shmif", shmif_clone, shmif_unclone); 68 69 /* 70 * Do r/w prefault for backend pages when attaching the interface. 71 * At least logically thinking improves performance (although no 72 * mlocking is done, so they might go away). 73 */ 74 #define PREFAULT_RW 75 76 /* 77 * A virtual ethernet interface which uses shared memory from a 78 * memory mapped file as the bus. 79 */ 80 81 static int shmif_init(struct ifnet *); 82 static int shmif_ioctl(struct ifnet *, u_long, void *); 83 static void shmif_start(struct ifnet *); 84 static void shmif_snd(struct ifnet *, struct mbuf *); 85 static void shmif_stop(struct ifnet *, int); 86 87 #include "shmifvar.h" 88 89 struct shmif_sc { 90 struct ethercom sc_ec; 91 struct ifmedia sc_im; 92 struct shmif_mem *sc_busmem; 93 int sc_memfd; 94 int sc_kq; 95 int sc_unit; 96 97 char *sc_backfile; 98 size_t sc_backfilelen; 99 100 uint64_t sc_devgen; 101 uint32_t sc_nextpacket; 102 103 kmutex_t sc_mtx; 104 kcondvar_t sc_cv; 105 106 struct lwp *sc_rcvl; 107 bool sc_dying; 108 109 uint64_t sc_uid; 110 }; 111 112 static void shmif_rcv(void *); 113 114 vmem_t *shmif_units; 115 116 static void 117 dowakeup(struct shmif_sc *sc) 118 { 119 struct rumpuser_iovec iov; 120 uint32_t ver = SHMIF_VERSION; 121 size_t n; 122 123 iov.iov_base = &ver; 124 iov.iov_len = sizeof(ver); 125 rumpuser_iovwrite(sc->sc_memfd, &iov, 1, IFMEM_WAKEUP, &n); 126 } 127 128 /* 129 * This locking needs work and will misbehave severely if: 130 * 1) the backing memory has to be paged in 131 * 2) some lockholder exits while holding the lock 132 */ 133 static void 134 shmif_lockbus(struct shmif_mem *busmem) 135 { 136 int i = 0; 137 138 while (__predict_false(atomic_cas_32(&busmem->shm_lock, 139 LOCK_UNLOCKED, LOCK_LOCKED) == LOCK_LOCKED)) { 140 if (__predict_false(++i > LOCK_COOLDOWN)) { 141 /* wait 1ms */ 142 rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 143 0, 1000*1000); 144 i = 0; 145 } 146 continue; 147 } 148 membar_acquire(); 149 } 150 151 static void 152 shmif_unlockbus(struct shmif_mem *busmem) 153 { 154 unsigned int old __diagused; 155 156 membar_release(); 157 old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED); 158 KASSERT(old == LOCK_LOCKED); 159 } 160 161 static int 162 allocif(int unit, struct shmif_sc **scp) 163 { 164 uint8_t enaddr[ETHER_ADDR_LEN] = { 0xb2, 0xa0, 0x00, 0x00, 0x00, 0x00 }; 165 struct shmif_sc *sc; 166 struct ifnet *ifp; 167 uint64_t randnum; 168 int error = 0; 169 170 randnum = cprng_strong64(); 171 memcpy(&enaddr[2], &randnum, 4); 172 173 sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 174 sc->sc_memfd = -1; 175 sc->sc_unit = unit; 176 sc->sc_uid = randnum; 177 178 ifp = &sc->sc_ec.ec_if; 179 180 ifmedia_init(&sc->sc_im, 0, shmif_mediachange, shmif_mediastatus); 181 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_AUTO, 0, NULL); 182 ifmedia_add(&sc->sc_im, IFM_ETHER|IFM_NONE, 0, NULL); 183 ifmedia_set(&sc->sc_im, IFM_ETHER|IFM_AUTO); 184 185 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "shmif%d", unit); 186 ifp->if_softc = sc; 187 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 188 ifp->if_init = shmif_init; 189 ifp->if_ioctl = shmif_ioctl; 190 ifp->if_start = shmif_start; 191 ifp->if_stop = shmif_stop; 192 ifp->if_mtu = ETHERMTU; 193 ifp->if_dlt = DLT_EN10MB; 194 ifp->if_capabilities = IFCAP_TSOv4 | IFCAP_TSOv6 | 195 IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_IPv4_Tx | 196 IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx | 197 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx | 198 IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx | 199 IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx; 200 IFQ_SET_READY(&ifp->if_snd); 201 202 mutex_init(&sc->sc_mtx, MUTEX_DEFAULT, IPL_NONE); 203 cv_init(&sc->sc_cv, "shmifcv"); 204 205 if_initialize(ifp); 206 #if 1 207 char buf[256]; 208 209 if (rumpuser_getparam("RUMP_SHMIF_CAPENABLE", buf, sizeof(buf)) == 0) { 210 uint64_t capen = strtoul(buf, NULL, 0); 211 212 ifp->if_capenable = capen & ifp->if_capabilities; 213 } 214 #endif 215 216 if_deferred_start_init(ifp, NULL); 217 ether_ifattach(ifp, enaddr); 218 if_register(ifp); 219 220 aprint_verbose("shmif%d: Ethernet address %s\n", 221 unit, ether_sprintf(enaddr)); 222 223 if (scp) 224 *scp = sc; 225 226 if (rump_threads) { 227 error = kthread_create(PRI_NONE, 228 KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, NULL, 229 shmif_rcv, ifp, &sc->sc_rcvl, "shmif"); 230 } else { 231 printf("WARNING: threads not enabled, shmif NOT working\n"); 232 } 233 234 if (error) { 235 shmif_unclone(ifp); 236 } 237 238 return 0; 239 } 240 241 static int 242 initbackend(struct shmif_sc *sc, int memfd) 243 { 244 volatile uint8_t v; 245 volatile uint8_t *p; 246 void *mem; 247 int error; 248 249 error = rumpcomp_shmif_mmap(memfd, BUSMEM_SIZE, &mem); 250 if (error) 251 return error; 252 sc->sc_busmem = mem; 253 254 if (sc->sc_busmem->shm_magic 255 && sc->sc_busmem->shm_magic != SHMIF_MAGIC) { 256 printf("bus is not magical"); 257 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 258 return ENOEXEC; 259 } 260 261 /* 262 * Prefault in pages to minimize runtime penalty with buslock. 263 * Use 512 instead of PAGE_SIZE to make sure we catch cases where 264 * rump kernel PAGE_SIZE > host page size. 265 */ 266 for (p = (uint8_t *)sc->sc_busmem; 267 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 268 p += 512) 269 v = *p; 270 271 shmif_lockbus(sc->sc_busmem); 272 /* we're first? initialize bus */ 273 if (sc->sc_busmem->shm_magic == 0) { 274 sc->sc_busmem->shm_magic = SHMIF_MAGIC; 275 sc->sc_busmem->shm_first = BUSMEM_DATASIZE; 276 } 277 278 sc->sc_nextpacket = sc->sc_busmem->shm_last; 279 sc->sc_devgen = sc->sc_busmem->shm_gen; 280 281 #ifdef PREFAULT_RW 282 for (p = (uint8_t *)sc->sc_busmem; 283 p < (uint8_t *)sc->sc_busmem + BUSMEM_SIZE; 284 p += PAGE_SIZE) { 285 v = *p; 286 *p = v; 287 } 288 #endif 289 shmif_unlockbus(sc->sc_busmem); 290 291 sc->sc_kq = -1; 292 error = rumpcomp_shmif_watchsetup(&sc->sc_kq, memfd); 293 if (error) { 294 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 295 return error; 296 } 297 298 sc->sc_memfd = memfd; 299 300 return error; 301 } 302 303 static void 304 finibackend(struct shmif_sc *sc) 305 { 306 307 if (sc->sc_backfile == NULL) 308 return; 309 310 if (sc->sc_backfile) { 311 kmem_free(sc->sc_backfile, sc->sc_backfilelen); 312 sc->sc_backfile = NULL; 313 sc->sc_backfilelen = 0; 314 } 315 316 rumpuser_unmap(sc->sc_busmem, BUSMEM_SIZE); 317 rumpuser_close(sc->sc_memfd); 318 rumpuser_close(sc->sc_kq); 319 320 sc->sc_memfd = -1; 321 } 322 323 int 324 rump_shmif_create(const char *path, int *ifnum) 325 { 326 struct shmif_sc *sc; 327 vmem_addr_t t; 328 int unit, error; 329 int memfd = -1; /* XXXgcc */ 330 331 if (path) { 332 error = rumpuser_open(path, 333 RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd); 334 if (error) 335 return error; 336 } 337 338 error = vmem_xalloc(shmif_units, 1, 0, 0, 0, 339 VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_INSTANTFIT | VM_SLEEP, &t); 340 341 if (error != 0) { 342 if (path) 343 rumpuser_close(memfd); 344 return error; 345 } 346 347 unit = t - 1; 348 349 if ((error = allocif(unit, &sc)) != 0) { 350 if (path) 351 rumpuser_close(memfd); 352 return error; 353 } 354 355 if (!path) 356 goto out; 357 358 error = initbackend(sc, memfd); 359 if (error) { 360 shmif_unclone(&sc->sc_ec.ec_if); 361 return error; 362 } 363 364 sc->sc_backfilelen = strlen(path)+1; 365 sc->sc_backfile = kmem_alloc(sc->sc_backfilelen, KM_SLEEP); 366 strcpy(sc->sc_backfile, path); 367 368 out: 369 if (ifnum) 370 *ifnum = unit; 371 372 return 0; 373 } 374 375 static int 376 shmif_clone(struct if_clone *ifc, int unit) 377 { 378 int rc __diagused; 379 vmem_addr_t unit2; 380 381 /* 382 * Ok, we know the unit number, but we must still reserve it. 383 * Otherwise the wildcard-side of things might get the same one. 384 * This is slightly offset-happy due to vmem. First, we offset 385 * the range of unit numbers by +1 since vmem cannot deal with 386 * ranges starting from 0. Talk about uuuh. 387 */ 388 rc = vmem_xalloc(shmif_units, 1, 0, 0, 0, unit+1, unit+1, 389 VM_SLEEP | VM_INSTANTFIT, &unit2); 390 KASSERT(rc == 0 && unit2-1 == unit); 391 392 return allocif(unit, NULL); 393 } 394 395 static int 396 shmif_unclone(struct ifnet *ifp) 397 { 398 struct shmif_sc *sc = ifp->if_softc; 399 400 shmif_stop(ifp, 1); 401 if_down(ifp); 402 403 mutex_enter(&sc->sc_mtx); 404 sc->sc_dying = true; 405 cv_broadcast(&sc->sc_cv); 406 mutex_exit(&sc->sc_mtx); 407 408 if (sc->sc_rcvl) 409 kthread_join(sc->sc_rcvl); 410 sc->sc_rcvl = NULL; 411 412 /* 413 * Need to be called after the kthread left, otherwise closing kqueue 414 * (sc_kq) hangs sometimes perhaps because of a race condition between 415 * close and kevent in the kthread on the kqueue. 416 */ 417 finibackend(sc); 418 419 vmem_xfree(shmif_units, sc->sc_unit+1, 1); 420 421 ether_ifdetach(ifp); 422 if_detach(ifp); 423 424 cv_destroy(&sc->sc_cv); 425 mutex_destroy(&sc->sc_mtx); 426 427 kmem_free(sc, sizeof(*sc)); 428 429 return 0; 430 } 431 432 static int 433 shmif_init(struct ifnet *ifp) 434 { 435 struct shmif_sc *sc = ifp->if_softc; 436 int error = 0; 437 438 if (sc->sc_memfd == -1) 439 return ENXIO; 440 KASSERT(sc->sc_busmem); 441 442 ifp->if_flags |= IFF_RUNNING; 443 444 mutex_enter(&sc->sc_mtx); 445 sc->sc_nextpacket = sc->sc_busmem->shm_last; 446 sc->sc_devgen = sc->sc_busmem->shm_gen; 447 448 cv_broadcast(&sc->sc_cv); 449 mutex_exit(&sc->sc_mtx); 450 451 return error; 452 } 453 454 static int 455 shmif_mediachange(struct ifnet *ifp) 456 { 457 struct shmif_sc *sc = ifp->if_softc; 458 459 if (IFM_SUBTYPE(sc->sc_im.ifm_cur->ifm_media) == IFM_NONE && 460 ifp->if_link_state != LINK_STATE_DOWN) { 461 if_link_state_change(ifp, LINK_STATE_DOWN); 462 } else if (IFM_SUBTYPE(sc->sc_im.ifm_cur->ifm_media) == IFM_AUTO && 463 ifp->if_link_state != LINK_STATE_UP) { 464 if_link_state_change(ifp, LINK_STATE_UP); 465 } 466 return 0; 467 } 468 469 static void 470 shmif_mediastatus(struct ifnet *ifp, struct ifmediareq *imr) 471 { 472 struct shmif_sc *sc = ifp->if_softc; 473 imr->ifm_active = sc->sc_im.ifm_cur->ifm_media; 474 } 475 476 static int 477 shmif_ioctl(struct ifnet *ifp, u_long cmd, void *data) 478 { 479 struct shmif_sc *sc = ifp->if_softc; 480 struct ifdrv *ifd; 481 char *path; 482 int s, rv, memfd; 483 484 s = splnet(); 485 switch (cmd) { 486 case SIOCGLINKSTR: 487 ifd = data; 488 489 if (sc->sc_backfilelen == 0) { 490 rv = ENOENT; 491 break; 492 } 493 494 ifd->ifd_len = sc->sc_backfilelen; 495 if (ifd->ifd_cmd == IFLINKSTR_QUERYLEN) { 496 rv = 0; 497 break; 498 } 499 500 if (ifd->ifd_cmd != 0) { 501 rv = EINVAL; 502 break; 503 } 504 505 rv = copyoutstr(sc->sc_backfile, ifd->ifd_data, 506 MIN(sc->sc_backfilelen, ifd->ifd_len), NULL); 507 break; 508 case SIOCSLINKSTR: 509 if (ifp->if_flags & IFF_UP) { 510 rv = EBUSY; 511 break; 512 } 513 514 ifd = data; 515 if (ifd->ifd_cmd == IFLINKSTR_UNSET) { 516 finibackend(sc); 517 /* Back to the default just in case */ 518 ifp->if_link_state = LINK_STATE_UNKNOWN; 519 rv = 0; 520 break; 521 } else if (ifd->ifd_cmd != 0) { 522 rv = EINVAL; 523 break; 524 } else if (sc->sc_backfile) { 525 rv = EBUSY; 526 break; 527 } 528 529 if (ifd->ifd_len > MAXPATHLEN) { 530 rv = E2BIG; 531 break; 532 } else if (ifd->ifd_len < 1) { 533 rv = EINVAL; 534 break; 535 } 536 537 path = kmem_alloc(ifd->ifd_len, KM_SLEEP); 538 rv = copyinstr(ifd->ifd_data, path, ifd->ifd_len, NULL); 539 if (rv) { 540 kmem_free(path, ifd->ifd_len); 541 break; 542 } 543 rv = rumpuser_open(path, 544 RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_CREATE, &memfd); 545 if (rv) { 546 kmem_free(path, ifd->ifd_len); 547 break; 548 } 549 rv = initbackend(sc, memfd); 550 if (rv) { 551 kmem_free(path, ifd->ifd_len); 552 rumpuser_close(memfd); 553 break; 554 } 555 sc->sc_backfile = path; 556 sc->sc_backfilelen = ifd->ifd_len; 557 558 if_link_state_change(ifp, LINK_STATE_UP); 559 break; 560 561 #ifdef OSIOCSIFMEDIA 562 case OSIOCSIFMEDIA: 563 #endif 564 case SIOCSIFMEDIA: 565 case SIOCGIFMEDIA: 566 rv = ifmedia_ioctl(ifp, data, &sc->sc_im, cmd); 567 break; 568 569 default: 570 rv = ether_ioctl(ifp, cmd, data); 571 if (rv == ENETRESET) 572 rv = 0; 573 break; 574 } 575 splx(s); 576 577 return rv; 578 } 579 580 static void 581 shmif_start(struct ifnet *ifp) 582 { 583 struct shmif_sc *sc = ifp->if_softc; 584 struct mbuf *m, *n; 585 bool wrote = false; 586 587 ifp->if_flags |= IFF_OACTIVE; 588 589 for (;;) { 590 IFQ_DEQUEUE(&ifp->if_snd, m); 591 if (m == NULL) 592 break; 593 594 m = ether_sw_offload_tx(ifp, m); 595 if (m == NULL) { 596 if_statinc(ifp, if_oerrors); 597 break; 598 } 599 600 do { 601 n = m->m_nextpkt; 602 shmif_snd(ifp, m); 603 m = n; 604 } while (m != NULL); 605 606 wrote = true; 607 } 608 609 ifp->if_flags &= ~IFF_OACTIVE; 610 611 /* wakeup? */ 612 if (wrote) { 613 dowakeup(sc); 614 } 615 } 616 617 /* send everything in-context since it's just a matter of mem-to-mem copy */ 618 static void 619 shmif_snd(struct ifnet *ifp, struct mbuf *m0) 620 { 621 struct shmif_sc *sc = ifp->if_softc; 622 struct shmif_mem *busmem = sc->sc_busmem; 623 struct shmif_pkthdr sp; 624 struct timeval tv; 625 struct mbuf *m; 626 uint32_t dataoff; 627 uint32_t pktsize, pktwrote; 628 bool wrap; 629 630 pktsize = 0; 631 for (m = m0; m != NULL; m = m->m_next) { 632 pktsize += m->m_len; 633 } 634 KASSERT(pktsize <= ETHERMTU + ETHER_HDR_LEN); 635 636 getmicrouptime(&tv); 637 sp.sp_len = pktsize; 638 sp.sp_sec = tv.tv_sec; 639 sp.sp_usec = tv.tv_usec; 640 sp.sp_sender = sc->sc_uid; 641 642 bpf_mtap(ifp, m0, BPF_D_OUT); 643 644 /* 645 * Compare with DOWN to allow UNKNOWN (the default value), 646 * which is required by some ATF tests using rump servers 647 * written in C. 648 */ 649 if (ifp->if_link_state == LINK_STATE_DOWN) 650 goto dontsend; 651 652 shmif_lockbus(busmem); 653 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 654 busmem->shm_last = shmif_nextpktoff(busmem, busmem->shm_last); 655 656 wrap = false; 657 dataoff = 658 shmif_buswrite(busmem, busmem->shm_last, &sp, sizeof(sp), &wrap); 659 pktwrote = 0; 660 for (m = m0; m != NULL; m = m->m_next) { 661 pktwrote += m->m_len; 662 dataoff = shmif_buswrite(busmem, dataoff, mtod(m, void *), 663 m->m_len, &wrap); 664 } 665 KASSERT(pktwrote == pktsize); 666 if (wrap) { 667 busmem->shm_gen++; 668 DPRINTF(("bus generation now %" PRIu64 "\n", busmem->shm_gen)); 669 } 670 shmif_unlockbus(busmem); 671 672 dontsend: 673 m_freem(m0); 674 if_statinc(ifp, if_opackets); 675 676 DPRINTF(("shmif_start: send %d bytes at off %d\n", pktsize, 677 busmem->shm_last)); 678 } 679 680 static void 681 shmif_stop(struct ifnet *ifp, int disable) 682 { 683 struct shmif_sc *sc = ifp->if_softc; 684 685 ifp->if_flags &= ~IFF_RUNNING; 686 membar_producer(); 687 688 /* 689 * wakeup thread. this will of course wake up all bus 690 * listeners, but that's life. 691 */ 692 if (sc->sc_memfd != -1) { 693 dowakeup(sc); 694 } 695 } 696 697 698 /* 699 * Check if we have been sleeping too long. Basically, 700 * our in-sc nextpkt must by first <= nextpkt <= last"+1". 701 * We use the fact that first is guaranteed to never overlap 702 * with the last frame in the ring. 703 */ 704 static __inline bool 705 stillvalid_p(struct shmif_sc *sc) 706 { 707 struct shmif_mem *busmem = sc->sc_busmem; 708 unsigned gendiff = busmem->shm_gen - sc->sc_devgen; 709 uint32_t lastoff, devoff; 710 711 KASSERT(busmem->shm_first != busmem->shm_last); 712 713 /* normalize onto a 2x busmem chunk */ 714 devoff = sc->sc_nextpacket; 715 lastoff = shmif_nextpktoff(busmem, busmem->shm_last); 716 717 /* trivial case */ 718 if (gendiff > 1) 719 return false; 720 KASSERT(gendiff <= 1); 721 722 /* Normalize onto 2x busmem chunk */ 723 if (busmem->shm_first >= lastoff) { 724 lastoff += BUSMEM_DATASIZE; 725 if (gendiff == 0) 726 devoff += BUSMEM_DATASIZE; 727 } else { 728 if (gendiff) 729 return false; 730 } 731 732 return devoff >= busmem->shm_first && devoff <= lastoff; 733 } 734 735 static void 736 shmif_rcv(void *arg) 737 { 738 struct ifnet *ifp = arg; 739 struct shmif_sc *sc = ifp->if_softc; 740 struct shmif_mem *busmem; 741 struct mbuf *m = NULL; 742 struct ether_header *eth; 743 uint32_t nextpkt; 744 bool wrap, passup; 745 int error; 746 const int align 747 = ALIGN(sizeof(struct ether_header)) - sizeof(struct ether_header); 748 749 reup: 750 mutex_enter(&sc->sc_mtx); 751 while ((ifp->if_flags & IFF_RUNNING) == 0 && !sc->sc_dying) 752 cv_wait(&sc->sc_cv, &sc->sc_mtx); 753 mutex_exit(&sc->sc_mtx); 754 755 busmem = sc->sc_busmem; 756 757 while (ifp->if_flags & IFF_RUNNING) { 758 struct shmif_pkthdr sp; 759 760 if (m == NULL) { 761 m = m_gethdr(M_WAIT, MT_DATA); 762 MCLGET(m, M_WAIT); 763 m->m_data += align; 764 } 765 766 DPRINTF(("waiting %d/%" PRIu64 "\n", 767 sc->sc_nextpacket, sc->sc_devgen)); 768 KASSERT(m->m_flags & M_EXT); 769 770 shmif_lockbus(busmem); 771 KASSERT(busmem->shm_magic == SHMIF_MAGIC); 772 KASSERT(busmem->shm_gen >= sc->sc_devgen); 773 774 /* need more data? */ 775 if (sc->sc_devgen == busmem->shm_gen && 776 shmif_nextpktoff(busmem, busmem->shm_last) 777 == sc->sc_nextpacket) { 778 shmif_unlockbus(busmem); 779 error = rumpcomp_shmif_watchwait(sc->sc_kq); 780 if (__predict_false(error)) 781 printf("shmif_rcv: wait failed %d\n", error); 782 membar_consumer(); 783 continue; 784 } 785 786 if (stillvalid_p(sc)) { 787 nextpkt = sc->sc_nextpacket; 788 } else { 789 KASSERT(busmem->shm_gen > 0); 790 nextpkt = busmem->shm_first; 791 if (busmem->shm_first > busmem->shm_last) 792 sc->sc_devgen = busmem->shm_gen - 1; 793 else 794 sc->sc_devgen = busmem->shm_gen; 795 DPRINTF(("dev %p overrun, new data: %d/%" PRIu64 "\n", 796 sc, nextpkt, sc->sc_devgen)); 797 } 798 799 /* 800 * If our read pointer is ahead the bus last write, our 801 * generation must be one behind. 802 */ 803 KASSERT(!(nextpkt > busmem->shm_last 804 && sc->sc_devgen == busmem->shm_gen)); 805 806 wrap = false; 807 nextpkt = shmif_busread(busmem, &sp, 808 nextpkt, sizeof(sp), &wrap); 809 KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN); 810 nextpkt = shmif_busread(busmem, mtod(m, void *), 811 nextpkt, sp.sp_len, &wrap); 812 813 DPRINTF(("shmif_rcv: read packet of length %d at %d\n", 814 sp.sp_len, nextpkt)); 815 816 sc->sc_nextpacket = nextpkt; 817 shmif_unlockbus(sc->sc_busmem); 818 819 if (wrap) { 820 sc->sc_devgen++; 821 DPRINTF(("dev %p generation now %" PRIu64 "\n", 822 sc, sc->sc_devgen)); 823 } 824 825 /* 826 * Ignore packets too short to possibly be valid. 827 * This is hit at least for the first frame on a new bus. 828 */ 829 if (__predict_false(sp.sp_len < ETHER_HDR_LEN)) { 830 DPRINTF(("shmif read packet len %d < ETHER_HDR_LEN\n", 831 sp.sp_len)); 832 continue; 833 } 834 835 m->m_len = m->m_pkthdr.len = sp.sp_len; 836 m_set_rcvif(m, ifp); 837 838 /* 839 * Test if we want to pass the packet upwards 840 */ 841 eth = mtod(m, struct ether_header *); 842 /* 843 * Compare with DOWN to allow UNKNOWN (the default value), 844 * which is required by some ATF tests using rump servers 845 * written in C. 846 */ 847 if (ifp->if_link_state == LINK_STATE_DOWN) { 848 passup = false; 849 } else if (sp.sp_sender == sc->sc_uid) { 850 passup = false; 851 } else if (memcmp(eth->ether_dhost, CLLADDR(ifp->if_sadl), 852 ETHER_ADDR_LEN) == 0) { 853 passup = true; 854 } else if (ETHER_IS_MULTICAST(eth->ether_dhost)) { 855 passup = true; 856 } else if (ifp->if_flags & IFF_PROMISC) { 857 m->m_flags |= M_PROMISC; 858 passup = true; 859 } else { 860 passup = false; 861 } 862 863 if (passup) { 864 int bound; 865 866 m = ether_sw_offload_rx(ifp, m); 867 868 KERNEL_LOCK(1, NULL); 869 /* Prevent LWP migrations between CPUs for psref(9) */ 870 bound = curlwp_bind(); 871 if_input(ifp, m); 872 curlwp_bindx(bound); 873 KERNEL_UNLOCK_ONE(NULL); 874 875 m = NULL; 876 } 877 /* else: reuse mbuf for a future packet */ 878 } 879 m_freem(m); 880 m = NULL; 881 882 if (!sc->sc_dying) 883 goto reup; 884 885 kthread_exit(0); 886 } 887