/*
 * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements netmap support on top of standard,
 * unmodified device drivers.
 *
 * A NIOCREGIF request is handled here if the device does not
 * have native support. TX and RX rings are emulated as follows:
 *
 * NIOCREGIF
 *    We preallocate a block of TX mbufs (roughly as many as
 *    tx descriptors; the number is not critical) to speed up
 *    operation during transmissions. The refcount on most of
 *    these buffers is artificially bumped up so we can recycle
 *    them more easily. Also, the destructor is intercepted
 *    so we use it as an interrupt notification to wake up
 *    processes blocked on a poll().
 *
 *    For each receive ring we allocate one "struct mbq"
 *    (an mbuf tailq plus a spinlock). We intercept packets
 *    (through if_input) on the receive path and put them
 *    in the mbq from which netmap receive routines can grab them.
 *
 * TX:
 *    In the generic_txsync() routine, netmap buffers are copied
 *    (or linked, in the future) to the preallocated mbufs
 *    and pushed to the transmit queue. Some of these mbufs
 *    (those with NS_REPORT, or otherwise every half ring)
 *    have the refcount set to 1, the others to 2.
 *    When the destructor is invoked, we take that as
 *    a notification that all mbufs up to that one in
 *    the specific ring have been completed, and generate
 *    the equivalent of a transmit interrupt.
 *
 * RX:
 *    Incoming mbufs are intercepted by generic_rx_handler(),
 *    which queues them into the per-ring mbq. The
 *    generic_netmap_rxsync() routine then copies their payload
 *    into the netmap receive ring. Notifications can be
 *    mitigated with a timer to limit the interrupt rate.
 */

/* __FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257666 2013-11-05 01:06:22Z luigi $"); */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h>    /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h>  /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <sys/bus.h>     /* bus_dmamap_* in netmap_kern.h */

// XXX temporary - D() defined here
#include <net/netmap.h>

#include "netmap_kern.h"
#include "netmap_mem2.h"

#define rtnl_lock() D("rtnl_lock called");
#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()

/*
 * mbuf wrappers
 */

/*
 * we allocate an EXT_PACKET
 */
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)

/* mbuf destructor: we also need to change the type to EXT_EXTREF,
 * add an M_NOFREE flag, and then clear the flag and
 * chain into uma_zfree(zone_pack, mf)
 * (or reinstall the buffer ?)
 */
#define SET_MBUF_DESTRUCTOR(m, fn) do {          \
    (m)->m_ext.ext_free = (void *)fn;            \
    (m)->m_ext.ext_type = EXT_EXTREF;            \
} while (0)


#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)

/* ======================== usage stats =========================== */

#ifdef RATE
#define IFRATE(x) x
struct rate_stats {
    unsigned long txpkt;
    unsigned long txsync;
    unsigned long txirq;
    unsigned long rxpkt;
    unsigned long rxirq;
    unsigned long rxsync;
};

struct rate_context {
    unsigned refcount;
    struct timer_list timer;
    struct rate_stats new;
    struct rate_stats old;
};

#define RATE_PRINTK(_NAME_) \
    printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD 2
static void rate_callback(unsigned long arg)
{
    struct rate_context *ctx = (struct rate_context *)arg;
    struct rate_stats cur = ctx->new;
    int r;

    RATE_PRINTK(txpkt);
    RATE_PRINTK(txsync);
    RATE_PRINTK(txirq);
    RATE_PRINTK(rxpkt);
    RATE_PRINTK(rxsync);
    RATE_PRINTK(rxirq);
    printk("\n");

    ctx->old = cur;
    r = mod_timer(&ctx->timer, jiffies +
            msecs_to_jiffies(RATE_PERIOD * 1000));
    if (unlikely(r))
        D("[v1000] Error: mod_timer()");
}

static struct rate_context rate_ctx;

#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */


/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */

/*
 * Wrapper used by the generic adapter layer to notify
 * the poller threads. Unlike netmap_rx_irq(), we check
 * only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
 */
static int
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
    if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
        return 0;

    return netmap_common_irq(ifp, q, work_done);
}

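/*
 * The routines below implement the emulated adapter:
 *  - generic_netmap_register()  enables/disables emulated netmap mode;
 *  - generic_mbuf_destructor(), generic_netmap_tx_clean() and
 *    generic_set_tx_event()     implement tx completion notifications;
 *  - generic_netmap_txsync()    pushes netmap slots to the driver as mbufs;
 *  - generic_rx_handler()       intercepts rx mbufs from the driver;
 *  - generic_netmap_rxsync()    moves queued rx mbufs into the netmap ring;
 *  - generic_netmap_attach()    builds the emulated adapter for an ifnet.
 */
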
/* Enable/disable netmap mode for a generic network interface. */
int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
    struct ifnet *ifp;
    struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
    struct mbuf *m;
    int error;
    int i, r;

    if (!na)
        return EINVAL;
    ifp = na->ifp;

#ifdef REG_RESET
    error = ifp->netdev_ops->ndo_stop(ifp);
    if (error) {
        return error;
    }
#endif /* REG_RESET */

    if (enable) { /* Enable netmap mode. */
        /* Initialize the rx queue, as generic_rx_handler() can
         * be called as soon as netmap_catch_rx() returns.
         */
        for (r=0; r<na->num_rx_rings; r++) {
            mbq_safe_init(&na->rx_rings[r].rx_queue);
            na->rx_rings[r].nr_ntc = 0;
        }

        /* Init the mitigation timer. */
        netmap_mitigation_init(gna);

        /*
         * Preallocate packet buffers for the tx rings.
         */
        for (r=0; r<na->num_tx_rings; r++) {
            na->tx_rings[r].nr_ntc = 0;
            na->tx_rings[r].tx_pool = kmalloc(na->num_tx_desc * sizeof(struct mbuf *),
                    M_DEVBUF, M_NOWAIT | M_ZERO);
            if (!na->tx_rings[r].tx_pool) {
                D("tx_pool allocation failed");
                error = ENOMEM;
                goto free_tx_pool;
            }
            for (i=0; i<na->num_tx_desc; i++) {
                m = netmap_get_mbuf(GENERIC_BUF_SIZE);
                if (!m) {
                    D("tx_pool[%d] allocation failed", i);
                    error = ENOMEM;
                    goto free_mbufs;
                }
                na->tx_rings[r].tx_pool[i] = m;
            }
        }
        rtnl_lock();
        /* Prepare to intercept incoming traffic. */
        error = netmap_catch_rx(na, 1);
        if (error) {
            D("netdev_rx_handler_register() failed");
            goto register_handler;
        }
        ifp->if_capenable |= IFCAP_NETMAP;

        /* Make netmap control the packet steering. */
        netmap_catch_packet_steering(gna, 1);

        rtnl_unlock();

#ifdef RATE
        if (rate_ctx.refcount == 0) {
            D("setup_timer()");
            memset(&rate_ctx, 0, sizeof(rate_ctx));
            setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
            if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
                D("Error: mod_timer()");
            }
        }
        rate_ctx.refcount++;
#endif /* RATE */

    } else { /* Disable netmap mode. */
        rtnl_lock();

        ifp->if_capenable &= ~IFCAP_NETMAP;

        /* Release packet steering control. */
        netmap_catch_packet_steering(gna, 0);

        /* Do not intercept packets on the rx path. */
        netmap_catch_rx(na, 0);

        rtnl_unlock();

        /* Free the mbufs going to the netmap rings */
        for (r=0; r<na->num_rx_rings; r++) {
            mbq_safe_purge(&na->rx_rings[r].rx_queue);
            mbq_safe_destroy(&na->rx_rings[r].rx_queue);
        }

        netmap_mitigation_cleanup(gna);

        for (r=0; r<na->num_tx_rings; r++) {
            for (i=0; i<na->num_tx_desc; i++) {
                m_freem(na->tx_rings[r].tx_pool[i]);
            }
            kfree(na->tx_rings[r].tx_pool, M_DEVBUF);
        }

#ifdef RATE
        if (--rate_ctx.refcount == 0) {
            D("del_timer()");
            del_timer(&rate_ctx.timer);
        }
#endif
    }

#ifdef REG_RESET
    error = ifp->netdev_ops->ndo_open(ifp);
    if (error) {
        goto alloc_tx_pool;
    }
#endif

    return 0;

register_handler:
    rtnl_unlock();
free_tx_pool:
    r--;
    i = na->num_tx_desc; /* Useless, but just to stay safe. */
free_mbufs:
    i--;
    for (; r>=0; r--) {
        for (; i>=0; i--) {
            m_freem(na->tx_rings[r].tx_pool[i]);
        }
        kfree(na->tx_rings[r].tx_pool, M_DEVBUF);
        i = na->num_tx_desc - 1;
    }

    return error;
}

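/*
 * Tx completion notifications.
 * Mbufs in the tx pool are kept with an extra reference, so a free
 * from the driver normally just drops the refcount back to 1 and
 * generic_netmap_tx_clean() detects the completion by looking at
 * the refcount. To get an actual wakeup, generic_set_tx_event()
 * picks one pending mbuf (roughly in the middle of the pending
 * region), installs generic_mbuf_destructor() on it and releases
 * our reference, so the driver's free runs the destructor and
 * generates the equivalent of a tx interrupt.
 */
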
/*
 * Callback invoked when the device driver frees an mbuf used
 * by netmap to transmit a packet. This usually happens when
 * the NIC notifies the driver that transmission is completed.
 */
static void
generic_mbuf_destructor(struct mbuf *m)
{
    if (netmap_verbose)
        D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
    netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
    m->m_ext.ext_type = EXT_PACKET;
    m->m_ext.ext_free = NULL;
    if (*(m->m_ext.ref_cnt) == 0)
        *(m->m_ext.ref_cnt) = 1;
    uma_zfree(zone_pack, m);
    IFRATE(rate_ctx.new.txirq++);
}

/* Record completed transmissions and update hwavail.
 *
 * nr_ntc is the oldest tx buffer not yet completed
 * (same as nr_hwavail + nr_hwcur + 1),
 * nr_hwcur is the first unsent buffer.
 * When cleaning, we try to recover buffers between nr_ntc and nr_hwcur.
 */
static int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
    u_int num_slots = kring->nkr_num_slots;
    u_int ntc = kring->nr_ntc;
    u_int hwcur = kring->nr_hwcur;
    u_int n = 0;
    struct mbuf **tx_pool = kring->tx_pool;

    while (ntc != hwcur) { /* buffers not completed */
        struct mbuf *m = tx_pool[ntc];

        if (unlikely(m == NULL)) {
            /* try to replenish the entry */
            tx_pool[ntc] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
            if (unlikely(m == NULL)) {
                D("mbuf allocation failed, XXX error");
                // XXX how do we proceed ? break ?
                return -ENOMEM;
            }
        } else if (GET_MBUF_REFCNT(m) != 1) {
            break; /* This mbuf is still busy: its refcnt is 2. */
        }
        if (unlikely(++ntc == num_slots)) {
            ntc = 0;
        }
        n++;
    }
    kring->nr_ntc = ntc;
    kring->nr_hwavail += n;
    ND("tx completed [%d] -> hwavail %d", n, kring->nr_hwavail);

    return n;
}


/*
 * We have pending packets in the driver between nr_ntc and hwcur.
 * Compute a position in the middle, to be used to generate
 * a notification.
 */
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
    u_int n = kring->nkr_num_slots;
    u_int ntc = kring->nr_ntc;
    u_int e;

    if (hwcur >= ntc) {
        e = (hwcur + ntc) / 2;
    } else { /* wrap around */
        e = (hwcur + n + ntc) / 2;
        if (e >= n) {
            e -= n;
        }
    }

    if (unlikely(e >= n)) {
        D("This cannot happen");
        e = 0;
    }

    return e;
}

/*
 * We have pending packets in the driver between nr_ntc and hwcur.
 * Schedule a notification approximately in the middle of the two.
 * There is a race but this is only called within txsync which does
 * a double check.
 */
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
    struct mbuf *m;
    u_int e;

    if (kring->nr_ntc == hwcur) {
        return;
    }
    e = generic_tx_event_middle(kring, hwcur);

    m = kring->tx_pool[e];
    if (m == NULL) {
        /* This can happen if there is already an event on the netmap
           slot 'e': There is nothing to do. */
        return;
    }
    ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
    kring->tx_pool[e] = NULL;
    SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);

    // XXX wmb() ?
    /* Decrement the refcount and free it if we have the last one. */
    m_freem(m);
    smp_mb();
}

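/*
 * Example of the index computed by generic_tx_event_middle() above:
 * with nkr_num_slots = 256, nr_ntc = 200 and hwcur = 40 (wrapped
 * case), e = (40 + 256 + 200) / 2 = 248, i.e. halfway through the
 * 96 pending slots 200..255,0..39.
 */
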
/*
 * generic_netmap_txsync() transforms netmap buffers into mbufs
 * and passes them to the standard device driver
 * (ndo_start_xmit() or ifp->if_transmit()).
 * On linux this is not done directly, but using dev_queue_xmit(),
 * since it implements the TX flow control (and takes some locks).
 */
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
    struct ifnet *ifp = na->ifp;
    struct netmap_kring *kring = &na->tx_rings[ring_nr];
    struct netmap_ring *ring = kring->ring;
    u_int j, k, num_slots = kring->nkr_num_slots;
    int new_slots, ntx;

    IFRATE(rate_ctx.new.txsync++);

    // TODO: handle the case of mbuf allocation failure
    /* first, reclaim completed buffers */
    generic_netmap_tx_clean(kring);

    /* Take a copy of ring->cur now, and never read it again. */
    k = ring->cur;
    if (unlikely(k >= num_slots)) {
        return netmap_ring_reinit(kring);
    }

    rmb();
    j = kring->nr_hwcur;
    /*
     * 'new_slots' counts how many new slots have been added:
     * everything from hwcur to cur, excluding reserved ones, if any.
     * nr_hwreserved starts from hwcur and counts how many slots were
     * not sent to the NIC from the previous round.
     */
    new_slots = k - j - kring->nr_hwreserved;
    if (new_slots < 0) {
        new_slots += num_slots;
    }
    ntx = 0;
    if (j != k) {
        /* Process new packets to send:
         * j is the current index in the netmap ring.
         */
        while (j != k) {
            struct netmap_slot *slot = &ring->slot[j]; /* Current slot in the netmap ring */
            void *addr = NMB(slot);
            u_int len = slot->len;
            struct mbuf *m;
            int tx_ret;

            if (unlikely(addr == netmap_buffer_base || len > NETMAP_BUF_SIZE)) {
                return netmap_ring_reinit(kring);
            }
            /* Take an mbuf from the tx pool and copy in the user packet. */
            m = kring->tx_pool[j];
            if (unlikely(!m)) {
                RD(5, "This should never happen");
                kring->tx_pool[j] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
                if (unlikely(m == NULL)) {
                    D("mbuf allocation failed");
                    break;
                }
            }
            /* XXX we should ask for notifications when NS_REPORT is set,
             * or roughly every half frame. We can optimize this
             * by lazily requesting notifications only when a
             * transmission fails. Probably the best way is to
             * break on failures and set notifications when
             * ring->avail == 0 || j != k
             */
            tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
            if (unlikely(tx_ret)) {
                RD(5, "start_xmit failed: err %d [%u,%u,%u,%u]",
                        tx_ret, kring->nr_ntc, j, k, kring->nr_hwavail);
                /*
                 * No room for this mbuf in the device driver.
                 * Request a notification FOR A PREVIOUS MBUF,
                 * then call generic_netmap_tx_clean(kring) to do the
                 * double check and see if we can free more buffers.
                 * If there is space continue, else break;
                 * NOTE: the double check is necessary if the problem
                 * occurs in the txsync call after selrecord().
                 * Also, we need some way to tell the caller that not
                 * all buffers were queued onto the device (this was
                 * not a problem with the native netmap driver where space
                 * is preallocated). The bridge has a similar problem
                 * and we solve it there by dropping the excess packets.
                 */
                generic_set_tx_event(kring, j);
                if (generic_netmap_tx_clean(kring)) { /* space now available */
                    continue;
                } else {
                    break;
                }
            }
            slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
            if (unlikely(++j == num_slots))
                j = 0;
            ntx++;
        }

        /* Update hwcur to the next slot to transmit. */
        kring->nr_hwcur = j;

        /*
         * Report all new slots as unavailable, even those not sent.
         * We account for them with hwreserved, so that
         * nr_hwreserved = cur - nr_hwcur
         */
        kring->nr_hwavail -= new_slots;
        kring->nr_hwreserved = k - j;
        if (kring->nr_hwreserved < 0) {
            kring->nr_hwreserved += num_slots;
        }

        IFRATE(rate_ctx.new.txpkt += ntx);

        if (!kring->nr_hwavail) {
            /* No more available slots? Set a notification event
             * on a netmap slot that will be cleaned in the future.
             * No doublecheck is performed, since txsync() will be
             * called twice by netmap_poll().
             */
            generic_set_tx_event(kring, j);
        }
        ND("tx #%d, hwavail = %d", ntx, kring->nr_hwavail);
    }

    /* Synchronize the user's view to the kernel view. */
    ring->avail = kring->nr_hwavail;
    ring->reserved = kring->nr_hwreserved;

    return 0;
}

/*
 * This handler is registered (through netmap_catch_rx())
 * within the attached network interface
 * in the RX subsystem, so that every mbuf passed up by
 * the driver can be stolen before it reaches the network stack.
 * Stolen packets are put in a queue where the
 * generic_netmap_rxsync() callback can extract them.
 */
void
generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
    struct netmap_adapter *na = NA(ifp);
    struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
    u_int work_done;
    u_int rr = 0; // receive ring number

    ND("called");
    /* limit the size of the queue */
    if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
        m_freem(m);
    } else {
        mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
    }

    if (netmap_generic_mit < 32768) {
        /* no rx mitigation, pass notification up */
        netmap_generic_irq(na->ifp, rr, &work_done);
        IFRATE(rate_ctx.new.rxirq++);
    } else {
        /* same as send combining: filter the notification if there is a
         * pending timer, otherwise pass it up and start a timer.
         */
        if (likely(netmap_mitigation_active(gna))) {
            /* Record that there is some pending work. */
            gna->mit_pending = 1;
        } else {
            netmap_generic_irq(na->ifp, rr, &work_done);
            IFRATE(rate_ctx.new.rxirq++);
            netmap_mitigation_start(gna);
        }
    }
}

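/*
 * Note that the handler above caps the rx queue at 1024 mbufs and,
 * when rx mitigation is enabled, suppresses notifications while the
 * mitigation timer is running, only recording the pending work in
 * gna->mit_pending.
 */
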
/*
 * generic_netmap_rxsync() extracts mbufs from the queue filled by
 * generic_rx_handler() and puts their content in the netmap
 * receive ring.
 * Access must be protected because the rx handler is asynchronous.
 */
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
    struct netmap_kring *kring = &na->rx_rings[ring_nr];
    struct netmap_ring *ring = kring->ring;
    u_int j, n, lim = kring->nkr_num_slots - 1;
    int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
    u_int k, resvd = ring->reserved;

    if (ring->cur > lim)
        return netmap_ring_reinit(kring);

    /* Import newly received packets into the netmap ring. */
    if (netmap_no_pendintr || force_update) {
        uint16_t slot_flags = kring->nkr_slot_flags;
        struct mbuf *m;

        n = 0;
        j = kring->nr_ntc; /* first empty slot in the receive ring */
        /* extract buffers from the rx queue, stop at most one
         * slot before nr_hwcur (index k)
         */
        k = (kring->nr_hwcur) ? kring->nr_hwcur-1 : lim;
        while (j != k) {
            int len;
            void *addr = NMB(&ring->slot[j]);

            if (addr == netmap_buffer_base) { /* Bad buffer */
                return netmap_ring_reinit(kring);
            }
            /*
             * Call the locked version of the function.
             * XXX Ideally we could grab a batch of mbufs at once,
             * by changing rx_queue into a ring.
             */
            m = mbq_safe_dequeue(&kring->rx_queue);
            if (!m)
                break;
            len = MBUF_LEN(m);
            m_copydata(m, 0, len, addr);
            ring->slot[j].len = len;
            ring->slot[j].flags = slot_flags;
            m_freem(m);
            if (unlikely(j++ == lim))
                j = 0;
            n++;
        }
        if (n) {
            kring->nr_ntc = j;
            kring->nr_hwavail += n;
            IFRATE(rate_ctx.new.rxpkt += n);
        }
        kring->nr_kflags &= ~NKR_PENDINTR;
    }

    // XXX should we invert the order ?
    /* Skip past packets that userspace has released */
    j = kring->nr_hwcur;
    k = ring->cur;
    if (resvd > 0) {
        if (resvd + ring->avail >= lim + 1) {
            D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
            ring->reserved = resvd = 0; // XXX panic...
        }
        k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
    }
    if (j != k) {
        /* Userspace has released some packets. */
        for (n = 0; j != k; n++) {
            struct netmap_slot *slot = &ring->slot[j];

            slot->flags &= ~NS_BUF_CHANGED;
            if (unlikely(j++ == lim))
                j = 0;
        }
        kring->nr_hwavail -= n;
        kring->nr_hwcur = k;
    }
    /* Tell userspace that there are new packets. */
    ring->avail = kring->nr_hwavail - resvd;
    IFRATE(rate_ctx.new.rxsync++);

    return 0;
}

static void
generic_netmap_dtor(struct netmap_adapter *na)
{
    struct ifnet *ifp = na->ifp;
    struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
    struct netmap_adapter *prev_na = gna->prev;

    if (prev_na != NULL) {
        D("Released generic NA %p", gna);
        if_rele(na->ifp);
        netmap_adapter_put(prev_na);
    }
    if (ifp != NULL) {
        WNA(ifp) = prev_na;
        D("Restored native NA %p", prev_na);
        na->ifp = NULL;
    }
}

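/*
 * The attach routine below is used when a NIOCREGIF request is
 * issued for a device without native netmap support (see the
 * comment at the top of this file); generic_netmap_dtor() above
 * restores the native adapter pointer saved in gna->prev when the
 * emulated adapter is released.
 */
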
/*
 * generic_netmap_attach() makes it possible to use netmap on
 * a device without native netmap support.
 * This is less performant than native support but potentially
 * faster than raw sockets or similar schemes.
 *
 * In this "emulated" mode, netmap rings do not necessarily
 * have the same size as those in the NIC. We use a default
 * value and possibly override it if the OS has ways to fetch the
 * actual configuration.
 */
int
generic_netmap_attach(struct ifnet *ifp)
{
    struct netmap_adapter *na;
    struct netmap_generic_adapter *gna;
    int retval;
    u_int num_tx_desc, num_rx_desc;

    num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */

    generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
    ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);

    gna = kmalloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
    if (gna == NULL) {
        D("no memory on attach, give up");
        return ENOMEM;
    }
    na = (struct netmap_adapter *)gna;
    na->ifp = ifp;
    na->num_tx_desc = num_tx_desc;
    na->num_rx_desc = num_rx_desc;
    na->nm_register = &generic_netmap_register;
    na->nm_txsync = &generic_netmap_txsync;
    na->nm_rxsync = &generic_netmap_rxsync;
    na->nm_dtor = &generic_netmap_dtor;
    /* when using generic, IFCAP_NETMAP is set so we force
     * NAF_SKIP_INTR to use the regular interrupt handler
     */
    na->na_flags = NAF_SKIP_INTR;

    ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
            ifp->num_tx_queues, ifp->real_num_tx_queues,
            ifp->tx_queue_len);
    ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
            ifp->num_rx_queues, ifp->real_num_rx_queues);

    generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);

    retval = netmap_attach_common(na);
    if (retval) {
        kfree(gna, M_DEVBUF);
    }

    return retval;
}