1 /****************************************************************************** 2 3 Copyright (c) 2001-2015, Intel Corporation 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ******************************************************************************/ 33 /* 34 * Copyright (c) 2011 The NetBSD Foundation, Inc. 35 * All rights reserved. 36 * 37 * This code is derived from software contributed to The NetBSD Foundation 38 * by Coyote Point Systems, Inc. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 59 * POSSIBILITY OF SUCH DAMAGE. 
60 */ 61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/ 62 /*$NetBSD: ix_txrx.c,v 1.20 2017/02/13 10:13:54 msaitoh Exp $*/ 63 64 #include "opt_inet.h" 65 #include "opt_inet6.h" 66 67 #include "ixgbe.h" 68 69 #ifdef DEV_NETMAP 70 #include <net/netmap.h> 71 #include <sys/selinfo.h> 72 #include <dev/netmap/netmap_kern.h> 73 74 extern int ix_crcstrip; 75 #endif 76 77 /* 78 ** HW RSC control: 79 ** this feature only works with 80 ** IPv4, and only on 82599 and later. 81 ** Also this will cause IP forwarding to 82 ** fail and that can't be controlled by 83 ** the stack as LRO can. For all these 84 ** reasons I've deemed it best to leave 85 ** this off and not bother with a tuneable 86 ** interface, this would need to be compiled 87 ** to enable. 88 */ 89 static bool ixgbe_rsc_enable = FALSE; 90 91 #ifdef IXGBE_FDIR 92 /* 93 ** For Flow Director: this is the 94 ** number of TX packets we sample 95 ** for the filter pool, this means 96 ** every 20th packet will be probed. 97 ** 98 ** This feature can be disabled by 99 ** setting this to 0. 100 */ 101 static int atr_sample_rate = 20; 102 #endif 103 104 /********************************************************************* 105 * Local Function prototypes 106 *********************************************************************/ 107 static void ixgbe_setup_transmit_ring(struct tx_ring *); 108 static void ixgbe_free_transmit_buffers(struct tx_ring *); 109 static int ixgbe_setup_receive_ring(struct rx_ring *); 110 static void ixgbe_free_receive_buffers(struct rx_ring *); 111 112 static void ixgbe_rx_checksum(u32, struct mbuf *, u32, 113 struct ixgbe_hw_stats *); 114 static void ixgbe_refresh_mbufs(struct rx_ring *, int); 115 static int ixgbe_xmit(struct tx_ring *, struct mbuf *); 116 static int ixgbe_tx_ctx_setup(struct tx_ring *, 117 struct mbuf *, u32 *, u32 *); 118 static int ixgbe_tso_setup(struct tx_ring *, 119 struct mbuf *, u32 *, u32 *); 120 #ifdef IXGBE_FDIR 121 static void ixgbe_atr(struct tx_ring *, struct mbuf *); 122 #endif 123 static __inline void ixgbe_rx_discard(struct rx_ring *, int); 124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, 125 struct mbuf *, u32); 126 127 static void ixgbe_setup_hw_rsc(struct rx_ring *); 128 129 /********************************************************************* 130 * Transmit entry point 131 * 132 * ixgbe_start is called by the stack to initiate a transmit. 133 * The driver will remain in this routine as long as there are 134 * packets to transmit and transmit resources are available. 135 * In case resources are not available stack is notified and 136 * the packet is requeued. 
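 *
 *  A sketch of the requeue contract used below (illustrative, not extra
 *  driver code): the mbuf is only dequeued once it will definitely be
 *  consumed, so an EAGAIN from ixgbe_xmit() leaves it on if_snd for a
 *  later attempt:
 *
 *      IFQ_POLL(&ifp->if_snd, m_head);          look, do not remove
 *      if (ixgbe_xmit(txr, m_head) == EAGAIN)
 *              break;                           m_head stays queued
 *      IFQ_DEQUEUE(&ifp->if_snd, m_head);       commit after acceptance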
137 **********************************************************************/ 138 139 void 140 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp) 141 { 142 int rc; 143 struct mbuf *m_head; 144 struct adapter *adapter = txr->adapter; 145 146 IXGBE_TX_LOCK_ASSERT(txr); 147 148 if ((ifp->if_flags & IFF_RUNNING) == 0) 149 return; 150 if (!adapter->link_active) 151 return; 152 153 while (!IFQ_IS_EMPTY(&ifp->if_snd)) { 154 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) 155 break; 156 157 IFQ_POLL(&ifp->if_snd, m_head); 158 if (m_head == NULL) 159 break; 160 161 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) { 162 break; 163 } 164 IFQ_DEQUEUE(&ifp->if_snd, m_head); 165 if (rc == EFBIG) { 166 struct mbuf *mtmp; 167 168 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) { 169 m_head = mtmp; 170 rc = ixgbe_xmit(txr, m_head); 171 if (rc != 0) 172 adapter->efbig2_tx_dma_setup.ev_count++; 173 } else 174 adapter->mbuf_defrag_failed.ev_count++; 175 } 176 if (rc != 0) { 177 m_freem(m_head); 178 continue; 179 } 180 181 /* Send a copy of the frame to the BPF listener */ 182 bpf_mtap(ifp, m_head); 183 } 184 return; 185 } 186 187 /* 188 * Legacy TX start - called by the stack, this 189 * always uses the first tx ring, and should 190 * not be used with multiqueue tx enabled. 191 */ 192 void 193 ixgbe_start(struct ifnet *ifp) 194 { 195 struct adapter *adapter = ifp->if_softc; 196 struct tx_ring *txr = adapter->tx_rings; 197 198 if (ifp->if_flags & IFF_RUNNING) { 199 IXGBE_TX_LOCK(txr); 200 ixgbe_start_locked(txr, ifp); 201 IXGBE_TX_UNLOCK(txr); 202 } 203 return; 204 } 205 206 #ifndef IXGBE_LEGACY_TX 207 208 /* 209 ** Multiqueue Transmit Entry Point 210 ** (if_transmit function) 211 */ 212 int 213 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) 214 { 215 struct adapter *adapter = ifp->if_softc; 216 struct tx_ring *txr; 217 int i, err = 0; 218 #ifdef RSS 219 uint32_t bucket_id; 220 #endif 221 222 /* 223 * When doing RSS, map it to the same outbound queue 224 * as the incoming flow would be mapped to. 225 * 226 * If everything is setup correctly, it should be the 227 * same bucket that the current CPU we're on is. 228 */ 229 #if 0 230 #if __FreeBSD_version < 1100054 231 if (m->m_flags & M_FLOWID) { 232 #else 233 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 234 #endif 235 #ifdef RSS 236 if (rss_hash2bucket(m->m_pkthdr.flowid, 237 M_HASHTYPE_GET(m), &bucket_id) == 0) { 238 /* TODO: spit out something if bucket_id > num_queues? 
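			 * (While this block stays under #if 0, the NetBSD
			 * build below simply uses
			 *     i = cpu_index(curcpu()) % adapter->num_queues;
			 * the RSS path would instead fold bucket_id onto the
			 * ring count, so TX work follows the same queue/CPU
			 * that RSS steers the inbound side of the flow to.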
*/ 239 i = bucket_id % adapter->num_queues; 240 #ifdef IXGBE_DEBUG 241 if (bucket_id > adapter->num_queues) 242 if_printf(ifp, "bucket_id (%d) > num_queues " 243 "(%d)\n", bucket_id, adapter->num_queues); 244 #endif 245 } else 246 #endif /* RSS */ 247 i = m->m_pkthdr.flowid % adapter->num_queues; 248 } else 249 #endif 250 i = cpu_index(curcpu()) % adapter->num_queues; 251 252 /* Check for a hung queue and pick alternative */ 253 if (((1 << i) & adapter->active_queues) == 0) 254 i = ffs64(adapter->active_queues); 255 256 txr = &adapter->tx_rings[i]; 257 258 err = pcq_put(txr->txr_interq, m); 259 if (err == false) { 260 m_freem(m); 261 txr->pcq_drops.ev_count++; 262 return (err); 263 } 264 if (IXGBE_TX_TRYLOCK(txr)) { 265 ixgbe_mq_start_locked(ifp, txr); 266 IXGBE_TX_UNLOCK(txr); 267 } else 268 softint_schedule(txr->txr_si); 269 270 return (0); 271 } 272 273 int 274 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) 275 { 276 struct adapter *adapter = txr->adapter; 277 struct mbuf *next; 278 int enqueued = 0, err = 0; 279 280 if (((ifp->if_flags & IFF_RUNNING) == 0) || 281 adapter->link_active == 0) 282 return (ENETDOWN); 283 284 /* Process the queue */ 285 while ((next = pcq_get(txr->txr_interq)) != NULL) { 286 if ((err = ixgbe_xmit(txr, next)) != 0) { 287 m_freem(next); 288 /* All errors are counted in ixgbe_xmit() */ 289 break; 290 } 291 enqueued++; 292 #if 0 // this is VF-only 293 #if __FreeBSD_version >= 1100036 294 /* 295 * Since we're looking at the tx ring, we can check 296 * to see if we're a VF by examing our tail register 297 * address. 298 */ 299 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST) 300 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); 301 #endif 302 #endif /* 0 */ 303 /* Send a copy of the frame to the BPF listener */ 304 bpf_mtap(ifp, next); 305 if ((ifp->if_flags & IFF_RUNNING) == 0) 306 break; 307 } 308 309 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD) 310 ixgbe_txeof(txr); 311 312 return (err); 313 } 314 315 /* 316 * Called from a taskqueue to drain queued transmit packets. 317 */ 318 void 319 ixgbe_deferred_mq_start(void *arg) 320 { 321 struct tx_ring *txr = arg; 322 struct adapter *adapter = txr->adapter; 323 struct ifnet *ifp = adapter->ifp; 324 325 IXGBE_TX_LOCK(txr); 326 if (pcq_peek(txr->txr_interq) != NULL) 327 ixgbe_mq_start_locked(ifp, txr); 328 IXGBE_TX_UNLOCK(txr); 329 } 330 331 #endif /* IXGBE_LEGACY_TX */ 332 333 334 /********************************************************************* 335 * 336 * This routine maps the mbufs to tx descriptors, allowing the 337 * TX engine to transmit the packets. 
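 *  Return values, as the callers above rely on them (a summary, not new
 *  behaviour): EAGAIN means the mbuf was not consumed and may simply be
 *  retried later (ring full or a transient DMA resource shortage);
 *  EFBIG means the chain has too many segments, so the caller may
 *  m_defrag() once and retry; any other error means the mbuf should be
 *  freed and the frame dropped (the per-cause event counters are bumped
 *  in here).  Caller-side sketch:
 *
 *      rc = ixgbe_xmit(txr, m_head);
 *      if (rc == EFBIG && (mtmp = m_defrag(m_head, M_NOWAIT)) != NULL)
 *              rc = ixgbe_xmit(txr, m_head = mtmp);
 *      if (rc != 0 && rc != EAGAIN)
 *              m_freem(m_head);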
338 * - return 0 on success, positive on failure 339 * 340 **********************************************************************/ 341 342 static int 343 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head) 344 { 345 struct m_tag *mtag; 346 struct adapter *adapter = txr->adapter; 347 struct ethercom *ec = &adapter->osdep.ec; 348 u32 olinfo_status = 0, cmd_type_len; 349 int i, j, error; 350 int first; 351 bus_dmamap_t map; 352 struct ixgbe_tx_buf *txbuf; 353 union ixgbe_adv_tx_desc *txd = NULL; 354 355 /* Basic descriptor defines */ 356 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | 357 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); 358 359 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL) 360 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; 361 362 /* 363 * Important to capture the first descriptor 364 * used because it will contain the index of 365 * the one we tell the hardware to report back 366 */ 367 first = txr->next_avail_desc; 368 txbuf = &txr->tx_buffers[first]; 369 map = txbuf->map; 370 371 /* 372 * Map the packet for DMA. 373 */ 374 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, 375 m_head, BUS_DMA_NOWAIT); 376 377 if (__predict_false(error)) { 378 379 switch (error) { 380 case EAGAIN: 381 adapter->eagain_tx_dma_setup.ev_count++; 382 return EAGAIN; 383 case ENOMEM: 384 adapter->enomem_tx_dma_setup.ev_count++; 385 return EAGAIN; 386 case EFBIG: 387 /* 388 * XXX Try it again? 389 * do m_defrag() and retry bus_dmamap_load_mbuf(). 390 */ 391 adapter->efbig_tx_dma_setup.ev_count++; 392 return error; 393 case EINVAL: 394 adapter->einval_tx_dma_setup.ev_count++; 395 return error; 396 default: 397 adapter->other_tx_dma_setup.ev_count++; 398 return error; 399 } 400 } 401 402 /* Make certain there are enough descriptors */ 403 if (txr->tx_avail < (map->dm_nsegs + 2)) { 404 txr->no_desc_avail.ev_count++; 405 ixgbe_dmamap_unload(txr->txtag, txbuf->map); 406 return EAGAIN; 407 } 408 409 /* 410 * Set up the appropriate offload context 411 * this will consume the first descriptor 412 */ 413 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); 414 if (__predict_false(error)) { 415 return (error); 416 } 417 418 #ifdef IXGBE_FDIR 419 /* Do the flow director magic */ 420 if ((txr->atr_sample) && (!adapter->fdir_reinit)) { 421 ++txr->atr_count; 422 if (txr->atr_count >= atr_sample_rate) { 423 ixgbe_atr(txr, m_head); 424 txr->atr_count = 0; 425 } 426 } 427 #endif 428 429 olinfo_status |= IXGBE_ADVTXD_CC; 430 i = txr->next_avail_desc; 431 for (j = 0; j < map->dm_nsegs; j++) { 432 bus_size_t seglen; 433 bus_addr_t segaddr; 434 435 txbuf = &txr->tx_buffers[i]; 436 txd = &txr->tx_base[i]; 437 seglen = map->dm_segs[j].ds_len; 438 segaddr = htole64(map->dm_segs[j].ds_addr); 439 440 txd->read.buffer_addr = segaddr; 441 txd->read.cmd_type_len = htole32(txr->txd_cmd | 442 cmd_type_len |seglen); 443 txd->read.olinfo_status = htole32(olinfo_status); 444 445 if (++i == txr->num_desc) 446 i = 0; 447 } 448 449 txd->read.cmd_type_len |= 450 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS); 451 txr->tx_avail -= map->dm_nsegs; 452 txr->next_avail_desc = i; 453 454 txbuf->m_head = m_head; 455 /* 456 * Here we swap the map so the last descriptor, 457 * which gets the completion interrupt has the 458 * real map, and the first descriptor gets the 459 * unused map from this descriptor. 
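	 * Sketch of the resulting bookkeeping for a packet occupying
	 * slots first .. last (matching the assignments below):
	 *
	 *      tx_buffers[last].m_head = m_head
	 *      tx_buffers[last].map    = the map loaded with m_head
	 *      tx_buffers[first].map   = the last slot's spare map
	 *      tx_buffers[first].eop   = &tx_base[last]   (RS|EOP desc)
	 *
	 * so ixgbe_txeof() watches the first slot's eop pointer for DD
	 * and can unload and free from the last slot once it is set.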
460 */ 461 txr->tx_buffers[first].map = txbuf->map; 462 txbuf->map = map; 463 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len, 464 BUS_DMASYNC_PREWRITE); 465 466 /* Set the EOP descriptor that will be marked done */ 467 txbuf = &txr->tx_buffers[first]; 468 txbuf->eop = txd; 469 470 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 471 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 472 /* 473 * Advance the Transmit Descriptor Tail (Tdt), this tells the 474 * hardware that this frame is available to transmit. 475 */ 476 ++txr->total_packets.ev_count; 477 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i); 478 479 /* Mark queue as having work */ 480 if (txr->busy == 0) 481 txr->busy = 1; 482 483 return 0; 484 } 485 486 487 /********************************************************************* 488 * 489 * Allocate memory for tx_buffer structures. The tx_buffer stores all 490 * the information needed to transmit a packet on the wire. This is 491 * called only once at attach, setup is done every reset. 492 * 493 **********************************************************************/ 494 int 495 ixgbe_allocate_transmit_buffers(struct tx_ring *txr) 496 { 497 struct adapter *adapter = txr->adapter; 498 device_t dev = adapter->dev; 499 struct ixgbe_tx_buf *txbuf; 500 int error, i; 501 502 /* 503 * Setup DMA descriptor areas. 504 */ 505 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */ 506 1, 0, /* alignment, bounds */ 507 IXGBE_TSO_SIZE, /* maxsize */ 508 adapter->num_segs, /* nsegments */ 509 PAGE_SIZE, /* maxsegsize */ 510 0, /* flags */ 511 &txr->txtag))) { 512 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n"); 513 goto fail; 514 } 515 516 if (!(txr->tx_buffers = 517 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) * 518 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 519 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n"); 520 error = ENOMEM; 521 goto fail; 522 } 523 524 /* Create the descriptor buffer dma maps */ 525 txbuf = txr->tx_buffers; 526 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 527 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map); 528 if (error != 0) { 529 aprint_error_dev(dev, 530 "Unable to create TX DMA map (%d)\n", error); 531 goto fail; 532 } 533 } 534 535 return 0; 536 fail: 537 /* We free all, it handles case where we are in the middle */ 538 #if 0 /* XXX was FreeBSD */ 539 ixgbe_free_transmit_structures(adapter); 540 #else 541 ixgbe_free_transmit_buffers(txr); 542 #endif 543 return (error); 544 } 545 546 /********************************************************************* 547 * 548 * Initialize a transmit ring. 549 * 550 **********************************************************************/ 551 static void 552 ixgbe_setup_transmit_ring(struct tx_ring *txr) 553 { 554 struct adapter *adapter = txr->adapter; 555 struct ixgbe_tx_buf *txbuf; 556 #ifdef DEV_NETMAP 557 struct netmap_adapter *na = NA(adapter->ifp); 558 struct netmap_slot *slot; 559 #endif /* DEV_NETMAP */ 560 561 /* Clear the old ring contents */ 562 IXGBE_TX_LOCK(txr); 563 #ifdef DEV_NETMAP 564 /* 565 * (under lock): if in netmap mode, do some consistency 566 * checks and set slot to entry 0 of the netmap ring. 567 */ 568 slot = netmap_reset(na, NR_TX, txr->me, 0); 569 #endif /* DEV_NETMAP */ 570 bzero((void *)txr->tx_base, 571 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); 572 /* Reset indices */ 573 txr->next_avail_desc = 0; 574 txr->next_to_clean = 0; 575 576 /* Free any existing tx buffers. 
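	 * The per-buffer DMA maps are reused across resets; only the
	 * mbufs are released here.  Lifecycle sketch:
	 *
	 *      attach:  ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map)
	 *      xmit:    bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, ...)
	 *      reset:   ixgbe_dmamap_unload(txr->txtag, txbuf->map)
	 *      detach:  ixgbe_dmamap_destroy(txr->txtag, txbuf->map)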
*/ 577 txbuf = txr->tx_buffers; 578 for (int i = 0; i < txr->num_desc; i++, txbuf++) { 579 if (txbuf->m_head != NULL) { 580 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map, 581 0, txbuf->m_head->m_pkthdr.len, 582 BUS_DMASYNC_POSTWRITE); 583 ixgbe_dmamap_unload(txr->txtag, txbuf->map); 584 m_freem(txbuf->m_head); 585 txbuf->m_head = NULL; 586 } 587 #ifdef DEV_NETMAP 588 /* 589 * In netmap mode, set the map for the packet buffer. 590 * NOTE: Some drivers (not this one) also need to set 591 * the physical buffer address in the NIC ring. 592 * Slots in the netmap ring (indexed by "si") are 593 * kring->nkr_hwofs positions "ahead" wrt the 594 * corresponding slot in the NIC ring. In some drivers 595 * (not here) nkr_hwofs can be negative. Function 596 * netmap_idx_n2k() handles wraparounds properly. 597 */ 598 if (slot) { 599 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 600 netmap_load_map(na, txr->txtag, 601 txbuf->map, NMB(na, slot + si)); 602 } 603 #endif /* DEV_NETMAP */ 604 /* Clear the EOP descriptor pointer */ 605 txbuf->eop = NULL; 606 } 607 608 #ifdef IXGBE_FDIR 609 /* Set the rate at which we sample packets */ 610 if (adapter->hw.mac.type != ixgbe_mac_82598EB) 611 txr->atr_sample = atr_sample_rate; 612 #endif 613 614 /* Set number of descriptors available */ 615 txr->tx_avail = adapter->num_tx_desc; 616 617 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 618 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 619 IXGBE_TX_UNLOCK(txr); 620 } 621 622 /********************************************************************* 623 * 624 * Initialize all transmit rings. 625 * 626 **********************************************************************/ 627 int 628 ixgbe_setup_transmit_structures(struct adapter *adapter) 629 { 630 struct tx_ring *txr = adapter->tx_rings; 631 632 for (int i = 0; i < adapter->num_queues; i++, txr++) 633 ixgbe_setup_transmit_ring(txr); 634 635 return (0); 636 } 637 638 /********************************************************************* 639 * 640 * Free all transmit rings. 641 * 642 **********************************************************************/ 643 void 644 ixgbe_free_transmit_structures(struct adapter *adapter) 645 { 646 struct tx_ring *txr = adapter->tx_rings; 647 648 for (int i = 0; i < adapter->num_queues; i++, txr++) { 649 ixgbe_free_transmit_buffers(txr); 650 ixgbe_dma_free(adapter, &txr->txdma); 651 IXGBE_TX_LOCK_DESTROY(txr); 652 } 653 free(adapter->tx_rings, M_DEVBUF); 654 } 655 656 /********************************************************************* 657 * 658 * Free transmit ring related data structures. 
659 * 660 **********************************************************************/ 661 static void 662 ixgbe_free_transmit_buffers(struct tx_ring *txr) 663 { 664 struct adapter *adapter = txr->adapter; 665 struct ixgbe_tx_buf *tx_buffer; 666 int i; 667 668 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin"); 669 670 if (txr->tx_buffers == NULL) 671 return; 672 673 tx_buffer = txr->tx_buffers; 674 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { 675 if (tx_buffer->m_head != NULL) { 676 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map, 677 0, tx_buffer->m_head->m_pkthdr.len, 678 BUS_DMASYNC_POSTWRITE); 679 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map); 680 m_freem(tx_buffer->m_head); 681 tx_buffer->m_head = NULL; 682 if (tx_buffer->map != NULL) { 683 ixgbe_dmamap_destroy(txr->txtag, 684 tx_buffer->map); 685 tx_buffer->map = NULL; 686 } 687 } else if (tx_buffer->map != NULL) { 688 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map); 689 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map); 690 tx_buffer->map = NULL; 691 } 692 } 693 #ifndef IXGBE_LEGACY_TX 694 if (txr->txr_interq != NULL) { 695 struct mbuf *m; 696 697 while ((m = pcq_get(txr->txr_interq)) != NULL) 698 m_freem(m); 699 pcq_destroy(txr->txr_interq); 700 } 701 #endif 702 if (txr->tx_buffers != NULL) { 703 free(txr->tx_buffers, M_DEVBUF); 704 txr->tx_buffers = NULL; 705 } 706 if (txr->txtag != NULL) { 707 ixgbe_dma_tag_destroy(txr->txtag); 708 txr->txtag = NULL; 709 } 710 return; 711 } 712 713 /********************************************************************* 714 * 715 * Advanced Context Descriptor setup for VLAN, CSUM or TSO 716 * 717 **********************************************************************/ 718 719 static int 720 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, 721 u32 *cmd_type_len, u32 *olinfo_status) 722 { 723 struct adapter *adapter = txr->adapter; 724 struct ethercom *ec = &adapter->osdep.ec; 725 struct m_tag *mtag; 726 struct ixgbe_adv_tx_context_desc *TXD; 727 struct ether_vlan_header *eh; 728 #ifdef INET 729 struct ip *ip; 730 #endif 731 #ifdef INET6 732 struct ip6_hdr *ip6; 733 #endif 734 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 735 int ehdrlen, ip_hlen = 0; 736 u16 etype; 737 u8 ipproto = 0; 738 int offload = TRUE; 739 int ctxd = txr->next_avail_desc; 740 u16 vtag = 0; 741 char *l3d; 742 743 744 /* First check if TSO is to be used */ 745 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6)) { 746 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status); 747 748 if (rv != 0) { 749 ++adapter->tso_err.ev_count; 750 return rv; 751 } 752 } 753 754 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0) 755 offload = FALSE; 756 757 /* Indicate the whole packet as payload when not doing TSO */ 758 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; 759 760 /* Now ready a context descriptor */ 761 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 762 763 /* 764 ** In advanced descriptors the vlan tag must 765 ** be placed into the context descriptor. Hence 766 ** we need to make one even if not doing offloads. 767 */ 768 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) { 769 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff); 770 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 771 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE)) 772 return (0); 773 774 /* 775 * Determine where frame payload starts. 776 * Jump over vlan headers if already present, 777 * helpful for QinQ too. 
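	 * Example: an untagged frame has ehdrlen = ETHER_HDR_LEN (14),
	 * so the L3 header starts at offset 14; a frame carrying an
	 * in-line 802.1Q tag has ehdrlen = ETHER_HDR_LEN +
	 * ETHER_VLAN_ENCAP_LEN (14 + 4 = 18) and etype is read from
	 * evl_proto behind the tag.  This length is what gets packed
	 * into vlan_macip_lens below via IXGBE_ADVTXD_MACLEN_SHIFT.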
778 */ 779 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag)); 780 eh = mtod(mp, struct ether_vlan_header *); 781 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 782 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header)); 783 etype = ntohs(eh->evl_proto); 784 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 785 } else { 786 etype = ntohs(eh->evl_encap_proto); 787 ehdrlen = ETHER_HDR_LEN; 788 } 789 790 /* Set the ether header length */ 791 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 792 793 if (offload == FALSE) 794 goto no_offloads; 795 796 /* 797 * If the first mbuf only includes the ethernet header, jump to the next one 798 * XXX: This assumes the stack splits mbufs containing headers on header boundaries 799 * XXX: And assumes the entire IP header is contained in one mbuf 800 */ 801 if (mp->m_len == ehdrlen && mp->m_next) 802 l3d = mtod(mp->m_next, char *); 803 else 804 l3d = mtod(mp, char *) + ehdrlen; 805 806 switch (etype) { 807 #ifdef INET 808 case ETHERTYPE_IP: 809 ip = (struct ip *)(l3d); 810 ip_hlen = ip->ip_hl << 2; 811 ipproto = ip->ip_p; 812 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 813 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 || 814 ip->ip_sum == 0); 815 break; 816 #endif 817 #ifdef INET6 818 case ETHERTYPE_IPV6: 819 ip6 = (struct ip6_hdr *)(l3d); 820 ip_hlen = sizeof(struct ip6_hdr); 821 ipproto = ip6->ip6_nxt; 822 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 823 break; 824 #endif 825 default: 826 offload = false; 827 break; 828 } 829 830 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0) 831 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 832 833 vlan_macip_lens |= ip_hlen; 834 835 /* No support for offloads for non-L4 next headers */ 836 switch (ipproto) { 837 case IPPROTO_TCP: 838 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) 839 840 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 841 else 842 offload = false; 843 break; 844 case IPPROTO_UDP: 845 if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) 846 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; 847 else 848 offload = false; 849 break; 850 default: 851 offload = false; 852 break; 853 } 854 855 if (offload) /* Insert L4 checksum into data descriptors */ 856 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 857 858 no_offloads: 859 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 860 861 /* Now copy bits into descriptor */ 862 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 863 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 864 TXD->seqnum_seed = htole32(0); 865 TXD->mss_l4len_idx = htole32(0); 866 867 /* We've consumed the first desc, adjust counters */ 868 if (++ctxd == txr->num_desc) 869 ctxd = 0; 870 txr->next_avail_desc = ctxd; 871 --txr->tx_avail; 872 873 return 0; 874 } 875 876 /********************************************************************** 877 * 878 * Setup work for hardware segmentation offload (TSO) on 879 * adapters using advanced tx descriptors 880 * 881 **********************************************************************/ 882 static int 883 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, 884 u32 *cmd_type_len, u32 *olinfo_status) 885 { 886 struct m_tag *mtag; 887 struct adapter *adapter = txr->adapter; 888 struct ethercom *ec = &adapter->osdep.ec; 889 struct ixgbe_adv_tx_context_desc *TXD; 890 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 891 u32 mss_l4len_idx = 0, paylen; 892 u16 vtag = 0, eh_type; 893 int ctxd, ehdrlen, ip_hlen, tcp_hlen; 894 struct ether_vlan_header *eh; 895 #ifdef INET6 896 struct ip6_hdr *ip6; 897 
#endif 898 #ifdef INET 899 struct ip *ip; 900 #endif 901 struct tcphdr *th; 902 903 /* 904 * Determine where frame payload starts. 905 * Jump over vlan headers if already present 906 */ 907 eh = mtod(mp, struct ether_vlan_header *); 908 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 909 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 910 eh_type = eh->evl_proto; 911 } else { 912 ehdrlen = ETHER_HDR_LEN; 913 eh_type = eh->evl_encap_proto; 914 } 915 916 switch (ntohs(eh_type)) { 917 #ifdef INET6 918 case ETHERTYPE_IPV6: 919 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 920 /* XXX-BZ For now we do not pretend to support ext. hdrs. */ 921 if (ip6->ip6_nxt != IPPROTO_TCP) 922 return (ENXIO); 923 ip_hlen = sizeof(struct ip6_hdr); 924 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 925 th = (struct tcphdr *)((char *)ip6 + ip_hlen); 926 th->th_sum = in6_cksum_phdr(&ip6->ip6_src, 927 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP)); 928 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; 929 break; 930 #endif 931 #ifdef INET 932 case ETHERTYPE_IP: 933 ip = (struct ip *)(mp->m_data + ehdrlen); 934 if (ip->ip_p != IPPROTO_TCP) 935 return (ENXIO); 936 ip->ip_sum = 0; 937 ip_hlen = ip->ip_hl << 2; 938 th = (struct tcphdr *)((char *)ip + ip_hlen); 939 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr, 940 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 941 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 942 /* Tell transmit desc to also do IPv4 checksum. */ 943 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 944 break; 945 #endif 946 default: 947 panic("%s: CSUM_TSO but no supported IP version (0x%04x)", 948 __func__, ntohs(eh_type)); 949 break; 950 } 951 952 ctxd = txr->next_avail_desc; 953 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 954 955 tcp_hlen = th->th_off << 2; 956 957 /* This is used in the transmit desc in encap */ 958 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; 959 960 /* VLAN MACLEN IPLEN */ 961 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) { 962 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff); 963 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT); 964 } 965 966 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 967 vlan_macip_lens |= ip_hlen; 968 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 969 970 /* ADV DTYPE TUCMD */ 971 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 972 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 973 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 974 975 /* MSS L4LEN IDX */ 976 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT); 977 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); 978 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 979 980 TXD->seqnum_seed = htole32(0); 981 982 if (++ctxd == txr->num_desc) 983 ctxd = 0; 984 985 txr->tx_avail--; 986 txr->next_avail_desc = ctxd; 987 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; 988 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 989 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; 990 ++txr->tso_tx.ev_count; 991 return (0); 992 } 993 994 995 /********************************************************************** 996 * 997 * Examine each tx_buffer in the used queue. If the hardware is done 998 * processing the packet then free associated resources. The 999 * tx_buffer is put back on the free queue. 
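 *  Completion is tracked per packet rather than per descriptor:
 *  ixgbe_xmit() requests a writeback (RS) only on the EOP descriptor
 *  and stores that descriptor's address in the packet's first
 *  tx_buffer, so the scan below only needs a check of this shape:
 *
 *      eop = buf->eop;
 *      if (eop == NULL || (eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
 *              break;          nothing further has completed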
1000 * 1001 **********************************************************************/ 1002 void 1003 ixgbe_txeof(struct tx_ring *txr) 1004 { 1005 struct adapter *adapter = txr->adapter; 1006 struct ifnet *ifp = adapter->ifp; 1007 u32 work, processed = 0; 1008 u32 limit = adapter->tx_process_limit; 1009 struct ixgbe_tx_buf *buf; 1010 union ixgbe_adv_tx_desc *txd; 1011 1012 KASSERT(mutex_owned(&txr->tx_mtx)); 1013 1014 #ifdef DEV_NETMAP 1015 if (ifp->if_capenable & IFCAP_NETMAP) { 1016 struct netmap_adapter *na = NA(ifp); 1017 struct netmap_kring *kring = &na->tx_rings[txr->me]; 1018 txd = txr->tx_base; 1019 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1020 BUS_DMASYNC_POSTREAD); 1021 /* 1022 * In netmap mode, all the work is done in the context 1023 * of the client thread. Interrupt handlers only wake up 1024 * clients, which may be sleeping on individual rings 1025 * or on a global resource for all rings. 1026 * To implement tx interrupt mitigation, we wake up the client 1027 * thread roughly every half ring, even if the NIC interrupts 1028 * more frequently. This is implemented as follows: 1029 * - ixgbe_txsync() sets kring->nr_kflags with the index of 1030 * the slot that should wake up the thread (nkr_num_slots 1031 * means the user thread should not be woken up); 1032 * - the driver ignores tx interrupts unless netmap_mitigate=0 1033 * or the slot has the DD bit set. 1034 */ 1035 if (!netmap_mitigate || 1036 (kring->nr_kflags < kring->nkr_num_slots && 1037 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { 1038 netmap_tx_irq(ifp, txr->me); 1039 } 1040 return; 1041 } 1042 #endif /* DEV_NETMAP */ 1043 1044 if (txr->tx_avail == txr->num_desc) { 1045 txr->busy = 0; 1046 return; 1047 } 1048 1049 /* Get work starting point */ 1050 work = txr->next_to_clean; 1051 buf = &txr->tx_buffers[work]; 1052 txd = &txr->tx_base[work]; 1053 work -= txr->num_desc; /* The distance to ring end */ 1054 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1055 BUS_DMASYNC_POSTREAD); 1056 1057 do { 1058 union ixgbe_adv_tx_desc *eop = buf->eop; 1059 if (eop == NULL) /* No work */ 1060 break; 1061 1062 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0) 1063 break; /* I/O not complete */ 1064 1065 if (buf->m_head) { 1066 txr->bytes += 1067 buf->m_head->m_pkthdr.len; 1068 bus_dmamap_sync(txr->txtag->dt_dmat, 1069 buf->map, 1070 0, buf->m_head->m_pkthdr.len, 1071 BUS_DMASYNC_POSTWRITE); 1072 ixgbe_dmamap_unload(txr->txtag, 1073 buf->map); 1074 m_freem(buf->m_head); 1075 buf->m_head = NULL; 1076 } 1077 buf->eop = NULL; 1078 ++txr->tx_avail; 1079 1080 /* We clean the range if multi segment */ 1081 while (txd != eop) { 1082 ++txd; 1083 ++buf; 1084 ++work; 1085 /* wrap the ring? 
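			 * (work runs from -num_desc up toward 0: it was
			 * biased by "work -= txr->num_desc" at the start of
			 * the scan, so hitting zero here means one full lap
			 * and the descriptor/buffer pointers are rewound to
			 * the ring start; num_desc is added back before work
			 * is stored into next_to_clean at the end.)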
*/ 1086 if (__predict_false(!work)) { 1087 work -= txr->num_desc; 1088 buf = txr->tx_buffers; 1089 txd = txr->tx_base; 1090 } 1091 if (buf->m_head) { 1092 txr->bytes += 1093 buf->m_head->m_pkthdr.len; 1094 bus_dmamap_sync(txr->txtag->dt_dmat, 1095 buf->map, 1096 0, buf->m_head->m_pkthdr.len, 1097 BUS_DMASYNC_POSTWRITE); 1098 ixgbe_dmamap_unload(txr->txtag, 1099 buf->map); 1100 m_freem(buf->m_head); 1101 buf->m_head = NULL; 1102 } 1103 ++txr->tx_avail; 1104 buf->eop = NULL; 1105 1106 } 1107 ++txr->packets; 1108 ++processed; 1109 ++ifp->if_opackets; 1110 1111 /* Try the next packet */ 1112 ++txd; 1113 ++buf; 1114 ++work; 1115 /* reset with a wrap */ 1116 if (__predict_false(!work)) { 1117 work -= txr->num_desc; 1118 buf = txr->tx_buffers; 1119 txd = txr->tx_base; 1120 } 1121 prefetch(txd); 1122 } while (__predict_true(--limit)); 1123 1124 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1125 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1126 1127 work += txr->num_desc; 1128 txr->next_to_clean = work; 1129 1130 /* 1131 ** Queue Hang detection, we know there's 1132 ** work outstanding or the first return 1133 ** would have been taken, so increment busy 1134 ** if nothing managed to get cleaned, then 1135 ** in local_timer it will be checked and 1136 ** marked as HUNG if it exceeds a MAX attempt. 1137 */ 1138 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG)) 1139 ++txr->busy; 1140 /* 1141 ** If anything gets cleaned we reset state to 1, 1142 ** note this will turn off HUNG if its set. 1143 */ 1144 if (processed) 1145 txr->busy = 1; 1146 1147 if (txr->tx_avail == txr->num_desc) 1148 txr->busy = 0; 1149 1150 return; 1151 } 1152 1153 1154 #ifdef IXGBE_FDIR 1155 /* 1156 ** This routine parses packet headers so that Flow 1157 ** Director can make a hashed filter table entry 1158 ** allowing traffic flows to be identified and kept 1159 ** on the same cpu. This would be a performance 1160 ** hit, but we only do it at IXGBE_FDIR_RATE of 1161 ** packets. 
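** Sampling hookup (sketch of the ixgbe_xmit() side): with the default
** atr_sample_rate of 20, roughly every 20th transmitted packet on a
** ring is parsed here and a signature filter is installed so the
** receive side of the flow is steered to the same queue/CPU:
**
**      if (++txr->atr_count >= atr_sample_rate) {
**              ixgbe_atr(txr, m_head);
**              txr->atr_count = 0;
**      }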
1162 */ 1163 static void 1164 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) 1165 { 1166 struct adapter *adapter = txr->adapter; 1167 struct ix_queue *que; 1168 struct ip *ip; 1169 struct tcphdr *th; 1170 struct udphdr *uh; 1171 struct ether_vlan_header *eh; 1172 union ixgbe_atr_hash_dword input = {.dword = 0}; 1173 union ixgbe_atr_hash_dword common = {.dword = 0}; 1174 int ehdrlen, ip_hlen; 1175 u16 etype; 1176 1177 eh = mtod(mp, struct ether_vlan_header *); 1178 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1179 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1180 etype = eh->evl_proto; 1181 } else { 1182 ehdrlen = ETHER_HDR_LEN; 1183 etype = eh->evl_encap_proto; 1184 } 1185 1186 /* Only handling IPv4 */ 1187 if (etype != htons(ETHERTYPE_IP)) 1188 return; 1189 1190 ip = (struct ip *)(mp->m_data + ehdrlen); 1191 ip_hlen = ip->ip_hl << 2; 1192 1193 /* check if we're UDP or TCP */ 1194 switch (ip->ip_p) { 1195 case IPPROTO_TCP: 1196 th = (struct tcphdr *)((char *)ip + ip_hlen); 1197 /* src and dst are inverted */ 1198 common.port.dst ^= th->th_sport; 1199 common.port.src ^= th->th_dport; 1200 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4; 1201 break; 1202 case IPPROTO_UDP: 1203 uh = (struct udphdr *)((char *)ip + ip_hlen); 1204 /* src and dst are inverted */ 1205 common.port.dst ^= uh->uh_sport; 1206 common.port.src ^= uh->uh_dport; 1207 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4; 1208 break; 1209 default: 1210 return; 1211 } 1212 1213 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag); 1214 if (mp->m_pkthdr.ether_vtag) 1215 common.flex_bytes ^= htons(ETHERTYPE_VLAN); 1216 else 1217 common.flex_bytes ^= etype; 1218 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr; 1219 1220 que = &adapter->queues[txr->me]; 1221 /* 1222 ** This assumes the Rx queue and Tx 1223 ** queue are bound to the same CPU 1224 */ 1225 ixgbe_fdir_add_signature_filter_82599(&adapter->hw, 1226 input, common, que->msix); 1227 } 1228 #endif /* IXGBE_FDIR */ 1229 1230 /* 1231 ** Used to detect a descriptor that has 1232 ** been merged by Hardware RSC. 1233 */ 1234 static inline u32 1235 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx) 1236 { 1237 return (le32toh(rx->wb.lower.lo_dword.data) & 1238 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; 1239 } 1240 1241 /********************************************************************* 1242 * 1243 * Initialize Hardware RSC (LRO) feature on 82599 1244 * for an RX ring, this is toggled by the LRO capability 1245 * even though it is transparent to the stack. 1246 * 1247 * NOTE: since this HW feature only works with IPV4 and 1248 * our testing has shown soft LRO to be as effective 1249 * I have decided to disable this by default. 
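 *  The MAXDESC setting chosen below keeps (coalesced descriptors) x
 *  (buffer size) under the 64K RSC limit, for example 16 x 2KB
 *  MCLBYTES clusters = 32KB, 8 x 4KB MJUMPAGESIZE (on 4KB-page
 *  systems) = 32KB, 4 x 9KB MJUM9BYTES = 36KB, and a single
 *  descriptor when 16KB clusters are in use.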
1250 * 1251 **********************************************************************/ 1252 static void 1253 ixgbe_setup_hw_rsc(struct rx_ring *rxr) 1254 { 1255 struct adapter *adapter = rxr->adapter; 1256 struct ixgbe_hw *hw = &adapter->hw; 1257 u32 rscctrl, rdrxctl; 1258 1259 /* If turning LRO/RSC off we need to disable it */ 1260 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) { 1261 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1262 rscctrl &= ~IXGBE_RSCCTL_RSCEN; 1263 return; 1264 } 1265 1266 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); 1267 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; 1268 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */ 1269 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip) 1270 #endif /* DEV_NETMAP */ 1271 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; 1272 rdrxctl |= IXGBE_RDRXCTL_RSCACKC; 1273 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); 1274 1275 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 1276 rscctrl |= IXGBE_RSCCTL_RSCEN; 1277 /* 1278 ** Limit the total number of descriptors that 1279 ** can be combined, so it does not exceed 64K 1280 */ 1281 if (rxr->mbuf_sz == MCLBYTES) 1282 rscctrl |= IXGBE_RSCCTL_MAXDESC_16; 1283 else if (rxr->mbuf_sz == MJUMPAGESIZE) 1284 rscctrl |= IXGBE_RSCCTL_MAXDESC_8; 1285 else if (rxr->mbuf_sz == MJUM9BYTES) 1286 rscctrl |= IXGBE_RSCCTL_MAXDESC_4; 1287 else /* Using 16K cluster */ 1288 rscctrl |= IXGBE_RSCCTL_MAXDESC_1; 1289 1290 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); 1291 1292 /* Enable TCP header recognition */ 1293 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 1294 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | 1295 IXGBE_PSRTYPE_TCPHDR)); 1296 1297 /* Disable RSC for ACK packets */ 1298 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, 1299 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); 1300 1301 rxr->hw_rsc = TRUE; 1302 } 1303 1304 /********************************************************************* 1305 * 1306 * Refresh mbuf buffers for RX descriptor rings 1307 * - now keeps its own state so discards due to resource 1308 * exhaustion are unnecessary, if an mbuf cannot be obtained 1309 * it just returns, keeping its placeholder, thus it can simply 1310 * be recalled to try again. 1311 * 1312 **********************************************************************/ 1313 static void 1314 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) 1315 { 1316 struct adapter *adapter = rxr->adapter; 1317 struct ixgbe_rx_buf *rxbuf; 1318 struct mbuf *mp; 1319 int i, j, error; 1320 bool refreshed = false; 1321 1322 i = j = rxr->next_to_refresh; 1323 /* Control the loop with one beyond */ 1324 if (++j == rxr->num_desc) 1325 j = 0; 1326 1327 while (j != limit) { 1328 rxbuf = &rxr->rx_buffers[i]; 1329 if (rxbuf->buf == NULL) { 1330 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT, 1331 MT_DATA, M_PKTHDR, rxr->mbuf_sz); 1332 if (mp == NULL) { 1333 rxr->no_jmbuf.ev_count++; 1334 goto update; 1335 } 1336 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN)) 1337 m_adj(mp, ETHER_ALIGN); 1338 } else 1339 mp = rxbuf->buf; 1340 1341 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1342 1343 /* If we're dealing with an mbuf that was copied rather 1344 * than replaced, there's no need to go through busdma. 
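	 * (IXGBE_RX_COPY is set by ixgbe_rxeof() when a small frame was
	 * copied into a fresh mbuf header and the original cluster was
	 * left in place: that cluster is still loaded in rxbuf->pmap, so
	 * the cached rxbuf->addr can simply be written back into the
	 * descriptor instead of doing another dmamap load.)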
1345 */ 1346 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { 1347 /* Get the memory mapping */ 1348 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap); 1349 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, 1350 rxbuf->pmap, mp, BUS_DMA_NOWAIT); 1351 if (error != 0) { 1352 printf("Refresh mbufs: payload dmamap load" 1353 " failure - %d\n", error); 1354 m_free(mp); 1355 rxbuf->buf = NULL; 1356 goto update; 1357 } 1358 rxbuf->buf = mp; 1359 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap, 1360 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD); 1361 rxbuf->addr = rxr->rx_base[i].read.pkt_addr = 1362 htole64(rxbuf->pmap->dm_segs[0].ds_addr); 1363 } else { 1364 rxr->rx_base[i].read.pkt_addr = rxbuf->addr; 1365 rxbuf->flags &= ~IXGBE_RX_COPY; 1366 } 1367 1368 refreshed = true; 1369 /* Next is precalculated */ 1370 i = j; 1371 rxr->next_to_refresh = i; 1372 if (++j == rxr->num_desc) 1373 j = 0; 1374 } 1375 update: 1376 if (refreshed) /* Update hardware tail index */ 1377 IXGBE_WRITE_REG(&adapter->hw, 1378 rxr->tail, rxr->next_to_refresh); 1379 return; 1380 } 1381 1382 /********************************************************************* 1383 * 1384 * Allocate memory for rx_buffer structures. Since we use one 1385 * rx_buffer per received packet, the maximum number of rx_buffer's 1386 * that we'll need is equal to the number of receive descriptors 1387 * that we've allocated. 1388 * 1389 **********************************************************************/ 1390 int 1391 ixgbe_allocate_receive_buffers(struct rx_ring *rxr) 1392 { 1393 struct adapter *adapter = rxr->adapter; 1394 device_t dev = adapter->dev; 1395 struct ixgbe_rx_buf *rxbuf; 1396 int bsize, error; 1397 1398 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc; 1399 if (!(rxr->rx_buffers = 1400 (struct ixgbe_rx_buf *) malloc(bsize, 1401 M_DEVBUF, M_NOWAIT | M_ZERO))) { 1402 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n"); 1403 error = ENOMEM; 1404 goto fail; 1405 } 1406 1407 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */ 1408 1, 0, /* alignment, bounds */ 1409 MJUM16BYTES, /* maxsize */ 1410 1, /* nsegments */ 1411 MJUM16BYTES, /* maxsegsize */ 1412 0, /* flags */ 1413 &rxr->ptag))) { 1414 aprint_error_dev(dev, "Unable to create RX DMA tag\n"); 1415 goto fail; 1416 } 1417 1418 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) { 1419 rxbuf = &rxr->rx_buffers[i]; 1420 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); 1421 if (error) { 1422 aprint_error_dev(dev, "Unable to create RX dma map\n"); 1423 goto fail; 1424 } 1425 } 1426 1427 return (0); 1428 1429 fail: 1430 /* Frees all, but can handle partial completion */ 1431 ixgbe_free_receive_structures(adapter); 1432 return (error); 1433 } 1434 1435 static void 1436 ixgbe_free_receive_ring(struct rx_ring *rxr) 1437 { 1438 struct ixgbe_rx_buf *rxbuf; 1439 1440 for (int i = 0; i < rxr->num_desc; i++) { 1441 rxbuf = &rxr->rx_buffers[i]; 1442 if (rxbuf->buf != NULL) { 1443 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap, 1444 0, rxbuf->buf->m_pkthdr.len, 1445 BUS_DMASYNC_POSTREAD); 1446 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap); 1447 rxbuf->buf->m_flags |= M_PKTHDR; 1448 m_freem(rxbuf->buf); 1449 rxbuf->buf = NULL; 1450 rxbuf->flags = 0; 1451 } 1452 } 1453 } 1454 1455 /********************************************************************* 1456 * 1457 * Initialize a receive ring and its buffers. 
1458 * 1459 **********************************************************************/ 1460 static int 1461 ixgbe_setup_receive_ring(struct rx_ring *rxr) 1462 { 1463 struct adapter *adapter; 1464 struct ixgbe_rx_buf *rxbuf; 1465 #ifdef LRO 1466 struct ifnet *ifp; 1467 struct lro_ctrl *lro = &rxr->lro; 1468 #endif /* LRO */ 1469 int rsize, error = 0; 1470 #ifdef DEV_NETMAP 1471 struct netmap_adapter *na = NA(rxr->adapter->ifp); 1472 struct netmap_slot *slot; 1473 #endif /* DEV_NETMAP */ 1474 1475 adapter = rxr->adapter; 1476 #ifdef LRO 1477 ifp = adapter->ifp; 1478 #endif /* LRO */ 1479 1480 /* Clear the ring contents */ 1481 IXGBE_RX_LOCK(rxr); 1482 #ifdef DEV_NETMAP 1483 /* same as in ixgbe_setup_transmit_ring() */ 1484 slot = netmap_reset(na, NR_RX, rxr->me, 0); 1485 #endif /* DEV_NETMAP */ 1486 rsize = roundup2(adapter->num_rx_desc * 1487 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); 1488 bzero((void *)rxr->rx_base, rsize); 1489 /* Cache the size */ 1490 rxr->mbuf_sz = adapter->rx_mbuf_sz; 1491 1492 /* Free current RX buffer structs and their mbufs */ 1493 ixgbe_free_receive_ring(rxr); 1494 1495 IXGBE_RX_UNLOCK(rxr); 1496 1497 /* Now reinitialize our supply of jumbo mbufs. The number 1498 * or size of jumbo mbufs may have changed. 1499 */ 1500 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat, 1501 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz); 1502 1503 IXGBE_RX_LOCK(rxr); 1504 1505 /* Now replenish the mbufs */ 1506 for (int j = 0; j != rxr->num_desc; ++j) { 1507 struct mbuf *mp; 1508 1509 rxbuf = &rxr->rx_buffers[j]; 1510 #ifdef DEV_NETMAP 1511 /* 1512 * In netmap mode, fill the map and set the buffer 1513 * address in the NIC ring, considering the offset 1514 * between the netmap and NIC rings (see comment in 1515 * ixgbe_setup_transmit_ring() ). 
No need to allocate 1516 * an mbuf, so end the block with a continue; 1517 */ 1518 if (slot) { 1519 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 1520 uint64_t paddr; 1521 void *addr; 1522 1523 addr = PNMB(na, slot + sj, &paddr); 1524 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); 1525 /* Update descriptor and the cached value */ 1526 rxr->rx_base[j].read.pkt_addr = htole64(paddr); 1527 rxbuf->addr = htole64(paddr); 1528 continue; 1529 } 1530 #endif /* DEV_NETMAP */ 1531 rxbuf->flags = 0; 1532 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT, 1533 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); 1534 if (rxbuf->buf == NULL) { 1535 error = ENOBUFS; 1536 goto fail; 1537 } 1538 mp = rxbuf->buf; 1539 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz; 1540 /* Get the memory mapping */ 1541 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, 1542 rxbuf->pmap, mp, BUS_DMA_NOWAIT); 1543 if (error != 0) 1544 goto fail; 1545 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap, 1546 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD); 1547 /* Update the descriptor and the cached value */ 1548 rxr->rx_base[j].read.pkt_addr = 1549 htole64(rxbuf->pmap->dm_segs[0].ds_addr); 1550 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr); 1551 } 1552 1553 1554 /* Setup our descriptor indices */ 1555 rxr->next_to_check = 0; 1556 rxr->next_to_refresh = 0; 1557 rxr->lro_enabled = FALSE; 1558 rxr->rx_copies.ev_count = 0; 1559 #if 0 /* NetBSD */ 1560 rxr->rx_bytes.ev_count = 0; 1561 #if 1 /* Fix inconsistency */ 1562 rxr->rx_packets.ev_count = 0; 1563 #endif 1564 #endif 1565 rxr->vtag_strip = FALSE; 1566 1567 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1568 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1569 1570 /* 1571 ** Now set up the LRO interface: 1572 */ 1573 if (ixgbe_rsc_enable) 1574 ixgbe_setup_hw_rsc(rxr); 1575 #ifdef LRO 1576 else if (ifp->if_capenable & IFCAP_LRO) { 1577 device_t dev = adapter->dev; 1578 int err = tcp_lro_init(lro); 1579 if (err) { 1580 device_printf(dev, "LRO Initialization failed!\n"); 1581 goto fail; 1582 } 1583 INIT_DEBUGOUT("RX Soft LRO Initialized\n"); 1584 rxr->lro_enabled = TRUE; 1585 lro->ifp = adapter->ifp; 1586 } 1587 #endif /* LRO */ 1588 1589 IXGBE_RX_UNLOCK(rxr); 1590 return (0); 1591 1592 fail: 1593 ixgbe_free_receive_ring(rxr); 1594 IXGBE_RX_UNLOCK(rxr); 1595 return (error); 1596 } 1597 1598 /********************************************************************* 1599 * 1600 * Initialize all receive rings. 1601 * 1602 **********************************************************************/ 1603 int 1604 ixgbe_setup_receive_structures(struct adapter *adapter) 1605 { 1606 struct rx_ring *rxr = adapter->rx_rings; 1607 int j; 1608 1609 for (j = 0; j < adapter->num_queues; j++, rxr++) 1610 if (ixgbe_setup_receive_ring(rxr)) 1611 goto fail; 1612 1613 return (0); 1614 fail: 1615 /* 1616 * Free RX buffers allocated so far, we will only handle 1617 * the rings that completed, the failing case will have 1618 * cleaned up for itself. 'j' failed, so its the terminus. 1619 */ 1620 for (int i = 0; i < j; ++i) { 1621 rxr = &adapter->rx_rings[i]; 1622 ixgbe_free_receive_ring(rxr); 1623 } 1624 1625 return (ENOBUFS); 1626 } 1627 1628 1629 /********************************************************************* 1630 * 1631 * Free all receive rings. 
1632 * 1633 **********************************************************************/ 1634 void 1635 ixgbe_free_receive_structures(struct adapter *adapter) 1636 { 1637 struct rx_ring *rxr = adapter->rx_rings; 1638 1639 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin"); 1640 1641 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 1642 #ifdef LRO 1643 struct lro_ctrl *lro = &rxr->lro; 1644 #endif /* LRO */ 1645 ixgbe_free_receive_buffers(rxr); 1646 #ifdef LRO 1647 /* Free LRO memory */ 1648 tcp_lro_free(lro); 1649 #endif /* LRO */ 1650 /* Free the ring memory as well */ 1651 ixgbe_dma_free(adapter, &rxr->rxdma); 1652 IXGBE_RX_LOCK_DESTROY(rxr); 1653 } 1654 1655 free(adapter->rx_rings, M_DEVBUF); 1656 } 1657 1658 1659 /********************************************************************* 1660 * 1661 * Free receive ring data structures 1662 * 1663 **********************************************************************/ 1664 static void 1665 ixgbe_free_receive_buffers(struct rx_ring *rxr) 1666 { 1667 struct adapter *adapter = rxr->adapter; 1668 struct ixgbe_rx_buf *rxbuf; 1669 1670 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin"); 1671 1672 /* Cleanup any existing buffers */ 1673 if (rxr->rx_buffers != NULL) { 1674 for (int i = 0; i < adapter->num_rx_desc; i++) { 1675 rxbuf = &rxr->rx_buffers[i]; 1676 if (rxbuf->buf != NULL) { 1677 bus_dmamap_sync(rxr->ptag->dt_dmat, 1678 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len, 1679 BUS_DMASYNC_POSTREAD); 1680 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap); 1681 rxbuf->buf->m_flags |= M_PKTHDR; 1682 m_freem(rxbuf->buf); 1683 } 1684 rxbuf->buf = NULL; 1685 if (rxbuf->pmap != NULL) { 1686 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap); 1687 rxbuf->pmap = NULL; 1688 } 1689 } 1690 if (rxr->rx_buffers != NULL) { 1691 free(rxr->rx_buffers, M_DEVBUF); 1692 rxr->rx_buffers = NULL; 1693 } 1694 } 1695 1696 if (rxr->ptag != NULL) { 1697 ixgbe_dma_tag_destroy(rxr->ptag); 1698 rxr->ptag = NULL; 1699 } 1700 1701 return; 1702 } 1703 1704 static __inline void 1705 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) 1706 { 1707 struct adapter *adapter = ifp->if_softc; 1708 1709 #ifdef LRO 1710 struct ethercom *ec = &adapter->osdep.ec; 1711 1712 /* 1713 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet 1714 * should be computed by hardware. Also it should not have VLAN tag in 1715 * ethernet header. In case of IPv6 we do not yet support ext. hdrs. 
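	 * To summarize the test below: the frame is only offered to
	 * tcp_lro_rx() when soft LRO is enabled on this ring, hardware
	 * VLAN tag stripping is on (so no tag is left in-line), the
	 * packet type is IPv4+TCP or IPv6+TCP and not an ETQF match,
	 * and the descriptor reported a valid TCP checksum
	 * (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); anything else falls
	 * through to the normal if_percpuq_enqueue() path.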
 */
	if (rxr->lro_enabled &&
	    (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
#endif /* LRO */

	IXGBE_RX_UNLOCK(rxr);

	if_percpuq_enqueue(adapter->ipq, m);

	IXGBE_RX_LOCK(rxr);
}

static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf	*rbuf;

	rbuf = &rxr->rx_buffers[i];

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;

	return;
}


/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to the upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
#ifdef LRO
	struct lro_ctrl		*lro = &rxr->lro;
#endif /* LRO */
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u32			count = adapter->rx_process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
#ifdef RSS
	u16			pkt_info;
#endif

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf *sendmp, *mp;
		u32 rsc, ptype;
		u16 len;
		u16 vtag = 0;
		bool eop;

		/* Sync the ring.
*/ 1822 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 1823 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1824 1825 cur = &rxr->rx_base[i]; 1826 staterr = le32toh(cur->wb.upper.status_error); 1827 #ifdef RSS 1828 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); 1829 #endif 1830 1831 if ((staterr & IXGBE_RXD_STAT_DD) == 0) 1832 break; 1833 if ((ifp->if_flags & IFF_RUNNING) == 0) 1834 break; 1835 1836 count--; 1837 sendmp = NULL; 1838 nbuf = NULL; 1839 rsc = 0; 1840 cur->wb.upper.status_error = 0; 1841 rbuf = &rxr->rx_buffers[i]; 1842 mp = rbuf->buf; 1843 1844 len = le16toh(cur->wb.upper.length); 1845 ptype = le32toh(cur->wb.lower.lo_dword.data) & 1846 IXGBE_RXDADV_PKTTYPE_MASK; 1847 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); 1848 1849 /* Make sure bad packets are discarded */ 1850 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) { 1851 #if __FreeBSD_version >= 1100036 1852 if (IXGBE_IS_VF(adapter)) 1853 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); 1854 #endif 1855 rxr->rx_discarded.ev_count++; 1856 ixgbe_rx_discard(rxr, i); 1857 goto next_desc; 1858 } 1859 1860 /* 1861 ** On 82599 which supports a hardware 1862 ** LRO (called HW RSC), packets need 1863 ** not be fragmented across sequential 1864 ** descriptors, rather the next descriptor 1865 ** is indicated in bits of the descriptor. 1866 ** This also means that we might proceses 1867 ** more than one packet at a time, something 1868 ** that has never been true before, it 1869 ** required eliminating global chain pointers 1870 ** in favor of what we are doing here. -jfv 1871 */ 1872 if (!eop) { 1873 /* 1874 ** Figure out the next descriptor 1875 ** of this frame. 1876 */ 1877 if (rxr->hw_rsc == TRUE) { 1878 rsc = ixgbe_rsc_count(cur); 1879 rxr->rsc_num += (rsc - 1); 1880 } 1881 if (rsc) { /* Get hardware index */ 1882 nextp = ((staterr & 1883 IXGBE_RXDADV_NEXTP_MASK) >> 1884 IXGBE_RXDADV_NEXTP_SHIFT); 1885 } else { /* Just sequential */ 1886 nextp = i + 1; 1887 if (nextp == adapter->num_rx_desc) 1888 nextp = 0; 1889 } 1890 nbuf = &rxr->rx_buffers[nextp]; 1891 prefetch(nbuf); 1892 } 1893 /* 1894 ** Rather than using the fmp/lmp global pointers 1895 ** we now keep the head of a packet chain in the 1896 ** buffer struct and pass this along from one 1897 ** descriptor to the next, until we get EOP. 1898 */ 1899 mp->m_len = len; 1900 /* 1901 ** See if there is a stored head 1902 ** that determines what we are 1903 */ 1904 sendmp = rbuf->fmp; 1905 if (sendmp != NULL) { /* secondary frag */ 1906 rbuf->buf = rbuf->fmp = NULL; 1907 mp->m_flags &= ~M_PKTHDR; 1908 sendmp->m_pkthdr.len += mp->m_len; 1909 } else { 1910 /* 1911 * Optimize. This might be a small packet, 1912 * maybe just a TCP ACK. Do a fast copy that 1913 * is cache aligned into a new mbuf, and 1914 * leave the old mbuf+cluster for re-use. 
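			 * The copy path below applies to frames of at most
			 * IXGBE_RX_COPY_LEN bytes: a plain mbuf header is
			 * allocated, its data pointer is advanced by
			 * IXGBE_RX_COPY_ALIGN for alignment, the payload is
			 * copied in, and IXGBE_RX_COPY is set on the
			 * rx_buffer so ixgbe_refresh_mbufs() can reuse the
			 * still-mapped cluster without another dmamap load.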
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies.ev_count++;
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			m_set_rcvif(sendmp, ifp);
			ifp->if_ipackets++;
			rxr->rx_packets.ev_count++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				VLAN_INPUT_TAG(ifp, sendmp, vtag,
				    printf("%s: could not apply VLAN "
				        "tag", __func__));
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
				ixgbe_rx_checksum(staterr, sendmp, ptype,
				    &adapter->stats.pf);
			}

#if 0 /* FreeBSD */
			/*
			 * In case of multiqueue, we have RXCSUM.PCSD bit set
			 * and never cleared. This means we have RSS hash
			 * available to be used.
			 */
			if (adapter->num_queues > 1) {
				sendmp->m_pkthdr.flowid =
				    le32toh(cur->wb.lower.hi_dword.rss);
				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
				case IXGBE_RXDADV_RSSTYPE_IPV4:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6_EX);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6_EX);
					break;
#if __FreeBSD_version > 1100000
				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV4);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6);
					break;
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6_EX);
					break;
#endif
				default:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_OPAQUE_HASH);
				}
			} else {
				sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
#endif
		}
next_desc:
		ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

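		/*
		 * The descriptor has been returned to the hardware
		 * (status cleared above, ring synced PREREAD|PREWRITE),
		 * so the ring index can now move past it.
		 */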
		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

#ifdef LRO
	/*
	 * Flush any outstanding LRO work
	 */
	tcp_lro_flush_all(lro);
#endif /* LRO */

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return true;
	else
		return false;
}


/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
    struct ixgbe_hw_stats *stats)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
#if 0
	bool	sctp = false;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = true;
#endif

	/* IPv4 checksum */
	if (status & IXGBE_RXD_STAT_IPCS) {
		stats->ipcs.ev_count++;
		if (!(errors & IXGBE_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
		} else {
			stats->ipcs_bad.ev_count++;
			mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
		}
	}
	/* TCP/UDP/SCTP checksum */
	if (status & IXGBE_RXD_STAT_L4CS) {
		stats->l4cs.ev_count++;
		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
		} else {
			stats->l4cs_bad.ev_count++;
			mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
		}
	}
}


/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
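
/*
 * Typical usage of the helpers below, mirroring how the descriptor
 * rings are set up in ixgbe_allocate_queues() (illustrative sketch only):
 *
 *	struct ixgbe_dma_alloc dma;
 *	int tsize = roundup2(adapter->num_tx_desc *
 *	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
 *
 *	if (ixgbe_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... descriptors live at dma.dma_vaddr,
 *		    bus address in dma.dma_paddr ...
 *		ixgbe_dma_free(adapter, &dma);
 *	}
 */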
int
ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
    struct ixgbe_dma_alloc *dma, const int mapflags)
{
	device_t dev = adapter->dev;
	int	 r, rsegs;

	r = ixgbe_dma_tag_create(adapter->osdep.dmat,	/* parent */
	    DBA_ALIGN, 0,		/* alignment, bounds */
	    size,			/* maxsize */
	    1,				/* nsegments */
	    size,			/* maxsegsize */
	    BUS_DMA_ALLOCNOW,		/* flags */
	    &dma->dma_tag);
	if (r != 0) {
		aprint_error_dev(dev,
		    "%s: ixgbe_dma_tag_create failed; error %d\n",
		    __func__, r);
		goto fail_0;
	}

	r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
	    size,
	    dma->dma_tag->dt_alignment,
	    dma->dma_tag->dt_boundary,
	    &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(dev,
		    "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
		goto fail_1;
	}

	r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
	    size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
		    __func__, r);
		goto fail_2;
	}

	r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
	if (r != 0) {
		aprint_error_dev(dev,
		    "%s: ixgbe_dmamap_create failed; error %d\n",
		    __func__, r);
		goto fail_3;
	}

	r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
	    dma->dma_vaddr, size,
	    NULL,
	    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
		    __func__, r);
		goto fail_4;
	}
	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
	dma->dma_size = size;
	return 0;
fail_4:
	ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
fail_3:
	bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
fail_2:
	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
fail_1:
	ixgbe_dma_tag_destroy(dma->dma_tag);
fail_0:
	return r;
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
	ixgbe_dma_tag_destroy(dma->dma_tag);
}

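/*
 * Note: ixgbe_dma_free() expects a mapping set up by ixgbe_dma_malloc();
 * it syncs and unloads the DMA map before releasing the backing segment
 * and destroying the tag.
 */
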
/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 **********************************************************************/
int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	int txconf = 0, rxconf = 0;
#ifdef PCI_IOV
	enum ixgbe_iov_mode iov_mode;
#endif

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		aprint_error_dev(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Next allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

#ifdef PCI_IOV
	iov_mode = ixgbe_get_iov_mode(adapter);
	adapter->pool = ixgbe_max_vfs(iov_mode);
#else
	adapter->pool = 0;
#endif
	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
#ifdef PCI_IOV
		txr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		txr->me = i;
#endif
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_xname(dev), txr->me);
		mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);

		if (ixgbe_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			aprint_error_dev(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			aprint_error_dev(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
		if (txr->txr_interq == NULL) {
			aprint_error_dev(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

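	/*
	 * txconf now counts the TX rings fully set up, so the
	 * err_tx_desc unwind below releases only what was allocated;
	 * rxconf plays the same role for the RX rings.
	 */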
	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
#ifdef PCI_IOV
		rxr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		rxr->me = i;
#endif
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_xname(dev), rxr->me);
		mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			aprint_error_dev(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			aprint_error_dev(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
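
/*
 * The failure path above unwinds in reverse order of allocation: the DMA
 * descriptor areas of any rings already set up are released first, then
 * the RX and TX ring arrays, and finally the queue array.
 */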