/*
 * Copyright (c) 2001-2011, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_polling.h"
#include "opt_igb.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ig_hal/e1000_api.h>
#include <dev/netif/ig_hal/e1000_82575.h>
#include <dev/netif/igb/if_igb.h>

#ifdef IGB_RSS_DEBUG
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IGB_RSS_DEBUG */
#define IGB_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
#endif	/* IGB_RSS_DEBUG */
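/*
 * PCI IDs of the adapters handled by this driver.  Each IGB_DEVICE(id)
 * entry below expands to { vendor id, device id, description string };
 * igb_probe() walks igb_devices[] and accepts the first entry whose
 * vendor/device pair matches the probed device.
 */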
#define IGB_NAME	"Intel(R) PRO/1000 "
#define IGB_DEVICE(id) \
	{ IGB_VENDOR_ID, E1000_DEV_ID_##id, IGB_NAME #id }
#define IGB_DEVICE_NULL	{ 0, 0, NULL }

static struct igb_device {
	uint16_t	vid;
	uint16_t	did;
	const char	*desc;
} igb_devices[] = {
	IGB_DEVICE(82575EB_COPPER),
	IGB_DEVICE(82575EB_FIBER_SERDES),
	IGB_DEVICE(82575GB_QUAD_COPPER),
	IGB_DEVICE(82576),
	IGB_DEVICE(82576_NS),
	IGB_DEVICE(82576_NS_SERDES),
	IGB_DEVICE(82576_FIBER),
	IGB_DEVICE(82576_SERDES),
	IGB_DEVICE(82576_SERDES_QUAD),
	IGB_DEVICE(82576_QUAD_COPPER),
	IGB_DEVICE(82576_QUAD_COPPER_ET2),
	IGB_DEVICE(82576_VF),
	IGB_DEVICE(82580_COPPER),
	IGB_DEVICE(82580_FIBER),
	IGB_DEVICE(82580_SERDES),
	IGB_DEVICE(82580_SGMII),
	IGB_DEVICE(82580_COPPER_DUAL),
	IGB_DEVICE(82580_QUAD_FIBER),
	IGB_DEVICE(DH89XXCC_SERDES),
	IGB_DEVICE(DH89XXCC_SGMII),
	IGB_DEVICE(DH89XXCC_SFP),
	IGB_DEVICE(DH89XXCC_BACKPLANE),
	IGB_DEVICE(I350_COPPER),
	IGB_DEVICE(I350_FIBER),
	IGB_DEVICE(I350_SERDES),
	IGB_DEVICE(I350_SGMII),
	IGB_DEVICE(I350_VF),

	/* required last entry */
	IGB_DEVICE_NULL
};

static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);

static boolean_t igb_is_valid_ether_addr(const uint8_t *);
static void	igb_setup_ifp(struct igb_softc *);
static boolean_t igb_txcsum_ctx(struct igb_tx_ring *, struct mbuf *);
static int	igb_tso_pullup(struct igb_tx_ring *, struct mbuf **);
static void	igb_tso_ctx(struct igb_tx_ring *, struct mbuf *, uint32_t *);
static void	igb_add_sysctl(struct igb_softc *);
static int	igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
static void	igb_set_ring_inuse(struct igb_softc *, boolean_t);

static void	igb_vf_init_stats(struct igb_softc *);
static void	igb_reset(struct igb_softc *);
static void	igb_update_stats_counters(struct igb_softc *);
static void	igb_update_vf_stats_counters(struct igb_softc *);
static void	igb_update_link_status(struct igb_softc *);
static void	igb_init_tx_unit(struct igb_softc *);
static void	igb_init_rx_unit(struct igb_softc *);

static void	igb_set_vlan(struct igb_softc *);
static void	igb_set_multi(struct igb_softc *);
static void	igb_set_promisc(struct igb_softc *);
static void	igb_disable_promisc(struct igb_softc *);

static int	igb_alloc_rings(struct igb_softc *);
static void	igb_free_rings(struct igb_softc *);
static int	igb_create_tx_ring(struct igb_tx_ring *);
static int	igb_create_rx_ring(struct igb_rx_ring *);
static void	igb_free_tx_ring(struct igb_tx_ring *);
static void	igb_free_rx_ring(struct igb_rx_ring *);
static void	igb_destroy_tx_ring(struct igb_tx_ring *, int);
static void	igb_destroy_rx_ring(struct igb_rx_ring *, int);
static void	igb_init_tx_ring(struct igb_tx_ring *);
static int	igb_init_rx_ring(struct igb_rx_ring *);
static int	igb_newbuf(struct igb_rx_ring *, int, boolean_t);
static int	igb_encap(struct igb_tx_ring *, struct mbuf **);

static void	igb_stop(struct igb_softc *);
static void	igb_init(void *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_timer(void *);
static void	igb_watchdog(struct ifnet *);
static void	igb_start(struct ifnet *);
#ifdef DEVICE_POLLING
static void	igb_poll(struct ifnet *, enum poll_cmd, int);
#endif
static void	igb_serialize(struct ifnet *, enum ifnet_serialize);
static void	igb_deserialize(struct ifnet *, enum ifnet_serialize);
static int	igb_tryserialize(struct ifnet *, enum ifnet_serialize);
#ifdef INVARIANTS
static void	igb_serialize_assert(struct ifnet *, enum ifnet_serialize,
		    boolean_t);
#endif

static void	igb_intr(void *);
static void	igb_intr_shared(void *);
static void	igb_rxeof(struct igb_rx_ring *, int);
static void	igb_txeof(struct igb_tx_ring *);
static void	igb_set_eitr(struct igb_softc *, int, int);
static void	igb_enable_intr(struct igb_softc *);
static void	igb_disable_intr(struct igb_softc *);
static void	igb_init_unshared_intr(struct igb_softc *);
static void	igb_init_intr(struct igb_softc *);
static int	igb_setup_intr(struct igb_softc *);
static void	igb_set_txintr_mask(struct igb_tx_ring *, int *, int);
static void	igb_set_rxintr_mask(struct igb_rx_ring *, int *, int);
static void	igb_set_intr_mask(struct igb_softc *);
static int	igb_alloc_intr(struct igb_softc *);
static void	igb_free_intr(struct igb_softc *);
static void	igb_teardown_intr(struct igb_softc *);
static void	igb_msix_try_alloc(struct igb_softc *);
static void	igb_msix_free(struct igb_softc *, boolean_t);
static int	igb_msix_setup(struct igb_softc *);
static void	igb_msix_teardown(struct igb_softc *, int);
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_status(void *);

/* Management and WOL Support */
static void	igb_get_mgmt(struct igb_softc *);
static void	igb_rel_mgmt(struct igb_softc *);
static void	igb_get_hw_control(struct igb_softc *);
static void	igb_rel_hw_control(struct igb_softc *);
static void	igb_enable_wol(device_t);

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		igb_probe),
	DEVMETHOD(device_attach,	igb_attach),
	DEVMETHOD(device_detach,	igb_detach),
	DEVMETHOD(device_shutdown,	igb_shutdown),
	DEVMETHOD(device_suspend,	igb_suspend),
	DEVMETHOD(device_resume,	igb_resume),
	{ 0, 0 }
};

static driver_t igb_driver = {
	"igb",
	igb_methods,
	sizeof(struct igb_softc),
};

static devclass_t igb_devclass;

DECLARE_DUMMY_MODULE(if_igb);
MODULE_DEPEND(igb, ig_hal, 1, 1, 1);
DRIVER_MODULE(if_igb, pci, igb_driver, igb_devclass, NULL, NULL);
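/*
 * Boot-time tunables.  The TUNABLE_INT() hooks below fetch these from the
 * kernel environment, so they can be preset from /boot/loader.conf; for
 * example (values here are only illustrative, not recommendations):
 *
 *	hw.igb.rxd=2048
 *	hw.igb.rxr=2
 *	hw.igb.msix.enable=0
 *
 * The RX ring count ("rxr") can additionally be overridden per device via
 * device_getenv_int() in igb_attach().
 */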
static int	igb_rxd = IGB_DEFAULT_RXD;
static int	igb_txd = IGB_DEFAULT_TXD;
static int	igb_rxr = 0;
static int	igb_msi_enable = 1;
static int	igb_msix_enable = 1;
static int	igb_eee_disabled = 1;	/* Energy Efficient Ethernet */
static int	igb_fc_setting = e1000_fc_full;

/*
 * DMA Coalescing, only for i350 - default to off,
 * this feature is for power savings
 */
static int	igb_dma_coalesce = 0;

TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.rxr", &igb_rxr);
TUNABLE_INT("hw.igb.msi.enable", &igb_msi_enable);
TUNABLE_INT("hw.igb.msix.enable", &igb_msix_enable);
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/* i350 specific */
TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);
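/*
 * Translate the hardware RX descriptor checksum status bits into mbuf
 * csum_flags: nothing is claimed when the "ignore checksum" bit (IXSM)
 * is set; the IP checksum is marked valid when IPCS is set without the
 * IPE error bit; the TCP/UDP checksum is marked valid when TCPCS/UDPCS
 * is set without the TCPE error bit.
 */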
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting); 266 267 /* i350 specific */ 268 TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled); 269 TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce); 270 271 static __inline void 272 igb_rxcsum(uint32_t staterr, struct mbuf *mp) 273 { 274 /* Ignore Checksum bit is set */ 275 if (staterr & E1000_RXD_STAT_IXSM) 276 return; 277 278 if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == 279 E1000_RXD_STAT_IPCS) 280 mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; 281 282 if (staterr & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { 283 if ((staterr & E1000_RXDEXT_STATERR_TCPE) == 0) { 284 mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | 285 CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED; 286 mp->m_pkthdr.csum_data = htons(0xffff); 287 } 288 } 289 } 290 291 static __inline struct pktinfo * 292 igb_rssinfo(struct mbuf *m, struct pktinfo *pi, 293 uint32_t hash, uint32_t hashtype, uint32_t staterr) 294 { 295 switch (hashtype) { 296 case E1000_RXDADV_RSSTYPE_IPV4_TCP: 297 pi->pi_netisr = NETISR_IP; 298 pi->pi_flags = 0; 299 pi->pi_l3proto = IPPROTO_TCP; 300 break; 301 302 case E1000_RXDADV_RSSTYPE_IPV4: 303 if (staterr & E1000_RXD_STAT_IXSM) 304 return NULL; 305 306 if ((staterr & 307 (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == 308 E1000_RXD_STAT_TCPCS) { 309 pi->pi_netisr = NETISR_IP; 310 pi->pi_flags = 0; 311 pi->pi_l3proto = IPPROTO_UDP; 312 break; 313 } 314 /* FALL THROUGH */ 315 default: 316 return NULL; 317 } 318 319 m->m_flags |= M_HASH; 320 m->m_pkthdr.hash = toeplitz_hash(hash); 321 return pi; 322 } 323 324 static int 325 igb_probe(device_t dev) 326 { 327 const struct igb_device *d; 328 uint16_t vid, did; 329 330 vid = pci_get_vendor(dev); 331 did = pci_get_device(dev); 332 333 for (d = igb_devices; d->desc != NULL; ++d) { 334 if (vid == d->vid && did == d->did) { 335 device_set_desc(dev, d->desc); 336 return 0; 337 } 338 } 339 return ENXIO; 340 } 341 342 static int 343 igb_attach(device_t dev) 344 { 345 struct igb_softc *sc = device_get_softc(dev); 346 uint16_t eeprom_data; 347 int error = 0, i, j, ring_max; 348 349 #ifdef notyet 350 /* SYSCTL stuff */ 351 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 352 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 353 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 354 igb_sysctl_nvm_info, "I", "NVM Information"); 355 356 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), 357 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 358 OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW, 359 &igb_enable_aim, 1, "Interrupt Moderation"); 360 361 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 362 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 363 OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW, 364 adapter, 0, igb_set_flowcntl, "I", "Flow Control"); 365 #endif 366 367 callout_init_mp(&sc->timer); 368 lwkt_serialize_init(&sc->main_serialize); 369 370 sc->dev = sc->osdep.dev = dev; 371 372 /* 373 * Determine hardware and mac type 374 */ 375 sc->hw.vendor_id = pci_get_vendor(dev); 376 sc->hw.device_id = pci_get_device(dev); 377 sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); 378 sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); 379 sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); 380 381 if (e1000_set_mac_type(&sc->hw)) 382 return ENXIO; 383 384 /* Are we a VF device? 
*/ 385 if (sc->hw.mac.type == e1000_vfadapt || 386 sc->hw.mac.type == e1000_vfadapt_i350) 387 sc->vf_ifp = 1; 388 else 389 sc->vf_ifp = 0; 390 391 /* 392 * Configure total supported RX/TX ring count 393 */ 394 switch (sc->hw.mac.type) { 395 case e1000_82575: 396 ring_max = IGB_MAX_RING_82575; 397 break; 398 case e1000_82580: 399 ring_max = IGB_MAX_RING_82580; 400 break; 401 case e1000_i350: 402 ring_max = IGB_MAX_RING_I350; 403 break; 404 case e1000_82576: 405 ring_max = IGB_MAX_RING_82576; 406 break; 407 default: 408 ring_max = IGB_MIN_RING; 409 break; 410 } 411 sc->rx_ring_cnt = device_getenv_int(dev, "rxr", igb_rxr); 412 sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_max); 413 #ifdef IGB_RSS_DEBUG 414 sc->rx_ring_cnt = device_getenv_int(dev, "rxr_debug", sc->rx_ring_cnt); 415 #endif 416 sc->rx_ring_inuse = sc->rx_ring_cnt; 417 sc->tx_ring_cnt = 1; /* XXX */ 418 419 if (sc->hw.mac.type == e1000_82575) 420 sc->flags |= IGB_FLAG_TSO_IPLEN0; 421 422 /* Enable bus mastering */ 423 pci_enable_busmaster(dev); 424 425 /* 426 * Allocate IO memory 427 */ 428 sc->mem_rid = PCIR_BAR(0); 429 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid, 430 RF_ACTIVE); 431 if (sc->mem_res == NULL) { 432 device_printf(dev, "Unable to allocate bus resource: memory\n"); 433 error = ENXIO; 434 goto failed; 435 } 436 sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res); 437 sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res); 438 439 sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle; 440 441 /* Save PCI command register for Shared Code */ 442 sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); 443 sc->hw.back = &sc->osdep; 444 445 /* Do Shared Code initialization */ 446 if (e1000_setup_init_funcs(&sc->hw, TRUE)) { 447 device_printf(dev, "Setup of Shared code failed\n"); 448 error = ENXIO; 449 goto failed; 450 } 451 452 e1000_get_bus_info(&sc->hw); 453 454 sc->hw.mac.autoneg = DO_AUTO_NEG; 455 sc->hw.phy.autoneg_wait_to_complete = FALSE; 456 sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 457 458 /* Copper options */ 459 if (sc->hw.phy.media_type == e1000_media_type_copper) { 460 sc->hw.phy.mdix = AUTO_ALL_MODES; 461 sc->hw.phy.disable_polarity_correction = FALSE; 462 sc->hw.phy.ms_type = IGB_MASTER_SLAVE; 463 } 464 465 /* Set the frame limits assuming standard ethernet sized frames. */ 466 sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; 467 468 /* Allocate RX/TX rings */ 469 error = igb_alloc_rings(sc); 470 if (error) 471 goto failed; 472 473 /* Allocate interrupt */ 474 error = igb_alloc_intr(sc); 475 if (error) 476 goto failed; 477 478 /* 479 * Setup serializers 480 */ 481 i = 0; 482 sc->serializes[i++] = &sc->main_serialize; 483 484 sc->tx_serialize = i; 485 for (j = 0; j < sc->tx_ring_cnt; ++j) 486 sc->serializes[i++] = &sc->tx_rings[j].tx_serialize; 487 488 sc->rx_serialize = i; 489 for (j = 0; j < sc->rx_ring_cnt; ++j) 490 sc->serializes[i++] = &sc->rx_rings[j].rx_serialize; 491 492 sc->serialize_cnt = i; 493 KKASSERT(sc->serialize_cnt <= IGB_NSERIALIZE); 494 495 /* Allocate the appropriate stats memory */ 496 if (sc->vf_ifp) { 497 sc->stats = kmalloc(sizeof(struct e1000_vf_stats), M_DEVBUF, 498 M_WAITOK | M_ZERO); 499 igb_vf_init_stats(sc); 500 } else { 501 sc->stats = kmalloc(sizeof(struct e1000_hw_stats), M_DEVBUF, 502 M_WAITOK | M_ZERO); 503 } 504 505 /* Allocate multicast array memory. 
*/ 506 sc->mta = kmalloc(ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, 507 M_DEVBUF, M_WAITOK); 508 509 /* Some adapter-specific advanced features */ 510 if (sc->hw.mac.type >= e1000_i350) { 511 #ifdef notyet 512 igb_set_sysctl_value(adapter, "dma_coalesce", 513 "configure dma coalesce", 514 &adapter->dma_coalesce, igb_dma_coalesce); 515 igb_set_sysctl_value(adapter, "eee_disabled", 516 "enable Energy Efficient Ethernet", 517 &adapter->hw.dev_spec._82575.eee_disable, 518 igb_eee_disabled); 519 #else 520 sc->dma_coalesce = igb_dma_coalesce; 521 sc->hw.dev_spec._82575.eee_disable = igb_eee_disabled; 522 #endif 523 e1000_set_eee_i350(&sc->hw); 524 } 525 526 /* 527 * Start from a known state, this is important in reading the nvm and 528 * mac from that. 529 */ 530 e1000_reset_hw(&sc->hw); 531 532 /* Make sure we have a good EEPROM before we read from it */ 533 if (e1000_validate_nvm_checksum(&sc->hw) < 0) { 534 /* 535 * Some PCI-E parts fail the first check due to 536 * the link being in sleep state, call it again, 537 * if it fails a second time its a real issue. 538 */ 539 if (e1000_validate_nvm_checksum(&sc->hw) < 0) { 540 device_printf(dev, 541 "The EEPROM Checksum Is Not Valid\n"); 542 error = EIO; 543 goto failed; 544 } 545 } 546 547 /* Copy the permanent MAC address out of the EEPROM */ 548 if (e1000_read_mac_addr(&sc->hw) < 0) { 549 device_printf(dev, "EEPROM read error while reading MAC" 550 " address\n"); 551 error = EIO; 552 goto failed; 553 } 554 if (!igb_is_valid_ether_addr(sc->hw.mac.addr)) { 555 device_printf(dev, "Invalid MAC address\n"); 556 error = EIO; 557 goto failed; 558 } 559 560 #ifdef notyet 561 /* 562 ** Configure Interrupts 563 */ 564 if ((adapter->msix > 1) && (igb_enable_msix)) 565 error = igb_allocate_msix(adapter); 566 else /* MSI or Legacy */ 567 error = igb_allocate_legacy(adapter); 568 if (error) 569 goto err_late; 570 #endif 571 572 /* Setup OS specific network interface */ 573 igb_setup_ifp(sc); 574 575 /* Add sysctl tree, must after igb_setup_ifp() */ 576 igb_add_sysctl(sc); 577 578 /* Now get a good starting state */ 579 igb_reset(sc); 580 581 /* Initialize statistics */ 582 igb_update_stats_counters(sc); 583 584 sc->hw.mac.get_link_status = 1; 585 igb_update_link_status(sc); 586 587 /* Indicate SOL/IDER usage */ 588 if (e1000_check_reset_block(&sc->hw)) { 589 device_printf(dev, 590 "PHY reset is blocked due to SOL/IDER session.\n"); 591 } 592 593 /* Determine if we have to control management hardware */ 594 if (e1000_enable_mng_pass_thru(&sc->hw)) 595 sc->flags |= IGB_FLAG_HAS_MGMT; 596 597 /* 598 * Setup Wake-on-Lan 599 */ 600 /* APME bit in EEPROM is mapped to WUC.APME */ 601 eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC) & E1000_WUC_APME; 602 if (eeprom_data) 603 sc->wol = E1000_WUFC_MAG; 604 /* XXX disable WOL */ 605 sc->wol = 0; 606 607 #ifdef notyet 608 /* Register for VLAN events */ 609 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 610 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); 611 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 612 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 613 #endif 614 615 #ifdef notyet 616 igb_add_hw_stats(adapter); 617 #endif 618 619 error = igb_setup_intr(sc); 620 if (error) { 621 ether_ifdetach(&sc->arpcom.ac_if); 622 goto failed; 623 } 624 return 0; 625 626 failed: 627 igb_detach(dev); 628 return error; 629 } 630 631 static int 632 igb_detach(device_t dev) 633 { 634 struct igb_softc *sc = device_get_softc(dev); 635 636 if (device_is_attached(dev)) { 637 struct ifnet *ifp = 
&sc->arpcom.ac_if; 638 639 ifnet_serialize_all(ifp); 640 641 igb_stop(sc); 642 643 e1000_phy_hw_reset(&sc->hw); 644 645 /* Give control back to firmware */ 646 igb_rel_mgmt(sc); 647 igb_rel_hw_control(sc); 648 649 if (sc->wol) { 650 E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN); 651 E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol); 652 igb_enable_wol(dev); 653 } 654 655 igb_teardown_intr(sc); 656 657 ifnet_deserialize_all(ifp); 658 659 ether_ifdetach(ifp); 660 } else if (sc->mem_res != NULL) { 661 igb_rel_hw_control(sc); 662 } 663 bus_generic_detach(dev); 664 665 if (sc->sysctl_tree != NULL) 666 sysctl_ctx_free(&sc->sysctl_ctx); 667 668 igb_free_intr(sc); 669 670 if (sc->msix_mem_res != NULL) { 671 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid, 672 sc->msix_mem_res); 673 } 674 if (sc->mem_res != NULL) { 675 bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, 676 sc->mem_res); 677 } 678 679 igb_free_rings(sc); 680 681 if (sc->mta != NULL) 682 kfree(sc->mta, M_DEVBUF); 683 if (sc->stats != NULL) 684 kfree(sc->stats, M_DEVBUF); 685 686 return 0; 687 } 688 689 static int 690 igb_shutdown(device_t dev) 691 { 692 return igb_suspend(dev); 693 } 694 695 static int 696 igb_suspend(device_t dev) 697 { 698 struct igb_softc *sc = device_get_softc(dev); 699 struct ifnet *ifp = &sc->arpcom.ac_if; 700 701 ifnet_serialize_all(ifp); 702 703 igb_stop(sc); 704 705 igb_rel_mgmt(sc); 706 igb_rel_hw_control(sc); 707 708 if (sc->wol) { 709 E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN); 710 E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol); 711 igb_enable_wol(dev); 712 } 713 714 ifnet_deserialize_all(ifp); 715 716 return bus_generic_suspend(dev); 717 } 718 719 static int 720 igb_resume(device_t dev) 721 { 722 struct igb_softc *sc = device_get_softc(dev); 723 struct ifnet *ifp = &sc->arpcom.ac_if; 724 725 ifnet_serialize_all(ifp); 726 727 igb_init(sc); 728 igb_get_mgmt(sc); 729 730 if_devstart(ifp); 731 732 ifnet_deserialize_all(ifp); 733 734 return bus_generic_resume(dev); 735 } 736 737 static int 738 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) 739 { 740 struct igb_softc *sc = ifp->if_softc; 741 struct ifreq *ifr = (struct ifreq *)data; 742 int max_frame_size, mask, reinit; 743 int error = 0; 744 745 ASSERT_IFNET_SERIALIZED_ALL(ifp); 746 747 switch (command) { 748 case SIOCSIFMTU: 749 max_frame_size = 9234; 750 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - 751 ETHER_CRC_LEN) { 752 error = EINVAL; 753 break; 754 } 755 756 ifp->if_mtu = ifr->ifr_mtu; 757 sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + 758 ETHER_CRC_LEN; 759 760 if (ifp->if_flags & IFF_RUNNING) 761 igb_init(sc); 762 break; 763 764 case SIOCSIFFLAGS: 765 if (ifp->if_flags & IFF_UP) { 766 if (ifp->if_flags & IFF_RUNNING) { 767 if ((ifp->if_flags ^ sc->if_flags) & 768 (IFF_PROMISC | IFF_ALLMULTI)) { 769 igb_disable_promisc(sc); 770 igb_set_promisc(sc); 771 } 772 } else { 773 igb_init(sc); 774 } 775 } else if (ifp->if_flags & IFF_RUNNING) { 776 igb_stop(sc); 777 } 778 sc->if_flags = ifp->if_flags; 779 break; 780 781 case SIOCADDMULTI: 782 case SIOCDELMULTI: 783 if (ifp->if_flags & IFF_RUNNING) { 784 igb_disable_intr(sc); 785 igb_set_multi(sc); 786 #ifdef DEVICE_POLLING 787 if (!(ifp->if_flags & IFF_POLLING)) 788 #endif 789 igb_enable_intr(sc); 790 } 791 break; 792 793 case SIOCSIFMEDIA: 794 /* 795 * As the speed/duplex settings are being 796 * changed, we need toreset the PHY. 
797 */ 798 sc->hw.phy.reset_disable = FALSE; 799 800 /* Check SOL/IDER usage */ 801 if (e1000_check_reset_block(&sc->hw)) { 802 if_printf(ifp, "Media change is " 803 "blocked due to SOL/IDER session.\n"); 804 break; 805 } 806 /* FALL THROUGH */ 807 808 case SIOCGIFMEDIA: 809 error = ifmedia_ioctl(ifp, ifr, &sc->media, command); 810 break; 811 812 case SIOCSIFCAP: 813 reinit = 0; 814 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 815 if (mask & IFCAP_RXCSUM) { 816 ifp->if_capenable ^= IFCAP_RXCSUM; 817 reinit = 1; 818 } 819 if (mask & IFCAP_VLAN_HWTAGGING) { 820 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 821 reinit = 1; 822 } 823 if (mask & IFCAP_TXCSUM) { 824 ifp->if_capenable ^= IFCAP_TXCSUM; 825 if (ifp->if_capenable & IFCAP_TXCSUM) 826 ifp->if_hwassist |= IGB_CSUM_FEATURES; 827 else 828 ifp->if_hwassist &= ~IGB_CSUM_FEATURES; 829 } 830 if (mask & IFCAP_TSO) { 831 ifp->if_capenable ^= IFCAP_TSO; 832 if (ifp->if_capenable & IFCAP_TSO) 833 ifp->if_hwassist |= CSUM_TSO; 834 else 835 ifp->if_hwassist &= ~CSUM_TSO; 836 } 837 if (mask & IFCAP_RSS) 838 ifp->if_capenable ^= IFCAP_RSS; 839 if (reinit && (ifp->if_flags & IFF_RUNNING)) 840 igb_init(sc); 841 break; 842 843 default: 844 error = ether_ioctl(ifp, command, data); 845 break; 846 } 847 return error; 848 } 849 850 static void 851 igb_init(void *xsc) 852 { 853 struct igb_softc *sc = xsc; 854 struct ifnet *ifp = &sc->arpcom.ac_if; 855 boolean_t polling; 856 int i; 857 858 ASSERT_IFNET_SERIALIZED_ALL(ifp); 859 860 igb_stop(sc); 861 862 /* Get the latest mac address, User can use a LAA */ 863 bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN); 864 865 /* Put the address into the Receive Address Array */ 866 e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0); 867 868 igb_reset(sc); 869 igb_update_link_status(sc); 870 871 E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN); 872 873 /* Configure for OS presence */ 874 igb_get_mgmt(sc); 875 876 polling = FALSE; 877 #ifdef DEVICE_POLLING 878 if (ifp->if_flags & IFF_POLLING) 879 polling = TRUE; 880 #endif 881 882 /* Configured used RX/TX rings */ 883 igb_set_ring_inuse(sc, polling); 884 885 /* Initialize interrupt */ 886 igb_init_intr(sc); 887 888 /* Prepare transmit descriptors and buffers */ 889 for (i = 0; i < sc->tx_ring_cnt; ++i) 890 igb_init_tx_ring(&sc->tx_rings[i]); 891 igb_init_tx_unit(sc); 892 893 /* Setup Multicast table */ 894 igb_set_multi(sc); 895 896 #if 0 897 /* 898 * Figure out the desired mbuf pool 899 * for doing jumbo/packetsplit 900 */ 901 if (adapter->max_frame_size <= 2048) 902 adapter->rx_mbuf_sz = MCLBYTES; 903 else if (adapter->max_frame_size <= 4096) 904 adapter->rx_mbuf_sz = MJUMPAGESIZE; 905 else 906 adapter->rx_mbuf_sz = MJUM9BYTES; 907 #endif 908 909 /* Prepare receive descriptors and buffers */ 910 for (i = 0; i < sc->rx_ring_inuse; ++i) { 911 int error; 912 913 error = igb_init_rx_ring(&sc->rx_rings[i]); 914 if (error) { 915 if_printf(ifp, "Could not setup receive structures\n"); 916 igb_stop(sc); 917 return; 918 } 919 } 920 igb_init_rx_unit(sc); 921 922 /* Enable VLAN support */ 923 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) 924 igb_set_vlan(sc); 925 926 /* Don't lose promiscuous settings */ 927 igb_set_promisc(sc); 928 929 ifp->if_flags |= IFF_RUNNING; 930 ifp->if_flags &= ~IFF_OACTIVE; 931 932 callout_reset(&sc->timer, hz, igb_timer, sc); 933 e1000_clear_hw_cntrs_base_generic(&sc->hw); 934 935 #if 0 936 if (adapter->msix > 1) /* Set up queue routing */ 937 igb_configure_queues(adapter); 938 #endif 939 940 /* This clears any pending interrupts */ 941 
E1000_READ_REG(&sc->hw, E1000_ICR); 942 943 /* 944 * Only enable interrupts if we are not polling, make sure 945 * they are off otherwise. 946 */ 947 if (polling) { 948 igb_disable_intr(sc); 949 } else { 950 igb_enable_intr(sc); 951 E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC); 952 } 953 954 /* Set Energy Efficient Ethernet */ 955 e1000_set_eee_i350(&sc->hw); 956 957 /* Don't reset the phy next time init gets called */ 958 sc->hw.phy.reset_disable = TRUE; 959 } 960 961 static void 962 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 963 { 964 struct igb_softc *sc = ifp->if_softc; 965 u_char fiber_type = IFM_1000_SX; 966 967 ASSERT_IFNET_SERIALIZED_ALL(ifp); 968 969 igb_update_link_status(sc); 970 971 ifmr->ifm_status = IFM_AVALID; 972 ifmr->ifm_active = IFM_ETHER; 973 974 if (!sc->link_active) 975 return; 976 977 ifmr->ifm_status |= IFM_ACTIVE; 978 979 if (sc->hw.phy.media_type == e1000_media_type_fiber || 980 sc->hw.phy.media_type == e1000_media_type_internal_serdes) { 981 ifmr->ifm_active |= fiber_type | IFM_FDX; 982 } else { 983 switch (sc->link_speed) { 984 case 10: 985 ifmr->ifm_active |= IFM_10_T; 986 break; 987 988 case 100: 989 ifmr->ifm_active |= IFM_100_TX; 990 break; 991 992 case 1000: 993 ifmr->ifm_active |= IFM_1000_T; 994 break; 995 } 996 if (sc->link_duplex == FULL_DUPLEX) 997 ifmr->ifm_active |= IFM_FDX; 998 else 999 ifmr->ifm_active |= IFM_HDX; 1000 } 1001 } 1002 1003 static int 1004 igb_media_change(struct ifnet *ifp) 1005 { 1006 struct igb_softc *sc = ifp->if_softc; 1007 struct ifmedia *ifm = &sc->media; 1008 1009 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1010 1011 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 1012 return EINVAL; 1013 1014 switch (IFM_SUBTYPE(ifm->ifm_media)) { 1015 case IFM_AUTO: 1016 sc->hw.mac.autoneg = DO_AUTO_NEG; 1017 sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 1018 break; 1019 1020 case IFM_1000_LX: 1021 case IFM_1000_SX: 1022 case IFM_1000_T: 1023 sc->hw.mac.autoneg = DO_AUTO_NEG; 1024 sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 1025 break; 1026 1027 case IFM_100_TX: 1028 sc->hw.mac.autoneg = FALSE; 1029 sc->hw.phy.autoneg_advertised = 0; 1030 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1031 sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; 1032 else 1033 sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; 1034 break; 1035 1036 case IFM_10_T: 1037 sc->hw.mac.autoneg = FALSE; 1038 sc->hw.phy.autoneg_advertised = 0; 1039 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1040 sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; 1041 else 1042 sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; 1043 break; 1044 1045 default: 1046 if_printf(ifp, "Unsupported media type\n"); 1047 break; 1048 } 1049 1050 igb_init(sc); 1051 1052 return 0; 1053 } 1054 1055 static void 1056 igb_set_promisc(struct igb_softc *sc) 1057 { 1058 struct ifnet *ifp = &sc->arpcom.ac_if; 1059 struct e1000_hw *hw = &sc->hw; 1060 uint32_t reg; 1061 1062 if (sc->vf_ifp) { 1063 e1000_promisc_set_vf(hw, e1000_promisc_enabled); 1064 return; 1065 } 1066 1067 reg = E1000_READ_REG(hw, E1000_RCTL); 1068 if (ifp->if_flags & IFF_PROMISC) { 1069 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 1070 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1071 } else if (ifp->if_flags & IFF_ALLMULTI) { 1072 reg |= E1000_RCTL_MPE; 1073 reg &= ~E1000_RCTL_UPE; 1074 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1075 } 1076 } 1077 1078 static void 1079 igb_disable_promisc(struct igb_softc *sc) 1080 { 1081 struct e1000_hw *hw = &sc->hw; 1082 uint32_t reg; 1083 1084 if (sc->vf_ifp) { 1085 
e1000_promisc_set_vf(hw, e1000_promisc_disabled); 1086 return; 1087 } 1088 reg = E1000_READ_REG(hw, E1000_RCTL); 1089 reg &= ~E1000_RCTL_UPE; 1090 reg &= ~E1000_RCTL_MPE; 1091 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1092 } 1093 1094 static void 1095 igb_set_multi(struct igb_softc *sc) 1096 { 1097 struct ifnet *ifp = &sc->arpcom.ac_if; 1098 struct ifmultiaddr *ifma; 1099 uint32_t reg_rctl = 0; 1100 uint8_t *mta; 1101 int mcnt = 0; 1102 1103 mta = sc->mta; 1104 bzero(mta, ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); 1105 1106 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1107 if (ifma->ifma_addr->sa_family != AF_LINK) 1108 continue; 1109 1110 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) 1111 break; 1112 1113 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1114 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); 1115 mcnt++; 1116 } 1117 1118 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { 1119 reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL); 1120 reg_rctl |= E1000_RCTL_MPE; 1121 E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); 1122 } else { 1123 e1000_update_mc_addr_list(&sc->hw, mta, mcnt); 1124 } 1125 } 1126 1127 static void 1128 igb_timer(void *xsc) 1129 { 1130 struct igb_softc *sc = xsc; 1131 1132 lwkt_serialize_enter(&sc->main_serialize); 1133 1134 igb_update_link_status(sc); 1135 igb_update_stats_counters(sc); 1136 1137 callout_reset(&sc->timer, hz, igb_timer, sc); 1138 1139 lwkt_serialize_exit(&sc->main_serialize); 1140 } 1141 1142 static void 1143 igb_update_link_status(struct igb_softc *sc) 1144 { 1145 struct ifnet *ifp = &sc->arpcom.ac_if; 1146 struct e1000_hw *hw = &sc->hw; 1147 uint32_t link_check, thstat, ctrl; 1148 1149 link_check = thstat = ctrl = 0; 1150 1151 /* Get the cached link value or read for real */ 1152 switch (hw->phy.media_type) { 1153 case e1000_media_type_copper: 1154 if (hw->mac.get_link_status) { 1155 /* Do the work to read phy */ 1156 e1000_check_for_link(hw); 1157 link_check = !hw->mac.get_link_status; 1158 } else { 1159 link_check = TRUE; 1160 } 1161 break; 1162 1163 case e1000_media_type_fiber: 1164 e1000_check_for_link(hw); 1165 link_check = E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU; 1166 break; 1167 1168 case e1000_media_type_internal_serdes: 1169 e1000_check_for_link(hw); 1170 link_check = hw->mac.serdes_has_link; 1171 break; 1172 1173 /* VF device is type_unknown */ 1174 case e1000_media_type_unknown: 1175 e1000_check_for_link(hw); 1176 link_check = !hw->mac.get_link_status; 1177 /* Fall thru */ 1178 default: 1179 break; 1180 } 1181 1182 /* Check for thermal downshift or shutdown */ 1183 if (hw->mac.type == e1000_i350) { 1184 thstat = E1000_READ_REG(hw, E1000_THSTAT); 1185 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); 1186 } 1187 1188 /* Now we check if a transition has happened */ 1189 if (link_check && sc->link_active == 0) { 1190 e1000_get_speed_and_duplex(hw, 1191 &sc->link_speed, &sc->link_duplex); 1192 if (bootverbose) { 1193 if_printf(ifp, "Link is up %d Mbps %s\n", 1194 sc->link_speed, 1195 sc->link_duplex == FULL_DUPLEX ? 
1196 "Full Duplex" : "Half Duplex"); 1197 } 1198 sc->link_active = 1; 1199 1200 ifp->if_baudrate = sc->link_speed * 1000000; 1201 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && 1202 (thstat & E1000_THSTAT_LINK_THROTTLE)) 1203 if_printf(ifp, "Link: thermal downshift\n"); 1204 /* This can sleep */ 1205 ifp->if_link_state = LINK_STATE_UP; 1206 if_link_state_change(ifp); 1207 } else if (!link_check && sc->link_active == 1) { 1208 ifp->if_baudrate = sc->link_speed = 0; 1209 sc->link_duplex = 0; 1210 if (bootverbose) 1211 if_printf(ifp, "Link is Down\n"); 1212 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && 1213 (thstat & E1000_THSTAT_PWR_DOWN)) 1214 if_printf(ifp, "Link: thermal shutdown\n"); 1215 sc->link_active = 0; 1216 /* This can sleep */ 1217 ifp->if_link_state = LINK_STATE_DOWN; 1218 if_link_state_change(ifp); 1219 } 1220 } 1221 1222 static void 1223 igb_stop(struct igb_softc *sc) 1224 { 1225 struct ifnet *ifp = &sc->arpcom.ac_if; 1226 int i; 1227 1228 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1229 1230 igb_disable_intr(sc); 1231 1232 callout_stop(&sc->timer); 1233 1234 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE); 1235 ifp->if_timer = 0; 1236 1237 e1000_reset_hw(&sc->hw); 1238 E1000_WRITE_REG(&sc->hw, E1000_WUC, 0); 1239 1240 e1000_led_off(&sc->hw); 1241 e1000_cleanup_led(&sc->hw); 1242 1243 for (i = 0; i < sc->tx_ring_cnt; ++i) 1244 igb_free_tx_ring(&sc->tx_rings[i]); 1245 for (i = 0; i < sc->rx_ring_cnt; ++i) 1246 igb_free_rx_ring(&sc->rx_rings[i]); 1247 } 1248 1249 static void 1250 igb_reset(struct igb_softc *sc) 1251 { 1252 struct ifnet *ifp = &sc->arpcom.ac_if; 1253 struct e1000_hw *hw = &sc->hw; 1254 struct e1000_fc_info *fc = &hw->fc; 1255 uint32_t pba = 0; 1256 uint16_t hwm; 1257 1258 /* Let the firmware know the OS is in control */ 1259 igb_get_hw_control(sc); 1260 1261 /* 1262 * Packet Buffer Allocation (PBA) 1263 * Writing PBA sets the receive portion of the buffer 1264 * the remainder is used for the transmit buffer. 1265 */ 1266 switch (hw->mac.type) { 1267 case e1000_82575: 1268 pba = E1000_PBA_32K; 1269 break; 1270 1271 case e1000_82576: 1272 case e1000_vfadapt: 1273 pba = E1000_READ_REG(hw, E1000_RXPBS); 1274 pba &= E1000_RXPBS_SIZE_MASK_82576; 1275 break; 1276 1277 case e1000_82580: 1278 case e1000_i350: 1279 case e1000_vfadapt_i350: 1280 pba = E1000_READ_REG(hw, E1000_RXPBS); 1281 pba = e1000_rxpbs_adjust_82580(pba); 1282 break; 1283 /* XXX pba = E1000_PBA_35K; */ 1284 1285 default: 1286 break; 1287 } 1288 1289 /* Special needs in case of Jumbo frames */ 1290 if (hw->mac.type == e1000_82575 && ifp->if_mtu > ETHERMTU) { 1291 uint32_t tx_space, min_tx, min_rx; 1292 1293 pba = E1000_READ_REG(hw, E1000_PBA); 1294 tx_space = pba >> 16; 1295 pba &= 0xffff; 1296 1297 min_tx = (sc->max_frame_size + 1298 sizeof(struct e1000_tx_desc) - ETHER_CRC_LEN) * 2; 1299 min_tx = roundup2(min_tx, 1024); 1300 min_tx >>= 10; 1301 min_rx = sc->max_frame_size; 1302 min_rx = roundup2(min_rx, 1024); 1303 min_rx >>= 10; 1304 if (tx_space < min_tx && (min_tx - tx_space) < pba) { 1305 pba = pba - (min_tx - tx_space); 1306 /* 1307 * if short on rx space, rx wins 1308 * and must trump tx adjustment 1309 */ 1310 if (pba < min_rx) 1311 pba = min_rx; 1312 } 1313 E1000_WRITE_REG(hw, E1000_PBA, pba); 1314 } 1315 1316 /* 1317 * These parameters control the automatic generation (Tx) and 1318 * response (Rx) to Ethernet PAUSE frames. 1319 * - High water mark should allow for at least two frames to be 1320 * received after sending an XOFF. 1321 * - Low water mark works best when it is very near the high water mark. 
1322 * This allows the receiver to restart by sending XON when it has 1323 * drained a bit. 1324 */ 1325 hwm = min(((pba << 10) * 9 / 10), 1326 ((pba << 10) - 2 * sc->max_frame_size)); 1327 1328 if (hw->mac.type < e1000_82576) { 1329 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ 1330 fc->low_water = fc->high_water - 8; 1331 } else { 1332 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ 1333 fc->low_water = fc->high_water - 16; 1334 } 1335 fc->pause_time = IGB_FC_PAUSE_TIME; 1336 fc->send_xon = TRUE; 1337 1338 /* Issue a global reset */ 1339 e1000_reset_hw(hw); 1340 E1000_WRITE_REG(hw, E1000_WUC, 0); 1341 1342 if (e1000_init_hw(hw) < 0) 1343 if_printf(ifp, "Hardware Initialization Failed\n"); 1344 1345 /* Setup DMA Coalescing */ 1346 if (hw->mac.type == e1000_i350 && sc->dma_coalesce) { 1347 uint32_t reg; 1348 1349 hwm = (pba - 4) << 10; 1350 reg = ((pba - 6) << E1000_DMACR_DMACTHR_SHIFT) 1351 & E1000_DMACR_DMACTHR_MASK; 1352 1353 /* transition to L0x or L1 if available..*/ 1354 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); 1355 1356 /* timer = +-1000 usec in 32usec intervals */ 1357 reg |= (1000 >> 5); 1358 E1000_WRITE_REG(hw, E1000_DMACR, reg); 1359 1360 /* No lower threshold */ 1361 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); 1362 1363 /* set hwm to PBA - 2 * max frame size */ 1364 E1000_WRITE_REG(hw, E1000_FCRTC, hwm); 1365 1366 /* Set the interval before transition */ 1367 reg = E1000_READ_REG(hw, E1000_DMCTLX); 1368 reg |= 0x800000FF; /* 255 usec */ 1369 E1000_WRITE_REG(hw, E1000_DMCTLX, reg); 1370 1371 /* free space in tx packet buffer to wake from DMA coal */ 1372 E1000_WRITE_REG(hw, E1000_DMCTXTH, 1373 (20480 - (2 * sc->max_frame_size)) >> 6); 1374 1375 /* make low power state decision controlled by DMA coal */ 1376 reg = E1000_READ_REG(hw, E1000_PCIEMISC); 1377 E1000_WRITE_REG(hw, E1000_PCIEMISC, 1378 reg | E1000_PCIEMISC_LX_DECISION); 1379 if_printf(ifp, "DMA Coalescing enabled\n"); 1380 } 1381 1382 E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN); 1383 e1000_get_phy_info(hw); 1384 e1000_check_for_link(hw); 1385 } 1386 1387 static void 1388 igb_setup_ifp(struct igb_softc *sc) 1389 { 1390 struct ifnet *ifp = &sc->arpcom.ac_if; 1391 1392 if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); 1393 ifp->if_softc = sc; 1394 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 1395 ifp->if_init = igb_init; 1396 ifp->if_ioctl = igb_ioctl; 1397 ifp->if_start = igb_start; 1398 ifp->if_serialize = igb_serialize; 1399 ifp->if_deserialize = igb_deserialize; 1400 ifp->if_tryserialize = igb_tryserialize; 1401 #ifdef INVARIANTS 1402 ifp->if_serialize_assert = igb_serialize_assert; 1403 #endif 1404 #ifdef DEVICE_POLLING 1405 ifp->if_poll = igb_poll; 1406 #endif 1407 ifp->if_watchdog = igb_watchdog; 1408 1409 ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].num_tx_desc - 1); 1410 ifq_set_ready(&ifp->if_snd); 1411 1412 ether_ifattach(ifp, sc->hw.mac.addr, NULL); 1413 1414 ifp->if_capabilities = 1415 IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_TSO; 1416 if (IGB_ENABLE_HWRSS(sc)) 1417 ifp->if_capabilities |= IFCAP_RSS; 1418 ifp->if_capenable = ifp->if_capabilities; 1419 ifp->if_hwassist = IGB_CSUM_FEATURES | CSUM_TSO; 1420 1421 /* 1422 * Tell the upper layer(s) we support long frames 1423 */ 1424 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 1425 1426 /* 1427 * Specify the media types supported by this adapter and register 1428 * callbacks to update media and link information 1429 */ 1430 ifmedia_init(&sc->media, IFM_IMASK, 
igb_media_change, igb_media_status); 1431 if (sc->hw.phy.media_type == e1000_media_type_fiber || 1432 sc->hw.phy.media_type == e1000_media_type_internal_serdes) { 1433 ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 1434 0, NULL); 1435 ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL); 1436 } else { 1437 ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL); 1438 ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX, 1439 0, NULL); 1440 ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL); 1441 ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 1442 0, NULL); 1443 if (sc->hw.phy.type != e1000_phy_ife) { 1444 ifmedia_add(&sc->media, 1445 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 1446 ifmedia_add(&sc->media, 1447 IFM_ETHER | IFM_1000_T, 0, NULL); 1448 } 1449 } 1450 ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); 1451 ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); 1452 } 1453 1454 static void 1455 igb_add_sysctl(struct igb_softc *sc) 1456 { 1457 char node[32]; 1458 int i; 1459 1460 sysctl_ctx_init(&sc->sysctl_ctx); 1461 sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, 1462 SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, 1463 device_get_nameunit(sc->dev), CTLFLAG_RD, 0, ""); 1464 if (sc->sysctl_tree == NULL) { 1465 device_printf(sc->dev, "can't add sysctl node\n"); 1466 return; 1467 } 1468 1469 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1470 OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings"); 1471 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1472 OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0, 1473 "# of RX rings used"); 1474 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1475 OID_AUTO, "rxd", CTLFLAG_RD, &sc->rx_rings[0].num_rx_desc, 0, 1476 "# of RX descs"); 1477 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1478 OID_AUTO, "txd", CTLFLAG_RD, &sc->tx_rings[0].num_tx_desc, 0, 1479 "# of TX descs"); 1480 1481 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 1482 SYSCTL_ADD_PROC(&sc->sysctl_ctx, 1483 SYSCTL_CHILDREN(sc->sysctl_tree), 1484 OID_AUTO, "intr_rate", CTLTYPE_INT | CTLFLAG_RW, 1485 sc, 0, igb_sysctl_intr_rate, "I", "interrupt rate"); 1486 } else { 1487 for (i = 0; i < sc->msix_cnt; ++i) { 1488 struct igb_msix_data *msix = &sc->msix_data[i]; 1489 1490 ksnprintf(node, sizeof(node), "msix%d_rate", i); 1491 SYSCTL_ADD_PROC(&sc->sysctl_ctx, 1492 SYSCTL_CHILDREN(sc->sysctl_tree), 1493 OID_AUTO, node, CTLTYPE_INT | CTLFLAG_RW, 1494 msix, 0, igb_sysctl_msix_rate, "I", 1495 msix->msix_rate_desc); 1496 } 1497 } 1498 1499 SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1500 OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW, 1501 sc, 0, igb_sysctl_tx_intr_nsegs, "I", 1502 "# of segments per TX interrupt"); 1503 1504 #ifdef IGB_RSS_DEBUG 1505 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1506 OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0, 1507 "RSS debug level"); 1508 for (i = 0; i < sc->rx_ring_cnt; ++i) { 1509 ksnprintf(node, sizeof(node), "rx%d_pkt", i); 1510 SYSCTL_ADD_ULONG(&sc->sysctl_ctx, 1511 SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node, 1512 CTLFLAG_RW, &sc->rx_rings[i].rx_packets, "RXed packets"); 1513 } 1514 #endif 1515 } 1516 1517 static int 1518 igb_alloc_rings(struct igb_softc *sc) 1519 { 1520 int error, i; 1521 1522 /* 1523 * Create top level busdma tag 1524 */ 1525 error = bus_dma_tag_create(NULL, 1, 0, 1526 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 1527 BUS_SPACE_MAXSIZE_32BIT, 0, 
BUS_SPACE_MAXSIZE_32BIT, 0, 1528 &sc->parent_tag); 1529 if (error) { 1530 device_printf(sc->dev, "could not create top level DMA tag\n"); 1531 return error; 1532 } 1533 1534 /* 1535 * Allocate TX descriptor rings and buffers 1536 */ 1537 sc->tx_rings = kmalloc(sizeof(struct igb_tx_ring) * sc->tx_ring_cnt, 1538 M_DEVBUF, M_WAITOK | M_ZERO); 1539 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1540 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1541 1542 /* Set up some basics */ 1543 txr->sc = sc; 1544 txr->me = i; 1545 lwkt_serialize_init(&txr->tx_serialize); 1546 1547 error = igb_create_tx_ring(txr); 1548 if (error) 1549 return error; 1550 } 1551 1552 /* 1553 * Allocate RX descriptor rings and buffers 1554 */ 1555 sc->rx_rings = kmalloc(sizeof(struct igb_rx_ring) * sc->rx_ring_cnt, 1556 M_DEVBUF, M_WAITOK | M_ZERO); 1557 for (i = 0; i < sc->rx_ring_cnt; ++i) { 1558 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 1559 1560 /* Set up some basics */ 1561 rxr->sc = sc; 1562 rxr->me = i; 1563 lwkt_serialize_init(&rxr->rx_serialize); 1564 1565 error = igb_create_rx_ring(rxr); 1566 if (error) 1567 return error; 1568 } 1569 1570 return 0; 1571 } 1572 1573 static void 1574 igb_free_rings(struct igb_softc *sc) 1575 { 1576 int i; 1577 1578 if (sc->tx_rings != NULL) { 1579 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1580 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1581 1582 igb_destroy_tx_ring(txr, txr->num_tx_desc); 1583 } 1584 kfree(sc->tx_rings, M_DEVBUF); 1585 } 1586 1587 if (sc->rx_rings != NULL) { 1588 for (i = 0; i < sc->rx_ring_cnt; ++i) { 1589 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 1590 1591 igb_destroy_rx_ring(rxr, rxr->num_rx_desc); 1592 } 1593 kfree(sc->rx_rings, M_DEVBUF); 1594 } 1595 } 1596 1597 static int 1598 igb_create_tx_ring(struct igb_tx_ring *txr) 1599 { 1600 int tsize, error, i; 1601 1602 /* 1603 * Validate number of transmit descriptors. It must not exceed 1604 * hardware maximum, and must be multiple of IGB_DBA_ALIGN. 
1605 */ 1606 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 || 1607 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) { 1608 device_printf(txr->sc->dev, 1609 "Using %d TX descriptors instead of %d!\n", 1610 IGB_DEFAULT_TXD, igb_txd); 1611 txr->num_tx_desc = IGB_DEFAULT_TXD; 1612 } else { 1613 txr->num_tx_desc = igb_txd; 1614 } 1615 1616 /* 1617 * Allocate TX descriptor ring 1618 */ 1619 tsize = roundup2(txr->num_tx_desc * sizeof(union e1000_adv_tx_desc), 1620 IGB_DBA_ALIGN); 1621 txr->txdma.dma_vaddr = bus_dmamem_coherent_any(txr->sc->parent_tag, 1622 IGB_DBA_ALIGN, tsize, BUS_DMA_WAITOK, 1623 &txr->txdma.dma_tag, &txr->txdma.dma_map, &txr->txdma.dma_paddr); 1624 if (txr->txdma.dma_vaddr == NULL) { 1625 device_printf(txr->sc->dev, 1626 "Unable to allocate TX Descriptor memory\n"); 1627 return ENOMEM; 1628 } 1629 txr->tx_base = txr->txdma.dma_vaddr; 1630 bzero(txr->tx_base, tsize); 1631 1632 txr->tx_buf = kmalloc(sizeof(struct igb_tx_buf) * txr->num_tx_desc, 1633 M_DEVBUF, M_WAITOK | M_ZERO); 1634 1635 /* 1636 * Allocate TX head write-back buffer 1637 */ 1638 txr->tx_hdr = bus_dmamem_coherent_any(txr->sc->parent_tag, 1639 __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK, 1640 &txr->tx_hdr_dtag, &txr->tx_hdr_dmap, &txr->tx_hdr_paddr); 1641 if (txr->tx_hdr == NULL) { 1642 device_printf(txr->sc->dev, 1643 "Unable to allocate TX head write-back buffer\n"); 1644 return ENOMEM; 1645 } 1646 1647 /* 1648 * Create DMA tag for TX buffers 1649 */ 1650 error = bus_dma_tag_create(txr->sc->parent_tag, 1651 1, 0, /* alignment, bounds */ 1652 BUS_SPACE_MAXADDR, /* lowaddr */ 1653 BUS_SPACE_MAXADDR, /* highaddr */ 1654 NULL, NULL, /* filter, filterarg */ 1655 IGB_TSO_SIZE, /* maxsize */ 1656 IGB_MAX_SCATTER, /* nsegments */ 1657 PAGE_SIZE, /* maxsegsize */ 1658 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 1659 BUS_DMA_ONEBPAGE, /* flags */ 1660 &txr->tx_tag); 1661 if (error) { 1662 device_printf(txr->sc->dev, "Unable to allocate TX DMA tag\n"); 1663 kfree(txr->tx_buf, M_DEVBUF); 1664 txr->tx_buf = NULL; 1665 return error; 1666 } 1667 1668 /* 1669 * Create DMA maps for TX buffers 1670 */ 1671 for (i = 0; i < txr->num_tx_desc; ++i) { 1672 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1673 1674 error = bus_dmamap_create(txr->tx_tag, 1675 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map); 1676 if (error) { 1677 device_printf(txr->sc->dev, 1678 "Unable to create TX DMA map\n"); 1679 igb_destroy_tx_ring(txr, i); 1680 return error; 1681 } 1682 } 1683 1684 /* 1685 * Initialize various watermark 1686 */ 1687 txr->spare_desc = IGB_TX_SPARE; 1688 txr->intr_nsegs = txr->num_tx_desc / 16; 1689 txr->oact_hi_desc = txr->num_tx_desc / 2; 1690 txr->oact_lo_desc = txr->num_tx_desc / 8; 1691 if (txr->oact_lo_desc > IGB_TX_OACTIVE_MAX) 1692 txr->oact_lo_desc = IGB_TX_OACTIVE_MAX; 1693 if (txr->oact_lo_desc < txr->spare_desc + IGB_TX_RESERVED) 1694 txr->oact_lo_desc = txr->spare_desc + IGB_TX_RESERVED; 1695 1696 return 0; 1697 } 1698 1699 static void 1700 igb_free_tx_ring(struct igb_tx_ring *txr) 1701 { 1702 int i; 1703 1704 for (i = 0; i < txr->num_tx_desc; ++i) { 1705 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1706 1707 if (txbuf->m_head != NULL) { 1708 bus_dmamap_unload(txr->tx_tag, txbuf->map); 1709 m_freem(txbuf->m_head); 1710 txbuf->m_head = NULL; 1711 } 1712 } 1713 } 1714 1715 static void 1716 igb_destroy_tx_ring(struct igb_tx_ring *txr, int ndesc) 1717 { 1718 int i; 1719 1720 if (txr->txdma.dma_vaddr != NULL) { 1721 bus_dmamap_unload(txr->txdma.dma_tag, txr->txdma.dma_map); 1722 
bus_dmamem_free(txr->txdma.dma_tag, txr->txdma.dma_vaddr, 1723 txr->txdma.dma_map); 1724 bus_dma_tag_destroy(txr->txdma.dma_tag); 1725 txr->txdma.dma_vaddr = NULL; 1726 } 1727 1728 if (txr->tx_hdr != NULL) { 1729 bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_dmap); 1730 bus_dmamem_free(txr->tx_hdr_dtag, txr->tx_hdr, 1731 txr->tx_hdr_dmap); 1732 bus_dma_tag_destroy(txr->tx_hdr_dtag); 1733 txr->tx_hdr = NULL; 1734 } 1735 1736 if (txr->tx_buf == NULL) 1737 return; 1738 1739 for (i = 0; i < ndesc; ++i) { 1740 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1741 1742 KKASSERT(txbuf->m_head == NULL); 1743 bus_dmamap_destroy(txr->tx_tag, txbuf->map); 1744 } 1745 bus_dma_tag_destroy(txr->tx_tag); 1746 1747 kfree(txr->tx_buf, M_DEVBUF); 1748 txr->tx_buf = NULL; 1749 } 1750 1751 static void 1752 igb_init_tx_ring(struct igb_tx_ring *txr) 1753 { 1754 /* Clear the old descriptor contents */ 1755 bzero(txr->tx_base, 1756 sizeof(union e1000_adv_tx_desc) * txr->num_tx_desc); 1757 1758 /* Clear TX head write-back buffer */ 1759 *(txr->tx_hdr) = 0; 1760 1761 /* Reset indices */ 1762 txr->next_avail_desc = 0; 1763 txr->next_to_clean = 0; 1764 txr->tx_nsegs = 0; 1765 1766 /* Set number of descriptors available */ 1767 txr->tx_avail = txr->num_tx_desc; 1768 } 1769 1770 static void 1771 igb_init_tx_unit(struct igb_softc *sc) 1772 { 1773 struct e1000_hw *hw = &sc->hw; 1774 uint32_t tctl; 1775 int i; 1776 1777 /* Setup the Tx Descriptor Rings */ 1778 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1779 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1780 uint64_t bus_addr = txr->txdma.dma_paddr; 1781 uint64_t hdr_paddr = txr->tx_hdr_paddr; 1782 uint32_t txdctl = 0; 1783 uint32_t dca_txctrl; 1784 1785 E1000_WRITE_REG(hw, E1000_TDLEN(i), 1786 txr->num_tx_desc * sizeof(struct e1000_tx_desc)); 1787 E1000_WRITE_REG(hw, E1000_TDBAH(i), 1788 (uint32_t)(bus_addr >> 32)); 1789 E1000_WRITE_REG(hw, E1000_TDBAL(i), 1790 (uint32_t)bus_addr); 1791 1792 /* Setup the HW Tx Head and Tail descriptor pointers */ 1793 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 1794 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 1795 1796 /* 1797 * WTHRESH is ignored by the hardware, since header 1798 * write back mode is used. 1799 */ 1800 txdctl |= IGB_TX_PTHRESH; 1801 txdctl |= IGB_TX_HTHRESH << 8; 1802 txdctl |= IGB_TX_WTHRESH << 16; 1803 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 1804 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 1805 1806 dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i)); 1807 dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN; 1808 E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(i), dca_txctrl); 1809 1810 /* 1811 * Don't set WB_on_EITR: 1812 * - 82575 does not have it 1813 * - It almost has no effect on 82576, see: 1814 * 82576 specification update errata #26 1815 * - It causes unnecessary bus traffic 1816 */ 1817 E1000_WRITE_REG(hw, E1000_TDWBAH(i), 1818 (uint32_t)(hdr_paddr >> 32)); 1819 E1000_WRITE_REG(hw, E1000_TDWBAL(i), 1820 ((uint32_t)hdr_paddr) | E1000_TX_HEAD_WB_ENABLE); 1821 } 1822 1823 if (sc->vf_ifp) 1824 return; 1825 1826 e1000_config_collision_dist(hw); 1827 1828 /* Program the Transmit Control Register */ 1829 tctl = E1000_READ_REG(hw, E1000_TCTL); 1830 tctl &= ~E1000_TCTL_CT; 1831 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 1832 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 1833 1834 /* This write will effectively turn on the transmit unit. 
*/ 1835 E1000_WRITE_REG(hw, E1000_TCTL, tctl); 1836 } 1837 1838 static boolean_t 1839 igb_txcsum_ctx(struct igb_tx_ring *txr, struct mbuf *mp) 1840 { 1841 struct e1000_adv_tx_context_desc *TXD; 1842 uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; 1843 int ehdrlen, ctxd, ip_hlen = 0; 1844 boolean_t offload = TRUE; 1845 1846 if ((mp->m_pkthdr.csum_flags & IGB_CSUM_FEATURES) == 0) 1847 offload = FALSE; 1848 1849 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; 1850 1851 ctxd = txr->next_avail_desc; 1852 TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd]; 1853 1854 /* 1855 * In advanced descriptors the vlan tag must 1856 * be placed into the context descriptor, thus 1857 * we need to be here just for that setup. 1858 */ 1859 if (mp->m_flags & M_VLANTAG) { 1860 uint16_t vlantag; 1861 1862 vlantag = htole16(mp->m_pkthdr.ether_vlantag); 1863 vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT); 1864 } else if (!offload) { 1865 return FALSE; 1866 } 1867 1868 ehdrlen = mp->m_pkthdr.csum_lhlen; 1869 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 1870 1871 /* Set the ether header length */ 1872 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 1873 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 1874 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 1875 ip_hlen = mp->m_pkthdr.csum_iphlen; 1876 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 1877 } 1878 vlan_macip_lens |= ip_hlen; 1879 1880 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 1881 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 1882 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 1883 else if (mp->m_pkthdr.csum_flags & CSUM_UDP) 1884 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; 1885 1886 /* 82575 needs the queue index added */ 1887 if (txr->sc->hw.mac.type == e1000_82575) 1888 mss_l4len_idx = txr->me << 4; 1889 1890 /* Now copy bits into descriptor */ 1891 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 1892 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 1893 TXD->seqnum_seed = htole32(0); 1894 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 1895 1896 /* We've consumed the first desc, adjust counters */ 1897 if (++ctxd == txr->num_tx_desc) 1898 ctxd = 0; 1899 txr->next_avail_desc = ctxd; 1900 --txr->tx_avail; 1901 1902 return offload; 1903 } 1904 1905 static void 1906 igb_txeof(struct igb_tx_ring *txr) 1907 { 1908 struct ifnet *ifp = &txr->sc->arpcom.ac_if; 1909 int first, hdr, avail; 1910 1911 if (txr->tx_avail == txr->num_tx_desc) 1912 return; 1913 1914 first = txr->next_to_clean; 1915 hdr = *(txr->tx_hdr); 1916 1917 if (first == hdr) 1918 return; 1919 1920 avail = txr->tx_avail; 1921 while (first != hdr) { 1922 struct igb_tx_buf *txbuf = &txr->tx_buf[first]; 1923 1924 ++avail; 1925 if (txbuf->m_head) { 1926 bus_dmamap_unload(txr->tx_tag, txbuf->map); 1927 m_freem(txbuf->m_head); 1928 txbuf->m_head = NULL; 1929 ++ifp->if_opackets; 1930 } 1931 if (++first == txr->num_tx_desc) 1932 first = 0; 1933 } 1934 txr->next_to_clean = first; 1935 txr->tx_avail = avail; 1936 1937 /* 1938 * If we have a minimum free, clear IFF_OACTIVE 1939 * to tell the stack that it is OK to send packets. 1940 */ 1941 if (IGB_IS_NOT_OACTIVE(txr)) { 1942 ifp->if_flags &= ~IFF_OACTIVE; 1943 1944 /* 1945 * We have enough TX descriptors, turn off 1946 * the watchdog. We allow small amount of 1947 * packets (roughly intr_nsegs) pending on 1948 * the transmit ring. 
1949 */ 1950 ifp->if_timer = 0; 1951 } 1952 } 1953 1954 static int 1955 igb_create_rx_ring(struct igb_rx_ring *rxr) 1956 { 1957 int rsize, i, error; 1958 1959 /* 1960 * Validate number of receive descriptors. It must not exceed 1961 * hardware maximum, and must be multiple of IGB_DBA_ALIGN. 1962 */ 1963 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 || 1964 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) { 1965 device_printf(rxr->sc->dev, 1966 "Using %d RX descriptors instead of %d!\n", 1967 IGB_DEFAULT_RXD, igb_rxd); 1968 rxr->num_rx_desc = IGB_DEFAULT_RXD; 1969 } else { 1970 rxr->num_rx_desc = igb_rxd; 1971 } 1972 1973 /* 1974 * Allocate RX descriptor ring 1975 */ 1976 rsize = roundup2(rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc), 1977 IGB_DBA_ALIGN); 1978 rxr->rxdma.dma_vaddr = bus_dmamem_coherent_any(rxr->sc->parent_tag, 1979 IGB_DBA_ALIGN, rsize, BUS_DMA_WAITOK, 1980 &rxr->rxdma.dma_tag, &rxr->rxdma.dma_map, 1981 &rxr->rxdma.dma_paddr); 1982 if (rxr->rxdma.dma_vaddr == NULL) { 1983 device_printf(rxr->sc->dev, 1984 "Unable to allocate RxDescriptor memory\n"); 1985 return ENOMEM; 1986 } 1987 rxr->rx_base = rxr->rxdma.dma_vaddr; 1988 bzero(rxr->rx_base, rsize); 1989 1990 rxr->rx_buf = kmalloc(sizeof(struct igb_rx_buf) * rxr->num_rx_desc, 1991 M_DEVBUF, M_WAITOK | M_ZERO); 1992 1993 /* 1994 * Create DMA tag for RX buffers 1995 */ 1996 error = bus_dma_tag_create(rxr->sc->parent_tag, 1997 1, 0, /* alignment, bounds */ 1998 BUS_SPACE_MAXADDR, /* lowaddr */ 1999 BUS_SPACE_MAXADDR, /* highaddr */ 2000 NULL, NULL, /* filter, filterarg */ 2001 MCLBYTES, /* maxsize */ 2002 1, /* nsegments */ 2003 MCLBYTES, /* maxsegsize */ 2004 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */ 2005 &rxr->rx_tag); 2006 if (error) { 2007 device_printf(rxr->sc->dev, 2008 "Unable to create RX payload DMA tag\n"); 2009 kfree(rxr->rx_buf, M_DEVBUF); 2010 rxr->rx_buf = NULL; 2011 return error; 2012 } 2013 2014 /* 2015 * Create spare DMA map for RX buffers 2016 */ 2017 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK, 2018 &rxr->rx_sparemap); 2019 if (error) { 2020 device_printf(rxr->sc->dev, 2021 "Unable to create spare RX DMA maps\n"); 2022 bus_dma_tag_destroy(rxr->rx_tag); 2023 kfree(rxr->rx_buf, M_DEVBUF); 2024 rxr->rx_buf = NULL; 2025 return error; 2026 } 2027 2028 /* 2029 * Create DMA maps for RX buffers 2030 */ 2031 for (i = 0; i < rxr->num_rx_desc; i++) { 2032 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2033 2034 error = bus_dmamap_create(rxr->rx_tag, 2035 BUS_DMA_WAITOK, &rxbuf->map); 2036 if (error) { 2037 device_printf(rxr->sc->dev, 2038 "Unable to create RX DMA maps\n"); 2039 igb_destroy_rx_ring(rxr, i); 2040 return error; 2041 } 2042 } 2043 return 0; 2044 } 2045 2046 static void 2047 igb_free_rx_ring(struct igb_rx_ring *rxr) 2048 { 2049 int i; 2050 2051 for (i = 0; i < rxr->num_rx_desc; ++i) { 2052 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2053 2054 if (rxbuf->m_head != NULL) { 2055 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2056 m_freem(rxbuf->m_head); 2057 rxbuf->m_head = NULL; 2058 } 2059 } 2060 2061 if (rxr->fmp != NULL) 2062 m_freem(rxr->fmp); 2063 rxr->fmp = NULL; 2064 rxr->lmp = NULL; 2065 } 2066 2067 static void 2068 igb_destroy_rx_ring(struct igb_rx_ring *rxr, int ndesc) 2069 { 2070 int i; 2071 2072 if (rxr->rxdma.dma_vaddr != NULL) { 2073 bus_dmamap_unload(rxr->rxdma.dma_tag, rxr->rxdma.dma_map); 2074 bus_dmamem_free(rxr->rxdma.dma_tag, rxr->rxdma.dma_vaddr, 2075 rxr->rxdma.dma_map); 2076 bus_dma_tag_destroy(rxr->rxdma.dma_tag); 2077 rxr->rxdma.dma_vaddr = NULL; 
2078 } 2079 2080 if (rxr->rx_buf == NULL) 2081 return; 2082 2083 for (i = 0; i < ndesc; ++i) { 2084 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2085 2086 KKASSERT(rxbuf->m_head == NULL); 2087 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2088 } 2089 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2090 bus_dma_tag_destroy(rxr->rx_tag); 2091 2092 kfree(rxr->rx_buf, M_DEVBUF); 2093 rxr->rx_buf = NULL; 2094 } 2095 2096 static void 2097 igb_setup_rxdesc(union e1000_adv_rx_desc *rxd, const struct igb_rx_buf *rxbuf) 2098 { 2099 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2100 rxd->wb.upper.status_error = 0; 2101 } 2102 2103 static int 2104 igb_newbuf(struct igb_rx_ring *rxr, int i, boolean_t wait) 2105 { 2106 struct mbuf *m; 2107 bus_dma_segment_t seg; 2108 bus_dmamap_t map; 2109 struct igb_rx_buf *rxbuf; 2110 int error, nseg; 2111 2112 m = m_getcl(wait ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR); 2113 if (m == NULL) { 2114 if (wait) { 2115 if_printf(&rxr->sc->arpcom.ac_if, 2116 "Unable to allocate RX mbuf\n"); 2117 } 2118 return ENOBUFS; 2119 } 2120 m->m_len = m->m_pkthdr.len = MCLBYTES; 2121 2122 if (rxr->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) 2123 m_adj(m, ETHER_ALIGN); 2124 2125 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 2126 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 2127 if (error) { 2128 m_freem(m); 2129 if (wait) { 2130 if_printf(&rxr->sc->arpcom.ac_if, 2131 "Unable to load RX mbuf\n"); 2132 } 2133 return error; 2134 } 2135 2136 rxbuf = &rxr->rx_buf[i]; 2137 if (rxbuf->m_head != NULL) 2138 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2139 2140 map = rxbuf->map; 2141 rxbuf->map = rxr->rx_sparemap; 2142 rxr->rx_sparemap = map; 2143 2144 rxbuf->m_head = m; 2145 rxbuf->paddr = seg.ds_addr; 2146 2147 igb_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2148 return 0; 2149 } 2150 2151 static int 2152 igb_init_rx_ring(struct igb_rx_ring *rxr) 2153 { 2154 int i; 2155 2156 /* Clear the ring contents */ 2157 bzero(rxr->rx_base, 2158 rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc)); 2159 2160 /* Now replenish the ring mbufs */ 2161 for (i = 0; i < rxr->num_rx_desc; ++i) { 2162 int error; 2163 2164 error = igb_newbuf(rxr, i, TRUE); 2165 if (error) 2166 return error; 2167 } 2168 2169 /* Setup our descriptor indices */ 2170 rxr->next_to_check = 0; 2171 2172 rxr->fmp = NULL; 2173 rxr->lmp = NULL; 2174 rxr->discard = FALSE; 2175 2176 return 0; 2177 } 2178 2179 static void 2180 igb_init_rx_unit(struct igb_softc *sc) 2181 { 2182 struct ifnet *ifp = &sc->arpcom.ac_if; 2183 struct e1000_hw *hw = &sc->hw; 2184 uint32_t rctl, rxcsum, srrctl = 0; 2185 int i; 2186 2187 /* 2188 * Make sure receives are disabled while setting 2189 * up the descriptor ring 2190 */ 2191 rctl = E1000_READ_REG(hw, E1000_RCTL); 2192 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2193 2194 #if 0 2195 /* 2196 ** Set up for header split 2197 */ 2198 if (igb_header_split) { 2199 /* Use a standard mbuf for the header */ 2200 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 2201 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 2202 } else 2203 #endif 2204 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 2205 2206 /* 2207 ** Set up for jumbo frames 2208 */ 2209 if (ifp->if_mtu > ETHERMTU) { 2210 rctl |= E1000_RCTL_LPE; 2211 #if 0 2212 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 2213 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2214 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 2215 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { 2216 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2217 rctl |= 
E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 2218 } 2219 /* Set maximum packet len */ 2220 psize = adapter->max_frame_size; 2221 /* are we on a vlan? */ 2222 if (adapter->ifp->if_vlantrunk != NULL) 2223 psize += VLAN_TAG_SIZE; 2224 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); 2225 #else 2226 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2227 rctl |= E1000_RCTL_SZ_2048; 2228 #endif 2229 } else { 2230 rctl &= ~E1000_RCTL_LPE; 2231 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2232 rctl |= E1000_RCTL_SZ_2048; 2233 } 2234 2235 /* Setup the Base and Length of the Rx Descriptor Rings */ 2236 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2237 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2238 uint64_t bus_addr = rxr->rxdma.dma_paddr; 2239 uint32_t rxdctl; 2240 2241 E1000_WRITE_REG(hw, E1000_RDLEN(i), 2242 rxr->num_rx_desc * sizeof(struct e1000_rx_desc)); 2243 E1000_WRITE_REG(hw, E1000_RDBAH(i), 2244 (uint32_t)(bus_addr >> 32)); 2245 E1000_WRITE_REG(hw, E1000_RDBAL(i), 2246 (uint32_t)bus_addr); 2247 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); 2248 /* Enable this Queue */ 2249 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 2250 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 2251 rxdctl &= 0xFFF00000; 2252 rxdctl |= IGB_RX_PTHRESH; 2253 rxdctl |= IGB_RX_HTHRESH << 8; 2254 /* 2255 * Don't set WTHRESH to a value above 1 on 82576, see: 2256 * 82576 specification update errata #26 2257 */ 2258 rxdctl |= IGB_RX_WTHRESH << 16; 2259 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 2260 } 2261 2262 rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM); 2263 rxcsum &= ~(E1000_RXCSUM_PCSS_MASK | E1000_RXCSUM_IPPCSE); 2264 2265 /* 2266 * Receive Checksum Offload for TCP and UDP 2267 * 2268 * Checksum offloading is also enabled if multiple receive 2269 * queue is to be supported, since we need it to figure out 2270 * fragments. 2271 */ 2272 if ((ifp->if_capenable & IFCAP_RXCSUM) || IGB_ENABLE_HWRSS(sc)) { 2273 /* 2274 * NOTE: 2275 * PCSD must be enabled to enable multiple 2276 * receive queues. 2277 */ 2278 rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2279 E1000_RXCSUM_PCSD; 2280 } else { 2281 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2282 E1000_RXCSUM_PCSD); 2283 } 2284 E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum); 2285 2286 if (IGB_ENABLE_HWRSS(sc)) { 2287 uint8_t key[IGB_NRSSRK * IGB_RSSRK_SIZE]; 2288 uint32_t reta_shift; 2289 int j, r; 2290 2291 /* 2292 * NOTE: 2293 * When we reach here, RSS has already been disabled 2294 * in igb_stop(), so we could safely configure RSS key 2295 * and redirect table. 2296 */ 2297 2298 /* 2299 * Configure RSS key 2300 */ 2301 toeplitz_get_key(key, sizeof(key)); 2302 for (i = 0; i < IGB_NRSSRK; ++i) { 2303 uint32_t rssrk; 2304 2305 rssrk = IGB_RSSRK_VAL(key, i); 2306 IGB_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk); 2307 2308 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk); 2309 } 2310 2311 /* 2312 * Configure RSS redirect table in following fashion: 2313 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 2314 */ 2315 reta_shift = IGB_RETA_SHIFT; 2316 if (hw->mac.type == e1000_82575) 2317 reta_shift = IGB_RETA_SHIFT_82575; 2318 2319 r = 0; 2320 for (j = 0; j < IGB_NRETA; ++j) { 2321 uint32_t reta = 0; 2322 2323 for (i = 0; i < IGB_RETA_SIZE; ++i) { 2324 uint32_t q; 2325 2326 q = (r % sc->rx_ring_inuse) << reta_shift; 2327 reta |= q << (8 * i); 2328 ++r; 2329 } 2330 IGB_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta); 2331 E1000_WRITE_REG(hw, E1000_RETA(j), reta); 2332 } 2333 2334 /* 2335 * Enable multiple receive queues. 2336 * Enable IPv4 RSS standard hash functions. 
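* Only the IPv4 and IPv4/TCP hash fields are turned on below; IPv6 and
* UDP hashing are left disabled.  For example, with two RX rings in use
* the redirect table programmed above simply alternates between queue 0
* and queue 1, so the load is spread by the low bits of the Toeplitz hash.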
2337 * Disable RSS interrupt on 82575 2338 */ 2339 E1000_WRITE_REG(&sc->hw, E1000_MRQC, 2340 E1000_MRQC_ENABLE_RSS_4Q | 2341 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2342 E1000_MRQC_RSS_FIELD_IPV4); 2343 } 2344 2345 /* Setup the Receive Control Register */ 2346 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2347 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 2348 E1000_RCTL_RDMTS_HALF | 2349 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2350 /* Strip CRC bytes. */ 2351 rctl |= E1000_RCTL_SECRC; 2352 /* Make sure VLAN Filters are off */ 2353 rctl &= ~E1000_RCTL_VFE; 2354 /* Don't store bad packets */ 2355 rctl &= ~E1000_RCTL_SBP; 2356 2357 /* Enable Receives */ 2358 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 2359 2360 /* 2361 * Setup the HW Rx Head and Tail Descriptor Pointers 2362 * - needs to be after enable 2363 */ 2364 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2365 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2366 2367 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 2368 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->num_rx_desc - 1); 2369 } 2370 } 2371 2372 static void 2373 igb_rxeof(struct igb_rx_ring *rxr, int count) 2374 { 2375 struct ifnet *ifp = &rxr->sc->arpcom.ac_if; 2376 union e1000_adv_rx_desc *cur; 2377 uint32_t staterr; 2378 int i; 2379 2380 i = rxr->next_to_check; 2381 cur = &rxr->rx_base[i]; 2382 staterr = le32toh(cur->wb.upper.status_error); 2383 2384 if ((staterr & E1000_RXD_STAT_DD) == 0) 2385 return; 2386 2387 while ((staterr & E1000_RXD_STAT_DD) && count != 0) { 2388 struct pktinfo *pi = NULL, pi0; 2389 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2390 struct mbuf *m = NULL; 2391 boolean_t eop; 2392 2393 eop = (staterr & E1000_RXD_STAT_EOP) ? TRUE : FALSE; 2394 if (eop) 2395 --count; 2396 2397 if ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) == 0 && 2398 !rxr->discard) { 2399 struct mbuf *mp = rxbuf->m_head; 2400 uint32_t hash, hashtype; 2401 uint16_t vlan; 2402 int len; 2403 2404 len = le16toh(cur->wb.upper.length); 2405 if (rxr->sc->hw.mac.type == e1000_i350 && 2406 (staterr & E1000_RXDEXT_STATERR_LB)) 2407 vlan = be16toh(cur->wb.upper.vlan); 2408 else 2409 vlan = le16toh(cur->wb.upper.vlan); 2410 2411 hash = le32toh(cur->wb.lower.hi_dword.rss); 2412 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2413 E1000_RXDADV_RSSTYPE_MASK; 2414 2415 IGB_RSS_DPRINTF(rxr->sc, 10, 2416 "ring%d, hash 0x%08x, hashtype %u\n", 2417 rxr->me, hash, hashtype); 2418 2419 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, 2420 BUS_DMASYNC_POSTREAD); 2421 2422 if (igb_newbuf(rxr, i, FALSE) != 0) { 2423 ifp->if_iqdrops++; 2424 goto discard; 2425 } 2426 2427 mp->m_len = len; 2428 if (rxr->fmp == NULL) { 2429 mp->m_pkthdr.len = len; 2430 rxr->fmp = mp; 2431 rxr->lmp = mp; 2432 } else { 2433 rxr->lmp->m_next = mp; 2434 rxr->lmp = rxr->lmp->m_next; 2435 rxr->fmp->m_pkthdr.len += len; 2436 } 2437 2438 if (eop) { 2439 m = rxr->fmp; 2440 rxr->fmp = NULL; 2441 rxr->lmp = NULL; 2442 2443 m->m_pkthdr.rcvif = ifp; 2444 ifp->if_ipackets++; 2445 2446 if (ifp->if_capenable & IFCAP_RXCSUM) 2447 igb_rxcsum(staterr, m); 2448 2449 if (staterr & E1000_RXD_STAT_VP) { 2450 m->m_pkthdr.ether_vlantag = vlan; 2451 m->m_flags |= M_VLANTAG; 2452 } 2453 2454 if (ifp->if_capenable & IFCAP_RSS) { 2455 pi = igb_rssinfo(m, &pi0, 2456 hash, hashtype, staterr); 2457 } 2458 #ifdef IGB_RSS_DEBUG 2459 rxr->rx_packets++; 2460 #endif 2461 } 2462 } else { 2463 ifp->if_ierrors++; 2464 discard: 2465 igb_setup_rxdesc(cur, rxbuf); 2466 if (!eop) 2467 rxr->discard = TRUE; 2468 else 2469 rxr->discard = FALSE; 2470 if (rxr->fmp != NULL) { 2471 
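/* Toss any partially assembled frame. */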
m_freem(rxr->fmp); 2472 rxr->fmp = NULL; 2473 rxr->lmp = NULL; 2474 } 2475 m = NULL; 2476 } 2477 2478 if (m != NULL) 2479 ether_input_pkt(ifp, m, pi); 2480 2481 /* Advance our pointers to the next descriptor. */ 2482 if (++i == rxr->num_rx_desc) 2483 i = 0; 2484 2485 cur = &rxr->rx_base[i]; 2486 staterr = le32toh(cur->wb.upper.status_error); 2487 } 2488 rxr->next_to_check = i; 2489 2490 if (--i < 0) 2491 i = rxr->num_rx_desc - 1; 2492 E1000_WRITE_REG(&rxr->sc->hw, E1000_RDT(rxr->me), i); 2493 } 2494 2495 2496 static void 2497 igb_set_vlan(struct igb_softc *sc) 2498 { 2499 struct e1000_hw *hw = &sc->hw; 2500 uint32_t reg; 2501 #if 0 2502 struct ifnet *ifp = sc->arpcom.ac_if; 2503 #endif 2504 2505 if (sc->vf_ifp) { 2506 e1000_rlpml_set_vf(hw, sc->max_frame_size + VLAN_TAG_SIZE); 2507 return; 2508 } 2509 2510 reg = E1000_READ_REG(hw, E1000_CTRL); 2511 reg |= E1000_CTRL_VME; 2512 E1000_WRITE_REG(hw, E1000_CTRL, reg); 2513 2514 #if 0 2515 /* Enable the Filter Table */ 2516 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 2517 reg = E1000_READ_REG(hw, E1000_RCTL); 2518 reg &= ~E1000_RCTL_CFIEN; 2519 reg |= E1000_RCTL_VFE; 2520 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2521 } 2522 #endif 2523 2524 /* Update the frame size */ 2525 E1000_WRITE_REG(&sc->hw, E1000_RLPML, 2526 sc->max_frame_size + VLAN_TAG_SIZE); 2527 2528 #if 0 2529 /* Don't bother with table if no vlans */ 2530 if ((adapter->num_vlans == 0) || 2531 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 2532 return; 2533 /* 2534 ** A soft reset zero's out the VFTA, so 2535 ** we need to repopulate it now. 2536 */ 2537 for (int i = 0; i < IGB_VFTA_SIZE; i++) 2538 if (adapter->shadow_vfta[i] != 0) { 2539 if (adapter->vf_ifp) 2540 e1000_vfta_set_vf(hw, 2541 adapter->shadow_vfta[i], TRUE); 2542 else 2543 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 2544 i, adapter->shadow_vfta[i]); 2545 } 2546 #endif 2547 } 2548 2549 static void 2550 igb_enable_intr(struct igb_softc *sc) 2551 { 2552 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2553 lwkt_serialize_handler_enable(&sc->main_serialize); 2554 } else { 2555 int i; 2556 2557 for (i = 0; i < sc->msix_cnt; ++i) { 2558 lwkt_serialize_handler_enable( 2559 sc->msix_data[i].msix_serialize); 2560 } 2561 } 2562 2563 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2564 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2565 E1000_WRITE_REG(&sc->hw, E1000_EIAC, sc->intr_mask); 2566 else 2567 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2568 E1000_WRITE_REG(&sc->hw, E1000_EIAM, sc->intr_mask); 2569 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2570 E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); 2571 } else { 2572 E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK); 2573 } 2574 E1000_WRITE_FLUSH(&sc->hw); 2575 } 2576 2577 static void 2578 igb_disable_intr(struct igb_softc *sc) 2579 { 2580 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2581 E1000_WRITE_REG(&sc->hw, E1000_EIMC, 0xffffffff); 2582 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2583 } 2584 E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff); 2585 E1000_WRITE_FLUSH(&sc->hw); 2586 2587 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2588 lwkt_serialize_handler_disable(&sc->main_serialize); 2589 } else { 2590 int i; 2591 2592 for (i = 0; i < sc->msix_cnt; ++i) { 2593 lwkt_serialize_handler_disable( 2594 sc->msix_data[i].msix_serialize); 2595 } 2596 } 2597 } 2598 2599 /* 2600 * Bit of a misnomer, what this really means is 2601 * to enable OS management of the system... 
aka
2602 * to disable special hardware management features
2603 */
2604 static void
2605 igb_get_mgmt(struct igb_softc *sc)
2606 {
2607 if (sc->flags & IGB_FLAG_HAS_MGMT) {
2608 int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H);
2609 int manc = E1000_READ_REG(&sc->hw, E1000_MANC);
2610
2611 /* disable hardware interception of ARP */
2612 manc &= ~E1000_MANC_ARP_EN;
2613
2614 /* enable receiving management packets to the host */
2615 manc |= E1000_MANC_EN_MNG2HOST;
2616 manc2h |= 1 << 5; /* Mng Port 623 */
2617 manc2h |= 1 << 6; /* Mng Port 664 */
2618 E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h);
2619 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
2620 }
2621 }
2622
2623 /*
2624 * Give control back to hardware management controller
2625 * if there is one.
2626 */
2627 static void
2628 igb_rel_mgmt(struct igb_softc *sc)
2629 {
2630 if (sc->flags & IGB_FLAG_HAS_MGMT) {
2631 int manc = E1000_READ_REG(&sc->hw, E1000_MANC);
2632
2633 /* Re-enable hardware interception of ARP */
2634 manc |= E1000_MANC_ARP_EN;
2635 manc &= ~E1000_MANC_EN_MNG2HOST;
2636
2637 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc);
2638 }
2639 }
2640
2641 /*
2642 * Sets CTRL_EXT:DRV_LOAD bit.
2643 *
2644 * For ASF and Pass Through versions of f/w this means that
2645 * the driver is loaded.
2646 */
2647 static void
2648 igb_get_hw_control(struct igb_softc *sc)
2649 {
2650 uint32_t ctrl_ext;
2651
2652 if (sc->vf_ifp)
2653 return;
2654
2655 /* Let firmware know the driver has taken over */
2656 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
2657 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
2658 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
2659 }
2660
2661 /*
2662 * Resets CTRL_EXT:DRV_LOAD bit.
2663 *
2664 * For ASF and Pass Through versions of f/w this means that the
2665 * driver is no longer loaded.
2666 */
2667 static void
2668 igb_rel_hw_control(struct igb_softc *sc)
2669 {
2670 uint32_t ctrl_ext;
2671
2672 if (sc->vf_ifp)
2673 return;
2674
2675 /* Let firmware take over control of h/w */
2676 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT);
2677 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT,
2678 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
2679 }
2680
2681 static boolean_t
2682 igb_is_valid_ether_addr(const uint8_t *addr)
2683 {
2684 uint8_t zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
2685
2686 if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN))
2687 return FALSE;
2688 return TRUE;
2689 }
2690
2691 /*
2692 * Enable PCI Wake On LAN capability
2693 */
2694 static void
2695 igb_enable_wol(device_t dev)
2696 {
2697 uint16_t cap, status;
2698 uint8_t id;
2699
2700 /* First find the capabilities pointer */
2701 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
2702
2703 /* Read the PM Capabilities */
2704 id = pci_read_config(dev, cap, 1);
2705 if (id != PCIY_PMG) /* Something wrong */
2706 return;
2707
2708 /*
2709 * OK, we have the power capabilities,
2710 * so now get the status register
2711 */
2712 cap += PCIR_POWER_STATUS;
2713 status = pci_read_config(dev, cap, 2);
2714 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
2715 pci_write_config(dev, cap, status, 2);
2716 }
2717
2718 static void
2719 igb_update_stats_counters(struct igb_softc *sc)
2720 {
2721 struct e1000_hw *hw = &sc->hw;
2722 struct e1000_hw_stats *stats;
2723 struct ifnet *ifp = &sc->arpcom.ac_if;
2724
2725 /*
2726 * The virtual function adapter has only a
2727 * small controlled set of stats, so update only
2728 * those and return.
2729 */ 2730 if (sc->vf_ifp) { 2731 igb_update_vf_stats_counters(sc); 2732 return; 2733 } 2734 stats = sc->stats; 2735 2736 if (sc->hw.phy.media_type == e1000_media_type_copper || 2737 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { 2738 stats->symerrs += 2739 E1000_READ_REG(hw,E1000_SYMERRS); 2740 stats->sec += E1000_READ_REG(hw, E1000_SEC); 2741 } 2742 2743 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); 2744 stats->mpc += E1000_READ_REG(hw, E1000_MPC); 2745 stats->scc += E1000_READ_REG(hw, E1000_SCC); 2746 stats->ecol += E1000_READ_REG(hw, E1000_ECOL); 2747 2748 stats->mcc += E1000_READ_REG(hw, E1000_MCC); 2749 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); 2750 stats->colc += E1000_READ_REG(hw, E1000_COLC); 2751 stats->dc += E1000_READ_REG(hw, E1000_DC); 2752 stats->rlec += E1000_READ_REG(hw, E1000_RLEC); 2753 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); 2754 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); 2755 2756 /* 2757 * For watchdog management we need to know if we have been 2758 * paused during the last interval, so capture that here. 2759 */ 2760 sc->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC); 2761 stats->xoffrxc += sc->pause_frames; 2762 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); 2763 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); 2764 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); 2765 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); 2766 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); 2767 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); 2768 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); 2769 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); 2770 stats->gprc += E1000_READ_REG(hw, E1000_GPRC); 2771 stats->bprc += E1000_READ_REG(hw, E1000_BPRC); 2772 stats->mprc += E1000_READ_REG(hw, E1000_MPRC); 2773 stats->gptc += E1000_READ_REG(hw, E1000_GPTC); 2774 2775 /* For the 64-bit byte counters the low dword must be read first. 
*/ 2776 /* Both registers clear on the read of the high dword */ 2777 2778 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + 2779 ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32); 2780 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + 2781 ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32); 2782 2783 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); 2784 stats->ruc += E1000_READ_REG(hw, E1000_RUC); 2785 stats->rfc += E1000_READ_REG(hw, E1000_RFC); 2786 stats->roc += E1000_READ_REG(hw, E1000_ROC); 2787 stats->rjc += E1000_READ_REG(hw, E1000_RJC); 2788 2789 stats->tor += E1000_READ_REG(hw, E1000_TORH); 2790 stats->tot += E1000_READ_REG(hw, E1000_TOTH); 2791 2792 stats->tpr += E1000_READ_REG(hw, E1000_TPR); 2793 stats->tpt += E1000_READ_REG(hw, E1000_TPT); 2794 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); 2795 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); 2796 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); 2797 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); 2798 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); 2799 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); 2800 stats->mptc += E1000_READ_REG(hw, E1000_MPTC); 2801 stats->bptc += E1000_READ_REG(hw, E1000_BPTC); 2802 2803 /* Interrupt Counts */ 2804 2805 stats->iac += E1000_READ_REG(hw, E1000_IAC); 2806 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); 2807 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); 2808 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); 2809 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); 2810 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); 2811 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); 2812 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); 2813 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); 2814 2815 /* Host to Card Statistics */ 2816 2817 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); 2818 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); 2819 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); 2820 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); 2821 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); 2822 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); 2823 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); 2824 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 2825 ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32)); 2826 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + 2827 ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); 2828 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); 2829 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); 2830 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); 2831 2832 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); 2833 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); 2834 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); 2835 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); 2836 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); 2837 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); 2838 2839 ifp->if_collisions = stats->colc; 2840 2841 /* Rx Errors */ 2842 ifp->if_ierrors = stats->rxerrc + stats->crcerrs + stats->algnerrc + 2843 stats->ruc + stats->roc + stats->mpc + stats->cexterr; 2844 2845 /* Tx Errors */ 2846 ifp->if_oerrors = stats->ecol + stats->latecol + sc->watchdog_events; 2847 2848 /* Driver specific counters */ 2849 sc->device_control = E1000_READ_REG(hw, E1000_CTRL); 2850 sc->rx_control = E1000_READ_REG(hw, E1000_RCTL); 2851 sc->int_mask = E1000_READ_REG(hw, E1000_IMS); 2852 sc->eint_mask = E1000_READ_REG(hw, E1000_EIMS); 2853 sc->packet_buf_alloc_tx = 2854 ((E1000_READ_REG(hw, 
E1000_PBA) & 0xffff0000) >> 16); 2855 sc->packet_buf_alloc_rx = 2856 (E1000_READ_REG(hw, E1000_PBA) & 0xffff); 2857 } 2858 2859 static void 2860 igb_vf_init_stats(struct igb_softc *sc) 2861 { 2862 struct e1000_hw *hw = &sc->hw; 2863 struct e1000_vf_stats *stats; 2864 2865 stats = sc->stats; 2866 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); 2867 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); 2868 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); 2869 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); 2870 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); 2871 } 2872 2873 static void 2874 igb_update_vf_stats_counters(struct igb_softc *sc) 2875 { 2876 struct e1000_hw *hw = &sc->hw; 2877 struct e1000_vf_stats *stats; 2878 2879 if (sc->link_speed == 0) 2880 return; 2881 2882 stats = sc->stats; 2883 UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); 2884 UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); 2885 UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); 2886 UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); 2887 UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); 2888 } 2889 2890 #ifdef DEVICE_POLLING 2891 2892 static void 2893 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) 2894 { 2895 struct igb_softc *sc = ifp->if_softc; 2896 uint32_t reg_icr; 2897 2898 switch (cmd) { 2899 case POLL_REGISTER: 2900 case POLL_DEREGISTER: 2901 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2902 igb_init(sc); 2903 break; 2904 2905 case POLL_AND_CHECK_STATUS: 2906 ASSERT_SERIALIZED(&sc->main_serialize); 2907 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 2908 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 2909 sc->hw.mac.get_link_status = 1; 2910 igb_update_link_status(sc); 2911 } 2912 /* FALL THROUGH */ 2913 case POLL_ONLY: 2914 ASSERT_SERIALIZED(&sc->main_serialize); 2915 if (ifp->if_flags & IFF_RUNNING) { 2916 struct igb_tx_ring *txr; 2917 int i; 2918 2919 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2920 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2921 2922 lwkt_serialize_enter(&rxr->rx_serialize); 2923 igb_rxeof(rxr, count); 2924 lwkt_serialize_exit(&rxr->rx_serialize); 2925 } 2926 2927 txr = &sc->tx_rings[0]; 2928 lwkt_serialize_enter(&txr->tx_serialize); 2929 igb_txeof(txr); 2930 if (!ifq_is_empty(&ifp->if_snd)) 2931 if_devstart(ifp); 2932 lwkt_serialize_exit(&txr->tx_serialize); 2933 } 2934 break; 2935 } 2936 } 2937 2938 #endif /* DEVICE_POLLING */ 2939 2940 static void 2941 igb_intr(void *xsc) 2942 { 2943 struct igb_softc *sc = xsc; 2944 struct ifnet *ifp = &sc->arpcom.ac_if; 2945 uint32_t eicr; 2946 2947 ASSERT_SERIALIZED(&sc->main_serialize); 2948 2949 eicr = E1000_READ_REG(&sc->hw, E1000_EICR); 2950 2951 if (eicr == 0) 2952 return; 2953 2954 if (ifp->if_flags & IFF_RUNNING) { 2955 struct igb_tx_ring *txr; 2956 int i; 2957 2958 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2959 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2960 2961 if (eicr & rxr->rx_intr_mask) { 2962 lwkt_serialize_enter(&rxr->rx_serialize); 2963 igb_rxeof(rxr, -1); 2964 lwkt_serialize_exit(&rxr->rx_serialize); 2965 } 2966 } 2967 2968 txr = &sc->tx_rings[0]; 2969 if (eicr & txr->tx_intr_mask) { 2970 lwkt_serialize_enter(&txr->tx_serialize); 2971 igb_txeof(txr); 2972 if (!ifq_is_empty(&ifp->if_snd)) 2973 if_devstart(ifp); 2974 lwkt_serialize_exit(&txr->tx_serialize); 2975 } 2976 } 2977 2978 if (eicr & E1000_EICR_OTHER) { 2979 uint32_t icr = E1000_READ_REG(&sc->hw, E1000_ICR); 2980 2981 /* Link status change */ 2982 if (icr & E1000_ICR_LSC) { 2983 sc->hw.mac.get_link_status 
= 1; 2984 igb_update_link_status(sc); 2985 } 2986 } 2987 2988 /* 2989 * Reading EICR has the side effect to clear interrupt mask, 2990 * so all interrupts need to be enabled here. 2991 */ 2992 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2993 } 2994 2995 static void 2996 igb_intr_shared(void *xsc) 2997 { 2998 struct igb_softc *sc = xsc; 2999 struct ifnet *ifp = &sc->arpcom.ac_if; 3000 uint32_t reg_icr; 3001 3002 ASSERT_SERIALIZED(&sc->main_serialize); 3003 3004 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3005 3006 /* Hot eject? */ 3007 if (reg_icr == 0xffffffff) 3008 return; 3009 3010 /* Definitely not our interrupt. */ 3011 if (reg_icr == 0x0) 3012 return; 3013 3014 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) 3015 return; 3016 3017 if (ifp->if_flags & IFF_RUNNING) { 3018 if (reg_icr & 3019 (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) { 3020 int i; 3021 3022 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3023 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3024 3025 lwkt_serialize_enter(&rxr->rx_serialize); 3026 igb_rxeof(rxr, -1); 3027 lwkt_serialize_exit(&rxr->rx_serialize); 3028 } 3029 } 3030 3031 if (reg_icr & E1000_ICR_TXDW) { 3032 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3033 3034 lwkt_serialize_enter(&txr->tx_serialize); 3035 igb_txeof(txr); 3036 if (!ifq_is_empty(&ifp->if_snd)) 3037 if_devstart(ifp); 3038 lwkt_serialize_exit(&txr->tx_serialize); 3039 } 3040 } 3041 3042 /* Link status change */ 3043 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3044 sc->hw.mac.get_link_status = 1; 3045 igb_update_link_status(sc); 3046 } 3047 3048 if (reg_icr & E1000_ICR_RXO) 3049 sc->rx_overruns++; 3050 } 3051 3052 static int 3053 igb_encap(struct igb_tx_ring *txr, struct mbuf **m_headp) 3054 { 3055 bus_dma_segment_t segs[IGB_MAX_SCATTER]; 3056 bus_dmamap_t map; 3057 struct igb_tx_buf *tx_buf, *tx_buf_mapped; 3058 union e1000_adv_tx_desc *txd = NULL; 3059 struct mbuf *m_head = *m_headp; 3060 uint32_t olinfo_status = 0, cmd_type_len = 0, cmd_rs = 0; 3061 int maxsegs, nsegs, i, j, error, last = 0; 3062 uint32_t hdrlen = 0; 3063 3064 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3065 error = igb_tso_pullup(txr, m_headp); 3066 if (error) 3067 return error; 3068 m_head = *m_headp; 3069 } 3070 3071 /* Set basic descriptor constants */ 3072 cmd_type_len |= E1000_ADVTXD_DTYP_DATA; 3073 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT; 3074 if (m_head->m_flags & M_VLANTAG) 3075 cmd_type_len |= E1000_ADVTXD_DCMD_VLE; 3076 3077 /* 3078 * Map the packet for DMA. 3079 */ 3080 tx_buf = &txr->tx_buf[txr->next_avail_desc]; 3081 tx_buf_mapped = tx_buf; 3082 map = tx_buf->map; 3083 3084 maxsegs = txr->tx_avail - IGB_TX_RESERVED; 3085 KASSERT(maxsegs >= txr->spare_desc, ("not enough spare TX desc\n")); 3086 if (maxsegs > IGB_MAX_SCATTER) 3087 maxsegs = IGB_MAX_SCATTER; 3088 3089 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp, 3090 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT); 3091 if (error) { 3092 if (error == ENOBUFS) 3093 txr->sc->mbuf_defrag_failed++; 3094 else 3095 txr->sc->no_tx_dma_setup++; 3096 3097 m_freem(*m_headp); 3098 *m_headp = NULL; 3099 return error; 3100 } 3101 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE); 3102 3103 m_head = *m_headp; 3104 3105 #if 0 3106 /* 3107 * Set up the context descriptor: 3108 * used when any hardware offload is done. 3109 * This includes CSUM, VLAN, and TSO. It 3110 * will use the first descriptor. 
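* (The active path in the #else branch below does the same job with
* igb_tso_ctx() and igb_txcsum_ctx().)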
3111 */ 3112 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3113 if (igb_tso_setup(txr, m_head, &hdrlen)) { 3114 cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 3115 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 3116 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3117 } else 3118 return (ENXIO); 3119 } else if (igb_tx_ctx_setup(txr, m_head)) 3120 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3121 #else 3122 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3123 igb_tso_ctx(txr, m_head, &hdrlen); 3124 cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 3125 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 3126 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3127 txr->tx_nsegs++; 3128 } else if (igb_txcsum_ctx(txr, m_head)) { 3129 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3130 olinfo_status |= (E1000_TXD_POPTS_IXSM << 8); 3131 if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP)) 3132 olinfo_status |= (E1000_TXD_POPTS_TXSM << 8); 3133 txr->tx_nsegs++; 3134 } 3135 #endif 3136 3137 txr->tx_nsegs += nsegs; 3138 if (txr->tx_nsegs >= txr->intr_nsegs) { 3139 /* 3140 * Report Status (RS) is turned on every intr_nsegs 3141 * descriptors (roughly). 3142 */ 3143 txr->tx_nsegs = 0; 3144 cmd_rs = E1000_ADVTXD_DCMD_RS; 3145 } 3146 3147 /* Calculate payload length */ 3148 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen) 3149 << E1000_ADVTXD_PAYLEN_SHIFT); 3150 3151 /* 82575 needs the queue index added */ 3152 if (txr->sc->hw.mac.type == e1000_82575) 3153 olinfo_status |= txr->me << 4; 3154 3155 /* Set up our transmit descriptors */ 3156 i = txr->next_avail_desc; 3157 for (j = 0; j < nsegs; j++) { 3158 bus_size_t seg_len; 3159 bus_addr_t seg_addr; 3160 3161 tx_buf = &txr->tx_buf[i]; 3162 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; 3163 seg_addr = segs[j].ds_addr; 3164 seg_len = segs[j].ds_len; 3165 3166 txd->read.buffer_addr = htole64(seg_addr); 3167 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len); 3168 txd->read.olinfo_status = htole32(olinfo_status); 3169 last = i; 3170 if (++i == txr->num_tx_desc) 3171 i = 0; 3172 tx_buf->m_head = NULL; 3173 } 3174 3175 KASSERT(txr->tx_avail > nsegs, ("invalid avail TX desc\n")); 3176 txr->next_avail_desc = i; 3177 txr->tx_avail -= nsegs; 3178 3179 tx_buf->m_head = m_head; 3180 tx_buf_mapped->map = tx_buf->map; 3181 tx_buf->map = map; 3182 3183 /* 3184 * Last Descriptor of Packet needs End Of Packet (EOP) 3185 */ 3186 txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | cmd_rs); 3187 3188 /* 3189 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 3190 * that this frame is available to transmit. 
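* At this point 'i' is one past the last descriptor used, which is
* exactly the value the hardware expects in TDT.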
3191 */
3192 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), i);
3193 ++txr->tx_packets;
3194
3195 return 0;
3196 }
3197
3198 static void
3199 igb_start(struct ifnet *ifp)
3200 {
3201 struct igb_softc *sc = ifp->if_softc;
3202 struct igb_tx_ring *txr = &sc->tx_rings[0];
3203 struct mbuf *m_head;
3204
3205 ASSERT_SERIALIZED(&txr->tx_serialize);
3206
3207 if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
3208 return;
3209
3210 if (!sc->link_active) {
3211 ifq_purge(&ifp->if_snd);
3212 return;
3213 }
3214
3215 if (!IGB_IS_NOT_OACTIVE(txr))
3216 igb_txeof(txr);
3217
3218 while (!ifq_is_empty(&ifp->if_snd)) {
3219 if (IGB_IS_OACTIVE(txr)) {
3220 ifp->if_flags |= IFF_OACTIVE;
3221 /* Set watchdog on */
3222 ifp->if_timer = 5;
3223 break;
3224 }
3225
3226 m_head = ifq_dequeue(&ifp->if_snd, NULL);
3227 if (m_head == NULL)
3228 break;
3229
3230 if (igb_encap(txr, &m_head)) {
3231 ifp->if_oerrors++;
3232 continue;
3233 }
3234
3235 /* Send a copy of the frame to the BPF listener */
3236 ETHER_BPF_MTAP(ifp, m_head);
3237 }
3238 }
3239
3240 static void
3241 igb_watchdog(struct ifnet *ifp)
3242 {
3243 struct igb_softc *sc = ifp->if_softc;
3244 struct igb_tx_ring *txr = &sc->tx_rings[0];
3245
3246 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3247
3248 /*
3249 * If flow control has paused us since last checking,
3250 * it invalidates the watchdog timing, so don't run it.
3251 */
3252 if (sc->pause_frames) {
3253 sc->pause_frames = 0;
3254 ifp->if_timer = 5;
3255 return;
3256 }
3257
3258 if_printf(ifp, "Watchdog timeout -- resetting\n");
3259 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
3260 E1000_READ_REG(&sc->hw, E1000_TDH(txr->me)),
3261 E1000_READ_REG(&sc->hw, E1000_TDT(txr->me)));
3262 if_printf(ifp, "TX(%d) desc avail = %d, "
3263 "Next TX to Clean = %d\n",
3264 txr->me, txr->tx_avail, txr->next_to_clean);
3265
3266 ifp->if_oerrors++;
3267 sc->watchdog_events++;
3268
3269 igb_init(sc);
3270 if (!ifq_is_empty(&ifp->if_snd))
3271 if_devstart(ifp);
3272 }
3273
3274 static void
3275 igb_set_eitr(struct igb_softc *sc, int idx, int rate)
3276 {
3277 uint32_t eitr = 0;
3278
3279 if (rate > 0) {
3280 if (sc->hw.mac.type == e1000_82575) {
3281 eitr = 1000000000 / 256 / rate;
3282 /*
3283 * NOTE:
3284 * The datasheet is wrong about the 2-bit left shift
3285 */
3286 } else {
3287 eitr = 1000000 / rate;
3288 eitr <<= IGB_EITR_INTVL_SHIFT;
3289 }
3290
3291 if (eitr == 0) {
3292 /* Don't disable it */
3293 eitr = 1 << IGB_EITR_INTVL_SHIFT;
3294 } else if (eitr > IGB_EITR_INTVL_MASK) {
3295 /* Don't allow it to be too large */
3296 eitr = IGB_EITR_INTVL_MASK;
3297 }
3298 }
3299 if (sc->hw.mac.type == e1000_82575)
3300 eitr |= eitr << 16;
3301 else
3302 eitr |= E1000_EITR_CNT_IGNR;
3303 E1000_WRITE_REG(&sc->hw, E1000_EITR(idx), eitr);
3304 }
3305
3306 static int
3307 igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS)
3308 {
3309 struct igb_softc *sc = (void *)arg1;
3310 struct ifnet *ifp = &sc->arpcom.ac_if;
3311 int error, intr_rate;
3312
3313 intr_rate = sc->intr_rate;
3314 error = sysctl_handle_int(oidp, &intr_rate, 0, req);
3315 if (error || req->newptr == NULL)
3316 return error;
3317 if (intr_rate < 0)
3318 return EINVAL;
3319
3320 ifnet_serialize_all(ifp);
3321
3322 sc->intr_rate = intr_rate;
3323 if (ifp->if_flags & IFF_RUNNING)
3324 igb_set_eitr(sc, 0, sc->intr_rate);
3325
3326 if (bootverbose)
3327 if_printf(ifp, "interrupt rate set to %d/sec\n", sc->intr_rate);
3328
3329 ifnet_deserialize_all(ifp);
3330
3331 return 0;
3332 }
3333
3334 static int
3335 igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS)
3336 {
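/*
 * Same idea as igb_sysctl_intr_rate() above, but per MSI-X vector:
 * only that vector's serializer is held while its rate is updated.
 */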
3337 struct igb_msix_data *msix = (void *)arg1; 3338 struct igb_softc *sc = msix->msix_sc; 3339 struct ifnet *ifp = &sc->arpcom.ac_if; 3340 int error, msix_rate; 3341 3342 msix_rate = msix->msix_rate; 3343 error = sysctl_handle_int(oidp, &msix_rate, 0, req); 3344 if (error || req->newptr == NULL) 3345 return error; 3346 if (msix_rate < 0) 3347 return EINVAL; 3348 3349 lwkt_serialize_enter(msix->msix_serialize); 3350 3351 msix->msix_rate = msix_rate; 3352 if (ifp->if_flags & IFF_RUNNING) 3353 igb_set_eitr(sc, msix->msix_vector, msix->msix_rate); 3354 3355 if (bootverbose) { 3356 if_printf(ifp, "%s set to %d/sec\n", msix->msix_rate_desc, 3357 msix->msix_rate); 3358 } 3359 3360 lwkt_serialize_exit(msix->msix_serialize); 3361 3362 return 0; 3363 } 3364 3365 static int 3366 igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS) 3367 { 3368 struct igb_softc *sc = (void *)arg1; 3369 struct ifnet *ifp = &sc->arpcom.ac_if; 3370 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3371 int error, nsegs; 3372 3373 nsegs = txr->intr_nsegs; 3374 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3375 if (error || req->newptr == NULL) 3376 return error; 3377 if (nsegs <= 0) 3378 return EINVAL; 3379 3380 ifnet_serialize_all(ifp); 3381 3382 if (nsegs >= txr->num_tx_desc - txr->oact_lo_desc || 3383 nsegs >= txr->oact_hi_desc - IGB_MAX_SCATTER) { 3384 error = EINVAL; 3385 } else { 3386 error = 0; 3387 txr->intr_nsegs = nsegs; 3388 } 3389 3390 ifnet_deserialize_all(ifp); 3391 3392 return error; 3393 } 3394 3395 static void 3396 igb_init_intr(struct igb_softc *sc) 3397 { 3398 igb_set_intr_mask(sc); 3399 3400 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) 3401 igb_init_unshared_intr(sc); 3402 3403 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3404 igb_set_eitr(sc, 0, sc->intr_rate); 3405 } else { 3406 int i; 3407 3408 for (i = 0; i < sc->msix_cnt; ++i) 3409 igb_set_eitr(sc, i, sc->msix_data[i].msix_rate); 3410 } 3411 } 3412 3413 static void 3414 igb_init_unshared_intr(struct igb_softc *sc) 3415 { 3416 struct e1000_hw *hw = &sc->hw; 3417 const struct igb_rx_ring *rxr; 3418 const struct igb_tx_ring *txr; 3419 uint32_t ivar, index; 3420 int i; 3421 3422 /* 3423 * Enable extended mode 3424 */ 3425 if (sc->hw.mac.type != e1000_82575) { 3426 uint32_t gpie; 3427 int ivar_max; 3428 3429 gpie = E1000_GPIE_NSICR; 3430 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3431 gpie |= E1000_GPIE_MSIX_MODE | 3432 E1000_GPIE_EIAME | 3433 E1000_GPIE_PBA; 3434 } 3435 E1000_WRITE_REG(hw, E1000_GPIE, gpie); 3436 3437 /* 3438 * Clear IVARs 3439 */ 3440 switch (sc->hw.mac.type) { 3441 case e1000_82580: 3442 ivar_max = IGB_MAX_IVAR_82580; 3443 break; 3444 3445 case e1000_i350: 3446 ivar_max = IGB_MAX_IVAR_I350; 3447 break; 3448 3449 case e1000_vfadapt: 3450 case e1000_vfadapt_i350: 3451 ivar_max = IGB_MAX_IVAR_VF; 3452 break; 3453 3454 case e1000_82576: 3455 ivar_max = IGB_MAX_IVAR_82576; 3456 break; 3457 3458 default: 3459 panic("unknown mac type %d\n", sc->hw.mac.type); 3460 } 3461 for (i = 0; i < ivar_max; ++i) 3462 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, 0); 3463 E1000_WRITE_REG(hw, E1000_IVAR_MISC, 0); 3464 } else { 3465 uint32_t tmp; 3466 3467 KASSERT(sc->intr_type != PCI_INTR_TYPE_MSIX, 3468 ("82575 w/ MSI-X")); 3469 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 3470 tmp |= E1000_CTRL_EXT_IRCA; 3471 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 3472 } 3473 3474 /* 3475 * Map TX/RX interrupts to EICR 3476 */ 3477 switch (sc->hw.mac.type) { 3478 case e1000_82580: 3479 case e1000_i350: 3480 case e1000_vfadapt: 3481 case e1000_vfadapt_i350: 3482 /* RX entries */ 3483 
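/*
 * Each 32-bit IVAR register holds four 8-bit entries.  For these
 * MACs, RX queue N lands in byte 0 (even N) or byte 2 (odd N) of
 * IVAR[N/2], and TX queue N in byte 1 or byte 3, as programmed below.
 */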
for (i = 0; i < sc->rx_ring_inuse; ++i) { 3484 rxr = &sc->rx_rings[i]; 3485 3486 index = i >> 1; 3487 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3488 3489 if (i & 1) { 3490 ivar &= 0xff00ffff; 3491 ivar |= 3492 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3493 } else { 3494 ivar &= 0xffffff00; 3495 ivar |= 3496 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3497 } 3498 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3499 } 3500 /* TX entries */ 3501 for (i = 0; i < sc->tx_ring_cnt; ++i) { 3502 txr = &sc->tx_rings[i]; 3503 3504 index = i >> 1; 3505 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3506 3507 if (i & 1) { 3508 ivar &= 0x00ffffff; 3509 ivar |= 3510 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3511 } else { 3512 ivar &= 0xffff00ff; 3513 ivar |= 3514 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3515 } 3516 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3517 } 3518 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3519 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3520 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3521 } 3522 break; 3523 3524 case e1000_82576: 3525 /* RX entries */ 3526 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3527 rxr = &sc->rx_rings[i]; 3528 3529 index = i & 0x7; /* Each IVAR has two entries */ 3530 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3531 3532 if (i < 8) { 3533 ivar &= 0xffffff00; 3534 ivar |= 3535 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3536 } else { 3537 ivar &= 0xff00ffff; 3538 ivar |= 3539 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3540 } 3541 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3542 } 3543 /* TX entries */ 3544 for (i = 0; i < sc->tx_ring_cnt; ++i) { 3545 txr = &sc->tx_rings[i]; 3546 3547 index = i & 0x7; /* Each IVAR has two entries */ 3548 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3549 3550 if (i < 8) { 3551 ivar &= 0xffff00ff; 3552 ivar |= 3553 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3554 } else { 3555 ivar &= 0x00ffffff; 3556 ivar |= 3557 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3558 } 3559 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3560 } 3561 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3562 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3563 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3564 } 3565 break; 3566 3567 case e1000_82575: 3568 /* 3569 * Enable necessary interrupt bits. 3570 * 3571 * The name of the register is confusing; in addition to 3572 * configuring the first vector of MSI-X, it also configures 3573 * which bits of EICR could be set by the hardware even when 3574 * MSI or line interrupt is used; it thus controls interrupt 3575 * generation. It MUST be configured explicitly; the default 3576 * value mentioned in the datasheet is wrong: RX queue0 and 3577 * TX queue0 are NOT enabled by default. 3578 */ 3579 E1000_WRITE_REG(&sc->hw, E1000_MSIXBM(0), sc->intr_mask); 3580 break; 3581 3582 default: 3583 panic("unknown mac type %d\n", sc->hw.mac.type); 3584 } 3585 } 3586 3587 static int 3588 igb_setup_intr(struct igb_softc *sc) 3589 { 3590 struct ifnet *ifp = &sc->arpcom.ac_if; 3591 int error; 3592 3593 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 3594 return igb_msix_setup(sc); 3595 3596 error = bus_setup_intr(sc->dev, sc->intr_res, INTR_MPSAFE, 3597 (sc->flags & IGB_FLAG_SHARED_INTR) ? 
igb_intr_shared : igb_intr, 3598 sc, &sc->intr_tag, &sc->main_serialize); 3599 if (error) { 3600 device_printf(sc->dev, "Failed to register interrupt handler"); 3601 return error; 3602 } 3603 3604 ifp->if_cpuid = rman_get_cpuid(sc->intr_res); 3605 KKASSERT(ifp->if_cpuid >= 0 && ifp->if_cpuid < ncpus); 3606 3607 return 0; 3608 } 3609 3610 static void 3611 igb_set_txintr_mask(struct igb_tx_ring *txr, int *intr_bit0, int intr_bitmax) 3612 { 3613 if (txr->sc->hw.mac.type == e1000_82575) { 3614 txr->tx_intr_bit = 0; /* unused */ 3615 switch (txr->me) { 3616 case 0: 3617 txr->tx_intr_mask = E1000_EICR_TX_QUEUE0; 3618 break; 3619 case 1: 3620 txr->tx_intr_mask = E1000_EICR_TX_QUEUE1; 3621 break; 3622 case 2: 3623 txr->tx_intr_mask = E1000_EICR_TX_QUEUE2; 3624 break; 3625 case 3: 3626 txr->tx_intr_mask = E1000_EICR_TX_QUEUE3; 3627 break; 3628 default: 3629 panic("unsupported # of TX ring, %d\n", txr->me); 3630 } 3631 } else { 3632 int intr_bit = *intr_bit0; 3633 3634 txr->tx_intr_bit = intr_bit % intr_bitmax; 3635 txr->tx_intr_mask = 1 << txr->tx_intr_bit; 3636 3637 *intr_bit0 = intr_bit + 1; 3638 } 3639 } 3640 3641 static void 3642 igb_set_rxintr_mask(struct igb_rx_ring *rxr, int *intr_bit0, int intr_bitmax) 3643 { 3644 if (rxr->sc->hw.mac.type == e1000_82575) { 3645 rxr->rx_intr_bit = 0; /* unused */ 3646 switch (rxr->me) { 3647 case 0: 3648 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE0; 3649 break; 3650 case 1: 3651 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE1; 3652 break; 3653 case 2: 3654 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE2; 3655 break; 3656 case 3: 3657 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE3; 3658 break; 3659 default: 3660 panic("unsupported # of RX ring, %d\n", rxr->me); 3661 } 3662 } else { 3663 int intr_bit = *intr_bit0; 3664 3665 rxr->rx_intr_bit = intr_bit % intr_bitmax; 3666 rxr->rx_intr_mask = 1 << rxr->rx_intr_bit; 3667 3668 *intr_bit0 = intr_bit + 1; 3669 } 3670 } 3671 3672 static void 3673 igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 3674 { 3675 struct igb_softc *sc = ifp->if_softc; 3676 3677 ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt, 3678 sc->tx_serialize, sc->rx_serialize, slz); 3679 } 3680 3681 static void 3682 igb_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 3683 { 3684 struct igb_softc *sc = ifp->if_softc; 3685 3686 ifnet_serialize_array_exit(sc->serializes, sc->serialize_cnt, 3687 sc->tx_serialize, sc->rx_serialize, slz); 3688 } 3689 3690 static int 3691 igb_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 3692 { 3693 struct igb_softc *sc = ifp->if_softc; 3694 3695 return ifnet_serialize_array_try(sc->serializes, sc->serialize_cnt, 3696 sc->tx_serialize, sc->rx_serialize, slz); 3697 } 3698 3699 #ifdef INVARIANTS 3700 3701 static void 3702 igb_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 3703 boolean_t serialized) 3704 { 3705 struct igb_softc *sc = ifp->if_softc; 3706 3707 ifnet_serialize_array_assert(sc->serializes, sc->serialize_cnt, 3708 sc->tx_serialize, sc->rx_serialize, slz, serialized); 3709 } 3710 3711 #endif /* INVARIANTS */ 3712 3713 static void 3714 igb_set_intr_mask(struct igb_softc *sc) 3715 { 3716 int i; 3717 3718 sc->intr_mask = sc->sts_intr_mask; 3719 for (i = 0; i < sc->rx_ring_inuse; ++i) 3720 sc->intr_mask |= sc->rx_rings[i].rx_intr_mask; 3721 for (i = 0; i < sc->tx_ring_cnt; ++i) 3722 sc->intr_mask |= sc->tx_rings[i].tx_intr_mask; 3723 if (bootverbose) 3724 device_printf(sc->dev, "intr mask 0x%08x\n", sc->intr_mask); 3725 } 3726 3727 static int 3728 igb_alloc_intr(struct igb_softc *sc) 
3729 { 3730 int i, intr_bit, intr_bitmax; 3731 u_int intr_flags; 3732 3733 igb_msix_try_alloc(sc); 3734 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 3735 goto done; 3736 3737 /* 3738 * Allocate MSI/legacy interrupt resource 3739 */ 3740 sc->intr_type = pci_alloc_1intr(sc->dev, igb_msi_enable, 3741 &sc->intr_rid, &intr_flags); 3742 3743 if (sc->intr_type == PCI_INTR_TYPE_LEGACY) { 3744 int unshared; 3745 3746 unshared = device_getenv_int(sc->dev, "irq.unshared", 0); 3747 if (!unshared) { 3748 sc->flags |= IGB_FLAG_SHARED_INTR; 3749 if (bootverbose) 3750 device_printf(sc->dev, "IRQ shared\n"); 3751 } else { 3752 intr_flags &= ~RF_SHAREABLE; 3753 if (bootverbose) 3754 device_printf(sc->dev, "IRQ unshared\n"); 3755 } 3756 } 3757 3758 sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 3759 &sc->intr_rid, intr_flags); 3760 if (sc->intr_res == NULL) { 3761 device_printf(sc->dev, "Unable to allocate bus resource: " 3762 "interrupt\n"); 3763 return ENXIO; 3764 } 3765 3766 /* 3767 * Setup MSI/legacy interrupt mask 3768 */ 3769 switch (sc->hw.mac.type) { 3770 case e1000_82575: 3771 intr_bitmax = IGB_MAX_TXRXINT_82575; 3772 break; 3773 case e1000_82580: 3774 intr_bitmax = IGB_MAX_TXRXINT_82580; 3775 break; 3776 case e1000_i350: 3777 intr_bitmax = IGB_MAX_TXRXINT_I350; 3778 break; 3779 case e1000_82576: 3780 intr_bitmax = IGB_MAX_TXRXINT_82576; 3781 break; 3782 default: 3783 intr_bitmax = IGB_MIN_TXRXINT; 3784 break; 3785 } 3786 intr_bit = 0; 3787 for (i = 0; i < sc->tx_ring_cnt; ++i) 3788 igb_set_txintr_mask(&sc->tx_rings[i], &intr_bit, intr_bitmax); 3789 for (i = 0; i < sc->rx_ring_cnt; ++i) 3790 igb_set_rxintr_mask(&sc->rx_rings[i], &intr_bit, intr_bitmax); 3791 sc->sts_intr_bit = 0; 3792 sc->sts_intr_mask = E1000_EICR_OTHER; 3793 3794 /* Initialize interrupt rate */ 3795 sc->intr_rate = IGB_INTR_RATE; 3796 done: 3797 igb_set_ring_inuse(sc, FALSE); 3798 igb_set_intr_mask(sc); 3799 return 0; 3800 } 3801 3802 static void 3803 igb_free_intr(struct igb_softc *sc) 3804 { 3805 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3806 if (sc->intr_res != NULL) { 3807 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr_rid, 3808 sc->intr_res); 3809 } 3810 if (sc->intr_type == PCI_INTR_TYPE_MSI) 3811 pci_release_msi(sc->dev); 3812 } else { 3813 igb_msix_free(sc, TRUE); 3814 } 3815 } 3816 3817 static void 3818 igb_teardown_intr(struct igb_softc *sc) 3819 { 3820 if (sc->intr_type != PCI_INTR_TYPE_MSIX) 3821 bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag); 3822 else 3823 igb_msix_teardown(sc, sc->msix_cnt); 3824 } 3825 3826 static void 3827 igb_msix_try_alloc(struct igb_softc *sc) 3828 { 3829 int msix_enable, msix_cnt, msix_cnt2, alloc_cnt; 3830 int i, x, error; 3831 struct igb_msix_data *msix; 3832 boolean_t aggregate, setup = FALSE; 3833 3834 /* 3835 * Don't enable MSI-X on 82575, see: 3836 * 82575 specification update errata #25 3837 */ 3838 if (sc->hw.mac.type == e1000_82575) 3839 return; 3840 3841 /* Don't enable MSI-X on VF */ 3842 if (sc->vf_ifp) 3843 return; 3844 3845 msix_enable = device_getenv_int(sc->dev, "msix.enable", 3846 igb_msix_enable); 3847 if (!msix_enable) 3848 return; 3849 3850 msix_cnt = pci_msix_count(sc->dev); 3851 #ifdef IGB_MSIX_DEBUG 3852 msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt); 3853 #endif 3854 if (msix_cnt <= 1) { 3855 /* One MSI-X model does not make sense */ 3856 return; 3857 } 3858 3859 i = 0; 3860 while ((1 << (i + 1)) <= msix_cnt) 3861 ++i; 3862 msix_cnt2 = 1 << i; 3863 3864 if (bootverbose) { 3865 device_printf(sc->dev, "MSI-X count %d/%d\n", 3866 
msix_cnt2, msix_cnt); 3867 } 3868 3869 KKASSERT(msix_cnt2 <= msix_cnt); 3870 if (msix_cnt == msix_cnt2) { 3871 /* We need at least one MSI-X for link status */ 3872 msix_cnt2 >>= 1; 3873 if (msix_cnt2 <= 1) { 3874 /* One MSI-X for RX/TX does not make sense */ 3875 device_printf(sc->dev, "not enough MSI-X for TX/RX, " 3876 "MSI-X count %d/%d\n", msix_cnt2, msix_cnt); 3877 return; 3878 } 3879 KKASSERT(msix_cnt > msix_cnt2); 3880 3881 if (bootverbose) { 3882 device_printf(sc->dev, "MSI-X count fixup %d/%d\n", 3883 msix_cnt2, msix_cnt); 3884 } 3885 } 3886 3887 sc->rx_ring_msix = sc->rx_ring_cnt; 3888 if (sc->rx_ring_msix > msix_cnt2) 3889 sc->rx_ring_msix = msix_cnt2; 3890 3891 if (msix_cnt >= sc->tx_ring_cnt + sc->rx_ring_msix + 1) { 3892 /* 3893 * Independent TX/RX MSI-X 3894 */ 3895 aggregate = FALSE; 3896 if (bootverbose) 3897 device_printf(sc->dev, "independent TX/RX MSI-X\n"); 3898 alloc_cnt = sc->tx_ring_cnt + sc->rx_ring_msix; 3899 } else { 3900 /* 3901 * Aggregate TX/RX MSI-X 3902 */ 3903 aggregate = TRUE; 3904 if (bootverbose) 3905 device_printf(sc->dev, "aggregate TX/RX MSI-X\n"); 3906 alloc_cnt = msix_cnt2; 3907 if (alloc_cnt > ncpus2) 3908 alloc_cnt = ncpus2; 3909 if (sc->rx_ring_msix > alloc_cnt) 3910 sc->rx_ring_msix = alloc_cnt; 3911 } 3912 ++alloc_cnt; /* For link status */ 3913 3914 if (bootverbose) { 3915 device_printf(sc->dev, "MSI-X alloc %d, RX ring %d\n", 3916 alloc_cnt, sc->rx_ring_msix); 3917 } 3918 3919 sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR); 3920 sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 3921 &sc->msix_mem_rid, RF_ACTIVE); 3922 if (sc->msix_mem_res == NULL) { 3923 device_printf(sc->dev, "Unable to map MSI-X table\n"); 3924 return; 3925 } 3926 3927 sc->msix_cnt = alloc_cnt; 3928 sc->msix_data = kmalloc(sizeof(struct igb_msix_data) * sc->msix_cnt, 3929 M_DEVBUF, M_WAITOK | M_ZERO); 3930 for (x = 0; x < sc->msix_cnt; ++x) { 3931 msix = &sc->msix_data[x]; 3932 3933 lwkt_serialize_init(&msix->msix_serialize0); 3934 msix->msix_sc = sc; 3935 msix->msix_rid = -1; 3936 msix->msix_vector = x; 3937 msix->msix_mask = 1 << msix->msix_vector; 3938 msix->msix_rate = IGB_INTR_RATE; 3939 } 3940 3941 x = 0; 3942 if (!aggregate) { 3943 int offset, offset_def; 3944 3945 if (sc->rx_ring_msix == ncpus2) { 3946 offset = 0; 3947 } else { 3948 offset_def = (sc->rx_ring_msix * 3949 device_get_unit(sc->dev)) % ncpus2; 3950 3951 offset = device_getenv_int(sc->dev, 3952 "msix.rxoff", offset_def); 3953 if (offset >= ncpus2 || 3954 offset % sc->rx_ring_msix != 0) { 3955 device_printf(sc->dev, 3956 "invalid msix.rxoff %d, use %d\n", 3957 offset, offset_def); 3958 offset = offset_def; 3959 } 3960 } 3961 3962 /* RX rings */ 3963 for (i = 0; i < sc->rx_ring_msix; ++i) { 3964 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3965 3966 KKASSERT(x < sc->msix_cnt); 3967 msix = &sc->msix_data[x++]; 3968 rxr->rx_intr_bit = msix->msix_vector; 3969 rxr->rx_intr_mask = msix->msix_mask; 3970 3971 msix->msix_serialize = &rxr->rx_serialize; 3972 msix->msix_func = igb_msix_rx; 3973 msix->msix_arg = rxr; 3974 msix->msix_cpuid = i + offset; 3975 KKASSERT(msix->msix_cpuid < ncpus2); 3976 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), 3977 "%s rx%d", device_get_nameunit(sc->dev), i); 3978 msix->msix_rate = IGB_MSIX_RX_RATE; 3979 ksnprintf(msix->msix_rate_desc, 3980 sizeof(msix->msix_rate_desc), 3981 "RX%d interrupt rate", i); 3982 } 3983 3984 offset_def = device_get_unit(sc->dev) % ncpus2; 3985 offset = device_getenv_int(sc->dev, "msix.txoff", offset_def); 3986 if (offset >= ncpus2) { 3987 
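/* The msix.txoff tunable is out of range; fall back to the default offset. */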
device_printf(sc->dev, "invalid msix.txoff %d, " 3988 "use %d\n", offset, offset_def); 3989 offset = offset_def; 3990 } 3991 3992 /* TX rings */ 3993 for (i = 0; i < sc->tx_ring_cnt; ++i) { 3994 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3995 3996 KKASSERT(x < sc->msix_cnt); 3997 msix = &sc->msix_data[x++]; 3998 txr->tx_intr_bit = msix->msix_vector; 3999 txr->tx_intr_mask = msix->msix_mask; 4000 4001 msix->msix_serialize = &txr->tx_serialize; 4002 msix->msix_func = igb_msix_tx; 4003 msix->msix_arg = txr; 4004 msix->msix_cpuid = i + offset; 4005 sc->msix_tx_cpuid = msix->msix_cpuid; /* XXX */ 4006 KKASSERT(msix->msix_cpuid < ncpus2); 4007 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), 4008 "%s tx%d", device_get_nameunit(sc->dev), i); 4009 msix->msix_rate = IGB_MSIX_TX_RATE; 4010 ksnprintf(msix->msix_rate_desc, 4011 sizeof(msix->msix_rate_desc), 4012 "TX%d interrupt rate", i); 4013 } 4014 } else { 4015 /* TODO */ 4016 error = EOPNOTSUPP; 4017 goto back; 4018 } 4019 4020 /* 4021 * Link status 4022 */ 4023 KKASSERT(x < sc->msix_cnt); 4024 msix = &sc->msix_data[x++]; 4025 sc->sts_intr_bit = msix->msix_vector; 4026 sc->sts_intr_mask = msix->msix_mask; 4027 4028 msix->msix_serialize = &sc->main_serialize; 4029 msix->msix_func = igb_msix_status; 4030 msix->msix_arg = sc; 4031 msix->msix_cpuid = 0; /* TODO tunable */ 4032 ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s sts", 4033 device_get_nameunit(sc->dev)); 4034 ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc), 4035 "status interrupt rate"); 4036 4037 KKASSERT(x == sc->msix_cnt); 4038 4039 error = pci_setup_msix(sc->dev); 4040 if (error) { 4041 device_printf(sc->dev, "Setup MSI-X failed\n"); 4042 goto back; 4043 } 4044 setup = TRUE; 4045 4046 for (i = 0; i < sc->msix_cnt; ++i) { 4047 msix = &sc->msix_data[i]; 4048 4049 error = pci_alloc_msix_vector(sc->dev, msix->msix_vector, 4050 &msix->msix_rid, msix->msix_cpuid); 4051 if (error) { 4052 device_printf(sc->dev, 4053 "Unable to allocate MSI-X %d on cpu%d\n", 4054 msix->msix_vector, msix->msix_cpuid); 4055 goto back; 4056 } 4057 4058 msix->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4059 &msix->msix_rid, RF_ACTIVE); 4060 if (msix->msix_res == NULL) { 4061 device_printf(sc->dev, 4062 "Unable to allocate MSI-X %d resource\n", 4063 msix->msix_vector); 4064 error = ENOMEM; 4065 goto back; 4066 } 4067 } 4068 4069 pci_enable_msix(sc->dev); 4070 sc->intr_type = PCI_INTR_TYPE_MSIX; 4071 back: 4072 if (error) 4073 igb_msix_free(sc, setup); 4074 } 4075 4076 static void 4077 igb_msix_free(struct igb_softc *sc, boolean_t setup) 4078 { 4079 int i; 4080 4081 KKASSERT(sc->msix_cnt > 1); 4082 4083 for (i = 0; i < sc->msix_cnt; ++i) { 4084 struct igb_msix_data *msix = &sc->msix_data[i]; 4085 4086 if (msix->msix_res != NULL) { 4087 bus_release_resource(sc->dev, SYS_RES_IRQ, 4088 msix->msix_rid, msix->msix_res); 4089 } 4090 if (msix->msix_rid >= 0) 4091 pci_release_msix_vector(sc->dev, msix->msix_rid); 4092 } 4093 if (setup) 4094 pci_teardown_msix(sc->dev); 4095 4096 sc->msix_cnt = 0; 4097 kfree(sc->msix_data, M_DEVBUF); 4098 sc->msix_data = NULL; 4099 } 4100 4101 static int 4102 igb_msix_setup(struct igb_softc *sc) 4103 { 4104 struct ifnet *ifp = &sc->arpcom.ac_if; 4105 int i; 4106 4107 for (i = 0; i < sc->msix_cnt; ++i) { 4108 struct igb_msix_data *msix = &sc->msix_data[i]; 4109 int error; 4110 4111 error = bus_setup_intr_descr(sc->dev, msix->msix_res, 4112 INTR_MPSAFE, msix->msix_func, msix->msix_arg, 4113 &msix->msix_handle, msix->msix_serialize, msix->msix_desc); 4114 if 
(error) { 4115 device_printf(sc->dev, "could not set up %s " 4116 "interrupt handler.\n", msix->msix_desc); 4117 igb_msix_teardown(sc, i); 4118 return error; 4119 } 4120 } 4121 ifp->if_cpuid = sc->msix_tx_cpuid; 4122 4123 return 0; 4124 } 4125 4126 static void 4127 igb_msix_teardown(struct igb_softc *sc, int msix_cnt) 4128 { 4129 int i; 4130 4131 for (i = 0; i < msix_cnt; ++i) { 4132 struct igb_msix_data *msix = &sc->msix_data[i]; 4133 4134 bus_teardown_intr(sc->dev, msix->msix_res, msix->msix_handle); 4135 } 4136 } 4137 4138 static void 4139 igb_msix_rx(void *arg) 4140 { 4141 struct igb_rx_ring *rxr = arg; 4142 4143 ASSERT_SERIALIZED(&rxr->rx_serialize); 4144 igb_rxeof(rxr, -1); 4145 4146 E1000_WRITE_REG(&rxr->sc->hw, E1000_EIMS, rxr->rx_intr_mask); 4147 } 4148 4149 static void 4150 igb_msix_tx(void *arg) 4151 { 4152 struct igb_tx_ring *txr = arg; 4153 struct ifnet *ifp = &txr->sc->arpcom.ac_if; 4154 4155 ASSERT_SERIALIZED(&txr->tx_serialize); 4156 4157 igb_txeof(txr); 4158 if (!ifq_is_empty(&ifp->if_snd)) 4159 if_devstart(ifp); 4160 4161 E1000_WRITE_REG(&txr->sc->hw, E1000_EIMS, txr->tx_intr_mask); 4162 } 4163 4164 static void 4165 igb_msix_status(void *arg) 4166 { 4167 struct igb_softc *sc = arg; 4168 uint32_t icr; 4169 4170 ASSERT_SERIALIZED(&sc->main_serialize); 4171 4172 icr = E1000_READ_REG(&sc->hw, E1000_ICR); 4173 if (icr & E1000_ICR_LSC) { 4174 sc->hw.mac.get_link_status = 1; 4175 igb_update_link_status(sc); 4176 } 4177 4178 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->sts_intr_mask); 4179 } 4180 4181 static void 4182 igb_set_ring_inuse(struct igb_softc *sc, boolean_t polling) 4183 { 4184 if (!IGB_ENABLE_HWRSS(sc)) 4185 return; 4186 4187 if (sc->intr_type != PCI_INTR_TYPE_MSIX || polling) 4188 sc->rx_ring_inuse = IGB_MIN_RING_RSS; 4189 else 4190 sc->rx_ring_inuse = sc->rx_ring_msix; 4191 if (bootverbose) { 4192 device_printf(sc->dev, "RX rings %d/%d\n", 4193 sc->rx_ring_inuse, sc->rx_ring_cnt); 4194 } 4195 } 4196 4197 static int 4198 igb_tso_pullup(struct igb_tx_ring *txr, struct mbuf **mp) 4199 { 4200 int hoff, iphlen, thoff; 4201 struct mbuf *m; 4202 4203 m = *mp; 4204 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable")); 4205 4206 iphlen = m->m_pkthdr.csum_iphlen; 4207 thoff = m->m_pkthdr.csum_thlen; 4208 hoff = m->m_pkthdr.csum_lhlen; 4209 4210 KASSERT(iphlen > 0, ("invalid ip hlen")); 4211 KASSERT(thoff > 0, ("invalid tcp hlen")); 4212 KASSERT(hoff > 0, ("invalid ether hlen")); 4213 4214 if (__predict_false(m->m_len < hoff + iphlen + thoff)) { 4215 m = m_pullup(m, hoff + iphlen + thoff); 4216 if (m == NULL) { 4217 *mp = NULL; 4218 return ENOBUFS; 4219 } 4220 *mp = m; 4221 } 4222 if (txr->sc->flags & IGB_FLAG_TSO_IPLEN0) { 4223 struct ip *ip; 4224 4225 ip = mtodoff(m, struct ip *, hoff); 4226 ip->ip_len = 0; 4227 } 4228 4229 return 0; 4230 } 4231 4232 static void 4233 igb_tso_ctx(struct igb_tx_ring *txr, struct mbuf *m, uint32_t *hlen) 4234 { 4235 struct e1000_adv_tx_context_desc *TXD; 4236 uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; 4237 int hoff, ctxd, iphlen, thoff; 4238 4239 iphlen = m->m_pkthdr.csum_iphlen; 4240 thoff = m->m_pkthdr.csum_thlen; 4241 hoff = m->m_pkthdr.csum_lhlen; 4242 4243 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; 4244 4245 ctxd = txr->next_avail_desc; 4246 TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd]; 4247 4248 if (m->m_flags & M_VLANTAG) { 4249 uint16_t vlantag; 4250 4251 vlantag = htole16(m->m_pkthdr.ether_vlantag); 4252 vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT); 4253 } 4254 4255 vlan_macip_lens |= 
(hoff << E1000_ADVTXD_MACLEN_SHIFT); 4256 vlan_macip_lens |= iphlen; 4257 4258 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 4259 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 4260 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 4261 4262 mss_l4len_idx |= (m->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); 4263 mss_l4len_idx |= (thoff << E1000_ADVTXD_L4LEN_SHIFT); 4264 /* 82575 needs the queue index added */ 4265 if (txr->sc->hw.mac.type == e1000_82575) 4266 mss_l4len_idx |= txr->me << 4; 4267 4268 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 4269 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 4270 TXD->seqnum_seed = htole32(0); 4271 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 4272 4273 /* We've consumed the first desc, adjust counters */ 4274 if (++ctxd == txr->num_tx_desc) 4275 ctxd = 0; 4276 txr->next_avail_desc = ctxd; 4277 --txr->tx_avail; 4278 4279 *hlen = hoff + iphlen + thoff; 4280 } 4281