1 /* 2 * Copyright (c) 2001-2011, Intel Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * 3. Neither the name of the Intel Corporation nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include "opt_ifpoll.h" 33 #include "opt_igb.h" 34 35 #include <sys/param.h> 36 #include <sys/bus.h> 37 #include <sys/endian.h> 38 #include <sys/interrupt.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/proc.h> 43 #include <sys/rman.h> 44 #include <sys/serialize.h> 45 #include <sys/serialize2.h> 46 #include <sys/socket.h> 47 #include <sys/sockio.h> 48 #include <sys/sysctl.h> 49 #include <sys/systm.h> 50 51 #include <net/bpf.h> 52 #include <net/ethernet.h> 53 #include <net/if.h> 54 #include <net/if_arp.h> 55 #include <net/if_dl.h> 56 #include <net/if_media.h> 57 #include <net/ifq_var.h> 58 #include <net/toeplitz.h> 59 #include <net/toeplitz2.h> 60 #include <net/vlan/if_vlan_var.h> 61 #include <net/vlan/if_vlan_ether.h> 62 #include <net/if_poll.h> 63 64 #include <netinet/in_systm.h> 65 #include <netinet/in.h> 66 #include <netinet/ip.h> 67 #include <netinet/tcp.h> 68 #include <netinet/udp.h> 69 70 #include <bus/pci/pcivar.h> 71 #include <bus/pci/pcireg.h> 72 73 #include <dev/netif/ig_hal/e1000_api.h> 74 #include <dev/netif/ig_hal/e1000_82575.h> 75 #include <dev/netif/igb/if_igb.h> 76 77 #ifdef IGB_RSS_DEBUG 78 #define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) \ 79 do { \ 80 if (sc->rss_debug >= lvl) \ 81 if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \ 82 } while (0) 83 #else /* !IGB_RSS_DEBUG */ 84 #define IGB_RSS_DPRINTF(sc, lvl, fmt, ...) 
((void)0) 85 #endif /* IGB_RSS_DEBUG */ 86 87 #define IGB_NAME "Intel(R) PRO/1000 " 88 #define IGB_DEVICE(id) \ 89 { IGB_VENDOR_ID, E1000_DEV_ID_##id, IGB_NAME #id } 90 #define IGB_DEVICE_NULL { 0, 0, NULL } 91 92 static struct igb_device { 93 uint16_t vid; 94 uint16_t did; 95 const char *desc; 96 } igb_devices[] = { 97 IGB_DEVICE(82575EB_COPPER), 98 IGB_DEVICE(82575EB_FIBER_SERDES), 99 IGB_DEVICE(82575GB_QUAD_COPPER), 100 IGB_DEVICE(82576), 101 IGB_DEVICE(82576_NS), 102 IGB_DEVICE(82576_NS_SERDES), 103 IGB_DEVICE(82576_FIBER), 104 IGB_DEVICE(82576_SERDES), 105 IGB_DEVICE(82576_SERDES_QUAD), 106 IGB_DEVICE(82576_QUAD_COPPER), 107 IGB_DEVICE(82576_QUAD_COPPER_ET2), 108 IGB_DEVICE(82576_VF), 109 IGB_DEVICE(82580_COPPER), 110 IGB_DEVICE(82580_FIBER), 111 IGB_DEVICE(82580_SERDES), 112 IGB_DEVICE(82580_SGMII), 113 IGB_DEVICE(82580_COPPER_DUAL), 114 IGB_DEVICE(82580_QUAD_FIBER), 115 IGB_DEVICE(DH89XXCC_SERDES), 116 IGB_DEVICE(DH89XXCC_SGMII), 117 IGB_DEVICE(DH89XXCC_SFP), 118 IGB_DEVICE(DH89XXCC_BACKPLANE), 119 IGB_DEVICE(I350_COPPER), 120 IGB_DEVICE(I350_FIBER), 121 IGB_DEVICE(I350_SERDES), 122 IGB_DEVICE(I350_SGMII), 123 IGB_DEVICE(I350_VF), 124 IGB_DEVICE(I210_COPPER), 125 IGB_DEVICE(I210_COPPER_IT), 126 IGB_DEVICE(I210_COPPER_OEM1), 127 IGB_DEVICE(I210_FIBER), 128 IGB_DEVICE(I210_SERDES), 129 IGB_DEVICE(I210_SGMII), 130 IGB_DEVICE(I211_COPPER), 131 132 /* required last entry */ 133 IGB_DEVICE_NULL 134 }; 135 136 static int igb_probe(device_t); 137 static int igb_attach(device_t); 138 static int igb_detach(device_t); 139 static int igb_shutdown(device_t); 140 static int igb_suspend(device_t); 141 static int igb_resume(device_t); 142 143 static boolean_t igb_is_valid_ether_addr(const uint8_t *); 144 static void igb_setup_ifp(struct igb_softc *); 145 static boolean_t igb_txcsum_ctx(struct igb_tx_ring *, struct mbuf *); 146 static int igb_tso_pullup(struct igb_tx_ring *, struct mbuf **); 147 static void igb_tso_ctx(struct igb_tx_ring *, struct mbuf *, uint32_t *); 148 static void igb_add_sysctl(struct igb_softc *); 149 static int igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS); 150 static int igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS); 151 static int igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS); 152 static int igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS); 153 static int igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS); 154 static void igb_set_ring_inuse(struct igb_softc *, boolean_t); 155 static int igb_get_rxring_inuse(const struct igb_softc *, boolean_t); 156 static int igb_get_txring_inuse(const struct igb_softc *, boolean_t); 157 static void igb_set_timer_cpuid(struct igb_softc *, boolean_t); 158 #ifdef IFPOLL_ENABLE 159 static int igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS); 160 static int igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS); 161 #endif 162 163 static void igb_vf_init_stats(struct igb_softc *); 164 static void igb_reset(struct igb_softc *); 165 static void igb_update_stats_counters(struct igb_softc *); 166 static void igb_update_vf_stats_counters(struct igb_softc *); 167 static void igb_update_link_status(struct igb_softc *); 168 static void igb_init_tx_unit(struct igb_softc *); 169 static void igb_init_rx_unit(struct igb_softc *); 170 171 static void igb_set_vlan(struct igb_softc *); 172 static void igb_set_multi(struct igb_softc *); 173 static void igb_set_promisc(struct igb_softc *); 174 static void igb_disable_promisc(struct igb_softc *); 175 176 static int igb_alloc_rings(struct igb_softc *); 177 static void igb_free_rings(struct igb_softc *); 178 static int igb_create_tx_ring(struct 
igb_tx_ring *); 179 static int igb_create_rx_ring(struct igb_rx_ring *); 180 static void igb_free_tx_ring(struct igb_tx_ring *); 181 static void igb_free_rx_ring(struct igb_rx_ring *); 182 static void igb_destroy_tx_ring(struct igb_tx_ring *, int); 183 static void igb_destroy_rx_ring(struct igb_rx_ring *, int); 184 static void igb_init_tx_ring(struct igb_tx_ring *); 185 static int igb_init_rx_ring(struct igb_rx_ring *); 186 static int igb_newbuf(struct igb_rx_ring *, int, boolean_t); 187 static int igb_encap(struct igb_tx_ring *, struct mbuf **, int *, int *); 188 static void igb_rx_refresh(struct igb_rx_ring *, int); 189 static void igb_setup_serializer(struct igb_softc *); 190 191 static void igb_stop(struct igb_softc *); 192 static void igb_init(void *); 193 static int igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 194 static void igb_media_status(struct ifnet *, struct ifmediareq *); 195 static int igb_media_change(struct ifnet *); 196 static void igb_timer(void *); 197 static void igb_watchdog(struct ifaltq_subque *); 198 static void igb_start(struct ifnet *, struct ifaltq_subque *); 199 #ifdef IFPOLL_ENABLE 200 static void igb_npoll(struct ifnet *, struct ifpoll_info *); 201 static void igb_npoll_rx(struct ifnet *, void *, int); 202 static void igb_npoll_tx(struct ifnet *, void *, int); 203 static void igb_npoll_status(struct ifnet *); 204 #endif 205 static void igb_serialize(struct ifnet *, enum ifnet_serialize); 206 static void igb_deserialize(struct ifnet *, enum ifnet_serialize); 207 static int igb_tryserialize(struct ifnet *, enum ifnet_serialize); 208 #ifdef INVARIANTS 209 static void igb_serialize_assert(struct ifnet *, enum ifnet_serialize, 210 boolean_t); 211 #endif 212 213 static void igb_intr(void *); 214 static void igb_intr_shared(void *); 215 static void igb_rxeof(struct igb_rx_ring *, int); 216 static void igb_txeof(struct igb_tx_ring *); 217 static void igb_set_eitr(struct igb_softc *, int, int); 218 static void igb_enable_intr(struct igb_softc *); 219 static void igb_disable_intr(struct igb_softc *); 220 static void igb_init_unshared_intr(struct igb_softc *); 221 static void igb_init_intr(struct igb_softc *); 222 static int igb_setup_intr(struct igb_softc *); 223 static void igb_set_txintr_mask(struct igb_tx_ring *, int *, int); 224 static void igb_set_rxintr_mask(struct igb_rx_ring *, int *, int); 225 static void igb_set_intr_mask(struct igb_softc *); 226 static int igb_alloc_intr(struct igb_softc *); 227 static void igb_free_intr(struct igb_softc *); 228 static void igb_teardown_intr(struct igb_softc *); 229 static void igb_msix_try_alloc(struct igb_softc *); 230 static void igb_msix_rx_conf(struct igb_softc *, int, int *, int); 231 static void igb_msix_tx_conf(struct igb_softc *, int, int *, int); 232 static void igb_msix_free(struct igb_softc *, boolean_t); 233 static int igb_msix_setup(struct igb_softc *); 234 static void igb_msix_teardown(struct igb_softc *, int); 235 static void igb_msix_rx(void *); 236 static void igb_msix_tx(void *); 237 static void igb_msix_status(void *); 238 static void igb_msix_rxtx(void *); 239 240 /* Management and WOL Support */ 241 static void igb_get_mgmt(struct igb_softc *); 242 static void igb_rel_mgmt(struct igb_softc *); 243 static void igb_get_hw_control(struct igb_softc *); 244 static void igb_rel_hw_control(struct igb_softc *); 245 static void igb_enable_wol(device_t); 246 247 static device_method_t igb_methods[] = { 248 /* Device interface */ 249 DEVMETHOD(device_probe, igb_probe), 250 DEVMETHOD(device_attach, 
igb_attach), 251 DEVMETHOD(device_detach, igb_detach), 252 DEVMETHOD(device_shutdown, igb_shutdown), 253 DEVMETHOD(device_suspend, igb_suspend), 254 DEVMETHOD(device_resume, igb_resume), 255 DEVMETHOD_END 256 }; 257 258 static driver_t igb_driver = { 259 "igb", 260 igb_methods, 261 sizeof(struct igb_softc), 262 }; 263 264 static devclass_t igb_devclass; 265 266 DECLARE_DUMMY_MODULE(if_igb); 267 MODULE_DEPEND(igb, ig_hal, 1, 1, 1); 268 DRIVER_MODULE(if_igb, pci, igb_driver, igb_devclass, NULL, NULL); 269 270 static int igb_rxd = IGB_DEFAULT_RXD; 271 static int igb_txd = IGB_DEFAULT_TXD; 272 static int igb_rxr = 0; 273 static int igb_txr = 0; 274 static int igb_msi_enable = 1; 275 static int igb_msix_enable = 1; 276 static int igb_eee_disabled = 1; /* Energy Efficient Ethernet */ 277 static int igb_fc_setting = e1000_fc_full; 278 279 /* 280 * DMA Coalescing, only for i350 - default to off, 281 * this feature is for power savings 282 */ 283 static int igb_dma_coalesce = 0; 284 285 TUNABLE_INT("hw.igb.rxd", &igb_rxd); 286 TUNABLE_INT("hw.igb.txd", &igb_txd); 287 TUNABLE_INT("hw.igb.rxr", &igb_rxr); 288 TUNABLE_INT("hw.igb.txr", &igb_txr); 289 TUNABLE_INT("hw.igb.msi.enable", &igb_msi_enable); 290 TUNABLE_INT("hw.igb.msix.enable", &igb_msix_enable); 291 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting); 292 293 /* i350 specific */ 294 TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled); 295 TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce); 296 297 static __inline void 298 igb_rxcsum(uint32_t staterr, struct mbuf *mp) 299 { 300 /* Ignore Checksum bit is set */ 301 if (staterr & E1000_RXD_STAT_IXSM) 302 return; 303 304 if ((staterr & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == 305 E1000_RXD_STAT_IPCS) 306 mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; 307 308 if (staterr & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { 309 if ((staterr & E1000_RXDEXT_STATERR_TCPE) == 0) { 310 mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | 311 CSUM_PSEUDO_HDR | CSUM_FRAG_NOT_CHECKED; 312 mp->m_pkthdr.csum_data = htons(0xffff); 313 } 314 } 315 } 316 317 static __inline struct pktinfo * 318 igb_rssinfo(struct mbuf *m, struct pktinfo *pi, 319 uint32_t hash, uint32_t hashtype, uint32_t staterr) 320 { 321 switch (hashtype) { 322 case E1000_RXDADV_RSSTYPE_IPV4_TCP: 323 pi->pi_netisr = NETISR_IP; 324 pi->pi_flags = 0; 325 pi->pi_l3proto = IPPROTO_TCP; 326 break; 327 328 case E1000_RXDADV_RSSTYPE_IPV4: 329 if (staterr & E1000_RXD_STAT_IXSM) 330 return NULL; 331 332 if ((staterr & 333 (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == 334 E1000_RXD_STAT_TCPCS) { 335 pi->pi_netisr = NETISR_IP; 336 pi->pi_flags = 0; 337 pi->pi_l3proto = IPPROTO_UDP; 338 break; 339 } 340 /* FALL THROUGH */ 341 default: 342 return NULL; 343 } 344 345 m->m_flags |= M_HASH; 346 m->m_pkthdr.hash = toeplitz_hash(hash); 347 return pi; 348 } 349 350 static int 351 igb_probe(device_t dev) 352 { 353 const struct igb_device *d; 354 uint16_t vid, did; 355 356 vid = pci_get_vendor(dev); 357 did = pci_get_device(dev); 358 359 for (d = igb_devices; d->desc != NULL; ++d) { 360 if (vid == d->vid && did == d->did) { 361 device_set_desc(dev, d->desc); 362 return 0; 363 } 364 } 365 return ENXIO; 366 } 367 368 static int 369 igb_attach(device_t dev) 370 { 371 struct igb_softc *sc = device_get_softc(dev); 372 uint16_t eeprom_data; 373 int error = 0, i, ring_max; 374 #ifdef IFPOLL_ENABLE 375 int offset, offset_def; 376 #endif 377 378 #ifdef notyet 379 /* SYSCTL stuff */ 380 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 381 
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 382 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, 383 igb_sysctl_nvm_info, "I", "NVM Information"); 384 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 385 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), 386 OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW, 387 adapter, 0, igb_set_flowcntl, "I", "Flow Control"); 388 #endif 389 390 callout_init_mp(&sc->timer); 391 lwkt_serialize_init(&sc->main_serialize); 392 393 if_initname(&sc->arpcom.ac_if, device_get_name(dev), 394 device_get_unit(dev)); 395 sc->dev = sc->osdep.dev = dev; 396 397 /* 398 * Determine hardware and mac type 399 */ 400 sc->hw.vendor_id = pci_get_vendor(dev); 401 sc->hw.device_id = pci_get_device(dev); 402 sc->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); 403 sc->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); 404 sc->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); 405 406 if (e1000_set_mac_type(&sc->hw)) 407 return ENXIO; 408 409 /* Are we a VF device? */ 410 if (sc->hw.mac.type == e1000_vfadapt || 411 sc->hw.mac.type == e1000_vfadapt_i350) 412 sc->vf_ifp = 1; 413 else 414 sc->vf_ifp = 0; 415 416 /* 417 * Configure total supported RX/TX ring count 418 */ 419 switch (sc->hw.mac.type) { 420 case e1000_82575: 421 ring_max = IGB_MAX_RING_82575; 422 break; 423 424 case e1000_82576: 425 ring_max = IGB_MAX_RING_82576; 426 break; 427 428 case e1000_82580: 429 ring_max = IGB_MAX_RING_82580; 430 break; 431 432 case e1000_i350: 433 ring_max = IGB_MAX_RING_I350; 434 break; 435 436 case e1000_i210: 437 ring_max = IGB_MAX_RING_I210; 438 break; 439 440 case e1000_i211: 441 ring_max = IGB_MAX_RING_I211; 442 break; 443 444 default: 445 ring_max = IGB_MIN_RING; 446 break; 447 } 448 449 sc->rx_ring_cnt = device_getenv_int(dev, "rxr", igb_rxr); 450 sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, ring_max); 451 #ifdef IGB_RSS_DEBUG 452 sc->rx_ring_cnt = device_getenv_int(dev, "rxr_debug", sc->rx_ring_cnt); 453 #endif 454 sc->rx_ring_inuse = sc->rx_ring_cnt; 455 456 sc->tx_ring_cnt = device_getenv_int(dev, "txr", igb_txr); 457 sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_max); 458 #ifdef IGB_TSS_DEBUG 459 sc->tx_ring_cnt = device_getenv_int(dev, "txr_debug", sc->tx_ring_cnt); 460 #endif 461 sc->tx_ring_inuse = sc->tx_ring_cnt; 462 463 /* Enable bus mastering */ 464 pci_enable_busmaster(dev); 465 466 /* 467 * Allocate IO memory 468 */ 469 sc->mem_rid = PCIR_BAR(0); 470 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid, 471 RF_ACTIVE); 472 if (sc->mem_res == NULL) { 473 device_printf(dev, "Unable to allocate bus resource: memory\n"); 474 error = ENXIO; 475 goto failed; 476 } 477 sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res); 478 sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res); 479 480 sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle; 481 482 /* Save PCI command register for Shared Code */ 483 sc->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); 484 sc->hw.back = &sc->osdep; 485 486 /* Do Shared Code initialization */ 487 if (e1000_setup_init_funcs(&sc->hw, TRUE)) { 488 device_printf(dev, "Setup of Shared code failed\n"); 489 error = ENXIO; 490 goto failed; 491 } 492 493 e1000_get_bus_info(&sc->hw); 494 495 sc->hw.mac.autoneg = DO_AUTO_NEG; 496 sc->hw.phy.autoneg_wait_to_complete = FALSE; 497 sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 498 499 /* Copper options */ 500 if (sc->hw.phy.media_type == e1000_media_type_copper) { 501 sc->hw.phy.mdix = AUTO_ALL_MODES; 502 
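	/*
	 * Note: "disable_polarity_correction" below is a disable flag,
	 * so FALSE leaves the PHY's automatic polarity correction
	 * enabled, which is the hardware default.
	 */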
sc->hw.phy.disable_polarity_correction = FALSE; 503 sc->hw.phy.ms_type = IGB_MASTER_SLAVE; 504 } 505 506 /* Set the frame limits assuming standard ethernet sized frames. */ 507 sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN; 508 509 /* Allocate RX/TX rings */ 510 error = igb_alloc_rings(sc); 511 if (error) 512 goto failed; 513 514 #ifdef IFPOLL_ENABLE 515 /* 516 * NPOLLING RX CPU offset 517 */ 518 if (sc->rx_ring_cnt == ncpus2) { 519 offset = 0; 520 } else { 521 offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2; 522 offset = device_getenv_int(dev, "npoll.rxoff", offset_def); 523 if (offset >= ncpus2 || 524 offset % sc->rx_ring_cnt != 0) { 525 device_printf(dev, "invalid npoll.rxoff %d, use %d\n", 526 offset, offset_def); 527 offset = offset_def; 528 } 529 } 530 sc->rx_npoll_off = offset; 531 532 /* 533 * NPOLLING TX CPU offset 534 */ 535 if (sc->tx_ring_cnt == ncpus2) { 536 offset = 0; 537 } else { 538 offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2; 539 offset = device_getenv_int(dev, "npoll.txoff", offset_def); 540 if (offset >= ncpus2 || 541 offset % sc->tx_ring_cnt != 0) { 542 device_printf(dev, "invalid npoll.txoff %d, use %d\n", 543 offset, offset_def); 544 offset = offset_def; 545 } 546 } 547 sc->tx_npoll_off = offset; 548 #endif 549 550 /* Allocate interrupt */ 551 error = igb_alloc_intr(sc); 552 if (error) 553 goto failed; 554 555 /* Setup serializers */ 556 igb_setup_serializer(sc); 557 558 /* Allocate the appropriate stats memory */ 559 if (sc->vf_ifp) { 560 sc->stats = kmalloc(sizeof(struct e1000_vf_stats), M_DEVBUF, 561 M_WAITOK | M_ZERO); 562 igb_vf_init_stats(sc); 563 } else { 564 sc->stats = kmalloc(sizeof(struct e1000_hw_stats), M_DEVBUF, 565 M_WAITOK | M_ZERO); 566 } 567 568 /* Allocate multicast array memory. */ 569 sc->mta = kmalloc(ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, 570 M_DEVBUF, M_WAITOK); 571 572 /* Some adapter-specific advanced features */ 573 if (sc->hw.mac.type >= e1000_i350) { 574 #ifdef notyet 575 igb_set_sysctl_value(adapter, "dma_coalesce", 576 "configure dma coalesce", 577 &adapter->dma_coalesce, igb_dma_coalesce); 578 igb_set_sysctl_value(adapter, "eee_disabled", 579 "enable Energy Efficient Ethernet", 580 &adapter->hw.dev_spec._82575.eee_disable, 581 igb_eee_disabled); 582 #else 583 sc->dma_coalesce = igb_dma_coalesce; 584 sc->hw.dev_spec._82575.eee_disable = igb_eee_disabled; 585 #endif 586 if (sc->hw.phy.media_type == e1000_media_type_copper) 587 e1000_set_eee_i350(&sc->hw); 588 } 589 590 /* 591 * Start from a known state, this is important in reading the nvm and 592 * mac from that. 593 */ 594 e1000_reset_hw(&sc->hw); 595 596 /* Make sure we have a good EEPROM before we read from it */ 597 if (sc->hw.mac.type != e1000_i210 && sc->hw.mac.type != e1000_i211 && 598 e1000_validate_nvm_checksum(&sc->hw) < 0) { 599 /* 600 * Some PCI-E parts fail the first check due to 601 * the link being in sleep state, call it again, 602 * if it fails a second time its a real issue. 
603 */ 604 if (e1000_validate_nvm_checksum(&sc->hw) < 0) { 605 device_printf(dev, 606 "The EEPROM Checksum Is Not Valid\n"); 607 error = EIO; 608 goto failed; 609 } 610 } 611 612 /* Copy the permanent MAC address out of the EEPROM */ 613 if (e1000_read_mac_addr(&sc->hw) < 0) { 614 device_printf(dev, "EEPROM read error while reading MAC" 615 " address\n"); 616 error = EIO; 617 goto failed; 618 } 619 if (!igb_is_valid_ether_addr(sc->hw.mac.addr)) { 620 device_printf(dev, "Invalid MAC address\n"); 621 error = EIO; 622 goto failed; 623 } 624 625 /* Setup OS specific network interface */ 626 igb_setup_ifp(sc); 627 628 /* Add sysctl tree, must after igb_setup_ifp() */ 629 igb_add_sysctl(sc); 630 631 /* Now get a good starting state */ 632 igb_reset(sc); 633 634 /* Initialize statistics */ 635 igb_update_stats_counters(sc); 636 637 sc->hw.mac.get_link_status = 1; 638 igb_update_link_status(sc); 639 640 /* Indicate SOL/IDER usage */ 641 if (e1000_check_reset_block(&sc->hw)) { 642 device_printf(dev, 643 "PHY reset is blocked due to SOL/IDER session.\n"); 644 } 645 646 /* Determine if we have to control management hardware */ 647 if (e1000_enable_mng_pass_thru(&sc->hw)) 648 sc->flags |= IGB_FLAG_HAS_MGMT; 649 650 /* 651 * Setup Wake-on-Lan 652 */ 653 /* APME bit in EEPROM is mapped to WUC.APME */ 654 eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC) & E1000_WUC_APME; 655 if (eeprom_data) 656 sc->wol = E1000_WUFC_MAG; 657 /* XXX disable WOL */ 658 sc->wol = 0; 659 660 #ifdef notyet 661 /* Register for VLAN events */ 662 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 663 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); 664 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 665 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 666 #endif 667 668 #ifdef notyet 669 igb_add_hw_stats(adapter); 670 #endif 671 672 error = igb_setup_intr(sc); 673 if (error) { 674 ether_ifdetach(&sc->arpcom.ac_if); 675 goto failed; 676 } 677 678 for (i = 0; i < sc->tx_ring_cnt; ++i) { 679 struct ifaltq_subque *ifsq = 680 ifq_get_subq(&sc->arpcom.ac_if.if_snd, i); 681 struct igb_tx_ring *txr = &sc->tx_rings[i]; 682 683 ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid); 684 ifsq_set_priv(ifsq, txr); 685 txr->ifsq = ifsq; 686 687 ifsq_watchdog_init(&txr->tx_watchdog, ifsq, igb_watchdog); 688 } 689 690 return 0; 691 692 failed: 693 igb_detach(dev); 694 return error; 695 } 696 697 static int 698 igb_detach(device_t dev) 699 { 700 struct igb_softc *sc = device_get_softc(dev); 701 702 if (device_is_attached(dev)) { 703 struct ifnet *ifp = &sc->arpcom.ac_if; 704 705 ifnet_serialize_all(ifp); 706 707 igb_stop(sc); 708 709 e1000_phy_hw_reset(&sc->hw); 710 711 /* Give control back to firmware */ 712 igb_rel_mgmt(sc); 713 igb_rel_hw_control(sc); 714 715 if (sc->wol) { 716 E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN); 717 E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol); 718 igb_enable_wol(dev); 719 } 720 721 igb_teardown_intr(sc); 722 723 ifnet_deserialize_all(ifp); 724 725 ether_ifdetach(ifp); 726 } else if (sc->mem_res != NULL) { 727 igb_rel_hw_control(sc); 728 } 729 bus_generic_detach(dev); 730 731 if (sc->sysctl_tree != NULL) 732 sysctl_ctx_free(&sc->sysctl_ctx); 733 734 igb_free_intr(sc); 735 736 if (sc->msix_mem_res != NULL) { 737 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid, 738 sc->msix_mem_res); 739 } 740 if (sc->mem_res != NULL) { 741 bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, 742 sc->mem_res); 743 } 744 745 igb_free_rings(sc); 746 747 if (sc->mta != NULL) 748 kfree(sc->mta, 
M_DEVBUF); 749 if (sc->stats != NULL) 750 kfree(sc->stats, M_DEVBUF); 751 if (sc->serializes != NULL) 752 kfree(sc->serializes, M_DEVBUF); 753 754 return 0; 755 } 756 757 static int 758 igb_shutdown(device_t dev) 759 { 760 return igb_suspend(dev); 761 } 762 763 static int 764 igb_suspend(device_t dev) 765 { 766 struct igb_softc *sc = device_get_softc(dev); 767 struct ifnet *ifp = &sc->arpcom.ac_if; 768 769 ifnet_serialize_all(ifp); 770 771 igb_stop(sc); 772 773 igb_rel_mgmt(sc); 774 igb_rel_hw_control(sc); 775 776 if (sc->wol) { 777 E1000_WRITE_REG(&sc->hw, E1000_WUC, E1000_WUC_PME_EN); 778 E1000_WRITE_REG(&sc->hw, E1000_WUFC, sc->wol); 779 igb_enable_wol(dev); 780 } 781 782 ifnet_deserialize_all(ifp); 783 784 return bus_generic_suspend(dev); 785 } 786 787 static int 788 igb_resume(device_t dev) 789 { 790 struct igb_softc *sc = device_get_softc(dev); 791 struct ifnet *ifp = &sc->arpcom.ac_if; 792 int i; 793 794 ifnet_serialize_all(ifp); 795 796 igb_init(sc); 797 igb_get_mgmt(sc); 798 799 for (i = 0; i < sc->tx_ring_inuse; ++i) 800 ifsq_devstart_sched(sc->tx_rings[i].ifsq); 801 802 ifnet_deserialize_all(ifp); 803 804 return bus_generic_resume(dev); 805 } 806 807 static int 808 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr) 809 { 810 struct igb_softc *sc = ifp->if_softc; 811 struct ifreq *ifr = (struct ifreq *)data; 812 int max_frame_size, mask, reinit; 813 int error = 0; 814 815 ASSERT_IFNET_SERIALIZED_ALL(ifp); 816 817 switch (command) { 818 case SIOCSIFMTU: 819 max_frame_size = 9234; 820 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - 821 ETHER_CRC_LEN) { 822 error = EINVAL; 823 break; 824 } 825 826 ifp->if_mtu = ifr->ifr_mtu; 827 sc->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + 828 ETHER_CRC_LEN; 829 830 if (ifp->if_flags & IFF_RUNNING) 831 igb_init(sc); 832 break; 833 834 case SIOCSIFFLAGS: 835 if (ifp->if_flags & IFF_UP) { 836 if (ifp->if_flags & IFF_RUNNING) { 837 if ((ifp->if_flags ^ sc->if_flags) & 838 (IFF_PROMISC | IFF_ALLMULTI)) { 839 igb_disable_promisc(sc); 840 igb_set_promisc(sc); 841 } 842 } else { 843 igb_init(sc); 844 } 845 } else if (ifp->if_flags & IFF_RUNNING) { 846 igb_stop(sc); 847 } 848 sc->if_flags = ifp->if_flags; 849 break; 850 851 case SIOCADDMULTI: 852 case SIOCDELMULTI: 853 if (ifp->if_flags & IFF_RUNNING) { 854 igb_disable_intr(sc); 855 igb_set_multi(sc); 856 #ifdef IFPOLL_ENABLE 857 if (!(ifp->if_flags & IFF_NPOLLING)) 858 #endif 859 igb_enable_intr(sc); 860 } 861 break; 862 863 case SIOCSIFMEDIA: 864 /* Check SOL/IDER usage */ 865 if (e1000_check_reset_block(&sc->hw)) { 866 if_printf(ifp, "Media change is " 867 "blocked due to SOL/IDER session.\n"); 868 break; 869 } 870 /* FALL THROUGH */ 871 872 case SIOCGIFMEDIA: 873 error = ifmedia_ioctl(ifp, ifr, &sc->media, command); 874 break; 875 876 case SIOCSIFCAP: 877 reinit = 0; 878 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 879 if (mask & IFCAP_RXCSUM) { 880 ifp->if_capenable ^= IFCAP_RXCSUM; 881 reinit = 1; 882 } 883 if (mask & IFCAP_VLAN_HWTAGGING) { 884 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 885 reinit = 1; 886 } 887 if (mask & IFCAP_TXCSUM) { 888 ifp->if_capenable ^= IFCAP_TXCSUM; 889 if (ifp->if_capenable & IFCAP_TXCSUM) 890 ifp->if_hwassist |= IGB_CSUM_FEATURES; 891 else 892 ifp->if_hwassist &= ~IGB_CSUM_FEATURES; 893 } 894 if (mask & IFCAP_TSO) { 895 ifp->if_capenable ^= IFCAP_TSO; 896 if (ifp->if_capenable & IFCAP_TSO) 897 ifp->if_hwassist |= CSUM_TSO; 898 else 899 ifp->if_hwassist &= ~CSUM_TSO; 900 } 901 if (mask & IFCAP_RSS) 902 ifp->if_capenable ^= IFCAP_RSS; 903 
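		/*
		 * Only RXCSUM and VLAN_HWTAGGING changes set "reinit"
		 * above and force an immediate igb_init() below; TXCSUM
		 * and TSO just update if_hwassist, and an RSS toggle
		 * takes effect on the next (re)initialization.
		 */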
if (reinit && (ifp->if_flags & IFF_RUNNING)) 904 igb_init(sc); 905 break; 906 907 default: 908 error = ether_ioctl(ifp, command, data); 909 break; 910 } 911 return error; 912 } 913 914 static void 915 igb_init(void *xsc) 916 { 917 struct igb_softc *sc = xsc; 918 struct ifnet *ifp = &sc->arpcom.ac_if; 919 boolean_t polling; 920 int i; 921 922 ASSERT_IFNET_SERIALIZED_ALL(ifp); 923 924 igb_stop(sc); 925 926 /* Get the latest mac address, User can use a LAA */ 927 bcopy(IF_LLADDR(ifp), sc->hw.mac.addr, ETHER_ADDR_LEN); 928 929 /* Put the address into the Receive Address Array */ 930 e1000_rar_set(&sc->hw, sc->hw.mac.addr, 0); 931 932 igb_reset(sc); 933 igb_update_link_status(sc); 934 935 E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN); 936 937 /* Configure for OS presence */ 938 igb_get_mgmt(sc); 939 940 polling = FALSE; 941 #ifdef IFPOLL_ENABLE 942 if (ifp->if_flags & IFF_NPOLLING) 943 polling = TRUE; 944 #endif 945 946 /* Configured used RX/TX rings */ 947 igb_set_ring_inuse(sc, polling); 948 ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1); 949 950 /* Initialize interrupt */ 951 igb_init_intr(sc); 952 953 /* Prepare transmit descriptors and buffers */ 954 for (i = 0; i < sc->tx_ring_inuse; ++i) 955 igb_init_tx_ring(&sc->tx_rings[i]); 956 igb_init_tx_unit(sc); 957 958 /* Setup Multicast table */ 959 igb_set_multi(sc); 960 961 #if 0 962 /* 963 * Figure out the desired mbuf pool 964 * for doing jumbo/packetsplit 965 */ 966 if (adapter->max_frame_size <= 2048) 967 adapter->rx_mbuf_sz = MCLBYTES; 968 else if (adapter->max_frame_size <= 4096) 969 adapter->rx_mbuf_sz = MJUMPAGESIZE; 970 else 971 adapter->rx_mbuf_sz = MJUM9BYTES; 972 #endif 973 974 /* Prepare receive descriptors and buffers */ 975 for (i = 0; i < sc->rx_ring_inuse; ++i) { 976 int error; 977 978 error = igb_init_rx_ring(&sc->rx_rings[i]); 979 if (error) { 980 if_printf(ifp, "Could not setup receive structures\n"); 981 igb_stop(sc); 982 return; 983 } 984 } 985 igb_init_rx_unit(sc); 986 987 /* Enable VLAN support */ 988 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) 989 igb_set_vlan(sc); 990 991 /* Don't lose promiscuous settings */ 992 igb_set_promisc(sc); 993 994 ifp->if_flags |= IFF_RUNNING; 995 for (i = 0; i < sc->tx_ring_inuse; ++i) { 996 ifsq_clr_oactive(sc->tx_rings[i].ifsq); 997 ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog); 998 } 999 1000 igb_set_timer_cpuid(sc, polling); 1001 callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid); 1002 e1000_clear_hw_cntrs_base_generic(&sc->hw); 1003 1004 /* This clears any pending interrupts */ 1005 E1000_READ_REG(&sc->hw, E1000_ICR); 1006 1007 /* 1008 * Only enable interrupts if we are not polling, make sure 1009 * they are off otherwise. 
1010 */ 1011 if (polling) { 1012 igb_disable_intr(sc); 1013 } else { 1014 igb_enable_intr(sc); 1015 E1000_WRITE_REG(&sc->hw, E1000_ICS, E1000_ICS_LSC); 1016 } 1017 1018 /* Set Energy Efficient Ethernet */ 1019 if (sc->hw.phy.media_type == e1000_media_type_copper) 1020 e1000_set_eee_i350(&sc->hw); 1021 } 1022 1023 static void 1024 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 1025 { 1026 struct igb_softc *sc = ifp->if_softc; 1027 1028 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1029 1030 igb_update_link_status(sc); 1031 1032 ifmr->ifm_status = IFM_AVALID; 1033 ifmr->ifm_active = IFM_ETHER; 1034 1035 if (!sc->link_active) 1036 return; 1037 1038 ifmr->ifm_status |= IFM_ACTIVE; 1039 1040 switch (sc->link_speed) { 1041 case 10: 1042 ifmr->ifm_active |= IFM_10_T; 1043 break; 1044 1045 case 100: 1046 /* 1047 * Support for 100Mb SFP - these are Fiber 1048 * but the media type appears as serdes 1049 */ 1050 if (sc->hw.phy.media_type == e1000_media_type_internal_serdes) 1051 ifmr->ifm_active |= IFM_100_FX; 1052 else 1053 ifmr->ifm_active |= IFM_100_TX; 1054 break; 1055 1056 case 1000: 1057 ifmr->ifm_active |= IFM_1000_T; 1058 break; 1059 } 1060 1061 if (sc->link_duplex == FULL_DUPLEX) 1062 ifmr->ifm_active |= IFM_FDX; 1063 else 1064 ifmr->ifm_active |= IFM_HDX; 1065 } 1066 1067 static int 1068 igb_media_change(struct ifnet *ifp) 1069 { 1070 struct igb_softc *sc = ifp->if_softc; 1071 struct ifmedia *ifm = &sc->media; 1072 1073 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1074 1075 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 1076 return EINVAL; 1077 1078 switch (IFM_SUBTYPE(ifm->ifm_media)) { 1079 case IFM_AUTO: 1080 sc->hw.mac.autoneg = DO_AUTO_NEG; 1081 sc->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; 1082 break; 1083 1084 case IFM_1000_LX: 1085 case IFM_1000_SX: 1086 case IFM_1000_T: 1087 sc->hw.mac.autoneg = DO_AUTO_NEG; 1088 sc->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 1089 break; 1090 1091 case IFM_100_TX: 1092 sc->hw.mac.autoneg = FALSE; 1093 sc->hw.phy.autoneg_advertised = 0; 1094 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1095 sc->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; 1096 else 1097 sc->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; 1098 break; 1099 1100 case IFM_10_T: 1101 sc->hw.mac.autoneg = FALSE; 1102 sc->hw.phy.autoneg_advertised = 0; 1103 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) 1104 sc->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; 1105 else 1106 sc->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; 1107 break; 1108 1109 default: 1110 if_printf(ifp, "Unsupported media type\n"); 1111 break; 1112 } 1113 1114 igb_init(sc); 1115 1116 return 0; 1117 } 1118 1119 static void 1120 igb_set_promisc(struct igb_softc *sc) 1121 { 1122 struct ifnet *ifp = &sc->arpcom.ac_if; 1123 struct e1000_hw *hw = &sc->hw; 1124 uint32_t reg; 1125 1126 if (sc->vf_ifp) { 1127 e1000_promisc_set_vf(hw, e1000_promisc_enabled); 1128 return; 1129 } 1130 1131 reg = E1000_READ_REG(hw, E1000_RCTL); 1132 if (ifp->if_flags & IFF_PROMISC) { 1133 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 1134 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1135 } else if (ifp->if_flags & IFF_ALLMULTI) { 1136 reg |= E1000_RCTL_MPE; 1137 reg &= ~E1000_RCTL_UPE; 1138 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1139 } 1140 } 1141 1142 static void 1143 igb_disable_promisc(struct igb_softc *sc) 1144 { 1145 struct e1000_hw *hw = &sc->hw; 1146 uint32_t reg; 1147 1148 if (sc->vf_ifp) { 1149 e1000_promisc_set_vf(hw, e1000_promisc_disabled); 1150 return; 1151 } 1152 reg = E1000_READ_REG(hw, E1000_RCTL); 1153 reg &= ~E1000_RCTL_UPE; 1154 reg &= 
~E1000_RCTL_MPE; 1155 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1156 } 1157 1158 static void 1159 igb_set_multi(struct igb_softc *sc) 1160 { 1161 struct ifnet *ifp = &sc->arpcom.ac_if; 1162 struct ifmultiaddr *ifma; 1163 uint32_t reg_rctl = 0; 1164 uint8_t *mta; 1165 int mcnt = 0; 1166 1167 mta = sc->mta; 1168 bzero(mta, ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); 1169 1170 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1171 if (ifma->ifma_addr->sa_family != AF_LINK) 1172 continue; 1173 1174 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) 1175 break; 1176 1177 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1178 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); 1179 mcnt++; 1180 } 1181 1182 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { 1183 reg_rctl = E1000_READ_REG(&sc->hw, E1000_RCTL); 1184 reg_rctl |= E1000_RCTL_MPE; 1185 E1000_WRITE_REG(&sc->hw, E1000_RCTL, reg_rctl); 1186 } else { 1187 e1000_update_mc_addr_list(&sc->hw, mta, mcnt); 1188 } 1189 } 1190 1191 static void 1192 igb_timer(void *xsc) 1193 { 1194 struct igb_softc *sc = xsc; 1195 1196 lwkt_serialize_enter(&sc->main_serialize); 1197 1198 igb_update_link_status(sc); 1199 igb_update_stats_counters(sc); 1200 1201 callout_reset_bycpu(&sc->timer, hz, igb_timer, sc, sc->timer_cpuid); 1202 1203 lwkt_serialize_exit(&sc->main_serialize); 1204 } 1205 1206 static void 1207 igb_update_link_status(struct igb_softc *sc) 1208 { 1209 struct ifnet *ifp = &sc->arpcom.ac_if; 1210 struct e1000_hw *hw = &sc->hw; 1211 uint32_t link_check, thstat, ctrl; 1212 1213 link_check = thstat = ctrl = 0; 1214 1215 /* Get the cached link value or read for real */ 1216 switch (hw->phy.media_type) { 1217 case e1000_media_type_copper: 1218 if (hw->mac.get_link_status) { 1219 /* Do the work to read phy */ 1220 e1000_check_for_link(hw); 1221 link_check = !hw->mac.get_link_status; 1222 } else { 1223 link_check = TRUE; 1224 } 1225 break; 1226 1227 case e1000_media_type_fiber: 1228 e1000_check_for_link(hw); 1229 link_check = E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU; 1230 break; 1231 1232 case e1000_media_type_internal_serdes: 1233 e1000_check_for_link(hw); 1234 link_check = hw->mac.serdes_has_link; 1235 break; 1236 1237 /* VF device is type_unknown */ 1238 case e1000_media_type_unknown: 1239 e1000_check_for_link(hw); 1240 link_check = !hw->mac.get_link_status; 1241 /* Fall thru */ 1242 default: 1243 break; 1244 } 1245 1246 /* Check for thermal downshift or shutdown */ 1247 if (hw->mac.type == e1000_i350) { 1248 thstat = E1000_READ_REG(hw, E1000_THSTAT); 1249 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); 1250 } 1251 1252 /* Now we check if a transition has happened */ 1253 if (link_check && sc->link_active == 0) { 1254 e1000_get_speed_and_duplex(hw, 1255 &sc->link_speed, &sc->link_duplex); 1256 if (bootverbose) { 1257 const char *flowctl; 1258 1259 /* Get the flow control for display */ 1260 switch (hw->fc.current_mode) { 1261 case e1000_fc_rx_pause: 1262 flowctl = "RX"; 1263 break; 1264 1265 case e1000_fc_tx_pause: 1266 flowctl = "TX"; 1267 break; 1268 1269 case e1000_fc_full: 1270 flowctl = "Full"; 1271 break; 1272 1273 default: 1274 flowctl = "None"; 1275 break; 1276 } 1277 1278 if_printf(ifp, "Link is up %d Mbps %s, " 1279 "Flow control: %s\n", 1280 sc->link_speed, 1281 sc->link_duplex == FULL_DUPLEX ? 
1282 "Full Duplex" : "Half Duplex", 1283 flowctl); 1284 } 1285 sc->link_active = 1; 1286 1287 ifp->if_baudrate = sc->link_speed * 1000000; 1288 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && 1289 (thstat & E1000_THSTAT_LINK_THROTTLE)) 1290 if_printf(ifp, "Link: thermal downshift\n"); 1291 /* This can sleep */ 1292 ifp->if_link_state = LINK_STATE_UP; 1293 if_link_state_change(ifp); 1294 } else if (!link_check && sc->link_active == 1) { 1295 ifp->if_baudrate = sc->link_speed = 0; 1296 sc->link_duplex = 0; 1297 if (bootverbose) 1298 if_printf(ifp, "Link is Down\n"); 1299 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && 1300 (thstat & E1000_THSTAT_PWR_DOWN)) 1301 if_printf(ifp, "Link: thermal shutdown\n"); 1302 sc->link_active = 0; 1303 /* This can sleep */ 1304 ifp->if_link_state = LINK_STATE_DOWN; 1305 if_link_state_change(ifp); 1306 } 1307 } 1308 1309 static void 1310 igb_stop(struct igb_softc *sc) 1311 { 1312 struct ifnet *ifp = &sc->arpcom.ac_if; 1313 int i; 1314 1315 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1316 1317 igb_disable_intr(sc); 1318 1319 callout_stop(&sc->timer); 1320 1321 ifp->if_flags &= ~IFF_RUNNING; 1322 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1323 ifsq_clr_oactive(sc->tx_rings[i].ifsq); 1324 ifsq_watchdog_stop(&sc->tx_rings[i].tx_watchdog); 1325 sc->tx_rings[i].tx_flags &= ~IGB_TXFLAG_ENABLED; 1326 } 1327 1328 e1000_reset_hw(&sc->hw); 1329 E1000_WRITE_REG(&sc->hw, E1000_WUC, 0); 1330 1331 e1000_led_off(&sc->hw); 1332 e1000_cleanup_led(&sc->hw); 1333 1334 for (i = 0; i < sc->tx_ring_cnt; ++i) 1335 igb_free_tx_ring(&sc->tx_rings[i]); 1336 for (i = 0; i < sc->rx_ring_cnt; ++i) 1337 igb_free_rx_ring(&sc->rx_rings[i]); 1338 } 1339 1340 static void 1341 igb_reset(struct igb_softc *sc) 1342 { 1343 struct ifnet *ifp = &sc->arpcom.ac_if; 1344 struct e1000_hw *hw = &sc->hw; 1345 struct e1000_fc_info *fc = &hw->fc; 1346 uint32_t pba = 0; 1347 uint16_t hwm; 1348 1349 /* Let the firmware know the OS is in control */ 1350 igb_get_hw_control(sc); 1351 1352 /* 1353 * Packet Buffer Allocation (PBA) 1354 * Writing PBA sets the receive portion of the buffer 1355 * the remainder is used for the transmit buffer. 
1356 */ 1357 switch (hw->mac.type) { 1358 case e1000_82575: 1359 pba = E1000_PBA_32K; 1360 break; 1361 1362 case e1000_82576: 1363 case e1000_vfadapt: 1364 pba = E1000_READ_REG(hw, E1000_RXPBS); 1365 pba &= E1000_RXPBS_SIZE_MASK_82576; 1366 break; 1367 1368 case e1000_82580: 1369 case e1000_i350: 1370 case e1000_vfadapt_i350: 1371 pba = E1000_READ_REG(hw, E1000_RXPBS); 1372 pba = e1000_rxpbs_adjust_82580(pba); 1373 break; 1374 1375 case e1000_i210: 1376 case e1000_i211: 1377 pba = E1000_PBA_34K; 1378 break; 1379 1380 default: 1381 break; 1382 } 1383 1384 /* Special needs in case of Jumbo frames */ 1385 if (hw->mac.type == e1000_82575 && ifp->if_mtu > ETHERMTU) { 1386 uint32_t tx_space, min_tx, min_rx; 1387 1388 pba = E1000_READ_REG(hw, E1000_PBA); 1389 tx_space = pba >> 16; 1390 pba &= 0xffff; 1391 1392 min_tx = (sc->max_frame_size + 1393 sizeof(struct e1000_tx_desc) - ETHER_CRC_LEN) * 2; 1394 min_tx = roundup2(min_tx, 1024); 1395 min_tx >>= 10; 1396 min_rx = sc->max_frame_size; 1397 min_rx = roundup2(min_rx, 1024); 1398 min_rx >>= 10; 1399 if (tx_space < min_tx && (min_tx - tx_space) < pba) { 1400 pba = pba - (min_tx - tx_space); 1401 /* 1402 * if short on rx space, rx wins 1403 * and must trump tx adjustment 1404 */ 1405 if (pba < min_rx) 1406 pba = min_rx; 1407 } 1408 E1000_WRITE_REG(hw, E1000_PBA, pba); 1409 } 1410 1411 /* 1412 * These parameters control the automatic generation (Tx) and 1413 * response (Rx) to Ethernet PAUSE frames. 1414 * - High water mark should allow for at least two frames to be 1415 * received after sending an XOFF. 1416 * - Low water mark works best when it is very near the high water mark. 1417 * This allows the receiver to restart by sending XON when it has 1418 * drained a bit. 1419 */ 1420 hwm = min(((pba << 10) * 9 / 10), 1421 ((pba << 10) - 2 * sc->max_frame_size)); 1422 1423 if (hw->mac.type < e1000_82576) { 1424 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ 1425 fc->low_water = fc->high_water - 8; 1426 } else { 1427 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ 1428 fc->low_water = fc->high_water - 16; 1429 } 1430 fc->pause_time = IGB_FC_PAUSE_TIME; 1431 fc->send_xon = TRUE; 1432 fc->requested_mode = e1000_fc_default; 1433 1434 /* Issue a global reset */ 1435 e1000_reset_hw(hw); 1436 E1000_WRITE_REG(hw, E1000_WUC, 0); 1437 1438 if (e1000_init_hw(hw) < 0) 1439 if_printf(ifp, "Hardware Initialization Failed\n"); 1440 1441 /* Setup DMA Coalescing */ 1442 if (hw->mac.type > e1000_82580 && hw->mac.type != e1000_i211) { 1443 uint32_t dmac; 1444 uint32_t reg; 1445 1446 if (sc->dma_coalesce == 0) { 1447 /* 1448 * Disabled 1449 */ 1450 reg = E1000_READ_REG(hw, E1000_DMACR); 1451 reg &= ~E1000_DMACR_DMAC_EN; 1452 E1000_WRITE_REG(hw, E1000_DMACR, reg); 1453 goto reset_out; 1454 } 1455 1456 /* Set starting thresholds */ 1457 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); 1458 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); 1459 1460 hwm = 64 * pba - sc->max_frame_size / 16; 1461 if (hwm < 64 * (pba - 6)) 1462 hwm = 64 * (pba - 6); 1463 reg = E1000_READ_REG(hw, E1000_FCRTC); 1464 reg &= ~E1000_FCRTC_RTH_COAL_MASK; 1465 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) 1466 & E1000_FCRTC_RTH_COAL_MASK); 1467 E1000_WRITE_REG(hw, E1000_FCRTC, reg); 1468 1469 dmac = pba - sc->max_frame_size / 512; 1470 if (dmac < pba - 10) 1471 dmac = pba - 10; 1472 reg = E1000_READ_REG(hw, E1000_DMACR); 1473 reg &= ~E1000_DMACR_DMACTHR_MASK; 1474 reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) 1475 & E1000_DMACR_DMACTHR_MASK); 1476 /* Transition to L0x or L1 if available.. 
*/ 1477 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); 1478 /* timer = value in sc->dma_coalesce in 32usec intervals */ 1479 reg |= (sc->dma_coalesce >> 5); 1480 E1000_WRITE_REG(hw, E1000_DMACR, reg); 1481 1482 /* Set the interval before transition */ 1483 reg = E1000_READ_REG(hw, E1000_DMCTLX); 1484 reg |= 0x80000004; 1485 E1000_WRITE_REG(hw, E1000_DMCTLX, reg); 1486 1487 /* Free space in tx packet buffer to wake from DMA coal */ 1488 E1000_WRITE_REG(hw, E1000_DMCTXTH, 1489 (20480 - (2 * sc->max_frame_size)) >> 6); 1490 1491 /* Make low power state decision controlled by DMA coal */ 1492 reg = E1000_READ_REG(hw, E1000_PCIEMISC); 1493 reg &= ~E1000_PCIEMISC_LX_DECISION; 1494 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); 1495 if_printf(ifp, "DMA Coalescing enabled\n"); 1496 } else if (hw->mac.type == e1000_82580) { 1497 uint32_t reg = E1000_READ_REG(hw, E1000_PCIEMISC); 1498 1499 E1000_WRITE_REG(hw, E1000_DMACR, 0); 1500 E1000_WRITE_REG(hw, E1000_PCIEMISC, 1501 reg & ~E1000_PCIEMISC_LX_DECISION); 1502 } 1503 1504 reset_out: 1505 E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN); 1506 e1000_get_phy_info(hw); 1507 e1000_check_for_link(hw); 1508 } 1509 1510 static void 1511 igb_setup_ifp(struct igb_softc *sc) 1512 { 1513 struct ifnet *ifp = &sc->arpcom.ac_if; 1514 1515 ifp->if_softc = sc; 1516 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 1517 ifp->if_init = igb_init; 1518 ifp->if_ioctl = igb_ioctl; 1519 ifp->if_start = igb_start; 1520 ifp->if_serialize = igb_serialize; 1521 ifp->if_deserialize = igb_deserialize; 1522 ifp->if_tryserialize = igb_tryserialize; 1523 #ifdef INVARIANTS 1524 ifp->if_serialize_assert = igb_serialize_assert; 1525 #endif 1526 #ifdef IFPOLL_ENABLE 1527 ifp->if_npoll = igb_npoll; 1528 #endif 1529 1530 ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].num_tx_desc - 1); 1531 ifq_set_ready(&ifp->if_snd); 1532 ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt); 1533 1534 ifp->if_mapsubq = ifq_mapsubq_mask; 1535 ifq_set_subq_mask(&ifp->if_snd, 0); 1536 1537 ether_ifattach(ifp, sc->hw.mac.addr, NULL); 1538 1539 ifp->if_capabilities = 1540 IFCAP_HWCSUM | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_TSO; 1541 if (IGB_ENABLE_HWRSS(sc)) 1542 ifp->if_capabilities |= IFCAP_RSS; 1543 ifp->if_capenable = ifp->if_capabilities; 1544 ifp->if_hwassist = IGB_CSUM_FEATURES | CSUM_TSO; 1545 1546 /* 1547 * Tell the upper layer(s) we support long frames 1548 */ 1549 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 1550 1551 /* 1552 * Specify the media types supported by this adapter and register 1553 * callbacks to update media and link information 1554 */ 1555 ifmedia_init(&sc->media, IFM_IMASK, igb_media_change, igb_media_status); 1556 if (sc->hw.phy.media_type == e1000_media_type_fiber || 1557 sc->hw.phy.media_type == e1000_media_type_internal_serdes) { 1558 ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 1559 0, NULL); 1560 ifmedia_add(&sc->media, IFM_ETHER | IFM_1000_SX, 0, NULL); 1561 } else { 1562 ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T, 0, NULL); 1563 ifmedia_add(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX, 1564 0, NULL); 1565 ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX, 0, NULL); 1566 ifmedia_add(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 1567 0, NULL); 1568 if (sc->hw.phy.type != e1000_phy_ife) { 1569 ifmedia_add(&sc->media, 1570 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 1571 ifmedia_add(&sc->media, 1572 IFM_ETHER | IFM_1000_T, 0, NULL); 1573 } 1574 } 1575 ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); 1576 
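	/* Default the media selection to autoselect. */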
ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); 1577 } 1578 1579 static void 1580 igb_add_sysctl(struct igb_softc *sc) 1581 { 1582 char node[32]; 1583 int i; 1584 1585 sysctl_ctx_init(&sc->sysctl_ctx); 1586 sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, 1587 SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, 1588 device_get_nameunit(sc->dev), CTLFLAG_RD, 0, ""); 1589 if (sc->sysctl_tree == NULL) { 1590 device_printf(sc->dev, "can't add sysctl node\n"); 1591 return; 1592 } 1593 1594 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1595 OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings"); 1596 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1597 OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0, 1598 "# of RX rings used"); 1599 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1600 OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings"); 1601 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1602 OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0, 1603 "# of TX rings used"); 1604 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1605 OID_AUTO, "rxd", CTLFLAG_RD, &sc->rx_rings[0].num_rx_desc, 0, 1606 "# of RX descs"); 1607 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1608 OID_AUTO, "txd", CTLFLAG_RD, &sc->tx_rings[0].num_tx_desc, 0, 1609 "# of TX descs"); 1610 1611 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 1612 SYSCTL_ADD_PROC(&sc->sysctl_ctx, 1613 SYSCTL_CHILDREN(sc->sysctl_tree), 1614 OID_AUTO, "intr_rate", CTLTYPE_INT | CTLFLAG_RW, 1615 sc, 0, igb_sysctl_intr_rate, "I", "interrupt rate"); 1616 } else { 1617 for (i = 0; i < sc->msix_cnt; ++i) { 1618 struct igb_msix_data *msix = &sc->msix_data[i]; 1619 1620 ksnprintf(node, sizeof(node), "msix%d_rate", i); 1621 SYSCTL_ADD_PROC(&sc->sysctl_ctx, 1622 SYSCTL_CHILDREN(sc->sysctl_tree), 1623 OID_AUTO, node, CTLTYPE_INT | CTLFLAG_RW, 1624 msix, 0, igb_sysctl_msix_rate, "I", 1625 msix->msix_rate_desc); 1626 } 1627 } 1628 1629 SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1630 OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW, 1631 sc, 0, igb_sysctl_tx_intr_nsegs, "I", 1632 "# of segments per TX interrupt"); 1633 1634 SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1635 OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW, 1636 sc, 0, igb_sysctl_tx_wreg_nsegs, "I", 1637 "# of segments sent before write to hardware register"); 1638 1639 SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1640 OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW, 1641 sc, 0, igb_sysctl_rx_wreg_nsegs, "I", 1642 "# of segments received before write to hardware register"); 1643 1644 #ifdef IFPOLL_ENABLE 1645 SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1646 OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW, 1647 sc, 0, igb_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu offset"); 1648 SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1649 OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW, 1650 sc, 0, igb_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset"); 1651 #endif 1652 1653 #ifdef IGB_RSS_DEBUG 1654 SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), 1655 OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0, 1656 "RSS debug level"); 1657 for (i = 0; i < sc->rx_ring_cnt; ++i) { 1658 ksnprintf(node, sizeof(node), "rx%d_pkt", i); 1659 SYSCTL_ADD_ULONG(&sc->sysctl_ctx, 1660 SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node, 1661 CTLFLAG_RW, 
&sc->rx_rings[i].rx_packets, "RXed packets"); 1662 } 1663 #endif 1664 #ifdef IGB_TSS_DEBUG 1665 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1666 ksnprintf(node, sizeof(node), "tx%d_pkt", i); 1667 SYSCTL_ADD_ULONG(&sc->sysctl_ctx, 1668 SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, node, 1669 CTLFLAG_RW, &sc->tx_rings[i].tx_packets, "TXed packets"); 1670 } 1671 #endif 1672 } 1673 1674 static int 1675 igb_alloc_rings(struct igb_softc *sc) 1676 { 1677 int error, i; 1678 1679 /* 1680 * Create top level busdma tag 1681 */ 1682 error = bus_dma_tag_create(NULL, 1, 0, 1683 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 1684 BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, 1685 &sc->parent_tag); 1686 if (error) { 1687 device_printf(sc->dev, "could not create top level DMA tag\n"); 1688 return error; 1689 } 1690 1691 /* 1692 * Allocate TX descriptor rings and buffers 1693 */ 1694 sc->tx_rings = kmalloc_cachealign( 1695 sizeof(struct igb_tx_ring) * sc->tx_ring_cnt, 1696 M_DEVBUF, M_WAITOK | M_ZERO); 1697 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1698 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1699 1700 /* Set up some basics */ 1701 txr->sc = sc; 1702 txr->me = i; 1703 lwkt_serialize_init(&txr->tx_serialize); 1704 1705 error = igb_create_tx_ring(txr); 1706 if (error) 1707 return error; 1708 } 1709 1710 /* 1711 * Allocate RX descriptor rings and buffers 1712 */ 1713 sc->rx_rings = kmalloc_cachealign( 1714 sizeof(struct igb_rx_ring) * sc->rx_ring_cnt, 1715 M_DEVBUF, M_WAITOK | M_ZERO); 1716 for (i = 0; i < sc->rx_ring_cnt; ++i) { 1717 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 1718 1719 /* Set up some basics */ 1720 rxr->sc = sc; 1721 rxr->me = i; 1722 lwkt_serialize_init(&rxr->rx_serialize); 1723 1724 error = igb_create_rx_ring(rxr); 1725 if (error) 1726 return error; 1727 } 1728 1729 return 0; 1730 } 1731 1732 static void 1733 igb_free_rings(struct igb_softc *sc) 1734 { 1735 int i; 1736 1737 if (sc->tx_rings != NULL) { 1738 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1739 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1740 1741 igb_destroy_tx_ring(txr, txr->num_tx_desc); 1742 } 1743 kfree(sc->tx_rings, M_DEVBUF); 1744 } 1745 1746 if (sc->rx_rings != NULL) { 1747 for (i = 0; i < sc->rx_ring_cnt; ++i) { 1748 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 1749 1750 igb_destroy_rx_ring(rxr, rxr->num_rx_desc); 1751 } 1752 kfree(sc->rx_rings, M_DEVBUF); 1753 } 1754 } 1755 1756 static int 1757 igb_create_tx_ring(struct igb_tx_ring *txr) 1758 { 1759 int tsize, error, i, ntxd; 1760 1761 /* 1762 * Validate number of transmit descriptors. It must not exceed 1763 * hardware maximum, and must be multiple of IGB_DBA_ALIGN. 
1764 */ 1765 ntxd = device_getenv_int(txr->sc->dev, "txd", igb_txd); 1766 if ((ntxd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN != 0 || 1767 ntxd > IGB_MAX_TXD || ntxd < IGB_MIN_TXD) { 1768 device_printf(txr->sc->dev, 1769 "Using %d TX descriptors instead of %d!\n", 1770 IGB_DEFAULT_TXD, ntxd); 1771 txr->num_tx_desc = IGB_DEFAULT_TXD; 1772 } else { 1773 txr->num_tx_desc = ntxd; 1774 } 1775 1776 /* 1777 * Allocate TX descriptor ring 1778 */ 1779 tsize = roundup2(txr->num_tx_desc * sizeof(union e1000_adv_tx_desc), 1780 IGB_DBA_ALIGN); 1781 txr->txdma.dma_vaddr = bus_dmamem_coherent_any(txr->sc->parent_tag, 1782 IGB_DBA_ALIGN, tsize, BUS_DMA_WAITOK, 1783 &txr->txdma.dma_tag, &txr->txdma.dma_map, &txr->txdma.dma_paddr); 1784 if (txr->txdma.dma_vaddr == NULL) { 1785 device_printf(txr->sc->dev, 1786 "Unable to allocate TX Descriptor memory\n"); 1787 return ENOMEM; 1788 } 1789 txr->tx_base = txr->txdma.dma_vaddr; 1790 bzero(txr->tx_base, tsize); 1791 1792 tsize = __VM_CACHELINE_ALIGN( 1793 sizeof(struct igb_tx_buf) * txr->num_tx_desc); 1794 txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO); 1795 1796 /* 1797 * Allocate TX head write-back buffer 1798 */ 1799 txr->tx_hdr = bus_dmamem_coherent_any(txr->sc->parent_tag, 1800 __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK, 1801 &txr->tx_hdr_dtag, &txr->tx_hdr_dmap, &txr->tx_hdr_paddr); 1802 if (txr->tx_hdr == NULL) { 1803 device_printf(txr->sc->dev, 1804 "Unable to allocate TX head write-back buffer\n"); 1805 return ENOMEM; 1806 } 1807 1808 /* 1809 * Create DMA tag for TX buffers 1810 */ 1811 error = bus_dma_tag_create(txr->sc->parent_tag, 1812 1, 0, /* alignment, bounds */ 1813 BUS_SPACE_MAXADDR, /* lowaddr */ 1814 BUS_SPACE_MAXADDR, /* highaddr */ 1815 NULL, NULL, /* filter, filterarg */ 1816 IGB_TSO_SIZE, /* maxsize */ 1817 IGB_MAX_SCATTER, /* nsegments */ 1818 PAGE_SIZE, /* maxsegsize */ 1819 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 1820 BUS_DMA_ONEBPAGE, /* flags */ 1821 &txr->tx_tag); 1822 if (error) { 1823 device_printf(txr->sc->dev, "Unable to allocate TX DMA tag\n"); 1824 kfree(txr->tx_buf, M_DEVBUF); 1825 txr->tx_buf = NULL; 1826 return error; 1827 } 1828 1829 /* 1830 * Create DMA maps for TX buffers 1831 */ 1832 for (i = 0; i < txr->num_tx_desc; ++i) { 1833 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1834 1835 error = bus_dmamap_create(txr->tx_tag, 1836 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map); 1837 if (error) { 1838 device_printf(txr->sc->dev, 1839 "Unable to create TX DMA map\n"); 1840 igb_destroy_tx_ring(txr, i); 1841 return error; 1842 } 1843 } 1844 1845 if (txr->sc->hw.mac.type == e1000_82575) 1846 txr->tx_flags |= IGB_TXFLAG_TSO_IPLEN0; 1847 1848 /* 1849 * Initialize various watermark 1850 */ 1851 txr->spare_desc = IGB_TX_SPARE; 1852 txr->intr_nsegs = txr->num_tx_desc / 16; 1853 txr->wreg_nsegs = IGB_DEF_TXWREG_NSEGS; 1854 txr->oact_hi_desc = txr->num_tx_desc / 2; 1855 txr->oact_lo_desc = txr->num_tx_desc / 8; 1856 if (txr->oact_lo_desc > IGB_TX_OACTIVE_MAX) 1857 txr->oact_lo_desc = IGB_TX_OACTIVE_MAX; 1858 if (txr->oact_lo_desc < txr->spare_desc + IGB_TX_RESERVED) 1859 txr->oact_lo_desc = txr->spare_desc + IGB_TX_RESERVED; 1860 1861 return 0; 1862 } 1863 1864 static void 1865 igb_free_tx_ring(struct igb_tx_ring *txr) 1866 { 1867 int i; 1868 1869 for (i = 0; i < txr->num_tx_desc; ++i) { 1870 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1871 1872 if (txbuf->m_head != NULL) { 1873 bus_dmamap_unload(txr->tx_tag, txbuf->map); 1874 m_freem(txbuf->m_head); 1875 txbuf->m_head = NULL; 1876 } 1877 } 1878 } 1879 
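/*
 * igb_destroy_tx_ring() tears down what igb_create_tx_ring() set up:
 * the descriptor ring DMA memory, the TX head write-back buffer and
 * the first "ndesc" per-buffer DMA maps.  Queued mbufs must already
 * have been released by igb_free_tx_ring(); the KKASSERT below
 * enforces that.
 */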
1880 static void 1881 igb_destroy_tx_ring(struct igb_tx_ring *txr, int ndesc) 1882 { 1883 int i; 1884 1885 if (txr->txdma.dma_vaddr != NULL) { 1886 bus_dmamap_unload(txr->txdma.dma_tag, txr->txdma.dma_map); 1887 bus_dmamem_free(txr->txdma.dma_tag, txr->txdma.dma_vaddr, 1888 txr->txdma.dma_map); 1889 bus_dma_tag_destroy(txr->txdma.dma_tag); 1890 txr->txdma.dma_vaddr = NULL; 1891 } 1892 1893 if (txr->tx_hdr != NULL) { 1894 bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_dmap); 1895 bus_dmamem_free(txr->tx_hdr_dtag, txr->tx_hdr, 1896 txr->tx_hdr_dmap); 1897 bus_dma_tag_destroy(txr->tx_hdr_dtag); 1898 txr->tx_hdr = NULL; 1899 } 1900 1901 if (txr->tx_buf == NULL) 1902 return; 1903 1904 for (i = 0; i < ndesc; ++i) { 1905 struct igb_tx_buf *txbuf = &txr->tx_buf[i]; 1906 1907 KKASSERT(txbuf->m_head == NULL); 1908 bus_dmamap_destroy(txr->tx_tag, txbuf->map); 1909 } 1910 bus_dma_tag_destroy(txr->tx_tag); 1911 1912 kfree(txr->tx_buf, M_DEVBUF); 1913 txr->tx_buf = NULL; 1914 } 1915 1916 static void 1917 igb_init_tx_ring(struct igb_tx_ring *txr) 1918 { 1919 /* Clear the old descriptor contents */ 1920 bzero(txr->tx_base, 1921 sizeof(union e1000_adv_tx_desc) * txr->num_tx_desc); 1922 1923 /* Clear TX head write-back buffer */ 1924 *(txr->tx_hdr) = 0; 1925 1926 /* Reset indices */ 1927 txr->next_avail_desc = 0; 1928 txr->next_to_clean = 0; 1929 txr->tx_nsegs = 0; 1930 1931 /* Set number of descriptors available */ 1932 txr->tx_avail = txr->num_tx_desc; 1933 1934 /* Enable this TX ring */ 1935 txr->tx_flags |= IGB_TXFLAG_ENABLED; 1936 } 1937 1938 static void 1939 igb_init_tx_unit(struct igb_softc *sc) 1940 { 1941 struct e1000_hw *hw = &sc->hw; 1942 uint32_t tctl; 1943 int i; 1944 1945 /* Setup the Tx Descriptor Rings */ 1946 for (i = 0; i < sc->tx_ring_inuse; ++i) { 1947 struct igb_tx_ring *txr = &sc->tx_rings[i]; 1948 uint64_t bus_addr = txr->txdma.dma_paddr; 1949 uint64_t hdr_paddr = txr->tx_hdr_paddr; 1950 uint32_t txdctl = 0; 1951 uint32_t dca_txctrl; 1952 1953 E1000_WRITE_REG(hw, E1000_TDLEN(i), 1954 txr->num_tx_desc * sizeof(struct e1000_tx_desc)); 1955 E1000_WRITE_REG(hw, E1000_TDBAH(i), 1956 (uint32_t)(bus_addr >> 32)); 1957 E1000_WRITE_REG(hw, E1000_TDBAL(i), 1958 (uint32_t)bus_addr); 1959 1960 /* Setup the HW Tx Head and Tail descriptor pointers */ 1961 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 1962 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 1963 1964 dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i)); 1965 dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN; 1966 E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(i), dca_txctrl); 1967 1968 /* 1969 * Don't set WB_on_EITR: 1970 * - 82575 does not have it 1971 * - It almost has no effect on 82576, see: 1972 * 82576 specification update errata #26 1973 * - It causes unnecessary bus traffic 1974 */ 1975 E1000_WRITE_REG(hw, E1000_TDWBAH(i), 1976 (uint32_t)(hdr_paddr >> 32)); 1977 E1000_WRITE_REG(hw, E1000_TDWBAL(i), 1978 ((uint32_t)hdr_paddr) | E1000_TX_HEAD_WB_ENABLE); 1979 1980 /* 1981 * WTHRESH is ignored by the hardware, since header 1982 * write back mode is used. 
1983 */ 1984 txdctl |= IGB_TX_PTHRESH; 1985 txdctl |= IGB_TX_HTHRESH << 8; 1986 txdctl |= IGB_TX_WTHRESH << 16; 1987 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 1988 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 1989 } 1990 1991 if (sc->vf_ifp) 1992 return; 1993 1994 e1000_config_collision_dist(hw); 1995 1996 /* Program the Transmit Control Register */ 1997 tctl = E1000_READ_REG(hw, E1000_TCTL); 1998 tctl &= ~E1000_TCTL_CT; 1999 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 2000 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 2001 2002 /* This write will effectively turn on the transmit unit. */ 2003 E1000_WRITE_REG(hw, E1000_TCTL, tctl); 2004 } 2005 2006 static boolean_t 2007 igb_txcsum_ctx(struct igb_tx_ring *txr, struct mbuf *mp) 2008 { 2009 struct e1000_adv_tx_context_desc *TXD; 2010 uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx; 2011 int ehdrlen, ctxd, ip_hlen = 0; 2012 boolean_t offload = TRUE; 2013 2014 if ((mp->m_pkthdr.csum_flags & IGB_CSUM_FEATURES) == 0) 2015 offload = FALSE; 2016 2017 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0; 2018 2019 ctxd = txr->next_avail_desc; 2020 TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd]; 2021 2022 /* 2023 * In advanced descriptors the vlan tag must 2024 * be placed into the context descriptor, thus 2025 * we need to be here just for that setup. 2026 */ 2027 if (mp->m_flags & M_VLANTAG) { 2028 uint16_t vlantag; 2029 2030 vlantag = htole16(mp->m_pkthdr.ether_vlantag); 2031 vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT); 2032 } else if (!offload) { 2033 return FALSE; 2034 } 2035 2036 ehdrlen = mp->m_pkthdr.csum_lhlen; 2037 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 2038 2039 /* Set the ether header length */ 2040 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 2041 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 2042 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 2043 ip_hlen = mp->m_pkthdr.csum_iphlen; 2044 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 2045 } 2046 vlan_macip_lens |= ip_hlen; 2047 2048 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 2049 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 2050 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 2051 else if (mp->m_pkthdr.csum_flags & CSUM_UDP) 2052 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; 2053 2054 /* 82575 needs the queue index added */ 2055 if (txr->sc->hw.mac.type == e1000_82575) 2056 mss_l4len_idx = txr->me << 4; 2057 2058 /* Now copy bits into descriptor */ 2059 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 2060 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 2061 TXD->seqnum_seed = htole32(0); 2062 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 2063 2064 /* We've consumed the first desc, adjust counters */ 2065 if (++ctxd == txr->num_tx_desc) 2066 ctxd = 0; 2067 txr->next_avail_desc = ctxd; 2068 --txr->tx_avail; 2069 2070 return offload; 2071 } 2072 2073 static void 2074 igb_txeof(struct igb_tx_ring *txr) 2075 { 2076 struct ifnet *ifp = &txr->sc->arpcom.ac_if; 2077 int first, hdr, avail; 2078 2079 if (txr->tx_avail == txr->num_tx_desc) 2080 return; 2081 2082 first = txr->next_to_clean; 2083 hdr = *(txr->tx_hdr); 2084 2085 if (first == hdr) 2086 return; 2087 2088 avail = txr->tx_avail; 2089 while (first != hdr) { 2090 struct igb_tx_buf *txbuf = &txr->tx_buf[first]; 2091 2092 ++avail; 2093 if (txbuf->m_head) { 2094 bus_dmamap_unload(txr->tx_tag, txbuf->map); 2095 m_freem(txbuf->m_head); 2096 txbuf->m_head = NULL; 2097 IFNET_STAT_INC(ifp, opackets, 1); 2098 } 2099 if (++first == txr->num_tx_desc) 2100 first = 0; 2101 } 
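/*
 * Everything up to the head write-back index has been reclaimed;
 * publish the new clean point and free-descriptor count before
 * deciding below whether OACTIVE can be cleared.
 */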
2102 txr->next_to_clean = first; 2103 txr->tx_avail = avail; 2104 2105 /* 2106 * If we have a minimum free, clear OACTIVE 2107 * to tell the stack that it is OK to send packets. 2108 */ 2109 if (IGB_IS_NOT_OACTIVE(txr)) { 2110 ifsq_clr_oactive(txr->ifsq); 2111 2112 /* 2113 * We have enough TX descriptors, turn off 2114 * the watchdog. We allow small amount of 2115 * packets (roughly intr_nsegs) pending on 2116 * the transmit ring. 2117 */ 2118 txr->tx_watchdog.wd_timer = 0; 2119 } 2120 } 2121 2122 static int 2123 igb_create_rx_ring(struct igb_rx_ring *rxr) 2124 { 2125 int rsize, i, error, nrxd; 2126 2127 /* 2128 * Validate number of receive descriptors. It must not exceed 2129 * hardware maximum, and must be multiple of IGB_DBA_ALIGN. 2130 */ 2131 nrxd = device_getenv_int(rxr->sc->dev, "rxd", igb_rxd); 2132 if ((nrxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN != 0 || 2133 nrxd > IGB_MAX_RXD || nrxd < IGB_MIN_RXD) { 2134 device_printf(rxr->sc->dev, 2135 "Using %d RX descriptors instead of %d!\n", 2136 IGB_DEFAULT_RXD, nrxd); 2137 rxr->num_rx_desc = IGB_DEFAULT_RXD; 2138 } else { 2139 rxr->num_rx_desc = nrxd; 2140 } 2141 2142 /* 2143 * Allocate RX descriptor ring 2144 */ 2145 rsize = roundup2(rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc), 2146 IGB_DBA_ALIGN); 2147 rxr->rxdma.dma_vaddr = bus_dmamem_coherent_any(rxr->sc->parent_tag, 2148 IGB_DBA_ALIGN, rsize, BUS_DMA_WAITOK, 2149 &rxr->rxdma.dma_tag, &rxr->rxdma.dma_map, 2150 &rxr->rxdma.dma_paddr); 2151 if (rxr->rxdma.dma_vaddr == NULL) { 2152 device_printf(rxr->sc->dev, 2153 "Unable to allocate RxDescriptor memory\n"); 2154 return ENOMEM; 2155 } 2156 rxr->rx_base = rxr->rxdma.dma_vaddr; 2157 bzero(rxr->rx_base, rsize); 2158 2159 rsize = __VM_CACHELINE_ALIGN( 2160 sizeof(struct igb_rx_buf) * rxr->num_rx_desc); 2161 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO); 2162 2163 /* 2164 * Create DMA tag for RX buffers 2165 */ 2166 error = bus_dma_tag_create(rxr->sc->parent_tag, 2167 1, 0, /* alignment, bounds */ 2168 BUS_SPACE_MAXADDR, /* lowaddr */ 2169 BUS_SPACE_MAXADDR, /* highaddr */ 2170 NULL, NULL, /* filter, filterarg */ 2171 MCLBYTES, /* maxsize */ 2172 1, /* nsegments */ 2173 MCLBYTES, /* maxsegsize */ 2174 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */ 2175 &rxr->rx_tag); 2176 if (error) { 2177 device_printf(rxr->sc->dev, 2178 "Unable to create RX payload DMA tag\n"); 2179 kfree(rxr->rx_buf, M_DEVBUF); 2180 rxr->rx_buf = NULL; 2181 return error; 2182 } 2183 2184 /* 2185 * Create spare DMA map for RX buffers 2186 */ 2187 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK, 2188 &rxr->rx_sparemap); 2189 if (error) { 2190 device_printf(rxr->sc->dev, 2191 "Unable to create spare RX DMA maps\n"); 2192 bus_dma_tag_destroy(rxr->rx_tag); 2193 kfree(rxr->rx_buf, M_DEVBUF); 2194 rxr->rx_buf = NULL; 2195 return error; 2196 } 2197 2198 /* 2199 * Create DMA maps for RX buffers 2200 */ 2201 for (i = 0; i < rxr->num_rx_desc; i++) { 2202 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2203 2204 error = bus_dmamap_create(rxr->rx_tag, 2205 BUS_DMA_WAITOK, &rxbuf->map); 2206 if (error) { 2207 device_printf(rxr->sc->dev, 2208 "Unable to create RX DMA maps\n"); 2209 igb_destroy_rx_ring(rxr, i); 2210 return error; 2211 } 2212 } 2213 2214 /* 2215 * Initialize various watermark 2216 */ 2217 rxr->wreg_nsegs = IGB_DEF_RXWREG_NSEGS; 2218 2219 return 0; 2220 } 2221 2222 static void 2223 igb_free_rx_ring(struct igb_rx_ring *rxr) 2224 { 2225 int i; 2226 2227 for (i = 0; i < rxr->num_rx_desc; ++i) { 2228 struct igb_rx_buf *rxbuf = 
&rxr->rx_buf[i]; 2229 2230 if (rxbuf->m_head != NULL) { 2231 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2232 m_freem(rxbuf->m_head); 2233 rxbuf->m_head = NULL; 2234 } 2235 } 2236 2237 if (rxr->fmp != NULL) 2238 m_freem(rxr->fmp); 2239 rxr->fmp = NULL; 2240 rxr->lmp = NULL; 2241 } 2242 2243 static void 2244 igb_destroy_rx_ring(struct igb_rx_ring *rxr, int ndesc) 2245 { 2246 int i; 2247 2248 if (rxr->rxdma.dma_vaddr != NULL) { 2249 bus_dmamap_unload(rxr->rxdma.dma_tag, rxr->rxdma.dma_map); 2250 bus_dmamem_free(rxr->rxdma.dma_tag, rxr->rxdma.dma_vaddr, 2251 rxr->rxdma.dma_map); 2252 bus_dma_tag_destroy(rxr->rxdma.dma_tag); 2253 rxr->rxdma.dma_vaddr = NULL; 2254 } 2255 2256 if (rxr->rx_buf == NULL) 2257 return; 2258 2259 for (i = 0; i < ndesc; ++i) { 2260 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2261 2262 KKASSERT(rxbuf->m_head == NULL); 2263 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2264 } 2265 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2266 bus_dma_tag_destroy(rxr->rx_tag); 2267 2268 kfree(rxr->rx_buf, M_DEVBUF); 2269 rxr->rx_buf = NULL; 2270 } 2271 2272 static void 2273 igb_setup_rxdesc(union e1000_adv_rx_desc *rxd, const struct igb_rx_buf *rxbuf) 2274 { 2275 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2276 rxd->wb.upper.status_error = 0; 2277 } 2278 2279 static int 2280 igb_newbuf(struct igb_rx_ring *rxr, int i, boolean_t wait) 2281 { 2282 struct mbuf *m; 2283 bus_dma_segment_t seg; 2284 bus_dmamap_t map; 2285 struct igb_rx_buf *rxbuf; 2286 int error, nseg; 2287 2288 m = m_getcl(wait ? MB_WAIT : MB_DONTWAIT, MT_DATA, M_PKTHDR); 2289 if (m == NULL) { 2290 if (wait) { 2291 if_printf(&rxr->sc->arpcom.ac_if, 2292 "Unable to allocate RX mbuf\n"); 2293 } 2294 return ENOBUFS; 2295 } 2296 m->m_len = m->m_pkthdr.len = MCLBYTES; 2297 2298 if (rxr->sc->max_frame_size <= MCLBYTES - ETHER_ALIGN) 2299 m_adj(m, ETHER_ALIGN); 2300 2301 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 2302 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 2303 if (error) { 2304 m_freem(m); 2305 if (wait) { 2306 if_printf(&rxr->sc->arpcom.ac_if, 2307 "Unable to load RX mbuf\n"); 2308 } 2309 return error; 2310 } 2311 2312 rxbuf = &rxr->rx_buf[i]; 2313 if (rxbuf->m_head != NULL) 2314 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 2315 2316 map = rxbuf->map; 2317 rxbuf->map = rxr->rx_sparemap; 2318 rxr->rx_sparemap = map; 2319 2320 rxbuf->m_head = m; 2321 rxbuf->paddr = seg.ds_addr; 2322 2323 igb_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2324 return 0; 2325 } 2326 2327 static int 2328 igb_init_rx_ring(struct igb_rx_ring *rxr) 2329 { 2330 int i; 2331 2332 /* Clear the ring contents */ 2333 bzero(rxr->rx_base, 2334 rxr->num_rx_desc * sizeof(union e1000_adv_rx_desc)); 2335 2336 /* Now replenish the ring mbufs */ 2337 for (i = 0; i < rxr->num_rx_desc; ++i) { 2338 int error; 2339 2340 error = igb_newbuf(rxr, i, TRUE); 2341 if (error) 2342 return error; 2343 } 2344 2345 /* Setup our descriptor indices */ 2346 rxr->next_to_check = 0; 2347 2348 rxr->fmp = NULL; 2349 rxr->lmp = NULL; 2350 rxr->discard = FALSE; 2351 2352 return 0; 2353 } 2354 2355 static void 2356 igb_init_rx_unit(struct igb_softc *sc) 2357 { 2358 struct ifnet *ifp = &sc->arpcom.ac_if; 2359 struct e1000_hw *hw = &sc->hw; 2360 uint32_t rctl, rxcsum, srrctl = 0; 2361 int i; 2362 2363 /* 2364 * Make sure receives are disabled while setting 2365 * up the descriptor ring 2366 */ 2367 rctl = E1000_READ_REG(hw, E1000_RCTL); 2368 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 2369 2370 #if 0 2371 /* 2372 ** Set up for header split 2373 */ 2374 if 
(igb_header_split) { 2375 /* Use a standard mbuf for the header */ 2376 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 2377 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 2378 } else 2379 #endif 2380 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 2381 2382 /* 2383 ** Set up for jumbo frames 2384 */ 2385 if (ifp->if_mtu > ETHERMTU) { 2386 rctl |= E1000_RCTL_LPE; 2387 #if 0 2388 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 2389 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2390 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 2391 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { 2392 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2393 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 2394 } 2395 /* Set maximum packet len */ 2396 psize = adapter->max_frame_size; 2397 /* are we on a vlan? */ 2398 if (adapter->ifp->if_vlantrunk != NULL) 2399 psize += VLAN_TAG_SIZE; 2400 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); 2401 #else 2402 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2403 rctl |= E1000_RCTL_SZ_2048; 2404 #endif 2405 } else { 2406 rctl &= ~E1000_RCTL_LPE; 2407 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 2408 rctl |= E1000_RCTL_SZ_2048; 2409 } 2410 2411 /* Setup the Base and Length of the Rx Descriptor Rings */ 2412 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2413 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2414 uint64_t bus_addr = rxr->rxdma.dma_paddr; 2415 uint32_t rxdctl; 2416 2417 E1000_WRITE_REG(hw, E1000_RDLEN(i), 2418 rxr->num_rx_desc * sizeof(struct e1000_rx_desc)); 2419 E1000_WRITE_REG(hw, E1000_RDBAH(i), 2420 (uint32_t)(bus_addr >> 32)); 2421 E1000_WRITE_REG(hw, E1000_RDBAL(i), 2422 (uint32_t)bus_addr); 2423 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); 2424 /* Enable this Queue */ 2425 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 2426 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 2427 rxdctl &= 0xFFF00000; 2428 rxdctl |= IGB_RX_PTHRESH; 2429 rxdctl |= IGB_RX_HTHRESH << 8; 2430 /* 2431 * Don't set WTHRESH to a value above 1 on 82576, see: 2432 * 82576 specification update errata #26 2433 */ 2434 rxdctl |= IGB_RX_WTHRESH << 16; 2435 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 2436 } 2437 2438 rxcsum = E1000_READ_REG(&sc->hw, E1000_RXCSUM); 2439 rxcsum &= ~(E1000_RXCSUM_PCSS_MASK | E1000_RXCSUM_IPPCSE); 2440 2441 /* 2442 * Receive Checksum Offload for TCP and UDP 2443 * 2444 * Checksum offloading is also enabled if multiple receive 2445 * queue is to be supported, since we need it to figure out 2446 * fragments. 2447 */ 2448 if ((ifp->if_capenable & IFCAP_RXCSUM) || IGB_ENABLE_HWRSS(sc)) { 2449 /* 2450 * NOTE: 2451 * PCSD must be enabled to enable multiple 2452 * receive queues. 2453 */ 2454 rxcsum |= E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2455 E1000_RXCSUM_PCSD; 2456 } else { 2457 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL | 2458 E1000_RXCSUM_PCSD); 2459 } 2460 E1000_WRITE_REG(&sc->hw, E1000_RXCSUM, rxcsum); 2461 2462 if (IGB_ENABLE_HWRSS(sc)) { 2463 uint8_t key[IGB_NRSSRK * IGB_RSSRK_SIZE]; 2464 uint32_t reta_shift; 2465 int j, r; 2466 2467 /* 2468 * NOTE: 2469 * When we reach here, RSS has already been disabled 2470 * in igb_stop(), so we could safely configure RSS key 2471 * and redirect table. 
2472 */ 2473 2474 /* 2475 * Configure RSS key 2476 */ 2477 toeplitz_get_key(key, sizeof(key)); 2478 for (i = 0; i < IGB_NRSSRK; ++i) { 2479 uint32_t rssrk; 2480 2481 rssrk = IGB_RSSRK_VAL(key, i); 2482 IGB_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", i, rssrk); 2483 2484 E1000_WRITE_REG(hw, E1000_RSSRK(i), rssrk); 2485 } 2486 2487 /* 2488 * Configure RSS redirect table in following fashion: 2489 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 2490 */ 2491 reta_shift = IGB_RETA_SHIFT; 2492 if (hw->mac.type == e1000_82575) 2493 reta_shift = IGB_RETA_SHIFT_82575; 2494 2495 r = 0; 2496 for (j = 0; j < IGB_NRETA; ++j) { 2497 uint32_t reta = 0; 2498 2499 for (i = 0; i < IGB_RETA_SIZE; ++i) { 2500 uint32_t q; 2501 2502 q = (r % sc->rx_ring_inuse) << reta_shift; 2503 reta |= q << (8 * i); 2504 ++r; 2505 } 2506 IGB_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta); 2507 E1000_WRITE_REG(hw, E1000_RETA(j), reta); 2508 } 2509 2510 /* 2511 * Enable multiple receive queues. 2512 * Enable IPv4 RSS standard hash functions. 2513 * Disable RSS interrupt on 82575 2514 */ 2515 E1000_WRITE_REG(&sc->hw, E1000_MRQC, 2516 E1000_MRQC_ENABLE_RSS_4Q | 2517 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2518 E1000_MRQC_RSS_FIELD_IPV4); 2519 } 2520 2521 /* Setup the Receive Control Register */ 2522 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2523 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 2524 E1000_RCTL_RDMTS_HALF | 2525 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2526 /* Strip CRC bytes. */ 2527 rctl |= E1000_RCTL_SECRC; 2528 /* Make sure VLAN Filters are off */ 2529 rctl &= ~E1000_RCTL_VFE; 2530 /* Don't store bad packets */ 2531 rctl &= ~E1000_RCTL_SBP; 2532 2533 /* Enable Receives */ 2534 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 2535 2536 /* 2537 * Setup the HW Rx Head and Tail Descriptor Pointers 2538 * - needs to be after enable 2539 */ 2540 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2541 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 2542 2543 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 2544 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->num_rx_desc - 1); 2545 } 2546 } 2547 2548 static void 2549 igb_rx_refresh(struct igb_rx_ring *rxr, int i) 2550 { 2551 if (--i < 0) 2552 i = rxr->num_rx_desc - 1; 2553 E1000_WRITE_REG(&rxr->sc->hw, E1000_RDT(rxr->me), i); 2554 } 2555 2556 static void 2557 igb_rxeof(struct igb_rx_ring *rxr, int count) 2558 { 2559 struct ifnet *ifp = &rxr->sc->arpcom.ac_if; 2560 union e1000_adv_rx_desc *cur; 2561 uint32_t staterr; 2562 int i, ncoll = 0; 2563 2564 i = rxr->next_to_check; 2565 cur = &rxr->rx_base[i]; 2566 staterr = le32toh(cur->wb.upper.status_error); 2567 2568 if ((staterr & E1000_RXD_STAT_DD) == 0) 2569 return; 2570 2571 while ((staterr & E1000_RXD_STAT_DD) && count != 0) { 2572 struct pktinfo *pi = NULL, pi0; 2573 struct igb_rx_buf *rxbuf = &rxr->rx_buf[i]; 2574 struct mbuf *m = NULL; 2575 boolean_t eop; 2576 2577 eop = (staterr & E1000_RXD_STAT_EOP) ? 
TRUE : FALSE; 2578 if (eop) 2579 --count; 2580 2581 ++ncoll; 2582 if ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) == 0 && 2583 !rxr->discard) { 2584 struct mbuf *mp = rxbuf->m_head; 2585 uint32_t hash, hashtype; 2586 uint16_t vlan; 2587 int len; 2588 2589 len = le16toh(cur->wb.upper.length); 2590 if (rxr->sc->hw.mac.type == e1000_i350 && 2591 (staterr & E1000_RXDEXT_STATERR_LB)) 2592 vlan = be16toh(cur->wb.upper.vlan); 2593 else 2594 vlan = le16toh(cur->wb.upper.vlan); 2595 2596 hash = le32toh(cur->wb.lower.hi_dword.rss); 2597 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2598 E1000_RXDADV_RSSTYPE_MASK; 2599 2600 IGB_RSS_DPRINTF(rxr->sc, 10, 2601 "ring%d, hash 0x%08x, hashtype %u\n", 2602 rxr->me, hash, hashtype); 2603 2604 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, 2605 BUS_DMASYNC_POSTREAD); 2606 2607 if (igb_newbuf(rxr, i, FALSE) != 0) { 2608 IFNET_STAT_INC(ifp, iqdrops, 1); 2609 goto discard; 2610 } 2611 2612 mp->m_len = len; 2613 if (rxr->fmp == NULL) { 2614 mp->m_pkthdr.len = len; 2615 rxr->fmp = mp; 2616 rxr->lmp = mp; 2617 } else { 2618 rxr->lmp->m_next = mp; 2619 rxr->lmp = rxr->lmp->m_next; 2620 rxr->fmp->m_pkthdr.len += len; 2621 } 2622 2623 if (eop) { 2624 m = rxr->fmp; 2625 rxr->fmp = NULL; 2626 rxr->lmp = NULL; 2627 2628 m->m_pkthdr.rcvif = ifp; 2629 IFNET_STAT_INC(ifp, ipackets, 1); 2630 2631 if (ifp->if_capenable & IFCAP_RXCSUM) 2632 igb_rxcsum(staterr, m); 2633 2634 if (staterr & E1000_RXD_STAT_VP) { 2635 m->m_pkthdr.ether_vlantag = vlan; 2636 m->m_flags |= M_VLANTAG; 2637 } 2638 2639 if (ifp->if_capenable & IFCAP_RSS) { 2640 pi = igb_rssinfo(m, &pi0, 2641 hash, hashtype, staterr); 2642 } 2643 #ifdef IGB_RSS_DEBUG 2644 rxr->rx_packets++; 2645 #endif 2646 } 2647 } else { 2648 IFNET_STAT_INC(ifp, ierrors, 1); 2649 discard: 2650 igb_setup_rxdesc(cur, rxbuf); 2651 if (!eop) 2652 rxr->discard = TRUE; 2653 else 2654 rxr->discard = FALSE; 2655 if (rxr->fmp != NULL) { 2656 m_freem(rxr->fmp); 2657 rxr->fmp = NULL; 2658 rxr->lmp = NULL; 2659 } 2660 m = NULL; 2661 } 2662 2663 if (m != NULL) 2664 ether_input_pkt(ifp, m, pi); 2665 2666 /* Advance our pointers to the next descriptor. */ 2667 if (++i == rxr->num_rx_desc) 2668 i = 0; 2669 2670 if (ncoll >= rxr->wreg_nsegs) { 2671 igb_rx_refresh(rxr, i); 2672 ncoll = 0; 2673 } 2674 2675 cur = &rxr->rx_base[i]; 2676 staterr = le32toh(cur->wb.upper.status_error); 2677 } 2678 rxr->next_to_check = i; 2679 2680 if (ncoll > 0) 2681 igb_rx_refresh(rxr, i); 2682 } 2683 2684 2685 static void 2686 igb_set_vlan(struct igb_softc *sc) 2687 { 2688 struct e1000_hw *hw = &sc->hw; 2689 uint32_t reg; 2690 #if 0 2691 struct ifnet *ifp = sc->arpcom.ac_if; 2692 #endif 2693 2694 if (sc->vf_ifp) { 2695 e1000_rlpml_set_vf(hw, sc->max_frame_size + VLAN_TAG_SIZE); 2696 return; 2697 } 2698 2699 reg = E1000_READ_REG(hw, E1000_CTRL); 2700 reg |= E1000_CTRL_VME; 2701 E1000_WRITE_REG(hw, E1000_CTRL, reg); 2702 2703 #if 0 2704 /* Enable the Filter Table */ 2705 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 2706 reg = E1000_READ_REG(hw, E1000_RCTL); 2707 reg &= ~E1000_RCTL_CFIEN; 2708 reg |= E1000_RCTL_VFE; 2709 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2710 } 2711 #endif 2712 2713 /* Update the frame size */ 2714 E1000_WRITE_REG(&sc->hw, E1000_RLPML, 2715 sc->max_frame_size + VLAN_TAG_SIZE); 2716 2717 #if 0 2718 /* Don't bother with table if no vlans */ 2719 if ((adapter->num_vlans == 0) || 2720 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 2721 return; 2722 /* 2723 ** A soft reset zero's out the VFTA, so 2724 ** we need to repopulate it now. 
2725 */ 2726 for (int i = 0; i < IGB_VFTA_SIZE; i++) 2727 if (adapter->shadow_vfta[i] != 0) { 2728 if (adapter->vf_ifp) 2729 e1000_vfta_set_vf(hw, 2730 adapter->shadow_vfta[i], TRUE); 2731 else 2732 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, 2733 i, adapter->shadow_vfta[i]); 2734 } 2735 #endif 2736 } 2737 2738 static void 2739 igb_enable_intr(struct igb_softc *sc) 2740 { 2741 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2742 lwkt_serialize_handler_enable(&sc->main_serialize); 2743 } else { 2744 int i; 2745 2746 for (i = 0; i < sc->msix_cnt; ++i) { 2747 lwkt_serialize_handler_enable( 2748 sc->msix_data[i].msix_serialize); 2749 } 2750 } 2751 2752 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2753 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2754 E1000_WRITE_REG(&sc->hw, E1000_EIAC, sc->intr_mask); 2755 else 2756 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2757 E1000_WRITE_REG(&sc->hw, E1000_EIAM, sc->intr_mask); 2758 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 2759 E1000_WRITE_REG(&sc->hw, E1000_IMS, E1000_IMS_LSC); 2760 } else { 2761 E1000_WRITE_REG(&sc->hw, E1000_IMS, IMS_ENABLE_MASK); 2762 } 2763 E1000_WRITE_FLUSH(&sc->hw); 2764 } 2765 2766 static void 2767 igb_disable_intr(struct igb_softc *sc) 2768 { 2769 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) { 2770 E1000_WRITE_REG(&sc->hw, E1000_EIMC, 0xffffffff); 2771 E1000_WRITE_REG(&sc->hw, E1000_EIAC, 0); 2772 } 2773 E1000_WRITE_REG(&sc->hw, E1000_IMC, 0xffffffff); 2774 E1000_WRITE_FLUSH(&sc->hw); 2775 2776 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 2777 lwkt_serialize_handler_disable(&sc->main_serialize); 2778 } else { 2779 int i; 2780 2781 for (i = 0; i < sc->msix_cnt; ++i) { 2782 lwkt_serialize_handler_disable( 2783 sc->msix_data[i].msix_serialize); 2784 } 2785 } 2786 } 2787 2788 /* 2789 * Bit of a misnomer, what this really means is 2790 * to enable OS management of the system... aka 2791 * to disable special hardware management features 2792 */ 2793 static void 2794 igb_get_mgmt(struct igb_softc *sc) 2795 { 2796 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2797 int manc2h = E1000_READ_REG(&sc->hw, E1000_MANC2H); 2798 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2799 2800 /* disable hardware interception of ARP */ 2801 manc &= ~E1000_MANC_ARP_EN; 2802 2803 /* enable receiving management packets to the host */ 2804 manc |= E1000_MANC_EN_MNG2HOST; 2805 manc2h |= 1 << 5; /* Mng Port 623 */ 2806 manc2h |= 1 << 6; /* Mng Port 664 */ 2807 E1000_WRITE_REG(&sc->hw, E1000_MANC2H, manc2h); 2808 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2809 } 2810 } 2811 2812 /* 2813 * Give control back to hardware management controller 2814 * if there is one. 2815 */ 2816 static void 2817 igb_rel_mgmt(struct igb_softc *sc) 2818 { 2819 if (sc->flags & IGB_FLAG_HAS_MGMT) { 2820 int manc = E1000_READ_REG(&sc->hw, E1000_MANC); 2821 2822 /* Re-enable hardware interception of ARP */ 2823 manc |= E1000_MANC_ARP_EN; 2824 manc &= ~E1000_MANC_EN_MNG2HOST; 2825 2826 E1000_WRITE_REG(&sc->hw, E1000_MANC, manc); 2827 } 2828 } 2829 2830 /* 2831 * Sets CTRL_EXT:DRV_LOAD bit. 2832 * 2833 * For ASF and Pass Through versions of f/w this means that 2834 * the driver is loaded. 2835 */ 2836 static void 2837 igb_get_hw_control(struct igb_softc *sc) 2838 { 2839 uint32_t ctrl_ext; 2840 2841 if (sc->vf_ifp) 2842 return; 2843 2844 /* Let firmware know the driver has taken over */ 2845 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2846 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2847 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 2848 } 2849 2850 /* 2851 * Resets CTRL_EXT:DRV_LOAD bit. 
2852 * 2853 * For ASF and Pass Through versions of f/w this means that the 2854 * driver is no longer loaded. 2855 */ 2856 static void 2857 igb_rel_hw_control(struct igb_softc *sc) 2858 { 2859 uint32_t ctrl_ext; 2860 2861 if (sc->vf_ifp) 2862 return; 2863 2864 /* Let the firmware take over control of the h/w */ 2865 ctrl_ext = E1000_READ_REG(&sc->hw, E1000_CTRL_EXT); 2866 E1000_WRITE_REG(&sc->hw, E1000_CTRL_EXT, 2867 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 2868 } 2869 2870 static int 2871 igb_is_valid_ether_addr(const uint8_t *addr) 2872 { 2873 uint8_t zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; 2874 2875 if ((addr[0] & 1) || !bcmp(addr, zero_addr, ETHER_ADDR_LEN)) 2876 return FALSE; 2877 return TRUE; 2878 } 2879 2880 /* 2881 * Enable PCI Wake On LAN capability 2882 */ 2883 static void 2884 igb_enable_wol(device_t dev) 2885 { 2886 uint16_t cap, status; 2887 uint8_t id; 2888 2889 /* First find the capabilities pointer */ 2890 cap = pci_read_config(dev, PCIR_CAP_PTR, 2); 2891 2892 /* Read the PM Capabilities */ 2893 id = pci_read_config(dev, cap, 1); 2894 if (id != PCIY_PMG) /* Something wrong */ 2895 return; 2896 2897 /* 2898 * OK, we have the power capabilities, 2899 * so now get the status register 2900 */ 2901 cap += PCIR_POWER_STATUS; 2902 status = pci_read_config(dev, cap, 2); 2903 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 2904 pci_write_config(dev, cap, status, 2); 2905 } 2906 2907 static void 2908 igb_update_stats_counters(struct igb_softc *sc) 2909 { 2910 struct e1000_hw *hw = &sc->hw; 2911 struct e1000_hw_stats *stats; 2912 struct ifnet *ifp = &sc->arpcom.ac_if; 2913 2914 /* 2915 * The virtual function adapter has only a 2916 * small controlled set of stats, so do only 2917 * those and return. 2918 */ 2919 if (sc->vf_ifp) { 2920 igb_update_vf_stats_counters(sc); 2921 return; 2922 } 2923 stats = sc->stats; 2924 2925 if (sc->hw.phy.media_type == e1000_media_type_copper || 2926 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { 2927 stats->symerrs += 2928 E1000_READ_REG(hw, E1000_SYMERRS); 2929 stats->sec += E1000_READ_REG(hw, E1000_SEC); 2930 } 2931 2932 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); 2933 stats->mpc += E1000_READ_REG(hw, E1000_MPC); 2934 stats->scc += E1000_READ_REG(hw, E1000_SCC); 2935 stats->ecol += E1000_READ_REG(hw, E1000_ECOL); 2936 2937 stats->mcc += E1000_READ_REG(hw, E1000_MCC); 2938 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); 2939 stats->colc += E1000_READ_REG(hw, E1000_COLC); 2940 stats->dc += E1000_READ_REG(hw, E1000_DC); 2941 stats->rlec += E1000_READ_REG(hw, E1000_RLEC); 2942 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); 2943 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); 2944 2945 /* 2946 * For watchdog management we need to know if we have been 2947 * paused during the last interval, so capture that here.
2948 */ 2949 sc->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC); 2950 stats->xoffrxc += sc->pause_frames; 2951 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); 2952 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); 2953 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); 2954 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); 2955 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); 2956 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); 2957 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); 2958 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); 2959 stats->gprc += E1000_READ_REG(hw, E1000_GPRC); 2960 stats->bprc += E1000_READ_REG(hw, E1000_BPRC); 2961 stats->mprc += E1000_READ_REG(hw, E1000_MPRC); 2962 stats->gptc += E1000_READ_REG(hw, E1000_GPTC); 2963 2964 /* For the 64-bit byte counters the low dword must be read first. */ 2965 /* Both registers clear on the read of the high dword */ 2966 2967 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) + 2968 ((uint64_t)E1000_READ_REG(hw, E1000_GORCH) << 32); 2969 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) + 2970 ((uint64_t)E1000_READ_REG(hw, E1000_GOTCH) << 32); 2971 2972 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC); 2973 stats->ruc += E1000_READ_REG(hw, E1000_RUC); 2974 stats->rfc += E1000_READ_REG(hw, E1000_RFC); 2975 stats->roc += E1000_READ_REG(hw, E1000_ROC); 2976 stats->rjc += E1000_READ_REG(hw, E1000_RJC); 2977 2978 stats->tor += E1000_READ_REG(hw, E1000_TORH); 2979 stats->tot += E1000_READ_REG(hw, E1000_TOTH); 2980 2981 stats->tpr += E1000_READ_REG(hw, E1000_TPR); 2982 stats->tpt += E1000_READ_REG(hw, E1000_TPT); 2983 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64); 2984 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127); 2985 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255); 2986 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511); 2987 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); 2988 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); 2989 stats->mptc += E1000_READ_REG(hw, E1000_MPTC); 2990 stats->bptc += E1000_READ_REG(hw, E1000_BPTC); 2991 2992 /* Interrupt Counts */ 2993 2994 stats->iac += E1000_READ_REG(hw, E1000_IAC); 2995 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); 2996 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); 2997 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); 2998 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); 2999 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); 3000 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); 3001 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); 3002 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); 3003 3004 /* Host to Card Statistics */ 3005 3006 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC); 3007 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC); 3008 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC); 3009 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC); 3010 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC); 3011 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC); 3012 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC); 3013 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) + 3014 ((uint64_t)E1000_READ_REG(hw, E1000_HGORCH) << 32)); 3015 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) + 3016 ((uint64_t)E1000_READ_REG(hw, E1000_HGOTCH) << 32)); 3017 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS); 3018 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC); 3019 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC); 3020 3021 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); 3022 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC); 3023 
stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS); 3024 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR); 3025 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC); 3026 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); 3027 3028 IFNET_STAT_SET(ifp, collisions, stats->colc); 3029 3030 /* Rx Errors */ 3031 IFNET_STAT_SET(ifp, ierrors, 3032 stats->rxerrc + stats->crcerrs + stats->algnerrc + 3033 stats->ruc + stats->roc + stats->mpc + stats->cexterr); 3034 3035 /* Tx Errors */ 3036 IFNET_STAT_SET(ifp, oerrors, 3037 stats->ecol + stats->latecol + sc->watchdog_events); 3038 3039 /* Driver specific counters */ 3040 sc->device_control = E1000_READ_REG(hw, E1000_CTRL); 3041 sc->rx_control = E1000_READ_REG(hw, E1000_RCTL); 3042 sc->int_mask = E1000_READ_REG(hw, E1000_IMS); 3043 sc->eint_mask = E1000_READ_REG(hw, E1000_EIMS); 3044 sc->packet_buf_alloc_tx = 3045 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16); 3046 sc->packet_buf_alloc_rx = 3047 (E1000_READ_REG(hw, E1000_PBA) & 0xffff); 3048 } 3049 3050 static void 3051 igb_vf_init_stats(struct igb_softc *sc) 3052 { 3053 struct e1000_hw *hw = &sc->hw; 3054 struct e1000_vf_stats *stats; 3055 3056 stats = sc->stats; 3057 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC); 3058 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC); 3059 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC); 3060 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC); 3061 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC); 3062 } 3063 3064 static void 3065 igb_update_vf_stats_counters(struct igb_softc *sc) 3066 { 3067 struct e1000_hw *hw = &sc->hw; 3068 struct e1000_vf_stats *stats; 3069 3070 if (sc->link_speed == 0) 3071 return; 3072 3073 stats = sc->stats; 3074 UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc); 3075 UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc); 3076 UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc); 3077 UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc); 3078 UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc); 3079 } 3080 3081 #ifdef IFPOLL_ENABLE 3082 3083 static void 3084 igb_npoll_status(struct ifnet *ifp) 3085 { 3086 struct igb_softc *sc = ifp->if_softc; 3087 uint32_t reg_icr; 3088 3089 ASSERT_SERIALIZED(&sc->main_serialize); 3090 3091 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3092 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3093 sc->hw.mac.get_link_status = 1; 3094 igb_update_link_status(sc); 3095 } 3096 } 3097 3098 static void 3099 igb_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused) 3100 { 3101 struct igb_tx_ring *txr = arg; 3102 3103 ASSERT_SERIALIZED(&txr->tx_serialize); 3104 3105 igb_txeof(txr); 3106 if (!ifsq_is_empty(txr->ifsq)) 3107 ifsq_devstart(txr->ifsq); 3108 } 3109 3110 static void 3111 igb_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle) 3112 { 3113 struct igb_rx_ring *rxr = arg; 3114 3115 ASSERT_SERIALIZED(&rxr->rx_serialize); 3116 3117 igb_rxeof(rxr, cycle); 3118 } 3119 3120 static void 3121 igb_npoll(struct ifnet *ifp, struct ifpoll_info *info) 3122 { 3123 struct igb_softc *sc = ifp->if_softc; 3124 int i, txr_cnt, rxr_cnt; 3125 3126 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3127 3128 if (info) { 3129 int off; 3130 3131 info->ifpi_status.status_func = igb_npoll_status; 3132 info->ifpi_status.serializer = &sc->main_serialize; 3133 3134 txr_cnt = igb_get_txring_inuse(sc, TRUE); 3135 off = sc->tx_npoll_off; 3136 for (i = 0; i < txr_cnt; ++i) { 3137 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3138 int idx = i + off; 3139 3140 KKASSERT(idx < ncpus2); 3141 
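/*
 * Register the TX ring's poll handler for CPU idx and bind the
 * subqueue to the same CPU, so packets queued while polling is
 * active are started from that CPU as well.
 */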
info->ifpi_tx[idx].poll_func = igb_npoll_tx; 3142 info->ifpi_tx[idx].arg = txr; 3143 info->ifpi_tx[idx].serializer = &txr->tx_serialize; 3144 ifsq_set_cpuid(txr->ifsq, idx); 3145 } 3146 3147 rxr_cnt = igb_get_rxring_inuse(sc, TRUE); 3148 off = sc->rx_npoll_off; 3149 for (i = 0; i < rxr_cnt; ++i) { 3150 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3151 int idx = i + off; 3152 3153 KKASSERT(idx < ncpus2); 3154 info->ifpi_rx[idx].poll_func = igb_npoll_rx; 3155 info->ifpi_rx[idx].arg = rxr; 3156 info->ifpi_rx[idx].serializer = &rxr->rx_serialize; 3157 } 3158 3159 if (ifp->if_flags & IFF_RUNNING) { 3160 if (rxr_cnt == sc->rx_ring_inuse && 3161 txr_cnt == sc->tx_ring_inuse) { 3162 igb_set_timer_cpuid(sc, TRUE); 3163 igb_disable_intr(sc); 3164 } else { 3165 igb_init(sc); 3166 } 3167 } 3168 } else { 3169 for (i = 0; i < sc->tx_ring_cnt; ++i) { 3170 struct igb_tx_ring *txr = &sc->tx_rings[i]; 3171 3172 ifsq_set_cpuid(txr->ifsq, txr->tx_intr_cpuid); 3173 } 3174 3175 if (ifp->if_flags & IFF_RUNNING) { 3176 txr_cnt = igb_get_txring_inuse(sc, FALSE); 3177 rxr_cnt = igb_get_rxring_inuse(sc, FALSE); 3178 3179 if (rxr_cnt == sc->rx_ring_inuse && 3180 txr_cnt == sc->tx_ring_inuse) { 3181 igb_set_timer_cpuid(sc, FALSE); 3182 igb_enable_intr(sc); 3183 } else { 3184 igb_init(sc); 3185 } 3186 } 3187 } 3188 } 3189 3190 #endif /* IFPOLL_ENABLE */ 3191 3192 static void 3193 igb_intr(void *xsc) 3194 { 3195 struct igb_softc *sc = xsc; 3196 struct ifnet *ifp = &sc->arpcom.ac_if; 3197 uint32_t eicr; 3198 3199 ASSERT_SERIALIZED(&sc->main_serialize); 3200 3201 eicr = E1000_READ_REG(&sc->hw, E1000_EICR); 3202 3203 if (eicr == 0) 3204 return; 3205 3206 if (ifp->if_flags & IFF_RUNNING) { 3207 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3208 int i; 3209 3210 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3211 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3212 3213 if (eicr & rxr->rx_intr_mask) { 3214 lwkt_serialize_enter(&rxr->rx_serialize); 3215 igb_rxeof(rxr, -1); 3216 lwkt_serialize_exit(&rxr->rx_serialize); 3217 } 3218 } 3219 3220 if (eicr & txr->tx_intr_mask) { 3221 lwkt_serialize_enter(&txr->tx_serialize); 3222 igb_txeof(txr); 3223 if (!ifsq_is_empty(txr->ifsq)) 3224 ifsq_devstart(txr->ifsq); 3225 lwkt_serialize_exit(&txr->tx_serialize); 3226 } 3227 } 3228 3229 if (eicr & E1000_EICR_OTHER) { 3230 uint32_t icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3231 3232 /* Link status change */ 3233 if (icr & E1000_ICR_LSC) { 3234 sc->hw.mac.get_link_status = 1; 3235 igb_update_link_status(sc); 3236 } 3237 } 3238 3239 /* 3240 * Reading EICR has the side effect to clear interrupt mask, 3241 * so all interrupts need to be enabled here. 3242 */ 3243 E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->intr_mask); 3244 } 3245 3246 static void 3247 igb_intr_shared(void *xsc) 3248 { 3249 struct igb_softc *sc = xsc; 3250 struct ifnet *ifp = &sc->arpcom.ac_if; 3251 uint32_t reg_icr; 3252 3253 ASSERT_SERIALIZED(&sc->main_serialize); 3254 3255 reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR); 3256 3257 /* Hot eject? */ 3258 if (reg_icr == 0xffffffff) 3259 return; 3260 3261 /* Definitely not our interrupt. 
*/ 3262 if (reg_icr == 0x0) 3263 return; 3264 3265 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0) 3266 return; 3267 3268 if (ifp->if_flags & IFF_RUNNING) { 3269 if (reg_icr & 3270 (E1000_ICR_RXT0 | E1000_ICR_RXDMT0 | E1000_ICR_RXO)) { 3271 int i; 3272 3273 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3274 struct igb_rx_ring *rxr = &sc->rx_rings[i]; 3275 3276 lwkt_serialize_enter(&rxr->rx_serialize); 3277 igb_rxeof(rxr, -1); 3278 lwkt_serialize_exit(&rxr->rx_serialize); 3279 } 3280 } 3281 3282 if (reg_icr & E1000_ICR_TXDW) { 3283 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3284 3285 lwkt_serialize_enter(&txr->tx_serialize); 3286 igb_txeof(txr); 3287 if (!ifsq_is_empty(txr->ifsq)) 3288 ifsq_devstart(txr->ifsq); 3289 lwkt_serialize_exit(&txr->tx_serialize); 3290 } 3291 } 3292 3293 /* Link status change */ 3294 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 3295 sc->hw.mac.get_link_status = 1; 3296 igb_update_link_status(sc); 3297 } 3298 3299 if (reg_icr & E1000_ICR_RXO) 3300 sc->rx_overruns++; 3301 } 3302 3303 static int 3304 igb_encap(struct igb_tx_ring *txr, struct mbuf **m_headp, 3305 int *segs_used, int *idx) 3306 { 3307 bus_dma_segment_t segs[IGB_MAX_SCATTER]; 3308 bus_dmamap_t map; 3309 struct igb_tx_buf *tx_buf, *tx_buf_mapped; 3310 union e1000_adv_tx_desc *txd = NULL; 3311 struct mbuf *m_head = *m_headp; 3312 uint32_t olinfo_status = 0, cmd_type_len = 0, cmd_rs = 0; 3313 int maxsegs, nsegs, i, j, error; 3314 uint32_t hdrlen = 0; 3315 3316 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3317 error = igb_tso_pullup(txr, m_headp); 3318 if (error) 3319 return error; 3320 m_head = *m_headp; 3321 } 3322 3323 /* Set basic descriptor constants */ 3324 cmd_type_len |= E1000_ADVTXD_DTYP_DATA; 3325 cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT; 3326 if (m_head->m_flags & M_VLANTAG) 3327 cmd_type_len |= E1000_ADVTXD_DCMD_VLE; 3328 3329 /* 3330 * Map the packet for DMA. 3331 */ 3332 tx_buf = &txr->tx_buf[txr->next_avail_desc]; 3333 tx_buf_mapped = tx_buf; 3334 map = tx_buf->map; 3335 3336 maxsegs = txr->tx_avail - IGB_TX_RESERVED; 3337 KASSERT(maxsegs >= txr->spare_desc, ("not enough spare TX desc\n")); 3338 if (maxsegs > IGB_MAX_SCATTER) 3339 maxsegs = IGB_MAX_SCATTER; 3340 3341 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp, 3342 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT); 3343 if (error) { 3344 if (error == ENOBUFS) 3345 txr->sc->mbuf_defrag_failed++; 3346 else 3347 txr->sc->no_tx_dma_setup++; 3348 3349 m_freem(*m_headp); 3350 *m_headp = NULL; 3351 return error; 3352 } 3353 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE); 3354 3355 m_head = *m_headp; 3356 3357 /* 3358 * Set up the TX context descriptor, if any hardware offloading is 3359 * needed. This includes CSUM, VLAN, and TSO. It will consume one 3360 * TX descriptor. 3361 * 3362 * Unlike these chips' predecessors (em/emx), TX context descriptor 3363 * will _not_ interfere TX data fetching pipelining. 
3364 */ 3365 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3366 igb_tso_ctx(txr, m_head, &hdrlen); 3367 cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 3368 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 3369 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3370 txr->tx_nsegs++; 3371 (*segs_used)++; 3372 } else if (igb_txcsum_ctx(txr, m_head)) { 3373 if (m_head->m_pkthdr.csum_flags & CSUM_IP) 3374 olinfo_status |= (E1000_TXD_POPTS_IXSM << 8); 3375 if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP)) 3376 olinfo_status |= (E1000_TXD_POPTS_TXSM << 8); 3377 txr->tx_nsegs++; 3378 (*segs_used)++; 3379 } 3380 3381 *segs_used += nsegs; 3382 txr->tx_nsegs += nsegs; 3383 if (txr->tx_nsegs >= txr->intr_nsegs) { 3384 /* 3385 * Report Status (RS) is turned on every intr_nsegs 3386 * descriptors (roughly). 3387 */ 3388 txr->tx_nsegs = 0; 3389 cmd_rs = E1000_ADVTXD_DCMD_RS; 3390 } 3391 3392 /* Calculate payload length */ 3393 olinfo_status |= ((m_head->m_pkthdr.len - hdrlen) 3394 << E1000_ADVTXD_PAYLEN_SHIFT); 3395 3396 /* 82575 needs the queue index added */ 3397 if (txr->sc->hw.mac.type == e1000_82575) 3398 olinfo_status |= txr->me << 4; 3399 3400 /* Set up our transmit descriptors */ 3401 i = txr->next_avail_desc; 3402 for (j = 0; j < nsegs; j++) { 3403 bus_size_t seg_len; 3404 bus_addr_t seg_addr; 3405 3406 tx_buf = &txr->tx_buf[i]; 3407 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i]; 3408 seg_addr = segs[j].ds_addr; 3409 seg_len = segs[j].ds_len; 3410 3411 txd->read.buffer_addr = htole64(seg_addr); 3412 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len); 3413 txd->read.olinfo_status = htole32(olinfo_status); 3414 if (++i == txr->num_tx_desc) 3415 i = 0; 3416 tx_buf->m_head = NULL; 3417 } 3418 3419 KASSERT(txr->tx_avail > nsegs, ("invalid avail TX desc\n")); 3420 txr->next_avail_desc = i; 3421 txr->tx_avail -= nsegs; 3422 3423 tx_buf->m_head = m_head; 3424 tx_buf_mapped->map = tx_buf->map; 3425 tx_buf->map = map; 3426 3427 /* 3428 * Last Descriptor of Packet needs End Of Packet (EOP) 3429 */ 3430 txd->read.cmd_type_len |= htole32(E1000_ADVTXD_DCMD_EOP | cmd_rs); 3431 3432 /* 3433 * Defer TDT updating, until enough descrptors are setup 3434 */ 3435 *idx = i; 3436 #ifdef IGB_TSS_DEBUG 3437 ++txr->tx_packets; 3438 #endif 3439 3440 return 0; 3441 } 3442 3443 static void 3444 igb_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 3445 { 3446 struct igb_softc *sc = ifp->if_softc; 3447 struct igb_tx_ring *txr = ifsq_get_priv(ifsq); 3448 struct mbuf *m_head; 3449 int idx = -1, nsegs = 0; 3450 3451 KKASSERT(txr->ifsq == ifsq); 3452 ASSERT_SERIALIZED(&txr->tx_serialize); 3453 3454 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 3455 return; 3456 3457 if (!sc->link_active || (txr->tx_flags & IGB_TXFLAG_ENABLED) == 0) { 3458 ifsq_purge(ifsq); 3459 return; 3460 } 3461 3462 if (!IGB_IS_NOT_OACTIVE(txr)) 3463 igb_txeof(txr); 3464 3465 while (!ifsq_is_empty(ifsq)) { 3466 if (IGB_IS_OACTIVE(txr)) { 3467 ifsq_set_oactive(ifsq); 3468 /* Set watchdog on */ 3469 txr->tx_watchdog.wd_timer = 5; 3470 break; 3471 } 3472 3473 m_head = ifsq_dequeue(ifsq, NULL); 3474 if (m_head == NULL) 3475 break; 3476 3477 if (igb_encap(txr, &m_head, &nsegs, &idx)) { 3478 IFNET_STAT_INC(ifp, oerrors, 1); 3479 continue; 3480 } 3481 3482 if (nsegs >= txr->wreg_nsegs) { 3483 E1000_WRITE_REG(&txr->sc->hw, E1000_TDT(txr->me), idx); 3484 idx = -1; 3485 nsegs = 0; 3486 } 3487 3488 /* Send a copy of the frame to the BPF listener */ 3489 ETHER_BPF_MTAP(ifp, m_head); 3490 } 3491 if (idx >= 0) 3492 E1000_WRITE_REG(&txr->sc->hw, 
E1000_TDT(txr->me), idx); 3493 } 3494 3495 static void 3496 igb_watchdog(struct ifaltq_subque *ifsq) 3497 { 3498 struct igb_tx_ring *txr = ifsq_get_priv(ifsq); 3499 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3500 struct igb_softc *sc = ifp->if_softc; 3501 int i; 3502 3503 KKASSERT(txr->ifsq == ifsq); 3504 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3505 3506 /* 3507 * If flow control has paused us since last checking 3508 * it invalidates the watchdog timing, so dont run it. 3509 */ 3510 if (sc->pause_frames) { 3511 sc->pause_frames = 0; 3512 txr->tx_watchdog.wd_timer = 5; 3513 return; 3514 } 3515 3516 if_printf(ifp, "Watchdog timeout -- resetting\n"); 3517 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, 3518 E1000_READ_REG(&sc->hw, E1000_TDH(txr->me)), 3519 E1000_READ_REG(&sc->hw, E1000_TDT(txr->me))); 3520 if_printf(ifp, "TX(%d) desc avail = %d, " 3521 "Next TX to Clean = %d\n", 3522 txr->me, txr->tx_avail, txr->next_to_clean); 3523 3524 IFNET_STAT_INC(ifp, oerrors, 1); 3525 sc->watchdog_events++; 3526 3527 igb_init(sc); 3528 for (i = 0; i < sc->tx_ring_inuse; ++i) 3529 ifsq_devstart_sched(sc->tx_rings[i].ifsq); 3530 } 3531 3532 static void 3533 igb_set_eitr(struct igb_softc *sc, int idx, int rate) 3534 { 3535 uint32_t eitr = 0; 3536 3537 if (rate > 0) { 3538 if (sc->hw.mac.type == e1000_82575) { 3539 eitr = 1000000000 / 256 / rate; 3540 /* 3541 * NOTE: 3542 * Document is wrong on the 2 bits left shift 3543 */ 3544 } else { 3545 eitr = 1000000 / rate; 3546 eitr <<= IGB_EITR_INTVL_SHIFT; 3547 } 3548 3549 if (eitr == 0) { 3550 /* Don't disable it */ 3551 eitr = 1 << IGB_EITR_INTVL_SHIFT; 3552 } else if (eitr > IGB_EITR_INTVL_MASK) { 3553 /* Don't allow it to be too large */ 3554 eitr = IGB_EITR_INTVL_MASK; 3555 } 3556 } 3557 if (sc->hw.mac.type == e1000_82575) 3558 eitr |= eitr << 16; 3559 else 3560 eitr |= E1000_EITR_CNT_IGNR; 3561 E1000_WRITE_REG(&sc->hw, E1000_EITR(idx), eitr); 3562 } 3563 3564 static int 3565 igb_sysctl_intr_rate(SYSCTL_HANDLER_ARGS) 3566 { 3567 struct igb_softc *sc = (void *)arg1; 3568 struct ifnet *ifp = &sc->arpcom.ac_if; 3569 int error, intr_rate; 3570 3571 intr_rate = sc->intr_rate; 3572 error = sysctl_handle_int(oidp, &intr_rate, 0, req); 3573 if (error || req->newptr == NULL) 3574 return error; 3575 if (intr_rate < 0) 3576 return EINVAL; 3577 3578 ifnet_serialize_all(ifp); 3579 3580 sc->intr_rate = intr_rate; 3581 if (ifp->if_flags & IFF_RUNNING) 3582 igb_set_eitr(sc, 0, sc->intr_rate); 3583 3584 if (bootverbose) 3585 if_printf(ifp, "interrupt rate set to %d/sec\n", sc->intr_rate); 3586 3587 ifnet_deserialize_all(ifp); 3588 3589 return 0; 3590 } 3591 3592 static int 3593 igb_sysctl_msix_rate(SYSCTL_HANDLER_ARGS) 3594 { 3595 struct igb_msix_data *msix = (void *)arg1; 3596 struct igb_softc *sc = msix->msix_sc; 3597 struct ifnet *ifp = &sc->arpcom.ac_if; 3598 int error, msix_rate; 3599 3600 msix_rate = msix->msix_rate; 3601 error = sysctl_handle_int(oidp, &msix_rate, 0, req); 3602 if (error || req->newptr == NULL) 3603 return error; 3604 if (msix_rate < 0) 3605 return EINVAL; 3606 3607 lwkt_serialize_enter(msix->msix_serialize); 3608 3609 msix->msix_rate = msix_rate; 3610 if (ifp->if_flags & IFF_RUNNING) 3611 igb_set_eitr(sc, msix->msix_vector, msix->msix_rate); 3612 3613 if (bootverbose) { 3614 if_printf(ifp, "%s set to %d/sec\n", msix->msix_rate_desc, 3615 msix->msix_rate); 3616 } 3617 3618 lwkt_serialize_exit(msix->msix_serialize); 3619 3620 return 0; 3621 } 3622 3623 static int 3624 igb_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS) 3625 { 3626 struct igb_softc *sc 
= (void *)arg1; 3627 struct ifnet *ifp = &sc->arpcom.ac_if; 3628 struct igb_tx_ring *txr = &sc->tx_rings[0]; 3629 int error, nsegs; 3630 3631 nsegs = txr->intr_nsegs; 3632 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3633 if (error || req->newptr == NULL) 3634 return error; 3635 if (nsegs <= 0) 3636 return EINVAL; 3637 3638 ifnet_serialize_all(ifp); 3639 3640 if (nsegs >= txr->num_tx_desc - txr->oact_lo_desc || 3641 nsegs >= txr->oact_hi_desc - IGB_MAX_SCATTER) { 3642 error = EINVAL; 3643 } else { 3644 int i; 3645 3646 error = 0; 3647 for (i = 0; i < sc->tx_ring_cnt; ++i) 3648 sc->tx_rings[i].intr_nsegs = nsegs; 3649 } 3650 3651 ifnet_deserialize_all(ifp); 3652 3653 return error; 3654 } 3655 3656 static int 3657 igb_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 3658 { 3659 struct igb_softc *sc = (void *)arg1; 3660 struct ifnet *ifp = &sc->arpcom.ac_if; 3661 int error, nsegs, i; 3662 3663 nsegs = sc->rx_rings[0].wreg_nsegs; 3664 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3665 if (error || req->newptr == NULL) 3666 return error; 3667 3668 ifnet_serialize_all(ifp); 3669 for (i = 0; i < sc->rx_ring_cnt; ++i) 3670 sc->rx_rings[i].wreg_nsegs =nsegs; 3671 ifnet_deserialize_all(ifp); 3672 3673 return 0; 3674 } 3675 3676 static int 3677 igb_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 3678 { 3679 struct igb_softc *sc = (void *)arg1; 3680 struct ifnet *ifp = &sc->arpcom.ac_if; 3681 int error, nsegs, i; 3682 3683 nsegs = sc->tx_rings[0].wreg_nsegs; 3684 error = sysctl_handle_int(oidp, &nsegs, 0, req); 3685 if (error || req->newptr == NULL) 3686 return error; 3687 3688 ifnet_serialize_all(ifp); 3689 for (i = 0; i < sc->tx_ring_cnt; ++i) 3690 sc->tx_rings[i].wreg_nsegs =nsegs; 3691 ifnet_deserialize_all(ifp); 3692 3693 return 0; 3694 } 3695 3696 #ifdef IFPOLL_ENABLE 3697 3698 static int 3699 igb_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS) 3700 { 3701 struct igb_softc *sc = (void *)arg1; 3702 struct ifnet *ifp = &sc->arpcom.ac_if; 3703 int error, off; 3704 3705 off = sc->rx_npoll_off; 3706 error = sysctl_handle_int(oidp, &off, 0, req); 3707 if (error || req->newptr == NULL) 3708 return error; 3709 if (off < 0) 3710 return EINVAL; 3711 3712 ifnet_serialize_all(ifp); 3713 if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) { 3714 error = EINVAL; 3715 } else { 3716 error = 0; 3717 sc->rx_npoll_off = off; 3718 } 3719 ifnet_deserialize_all(ifp); 3720 3721 return error; 3722 } 3723 3724 static int 3725 igb_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS) 3726 { 3727 struct igb_softc *sc = (void *)arg1; 3728 struct ifnet *ifp = &sc->arpcom.ac_if; 3729 int error, off; 3730 3731 off = sc->tx_npoll_off; 3732 error = sysctl_handle_int(oidp, &off, 0, req); 3733 if (error || req->newptr == NULL) 3734 return error; 3735 if (off < 0) 3736 return EINVAL; 3737 3738 ifnet_serialize_all(ifp); 3739 if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) { 3740 error = EINVAL; 3741 } else { 3742 error = 0; 3743 sc->tx_npoll_off = off; 3744 } 3745 ifnet_deserialize_all(ifp); 3746 3747 return error; 3748 } 3749 3750 #endif /* IFPOLL_ENABLE */ 3751 3752 static void 3753 igb_init_intr(struct igb_softc *sc) 3754 { 3755 igb_set_intr_mask(sc); 3756 3757 if ((sc->flags & IGB_FLAG_SHARED_INTR) == 0) 3758 igb_init_unshared_intr(sc); 3759 3760 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3761 igb_set_eitr(sc, 0, sc->intr_rate); 3762 } else { 3763 int i; 3764 3765 for (i = 0; i < sc->msix_cnt; ++i) 3766 igb_set_eitr(sc, i, sc->msix_data[i].msix_rate); 3767 } 3768 } 3769 3770 static void 3771 igb_init_unshared_intr(struct igb_softc *sc) 3772 { 3773 struct 
e1000_hw *hw = &sc->hw; 3774 const struct igb_rx_ring *rxr; 3775 const struct igb_tx_ring *txr; 3776 uint32_t ivar, index; 3777 int i; 3778 3779 /* 3780 * Enable extended mode 3781 */ 3782 if (sc->hw.mac.type != e1000_82575) { 3783 uint32_t gpie; 3784 int ivar_max; 3785 3786 gpie = E1000_GPIE_NSICR; 3787 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3788 gpie |= E1000_GPIE_MSIX_MODE | 3789 E1000_GPIE_EIAME | 3790 E1000_GPIE_PBA; 3791 } 3792 E1000_WRITE_REG(hw, E1000_GPIE, gpie); 3793 3794 /* 3795 * Clear IVARs 3796 */ 3797 switch (sc->hw.mac.type) { 3798 case e1000_82576: 3799 ivar_max = IGB_MAX_IVAR_82576; 3800 break; 3801 3802 case e1000_82580: 3803 ivar_max = IGB_MAX_IVAR_82580; 3804 break; 3805 3806 case e1000_i350: 3807 ivar_max = IGB_MAX_IVAR_I350; 3808 break; 3809 3810 case e1000_vfadapt: 3811 case e1000_vfadapt_i350: 3812 ivar_max = IGB_MAX_IVAR_VF; 3813 break; 3814 3815 case e1000_i210: 3816 ivar_max = IGB_MAX_IVAR_I210; 3817 break; 3818 3819 case e1000_i211: 3820 ivar_max = IGB_MAX_IVAR_I211; 3821 break; 3822 3823 default: 3824 panic("unknown mac type %d\n", sc->hw.mac.type); 3825 } 3826 for (i = 0; i < ivar_max; ++i) 3827 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, 0); 3828 E1000_WRITE_REG(hw, E1000_IVAR_MISC, 0); 3829 } else { 3830 uint32_t tmp; 3831 3832 KASSERT(sc->intr_type != PCI_INTR_TYPE_MSIX, 3833 ("82575 w/ MSI-X")); 3834 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 3835 tmp |= E1000_CTRL_EXT_IRCA; 3836 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 3837 } 3838 3839 /* 3840 * Map TX/RX interrupts to EICR 3841 */ 3842 switch (sc->hw.mac.type) { 3843 case e1000_82580: 3844 case e1000_i350: 3845 case e1000_vfadapt: 3846 case e1000_vfadapt_i350: 3847 case e1000_i210: 3848 case e1000_i211: 3849 /* RX entries */ 3850 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3851 rxr = &sc->rx_rings[i]; 3852 3853 index = i >> 1; 3854 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3855 3856 if (i & 1) { 3857 ivar &= 0xff00ffff; 3858 ivar |= 3859 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3860 } else { 3861 ivar &= 0xffffff00; 3862 ivar |= 3863 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3864 } 3865 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3866 } 3867 /* TX entries */ 3868 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3869 txr = &sc->tx_rings[i]; 3870 3871 index = i >> 1; 3872 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3873 3874 if (i & 1) { 3875 ivar &= 0x00ffffff; 3876 ivar |= 3877 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3878 } else { 3879 ivar &= 0xffff00ff; 3880 ivar |= 3881 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3882 } 3883 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3884 } 3885 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3886 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3887 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3888 } 3889 break; 3890 3891 case e1000_82576: 3892 /* RX entries */ 3893 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3894 rxr = &sc->rx_rings[i]; 3895 3896 index = i & 0x7; /* Each IVAR has two entries */ 3897 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 3898 3899 if (i < 8) { 3900 ivar &= 0xffffff00; 3901 ivar |= 3902 (rxr->rx_intr_bit | E1000_IVAR_VALID); 3903 } else { 3904 ivar &= 0xff00ffff; 3905 ivar |= 3906 (rxr->rx_intr_bit | E1000_IVAR_VALID) << 16; 3907 } 3908 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3909 } 3910 /* TX entries */ 3911 for (i = 0; i < sc->tx_ring_inuse; ++i) { 3912 txr = &sc->tx_rings[i]; 3913 3914 index = i & 0x7; /* Each IVAR has two entries */ 3915 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, 
index); 3916 3917 if (i < 8) { 3918 ivar &= 0xffff00ff; 3919 ivar |= 3920 (txr->tx_intr_bit | E1000_IVAR_VALID) << 8; 3921 } else { 3922 ivar &= 0x00ffffff; 3923 ivar |= 3924 (txr->tx_intr_bit | E1000_IVAR_VALID) << 24; 3925 } 3926 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 3927 } 3928 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3929 ivar = (sc->sts_intr_bit | E1000_IVAR_VALID) << 8; 3930 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 3931 } 3932 break; 3933 3934 case e1000_82575: 3935 /* 3936 * Enable necessary interrupt bits. 3937 * 3938 * The name of the register is confusing; in addition to 3939 * configuring the first vector of MSI-X, it also configures 3940 * which bits of EICR could be set by the hardware even when 3941 * MSI or line interrupt is used; it thus controls interrupt 3942 * generation. It MUST be configured explicitly; the default 3943 * value mentioned in the datasheet is wrong: RX queue0 and 3944 * TX queue0 are NOT enabled by default. 3945 */ 3946 E1000_WRITE_REG(&sc->hw, E1000_MSIXBM(0), sc->intr_mask); 3947 break; 3948 3949 default: 3950 panic("unknown mac type %d\n", sc->hw.mac.type); 3951 } 3952 } 3953 3954 static int 3955 igb_setup_intr(struct igb_softc *sc) 3956 { 3957 int error, i; 3958 3959 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 3960 return igb_msix_setup(sc); 3961 3962 error = bus_setup_intr(sc->dev, sc->intr_res, INTR_MPSAFE, 3963 (sc->flags & IGB_FLAG_SHARED_INTR) ? igb_intr_shared : igb_intr, 3964 sc, &sc->intr_tag, &sc->main_serialize); 3965 if (error) { 3966 device_printf(sc->dev, "Failed to register interrupt handler"); 3967 return error; 3968 } 3969 3970 for (i = 0; i < sc->tx_ring_cnt; ++i) 3971 sc->tx_rings[i].tx_intr_cpuid = rman_get_cpuid(sc->intr_res); 3972 3973 return 0; 3974 } 3975 3976 static void 3977 igb_set_txintr_mask(struct igb_tx_ring *txr, int *intr_bit0, int intr_bitmax) 3978 { 3979 if (txr->sc->hw.mac.type == e1000_82575) { 3980 txr->tx_intr_bit = 0; /* unused */ 3981 switch (txr->me) { 3982 case 0: 3983 txr->tx_intr_mask = E1000_EICR_TX_QUEUE0; 3984 break; 3985 case 1: 3986 txr->tx_intr_mask = E1000_EICR_TX_QUEUE1; 3987 break; 3988 case 2: 3989 txr->tx_intr_mask = E1000_EICR_TX_QUEUE2; 3990 break; 3991 case 3: 3992 txr->tx_intr_mask = E1000_EICR_TX_QUEUE3; 3993 break; 3994 default: 3995 panic("unsupported # of TX ring, %d\n", txr->me); 3996 } 3997 } else { 3998 int intr_bit = *intr_bit0; 3999 4000 txr->tx_intr_bit = intr_bit % intr_bitmax; 4001 txr->tx_intr_mask = 1 << txr->tx_intr_bit; 4002 4003 *intr_bit0 = intr_bit + 1; 4004 } 4005 } 4006 4007 static void 4008 igb_set_rxintr_mask(struct igb_rx_ring *rxr, int *intr_bit0, int intr_bitmax) 4009 { 4010 if (rxr->sc->hw.mac.type == e1000_82575) { 4011 rxr->rx_intr_bit = 0; /* unused */ 4012 switch (rxr->me) { 4013 case 0: 4014 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE0; 4015 break; 4016 case 1: 4017 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE1; 4018 break; 4019 case 2: 4020 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE2; 4021 break; 4022 case 3: 4023 rxr->rx_intr_mask = E1000_EICR_RX_QUEUE3; 4024 break; 4025 default: 4026 panic("unsupported # of RX ring, %d\n", rxr->me); 4027 } 4028 } else { 4029 int intr_bit = *intr_bit0; 4030 4031 rxr->rx_intr_bit = intr_bit % intr_bitmax; 4032 rxr->rx_intr_mask = 1 << rxr->rx_intr_bit; 4033 4034 *intr_bit0 = intr_bit + 1; 4035 } 4036 } 4037 4038 static void 4039 igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 4040 { 4041 struct igb_softc *sc = ifp->if_softc; 4042 4043 ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt, 4044 
static void
igb_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
    struct igb_softc *sc = ifp->if_softc;

    ifnet_serialize_array_enter(sc->serializes, sc->serialize_cnt,
        sc->tx_serialize, sc->rx_serialize, slz);
}

static void
igb_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
    struct igb_softc *sc = ifp->if_softc;

    ifnet_serialize_array_exit(sc->serializes, sc->serialize_cnt,
        sc->tx_serialize, sc->rx_serialize, slz);
}

static int
igb_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
{
    struct igb_softc *sc = ifp->if_softc;

    return ifnet_serialize_array_try(sc->serializes, sc->serialize_cnt,
        sc->tx_serialize, sc->rx_serialize, slz);
}

#ifdef INVARIANTS

static void
igb_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
    boolean_t serialized)
{
    struct igb_softc *sc = ifp->if_softc;

    ifnet_serialize_array_assert(sc->serializes, sc->serialize_cnt,
        sc->tx_serialize, sc->rx_serialize, slz, serialized);
}

#endif	/* INVARIANTS */

static void
igb_set_intr_mask(struct igb_softc *sc)
{
    int i;

    sc->intr_mask = sc->sts_intr_mask;
    for (i = 0; i < sc->rx_ring_inuse; ++i)
        sc->intr_mask |= sc->rx_rings[i].rx_intr_mask;
    for (i = 0; i < sc->tx_ring_inuse; ++i)
        sc->intr_mask |= sc->tx_rings[i].tx_intr_mask;
    if (bootverbose) {
        if_printf(&sc->arpcom.ac_if, "intr mask 0x%08x\n",
            sc->intr_mask);
    }
}

static int
igb_alloc_intr(struct igb_softc *sc)
{
    int i, intr_bit, intr_bitmax;
    u_int intr_flags;

    igb_msix_try_alloc(sc);
    if (sc->intr_type == PCI_INTR_TYPE_MSIX)
        goto done;

    /*
     * Allocate MSI/legacy interrupt resource
     */
    sc->intr_type = pci_alloc_1intr(sc->dev, igb_msi_enable,
        &sc->intr_rid, &intr_flags);

    if (sc->intr_type == PCI_INTR_TYPE_LEGACY) {
        int unshared;

        unshared = device_getenv_int(sc->dev, "irq.unshared", 0);
        if (!unshared) {
            sc->flags |= IGB_FLAG_SHARED_INTR;
            if (bootverbose)
                device_printf(sc->dev, "IRQ shared\n");
        } else {
            intr_flags &= ~RF_SHAREABLE;
            if (bootverbose)
                device_printf(sc->dev, "IRQ unshared\n");
        }
    }

    sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
        &sc->intr_rid, intr_flags);
    if (sc->intr_res == NULL) {
        device_printf(sc->dev, "Unable to allocate bus resource: "
            "interrupt\n");
        return ENXIO;
    }

    /*
     * Setup MSI/legacy interrupt mask
     */
    switch (sc->hw.mac.type) {
    case e1000_82575:
        intr_bitmax = IGB_MAX_TXRXINT_82575;
        break;

    case e1000_82576:
        intr_bitmax = IGB_MAX_TXRXINT_82576;
        break;

    case e1000_82580:
        intr_bitmax = IGB_MAX_TXRXINT_82580;
        break;

    case e1000_i350:
        intr_bitmax = IGB_MAX_TXRXINT_I350;
        break;

    case e1000_i210:
        intr_bitmax = IGB_MAX_TXRXINT_I210;
        break;

    case e1000_i211:
        intr_bitmax = IGB_MAX_TXRXINT_I211;
        break;

    default:
        intr_bitmax = IGB_MIN_TXRXINT;
        break;
    }
    intr_bit = 0;
    for (i = 0; i < sc->tx_ring_cnt; ++i)
        igb_set_txintr_mask(&sc->tx_rings[i], &intr_bit, intr_bitmax);
    for (i = 0; i < sc->rx_ring_cnt; ++i)
        igb_set_rxintr_mask(&sc->rx_rings[i], &intr_bit, intr_bitmax);
    sc->sts_intr_bit = 0;
    sc->sts_intr_mask = E1000_EICR_OTHER;

    /* Initialize interrupt rate */
    sc->intr_rate = IGB_INTR_RATE;
done:
    igb_set_ring_inuse(sc, FALSE);
    igb_set_intr_mask(sc);
    return 0;
}
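/*
 * NOTE (illustrative example):
 * For MSI/legacy interrupts igb_alloc_intr() numbers the rings from a
 * single counter, TX rings first, then RX rings.  On a MAC with
 * intr_bitmax == 4 and two TX plus two RX rings this yields
 *
 *    TX0 -> EICR bit 0, TX1 -> bit 1, RX0 -> bit 2, RX1 -> bit 3
 *
 * and igb_set_intr_mask() folds these per-ring masks together with
 * E1000_EICR_OTHER into sc->intr_mask, the same value igb_set_intr()
 * programs into MSIXBM(0) on 82575.
 */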
static void
igb_free_intr(struct igb_softc *sc)
{
    if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
        if (sc->intr_res != NULL) {
            bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intr_rid,
                sc->intr_res);
        }
        if (sc->intr_type == PCI_INTR_TYPE_MSI)
            pci_release_msi(sc->dev);
    } else {
        igb_msix_free(sc, TRUE);
    }
}

static void
igb_teardown_intr(struct igb_softc *sc)
{
    if (sc->intr_type != PCI_INTR_TYPE_MSIX)
        bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag);
    else
        igb_msix_teardown(sc, sc->msix_cnt);
}
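/*
 * NOTE:
 * igb_msix_try_alloc() below is best effort: it rounds the vector
 * count reported by pci_msix_count() down to a power of 2, reserves
 * one vector for link status, and then either gives every TX and RX
 * ring its own vector ("independent") or pairs a TX and an RX ring on
 * one "rxtx" vector ("aggregate").  If anything fails along the way,
 * sc->intr_type is left untouched and igb_alloc_intr() falls back to
 * the MSI/legacy path.
 */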
static void
igb_msix_try_alloc(struct igb_softc *sc)
{
    int msix_enable, msix_cnt, msix_cnt2, alloc_cnt;
    int i, x, error;
    int offset, offset_def;
    struct igb_msix_data *msix;
    boolean_t aggregate, setup = FALSE;

    /*
     * Don't enable MSI-X on 82575, see:
     * 82575 specification update errata #25
     */
    if (sc->hw.mac.type == e1000_82575)
        return;

    /* Don't enable MSI-X on VF */
    if (sc->vf_ifp)
        return;

    msix_enable = device_getenv_int(sc->dev, "msix.enable",
        igb_msix_enable);
    if (!msix_enable)
        return;

    msix_cnt = pci_msix_count(sc->dev);
#ifdef IGB_MSIX_DEBUG
    msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
#endif
    if (msix_cnt <= 1) {
        /* One MSI-X model does not make sense */
        return;
    }

    i = 0;
    while ((1 << (i + 1)) <= msix_cnt)
        ++i;
    msix_cnt2 = 1 << i;

    if (bootverbose) {
        device_printf(sc->dev, "MSI-X count %d/%d\n",
            msix_cnt2, msix_cnt);
    }

    KKASSERT(msix_cnt2 <= msix_cnt);
    if (msix_cnt == msix_cnt2) {
        /* We need at least one MSI-X for link status */
        msix_cnt2 >>= 1;
        if (msix_cnt2 <= 1) {
            /* One MSI-X for RX/TX does not make sense */
            device_printf(sc->dev, "not enough MSI-X for TX/RX, "
                "MSI-X count %d/%d\n", msix_cnt2, msix_cnt);
            return;
        }
        KKASSERT(msix_cnt > msix_cnt2);

        if (bootverbose) {
            device_printf(sc->dev, "MSI-X count fixup %d/%d\n",
                msix_cnt2, msix_cnt);
        }
    }

    sc->rx_ring_msix = sc->rx_ring_cnt;
    if (sc->rx_ring_msix > msix_cnt2)
        sc->rx_ring_msix = msix_cnt2;

    sc->tx_ring_msix = sc->tx_ring_cnt;
    if (sc->tx_ring_msix > msix_cnt2)
        sc->tx_ring_msix = msix_cnt2;

    if (msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) {
        /*
         * Independent TX/RX MSI-X
         */
        aggregate = FALSE;
        if (bootverbose)
            device_printf(sc->dev, "independent TX/RX MSI-X\n");
        alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix;
    } else {
        /*
         * Aggregate TX/RX MSI-X
         */
        aggregate = TRUE;
        if (bootverbose)
            device_printf(sc->dev, "aggregate TX/RX MSI-X\n");
        alloc_cnt = msix_cnt2;
        if (alloc_cnt > ncpus2)
            alloc_cnt = ncpus2;
        if (sc->rx_ring_msix > alloc_cnt)
            sc->rx_ring_msix = alloc_cnt;
        if (sc->tx_ring_msix > alloc_cnt)
            sc->tx_ring_msix = alloc_cnt;
    }
    ++alloc_cnt;	/* For link status */

    if (bootverbose) {
        device_printf(sc->dev, "MSI-X alloc %d, "
            "RX ring %d, TX ring %d\n", alloc_cnt,
            sc->rx_ring_msix, sc->tx_ring_msix);
    }

    sc->msix_mem_rid = PCIR_BAR(IGB_MSIX_BAR);
    sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
        &sc->msix_mem_rid, RF_ACTIVE);
    if (sc->msix_mem_res == NULL) {
        device_printf(sc->dev, "Unable to map MSI-X table\n");
        return;
    }

    sc->msix_cnt = alloc_cnt;
    sc->msix_data = kmalloc_cachealign(
        sizeof(struct igb_msix_data) * sc->msix_cnt,
        M_DEVBUF, M_WAITOK | M_ZERO);
    for (x = 0; x < sc->msix_cnt; ++x) {
        msix = &sc->msix_data[x];

        lwkt_serialize_init(&msix->msix_serialize0);
        msix->msix_sc = sc;
        msix->msix_rid = -1;
        msix->msix_vector = x;
        msix->msix_mask = 1 << msix->msix_vector;
        msix->msix_rate = IGB_INTR_RATE;
    }

    x = 0;
    if (!aggregate) {
        /*
         * RX rings
         */
        if (sc->rx_ring_msix == ncpus2) {
            offset = 0;
        } else {
            offset_def = (sc->rx_ring_msix *
                device_get_unit(sc->dev)) % ncpus2;

            offset = device_getenv_int(sc->dev,
                "msix.rxoff", offset_def);
            if (offset >= ncpus2 ||
                offset % sc->rx_ring_msix != 0) {
                device_printf(sc->dev,
                    "invalid msix.rxoff %d, use %d\n",
                    offset, offset_def);
                offset = offset_def;
            }
        }
        igb_msix_rx_conf(sc, 0, &x, offset);

        /*
         * TX rings
         */
        if (sc->tx_ring_msix == ncpus2) {
            offset = 0;
        } else {
            offset_def = (sc->tx_ring_msix *
                device_get_unit(sc->dev)) % ncpus2;

            offset = device_getenv_int(sc->dev,
                "msix.txoff", offset_def);
            if (offset >= ncpus2 ||
                offset % sc->tx_ring_msix != 0) {
                device_printf(sc->dev,
                    "invalid msix.txoff %d, use %d\n",
                    offset, offset_def);
                offset = offset_def;
            }
        }
        igb_msix_tx_conf(sc, 0, &x, offset);
    } else {
        int ring_agg, ring_max;

        ring_agg = sc->rx_ring_msix;
        if (ring_agg > sc->tx_ring_msix)
            ring_agg = sc->tx_ring_msix;

        ring_max = sc->rx_ring_msix;
        if (ring_max < sc->tx_ring_msix)
            ring_max = sc->tx_ring_msix;

        if (ring_max == ncpus2) {
            offset = 0;
        } else {
            offset_def = (ring_max * device_get_unit(sc->dev)) %
                ncpus2;

            offset = device_getenv_int(sc->dev, "msix.off",
                offset_def);
            if (offset >= ncpus2 || offset % ring_max != 0) {
                device_printf(sc->dev,
                    "invalid msix.off %d, use %d\n",
                    offset, offset_def);
                offset = offset_def;
            }
        }

        for (i = 0; i < ring_agg; ++i) {
            struct igb_tx_ring *txr = &sc->tx_rings[i];
            struct igb_rx_ring *rxr = &sc->rx_rings[i];

            KKASSERT(x < sc->msix_cnt);
            msix = &sc->msix_data[x++];

            txr->tx_intr_bit = msix->msix_vector;
            txr->tx_intr_mask = msix->msix_mask;
            rxr->rx_intr_bit = msix->msix_vector;
            rxr->rx_intr_mask = msix->msix_mask;

            msix->msix_serialize = &msix->msix_serialize0;
            msix->msix_func = igb_msix_rxtx;
            msix->msix_arg = msix;
            msix->msix_rx = rxr;
            msix->msix_tx = txr;

            msix->msix_cpuid = i + offset;
            KKASSERT(msix->msix_cpuid < ncpus2);
            txr->tx_intr_cpuid = msix->msix_cpuid;

            ksnprintf(msix->msix_desc, sizeof(msix->msix_desc),
                "%s rxtx%d", device_get_nameunit(sc->dev), i);
            msix->msix_rate = IGB_MSIX_RX_RATE;
            ksnprintf(msix->msix_rate_desc,
                sizeof(msix->msix_rate_desc),
                "RXTX%d interrupt rate", i);
        }

        if (ring_agg != ring_max) {
            if (ring_max == sc->tx_ring_msix)
                igb_msix_tx_conf(sc, i, &x, offset);
            else
                igb_msix_rx_conf(sc, i, &x, offset);
        }
    }

    /*
     * Link status
     */
    KKASSERT(x < sc->msix_cnt);
    msix = &sc->msix_data[x++];
    sc->sts_intr_bit = msix->msix_vector;
    sc->sts_intr_mask = msix->msix_mask;

    msix->msix_serialize = &sc->main_serialize;
    msix->msix_func = igb_msix_status;
    msix->msix_arg = sc;
    msix->msix_cpuid = 0;
    ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s sts",
        device_get_nameunit(sc->dev));
    ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
        "status interrupt rate");

    KKASSERT(x == sc->msix_cnt);

    error = pci_setup_msix(sc->dev);
    if (error) {
        device_printf(sc->dev, "Setup MSI-X failed\n");
        goto back;
    }
    setup = TRUE;

    for (i = 0; i < sc->msix_cnt; ++i) {
        msix = &sc->msix_data[i];

        error = pci_alloc_msix_vector(sc->dev, msix->msix_vector,
            &msix->msix_rid, msix->msix_cpuid);
        if (error) {
            device_printf(sc->dev,
                "Unable to allocate MSI-X %d on cpu%d\n",
                msix->msix_vector, msix->msix_cpuid);
            goto back;
        }

        msix->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
            &msix->msix_rid, RF_ACTIVE);
        if (msix->msix_res == NULL) {
            device_printf(sc->dev,
                "Unable to allocate MSI-X %d resource\n",
                msix->msix_vector);
            error = ENOMEM;
            goto back;
        }
    }

    pci_enable_msix(sc->dev);
    sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
    if (error)
        igb_msix_free(sc, setup);
}
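/*
 * NOTE (illustrative example):
 * The msix.rxoff/msix.txoff/msix.off tunables checked above shift the
 * CPU binding of the MSI-X vectors.  With 4 RX rings on a system with
 * ncpus2 == 8, the unit 0 default binds RX0-RX3 to cpu0-cpu3; setting
 * the per-device msix.rxoff tunable to 4 moves them to cpu4-cpu7.  An
 * offset that is not a multiple of the ring count, or that is >=
 * ncpus2, is rejected and the unit-number based default is used.
 */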
static void
igb_msix_free(struct igb_softc *sc, boolean_t setup)
{
    int i;

    KKASSERT(sc->msix_cnt > 1);

    for (i = 0; i < sc->msix_cnt; ++i) {
        struct igb_msix_data *msix = &sc->msix_data[i];

        if (msix->msix_res != NULL) {
            bus_release_resource(sc->dev, SYS_RES_IRQ,
                msix->msix_rid, msix->msix_res);
        }
        if (msix->msix_rid >= 0)
            pci_release_msix_vector(sc->dev, msix->msix_rid);
    }
    if (setup)
        pci_teardown_msix(sc->dev);

    sc->msix_cnt = 0;
    kfree(sc->msix_data, M_DEVBUF);
    sc->msix_data = NULL;
}

static int
igb_msix_setup(struct igb_softc *sc)
{
    int i;

    for (i = 0; i < sc->msix_cnt; ++i) {
        struct igb_msix_data *msix = &sc->msix_data[i];
        int error;

        error = bus_setup_intr_descr(sc->dev, msix->msix_res,
            INTR_MPSAFE, msix->msix_func, msix->msix_arg,
            &msix->msix_handle, msix->msix_serialize, msix->msix_desc);
        if (error) {
            device_printf(sc->dev, "could not set up %s "
                "interrupt handler.\n", msix->msix_desc);
            igb_msix_teardown(sc, i);
            return error;
        }
    }
    return 0;
}

static void
igb_msix_teardown(struct igb_softc *sc, int msix_cnt)
{
    int i;

    for (i = 0; i < msix_cnt; ++i) {
        struct igb_msix_data *msix = &sc->msix_data[i];

        bus_teardown_intr(sc->dev, msix->msix_res, msix->msix_handle);
    }
}

static void
igb_msix_rx(void *arg)
{
    struct igb_rx_ring *rxr = arg;

    ASSERT_SERIALIZED(&rxr->rx_serialize);
    igb_rxeof(rxr, -1);

    E1000_WRITE_REG(&rxr->sc->hw, E1000_EIMS, rxr->rx_intr_mask);
}

static void
igb_msix_tx(void *arg)
{
    struct igb_tx_ring *txr = arg;

    ASSERT_SERIALIZED(&txr->tx_serialize);

    igb_txeof(txr);
    if (!ifsq_is_empty(txr->ifsq))
        ifsq_devstart(txr->ifsq);

    E1000_WRITE_REG(&txr->sc->hw, E1000_EIMS, txr->tx_intr_mask);
}

static void
igb_msix_status(void *arg)
{
    struct igb_softc *sc = arg;
    uint32_t icr;

    ASSERT_SERIALIZED(&sc->main_serialize);

    icr = E1000_READ_REG(&sc->hw, E1000_ICR);
    if (icr & E1000_ICR_LSC) {
        sc->hw.mac.get_link_status = 1;
        igb_update_link_status(sc);
    }

    E1000_WRITE_REG(&sc->hw, E1000_EIMS, sc->sts_intr_mask);
}
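/*
 * NOTE:
 * Each MSI-X handler above runs under only its own serializer and
 * finishes by writing its private mask into EIMS.  With EIAME/PBA set
 * in GPIE by igb_set_intr() for the MSI-X case, the vector is expected
 * to be masked while it is serviced, so that final EIMS write is what
 * re-arms it.
 */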
static void
igb_set_ring_inuse(struct igb_softc *sc, boolean_t polling)
{
    sc->rx_ring_inuse = igb_get_rxring_inuse(sc, polling);
    sc->tx_ring_inuse = igb_get_txring_inuse(sc, polling);
    if (bootverbose) {
        if_printf(&sc->arpcom.ac_if, "RX rings %d/%d, TX rings %d/%d\n",
            sc->rx_ring_inuse, sc->rx_ring_cnt,
            sc->tx_ring_inuse, sc->tx_ring_cnt);
    }
}

static int
igb_get_rxring_inuse(const struct igb_softc *sc, boolean_t polling)
{
    if (!IGB_ENABLE_HWRSS(sc))
        return 1;

    if (polling)
        return sc->rx_ring_cnt;
    else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
        return IGB_MIN_RING_RSS;
    else
        return sc->rx_ring_msix;
}

static int
igb_get_txring_inuse(const struct igb_softc *sc, boolean_t polling)
{
    if (!IGB_ENABLE_HWTSS(sc))
        return 1;

    if (polling)
        return sc->tx_ring_cnt;
    else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
        return IGB_MIN_RING;
    else
        return sc->tx_ring_msix;
}

static int
igb_tso_pullup(struct igb_tx_ring *txr, struct mbuf **mp)
{
    int hoff, iphlen, thoff;
    struct mbuf *m;

    m = *mp;
    KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

    iphlen = m->m_pkthdr.csum_iphlen;
    thoff = m->m_pkthdr.csum_thlen;
    hoff = m->m_pkthdr.csum_lhlen;

    KASSERT(iphlen > 0, ("invalid ip hlen"));
    KASSERT(thoff > 0, ("invalid tcp hlen"));
    KASSERT(hoff > 0, ("invalid ether hlen"));

    if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
        m = m_pullup(m, hoff + iphlen + thoff);
        if (m == NULL) {
            *mp = NULL;
            return ENOBUFS;
        }
        *mp = m;
    }
    if (txr->tx_flags & IGB_TXFLAG_TSO_IPLEN0) {
        struct ip *ip;

        ip = mtodoff(m, struct ip *, hoff);
        ip->ip_len = 0;
    }

    return 0;
}
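/*
 * NOTE (worked example):
 * igb_tso_pullup() above only guarantees that the link/IP/TCP headers
 * are contiguous in the leading mbuf.  For a plain TCP/IPv4 frame
 * without options that is csum_lhlen + csum_iphlen + csum_thlen =
 * 14 + 20 + 20 = 54 bytes; the IGB_TXFLAG_TSO_IPLEN0 workaround then
 * zeroes ip->ip_len directly in that contiguous header area.
 */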
static void
igb_tso_ctx(struct igb_tx_ring *txr, struct mbuf *m, uint32_t *hlen)
{
    struct e1000_adv_tx_context_desc *TXD;
    uint32_t vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
    int hoff, ctxd, iphlen, thoff;

    iphlen = m->m_pkthdr.csum_iphlen;
    thoff = m->m_pkthdr.csum_thlen;
    hoff = m->m_pkthdr.csum_lhlen;

    vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;

    ctxd = txr->next_avail_desc;
    TXD = (struct e1000_adv_tx_context_desc *)&txr->tx_base[ctxd];

    if (m->m_flags & M_VLANTAG) {
        uint16_t vlantag;

        vlantag = htole16(m->m_pkthdr.ether_vlantag);
        vlan_macip_lens |= (vlantag << E1000_ADVTXD_VLAN_SHIFT);
    }

    vlan_macip_lens |= (hoff << E1000_ADVTXD_MACLEN_SHIFT);
    vlan_macip_lens |= iphlen;

    type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
    type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
    type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

    mss_l4len_idx |= (m->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
    mss_l4len_idx |= (thoff << E1000_ADVTXD_L4LEN_SHIFT);
    /* 82575 needs the queue index added */
    if (txr->sc->hw.mac.type == e1000_82575)
        mss_l4len_idx |= txr->me << 4;

    TXD->vlan_macip_lens = htole32(vlan_macip_lens);
    TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
    TXD->seqnum_seed = htole32(0);
    TXD->mss_l4len_idx = htole32(mss_l4len_idx);

    /* We've consumed the first desc, adjust counters */
    if (++ctxd == txr->num_tx_desc)
        ctxd = 0;
    txr->next_avail_desc = ctxd;
    --txr->tx_avail;

    *hlen = hoff + iphlen + thoff;
}

static void
igb_setup_serializer(struct igb_softc *sc)
{
    const struct igb_msix_data *msix;
    int i, j;

    /*
     * Allocate serializer array
     */

    /* Main + TX + RX */
    sc->serialize_cnt = 1 + sc->tx_ring_cnt + sc->rx_ring_cnt;

    /* Aggregate TX/RX MSI-X */
    for (i = 0; i < sc->msix_cnt; ++i) {
        msix = &sc->msix_data[i];
        if (msix->msix_serialize == &msix->msix_serialize0)
            sc->serialize_cnt++;
    }

    sc->serializes =
        kmalloc(sc->serialize_cnt * sizeof(struct lwkt_serialize *),
            M_DEVBUF, M_WAITOK | M_ZERO);

    /*
     * Setup serializers
     *
     * NOTE: Order is critical
     */

    i = 0;
    KKASSERT(i < sc->serialize_cnt);
    sc->serializes[i++] = &sc->main_serialize;

    for (j = 0; j < sc->msix_cnt; ++j) {
        msix = &sc->msix_data[j];
        if (msix->msix_serialize == &msix->msix_serialize0) {
            KKASSERT(i < sc->serialize_cnt);
            sc->serializes[i++] = msix->msix_serialize;
        }
    }

    sc->tx_serialize = i;
    for (j = 0; j < sc->tx_ring_cnt; ++j) {
        KKASSERT(i < sc->serialize_cnt);
        sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
    }

    sc->rx_serialize = i;
    for (j = 0; j < sc->rx_ring_cnt; ++j) {
        KKASSERT(i < sc->serialize_cnt);
        sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
    }

    KKASSERT(i == sc->serialize_cnt);
}
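/*
 * NOTE (illustrative layout):
 * With 2 aggregated "rxtx" MSI-X vectors, 2 TX rings and 2 RX rings,
 * igb_setup_serializer() above produces
 *
 *    serializes[0]     main_serialize
 *    serializes[1..2]  msix_serialize0 of the two rxtx vectors
 *    serializes[3..4]  tx_rings[0..1].tx_serialize  (tx_serialize = 3)
 *    serializes[5..6]  rx_rings[0..1].rx_serialize  (rx_serialize = 5)
 *
 * The ifnet_serialize_array_*() helpers use the recorded tx_serialize
 * and rx_serialize indices to pick the TX-only or RX-only slice, so
 * the order really is critical.
 */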
static void
igb_msix_rx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
    int x = *x0;

    for (; i < sc->rx_ring_msix; ++i) {
        struct igb_rx_ring *rxr = &sc->rx_rings[i];
        struct igb_msix_data *msix;

        KKASSERT(x < sc->msix_cnt);
        msix = &sc->msix_data[x++];

        rxr->rx_intr_bit = msix->msix_vector;
        rxr->rx_intr_mask = msix->msix_mask;

        msix->msix_serialize = &rxr->rx_serialize;
        msix->msix_func = igb_msix_rx;
        msix->msix_arg = rxr;

        msix->msix_cpuid = i + offset;
        KKASSERT(msix->msix_cpuid < ncpus2);

        ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s rx%d",
            device_get_nameunit(sc->dev), i);

        msix->msix_rate = IGB_MSIX_RX_RATE;
        ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
            "RX%d interrupt rate", i);
    }
    *x0 = x;
}

static void
igb_msix_tx_conf(struct igb_softc *sc, int i, int *x0, int offset)
{
    int x = *x0;

    for (; i < sc->tx_ring_msix; ++i) {
        struct igb_tx_ring *txr = &sc->tx_rings[i];
        struct igb_msix_data *msix;

        KKASSERT(x < sc->msix_cnt);
        msix = &sc->msix_data[x++];

        txr->tx_intr_bit = msix->msix_vector;
        txr->tx_intr_mask = msix->msix_mask;

        msix->msix_serialize = &txr->tx_serialize;
        msix->msix_func = igb_msix_tx;
        msix->msix_arg = txr;

        msix->msix_cpuid = i + offset;
        KKASSERT(msix->msix_cpuid < ncpus2);
        txr->tx_intr_cpuid = msix->msix_cpuid;

        ksnprintf(msix->msix_desc, sizeof(msix->msix_desc), "%s tx%d",
            device_get_nameunit(sc->dev), i);

        msix->msix_rate = IGB_MSIX_TX_RATE;
        ksnprintf(msix->msix_rate_desc, sizeof(msix->msix_rate_desc),
            "TX%d interrupt rate", i);
    }
    *x0 = x;
}

static void
igb_msix_rxtx(void *arg)
{
    struct igb_msix_data *msix = arg;
    struct igb_rx_ring *rxr = msix->msix_rx;
    struct igb_tx_ring *txr = msix->msix_tx;

    ASSERT_SERIALIZED(&msix->msix_serialize0);

    lwkt_serialize_enter(&rxr->rx_serialize);
    igb_rxeof(rxr, -1);
    lwkt_serialize_exit(&rxr->rx_serialize);

    lwkt_serialize_enter(&txr->tx_serialize);
    igb_txeof(txr);
    if (!ifsq_is_empty(txr->ifsq))
        ifsq_devstart(txr->ifsq);
    lwkt_serialize_exit(&txr->tx_serialize);

    E1000_WRITE_REG(&msix->msix_sc->hw, E1000_EIMS, msix->msix_mask);
}

static void
igb_set_timer_cpuid(struct igb_softc *sc, boolean_t polling)
{
    if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
        sc->timer_cpuid = 0;	/* XXX fixed */
    else
        sc->timer_cpuid = rman_get_cpuid(sc->intr_res);
}
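/*
 * NOTE:
 * For polling and MSI-X there is no single interrupt CPU for the
 * periodic timer to follow, so igb_set_timer_cpuid() pins it to cpu0
 * (marked "XXX fixed" above); for MSI/legacy it follows the interrupt
 * CPU, presumably to keep the callout and the interrupt handler on
 * the same CPU.
 */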