/*
 * Copyright (c) 2001-2013, Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_ifpoll.h"
#include "opt_ix.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/serialize.h>
#include <sys/serialize2.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/toeplitz.h>
#include <net/toeplitz2.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>
#include <net/if_poll.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>

#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include <dev/netif/ix/ixgbe_api.h>
#include <dev/netif/ix/if_ix.h>

#ifdef IX_RSS_DEBUG
#define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
do { \
	if (sc->rss_debug >= lvl) \
		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
} while (0)
#else	/* !IX_RSS_DEBUG */
#define IX_RSS_DPRINTF(sc, lvl, fmt, ...)
((void)0) 82 #endif /* IX_RSS_DEBUG */ 83 84 #define IX_NAME "Intel(R) PRO/10GbE " 85 #define IX_DEVICE(id) \ 86 { IXGBE_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id } 87 #define IX_DEVICE_NULL { 0, 0, NULL } 88 89 static struct ix_device { 90 uint16_t vid; 91 uint16_t did; 92 const char *desc; 93 } ix_devices[] = { 94 IX_DEVICE(82598AF_DUAL_PORT), 95 IX_DEVICE(82598AF_SINGLE_PORT), 96 IX_DEVICE(82598EB_CX4), 97 IX_DEVICE(82598AT), 98 IX_DEVICE(82598AT2), 99 IX_DEVICE(82598), 100 IX_DEVICE(82598_DA_DUAL_PORT), 101 IX_DEVICE(82598_CX4_DUAL_PORT), 102 IX_DEVICE(82598EB_XF_LR), 103 IX_DEVICE(82598_SR_DUAL_PORT_EM), 104 IX_DEVICE(82598EB_SFP_LOM), 105 IX_DEVICE(82599_KX4), 106 IX_DEVICE(82599_KX4_MEZZ), 107 IX_DEVICE(82599_SFP), 108 IX_DEVICE(82599_XAUI_LOM), 109 IX_DEVICE(82599_CX4), 110 IX_DEVICE(82599_T3_LOM), 111 IX_DEVICE(82599_COMBO_BACKPLANE), 112 IX_DEVICE(82599_BACKPLANE_FCOE), 113 IX_DEVICE(82599_SFP_SF2), 114 IX_DEVICE(82599_SFP_FCOE), 115 IX_DEVICE(82599EN_SFP), 116 IX_DEVICE(82599_SFP_SF_QP), 117 IX_DEVICE(X540T), 118 119 /* required last entry */ 120 IX_DEVICE_NULL 121 }; 122 123 static int ix_probe(device_t); 124 static int ix_attach(device_t); 125 static int ix_detach(device_t); 126 static int ix_shutdown(device_t); 127 128 static void ix_serialize(struct ifnet *, enum ifnet_serialize); 129 static void ix_deserialize(struct ifnet *, enum ifnet_serialize); 130 static int ix_tryserialize(struct ifnet *, enum ifnet_serialize); 131 #ifdef INVARIANTS 132 static void ix_serialize_assert(struct ifnet *, enum ifnet_serialize, 133 boolean_t); 134 #endif 135 static void ix_start(struct ifnet *, struct ifaltq_subque *); 136 static void ix_watchdog(struct ifaltq_subque *); 137 static int ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 138 static void ix_init(void *); 139 static void ix_stop(struct ix_softc *); 140 static void ix_media_status(struct ifnet *, struct ifmediareq *); 141 static int ix_media_change(struct ifnet *); 142 static void ix_timer(void *); 143 #ifdef IFPOLL_ENABLE 144 static void ix_npoll(struct ifnet *, struct ifpoll_info *); 145 static void ix_npoll_rx(struct ifnet *, void *, int); 146 static void ix_npoll_tx(struct ifnet *, void *, int); 147 static void ix_npoll_status(struct ifnet *); 148 #endif 149 150 static void ix_add_sysctl(struct ix_softc *); 151 static void ix_add_intr_rate_sysctl(struct ix_softc *, int, 152 const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *); 153 static int ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS); 154 static int ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS); 155 static int ix_sysctl_txd(SYSCTL_HANDLER_ARGS); 156 static int ix_sysctl_rxd(SYSCTL_HANDLER_ARGS); 157 static int ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS); 158 static int ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int); 159 static int ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS); 160 static int ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS); 161 static int ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS); 162 static int ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS); 163 #ifdef foo 164 static int ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS); 165 #endif 166 #if 0 167 static void ix_add_hw_stats(struct ix_softc *); 168 #endif 169 #ifdef IFPOLL_ENABLE 170 static int ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS); 171 static int ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS); 172 #endif 173 174 static void ix_slot_info(struct ix_softc *); 175 static int ix_alloc_rings(struct ix_softc *); 176 static void ix_free_rings(struct ix_softc *); 177 static void ix_setup_ifp(struct ix_softc *); 178 
static void ix_setup_serialize(struct ix_softc *); 179 static void ix_set_ring_inuse(struct ix_softc *, boolean_t); 180 static void ix_set_timer_cpuid(struct ix_softc *, boolean_t); 181 static void ix_update_stats(struct ix_softc *); 182 183 static void ix_set_promisc(struct ix_softc *); 184 static void ix_set_multi(struct ix_softc *); 185 static void ix_set_vlan(struct ix_softc *); 186 static uint8_t *ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *); 187 static enum ixgbe_fc_mode ix_ifmedia2fc(int); 188 static const char *ix_ifmedia2str(int); 189 static const char *ix_fc2str(enum ixgbe_fc_mode); 190 191 static int ix_get_txring_inuse(const struct ix_softc *, boolean_t); 192 static void ix_init_tx_ring(struct ix_tx_ring *); 193 static void ix_free_tx_ring(struct ix_tx_ring *); 194 static int ix_create_tx_ring(struct ix_tx_ring *); 195 static void ix_destroy_tx_ring(struct ix_tx_ring *, int); 196 static void ix_init_tx_unit(struct ix_softc *); 197 static int ix_encap(struct ix_tx_ring *, struct mbuf **, 198 uint16_t *, int *); 199 static int ix_tx_ctx_setup(struct ix_tx_ring *, 200 const struct mbuf *, uint32_t *, uint32_t *); 201 static int ix_tso_ctx_setup(struct ix_tx_ring *, 202 const struct mbuf *, uint32_t *, uint32_t *); 203 static void ix_txeof(struct ix_tx_ring *, int); 204 205 static int ix_get_rxring_inuse(const struct ix_softc *, boolean_t); 206 static int ix_init_rx_ring(struct ix_rx_ring *); 207 static void ix_free_rx_ring(struct ix_rx_ring *); 208 static int ix_create_rx_ring(struct ix_rx_ring *); 209 static void ix_destroy_rx_ring(struct ix_rx_ring *, int); 210 static void ix_init_rx_unit(struct ix_softc *); 211 #if 0 212 static void ix_setup_hw_rsc(struct ix_rx_ring *); 213 #endif 214 static int ix_newbuf(struct ix_rx_ring *, int, boolean_t); 215 static void ix_rxeof(struct ix_rx_ring *, int); 216 static void ix_rx_discard(struct ix_rx_ring *, int, boolean_t); 217 static void ix_enable_rx_drop(struct ix_softc *); 218 static void ix_disable_rx_drop(struct ix_softc *); 219 220 static void ix_alloc_msix(struct ix_softc *); 221 static void ix_free_msix(struct ix_softc *, boolean_t); 222 static void ix_conf_rx_msix(struct ix_softc *, int, int *, int); 223 static void ix_conf_tx_msix(struct ix_softc *, int, int *, int); 224 static void ix_setup_msix_eims(const struct ix_softc *, int, 225 uint32_t *, uint32_t *); 226 static int ix_alloc_intr(struct ix_softc *); 227 static void ix_free_intr(struct ix_softc *); 228 static int ix_setup_intr(struct ix_softc *); 229 static void ix_teardown_intr(struct ix_softc *, int); 230 static void ix_enable_intr(struct ix_softc *); 231 static void ix_disable_intr(struct ix_softc *); 232 static void ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t); 233 static void ix_set_eitr(struct ix_softc *, int, int); 234 static void ix_intr_status(struct ix_softc *, uint32_t); 235 static void ix_intr(void *); 236 static void ix_msix_rxtx(void *); 237 static void ix_msix_rx(void *); 238 static void ix_msix_tx(void *); 239 static void ix_msix_status(void *); 240 241 static void ix_config_link(struct ix_softc *); 242 static boolean_t ix_sfp_probe(struct ix_softc *); 243 static boolean_t ix_is_sfp(const struct ixgbe_hw *); 244 static void ix_setup_optics(struct ix_softc *); 245 static void ix_update_link_status(struct ix_softc *); 246 static void ix_handle_link(struct ix_softc *); 247 static void ix_handle_mod(struct ix_softc *); 248 static void ix_handle_msf(struct ix_softc *); 249 250 /* XXX Shared code structure requires this for the moment */ 
extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *);

static device_method_t ix_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ix_probe),
	DEVMETHOD(device_attach, ix_attach),
	DEVMETHOD(device_detach, ix_detach),
	DEVMETHOD(device_shutdown, ix_shutdown),
	DEVMETHOD_END
};

static driver_t ix_driver = {
	"ix",
	ix_methods,
	sizeof(struct ix_softc)
};

static devclass_t ix_devclass;

DECLARE_DUMMY_MODULE(if_ix);
DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);

static int ix_msi_enable = 1;
static int ix_msix_enable = 1;
static int ix_msix_agg_rxtx = 1;
static int ix_rxr = 0;
static int ix_txr = 0;
static int ix_txd = IX_PERF_TXD;
static int ix_rxd = IX_PERF_RXD;
static int ix_unsupported_sfp = 0;

static char ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FULL;

TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
TUNABLE_INT("hw.ix.msix.agg_rxtx", &ix_msix_agg_rxtx);
TUNABLE_INT("hw.ix.rxr", &ix_rxr);
TUNABLE_INT("hw.ix.txr", &ix_txr);
TUNABLE_INT("hw.ix.txd", &ix_txd);
TUNABLE_INT("hw.ix.rxd", &ix_rxd);
TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));

/*
 * Smart speed setting, default to on.  This only works as a compile
 * option right now, since it is applied during attach; set this to
 * 'ixgbe_smart_speed_off' to disable.
 */
static const enum ixgbe_smart_speed ix_smart_speed =
    ixgbe_smart_speed_on;

static int
ix_probe(device_t dev)
{
	const struct ix_device *d;
	uint16_t vid, did;

	vid = pci_get_vendor(dev);
	did = pci_get_device(dev);

	for (d = ix_devices; d->desc != NULL; ++d) {
		if (vid == d->vid && did == d->did) {
			device_set_desc(dev, d->desc);
			return 0;
		}
	}
	return ENXIO;
}

static int
ix_attach(device_t dev)
{
	struct ix_softc *sc = device_get_softc(dev);
	struct ixgbe_hw *hw;
	int error, ring_cnt_max;
	uint16_t csum;
	uint32_t ctrl_ext;
#ifdef IFPOLL_ENABLE
	int offset, offset_def;
#endif
	char flowctrl[IFM_ETH_FC_STRLEN];

	sc->dev = sc->osdep.dev = dev;
	hw = &sc->hw;

	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
	    device_get_unit(dev));
	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
	    ix_media_change, ix_media_status);

	/* Save frame size */
	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;

	callout_init_mp(&sc->timer);
	lwkt_serialize_init(&sc->main_serialize);

	/*
	 * Save off the information about this board
	 */
	hw->vendor_id = pci_get_vendor(dev);
	hw->device_id = pci_get_device(dev);
	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);

	ixgbe_set_mac_type(hw);

	/* Pick up the 82599 and VF settings */
	if (hw->mac.type != ixgbe_mac_82598EB)
		hw->phy.smart_speed = ix_smart_speed;

	/* Enable bus mastering */
	pci_enable_busmaster(dev);

	/*
	 * Allocate IO memory
	 */
	sc->mem_rid = PCIR_BAR(0);
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->mem_rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "Unable to allocate bus 
resource: memory\n"); 373 error = ENXIO; 374 goto failed; 375 } 376 377 sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res); 378 sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res); 379 380 sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle; 381 sc->hw.back = &sc->osdep; 382 383 /* 384 * Configure total supported RX/TX ring count 385 */ 386 sc->rx_ring_cnt = device_getenv_int(dev, "rxr", ix_rxr); 387 sc->rx_ring_cnt = if_ring_count2(sc->rx_ring_cnt, IX_MAX_RXRING); 388 sc->rx_ring_inuse = sc->rx_ring_cnt; 389 390 switch (hw->mac.type) { 391 case ixgbe_mac_82598EB: 392 ring_cnt_max = IX_MAX_TXRING_82598; 393 break; 394 395 case ixgbe_mac_82599EB: 396 ring_cnt_max = IX_MAX_TXRING_82599; 397 break; 398 399 case ixgbe_mac_X540: 400 ring_cnt_max = IX_MAX_TXRING_X540; 401 break; 402 403 default: 404 ring_cnt_max = 1; 405 break; 406 } 407 sc->tx_ring_cnt = device_getenv_int(dev, "txr", ix_txr); 408 sc->tx_ring_cnt = if_ring_count2(sc->tx_ring_cnt, ring_cnt_max); 409 sc->tx_ring_inuse = sc->tx_ring_cnt; 410 411 /* Allocate TX/RX rings */ 412 error = ix_alloc_rings(sc); 413 if (error) 414 goto failed; 415 416 #ifdef IFPOLL_ENABLE 417 /* 418 * NPOLLING RX CPU offset 419 */ 420 if (sc->rx_ring_cnt == ncpus2) { 421 offset = 0; 422 } else { 423 offset_def = (sc->rx_ring_cnt * device_get_unit(dev)) % ncpus2; 424 offset = device_getenv_int(dev, "npoll.rxoff", offset_def); 425 if (offset >= ncpus2 || 426 offset % sc->rx_ring_cnt != 0) { 427 device_printf(dev, "invalid npoll.rxoff %d, use %d\n", 428 offset, offset_def); 429 offset = offset_def; 430 } 431 } 432 sc->rx_npoll_off = offset; 433 434 /* 435 * NPOLLING TX CPU offset 436 */ 437 if (sc->tx_ring_cnt == ncpus2) { 438 offset = 0; 439 } else { 440 offset_def = (sc->tx_ring_cnt * device_get_unit(dev)) % ncpus2; 441 offset = device_getenv_int(dev, "npoll.txoff", offset_def); 442 if (offset >= ncpus2 || 443 offset % sc->tx_ring_cnt != 0) { 444 device_printf(dev, "invalid npoll.txoff %d, use %d\n", 445 offset, offset_def); 446 offset = offset_def; 447 } 448 } 449 sc->tx_npoll_off = offset; 450 #endif 451 452 /* Allocate interrupt */ 453 error = ix_alloc_intr(sc); 454 if (error) 455 goto failed; 456 457 /* Setup serializes */ 458 ix_setup_serialize(sc); 459 460 /* Allocate multicast array memory. */ 461 sc->mta = kmalloc(IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR, 462 M_DEVBUF, M_WAITOK); 463 464 /* Initialize the shared code */ 465 hw->allow_unsupported_sfp = ix_unsupported_sfp; 466 error = ixgbe_init_shared_code(hw); 467 if (error == IXGBE_ERR_SFP_NOT_PRESENT) { 468 /* 469 * No optics in this port; ask timer routine 470 * to probe for later insertion. 
471 */ 472 sc->sfp_probe = TRUE; 473 error = 0; 474 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { 475 device_printf(dev, "Unsupported SFP+ module detected!\n"); 476 error = EIO; 477 goto failed; 478 } else if (error) { 479 device_printf(dev, "Unable to initialize the shared code\n"); 480 error = EIO; 481 goto failed; 482 } 483 484 /* Make sure we have a good EEPROM before we read from it */ 485 if (ixgbe_validate_eeprom_checksum(&sc->hw, &csum) < 0) { 486 device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); 487 error = EIO; 488 goto failed; 489 } 490 491 error = ixgbe_init_hw(hw); 492 if (error == IXGBE_ERR_EEPROM_VERSION) { 493 device_printf(dev, "Pre-production device detected\n"); 494 } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { 495 device_printf(dev, "Unsupported SFP+ Module\n"); 496 error = EIO; 497 goto failed; 498 } else if (error == IXGBE_ERR_SFP_NOT_PRESENT) { 499 device_printf(dev, "No SFP+ Module found\n"); 500 } 501 502 /* Detect and set physical type */ 503 ix_setup_optics(sc); 504 505 /* Get default flow control settings */ 506 device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl), 507 ix_flowctrl); 508 sc->ifm_flowctrl = ifmedia_str2ethfc(flowctrl); 509 510 /* Setup OS specific network interface */ 511 ix_setup_ifp(sc); 512 513 /* Add sysctl tree */ 514 ix_add_sysctl(sc); 515 516 error = ix_setup_intr(sc); 517 if (error) { 518 ether_ifdetach(&sc->arpcom.ac_if); 519 goto failed; 520 } 521 522 /* Initialize statistics */ 523 ix_update_stats(sc); 524 525 /* 526 * Check PCIE slot type/speed/width 527 */ 528 ix_slot_info(sc); 529 530 /* Let hardware know driver is loaded */ 531 ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); 532 ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; 533 IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); 534 535 return 0; 536 failed: 537 ix_detach(dev); 538 return error; 539 } 540 541 static int 542 ix_detach(device_t dev) 543 { 544 struct ix_softc *sc = device_get_softc(dev); 545 546 if (device_is_attached(dev)) { 547 struct ifnet *ifp = &sc->arpcom.ac_if; 548 uint32_t ctrl_ext; 549 550 ifnet_serialize_all(ifp); 551 552 ix_stop(sc); 553 ix_teardown_intr(sc, sc->intr_cnt); 554 555 ifnet_deserialize_all(ifp); 556 557 callout_terminate(&sc->timer); 558 ether_ifdetach(ifp); 559 560 /* Let hardware know driver is unloading */ 561 ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT); 562 ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; 563 IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext); 564 } 565 566 ifmedia_removeall(&sc->media); 567 bus_generic_detach(dev); 568 569 ix_free_intr(sc); 570 571 if (sc->msix_mem_res != NULL) { 572 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid, 573 sc->msix_mem_res); 574 } 575 if (sc->mem_res != NULL) { 576 bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, 577 sc->mem_res); 578 } 579 580 ix_free_rings(sc); 581 582 if (sc->mta != NULL) 583 kfree(sc->mta, M_DEVBUF); 584 if (sc->serializes != NULL) 585 kfree(sc->serializes, M_DEVBUF); 586 587 return 0; 588 } 589 590 static int 591 ix_shutdown(device_t dev) 592 { 593 struct ix_softc *sc = device_get_softc(dev); 594 struct ifnet *ifp = &sc->arpcom.ac_if; 595 596 ifnet_serialize_all(ifp); 597 ix_stop(sc); 598 ifnet_deserialize_all(ifp); 599 600 return 0; 601 } 602 603 static void 604 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 605 { 606 struct ix_softc *sc = ifp->if_softc; 607 struct ix_tx_ring *txr = ifsq_get_priv(ifsq); 608 int idx = -1; 609 uint16_t nsegs; 610 611 KKASSERT(txr->tx_ifsq == ifsq); 612 ASSERT_SERIALIZED(&txr->tx_serialize); 613 614 if 
((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
		ifsq_purge(ifsq);
		return;
	}

	while (!ifsq_is_empty(ifsq)) {
		struct mbuf *m_head;

		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
			ifsq_set_oactive(ifsq);
			txr->tx_watchdog.wd_timer = 5;
			break;
		}

		m_head = ifsq_dequeue(ifsq);
		if (m_head == NULL)
			break;

		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
			IFNET_STAT_INC(ifp, oerrors, 1);
			continue;
		}

		/*
		 * TX interrupts are aggressively aggregated, so increasing
		 * opackets at TX interrupt time would make the opackets
		 * statistics vastly inaccurate; do the opackets increment
		 * now.
		 */
		IFNET_STAT_INC(ifp, opackets, 1);

		if (nsegs >= txr->tx_wreg_nsegs) {
			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
			nsegs = 0;
			idx = -1;
		}

		ETHER_BPF_MTAP(ifp, m_head);
	}
	if (idx >= 0)
		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
}

static int
ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
	struct ix_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *) data;
	int error = 0, mask, reinit;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	switch (command) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu > IX_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
			error = EINVAL;
		} else {
			ifp->if_mtu = ifr->ifr_mtu;
			sc->max_frame_size =
			    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
			ix_init(sc);
		}
		break;

	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_flags & IFF_RUNNING) {
				if ((ifp->if_flags ^ sc->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI))
					ix_set_promisc(sc);
			} else {
				ix_init(sc);
			}
		} else if (ifp->if_flags & IFF_RUNNING) {
			ix_stop(sc);
		}
		sc->if_flags = ifp->if_flags;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_flags & IFF_RUNNING) {
			ix_disable_intr(sc);
			ix_set_multi(sc);
#ifdef IFPOLL_ENABLE
			if ((ifp->if_flags & IFF_NPOLLING) == 0)
#endif
				ix_enable_intr(sc);
		}
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFCAP:
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_RXCSUM) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= CSUM_OFFLOAD;
			else
				ifp->if_hwassist &= ~CSUM_OFFLOAD;
		}
		if (mask & IFCAP_TSO) {
			ifp->if_capenable ^= IFCAP_TSO;
			if (ifp->if_capenable & IFCAP_TSO)
				ifp->if_hwassist |= CSUM_TSO;
			else
				ifp->if_hwassist &= ~CSUM_TSO;
		}
		if (mask & IFCAP_RSS)
			ifp->if_capenable ^= IFCAP_RSS;
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			ix_init(sc);
		break;

#if 0
	case SIOCGI2C:
	{
		struct ixgbe_i2c_req i2c;
		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
		if (error)
			break;
		/* Only the 0xA0/0xA2 SFP+ EEPROM addresses are valid */
		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
			error = EINVAL;
			break;
		}
		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
		    i2c.dev_addr, i2c.data);
		error = copyout(&i2c, ifr->ifr_data, 
sizeof(i2c)); 758 break; 759 } 760 #endif 761 762 default: 763 error = ether_ioctl(ifp, command, data); 764 break; 765 } 766 return error; 767 } 768 769 #define IXGBE_MHADD_MFS_SHIFT 16 770 771 static void 772 ix_init(void *xsc) 773 { 774 struct ix_softc *sc = xsc; 775 struct ifnet *ifp = &sc->arpcom.ac_if; 776 struct ixgbe_hw *hw = &sc->hw; 777 uint32_t rxpb, frame, size, tmp; 778 uint32_t gpie, rxctrl; 779 int i, error; 780 boolean_t polling; 781 782 ASSERT_IFNET_SERIALIZED_ALL(ifp); 783 784 ix_stop(sc); 785 786 polling = FALSE; 787 #ifdef IFPOLL_ENABLE 788 if (ifp->if_flags & IFF_NPOLLING) 789 polling = TRUE; 790 #endif 791 792 /* Configure # of used RX/TX rings */ 793 ix_set_ring_inuse(sc, polling); 794 ifq_set_subq_mask(&ifp->if_snd, sc->tx_ring_inuse - 1); 795 796 /* Get the latest mac address, User can use a LAA */ 797 bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); 798 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); 799 hw->addr_ctrl.rar_used_count = 1; 800 801 /* Prepare transmit descriptors and buffers */ 802 for (i = 0; i < sc->tx_ring_inuse; ++i) 803 ix_init_tx_ring(&sc->tx_rings[i]); 804 805 ixgbe_init_hw(hw); 806 ix_init_tx_unit(sc); 807 808 /* Setup Multicast table */ 809 ix_set_multi(sc); 810 811 /* Prepare receive descriptors and buffers */ 812 for (i = 0; i < sc->rx_ring_inuse; ++i) { 813 error = ix_init_rx_ring(&sc->rx_rings[i]); 814 if (error) { 815 if_printf(ifp, "Could not initialize RX ring%d\n", i); 816 ix_stop(sc); 817 return; 818 } 819 } 820 821 /* Configure RX settings */ 822 ix_init_rx_unit(sc); 823 824 gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); 825 826 /* Enable Fan Failure Interrupt */ 827 gpie |= IXGBE_SDP1_GPIEN; 828 829 /* Add for Module detection */ 830 if (hw->mac.type == ixgbe_mac_82599EB) 831 gpie |= IXGBE_SDP2_GPIEN; 832 833 /* Thermal Failure Detection */ 834 if (hw->mac.type == ixgbe_mac_X540) 835 gpie |= IXGBE_SDP0_GPIEN; 836 837 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 838 /* Enable Enhanced MSIX mode */ 839 gpie |= IXGBE_GPIE_MSIX_MODE; 840 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | 841 IXGBE_GPIE_OCD; 842 } 843 IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); 844 845 /* Set MTU size */ 846 if (ifp->if_mtu > ETHERMTU) { 847 uint32_t mhadd; 848 849 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); 850 mhadd &= ~IXGBE_MHADD_MFS_MASK; 851 mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT; 852 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); 853 } 854 855 /* 856 * Enable TX rings 857 */ 858 for (i = 0; i < sc->tx_ring_inuse; ++i) { 859 uint32_t txdctl; 860 861 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); 862 txdctl |= IXGBE_TXDCTL_ENABLE; 863 864 /* 865 * Set WTHRESH to 0, since TX head write-back is used 866 */ 867 txdctl &= ~(0x7f << 16); 868 869 /* 870 * When the internal queue falls below PTHRESH (32), 871 * start prefetching as long as there are at least 872 * HTHRESH (1) buffers ready. The values are taken 873 * from the Intel linux driver 3.8.21. 874 * Prefetching enables tx line rate even with 1 queue. 
875 */ 876 txdctl |= (32 << 0) | (1 << 8); 877 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl); 878 } 879 880 /* 881 * Enable RX rings 882 */ 883 for (i = 0; i < sc->rx_ring_inuse; ++i) { 884 uint32_t rxdctl; 885 int k; 886 887 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); 888 if (hw->mac.type == ixgbe_mac_82598EB) { 889 /* 890 * PTHRESH = 21 891 * HTHRESH = 4 892 * WTHRESH = 8 893 */ 894 rxdctl &= ~0x3FFFFF; 895 rxdctl |= 0x080420; 896 } 897 rxdctl |= IXGBE_RXDCTL_ENABLE; 898 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl); 899 for (k = 0; k < 10; ++k) { 900 if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) & 901 IXGBE_RXDCTL_ENABLE) 902 break; 903 else 904 msec_delay(1); 905 } 906 wmb(); 907 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 908 sc->rx_rings[0].rx_ndesc - 1); 909 } 910 911 /* Set up VLAN support and filter */ 912 ix_set_vlan(sc); 913 914 /* Enable Receive engine */ 915 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); 916 if (hw->mac.type == ixgbe_mac_82598EB) 917 rxctrl |= IXGBE_RXCTRL_DMBYPS; 918 rxctrl |= IXGBE_RXCTRL_RXEN; 919 ixgbe_enable_rx_dma(hw, rxctrl); 920 921 for (i = 0; i < sc->tx_ring_inuse; ++i) { 922 const struct ix_tx_ring *txr = &sc->tx_rings[i]; 923 924 if (txr->tx_intr_vec >= 0) { 925 ix_set_ivar(sc, i, txr->tx_intr_vec, 1); 926 } else { 927 /* 928 * Unconfigured TX interrupt vector could only 929 * happen for MSI-X. 930 */ 931 KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX, 932 ("TX intr vector is not set")); 933 KASSERT(i < sc->rx_ring_inuse, 934 ("invalid TX ring %d, no piggyback RX ring", i)); 935 KASSERT(sc->rx_rings[i].rx_txr == txr, 936 ("RX ring %d piggybacked TX ring mismatch", i)); 937 if (bootverbose) 938 if_printf(ifp, "IVAR skips TX ring %d\n", i); 939 } 940 } 941 for (i = 0; i < sc->rx_ring_inuse; ++i) { 942 const struct ix_rx_ring *rxr = &sc->rx_rings[i]; 943 944 KKASSERT(rxr->rx_intr_vec >= 0); 945 ix_set_ivar(sc, i, rxr->rx_intr_vec, 0); 946 if (rxr->rx_txr != NULL) { 947 /* 948 * Piggyback the TX ring interrupt onto the RX 949 * ring interrupt vector. 
950 */ 951 KASSERT(rxr->rx_txr->tx_intr_vec < 0, 952 ("piggybacked TX ring configured intr vector")); 953 KASSERT(rxr->rx_txr->tx_idx == i, 954 ("RX ring %d piggybacked TX ring %u", 955 i, rxr->rx_txr->tx_idx)); 956 ix_set_ivar(sc, i, rxr->rx_intr_vec, 1); 957 if (bootverbose) { 958 if_printf(ifp, "IVAR RX ring %d piggybacks " 959 "TX ring %u\n", i, rxr->rx_txr->tx_idx); 960 } 961 } 962 } 963 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 964 /* Set up status MSI-X vector; it is using fixed entry 1 */ 965 ix_set_ivar(sc, 1, sc->sts_msix_vec, -1); 966 967 /* Set up auto-mask for TX and RX rings */ 968 if (hw->mac.type == ixgbe_mac_82598EB) { 969 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE); 970 } else { 971 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); 972 IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); 973 } 974 } else { 975 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE); 976 } 977 for (i = 0; i < sc->intr_cnt; ++i) 978 ix_set_eitr(sc, i, sc->intr_data[i].intr_rate); 979 980 /* 981 * Check on any SFP devices that need to be kick-started 982 */ 983 if (hw->phy.type == ixgbe_phy_none) { 984 error = hw->phy.ops.identify(hw); 985 if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { 986 if_printf(ifp, 987 "Unsupported SFP+ module type was detected.\n"); 988 /* XXX stop */ 989 return; 990 } 991 } 992 993 /* Config/Enable Link */ 994 ix_config_link(sc); 995 996 /* 997 * Hardware Packet Buffer & Flow Control setup 998 */ 999 frame = sc->max_frame_size; 1000 1001 /* Calculate High Water */ 1002 if (hw->mac.type == ixgbe_mac_X540) 1003 tmp = IXGBE_DV_X540(frame, frame); 1004 else 1005 tmp = IXGBE_DV(frame, frame); 1006 size = IXGBE_BT2KB(tmp); 1007 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; 1008 hw->fc.high_water[0] = rxpb - size; 1009 1010 /* Now calculate Low Water */ 1011 if (hw->mac.type == ixgbe_mac_X540) 1012 tmp = IXGBE_LOW_DV_X540(frame); 1013 else 1014 tmp = IXGBE_LOW_DV(frame); 1015 hw->fc.low_water[0] = IXGBE_BT2KB(tmp); 1016 1017 hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_flowctrl); 1018 if (sc->ifm_flowctrl & IFM_ETH_FORCEPAUSE) 1019 hw->fc.disable_fc_autoneg = TRUE; 1020 else 1021 hw->fc.disable_fc_autoneg = FALSE; 1022 hw->fc.pause_time = IX_FC_PAUSE; 1023 hw->fc.send_xon = TRUE; 1024 1025 /* Initialize the FC settings */ 1026 ixgbe_start_hw(hw); 1027 1028 /* 1029 * Only enable interrupts if we are not polling, make sure 1030 * they are off otherwise. 
1031 */ 1032 if (polling) 1033 ix_disable_intr(sc); 1034 else 1035 ix_enable_intr(sc); 1036 1037 ifp->if_flags |= IFF_RUNNING; 1038 for (i = 0; i < sc->tx_ring_inuse; ++i) { 1039 ifsq_clr_oactive(sc->tx_rings[i].tx_ifsq); 1040 ifsq_watchdog_start(&sc->tx_rings[i].tx_watchdog); 1041 } 1042 1043 ix_set_timer_cpuid(sc, polling); 1044 callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid); 1045 } 1046 1047 static void 1048 ix_intr(void *xsc) 1049 { 1050 struct ix_softc *sc = xsc; 1051 struct ixgbe_hw *hw = &sc->hw; 1052 uint32_t eicr; 1053 1054 ASSERT_SERIALIZED(&sc->main_serialize); 1055 1056 eicr = IXGBE_READ_REG(hw, IXGBE_EICR); 1057 if (eicr == 0) { 1058 IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask); 1059 return; 1060 } 1061 1062 if (eicr & IX_RX0_INTR_MASK) { 1063 struct ix_rx_ring *rxr = &sc->rx_rings[0]; 1064 1065 lwkt_serialize_enter(&rxr->rx_serialize); 1066 ix_rxeof(rxr, -1); 1067 lwkt_serialize_exit(&rxr->rx_serialize); 1068 } 1069 if (eicr & IX_RX1_INTR_MASK) { 1070 struct ix_rx_ring *rxr; 1071 1072 KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS); 1073 rxr = &sc->rx_rings[1]; 1074 1075 lwkt_serialize_enter(&rxr->rx_serialize); 1076 ix_rxeof(rxr, -1); 1077 lwkt_serialize_exit(&rxr->rx_serialize); 1078 } 1079 1080 if (eicr & IX_TX_INTR_MASK) { 1081 struct ix_tx_ring *txr = &sc->tx_rings[0]; 1082 1083 lwkt_serialize_enter(&txr->tx_serialize); 1084 ix_txeof(txr, *(txr->tx_hdr)); 1085 if (!ifsq_is_empty(txr->tx_ifsq)) 1086 ifsq_devstart(txr->tx_ifsq); 1087 lwkt_serialize_exit(&txr->tx_serialize); 1088 } 1089 1090 if (__predict_false(eicr & IX_EICR_STATUS)) 1091 ix_intr_status(sc, eicr); 1092 1093 IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask); 1094 } 1095 1096 static void 1097 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 1098 { 1099 struct ix_softc *sc = ifp->if_softc; 1100 1101 ix_update_link_status(sc); 1102 1103 ifmr->ifm_status = IFM_AVALID; 1104 ifmr->ifm_active = IFM_ETHER; 1105 1106 if (!sc->link_active) { 1107 ifmr->ifm_active |= IFM_NONE; 1108 return; 1109 } 1110 1111 ifmr->ifm_status |= IFM_ACTIVE; 1112 1113 switch (sc->link_speed) { 1114 case IXGBE_LINK_SPEED_100_FULL: 1115 ifmr->ifm_active |= IFM_100_TX | IFM_FDX; 1116 break; 1117 case IXGBE_LINK_SPEED_1GB_FULL: 1118 ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; 1119 break; 1120 case IXGBE_LINK_SPEED_10GB_FULL: 1121 ifmr->ifm_active |= sc->optics | IFM_FDX; 1122 break; 1123 default: 1124 ifmr->ifm_active |= IFM_NONE; 1125 return; 1126 } 1127 1128 if (sc->ifm_flowctrl & IFM_ETH_FORCEPAUSE) 1129 ifmr->ifm_active |= IFM_ETH_FORCEPAUSE; 1130 1131 switch (sc->hw.fc.current_mode) { 1132 case ixgbe_fc_full: 1133 ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE; 1134 break; 1135 case ixgbe_fc_rx_pause: 1136 ifmr->ifm_active |= IFM_ETH_RXPAUSE; 1137 break; 1138 case ixgbe_fc_tx_pause: 1139 ifmr->ifm_active |= IFM_ETH_TXPAUSE; 1140 break; 1141 default: 1142 break; 1143 } 1144 } 1145 1146 static int 1147 ix_media_change(struct ifnet *ifp) 1148 { 1149 struct ix_softc *sc = ifp->if_softc; 1150 struct ifmedia *ifm = &sc->media; 1151 1152 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 1153 return EINVAL; 1154 1155 switch (IFM_SUBTYPE(ifm->ifm_media)) { 1156 case IFM_AUTO: 1157 sc->hw.phy.autoneg_advertised = 1158 IXGBE_LINK_SPEED_100_FULL | 1159 IXGBE_LINK_SPEED_1GB_FULL | 1160 IXGBE_LINK_SPEED_10GB_FULL; 1161 break; 1162 default: 1163 if_printf(ifp, "Only auto media type\n"); 1164 return EINVAL; 1165 } 1166 sc->ifm_flowctrl = ifm->ifm_media & IFM_ETH_FCMASK; 1167 1168 if (ifp->if_flags & IFF_RUNNING) 1169 
ix_init(sc); 1170 return 0; 1171 } 1172 1173 static __inline int 1174 ix_tso_pullup(struct mbuf **mp) 1175 { 1176 int hoff, iphlen, thoff; 1177 struct mbuf *m; 1178 1179 m = *mp; 1180 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable")); 1181 1182 iphlen = m->m_pkthdr.csum_iphlen; 1183 thoff = m->m_pkthdr.csum_thlen; 1184 hoff = m->m_pkthdr.csum_lhlen; 1185 1186 KASSERT(iphlen > 0, ("invalid ip hlen")); 1187 KASSERT(thoff > 0, ("invalid tcp hlen")); 1188 KASSERT(hoff > 0, ("invalid ether hlen")); 1189 1190 if (__predict_false(m->m_len < hoff + iphlen + thoff)) { 1191 m = m_pullup(m, hoff + iphlen + thoff); 1192 if (m == NULL) { 1193 *mp = NULL; 1194 return ENOBUFS; 1195 } 1196 *mp = m; 1197 } 1198 return 0; 1199 } 1200 1201 static int 1202 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp, 1203 uint16_t *segs_used, int *idx) 1204 { 1205 uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0; 1206 int i, j, error, nsegs, first, maxsegs; 1207 struct mbuf *m_head = *m_headp; 1208 bus_dma_segment_t segs[IX_MAX_SCATTER]; 1209 bus_dmamap_t map; 1210 struct ix_tx_buf *txbuf; 1211 union ixgbe_adv_tx_desc *txd = NULL; 1212 1213 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 1214 error = ix_tso_pullup(m_headp); 1215 if (__predict_false(error)) 1216 return error; 1217 m_head = *m_headp; 1218 } 1219 1220 /* Basic descriptor defines */ 1221 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA | 1222 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); 1223 1224 if (m_head->m_flags & M_VLANTAG) 1225 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; 1226 1227 /* 1228 * Important to capture the first descriptor 1229 * used because it will contain the index of 1230 * the one we tell the hardware to report back 1231 */ 1232 first = txr->tx_next_avail; 1233 txbuf = &txr->tx_buf[first]; 1234 map = txbuf->map; 1235 1236 /* 1237 * Map the packet for DMA. 1238 */ 1239 maxsegs = txr->tx_avail - IX_TX_RESERVED; 1240 if (maxsegs > IX_MAX_SCATTER) 1241 maxsegs = IX_MAX_SCATTER; 1242 1243 error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp, 1244 segs, maxsegs, &nsegs, BUS_DMA_NOWAIT); 1245 if (__predict_false(error)) { 1246 m_freem(*m_headp); 1247 *m_headp = NULL; 1248 return error; 1249 } 1250 bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE); 1251 1252 m_head = *m_headp; 1253 1254 /* 1255 * Set up the appropriate offload context if requested, 1256 * this may consume one TX descriptor. 1257 */ 1258 if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) { 1259 (*segs_used)++; 1260 txr->tx_nsegs++; 1261 } 1262 1263 *segs_used += nsegs; 1264 txr->tx_nsegs += nsegs; 1265 if (txr->tx_nsegs >= txr->tx_intr_nsegs) { 1266 /* 1267 * Report Status (RS) is turned on every intr_nsegs 1268 * descriptors (roughly). 
		 */
		txr->tx_nsegs = 0;
		cmd_rs = IXGBE_TXD_CMD_RS;
	}

	i = txr->tx_next_avail;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buf[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->tx_ndesc)
			i = 0;
	}
	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);

	txr->tx_avail -= nsegs;
	txr->tx_next_avail = i;

	txbuf->m_head = m_head;
	txr->tx_buf[first].map = txbuf->map;
	txbuf->map = map;

	/*
	 * Defer TDT updating until enough descriptors are set up
	 */
	*idx = i;

	return 0;
}

static void
ix_set_promisc(struct ix_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	uint32_t reg_rctl;
	int mcnt = 0;

	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
	reg_rctl &= ~IXGBE_FCTRL_UPE;
	if (ifp->if_flags & IFF_ALLMULTI) {
		mcnt = IX_MAX_MCASTADDR;
	} else {
		struct ifmultiaddr *ifma;

		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (ifma->ifma_addr->sa_family != AF_LINK)
				continue;
			if (mcnt == IX_MAX_MCASTADDR)
				break;
			mcnt++;
		}
	}
	if (mcnt < IX_MAX_MCASTADDR)
		reg_rctl &= ~IXGBE_FCTRL_MPE;
	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= IXGBE_FCTRL_MPE;
		reg_rctl &= ~IXGBE_FCTRL_UPE;
		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
	}
}

static void
ix_set_multi(struct ix_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct ifmultiaddr *ifma;
	uint32_t fctrl;
	uint8_t *mta;
	int mcnt = 0;

	mta = sc->mta;
	bzero(mta, IXGBE_ETH_LENGTH_OF_ADDRESS * IX_MAX_MCASTADDR);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		if (mcnt == IX_MAX_MCASTADDR)
			break;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
		    IXGBE_ETH_LENGTH_OF_ADDRESS);
		mcnt++;
	}

	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	if (ifp->if_flags & IFF_PROMISC) {
		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
		fctrl |= IXGBE_FCTRL_MPE;
		fctrl &= ~IXGBE_FCTRL_UPE;
	} else {
		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
	}
	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);

	if (mcnt < IX_MAX_MCASTADDR) {
		ixgbe_update_mc_addr_list(&sc->hw,
		    mta, mcnt, ix_mc_array_itr, TRUE);
	}
}

/*
 * This is an iterator function now needed by the multicast
 * shared code. It simply feeds the shared code routine the
 * addresses in the array of ix_set_multi() one by one.
1390 */ 1391 static uint8_t * 1392 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq) 1393 { 1394 uint8_t *addr = *update_ptr; 1395 uint8_t *newptr; 1396 *vmdq = 0; 1397 1398 newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS; 1399 *update_ptr = newptr; 1400 return addr; 1401 } 1402 1403 static void 1404 ix_timer(void *arg) 1405 { 1406 struct ix_softc *sc = arg; 1407 1408 lwkt_serialize_enter(&sc->main_serialize); 1409 1410 if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) { 1411 lwkt_serialize_exit(&sc->main_serialize); 1412 return; 1413 } 1414 1415 /* Check for pluggable optics */ 1416 if (sc->sfp_probe) { 1417 if (!ix_sfp_probe(sc)) 1418 goto done; /* Nothing to do */ 1419 } 1420 1421 ix_update_link_status(sc); 1422 ix_update_stats(sc); 1423 1424 done: 1425 callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid); 1426 lwkt_serialize_exit(&sc->main_serialize); 1427 } 1428 1429 static void 1430 ix_update_link_status(struct ix_softc *sc) 1431 { 1432 struct ifnet *ifp = &sc->arpcom.ac_if; 1433 1434 if (sc->link_up) { 1435 if (sc->link_active == FALSE) { 1436 if (bootverbose) { 1437 if_printf(ifp, "Link is up %d Gbps %s\n", 1438 sc->link_speed == 128 ? 10 : 1, 1439 "Full Duplex"); 1440 } 1441 1442 /* 1443 * Update any Flow Control changes 1444 */ 1445 ixgbe_fc_enable(&sc->hw); 1446 /* MUST after ixgbe_fc_enable() */ 1447 if (sc->rx_ring_inuse > 1) { 1448 switch (sc->hw.fc.current_mode) { 1449 case ixgbe_fc_rx_pause: 1450 case ixgbe_fc_tx_pause: 1451 case ixgbe_fc_full: 1452 ix_disable_rx_drop(sc); 1453 break; 1454 1455 case ixgbe_fc_none: 1456 ix_enable_rx_drop(sc); 1457 break; 1458 1459 default: 1460 break; 1461 } 1462 } 1463 1464 sc->link_active = TRUE; 1465 1466 ifp->if_link_state = LINK_STATE_UP; 1467 if_link_state_change(ifp); 1468 } 1469 } else { /* Link down */ 1470 if (sc->link_active == TRUE) { 1471 if (bootverbose) 1472 if_printf(ifp, "Link is Down\n"); 1473 ifp->if_link_state = LINK_STATE_DOWN; 1474 if_link_state_change(ifp); 1475 1476 sc->link_active = FALSE; 1477 } 1478 } 1479 } 1480 1481 static void 1482 ix_stop(struct ix_softc *sc) 1483 { 1484 struct ixgbe_hw *hw = &sc->hw; 1485 struct ifnet *ifp = &sc->arpcom.ac_if; 1486 int i; 1487 1488 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1489 1490 ix_disable_intr(sc); 1491 callout_stop(&sc->timer); 1492 1493 ifp->if_flags &= ~IFF_RUNNING; 1494 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1495 struct ix_tx_ring *txr = &sc->tx_rings[i]; 1496 1497 ifsq_clr_oactive(txr->tx_ifsq); 1498 ifsq_watchdog_stop(&txr->tx_watchdog); 1499 txr->tx_flags &= ~IX_TXFLAG_ENABLED; 1500 } 1501 1502 ixgbe_reset_hw(hw); 1503 hw->adapter_stopped = FALSE; 1504 ixgbe_stop_adapter(hw); 1505 if (hw->mac.type == ixgbe_mac_82599EB) 1506 ixgbe_stop_mac_link_on_d3_82599(hw); 1507 /* Turn off the laser - noop with no optics */ 1508 ixgbe_disable_tx_laser(hw); 1509 1510 /* Update the stack */ 1511 sc->link_up = FALSE; 1512 ix_update_link_status(sc); 1513 1514 /* Reprogram the RAR[0] in case user changed it. 
*/ 1515 ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); 1516 1517 for (i = 0; i < sc->tx_ring_cnt; ++i) 1518 ix_free_tx_ring(&sc->tx_rings[i]); 1519 1520 for (i = 0; i < sc->rx_ring_cnt; ++i) 1521 ix_free_rx_ring(&sc->rx_rings[i]); 1522 } 1523 1524 static void 1525 ix_setup_optics(struct ix_softc *sc) 1526 { 1527 struct ixgbe_hw *hw = &sc->hw; 1528 int layer; 1529 1530 layer = ixgbe_get_supported_physical_layer(hw); 1531 1532 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) { 1533 sc->optics = IFM_10G_T; 1534 return; 1535 } 1536 1537 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) { 1538 sc->optics = IFM_1000_T; 1539 return; 1540 } 1541 1542 if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) { 1543 sc->optics = IFM_1000_SX; 1544 return; 1545 } 1546 1547 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR | 1548 IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) { 1549 sc->optics = IFM_10G_LR; 1550 return; 1551 } 1552 1553 if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { 1554 sc->optics = IFM_10G_SR; 1555 return; 1556 } 1557 1558 if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) { 1559 sc->optics = IFM_10G_TWINAX; 1560 return; 1561 } 1562 1563 if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 | 1564 IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) { 1565 sc->optics = IFM_10G_CX4; 1566 return; 1567 } 1568 1569 /* 1570 * If we get here just set the default. 1571 * XXX this probably is wrong. 1572 */ 1573 sc->optics = IFM_AUTO; 1574 } 1575 1576 static void 1577 ix_setup_ifp(struct ix_softc *sc) 1578 { 1579 struct ixgbe_hw *hw = &sc->hw; 1580 struct ifnet *ifp = &sc->arpcom.ac_if; 1581 int i; 1582 1583 ifp->if_baudrate = IF_Gbps(10UL); 1584 1585 ifp->if_softc = sc; 1586 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 1587 ifp->if_init = ix_init; 1588 ifp->if_ioctl = ix_ioctl; 1589 ifp->if_start = ix_start; 1590 ifp->if_serialize = ix_serialize; 1591 ifp->if_deserialize = ix_deserialize; 1592 ifp->if_tryserialize = ix_tryserialize; 1593 #ifdef INVARIANTS 1594 ifp->if_serialize_assert = ix_serialize_assert; 1595 #endif 1596 #ifdef IFPOLL_ENABLE 1597 ifp->if_npoll = ix_npoll; 1598 #endif 1599 1600 /* Increase TSO burst length */ 1601 ifp->if_tsolen = (8 * ETHERMTU); 1602 1603 ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc; 1604 ifp->if_nmbjclusters = ifp->if_nmbclusters; 1605 1606 ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2); 1607 ifq_set_ready(&ifp->if_snd); 1608 ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt); 1609 1610 ifp->if_mapsubq = ifq_mapsubq_mask; 1611 ifq_set_subq_mask(&ifp->if_snd, 0); 1612 1613 ether_ifattach(ifp, hw->mac.addr, NULL); 1614 1615 ifp->if_capabilities = 1616 IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; 1617 if (IX_ENABLE_HWRSS(sc)) 1618 ifp->if_capabilities |= IFCAP_RSS; 1619 ifp->if_capenable = ifp->if_capabilities; 1620 ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO; 1621 1622 /* 1623 * Tell the upper layer(s) we support long frames. 
1624 */ 1625 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 1626 1627 /* Setup TX rings and subqueues */ 1628 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1629 struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i); 1630 struct ix_tx_ring *txr = &sc->tx_rings[i]; 1631 1632 ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid); 1633 ifsq_set_priv(ifsq, txr); 1634 ifsq_set_hw_serialize(ifsq, &txr->tx_serialize); 1635 txr->tx_ifsq = ifsq; 1636 1637 ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog); 1638 } 1639 1640 /* 1641 * Specify the media types supported by this adapter and register 1642 * callbacks to update media and link information 1643 */ 1644 ifmedia_add(&sc->media, IFM_ETHER | sc->optics | IFM_FDX, 0, NULL); 1645 if (hw->device_id == IXGBE_DEV_ID_82598AT) { 1646 if (sc->optics != IFM_1000_T) { 1647 ifmedia_add(&sc->media, 1648 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 1649 } 1650 } 1651 if (sc->optics != IFM_AUTO) 1652 ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); 1653 ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO | sc->ifm_flowctrl); 1654 } 1655 1656 static boolean_t 1657 ix_is_sfp(const struct ixgbe_hw *hw) 1658 { 1659 switch (hw->phy.type) { 1660 case ixgbe_phy_sfp_avago: 1661 case ixgbe_phy_sfp_ftl: 1662 case ixgbe_phy_sfp_intel: 1663 case ixgbe_phy_sfp_unknown: 1664 case ixgbe_phy_sfp_passive_tyco: 1665 case ixgbe_phy_sfp_passive_unknown: 1666 return TRUE; 1667 default: 1668 return FALSE; 1669 } 1670 } 1671 1672 static void 1673 ix_config_link(struct ix_softc *sc) 1674 { 1675 struct ixgbe_hw *hw = &sc->hw; 1676 boolean_t sfp; 1677 1678 sfp = ix_is_sfp(hw); 1679 if (sfp) { 1680 if (hw->phy.multispeed_fiber) { 1681 hw->mac.ops.setup_sfp(hw); 1682 ixgbe_enable_tx_laser(hw); 1683 ix_handle_msf(sc); 1684 } else { 1685 ix_handle_mod(sc); 1686 } 1687 } else { 1688 uint32_t autoneg, err = 0; 1689 1690 if (hw->mac.ops.check_link != NULL) { 1691 err = ixgbe_check_link(hw, &sc->link_speed, 1692 &sc->link_up, FALSE); 1693 if (err) 1694 return; 1695 } 1696 1697 autoneg = hw->phy.autoneg_advertised; 1698 if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) { 1699 bool negotiate; 1700 1701 err = hw->mac.ops.get_link_capabilities(hw, 1702 &autoneg, &negotiate); 1703 if (err) 1704 return; 1705 } 1706 1707 if (hw->mac.ops.setup_link != NULL) { 1708 err = hw->mac.ops.setup_link(hw, 1709 autoneg, sc->link_up); 1710 if (err) 1711 return; 1712 } 1713 } 1714 } 1715 1716 static int 1717 ix_alloc_rings(struct ix_softc *sc) 1718 { 1719 int error, i; 1720 1721 /* 1722 * Create top level busdma tag 1723 */ 1724 error = bus_dma_tag_create(NULL, 1, 0, 1725 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 1726 BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0, 1727 &sc->parent_tag); 1728 if (error) { 1729 device_printf(sc->dev, "could not create top level DMA tag\n"); 1730 return error; 1731 } 1732 1733 /* 1734 * Allocate TX descriptor rings and buffers 1735 */ 1736 sc->tx_rings = kmalloc_cachealign( 1737 sizeof(struct ix_tx_ring) * sc->tx_ring_cnt, 1738 M_DEVBUF, M_WAITOK | M_ZERO); 1739 for (i = 0; i < sc->tx_ring_cnt; ++i) { 1740 struct ix_tx_ring *txr = &sc->tx_rings[i]; 1741 1742 txr->tx_sc = sc; 1743 txr->tx_idx = i; 1744 txr->tx_intr_vec = -1; 1745 lwkt_serialize_init(&txr->tx_serialize); 1746 1747 error = ix_create_tx_ring(txr); 1748 if (error) 1749 return error; 1750 } 1751 1752 /* 1753 * Allocate RX descriptor rings and buffers 1754 */ 1755 sc->rx_rings = kmalloc_cachealign( 1756 sizeof(struct ix_rx_ring) * sc->rx_ring_cnt, 1757 M_DEVBUF, M_WAITOK | M_ZERO); 1758 for 
(i = 0; i < sc->rx_ring_cnt; ++i) {
		struct ix_rx_ring *rxr = &sc->rx_rings[i];

		rxr->rx_sc = sc;
		rxr->rx_idx = i;
		rxr->rx_intr_vec = -1;
		lwkt_serialize_init(&rxr->rx_serialize);

		error = ix_create_rx_ring(rxr);
		if (error)
			return error;
	}

	return 0;
}

static int
ix_create_tx_ring(struct ix_tx_ring *txr)
{
	int error, i, tsize, ntxd;

	/*
	 * Validate number of transmit descriptors.  It must not exceed
	 * the hardware maximum, and must be a multiple of IX_DBA_ALIGN.
	 */
	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
		device_printf(txr->tx_sc->dev,
		    "Using %d TX descriptors instead of %d!\n",
		    IX_DEF_TXD, ntxd);
		txr->tx_ndesc = IX_DEF_TXD;
	} else {
		txr->tx_ndesc = ntxd;
	}

	/*
	 * Allocate TX head write-back buffer
	 */
	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
	if (txr->tx_hdr == NULL) {
		device_printf(txr->tx_sc->dev,
		    "Unable to allocate TX head write-back buffer\n");
		return ENOMEM;
	}

	/*
	 * Allocate TX descriptor ring
	 */
	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
	    IX_DBA_ALIGN);
	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
	if (txr->tx_base == NULL) {
		device_printf(txr->tx_sc->dev,
		    "Unable to allocate TX Descriptor memory\n");
		return ENOMEM;
	}

	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Create DMA tag for TX buffers
	 */
	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IX_TSO_SIZE,	/* maxsize */
	    IX_MAX_SCATTER,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
	    BUS_DMA_ONEBPAGE,	/* flags */
	    &txr->tx_tag);
	if (error) {
		device_printf(txr->tx_sc->dev,
		    "Unable to allocate TX DMA tag\n");
		kfree(txr->tx_buf, M_DEVBUF);
		txr->tx_buf = NULL;
		return error;
	}

	/*
	 * Create DMA maps for TX buffers
	 */
	for (i = 0; i < txr->tx_ndesc; ++i) {
		struct ix_tx_buf *txbuf = &txr->tx_buf[i];

		error = bus_dmamap_create(txr->tx_tag,
		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
		if (error) {
			device_printf(txr->tx_sc->dev,
			    "Unable to create TX DMA map\n");
			ix_destroy_tx_ring(txr, i);
			return error;
		}
	}

	/*
	 * Initialize various watermarks
	 */
	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
	txr->tx_intr_nsegs = txr->tx_ndesc / 16;

	return 0;
}

static void
ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
{
	int i;

	if (txr->tx_hdr != NULL) {
		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
		bus_dmamem_free(txr->tx_hdr_dtag,
		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
		bus_dma_tag_destroy(txr->tx_hdr_dtag);
		txr->tx_hdr = NULL;
	}

	if (txr->tx_base != NULL) {
		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
		    txr->tx_base_map);
		bus_dma_tag_destroy(txr->tx_base_dtag);
		txr->tx_base = NULL;
	}

	if (txr->tx_buf == NULL)
		return;

	for (i = 0; i < ndesc; ++i) {
		struct ix_tx_buf *txbuf = &txr->tx_buf[i];

		KKASSERT(txbuf->m_head == NULL);
		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
	}
	bus_dma_tag_destroy(txr->tx_tag);

	kfree(txr->tx_buf, M_DEVBUF);
	txr->tx_buf = NULL;
}

static void
ix_init_tx_ring(struct ix_tx_ring *txr)
{
	/* Clear the old ring contents */
	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);

	/* Clear TX head write-back buffer */
	*(txr->tx_hdr) = 0;

	/* Reset indices */
	txr->tx_next_avail = 0;
	txr->tx_next_clean = 0;
	txr->tx_nsegs = 0;

	/* Set number of descriptors available */
	txr->tx_avail = txr->tx_ndesc;

	/* Enable this TX ring */
	txr->tx_flags |= IX_TXFLAG_ENABLED;
}

static void
ix_init_tx_unit(struct ix_softc *sc)
{
	struct ixgbe_hw *hw = &sc->hw;
	int i;

	/*
	 * Setup the Base and Length of the Tx Descriptor Ring
	 */
	for (i = 0; i < sc->tx_ring_inuse; ++i) {
		struct ix_tx_ring *txr = &sc->tx_rings[i];
		uint64_t tdba = txr->tx_base_paddr;
		uint64_t hdr_paddr = txr->tx_hdr_paddr;
		uint32_t txctrl;

		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));

		/* Setup the HW Tx Head and Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);

		/* Disable TX head write-back relax ordering */
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
			break;
		}
		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
			break;
		}

		/* Enable TX head write-back */
		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
		    (uint32_t)(hdr_paddr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
	}

	if (hw->mac.type != ixgbe_mac_82598EB) {
		uint32_t dmatxctl, rttdcs;

		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
		dmatxctl |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);

		/* Disable arbiter to set MTQC */
		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		rttdcs |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);

		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);

		/* Re-enable arbiter */
		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
	}
}

static int
ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct 
mbuf *mp, 2003 uint32_t *cmd_type_len, uint32_t *olinfo_status) 2004 { 2005 struct ixgbe_adv_tx_context_desc *TXD; 2006 uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0; 2007 int ehdrlen, ip_hlen = 0, ctxd; 2008 boolean_t offload = TRUE; 2009 2010 /* First check if TSO is to be used */ 2011 if (mp->m_pkthdr.csum_flags & CSUM_TSO) { 2012 return ix_tso_ctx_setup(txr, mp, 2013 cmd_type_len, olinfo_status); 2014 } 2015 2016 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) 2017 offload = FALSE; 2018 2019 /* Indicate the whole packet as payload when not doing TSO */ 2020 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT; 2021 2022 /* 2023 * In advanced descriptors the vlan tag must be placed into the 2024 * context descriptor. Hence we need to make one even if not 2025 * doing checksum offloads. 2026 */ 2027 if (mp->m_flags & M_VLANTAG) { 2028 vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) << 2029 IXGBE_ADVTXD_VLAN_SHIFT; 2030 } else if (!offload) { 2031 /* No TX descriptor is consumed */ 2032 return 0; 2033 } 2034 2035 /* Set the ether header length */ 2036 ehdrlen = mp->m_pkthdr.csum_lhlen; 2037 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 2038 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 2039 2040 if (mp->m_pkthdr.csum_flags & CSUM_IP) { 2041 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 2042 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 2043 ip_hlen = mp->m_pkthdr.csum_iphlen; 2044 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 2045 } 2046 vlan_macip_lens |= ip_hlen; 2047 2048 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 2049 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 2050 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 2051 else if (mp->m_pkthdr.csum_flags & CSUM_UDP) 2052 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP; 2053 2054 if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)) 2055 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 2056 2057 /* Now ready a context descriptor */ 2058 ctxd = txr->tx_next_avail; 2059 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd]; 2060 2061 /* Now copy bits into descriptor */ 2062 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 2063 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 2064 TXD->seqnum_seed = htole32(0); 2065 TXD->mss_l4len_idx = htole32(0); 2066 2067 /* We've consumed the first desc, adjust counters */ 2068 if (++ctxd == txr->tx_ndesc) 2069 ctxd = 0; 2070 txr->tx_next_avail = ctxd; 2071 --txr->tx_avail; 2072 2073 /* One TX descriptor is consumed */ 2074 return 1; 2075 } 2076 2077 static int 2078 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp, 2079 uint32_t *cmd_type_len, uint32_t *olinfo_status) 2080 { 2081 struct ixgbe_adv_tx_context_desc *TXD; 2082 uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0; 2083 uint32_t mss_l4len_idx = 0, paylen; 2084 int ctxd, ehdrlen, ip_hlen, tcp_hlen; 2085 2086 ehdrlen = mp->m_pkthdr.csum_lhlen; 2087 KASSERT(ehdrlen > 0, ("invalid ether hlen")); 2088 2089 ip_hlen = mp->m_pkthdr.csum_iphlen; 2090 KASSERT(ip_hlen > 0, ("invalid ip hlen")); 2091 2092 tcp_hlen = mp->m_pkthdr.csum_thlen; 2093 KASSERT(tcp_hlen > 0, ("invalid tcp hlen")); 2094 2095 ctxd = txr->tx_next_avail; 2096 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd]; 2097 2098 if (mp->m_flags & M_VLANTAG) { 2099 vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) << 2100 IXGBE_ADVTXD_VLAN_SHIFT; 2101 } 2102 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT; 2103 vlan_macip_lens |= ip_hlen; 2104 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 2105 2106 /* ADV 
DTYPE TUCMD */ 2107 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; 2108 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; 2109 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP; 2110 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 2111 2112 /* MSS L4LEN IDX */ 2113 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT); 2114 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT); 2115 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 2116 2117 TXD->seqnum_seed = htole32(0); 2118 2119 if (++ctxd == txr->tx_ndesc) 2120 ctxd = 0; 2121 2122 txr->tx_avail--; 2123 txr->tx_next_avail = ctxd; 2124 2125 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE; 2126 2127 /* This is used in the transmit desc in encap */ 2128 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; 2129 2130 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8; 2131 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8; 2132 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; 2133 2134 /* One TX descriptor is consumed */ 2135 return 1; 2136 } 2137 2138 static void 2139 ix_txeof(struct ix_tx_ring *txr, int hdr) 2140 { 2141 int first, avail; 2142 2143 if (txr->tx_avail == txr->tx_ndesc) 2144 return; 2145 2146 first = txr->tx_next_clean; 2147 if (first == hdr) 2148 return; 2149 2150 avail = txr->tx_avail; 2151 while (first != hdr) { 2152 struct ix_tx_buf *txbuf = &txr->tx_buf[first]; 2153 2154 ++avail; 2155 if (txbuf->m_head) { 2156 bus_dmamap_unload(txr->tx_tag, txbuf->map); 2157 m_freem(txbuf->m_head); 2158 txbuf->m_head = NULL; 2159 } 2160 if (++first == txr->tx_ndesc) 2161 first = 0; 2162 } 2163 txr->tx_next_clean = first; 2164 txr->tx_avail = avail; 2165 2166 if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) { 2167 ifsq_clr_oactive(txr->tx_ifsq); 2168 txr->tx_watchdog.wd_timer = 0; 2169 } 2170 } 2171 2172 static int 2173 ix_create_rx_ring(struct ix_rx_ring *rxr) 2174 { 2175 int i, rsize, error, nrxd; 2176 2177 /* 2178 * Validate number of receive descriptors. It must not exceed 2179 * hardware maximum, and must be multiple of IX_DBA_ALIGN. 
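 * (For illustration: each union ixgbe_adv_rx_desc is 16 bytes, so with an
 * assumed IX_DBA_ALIGN of 128 this check amounts to requiring a descriptor
 * count that is a multiple of 8.)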
2180 */ 2181 nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd); 2182 if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 || 2183 nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) { 2184 device_printf(rxr->rx_sc->dev, 2185 "Using %d RX descriptors instead of %d!\n", 2186 IX_DEF_RXD, nrxd); 2187 rxr->rx_ndesc = IX_DEF_RXD; 2188 } else { 2189 rxr->rx_ndesc = nrxd; 2190 } 2191 2192 /* 2193 * Allocate RX descriptor ring 2194 */ 2195 rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc), 2196 IX_DBA_ALIGN); 2197 rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag, 2198 IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO, 2199 &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr); 2200 if (rxr->rx_base == NULL) { 2201 device_printf(rxr->rx_sc->dev, 2202 "Unable to allocate RX Descriptor memory\n"); 2203 return ENOMEM; 2204 } 2205 2206 rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc); 2207 rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO); 2208 2209 /* 2210 * Create DMA tag for RX buffers 2211 */ 2212 error = bus_dma_tag_create(rxr->rx_sc->parent_tag, 2213 1, 0, /* alignment, bounds */ 2214 BUS_SPACE_MAXADDR, /* lowaddr */ 2215 BUS_SPACE_MAXADDR, /* highaddr */ 2216 NULL, NULL, /* filter, filterarg */ 2217 PAGE_SIZE, /* maxsize */ 2218 1, /* nsegments */ 2219 PAGE_SIZE, /* maxsegsize */ 2220 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */ 2221 &rxr->rx_tag); 2222 if (error) { 2223 device_printf(rxr->rx_sc->dev, 2224 "Unable to create RX DMA tag\n"); 2225 kfree(rxr->rx_buf, M_DEVBUF); 2226 rxr->rx_buf = NULL; 2227 return error; 2228 } 2229 2230 /* 2231 * Create spare DMA map for RX buffers 2232 */ 2233 error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK, 2234 &rxr->rx_sparemap); 2235 if (error) { 2236 device_printf(rxr->rx_sc->dev, 2237 "Unable to create spare RX DMA map\n"); 2238 bus_dma_tag_destroy(rxr->rx_tag); 2239 kfree(rxr->rx_buf, M_DEVBUF); 2240 rxr->rx_buf = NULL; 2241 return error; 2242 } 2243 2244 /* 2245 * Create DMA maps for RX buffers 2246 */ 2247 for (i = 0; i < rxr->rx_ndesc; ++i) { 2248 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i]; 2249 2250 error = bus_dmamap_create(rxr->rx_tag, 2251 BUS_DMA_WAITOK, &rxbuf->map); 2252 if (error) { 2253 device_printf(rxr->rx_sc->dev, 2254 "Unable to create RX DMA map\n"); 2255 ix_destroy_rx_ring(rxr, i); 2256 return error; 2257 } 2258 } 2259 2260 /* 2261 * Initialize various watermarks 2262 */ 2263 rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS; 2264 2265 return 0; 2266 } 2267 2268 static void 2269 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc) 2270 { 2271 int i; 2272 2273 if (rxr->rx_base != NULL) { 2274 bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map); 2275 bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base, 2276 rxr->rx_base_map); 2277 bus_dma_tag_destroy(rxr->rx_base_dtag); 2278 rxr->rx_base = NULL; 2279 } 2280 2281 if (rxr->rx_buf == NULL) 2282 return; 2283 2284 for (i = 0; i < ndesc; ++i) { 2285 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i]; 2286 2287 KKASSERT(rxbuf->m_head == NULL); 2288 bus_dmamap_destroy(rxr->rx_tag, rxbuf->map); 2289 } 2290 bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap); 2291 bus_dma_tag_destroy(rxr->rx_tag); 2292 2293 kfree(rxr->rx_buf, M_DEVBUF); 2294 rxr->rx_buf = NULL; 2295 } 2296 2297 /* 2298 ** Used to detect a descriptor that has 2299 ** been merged by Hardware RSC.
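** A non-zero RSCCNT field in the write-back descriptor means the hardware
** has coalesced this buffer into a larger frame; ix_rxeof() then uses the
** NEXTP field to locate the next descriptor of that frame.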
2300 */ 2301 static __inline uint32_t 2302 ix_rsc_count(union ixgbe_adv_rx_desc *rx) 2303 { 2304 return (le32toh(rx->wb.lower.lo_dword.data) & 2305 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT; 2306 } 2307 2308 #if 0 2309 /********************************************************************* 2310 * 2311 * Initialize Hardware RSC (LRO) feature on 82599 2312 * for an RX ring, this is toggled by the LRO capability 2313 * even though it is transparent to the stack. 2314 * 2315 * NOTE: since this HW feature only works with IPV4 and 2316 * our testing has shown soft LRO to be as effective 2317 * I have decided to disable this by default. 2318 * 2319 **********************************************************************/ 2320 static void 2321 ix_setup_hw_rsc(struct ix_rx_ring *rxr) 2322 { 2323 struct ix_softc *sc = rxr->rx_sc; 2324 struct ixgbe_hw *hw = &sc->hw; 2325 uint32_t rscctrl, rdrxctl; 2326 2327 #if 0 2328 /* If turning LRO/RSC off we need to disable it */ 2329 if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) { 2330 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 2331 rscctrl &= ~IXGBE_RSCCTL_RSCEN; 2332 return; 2333 } 2334 #endif 2335 2336 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); 2337 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; 2338 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; 2339 rdrxctl |= IXGBE_RDRXCTL_RSCACKC; 2340 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); 2341 2342 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); 2343 rscctrl |= IXGBE_RSCCTL_RSCEN; 2344 /* 2345 ** Limit the total number of descriptors that 2346 ** can be combined, so it does not exceed 64K 2347 */ 2348 if (rxr->mbuf_sz == MCLBYTES) 2349 rscctrl |= IXGBE_RSCCTL_MAXDESC_16; 2350 else if (rxr->mbuf_sz == MJUMPAGESIZE) 2351 rscctrl |= IXGBE_RSCCTL_MAXDESC_8; 2352 else if (rxr->mbuf_sz == MJUM9BYTES) 2353 rscctrl |= IXGBE_RSCCTL_MAXDESC_4; 2354 else /* Using 16K cluster */ 2355 rscctrl |= IXGBE_RSCCTL_MAXDESC_1; 2356 2357 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); 2358 2359 /* Enable TCP header recognition */ 2360 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 2361 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | 2362 IXGBE_PSRTYPE_TCPHDR)); 2363 2364 /* Disable RSC for ACK packets */ 2365 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, 2366 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); 2367 2368 rxr->hw_rsc = TRUE; 2369 } 2370 #endif 2371 2372 static int 2373 ix_init_rx_ring(struct ix_rx_ring *rxr) 2374 { 2375 int i; 2376 2377 /* Clear the ring contents */ 2378 bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc)); 2379 2380 /* XXX we need JUMPAGESIZE for RSC too */ 2381 if (rxr->rx_sc->max_frame_size <= MCLBYTES) 2382 rxr->rx_mbuf_sz = MCLBYTES; 2383 else 2384 rxr->rx_mbuf_sz = MJUMPAGESIZE; 2385 2386 /* Now replenish the mbufs */ 2387 for (i = 0; i < rxr->rx_ndesc; ++i) { 2388 int error; 2389 2390 error = ix_newbuf(rxr, i, TRUE); 2391 if (error) 2392 return error; 2393 } 2394 2395 /* Setup our descriptor indices */ 2396 rxr->rx_next_check = 0; 2397 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC; 2398 2399 #if 0 2400 /* 2401 ** Now set up the LRO interface: 2402 */ 2403 if (ixgbe_rsc_enable) 2404 ix_setup_hw_rsc(rxr); 2405 #endif 2406 2407 return 0; 2408 } 2409 2410 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 2411 2412 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1) 2413 2414 static void 2415 ix_init_rx_unit(struct ix_softc *sc) 2416 { 2417 struct ixgbe_hw *hw = &sc->hw; 2418 struct ifnet *ifp = &sc->arpcom.ac_if; 2419 uint32_t bufsz, rxctrl, fctrl, rxcsum, hlreg; 2420 int i; 2421 2422 /* 
2423 * Make sure receives are disabled while setting up the descriptor ring 2424 */ 2425 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); 2426 IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN); 2427 2428 /* Enable broadcasts */ 2429 fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); 2430 fctrl |= IXGBE_FCTRL_BAM; 2431 fctrl |= IXGBE_FCTRL_DPF; 2432 fctrl |= IXGBE_FCTRL_PMCF; 2433 IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); 2434 2435 /* Set for Jumbo Frames? */ 2436 hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); 2437 if (ifp->if_mtu > ETHERMTU) 2438 hlreg |= IXGBE_HLREG0_JUMBOEN; 2439 else 2440 hlreg &= ~IXGBE_HLREG0_JUMBOEN; 2441 IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); 2442 2443 KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES); 2444 bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> 2445 IXGBE_SRRCTL_BSIZEPKT_SHIFT; 2446 2447 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2448 struct ix_rx_ring *rxr = &sc->rx_rings[i]; 2449 uint64_t rdba = rxr->rx_base_paddr; 2450 uint32_t srrctl; 2451 2452 /* Setup the Base and Length of the Rx Descriptor Ring */ 2453 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba); 2454 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32)); 2455 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i), 2456 rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc)); 2457 2458 /* 2459 * Set up the SRRCTL register 2460 */ 2461 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); 2462 2463 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; 2464 srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; 2465 srrctl |= bufsz; 2466 srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; 2467 if (sc->rx_ring_inuse > 1) { 2468 /* See the commend near ix_enable_rx_drop() */ 2469 if (sc->ifm_flowctrl & 2470 (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { 2471 srrctl &= ~IXGBE_SRRCTL_DROP_EN; 2472 if (i == 0 && bootverbose) { 2473 if_printf(ifp, "flow control %s, " 2474 "disable RX drop\n", 2475 ix_ifmedia2str(sc->ifm_flowctrl)); 2476 } 2477 } else { 2478 srrctl |= IXGBE_SRRCTL_DROP_EN; 2479 if (i == 0 && bootverbose) { 2480 if_printf(ifp, "flow control %s, " 2481 "enable RX drop\n", 2482 ix_ifmedia2str(sc->ifm_flowctrl)); 2483 } 2484 } 2485 } 2486 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); 2487 2488 /* Setup the HW Rx Head and Tail Descriptor Pointers */ 2489 IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0); 2490 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0); 2491 } 2492 2493 if (sc->hw.mac.type != ixgbe_mac_82598EB) 2494 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0); 2495 2496 rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); 2497 2498 /* 2499 * Setup RSS 2500 */ 2501 if (IX_ENABLE_HWRSS(sc)) { 2502 uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE]; 2503 int j, r; 2504 2505 /* 2506 * NOTE: 2507 * When we reach here, RSS has already been disabled 2508 * in ix_stop(), so we could safely configure RSS key 2509 * and redirect table. 
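 * The Toeplitz key is programmed one 32-bit RSSRK register at a time;
 * IX_RSSRK_VAL() is assumed to pack IX_RSSRK_SIZE consecutive key bytes
 * into each register value written below.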
2510 */ 2511 2512 /* 2513 * Configure RSS key 2514 */ 2515 toeplitz_get_key(key, sizeof(key)); 2516 for (i = 0; i < IX_NRSSRK; ++i) { 2517 uint32_t rssrk; 2518 2519 rssrk = IX_RSSRK_VAL(key, i); 2520 IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n", 2521 i, rssrk); 2522 2523 IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk); 2524 } 2525 2526 /* 2527 * Configure RSS redirect table in following fashion: 2528 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] 2529 */ 2530 r = 0; 2531 for (j = 0; j < IX_NRETA; ++j) { 2532 uint32_t reta = 0; 2533 2534 for (i = 0; i < IX_RETA_SIZE; ++i) { 2535 uint32_t q; 2536 2537 q = r % sc->rx_ring_inuse; 2538 reta |= q << (8 * i); 2539 ++r; 2540 } 2541 IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta); 2542 IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta); 2543 } 2544 2545 /* 2546 * Enable multiple receive queues. 2547 * Enable IPv4 RSS standard hash functions. 2548 */ 2549 IXGBE_WRITE_REG(hw, IXGBE_MRQC, 2550 IXGBE_MRQC_RSSEN | 2551 IXGBE_MRQC_RSS_FIELD_IPV4 | 2552 IXGBE_MRQC_RSS_FIELD_IPV4_TCP); 2553 2554 /* 2555 * NOTE: 2556 * PCSD must be enabled to enable multiple 2557 * receive queues. 2558 */ 2559 rxcsum |= IXGBE_RXCSUM_PCSD; 2560 } 2561 2562 if (ifp->if_capenable & IFCAP_RXCSUM) 2563 rxcsum |= IXGBE_RXCSUM_PCSD; 2564 2565 IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); 2566 } 2567 2568 static __inline void 2569 ix_rx_refresh(struct ix_rx_ring *rxr, int i) 2570 { 2571 if (--i < 0) 2572 i = rxr->rx_ndesc - 1; 2573 IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i); 2574 } 2575 2576 static __inline void 2577 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype) 2578 { 2579 if ((ptype & 2580 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) { 2581 /* Not IPv4 */ 2582 return; 2583 } 2584 2585 if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) == 2586 IXGBE_RXD_STAT_IPCS) 2587 mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; 2588 2589 if ((ptype & 2590 (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) { 2591 /* 2592 * - Neither TCP nor UDP 2593 * - IPv4 fragment 2594 */ 2595 return; 2596 } 2597 2598 if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) == 2599 IXGBE_RXD_STAT_L4CS) { 2600 mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR | 2601 CSUM_FRAG_NOT_CHECKED; 2602 mp->m_pkthdr.csum_data = htons(0xffff); 2603 } 2604 } 2605 2606 static __inline struct pktinfo * 2607 ix_rssinfo(struct mbuf *m, struct pktinfo *pi, 2608 uint32_t hash, uint32_t hashtype, uint32_t ptype) 2609 { 2610 switch (hashtype) { 2611 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP: 2612 pi->pi_netisr = NETISR_IP; 2613 pi->pi_flags = 0; 2614 pi->pi_l3proto = IPPROTO_TCP; 2615 break; 2616 2617 case IXGBE_RXDADV_RSSTYPE_IPV4: 2618 if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) { 2619 /* Not UDP or is fragment */ 2620 return NULL; 2621 } 2622 pi->pi_netisr = NETISR_IP; 2623 pi->pi_flags = 0; 2624 pi->pi_l3proto = IPPROTO_UDP; 2625 break; 2626 2627 default: 2628 return NULL; 2629 } 2630 2631 m->m_flags |= M_HASH; 2632 m->m_pkthdr.hash = toeplitz_hash(hash); 2633 return pi; 2634 } 2635 2636 static __inline void 2637 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf) 2638 { 2639 rxd->read.pkt_addr = htole64(rxbuf->paddr); 2640 rxd->wb.upper.status_error = 0; 2641 } 2642 2643 static void 2644 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop) 2645 { 2646 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i]; 2647 2648 /* 2649 * XXX discard may not be correct 2650 */ 2651 if (eop) { 2652 
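		/*
		 * Last descriptor of the bad frame: account one input error
		 * and leave discard mode for this RX ring.
		 */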
IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1); 2653 rxr->rx_flags &= ~IX_RXRING_FLAG_DISC; 2654 } else { 2655 rxr->rx_flags |= IX_RXRING_FLAG_DISC; 2656 } 2657 if (rxbuf->fmp != NULL) { 2658 m_freem(rxbuf->fmp); 2659 rxbuf->fmp = NULL; 2660 rxbuf->lmp = NULL; 2661 } 2662 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf); 2663 } 2664 2665 static void 2666 ix_rxeof(struct ix_rx_ring *rxr, int count) 2667 { 2668 struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if; 2669 int i, nsegs = 0, cpuid = mycpuid; 2670 2671 i = rxr->rx_next_check; 2672 while (count != 0) { 2673 struct ix_rx_buf *rxbuf, *nbuf = NULL; 2674 union ixgbe_adv_rx_desc *cur; 2675 struct mbuf *sendmp = NULL, *mp; 2676 struct pktinfo *pi = NULL, pi0; 2677 uint32_t rsc = 0, ptype, staterr, hash, hashtype; 2678 uint16_t len; 2679 boolean_t eop; 2680 2681 cur = &rxr->rx_base[i]; 2682 staterr = le32toh(cur->wb.upper.status_error); 2683 2684 if ((staterr & IXGBE_RXD_STAT_DD) == 0) 2685 break; 2686 ++nsegs; 2687 2688 rxbuf = &rxr->rx_buf[i]; 2689 mp = rxbuf->m_head; 2690 2691 len = le16toh(cur->wb.upper.length); 2692 ptype = le32toh(cur->wb.lower.lo_dword.data) & 2693 IXGBE_RXDADV_PKTTYPE_MASK; 2694 hash = le32toh(cur->wb.lower.hi_dword.rss); 2695 hashtype = le32toh(cur->wb.lower.lo_dword.data) & 2696 IXGBE_RXDADV_RSSTYPE_MASK; 2697 2698 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); 2699 if (eop) 2700 --count; 2701 2702 /* 2703 * Make sure bad packets are discarded 2704 */ 2705 if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) || 2706 (rxr->rx_flags & IX_RXRING_FLAG_DISC)) { 2707 ix_rx_discard(rxr, i, eop); 2708 goto next_desc; 2709 } 2710 2711 bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD); 2712 if (ix_newbuf(rxr, i, FALSE) != 0) { 2713 ix_rx_discard(rxr, i, eop); 2714 goto next_desc; 2715 } 2716 2717 /* 2718 * On the 82599, which supports hardware LRO, packets 2719 * need not be fragmented across sequential descriptors; 2720 * rather, the next descriptor is indicated in bits 2721 * of the descriptor. This also means that we might 2722 * process more than one packet at a time, something 2723 * that was never true before and which required 2724 * eliminating global chain pointers in favor of what 2725 * we are doing here. 2726 */ 2727 if (!eop) { 2728 int nextp; 2729 2730 /* 2731 * Figure out the next descriptor 2732 * of this frame. 2733 */ 2734 if (rxr->rx_flags & IX_RXRING_FLAG_LRO) 2735 rsc = ix_rsc_count(cur); 2736 if (rsc) { /* Get hardware index */ 2737 nextp = ((staterr & 2738 IXGBE_RXDADV_NEXTP_MASK) >> 2739 IXGBE_RXDADV_NEXTP_SHIFT); 2740 } else { /* Just sequential */ 2741 nextp = i + 1; 2742 if (nextp == rxr->rx_ndesc) 2743 nextp = 0; 2744 } 2745 nbuf = &rxr->rx_buf[nextp]; 2746 prefetch(nbuf); 2747 } 2748 mp->m_len = len; 2749 2750 /* 2751 * Rather than using the fmp/lmp global pointers 2752 * we now keep the head of a packet chain in the 2753 * buffer struct and pass this along from one 2754 * descriptor to the next, until we get EOP.
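 * rxbuf->fmp tracks the first mbuf (and thus the packet header) of the
 * frame being assembled, while rxbuf->lmp tracks the most recently
 * appended mbuf so new fragments can be chained without walking the list.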
2755 */ 2756 if (rxbuf->fmp == NULL) { 2757 mp->m_pkthdr.len = len; 2758 rxbuf->fmp = mp; 2759 rxbuf->lmp = mp; 2760 } else { 2761 rxbuf->fmp->m_pkthdr.len += len; 2762 rxbuf->lmp->m_next = mp; 2763 rxbuf->lmp = mp; 2764 } 2765 2766 if (nbuf != NULL) { 2767 /* 2768 * Not the last fragment of this frame, 2769 * pass this fragment list on 2770 */ 2771 nbuf->fmp = rxbuf->fmp; 2772 nbuf->lmp = rxbuf->lmp; 2773 } else { 2774 /* 2775 * Send this frame 2776 */ 2777 sendmp = rxbuf->fmp; 2778 2779 sendmp->m_pkthdr.rcvif = ifp; 2780 IFNET_STAT_INC(ifp, ipackets, 1); 2781 #ifdef IX_RSS_DEBUG 2782 rxr->rx_pkts++; 2783 #endif 2784 2785 /* Process vlan info */ 2786 if (staterr & IXGBE_RXD_STAT_VP) { 2787 sendmp->m_pkthdr.ether_vlantag = 2788 le16toh(cur->wb.upper.vlan); 2789 sendmp->m_flags |= M_VLANTAG; 2790 } 2791 if (ifp->if_capenable & IFCAP_RXCSUM) 2792 ix_rxcsum(staterr, sendmp, ptype); 2793 if (ifp->if_capenable & IFCAP_RSS) { 2794 pi = ix_rssinfo(sendmp, &pi0, 2795 hash, hashtype, ptype); 2796 } 2797 } 2798 rxbuf->fmp = NULL; 2799 rxbuf->lmp = NULL; 2800 next_desc: 2801 /* Advance our pointers to the next descriptor. */ 2802 if (++i == rxr->rx_ndesc) 2803 i = 0; 2804 2805 if (sendmp != NULL) 2806 ifp->if_input(ifp, sendmp, pi, cpuid); 2807 2808 if (nsegs >= rxr->rx_wreg_nsegs) { 2809 ix_rx_refresh(rxr, i); 2810 nsegs = 0; 2811 } 2812 } 2813 rxr->rx_next_check = i; 2814 2815 if (nsegs > 0) 2816 ix_rx_refresh(rxr, i); 2817 } 2818 2819 static void 2820 ix_set_vlan(struct ix_softc *sc) 2821 { 2822 struct ixgbe_hw *hw = &sc->hw; 2823 uint32_t ctrl; 2824 2825 if (hw->mac.type == ixgbe_mac_82598EB) { 2826 ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); 2827 ctrl |= IXGBE_VLNCTRL_VME; 2828 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); 2829 } else { 2830 int i; 2831 2832 /* 2833 * On 82599 and later chips the VLAN enable is 2834 * per queue in RXDCTL 2835 */ 2836 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2837 ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); 2838 ctrl |= IXGBE_RXDCTL_VME; 2839 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); 2840 } 2841 } 2842 } 2843 2844 static void 2845 ix_enable_intr(struct ix_softc *sc) 2846 { 2847 struct ixgbe_hw *hw = &sc->hw; 2848 uint32_t fwsm; 2849 int i; 2850 2851 for (i = 0; i < sc->intr_cnt; ++i) 2852 lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize); 2853 2854 sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); 2855 2856 /* Enable Fan Failure detection */ 2857 if (hw->device_id == IXGBE_DEV_ID_82598AT) 2858 sc->intr_mask |= IXGBE_EIMS_GPI_SDP1; 2859 2860 switch (sc->hw.mac.type) { 2861 case ixgbe_mac_82599EB: 2862 sc->intr_mask |= IXGBE_EIMS_ECC; 2863 sc->intr_mask |= IXGBE_EIMS_GPI_SDP0; 2864 sc->intr_mask |= IXGBE_EIMS_GPI_SDP1; 2865 sc->intr_mask |= IXGBE_EIMS_GPI_SDP2; 2866 break; 2867 2868 case ixgbe_mac_X540: 2869 sc->intr_mask |= IXGBE_EIMS_ECC; 2870 /* Detect if Thermal Sensor is enabled */ 2871 fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); 2872 if (fwsm & IXGBE_FWSM_TS_ENABLED) 2873 sc->intr_mask |= IXGBE_EIMS_TS; 2874 /* FALL THROUGH */ 2875 default: 2876 break; 2877 } 2878 2879 /* With MSI-X we use auto clear for RX and TX rings */ 2880 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 2881 /* 2882 * There are no EIAC1/EIAC2 for newer chips; the related 2883 * bits for TX and RX rings > 16 are always auto clear. 2884 * 2885 * XXX which bits? There are _no_ documented EICR1 and 2886 * EICR2 at all; only EICR. 
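 * Writing IXGBE_EIMS_RTX_QUEUE to EIAC below marks the RX/TX queue cause
 * bits as auto-cleared when their MSI-X vectors fire, so the per-ring
 * interrupt handlers never have to touch EICR themselves.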
2887 */ 2888 IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE); 2889 } else { 2890 sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK; 2891 2892 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS); 2893 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) 2894 sc->intr_mask |= IX_RX1_INTR_MASK; 2895 } 2896 2897 IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask); 2898 2899 /* 2900 * Enable RX and TX rings for MSI-X 2901 */ 2902 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 2903 for (i = 0; i < sc->tx_ring_inuse; ++i) { 2904 const struct ix_tx_ring *txr = &sc->tx_rings[i]; 2905 2906 if (txr->tx_intr_vec >= 0) { 2907 IXGBE_WRITE_REG(hw, txr->tx_eims, 2908 txr->tx_eims_val); 2909 } 2910 } 2911 for (i = 0; i < sc->rx_ring_inuse; ++i) { 2912 const struct ix_rx_ring *rxr = &sc->rx_rings[i]; 2913 2914 KKASSERT(rxr->rx_intr_vec >= 0); 2915 IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val); 2916 } 2917 } 2918 2919 IXGBE_WRITE_FLUSH(hw); 2920 } 2921 2922 static void 2923 ix_disable_intr(struct ix_softc *sc) 2924 { 2925 int i; 2926 2927 if (sc->intr_type == PCI_INTR_TYPE_MSIX) 2928 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0); 2929 2930 if (sc->hw.mac.type == ixgbe_mac_82598EB) { 2931 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0); 2932 } else { 2933 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000); 2934 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0); 2935 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0); 2936 } 2937 IXGBE_WRITE_FLUSH(&sc->hw); 2938 2939 for (i = 0; i < sc->intr_cnt; ++i) 2940 lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize); 2941 } 2942 2943 uint16_t 2944 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, uint32_t reg) 2945 { 2946 return pci_read_config(((struct ixgbe_osdep *)hw->back)->dev, 2947 reg, 2); 2948 } 2949 2950 void 2951 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, uint32_t reg, uint16_t value) 2952 { 2953 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev, 2954 reg, value, 2); 2955 } 2956 2957 static void 2958 ix_slot_info(struct ix_softc *sc) 2959 { 2960 struct ixgbe_hw *hw = &sc->hw; 2961 device_t dev = sc->dev; 2962 struct ixgbe_mac_info *mac = &hw->mac; 2963 uint16_t link; 2964 uint32_t offset; 2965 2966 /* For most devices simply call the shared code routine */ 2967 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) { 2968 ixgbe_get_bus_info(hw); 2969 goto display; 2970 } 2971 2972 /* 2973 * For the Quad port adapter we need to parse back 2974 * up the PCI tree to find the speed of the expansion 2975 * slot into which this adapter is plugged. A bit more work. 
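 * (Presumably the quad-port board has an internal PCIe bridge, which is
 * why the code below walks two bridge levels up before reading the Link
 * Status register of the physical slot.)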
2976 */ 2977 dev = device_get_parent(device_get_parent(dev)); 2978 #ifdef IXGBE_DEBUG 2979 device_printf(dev, "parent pcib = %x,%x,%x\n", 2980 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); 2981 #endif 2982 dev = device_get_parent(device_get_parent(dev)); 2983 #ifdef IXGBE_DEBUG 2984 device_printf(dev, "slot pcib = %x,%x,%x\n", 2985 pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); 2986 #endif 2987 /* Now get the PCI Express Capabilities offset */ 2988 offset = pci_get_pciecap_ptr(dev); 2989 /* ...and read the Link Status Register */ 2990 link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2); 2991 switch (link & IXGBE_PCI_LINK_WIDTH) { 2992 case IXGBE_PCI_LINK_WIDTH_1: 2993 hw->bus.width = ixgbe_bus_width_pcie_x1; 2994 break; 2995 case IXGBE_PCI_LINK_WIDTH_2: 2996 hw->bus.width = ixgbe_bus_width_pcie_x2; 2997 break; 2998 case IXGBE_PCI_LINK_WIDTH_4: 2999 hw->bus.width = ixgbe_bus_width_pcie_x4; 3000 break; 3001 case IXGBE_PCI_LINK_WIDTH_8: 3002 hw->bus.width = ixgbe_bus_width_pcie_x8; 3003 break; 3004 default: 3005 hw->bus.width = ixgbe_bus_width_unknown; 3006 break; 3007 } 3008 3009 switch (link & IXGBE_PCI_LINK_SPEED) { 3010 case IXGBE_PCI_LINK_SPEED_2500: 3011 hw->bus.speed = ixgbe_bus_speed_2500; 3012 break; 3013 case IXGBE_PCI_LINK_SPEED_5000: 3014 hw->bus.speed = ixgbe_bus_speed_5000; 3015 break; 3016 case IXGBE_PCI_LINK_SPEED_8000: 3017 hw->bus.speed = ixgbe_bus_speed_8000; 3018 break; 3019 default: 3020 hw->bus.speed = ixgbe_bus_speed_unknown; 3021 break; 3022 } 3023 3024 mac->ops.set_lan_id(hw); 3025 3026 display: 3027 device_printf(dev, "PCI Express Bus: Speed %s %s\n", 3028 hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" : 3029 hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" : 3030 hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown", 3031 hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" : 3032 hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" : 3033 hw->bus.width == ixgbe_bus_width_pcie_x1 ? 
"Width x1" : "Unknown"); 3034 3035 if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP && 3036 hw->bus.width <= ixgbe_bus_width_pcie_x4 && 3037 hw->bus.speed == ixgbe_bus_speed_2500) { 3038 device_printf(dev, "For optimal performance a x8 " 3039 "PCIE, or x4 PCIE Gen2 slot is required.\n"); 3040 } else if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP && 3041 hw->bus.width <= ixgbe_bus_width_pcie_x8 && 3042 hw->bus.speed < ixgbe_bus_speed_8000) { 3043 device_printf(dev, "For optimal performance a x8 " 3044 "PCIE Gen3 slot is required.\n"); 3045 } 3046 } 3047 3048 /* 3049 * TODO comment is incorrect 3050 * 3051 * Setup the correct IVAR register for a particular MSIX interrupt 3052 * - entry is the register array entry 3053 * - vector is the MSIX vector for this queue 3054 * - type is RX/TX/MISC 3055 */ 3056 static void 3057 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector, 3058 int8_t type) 3059 { 3060 struct ixgbe_hw *hw = &sc->hw; 3061 uint32_t ivar, index; 3062 3063 vector |= IXGBE_IVAR_ALLOC_VAL; 3064 3065 switch (hw->mac.type) { 3066 case ixgbe_mac_82598EB: 3067 if (type == -1) 3068 entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; 3069 else 3070 entry += (type * 64); 3071 index = (entry >> 2) & 0x1F; 3072 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); 3073 ivar &= ~(0xFF << (8 * (entry & 0x3))); 3074 ivar |= (vector << (8 * (entry & 0x3))); 3075 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar); 3076 break; 3077 3078 case ixgbe_mac_82599EB: 3079 case ixgbe_mac_X540: 3080 if (type == -1) { /* MISC IVAR */ 3081 index = (entry & 1) * 8; 3082 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); 3083 ivar &= ~(0xFF << index); 3084 ivar |= (vector << index); 3085 IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); 3086 } else { /* RX/TX IVARS */ 3087 index = (16 * (entry & 1)) + (8 * type); 3088 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); 3089 ivar &= ~(0xFF << index); 3090 ivar |= (vector << index); 3091 IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); 3092 } 3093 3094 default: 3095 break; 3096 } 3097 } 3098 3099 static boolean_t 3100 ix_sfp_probe(struct ix_softc *sc) 3101 { 3102 struct ixgbe_hw *hw = &sc->hw; 3103 3104 if (hw->phy.type == ixgbe_phy_nl && 3105 hw->phy.sfp_type == ixgbe_sfp_type_not_present) { 3106 int32_t ret; 3107 3108 ret = hw->phy.ops.identify_sfp(hw); 3109 if (ret) 3110 return FALSE; 3111 3112 ret = hw->phy.ops.reset(hw); 3113 if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { 3114 if_printf(&sc->arpcom.ac_if, 3115 "Unsupported SFP+ module detected! 
" 3116 "Reload driver with supported module.\n"); 3117 sc->sfp_probe = FALSE; 3118 return FALSE; 3119 } 3120 if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n"); 3121 3122 /* We now have supported optics */ 3123 sc->sfp_probe = FALSE; 3124 /* Set the optics type so system reports correctly */ 3125 ix_setup_optics(sc); 3126 3127 return TRUE; 3128 } 3129 return FALSE; 3130 } 3131 3132 static void 3133 ix_handle_link(struct ix_softc *sc) 3134 { 3135 ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0); 3136 ix_update_link_status(sc); 3137 } 3138 3139 /* 3140 * Handling SFP module 3141 */ 3142 static void 3143 ix_handle_mod(struct ix_softc *sc) 3144 { 3145 struct ixgbe_hw *hw = &sc->hw; 3146 uint32_t err; 3147 3148 err = hw->phy.ops.identify_sfp(hw); 3149 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { 3150 if_printf(&sc->arpcom.ac_if, 3151 "Unsupported SFP+ module type was detected.\n"); 3152 return; 3153 } 3154 err = hw->mac.ops.setup_sfp(hw); 3155 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { 3156 if_printf(&sc->arpcom.ac_if, 3157 "Setup failure - unsupported SFP+ module type.\n"); 3158 return; 3159 } 3160 ix_handle_msf(sc); 3161 } 3162 3163 /* 3164 * Handling MSF (multispeed fiber) 3165 */ 3166 static void 3167 ix_handle_msf(struct ix_softc *sc) 3168 { 3169 struct ixgbe_hw *hw = &sc->hw; 3170 uint32_t autoneg; 3171 3172 autoneg = hw->phy.autoneg_advertised; 3173 if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) { 3174 bool negotiate; 3175 3176 hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); 3177 } 3178 if (hw->mac.ops.setup_link != NULL) 3179 hw->mac.ops.setup_link(hw, autoneg, TRUE); 3180 } 3181 3182 static void 3183 ix_update_stats(struct ix_softc *sc) 3184 { 3185 struct ifnet *ifp = &sc->arpcom.ac_if; 3186 struct ixgbe_hw *hw = &sc->hw; 3187 uint32_t missed_rx = 0, bprc, lxon, lxoff, total; 3188 uint64_t total_missed_rx = 0; 3189 int i; 3190 3191 sc->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); 3192 sc->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); 3193 sc->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); 3194 sc->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); 3195 3196 /* 3197 * Note: These are for the 8 possible traffic classes, which 3198 * in current implementation is unused, therefore only 0 should 3199 * read real data. 
3200 */ 3201 for (i = 0; i < 8; i++) { 3202 uint32_t mp; 3203 3204 mp = IXGBE_READ_REG(hw, IXGBE_MPC(i)); 3205 /* missed_rx tallies misses for the gprc workaround */ 3206 missed_rx += mp; 3207 /* global total per queue */ 3208 sc->stats.mpc[i] += mp; 3209 3210 /* Running comprehensive total for stats display */ 3211 total_missed_rx += sc->stats.mpc[i]; 3212 3213 if (hw->mac.type == ixgbe_mac_82598EB) { 3214 sc->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i)); 3215 sc->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i)); 3216 sc->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i)); 3217 sc->stats.pxonrxc[i] += 3218 IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); 3219 } else { 3220 sc->stats.pxonrxc[i] += 3221 IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); 3222 } 3223 sc->stats.pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); 3224 sc->stats.pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); 3225 sc->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); 3226 sc->stats.pxon2offc[i] += 3227 IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i)); 3228 } 3229 for (i = 0; i < 16; i++) { 3230 sc->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); 3231 sc->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); 3232 sc->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); 3233 } 3234 sc->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); 3235 sc->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); 3236 sc->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); 3237 3238 /* Hardware workaround, gprc counts missed packets */ 3239 sc->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); 3240 sc->stats.gprc -= missed_rx; 3241 3242 if (hw->mac.type != ixgbe_mac_82598EB) { 3243 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + 3244 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); 3245 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + 3246 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); 3247 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) + 3248 ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); 3249 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); 3250 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); 3251 } else { 3252 sc->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); 3253 sc->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); 3254 /* 82598 only has a counter in the high register */ 3255 sc->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); 3256 sc->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); 3257 sc->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH); 3258 } 3259 3260 /* 3261 * Workaround: mprc hardware is incorrectly counting 3262 * broadcasts, so for now we subtract those. 
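 * (Only the 82598 gets this adjustment; the code below subtracts the
 * broadcast count from mprc for that MAC type alone.)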
3263 */ 3264 bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); 3265 sc->stats.bprc += bprc; 3266 sc->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); 3267 if (hw->mac.type == ixgbe_mac_82598EB) 3268 sc->stats.mprc -= bprc; 3269 3270 sc->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); 3271 sc->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); 3272 sc->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); 3273 sc->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); 3274 sc->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); 3275 sc->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); 3276 3277 lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); 3278 sc->stats.lxontxc += lxon; 3279 lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); 3280 sc->stats.lxofftxc += lxoff; 3281 total = lxon + lxoff; 3282 3283 sc->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); 3284 sc->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); 3285 sc->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); 3286 sc->stats.gptc -= total; 3287 sc->stats.mptc -= total; 3288 sc->stats.ptc64 -= total; 3289 sc->stats.gotc -= total * ETHER_MIN_LEN; 3290 3291 sc->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC); 3292 sc->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC); 3293 sc->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC); 3294 sc->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC); 3295 sc->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); 3296 sc->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); 3297 sc->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); 3298 sc->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR); 3299 sc->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT); 3300 sc->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); 3301 sc->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); 3302 sc->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); 3303 sc->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); 3304 sc->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); 3305 sc->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); 3306 sc->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC); 3307 sc->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); 3308 sc->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); 3309 /* Only read FCOE on 82599 */ 3310 if (hw->mac.type != ixgbe_mac_82598EB) { 3311 sc->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); 3312 sc->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); 3313 sc->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); 3314 sc->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); 3315 sc->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); 3316 } 3317 3318 /* Rx Errors */ 3319 IFNET_STAT_SET(ifp, iqdrops, total_missed_rx); 3320 IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec); 3321 } 3322 3323 #if 0 3324 /* 3325 * Add sysctl variables, one per statistic, to the system. 
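 * NOTE: this block is currently compiled out (#if 0).  If re-enabled it
 * would hang the MAC counters off the device's sysctl tree, e.g. under
 * something like dev.ix.0.mac_stats (node name shown for illustration only).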
3326 */ 3327 static void 3328 ix_add_hw_stats(struct ix_softc *sc) 3329 { 3330 3331 device_t dev = sc->dev; 3332 3333 struct ix_tx_ring *txr = sc->tx_rings; 3334 struct ix_rx_ring *rxr = sc->rx_rings; 3335 3336 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 3337 struct sysctl_oid *tree = device_get_sysctl_tree(dev); 3338 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 3339 struct ixgbe_hw_stats *stats = &sc->stats; 3340 3341 struct sysctl_oid *stat_node, *queue_node; 3342 struct sysctl_oid_list *stat_list, *queue_list; 3343 3344 #define QUEUE_NAME_LEN 32 3345 char namebuf[QUEUE_NAME_LEN]; 3346 3347 /* MAC stats get the own sub node */ 3348 3349 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 3350 CTLFLAG_RD, NULL, "MAC Statistics"); 3351 stat_list = SYSCTL_CHILDREN(stat_node); 3352 3353 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", 3354 CTLFLAG_RD, &stats->crcerrs, 3355 "CRC Errors"); 3356 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", 3357 CTLFLAG_RD, &stats->illerrc, 3358 "Illegal Byte Errors"); 3359 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", 3360 CTLFLAG_RD, &stats->errbc, 3361 "Byte Errors"); 3362 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", 3363 CTLFLAG_RD, &stats->mspdc, 3364 "MAC Short Packets Discarded"); 3365 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", 3366 CTLFLAG_RD, &stats->mlfc, 3367 "MAC Local Faults"); 3368 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", 3369 CTLFLAG_RD, &stats->mrfc, 3370 "MAC Remote Faults"); 3371 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", 3372 CTLFLAG_RD, &stats->rlec, 3373 "Receive Length Errors"); 3374 3375 /* Flow Control stats */ 3376 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", 3377 CTLFLAG_RD, &stats->lxontxc, 3378 "Link XON Transmitted"); 3379 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", 3380 CTLFLAG_RD, &stats->lxonrxc, 3381 "Link XON Received"); 3382 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", 3383 CTLFLAG_RD, &stats->lxofftxc, 3384 "Link XOFF Transmitted"); 3385 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", 3386 CTLFLAG_RD, &stats->lxoffrxc, 3387 "Link XOFF Received"); 3388 3389 /* Packet Reception Stats */ 3390 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", 3391 CTLFLAG_RD, &stats->tor, 3392 "Total Octets Received"); 3393 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", 3394 CTLFLAG_RD, &stats->gorc, 3395 "Good Octets Received"); 3396 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", 3397 CTLFLAG_RD, &stats->tpr, 3398 "Total Packets Received"); 3399 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", 3400 CTLFLAG_RD, &stats->gprc, 3401 "Good Packets Received"); 3402 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", 3403 CTLFLAG_RD, &stats->mprc, 3404 "Multicast Packets Received"); 3405 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", 3406 CTLFLAG_RD, &stats->bprc, 3407 "Broadcast Packets Received"); 3408 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", 3409 CTLFLAG_RD, &stats->prc64, 3410 "64 byte frames received "); 3411 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", 3412 CTLFLAG_RD, &stats->prc127, 3413 "65-127 byte frames received"); 3414 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", 3415 CTLFLAG_RD, &stats->prc255, 3416 "128-255 byte frames received"); 3417 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", 3418 CTLFLAG_RD, &stats->prc511, 3419 
"256-511 byte frames received"); 3420 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", 3421 CTLFLAG_RD, &stats->prc1023, 3422 "512-1023 byte frames received"); 3423 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", 3424 CTLFLAG_RD, &stats->prc1522, 3425 "1023-1522 byte frames received"); 3426 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", 3427 CTLFLAG_RD, &stats->ruc, 3428 "Receive Undersized"); 3429 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", 3430 CTLFLAG_RD, &stats->rfc, 3431 "Fragmented Packets Received "); 3432 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", 3433 CTLFLAG_RD, &stats->roc, 3434 "Oversized Packets Received"); 3435 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", 3436 CTLFLAG_RD, &stats->rjc, 3437 "Received Jabber"); 3438 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", 3439 CTLFLAG_RD, &stats->mngprc, 3440 "Management Packets Received"); 3441 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", 3442 CTLFLAG_RD, &stats->mngptc, 3443 "Management Packets Dropped"); 3444 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", 3445 CTLFLAG_RD, &stats->xec, 3446 "Checksum Errors"); 3447 3448 /* Packet Transmission Stats */ 3449 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 3450 CTLFLAG_RD, &stats->gotc, 3451 "Good Octets Transmitted"); 3452 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", 3453 CTLFLAG_RD, &stats->tpt, 3454 "Total Packets Transmitted"); 3455 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", 3456 CTLFLAG_RD, &stats->gptc, 3457 "Good Packets Transmitted"); 3458 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", 3459 CTLFLAG_RD, &stats->bptc, 3460 "Broadcast Packets Transmitted"); 3461 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", 3462 CTLFLAG_RD, &stats->mptc, 3463 "Multicast Packets Transmitted"); 3464 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", 3465 CTLFLAG_RD, &stats->mngptc, 3466 "Management Packets Transmitted"); 3467 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", 3468 CTLFLAG_RD, &stats->ptc64, 3469 "64 byte frames transmitted "); 3470 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", 3471 CTLFLAG_RD, &stats->ptc127, 3472 "65-127 byte frames transmitted"); 3473 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", 3474 CTLFLAG_RD, &stats->ptc255, 3475 "128-255 byte frames transmitted"); 3476 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", 3477 CTLFLAG_RD, &stats->ptc511, 3478 "256-511 byte frames transmitted"); 3479 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", 3480 CTLFLAG_RD, &stats->ptc1023, 3481 "512-1023 byte frames transmitted"); 3482 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", 3483 CTLFLAG_RD, &stats->ptc1522, 3484 "1024-1522 byte frames transmitted"); 3485 } 3486 #endif 3487 3488 /* 3489 * Enable the hardware to drop packets when the buffer is full. 3490 * This is useful when multiple RX rings are used, so that no 3491 * single RX ring being full stalls the entire RX engine. We 3492 * only enable this when multiple RX rings are used and when 3493 * flow control is disabled. 
3494 */ 3495 static void 3496 ix_enable_rx_drop(struct ix_softc *sc) 3497 { 3498 struct ixgbe_hw *hw = &sc->hw; 3499 int i; 3500 3501 if (bootverbose) { 3502 if_printf(&sc->arpcom.ac_if, 3503 "flow control %s, enable RX drop\n", 3504 ix_fc2str(sc->hw.fc.current_mode)); 3505 } 3506 3507 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3508 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); 3509 3510 srrctl |= IXGBE_SRRCTL_DROP_EN; 3511 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); 3512 } 3513 } 3514 3515 static void 3516 ix_disable_rx_drop(struct ix_softc *sc) 3517 { 3518 struct ixgbe_hw *hw = &sc->hw; 3519 int i; 3520 3521 if (bootverbose) { 3522 if_printf(&sc->arpcom.ac_if, 3523 "flow control %s, disable RX drop\n", 3524 ix_fc2str(sc->hw.fc.current_mode)); 3525 } 3526 3527 for (i = 0; i < sc->rx_ring_inuse; ++i) { 3528 uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); 3529 3530 srrctl &= ~IXGBE_SRRCTL_DROP_EN; 3531 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl); 3532 } 3533 } 3534 3535 #ifdef foo 3536 /* XXX not working properly w/ 82599 connected w/ DAC */ 3537 /* XXX only work after the interface is up */ 3538 static int 3539 ix_sysctl_advspeed(SYSCTL_HANDLER_ARGS) 3540 { 3541 struct ix_softc *sc = (struct ix_softc *)arg1; 3542 struct ifnet *ifp = &sc->arpcom.ac_if; 3543 struct ixgbe_hw *hw = &sc->hw; 3544 ixgbe_link_speed speed; 3545 int error, advspeed; 3546 3547 advspeed = sc->advspeed; 3548 error = sysctl_handle_int(oidp, &advspeed, 0, req); 3549 if (error || req->newptr == NULL) 3550 return error; 3551 3552 if (!(hw->phy.media_type == ixgbe_media_type_copper || 3553 hw->phy.multispeed_fiber)) 3554 return EOPNOTSUPP; 3555 if (hw->mac.ops.setup_link == NULL) 3556 return EOPNOTSUPP; 3557 3558 switch (advspeed) { 3559 case 0: /* auto */ 3560 speed = IXGBE_LINK_SPEED_UNKNOWN; 3561 break; 3562 3563 case 1: /* 1Gb */ 3564 speed = IXGBE_LINK_SPEED_1GB_FULL; 3565 break; 3566 3567 case 2: /* 100Mb */ 3568 speed = IXGBE_LINK_SPEED_100_FULL; 3569 break; 3570 3571 case 3: /* 1Gb/10Gb */ 3572 speed = IXGBE_LINK_SPEED_1GB_FULL | 3573 IXGBE_LINK_SPEED_10GB_FULL; 3574 break; 3575 3576 default: 3577 return EINVAL; 3578 } 3579 3580 ifnet_serialize_all(ifp); 3581 3582 if (sc->advspeed == advspeed) /* no change */ 3583 goto done; 3584 3585 if ((speed & IXGBE_LINK_SPEED_100_FULL) && 3586 hw->mac.type != ixgbe_mac_X540) { 3587 error = EOPNOTSUPP; 3588 goto done; 3589 } 3590 3591 sc->advspeed = advspeed; 3592 3593 if ((ifp->if_flags & IFF_RUNNING) == 0) 3594 goto done; 3595 3596 if (speed == IXGBE_LINK_SPEED_UNKNOWN) { 3597 ix_config_link(sc); 3598 } else { 3599 hw->mac.autotry_restart = TRUE; 3600 hw->mac.ops.setup_link(hw, speed, sc->link_up); 3601 } 3602 3603 done: 3604 ifnet_deserialize_all(ifp); 3605 return error; 3606 } 3607 #endif 3608 3609 static void 3610 ix_setup_serialize(struct ix_softc *sc) 3611 { 3612 int i = 0, j; 3613 3614 /* Main + RX + TX */ 3615 sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt; 3616 sc->serializes = 3617 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *), 3618 M_DEVBUF, M_WAITOK | M_ZERO); 3619 3620 /* 3621 * Setup serializes 3622 * 3623 * NOTE: Order is critical 3624 */ 3625 3626 KKASSERT(i < sc->nserialize); 3627 sc->serializes[i++] = &sc->main_serialize; 3628 3629 for (j = 0; j < sc->rx_ring_cnt; ++j) { 3630 KKASSERT(i < sc->nserialize); 3631 sc->serializes[i++] = &sc->rx_rings[j].rx_serialize; 3632 } 3633 3634 for (j = 0; j < sc->tx_ring_cnt; ++j) { 3635 KKASSERT(i < sc->nserialize); 3636 sc->serializes[i++] = &sc->tx_rings[j].tx_serialize; 3637 } 3638 
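	/*
	 * Final layout: serializes[0] is the main serializer, followed by
	 * one entry per RX ring and then one entry per TX ring.
	 */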
3639 KKASSERT(i == sc->nserialize); 3640 } 3641 3642 static int 3643 ix_alloc_intr(struct ix_softc *sc) 3644 { 3645 struct ix_intr_data *intr; 3646 u_int intr_flags; 3647 3648 ix_alloc_msix(sc); 3649 if (sc->intr_type == PCI_INTR_TYPE_MSIX) { 3650 ix_set_ring_inuse(sc, FALSE); 3651 return 0; 3652 } 3653 3654 if (sc->intr_data != NULL) 3655 kfree(sc->intr_data, M_DEVBUF); 3656 3657 sc->intr_cnt = 1; 3658 sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF, 3659 M_WAITOK | M_ZERO); 3660 intr = &sc->intr_data[0]; 3661 3662 /* 3663 * Allocate MSI/legacy interrupt resource 3664 */ 3665 sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable, 3666 &intr->intr_rid, &intr_flags); 3667 3668 intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 3669 &intr->intr_rid, intr_flags); 3670 if (intr->intr_res == NULL) { 3671 device_printf(sc->dev, "Unable to allocate bus resource: " 3672 "interrupt\n"); 3673 return ENXIO; 3674 } 3675 3676 intr->intr_serialize = &sc->main_serialize; 3677 intr->intr_cpuid = rman_get_cpuid(intr->intr_res); 3678 intr->intr_func = ix_intr; 3679 intr->intr_funcarg = sc; 3680 intr->intr_rate = IX_INTR_RATE; 3681 intr->intr_use = IX_INTR_USE_RXTX; 3682 3683 sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid; 3684 sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC; 3685 3686 sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC; 3687 3688 ix_set_ring_inuse(sc, FALSE); 3689 3690 KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS); 3691 if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) 3692 sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC; 3693 3694 return 0; 3695 } 3696 3697 static void 3698 ix_free_intr(struct ix_softc *sc) 3699 { 3700 if (sc->intr_data == NULL) 3701 return; 3702 3703 if (sc->intr_type != PCI_INTR_TYPE_MSIX) { 3704 struct ix_intr_data *intr = &sc->intr_data[0]; 3705 3706 KKASSERT(sc->intr_cnt == 1); 3707 if (intr->intr_res != NULL) { 3708 bus_release_resource(sc->dev, SYS_RES_IRQ, 3709 intr->intr_rid, intr->intr_res); 3710 } 3711 if (sc->intr_type == PCI_INTR_TYPE_MSI) 3712 pci_release_msi(sc->dev); 3713 3714 kfree(sc->intr_data, M_DEVBUF); 3715 } else { 3716 ix_free_msix(sc, TRUE); 3717 } 3718 } 3719 3720 static void 3721 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling) 3722 { 3723 sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling); 3724 sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling); 3725 if (bootverbose) { 3726 if_printf(&sc->arpcom.ac_if, 3727 "RX rings %d/%d, TX rings %d/%d\n", 3728 sc->rx_ring_inuse, sc->rx_ring_cnt, 3729 sc->tx_ring_inuse, sc->tx_ring_cnt); 3730 } 3731 } 3732 3733 static int 3734 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling) 3735 { 3736 if (!IX_ENABLE_HWRSS(sc)) 3737 return 1; 3738 3739 if (polling) 3740 return sc->rx_ring_cnt; 3741 else if (sc->intr_type != PCI_INTR_TYPE_MSIX) 3742 return IX_MIN_RXRING_RSS; 3743 else 3744 return sc->rx_ring_msix; 3745 } 3746 3747 static int 3748 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling) 3749 { 3750 if (!IX_ENABLE_HWTSS(sc)) 3751 return 1; 3752 3753 if (polling) 3754 return sc->tx_ring_cnt; 3755 else if (sc->intr_type != PCI_INTR_TYPE_MSIX) 3756 return 1; 3757 else 3758 return sc->tx_ring_msix; 3759 } 3760 3761 static int 3762 ix_setup_intr(struct ix_softc *sc) 3763 { 3764 int i; 3765 3766 for (i = 0; i < sc->intr_cnt; ++i) { 3767 struct ix_intr_data *intr = &sc->intr_data[i]; 3768 int error; 3769 3770 error = bus_setup_intr_descr(sc->dev, intr->intr_res, 3771 INTR_MPSAFE, intr->intr_func, intr->intr_funcarg, 3772 &intr->intr_hand, intr->intr_serialize, 
intr->intr_desc); 3773 if (error) { 3774 device_printf(sc->dev, "can't setup %dth intr\n", i); 3775 ix_teardown_intr(sc, i); 3776 return error; 3777 } 3778 } 3779 return 0; 3780 } 3781 3782 static void 3783 ix_teardown_intr(struct ix_softc *sc, int intr_cnt) 3784 { 3785 int i; 3786 3787 if (sc->intr_data == NULL) 3788 return; 3789 3790 for (i = 0; i < intr_cnt; ++i) { 3791 struct ix_intr_data *intr = &sc->intr_data[i]; 3792 3793 bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand); 3794 } 3795 } 3796 3797 static void 3798 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 3799 { 3800 struct ix_softc *sc = ifp->if_softc; 3801 3802 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz); 3803 } 3804 3805 static void 3806 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 3807 { 3808 struct ix_softc *sc = ifp->if_softc; 3809 3810 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz); 3811 } 3812 3813 static int 3814 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 3815 { 3816 struct ix_softc *sc = ifp->if_softc; 3817 3818 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz); 3819 } 3820 3821 #ifdef INVARIANTS 3822 3823 static void 3824 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 3825 boolean_t serialized) 3826 { 3827 struct ix_softc *sc = ifp->if_softc; 3828 3829 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz, 3830 serialized); 3831 } 3832 3833 #endif /* INVARIANTS */ 3834 3835 static void 3836 ix_free_rings(struct ix_softc *sc) 3837 { 3838 int i; 3839 3840 if (sc->tx_rings != NULL) { 3841 for (i = 0; i < sc->tx_ring_cnt; ++i) { 3842 struct ix_tx_ring *txr = &sc->tx_rings[i]; 3843 3844 ix_destroy_tx_ring(txr, txr->tx_ndesc); 3845 } 3846 kfree(sc->tx_rings, M_DEVBUF); 3847 } 3848 3849 if (sc->rx_rings != NULL) { 3850 for (i =0; i < sc->rx_ring_cnt; ++i) { 3851 struct ix_rx_ring *rxr = &sc->rx_rings[i]; 3852 3853 ix_destroy_rx_ring(rxr, rxr->rx_ndesc); 3854 } 3855 kfree(sc->rx_rings, M_DEVBUF); 3856 } 3857 3858 if (sc->parent_tag != NULL) 3859 bus_dma_tag_destroy(sc->parent_tag); 3860 } 3861 3862 static void 3863 ix_watchdog(struct ifaltq_subque *ifsq) 3864 { 3865 struct ix_tx_ring *txr = ifsq_get_priv(ifsq); 3866 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3867 struct ix_softc *sc = ifp->if_softc; 3868 int i; 3869 3870 KKASSERT(txr->tx_ifsq == ifsq); 3871 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3872 3873 /* 3874 * If the interface has been paused then don't do the watchdog check 3875 */ 3876 if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) { 3877 txr->tx_watchdog.wd_timer = 5; 3878 return; 3879 } 3880 3881 if_printf(ifp, "Watchdog timeout -- resetting\n"); 3882 if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx, 3883 IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)), 3884 IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx))); 3885 if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n", 3886 txr->tx_idx, txr->tx_avail, txr->tx_next_clean); 3887 3888 ix_init(sc); 3889 for (i = 0; i < sc->tx_ring_inuse; ++i) 3890 ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq); 3891 } 3892 3893 static void 3894 ix_free_tx_ring(struct ix_tx_ring *txr) 3895 { 3896 int i; 3897 3898 for (i = 0; i < txr->tx_ndesc; ++i) { 3899 struct ix_tx_buf *txbuf = &txr->tx_buf[i]; 3900 3901 if (txbuf->m_head != NULL) { 3902 bus_dmamap_unload(txr->tx_tag, txbuf->map); 3903 m_freem(txbuf->m_head); 3904 txbuf->m_head = NULL; 3905 } 3906 } 3907 } 3908 3909 static void 3910 ix_free_rx_ring(struct ix_rx_ring *rxr) 
3911 { 3912 int i; 3913 3914 for (i = 0; i < rxr->rx_ndesc; ++i) { 3915 struct ix_rx_buf *rxbuf = &rxr->rx_buf[i]; 3916 3917 if (rxbuf->fmp != NULL) { 3918 m_freem(rxbuf->fmp); 3919 rxbuf->fmp = NULL; 3920 rxbuf->lmp = NULL; 3921 } else { 3922 KKASSERT(rxbuf->lmp == NULL); 3923 } 3924 if (rxbuf->m_head != NULL) { 3925 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 3926 m_freem(rxbuf->m_head); 3927 rxbuf->m_head = NULL; 3928 } 3929 } 3930 } 3931 3932 static int 3933 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait) 3934 { 3935 struct mbuf *m; 3936 bus_dma_segment_t seg; 3937 bus_dmamap_t map; 3938 struct ix_rx_buf *rxbuf; 3939 int flags, error, nseg; 3940 3941 flags = M_NOWAIT; 3942 if (__predict_false(wait)) 3943 flags = M_WAITOK; 3944 3945 m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz); 3946 if (m == NULL) { 3947 if (wait) { 3948 if_printf(&rxr->rx_sc->arpcom.ac_if, 3949 "Unable to allocate RX mbuf\n"); 3950 } 3951 return ENOBUFS; 3952 } 3953 m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz; 3954 3955 error = bus_dmamap_load_mbuf_segment(rxr->rx_tag, 3956 rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT); 3957 if (error) { 3958 m_freem(m); 3959 if (wait) { 3960 if_printf(&rxr->rx_sc->arpcom.ac_if, 3961 "Unable to load RX mbuf\n"); 3962 } 3963 return error; 3964 } 3965 3966 rxbuf = &rxr->rx_buf[i]; 3967 if (rxbuf->m_head != NULL) 3968 bus_dmamap_unload(rxr->rx_tag, rxbuf->map); 3969 3970 map = rxbuf->map; 3971 rxbuf->map = rxr->rx_sparemap; 3972 rxr->rx_sparemap = map; 3973 3974 rxbuf->m_head = m; 3975 rxbuf->paddr = seg.ds_addr; 3976 3977 ix_setup_rxdesc(&rxr->rx_base[i], rxbuf); 3978 return 0; 3979 } 3980 3981 static void 3982 ix_add_sysctl(struct ix_softc *sc) 3983 { 3984 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); 3985 struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev); 3986 #ifdef IX_RSS_DEBUG 3987 char node[32]; 3988 int i; 3989 #endif 3990 3991 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), 3992 OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings"); 3993 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), 3994 OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0, 3995 "# of RX rings used"); 3996 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), 3997 OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings"); 3998 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), 3999 OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0, 4000 "# of TX rings used"); 4001 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4002 OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD, 4003 sc, 0, ix_sysctl_rxd, "I", 4004 "# of RX descs"); 4005 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4006 OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD, 4007 sc, 0, ix_sysctl_txd, "I", 4008 "# of TX descs"); 4009 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4010 OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW, 4011 sc, 0, ix_sysctl_tx_wreg_nsegs, "I", 4012 "# of segments sent before write to hardware register"); 4013 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4014 OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW, 4015 sc, 0, ix_sysctl_rx_wreg_nsegs, "I", 4016 "# of received segments sent before write to hardware register"); 4017 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4018 OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW, 4019 sc, 0, ix_sysctl_tx_intr_nsegs, "I", 4020 "# of segments per TX interrupt"); 4021 4022 #ifdef IFPOLL_ENABLE 4023 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4024 OID_AUTO, "npoll_rxoff", CTLTYPE_INT|CTLFLAG_RW, 4025 sc, 0, ix_sysctl_npoll_rxoff, "I", "NPOLLING RX cpu 
offset"); 4026 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4027 OID_AUTO, "npoll_txoff", CTLTYPE_INT|CTLFLAG_RW, 4028 sc, 0, ix_sysctl_npoll_txoff, "I", "NPOLLING TX cpu offset"); 4029 #endif 4030 4031 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \ 4032 do { \ 4033 ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \ 4034 ix_sysctl_##name, #use " interrupt rate"); \ 4035 } while (0) 4036 4037 IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate); 4038 IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate); 4039 IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate); 4040 IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate); 4041 4042 #undef IX_ADD_INTR_RATE_SYSCTL 4043 4044 #ifdef IX_RSS_DEBUG 4045 SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), 4046 OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0, 4047 "RSS debug level"); 4048 for (i = 0; i < sc->rx_ring_cnt; ++i) { 4049 ksnprintf(node, sizeof(node), "rx%d_pkt", i); 4050 SYSCTL_ADD_ULONG(ctx, 4051 SYSCTL_CHILDREN(tree), OID_AUTO, node, 4052 CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets"); 4053 } 4054 #endif 4055 4056 #ifdef foo 4057 /* 4058 * Allow a kind of speed control by forcing the autoneg 4059 * advertised speed list to only a certain value, this 4060 * supports 1G on 82599 devices, and 100Mb on X540. 4061 */ 4062 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), 4063 OID_AUTO, "advspeed", CTLTYPE_INT | CTLFLAG_RW, 4064 sc, 0, ix_sysctl_advspeed, "I", 4065 "advertised link speed, " 4066 "0 - auto, 1 - 1Gb, 2 - 100Mb, 3 - 1Gb/10Gb"); 4067 #endif 4068 4069 #if 0 4070 ix_add_hw_stats(sc); 4071 #endif 4072 4073 } 4074 4075 static int 4076 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 4077 { 4078 struct ix_softc *sc = (void *)arg1; 4079 struct ifnet *ifp = &sc->arpcom.ac_if; 4080 int error, nsegs, i; 4081 4082 nsegs = sc->tx_rings[0].tx_wreg_nsegs; 4083 error = sysctl_handle_int(oidp, &nsegs, 0, req); 4084 if (error || req->newptr == NULL) 4085 return error; 4086 if (nsegs < 0) 4087 return EINVAL; 4088 4089 ifnet_serialize_all(ifp); 4090 for (i = 0; i < sc->tx_ring_cnt; ++i) 4091 sc->tx_rings[i].tx_wreg_nsegs = nsegs; 4092 ifnet_deserialize_all(ifp); 4093 4094 return 0; 4095 } 4096 4097 static int 4098 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS) 4099 { 4100 struct ix_softc *sc = (void *)arg1; 4101 struct ifnet *ifp = &sc->arpcom.ac_if; 4102 int error, nsegs, i; 4103 4104 nsegs = sc->rx_rings[0].rx_wreg_nsegs; 4105 error = sysctl_handle_int(oidp, &nsegs, 0, req); 4106 if (error || req->newptr == NULL) 4107 return error; 4108 if (nsegs < 0) 4109 return EINVAL; 4110 4111 ifnet_serialize_all(ifp); 4112 for (i = 0; i < sc->rx_ring_cnt; ++i) 4113 sc->rx_rings[i].rx_wreg_nsegs =nsegs; 4114 ifnet_deserialize_all(ifp); 4115 4116 return 0; 4117 } 4118 4119 static int 4120 ix_sysctl_txd(SYSCTL_HANDLER_ARGS) 4121 { 4122 struct ix_softc *sc = (void *)arg1; 4123 int txd; 4124 4125 txd = sc->tx_rings[0].tx_ndesc; 4126 return sysctl_handle_int(oidp, &txd, 0, req); 4127 } 4128 4129 static int 4130 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS) 4131 { 4132 struct ix_softc *sc = (void *)arg1; 4133 int rxd; 4134 4135 rxd = sc->rx_rings[0].rx_ndesc; 4136 return sysctl_handle_int(oidp, &rxd, 0, req); 4137 } 4138 4139 static int 4140 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS) 4141 { 4142 struct ix_softc *sc = (void *)arg1; 4143 struct ifnet *ifp = &sc->arpcom.ac_if; 4144 struct ix_tx_ring *txr = &sc->tx_rings[0]; 4145 int error, nsegs; 4146 4147 nsegs = txr->tx_intr_nsegs; 4148 error = sysctl_handle_int(oidp, &nsegs, 0, req); 4149 if (error || req->newptr == NULL) 4150 return error; 4151 
if (nsegs < 0) 4152 return EINVAL; 4153 4154 ifnet_serialize_all(ifp); 4155 4156 if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) { 4157 error = EINVAL; 4158 } else { 4159 int i; 4160 4161 error = 0; 4162 for (i = 0; i < sc->tx_ring_cnt; ++i) 4163 sc->tx_rings[i].tx_intr_nsegs = nsegs; 4164 } 4165 4166 ifnet_deserialize_all(ifp); 4167 4168 return error; 4169 } 4170 4171 static void 4172 ix_set_eitr(struct ix_softc *sc, int idx, int rate) 4173 { 4174 uint32_t eitr, eitr_intvl; 4175 4176 eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx)); 4177 eitr_intvl = 1000000000 / 256 / rate; 4178 4179 if (sc->hw.mac.type == ixgbe_mac_82598EB) { 4180 eitr &= ~IX_EITR_INTVL_MASK_82598; 4181 if (eitr_intvl == 0) 4182 eitr_intvl = 1; 4183 else if (eitr_intvl > IX_EITR_INTVL_MASK_82598) 4184 eitr_intvl = IX_EITR_INTVL_MASK_82598; 4185 } else { 4186 eitr &= ~IX_EITR_INTVL_MASK; 4187 4188 eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK; 4189 if (eitr_intvl == 0) 4190 eitr_intvl = IX_EITR_INTVL_MIN; 4191 else if (eitr_intvl > IX_EITR_INTVL_MAX) 4192 eitr_intvl = IX_EITR_INTVL_MAX; 4193 } 4194 eitr |= eitr_intvl; 4195 4196 IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr); 4197 } 4198 4199 static int 4200 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS) 4201 { 4202 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX); 4203 } 4204 4205 static int 4206 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS) 4207 { 4208 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX); 4209 } 4210 4211 static int 4212 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS) 4213 { 4214 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX); 4215 } 4216 4217 static int 4218 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS) 4219 { 4220 return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS); 4221 } 4222 4223 static int 4224 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use) 4225 { 4226 struct ix_softc *sc = (void *)arg1; 4227 struct ifnet *ifp = &sc->arpcom.ac_if; 4228 int error, rate, i; 4229 4230 rate = 0; 4231 for (i = 0; i < sc->intr_cnt; ++i) { 4232 if (sc->intr_data[i].intr_use == use) { 4233 rate = sc->intr_data[i].intr_rate; 4234 break; 4235 } 4236 } 4237 4238 error = sysctl_handle_int(oidp, &rate, 0, req); 4239 if (error || req->newptr == NULL) 4240 return error; 4241 if (rate <= 0) 4242 return EINVAL; 4243 4244 ifnet_serialize_all(ifp); 4245 4246 for (i = 0; i < sc->intr_cnt; ++i) { 4247 if (sc->intr_data[i].intr_use == use) { 4248 sc->intr_data[i].intr_rate = rate; 4249 if (ifp->if_flags & IFF_RUNNING) 4250 ix_set_eitr(sc, i, rate); 4251 } 4252 } 4253 4254 ifnet_deserialize_all(ifp); 4255 4256 return error; 4257 } 4258 4259 static void 4260 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use, 4261 const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc) 4262 { 4263 int i; 4264 4265 for (i = 0; i < sc->intr_cnt; ++i) { 4266 if (sc->intr_data[i].intr_use == use) { 4267 SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev), 4268 SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), 4269 OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW, 4270 sc, 0, handler, "I", desc); 4271 break; 4272 } 4273 } 4274 } 4275 4276 static void 4277 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling) 4278 { 4279 if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX) 4280 sc->timer_cpuid = 0; /* XXX fixed */ 4281 else 4282 sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res); 4283 } 4284 4285 static void 4286 ix_alloc_msix(struct ix_softc *sc) 4287 { 4288 int msix_enable, msix_cnt, msix_cnt2, alloc_cnt; 4289 struct 
ix_intr_data *intr; 4290 int i, x, error; 4291 int offset, offset_def, agg_rxtx, ring_max; 4292 boolean_t aggregate, setup = FALSE; 4293 4294 msix_enable = ix_msix_enable; 4295 /* 4296 * Don't enable MSI-X on 82598 by default, see: 4297 * 82598 specification update errata #38 4298 */ 4299 if (sc->hw.mac.type == ixgbe_mac_82598EB) 4300 msix_enable = 0; 4301 msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable); 4302 if (!msix_enable) 4303 return; 4304 4305 msix_cnt = pci_msix_count(sc->dev); 4306 #ifdef IX_MSIX_DEBUG 4307 msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt); 4308 #endif 4309 if (msix_cnt <= 1) { 4310 /* One MSI-X model does not make sense */ 4311 return; 4312 } 4313 4314 i = 0; 4315 while ((1 << (i + 1)) <= msix_cnt) 4316 ++i; 4317 msix_cnt2 = 1 << i; 4318 4319 if (bootverbose) { 4320 device_printf(sc->dev, "MSI-X count %d/%d\n", 4321 msix_cnt2, msix_cnt); 4322 } 4323 4324 KKASSERT(msix_cnt >= msix_cnt2); 4325 if (msix_cnt == msix_cnt2) { 4326 /* We need at least one MSI-X for link status */ 4327 msix_cnt2 >>= 1; 4328 if (msix_cnt2 <= 1) { 4329 /* One MSI-X for RX/TX does not make sense */ 4330 device_printf(sc->dev, "not enough MSI-X for TX/RX, " 4331 "MSI-X count %d/%d\n", msix_cnt2, msix_cnt); 4332 return; 4333 } 4334 KKASSERT(msix_cnt > msix_cnt2); 4335 4336 if (bootverbose) { 4337 device_printf(sc->dev, "MSI-X count eq fixup %d/%d\n", 4338 msix_cnt2, msix_cnt); 4339 } 4340 } 4341 4342 /* 4343 * Make sure that we don't break interrupt related registers 4344 * (EIMS, etc) limitation. 4345 * 4346 * NOTE: msix_cnt > msix_cnt2, when we reach here 4347 */ 4348 if (sc->hw.mac.type == ixgbe_mac_82598EB) { 4349 if (msix_cnt2 > IX_MAX_MSIX_82598) 4350 msix_cnt2 = IX_MAX_MSIX_82598; 4351 } else { 4352 if (msix_cnt2 > IX_MAX_MSIX) 4353 msix_cnt2 = IX_MAX_MSIX; 4354 } 4355 msix_cnt = msix_cnt2 + 1; /* +1 for status */ 4356 4357 if (bootverbose) { 4358 device_printf(sc->dev, "MSI-X count max fixup %d/%d\n", 4359 msix_cnt2, msix_cnt); 4360 } 4361 4362 sc->rx_ring_msix = sc->rx_ring_cnt; 4363 if (sc->rx_ring_msix > msix_cnt2) 4364 sc->rx_ring_msix = msix_cnt2; 4365 4366 sc->tx_ring_msix = sc->tx_ring_cnt; 4367 if (sc->tx_ring_msix > msix_cnt2) 4368 sc->tx_ring_msix = msix_cnt2; 4369 4370 ring_max = sc->rx_ring_msix; 4371 if (ring_max < sc->tx_ring_msix) 4372 ring_max = sc->tx_ring_msix; 4373 4374 /* Allow user to force independent RX/TX MSI-X handling */ 4375 agg_rxtx = device_getenv_int(sc->dev, "msix.agg_rxtx", 4376 ix_msix_agg_rxtx); 4377 4378 if (!agg_rxtx && msix_cnt >= sc->tx_ring_msix + sc->rx_ring_msix + 1) { 4379 /* 4380 * Independent TX/RX MSI-X 4381 */ 4382 aggregate = FALSE; 4383 if (bootverbose) 4384 device_printf(sc->dev, "independent TX/RX MSI-X\n"); 4385 alloc_cnt = sc->tx_ring_msix + sc->rx_ring_msix; 4386 } else { 4387 /* 4388 * Aggregate TX/RX MSI-X 4389 */ 4390 aggregate = TRUE; 4391 if (bootverbose) 4392 device_printf(sc->dev, "aggregate TX/RX MSI-X\n"); 4393 alloc_cnt = msix_cnt2; 4394 if (alloc_cnt > ring_max) 4395 alloc_cnt = ring_max; 4396 KKASSERT(alloc_cnt >= sc->rx_ring_msix && 4397 alloc_cnt >= sc->tx_ring_msix); 4398 } 4399 ++alloc_cnt; /* For status */ 4400 4401 if (bootverbose) { 4402 device_printf(sc->dev, "MSI-X alloc %d, " 4403 "RX ring %d, TX ring %d\n", alloc_cnt, 4404 sc->rx_ring_msix, sc->tx_ring_msix); 4405 } 4406 4407 sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598); 4408 sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4409 &sc->msix_mem_rid, RF_ACTIVE); 4410 if (sc->msix_mem_res == NULL) { 4411 
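		/*
		 * Descriptive note (added): the MSI-X table appears to live
		 * behind a different BAR on 82599/X540 than on 82598; since
		 * mapping the 82598 BAR failed, retry with the 82599 BAR
		 * before giving up on MSI-X.
		 */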
sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599); 4412 sc->msix_mem_res = bus_alloc_resource_any(sc->dev, 4413 SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE); 4414 if (sc->msix_mem_res == NULL) { 4415 device_printf(sc->dev, "Unable to map MSI-X table\n"); 4416 return; 4417 } 4418 } 4419 4420 sc->intr_cnt = alloc_cnt; 4421 sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt, 4422 M_DEVBUF, M_WAITOK | M_ZERO); 4423 for (x = 0; x < sc->intr_cnt; ++x) { 4424 intr = &sc->intr_data[x]; 4425 intr->intr_rid = -1; 4426 intr->intr_rate = IX_INTR_RATE; 4427 } 4428 4429 x = 0; 4430 if (!aggregate) { 4431 /* 4432 * RX rings 4433 */ 4434 if (sc->rx_ring_msix == ncpus2) { 4435 offset = 0; 4436 } else { 4437 offset_def = (sc->rx_ring_msix * 4438 device_get_unit(sc->dev)) % ncpus2; 4439 4440 offset = device_getenv_int(sc->dev, 4441 "msix.rxoff", offset_def); 4442 if (offset >= ncpus2 || 4443 offset % sc->rx_ring_msix != 0) { 4444 device_printf(sc->dev, 4445 "invalid msix.rxoff %d, use %d\n", 4446 offset, offset_def); 4447 offset = offset_def; 4448 } 4449 } 4450 ix_conf_rx_msix(sc, 0, &x, offset); 4451 4452 /* 4453 * TX rings 4454 */ 4455 if (sc->tx_ring_msix == ncpus2) { 4456 offset = 0; 4457 } else { 4458 offset_def = (sc->tx_ring_msix * 4459 device_get_unit(sc->dev)) % ncpus2; 4460 4461 offset = device_getenv_int(sc->dev, 4462 "msix.txoff", offset_def); 4463 if (offset >= ncpus2 || 4464 offset % sc->tx_ring_msix != 0) { 4465 device_printf(sc->dev, 4466 "invalid msix.txoff %d, use %d\n", 4467 offset, offset_def); 4468 offset = offset_def; 4469 } 4470 } 4471 ix_conf_tx_msix(sc, 0, &x, offset); 4472 } else { 4473 int ring_agg; 4474 4475 ring_agg = sc->rx_ring_msix; 4476 if (ring_agg > sc->tx_ring_msix) 4477 ring_agg = sc->tx_ring_msix; 4478 4479 if (ring_max == ncpus2) { 4480 offset = 0; 4481 } else { 4482 offset_def = (ring_max * device_get_unit(sc->dev)) % 4483 ncpus2; 4484 4485 offset = device_getenv_int(sc->dev, "msix.off", 4486 offset_def); 4487 if (offset >= ncpus2 || offset % ring_max != 0) { 4488 device_printf(sc->dev, 4489 "invalid msix.off %d, use %d\n", 4490 offset, offset_def); 4491 offset = offset_def; 4492 } 4493 } 4494 4495 for (i = 0; i < ring_agg; ++i) { 4496 struct ix_tx_ring *txr = &sc->tx_rings[i]; 4497 struct ix_rx_ring *rxr = &sc->rx_rings[i]; 4498 4499 KKASSERT(x < sc->intr_cnt); 4500 rxr->rx_intr_vec = x; 4501 ix_setup_msix_eims(sc, x, 4502 &rxr->rx_eims, &rxr->rx_eims_val); 4503 rxr->rx_txr = txr; 4504 /* NOTE: Leave TX ring's intr_vec negative */ 4505 4506 intr = &sc->intr_data[x++]; 4507 4508 intr->intr_serialize = &rxr->rx_serialize; 4509 intr->intr_func = ix_msix_rxtx; 4510 intr->intr_funcarg = rxr; 4511 intr->intr_use = IX_INTR_USE_RXTX; 4512 4513 intr->intr_cpuid = i + offset; 4514 KKASSERT(intr->intr_cpuid < ncpus2); 4515 txr->tx_intr_cpuid = intr->intr_cpuid; 4516 4517 ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), 4518 "%s rxtx%d", device_get_nameunit(sc->dev), i); 4519 intr->intr_desc = intr->intr_desc0; 4520 } 4521 4522 if (ring_agg != ring_max) { 4523 if (ring_max == sc->tx_ring_msix) 4524 ix_conf_tx_msix(sc, i, &x, offset); 4525 else 4526 ix_conf_rx_msix(sc, i, &x, offset); 4527 } 4528 } 4529 4530 /* 4531 * Status MSI-X 4532 */ 4533 KKASSERT(x < sc->intr_cnt); 4534 sc->sts_msix_vec = x; 4535 4536 intr = &sc->intr_data[x++]; 4537 4538 intr->intr_serialize = &sc->main_serialize; 4539 intr->intr_func = ix_msix_status; 4540 intr->intr_funcarg = sc; 4541 intr->intr_cpuid = 0; 4542 intr->intr_use = IX_INTR_USE_STATUS; 4543 4544 ksnprintf(intr->intr_desc0, 
sizeof(intr->intr_desc0), "%s sts", 4545 device_get_nameunit(sc->dev)); 4546 intr->intr_desc = intr->intr_desc0; 4547 4548 KKASSERT(x == sc->intr_cnt); 4549 4550 error = pci_setup_msix(sc->dev); 4551 if (error) { 4552 device_printf(sc->dev, "Setup MSI-X failed\n"); 4553 goto back; 4554 } 4555 setup = TRUE; 4556 4557 for (i = 0; i < sc->intr_cnt; ++i) { 4558 intr = &sc->intr_data[i]; 4559 4560 error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid, 4561 intr->intr_cpuid); 4562 if (error) { 4563 device_printf(sc->dev, 4564 "Unable to allocate MSI-X %d on cpu%d\n", i, 4565 intr->intr_cpuid); 4566 goto back; 4567 } 4568 4569 intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 4570 &intr->intr_rid, RF_ACTIVE); 4571 if (intr->intr_res == NULL) { 4572 device_printf(sc->dev, 4573 "Unable to allocate MSI-X %d resource\n", i); 4574 error = ENOMEM; 4575 goto back; 4576 } 4577 } 4578 4579 pci_enable_msix(sc->dev); 4580 sc->intr_type = PCI_INTR_TYPE_MSIX; 4581 back: 4582 if (error) 4583 ix_free_msix(sc, setup); 4584 } 4585 4586 static void 4587 ix_free_msix(struct ix_softc *sc, boolean_t setup) 4588 { 4589 int i; 4590 4591 KKASSERT(sc->intr_cnt > 1); 4592 4593 for (i = 0; i < sc->intr_cnt; ++i) { 4594 struct ix_intr_data *intr = &sc->intr_data[i]; 4595 4596 if (intr->intr_res != NULL) { 4597 bus_release_resource(sc->dev, SYS_RES_IRQ, 4598 intr->intr_rid, intr->intr_res); 4599 } 4600 if (intr->intr_rid >= 0) 4601 pci_release_msix_vector(sc->dev, intr->intr_rid); 4602 } 4603 if (setup) 4604 pci_teardown_msix(sc->dev); 4605 4606 sc->intr_cnt = 0; 4607 kfree(sc->intr_data, M_DEVBUF); 4608 sc->intr_data = NULL; 4609 } 4610 4611 static void 4612 ix_conf_rx_msix(struct ix_softc *sc, int i, int *x0, int offset) 4613 { 4614 int x = *x0; 4615 4616 for (; i < sc->rx_ring_msix; ++i) { 4617 struct ix_rx_ring *rxr = &sc->rx_rings[i]; 4618 struct ix_intr_data *intr; 4619 4620 KKASSERT(x < sc->intr_cnt); 4621 rxr->rx_intr_vec = x; 4622 ix_setup_msix_eims(sc, x, &rxr->rx_eims, &rxr->rx_eims_val); 4623 4624 intr = &sc->intr_data[x++]; 4625 4626 intr->intr_serialize = &rxr->rx_serialize; 4627 intr->intr_func = ix_msix_rx; 4628 intr->intr_funcarg = rxr; 4629 intr->intr_rate = IX_MSIX_RX_RATE; 4630 intr->intr_use = IX_INTR_USE_RX; 4631 4632 intr->intr_cpuid = i + offset; 4633 KKASSERT(intr->intr_cpuid < ncpus2); 4634 4635 ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s rx%d", 4636 device_get_nameunit(sc->dev), i); 4637 intr->intr_desc = intr->intr_desc0; 4638 } 4639 *x0 = x; 4640 } 4641 4642 static void 4643 ix_conf_tx_msix(struct ix_softc *sc, int i, int *x0, int offset) 4644 { 4645 int x = *x0; 4646 4647 for (; i < sc->tx_ring_msix; ++i) { 4648 struct ix_tx_ring *txr = &sc->tx_rings[i]; 4649 struct ix_intr_data *intr; 4650 4651 KKASSERT(x < sc->intr_cnt); 4652 txr->tx_intr_vec = x; 4653 ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val); 4654 4655 intr = &sc->intr_data[x++]; 4656 4657 intr->intr_serialize = &txr->tx_serialize; 4658 intr->intr_func = ix_msix_tx; 4659 intr->intr_funcarg = txr; 4660 intr->intr_rate = IX_MSIX_TX_RATE; 4661 intr->intr_use = IX_INTR_USE_TX; 4662 4663 intr->intr_cpuid = i + offset; 4664 KKASSERT(intr->intr_cpuid < ncpus2); 4665 txr->tx_intr_cpuid = intr->intr_cpuid; 4666 4667 ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d", 4668 device_get_nameunit(sc->dev), i); 4669 intr->intr_desc = intr->intr_desc0; 4670 } 4671 *x0 = x; 4672 } 4673 4674 static void 4675 ix_msix_rx(void *xrxr) 4676 { 4677 struct ix_rx_ring *rxr = xrxr; 4678 4679 
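	/*
	 * Descriptive note (added): per-RX-ring MSI-X handler.  The ring's
	 * serializer is already held here (as asserted below), so the ring
	 * can be drained directly; the vector is then re-armed by writing
	 * its bit back into the EIMS register.
	 */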
ASSERT_SERIALIZED(&rxr->rx_serialize); 4680 4681 ix_rxeof(rxr, -1); 4682 IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val); 4683 } 4684 4685 static void 4686 ix_msix_tx(void *xtxr) 4687 { 4688 struct ix_tx_ring *txr = xtxr; 4689 4690 ASSERT_SERIALIZED(&txr->tx_serialize); 4691 4692 ix_txeof(txr, *(txr->tx_hdr)); 4693 if (!ifsq_is_empty(txr->tx_ifsq)) 4694 ifsq_devstart(txr->tx_ifsq); 4695 IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val); 4696 } 4697 4698 static void 4699 ix_msix_rxtx(void *xrxr) 4700 { 4701 struct ix_rx_ring *rxr = xrxr; 4702 struct ix_tx_ring *txr; 4703 int hdr; 4704 4705 ASSERT_SERIALIZED(&rxr->rx_serialize); 4706 4707 ix_rxeof(rxr, -1); 4708 4709 /* 4710 * NOTE: 4711 * Since tx_next_clean is only changed by ix_txeof(), 4712 * which is called only in interrupt handler, the 4713 * check w/o holding tx serializer is MPSAFE. 4714 */ 4715 txr = rxr->rx_txr; 4716 hdr = *(txr->tx_hdr); 4717 if (hdr != txr->tx_next_clean) { 4718 lwkt_serialize_enter(&txr->tx_serialize); 4719 ix_txeof(txr, hdr); 4720 if (!ifsq_is_empty(txr->tx_ifsq)) 4721 ifsq_devstart(txr->tx_ifsq); 4722 lwkt_serialize_exit(&txr->tx_serialize); 4723 } 4724 4725 IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val); 4726 } 4727 4728 static void 4729 ix_intr_status(struct ix_softc *sc, uint32_t eicr) 4730 { 4731 struct ixgbe_hw *hw = &sc->hw; 4732 4733 /* Link status change */ 4734 if (eicr & IXGBE_EICR_LSC) 4735 ix_handle_link(sc); 4736 4737 if (hw->mac.type != ixgbe_mac_82598EB) { 4738 if (eicr & IXGBE_EICR_ECC) 4739 if_printf(&sc->arpcom.ac_if, "ECC ERROR!! Reboot!!\n"); 4740 else if (eicr & IXGBE_EICR_GPI_SDP1) 4741 ix_handle_msf(sc); 4742 else if (eicr & IXGBE_EICR_GPI_SDP2) 4743 ix_handle_mod(sc); 4744 } 4745 4746 /* Check for fan failure */ 4747 if (hw->device_id == IXGBE_DEV_ID_82598AT && 4748 (eicr & IXGBE_EICR_GPI_SDP1)) 4749 if_printf(&sc->arpcom.ac_if, "FAN FAILURE!! Replace!!\n"); 4750 4751 /* Check for over temp condition */ 4752 if (hw->mac.type == ixgbe_mac_X540 && (eicr & IXGBE_EICR_TS)) { 4753 if_printf(&sc->arpcom.ac_if, "OVER TEMP!! " 4754 "PHY IS SHUT DOWN!! 
Reboot\n"); 4755 } 4756 } 4757 4758 static void 4759 ix_msix_status(void *xsc) 4760 { 4761 struct ix_softc *sc = xsc; 4762 uint32_t eicr; 4763 4764 ASSERT_SERIALIZED(&sc->main_serialize); 4765 4766 eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR); 4767 ix_intr_status(sc, eicr); 4768 4769 IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask); 4770 } 4771 4772 static void 4773 ix_setup_msix_eims(const struct ix_softc *sc, int x, 4774 uint32_t *eims, uint32_t *eims_val) 4775 { 4776 if (x < 32) { 4777 if (sc->hw.mac.type == ixgbe_mac_82598EB) { 4778 KASSERT(x < IX_MAX_MSIX_82598, 4779 ("%s: invalid vector %d for 82598", 4780 device_get_nameunit(sc->dev), x)); 4781 *eims = IXGBE_EIMS; 4782 } else { 4783 *eims = IXGBE_EIMS_EX(0); 4784 } 4785 *eims_val = 1 << x; 4786 } else { 4787 KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d", 4788 device_get_nameunit(sc->dev), x)); 4789 KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB, 4790 ("%s: invalid vector %d for 82598", 4791 device_get_nameunit(sc->dev), x)); 4792 *eims = IXGBE_EIMS_EX(1); 4793 *eims_val = 1 << (x - 32); 4794 } 4795 } 4796 4797 #ifdef IFPOLL_ENABLE 4798 4799 static void 4800 ix_npoll_status(struct ifnet *ifp) 4801 { 4802 struct ix_softc *sc = ifp->if_softc; 4803 uint32_t eicr; 4804 4805 ASSERT_SERIALIZED(&sc->main_serialize); 4806 4807 eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR); 4808 ix_intr_status(sc, eicr); 4809 } 4810 4811 static void 4812 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused) 4813 { 4814 struct ix_tx_ring *txr = arg; 4815 4816 ASSERT_SERIALIZED(&txr->tx_serialize); 4817 4818 ix_txeof(txr, *(txr->tx_hdr)); 4819 if (!ifsq_is_empty(txr->tx_ifsq)) 4820 ifsq_devstart(txr->tx_ifsq); 4821 } 4822 4823 static void 4824 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle) 4825 { 4826 struct ix_rx_ring *rxr = arg; 4827 4828 ASSERT_SERIALIZED(&rxr->rx_serialize); 4829 4830 ix_rxeof(rxr, cycle); 4831 } 4832 4833 static void 4834 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info) 4835 { 4836 struct ix_softc *sc = ifp->if_softc; 4837 int i, txr_cnt, rxr_cnt; 4838 4839 ASSERT_IFNET_SERIALIZED_ALL(ifp); 4840 4841 if (info) { 4842 int off; 4843 4844 info->ifpi_status.status_func = ix_npoll_status; 4845 info->ifpi_status.serializer = &sc->main_serialize; 4846 4847 txr_cnt = ix_get_txring_inuse(sc, TRUE); 4848 off = sc->tx_npoll_off; 4849 for (i = 0; i < txr_cnt; ++i) { 4850 struct ix_tx_ring *txr = &sc->tx_rings[i]; 4851 int idx = i + off; 4852 4853 KKASSERT(idx < ncpus2); 4854 info->ifpi_tx[idx].poll_func = ix_npoll_tx; 4855 info->ifpi_tx[idx].arg = txr; 4856 info->ifpi_tx[idx].serializer = &txr->tx_serialize; 4857 ifsq_set_cpuid(txr->tx_ifsq, idx); 4858 } 4859 4860 rxr_cnt = ix_get_rxring_inuse(sc, TRUE); 4861 off = sc->rx_npoll_off; 4862 for (i = 0; i < rxr_cnt; ++i) { 4863 struct ix_rx_ring *rxr = &sc->rx_rings[i]; 4864 int idx = i + off; 4865 4866 KKASSERT(idx < ncpus2); 4867 info->ifpi_rx[idx].poll_func = ix_npoll_rx; 4868 info->ifpi_rx[idx].arg = rxr; 4869 info->ifpi_rx[idx].serializer = &rxr->rx_serialize; 4870 } 4871 4872 if (ifp->if_flags & IFF_RUNNING) { 4873 if (rxr_cnt == sc->rx_ring_inuse && 4874 txr_cnt == sc->tx_ring_inuse) { 4875 ix_set_timer_cpuid(sc, TRUE); 4876 ix_disable_intr(sc); 4877 } else { 4878 ix_init(sc); 4879 } 4880 } 4881 } else { 4882 for (i = 0; i < sc->tx_ring_cnt; ++i) { 4883 struct ix_tx_ring *txr = &sc->tx_rings[i]; 4884 4885 ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid); 4886 } 4887 4888 if (ifp->if_flags & IFF_RUNNING) { 4889 txr_cnt = ix_get_txring_inuse(sc, FALSE); 4890 rxr_cnt = 
ix_get_rxring_inuse(sc, FALSE); 4891 4892 if (rxr_cnt == sc->rx_ring_inuse && 4893 txr_cnt == sc->tx_ring_inuse) { 4894 ix_set_timer_cpuid(sc, FALSE); 4895 ix_enable_intr(sc); 4896 } else { 4897 ix_init(sc); 4898 } 4899 } 4900 } 4901 } 4902 4903 static int 4904 ix_sysctl_npoll_rxoff(SYSCTL_HANDLER_ARGS) 4905 { 4906 struct ix_softc *sc = (void *)arg1; 4907 struct ifnet *ifp = &sc->arpcom.ac_if; 4908 int error, off; 4909 4910 off = sc->rx_npoll_off; 4911 error = sysctl_handle_int(oidp, &off, 0, req); 4912 if (error || req->newptr == NULL) 4913 return error; 4914 if (off < 0) 4915 return EINVAL; 4916 4917 ifnet_serialize_all(ifp); 4918 if (off >= ncpus2 || off % sc->rx_ring_cnt != 0) { 4919 error = EINVAL; 4920 } else { 4921 error = 0; 4922 sc->rx_npoll_off = off; 4923 } 4924 ifnet_deserialize_all(ifp); 4925 4926 return error; 4927 } 4928 4929 static int 4930 ix_sysctl_npoll_txoff(SYSCTL_HANDLER_ARGS) 4931 { 4932 struct ix_softc *sc = (void *)arg1; 4933 struct ifnet *ifp = &sc->arpcom.ac_if; 4934 int error, off; 4935 4936 off = sc->tx_npoll_off; 4937 error = sysctl_handle_int(oidp, &off, 0, req); 4938 if (error || req->newptr == NULL) 4939 return error; 4940 if (off < 0) 4941 return EINVAL; 4942 4943 ifnet_serialize_all(ifp); 4944 if (off >= ncpus2 || off % sc->tx_ring_cnt != 0) { 4945 error = EINVAL; 4946 } else { 4947 error = 0; 4948 sc->tx_npoll_off = off; 4949 } 4950 ifnet_deserialize_all(ifp); 4951 4952 return error; 4953 } 4954 4955 #endif /* IFPOLL_ENABLE */ 4956 4957 static enum ixgbe_fc_mode 4958 ix_ifmedia2fc(int ifm) 4959 { 4960 int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); 4961 4962 switch (fc_opt) { 4963 case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE): 4964 return ixgbe_fc_full; 4965 4966 case IFM_ETH_RXPAUSE: 4967 return ixgbe_fc_rx_pause; 4968 4969 case IFM_ETH_TXPAUSE: 4970 return ixgbe_fc_tx_pause; 4971 4972 default: 4973 return ixgbe_fc_none; 4974 } 4975 } 4976 4977 static const char * 4978 ix_ifmedia2str(int ifm) 4979 { 4980 int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); 4981 4982 switch (fc_opt) { 4983 case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE): 4984 return IFM_ETH_FC_FULL; 4985 4986 case IFM_ETH_RXPAUSE: 4987 return IFM_ETH_FC_RXPAUSE; 4988 4989 case IFM_ETH_TXPAUSE: 4990 return IFM_ETH_FC_TXPAUSE; 4991 4992 default: 4993 return IFM_ETH_FC_NONE; 4994 } 4995 } 4996 4997 static const char * 4998 ix_fc2str(enum ixgbe_fc_mode fc) 4999 { 5000 switch (fc) { 5001 case ixgbe_fc_full: 5002 return IFM_ETH_FC_FULL; 5003 5004 case ixgbe_fc_rx_pause: 5005 return IFM_ETH_FC_RXPAUSE; 5006 5007 case ixgbe_fc_tx_pause: 5008 return IFM_ETH_FC_TXPAUSE; 5009 5010 default: 5011 return IFM_ETH_FC_NONE; 5012 } 5013 } 5014
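
/*
 * Illustrative sketch only (added, not part of the driver): how the flow
 * control helpers above are meant to compose.  ix_ifmedia2fc() reduces the
 * IFM_ETH_RXPAUSE/IFM_ETH_TXPAUSE bits of an ifmedia word to an
 * ixgbe_fc_mode, while ix_ifmedia2str()/ix_fc2str() produce the matching
 * IFM_ETH_FC_* strings for logging.  The caller below is hypothetical; only
 * the shared code's hw.fc.requested_mode field is assumed.
 */
#if 0
static void
ix_example_request_fc(struct ix_softc *sc, int ifm)
{
	/* Map the requested pause bits to the shared code's fc mode. */
	sc->hw.fc.requested_mode = ix_ifmedia2fc(ifm);

	if (bootverbose) {
		if_printf(&sc->arpcom.ac_if, "requested flow control: %s\n",
		    ix_ifmedia2str(ifm));
	}
}
#endif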