1 /****************************************************************************** 2 3 Copyright (c) 2006-2013, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $ 29 30 ***************************************************************************/ 31 32 #include "opt_inet.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/linker.h> 37 #include <sys/firmware.h> 38 #include <sys/endian.h> 39 #include <sys/in_cksum.h> 40 #include <sys/sockio.h> 41 #include <sys/mbuf.h> 42 #include <sys/malloc.h> 43 #include <sys/kernel.h> 44 #include <sys/module.h> 45 #include <sys/serialize.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 49 #include <net/if.h> 50 #include <net/if_arp.h> 51 #include <net/ifq_var.h> 52 #include <net/ethernet.h> 53 #include <net/if_dl.h> 54 #include <net/if_media.h> 55 56 #include <net/bpf.h> 57 58 #include <net/if_types.h> 59 #include <net/vlan/if_vlan_var.h> 60 #include <net/zlib.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <sys/bus.h> 68 #include <sys/rman.h> 69 70 #include <bus/pci/pcireg.h> 71 #include <bus/pci/pcivar.h> 72 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */ 73 74 #include <vm/vm.h> /* for pmap_mapdev() */ 75 #include <vm/pmap.h> 76 77 #if defined(__i386__) || defined(__x86_64__) 78 #include <machine/specialreg.h> 79 #endif 80 81 #include <dev/netif/mxge/mxge_mcp.h> 82 #include <dev/netif/mxge/mcp_gen_header.h> 83 #include <dev/netif/mxge/if_mxge_var.h> 84 85 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD) 86 87 /* tunable params */ 88 static int mxge_nvidia_ecrc_enable = 1; 89 static int mxge_force_firmware = 0; 90 static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY; 91 static int mxge_deassert_wait = 1; 92 static int mxge_flow_control = 1; 93 static int mxge_ticks; 94 static int mxge_max_slices = 1; 95 static int mxge_always_promisc = 0; 96 static int mxge_throttle = 0; 97 static int mxge_msi_enable = 1; 98 99 static const char *mxge_fw_unaligned = "mxge_ethp_z8e"; 100 static const char *mxge_fw_aligned = "mxge_eth_z8e"; 101 static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 102 static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 103 104 TUNABLE_INT("hw.mxge.max_slices", &mxge_max_slices); 105 TUNABLE_INT("hw.mxge.flow_control_enabled", &mxge_flow_control); 106 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay); 107 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable); 108 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware); 109 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait); 110 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks); 111 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc); 112 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle); 113 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable); 114 115 static int mxge_probe(device_t dev); 116 static int mxge_attach(device_t dev); 117 static int mxge_detach(device_t dev); 118 static int mxge_shutdown(device_t dev); 119 120 static device_method_t mxge_methods[] = { 121 /* Device interface */ 122 DEVMETHOD(device_probe, mxge_probe), 123 DEVMETHOD(device_attach, mxge_attach), 124 DEVMETHOD(device_detach, mxge_detach), 125 DEVMETHOD(device_shutdown, mxge_shutdown), 126 DEVMETHOD_END 127 }; 128 129 static driver_t mxge_driver = { 130 "mxge", 131 mxge_methods, 132 sizeof(mxge_softc_t), 133 }; 134 135 static devclass_t mxge_devclass; 136 137 /* Declare ourselves to be a child of the PCI bus.*/ 138 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL); 139 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 140 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 141 142 static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 143 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 144 static void mxge_close(mxge_softc_t *sc, int down); 145 static int mxge_open(mxge_softc_t *sc); 146 static void mxge_tick(void *arg); 147 static void mxge_watchdog_reset(mxge_softc_t *sc); 148 static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice); 149 150 static int 151 mxge_probe(device_t dev) 152 { 153 if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM && 154 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E || 155 pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) { 156 int rev = pci_get_revid(dev); 157 158 switch (rev) { 159 case MXGE_PCI_REV_Z8E: 160 device_set_desc(dev, "Myri10G-PCIE-8A"); 161 break; 162 case MXGE_PCI_REV_Z8ES: 163 device_set_desc(dev, "Myri10G-PCIE-8B"); 164 break; 165 default: 166 device_set_desc(dev, "Myri10G-PCIE-8??"); 167 device_printf(dev, "Unrecognized rev %d NIC\n", rev); 168 break; 169 } 170 return 0; 171 } 172 return ENXIO; 173 } 174 175 static void 176 mxge_enable_wc(mxge_softc_t *sc) 177 { 178 #if defined(__i386__) || defined(__x86_64__) 179 vm_offset_t len; 180 181 sc->wc = 1; 182 len = rman_get_size(sc->mem_res); 183 pmap_change_attr((vm_offset_t) sc->sram, len / PAGE_SIZE, 184 PAT_WRITE_COMBINING); 185 #endif 186 } 187 188 static int 189 mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes, 190 bus_size_t alignment) 191 { 192 bus_size_t boundary; 193 int err; 194 195 if (bytes > 4096 && alignment == 4096) 196 boundary = 0; 197 else 198 boundary = 4096; 199 200 err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary, 201 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes, 202 BUS_DMA_WAITOK | BUS_DMA_ZERO, dma); 203 if (err != 0) { 204 device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err); 205 return err; 206 } 207 return 0; 208 } 209 210 static void 211 mxge_dma_free(bus_dmamem_t *dma) 212 { 213 bus_dmamap_unload(dma->dmem_tag, dma->dmem_map); 214 bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map); 215 bus_dma_tag_destroy(dma->dmem_tag); 216 } 217 218 /* 219 * The eeprom strings on the lanaiX have the format 220 * SN=x\0 221 * MAC=x:x:x:x:x:x\0 222 * PC=text\0 223 */ 224 static int 225 mxge_parse_strings(mxge_softc_t *sc) 226 { 227 const char *ptr; 228 int i, found_mac, found_sn2; 229 char *endptr; 230 231 ptr = sc->eeprom_strings; 232 found_mac = 0; 233 found_sn2 = 0; 234 while (*ptr != '\0') { 235 if (strncmp(ptr, "MAC=", 4) == 0) { 236 ptr += 4; 237 for (i = 0;;) { 238 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 239 if (endptr - ptr != 2) 240 goto abort; 241 ptr = endptr; 242 if (++i == 6) 243 break; 244 if (*ptr++ != ':') 245 goto abort; 246 } 247 found_mac = 1; 248 } else if (strncmp(ptr, "PC=", 3) == 0) { 249 ptr += 3; 250 strlcpy(sc->product_code_string, ptr, 251 sizeof(sc->product_code_string)); 252 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 253 ptr += 3; 254 strlcpy(sc->serial_number_string, ptr, 255 sizeof(sc->serial_number_string)); 256 } else if (strncmp(ptr, "SN2=", 4) == 0) { 257 /* SN2 takes precedence over SN */ 258 ptr += 4; 259 found_sn2 = 1; 260 strlcpy(sc->serial_number_string, ptr, 261 sizeof(sc->serial_number_string)); 262 } 263 while (*ptr++ != '\0') {} 264 } 265 266 if (found_mac) 267 return 0; 268 269 abort: 270 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 271 return ENXIO; 272 } 273 274 #if defined(__i386__) || defined(__x86_64__) 275 276 static void 277 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 278 { 279 uint32_t val; 280 unsigned long base, off; 281 char *va, *cfgptr; 282 device_t pdev, mcp55; 283 uint16_t vendor_id, device_id, word; 284 uintptr_t bus, slot, func, ivend, idev; 285 uint32_t *ptr32; 286 287 if (!mxge_nvidia_ecrc_enable) 288 return; 289 290 pdev = device_get_parent(device_get_parent(sc->dev)); 291 if (pdev == NULL) { 292 device_printf(sc->dev, "could not find parent?\n"); 293 return; 294 } 295 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 296 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 297 298 if (vendor_id != 0x10de) 299 return; 300 301 base = 0; 302 303 if (device_id == 0x005d) { 304 /* ck804, base address is magic */ 305 base = 0xe0000000UL; 306 } else if (device_id >= 0x0374 && device_id <= 0x378) { 307 /* mcp55, base address stored in chipset */ 308 mcp55 = pci_find_bsf(0, 0, 0); 309 if (mcp55 && 310 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 311 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 312 word = pci_read_config(mcp55, 0x90, 2); 313 base = ((unsigned long)word & 0x7ffeU) << 25; 314 } 315 } 316 if (!base) 317 return; 318 319 /* 320 * XXXX 321 * Test below is commented because it is believed that doing 322 * config read/write beyond 0xff will access the config space 323 * for the next larger function. Uncomment this and remove 324 * the hacky pmap_mapdev() way of accessing config space when 325 * DragonFly grows support for extended pcie config space access. 326 */ 327 #if 0 328 /* 329 * See if we can, by some miracle, access the extended 330 * config space 331 */ 332 val = pci_read_config(pdev, 0x178, 4); 333 if (val != 0xffffffff) { 334 val |= 0x40; 335 pci_write_config(pdev, 0x178, val, 4); 336 return; 337 } 338 #endif 339 /* 340 * Rather than using normal pci config space writes, we must 341 * map the Nvidia config space ourselves. This is because on 342 * opteron/nvidia class machine the 0xe000000 mapping is 343 * handled by the nvidia chipset, that means the internal PCI 344 * device (the on-chip northbridge), or the amd-8131 bridge 345 * and things behind them are not visible by this method. 346 */ 347 348 BUS_READ_IVAR(device_get_parent(pdev), pdev, 349 PCI_IVAR_BUS, &bus); 350 BUS_READ_IVAR(device_get_parent(pdev), pdev, 351 PCI_IVAR_SLOT, &slot); 352 BUS_READ_IVAR(device_get_parent(pdev), pdev, 353 PCI_IVAR_FUNCTION, &func); 354 BUS_READ_IVAR(device_get_parent(pdev), pdev, 355 PCI_IVAR_VENDOR, &ivend); 356 BUS_READ_IVAR(device_get_parent(pdev), pdev, 357 PCI_IVAR_DEVICE, &idev); 358 359 off = base + 0x00100000UL * (unsigned long)bus + 360 0x00001000UL * (unsigned long)(func + 8 * slot); 361 362 /* map it into the kernel */ 363 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 364 if (va == NULL) { 365 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 366 return; 367 } 368 /* get a pointer to the config space mapped into the kernel */ 369 cfgptr = va + (off & PAGE_MASK); 370 371 /* make sure that we can really access it */ 372 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 373 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 374 if (!(vendor_id == ivend && device_id == idev)) { 375 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 376 vendor_id, device_id); 377 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 378 return; 379 } 380 381 ptr32 = (uint32_t*)(cfgptr + 0x178); 382 val = *ptr32; 383 384 if (val == 0xffffffff) { 385 device_printf(sc->dev, "extended mapping failed\n"); 386 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 387 return; 388 } 389 *ptr32 = val | 0x40; 390 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 391 if (bootverbose) { 392 device_printf(sc->dev, "Enabled ECRC on upstream " 393 "Nvidia bridge at %d:%d:%d\n", 394 (int)bus, (int)slot, (int)func); 395 } 396 } 397 398 #else /* __i386__ || __x86_64__ */ 399 400 static void 401 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 402 { 403 device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n"); 404 } 405 406 #endif 407 408 static int 409 mxge_dma_test(mxge_softc_t *sc, int test_type) 410 { 411 mxge_cmd_t cmd; 412 bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr; 413 int status; 414 uint32_t len; 415 const char *test = " "; 416 417 /* 418 * Run a small DMA test. 419 * The magic multipliers to the length tell the firmware 420 * to do DMA read, write, or read+write tests. The 421 * results are returned in cmd.data0. The upper 16 422 * bits of the return is the number of transfers completed. 423 * The lower 16 bits is the time in 0.5us ticks that the 424 * transfers took to complete. 425 */ 426 427 len = sc->tx_boundary; 428 429 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 430 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 431 cmd.data2 = len * 0x10000; 432 status = mxge_send_cmd(sc, test_type, &cmd); 433 if (status != 0) { 434 test = "read"; 435 goto abort; 436 } 437 sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 438 439 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 440 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 441 cmd.data2 = len * 0x1; 442 status = mxge_send_cmd(sc, test_type, &cmd); 443 if (status != 0) { 444 test = "write"; 445 goto abort; 446 } 447 sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff); 448 449 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 450 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 451 cmd.data2 = len * 0x10001; 452 status = mxge_send_cmd(sc, test_type, &cmd); 453 if (status != 0) { 454 test = "read/write"; 455 goto abort; 456 } 457 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 458 (cmd.data0 & 0xffff); 459 460 abort: 461 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) { 462 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 463 test, status); 464 } 465 return status; 466 } 467 468 /* 469 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 470 * when the PCI-E Completion packets are aligned on an 8-byte 471 * boundary. Some PCI-E chip sets always align Completion packets; on 472 * the ones that do not, the alignment can be enforced by enabling 473 * ECRC generation (if supported). 474 * 475 * When PCI-E Completion packets are not aligned, it is actually more 476 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 477 * 478 * If the driver can neither enable ECRC nor verify that it has 479 * already been enabled, then it must use a firmware image which works 480 * around unaligned completion packets (ethp_z8e.dat), and it should 481 * also ensure that it never gives the device a Read-DMA which is 482 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 483 * enabled, then the driver should use the aligned (eth_z8e.dat) 484 * firmware image, and set tx_boundary to 4KB. 485 */ 486 static int 487 mxge_firmware_probe(mxge_softc_t *sc) 488 { 489 device_t dev = sc->dev; 490 int reg, status; 491 uint16_t pectl; 492 493 sc->tx_boundary = 4096; 494 495 /* 496 * Verify the max read request size was set to 4KB 497 * before trying the test with 4KB. 498 */ 499 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 500 pectl = pci_read_config(dev, reg + 0x8, 2); 501 if ((pectl & (5 << 12)) != (5 << 12)) { 502 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n", 503 pectl); 504 sc->tx_boundary = 2048; 505 } 506 } 507 508 /* 509 * Load the optimized firmware (which assumes aligned PCIe 510 * completions) in order to see if it works on this host. 511 */ 512 sc->fw_name = mxge_fw_aligned; 513 status = mxge_load_firmware(sc, 1); 514 if (status != 0) 515 return status; 516 517 /* 518 * Enable ECRC if possible 519 */ 520 mxge_enable_nvidia_ecrc(sc); 521 522 /* 523 * Run a DMA test which watches for unaligned completions and 524 * aborts on the first one seen. Not required on Z8ES or newer. 525 */ 526 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 527 return 0; 528 529 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 530 if (status == 0) 531 return 0; /* keep the aligned firmware */ 532 533 if (status != E2BIG) 534 device_printf(dev, "DMA test failed: %d\n", status); 535 if (status == ENOSYS) { 536 device_printf(dev, "Falling back to ethp! " 537 "Please install up to date fw\n"); 538 } 539 return status; 540 } 541 542 static int 543 mxge_select_firmware(mxge_softc_t *sc) 544 { 545 int aligned = 0; 546 int force_firmware = mxge_force_firmware; 547 548 if (sc->throttle) 549 force_firmware = sc->throttle; 550 551 if (force_firmware != 0) { 552 if (force_firmware == 1) 553 aligned = 1; 554 else 555 aligned = 0; 556 if (bootverbose) { 557 device_printf(sc->dev, 558 "Assuming %s completions (forced)\n", 559 aligned ? "aligned" : "unaligned"); 560 } 561 goto abort; 562 } 563 564 /* 565 * If the PCIe link width is 4 or less, we can use the aligned 566 * firmware and skip any checks 567 */ 568 if (sc->link_width != 0 && sc->link_width <= 4) { 569 device_printf(sc->dev, "PCIe x%d Link, " 570 "expect reduced performance\n", sc->link_width); 571 aligned = 1; 572 goto abort; 573 } 574 575 if (mxge_firmware_probe(sc) == 0) 576 return 0; 577 578 abort: 579 if (aligned) { 580 sc->fw_name = mxge_fw_aligned; 581 sc->tx_boundary = 4096; 582 } else { 583 sc->fw_name = mxge_fw_unaligned; 584 sc->tx_boundary = 2048; 585 } 586 return mxge_load_firmware(sc, 0); 587 } 588 589 static int 590 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 591 { 592 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 593 if_printf(sc->ifp, "Bad firmware type: 0x%x\n", 594 be32toh(hdr->mcp_type)); 595 return EIO; 596 } 597 598 /* Save firmware version for sysctl */ 599 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 600 if (bootverbose) 601 if_printf(sc->ifp, "firmware id: %s\n", hdr->version); 602 603 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 604 &sc->fw_ver_minor, &sc->fw_ver_tiny); 605 606 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR && 607 sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 608 if_printf(sc->ifp, "Found firmware version %s\n", 609 sc->fw_version); 610 if_printf(sc->ifp, "Driver needs %d.%d\n", 611 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 612 return EINVAL; 613 } 614 return 0; 615 } 616 617 static void * 618 z_alloc(void *nil, u_int items, u_int size) 619 { 620 return kmalloc(items * size, M_TEMP, M_WAITOK); 621 } 622 623 static void 624 z_free(void *nil, void *ptr) 625 { 626 kfree(ptr, M_TEMP); 627 } 628 629 static int 630 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 631 { 632 z_stream zs; 633 char *inflate_buffer; 634 const struct firmware *fw; 635 const mcp_gen_header_t *hdr; 636 unsigned hdr_offset; 637 int status; 638 unsigned int i; 639 char dummy; 640 size_t fw_len; 641 642 fw = firmware_get(sc->fw_name); 643 if (fw == NULL) { 644 if_printf(sc->ifp, "Could not find firmware image %s\n", 645 sc->fw_name); 646 return ENOENT; 647 } 648 649 /* Setup zlib and decompress f/w */ 650 bzero(&zs, sizeof(zs)); 651 zs.zalloc = z_alloc; 652 zs.zfree = z_free; 653 status = inflateInit(&zs); 654 if (status != Z_OK) { 655 status = EIO; 656 goto abort_with_fw; 657 } 658 659 /* 660 * The uncompressed size is stored as the firmware version, 661 * which would otherwise go unused 662 */ 663 fw_len = (size_t)fw->version; 664 inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK); 665 zs.avail_in = fw->datasize; 666 zs.next_in = __DECONST(char *, fw->data); 667 zs.avail_out = fw_len; 668 zs.next_out = inflate_buffer; 669 status = inflate(&zs, Z_FINISH); 670 if (status != Z_STREAM_END) { 671 if_printf(sc->ifp, "zlib %d\n", status); 672 status = EIO; 673 goto abort_with_buffer; 674 } 675 676 /* Check id */ 677 hdr_offset = 678 htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET)); 679 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 680 if_printf(sc->ifp, "Bad firmware file"); 681 status = EIO; 682 goto abort_with_buffer; 683 } 684 hdr = (const void*)(inflate_buffer + hdr_offset); 685 686 status = mxge_validate_firmware(sc, hdr); 687 if (status != 0) 688 goto abort_with_buffer; 689 690 /* Copy the inflated firmware to NIC SRAM. */ 691 for (i = 0; i < fw_len; i += 256) { 692 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i, 693 min(256U, (unsigned)(fw_len - i))); 694 wmb(); 695 dummy = *sc->sram; 696 wmb(); 697 } 698 699 *limit = fw_len; 700 status = 0; 701 abort_with_buffer: 702 kfree(inflate_buffer, M_TEMP); 703 inflateEnd(&zs); 704 abort_with_fw: 705 firmware_put(fw, FIRMWARE_UNLOAD); 706 return status; 707 } 708 709 /* 710 * Enable or disable periodic RDMAs from the host to make certain 711 * chipsets resend dropped PCIe messages 712 */ 713 static void 714 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 715 { 716 char buf_bytes[72]; 717 volatile uint32_t *confirm; 718 volatile char *submit; 719 uint32_t *buf, dma_low, dma_high; 720 int i; 721 722 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 723 724 /* Clear confirmation addr */ 725 confirm = (volatile uint32_t *)sc->cmd; 726 *confirm = 0; 727 wmb(); 728 729 /* 730 * Send an rdma command to the PCIe engine, and wait for the 731 * response in the confirmation address. The firmware should 732 * write a -1 there to indicate it is alive and well 733 */ 734 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 735 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 736 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 737 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 738 buf[2] = htobe32(0xffffffff); /* confirm data */ 739 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 740 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr); 741 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 742 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 743 buf[5] = htobe32(enable); /* enable? */ 744 745 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 746 747 mxge_pio_copy(submit, buf, 64); 748 wmb(); 749 DELAY(1000); 750 wmb(); 751 i = 0; 752 while (*confirm != 0xffffffff && i < 20) { 753 DELAY(1000); 754 i++; 755 } 756 if (*confirm != 0xffffffff) { 757 if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)", 758 (enable ? "enable" : "disable"), confirm, *confirm); 759 } 760 } 761 762 static int 763 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 764 { 765 mcp_cmd_t *buf; 766 char buf_bytes[sizeof(*buf) + 8]; 767 volatile mcp_cmd_response_t *response = sc->cmd; 768 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 769 uint32_t dma_low, dma_high; 770 int err, sleep_total = 0; 771 772 /* Ensure buf is aligned to 8 bytes */ 773 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 774 775 buf->data0 = htobe32(data->data0); 776 buf->data1 = htobe32(data->data1); 777 buf->data2 = htobe32(data->data2); 778 buf->cmd = htobe32(cmd); 779 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 780 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 781 782 buf->response_addr.low = htobe32(dma_low); 783 buf->response_addr.high = htobe32(dma_high); 784 785 response->result = 0xffffffff; 786 wmb(); 787 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 788 789 /* 790 * Wait up to 20ms 791 */ 792 err = EAGAIN; 793 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 794 wmb(); 795 switch (be32toh(response->result)) { 796 case 0: 797 data->data0 = be32toh(response->data); 798 err = 0; 799 break; 800 case 0xffffffff: 801 DELAY(1000); 802 break; 803 case MXGEFW_CMD_UNKNOWN: 804 err = ENOSYS; 805 break; 806 case MXGEFW_CMD_ERROR_UNALIGNED: 807 err = E2BIG; 808 break; 809 case MXGEFW_CMD_ERROR_BUSY: 810 err = EBUSY; 811 break; 812 case MXGEFW_CMD_ERROR_I2C_ABSENT: 813 err = ENXIO; 814 break; 815 default: 816 if_printf(sc->ifp, "command %d failed, result = %d\n", 817 cmd, be32toh(response->result)); 818 err = ENXIO; 819 break; 820 } 821 if (err != EAGAIN) 822 break; 823 } 824 if (err == EAGAIN) { 825 if_printf(sc->ifp, "command %d timed out result = %d\n", 826 cmd, be32toh(response->result)); 827 } 828 return err; 829 } 830 831 static int 832 mxge_adopt_running_firmware(mxge_softc_t *sc) 833 { 834 struct mcp_gen_header *hdr; 835 const size_t bytes = sizeof(struct mcp_gen_header); 836 size_t hdr_offset; 837 int status; 838 839 /* 840 * Find running firmware header 841 */ 842 hdr_offset = 843 htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET)); 844 845 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 846 if_printf(sc->ifp, "Running firmware has bad header offset " 847 "(%zu)\n", hdr_offset); 848 return EIO; 849 } 850 851 /* 852 * Copy header of running firmware from SRAM to host memory to 853 * validate firmware 854 */ 855 hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK); 856 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 857 rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes); 858 status = mxge_validate_firmware(sc, hdr); 859 kfree(hdr, M_DEVBUF); 860 861 /* 862 * Check to see if adopted firmware has bug where adopting 863 * it will cause broadcasts to be filtered unless the NIC 864 * is kept in ALLMULTI mode 865 */ 866 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 867 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 868 sc->adopted_rx_filter_bug = 1; 869 if_printf(sc->ifp, "Adopting fw %d.%d.%d: " 870 "working around rx filter bug\n", 871 sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny); 872 } 873 874 return status; 875 } 876 877 static int 878 mxge_load_firmware(mxge_softc_t *sc, int adopt) 879 { 880 volatile uint32_t *confirm; 881 volatile char *submit; 882 char buf_bytes[72]; 883 uint32_t *buf, size, dma_low, dma_high; 884 int status, i; 885 886 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 887 888 size = sc->sram_size; 889 status = mxge_load_firmware_helper(sc, &size); 890 if (status) { 891 if (!adopt) 892 return status; 893 894 /* 895 * Try to use the currently running firmware, if 896 * it is new enough 897 */ 898 status = mxge_adopt_running_firmware(sc); 899 if (status) { 900 if_printf(sc->ifp, 901 "failed to adopt running firmware\n"); 902 return status; 903 } 904 if_printf(sc->ifp, "Successfully adopted running firmware\n"); 905 906 if (sc->tx_boundary == 4096) { 907 if_printf(sc->ifp, 908 "Using firmware currently running on NIC. " 909 "For optimal\n"); 910 if_printf(sc->ifp, "performance consider loading " 911 "optimized firmware\n"); 912 } 913 sc->fw_name = mxge_fw_unaligned; 914 sc->tx_boundary = 2048; 915 return 0; 916 } 917 918 /* Clear confirmation addr */ 919 confirm = (volatile uint32_t *)sc->cmd; 920 *confirm = 0; 921 wmb(); 922 923 /* 924 * Send a reload command to the bootstrap MCP, and wait for the 925 * response in the confirmation address. The firmware should 926 * write a -1 there to indicate it is alive and well 927 */ 928 929 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr); 930 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr); 931 932 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 933 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 934 buf[2] = htobe32(0xffffffff); /* confirm data */ 935 936 /* 937 * FIX: All newest firmware should un-protect the bottom of 938 * the sram before handoff. However, the very first interfaces 939 * do not. Therefore the handoff copy must skip the first 8 bytes 940 */ 941 /* where the code starts*/ 942 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 943 buf[4] = htobe32(size - 8); /* length of code */ 944 buf[5] = htobe32(8); /* where to copy to */ 945 buf[6] = htobe32(0); /* where to jump to */ 946 947 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 948 mxge_pio_copy(submit, buf, 64); 949 wmb(); 950 DELAY(1000); 951 wmb(); 952 i = 0; 953 while (*confirm != 0xffffffff && i < 20) { 954 DELAY(1000*10); 955 i++; 956 } 957 if (*confirm != 0xffffffff) { 958 if_printf(sc->ifp,"handoff failed (%p = 0x%x)", 959 confirm, *confirm); 960 return ENXIO; 961 } 962 return 0; 963 } 964 965 static int 966 mxge_update_mac_address(mxge_softc_t *sc) 967 { 968 mxge_cmd_t cmd; 969 uint8_t *addr = sc->mac_addr; 970 971 cmd.data0 = (addr[0] << 24) | (addr[1] << 16) | 972 (addr[2] << 8) | addr[3]; 973 cmd.data1 = (addr[4] << 8) | (addr[5]); 974 return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 975 } 976 977 static int 978 mxge_change_pause(mxge_softc_t *sc, int pause) 979 { 980 mxge_cmd_t cmd; 981 int status; 982 983 if (pause) 984 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd); 985 else 986 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd); 987 if (status) { 988 if_printf(sc->ifp, "Failed to set flow control mode\n"); 989 return ENXIO; 990 } 991 sc->pause = pause; 992 return 0; 993 } 994 995 static void 996 mxge_change_promisc(mxge_softc_t *sc, int promisc) 997 { 998 mxge_cmd_t cmd; 999 int status; 1000 1001 if (mxge_always_promisc) 1002 promisc = 1; 1003 1004 if (promisc) 1005 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd); 1006 else 1007 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd); 1008 if (status) 1009 if_printf(sc->ifp, "Failed to set promisc mode\n"); 1010 } 1011 1012 static void 1013 mxge_set_multicast_list(mxge_softc_t *sc) 1014 { 1015 mxge_cmd_t cmd; 1016 struct ifmultiaddr *ifma; 1017 struct ifnet *ifp = sc->ifp; 1018 int err; 1019 1020 /* This firmware is known to not support multicast */ 1021 if (!sc->fw_multicast_support) 1022 return; 1023 1024 /* Disable multicast filtering while we play with the lists*/ 1025 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1026 if (err != 0) { 1027 if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, " 1028 "error status: %d\n", err); 1029 return; 1030 } 1031 1032 if (sc->adopted_rx_filter_bug) 1033 return; 1034 1035 if (ifp->if_flags & IFF_ALLMULTI) { 1036 /* Request to disable multicast filtering, so quit here */ 1037 return; 1038 } 1039 1040 /* Flush all the filters */ 1041 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1042 if (err != 0) { 1043 if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, " 1044 "error status: %d\n", err); 1045 return; 1046 } 1047 1048 /* 1049 * Walk the multicast list, and add each address 1050 */ 1051 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1052 if (ifma->ifma_addr->sa_family != AF_LINK) 1053 continue; 1054 1055 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1056 &cmd.data0, 4); 1057 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1058 &cmd.data1, 2); 1059 cmd.data0 = htonl(cmd.data0); 1060 cmd.data1 = htonl(cmd.data1); 1061 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1062 if (err != 0) { 1063 if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, " 1064 "error status: %d\n", err); 1065 /* Abort, leaving multicast filtering off */ 1066 return; 1067 } 1068 } 1069 1070 /* Enable multicast filtering */ 1071 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1072 if (err != 0) { 1073 if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, " 1074 "error status: %d\n", err); 1075 } 1076 } 1077 1078 #if 0 1079 static int 1080 mxge_max_mtu(mxge_softc_t *sc) 1081 { 1082 mxge_cmd_t cmd; 1083 int status; 1084 1085 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1086 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1087 1088 /* try to set nbufs to see if it we can 1089 use virtually contiguous jumbos */ 1090 cmd.data0 = 0; 1091 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1092 &cmd); 1093 if (status == 0) 1094 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1095 1096 /* otherwise, we're limited to MJUMPAGESIZE */ 1097 return MJUMPAGESIZE - MXGEFW_PAD; 1098 } 1099 #endif 1100 1101 static int 1102 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1103 { 1104 struct mxge_slice_state *ss; 1105 mxge_rx_done_t *rx_done; 1106 volatile uint32_t *irq_claim; 1107 mxge_cmd_t cmd; 1108 int slice, status, rx_intr_size; 1109 1110 /* 1111 * Try to send a reset command to the card to see if it 1112 * is alive 1113 */ 1114 memset(&cmd, 0, sizeof (cmd)); 1115 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1116 if (status != 0) { 1117 if_printf(sc->ifp, "failed reset\n"); 1118 return ENXIO; 1119 } 1120 1121 mxge_dummy_rdma(sc, 1); 1122 1123 /* 1124 * Set the intrq size 1125 * XXX assume 4byte mcp_slot 1126 */ 1127 rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t); 1128 cmd.data0 = rx_intr_size; 1129 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1130 1131 /* 1132 * Even though we already know how many slices are supported 1133 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1134 * has magic side effects, and must be called after a reset. 1135 * It must be called prior to calling any RSS related cmds, 1136 * including assigning an interrupt queue for anything but 1137 * slice 0. It must also be called *after* 1138 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1139 * the firmware to compute offsets. 1140 */ 1141 if (sc->num_slices > 1) { 1142 /* Ask the maximum number of slices it supports */ 1143 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 1144 if (status != 0) { 1145 if_printf(sc->ifp, "failed to get number of slices\n"); 1146 return status; 1147 } 1148 1149 /* 1150 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1151 * to setting up the interrupt queue DMA 1152 */ 1153 cmd.data0 = sc->num_slices; 1154 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1155 #ifdef IFNET_BUF_RING 1156 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1157 #endif 1158 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd); 1159 if (status != 0) { 1160 if_printf(sc->ifp, "failed to set number of slices\n"); 1161 return status; 1162 } 1163 } 1164 1165 if (interrupts_setup) { 1166 /* Now exchange information about interrupts */ 1167 for (slice = 0; slice < sc->num_slices; slice++) { 1168 ss = &sc->ss[slice]; 1169 1170 rx_done = &ss->rx_data.rx_done; 1171 memset(rx_done->entry, 0, rx_intr_size); 1172 1173 cmd.data0 = 1174 MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1175 cmd.data1 = 1176 MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr); 1177 cmd.data2 = slice; 1178 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, 1179 &cmd); 1180 } 1181 } 1182 1183 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, 1184 &cmd); 1185 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1186 1187 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1188 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1189 1190 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd); 1191 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1192 1193 if (status != 0) { 1194 if_printf(sc->ifp, "failed set interrupt parameters\n"); 1195 return status; 1196 } 1197 1198 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1199 1200 /* Run a DMA benchmark */ 1201 mxge_dma_test(sc, MXGEFW_DMA_TEST); 1202 1203 for (slice = 0; slice < sc->num_slices; slice++) { 1204 ss = &sc->ss[slice]; 1205 1206 ss->irq_claim = irq_claim + (2 * slice); 1207 1208 /* Reset mcp/driver shared state back to 0 */ 1209 ss->rx_data.rx_done.idx = 0; 1210 ss->tx.req = 0; 1211 ss->tx.done = 0; 1212 ss->tx.pkt_done = 0; 1213 ss->tx.queue_active = 0; 1214 ss->tx.activate = 0; 1215 ss->tx.deactivate = 0; 1216 ss->rx_data.rx_big.cnt = 0; 1217 ss->rx_data.rx_small.cnt = 0; 1218 if (ss->fw_stats != NULL) 1219 bzero(ss->fw_stats, sizeof(*ss->fw_stats)); 1220 } 1221 sc->rdma_tags_available = 15; 1222 1223 status = mxge_update_mac_address(sc); 1224 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1225 mxge_change_pause(sc, sc->pause); 1226 mxge_set_multicast_list(sc); 1227 1228 if (sc->throttle) { 1229 cmd.data0 = sc->throttle; 1230 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd)) 1231 if_printf(sc->ifp, "can't enable throttle\n"); 1232 } 1233 return status; 1234 } 1235 1236 static int 1237 mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1238 { 1239 mxge_cmd_t cmd; 1240 mxge_softc_t *sc; 1241 int err; 1242 unsigned int throttle; 1243 1244 sc = arg1; 1245 throttle = sc->throttle; 1246 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1247 if (err != 0) 1248 return err; 1249 1250 if (throttle == sc->throttle) 1251 return 0; 1252 1253 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1254 return EINVAL; 1255 1256 ifnet_serialize_all(sc->ifp); 1257 1258 cmd.data0 = throttle; 1259 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1260 if (err == 0) 1261 sc->throttle = throttle; 1262 1263 ifnet_deserialize_all(sc->ifp); 1264 return err; 1265 } 1266 1267 static int 1268 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1269 { 1270 mxge_softc_t *sc; 1271 unsigned int intr_coal_delay; 1272 int err; 1273 1274 sc = arg1; 1275 intr_coal_delay = sc->intr_coal_delay; 1276 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1277 if (err != 0) 1278 return err; 1279 1280 if (intr_coal_delay == sc->intr_coal_delay) 1281 return 0; 1282 1283 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1284 return EINVAL; 1285 1286 ifnet_serialize_all(sc->ifp); 1287 1288 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1289 sc->intr_coal_delay = intr_coal_delay; 1290 1291 ifnet_deserialize_all(sc->ifp); 1292 return err; 1293 } 1294 1295 static int 1296 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1297 { 1298 mxge_softc_t *sc; 1299 unsigned int enabled; 1300 int err; 1301 1302 sc = arg1; 1303 enabled = sc->pause; 1304 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1305 if (err != 0) 1306 return err; 1307 1308 if (enabled == sc->pause) 1309 return 0; 1310 1311 ifnet_serialize_all(sc->ifp); 1312 err = mxge_change_pause(sc, enabled); 1313 ifnet_deserialize_all(sc->ifp); 1314 1315 return err; 1316 } 1317 1318 static int 1319 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1320 { 1321 int err; 1322 1323 if (arg1 == NULL) 1324 return EFAULT; 1325 arg2 = be32toh(*(int *)arg1); 1326 arg1 = NULL; 1327 err = sysctl_handle_int(oidp, arg1, arg2, req); 1328 1329 return err; 1330 } 1331 1332 static void 1333 mxge_rem_sysctls(mxge_softc_t *sc) 1334 { 1335 if (sc->ss != NULL) { 1336 struct mxge_slice_state *ss; 1337 int slice; 1338 1339 for (slice = 0; slice < sc->num_slices; slice++) { 1340 ss = &sc->ss[slice]; 1341 if (ss->sysctl_tree != NULL) { 1342 sysctl_ctx_free(&ss->sysctl_ctx); 1343 ss->sysctl_tree = NULL; 1344 } 1345 } 1346 } 1347 1348 if (sc->slice_sysctl_tree != NULL) { 1349 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1350 sc->slice_sysctl_tree = NULL; 1351 } 1352 1353 if (sc->sysctl_tree != NULL) { 1354 sysctl_ctx_free(&sc->sysctl_ctx); 1355 sc->sysctl_tree = NULL; 1356 } 1357 } 1358 1359 static void 1360 mxge_add_sysctls(mxge_softc_t *sc) 1361 { 1362 struct sysctl_ctx_list *ctx; 1363 struct sysctl_oid_list *children; 1364 mcp_irq_data_t *fw; 1365 struct mxge_slice_state *ss; 1366 int slice; 1367 char slice_num[8]; 1368 1369 ctx = &sc->sysctl_ctx; 1370 sysctl_ctx_init(ctx); 1371 sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), 1372 OID_AUTO, device_get_nameunit(sc->dev), CTLFLAG_RD, 0, ""); 1373 if (sc->sysctl_tree == NULL) { 1374 device_printf(sc->dev, "can't add sysctl node\n"); 1375 return; 1376 } 1377 1378 children = SYSCTL_CHILDREN(sc->sysctl_tree); 1379 fw = sc->ss[0].fw_stats; 1380 1381 /* 1382 * Random information 1383 */ 1384 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", 1385 CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); 1386 1387 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number", 1388 CTLFLAG_RD, &sc->serial_number_string, 0, "serial number"); 1389 1390 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code", 1391 CTLFLAG_RD, &sc->product_code_string, 0, "product code"); 1392 1393 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width", 1394 CTLFLAG_RD, &sc->link_width, 0, "link width"); 1395 1396 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary", 1397 CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary"); 1398 1399 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine", 1400 CTLFLAG_RD, &sc->wc, 0, "write combining PIO"); 1401 1402 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs", 1403 CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s"); 1404 1405 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs", 1406 CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s"); 1407 1408 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs", 1409 CTLFLAG_RD, &sc->read_write_dma, 0, 1410 "DMA concurrent Read/Write speed in MB/s"); 1411 1412 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets", 1413 CTLFLAG_RD, &sc->watchdog_resets, 0, 1414 "Number of times NIC was reset"); 1415 1416 /* 1417 * Performance related tunables 1418 */ 1419 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay", 1420 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I", 1421 "Interrupt coalescing delay in usecs"); 1422 1423 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle", 1424 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I", 1425 "Transmit throttling"); 1426 1427 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "flow_control_enabled", 1428 CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_flow_control, "I", 1429 "Interrupt coalescing delay in usecs"); 1430 1431 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait", 1432 CTLFLAG_RW, &mxge_deassert_wait, 0, 1433 "Wait for IRQ line to go low in ihandler"); 1434 1435 /* 1436 * Stats block from firmware is in network byte order. 1437 * Need to swap it 1438 */ 1439 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up", 1440 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0, 1441 mxge_handle_be32, "I", "link up"); 1442 1443 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available", 1444 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0, 1445 mxge_handle_be32, "I", "rdma_tags_available"); 1446 1447 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32", 1448 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0, 1449 mxge_handle_be32, "I", "dropped_bad_crc32"); 1450 1451 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy", 1452 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0, 1453 mxge_handle_be32, "I", "dropped_bad_phy"); 1454 1455 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered", 1456 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0, 1457 mxge_handle_be32, "I", "dropped_link_error_or_filtered"); 1458 1459 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow", 1460 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0, 1461 mxge_handle_be32, "I", "dropped_link_overflow"); 1462 1463 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered", 1464 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0, 1465 mxge_handle_be32, "I", "dropped_multicast_filtered"); 1466 1467 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer", 1468 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0, 1469 mxge_handle_be32, "I", "dropped_no_big_buffer"); 1470 1471 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer", 1472 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0, 1473 mxge_handle_be32, "I", "dropped_no_small_buffer"); 1474 1475 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun", 1476 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0, 1477 mxge_handle_be32, "I", "dropped_overrun"); 1478 1479 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause", 1480 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0, 1481 mxge_handle_be32, "I", "dropped_pause"); 1482 1483 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt", 1484 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0, 1485 mxge_handle_be32, "I", "dropped_runt"); 1486 1487 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered", 1488 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0, 1489 mxge_handle_be32, "I", "dropped_unicast_filtered"); 1490 1491 /* add counters exported for debugging from all slices */ 1492 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1493 sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, 1494 children, OID_AUTO, "slice", CTLFLAG_RD, 0, ""); 1495 if (sc->slice_sysctl_tree == NULL) { 1496 device_printf(sc->dev, "can't add slice sysctl node\n"); 1497 return; 1498 } 1499 1500 for (slice = 0; slice < sc->num_slices; slice++) { 1501 ss = &sc->ss[slice]; 1502 sysctl_ctx_init(&ss->sysctl_ctx); 1503 ctx = &ss->sysctl_ctx; 1504 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1505 ksprintf(slice_num, "%d", slice); 1506 ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, 1507 slice_num, CTLFLAG_RD, 0, ""); 1508 if (ss->sysctl_tree == NULL) { 1509 device_printf(sc->dev, 1510 "can't add %d slice sysctl node\n", slice); 1511 return; /* XXX continue? */ 1512 } 1513 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1514 1515 /* 1516 * XXX change to ULONG 1517 */ 1518 1519 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt", 1520 CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt"); 1521 1522 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt", 1523 CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_small_cnt"); 1524 1525 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req", 1526 CTLFLAG_RD, &ss->tx.req, 0, "tx_req"); 1527 1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", 1529 CTLFLAG_RD, &ss->tx.done, 0, "tx_done"); 1530 1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done", 1532 CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_done"); 1533 1534 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active", 1535 CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active"); 1536 1537 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate", 1538 CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate"); 1539 1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate", 1541 CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate"); 1542 } 1543 } 1544 1545 /* 1546 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1547 * backwards one at a time and handle ring wraps 1548 */ 1549 static __inline void 1550 mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1551 mcp_kreq_ether_send_t *src, int cnt) 1552 { 1553 int idx, starting_slot; 1554 1555 starting_slot = tx->req; 1556 while (cnt > 1) { 1557 cnt--; 1558 idx = (starting_slot + cnt) & tx->mask; 1559 mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 1560 wmb(); 1561 } 1562 } 1563 1564 /* 1565 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1566 * at most 32 bytes at a time, so as to avoid involving the software 1567 * pio handler in the nic. We re-write the first segment's flags 1568 * to mark them valid only after writing the entire chain 1569 */ 1570 static __inline void 1571 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt) 1572 { 1573 int idx, i; 1574 uint32_t *src_ints; 1575 volatile uint32_t *dst_ints; 1576 mcp_kreq_ether_send_t *srcp; 1577 volatile mcp_kreq_ether_send_t *dstp, *dst; 1578 uint8_t last_flags; 1579 1580 idx = tx->req & tx->mask; 1581 1582 last_flags = src->flags; 1583 src->flags = 0; 1584 wmb(); 1585 dst = dstp = &tx->lanai[idx]; 1586 srcp = src; 1587 1588 if ((idx + cnt) < tx->mask) { 1589 for (i = 0; i < cnt - 1; i += 2) { 1590 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1591 wmb(); /* force write every 32 bytes */ 1592 srcp += 2; 1593 dstp += 2; 1594 } 1595 } else { 1596 /* 1597 * Submit all but the first request, and ensure 1598 * that it is submitted below 1599 */ 1600 mxge_submit_req_backwards(tx, src, cnt); 1601 i = 0; 1602 } 1603 if (i < cnt) { 1604 /* Submit the first request */ 1605 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1606 wmb(); /* barrier before setting valid flag */ 1607 } 1608 1609 /* Re-write the last 32-bits with the valid flags */ 1610 src->flags = last_flags; 1611 src_ints = (uint32_t *)src; 1612 src_ints+=3; 1613 dst_ints = (volatile uint32_t *)dst; 1614 dst_ints+=3; 1615 *dst_ints = *src_ints; 1616 tx->req += cnt; 1617 wmb(); 1618 } 1619 1620 static int 1621 mxge_pullup_tso(struct mbuf **mp) 1622 { 1623 int hoff, iphlen, thoff; 1624 struct mbuf *m; 1625 1626 m = *mp; 1627 KASSERT(M_WRITABLE(m), ("TSO mbuf not writable")); 1628 1629 iphlen = m->m_pkthdr.csum_iphlen; 1630 thoff = m->m_pkthdr.csum_thlen; 1631 hoff = m->m_pkthdr.csum_lhlen; 1632 1633 KASSERT(iphlen > 0, ("invalid ip hlen")); 1634 KASSERT(thoff > 0, ("invalid tcp hlen")); 1635 KASSERT(hoff > 0, ("invalid ether hlen")); 1636 1637 if (__predict_false(m->m_len < hoff + iphlen + thoff)) { 1638 m = m_pullup(m, hoff + iphlen + thoff); 1639 if (m == NULL) { 1640 *mp = NULL; 1641 return ENOBUFS; 1642 } 1643 *mp = m; 1644 } 1645 return 0; 1646 } 1647 1648 static int 1649 mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map, 1650 struct mbuf *m, int busdma_seg_cnt) 1651 { 1652 mcp_kreq_ether_send_t *req; 1653 bus_dma_segment_t *seg; 1654 uint32_t low, high_swapped; 1655 int len, seglen, cum_len, cum_len_next; 1656 int next_is_first, chop, cnt, rdma_count, small; 1657 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1658 uint8_t flags, flags_next; 1659 struct mxge_buffer_state *info_last; 1660 bus_dmamap_t map = info_map->map; 1661 1662 mss = m->m_pkthdr.tso_segsz; 1663 1664 /* 1665 * Negative cum_len signifies to the send loop that we are 1666 * still in the header portion of the TSO packet. 1667 */ 1668 cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen + 1669 m->m_pkthdr.csum_thlen); 1670 1671 /* 1672 * TSO implies checksum offload on this hardware 1673 */ 1674 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1675 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1676 1677 /* 1678 * For TSO, pseudo_hdr_offset holds mss. The firmware figures 1679 * out where to put the checksum by parsing the header. 1680 */ 1681 pseudo_hdr_offset = htobe16(mss); 1682 1683 req = tx->req_list; 1684 seg = tx->seg_list; 1685 cnt = 0; 1686 rdma_count = 0; 1687 1688 /* 1689 * "rdma_count" is the number of RDMAs belonging to the current 1690 * packet BEFORE the current send request. For non-TSO packets, 1691 * this is equal to "count". 1692 * 1693 * For TSO packets, rdma_count needs to be reset to 0 after a 1694 * segment cut. 1695 * 1696 * The rdma_count field of the send request is the number of 1697 * RDMAs of the packet starting at that request. For TSO send 1698 * requests with one ore more cuts in the middle, this is the 1699 * number of RDMAs starting after the last cut in the request. 1700 * All previous segments before the last cut implicitly have 1 1701 * RDMA. 1702 * 1703 * Since the number of RDMAs is not known beforehand, it must be 1704 * filled-in retroactively - after each segmentation cut or at 1705 * the end of the entire packet. 1706 */ 1707 1708 while (busdma_seg_cnt) { 1709 /* 1710 * Break the busdma segment up into pieces 1711 */ 1712 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1713 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1714 len = seg->ds_len; 1715 1716 while (len) { 1717 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1718 seglen = len; 1719 cum_len_next = cum_len + seglen; 1720 (req - rdma_count)->rdma_count = rdma_count + 1; 1721 if (__predict_true(cum_len >= 0)) { 1722 /* Payload */ 1723 chop = (cum_len_next > mss); 1724 cum_len_next = cum_len_next % mss; 1725 next_is_first = (cum_len_next == 0); 1726 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1727 flags_next |= 1728 next_is_first * MXGEFW_FLAGS_FIRST; 1729 rdma_count |= -(chop | next_is_first); 1730 rdma_count += chop & !next_is_first; 1731 } else if (cum_len_next >= 0) { 1732 /* Header ends */ 1733 rdma_count = -1; 1734 cum_len_next = 0; 1735 seglen = -cum_len; 1736 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1737 flags_next = MXGEFW_FLAGS_TSO_PLD | 1738 MXGEFW_FLAGS_FIRST | 1739 (small * MXGEFW_FLAGS_SMALL); 1740 } 1741 1742 req->addr_high = high_swapped; 1743 req->addr_low = htobe32(low); 1744 req->pseudo_hdr_offset = pseudo_hdr_offset; 1745 req->pad = 0; 1746 req->rdma_count = 1; 1747 req->length = htobe16(seglen); 1748 req->cksum_offset = cksum_offset; 1749 req->flags = 1750 flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD); 1751 low += seglen; 1752 len -= seglen; 1753 cum_len = cum_len_next; 1754 flags = flags_next; 1755 req++; 1756 cnt++; 1757 rdma_count++; 1758 if (__predict_false(cksum_offset > seglen)) 1759 cksum_offset -= seglen; 1760 else 1761 cksum_offset = 0; 1762 if (__predict_false(cnt > tx->max_desc)) 1763 goto drop; 1764 } 1765 busdma_seg_cnt--; 1766 seg++; 1767 } 1768 (req - rdma_count)->rdma_count = rdma_count; 1769 1770 do { 1771 req--; 1772 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1773 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1774 1775 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1776 1777 info_map->map = info_last->map; 1778 info_last->map = map; 1779 info_last->m = m; 1780 1781 mxge_submit_req(tx, tx->req_list, cnt); 1782 #ifdef IFNET_BUF_RING 1783 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1784 /* tell the NIC to start polling this slice */ 1785 *tx->send_go = 1; 1786 tx->queue_active = 1; 1787 tx->activate++; 1788 wmb(); 1789 } 1790 #endif 1791 return 0; 1792 1793 drop: 1794 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1795 m_freem(m); 1796 return ENOBUFS; 1797 } 1798 1799 static int 1800 mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad) 1801 { 1802 mcp_kreq_ether_send_t *req; 1803 bus_dma_segment_t *seg; 1804 bus_dmamap_t map; 1805 int cnt, cum_len, err, i, idx, odd_flag; 1806 uint16_t pseudo_hdr_offset; 1807 uint8_t flags, cksum_offset; 1808 struct mxge_buffer_state *info_map, *info_last; 1809 1810 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 1811 err = mxge_pullup_tso(&m); 1812 if (__predict_false(err)) 1813 return err; 1814 } 1815 1816 /* 1817 * Map the frame for DMA 1818 */ 1819 idx = tx->req & tx->mask; 1820 info_map = &tx->info[idx]; 1821 map = info_map->map; 1822 1823 err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m, 1824 tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT); 1825 if (__predict_false(err != 0)) 1826 goto drop; 1827 bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE); 1828 1829 /* 1830 * TSO is different enough, we handle it in another routine 1831 */ 1832 if (m->m_pkthdr.csum_flags & CSUM_TSO) 1833 return mxge_encap_tso(tx, info_map, m, cnt); 1834 1835 req = tx->req_list; 1836 cksum_offset = 0; 1837 pseudo_hdr_offset = 0; 1838 flags = MXGEFW_FLAGS_NO_TSO; 1839 1840 /* 1841 * Checksum offloading 1842 */ 1843 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 1844 cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen; 1845 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1846 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1847 req->cksum_offset = cksum_offset; 1848 flags |= MXGEFW_FLAGS_CKSUM; 1849 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1850 } else { 1851 odd_flag = 0; 1852 } 1853 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1854 flags |= MXGEFW_FLAGS_SMALL; 1855 1856 /* 1857 * Convert segments into a request list 1858 */ 1859 cum_len = 0; 1860 seg = tx->seg_list; 1861 req->flags = MXGEFW_FLAGS_FIRST; 1862 for (i = 0; i < cnt; i++) { 1863 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1864 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1865 req->length = htobe16(seg->ds_len); 1866 req->cksum_offset = cksum_offset; 1867 if (cksum_offset > seg->ds_len) 1868 cksum_offset -= seg->ds_len; 1869 else 1870 cksum_offset = 0; 1871 req->pseudo_hdr_offset = pseudo_hdr_offset; 1872 req->pad = 0; /* complete solid 16-byte block */ 1873 req->rdma_count = 1; 1874 req->flags |= flags | ((cum_len & 1) * odd_flag); 1875 cum_len += seg->ds_len; 1876 seg++; 1877 req++; 1878 req->flags = 0; 1879 } 1880 req--; 1881 1882 /* 1883 * Pad runt to 60 bytes 1884 */ 1885 if (cum_len < 60) { 1886 req++; 1887 req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad)); 1888 req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad)); 1889 req->length = htobe16(60 - cum_len); 1890 req->cksum_offset = 0; 1891 req->pseudo_hdr_offset = pseudo_hdr_offset; 1892 req->pad = 0; /* complete solid 16-byte block */ 1893 req->rdma_count = 1; 1894 req->flags |= flags | ((cum_len & 1) * odd_flag); 1895 cnt++; 1896 } 1897 1898 tx->req_list[0].rdma_count = cnt; 1899 #if 0 1900 /* print what the firmware will see */ 1901 for (i = 0; i < cnt; i++) { 1902 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d," 1903 "cso:%d, flags:0x%x, rdma:%d\n", 1904 i, (int)ntohl(tx->req_list[i].addr_high), 1905 (int)ntohl(tx->req_list[i].addr_low), 1906 (int)ntohs(tx->req_list[i].length), 1907 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1908 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1909 tx->req_list[i].rdma_count); 1910 } 1911 kprintf("--------------\n"); 1912 #endif 1913 info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask]; 1914 1915 info_map->map = info_last->map; 1916 info_last->map = map; 1917 info_last->m = m; 1918 1919 mxge_submit_req(tx, tx->req_list, cnt); 1920 #ifdef IFNET_BUF_RING 1921 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1922 /* tell the NIC to start polling this slice */ 1923 *tx->send_go = 1; 1924 tx->queue_active = 1; 1925 tx->activate++; 1926 wmb(); 1927 } 1928 #endif 1929 return 0; 1930 1931 drop: 1932 m_freem(m); 1933 return err; 1934 } 1935 1936 static void 1937 mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq) 1938 { 1939 mxge_softc_t *sc = ifp->if_softc; 1940 mxge_tx_ring_t *tx; 1941 bus_addr_t zeropad; 1942 int encap = 0; 1943 1944 /* XXX Only use the first slice for now */ 1945 tx = &sc->ss[0].tx; 1946 1947 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 1948 ASSERT_SERIALIZED(&tx->tx_serialize); 1949 1950 if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq)) 1951 return; 1952 1953 zeropad = sc->zeropad_dma.dmem_busaddr; 1954 while (tx->mask - (tx->req - tx->done) > tx->max_desc) { 1955 struct mbuf *m; 1956 int error; 1957 1958 m = ifsq_dequeue(ifsq); 1959 if (m == NULL) 1960 goto done; 1961 1962 BPF_MTAP(ifp, m); 1963 error = mxge_encap(tx, m, zeropad); 1964 if (!error) 1965 encap = 1; 1966 else 1967 IFNET_STAT_INC(ifp, oerrors, 1); 1968 } 1969 1970 /* Ran out of transmit slots */ 1971 ifsq_set_oactive(ifsq); 1972 done: 1973 if (encap) 1974 ifp->if_timer = 5; 1975 } 1976 1977 static void 1978 mxge_watchdog(struct ifnet *ifp) 1979 { 1980 struct mxge_softc *sc = ifp->if_softc; 1981 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 1982 mxge_tx_ring_t *tx = &sc->ss[0].tx; 1983 1984 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1985 1986 /* Check for pause blocking before resetting */ 1987 if (tx->watchdog_rx_pause == rx_pause) { 1988 mxge_warn_stuck(sc, tx, 0); 1989 mxge_watchdog_reset(sc); 1990 return; 1991 } else { 1992 if_printf(ifp, "Flow control blocking xmits, " 1993 "check link partner\n"); 1994 } 1995 tx->watchdog_rx_pause = rx_pause; 1996 } 1997 1998 /* 1999 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2000 * at most 32 bytes at a time, so as to avoid involving the software 2001 * pio handler in the nic. We re-write the first segment's low 2002 * DMA address to mark it valid only after we write the entire chunk 2003 * in a burst 2004 */ 2005 static __inline void 2006 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2007 mcp_kreq_ether_recv_t *src) 2008 { 2009 uint32_t low; 2010 2011 low = src->addr_low; 2012 src->addr_low = 0xffffffff; 2013 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2014 wmb(); 2015 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2016 wmb(); 2017 src->addr_low = low; 2018 dst->addr_low = low; 2019 wmb(); 2020 } 2021 2022 static int 2023 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2024 boolean_t init) 2025 { 2026 bus_dma_segment_t seg; 2027 struct mbuf *m; 2028 int cnt, err, mflag; 2029 2030 mflag = MB_DONTWAIT; 2031 if (__predict_false(init)) 2032 mflag = MB_WAIT; 2033 2034 m = m_gethdr(mflag, MT_DATA); 2035 if (m == NULL) { 2036 err = ENOBUFS; 2037 if (__predict_false(init)) { 2038 /* 2039 * During initialization, there 2040 * is nothing to setup; bail out 2041 */ 2042 return err; 2043 } 2044 goto done; 2045 } 2046 m->m_len = m->m_pkthdr.len = MHLEN; 2047 2048 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2049 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2050 if (err != 0) { 2051 m_freem(m); 2052 if (__predict_false(init)) { 2053 /* 2054 * During initialization, there 2055 * is nothing to setup; bail out 2056 */ 2057 return err; 2058 } 2059 goto done; 2060 } 2061 2062 rx->info[idx].m = m; 2063 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2064 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2065 2066 done: 2067 if ((idx & 7) == 7) 2068 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2069 return err; 2070 } 2071 2072 static int 2073 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx, 2074 boolean_t init) 2075 { 2076 bus_dma_segment_t seg; 2077 struct mbuf *m; 2078 int cnt, err, mflag; 2079 2080 mflag = MB_DONTWAIT; 2081 if (__predict_false(init)) 2082 mflag = MB_WAIT; 2083 2084 if (rx->cl_size == MCLBYTES) 2085 m = m_getcl(mflag, MT_DATA, M_PKTHDR); 2086 else 2087 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE); 2088 if (m == NULL) { 2089 err = ENOBUFS; 2090 if (__predict_false(init)) { 2091 /* 2092 * During initialization, there 2093 * is nothing to setup; bail out 2094 */ 2095 return err; 2096 } 2097 goto done; 2098 } 2099 m->m_len = m->m_pkthdr.len = rx->cl_size; 2100 2101 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m, 2102 &seg, 1, &cnt, BUS_DMA_NOWAIT); 2103 if (err != 0) { 2104 m_freem(m); 2105 if (__predict_false(init)) { 2106 /* 2107 * During initialization, there 2108 * is nothing to setup; bail out 2109 */ 2110 return err; 2111 } 2112 goto done; 2113 } 2114 2115 rx->info[idx].m = m; 2116 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2117 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2118 2119 done: 2120 if ((idx & 7) == 7) 2121 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2122 return err; 2123 } 2124 2125 /* 2126 * Myri10GE hardware checksums are not valid if the sender 2127 * padded the frame with non-zero padding. This is because 2128 * the firmware just does a simple 16-bit 1s complement 2129 * checksum across the entire frame, excluding the first 14 2130 * bytes. It is best to simply to check the checksum and 2131 * tell the stack about it only if the checksum is good 2132 */ 2133 static __inline uint16_t 2134 mxge_rx_csum(struct mbuf *m, int csum) 2135 { 2136 const struct ether_header *eh; 2137 const struct ip *ip; 2138 uint16_t c; 2139 2140 eh = mtod(m, const struct ether_header *); 2141 2142 /* Only deal with IPv4 TCP & UDP for now */ 2143 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2144 return 1; 2145 2146 ip = (const struct ip *)(eh + 1); 2147 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)) 2148 return 1; 2149 2150 #ifdef INET 2151 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2152 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2153 - (ip->ip_hl << 2) + ip->ip_p)); 2154 #else 2155 c = 1; 2156 #endif 2157 c ^= 0xffff; 2158 return c; 2159 } 2160 2161 static void 2162 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2163 { 2164 struct ether_vlan_header *evl; 2165 uint32_t partial; 2166 2167 evl = mtod(m, struct ether_vlan_header *); 2168 2169 /* 2170 * Fix checksum by subtracting EVL_ENCAPLEN bytes after 2171 * what the firmware thought was the end of the ethernet 2172 * header. 2173 */ 2174 2175 /* Put checksum into host byte order */ 2176 *csum = ntohs(*csum); 2177 2178 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2179 *csum += ~partial; 2180 *csum += ((*csum) < ~partial); 2181 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2182 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2183 2184 /* 2185 * Restore checksum to network byte order; 2186 * later consumers expect this 2187 */ 2188 *csum = htons(*csum); 2189 2190 /* save the tag */ 2191 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag); 2192 m->m_flags |= M_VLANTAG; 2193 2194 /* 2195 * Remove the 802.1q header by copying the Ethernet 2196 * addresses over it and adjusting the beginning of 2197 * the data in the mbuf. The encapsulated Ethernet 2198 * type field is already in place. 2199 */ 2200 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN, 2201 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2202 m_adj(m, EVL_ENCAPLEN); 2203 } 2204 2205 2206 static __inline void 2207 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx, 2208 uint32_t len, uint32_t csum) 2209 { 2210 struct mbuf *m; 2211 const struct ether_header *eh; 2212 bus_dmamap_t old_map; 2213 int idx; 2214 2215 idx = rx->cnt & rx->mask; 2216 rx->cnt++; 2217 2218 /* Save a pointer to the received mbuf */ 2219 m = rx->info[idx].m; 2220 2221 /* Try to replace the received mbuf */ 2222 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) { 2223 /* Drop the frame -- the old mbuf is re-cycled */ 2224 IFNET_STAT_INC(ifp, ierrors, 1); 2225 return; 2226 } 2227 2228 /* Unmap the received buffer */ 2229 old_map = rx->info[idx].map; 2230 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2231 bus_dmamap_unload(rx->dmat, old_map); 2232 2233 /* Swap the bus_dmamap_t's */ 2234 rx->info[idx].map = rx->extra_map; 2235 rx->extra_map = old_map; 2236 2237 /* 2238 * mcp implicitly skips 1st 2 bytes so that packet is properly 2239 * aligned 2240 */ 2241 m->m_data += MXGEFW_PAD; 2242 2243 m->m_pkthdr.rcvif = ifp; 2244 m->m_len = m->m_pkthdr.len = len; 2245 2246 IFNET_STAT_INC(ifp, ipackets, 1); 2247 2248 eh = mtod(m, const struct ether_header *); 2249 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2250 mxge_vlan_tag_remove(m, &csum); 2251 2252 /* If the checksum is valid, mark it in the mbuf header */ 2253 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2254 mxge_rx_csum(m, csum) == 0) { 2255 /* Tell the stack that the checksum is good */ 2256 m->m_pkthdr.csum_data = 0xffff; 2257 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2258 CSUM_DATA_VALID; 2259 } 2260 ifp->if_input(ifp, m); 2261 } 2262 2263 static __inline void 2264 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx, 2265 uint32_t len, uint32_t csum) 2266 { 2267 const struct ether_header *eh; 2268 struct mbuf *m; 2269 bus_dmamap_t old_map; 2270 int idx; 2271 2272 idx = rx->cnt & rx->mask; 2273 rx->cnt++; 2274 2275 /* Save a pointer to the received mbuf */ 2276 m = rx->info[idx].m; 2277 2278 /* Try to replace the received mbuf */ 2279 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) { 2280 /* Drop the frame -- the old mbuf is re-cycled */ 2281 IFNET_STAT_INC(ifp, ierrors, 1); 2282 return; 2283 } 2284 2285 /* Unmap the received buffer */ 2286 old_map = rx->info[idx].map; 2287 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2288 bus_dmamap_unload(rx->dmat, old_map); 2289 2290 /* Swap the bus_dmamap_t's */ 2291 rx->info[idx].map = rx->extra_map; 2292 rx->extra_map = old_map; 2293 2294 /* 2295 * mcp implicitly skips 1st 2 bytes so that packet is properly 2296 * aligned 2297 */ 2298 m->m_data += MXGEFW_PAD; 2299 2300 m->m_pkthdr.rcvif = ifp; 2301 m->m_len = m->m_pkthdr.len = len; 2302 2303 IFNET_STAT_INC(ifp, ipackets, 1); 2304 2305 eh = mtod(m, const struct ether_header *); 2306 if (eh->ether_type == htons(ETHERTYPE_VLAN)) 2307 mxge_vlan_tag_remove(m, &csum); 2308 2309 /* If the checksum is valid, mark it in the mbuf header */ 2310 if ((ifp->if_capenable & IFCAP_RXCSUM) && 2311 mxge_rx_csum(m, csum) == 0) { 2312 /* Tell the stack that the checksum is good */ 2313 m->m_pkthdr.csum_data = 0xffff; 2314 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2315 CSUM_DATA_VALID; 2316 } 2317 ifp->if_input(ifp, m); 2318 } 2319 2320 static __inline void 2321 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data) 2322 { 2323 mxge_rx_done_t *rx_done = &rx_data->rx_done; 2324 2325 while (rx_done->entry[rx_done->idx].length != 0) { 2326 uint16_t length, checksum; 2327 2328 length = ntohs(rx_done->entry[rx_done->idx].length); 2329 rx_done->entry[rx_done->idx].length = 0; 2330 2331 checksum = rx_done->entry[rx_done->idx].checksum; 2332 2333 if (length <= MXGE_RX_SMALL_BUFLEN) { 2334 mxge_rx_done_small(ifp, &rx_data->rx_small, 2335 length, checksum); 2336 } else { 2337 mxge_rx_done_big(ifp, &rx_data->rx_big, 2338 length, checksum); 2339 } 2340 2341 rx_done->idx++; 2342 rx_done->idx &= rx_done->mask; 2343 } 2344 } 2345 2346 static __inline void 2347 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx) 2348 { 2349 ASSERT_SERIALIZED(&tx->tx_serialize); 2350 2351 while (tx->pkt_done != mcp_idx) { 2352 struct mbuf *m; 2353 int idx; 2354 2355 idx = tx->done & tx->mask; 2356 tx->done++; 2357 2358 m = tx->info[idx].m; 2359 /* 2360 * mbuf and DMA map only attached to the first 2361 * segment per-mbuf. 2362 */ 2363 if (m != NULL) { 2364 tx->pkt_done++; 2365 IFNET_STAT_INC(ifp, opackets, 1); 2366 tx->info[idx].m = NULL; 2367 bus_dmamap_unload(tx->dmat, tx->info[idx].map); 2368 m_freem(m); 2369 } 2370 } 2371 2372 /* 2373 * If we have space, clear OACTIVE to tell the stack that 2374 * its OK to send packets 2375 */ 2376 if (tx->req - tx->done < (tx->mask + 1) / 2) { 2377 ifq_clr_oactive(&ifp->if_snd); 2378 if (tx->req == tx->done) 2379 ifp->if_timer = 0; 2380 } 2381 2382 if (!ifq_is_empty(&ifp->if_snd)) 2383 if_devstart(ifp); 2384 2385 #ifdef IFNET_BUF_RING 2386 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2387 /* let the NIC stop polling this queue, since there 2388 * are no more transmits pending */ 2389 if (tx->req == tx->done) { 2390 *tx->send_stop = 1; 2391 tx->queue_active = 0; 2392 tx->deactivate++; 2393 wmb(); 2394 } 2395 } 2396 #endif 2397 } 2398 2399 static struct mxge_media_type mxge_xfp_media_types[] = { 2400 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2401 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2402 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2403 {0, (1 << 5), "10GBASE-ER"}, 2404 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2405 {0, (1 << 3), "10GBASE-SW"}, 2406 {0, (1 << 2), "10GBASE-LW"}, 2407 {0, (1 << 1), "10GBASE-EW"}, 2408 {0, (1 << 0), "Reserved"} 2409 }; 2410 2411 static struct mxge_media_type mxge_sfp_media_types[] = { 2412 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2413 {0, (1 << 7), "Reserved"}, 2414 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2415 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2416 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2417 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2418 }; 2419 2420 static void 2421 mxge_media_set(mxge_softc_t *sc, int media_type) 2422 { 2423 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 0, NULL); 2424 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2425 sc->current_media = media_type; 2426 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2427 } 2428 2429 static void 2430 mxge_media_init(mxge_softc_t *sc) 2431 { 2432 const char *ptr; 2433 int i; 2434 2435 ifmedia_removeall(&sc->media); 2436 mxge_media_set(sc, IFM_AUTO); 2437 2438 /* 2439 * Parse the product code to deterimine the interface type 2440 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2441 * after the 3rd dash in the driver's cached copy of the 2442 * EEPROM's product code string. 2443 */ 2444 ptr = sc->product_code_string; 2445 if (ptr == NULL) { 2446 if_printf(sc->ifp, "Missing product code\n"); 2447 return; 2448 } 2449 2450 for (i = 0; i < 3; i++, ptr++) { 2451 ptr = strchr(ptr, '-'); 2452 if (ptr == NULL) { 2453 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i); 2454 return; 2455 } 2456 } 2457 if (*ptr == 'C' || *(ptr +1) == 'C') { 2458 /* -C is CX4 */ 2459 sc->connector = MXGE_CX4; 2460 mxge_media_set(sc, IFM_10G_CX4); 2461 } else if (*ptr == 'Q') { 2462 /* -Q is Quad Ribbon Fiber */ 2463 sc->connector = MXGE_QRF; 2464 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n"); 2465 /* DragonFly has no media type for Quad ribbon fiber */ 2466 } else if (*ptr == 'R') { 2467 /* -R is XFP */ 2468 sc->connector = MXGE_XFP; 2469 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2470 /* -S or -2S is SFP+ */ 2471 sc->connector = MXGE_SFP; 2472 } else { 2473 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr); 2474 } 2475 } 2476 2477 /* 2478 * Determine the media type for a NIC. Some XFPs will identify 2479 * themselves only when their link is up, so this is initiated via a 2480 * link up interrupt. However, this can potentially take up to 2481 * several milliseconds, so it is run via the watchdog routine, rather 2482 * than in the interrupt handler itself. 2483 */ 2484 static void 2485 mxge_media_probe(mxge_softc_t *sc) 2486 { 2487 mxge_cmd_t cmd; 2488 const char *cage_type; 2489 struct mxge_media_type *mxge_media_types = NULL; 2490 int i, err, ms, mxge_media_type_entries; 2491 uint32_t byte; 2492 2493 sc->need_media_probe = 0; 2494 2495 if (sc->connector == MXGE_XFP) { 2496 /* -R is XFP */ 2497 mxge_media_types = mxge_xfp_media_types; 2498 mxge_media_type_entries = sizeof(mxge_xfp_media_types) / 2499 sizeof(mxge_xfp_media_types[0]); 2500 byte = MXGE_XFP_COMPLIANCE_BYTE; 2501 cage_type = "XFP"; 2502 } else if (sc->connector == MXGE_SFP) { 2503 /* -S or -2S is SFP+ */ 2504 mxge_media_types = mxge_sfp_media_types; 2505 mxge_media_type_entries = sizeof(mxge_sfp_media_types) / 2506 sizeof(mxge_sfp_media_types[0]); 2507 cage_type = "SFP+"; 2508 byte = 3; 2509 } else { 2510 /* nothing to do; media type cannot change */ 2511 return; 2512 } 2513 2514 /* 2515 * At this point we know the NIC has an XFP cage, so now we 2516 * try to determine what is in the cage by using the 2517 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2518 * register. We read just one byte, which may take over 2519 * a millisecond 2520 */ 2521 2522 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2523 cmd.data1 = byte; 2524 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2525 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) 2526 if_printf(sc->ifp, "failed to read XFP\n"); 2527 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) 2528 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n"); 2529 if (err != MXGEFW_CMD_OK) 2530 return; 2531 2532 /* Now we wait for the data to be cached */ 2533 cmd.data0 = byte; 2534 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2535 for (ms = 0; err == EBUSY && ms < 50; ms++) { 2536 DELAY(1000); 2537 cmd.data0 = byte; 2538 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2539 } 2540 if (err != MXGEFW_CMD_OK) { 2541 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n", 2542 cage_type, err, ms); 2543 return; 2544 } 2545 2546 if (cmd.data0 == mxge_media_types[0].bitmask) { 2547 if (bootverbose) { 2548 if_printf(sc->ifp, "%s:%s\n", cage_type, 2549 mxge_media_types[0].name); 2550 } 2551 if (sc->current_media != mxge_media_types[0].flag) { 2552 mxge_media_init(sc); 2553 mxge_media_set(sc, mxge_media_types[0].flag); 2554 } 2555 return; 2556 } 2557 for (i = 1; i < mxge_media_type_entries; i++) { 2558 if (cmd.data0 & mxge_media_types[i].bitmask) { 2559 if (bootverbose) { 2560 if_printf(sc->ifp, "%s:%s\n", cage_type, 2561 mxge_media_types[i].name); 2562 } 2563 2564 if (sc->current_media != mxge_media_types[i].flag) { 2565 mxge_media_init(sc); 2566 mxge_media_set(sc, mxge_media_types[i].flag); 2567 } 2568 return; 2569 } 2570 } 2571 if (bootverbose) { 2572 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type, 2573 cmd.data0); 2574 } 2575 } 2576 2577 static void 2578 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats) 2579 { 2580 if (sc->link_state != stats->link_up) { 2581 sc->link_state = stats->link_up; 2582 if (sc->link_state) { 2583 sc->ifp->if_link_state = LINK_STATE_UP; 2584 if_link_state_change(sc->ifp); 2585 if (bootverbose) 2586 if_printf(sc->ifp, "link up\n"); 2587 } else { 2588 sc->ifp->if_link_state = LINK_STATE_DOWN; 2589 if_link_state_change(sc->ifp); 2590 if (bootverbose) 2591 if_printf(sc->ifp, "link down\n"); 2592 } 2593 sc->need_media_probe = 1; 2594 } 2595 2596 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) { 2597 sc->rdma_tags_available = be32toh(stats->rdma_tags_available); 2598 if_printf(sc->ifp, "RDMA timed out! %d tags left\n", 2599 sc->rdma_tags_available); 2600 } 2601 2602 if (stats->link_down) { 2603 sc->down_cnt += stats->link_down; 2604 sc->link_state = 0; 2605 sc->ifp->if_link_state = LINK_STATE_DOWN; 2606 if_link_state_change(sc->ifp); 2607 } 2608 } 2609 2610 static void 2611 mxge_serialize_skipmain(struct mxge_softc *sc) 2612 { 2613 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1); 2614 } 2615 2616 static void 2617 mxge_deserialize_skipmain(struct mxge_softc *sc) 2618 { 2619 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1); 2620 } 2621 2622 static void 2623 mxge_legacy(void *arg) 2624 { 2625 struct mxge_slice_state *ss = arg; 2626 mxge_softc_t *sc = ss->sc; 2627 mcp_irq_data_t *stats = ss->fw_stats; 2628 mxge_tx_ring_t *tx = &ss->tx; 2629 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2630 uint32_t send_done_count; 2631 uint8_t valid; 2632 2633 ASSERT_SERIALIZED(&sc->main_serialize); 2634 2635 #if 0 2636 /* an interrupt on a non-zero slice is implicitly valid 2637 since MSI-X irqs are not shared */ 2638 if (ss != sc->ss) { 2639 mxge_clean_rx_done(rx_done); 2640 *ss->irq_claim = be32toh(3); 2641 return; 2642 } 2643 #endif 2644 2645 /* Make sure the DMA has finished */ 2646 if (!stats->valid) 2647 return; 2648 valid = stats->valid; 2649 2650 /* Lower legacy IRQ */ 2651 *sc->irq_deassert = 0; 2652 if (!mxge_deassert_wait) { 2653 /* Don't wait for conf. that irq is low */ 2654 stats->valid = 0; 2655 } 2656 2657 mxge_serialize_skipmain(sc); 2658 2659 /* 2660 * Loop while waiting for legacy irq deassertion 2661 * XXX do we really want to loop? 2662 */ 2663 do { 2664 /* Check for transmit completes and receives */ 2665 send_done_count = be32toh(stats->send_done_count); 2666 while ((send_done_count != tx->pkt_done) || 2667 (rx_done->entry[rx_done->idx].length != 0)) { 2668 if (send_done_count != tx->pkt_done) { 2669 mxge_tx_done(&sc->arpcom.ac_if, tx, 2670 (int)send_done_count); 2671 } 2672 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data); 2673 send_done_count = be32toh(stats->send_done_count); 2674 } 2675 if (mxge_deassert_wait) 2676 wmb(); 2677 } while (*((volatile uint8_t *)&stats->valid)); 2678 2679 mxge_deserialize_skipmain(sc); 2680 2681 /* Fw link & error stats meaningful only on the first slice */ 2682 if (__predict_false(stats->stats_updated)) 2683 mxge_intr_status(sc, stats); 2684 2685 /* Check to see if we have rx token to pass back */ 2686 if (valid & 0x1) 2687 *ss->irq_claim = be32toh(3); 2688 *(ss->irq_claim + 1) = be32toh(3); 2689 } 2690 2691 static void 2692 mxge_msi(void *arg) 2693 { 2694 struct mxge_slice_state *ss = arg; 2695 mxge_softc_t *sc = ss->sc; 2696 mcp_irq_data_t *stats = ss->fw_stats; 2697 mxge_tx_ring_t *tx = &ss->tx; 2698 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done; 2699 uint32_t send_done_count; 2700 uint8_t valid; 2701 2702 ASSERT_SERIALIZED(&sc->main_serialize); 2703 2704 /* Make sure the DMA has finished */ 2705 if (__predict_false(!stats->valid)) 2706 return; 2707 2708 valid = stats->valid; 2709 stats->valid = 0; 2710 2711 /* Check for receives */ 2712 lwkt_serialize_enter(&ss->rx_data.rx_serialize); 2713 if (rx_done->entry[rx_done->idx].length != 0) 2714 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data); 2715 lwkt_serialize_exit(&ss->rx_data.rx_serialize); 2716 2717 /* 2718 * Check for transmit completes 2719 * 2720 * NOTE: 2721 * Since pkt_done is only changed by mxge_tx_done(), 2722 * which is called only in interrupt handler, the 2723 * check w/o holding tx serializer is MPSAFE. 2724 */ 2725 send_done_count = be32toh(stats->send_done_count); 2726 if (send_done_count != tx->pkt_done) { 2727 lwkt_serialize_enter(&tx->tx_serialize); 2728 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count); 2729 lwkt_serialize_exit(&tx->tx_serialize); 2730 } 2731 2732 if (__predict_false(stats->stats_updated)) 2733 mxge_intr_status(sc, stats); 2734 2735 /* Check to see if we have rx token to pass back */ 2736 if (valid & 0x1) 2737 *ss->irq_claim = be32toh(3); 2738 *(ss->irq_claim + 1) = be32toh(3); 2739 } 2740 2741 static void 2742 mxge_init(void *arg) 2743 { 2744 struct mxge_softc *sc = arg; 2745 2746 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp); 2747 if ((sc->ifp->if_flags & IFF_RUNNING) == 0) 2748 mxge_open(sc); 2749 } 2750 2751 static void 2752 mxge_free_slice_mbufs(struct mxge_slice_state *ss) 2753 { 2754 int i; 2755 2756 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2757 if (ss->rx_data.rx_big.info[i].m == NULL) 2758 continue; 2759 bus_dmamap_unload(ss->rx_data.rx_big.dmat, 2760 ss->rx_data.rx_big.info[i].map); 2761 m_freem(ss->rx_data.rx_big.info[i].m); 2762 ss->rx_data.rx_big.info[i].m = NULL; 2763 } 2764 2765 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2766 if (ss->rx_data.rx_small.info[i].m == NULL) 2767 continue; 2768 bus_dmamap_unload(ss->rx_data.rx_small.dmat, 2769 ss->rx_data.rx_small.info[i].map); 2770 m_freem(ss->rx_data.rx_small.info[i].m); 2771 ss->rx_data.rx_small.info[i].m = NULL; 2772 } 2773 2774 /* Transmit ring used only on the first slice */ 2775 if (ss->tx.info == NULL) 2776 return; 2777 2778 for (i = 0; i <= ss->tx.mask; i++) { 2779 if (ss->tx.info[i].m == NULL) 2780 continue; 2781 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map); 2782 m_freem(ss->tx.info[i].m); 2783 ss->tx.info[i].m = NULL; 2784 } 2785 } 2786 2787 static void 2788 mxge_free_mbufs(mxge_softc_t *sc) 2789 { 2790 int slice; 2791 2792 for (slice = 0; slice < sc->num_slices; slice++) 2793 mxge_free_slice_mbufs(&sc->ss[slice]); 2794 } 2795 2796 static void 2797 mxge_free_slice_rings(struct mxge_slice_state *ss) 2798 { 2799 int i; 2800 2801 if (ss->rx_data.rx_done.entry != NULL) { 2802 mxge_dma_free(&ss->rx_done_dma); 2803 ss->rx_data.rx_done.entry = NULL; 2804 } 2805 2806 if (ss->tx.req_list != NULL) { 2807 kfree(ss->tx.req_list, M_DEVBUF); 2808 ss->tx.req_list = NULL; 2809 } 2810 2811 if (ss->tx.seg_list != NULL) { 2812 kfree(ss->tx.seg_list, M_DEVBUF); 2813 ss->tx.seg_list = NULL; 2814 } 2815 2816 if (ss->rx_data.rx_small.shadow != NULL) { 2817 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF); 2818 ss->rx_data.rx_small.shadow = NULL; 2819 } 2820 2821 if (ss->rx_data.rx_big.shadow != NULL) { 2822 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF); 2823 ss->rx_data.rx_big.shadow = NULL; 2824 } 2825 2826 if (ss->tx.info != NULL) { 2827 if (ss->tx.dmat != NULL) { 2828 for (i = 0; i <= ss->tx.mask; i++) { 2829 bus_dmamap_destroy(ss->tx.dmat, 2830 ss->tx.info[i].map); 2831 } 2832 bus_dma_tag_destroy(ss->tx.dmat); 2833 } 2834 kfree(ss->tx.info, M_DEVBUF); 2835 ss->tx.info = NULL; 2836 } 2837 2838 if (ss->rx_data.rx_small.info != NULL) { 2839 if (ss->rx_data.rx_small.dmat != NULL) { 2840 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2841 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2842 ss->rx_data.rx_small.info[i].map); 2843 } 2844 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2845 ss->rx_data.rx_small.extra_map); 2846 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2847 } 2848 kfree(ss->rx_data.rx_small.info, M_DEVBUF); 2849 ss->rx_data.rx_small.info = NULL; 2850 } 2851 2852 if (ss->rx_data.rx_big.info != NULL) { 2853 if (ss->rx_data.rx_big.dmat != NULL) { 2854 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2855 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2856 ss->rx_data.rx_big.info[i].map); 2857 } 2858 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2859 ss->rx_data.rx_big.extra_map); 2860 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2861 } 2862 kfree(ss->rx_data.rx_big.info, M_DEVBUF); 2863 ss->rx_data.rx_big.info = NULL; 2864 } 2865 } 2866 2867 static void 2868 mxge_free_rings(mxge_softc_t *sc) 2869 { 2870 int slice; 2871 2872 if (sc->ss == NULL) 2873 return; 2874 2875 for (slice = 0; slice < sc->num_slices; slice++) 2876 mxge_free_slice_rings(&sc->ss[slice]); 2877 } 2878 2879 static int 2880 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 2881 int tx_ring_entries) 2882 { 2883 mxge_softc_t *sc = ss->sc; 2884 size_t bytes; 2885 int err, i; 2886 2887 /* 2888 * Allocate per-slice receive resources 2889 */ 2890 2891 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask = 2892 rx_ring_entries - 1; 2893 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1; 2894 2895 /* Allocate the rx shadow rings */ 2896 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow); 2897 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2898 2899 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow); 2900 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2901 2902 /* Allocate the rx host info rings */ 2903 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info); 2904 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2905 2906 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info); 2907 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2908 2909 /* Allocate the rx busdma resources */ 2910 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2911 1, /* alignment */ 2912 4096, /* boundary */ 2913 BUS_SPACE_MAXADDR, /* low */ 2914 BUS_SPACE_MAXADDR, /* high */ 2915 NULL, NULL, /* filter */ 2916 MHLEN, /* maxsize */ 2917 1, /* num segs */ 2918 MHLEN, /* maxsegsize */ 2919 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 2920 /* flags */ 2921 &ss->rx_data.rx_small.dmat); /* tag */ 2922 if (err != 0) { 2923 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2924 err); 2925 return err; 2926 } 2927 2928 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK, 2929 &ss->rx_data.rx_small.extra_map); 2930 if (err != 0) { 2931 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err); 2932 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2933 ss->rx_data.rx_small.dmat = NULL; 2934 return err; 2935 } 2936 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 2937 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, 2938 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map); 2939 if (err != 0) { 2940 int j; 2941 2942 device_printf(sc->dev, "Err %d rx_small dmamap\n", err); 2943 2944 for (j = 0; j < i; ++j) { 2945 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2946 ss->rx_data.rx_small.info[j].map); 2947 } 2948 bus_dmamap_destroy(ss->rx_data.rx_small.dmat, 2949 ss->rx_data.rx_small.extra_map); 2950 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat); 2951 ss->rx_data.rx_small.dmat = NULL; 2952 return err; 2953 } 2954 } 2955 2956 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2957 1, /* alignment */ 2958 4096, /* boundary */ 2959 BUS_SPACE_MAXADDR, /* low */ 2960 BUS_SPACE_MAXADDR, /* high */ 2961 NULL, NULL, /* filter */ 2962 4096, /* maxsize */ 2963 1, /* num segs */ 2964 4096, /* maxsegsize*/ 2965 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 2966 /* flags */ 2967 &ss->rx_data.rx_big.dmat); /* tag */ 2968 if (err != 0) { 2969 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2970 err); 2971 return err; 2972 } 2973 2974 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 2975 &ss->rx_data.rx_big.extra_map); 2976 if (err != 0) { 2977 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err); 2978 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2979 ss->rx_data.rx_big.dmat = NULL; 2980 return err; 2981 } 2982 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 2983 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK, 2984 &ss->rx_data.rx_big.info[i].map); 2985 if (err != 0) { 2986 int j; 2987 2988 device_printf(sc->dev, "Err %d rx_big dmamap\n", err); 2989 for (j = 0; j < i; ++j) { 2990 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2991 ss->rx_data.rx_big.info[j].map); 2992 } 2993 bus_dmamap_destroy(ss->rx_data.rx_big.dmat, 2994 ss->rx_data.rx_big.extra_map); 2995 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat); 2996 ss->rx_data.rx_big.dmat = NULL; 2997 return err; 2998 } 2999 } 3000 3001 /* 3002 * Now allocate TX resources 3003 */ 3004 3005 ss->tx.mask = tx_ring_entries - 1; 3006 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3007 3008 /* 3009 * Allocate the tx request copy block; MUST be at least 8 bytes 3010 * aligned 3011 */ 3012 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4); 3013 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes), 3014 M_DEVBUF, M_WAITOK); 3015 3016 /* Allocate the tx busdma segment list */ 3017 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc; 3018 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK); 3019 3020 /* Allocate the tx host info ring */ 3021 bytes = tx_ring_entries * sizeof(*ss->tx.info); 3022 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3023 3024 /* Allocate the tx busdma resources */ 3025 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3026 1, /* alignment */ 3027 sc->tx_boundary, /* boundary */ 3028 BUS_SPACE_MAXADDR, /* low */ 3029 BUS_SPACE_MAXADDR, /* high */ 3030 NULL, NULL, /* filter */ 3031 IP_MAXPACKET + 3032 sizeof(struct ether_vlan_header), 3033 /* maxsize */ 3034 ss->tx.max_desc - 2, /* num segs */ 3035 sc->tx_boundary, /* maxsegsz */ 3036 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | 3037 BUS_DMA_ONEBPAGE, /* flags */ 3038 &ss->tx.dmat); /* tag */ 3039 if (err != 0) { 3040 device_printf(sc->dev, "Err %d allocating tx dmat\n", err); 3041 return err; 3042 } 3043 3044 /* 3045 * Now use these tags to setup DMA maps for each slot in the ring 3046 */ 3047 for (i = 0; i <= ss->tx.mask; i++) { 3048 err = bus_dmamap_create(ss->tx.dmat, 3049 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map); 3050 if (err != 0) { 3051 int j; 3052 3053 device_printf(sc->dev, "Err %d tx dmamap\n", err); 3054 for (j = 0; j < i; ++j) { 3055 bus_dmamap_destroy(ss->tx.dmat, 3056 ss->tx.info[j].map); 3057 } 3058 bus_dma_tag_destroy(ss->tx.dmat); 3059 ss->tx.dmat = NULL; 3060 return err; 3061 } 3062 } 3063 return 0; 3064 } 3065 3066 static int 3067 mxge_alloc_rings(mxge_softc_t *sc) 3068 { 3069 mxge_cmd_t cmd; 3070 int tx_ring_size; 3071 int tx_ring_entries, rx_ring_entries; 3072 int err, slice; 3073 3074 /* Get ring sizes */ 3075 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3076 if (err != 0) { 3077 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3078 return err; 3079 } 3080 tx_ring_size = cmd.data0; 3081 3082 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t); 3083 rx_ring_entries = sc->rx_intr_slots / 2; 3084 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1); 3085 ifq_set_ready(&sc->ifp->if_snd); 3086 3087 for (slice = 0; slice < sc->num_slices; slice++) { 3088 err = mxge_alloc_slice_rings(&sc->ss[slice], 3089 rx_ring_entries, tx_ring_entries); 3090 if (err != 0) { 3091 device_printf(sc->dev, 3092 "alloc %d slice rings failed\n", slice); 3093 return err; 3094 } 3095 } 3096 return 0; 3097 } 3098 3099 static void 3100 mxge_choose_params(int mtu, int *cl_size) 3101 { 3102 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD; 3103 3104 if (bufsize < MCLBYTES) { 3105 *cl_size = MCLBYTES; 3106 } else { 3107 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu)); 3108 *cl_size = MJUMPAGESIZE; 3109 } 3110 } 3111 3112 static int 3113 mxge_slice_open(struct mxge_slice_state *ss, int cl_size) 3114 { 3115 mxge_cmd_t cmd; 3116 int err, i, slice; 3117 3118 slice = ss - ss->sc->ss; 3119 3120 /* 3121 * Get the lanai pointers to the send and receive rings 3122 */ 3123 err = 0; 3124 #ifndef IFNET_BUF_RING 3125 /* We currently only send from the first slice */ 3126 if (slice == 0) { 3127 #endif 3128 cmd.data0 = slice; 3129 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3130 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *) 3131 (ss->sc->sram + cmd.data0); 3132 ss->tx.send_go = (volatile uint32_t *) 3133 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3134 ss->tx.send_stop = (volatile uint32_t *) 3135 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3136 #ifndef IFNET_BUF_RING 3137 } 3138 #endif 3139 3140 cmd.data0 = slice; 3141 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3142 ss->rx_data.rx_small.lanai = 3143 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3144 3145 cmd.data0 = slice; 3146 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3147 ss->rx_data.rx_big.lanai = 3148 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0); 3149 3150 if (err != 0) { 3151 if_printf(ss->sc->ifp, 3152 "failed to get ring sizes or locations\n"); 3153 return EIO; 3154 } 3155 3156 /* 3157 * Stock small receive ring 3158 */ 3159 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) { 3160 err = mxge_get_buf_small(&ss->rx_data.rx_small, 3161 ss->rx_data.rx_small.info[i].map, i, TRUE); 3162 if (err) { 3163 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i, 3164 ss->rx_data.rx_small.mask + 1); 3165 return ENOMEM; 3166 } 3167 } 3168 3169 /* 3170 * Stock big receive ring 3171 */ 3172 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3173 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff; 3174 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff; 3175 } 3176 3177 ss->rx_data.rx_big.cl_size = cl_size; 3178 3179 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) { 3180 err = mxge_get_buf_big(&ss->rx_data.rx_big, 3181 ss->rx_data.rx_big.info[i].map, i, TRUE); 3182 if (err) { 3183 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i, 3184 ss->rx_data.rx_big.mask + 1); 3185 return ENOMEM; 3186 } 3187 } 3188 return 0; 3189 } 3190 3191 static int 3192 mxge_open(mxge_softc_t *sc) 3193 { 3194 struct ifnet *ifp = sc->ifp; 3195 mxge_cmd_t cmd; 3196 int err, slice, cl_size, i; 3197 bus_addr_t bus; 3198 volatile uint8_t *itable; 3199 struct mxge_slice_state *ss; 3200 3201 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3202 3203 /* Copy the MAC address in case it was overridden */ 3204 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN); 3205 3206 err = mxge_reset(sc, 1); 3207 if (err != 0) { 3208 if_printf(ifp, "failed to reset\n"); 3209 return EIO; 3210 } 3211 3212 if (sc->num_slices > 1) { 3213 /* Setup the indirection table */ 3214 cmd.data0 = sc->num_slices; 3215 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd); 3216 3217 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd); 3218 if (err != 0) { 3219 if_printf(ifp, "failed to setup rss tables\n"); 3220 return err; 3221 } 3222 3223 /* Just enable an identity mapping */ 3224 itable = sc->sram + cmd.data0; 3225 for (i = 0; i < sc->num_slices; i++) 3226 itable[i] = (uint8_t)i; 3227 3228 cmd.data0 = 1; 3229 cmd.data1 = MXGEFW_RSS_HASH_TYPE_TCP_IPV4; 3230 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3231 if (err != 0) { 3232 if_printf(ifp, "failed to enable slices\n"); 3233 return err; 3234 } 3235 } 3236 3237 cmd.data0 = MXGEFW_TSO_MODE_NDIS; 3238 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd); 3239 if (err) { 3240 /* 3241 * Can't change TSO mode to NDIS, never allow TSO then 3242 */ 3243 if_printf(ifp, "failed to set TSO mode\n"); 3244 ifp->if_capenable &= ~IFCAP_TSO; 3245 ifp->if_capabilities &= ~IFCAP_TSO; 3246 ifp->if_hwassist &= ~CSUM_TSO; 3247 } 3248 3249 mxge_choose_params(ifp->if_mtu, &cl_size); 3250 3251 cmd.data0 = 1; 3252 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd); 3253 /* 3254 * Error is only meaningful if we're trying to set 3255 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 3256 */ 3257 3258 /* 3259 * Give the firmware the mtu and the big and small buffer 3260 * sizes. The firmware wants the big buf size to be a power 3261 * of two. Luckily, DragonFly's clusters are powers of two 3262 */ 3263 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3264 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3265 3266 cmd.data0 = MXGE_RX_SMALL_BUFLEN; 3267 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd); 3268 3269 cmd.data0 = cl_size; 3270 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3271 3272 if (err != 0) { 3273 if_printf(ifp, "failed to setup params\n"); 3274 goto abort; 3275 } 3276 3277 /* Now give him the pointer to the stats block */ 3278 for (slice = 0; slice < sc->num_slices; slice++) { 3279 ss = &sc->ss[slice]; 3280 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3281 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr); 3282 cmd.data2 = sizeof(struct mcp_irq_data); 3283 cmd.data2 |= (slice << 16); 3284 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3285 } 3286 3287 if (err != 0) { 3288 bus = sc->ss->fw_stats_dma.dmem_busaddr; 3289 bus += offsetof(struct mcp_irq_data, send_done_count); 3290 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3291 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3292 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3293 &cmd); 3294 3295 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3296 sc->fw_multicast_support = 0; 3297 } else { 3298 sc->fw_multicast_support = 1; 3299 } 3300 3301 if (err != 0) { 3302 if_printf(ifp, "failed to setup params\n"); 3303 goto abort; 3304 } 3305 3306 for (slice = 0; slice < sc->num_slices; slice++) { 3307 err = mxge_slice_open(&sc->ss[slice], cl_size); 3308 if (err != 0) { 3309 if_printf(ifp, "couldn't open slice %d\n", slice); 3310 goto abort; 3311 } 3312 } 3313 3314 /* Finally, start the firmware running */ 3315 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3316 if (err) { 3317 if_printf(ifp, "Couldn't bring up link\n"); 3318 goto abort; 3319 } 3320 ifp->if_flags |= IFF_RUNNING; 3321 ifq_clr_oactive(&ifp->if_snd); 3322 ifp->if_timer = 0; 3323 3324 return 0; 3325 3326 abort: 3327 mxge_free_mbufs(sc); 3328 return err; 3329 } 3330 3331 static void 3332 mxge_close(mxge_softc_t *sc, int down) 3333 { 3334 struct ifnet *ifp = sc->ifp; 3335 mxge_cmd_t cmd; 3336 int err, old_down_cnt; 3337 3338 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3339 3340 ifp->if_flags &= ~IFF_RUNNING; 3341 ifq_clr_oactive(&ifp->if_snd); 3342 ifp->if_timer = 0; 3343 3344 if (!down) { 3345 old_down_cnt = sc->down_cnt; 3346 wmb(); 3347 3348 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3349 if (err) 3350 if_printf(ifp, "Couldn't bring down link\n"); 3351 3352 if (old_down_cnt == sc->down_cnt) { 3353 /* Wait for down irq */ 3354 ifnet_deserialize_all(ifp); 3355 DELAY(10 * sc->intr_coal_delay); 3356 ifnet_serialize_all(ifp); 3357 } 3358 3359 wmb(); 3360 if (old_down_cnt == sc->down_cnt) 3361 if_printf(ifp, "never got down irq\n"); 3362 } 3363 mxge_free_mbufs(sc); 3364 } 3365 3366 static void 3367 mxge_setup_cfg_space(mxge_softc_t *sc) 3368 { 3369 device_t dev = sc->dev; 3370 int reg; 3371 uint16_t lnk, pectl; 3372 3373 /* Find the PCIe link width and set max read request to 4KB */ 3374 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3375 lnk = pci_read_config(dev, reg + 0x12, 2); 3376 sc->link_width = (lnk >> 4) & 0x3f; 3377 3378 if (sc->pectl == 0) { 3379 pectl = pci_read_config(dev, reg + 0x8, 2); 3380 pectl = (pectl & ~0x7000) | (5 << 12); 3381 pci_write_config(dev, reg + 0x8, pectl, 2); 3382 sc->pectl = pectl; 3383 } else { 3384 /* Restore saved pectl after watchdog reset */ 3385 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3386 } 3387 } 3388 3389 /* Enable DMA and memory space access */ 3390 pci_enable_busmaster(dev); 3391 } 3392 3393 static uint32_t 3394 mxge_read_reboot(mxge_softc_t *sc) 3395 { 3396 device_t dev = sc->dev; 3397 uint32_t vs; 3398 3399 /* Find the vendor specific offset */ 3400 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3401 if_printf(sc->ifp, "could not find vendor specific offset\n"); 3402 return (uint32_t)-1; 3403 } 3404 /* Enable read32 mode */ 3405 pci_write_config(dev, vs + 0x10, 0x3, 1); 3406 /* Tell NIC which register to read */ 3407 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3408 return pci_read_config(dev, vs + 0x14, 4); 3409 } 3410 3411 static void 3412 mxge_watchdog_reset(mxge_softc_t *sc) 3413 { 3414 struct pci_devinfo *dinfo; 3415 int err, running; 3416 uint32_t reboot; 3417 uint16_t cmd; 3418 3419 err = ENXIO; 3420 3421 if_printf(sc->ifp, "Watchdog reset!\n"); 3422 3423 /* 3424 * Check to see if the NIC rebooted. If it did, then all of 3425 * PCI config space has been reset, and things like the 3426 * busmaster bit will be zero. If this is the case, then we 3427 * must restore PCI config space before the NIC can be used 3428 * again 3429 */ 3430 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3431 if (cmd == 0xffff) { 3432 /* 3433 * Maybe the watchdog caught the NIC rebooting; wait 3434 * up to 100ms for it to finish. If it does not come 3435 * back, then give up 3436 */ 3437 DELAY(1000*100); 3438 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3439 if (cmd == 0xffff) 3440 if_printf(sc->ifp, "NIC disappeared!\n"); 3441 } 3442 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3443 /* Print the reboot status */ 3444 reboot = mxge_read_reboot(sc); 3445 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot); 3446 3447 running = sc->ifp->if_flags & IFF_RUNNING; 3448 if (running) { 3449 /* 3450 * Quiesce NIC so that TX routines will not try to 3451 * xmit after restoration of BAR 3452 */ 3453 3454 /* Mark the link as down */ 3455 if (sc->link_state) { 3456 sc->ifp->if_link_state = LINK_STATE_DOWN; 3457 if_link_state_change(sc->ifp); 3458 } 3459 mxge_close(sc, 1); 3460 } 3461 /* Restore PCI configuration space */ 3462 dinfo = device_get_ivars(sc->dev); 3463 pci_cfg_restore(sc->dev, dinfo); 3464 3465 /* And redo any changes we made to our config space */ 3466 mxge_setup_cfg_space(sc); 3467 3468 /* Reload f/w */ 3469 err = mxge_load_firmware(sc, 0); 3470 if (err) 3471 if_printf(sc->ifp, "Unable to re-load f/w\n"); 3472 if (running && !err) { 3473 err = mxge_open(sc); 3474 if_devstart_sched(sc->ifp); 3475 } 3476 sc->watchdog_resets++; 3477 } else { 3478 if_printf(sc->ifp, "NIC did not reboot, not resetting\n"); 3479 err = 0; 3480 } 3481 if (err) { 3482 if_printf(sc->ifp, "watchdog reset failed\n"); 3483 } else { 3484 if (sc->dying == 2) 3485 sc->dying = 0; 3486 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3487 } 3488 } 3489 3490 static void 3491 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3492 { 3493 if_printf(sc->ifp, "slice %d struck? ring state:\n", slice); 3494 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3495 tx->req, tx->done, tx->queue_active); 3496 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n", 3497 tx->activate, tx->deactivate); 3498 if_printf(sc->ifp, "pkt_done=%d fw=%d\n", 3499 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count)); 3500 } 3501 3502 static u_long 3503 mxge_update_stats(mxge_softc_t *sc) 3504 { 3505 u_long ipackets, opackets, pkts; 3506 3507 IFNET_STAT_GET(sc->ifp, ipackets, ipackets); 3508 IFNET_STAT_GET(sc->ifp, opackets, opackets); 3509 3510 pkts = ipackets - sc->ipackets; 3511 pkts += opackets - sc->opackets; 3512 3513 sc->ipackets = ipackets; 3514 sc->opackets = opackets; 3515 3516 return pkts; 3517 } 3518 3519 static void 3520 mxge_tick(void *arg) 3521 { 3522 mxge_softc_t *sc = arg; 3523 u_long pkts = 0; 3524 int err = 0; 3525 int ticks; 3526 3527 lwkt_serialize_enter(&sc->main_serialize); 3528 3529 ticks = mxge_ticks; 3530 if (sc->ifp->if_flags & IFF_RUNNING) { 3531 /* Aggregate stats from different slices */ 3532 pkts = mxge_update_stats(sc); 3533 if (sc->need_media_probe) 3534 mxge_media_probe(sc); 3535 } 3536 if (pkts == 0) { 3537 uint16_t cmd; 3538 3539 /* Ensure NIC did not suffer h/w fault while idle */ 3540 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3541 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3542 sc->dying = 2; 3543 mxge_serialize_skipmain(sc); 3544 mxge_watchdog_reset(sc); 3545 mxge_deserialize_skipmain(sc); 3546 err = ENXIO; 3547 } 3548 3549 /* Look less often if NIC is idle */ 3550 ticks *= 4; 3551 } 3552 3553 if (err == 0) 3554 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3555 3556 lwkt_serialize_exit(&sc->main_serialize); 3557 } 3558 3559 static int 3560 mxge_media_change(struct ifnet *ifp) 3561 { 3562 return EINVAL; 3563 } 3564 3565 static int 3566 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3567 { 3568 struct ifnet *ifp = sc->ifp; 3569 int real_mtu, old_mtu; 3570 int err = 0; 3571 3572 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN; 3573 if (mtu > sc->max_mtu || real_mtu < 60) 3574 return EINVAL; 3575 3576 old_mtu = ifp->if_mtu; 3577 ifp->if_mtu = mtu; 3578 if (ifp->if_flags & IFF_RUNNING) { 3579 mxge_close(sc, 0); 3580 err = mxge_open(sc); 3581 if (err != 0) { 3582 ifp->if_mtu = old_mtu; 3583 mxge_close(sc, 0); 3584 mxge_open(sc); 3585 } 3586 } 3587 return err; 3588 } 3589 3590 static void 3591 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3592 { 3593 mxge_softc_t *sc = ifp->if_softc; 3594 3595 3596 if (sc == NULL) 3597 return; 3598 ifmr->ifm_status = IFM_AVALID; 3599 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 3600 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3601 ifmr->ifm_active |= sc->current_media; 3602 } 3603 3604 static int 3605 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, 3606 struct ucred *cr __unused) 3607 { 3608 mxge_softc_t *sc = ifp->if_softc; 3609 struct ifreq *ifr = (struct ifreq *)data; 3610 int err, mask; 3611 3612 ASSERT_IFNET_SERIALIZED_ALL(ifp); 3613 err = 0; 3614 3615 switch (command) { 3616 case SIOCSIFMTU: 3617 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3618 break; 3619 3620 case SIOCSIFFLAGS: 3621 if (sc->dying) 3622 return EINVAL; 3623 3624 if (ifp->if_flags & IFF_UP) { 3625 if (!(ifp->if_flags & IFF_RUNNING)) { 3626 err = mxge_open(sc); 3627 } else { 3628 /* 3629 * Take care of PROMISC and ALLMULTI 3630 * flag changes 3631 */ 3632 mxge_change_promisc(sc, 3633 ifp->if_flags & IFF_PROMISC); 3634 mxge_set_multicast_list(sc); 3635 } 3636 } else { 3637 if (ifp->if_flags & IFF_RUNNING) 3638 mxge_close(sc, 0); 3639 } 3640 break; 3641 3642 case SIOCADDMULTI: 3643 case SIOCDELMULTI: 3644 mxge_set_multicast_list(sc); 3645 break; 3646 3647 case SIOCSIFCAP: 3648 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3649 if (mask & IFCAP_TXCSUM) { 3650 ifp->if_capenable ^= IFCAP_TXCSUM; 3651 if (ifp->if_capenable & IFCAP_TXCSUM) 3652 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; 3653 else 3654 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 3655 } 3656 if (mask & IFCAP_TSO) { 3657 ifp->if_capenable ^= IFCAP_TSO; 3658 if (ifp->if_capenable & IFCAP_TSO) 3659 ifp->if_hwassist |= CSUM_TSO; 3660 else 3661 ifp->if_hwassist &= ~CSUM_TSO; 3662 } 3663 if (mask & IFCAP_RXCSUM) 3664 ifp->if_capenable ^= IFCAP_RXCSUM; 3665 if (mask & IFCAP_VLAN_HWTAGGING) 3666 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3667 break; 3668 3669 case SIOCGIFMEDIA: 3670 mxge_media_probe(sc); 3671 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3672 &sc->media, command); 3673 break; 3674 3675 default: 3676 err = ether_ioctl(ifp, command, data); 3677 break; 3678 } 3679 return err; 3680 } 3681 3682 static void 3683 mxge_fetch_tunables(mxge_softc_t *sc) 3684 { 3685 sc->intr_coal_delay = mxge_intr_coal_delay; 3686 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000)) 3687 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY; 3688 3689 /* XXX */ 3690 if (mxge_ticks == 0) 3691 mxge_ticks = hz / 2; 3692 3693 sc->pause = mxge_flow_control; 3694 3695 sc->throttle = mxge_throttle; 3696 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE) 3697 sc->throttle = MXGE_MAX_THROTTLE; 3698 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE) 3699 sc->throttle = MXGE_MIN_THROTTLE; 3700 } 3701 3702 static void 3703 mxge_free_slices(mxge_softc_t *sc) 3704 { 3705 struct mxge_slice_state *ss; 3706 int i; 3707 3708 if (sc->ss == NULL) 3709 return; 3710 3711 for (i = 0; i < sc->num_slices; i++) { 3712 ss = &sc->ss[i]; 3713 if (ss->fw_stats != NULL) { 3714 mxge_dma_free(&ss->fw_stats_dma); 3715 ss->fw_stats = NULL; 3716 } 3717 if (ss->rx_data.rx_done.entry != NULL) { 3718 mxge_dma_free(&ss->rx_done_dma); 3719 ss->rx_data.rx_done.entry = NULL; 3720 } 3721 } 3722 kfree(sc->ss, M_DEVBUF); 3723 sc->ss = NULL; 3724 } 3725 3726 static int 3727 mxge_alloc_slices(mxge_softc_t *sc) 3728 { 3729 mxge_cmd_t cmd; 3730 struct mxge_slice_state *ss; 3731 size_t bytes; 3732 int err, i, rx_ring_size; 3733 3734 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3735 if (err != 0) { 3736 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3737 return err; 3738 } 3739 rx_ring_size = cmd.data0; 3740 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t)); 3741 3742 bytes = sizeof(*sc->ss) * sc->num_slices; 3743 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO); 3744 3745 for (i = 0; i < sc->num_slices; i++) { 3746 ss = &sc->ss[i]; 3747 3748 ss->sc = sc; 3749 3750 lwkt_serialize_init(&ss->rx_data.rx_serialize); 3751 lwkt_serialize_init(&ss->tx.tx_serialize); 3752 3753 /* 3754 * Allocate per-slice rx interrupt queues 3755 * XXX assume 4bytes mcp_slot 3756 */ 3757 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t); 3758 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096); 3759 if (err != 0) { 3760 device_printf(sc->dev, 3761 "alloc %d slice rx_done failed\n", i); 3762 return err; 3763 } 3764 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr; 3765 3766 /* 3767 * Allocate the per-slice firmware stats 3768 */ 3769 bytes = sizeof(*ss->fw_stats); 3770 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 3771 sizeof(*ss->fw_stats), 64); 3772 if (err != 0) { 3773 device_printf(sc->dev, 3774 "alloc %d fw_stats failed\n", i); 3775 return err; 3776 } 3777 ss->fw_stats = ss->fw_stats_dma.dmem_addr; 3778 } 3779 return 0; 3780 } 3781 3782 static void 3783 mxge_slice_probe(mxge_softc_t *sc) 3784 { 3785 mxge_cmd_t cmd; 3786 const char *old_fw; 3787 int msix_cnt, status, max_intr_slots; 3788 3789 sc->num_slices = 1; 3790 3791 /* 3792 * XXX 3793 * 3794 * Don't enable multiple slices if they are not enabled, 3795 * or if this is not an SMP system 3796 */ 3797 if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2) 3798 return; 3799 3800 /* see how many MSI-X interrupts are available */ 3801 msix_cnt = pci_msix_count(sc->dev); 3802 if (msix_cnt < 2) 3803 return; 3804 3805 /* now load the slice aware firmware see what it supports */ 3806 old_fw = sc->fw_name; 3807 if (old_fw == mxge_fw_aligned) 3808 sc->fw_name = mxge_fw_rss_aligned; 3809 else 3810 sc->fw_name = mxge_fw_rss_unaligned; 3811 status = mxge_load_firmware(sc, 0); 3812 if (status != 0) { 3813 device_printf(sc->dev, "Falling back to a single slice\n"); 3814 return; 3815 } 3816 3817 /* try to send a reset command to the card to see if it 3818 is alive */ 3819 memset(&cmd, 0, sizeof (cmd)); 3820 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 3821 if (status != 0) { 3822 device_printf(sc->dev, "failed reset\n"); 3823 goto abort_with_fw; 3824 } 3825 3826 /* get rx ring size */ 3827 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 3828 if (status != 0) { 3829 device_printf(sc->dev, "Cannot determine rx ring size\n"); 3830 goto abort_with_fw; 3831 } 3832 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 3833 3834 /* tell it the size of the interrupt queues */ 3835 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 3836 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 3837 if (status != 0) { 3838 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3839 goto abort_with_fw; 3840 } 3841 3842 /* ask the maximum number of slices it supports */ 3843 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 3844 if (status != 0) { 3845 device_printf(sc->dev, 3846 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 3847 goto abort_with_fw; 3848 } 3849 sc->num_slices = cmd.data0; 3850 if (sc->num_slices > msix_cnt) 3851 sc->num_slices = msix_cnt; 3852 3853 if (mxge_max_slices == -1) { 3854 /* cap to number of CPUs in system */ 3855 if (sc->num_slices > ncpus) 3856 sc->num_slices = ncpus; 3857 } else { 3858 if (sc->num_slices > mxge_max_slices) 3859 sc->num_slices = mxge_max_slices; 3860 } 3861 /* make sure it is a power of two */ 3862 while (sc->num_slices & (sc->num_slices - 1)) 3863 sc->num_slices--; 3864 3865 if (bootverbose) 3866 device_printf(sc->dev, "using %d slices\n", 3867 sc->num_slices); 3868 3869 return; 3870 3871 abort_with_fw: 3872 sc->fw_name = old_fw; 3873 (void) mxge_load_firmware(sc, 0); 3874 } 3875 3876 #if 0 3877 static int 3878 mxge_add_msix_irqs(mxge_softc_t *sc) 3879 { 3880 size_t bytes; 3881 int count, err, i, rid; 3882 3883 rid = PCIR_BAR(2); 3884 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 3885 &rid, RF_ACTIVE); 3886 3887 if (sc->msix_table_res == NULL) { 3888 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 3889 return ENXIO; 3890 } 3891 3892 count = sc->num_slices; 3893 err = pci_alloc_msix(sc->dev, &count); 3894 if (err != 0) { 3895 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 3896 "err = %d \n", sc->num_slices, err); 3897 goto abort_with_msix_table; 3898 } 3899 if (count < sc->num_slices) { 3900 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 3901 count, sc->num_slices); 3902 device_printf(sc->dev, 3903 "Try setting hw.mxge.max_slices to %d\n", 3904 count); 3905 err = ENOSPC; 3906 goto abort_with_msix; 3907 } 3908 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 3909 sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3910 if (sc->msix_irq_res == NULL) { 3911 err = ENOMEM; 3912 goto abort_with_msix; 3913 } 3914 3915 for (i = 0; i < sc->num_slices; i++) { 3916 rid = i + 1; 3917 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 3918 SYS_RES_IRQ, 3919 &rid, RF_ACTIVE); 3920 if (sc->msix_irq_res[i] == NULL) { 3921 device_printf(sc->dev, "couldn't allocate IRQ res" 3922 " for message %d\n", i); 3923 err = ENXIO; 3924 goto abort_with_res; 3925 } 3926 } 3927 3928 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 3929 sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 3930 3931 for (i = 0; i < sc->num_slices; i++) { 3932 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 3933 INTR_MPSAFE, 3934 mxge_intr, &sc->ss[i], &sc->msix_ih[i], 3935 sc->ifp->if_serializer); 3936 if (err != 0) { 3937 device_printf(sc->dev, "couldn't setup intr for " 3938 "message %d\n", i); 3939 goto abort_with_intr; 3940 } 3941 } 3942 3943 if (bootverbose) { 3944 device_printf(sc->dev, "using %d msix IRQs:", 3945 sc->num_slices); 3946 for (i = 0; i < sc->num_slices; i++) 3947 kprintf(" %ld", rman_get_start(sc->msix_irq_res[i])); 3948 kprintf("\n"); 3949 } 3950 return (0); 3951 3952 abort_with_intr: 3953 for (i = 0; i < sc->num_slices; i++) { 3954 if (sc->msix_ih[i] != NULL) { 3955 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 3956 sc->msix_ih[i]); 3957 sc->msix_ih[i] = NULL; 3958 } 3959 } 3960 kfree(sc->msix_ih, M_DEVBUF); 3961 3962 3963 abort_with_res: 3964 for (i = 0; i < sc->num_slices; i++) { 3965 rid = i + 1; 3966 if (sc->msix_irq_res[i] != NULL) 3967 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 3968 sc->msix_irq_res[i]); 3969 sc->msix_irq_res[i] = NULL; 3970 } 3971 kfree(sc->msix_irq_res, M_DEVBUF); 3972 3973 3974 abort_with_msix: 3975 pci_release_msi(sc->dev); 3976 3977 abort_with_msix_table: 3978 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 3979 sc->msix_table_res); 3980 3981 return err; 3982 } 3983 #endif 3984 3985 static int 3986 mxge_add_single_irq(mxge_softc_t *sc) 3987 { 3988 driver_intr_t *intr_func; 3989 u_int irq_flags; 3990 3991 sc->irq_type = pci_alloc_1intr(sc->dev, mxge_msi_enable, 3992 &sc->irq_rid, &irq_flags); 3993 3994 sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, 3995 &sc->irq_rid, irq_flags); 3996 if (sc->irq_res == NULL) { 3997 device_printf(sc->dev, "could not alloc interrupt\n"); 3998 return ENXIO; 3999 } 4000 4001 if (sc->irq_type == PCI_INTR_TYPE_LEGACY) 4002 intr_func = mxge_legacy; 4003 else 4004 intr_func = mxge_msi; 4005 4006 return bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE, 4007 intr_func, &sc->ss[0], &sc->ih, &sc->main_serialize); 4008 } 4009 4010 #if 0 4011 static void 4012 mxge_rem_msix_irqs(mxge_softc_t *sc) 4013 { 4014 int i, rid; 4015 4016 for (i = 0; i < sc->num_slices; i++) { 4017 if (sc->msix_ih[i] != NULL) { 4018 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4019 sc->msix_ih[i]); 4020 sc->msix_ih[i] = NULL; 4021 } 4022 } 4023 kfree(sc->msix_ih, M_DEVBUF); 4024 4025 for (i = 0; i < sc->num_slices; i++) { 4026 rid = i + 1; 4027 if (sc->msix_irq_res[i] != NULL) 4028 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4029 sc->msix_irq_res[i]); 4030 sc->msix_irq_res[i] = NULL; 4031 } 4032 kfree(sc->msix_irq_res, M_DEVBUF); 4033 4034 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4035 sc->msix_table_res); 4036 4037 pci_release_msi(sc->dev); 4038 return; 4039 } 4040 #endif 4041 4042 static int 4043 mxge_add_irq(mxge_softc_t *sc) 4044 { 4045 #if 0 4046 int err; 4047 4048 if (sc->num_slices > 1) 4049 err = mxge_add_msix_irqs(sc); 4050 else 4051 err = mxge_add_single_irq(sc); 4052 4053 if (0 && err == 0 && sc->num_slices > 1) { 4054 mxge_rem_msix_irqs(sc); 4055 err = mxge_add_msix_irqs(sc); 4056 } 4057 return err; 4058 #else 4059 return mxge_add_single_irq(sc); 4060 #endif 4061 } 4062 4063 static void 4064 mxge_setup_serialize(struct mxge_softc *sc) 4065 { 4066 int i = 0, slice; 4067 4068 /* Main + rx + tx */ 4069 sc->nserialize = (2 * sc->num_slices) + 1; 4070 sc->serializes = 4071 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *), 4072 M_DEVBUF, M_WAITOK | M_ZERO); 4073 4074 /* 4075 * Setup serializes 4076 * 4077 * NOTE: Order is critical 4078 */ 4079 4080 KKASSERT(i < sc->nserialize); 4081 sc->serializes[i++] = &sc->main_serialize; 4082 4083 for (slice = 0; slice < sc->num_slices; ++slice) { 4084 KKASSERT(i < sc->nserialize); 4085 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize; 4086 } 4087 4088 for (slice = 0; slice < sc->num_slices; ++slice) { 4089 KKASSERT(i < sc->nserialize); 4090 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize; 4091 } 4092 4093 KKASSERT(i == sc->nserialize); 4094 } 4095 4096 static void 4097 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz) 4098 { 4099 struct mxge_softc *sc = ifp->if_softc; 4100 4101 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz); 4102 } 4103 4104 static void 4105 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4106 { 4107 struct mxge_softc *sc = ifp->if_softc; 4108 4109 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz); 4110 } 4111 4112 static int 4113 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz) 4114 { 4115 struct mxge_softc *sc = ifp->if_softc; 4116 4117 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz); 4118 } 4119 4120 #ifdef INVARIANTS 4121 4122 static void 4123 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz, 4124 boolean_t serialized) 4125 { 4126 struct mxge_softc *sc = ifp->if_softc; 4127 4128 ifnet_serialize_array_assert(sc->serializes, sc->nserialize, 4129 slz, serialized); 4130 } 4131 4132 #endif /* INVARIANTS */ 4133 4134 static int 4135 mxge_attach(device_t dev) 4136 { 4137 mxge_softc_t *sc = device_get_softc(dev); 4138 struct ifnet *ifp = &sc->arpcom.ac_if; 4139 int err, rid; 4140 4141 /* 4142 * Avoid rewriting half the lines in this file to use 4143 * &sc->arpcom.ac_if instead 4144 */ 4145 sc->ifp = ifp; 4146 sc->dev = dev; 4147 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4148 ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status); 4149 4150 lwkt_serialize_init(&sc->main_serialize); 4151 4152 mxge_fetch_tunables(sc); 4153 4154 err = bus_dma_tag_create(NULL, /* parent */ 4155 1, /* alignment */ 4156 0, /* boundary */ 4157 BUS_SPACE_MAXADDR, /* low */ 4158 BUS_SPACE_MAXADDR, /* high */ 4159 NULL, NULL, /* filter */ 4160 BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 4161 0, /* num segs */ 4162 BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 4163 0, /* flags */ 4164 &sc->parent_dmat); /* tag */ 4165 if (err != 0) { 4166 device_printf(dev, "Err %d allocating parent dmat\n", err); 4167 goto failed; 4168 } 4169 4170 callout_init_mp(&sc->co_hdl); 4171 4172 mxge_setup_cfg_space(sc); 4173 4174 /* 4175 * Map the board into the kernel 4176 */ 4177 rid = PCIR_BARS; 4178 sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 4179 &rid, RF_ACTIVE); 4180 if (sc->mem_res == NULL) { 4181 device_printf(dev, "could not map memory\n"); 4182 err = ENXIO; 4183 goto failed; 4184 } 4185 4186 sc->sram = rman_get_virtual(sc->mem_res); 4187 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4188 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4189 device_printf(dev, "impossible memory region size %ld\n", 4190 rman_get_size(sc->mem_res)); 4191 err = ENXIO; 4192 goto failed; 4193 } 4194 4195 /* 4196 * Make NULL terminated copy of the EEPROM strings section of 4197 * lanai SRAM 4198 */ 4199 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4200 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4201 rman_get_bushandle(sc->mem_res), 4202 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4203 sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2); 4204 err = mxge_parse_strings(sc); 4205 if (err != 0) { 4206 device_printf(dev, "parse EEPROM string failed\n"); 4207 goto failed; 4208 } 4209 4210 /* 4211 * Enable write combining for efficient use of PCIe bus 4212 */ 4213 mxge_enable_wc(sc); 4214 4215 /* 4216 * Allocate the out of band DMA memory 4217 */ 4218 err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64); 4219 if (err != 0) { 4220 device_printf(dev, "alloc cmd DMA buf failed\n"); 4221 goto failed; 4222 } 4223 sc->cmd = sc->cmd_dma.dmem_addr; 4224 4225 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4226 if (err != 0) { 4227 device_printf(dev, "alloc zeropad DMA buf failed\n"); 4228 goto failed; 4229 } 4230 4231 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4232 if (err != 0) { 4233 device_printf(dev, "alloc dmabench DMA buf failed\n"); 4234 goto failed; 4235 } 4236 4237 /* Select & load the firmware */ 4238 err = mxge_select_firmware(sc); 4239 if (err != 0) { 4240 device_printf(dev, "select firmware failed\n"); 4241 goto failed; 4242 } 4243 4244 mxge_slice_probe(sc); 4245 err = mxge_alloc_slices(sc); 4246 if (err != 0) { 4247 device_printf(dev, "alloc slices failed\n"); 4248 goto failed; 4249 } 4250 4251 /* Setup serializes */ 4252 mxge_setup_serialize(sc); 4253 4254 err = mxge_reset(sc, 0); 4255 if (err != 0) { 4256 device_printf(dev, "reset failed\n"); 4257 goto failed; 4258 } 4259 4260 err = mxge_alloc_rings(sc); 4261 if (err != 0) { 4262 device_printf(dev, "failed to allocate rings\n"); 4263 goto failed; 4264 } 4265 4266 ifp->if_baudrate = IF_Gbps(10UL); 4267 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO; 4268 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4269 4270 ifp->if_capabilities |= IFCAP_VLAN_MTU; 4271 #if 0 4272 /* Well, its software, sigh */ 4273 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; 4274 #endif 4275 ifp->if_capenable = ifp->if_capabilities; 4276 4277 ifp->if_softc = sc; 4278 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4279 ifp->if_init = mxge_init; 4280 ifp->if_ioctl = mxge_ioctl; 4281 ifp->if_start = mxge_start; 4282 ifp->if_watchdog = mxge_watchdog; 4283 ifp->if_serialize = mxge_serialize; 4284 ifp->if_deserialize = mxge_deserialize; 4285 ifp->if_tryserialize = mxge_tryserialize; 4286 #ifdef INVARIANTS 4287 ifp->if_serialize_assert = mxge_serialize_assert; 4288 #endif 4289 4290 /* Increase TSO burst length */ 4291 ifp->if_tsolen = (32 * ETHERMTU); 4292 4293 /* Initialise the ifmedia structure */ 4294 mxge_media_init(sc); 4295 mxge_media_probe(sc); 4296 4297 ether_ifattach(ifp, sc->mac_addr, NULL); 4298 4299 /* 4300 * XXX 4301 * We are not ready to do "gather" jumbo frame, so 4302 * limit MTU to MJUMPAGESIZE 4303 */ 4304 sc->max_mtu = MJUMPAGESIZE - 4305 ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1; 4306 sc->dying = 0; 4307 4308 /* must come after ether_ifattach() */ 4309 err = mxge_add_irq(sc); 4310 if (err != 0) { 4311 device_printf(dev, "alloc and setup intr failed\n"); 4312 ether_ifdetach(ifp); 4313 goto failed; 4314 } 4315 4316 ifq_set_cpuid(&ifp->if_snd, rman_get_cpuid(sc->irq_res)); 4317 ifq_set_hw_serialize(&ifp->if_snd, &sc->ss[0].tx.tx_serialize); 4318 4319 mxge_add_sysctls(sc); 4320 4321 callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc, 4322 rman_get_cpuid(sc->irq_res)); 4323 return 0; 4324 4325 failed: 4326 mxge_detach(dev); 4327 return err; 4328 } 4329 4330 static int 4331 mxge_detach(device_t dev) 4332 { 4333 mxge_softc_t *sc = device_get_softc(dev); 4334 4335 if (device_is_attached(dev)) { 4336 struct ifnet *ifp = sc->ifp; 4337 4338 ifnet_serialize_all(ifp); 4339 4340 sc->dying = 1; 4341 if (ifp->if_flags & IFF_RUNNING) 4342 mxge_close(sc, 1); 4343 callout_stop(&sc->co_hdl); 4344 4345 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4346 4347 ifnet_deserialize_all(ifp); 4348 4349 callout_terminate(&sc->co_hdl); 4350 4351 ether_ifdetach(ifp); 4352 } 4353 ifmedia_removeall(&sc->media); 4354 4355 if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL && 4356 sc->sram != NULL) 4357 mxge_dummy_rdma(sc, 0); 4358 4359 mxge_rem_sysctls(sc); 4360 mxge_free_rings(sc); 4361 4362 /* MUST after sysctls and rings are freed */ 4363 mxge_free_slices(sc); 4364 4365 if (sc->dmabench_dma.dmem_addr != NULL) 4366 mxge_dma_free(&sc->dmabench_dma); 4367 if (sc->zeropad_dma.dmem_addr != NULL) 4368 mxge_dma_free(&sc->zeropad_dma); 4369 if (sc->cmd_dma.dmem_addr != NULL) 4370 mxge_dma_free(&sc->cmd_dma); 4371 4372 if (sc->irq_res != NULL) { 4373 bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, 4374 sc->irq_res); 4375 } 4376 if (sc->irq_type == PCI_INTR_TYPE_MSI) 4377 pci_release_msi(dev); 4378 4379 if (sc->mem_res != NULL) { 4380 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, 4381 sc->mem_res); 4382 } 4383 4384 if (sc->parent_dmat != NULL) 4385 bus_dma_tag_destroy(sc->parent_dmat); 4386 4387 return 0; 4388 } 4389 4390 static int 4391 mxge_shutdown(device_t dev) 4392 { 4393 return 0; 4394 } 4395