1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <unistd.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <dirent.h> 43 #include <net/if.h> 44 #include <sys/ioctl.h> 45 #include <sys/socket.h> 46 #include <netinet/in.h> 47 #include <linux/if.h> 48 #include <linux/ethtool.h> 49 #include <linux/sockios.h> 50 #include <fcntl.h> 51 52 /* DPDK headers don't like -pedantic. */ 53 #ifdef PEDANTIC 54 #pragma GCC diagnostic ignored "-pedantic" 55 #endif 56 #include <rte_atomic.h> 57 #include <rte_ethdev.h> 58 #include <rte_mbuf.h> 59 #include <rte_common.h> 60 #include <rte_interrupts.h> 61 #include <rte_alarm.h> 62 #ifdef PEDANTIC 63 #pragma GCC diagnostic error "-pedantic" 64 #endif 65 66 #include "mlx5.h" 67 #include "mlx5_rxtx.h" 68 #include "mlx5_utils.h" 69 70 /** 71 * Get interface name from private structure. 72 * 73 * @param[in] priv 74 * Pointer to private structure. 75 * @param[out] ifname 76 * Interface name output buffer. 77 * 78 * @return 79 * 0 on success, -1 on failure and errno is set. 80 */ 81 int 82 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 83 { 84 DIR *dir; 85 struct dirent *dent; 86 unsigned int dev_type = 0; 87 unsigned int dev_port_prev = ~0u; 88 char match[IF_NAMESIZE] = ""; 89 90 { 91 MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path); 92 93 dir = opendir(path); 94 if (dir == NULL) 95 return -1; 96 } 97 while ((dent = readdir(dir)) != NULL) { 98 char *name = dent->d_name; 99 FILE *file; 100 unsigned int dev_port; 101 int r; 102 103 if ((name[0] == '.') && 104 ((name[1] == '\0') || 105 ((name[1] == '.') && (name[2] == '\0')))) 106 continue; 107 108 MKSTR(path, "%s/device/net/%s/%s", 109 priv->ctx->device->ibdev_path, name, 110 (dev_type ? "dev_id" : "dev_port")); 111 112 file = fopen(path, "rb"); 113 if (file == NULL) { 114 if (errno != ENOENT) 115 continue; 116 /* 117 * Switch to dev_id when dev_port does not exist as 118 * is the case with Linux kernel versions < 3.15. 119 */ 120 try_dev_id: 121 match[0] = '\0'; 122 if (dev_type) 123 break; 124 dev_type = 1; 125 dev_port_prev = ~0u; 126 rewinddir(dir); 127 continue; 128 } 129 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 130 fclose(file); 131 if (r != 1) 132 continue; 133 /* 134 * Switch to dev_id when dev_port returns the same value for 135 * all ports. May happen when using a MOFED release older than 136 * 3.0 with a Linux kernel >= 3.15. 137 */ 138 if (dev_port == dev_port_prev) 139 goto try_dev_id; 140 dev_port_prev = dev_port; 141 if (dev_port == (priv->port - 1u)) 142 snprintf(match, sizeof(match), "%s", name); 143 } 144 closedir(dir); 145 if (match[0] == '\0') 146 return -1; 147 strncpy(*ifname, match, sizeof(*ifname)); 148 return 0; 149 } 150 151 /** 152 * Read from sysfs entry. 153 * 154 * @param[in] priv 155 * Pointer to private structure. 156 * @param[in] entry 157 * Entry name relative to sysfs path. 158 * @param[out] buf 159 * Data output buffer. 160 * @param size 161 * Buffer size. 162 * 163 * @return 164 * 0 on success, -1 on failure and errno is set. 165 */ 166 static int 167 priv_sysfs_read(const struct priv *priv, const char *entry, 168 char *buf, size_t size) 169 { 170 char ifname[IF_NAMESIZE]; 171 FILE *file; 172 int ret; 173 int err; 174 175 if (priv_get_ifname(priv, &ifname)) 176 return -1; 177 178 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 179 ifname, entry); 180 181 file = fopen(path, "rb"); 182 if (file == NULL) 183 return -1; 184 ret = fread(buf, 1, size, file); 185 err = errno; 186 if (((size_t)ret < size) && (ferror(file))) 187 ret = -1; 188 else 189 ret = size; 190 fclose(file); 191 errno = err; 192 return ret; 193 } 194 195 /** 196 * Write to sysfs entry. 197 * 198 * @param[in] priv 199 * Pointer to private structure. 200 * @param[in] entry 201 * Entry name relative to sysfs path. 202 * @param[in] buf 203 * Data buffer. 204 * @param size 205 * Buffer size. 206 * 207 * @return 208 * 0 on success, -1 on failure and errno is set. 209 */ 210 static int 211 priv_sysfs_write(const struct priv *priv, const char *entry, 212 char *buf, size_t size) 213 { 214 char ifname[IF_NAMESIZE]; 215 FILE *file; 216 int ret; 217 int err; 218 219 if (priv_get_ifname(priv, &ifname)) 220 return -1; 221 222 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 223 ifname, entry); 224 225 file = fopen(path, "wb"); 226 if (file == NULL) 227 return -1; 228 ret = fwrite(buf, 1, size, file); 229 err = errno; 230 if (((size_t)ret < size) || (ferror(file))) 231 ret = -1; 232 else 233 ret = size; 234 fclose(file); 235 errno = err; 236 return ret; 237 } 238 239 /** 240 * Get unsigned long sysfs property. 241 * 242 * @param priv 243 * Pointer to private structure. 244 * @param[in] name 245 * Entry name relative to sysfs path. 246 * @param[out] value 247 * Value output buffer. 248 * 249 * @return 250 * 0 on success, -1 on failure and errno is set. 251 */ 252 static int 253 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 254 { 255 int ret; 256 unsigned long value_ret; 257 char value_str[32]; 258 259 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 260 if (ret == -1) { 261 DEBUG("cannot read %s value from sysfs: %s", 262 name, strerror(errno)); 263 return -1; 264 } 265 value_str[ret] = '\0'; 266 errno = 0; 267 value_ret = strtoul(value_str, NULL, 0); 268 if (errno) { 269 DEBUG("invalid %s value `%s': %s", name, value_str, 270 strerror(errno)); 271 return -1; 272 } 273 *value = value_ret; 274 return 0; 275 } 276 277 /** 278 * Set unsigned long sysfs property. 279 * 280 * @param priv 281 * Pointer to private structure. 282 * @param[in] name 283 * Entry name relative to sysfs path. 284 * @param value 285 * Value to set. 286 * 287 * @return 288 * 0 on success, -1 on failure and errno is set. 289 */ 290 static int 291 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 292 { 293 int ret; 294 MKSTR(value_str, "%lu", value); 295 296 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 297 if (ret == -1) { 298 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 299 name, value_str, value, strerror(errno)); 300 return -1; 301 } 302 return 0; 303 } 304 305 /** 306 * Perform ifreq ioctl() on associated Ethernet device. 307 * 308 * @param[in] priv 309 * Pointer to private structure. 310 * @param req 311 * Request number to pass to ioctl(). 312 * @param[out] ifr 313 * Interface request structure output buffer. 314 * 315 * @return 316 * 0 on success, -1 on failure and errno is set. 317 */ 318 int 319 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 320 { 321 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 322 int ret = -1; 323 324 if (sock == -1) 325 return ret; 326 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 327 ret = ioctl(sock, req, ifr); 328 close(sock); 329 return ret; 330 } 331 332 /** 333 * Get device MTU. 334 * 335 * @param priv 336 * Pointer to private structure. 337 * @param[out] mtu 338 * MTU value output buffer. 339 * 340 * @return 341 * 0 on success, -1 on failure and errno is set. 342 */ 343 int 344 priv_get_mtu(struct priv *priv, uint16_t *mtu) 345 { 346 unsigned long ulong_mtu; 347 348 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 349 return -1; 350 *mtu = ulong_mtu; 351 return 0; 352 } 353 354 /** 355 * Set device MTU. 356 * 357 * @param priv 358 * Pointer to private structure. 359 * @param mtu 360 * MTU value to set. 361 * 362 * @return 363 * 0 on success, -1 on failure and errno is set. 364 */ 365 static int 366 priv_set_mtu(struct priv *priv, uint16_t mtu) 367 { 368 return priv_set_sysfs_ulong(priv, "mtu", mtu); 369 } 370 371 /** 372 * Set device flags. 373 * 374 * @param priv 375 * Pointer to private structure. 376 * @param keep 377 * Bitmask for flags that must remain untouched. 378 * @param flags 379 * Bitmask for flags to modify. 380 * 381 * @return 382 * 0 on success, -1 on failure and errno is set. 383 */ 384 int 385 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 386 { 387 unsigned long tmp; 388 389 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 390 return -1; 391 tmp &= keep; 392 tmp |= flags; 393 return priv_set_sysfs_ulong(priv, "flags", tmp); 394 } 395 396 /** 397 * Ethernet device configuration. 398 * 399 * Prepare the driver for a given number of TX and RX queues. 400 * 401 * @param dev 402 * Pointer to Ethernet device structure. 403 * 404 * @return 405 * 0 on success, errno value on failure. 406 */ 407 static int 408 dev_configure(struct rte_eth_dev *dev) 409 { 410 struct priv *priv = dev->data->dev_private; 411 unsigned int rxqs_n = dev->data->nb_rx_queues; 412 unsigned int txqs_n = dev->data->nb_tx_queues; 413 414 priv->rxqs = (void *)dev->data->rx_queues; 415 priv->txqs = (void *)dev->data->tx_queues; 416 if (txqs_n != priv->txqs_n) { 417 INFO("%p: TX queues number update: %u -> %u", 418 (void *)dev, priv->txqs_n, txqs_n); 419 priv->txqs_n = txqs_n; 420 } 421 if (rxqs_n == priv->rxqs_n) 422 return 0; 423 INFO("%p: RX queues number update: %u -> %u", 424 (void *)dev, priv->rxqs_n, rxqs_n); 425 priv->rxqs_n = rxqs_n; 426 return 0; 427 } 428 429 /** 430 * DPDK callback for Ethernet device configuration. 431 * 432 * @param dev 433 * Pointer to Ethernet device structure. 434 * 435 * @return 436 * 0 on success, negative errno value on failure. 437 */ 438 int 439 mlx5_dev_configure(struct rte_eth_dev *dev) 440 { 441 struct priv *priv = dev->data->dev_private; 442 int ret; 443 444 priv_lock(priv); 445 ret = dev_configure(dev); 446 assert(ret >= 0); 447 priv_unlock(priv); 448 return -ret; 449 } 450 451 /** 452 * DPDK callback to get information about the device. 453 * 454 * @param dev 455 * Pointer to Ethernet device structure. 456 * @param[out] info 457 * Info structure output buffer. 458 */ 459 void 460 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 461 { 462 struct priv *priv = dev->data->dev_private; 463 unsigned int max; 464 char ifname[IF_NAMESIZE]; 465 466 priv_lock(priv); 467 /* FIXME: we should ask the device for these values. */ 468 info->min_rx_bufsize = 32; 469 info->max_rx_pktlen = 65536; 470 /* 471 * Since we need one CQ per QP, the limit is the minimum number 472 * between the two values. 473 */ 474 max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ? 475 priv->device_attr.max_qp : priv->device_attr.max_cq); 476 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 477 if (max >= 65535) 478 max = 65535; 479 info->max_rx_queues = max; 480 info->max_tx_queues = max; 481 /* Last array entry is reserved for broadcast. */ 482 info->max_mac_addrs = (RTE_DIM(priv->mac) - 1); 483 info->rx_offload_capa = 484 (priv->hw_csum ? 485 (DEV_RX_OFFLOAD_IPV4_CKSUM | 486 DEV_RX_OFFLOAD_UDP_CKSUM | 487 DEV_RX_OFFLOAD_TCP_CKSUM) : 488 0); 489 info->tx_offload_capa = 490 (priv->hw_csum ? 491 (DEV_TX_OFFLOAD_IPV4_CKSUM | 492 DEV_TX_OFFLOAD_UDP_CKSUM | 493 DEV_TX_OFFLOAD_TCP_CKSUM) : 494 0); 495 if (priv_get_ifname(priv, &ifname) == 0) 496 info->if_index = if_nametoindex(ifname); 497 priv_unlock(priv); 498 } 499 500 /** 501 * DPDK callback to retrieve physical link information (unlocked version). 502 * 503 * @param dev 504 * Pointer to Ethernet device structure. 505 * @param wait_to_complete 506 * Wait for request completion (ignored). 507 */ 508 static int 509 mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) 510 { 511 struct priv *priv = dev->data->dev_private; 512 struct ethtool_cmd edata = { 513 .cmd = ETHTOOL_GSET 514 }; 515 struct ifreq ifr; 516 struct rte_eth_link dev_link; 517 int link_speed = 0; 518 519 (void)wait_to_complete; 520 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 521 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 522 return -1; 523 } 524 memset(&dev_link, 0, sizeof(dev_link)); 525 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 526 (ifr.ifr_flags & IFF_RUNNING)); 527 ifr.ifr_data = &edata; 528 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 529 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 530 strerror(errno)); 531 return -1; 532 } 533 link_speed = ethtool_cmd_speed(&edata); 534 if (link_speed == -1) 535 dev_link.link_speed = 0; 536 else 537 dev_link.link_speed = link_speed; 538 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 539 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 540 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 541 /* Link status changed. */ 542 dev->data->dev_link = dev_link; 543 return 0; 544 } 545 /* Link status is still the same. */ 546 return -1; 547 } 548 549 /** 550 * DPDK callback to retrieve physical link information. 551 * 552 * @param dev 553 * Pointer to Ethernet device structure. 554 * @param wait_to_complete 555 * Wait for request completion (ignored). 556 */ 557 int 558 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 559 { 560 struct priv *priv = dev->data->dev_private; 561 int ret; 562 563 priv_lock(priv); 564 ret = mlx5_link_update_unlocked(dev, wait_to_complete); 565 priv_unlock(priv); 566 return ret; 567 } 568 569 /** 570 * DPDK callback to change the MTU. 571 * 572 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be 573 * received). Use this as a hint to enable/disable scattered packets support 574 * and improve performance when not needed. 575 * Since failure is not an option, reconfiguring queues on the fly is not 576 * recommended. 577 * 578 * @param dev 579 * Pointer to Ethernet device structure. 580 * @param in_mtu 581 * New MTU. 582 * 583 * @return 584 * 0 on success, negative errno value on failure. 585 */ 586 int 587 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 588 { 589 struct priv *priv = dev->data->dev_private; 590 int ret = 0; 591 unsigned int i; 592 uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) = 593 mlx5_rx_burst; 594 595 priv_lock(priv); 596 /* Set kernel interface MTU first. */ 597 if (priv_set_mtu(priv, mtu)) { 598 ret = errno; 599 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 600 strerror(ret)); 601 goto out; 602 } else 603 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 604 priv->mtu = mtu; 605 /* Temporarily replace RX handler with a fake one, assuming it has not 606 * been copied elsewhere. */ 607 dev->rx_pkt_burst = removed_rx_burst; 608 /* Make sure everyone has left mlx5_rx_burst() and uses 609 * removed_rx_burst() instead. */ 610 rte_wmb(); 611 usleep(1000); 612 /* Reconfigure each RX queue. */ 613 for (i = 0; (i != priv->rxqs_n); ++i) { 614 struct rxq *rxq = (*priv->rxqs)[i]; 615 unsigned int max_frame_len; 616 int sp; 617 618 if (rxq == NULL) 619 continue; 620 /* Calculate new maximum frame length according to MTU and 621 * toggle scattered support (sp) if necessary. */ 622 max_frame_len = (priv->mtu + ETHER_HDR_LEN + 623 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN)); 624 sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM)); 625 /* Provide new values to rxq_setup(). */ 626 dev->data->dev_conf.rxmode.jumbo_frame = sp; 627 dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len; 628 ret = rxq_rehash(dev, rxq); 629 if (ret) { 630 /* Force SP RX if that queue requires it and abort. */ 631 if (rxq->sp) 632 rx_func = mlx5_rx_burst_sp; 633 break; 634 } 635 /* Scattered burst function takes priority. */ 636 if (rxq->sp) 637 rx_func = mlx5_rx_burst_sp; 638 } 639 /* Burst functions can now be called again. */ 640 rte_wmb(); 641 dev->rx_pkt_burst = rx_func; 642 out: 643 priv_unlock(priv); 644 assert(ret >= 0); 645 return -ret; 646 } 647 648 /** 649 * DPDK callback to get flow control status. 650 * 651 * @param dev 652 * Pointer to Ethernet device structure. 653 * @param[out] fc_conf 654 * Flow control output buffer. 655 * 656 * @return 657 * 0 on success, negative errno value on failure. 658 */ 659 int 660 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 661 { 662 struct priv *priv = dev->data->dev_private; 663 struct ifreq ifr; 664 struct ethtool_pauseparam ethpause = { 665 .cmd = ETHTOOL_GPAUSEPARAM 666 }; 667 int ret; 668 669 ifr.ifr_data = ðpause; 670 priv_lock(priv); 671 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 672 ret = errno; 673 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 674 " failed: %s", 675 strerror(ret)); 676 goto out; 677 } 678 679 fc_conf->autoneg = ethpause.autoneg; 680 if (ethpause.rx_pause && ethpause.tx_pause) 681 fc_conf->mode = RTE_FC_FULL; 682 else if (ethpause.rx_pause) 683 fc_conf->mode = RTE_FC_RX_PAUSE; 684 else if (ethpause.tx_pause) 685 fc_conf->mode = RTE_FC_TX_PAUSE; 686 else 687 fc_conf->mode = RTE_FC_NONE; 688 ret = 0; 689 690 out: 691 priv_unlock(priv); 692 assert(ret >= 0); 693 return -ret; 694 } 695 696 /** 697 * DPDK callback to modify flow control parameters. 698 * 699 * @param dev 700 * Pointer to Ethernet device structure. 701 * @param[in] fc_conf 702 * Flow control parameters. 703 * 704 * @return 705 * 0 on success, negative errno value on failure. 706 */ 707 int 708 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 709 { 710 struct priv *priv = dev->data->dev_private; 711 struct ifreq ifr; 712 struct ethtool_pauseparam ethpause = { 713 .cmd = ETHTOOL_SPAUSEPARAM 714 }; 715 int ret; 716 717 ifr.ifr_data = ðpause; 718 ethpause.autoneg = fc_conf->autoneg; 719 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 720 (fc_conf->mode & RTE_FC_RX_PAUSE)) 721 ethpause.rx_pause = 1; 722 else 723 ethpause.rx_pause = 0; 724 725 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 726 (fc_conf->mode & RTE_FC_TX_PAUSE)) 727 ethpause.tx_pause = 1; 728 else 729 ethpause.tx_pause = 0; 730 731 priv_lock(priv); 732 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 733 ret = errno; 734 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 735 " failed: %s", 736 strerror(ret)); 737 goto out; 738 } 739 ret = 0; 740 741 out: 742 priv_unlock(priv); 743 assert(ret >= 0); 744 return -ret; 745 } 746 747 /** 748 * Get PCI information from struct ibv_device. 749 * 750 * @param device 751 * Pointer to Ethernet device structure. 752 * @param[out] pci_addr 753 * PCI bus address output buffer. 754 * 755 * @return 756 * 0 on success, -1 on failure and errno is set. 757 */ 758 int 759 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 760 struct rte_pci_addr *pci_addr) 761 { 762 FILE *file; 763 char line[32]; 764 MKSTR(path, "%s/device/uevent", device->ibdev_path); 765 766 file = fopen(path, "rb"); 767 if (file == NULL) 768 return -1; 769 while (fgets(line, sizeof(line), file) == line) { 770 size_t len = strlen(line); 771 int ret; 772 773 /* Truncate long lines. */ 774 if (len == (sizeof(line) - 1)) 775 while (line[(len - 1)] != '\n') { 776 ret = fgetc(file); 777 if (ret == EOF) 778 break; 779 line[(len - 1)] = ret; 780 } 781 /* Extract information. */ 782 if (sscanf(line, 783 "PCI_SLOT_NAME=" 784 "%" SCNx16 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 785 &pci_addr->domain, 786 &pci_addr->bus, 787 &pci_addr->devid, 788 &pci_addr->function) == 4) { 789 ret = 0; 790 break; 791 } 792 } 793 fclose(file); 794 return 0; 795 } 796 797 /** 798 * Link status handler. 799 * 800 * @param priv 801 * Pointer to private structure. 802 * @param dev 803 * Pointer to the rte_eth_dev structure. 804 * 805 * @return 806 * Nonzero if the callback process can be called immediately. 807 */ 808 static int 809 priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) 810 { 811 struct ibv_async_event event; 812 int port_change = 0; 813 int ret = 0; 814 815 /* Read all message and acknowledge them. */ 816 for (;;) { 817 if (ibv_get_async_event(priv->ctx, &event)) 818 break; 819 820 if (event.event_type == IBV_EVENT_PORT_ACTIVE || 821 event.event_type == IBV_EVENT_PORT_ERR) 822 port_change = 1; 823 else 824 DEBUG("event type %d on port %d not handled", 825 event.event_type, event.element.port_num); 826 ibv_ack_async_event(&event); 827 } 828 829 if (port_change ^ priv->pending_alarm) { 830 struct rte_eth_link *link = &dev->data->dev_link; 831 832 priv->pending_alarm = 0; 833 mlx5_link_update_unlocked(dev, 0); 834 if (((link->link_speed == 0) && link->link_status) || 835 ((link->link_speed != 0) && !link->link_status)) { 836 /* Inconsistent status, check again later. */ 837 priv->pending_alarm = 1; 838 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 839 mlx5_dev_link_status_handler, 840 dev); 841 } else 842 ret = 1; 843 } 844 return ret; 845 } 846 847 /** 848 * Handle delayed link status event. 849 * 850 * @param arg 851 * Registered argument. 852 */ 853 void 854 mlx5_dev_link_status_handler(void *arg) 855 { 856 struct rte_eth_dev *dev = arg; 857 struct priv *priv = dev->data->dev_private; 858 int ret; 859 860 priv_lock(priv); 861 assert(priv->pending_alarm == 1); 862 ret = priv_dev_link_status_handler(priv, dev); 863 priv_unlock(priv); 864 if (ret) 865 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 866 } 867 868 /** 869 * Handle interrupts from the NIC. 870 * 871 * @param[in] intr_handle 872 * Interrupt handler. 873 * @param cb_arg 874 * Callback argument. 875 */ 876 void 877 mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) 878 { 879 struct rte_eth_dev *dev = cb_arg; 880 struct priv *priv = dev->data->dev_private; 881 int ret; 882 883 (void)intr_handle; 884 priv_lock(priv); 885 ret = priv_dev_link_status_handler(priv, dev); 886 priv_unlock(priv); 887 if (ret) 888 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 889 } 890 891 /** 892 * Uninstall interrupt handler. 893 * 894 * @param priv 895 * Pointer to private structure. 896 * @param dev 897 * Pointer to the rte_eth_dev structure. 898 */ 899 void 900 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 901 { 902 if (!dev->data->dev_conf.intr_conf.lsc) 903 return; 904 rte_intr_callback_unregister(&priv->intr_handle, 905 mlx5_dev_interrupt_handler, 906 dev); 907 if (priv->pending_alarm) 908 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 909 priv->pending_alarm = 0; 910 priv->intr_handle.fd = 0; 911 priv->intr_handle.type = 0; 912 } 913 914 /** 915 * Install interrupt handler. 916 * 917 * @param priv 918 * Pointer to private structure. 919 * @param dev 920 * Pointer to the rte_eth_dev structure. 921 */ 922 void 923 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 924 { 925 int rc, flags; 926 927 if (!dev->data->dev_conf.intr_conf.lsc) 928 return; 929 assert(priv->ctx->async_fd > 0); 930 flags = fcntl(priv->ctx->async_fd, F_GETFL); 931 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 932 if (rc < 0) { 933 INFO("failed to change file descriptor async event queue"); 934 dev->data->dev_conf.intr_conf.lsc = 0; 935 } else { 936 priv->intr_handle.fd = priv->ctx->async_fd; 937 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 938 rte_intr_callback_register(&priv->intr_handle, 939 mlx5_dev_interrupt_handler, 940 dev); 941 } 942 } 943