1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <unistd.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <dirent.h> 43 #include <net/if.h> 44 #include <sys/ioctl.h> 45 #include <sys/socket.h> 46 #include <netinet/in.h> 47 #include <linux/if.h> 48 #include <linux/ethtool.h> 49 #include <linux/sockios.h> 50 #include <fcntl.h> 51 52 /* DPDK headers don't like -pedantic. */ 53 #ifdef PEDANTIC 54 #pragma GCC diagnostic ignored "-pedantic" 55 #endif 56 #include <rte_atomic.h> 57 #include <rte_ethdev.h> 58 #include <rte_mbuf.h> 59 #include <rte_common.h> 60 #include <rte_interrupts.h> 61 #include <rte_alarm.h> 62 #ifdef PEDANTIC 63 #pragma GCC diagnostic error "-pedantic" 64 #endif 65 66 #include "mlx5.h" 67 #include "mlx5_rxtx.h" 68 #include "mlx5_utils.h" 69 70 /** 71 * Get interface name from private structure. 72 * 73 * @param[in] priv 74 * Pointer to private structure. 75 * @param[out] ifname 76 * Interface name output buffer. 77 * 78 * @return 79 * 0 on success, -1 on failure and errno is set. 80 */ 81 int 82 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 83 { 84 DIR *dir; 85 struct dirent *dent; 86 unsigned int dev_type = 0; 87 unsigned int dev_port_prev = ~0u; 88 char match[IF_NAMESIZE] = ""; 89 90 { 91 MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path); 92 93 dir = opendir(path); 94 if (dir == NULL) 95 return -1; 96 } 97 while ((dent = readdir(dir)) != NULL) { 98 char *name = dent->d_name; 99 FILE *file; 100 unsigned int dev_port; 101 int r; 102 103 if ((name[0] == '.') && 104 ((name[1] == '\0') || 105 ((name[1] == '.') && (name[2] == '\0')))) 106 continue; 107 108 MKSTR(path, "%s/device/net/%s/%s", 109 priv->ctx->device->ibdev_path, name, 110 (dev_type ? "dev_id" : "dev_port")); 111 112 file = fopen(path, "rb"); 113 if (file == NULL) { 114 if (errno != ENOENT) 115 continue; 116 /* 117 * Switch to dev_id when dev_port does not exist as 118 * is the case with Linux kernel versions < 3.15. 119 */ 120 try_dev_id: 121 match[0] = '\0'; 122 if (dev_type) 123 break; 124 dev_type = 1; 125 dev_port_prev = ~0u; 126 rewinddir(dir); 127 continue; 128 } 129 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 130 fclose(file); 131 if (r != 1) 132 continue; 133 /* 134 * Switch to dev_id when dev_port returns the same value for 135 * all ports. May happen when using a MOFED release older than 136 * 3.0 with a Linux kernel >= 3.15. 137 */ 138 if (dev_port == dev_port_prev) 139 goto try_dev_id; 140 dev_port_prev = dev_port; 141 if (dev_port == (priv->port - 1u)) 142 snprintf(match, sizeof(match), "%s", name); 143 } 144 closedir(dir); 145 if (match[0] == '\0') 146 return -1; 147 strncpy(*ifname, match, sizeof(*ifname)); 148 return 0; 149 } 150 151 /** 152 * Read from sysfs entry. 153 * 154 * @param[in] priv 155 * Pointer to private structure. 156 * @param[in] entry 157 * Entry name relative to sysfs path. 158 * @param[out] buf 159 * Data output buffer. 160 * @param size 161 * Buffer size. 162 * 163 * @return 164 * 0 on success, -1 on failure and errno is set. 165 */ 166 static int 167 priv_sysfs_read(const struct priv *priv, const char *entry, 168 char *buf, size_t size) 169 { 170 char ifname[IF_NAMESIZE]; 171 FILE *file; 172 int ret; 173 int err; 174 175 if (priv_get_ifname(priv, &ifname)) 176 return -1; 177 178 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 179 ifname, entry); 180 181 file = fopen(path, "rb"); 182 if (file == NULL) 183 return -1; 184 ret = fread(buf, 1, size, file); 185 err = errno; 186 if (((size_t)ret < size) && (ferror(file))) 187 ret = -1; 188 else 189 ret = size; 190 fclose(file); 191 errno = err; 192 return ret; 193 } 194 195 /** 196 * Write to sysfs entry. 197 * 198 * @param[in] priv 199 * Pointer to private structure. 200 * @param[in] entry 201 * Entry name relative to sysfs path. 202 * @param[in] buf 203 * Data buffer. 204 * @param size 205 * Buffer size. 206 * 207 * @return 208 * 0 on success, -1 on failure and errno is set. 209 */ 210 static int 211 priv_sysfs_write(const struct priv *priv, const char *entry, 212 char *buf, size_t size) 213 { 214 char ifname[IF_NAMESIZE]; 215 FILE *file; 216 int ret; 217 int err; 218 219 if (priv_get_ifname(priv, &ifname)) 220 return -1; 221 222 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 223 ifname, entry); 224 225 file = fopen(path, "wb"); 226 if (file == NULL) 227 return -1; 228 ret = fwrite(buf, 1, size, file); 229 err = errno; 230 if (((size_t)ret < size) || (ferror(file))) 231 ret = -1; 232 else 233 ret = size; 234 fclose(file); 235 errno = err; 236 return ret; 237 } 238 239 /** 240 * Get unsigned long sysfs property. 241 * 242 * @param priv 243 * Pointer to private structure. 244 * @param[in] name 245 * Entry name relative to sysfs path. 246 * @param[out] value 247 * Value output buffer. 248 * 249 * @return 250 * 0 on success, -1 on failure and errno is set. 251 */ 252 static int 253 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 254 { 255 int ret; 256 unsigned long value_ret; 257 char value_str[32]; 258 259 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 260 if (ret == -1) { 261 DEBUG("cannot read %s value from sysfs: %s", 262 name, strerror(errno)); 263 return -1; 264 } 265 value_str[ret] = '\0'; 266 errno = 0; 267 value_ret = strtoul(value_str, NULL, 0); 268 if (errno) { 269 DEBUG("invalid %s value `%s': %s", name, value_str, 270 strerror(errno)); 271 return -1; 272 } 273 *value = value_ret; 274 return 0; 275 } 276 277 /** 278 * Set unsigned long sysfs property. 279 * 280 * @param priv 281 * Pointer to private structure. 282 * @param[in] name 283 * Entry name relative to sysfs path. 284 * @param value 285 * Value to set. 286 * 287 * @return 288 * 0 on success, -1 on failure and errno is set. 289 */ 290 static int 291 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 292 { 293 int ret; 294 MKSTR(value_str, "%lu", value); 295 296 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 297 if (ret == -1) { 298 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 299 name, value_str, value, strerror(errno)); 300 return -1; 301 } 302 return 0; 303 } 304 305 /** 306 * Perform ifreq ioctl() on associated Ethernet device. 307 * 308 * @param[in] priv 309 * Pointer to private structure. 310 * @param req 311 * Request number to pass to ioctl(). 312 * @param[out] ifr 313 * Interface request structure output buffer. 314 * 315 * @return 316 * 0 on success, -1 on failure and errno is set. 317 */ 318 int 319 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 320 { 321 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 322 int ret = -1; 323 324 if (sock == -1) 325 return ret; 326 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 327 ret = ioctl(sock, req, ifr); 328 close(sock); 329 return ret; 330 } 331 332 /** 333 * Get device MTU. 334 * 335 * @param priv 336 * Pointer to private structure. 337 * @param[out] mtu 338 * MTU value output buffer. 339 * 340 * @return 341 * 0 on success, -1 on failure and errno is set. 342 */ 343 int 344 priv_get_mtu(struct priv *priv, uint16_t *mtu) 345 { 346 unsigned long ulong_mtu; 347 348 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 349 return -1; 350 *mtu = ulong_mtu; 351 return 0; 352 } 353 354 /** 355 * Set device MTU. 356 * 357 * @param priv 358 * Pointer to private structure. 359 * @param mtu 360 * MTU value to set. 361 * 362 * @return 363 * 0 on success, -1 on failure and errno is set. 364 */ 365 static int 366 priv_set_mtu(struct priv *priv, uint16_t mtu) 367 { 368 return priv_set_sysfs_ulong(priv, "mtu", mtu); 369 } 370 371 /** 372 * Set device flags. 373 * 374 * @param priv 375 * Pointer to private structure. 376 * @param keep 377 * Bitmask for flags that must remain untouched. 378 * @param flags 379 * Bitmask for flags to modify. 380 * 381 * @return 382 * 0 on success, -1 on failure and errno is set. 383 */ 384 int 385 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 386 { 387 unsigned long tmp; 388 389 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 390 return -1; 391 tmp &= keep; 392 tmp |= flags; 393 return priv_set_sysfs_ulong(priv, "flags", tmp); 394 } 395 396 /** 397 * Ethernet device configuration. 398 * 399 * Prepare the driver for a given number of TX and RX queues. 400 * 401 * @param dev 402 * Pointer to Ethernet device structure. 403 * 404 * @return 405 * 0 on success, errno value on failure. 406 */ 407 static int 408 dev_configure(struct rte_eth_dev *dev) 409 { 410 struct priv *priv = dev->data->dev_private; 411 unsigned int rxqs_n = dev->data->nb_rx_queues; 412 unsigned int txqs_n = dev->data->nb_tx_queues; 413 unsigned int i; 414 unsigned int j; 415 unsigned int reta_idx_n; 416 417 priv->rxqs = (void *)dev->data->rx_queues; 418 priv->txqs = (void *)dev->data->tx_queues; 419 if (txqs_n != priv->txqs_n) { 420 INFO("%p: TX queues number update: %u -> %u", 421 (void *)dev, priv->txqs_n, txqs_n); 422 priv->txqs_n = txqs_n; 423 } 424 if (rxqs_n > priv->ind_table_max_size) { 425 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 426 return EINVAL; 427 } 428 if (rxqs_n == priv->rxqs_n) 429 return 0; 430 INFO("%p: RX queues number update: %u -> %u", 431 (void *)dev, priv->rxqs_n, rxqs_n); 432 priv->rxqs_n = rxqs_n; 433 /* If the requested number of RX queues is not a power of two, use the 434 * maximum indirection table size for better balancing. 435 * The result is always rounded to the next power of two. */ 436 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 437 priv->ind_table_max_size : 438 rxqs_n)); 439 if (priv_rss_reta_index_resize(priv, reta_idx_n)) 440 return ENOMEM; 441 /* When the number of RX queues is not a power of two, the remaining 442 * table entries are padded with reused WQs and hashes are not spread 443 * uniformly. */ 444 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 445 (*priv->reta_idx)[i] = j; 446 if (++j == rxqs_n) 447 j = 0; 448 } 449 return 0; 450 } 451 452 /** 453 * DPDK callback for Ethernet device configuration. 454 * 455 * @param dev 456 * Pointer to Ethernet device structure. 457 * 458 * @return 459 * 0 on success, negative errno value on failure. 460 */ 461 int 462 mlx5_dev_configure(struct rte_eth_dev *dev) 463 { 464 struct priv *priv = dev->data->dev_private; 465 int ret; 466 467 priv_lock(priv); 468 ret = dev_configure(dev); 469 assert(ret >= 0); 470 priv_unlock(priv); 471 return -ret; 472 } 473 474 /** 475 * DPDK callback to get information about the device. 476 * 477 * @param dev 478 * Pointer to Ethernet device structure. 479 * @param[out] info 480 * Info structure output buffer. 481 */ 482 void 483 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 484 { 485 struct priv *priv = dev->data->dev_private; 486 unsigned int max; 487 char ifname[IF_NAMESIZE]; 488 489 priv_lock(priv); 490 /* FIXME: we should ask the device for these values. */ 491 info->min_rx_bufsize = 32; 492 info->max_rx_pktlen = 65536; 493 /* 494 * Since we need one CQ per QP, the limit is the minimum number 495 * between the two values. 496 */ 497 max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ? 498 priv->device_attr.max_qp : priv->device_attr.max_cq); 499 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 500 if (max >= 65535) 501 max = 65535; 502 info->max_rx_queues = max; 503 info->max_tx_queues = max; 504 info->max_mac_addrs = RTE_DIM(priv->mac); 505 info->rx_offload_capa = 506 (priv->hw_csum ? 507 (DEV_RX_OFFLOAD_IPV4_CKSUM | 508 DEV_RX_OFFLOAD_UDP_CKSUM | 509 DEV_RX_OFFLOAD_TCP_CKSUM) : 510 0); 511 info->tx_offload_capa = 512 (priv->hw_csum ? 513 (DEV_TX_OFFLOAD_IPV4_CKSUM | 514 DEV_TX_OFFLOAD_UDP_CKSUM | 515 DEV_TX_OFFLOAD_TCP_CKSUM) : 516 0); 517 if (priv_get_ifname(priv, &ifname) == 0) 518 info->if_index = if_nametoindex(ifname); 519 /* FIXME: RETA update/query API expects the callee to know the size of 520 * the indirection table, for this PMD the size varies depending on 521 * the number of RX queues, it becomes impossible to find the correct 522 * size if it is not fixed. 523 * The API should be updated to solve this problem. */ 524 info->reta_size = priv->ind_table_max_size; 525 priv_unlock(priv); 526 } 527 528 /** 529 * DPDK callback to retrieve physical link information (unlocked version). 530 * 531 * @param dev 532 * Pointer to Ethernet device structure. 533 * @param wait_to_complete 534 * Wait for request completion (ignored). 535 */ 536 static int 537 mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) 538 { 539 struct priv *priv = dev->data->dev_private; 540 struct ethtool_cmd edata = { 541 .cmd = ETHTOOL_GSET 542 }; 543 struct ifreq ifr; 544 struct rte_eth_link dev_link; 545 int link_speed = 0; 546 547 (void)wait_to_complete; 548 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 549 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 550 return -1; 551 } 552 memset(&dev_link, 0, sizeof(dev_link)); 553 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 554 (ifr.ifr_flags & IFF_RUNNING)); 555 ifr.ifr_data = &edata; 556 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 557 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 558 strerror(errno)); 559 return -1; 560 } 561 link_speed = ethtool_cmd_speed(&edata); 562 if (link_speed == -1) 563 dev_link.link_speed = 0; 564 else 565 dev_link.link_speed = link_speed; 566 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 567 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 568 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 569 /* Link status changed. */ 570 dev->data->dev_link = dev_link; 571 return 0; 572 } 573 /* Link status is still the same. */ 574 return -1; 575 } 576 577 /** 578 * DPDK callback to retrieve physical link information. 579 * 580 * @param dev 581 * Pointer to Ethernet device structure. 582 * @param wait_to_complete 583 * Wait for request completion (ignored). 584 */ 585 int 586 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 587 { 588 struct priv *priv = dev->data->dev_private; 589 int ret; 590 591 priv_lock(priv); 592 ret = mlx5_link_update_unlocked(dev, wait_to_complete); 593 priv_unlock(priv); 594 return ret; 595 } 596 597 /** 598 * DPDK callback to change the MTU. 599 * 600 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be 601 * received). Use this as a hint to enable/disable scattered packets support 602 * and improve performance when not needed. 603 * Since failure is not an option, reconfiguring queues on the fly is not 604 * recommended. 605 * 606 * @param dev 607 * Pointer to Ethernet device structure. 608 * @param in_mtu 609 * New MTU. 610 * 611 * @return 612 * 0 on success, negative errno value on failure. 613 */ 614 int 615 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 616 { 617 struct priv *priv = dev->data->dev_private; 618 int ret = 0; 619 unsigned int i; 620 uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) = 621 mlx5_rx_burst; 622 623 priv_lock(priv); 624 /* Set kernel interface MTU first. */ 625 if (priv_set_mtu(priv, mtu)) { 626 ret = errno; 627 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 628 strerror(ret)); 629 goto out; 630 } else 631 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 632 priv->mtu = mtu; 633 /* Temporarily replace RX handler with a fake one, assuming it has not 634 * been copied elsewhere. */ 635 dev->rx_pkt_burst = removed_rx_burst; 636 /* Make sure everyone has left mlx5_rx_burst() and uses 637 * removed_rx_burst() instead. */ 638 rte_wmb(); 639 usleep(1000); 640 /* Reconfigure each RX queue. */ 641 for (i = 0; (i != priv->rxqs_n); ++i) { 642 struct rxq *rxq = (*priv->rxqs)[i]; 643 unsigned int max_frame_len; 644 int sp; 645 646 if (rxq == NULL) 647 continue; 648 /* Calculate new maximum frame length according to MTU and 649 * toggle scattered support (sp) if necessary. */ 650 max_frame_len = (priv->mtu + ETHER_HDR_LEN + 651 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN)); 652 sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM)); 653 /* Provide new values to rxq_setup(). */ 654 dev->data->dev_conf.rxmode.jumbo_frame = sp; 655 dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len; 656 ret = rxq_rehash(dev, rxq); 657 if (ret) { 658 /* Force SP RX if that queue requires it and abort. */ 659 if (rxq->sp) 660 rx_func = mlx5_rx_burst_sp; 661 break; 662 } 663 /* Scattered burst function takes priority. */ 664 if (rxq->sp) 665 rx_func = mlx5_rx_burst_sp; 666 } 667 /* Burst functions can now be called again. */ 668 rte_wmb(); 669 dev->rx_pkt_burst = rx_func; 670 out: 671 priv_unlock(priv); 672 assert(ret >= 0); 673 return -ret; 674 } 675 676 /** 677 * DPDK callback to get flow control status. 678 * 679 * @param dev 680 * Pointer to Ethernet device structure. 681 * @param[out] fc_conf 682 * Flow control output buffer. 683 * 684 * @return 685 * 0 on success, negative errno value on failure. 686 */ 687 int 688 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 689 { 690 struct priv *priv = dev->data->dev_private; 691 struct ifreq ifr; 692 struct ethtool_pauseparam ethpause = { 693 .cmd = ETHTOOL_GPAUSEPARAM 694 }; 695 int ret; 696 697 ifr.ifr_data = ðpause; 698 priv_lock(priv); 699 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 700 ret = errno; 701 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 702 " failed: %s", 703 strerror(ret)); 704 goto out; 705 } 706 707 fc_conf->autoneg = ethpause.autoneg; 708 if (ethpause.rx_pause && ethpause.tx_pause) 709 fc_conf->mode = RTE_FC_FULL; 710 else if (ethpause.rx_pause) 711 fc_conf->mode = RTE_FC_RX_PAUSE; 712 else if (ethpause.tx_pause) 713 fc_conf->mode = RTE_FC_TX_PAUSE; 714 else 715 fc_conf->mode = RTE_FC_NONE; 716 ret = 0; 717 718 out: 719 priv_unlock(priv); 720 assert(ret >= 0); 721 return -ret; 722 } 723 724 /** 725 * DPDK callback to modify flow control parameters. 726 * 727 * @param dev 728 * Pointer to Ethernet device structure. 729 * @param[in] fc_conf 730 * Flow control parameters. 731 * 732 * @return 733 * 0 on success, negative errno value on failure. 734 */ 735 int 736 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 737 { 738 struct priv *priv = dev->data->dev_private; 739 struct ifreq ifr; 740 struct ethtool_pauseparam ethpause = { 741 .cmd = ETHTOOL_SPAUSEPARAM 742 }; 743 int ret; 744 745 ifr.ifr_data = ðpause; 746 ethpause.autoneg = fc_conf->autoneg; 747 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 748 (fc_conf->mode & RTE_FC_RX_PAUSE)) 749 ethpause.rx_pause = 1; 750 else 751 ethpause.rx_pause = 0; 752 753 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 754 (fc_conf->mode & RTE_FC_TX_PAUSE)) 755 ethpause.tx_pause = 1; 756 else 757 ethpause.tx_pause = 0; 758 759 priv_lock(priv); 760 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 761 ret = errno; 762 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 763 " failed: %s", 764 strerror(ret)); 765 goto out; 766 } 767 ret = 0; 768 769 out: 770 priv_unlock(priv); 771 assert(ret >= 0); 772 return -ret; 773 } 774 775 /** 776 * Get PCI information from struct ibv_device. 777 * 778 * @param device 779 * Pointer to Ethernet device structure. 780 * @param[out] pci_addr 781 * PCI bus address output buffer. 782 * 783 * @return 784 * 0 on success, -1 on failure and errno is set. 785 */ 786 int 787 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 788 struct rte_pci_addr *pci_addr) 789 { 790 FILE *file; 791 char line[32]; 792 MKSTR(path, "%s/device/uevent", device->ibdev_path); 793 794 file = fopen(path, "rb"); 795 if (file == NULL) 796 return -1; 797 while (fgets(line, sizeof(line), file) == line) { 798 size_t len = strlen(line); 799 int ret; 800 801 /* Truncate long lines. */ 802 if (len == (sizeof(line) - 1)) 803 while (line[(len - 1)] != '\n') { 804 ret = fgetc(file); 805 if (ret == EOF) 806 break; 807 line[(len - 1)] = ret; 808 } 809 /* Extract information. */ 810 if (sscanf(line, 811 "PCI_SLOT_NAME=" 812 "%" SCNx16 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 813 &pci_addr->domain, 814 &pci_addr->bus, 815 &pci_addr->devid, 816 &pci_addr->function) == 4) { 817 ret = 0; 818 break; 819 } 820 } 821 fclose(file); 822 return 0; 823 } 824 825 /** 826 * Link status handler. 827 * 828 * @param priv 829 * Pointer to private structure. 830 * @param dev 831 * Pointer to the rte_eth_dev structure. 832 * 833 * @return 834 * Nonzero if the callback process can be called immediately. 835 */ 836 static int 837 priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) 838 { 839 struct ibv_async_event event; 840 int port_change = 0; 841 int ret = 0; 842 843 /* Read all message and acknowledge them. */ 844 for (;;) { 845 if (ibv_get_async_event(priv->ctx, &event)) 846 break; 847 848 if (event.event_type == IBV_EVENT_PORT_ACTIVE || 849 event.event_type == IBV_EVENT_PORT_ERR) 850 port_change = 1; 851 else 852 DEBUG("event type %d on port %d not handled", 853 event.event_type, event.element.port_num); 854 ibv_ack_async_event(&event); 855 } 856 857 if (port_change ^ priv->pending_alarm) { 858 struct rte_eth_link *link = &dev->data->dev_link; 859 860 priv->pending_alarm = 0; 861 mlx5_link_update_unlocked(dev, 0); 862 if (((link->link_speed == 0) && link->link_status) || 863 ((link->link_speed != 0) && !link->link_status)) { 864 /* Inconsistent status, check again later. */ 865 priv->pending_alarm = 1; 866 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 867 mlx5_dev_link_status_handler, 868 dev); 869 } else 870 ret = 1; 871 } 872 return ret; 873 } 874 875 /** 876 * Handle delayed link status event. 877 * 878 * @param arg 879 * Registered argument. 880 */ 881 void 882 mlx5_dev_link_status_handler(void *arg) 883 { 884 struct rte_eth_dev *dev = arg; 885 struct priv *priv = dev->data->dev_private; 886 int ret; 887 888 priv_lock(priv); 889 assert(priv->pending_alarm == 1); 890 ret = priv_dev_link_status_handler(priv, dev); 891 priv_unlock(priv); 892 if (ret) 893 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 894 } 895 896 /** 897 * Handle interrupts from the NIC. 898 * 899 * @param[in] intr_handle 900 * Interrupt handler. 901 * @param cb_arg 902 * Callback argument. 903 */ 904 void 905 mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) 906 { 907 struct rte_eth_dev *dev = cb_arg; 908 struct priv *priv = dev->data->dev_private; 909 int ret; 910 911 (void)intr_handle; 912 priv_lock(priv); 913 ret = priv_dev_link_status_handler(priv, dev); 914 priv_unlock(priv); 915 if (ret) 916 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 917 } 918 919 /** 920 * Uninstall interrupt handler. 921 * 922 * @param priv 923 * Pointer to private structure. 924 * @param dev 925 * Pointer to the rte_eth_dev structure. 926 */ 927 void 928 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 929 { 930 if (!dev->data->dev_conf.intr_conf.lsc) 931 return; 932 rte_intr_callback_unregister(&priv->intr_handle, 933 mlx5_dev_interrupt_handler, 934 dev); 935 if (priv->pending_alarm) 936 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 937 priv->pending_alarm = 0; 938 priv->intr_handle.fd = 0; 939 priv->intr_handle.type = 0; 940 } 941 942 /** 943 * Install interrupt handler. 944 * 945 * @param priv 946 * Pointer to private structure. 947 * @param dev 948 * Pointer to the rte_eth_dev structure. 949 */ 950 void 951 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 952 { 953 int rc, flags; 954 955 if (!dev->data->dev_conf.intr_conf.lsc) 956 return; 957 assert(priv->ctx->async_fd > 0); 958 flags = fcntl(priv->ctx->async_fd, F_GETFL); 959 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 960 if (rc < 0) { 961 INFO("failed to change file descriptor async event queue"); 962 dev->data->dev_conf.intr_conf.lsc = 0; 963 } else { 964 priv->intr_handle.fd = priv->ctx->async_fd; 965 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 966 rte_intr_callback_register(&priv->intr_handle, 967 mlx5_dev_interrupt_handler, 968 dev); 969 } 970 } 971