1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <unistd.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <dirent.h> 43 #include <net/if.h> 44 #include <sys/ioctl.h> 45 #include <sys/socket.h> 46 #include <netinet/in.h> 47 #include <linux/if.h> 48 #include <linux/ethtool.h> 49 #include <linux/sockios.h> 50 #include <fcntl.h> 51 52 /* DPDK headers don't like -pedantic. */ 53 #ifdef PEDANTIC 54 #pragma GCC diagnostic ignored "-pedantic" 55 #endif 56 #include <rte_atomic.h> 57 #include <rte_ethdev.h> 58 #include <rte_mbuf.h> 59 #include <rte_common.h> 60 #include <rte_interrupts.h> 61 #include <rte_alarm.h> 62 #ifdef PEDANTIC 63 #pragma GCC diagnostic error "-pedantic" 64 #endif 65 66 #include "mlx5.h" 67 #include "mlx5_rxtx.h" 68 #include "mlx5_utils.h" 69 70 /** 71 * Get interface name from private structure. 72 * 73 * @param[in] priv 74 * Pointer to private structure. 75 * @param[out] ifname 76 * Interface name output buffer. 77 * 78 * @return 79 * 0 on success, -1 on failure and errno is set. 80 */ 81 int 82 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 83 { 84 DIR *dir; 85 struct dirent *dent; 86 unsigned int dev_type = 0; 87 unsigned int dev_port_prev = ~0u; 88 char match[IF_NAMESIZE] = ""; 89 90 { 91 MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path); 92 93 dir = opendir(path); 94 if (dir == NULL) 95 return -1; 96 } 97 while ((dent = readdir(dir)) != NULL) { 98 char *name = dent->d_name; 99 FILE *file; 100 unsigned int dev_port; 101 int r; 102 103 if ((name[0] == '.') && 104 ((name[1] == '\0') || 105 ((name[1] == '.') && (name[2] == '\0')))) 106 continue; 107 108 MKSTR(path, "%s/device/net/%s/%s", 109 priv->ctx->device->ibdev_path, name, 110 (dev_type ? "dev_id" : "dev_port")); 111 112 file = fopen(path, "rb"); 113 if (file == NULL) { 114 if (errno != ENOENT) 115 continue; 116 /* 117 * Switch to dev_id when dev_port does not exist as 118 * is the case with Linux kernel versions < 3.15. 119 */ 120 try_dev_id: 121 match[0] = '\0'; 122 if (dev_type) 123 break; 124 dev_type = 1; 125 dev_port_prev = ~0u; 126 rewinddir(dir); 127 continue; 128 } 129 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 130 fclose(file); 131 if (r != 1) 132 continue; 133 /* 134 * Switch to dev_id when dev_port returns the same value for 135 * all ports. May happen when using a MOFED release older than 136 * 3.0 with a Linux kernel >= 3.15. 137 */ 138 if (dev_port == dev_port_prev) 139 goto try_dev_id; 140 dev_port_prev = dev_port; 141 if (dev_port == (priv->port - 1u)) 142 snprintf(match, sizeof(match), "%s", name); 143 } 144 closedir(dir); 145 if (match[0] == '\0') 146 return -1; 147 strncpy(*ifname, match, sizeof(*ifname)); 148 return 0; 149 } 150 151 /** 152 * Read from sysfs entry. 153 * 154 * @param[in] priv 155 * Pointer to private structure. 156 * @param[in] entry 157 * Entry name relative to sysfs path. 158 * @param[out] buf 159 * Data output buffer. 160 * @param size 161 * Buffer size. 162 * 163 * @return 164 * 0 on success, -1 on failure and errno is set. 165 */ 166 static int 167 priv_sysfs_read(const struct priv *priv, const char *entry, 168 char *buf, size_t size) 169 { 170 char ifname[IF_NAMESIZE]; 171 FILE *file; 172 int ret; 173 int err; 174 175 if (priv_get_ifname(priv, &ifname)) 176 return -1; 177 178 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 179 ifname, entry); 180 181 file = fopen(path, "rb"); 182 if (file == NULL) 183 return -1; 184 ret = fread(buf, 1, size, file); 185 err = errno; 186 if (((size_t)ret < size) && (ferror(file))) 187 ret = -1; 188 else 189 ret = size; 190 fclose(file); 191 errno = err; 192 return ret; 193 } 194 195 /** 196 * Write to sysfs entry. 197 * 198 * @param[in] priv 199 * Pointer to private structure. 200 * @param[in] entry 201 * Entry name relative to sysfs path. 202 * @param[in] buf 203 * Data buffer. 204 * @param size 205 * Buffer size. 206 * 207 * @return 208 * 0 on success, -1 on failure and errno is set. 209 */ 210 static int 211 priv_sysfs_write(const struct priv *priv, const char *entry, 212 char *buf, size_t size) 213 { 214 char ifname[IF_NAMESIZE]; 215 FILE *file; 216 int ret; 217 int err; 218 219 if (priv_get_ifname(priv, &ifname)) 220 return -1; 221 222 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 223 ifname, entry); 224 225 file = fopen(path, "wb"); 226 if (file == NULL) 227 return -1; 228 ret = fwrite(buf, 1, size, file); 229 err = errno; 230 if (((size_t)ret < size) || (ferror(file))) 231 ret = -1; 232 else 233 ret = size; 234 fclose(file); 235 errno = err; 236 return ret; 237 } 238 239 /** 240 * Get unsigned long sysfs property. 241 * 242 * @param priv 243 * Pointer to private structure. 244 * @param[in] name 245 * Entry name relative to sysfs path. 246 * @param[out] value 247 * Value output buffer. 248 * 249 * @return 250 * 0 on success, -1 on failure and errno is set. 251 */ 252 static int 253 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 254 { 255 int ret; 256 unsigned long value_ret; 257 char value_str[32]; 258 259 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 260 if (ret == -1) { 261 DEBUG("cannot read %s value from sysfs: %s", 262 name, strerror(errno)); 263 return -1; 264 } 265 value_str[ret] = '\0'; 266 errno = 0; 267 value_ret = strtoul(value_str, NULL, 0); 268 if (errno) { 269 DEBUG("invalid %s value `%s': %s", name, value_str, 270 strerror(errno)); 271 return -1; 272 } 273 *value = value_ret; 274 return 0; 275 } 276 277 /** 278 * Set unsigned long sysfs property. 279 * 280 * @param priv 281 * Pointer to private structure. 282 * @param[in] name 283 * Entry name relative to sysfs path. 284 * @param value 285 * Value to set. 286 * 287 * @return 288 * 0 on success, -1 on failure and errno is set. 289 */ 290 static int 291 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 292 { 293 int ret; 294 MKSTR(value_str, "%lu", value); 295 296 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 297 if (ret == -1) { 298 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 299 name, value_str, value, strerror(errno)); 300 return -1; 301 } 302 return 0; 303 } 304 305 /** 306 * Perform ifreq ioctl() on associated Ethernet device. 307 * 308 * @param[in] priv 309 * Pointer to private structure. 310 * @param req 311 * Request number to pass to ioctl(). 312 * @param[out] ifr 313 * Interface request structure output buffer. 314 * 315 * @return 316 * 0 on success, -1 on failure and errno is set. 317 */ 318 int 319 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 320 { 321 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 322 int ret = -1; 323 324 if (sock == -1) 325 return ret; 326 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 327 ret = ioctl(sock, req, ifr); 328 close(sock); 329 return ret; 330 } 331 332 /** 333 * Get device MTU. 334 * 335 * @param priv 336 * Pointer to private structure. 337 * @param[out] mtu 338 * MTU value output buffer. 339 * 340 * @return 341 * 0 on success, -1 on failure and errno is set. 342 */ 343 int 344 priv_get_mtu(struct priv *priv, uint16_t *mtu) 345 { 346 unsigned long ulong_mtu; 347 348 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 349 return -1; 350 *mtu = ulong_mtu; 351 return 0; 352 } 353 354 /** 355 * Set device MTU. 356 * 357 * @param priv 358 * Pointer to private structure. 359 * @param mtu 360 * MTU value to set. 361 * 362 * @return 363 * 0 on success, -1 on failure and errno is set. 364 */ 365 static int 366 priv_set_mtu(struct priv *priv, uint16_t mtu) 367 { 368 return priv_set_sysfs_ulong(priv, "mtu", mtu); 369 } 370 371 /** 372 * Set device flags. 373 * 374 * @param priv 375 * Pointer to private structure. 376 * @param keep 377 * Bitmask for flags that must remain untouched. 378 * @param flags 379 * Bitmask for flags to modify. 380 * 381 * @return 382 * 0 on success, -1 on failure and errno is set. 383 */ 384 int 385 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 386 { 387 unsigned long tmp; 388 389 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 390 return -1; 391 tmp &= keep; 392 tmp |= flags; 393 return priv_set_sysfs_ulong(priv, "flags", tmp); 394 } 395 396 /** 397 * Ethernet device configuration. 398 * 399 * Prepare the driver for a given number of TX and RX queues. 400 * 401 * @param dev 402 * Pointer to Ethernet device structure. 403 * 404 * @return 405 * 0 on success, errno value on failure. 406 */ 407 static int 408 dev_configure(struct rte_eth_dev *dev) 409 { 410 struct priv *priv = dev->data->dev_private; 411 unsigned int rxqs_n = dev->data->nb_rx_queues; 412 unsigned int txqs_n = dev->data->nb_tx_queues; 413 unsigned int i; 414 unsigned int j; 415 unsigned int reta_idx_n; 416 417 priv->rxqs = (void *)dev->data->rx_queues; 418 priv->txqs = (void *)dev->data->tx_queues; 419 if (txqs_n != priv->txqs_n) { 420 INFO("%p: TX queues number update: %u -> %u", 421 (void *)dev, priv->txqs_n, txqs_n); 422 priv->txqs_n = txqs_n; 423 } 424 if (rxqs_n > priv->ind_table_max_size) { 425 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 426 return EINVAL; 427 } 428 if (rxqs_n == priv->rxqs_n) 429 return 0; 430 INFO("%p: RX queues number update: %u -> %u", 431 (void *)dev, priv->rxqs_n, rxqs_n); 432 priv->rxqs_n = rxqs_n; 433 /* If the requested number of RX queues is not a power of two, use the 434 * maximum indirection table size for better balancing. 435 * The result is always rounded to the next power of two. */ 436 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 437 priv->ind_table_max_size : 438 rxqs_n)); 439 if (priv_rss_reta_index_resize(priv, reta_idx_n)) 440 return ENOMEM; 441 /* When the number of RX queues is not a power of two, the remaining 442 * table entries are padded with reused WQs and hashes are not spread 443 * uniformly. */ 444 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 445 (*priv->reta_idx)[i] = j; 446 if (++j == rxqs_n) 447 j = 0; 448 } 449 return 0; 450 } 451 452 /** 453 * DPDK callback for Ethernet device configuration. 454 * 455 * @param dev 456 * Pointer to Ethernet device structure. 457 * 458 * @return 459 * 0 on success, negative errno value on failure. 460 */ 461 int 462 mlx5_dev_configure(struct rte_eth_dev *dev) 463 { 464 struct priv *priv = dev->data->dev_private; 465 int ret; 466 467 priv_lock(priv); 468 ret = dev_configure(dev); 469 assert(ret >= 0); 470 priv_unlock(priv); 471 return -ret; 472 } 473 474 /** 475 * DPDK callback to get information about the device. 476 * 477 * @param dev 478 * Pointer to Ethernet device structure. 479 * @param[out] info 480 * Info structure output buffer. 481 */ 482 void 483 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 484 { 485 struct priv *priv = dev->data->dev_private; 486 unsigned int max; 487 char ifname[IF_NAMESIZE]; 488 489 priv_lock(priv); 490 /* FIXME: we should ask the device for these values. */ 491 info->min_rx_bufsize = 32; 492 info->max_rx_pktlen = 65536; 493 /* 494 * Since we need one CQ per QP, the limit is the minimum number 495 * between the two values. 496 */ 497 max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ? 498 priv->device_attr.max_qp : priv->device_attr.max_cq); 499 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 500 if (max >= 65535) 501 max = 65535; 502 info->max_rx_queues = max; 503 info->max_tx_queues = max; 504 info->max_mac_addrs = RTE_DIM(priv->mac); 505 info->rx_offload_capa = 506 (priv->hw_csum ? 507 (DEV_RX_OFFLOAD_IPV4_CKSUM | 508 DEV_RX_OFFLOAD_UDP_CKSUM | 509 DEV_RX_OFFLOAD_TCP_CKSUM) : 510 0); 511 info->tx_offload_capa = 512 (priv->hw_csum ? 513 (DEV_TX_OFFLOAD_IPV4_CKSUM | 514 DEV_TX_OFFLOAD_UDP_CKSUM | 515 DEV_TX_OFFLOAD_TCP_CKSUM) : 516 0); 517 if (priv_get_ifname(priv, &ifname) == 0) 518 info->if_index = if_nametoindex(ifname); 519 /* FIXME: RETA update/query API expects the callee to know the size of 520 * the indirection table, for this PMD the size varies depending on 521 * the number of RX queues, it becomes impossible to find the correct 522 * size if it is not fixed. 523 * The API should be updated to solve this problem. */ 524 info->reta_size = priv->ind_table_max_size; 525 priv_unlock(priv); 526 } 527 528 const uint32_t * 529 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 530 { 531 static const uint32_t ptypes[] = { 532 /* refers to rxq_cq_to_pkt_type() */ 533 RTE_PTYPE_L3_IPV4, 534 RTE_PTYPE_L3_IPV6, 535 RTE_PTYPE_INNER_L3_IPV4, 536 RTE_PTYPE_INNER_L3_IPV6, 537 RTE_PTYPE_UNKNOWN 538 539 }; 540 541 if (dev->rx_pkt_burst == mlx5_rx_burst || 542 dev->rx_pkt_burst == mlx5_rx_burst_sp) 543 return ptypes; 544 return NULL; 545 } 546 547 /** 548 * DPDK callback to retrieve physical link information (unlocked version). 549 * 550 * @param dev 551 * Pointer to Ethernet device structure. 552 * @param wait_to_complete 553 * Wait for request completion (ignored). 554 */ 555 static int 556 mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) 557 { 558 struct priv *priv = dev->data->dev_private; 559 struct ethtool_cmd edata = { 560 .cmd = ETHTOOL_GSET 561 }; 562 struct ifreq ifr; 563 struct rte_eth_link dev_link; 564 int link_speed = 0; 565 566 (void)wait_to_complete; 567 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 568 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 569 return -1; 570 } 571 memset(&dev_link, 0, sizeof(dev_link)); 572 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 573 (ifr.ifr_flags & IFF_RUNNING)); 574 ifr.ifr_data = &edata; 575 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 576 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 577 strerror(errno)); 578 return -1; 579 } 580 link_speed = ethtool_cmd_speed(&edata); 581 if (link_speed == -1) 582 dev_link.link_speed = 0; 583 else 584 dev_link.link_speed = link_speed; 585 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 586 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 587 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 588 /* Link status changed. */ 589 dev->data->dev_link = dev_link; 590 return 0; 591 } 592 /* Link status is still the same. */ 593 return -1; 594 } 595 596 /** 597 * DPDK callback to retrieve physical link information. 598 * 599 * @param dev 600 * Pointer to Ethernet device structure. 601 * @param wait_to_complete 602 * Wait for request completion (ignored). 603 */ 604 int 605 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 606 { 607 struct priv *priv = dev->data->dev_private; 608 int ret; 609 610 priv_lock(priv); 611 ret = mlx5_link_update_unlocked(dev, wait_to_complete); 612 priv_unlock(priv); 613 return ret; 614 } 615 616 /** 617 * DPDK callback to change the MTU. 618 * 619 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be 620 * received). Use this as a hint to enable/disable scattered packets support 621 * and improve performance when not needed. 622 * Since failure is not an option, reconfiguring queues on the fly is not 623 * recommended. 624 * 625 * @param dev 626 * Pointer to Ethernet device structure. 627 * @param in_mtu 628 * New MTU. 629 * 630 * @return 631 * 0 on success, negative errno value on failure. 632 */ 633 int 634 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 635 { 636 struct priv *priv = dev->data->dev_private; 637 int ret = 0; 638 unsigned int i; 639 uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) = 640 mlx5_rx_burst; 641 642 priv_lock(priv); 643 /* Set kernel interface MTU first. */ 644 if (priv_set_mtu(priv, mtu)) { 645 ret = errno; 646 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 647 strerror(ret)); 648 goto out; 649 } else 650 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 651 priv->mtu = mtu; 652 /* Temporarily replace RX handler with a fake one, assuming it has not 653 * been copied elsewhere. */ 654 dev->rx_pkt_burst = removed_rx_burst; 655 /* Make sure everyone has left mlx5_rx_burst() and uses 656 * removed_rx_burst() instead. */ 657 rte_wmb(); 658 usleep(1000); 659 /* Reconfigure each RX queue. */ 660 for (i = 0; (i != priv->rxqs_n); ++i) { 661 struct rxq *rxq = (*priv->rxqs)[i]; 662 unsigned int max_frame_len; 663 int sp; 664 665 if (rxq == NULL) 666 continue; 667 /* Calculate new maximum frame length according to MTU and 668 * toggle scattered support (sp) if necessary. */ 669 max_frame_len = (priv->mtu + ETHER_HDR_LEN + 670 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN)); 671 sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM)); 672 /* Provide new values to rxq_setup(). */ 673 dev->data->dev_conf.rxmode.jumbo_frame = sp; 674 dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len; 675 ret = rxq_rehash(dev, rxq); 676 if (ret) { 677 /* Force SP RX if that queue requires it and abort. */ 678 if (rxq->sp) 679 rx_func = mlx5_rx_burst_sp; 680 break; 681 } 682 /* Scattered burst function takes priority. */ 683 if (rxq->sp) 684 rx_func = mlx5_rx_burst_sp; 685 } 686 /* Burst functions can now be called again. */ 687 rte_wmb(); 688 dev->rx_pkt_burst = rx_func; 689 out: 690 priv_unlock(priv); 691 assert(ret >= 0); 692 return -ret; 693 } 694 695 /** 696 * DPDK callback to get flow control status. 697 * 698 * @param dev 699 * Pointer to Ethernet device structure. 700 * @param[out] fc_conf 701 * Flow control output buffer. 702 * 703 * @return 704 * 0 on success, negative errno value on failure. 705 */ 706 int 707 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 708 { 709 struct priv *priv = dev->data->dev_private; 710 struct ifreq ifr; 711 struct ethtool_pauseparam ethpause = { 712 .cmd = ETHTOOL_GPAUSEPARAM 713 }; 714 int ret; 715 716 ifr.ifr_data = ðpause; 717 priv_lock(priv); 718 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 719 ret = errno; 720 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 721 " failed: %s", 722 strerror(ret)); 723 goto out; 724 } 725 726 fc_conf->autoneg = ethpause.autoneg; 727 if (ethpause.rx_pause && ethpause.tx_pause) 728 fc_conf->mode = RTE_FC_FULL; 729 else if (ethpause.rx_pause) 730 fc_conf->mode = RTE_FC_RX_PAUSE; 731 else if (ethpause.tx_pause) 732 fc_conf->mode = RTE_FC_TX_PAUSE; 733 else 734 fc_conf->mode = RTE_FC_NONE; 735 ret = 0; 736 737 out: 738 priv_unlock(priv); 739 assert(ret >= 0); 740 return -ret; 741 } 742 743 /** 744 * DPDK callback to modify flow control parameters. 745 * 746 * @param dev 747 * Pointer to Ethernet device structure. 748 * @param[in] fc_conf 749 * Flow control parameters. 750 * 751 * @return 752 * 0 on success, negative errno value on failure. 753 */ 754 int 755 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 756 { 757 struct priv *priv = dev->data->dev_private; 758 struct ifreq ifr; 759 struct ethtool_pauseparam ethpause = { 760 .cmd = ETHTOOL_SPAUSEPARAM 761 }; 762 int ret; 763 764 ifr.ifr_data = ðpause; 765 ethpause.autoneg = fc_conf->autoneg; 766 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 767 (fc_conf->mode & RTE_FC_RX_PAUSE)) 768 ethpause.rx_pause = 1; 769 else 770 ethpause.rx_pause = 0; 771 772 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 773 (fc_conf->mode & RTE_FC_TX_PAUSE)) 774 ethpause.tx_pause = 1; 775 else 776 ethpause.tx_pause = 0; 777 778 priv_lock(priv); 779 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 780 ret = errno; 781 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 782 " failed: %s", 783 strerror(ret)); 784 goto out; 785 } 786 ret = 0; 787 788 out: 789 priv_unlock(priv); 790 assert(ret >= 0); 791 return -ret; 792 } 793 794 /** 795 * Get PCI information from struct ibv_device. 796 * 797 * @param device 798 * Pointer to Ethernet device structure. 799 * @param[out] pci_addr 800 * PCI bus address output buffer. 801 * 802 * @return 803 * 0 on success, -1 on failure and errno is set. 804 */ 805 int 806 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 807 struct rte_pci_addr *pci_addr) 808 { 809 FILE *file; 810 char line[32]; 811 MKSTR(path, "%s/device/uevent", device->ibdev_path); 812 813 file = fopen(path, "rb"); 814 if (file == NULL) 815 return -1; 816 while (fgets(line, sizeof(line), file) == line) { 817 size_t len = strlen(line); 818 int ret; 819 820 /* Truncate long lines. */ 821 if (len == (sizeof(line) - 1)) 822 while (line[(len - 1)] != '\n') { 823 ret = fgetc(file); 824 if (ret == EOF) 825 break; 826 line[(len - 1)] = ret; 827 } 828 /* Extract information. */ 829 if (sscanf(line, 830 "PCI_SLOT_NAME=" 831 "%" SCNx16 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 832 &pci_addr->domain, 833 &pci_addr->bus, 834 &pci_addr->devid, 835 &pci_addr->function) == 4) { 836 ret = 0; 837 break; 838 } 839 } 840 fclose(file); 841 return 0; 842 } 843 844 /** 845 * Link status handler. 846 * 847 * @param priv 848 * Pointer to private structure. 849 * @param dev 850 * Pointer to the rte_eth_dev structure. 851 * 852 * @return 853 * Nonzero if the callback process can be called immediately. 854 */ 855 static int 856 priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) 857 { 858 struct ibv_async_event event; 859 int port_change = 0; 860 int ret = 0; 861 862 /* Read all message and acknowledge them. */ 863 for (;;) { 864 if (ibv_get_async_event(priv->ctx, &event)) 865 break; 866 867 if (event.event_type == IBV_EVENT_PORT_ACTIVE || 868 event.event_type == IBV_EVENT_PORT_ERR) 869 port_change = 1; 870 else 871 DEBUG("event type %d on port %d not handled", 872 event.event_type, event.element.port_num); 873 ibv_ack_async_event(&event); 874 } 875 876 if (port_change ^ priv->pending_alarm) { 877 struct rte_eth_link *link = &dev->data->dev_link; 878 879 priv->pending_alarm = 0; 880 mlx5_link_update_unlocked(dev, 0); 881 if (((link->link_speed == 0) && link->link_status) || 882 ((link->link_speed != 0) && !link->link_status)) { 883 /* Inconsistent status, check again later. */ 884 priv->pending_alarm = 1; 885 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 886 mlx5_dev_link_status_handler, 887 dev); 888 } else 889 ret = 1; 890 } 891 return ret; 892 } 893 894 /** 895 * Handle delayed link status event. 896 * 897 * @param arg 898 * Registered argument. 899 */ 900 void 901 mlx5_dev_link_status_handler(void *arg) 902 { 903 struct rte_eth_dev *dev = arg; 904 struct priv *priv = dev->data->dev_private; 905 int ret; 906 907 priv_lock(priv); 908 assert(priv->pending_alarm == 1); 909 ret = priv_dev_link_status_handler(priv, dev); 910 priv_unlock(priv); 911 if (ret) 912 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 913 } 914 915 /** 916 * Handle interrupts from the NIC. 917 * 918 * @param[in] intr_handle 919 * Interrupt handler. 920 * @param cb_arg 921 * Callback argument. 922 */ 923 void 924 mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) 925 { 926 struct rte_eth_dev *dev = cb_arg; 927 struct priv *priv = dev->data->dev_private; 928 int ret; 929 930 (void)intr_handle; 931 priv_lock(priv); 932 ret = priv_dev_link_status_handler(priv, dev); 933 priv_unlock(priv); 934 if (ret) 935 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 936 } 937 938 /** 939 * Uninstall interrupt handler. 940 * 941 * @param priv 942 * Pointer to private structure. 943 * @param dev 944 * Pointer to the rte_eth_dev structure. 945 */ 946 void 947 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 948 { 949 if (!dev->data->dev_conf.intr_conf.lsc) 950 return; 951 rte_intr_callback_unregister(&priv->intr_handle, 952 mlx5_dev_interrupt_handler, 953 dev); 954 if (priv->pending_alarm) 955 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 956 priv->pending_alarm = 0; 957 priv->intr_handle.fd = 0; 958 priv->intr_handle.type = 0; 959 } 960 961 /** 962 * Install interrupt handler. 963 * 964 * @param priv 965 * Pointer to private structure. 966 * @param dev 967 * Pointer to the rte_eth_dev structure. 968 */ 969 void 970 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 971 { 972 int rc, flags; 973 974 if (!dev->data->dev_conf.intr_conf.lsc) 975 return; 976 assert(priv->ctx->async_fd > 0); 977 flags = fcntl(priv->ctx->async_fd, F_GETFL); 978 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 979 if (rc < 0) { 980 INFO("failed to change file descriptor async event queue"); 981 dev->data->dev_conf.intr_conf.lsc = 0; 982 } else { 983 priv->intr_handle.fd = priv->ctx->async_fd; 984 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 985 rte_intr_callback_register(&priv->intr_handle, 986 mlx5_dev_interrupt_handler, 987 dev); 988 } 989 } 990