/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <unistd.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <dirent.h> 43 #include <net/if.h> 44 #include <sys/ioctl.h> 45 #include <sys/socket.h> 46 #include <netinet/in.h> 47 #include <linux/if.h> 48 #include <linux/ethtool.h> 49 #include <linux/sockios.h> 50 #include <fcntl.h> 51 52 /* DPDK headers don't like -pedantic. */ 53 #ifdef PEDANTIC 54 #pragma GCC diagnostic ignored "-pedantic" 55 #endif 56 #include <rte_atomic.h> 57 #include <rte_ethdev.h> 58 #include <rte_mbuf.h> 59 #include <rte_common.h> 60 #include <rte_interrupts.h> 61 #include <rte_alarm.h> 62 #ifdef PEDANTIC 63 #pragma GCC diagnostic error "-pedantic" 64 #endif 65 66 #include "mlx5.h" 67 #include "mlx5_rxtx.h" 68 #include "mlx5_utils.h" 69 70 /** 71 * Get interface name from private structure. 72 * 73 * @param[in] priv 74 * Pointer to private structure. 75 * @param[out] ifname 76 * Interface name output buffer. 77 * 78 * @return 79 * 0 on success, -1 on failure and errno is set. 80 */ 81 int 82 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 83 { 84 DIR *dir; 85 struct dirent *dent; 86 unsigned int dev_type = 0; 87 unsigned int dev_port_prev = ~0u; 88 char match[IF_NAMESIZE] = ""; 89 90 { 91 MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path); 92 93 dir = opendir(path); 94 if (dir == NULL) 95 return -1; 96 } 97 while ((dent = readdir(dir)) != NULL) { 98 char *name = dent->d_name; 99 FILE *file; 100 unsigned int dev_port; 101 int r; 102 103 if ((name[0] == '.') && 104 ((name[1] == '\0') || 105 ((name[1] == '.') && (name[2] == '\0')))) 106 continue; 107 108 MKSTR(path, "%s/device/net/%s/%s", 109 priv->ctx->device->ibdev_path, name, 110 (dev_type ? 
"dev_id" : "dev_port")); 111 112 file = fopen(path, "rb"); 113 if (file == NULL) { 114 if (errno != ENOENT) 115 continue; 116 /* 117 * Switch to dev_id when dev_port does not exist as 118 * is the case with Linux kernel versions < 3.15. 119 */ 120 try_dev_id: 121 match[0] = '\0'; 122 if (dev_type) 123 break; 124 dev_type = 1; 125 dev_port_prev = ~0u; 126 rewinddir(dir); 127 continue; 128 } 129 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 130 fclose(file); 131 if (r != 1) 132 continue; 133 /* 134 * Switch to dev_id when dev_port returns the same value for 135 * all ports. May happen when using a MOFED release older than 136 * 3.0 with a Linux kernel >= 3.15. 137 */ 138 if (dev_port == dev_port_prev) 139 goto try_dev_id; 140 dev_port_prev = dev_port; 141 if (dev_port == (priv->port - 1u)) 142 snprintf(match, sizeof(match), "%s", name); 143 } 144 closedir(dir); 145 if (match[0] == '\0') 146 return -1; 147 strncpy(*ifname, match, sizeof(*ifname)); 148 return 0; 149 } 150 151 /** 152 * Read from sysfs entry. 153 * 154 * @param[in] priv 155 * Pointer to private structure. 156 * @param[in] entry 157 * Entry name relative to sysfs path. 158 * @param[out] buf 159 * Data output buffer. 160 * @param size 161 * Buffer size. 162 * 163 * @return 164 * 0 on success, -1 on failure and errno is set. 165 */ 166 static int 167 priv_sysfs_read(const struct priv *priv, const char *entry, 168 char *buf, size_t size) 169 { 170 char ifname[IF_NAMESIZE]; 171 FILE *file; 172 int ret; 173 int err; 174 175 if (priv_get_ifname(priv, &ifname)) 176 return -1; 177 178 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 179 ifname, entry); 180 181 file = fopen(path, "rb"); 182 if (file == NULL) 183 return -1; 184 ret = fread(buf, 1, size, file); 185 err = errno; 186 if (((size_t)ret < size) && (ferror(file))) 187 ret = -1; 188 else 189 ret = size; 190 fclose(file); 191 errno = err; 192 return ret; 193 } 194 195 /** 196 * Write to sysfs entry. 
197 * 198 * @param[in] priv 199 * Pointer to private structure. 200 * @param[in] entry 201 * Entry name relative to sysfs path. 202 * @param[in] buf 203 * Data buffer. 204 * @param size 205 * Buffer size. 206 * 207 * @return 208 * 0 on success, -1 on failure and errno is set. 209 */ 210 static int 211 priv_sysfs_write(const struct priv *priv, const char *entry, 212 char *buf, size_t size) 213 { 214 char ifname[IF_NAMESIZE]; 215 FILE *file; 216 int ret; 217 int err; 218 219 if (priv_get_ifname(priv, &ifname)) 220 return -1; 221 222 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 223 ifname, entry); 224 225 file = fopen(path, "wb"); 226 if (file == NULL) 227 return -1; 228 ret = fwrite(buf, 1, size, file); 229 err = errno; 230 if (((size_t)ret < size) || (ferror(file))) 231 ret = -1; 232 else 233 ret = size; 234 fclose(file); 235 errno = err; 236 return ret; 237 } 238 239 /** 240 * Get unsigned long sysfs property. 241 * 242 * @param priv 243 * Pointer to private structure. 244 * @param[in] name 245 * Entry name relative to sysfs path. 246 * @param[out] value 247 * Value output buffer. 248 * 249 * @return 250 * 0 on success, -1 on failure and errno is set. 251 */ 252 static int 253 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 254 { 255 int ret; 256 unsigned long value_ret; 257 char value_str[32]; 258 259 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 260 if (ret == -1) { 261 DEBUG("cannot read %s value from sysfs: %s", 262 name, strerror(errno)); 263 return -1; 264 } 265 value_str[ret] = '\0'; 266 errno = 0; 267 value_ret = strtoul(value_str, NULL, 0); 268 if (errno) { 269 DEBUG("invalid %s value `%s': %s", name, value_str, 270 strerror(errno)); 271 return -1; 272 } 273 *value = value_ret; 274 return 0; 275 } 276 277 /** 278 * Set unsigned long sysfs property. 279 * 280 * @param priv 281 * Pointer to private structure. 282 * @param[in] name 283 * Entry name relative to sysfs path. 
284 * @param value 285 * Value to set. 286 * 287 * @return 288 * 0 on success, -1 on failure and errno is set. 289 */ 290 static int 291 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 292 { 293 int ret; 294 MKSTR(value_str, "%lu", value); 295 296 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 297 if (ret == -1) { 298 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 299 name, value_str, value, strerror(errno)); 300 return -1; 301 } 302 return 0; 303 } 304 305 /** 306 * Perform ifreq ioctl() on associated Ethernet device. 307 * 308 * @param[in] priv 309 * Pointer to private structure. 310 * @param req 311 * Request number to pass to ioctl(). 312 * @param[out] ifr 313 * Interface request structure output buffer. 314 * 315 * @return 316 * 0 on success, -1 on failure and errno is set. 317 */ 318 int 319 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 320 { 321 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 322 int ret = -1; 323 324 if (sock == -1) 325 return ret; 326 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 327 ret = ioctl(sock, req, ifr); 328 close(sock); 329 return ret; 330 } 331 332 /** 333 * Get device MTU. 334 * 335 * @param priv 336 * Pointer to private structure. 337 * @param[out] mtu 338 * MTU value output buffer. 339 * 340 * @return 341 * 0 on success, -1 on failure and errno is set. 342 */ 343 int 344 priv_get_mtu(struct priv *priv, uint16_t *mtu) 345 { 346 unsigned long ulong_mtu; 347 348 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 349 return -1; 350 *mtu = ulong_mtu; 351 return 0; 352 } 353 354 /** 355 * Set device MTU. 356 * 357 * @param priv 358 * Pointer to private structure. 359 * @param mtu 360 * MTU value to set. 361 * 362 * @return 363 * 0 on success, -1 on failure and errno is set. 364 */ 365 static int 366 priv_set_mtu(struct priv *priv, uint16_t mtu) 367 { 368 return priv_set_sysfs_ulong(priv, "mtu", mtu); 369 } 370 371 /** 372 * Set device flags. 
373 * 374 * @param priv 375 * Pointer to private structure. 376 * @param keep 377 * Bitmask for flags that must remain untouched. 378 * @param flags 379 * Bitmask for flags to modify. 380 * 381 * @return 382 * 0 on success, -1 on failure and errno is set. 383 */ 384 int 385 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 386 { 387 unsigned long tmp; 388 389 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 390 return -1; 391 tmp &= keep; 392 tmp |= flags; 393 return priv_set_sysfs_ulong(priv, "flags", tmp); 394 } 395 396 /** 397 * Ethernet device configuration. 398 * 399 * Prepare the driver for a given number of TX and RX queues. 400 * 401 * @param dev 402 * Pointer to Ethernet device structure. 403 * 404 * @return 405 * 0 on success, errno value on failure. 406 */ 407 static int 408 dev_configure(struct rte_eth_dev *dev) 409 { 410 struct priv *priv = dev->data->dev_private; 411 unsigned int rxqs_n = dev->data->nb_rx_queues; 412 unsigned int txqs_n = dev->data->nb_tx_queues; 413 unsigned int i; 414 unsigned int j; 415 unsigned int reta_idx_n; 416 417 priv->rxqs = (void *)dev->data->rx_queues; 418 priv->txqs = (void *)dev->data->tx_queues; 419 if (txqs_n != priv->txqs_n) { 420 INFO("%p: TX queues number update: %u -> %u", 421 (void *)dev, priv->txqs_n, txqs_n); 422 priv->txqs_n = txqs_n; 423 } 424 if (rxqs_n > priv->ind_table_max_size) { 425 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 426 return EINVAL; 427 } 428 if (rxqs_n == priv->rxqs_n) 429 return 0; 430 INFO("%p: RX queues number update: %u -> %u", 431 (void *)dev, priv->rxqs_n, rxqs_n); 432 priv->rxqs_n = rxqs_n; 433 /* If the requested number of RX queues is not a power of two, use the 434 * maximum indirection table size for better balancing. 435 * The result is always rounded to the next power of two. */ 436 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 
437 priv->ind_table_max_size : 438 rxqs_n)); 439 if (priv_rss_reta_index_resize(priv, reta_idx_n)) 440 return ENOMEM; 441 /* When the number of RX queues is not a power of two, the remaining 442 * table entries are padded with reused WQs and hashes are not spread 443 * uniformly. */ 444 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 445 (*priv->reta_idx)[i] = j; 446 if (++j == rxqs_n) 447 j = 0; 448 } 449 return 0; 450 } 451 452 /** 453 * DPDK callback for Ethernet device configuration. 454 * 455 * @param dev 456 * Pointer to Ethernet device structure. 457 * 458 * @return 459 * 0 on success, negative errno value on failure. 460 */ 461 int 462 mlx5_dev_configure(struct rte_eth_dev *dev) 463 { 464 struct priv *priv = dev->data->dev_private; 465 int ret; 466 467 priv_lock(priv); 468 ret = dev_configure(dev); 469 assert(ret >= 0); 470 priv_unlock(priv); 471 return -ret; 472 } 473 474 /** 475 * DPDK callback to get information about the device. 476 * 477 * @param dev 478 * Pointer to Ethernet device structure. 479 * @param[out] info 480 * Info structure output buffer. 481 */ 482 void 483 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 484 { 485 struct priv *priv = dev->data->dev_private; 486 unsigned int max; 487 char ifname[IF_NAMESIZE]; 488 489 priv_lock(priv); 490 /* FIXME: we should ask the device for these values. */ 491 info->min_rx_bufsize = 32; 492 info->max_rx_pktlen = 65536; 493 /* 494 * Since we need one CQ per QP, the limit is the minimum number 495 * between the two values. 496 */ 497 max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ? 498 priv->device_attr.max_qp : priv->device_attr.max_cq); 499 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 500 if (max >= 65535) 501 max = 65535; 502 info->max_rx_queues = max; 503 info->max_tx_queues = max; 504 /* Last array entry is reserved for broadcast. */ 505 info->max_mac_addrs = (RTE_DIM(priv->mac) - 1); 506 info->rx_offload_capa = 507 (priv->hw_csum ? 
508 (DEV_RX_OFFLOAD_IPV4_CKSUM | 509 DEV_RX_OFFLOAD_UDP_CKSUM | 510 DEV_RX_OFFLOAD_TCP_CKSUM) : 511 0); 512 info->tx_offload_capa = 513 (priv->hw_csum ? 514 (DEV_TX_OFFLOAD_IPV4_CKSUM | 515 DEV_TX_OFFLOAD_UDP_CKSUM | 516 DEV_TX_OFFLOAD_TCP_CKSUM) : 517 0); 518 if (priv_get_ifname(priv, &ifname) == 0) 519 info->if_index = if_nametoindex(ifname); 520 /* FIXME: RETA update/query API expects the callee to know the size of 521 * the indirection table, for this PMD the size varies depending on 522 * the number of RX queues, it becomes impossible to find the correct 523 * size if it is not fixed. 524 * The API should be updated to solve this problem. */ 525 info->reta_size = priv->ind_table_max_size; 526 priv_unlock(priv); 527 } 528 529 /** 530 * DPDK callback to retrieve physical link information (unlocked version). 531 * 532 * @param dev 533 * Pointer to Ethernet device structure. 534 * @param wait_to_complete 535 * Wait for request completion (ignored). 536 */ 537 static int 538 mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) 539 { 540 struct priv *priv = dev->data->dev_private; 541 struct ethtool_cmd edata = { 542 .cmd = ETHTOOL_GSET 543 }; 544 struct ifreq ifr; 545 struct rte_eth_link dev_link; 546 int link_speed = 0; 547 548 (void)wait_to_complete; 549 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 550 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 551 return -1; 552 } 553 memset(&dev_link, 0, sizeof(dev_link)); 554 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 555 (ifr.ifr_flags & IFF_RUNNING)); 556 ifr.ifr_data = &edata; 557 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 558 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 559 strerror(errno)); 560 return -1; 561 } 562 link_speed = ethtool_cmd_speed(&edata); 563 if (link_speed == -1) 564 dev_link.link_speed = 0; 565 else 566 dev_link.link_speed = link_speed; 567 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 
568 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 569 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 570 /* Link status changed. */ 571 dev->data->dev_link = dev_link; 572 return 0; 573 } 574 /* Link status is still the same. */ 575 return -1; 576 } 577 578 /** 579 * DPDK callback to retrieve physical link information. 580 * 581 * @param dev 582 * Pointer to Ethernet device structure. 583 * @param wait_to_complete 584 * Wait for request completion (ignored). 585 */ 586 int 587 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 588 { 589 struct priv *priv = dev->data->dev_private; 590 int ret; 591 592 priv_lock(priv); 593 ret = mlx5_link_update_unlocked(dev, wait_to_complete); 594 priv_unlock(priv); 595 return ret; 596 } 597 598 /** 599 * DPDK callback to change the MTU. 600 * 601 * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be 602 * received). Use this as a hint to enable/disable scattered packets support 603 * and improve performance when not needed. 604 * Since failure is not an option, reconfiguring queues on the fly is not 605 * recommended. 606 * 607 * @param dev 608 * Pointer to Ethernet device structure. 609 * @param in_mtu 610 * New MTU. 611 * 612 * @return 613 * 0 on success, negative errno value on failure. 614 */ 615 int 616 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 617 { 618 struct priv *priv = dev->data->dev_private; 619 int ret = 0; 620 unsigned int i; 621 uint16_t (*rx_func)(void *, struct rte_mbuf **, uint16_t) = 622 mlx5_rx_burst; 623 624 priv_lock(priv); 625 /* Set kernel interface MTU first. */ 626 if (priv_set_mtu(priv, mtu)) { 627 ret = errno; 628 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 629 strerror(ret)); 630 goto out; 631 } else 632 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 633 priv->mtu = mtu; 634 /* Temporarily replace RX handler with a fake one, assuming it has not 635 * been copied elsewhere. 
*/ 636 dev->rx_pkt_burst = removed_rx_burst; 637 /* Make sure everyone has left mlx5_rx_burst() and uses 638 * removed_rx_burst() instead. */ 639 rte_wmb(); 640 usleep(1000); 641 /* Reconfigure each RX queue. */ 642 for (i = 0; (i != priv->rxqs_n); ++i) { 643 struct rxq *rxq = (*priv->rxqs)[i]; 644 unsigned int max_frame_len; 645 int sp; 646 647 if (rxq == NULL) 648 continue; 649 /* Calculate new maximum frame length according to MTU and 650 * toggle scattered support (sp) if necessary. */ 651 max_frame_len = (priv->mtu + ETHER_HDR_LEN + 652 (ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN)); 653 sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM)); 654 /* Provide new values to rxq_setup(). */ 655 dev->data->dev_conf.rxmode.jumbo_frame = sp; 656 dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len; 657 ret = rxq_rehash(dev, rxq); 658 if (ret) { 659 /* Force SP RX if that queue requires it and abort. */ 660 if (rxq->sp) 661 rx_func = mlx5_rx_burst_sp; 662 break; 663 } 664 /* Scattered burst function takes priority. */ 665 if (rxq->sp) 666 rx_func = mlx5_rx_burst_sp; 667 } 668 /* Burst functions can now be called again. */ 669 rte_wmb(); 670 dev->rx_pkt_burst = rx_func; 671 out: 672 priv_unlock(priv); 673 assert(ret >= 0); 674 return -ret; 675 } 676 677 /** 678 * DPDK callback to get flow control status. 679 * 680 * @param dev 681 * Pointer to Ethernet device structure. 682 * @param[out] fc_conf 683 * Flow control output buffer. 684 * 685 * @return 686 * 0 on success, negative errno value on failure. 
687 */ 688 int 689 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 690 { 691 struct priv *priv = dev->data->dev_private; 692 struct ifreq ifr; 693 struct ethtool_pauseparam ethpause = { 694 .cmd = ETHTOOL_GPAUSEPARAM 695 }; 696 int ret; 697 698 ifr.ifr_data = ðpause; 699 priv_lock(priv); 700 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 701 ret = errno; 702 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 703 " failed: %s", 704 strerror(ret)); 705 goto out; 706 } 707 708 fc_conf->autoneg = ethpause.autoneg; 709 if (ethpause.rx_pause && ethpause.tx_pause) 710 fc_conf->mode = RTE_FC_FULL; 711 else if (ethpause.rx_pause) 712 fc_conf->mode = RTE_FC_RX_PAUSE; 713 else if (ethpause.tx_pause) 714 fc_conf->mode = RTE_FC_TX_PAUSE; 715 else 716 fc_conf->mode = RTE_FC_NONE; 717 ret = 0; 718 719 out: 720 priv_unlock(priv); 721 assert(ret >= 0); 722 return -ret; 723 } 724 725 /** 726 * DPDK callback to modify flow control parameters. 727 * 728 * @param dev 729 * Pointer to Ethernet device structure. 730 * @param[in] fc_conf 731 * Flow control parameters. 732 * 733 * @return 734 * 0 on success, negative errno value on failure. 
735 */ 736 int 737 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 738 { 739 struct priv *priv = dev->data->dev_private; 740 struct ifreq ifr; 741 struct ethtool_pauseparam ethpause = { 742 .cmd = ETHTOOL_SPAUSEPARAM 743 }; 744 int ret; 745 746 ifr.ifr_data = ðpause; 747 ethpause.autoneg = fc_conf->autoneg; 748 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 749 (fc_conf->mode & RTE_FC_RX_PAUSE)) 750 ethpause.rx_pause = 1; 751 else 752 ethpause.rx_pause = 0; 753 754 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 755 (fc_conf->mode & RTE_FC_TX_PAUSE)) 756 ethpause.tx_pause = 1; 757 else 758 ethpause.tx_pause = 0; 759 760 priv_lock(priv); 761 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 762 ret = errno; 763 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 764 " failed: %s", 765 strerror(ret)); 766 goto out; 767 } 768 ret = 0; 769 770 out: 771 priv_unlock(priv); 772 assert(ret >= 0); 773 return -ret; 774 } 775 776 /** 777 * Get PCI information from struct ibv_device. 778 * 779 * @param device 780 * Pointer to Ethernet device structure. 781 * @param[out] pci_addr 782 * PCI bus address output buffer. 783 * 784 * @return 785 * 0 on success, -1 on failure and errno is set. 786 */ 787 int 788 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 789 struct rte_pci_addr *pci_addr) 790 { 791 FILE *file; 792 char line[32]; 793 MKSTR(path, "%s/device/uevent", device->ibdev_path); 794 795 file = fopen(path, "rb"); 796 if (file == NULL) 797 return -1; 798 while (fgets(line, sizeof(line), file) == line) { 799 size_t len = strlen(line); 800 int ret; 801 802 /* Truncate long lines. */ 803 if (len == (sizeof(line) - 1)) 804 while (line[(len - 1)] != '\n') { 805 ret = fgetc(file); 806 if (ret == EOF) 807 break; 808 line[(len - 1)] = ret; 809 } 810 /* Extract information. 
*/ 811 if (sscanf(line, 812 "PCI_SLOT_NAME=" 813 "%" SCNx16 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 814 &pci_addr->domain, 815 &pci_addr->bus, 816 &pci_addr->devid, 817 &pci_addr->function) == 4) { 818 ret = 0; 819 break; 820 } 821 } 822 fclose(file); 823 return 0; 824 } 825 826 /** 827 * Link status handler. 828 * 829 * @param priv 830 * Pointer to private structure. 831 * @param dev 832 * Pointer to the rte_eth_dev structure. 833 * 834 * @return 835 * Nonzero if the callback process can be called immediately. 836 */ 837 static int 838 priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) 839 { 840 struct ibv_async_event event; 841 int port_change = 0; 842 int ret = 0; 843 844 /* Read all message and acknowledge them. */ 845 for (;;) { 846 if (ibv_get_async_event(priv->ctx, &event)) 847 break; 848 849 if (event.event_type == IBV_EVENT_PORT_ACTIVE || 850 event.event_type == IBV_EVENT_PORT_ERR) 851 port_change = 1; 852 else 853 DEBUG("event type %d on port %d not handled", 854 event.event_type, event.element.port_num); 855 ibv_ack_async_event(&event); 856 } 857 858 if (port_change ^ priv->pending_alarm) { 859 struct rte_eth_link *link = &dev->data->dev_link; 860 861 priv->pending_alarm = 0; 862 mlx5_link_update_unlocked(dev, 0); 863 if (((link->link_speed == 0) && link->link_status) || 864 ((link->link_speed != 0) && !link->link_status)) { 865 /* Inconsistent status, check again later. */ 866 priv->pending_alarm = 1; 867 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 868 mlx5_dev_link_status_handler, 869 dev); 870 } else 871 ret = 1; 872 } 873 return ret; 874 } 875 876 /** 877 * Handle delayed link status event. 878 * 879 * @param arg 880 * Registered argument. 
881 */ 882 void 883 mlx5_dev_link_status_handler(void *arg) 884 { 885 struct rte_eth_dev *dev = arg; 886 struct priv *priv = dev->data->dev_private; 887 int ret; 888 889 priv_lock(priv); 890 assert(priv->pending_alarm == 1); 891 ret = priv_dev_link_status_handler(priv, dev); 892 priv_unlock(priv); 893 if (ret) 894 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 895 } 896 897 /** 898 * Handle interrupts from the NIC. 899 * 900 * @param[in] intr_handle 901 * Interrupt handler. 902 * @param cb_arg 903 * Callback argument. 904 */ 905 void 906 mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) 907 { 908 struct rte_eth_dev *dev = cb_arg; 909 struct priv *priv = dev->data->dev_private; 910 int ret; 911 912 (void)intr_handle; 913 priv_lock(priv); 914 ret = priv_dev_link_status_handler(priv, dev); 915 priv_unlock(priv); 916 if (ret) 917 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); 918 } 919 920 /** 921 * Uninstall interrupt handler. 922 * 923 * @param priv 924 * Pointer to private structure. 925 * @param dev 926 * Pointer to the rte_eth_dev structure. 927 */ 928 void 929 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 930 { 931 if (!dev->data->dev_conf.intr_conf.lsc) 932 return; 933 rte_intr_callback_unregister(&priv->intr_handle, 934 mlx5_dev_interrupt_handler, 935 dev); 936 if (priv->pending_alarm) 937 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 938 priv->pending_alarm = 0; 939 priv->intr_handle.fd = 0; 940 priv->intr_handle.type = 0; 941 } 942 943 /** 944 * Install interrupt handler. 945 * 946 * @param priv 947 * Pointer to private structure. 948 * @param dev 949 * Pointer to the rte_eth_dev structure. 
950 */ 951 void 952 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 953 { 954 int rc, flags; 955 956 if (!dev->data->dev_conf.intr_conf.lsc) 957 return; 958 assert(priv->ctx->async_fd > 0); 959 flags = fcntl(priv->ctx->async_fd, F_GETFL); 960 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 961 if (rc < 0) { 962 INFO("failed to change file descriptor async event queue"); 963 dev->data->dev_conf.intr_conf.lsc = 0; 964 } else { 965 priv->intr_handle.fd = priv->ctx->async_fd; 966 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 967 rte_intr_callback_register(&priv->intr_handle, 968 mlx5_dev_interrupt_handler, 969 dev); 970 } 971 } 972