1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox. 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <unistd.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <string.h> 14 #include <stdlib.h> 15 #include <errno.h> 16 #include <dirent.h> 17 #include <net/if.h> 18 #include <sys/ioctl.h> 19 #include <sys/socket.h> 20 #include <sys/utsname.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <linux/version.h> 25 #include <fcntl.h> 26 #include <stdalign.h> 27 #include <sys/un.h> 28 29 #include <rte_atomic.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_bus_pci.h> 32 #include <rte_mbuf.h> 33 #include <rte_common.h> 34 #include <rte_interrupts.h> 35 #include <rte_alarm.h> 36 #include <rte_malloc.h> 37 38 #include "mlx5.h" 39 #include "mlx5_glue.h" 40 #include "mlx5_rxtx.h" 41 #include "mlx5_utils.h" 42 43 /* Add defines in case the running kernel is not the same as user headers. */ 44 #ifndef ETHTOOL_GLINKSETTINGS 45 struct ethtool_link_settings { 46 uint32_t cmd; 47 uint32_t speed; 48 uint8_t duplex; 49 uint8_t port; 50 uint8_t phy_address; 51 uint8_t autoneg; 52 uint8_t mdio_support; 53 uint8_t eth_to_mdix; 54 uint8_t eth_tp_mdix_ctrl; 55 int8_t link_mode_masks_nwords; 56 uint32_t reserved[8]; 57 uint32_t link_mode_masks[]; 58 }; 59 60 #define ETHTOOL_GLINKSETTINGS 0x0000004c 61 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 62 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 63 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 64 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 65 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 66 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 67 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 68 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 69 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 70 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 71 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 72 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 73 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 74 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 75 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 76 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 77 #endif 78 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 79 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 80 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 81 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 82 #endif 83 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 84 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 85 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 86 #endif 87 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 88 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 89 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 90 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 91 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 92 #endif 93 94 /** 95 * Get interface name from private structure. 96 * 97 * @param[in] priv 98 * Pointer to private structure. 99 * @param[out] ifname 100 * Interface name output buffer. 101 * 102 * @return 103 * 0 on success, -1 on failure and errno is set. 104 */ 105 int 106 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 107 { 108 DIR *dir; 109 struct dirent *dent; 110 unsigned int dev_type = 0; 111 unsigned int dev_port_prev = ~0u; 112 char match[IF_NAMESIZE] = ""; 113 114 { 115 MKSTR(path, "%s/device/net", priv->ibdev_path); 116 117 dir = opendir(path); 118 if (dir == NULL) 119 return -1; 120 } 121 while ((dent = readdir(dir)) != NULL) { 122 char *name = dent->d_name; 123 FILE *file; 124 unsigned int dev_port; 125 int r; 126 127 if ((name[0] == '.') && 128 ((name[1] == '\0') || 129 ((name[1] == '.') && (name[2] == '\0')))) 130 continue; 131 132 MKSTR(path, "%s/device/net/%s/%s", 133 priv->ibdev_path, name, 134 (dev_type ? "dev_id" : "dev_port")); 135 136 file = fopen(path, "rb"); 137 if (file == NULL) { 138 if (errno != ENOENT) 139 continue; 140 /* 141 * Switch to dev_id when dev_port does not exist as 142 * is the case with Linux kernel versions < 3.15. 143 */ 144 try_dev_id: 145 match[0] = '\0'; 146 if (dev_type) 147 break; 148 dev_type = 1; 149 dev_port_prev = ~0u; 150 rewinddir(dir); 151 continue; 152 } 153 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 154 fclose(file); 155 if (r != 1) 156 continue; 157 /* 158 * Switch to dev_id when dev_port returns the same value for 159 * all ports. May happen when using a MOFED release older than 160 * 3.0 with a Linux kernel >= 3.15. 161 */ 162 if (dev_port == dev_port_prev) 163 goto try_dev_id; 164 dev_port_prev = dev_port; 165 if (dev_port == (priv->port - 1u)) 166 snprintf(match, sizeof(match), "%s", name); 167 } 168 closedir(dir); 169 if (match[0] == '\0') 170 return -1; 171 strncpy(*ifname, match, sizeof(*ifname)); 172 return 0; 173 } 174 175 /** 176 * Check if the counter is located on ib counters file. 177 * 178 * @param[in] cntr 179 * Counter name. 180 * 181 * @return 182 * 1 if counter is located on ib counters file , 0 otherwise. 183 */ 184 int 185 priv_is_ib_cntr(const char *cntr) 186 { 187 if (!strcmp(cntr, "out_of_buffer")) 188 return 1; 189 return 0; 190 } 191 192 /** 193 * Read from sysfs entry. 194 * 195 * @param[in] priv 196 * Pointer to private structure. 197 * @param[in] entry 198 * Entry name relative to sysfs path. 199 * @param[out] buf 200 * Data output buffer. 201 * @param size 202 * Buffer size. 203 * 204 * @return 205 * 0 on success, -1 on failure and errno is set. 206 */ 207 static int 208 priv_sysfs_read(const struct priv *priv, const char *entry, 209 char *buf, size_t size) 210 { 211 char ifname[IF_NAMESIZE]; 212 FILE *file; 213 int ret; 214 int err; 215 216 if (priv_get_ifname(priv, &ifname)) 217 return -1; 218 219 if (priv_is_ib_cntr(entry)) { 220 MKSTR(path, "%s/ports/1/hw_counters/%s", 221 priv->ibdev_path, entry); 222 file = fopen(path, "rb"); 223 } else { 224 MKSTR(path, "%s/device/net/%s/%s", 225 priv->ibdev_path, ifname, entry); 226 file = fopen(path, "rb"); 227 } 228 if (file == NULL) 229 return -1; 230 ret = fread(buf, 1, size, file); 231 err = errno; 232 if (((size_t)ret < size) && (ferror(file))) 233 ret = -1; 234 else 235 ret = size; 236 fclose(file); 237 errno = err; 238 return ret; 239 } 240 241 /** 242 * Write to sysfs entry. 243 * 244 * @param[in] priv 245 * Pointer to private structure. 246 * @param[in] entry 247 * Entry name relative to sysfs path. 248 * @param[in] buf 249 * Data buffer. 250 * @param size 251 * Buffer size. 252 * 253 * @return 254 * 0 on success, -1 on failure and errno is set. 255 */ 256 static int 257 priv_sysfs_write(const struct priv *priv, const char *entry, 258 char *buf, size_t size) 259 { 260 char ifname[IF_NAMESIZE]; 261 FILE *file; 262 int ret; 263 int err; 264 265 if (priv_get_ifname(priv, &ifname)) 266 return -1; 267 268 MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry); 269 270 file = fopen(path, "wb"); 271 if (file == NULL) 272 return -1; 273 ret = fwrite(buf, 1, size, file); 274 err = errno; 275 if (((size_t)ret < size) || (ferror(file))) 276 ret = -1; 277 else 278 ret = size; 279 fclose(file); 280 errno = err; 281 return ret; 282 } 283 284 /** 285 * Get unsigned long sysfs property. 286 * 287 * @param priv 288 * Pointer to private structure. 289 * @param[in] name 290 * Entry name relative to sysfs path. 291 * @param[out] value 292 * Value output buffer. 293 * 294 * @return 295 * 0 on success, -1 on failure and errno is set. 296 */ 297 static int 298 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 299 { 300 int ret; 301 unsigned long value_ret; 302 char value_str[32]; 303 304 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 305 if (ret == -1) { 306 DEBUG("cannot read %s value from sysfs: %s", 307 name, strerror(errno)); 308 return -1; 309 } 310 value_str[ret] = '\0'; 311 errno = 0; 312 value_ret = strtoul(value_str, NULL, 0); 313 if (errno) { 314 DEBUG("invalid %s value `%s': %s", name, value_str, 315 strerror(errno)); 316 return -1; 317 } 318 *value = value_ret; 319 return 0; 320 } 321 322 /** 323 * Set unsigned long sysfs property. 324 * 325 * @param priv 326 * Pointer to private structure. 327 * @param[in] name 328 * Entry name relative to sysfs path. 329 * @param value 330 * Value to set. 331 * 332 * @return 333 * 0 on success, -1 on failure and errno is set. 334 */ 335 static int 336 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 337 { 338 int ret; 339 MKSTR(value_str, "%lu", value); 340 341 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 342 if (ret == -1) { 343 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 344 name, value_str, value, strerror(errno)); 345 return -1; 346 } 347 return 0; 348 } 349 350 /** 351 * Perform ifreq ioctl() on associated Ethernet device. 352 * 353 * @param[in] priv 354 * Pointer to private structure. 355 * @param req 356 * Request number to pass to ioctl(). 357 * @param[out] ifr 358 * Interface request structure output buffer. 359 * 360 * @return 361 * 0 on success, -1 on failure and errno is set. 362 */ 363 int 364 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 365 { 366 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 367 int ret = -1; 368 369 if (sock == -1) 370 return ret; 371 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 372 ret = ioctl(sock, req, ifr); 373 close(sock); 374 return ret; 375 } 376 377 /** 378 * Return the number of active VFs for the current device. 379 * 380 * @param[in] priv 381 * Pointer to private structure. 382 * @param[out] num_vfs 383 * Number of active VFs. 384 * 385 * @return 386 * 0 on success, -1 on failure and errno is set. 387 */ 388 int 389 priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs) 390 { 391 /* The sysfs entry name depends on the operating system. */ 392 const char **name = (const char *[]){ 393 "device/sriov_numvfs", 394 "device/mlx5_num_vfs", 395 NULL, 396 }; 397 int ret; 398 399 do { 400 unsigned long ulong_num_vfs; 401 402 ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs); 403 if (!ret) 404 *num_vfs = ulong_num_vfs; 405 } while (*(++name) && ret); 406 return ret; 407 } 408 409 /** 410 * Get device MTU. 411 * 412 * @param priv 413 * Pointer to private structure. 414 * @param[out] mtu 415 * MTU value output buffer. 416 * 417 * @return 418 * 0 on success, -1 on failure and errno is set. 419 */ 420 int 421 priv_get_mtu(struct priv *priv, uint16_t *mtu) 422 { 423 unsigned long ulong_mtu; 424 425 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 426 return -1; 427 *mtu = ulong_mtu; 428 return 0; 429 } 430 431 /** 432 * Read device counter from sysfs. 433 * 434 * @param priv 435 * Pointer to private structure. 436 * @param name 437 * Counter name. 438 * @param[out] cntr 439 * Counter output buffer. 440 * 441 * @return 442 * 0 on success, -1 on failure and errno is set. 443 */ 444 int 445 priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) 446 { 447 unsigned long ulong_ctr; 448 449 if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1) 450 return -1; 451 *cntr = ulong_ctr; 452 return 0; 453 } 454 455 /** 456 * Set device MTU. 457 * 458 * @param priv 459 * Pointer to private structure. 460 * @param mtu 461 * MTU value to set. 462 * 463 * @return 464 * 0 on success, -1 on failure and errno is set. 465 */ 466 static int 467 priv_set_mtu(struct priv *priv, uint16_t mtu) 468 { 469 uint16_t new_mtu; 470 471 if (priv_set_sysfs_ulong(priv, "mtu", mtu) || 472 priv_get_mtu(priv, &new_mtu)) 473 return -1; 474 if (new_mtu == mtu) 475 return 0; 476 errno = EINVAL; 477 return -1; 478 } 479 480 /** 481 * Set device flags. 482 * 483 * @param priv 484 * Pointer to private structure. 485 * @param keep 486 * Bitmask for flags that must remain untouched. 487 * @param flags 488 * Bitmask for flags to modify. 489 * 490 * @return 491 * 0 on success, -1 on failure and errno is set. 492 */ 493 int 494 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 495 { 496 unsigned long tmp; 497 498 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 499 return -1; 500 tmp &= keep; 501 tmp |= (flags & (~keep)); 502 return priv_set_sysfs_ulong(priv, "flags", tmp); 503 } 504 505 /** 506 * Ethernet device configuration. 507 * 508 * Prepare the driver for a given number of TX and RX queues. 509 * 510 * @param dev 511 * Pointer to Ethernet device structure. 512 * 513 * @return 514 * 0 on success, errno value on failure. 515 */ 516 static int 517 dev_configure(struct rte_eth_dev *dev) 518 { 519 struct priv *priv = dev->data->dev_private; 520 unsigned int rxqs_n = dev->data->nb_rx_queues; 521 unsigned int txqs_n = dev->data->nb_tx_queues; 522 unsigned int i; 523 unsigned int j; 524 unsigned int reta_idx_n; 525 const uint8_t use_app_rss_key = 526 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 527 uint64_t supp_tx_offloads = mlx5_priv_get_tx_port_offloads(priv); 528 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 529 uint64_t supp_rx_offloads = 530 (mlx5_priv_get_rx_port_offloads(priv) | 531 mlx5_priv_get_rx_queue_offloads(priv)); 532 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 533 534 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 535 ERROR("Some Tx offloads are not supported " 536 "requested 0x%" PRIx64 " supported 0x%" PRIx64, 537 tx_offloads, supp_tx_offloads); 538 return ENOTSUP; 539 } 540 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 541 ERROR("Some Rx offloads are not supported " 542 "requested 0x%" PRIx64 " supported 0x%" PRIx64, 543 rx_offloads, supp_rx_offloads); 544 return ENOTSUP; 545 } 546 if (use_app_rss_key && 547 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 548 rss_hash_default_key_len)) { 549 /* MLX5 RSS only support 40bytes key. */ 550 return EINVAL; 551 } 552 priv->rss_conf.rss_key = 553 rte_realloc(priv->rss_conf.rss_key, 554 rss_hash_default_key_len, 0); 555 if (!priv->rss_conf.rss_key) { 556 ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n); 557 return ENOMEM; 558 } 559 memcpy(priv->rss_conf.rss_key, 560 use_app_rss_key ? 561 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 562 rss_hash_default_key, 563 rss_hash_default_key_len); 564 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 565 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 566 priv->rxqs = (void *)dev->data->rx_queues; 567 priv->txqs = (void *)dev->data->tx_queues; 568 if (txqs_n != priv->txqs_n) { 569 INFO("%p: TX queues number update: %u -> %u", 570 (void *)dev, priv->txqs_n, txqs_n); 571 priv->txqs_n = txqs_n; 572 } 573 if (rxqs_n > priv->config.ind_table_max_size) { 574 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 575 return EINVAL; 576 } 577 if (rxqs_n == priv->rxqs_n) 578 return 0; 579 INFO("%p: RX queues number update: %u -> %u", 580 (void *)dev, priv->rxqs_n, rxqs_n); 581 priv->rxqs_n = rxqs_n; 582 /* If the requested number of RX queues is not a power of two, use the 583 * maximum indirection table size for better balancing. 584 * The result is always rounded to the next power of two. */ 585 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 586 priv->config.ind_table_max_size : 587 rxqs_n)); 588 if (priv_rss_reta_index_resize(priv, reta_idx_n)) 589 return ENOMEM; 590 /* When the number of RX queues is not a power of two, the remaining 591 * table entries are padded with reused WQs and hashes are not spread 592 * uniformly. */ 593 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 594 (*priv->reta_idx)[i] = j; 595 if (++j == rxqs_n) 596 j = 0; 597 } 598 return 0; 599 } 600 601 /** 602 * DPDK callback for Ethernet device configuration. 603 * 604 * @param dev 605 * Pointer to Ethernet device structure. 606 * 607 * @return 608 * 0 on success, negative errno value on failure. 609 */ 610 int 611 mlx5_dev_configure(struct rte_eth_dev *dev) 612 { 613 struct priv *priv = dev->data->dev_private; 614 int ret; 615 616 priv_lock(priv); 617 ret = dev_configure(dev); 618 assert(ret >= 0); 619 priv_unlock(priv); 620 return -ret; 621 } 622 623 /** 624 * DPDK callback to get information about the device. 625 * 626 * @param dev 627 * Pointer to Ethernet device structure. 628 * @param[out] info 629 * Info structure output buffer. 630 */ 631 void 632 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 633 { 634 struct priv *priv = dev->data->dev_private; 635 struct mlx5_dev_config *config = &priv->config; 636 unsigned int max; 637 char ifname[IF_NAMESIZE]; 638 639 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 640 641 priv_lock(priv); 642 /* FIXME: we should ask the device for these values. */ 643 info->min_rx_bufsize = 32; 644 info->max_rx_pktlen = 65536; 645 /* 646 * Since we need one CQ per QP, the limit is the minimum number 647 * between the two values. 648 */ 649 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 650 priv->device_attr.orig_attr.max_qp); 651 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 652 if (max >= 65535) 653 max = 65535; 654 info->max_rx_queues = max; 655 info->max_tx_queues = max; 656 info->max_mac_addrs = RTE_DIM(priv->mac); 657 info->rx_queue_offload_capa = 658 mlx5_priv_get_rx_queue_offloads(priv); 659 info->rx_offload_capa = (mlx5_priv_get_rx_port_offloads(priv) | 660 info->rx_queue_offload_capa); 661 info->tx_offload_capa = mlx5_priv_get_tx_port_offloads(priv); 662 if (priv_get_ifname(priv, &ifname) == 0) 663 info->if_index = if_nametoindex(ifname); 664 info->reta_size = priv->reta_idx_n ? 665 priv->reta_idx_n : config->ind_table_max_size; 666 info->hash_key_size = priv->rss_conf.rss_key_len; 667 info->speed_capa = priv->link_speed_capa; 668 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 669 priv_unlock(priv); 670 } 671 672 const uint32_t * 673 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 674 { 675 static const uint32_t ptypes[] = { 676 /* refers to rxq_cq_to_pkt_type() */ 677 RTE_PTYPE_L2_ETHER, 678 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 679 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 680 RTE_PTYPE_L4_NONFRAG, 681 RTE_PTYPE_L4_FRAG, 682 RTE_PTYPE_L4_TCP, 683 RTE_PTYPE_L4_UDP, 684 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 685 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 686 RTE_PTYPE_INNER_L4_NONFRAG, 687 RTE_PTYPE_INNER_L4_FRAG, 688 RTE_PTYPE_INNER_L4_TCP, 689 RTE_PTYPE_INNER_L4_UDP, 690 RTE_PTYPE_UNKNOWN 691 }; 692 693 if (dev->rx_pkt_burst == mlx5_rx_burst || 694 dev->rx_pkt_burst == mlx5_rx_burst_vec) 695 return ptypes; 696 return NULL; 697 } 698 699 /** 700 * DPDK callback to retrieve physical link information. 701 * 702 * @param dev 703 * Pointer to Ethernet device structure. 704 * @param wait_to_complete 705 * Wait for request completion (ignored). 706 */ 707 static int 708 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) 709 { 710 struct priv *priv = dev->data->dev_private; 711 struct ethtool_cmd edata = { 712 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 713 }; 714 struct ifreq ifr; 715 struct rte_eth_link dev_link; 716 int link_speed = 0; 717 718 /* priv_lock() is not taken to allow concurrent calls. */ 719 720 (void)wait_to_complete; 721 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 722 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 723 return -1; 724 } 725 memset(&dev_link, 0, sizeof(dev_link)); 726 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 727 (ifr.ifr_flags & IFF_RUNNING)); 728 ifr.ifr_data = (void *)&edata; 729 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 730 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 731 strerror(errno)); 732 return -1; 733 } 734 link_speed = ethtool_cmd_speed(&edata); 735 if (link_speed == -1) 736 dev_link.link_speed = 0; 737 else 738 dev_link.link_speed = link_speed; 739 priv->link_speed_capa = 0; 740 if (edata.supported & SUPPORTED_Autoneg) 741 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 742 if (edata.supported & (SUPPORTED_1000baseT_Full | 743 SUPPORTED_1000baseKX_Full)) 744 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 745 if (edata.supported & SUPPORTED_10000baseKR_Full) 746 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 747 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 748 SUPPORTED_40000baseCR4_Full | 749 SUPPORTED_40000baseSR4_Full | 750 SUPPORTED_40000baseLR4_Full)) 751 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 752 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 753 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 754 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 755 ETH_LINK_SPEED_FIXED); 756 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 757 /* Link status changed. */ 758 dev->data->dev_link = dev_link; 759 return 0; 760 } 761 /* Link status is still the same. */ 762 return -1; 763 } 764 765 /** 766 * Retrieve physical link information (unlocked version using new ioctl). 767 * 768 * @param dev 769 * Pointer to Ethernet device structure. 770 * @param wait_to_complete 771 * Wait for request completion (ignored). 772 */ 773 static int 774 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) 775 { 776 struct priv *priv = dev->data->dev_private; 777 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 778 struct ifreq ifr; 779 struct rte_eth_link dev_link; 780 uint64_t sc; 781 782 (void)wait_to_complete; 783 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 784 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 785 return -1; 786 } 787 memset(&dev_link, 0, sizeof(dev_link)); 788 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 789 (ifr.ifr_flags & IFF_RUNNING)); 790 ifr.ifr_data = (void *)&gcmd; 791 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 792 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 793 strerror(errno)); 794 return -1; 795 } 796 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 797 798 alignas(struct ethtool_link_settings) 799 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 800 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 801 struct ethtool_link_settings *ecmd = (void *)data; 802 803 *ecmd = gcmd; 804 ifr.ifr_data = (void *)ecmd; 805 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 806 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 807 strerror(errno)); 808 return -1; 809 } 810 dev_link.link_speed = ecmd->speed; 811 sc = ecmd->link_mode_masks[0] | 812 ((uint64_t)ecmd->link_mode_masks[1] << 32); 813 priv->link_speed_capa = 0; 814 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 815 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 816 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 817 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 818 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 819 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 820 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 821 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 822 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 823 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 824 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 825 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 826 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 827 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 828 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 829 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 830 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 831 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 832 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 833 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 834 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 835 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 836 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 837 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 838 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 839 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 840 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 841 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 842 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 843 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 844 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 845 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 846 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 847 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 848 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 849 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 850 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 851 ETH_LINK_SPEED_FIXED); 852 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 853 /* Link status changed. */ 854 dev->data->dev_link = dev_link; 855 return 0; 856 } 857 /* Link status is still the same. */ 858 return -1; 859 } 860 861 /** 862 * Enable receiving and transmitting traffic. 863 * 864 * @param priv 865 * Pointer to private structure. 866 */ 867 static void 868 priv_link_start(struct priv *priv) 869 { 870 struct rte_eth_dev *dev = priv->dev; 871 int err; 872 873 dev->tx_pkt_burst = priv_select_tx_function(priv, dev); 874 dev->rx_pkt_burst = priv_select_rx_function(priv, dev); 875 err = priv_dev_traffic_enable(priv, dev); 876 if (err) 877 ERROR("%p: error occurred while configuring control flows: %s", 878 (void *)priv, strerror(err)); 879 err = priv_flow_start(priv, &priv->flows); 880 if (err) 881 ERROR("%p: error occurred while configuring flows: %s", 882 (void *)priv, strerror(err)); 883 } 884 885 /** 886 * Disable receiving and transmitting traffic. 887 * 888 * @param priv 889 * Pointer to private structure. 890 */ 891 static void 892 priv_link_stop(struct priv *priv) 893 { 894 struct rte_eth_dev *dev = priv->dev; 895 896 priv_flow_stop(priv, &priv->flows); 897 priv_dev_traffic_disable(priv, dev); 898 dev->rx_pkt_burst = removed_rx_burst; 899 dev->tx_pkt_burst = removed_tx_burst; 900 } 901 902 /** 903 * Retrieve physical link information and update rx/tx_pkt_burst callbacks 904 * accordingly. 905 * 906 * @param priv 907 * Pointer to private structure. 908 * @param wait_to_complete 909 * Wait for request completion (ignored). 910 */ 911 int 912 priv_link_update(struct priv *priv, int wait_to_complete) 913 { 914 struct rte_eth_dev *dev = priv->dev; 915 struct utsname utsname; 916 int ver[3]; 917 int ret; 918 struct rte_eth_link dev_link = dev->data->dev_link; 919 920 if (uname(&utsname) == -1 || 921 sscanf(utsname.release, "%d.%d.%d", 922 &ver[0], &ver[1], &ver[2]) != 3 || 923 KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) 924 ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete); 925 else 926 ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete); 927 /* If lsc interrupt is disabled, should always be ready for traffic. */ 928 if (!dev->data->dev_conf.intr_conf.lsc) { 929 priv_link_start(priv); 930 return ret; 931 } 932 /* Re-select burst callbacks only if link status has been changed. */ 933 if (!ret && dev_link.link_status != dev->data->dev_link.link_status) { 934 if (dev->data->dev_link.link_status == ETH_LINK_UP) 935 priv_link_start(priv); 936 else 937 priv_link_stop(priv); 938 } 939 return ret; 940 } 941 942 /** 943 * Querying the link status till it changes to the desired state. 944 * Number of query attempts is bounded by MLX5_MAX_LINK_QUERY_ATTEMPTS. 945 * 946 * @param priv 947 * Pointer to private structure. 948 * @param status 949 * Link desired status. 950 * 951 * @return 952 * 0 on success, negative errno value on failure. 953 */ 954 int 955 priv_force_link_status_change(struct priv *priv, int status) 956 { 957 int try = 0; 958 959 while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) { 960 priv_link_update(priv, 0); 961 if (priv->dev->data->dev_link.link_status == status) 962 return 0; 963 try++; 964 sleep(1); 965 } 966 return -EAGAIN; 967 } 968 969 /** 970 * DPDK callback to retrieve physical link information. 971 * 972 * @param dev 973 * Pointer to Ethernet device structure. 974 * @param wait_to_complete 975 * Wait for request completion (ignored). 976 */ 977 int 978 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 979 { 980 struct priv *priv = dev->data->dev_private; 981 int ret; 982 983 priv_lock(priv); 984 ret = priv_link_update(priv, wait_to_complete); 985 priv_unlock(priv); 986 return ret; 987 } 988 989 /** 990 * DPDK callback to change the MTU. 991 * 992 * @param dev 993 * Pointer to Ethernet device structure. 994 * @param in_mtu 995 * New MTU. 996 * 997 * @return 998 * 0 on success, negative errno value on failure. 999 */ 1000 int 1001 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 1002 { 1003 struct priv *priv = dev->data->dev_private; 1004 uint16_t kern_mtu; 1005 int ret = 0; 1006 1007 priv_lock(priv); 1008 ret = priv_get_mtu(priv, &kern_mtu); 1009 if (ret) 1010 goto out; 1011 /* Set kernel interface MTU first. */ 1012 ret = priv_set_mtu(priv, mtu); 1013 if (ret) 1014 goto out; 1015 ret = priv_get_mtu(priv, &kern_mtu); 1016 if (ret) 1017 goto out; 1018 if (kern_mtu == mtu) { 1019 priv->mtu = mtu; 1020 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 1021 } 1022 priv_unlock(priv); 1023 return 0; 1024 out: 1025 ret = errno; 1026 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 1027 strerror(ret)); 1028 priv_unlock(priv); 1029 assert(ret >= 0); 1030 return -ret; 1031 } 1032 1033 /** 1034 * DPDK callback to get flow control status. 1035 * 1036 * @param dev 1037 * Pointer to Ethernet device structure. 1038 * @param[out] fc_conf 1039 * Flow control output buffer. 1040 * 1041 * @return 1042 * 0 on success, negative errno value on failure. 1043 */ 1044 int 1045 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1046 { 1047 struct priv *priv = dev->data->dev_private; 1048 struct ifreq ifr; 1049 struct ethtool_pauseparam ethpause = { 1050 .cmd = ETHTOOL_GPAUSEPARAM 1051 }; 1052 int ret; 1053 1054 ifr.ifr_data = (void *)ðpause; 1055 priv_lock(priv); 1056 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 1057 ret = errno; 1058 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 1059 " failed: %s", 1060 strerror(ret)); 1061 goto out; 1062 } 1063 1064 fc_conf->autoneg = ethpause.autoneg; 1065 if (ethpause.rx_pause && ethpause.tx_pause) 1066 fc_conf->mode = RTE_FC_FULL; 1067 else if (ethpause.rx_pause) 1068 fc_conf->mode = RTE_FC_RX_PAUSE; 1069 else if (ethpause.tx_pause) 1070 fc_conf->mode = RTE_FC_TX_PAUSE; 1071 else 1072 fc_conf->mode = RTE_FC_NONE; 1073 ret = 0; 1074 1075 out: 1076 priv_unlock(priv); 1077 assert(ret >= 0); 1078 return -ret; 1079 } 1080 1081 /** 1082 * DPDK callback to modify flow control parameters. 1083 * 1084 * @param dev 1085 * Pointer to Ethernet device structure. 1086 * @param[in] fc_conf 1087 * Flow control parameters. 1088 * 1089 * @return 1090 * 0 on success, negative errno value on failure. 1091 */ 1092 int 1093 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1094 { 1095 struct priv *priv = dev->data->dev_private; 1096 struct ifreq ifr; 1097 struct ethtool_pauseparam ethpause = { 1098 .cmd = ETHTOOL_SPAUSEPARAM 1099 }; 1100 int ret; 1101 1102 ifr.ifr_data = (void *)ðpause; 1103 ethpause.autoneg = fc_conf->autoneg; 1104 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1105 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1106 ethpause.rx_pause = 1; 1107 else 1108 ethpause.rx_pause = 0; 1109 1110 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1111 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1112 ethpause.tx_pause = 1; 1113 else 1114 ethpause.tx_pause = 0; 1115 1116 priv_lock(priv); 1117 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 1118 ret = errno; 1119 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1120 " failed: %s", 1121 strerror(ret)); 1122 goto out; 1123 } 1124 ret = 0; 1125 1126 out: 1127 priv_unlock(priv); 1128 assert(ret >= 0); 1129 return -ret; 1130 } 1131 1132 /** 1133 * Get PCI information from struct ibv_device. 1134 * 1135 * @param device 1136 * Pointer to Ethernet device structure. 1137 * @param[out] pci_addr 1138 * PCI bus address output buffer. 1139 * 1140 * @return 1141 * 0 on success, -1 on failure and errno is set. 1142 */ 1143 int 1144 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 1145 struct rte_pci_addr *pci_addr) 1146 { 1147 FILE *file; 1148 char line[32]; 1149 MKSTR(path, "%s/device/uevent", device->ibdev_path); 1150 1151 file = fopen(path, "rb"); 1152 if (file == NULL) 1153 return -1; 1154 while (fgets(line, sizeof(line), file) == line) { 1155 size_t len = strlen(line); 1156 int ret; 1157 1158 /* Truncate long lines. */ 1159 if (len == (sizeof(line) - 1)) 1160 while (line[(len - 1)] != '\n') { 1161 ret = fgetc(file); 1162 if (ret == EOF) 1163 break; 1164 line[(len - 1)] = ret; 1165 } 1166 /* Extract information. */ 1167 if (sscanf(line, 1168 "PCI_SLOT_NAME=" 1169 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1170 &pci_addr->domain, 1171 &pci_addr->bus, 1172 &pci_addr->devid, 1173 &pci_addr->function) == 4) { 1174 ret = 0; 1175 break; 1176 } 1177 } 1178 fclose(file); 1179 return 0; 1180 } 1181 1182 /** 1183 * Update the link status. 1184 * 1185 * @param priv 1186 * Pointer to private structure. 1187 * 1188 * @return 1189 * Zero if the callback process can be called immediately. 1190 */ 1191 static int 1192 priv_link_status_update(struct priv *priv) 1193 { 1194 struct rte_eth_link *link = &priv->dev->data->dev_link; 1195 1196 priv_link_update(priv, 0); 1197 if (((link->link_speed == 0) && link->link_status) || 1198 ((link->link_speed != 0) && !link->link_status)) { 1199 /* 1200 * Inconsistent status. Event likely occurred before the 1201 * kernel netdevice exposes the new status. 1202 */ 1203 if (!priv->pending_alarm) { 1204 priv->pending_alarm = 1; 1205 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 1206 mlx5_dev_link_status_handler, 1207 priv->dev); 1208 } 1209 return 1; 1210 } else if (unlikely(priv->pending_alarm)) { 1211 /* Link interrupt occurred while alarm is already scheduled. */ 1212 priv->pending_alarm = 0; 1213 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev); 1214 } 1215 return 0; 1216 } 1217 1218 /** 1219 * Device status handler. 1220 * 1221 * @param priv 1222 * Pointer to private structure. 1223 * @param events 1224 * Pointer to event flags holder. 1225 * 1226 * @return 1227 * Events bitmap of callback process which can be called immediately. 1228 */ 1229 static uint32_t 1230 priv_dev_status_handler(struct priv *priv) 1231 { 1232 struct ibv_async_event event; 1233 uint32_t ret = 0; 1234 1235 /* Read all message and acknowledge them. */ 1236 for (;;) { 1237 if (mlx5_glue->get_async_event(priv->ctx, &event)) 1238 break; 1239 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1240 event.event_type == IBV_EVENT_PORT_ERR) && 1241 (priv->dev->data->dev_conf.intr_conf.lsc == 1)) 1242 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 1243 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 1244 priv->dev->data->dev_conf.intr_conf.rmv == 1) 1245 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 1246 else 1247 DEBUG("event type %d on port %d not handled", 1248 event.event_type, event.element.port_num); 1249 mlx5_glue->ack_async_event(&event); 1250 } 1251 if (ret & (1 << RTE_ETH_EVENT_INTR_LSC)) 1252 if (priv_link_status_update(priv)) 1253 ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC); 1254 return ret; 1255 } 1256 1257 /** 1258 * Handle delayed link status event. 1259 * 1260 * @param arg 1261 * Registered argument. 1262 */ 1263 void 1264 mlx5_dev_link_status_handler(void *arg) 1265 { 1266 struct rte_eth_dev *dev = arg; 1267 struct priv *priv = dev->data->dev_private; 1268 int ret; 1269 1270 while (!priv_trylock(priv)) { 1271 /* Alarm is being canceled. */ 1272 if (priv->pending_alarm == 0) 1273 return; 1274 rte_pause(); 1275 } 1276 priv->pending_alarm = 0; 1277 ret = priv_link_status_update(priv); 1278 priv_unlock(priv); 1279 if (!ret) 1280 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1281 } 1282 1283 /** 1284 * Handle interrupts from the NIC. 1285 * 1286 * @param[in] intr_handle 1287 * Interrupt handler. 1288 * @param cb_arg 1289 * Callback argument. 1290 */ 1291 void 1292 mlx5_dev_interrupt_handler(void *cb_arg) 1293 { 1294 struct rte_eth_dev *dev = cb_arg; 1295 struct priv *priv = dev->data->dev_private; 1296 uint32_t events; 1297 1298 priv_lock(priv); 1299 events = priv_dev_status_handler(priv); 1300 priv_unlock(priv); 1301 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 1302 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1303 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 1304 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1305 } 1306 1307 /** 1308 * Handle interrupts from the socket. 1309 * 1310 * @param cb_arg 1311 * Callback argument. 1312 */ 1313 static void 1314 mlx5_dev_handler_socket(void *cb_arg) 1315 { 1316 struct rte_eth_dev *dev = cb_arg; 1317 struct priv *priv = dev->data->dev_private; 1318 1319 priv_lock(priv); 1320 priv_socket_handle(priv); 1321 priv_unlock(priv); 1322 } 1323 1324 /** 1325 * Uninstall interrupt handler. 1326 * 1327 * @param priv 1328 * Pointer to private structure. 1329 * @param dev 1330 * Pointer to the rte_eth_dev structure. 1331 */ 1332 void 1333 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 1334 { 1335 if (dev->data->dev_conf.intr_conf.lsc || 1336 dev->data->dev_conf.intr_conf.rmv) 1337 rte_intr_callback_unregister(&priv->intr_handle, 1338 mlx5_dev_interrupt_handler, dev); 1339 if (priv->primary_socket) 1340 rte_intr_callback_unregister(&priv->intr_handle_socket, 1341 mlx5_dev_handler_socket, dev); 1342 if (priv->pending_alarm) { 1343 priv->pending_alarm = 0; 1344 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 1345 } 1346 priv->intr_handle.fd = 0; 1347 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1348 priv->intr_handle_socket.fd = 0; 1349 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 1350 } 1351 1352 /** 1353 * Install interrupt handler. 1354 * 1355 * @param priv 1356 * Pointer to private structure. 1357 * @param dev 1358 * Pointer to the rte_eth_dev structure. 1359 */ 1360 void 1361 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 1362 { 1363 int rc, flags; 1364 1365 assert(priv->ctx->async_fd > 0); 1366 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1367 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1368 if (rc < 0) { 1369 INFO("failed to change file descriptor async event queue"); 1370 dev->data->dev_conf.intr_conf.lsc = 0; 1371 dev->data->dev_conf.intr_conf.rmv = 0; 1372 } 1373 if (dev->data->dev_conf.intr_conf.lsc || 1374 dev->data->dev_conf.intr_conf.rmv) { 1375 priv->intr_handle.fd = priv->ctx->async_fd; 1376 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1377 rte_intr_callback_register(&priv->intr_handle, 1378 mlx5_dev_interrupt_handler, dev); 1379 } 1380 1381 rc = priv_socket_init(priv); 1382 if (!rc && priv->primary_socket) { 1383 priv->intr_handle_socket.fd = priv->primary_socket; 1384 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1385 rte_intr_callback_register(&priv->intr_handle_socket, 1386 mlx5_dev_handler_socket, dev); 1387 } 1388 } 1389 1390 /** 1391 * Change the link state (UP / DOWN). 1392 * 1393 * @param priv 1394 * Pointer to private data structure. 1395 * @param up 1396 * Nonzero for link up, otherwise link down. 1397 * 1398 * @return 1399 * 0 on success, errno value on failure. 1400 */ 1401 static int 1402 priv_dev_set_link(struct priv *priv, int up) 1403 { 1404 return priv_set_flags(priv, ~IFF_UP, up ? IFF_UP : ~IFF_UP); 1405 } 1406 1407 /** 1408 * DPDK callback to bring the link DOWN. 1409 * 1410 * @param dev 1411 * Pointer to Ethernet device structure. 1412 * 1413 * @return 1414 * 0 on success, errno value on failure. 1415 */ 1416 int 1417 mlx5_set_link_down(struct rte_eth_dev *dev) 1418 { 1419 struct priv *priv = dev->data->dev_private; 1420 int err; 1421 1422 priv_lock(priv); 1423 err = priv_dev_set_link(priv, 0); 1424 priv_unlock(priv); 1425 return err; 1426 } 1427 1428 /** 1429 * DPDK callback to bring the link UP. 1430 * 1431 * @param dev 1432 * Pointer to Ethernet device structure. 1433 * 1434 * @return 1435 * 0 on success, errno value on failure. 1436 */ 1437 int 1438 mlx5_set_link_up(struct rte_eth_dev *dev) 1439 { 1440 struct priv *priv = dev->data->dev_private; 1441 int err; 1442 1443 priv_lock(priv); 1444 err = priv_dev_set_link(priv, 1); 1445 priv_unlock(priv); 1446 return err; 1447 } 1448 1449 /** 1450 * Configure the TX function to use. 1451 * 1452 * @param priv 1453 * Pointer to private data structure. 1454 * @param dev 1455 * Pointer to rte_eth_dev structure. 1456 * 1457 * @return 1458 * Pointer to selected Tx burst function. 1459 */ 1460 eth_tx_burst_t 1461 priv_select_tx_function(struct priv *priv, struct rte_eth_dev *dev) 1462 { 1463 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1464 struct mlx5_dev_config *config = &priv->config; 1465 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1466 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1467 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1468 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1469 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1470 1471 assert(priv != NULL); 1472 /* Select appropriate TX function. */ 1473 if (vlan_insert || tso) 1474 return tx_pkt_burst; 1475 if (config->mps == MLX5_MPW_ENHANCED) { 1476 if (priv_check_vec_tx_support(priv, dev) > 0) { 1477 if (priv_check_raw_vec_tx_support(priv, dev) > 0) 1478 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1479 else 1480 tx_pkt_burst = mlx5_tx_burst_vec; 1481 DEBUG("selected Enhanced MPW TX vectorized function"); 1482 } else { 1483 tx_pkt_burst = mlx5_tx_burst_empw; 1484 DEBUG("selected Enhanced MPW TX function"); 1485 } 1486 } else if (config->mps && (config->txq_inline > 0)) { 1487 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1488 DEBUG("selected MPW inline TX function"); 1489 } else if (config->mps) { 1490 tx_pkt_burst = mlx5_tx_burst_mpw; 1491 DEBUG("selected MPW TX function"); 1492 } 1493 return tx_pkt_burst; 1494 } 1495 1496 /** 1497 * Configure the RX function to use. 1498 * 1499 * @param priv 1500 * Pointer to private data structure. 1501 * @param dev 1502 * Pointer to rte_eth_dev structure. 1503 * 1504 * @return 1505 * Pointer to selected Rx burst function. 1506 */ 1507 eth_rx_burst_t 1508 priv_select_rx_function(struct priv *priv, __rte_unused struct rte_eth_dev *dev) 1509 { 1510 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1511 1512 assert(priv != NULL); 1513 if (priv_check_vec_rx_support(priv) > 0) { 1514 rx_pkt_burst = mlx5_rx_burst_vec; 1515 DEBUG("selected RX vectorized function"); 1516 } 1517 return rx_pkt_burst; 1518 } 1519 1520 /** 1521 * Check if mlx5 device was removed. 1522 * 1523 * @param dev 1524 * Pointer to Ethernet device structure. 1525 * 1526 * @return 1527 * 1 when device is removed, otherwise 0. 1528 */ 1529 int 1530 mlx5_is_removed(struct rte_eth_dev *dev) 1531 { 1532 struct ibv_device_attr device_attr; 1533 struct priv *priv = dev->data->dev_private; 1534 1535 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1536 return 1; 1537 return 0; 1538 } 1539