1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <inttypes.h> 9 #include <unistd.h> 10 #include <stdbool.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <string.h> 14 #include <stdlib.h> 15 #include <errno.h> 16 #include <dirent.h> 17 #include <net/if.h> 18 #include <sys/ioctl.h> 19 #include <sys/socket.h> 20 #include <netinet/in.h> 21 #include <linux/ethtool.h> 22 #include <linux/sockios.h> 23 #include <fcntl.h> 24 #include <stdalign.h> 25 #include <sys/un.h> 26 #include <time.h> 27 28 #include <rte_atomic.h> 29 #include <rte_ethdev_driver.h> 30 #include <rte_bus_pci.h> 31 #include <rte_mbuf.h> 32 #include <rte_common.h> 33 #include <rte_interrupts.h> 34 #include <rte_malloc.h> 35 #include <rte_string_fns.h> 36 #include <rte_rwlock.h> 37 #include <rte_cycles.h> 38 39 #include "mlx5.h" 40 #include "mlx5_glue.h" 41 #include "mlx5_rxtx.h" 42 #include "mlx5_utils.h" 43 44 /* Supported speed values found in /usr/include/linux/ethtool.h */ 45 #ifndef HAVE_SUPPORTED_40000baseKR4_Full 46 #define SUPPORTED_40000baseKR4_Full (1 << 23) 47 #endif 48 #ifndef HAVE_SUPPORTED_40000baseCR4_Full 49 #define SUPPORTED_40000baseCR4_Full (1 << 24) 50 #endif 51 #ifndef HAVE_SUPPORTED_40000baseSR4_Full 52 #define SUPPORTED_40000baseSR4_Full (1 << 25) 53 #endif 54 #ifndef HAVE_SUPPORTED_40000baseLR4_Full 55 #define SUPPORTED_40000baseLR4_Full (1 << 26) 56 #endif 57 #ifndef HAVE_SUPPORTED_56000baseKR4_Full 58 #define SUPPORTED_56000baseKR4_Full (1 << 27) 59 #endif 60 #ifndef HAVE_SUPPORTED_56000baseCR4_Full 61 #define SUPPORTED_56000baseCR4_Full (1 << 28) 62 #endif 63 #ifndef HAVE_SUPPORTED_56000baseSR4_Full 64 #define SUPPORTED_56000baseSR4_Full (1 << 29) 65 #endif 66 #ifndef HAVE_SUPPORTED_56000baseLR4_Full 67 #define SUPPORTED_56000baseLR4_Full (1 << 30) 68 #endif 69 70 /* Add defines in case the running kernel is not the same as user headers. 
*/ 71 #ifndef ETHTOOL_GLINKSETTINGS 72 struct ethtool_link_settings { 73 uint32_t cmd; 74 uint32_t speed; 75 uint8_t duplex; 76 uint8_t port; 77 uint8_t phy_address; 78 uint8_t autoneg; 79 uint8_t mdio_support; 80 uint8_t eth_to_mdix; 81 uint8_t eth_tp_mdix_ctrl; 82 int8_t link_mode_masks_nwords; 83 uint32_t reserved[8]; 84 uint32_t link_mode_masks[]; 85 }; 86 87 #define ETHTOOL_GLINKSETTINGS 0x0000004c 88 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 89 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 90 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 91 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 92 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 93 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 94 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 95 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 96 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 97 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 98 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 99 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 100 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 101 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 102 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 103 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 104 #endif 105 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 106 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 107 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 108 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 109 #endif 110 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 111 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 112 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 113 #endif 114 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 115 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 116 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 117 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 118 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 119 #endif 120 121 /** 122 * Get master interface name from private structure. 
123 * 124 * @param[in] dev 125 * Pointer to Ethernet device. 126 * @param[out] ifname 127 * Interface name output buffer. 128 * 129 * @return 130 * 0 on success, a negative errno value otherwise and rte_errno is set. 131 */ 132 int 133 mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE]) 134 { 135 DIR *dir; 136 struct dirent *dent; 137 unsigned int dev_type = 0; 138 unsigned int dev_port_prev = ~0u; 139 char match[IF_NAMESIZE] = ""; 140 141 assert(ibdev_path); 142 { 143 MKSTR(path, "%s/device/net", ibdev_path); 144 145 dir = opendir(path); 146 if (dir == NULL) { 147 rte_errno = errno; 148 return -rte_errno; 149 } 150 } 151 while ((dent = readdir(dir)) != NULL) { 152 char *name = dent->d_name; 153 FILE *file; 154 unsigned int dev_port; 155 int r; 156 157 if ((name[0] == '.') && 158 ((name[1] == '\0') || 159 ((name[1] == '.') && (name[2] == '\0')))) 160 continue; 161 162 MKSTR(path, "%s/device/net/%s/%s", 163 ibdev_path, name, 164 (dev_type ? "dev_id" : "dev_port")); 165 166 file = fopen(path, "rb"); 167 if (file == NULL) { 168 if (errno != ENOENT) 169 continue; 170 /* 171 * Switch to dev_id when dev_port does not exist as 172 * is the case with Linux kernel versions < 3.15. 173 */ 174 try_dev_id: 175 match[0] = '\0'; 176 if (dev_type) 177 break; 178 dev_type = 1; 179 dev_port_prev = ~0u; 180 rewinddir(dir); 181 continue; 182 } 183 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 184 fclose(file); 185 if (r != 1) 186 continue; 187 /* 188 * Switch to dev_id when dev_port returns the same value for 189 * all ports. May happen when using a MOFED release older than 190 * 3.0 with a Linux kernel >= 3.15. 
191 */ 192 if (dev_port == dev_port_prev) 193 goto try_dev_id; 194 dev_port_prev = dev_port; 195 if (dev_port == 0) 196 strlcpy(match, name, sizeof(match)); 197 } 198 closedir(dir); 199 if (match[0] == '\0') { 200 rte_errno = ENOENT; 201 return -rte_errno; 202 } 203 strncpy(*ifname, match, sizeof(*ifname)); 204 return 0; 205 } 206 207 /** 208 * Get interface name from private structure. 209 * 210 * This is a port representor-aware version of mlx5_get_master_ifname(). 211 * 212 * @param[in] dev 213 * Pointer to Ethernet device. 214 * @param[out] ifname 215 * Interface name output buffer. 216 * 217 * @return 218 * 0 on success, a negative errno value otherwise and rte_errno is set. 219 */ 220 int 221 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 222 { 223 struct mlx5_priv *priv = dev->data->dev_private; 224 unsigned int ifindex; 225 226 assert(priv); 227 assert(priv->sh); 228 ifindex = mlx5_ifindex(dev); 229 if (!ifindex) { 230 if (!priv->representor) 231 return mlx5_get_master_ifname(priv->sh->ibdev_path, 232 ifname); 233 rte_errno = ENXIO; 234 return -rte_errno; 235 } 236 if (if_indextoname(ifindex, &(*ifname)[0])) 237 return 0; 238 rte_errno = errno; 239 return -rte_errno; 240 } 241 242 /** 243 * Get interface name for the specified device, uses the extra base 244 * device resources to perform Netlink requests. 245 * 246 * This is a port representor-aware version of mlx5_get_master_ifname(). 247 * 248 * @param[in] base 249 * Pointer to Ethernet device to use Netlink socket from 250 * to perfrom requests. 251 * @param[in] dev 252 * Pointer to Ethernet device. 253 * @param[out] ifname 254 * Interface name output buffer. 255 * 256 * @return 257 * 0 on success, a negative errno value otherwise and rte_errno is set. 
258 */ 259 int 260 mlx5_get_ifname_base(const struct rte_eth_dev *base, 261 const struct rte_eth_dev *dev, 262 char (*ifname)[IF_NAMESIZE]) 263 { 264 struct mlx5_priv *priv = dev->data->dev_private; 265 struct mlx5_priv *priv_base = base->data->dev_private; 266 unsigned int ifindex; 267 268 assert(priv); 269 assert(priv->sh); 270 assert(priv_base); 271 ifindex = priv_base->nl_socket_rdma >= 0 ? 272 mlx5_nl_ifindex(priv_base->nl_socket_rdma, 273 priv->sh->ibdev_name, 274 priv->ibv_port) : 0; 275 if (!ifindex) { 276 if (!priv->representor) 277 return mlx5_get_master_ifname(priv->sh->ibdev_path, 278 ifname); 279 rte_errno = ENXIO; 280 return -rte_errno; 281 } 282 if (if_indextoname(ifindex, &(*ifname)[0])) 283 return 0; 284 rte_errno = errno; 285 return -rte_errno; 286 } 287 /** 288 * Get the interface index from device name. 289 * 290 * @param[in] dev 291 * Pointer to Ethernet device. 292 * 293 * @return 294 * Nonzero interface index on success, zero otherwise and rte_errno is set. 295 */ 296 unsigned int 297 mlx5_ifindex(const struct rte_eth_dev *dev) 298 { 299 struct mlx5_priv *priv = dev->data->dev_private; 300 unsigned int ifindex; 301 302 assert(priv); 303 assert(priv->if_index); 304 ifindex = priv->if_index; 305 if (!ifindex) 306 rte_errno = ENXIO; 307 return ifindex; 308 } 309 310 /** 311 * Perform ifreq ioctl() on associated Ethernet device. 312 * 313 * @param[in] dev 314 * Pointer to Ethernet device. 315 * @param req 316 * Request number to pass to ioctl(). 317 * @param[out] ifr 318 * Interface request structure output buffer. 319 * 320 * @return 321 * 0 on success, a negative errno value otherwise and rte_errno is set. 
322 */ 323 int 324 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 325 { 326 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 327 int ret = 0; 328 329 if (sock == -1) { 330 rte_errno = errno; 331 return -rte_errno; 332 } 333 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 334 if (ret) 335 goto error; 336 ret = ioctl(sock, req, ifr); 337 if (ret == -1) { 338 rte_errno = errno; 339 goto error; 340 } 341 close(sock); 342 return 0; 343 error: 344 close(sock); 345 return -rte_errno; 346 } 347 348 /** 349 * Perform ifreq ioctl() on specified Ethernet device, 350 * ifindex, name and other attributes are requested 351 * on the base device to avoid specified device Netlink 352 * socket sharing (this is not thread-safe). 353 * 354 * @param[in] base 355 * Pointer to Ethernet device to get dev attributes. 356 * @param[in] dev 357 * Pointer to Ethernet device to perform ioctl. 358 * @param req 359 * Request number to pass to ioctl(). 360 * @param[out] ifr 361 * Interface request structure output buffer. 362 * 363 * @return 364 * 0 on success, a negative errno value otherwise and rte_errno is set. 365 */ 366 int 367 mlx5_ifreq_base(const struct rte_eth_dev *base, 368 const struct rte_eth_dev *dev, 369 int req, struct ifreq *ifr) 370 { 371 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 372 int ret = 0; 373 374 if (sock == -1) { 375 rte_errno = errno; 376 return -rte_errno; 377 } 378 ret = mlx5_get_ifname_base(base, dev, &ifr->ifr_name); 379 if (ret) 380 goto error; 381 ret = ioctl(sock, req, ifr); 382 if (ret == -1) { 383 rte_errno = errno; 384 goto error; 385 } 386 close(sock); 387 return 0; 388 error: 389 close(sock); 390 return -rte_errno; 391 } 392 393 /** 394 * Get device MTU. 395 * 396 * @param dev 397 * Pointer to Ethernet device. 398 * @param[out] mtu 399 * MTU value output buffer. 400 * 401 * @return 402 * 0 on success, a negative errno value otherwise and rte_errno is set. 
403 */ 404 int 405 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 406 { 407 struct ifreq request; 408 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 409 410 if (ret) 411 return ret; 412 *mtu = request.ifr_mtu; 413 return 0; 414 } 415 416 /** 417 * Set device MTU. 418 * 419 * @param dev 420 * Pointer to Ethernet device. 421 * @param mtu 422 * MTU value to set. 423 * 424 * @return 425 * 0 on success, a negative errno value otherwise and rte_errno is set. 426 */ 427 static int 428 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 429 { 430 struct ifreq request = { .ifr_mtu = mtu, }; 431 432 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 433 } 434 435 /** 436 * Set device flags. 437 * 438 * @param dev 439 * Pointer to Ethernet device. 440 * @param keep 441 * Bitmask for flags that must remain untouched. 442 * @param flags 443 * Bitmask for flags to modify. 444 * 445 * @return 446 * 0 on success, a negative errno value otherwise and rte_errno is set. 447 */ 448 int 449 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 450 { 451 struct ifreq request; 452 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 453 454 if (ret) 455 return ret; 456 request.ifr_flags &= keep; 457 request.ifr_flags |= flags & ~keep; 458 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 459 } 460 461 /** 462 * DPDK callback for Ethernet device configuration. 463 * 464 * @param dev 465 * Pointer to Ethernet device structure. 466 * 467 * @return 468 * 0 on success, a negative errno value otherwise and rte_errno is set. 
469 */ 470 int 471 mlx5_dev_configure(struct rte_eth_dev *dev) 472 { 473 struct mlx5_priv *priv = dev->data->dev_private; 474 unsigned int rxqs_n = dev->data->nb_rx_queues; 475 unsigned int txqs_n = dev->data->nb_tx_queues; 476 unsigned int i; 477 unsigned int j; 478 unsigned int reta_idx_n; 479 const uint8_t use_app_rss_key = 480 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 481 int ret = 0; 482 483 if (use_app_rss_key && 484 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 485 MLX5_RSS_HASH_KEY_LEN)) { 486 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 487 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 488 rte_errno = EINVAL; 489 return -rte_errno; 490 } 491 priv->rss_conf.rss_key = 492 rte_realloc(priv->rss_conf.rss_key, 493 MLX5_RSS_HASH_KEY_LEN, 0); 494 if (!priv->rss_conf.rss_key) { 495 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 496 dev->data->port_id, rxqs_n); 497 rte_errno = ENOMEM; 498 return -rte_errno; 499 } 500 memcpy(priv->rss_conf.rss_key, 501 use_app_rss_key ? 
502 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 503 rss_hash_default_key, 504 MLX5_RSS_HASH_KEY_LEN); 505 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 506 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 507 priv->rxqs = (void *)dev->data->rx_queues; 508 priv->txqs = (void *)dev->data->tx_queues; 509 if (txqs_n != priv->txqs_n) { 510 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 511 dev->data->port_id, priv->txqs_n, txqs_n); 512 priv->txqs_n = txqs_n; 513 } 514 if (rxqs_n > priv->config.ind_table_max_size) { 515 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 516 dev->data->port_id, rxqs_n); 517 rte_errno = EINVAL; 518 return -rte_errno; 519 } 520 if (rxqs_n != priv->rxqs_n) { 521 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 522 dev->data->port_id, priv->rxqs_n, rxqs_n); 523 priv->rxqs_n = rxqs_n; 524 /* 525 * If the requested number of RX queues is not a power of two, 526 * use the maximum indirection table size for better balancing. 527 * The result is always rounded to the next power of two. 528 */ 529 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 530 priv->config.ind_table_max_size : 531 rxqs_n)); 532 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 533 if (ret) 534 return ret; 535 /* 536 * When the number of RX queues is not a power of two, 537 * the remaining table entries are padded with reused WQs 538 * and hashes are not spread uniformly. 539 */ 540 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 541 (*priv->reta_idx)[i] = j; 542 if (++j == rxqs_n) 543 j = 0; 544 } 545 } 546 ret = mlx5_proc_priv_init(dev); 547 if (ret) 548 return ret; 549 return 0; 550 } 551 552 /** 553 * Sets default tuning parameters. 554 * 555 * @param dev 556 * Pointer to Ethernet device. 557 * @param[out] info 558 * Info structure output buffer. 
559 */ 560 static void 561 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 562 { 563 struct mlx5_priv *priv = dev->data->dev_private; 564 565 /* Minimum CPU utilization. */ 566 info->default_rxportconf.ring_size = 256; 567 info->default_txportconf.ring_size = 256; 568 info->default_rxportconf.burst_size = 64; 569 info->default_txportconf.burst_size = 64; 570 if (priv->link_speed_capa & ETH_LINK_SPEED_100G) { 571 info->default_rxportconf.nb_queues = 16; 572 info->default_txportconf.nb_queues = 16; 573 if (dev->data->nb_rx_queues > 2 || 574 dev->data->nb_tx_queues > 2) { 575 /* Max Throughput. */ 576 info->default_rxportconf.ring_size = 2048; 577 info->default_txportconf.ring_size = 2048; 578 } 579 } else { 580 info->default_rxportconf.nb_queues = 8; 581 info->default_txportconf.nb_queues = 8; 582 if (dev->data->nb_rx_queues > 2 || 583 dev->data->nb_tx_queues > 2) { 584 /* Max Throughput. */ 585 info->default_rxportconf.ring_size = 4096; 586 info->default_txportconf.ring_size = 4096; 587 } 588 } 589 } 590 591 /** 592 * Sets tx mbuf limiting parameters. 593 * 594 * @param dev 595 * Pointer to Ethernet device. 596 * @param[out] info 597 * Info structure output buffer. 598 */ 599 static void 600 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 601 { 602 struct mlx5_priv *priv = dev->data->dev_private; 603 struct mlx5_dev_config *config = &priv->config; 604 unsigned int inlen; 605 uint16_t nb_max; 606 607 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 
608 MLX5_SEND_DEF_INLINE_LEN : 609 (unsigned int)config->txq_inline_max; 610 assert(config->txq_inline_min >= 0); 611 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 612 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 613 MLX5_ESEG_MIN_INLINE_SIZE - 614 MLX5_WQE_CSEG_SIZE - 615 MLX5_WQE_ESEG_SIZE - 616 MLX5_WQE_DSEG_SIZE * 2); 617 nb_max = (MLX5_WQE_SIZE_MAX + 618 MLX5_ESEG_MIN_INLINE_SIZE - 619 MLX5_WQE_CSEG_SIZE - 620 MLX5_WQE_ESEG_SIZE - 621 MLX5_WQE_DSEG_SIZE - 622 inlen) / MLX5_WSEG_SIZE; 623 info->tx_desc_lim.nb_seg_max = nb_max; 624 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 625 } 626 627 /** 628 * DPDK callback to get information about the device. 629 * 630 * @param dev 631 * Pointer to Ethernet device structure. 632 * @param[out] info 633 * Info structure output buffer. 634 */ 635 void 636 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 637 { 638 struct mlx5_priv *priv = dev->data->dev_private; 639 struct mlx5_dev_config *config = &priv->config; 640 unsigned int max; 641 642 /* FIXME: we should ask the device for these values. */ 643 info->min_rx_bufsize = 32; 644 info->max_rx_pktlen = 65536; 645 /* 646 * Since we need one CQ per QP, the limit is the minimum number 647 * between the two values. 648 */ 649 max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq, 650 priv->sh->device_attr.orig_attr.max_qp); 651 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 652 if (max >= 65535) 653 max = 65535; 654 info->max_rx_queues = max; 655 info->max_tx_queues = max; 656 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 657 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 658 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 659 info->rx_queue_offload_capa); 660 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 661 info->if_index = mlx5_ifindex(dev); 662 info->reta_size = priv->reta_idx_n ? 
663 priv->reta_idx_n : config->ind_table_max_size; 664 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 665 info->speed_capa = priv->link_speed_capa; 666 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 667 mlx5_set_default_params(dev, info); 668 mlx5_set_txlimit_params(dev, info); 669 info->switch_info.name = dev->data->name; 670 info->switch_info.domain_id = priv->domain_id; 671 info->switch_info.port_id = priv->representor_id; 672 if (priv->representor) { 673 unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0); 674 uint16_t port_id[i]; 675 676 i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i), i); 677 while (i--) { 678 struct mlx5_priv *opriv = 679 rte_eth_devices[port_id[i]].data->dev_private; 680 681 if (!opriv || 682 opriv->representor || 683 opriv->domain_id != priv->domain_id) 684 continue; 685 /* 686 * Override switch name with that of the master 687 * device. 688 */ 689 info->switch_info.name = opriv->dev_data->name; 690 break; 691 } 692 } 693 } 694 695 /** 696 * Get device current raw clock counter 697 * 698 * @param dev 699 * Pointer to Ethernet device structure. 700 * @param[out] time 701 * Current raw clock counter of the device. 702 * 703 * @return 704 * 0 if the clock has correctly been read 705 * The value of errno in case of error 706 */ 707 int 708 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock) 709 { 710 struct mlx5_priv *priv = dev->data->dev_private; 711 struct ibv_context *ctx = priv->sh->ctx; 712 struct ibv_values_ex values; 713 int err = 0; 714 715 values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK; 716 err = mlx5_glue->query_rt_values_ex(ctx, &values); 717 if (err != 0) { 718 DRV_LOG(WARNING, "Could not query the clock !"); 719 return err; 720 } 721 *clock = values.raw_clock.tv_nsec; 722 return 0; 723 } 724 725 /** 726 * Get firmware version of a device. 727 * 728 * @param dev 729 * Ethernet device port. 730 * @param fw_ver 731 * String output allocated by caller. 
732 * @param fw_size 733 * Size of the output string, including terminating null byte. 734 * 735 * @return 736 * 0 on success, or the size of the non truncated string if too big. 737 */ 738 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 739 { 740 struct mlx5_priv *priv = dev->data->dev_private; 741 struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr; 742 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 743 744 if (fw_size < size) 745 return size; 746 if (fw_ver != NULL) 747 strlcpy(fw_ver, attr->fw_ver, fw_size); 748 return 0; 749 } 750 751 /** 752 * Get supported packet types. 753 * 754 * @param dev 755 * Pointer to Ethernet device structure. 756 * 757 * @return 758 * A pointer to the supported Packet types array. 759 */ 760 const uint32_t * 761 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 762 { 763 static const uint32_t ptypes[] = { 764 /* refers to rxq_cq_to_pkt_type() */ 765 RTE_PTYPE_L2_ETHER, 766 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 767 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 768 RTE_PTYPE_L4_NONFRAG, 769 RTE_PTYPE_L4_FRAG, 770 RTE_PTYPE_L4_TCP, 771 RTE_PTYPE_L4_UDP, 772 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 773 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 774 RTE_PTYPE_INNER_L4_NONFRAG, 775 RTE_PTYPE_INNER_L4_FRAG, 776 RTE_PTYPE_INNER_L4_TCP, 777 RTE_PTYPE_INNER_L4_UDP, 778 RTE_PTYPE_UNKNOWN 779 }; 780 781 if (dev->rx_pkt_burst == mlx5_rx_burst || 782 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 783 dev->rx_pkt_burst == mlx5_rx_burst_vec) 784 return ptypes; 785 return NULL; 786 } 787 788 /** 789 * Retrieve the master device for representor in the same switch domain. 790 * 791 * @param dev 792 * Pointer to representor Ethernet device structure. 793 * 794 * @return 795 * Master device structure on success, NULL otherwise. 
796 */ 797 798 static struct rte_eth_dev * 799 mlx5_find_master_dev(struct rte_eth_dev *dev) 800 { 801 struct mlx5_priv *priv; 802 uint16_t port_id; 803 uint16_t domain_id; 804 805 priv = dev->data->dev_private; 806 domain_id = priv->domain_id; 807 assert(priv->representor); 808 RTE_ETH_FOREACH_DEV_OF(port_id, dev->device) { 809 priv = rte_eth_devices[port_id].data->dev_private; 810 if (priv && 811 priv->master && 812 priv->domain_id == domain_id) 813 return &rte_eth_devices[port_id]; 814 } 815 return NULL; 816 } 817 818 /** 819 * DPDK callback to retrieve physical link information. 820 * 821 * @param dev 822 * Pointer to Ethernet device structure. 823 * @param[out] link 824 * Storage for current link status. 825 * 826 * @return 827 * 0 on success, a negative errno value otherwise and rte_errno is set. 828 */ 829 static int 830 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 831 struct rte_eth_link *link) 832 { 833 struct mlx5_priv *priv = dev->data->dev_private; 834 struct ethtool_cmd edata = { 835 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 836 }; 837 struct ifreq ifr; 838 struct rte_eth_link dev_link; 839 int link_speed = 0; 840 int ret; 841 842 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 843 if (ret) { 844 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 845 dev->data->port_id, strerror(rte_errno)); 846 return ret; 847 } 848 dev_link = (struct rte_eth_link) { 849 .link_status = ((ifr.ifr_flags & IFF_UP) && 850 (ifr.ifr_flags & IFF_RUNNING)), 851 }; 852 ifr = (struct ifreq) { 853 .ifr_data = (void *)&edata, 854 }; 855 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 856 if (ret) { 857 if (ret == -ENOTSUP && priv->representor) { 858 struct rte_eth_dev *master; 859 860 /* 861 * For representors we can try to inherit link 862 * settings from the master device. Actually 863 * link settings do not make a lot of sense 864 * for representors due to missing physical 865 * link. 
The old kernel drivers supported 866 * emulated settings query for representors, 867 * the new ones do not, so we have to add 868 * this code for compatibility issues. 869 */ 870 master = mlx5_find_master_dev(dev); 871 if (master) { 872 ifr = (struct ifreq) { 873 .ifr_data = (void *)&edata, 874 }; 875 /* 876 * Use special version of mlx5_ifreq() 877 * to get master device name with local 878 * device Netlink socket. Using master 879 * device Netlink socket is not thread 880 * safe. 881 */ 882 ret = mlx5_ifreq_base(dev, master, 883 SIOCETHTOOL, &ifr); 884 } 885 } 886 if (ret) { 887 DRV_LOG(WARNING, 888 "port %u ioctl(SIOCETHTOOL," 889 " ETHTOOL_GSET) failed: %s", 890 dev->data->port_id, strerror(rte_errno)); 891 return ret; 892 } 893 } 894 link_speed = ethtool_cmd_speed(&edata); 895 if (link_speed == -1) 896 dev_link.link_speed = ETH_SPEED_NUM_NONE; 897 else 898 dev_link.link_speed = link_speed; 899 priv->link_speed_capa = 0; 900 if (edata.supported & SUPPORTED_Autoneg) 901 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 902 if (edata.supported & (SUPPORTED_1000baseT_Full | 903 SUPPORTED_1000baseKX_Full)) 904 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 905 if (edata.supported & SUPPORTED_10000baseKR_Full) 906 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 907 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 908 SUPPORTED_40000baseCR4_Full | 909 SUPPORTED_40000baseSR4_Full | 910 SUPPORTED_40000baseLR4_Full)) 911 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 912 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 913 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 914 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 915 ETH_LINK_SPEED_FIXED); 916 if (((dev_link.link_speed && !dev_link.link_status) || 917 (!dev_link.link_speed && dev_link.link_status))) { 918 rte_errno = EAGAIN; 919 return -rte_errno; 920 } 921 *link = dev_link; 922 return 0; 923 } 924 925 /** 926 * Retrieve physical link information (unlocked version using new ioctl). 
927 * 928 * @param dev 929 * Pointer to Ethernet device structure. 930 * @param[out] link 931 * Storage for current link status. 932 * 933 * @return 934 * 0 on success, a negative errno value otherwise and rte_errno is set. 935 */ 936 static int 937 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 938 struct rte_eth_link *link) 939 940 { 941 struct mlx5_priv *priv = dev->data->dev_private; 942 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 943 struct ifreq ifr; 944 struct rte_eth_link dev_link; 945 struct rte_eth_dev *master = NULL; 946 uint64_t sc; 947 int ret; 948 949 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 950 if (ret) { 951 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 952 dev->data->port_id, strerror(rte_errno)); 953 return ret; 954 } 955 dev_link = (struct rte_eth_link) { 956 .link_status = ((ifr.ifr_flags & IFF_UP) && 957 (ifr.ifr_flags & IFF_RUNNING)), 958 }; 959 ifr = (struct ifreq) { 960 .ifr_data = (void *)&gcmd, 961 }; 962 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 963 if (ret) { 964 if (ret == -ENOTSUP && priv->representor) { 965 /* 966 * For representors we can try to inherit link 967 * settings from the master device. Actually 968 * link settings do not make a lot of sense 969 * for representors due to missing physical 970 * link. The old kernel drivers supported 971 * emulated settings query for representors, 972 * the new ones do not, so we have to add 973 * this code for compatibility issues. 974 */ 975 master = mlx5_find_master_dev(dev); 976 if (master) { 977 ifr = (struct ifreq) { 978 .ifr_data = (void *)&gcmd, 979 }; 980 /* 981 * Avoid using master Netlink socket. 982 * This is not thread-safe. 
983 */ 984 ret = mlx5_ifreq_base(dev, master, 985 SIOCETHTOOL, &ifr); 986 } 987 } 988 if (ret) { 989 DRV_LOG(DEBUG, 990 "port %u ioctl(SIOCETHTOOL," 991 " ETHTOOL_GLINKSETTINGS) failed: %s", 992 dev->data->port_id, strerror(rte_errno)); 993 return ret; 994 } 995 996 } 997 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 998 999 alignas(struct ethtool_link_settings) 1000 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 1001 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 1002 struct ethtool_link_settings *ecmd = (void *)data; 1003 1004 *ecmd = gcmd; 1005 ifr.ifr_data = (void *)ecmd; 1006 ret = mlx5_ifreq_base(dev, master ? master : dev, SIOCETHTOOL, &ifr); 1007 if (ret) { 1008 DRV_LOG(DEBUG, 1009 "port %u ioctl(SIOCETHTOOL," 1010 "ETHTOOL_GLINKSETTINGS) failed: %s", 1011 dev->data->port_id, strerror(rte_errno)); 1012 return ret; 1013 } 1014 dev_link.link_speed = ecmd->speed; 1015 sc = ecmd->link_mode_masks[0] | 1016 ((uint64_t)ecmd->link_mode_masks[1] << 32); 1017 priv->link_speed_capa = 0; 1018 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 1019 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 1020 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 1021 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 1022 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 1023 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 1024 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 1025 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 1026 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 1027 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 1028 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 1029 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 1030 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 1031 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 1032 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 1033 
MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 1034 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 1035 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 1036 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 1037 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 1038 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 1039 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 1040 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 1041 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 1042 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 1043 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 1044 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 1045 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 1046 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 1047 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 1048 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 1049 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 1050 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 1051 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 1052 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 1053 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 1054 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 1055 ETH_LINK_SPEED_FIXED); 1056 if (((dev_link.link_speed && !dev_link.link_status) || 1057 (!dev_link.link_speed && dev_link.link_status))) { 1058 rte_errno = EAGAIN; 1059 return -rte_errno; 1060 } 1061 *link = dev_link; 1062 return 0; 1063 } 1064 1065 /** 1066 * DPDK callback to retrieve physical link information. 1067 * 1068 * @param dev 1069 * Pointer to Ethernet device structure. 1070 * @param wait_to_complete 1071 * Wait for request completion. 1072 * 1073 * @return 1074 * 0 if link status was not updated, positive if it was, a negative errno 1075 * value otherwise and rte_errno is set. 
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);

	do {
		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret == -ENOTSUP)
			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if (wait_to_complete && ret == -EAGAIN) {
			/*
			 * -EAGAIN means speed and status are not consistent
			 * yet; keep polling until the driver-defined timeout.
			 */
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0); /* Yield the CPU between retries. */
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete);
	/* Report 1 only when the link information actually changed. */
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param in_mtu
 *   New MTU.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t kern_mtu = 0;
	int ret;

	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	/* Set kernel interface MTU first. */
	ret = mlx5_set_mtu(dev, mtu);
	if (ret)
		return ret;
	/* Read back to verify the kernel accepted the new value. */
	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
			dev->data->port_id, mtu);
		return 0;
	}
	rte_errno = EAGAIN;
	return -rte_errno;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_GPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
			" %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	fc_conf->autoneg = ethpause.autoneg;
	/* Map the rx/tx pause flag pair to the DPDK flow control mode. */
	if (ethpause.rx_pause && ethpause.tx_pause)
		fc_conf->mode = RTE_FC_FULL;
	else if (ethpause.rx_pause)
		fc_conf->mode = RTE_FC_RX_PAUSE;
	else if (ethpause.tx_pause)
		fc_conf->mode = RTE_FC_TX_PAUSE;
	else
		fc_conf->mode = RTE_FC_NONE;
	return 0;
}

/**
 * DPDK callback to modify flow control parameters.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] fc_conf
 *   Flow control parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
1199 */ 1200 int 1201 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1202 { 1203 struct ifreq ifr; 1204 struct ethtool_pauseparam ethpause = { 1205 .cmd = ETHTOOL_SPAUSEPARAM 1206 }; 1207 int ret; 1208 1209 ifr.ifr_data = (void *)ðpause; 1210 ethpause.autoneg = fc_conf->autoneg; 1211 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1212 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1213 ethpause.rx_pause = 1; 1214 else 1215 ethpause.rx_pause = 0; 1216 1217 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1218 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1219 ethpause.tx_pause = 1; 1220 else 1221 ethpause.tx_pause = 0; 1222 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1223 if (ret) { 1224 DRV_LOG(WARNING, 1225 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1226 " failed: %s", 1227 dev->data->port_id, strerror(rte_errno)); 1228 return ret; 1229 } 1230 return 0; 1231 } 1232 1233 /** 1234 * Get PCI information from struct ibv_device. 1235 * 1236 * @param device 1237 * Pointer to Ethernet device structure. 1238 * @param[out] pci_addr 1239 * PCI bus address output buffer. 1240 * 1241 * @return 1242 * 0 on success, a negative errno value otherwise and rte_errno is set. 1243 */ 1244 int 1245 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 1246 struct rte_pci_addr *pci_addr) 1247 { 1248 FILE *file; 1249 char line[32]; 1250 MKSTR(path, "%s/device/uevent", device->ibdev_path); 1251 1252 file = fopen(path, "rb"); 1253 if (file == NULL) { 1254 rte_errno = errno; 1255 return -rte_errno; 1256 } 1257 while (fgets(line, sizeof(line), file) == line) { 1258 size_t len = strlen(line); 1259 int ret; 1260 1261 /* Truncate long lines. */ 1262 if (len == (sizeof(line) - 1)) 1263 while (line[(len - 1)] != '\n') { 1264 ret = fgetc(file); 1265 if (ret == EOF) 1266 break; 1267 line[(len - 1)] = ret; 1268 } 1269 /* Extract information. 
*/ 1270 if (sscanf(line, 1271 "PCI_SLOT_NAME=" 1272 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1273 &pci_addr->domain, 1274 &pci_addr->bus, 1275 &pci_addr->devid, 1276 &pci_addr->function) == 4) { 1277 ret = 0; 1278 break; 1279 } 1280 } 1281 fclose(file); 1282 return 0; 1283 } 1284 1285 /** 1286 * Handle asynchronous removal event for entire multiport device. 1287 * 1288 * @param sh 1289 * Infiniband device shared context. 1290 */ 1291 static void 1292 mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) 1293 { 1294 uint32_t i; 1295 1296 for (i = 0; i < sh->max_port; ++i) { 1297 struct rte_eth_dev *dev; 1298 1299 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { 1300 /* 1301 * Or not existing port either no 1302 * handler installed for this port. 1303 */ 1304 continue; 1305 } 1306 dev = &rte_eth_devices[sh->port[i].ih_port_id]; 1307 assert(dev); 1308 if (dev->data->dev_conf.intr_conf.rmv) 1309 _rte_eth_dev_callback_process 1310 (dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1311 } 1312 } 1313 1314 /** 1315 * Handle shared asynchronous events the NIC (removal event 1316 * and link status change). Supports multiport IB device. 1317 * 1318 * @param cb_arg 1319 * Callback argument. 1320 */ 1321 void 1322 mlx5_dev_interrupt_handler(void *cb_arg) 1323 { 1324 struct mlx5_ibv_shared *sh = cb_arg; 1325 struct ibv_async_event event; 1326 1327 /* Read all message from the IB device and acknowledge them. */ 1328 for (;;) { 1329 struct rte_eth_dev *dev; 1330 uint32_t tmp; 1331 1332 if (mlx5_glue->get_async_event(sh->ctx, &event)) 1333 break; 1334 /* Retrieve and check IB port index. */ 1335 tmp = (uint32_t)event.element.port_num; 1336 if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) { 1337 /* 1338 * The DEVICE_FATAL event is called once for 1339 * entire device without port specifying. 1340 * We should notify all existing ports. 
1341 */ 1342 mlx5_glue->ack_async_event(&event); 1343 mlx5_dev_interrupt_device_fatal(sh); 1344 continue; 1345 } 1346 assert(tmp && (tmp <= sh->max_port)); 1347 if (!tmp) { 1348 /* Unsupported devive level event. */ 1349 mlx5_glue->ack_async_event(&event); 1350 DRV_LOG(DEBUG, 1351 "unsupported common event (type %d)", 1352 event.event_type); 1353 continue; 1354 } 1355 if (tmp > sh->max_port) { 1356 /* Invalid IB port index. */ 1357 mlx5_glue->ack_async_event(&event); 1358 DRV_LOG(DEBUG, 1359 "cannot handle an event (type %d)" 1360 "due to invalid IB port index (%u)", 1361 event.event_type, tmp); 1362 continue; 1363 } 1364 if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { 1365 /* No handler installed. */ 1366 mlx5_glue->ack_async_event(&event); 1367 DRV_LOG(DEBUG, 1368 "cannot handle an event (type %d)" 1369 "due to no handler installed for port %u", 1370 event.event_type, tmp); 1371 continue; 1372 } 1373 /* Retrieve ethernet device descriptor. */ 1374 tmp = sh->port[tmp - 1].ih_port_id; 1375 dev = &rte_eth_devices[tmp]; 1376 assert(dev); 1377 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1378 event.event_type == IBV_EVENT_PORT_ERR) && 1379 dev->data->dev_conf.intr_conf.lsc) { 1380 mlx5_glue->ack_async_event(&event); 1381 if (mlx5_link_update(dev, 0) == -EAGAIN) { 1382 usleep(0); 1383 continue; 1384 } 1385 _rte_eth_dev_callback_process 1386 (dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1387 continue; 1388 } 1389 DRV_LOG(DEBUG, 1390 "port %u cannot handle an unknown event (type %d)", 1391 dev->data->port_id, event.event_type); 1392 mlx5_glue->ack_async_event(&event); 1393 } 1394 } 1395 1396 /* 1397 * Unregister callback handler safely. The handler may be active 1398 * while we are trying to unregister it, in this case code -EAGAIN 1399 * is returned by rte_intr_callback_unregister(). This routine checks 1400 * the return code and tries to unregister handler again. 
 *
 * @param handle
 *   interrupt handle
 * @param cb_fn
 *   pointer to callback routine
 * @cb_arg
 *   opaque callback parameter
 */
void
mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
			      rte_intr_callback_fn cb_fn, void *cb_arg)
{
	/*
	 * Try to reduce timeout management overhead by not calling
	 * the timer related routines on the first iteration. If the
	 * unregistering succeeds on first call there will be no
	 * timer calls at all.
	 */
	uint64_t twait = 0;
	uint64_t start = 0;

	do {
		int ret;

		ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
		if (ret >= 0)
			return;
		if (ret != -EAGAIN) {
			DRV_LOG(INFO, "failed to unregister interrupt"
				" handler (error: %d)", ret);
			assert(false);
			return;
		}
		if (twait) {
			struct timespec onems;

			/* Wait one millisecond and try again. */
			onems.tv_sec = 0;
			onems.tv_nsec = NS_PER_S / MS_PER_S;
			nanosleep(&onems, 0);
			/* Check whether one second elapsed. */
			if ((rte_get_timer_cycles() - start) <= twait)
				continue;
		} else {
			/*
			 * We get the amount of timer ticks for one second.
			 * If this amount elapsed it means we spent one
			 * second in waiting. This branch is executed once
			 * on first iteration.
			 */
			twait = rte_get_timer_hz();
			assert(twait);
		}
		/*
		 * Timeout elapsed, show message (once a second) and retry.
		 * We have no other acceptable option here, if we ignore
		 * the unregistering return code the handler will not
		 * be unregistered, fd will be closed and we may get the
		 * crush. Hanging and messaging in the loop seems not to be
		 * the worst choice.
		 */
		DRV_LOG(INFO, "Retrying to unregister interrupt handler");
		start = rte_get_timer_cycles();
	} while (true);
}

/**
 * Handle DEVX interrupts from the NIC.
 * This function is probably called from the DPDK host thread.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler_devx(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_ASYNC
	(void)cb_arg;
	return;
#else
	struct mlx5_ibv_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
			    MLX5_ST_SZ_BYTES(traffic_counter) +
			    sizeof(struct mlx5dv_devx_async_cmd_hdr)];
	} out;
	uint8_t *buf = out.buf + sizeof(out.cmd_resp);

	/* Drain every pending async completion and dispatch it. */
	while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
						   &out.cmd_resp,
						   sizeof(out.buf)))
		mlx5_flow_async_pool_query_handle
			(sh, (uint64_t)out.cmd_resp.wr_id,
			 mlx5_devx_get_out_command_status(buf));
#endif /* HAVE_IBV_DEVX_ASYNC */
}

/**
 * Uninstall shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS)
		goto exit;
	assert(sh->port[priv->ibv_port - 1].ih_port_id ==
	       (uint32_t)dev->data->port_id);
	assert(sh->intr_cnt);
	sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	/* Keep the shared handler while other ports still reference it. */
	if (!sh->intr_cnt || --sh->intr_cnt)
		goto exit;
	mlx5_intr_callback_unregister(&sh->intr_handle,
				      mlx5_dev_interrupt_handler, sh);
	sh->intr_handle.fd = 0;
	sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
	if (sh->intr_handle_devx.fd) {
		rte_intr_callback_unregister(&sh->intr_handle_devx,
					     mlx5_dev_interrupt_handler_devx,
					     sh);
		sh->intr_handle_devx.fd = 0;
		sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN;
	}
	if (sh->devx_comp) {
		mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
		sh->devx_comp = NULL;
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Install shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_install(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;
	int ret;
	int flags;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) {
		/* The handler is already installed for this port. */
		assert(sh->intr_cnt);
		goto exit;
	}
	sh->port[priv->ibv_port - 1].ih_port_id = (uint32_t)dev->data->port_id;
	/* Another port already installed the shared handler: just count us. */
	if (sh->intr_cnt) {
		sh->intr_cnt++;
		goto exit;
	}
	/* No shared handler installed. */
	assert(sh->ctx->async_fd > 0);
	flags = fcntl(sh->ctx->async_fd, F_GETFL);
	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(INFO, "failed to change file descriptor"
			" async event queue");
		goto error;
	}
	sh->intr_handle.fd = sh->ctx->async_fd;
	sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
	rte_intr_callback_register(&sh->intr_handle,
				   mlx5_dev_interrupt_handler, sh);
	if (priv->config.devx) {
#ifndef HAVE_IBV_DEVX_ASYNC
		goto error_unregister;
#else
		sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
		if (sh->devx_comp) {
			flags = fcntl(sh->devx_comp->fd, F_GETFL);
			ret = fcntl(sh->devx_comp->fd, F_SETFL,
				    flags | O_NONBLOCK);
			if (ret) {
				DRV_LOG(INFO, "failed to change file descriptor"
					" devx async event queue");
				goto error_unregister;
			}
			sh->intr_handle_devx.fd = sh->devx_comp->fd;
			sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
			rte_intr_callback_register
				(&sh->intr_handle_devx,
				 mlx5_dev_interrupt_handler_devx, sh);
		} else {
			DRV_LOG(INFO, "failed to create devx async command "
				"completion");
			goto error_unregister;
		}
#endif /* HAVE_IBV_DEVX_ASYNC */
	}
	sh->intr_cnt++;
	goto exit;
error_unregister:
	rte_intr_callback_unregister(&sh->intr_handle,
				     mlx5_dev_interrupt_handler, sh);
error:
	/* Indicate there will be no interrupts. */
	dev->data->dev_conf.intr_conf.lsc = 0;
	dev->data->dev_conf.intr_conf.rmv = 0;
	sh->intr_handle.fd = 0;
	sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
	sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Uninstall interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_uninstall(dev);
}

/**
 * Install interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_install(dev);
}

/**
 * DPDK callback to bring the link DOWN.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_down(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
}

/**
 * DPDK callback to bring the link UP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_up(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
}

/**
 * Configure the RX function to use.
 *
 * @param dev
 *   Pointer to private data structure.
1692 * 1693 * @return 1694 * Pointer to selected Rx burst function. 1695 */ 1696 eth_rx_burst_t 1697 mlx5_select_rx_function(struct rte_eth_dev *dev) 1698 { 1699 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1700 1701 assert(dev != NULL); 1702 if (mlx5_check_vec_rx_support(dev) > 0) { 1703 rx_pkt_burst = mlx5_rx_burst_vec; 1704 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1705 dev->data->port_id); 1706 } else if (mlx5_mprq_enabled(dev)) { 1707 rx_pkt_burst = mlx5_rx_burst_mprq; 1708 } 1709 return rx_pkt_burst; 1710 } 1711 1712 /** 1713 * Check if mlx5 device was removed. 1714 * 1715 * @param dev 1716 * Pointer to Ethernet device structure. 1717 * 1718 * @return 1719 * 1 when device is removed, otherwise 0. 1720 */ 1721 int 1722 mlx5_is_removed(struct rte_eth_dev *dev) 1723 { 1724 struct ibv_device_attr device_attr; 1725 struct mlx5_priv *priv = dev->data->dev_private; 1726 1727 if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO) 1728 return 1; 1729 return 0; 1730 } 1731 1732 /** 1733 * Get port ID list of mlx5 instances sharing a common device. 1734 * 1735 * @param[in] dev 1736 * Device to look for. 1737 * @param[out] port_list 1738 * Result buffer for collected port IDs. 1739 * @param port_list_n 1740 * Maximum number of entries in result buffer. If 0, @p port_list can be 1741 * NULL. 1742 * 1743 * @return 1744 * Number of matching instances regardless of the @p port_list_n 1745 * parameter, 0 if none were found. 1746 */ 1747 unsigned int 1748 mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list, 1749 unsigned int port_list_n) 1750 { 1751 uint16_t id; 1752 unsigned int n = 0; 1753 1754 RTE_ETH_FOREACH_DEV_OF(id, dev) { 1755 if (n < port_list_n) 1756 port_list[n] = id; 1757 n++; 1758 } 1759 return n; 1760 } 1761 1762 /** 1763 * Get the E-Switch domain id this port belongs to. 1764 * 1765 * @param[in] port 1766 * Device port id. 1767 * @param[out] es_domain_id 1768 * E-Switch domain id. 
1769 * @param[out] es_port_id 1770 * The port id of the port in the E-Switch. 1771 * 1772 * @return 1773 * 0 on success, a negative errno value otherwise and rte_errno is set. 1774 */ 1775 int 1776 mlx5_port_to_eswitch_info(uint16_t port, 1777 uint16_t *es_domain_id, uint16_t *es_port_id) 1778 { 1779 struct rte_eth_dev *dev; 1780 struct mlx5_priv *priv; 1781 1782 if (port >= RTE_MAX_ETHPORTS) { 1783 rte_errno = EINVAL; 1784 return -rte_errno; 1785 } 1786 if (!rte_eth_dev_is_valid_port(port)) { 1787 rte_errno = ENODEV; 1788 return -rte_errno; 1789 } 1790 dev = &rte_eth_devices[port]; 1791 priv = dev->data->dev_private; 1792 if (!(priv->representor || priv->master)) { 1793 rte_errno = EINVAL; 1794 return -rte_errno; 1795 } 1796 if (es_domain_id) 1797 *es_domain_id = priv->domain_id; 1798 if (es_port_id) 1799 *es_port_id = priv->vport_id; 1800 return 0; 1801 } 1802 1803 /** 1804 * Get switch information associated with network interface. 1805 * 1806 * @param ifindex 1807 * Network interface index. 1808 * @param[out] info 1809 * Switch information object, populated in case of success. 1810 * 1811 * @return 1812 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1813 */ 1814 int 1815 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) 1816 { 1817 char ifname[IF_NAMESIZE]; 1818 char port_name[IF_NAMESIZE]; 1819 FILE *file; 1820 struct mlx5_switch_info data = { 1821 .master = 0, 1822 .representor = 0, 1823 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1824 .port_name = 0, 1825 .switch_id = 0, 1826 }; 1827 DIR *dir; 1828 bool port_switch_id_set = false; 1829 bool device_dir = false; 1830 char c; 1831 int ret; 1832 1833 if (!if_indextoname(ifindex, ifname)) { 1834 rte_errno = errno; 1835 return -rte_errno; 1836 } 1837 1838 MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name", 1839 ifname); 1840 MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", 1841 ifname); 1842 MKSTR(pci_device, "/sys/class/net/%s/device", 1843 ifname); 1844 1845 file = fopen(phys_port_name, "rb"); 1846 if (file != NULL) { 1847 ret = fscanf(file, "%s", port_name); 1848 fclose(file); 1849 if (ret == 1) 1850 mlx5_translate_port_name(port_name, &data); 1851 } 1852 file = fopen(phys_switch_id, "rb"); 1853 if (file == NULL) { 1854 rte_errno = errno; 1855 return -rte_errno; 1856 } 1857 port_switch_id_set = 1858 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && 1859 c == '\n'; 1860 fclose(file); 1861 dir = opendir(pci_device); 1862 if (dir != NULL) { 1863 closedir(dir); 1864 device_dir = true; 1865 } 1866 if (port_switch_id_set) { 1867 /* We have some E-Switch configuration. */ 1868 mlx5_sysfs_check_switch_info(device_dir, &data); 1869 } 1870 *info = data; 1871 assert(!(data.master && data.representor)); 1872 if (data.master && data.representor) { 1873 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1874 " and as representor", ifindex); 1875 rte_errno = ENODEV; 1876 return -rte_errno; 1877 } 1878 return 0; 1879 } 1880 1881 /** 1882 * Analyze gathered port parameters via Netlink to recognize master 1883 * and representor devices for E-Switch configuration. 
1884 * 1885 * @param[in] num_vf_set 1886 * flag of presence of number of VFs port attribute. 1887 * @param[inout] switch_info 1888 * Port information, including port name as a number and port name 1889 * type if recognized 1890 * 1891 * @return 1892 * master and representor flags are set in switch_info according to 1893 * recognized parameters (if any). 1894 */ 1895 void 1896 mlx5_nl_check_switch_info(bool num_vf_set, 1897 struct mlx5_switch_info *switch_info) 1898 { 1899 switch (switch_info->name_type) { 1900 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1901 /* 1902 * Name is not recognized, assume the master, 1903 * check the number of VFs key presence. 1904 */ 1905 switch_info->master = num_vf_set; 1906 break; 1907 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1908 /* 1909 * Name is not set, this assumes the legacy naming 1910 * schema for master, just check if there is a 1911 * number of VFs key. 1912 */ 1913 switch_info->master = num_vf_set; 1914 break; 1915 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1916 /* New uplink naming schema recognized. */ 1917 switch_info->master = 1; 1918 break; 1919 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1920 /* Legacy representors naming schema. */ 1921 switch_info->representor = !num_vf_set; 1922 break; 1923 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1924 /* New representors naming schema. */ 1925 switch_info->representor = 1; 1926 break; 1927 } 1928 } 1929 1930 /** 1931 * Analyze gathered port parameters via sysfs to recognize master 1932 * and representor devices for E-Switch configuration. 1933 * 1934 * @param[in] device_dir 1935 * flag of presence of "device" directory under port device key. 1936 * @param[inout] switch_info 1937 * Port information, including port name as a number and port name 1938 * type if recognized 1939 * 1940 * @return 1941 * master and representor flags are set in switch_info according to 1942 * recognized parameters (if any). 
1943 */ 1944 void 1945 mlx5_sysfs_check_switch_info(bool device_dir, 1946 struct mlx5_switch_info *switch_info) 1947 { 1948 switch (switch_info->name_type) { 1949 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1950 /* 1951 * Name is not recognized, assume the master, 1952 * check the device directory presence. 1953 */ 1954 switch_info->master = device_dir; 1955 break; 1956 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1957 /* 1958 * Name is not set, this assumes the legacy naming 1959 * schema for master, just check if there is 1960 * a device directory. 1961 */ 1962 switch_info->master = device_dir; 1963 break; 1964 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1965 /* New uplink naming schema recognized. */ 1966 switch_info->master = 1; 1967 break; 1968 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1969 /* Legacy representors naming schema. */ 1970 switch_info->representor = !device_dir; 1971 break; 1972 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1973 /* New representors naming schema. */ 1974 switch_info->representor = 1; 1975 break; 1976 } 1977 } 1978 1979 /** 1980 * Extract port name, as a number, from sysfs or netlink information. 1981 * 1982 * @param[in] port_name_in 1983 * String representing the port name. 1984 * @param[out] port_info_out 1985 * Port information, including port name as a number and port name 1986 * type if recognized 1987 * 1988 * @return 1989 * port_name field set according to recognized name format. 1990 */ 1991 void 1992 mlx5_translate_port_name(const char *port_name_in, 1993 struct mlx5_switch_info *port_info_out) 1994 { 1995 char pf_c1, pf_c2, vf_c1, vf_c2; 1996 char *end; 1997 int sc_items; 1998 1999 /* 2000 * Check for port-name as a string of the form pf0vf0 2001 * (support kernel ver >= 5.0 or OFED ver >= 4.6). 
2002 */ 2003 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", 2004 &pf_c1, &pf_c2, &port_info_out->pf_num, 2005 &vf_c1, &vf_c2, &port_info_out->port_name); 2006 if (sc_items == 6 && 2007 pf_c1 == 'p' && pf_c2 == 'f' && 2008 vf_c1 == 'v' && vf_c2 == 'f') { 2009 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; 2010 return; 2011 } 2012 /* 2013 * Check for port-name as a string of the form p0 2014 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 2015 */ 2016 sc_items = sscanf(port_name_in, "%c%d", 2017 &pf_c1, &port_info_out->port_name); 2018 if (sc_items == 2 && pf_c1 == 'p') { 2019 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 2020 return; 2021 } 2022 /* Check for port-name as a number (support kernel ver < 5.0 */ 2023 errno = 0; 2024 port_info_out->port_name = strtol(port_name_in, &end, 0); 2025 if (!errno && 2026 (size_t)(end - port_name_in) == strlen(port_name_in)) { 2027 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 2028 return; 2029 } 2030 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 2031 return; 2032 } 2033