/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <assert.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <dirent.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <fcntl.h>
#include <stdalign.h>
#include <sys/un.h>
#include <time.h>

#include <rte_atomic.h>
#include <rte_ethdev_driver.h>
#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_rwlock.h>
#include <rte_cycles.h>

#include "mlx5.h"
#include "mlx5_glue.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"

/*
 * Supported speed values found in /usr/include/linux/ethtool.h.
 *
 * Each fallback below is only defined when the matching HAVE_* macro is
 * absent (presumably set by the build system after probing the installed
 * headers -- confirm against the build scripts). The values are bit masks
 * tested against the "supported" field of struct ethtool_cmd.
 */
#ifndef HAVE_SUPPORTED_40000baseKR4_Full
#define SUPPORTED_40000baseKR4_Full (1 << 23)
#endif
#ifndef HAVE_SUPPORTED_40000baseCR4_Full
#define SUPPORTED_40000baseCR4_Full (1 << 24)
#endif
#ifndef HAVE_SUPPORTED_40000baseSR4_Full
#define SUPPORTED_40000baseSR4_Full (1 << 25)
#endif
#ifndef HAVE_SUPPORTED_40000baseLR4_Full
#define SUPPORTED_40000baseLR4_Full (1 << 26)
#endif
#ifndef HAVE_SUPPORTED_56000baseKR4_Full
#define SUPPORTED_56000baseKR4_Full (1 << 27)
#endif
#ifndef HAVE_SUPPORTED_56000baseCR4_Full
#define SUPPORTED_56000baseCR4_Full (1 << 28)
#endif
#ifndef HAVE_SUPPORTED_56000baseSR4_Full
#define SUPPORTED_56000baseSR4_Full (1 << 29)
#endif
#ifndef HAVE_SUPPORTED_56000baseLR4_Full
#define SUPPORTED_56000baseLR4_Full (1 << 30)
#endif

/* Add defines in case the running kernel is not the same as user headers.
 */
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Local declaration of the ETHTOOL_GLINKSETTINGS request structure, used
 * only when the installed user headers do not provide the command.
 * link_mode_masks_nwords is signed and link_mode_masks[] is a flexible
 * array member, so the caller decides how many mask words follow the header.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
/* Link mode bit positions (bit indexes, not masks). */
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
/* 25G link mode bit positions, for headers that do not define them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
/* 50G link mode bit positions, for headers that do not define them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
/* 100G link mode bit positions, for headers that do not define them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get master interface name from private structure.
*
 * Scans the "<ibdev_path>/device/net" directory and selects the network
 * interface whose port number (read from its "dev_port" file, or from
 * "dev_id" as a fallback) is zero.
 *
 * @param[in] ibdev_path
 *   Base path of the device; must not be NULL.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
{
	DIR *dir;
	struct dirent *dent;
	/* 0: read "dev_port" (decimal), nonzero: read "dev_id" (hex). */
	unsigned int dev_type = 0;
	/* Port number seen on the previous entry, ~0u when none yet. */
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	assert(ibdev_path);
	{
		MKSTR(path, "%s/device/net", ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		/* Skip the "." and ".." directory entries. */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			/* Any failure other than a missing file skips the entry. */
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			/* Discard any match found so far before rescanning. */
			match[0] = '\0';
			/* Already using dev_id: nothing else to try. */
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		if (dev_port == 0)
			strlcpy(match, name, sizeof(match));
	}
	closedir(dir);
	if (match[0] == '\0') {
		rte_errno = ENOENT;
		return -rte_errno;
	}
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Get interface name from private structure.
 *
 * This is a port representor-aware version of mlx5_get_master_ifname().
 * The name is resolved from the interface index when one is available;
 * otherwise non-representor ports fall back to mlx5_get_master_ifname()
 * and representors fail with ENXIO.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int ifindex;

	assert(priv);
	assert(priv->sh);
	ifindex = mlx5_ifindex(dev);
	if (!ifindex) {
		if (!priv->representor)
			return mlx5_get_master_ifname(priv->sh->ibdev_path,
						      ifname);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	if (if_indextoname(ifindex, &(*ifname)[0]))
		return 0;
	rte_errno = errno;
	return -rte_errno;
}

/**
 * Get the interface index from device name.
 *
 * The index is read from the if_index field of the private structure.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Nonzero interface index on success, zero otherwise and rte_errno is set.
 */
unsigned int
mlx5_ifindex(const struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int ifindex;

	assert(priv);
	/*
	 * NOTE(review): when assertions are enabled a zero if_index aborts
	 * here, so the ENXIO path below is only reachable in builds where
	 * assert() is compiled out.
	 */
	assert(priv->if_index);
	ifindex = priv->if_index;
	if (!ifindex)
		rte_errno = ENXIO;
	return ifindex;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param req
 *   Request number to pass to ioctl().
272 * @param[out] ifr 273 * Interface request structure output buffer. 274 * 275 * @return 276 * 0 on success, a negative errno value otherwise and rte_errno is set. 277 */ 278 int 279 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 280 { 281 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 282 int ret = 0; 283 284 if (sock == -1) { 285 rte_errno = errno; 286 return -rte_errno; 287 } 288 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 289 if (ret) 290 goto error; 291 ret = ioctl(sock, req, ifr); 292 if (ret == -1) { 293 rte_errno = errno; 294 goto error; 295 } 296 close(sock); 297 return 0; 298 error: 299 close(sock); 300 return -rte_errno; 301 } 302 303 /** 304 * Get device MTU. 305 * 306 * @param dev 307 * Pointer to Ethernet device. 308 * @param[out] mtu 309 * MTU value output buffer. 310 * 311 * @return 312 * 0 on success, a negative errno value otherwise and rte_errno is set. 313 */ 314 int 315 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 316 { 317 struct ifreq request; 318 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 319 320 if (ret) 321 return ret; 322 *mtu = request.ifr_mtu; 323 return 0; 324 } 325 326 /** 327 * Set device MTU. 328 * 329 * @param dev 330 * Pointer to Ethernet device. 331 * @param mtu 332 * MTU value to set. 333 * 334 * @return 335 * 0 on success, a negative errno value otherwise and rte_errno is set. 336 */ 337 static int 338 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 339 { 340 struct ifreq request = { .ifr_mtu = mtu, }; 341 342 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 343 } 344 345 /** 346 * Set device flags. 347 * 348 * @param dev 349 * Pointer to Ethernet device. 350 * @param keep 351 * Bitmask for flags that must remain untouched. 352 * @param flags 353 * Bitmask for flags to modify. 354 * 355 * @return 356 * 0 on success, a negative errno value otherwise and rte_errno is set. 
357 */ 358 int 359 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 360 { 361 struct ifreq request; 362 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 363 364 if (ret) 365 return ret; 366 request.ifr_flags &= keep; 367 request.ifr_flags |= flags & ~keep; 368 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 369 } 370 371 /** 372 * DPDK callback for Ethernet device configuration. 373 * 374 * @param dev 375 * Pointer to Ethernet device structure. 376 * 377 * @return 378 * 0 on success, a negative errno value otherwise and rte_errno is set. 379 */ 380 int 381 mlx5_dev_configure(struct rte_eth_dev *dev) 382 { 383 struct mlx5_priv *priv = dev->data->dev_private; 384 unsigned int rxqs_n = dev->data->nb_rx_queues; 385 unsigned int txqs_n = dev->data->nb_tx_queues; 386 unsigned int i; 387 unsigned int j; 388 unsigned int reta_idx_n; 389 const uint8_t use_app_rss_key = 390 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 391 int ret = 0; 392 393 if (use_app_rss_key && 394 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 395 MLX5_RSS_HASH_KEY_LEN)) { 396 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 397 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 398 rte_errno = EINVAL; 399 return -rte_errno; 400 } 401 priv->rss_conf.rss_key = 402 rte_realloc(priv->rss_conf.rss_key, 403 MLX5_RSS_HASH_KEY_LEN, 0); 404 if (!priv->rss_conf.rss_key) { 405 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 406 dev->data->port_id, rxqs_n); 407 rte_errno = ENOMEM; 408 return -rte_errno; 409 } 410 memcpy(priv->rss_conf.rss_key, 411 use_app_rss_key ? 
412 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 413 rss_hash_default_key, 414 MLX5_RSS_HASH_KEY_LEN); 415 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 416 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 417 priv->rxqs = (void *)dev->data->rx_queues; 418 priv->txqs = (void *)dev->data->tx_queues; 419 if (txqs_n != priv->txqs_n) { 420 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 421 dev->data->port_id, priv->txqs_n, txqs_n); 422 priv->txqs_n = txqs_n; 423 } 424 if (rxqs_n > priv->config.ind_table_max_size) { 425 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 426 dev->data->port_id, rxqs_n); 427 rte_errno = EINVAL; 428 return -rte_errno; 429 } 430 if (rxqs_n != priv->rxqs_n) { 431 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 432 dev->data->port_id, priv->rxqs_n, rxqs_n); 433 priv->rxqs_n = rxqs_n; 434 /* 435 * If the requested number of RX queues is not a power of two, 436 * use the maximum indirection table size for better balancing. 437 * The result is always rounded to the next power of two. 438 */ 439 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 440 priv->config.ind_table_max_size : 441 rxqs_n)); 442 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 443 if (ret) 444 return ret; 445 /* 446 * When the number of RX queues is not a power of two, 447 * the remaining table entries are padded with reused WQs 448 * and hashes are not spread uniformly. 449 */ 450 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 451 (*priv->reta_idx)[i] = j; 452 if (++j == rxqs_n) 453 j = 0; 454 } 455 } 456 ret = mlx5_proc_priv_init(dev); 457 if (ret) 458 return ret; 459 return 0; 460 } 461 462 /** 463 * Sets default tuning parameters. 464 * 465 * @param dev 466 * Pointer to Ethernet device. 467 * @param[out] info 468 * Info structure output buffer. 
469 */ 470 static void 471 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 472 { 473 struct mlx5_priv *priv = dev->data->dev_private; 474 475 /* Minimum CPU utilization. */ 476 info->default_rxportconf.ring_size = 256; 477 info->default_txportconf.ring_size = 256; 478 info->default_rxportconf.burst_size = 64; 479 info->default_txportconf.burst_size = 64; 480 if (priv->link_speed_capa & ETH_LINK_SPEED_100G) { 481 info->default_rxportconf.nb_queues = 16; 482 info->default_txportconf.nb_queues = 16; 483 if (dev->data->nb_rx_queues > 2 || 484 dev->data->nb_tx_queues > 2) { 485 /* Max Throughput. */ 486 info->default_rxportconf.ring_size = 2048; 487 info->default_txportconf.ring_size = 2048; 488 } 489 } else { 490 info->default_rxportconf.nb_queues = 8; 491 info->default_txportconf.nb_queues = 8; 492 if (dev->data->nb_rx_queues > 2 || 493 dev->data->nb_tx_queues > 2) { 494 /* Max Throughput. */ 495 info->default_rxportconf.ring_size = 4096; 496 info->default_txportconf.ring_size = 4096; 497 } 498 } 499 } 500 501 /** 502 * Sets tx mbuf limiting parameters. 503 * 504 * @param dev 505 * Pointer to Ethernet device. 506 * @param[out] info 507 * Info structure output buffer. 508 */ 509 static void 510 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 511 { 512 struct mlx5_priv *priv = dev->data->dev_private; 513 struct mlx5_dev_config *config = &priv->config; 514 unsigned int inlen; 515 uint16_t nb_max; 516 517 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 
518 MLX5_SEND_DEF_INLINE_LEN : 519 (unsigned int)config->txq_inline_max; 520 assert(config->txq_inline_min >= 0); 521 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 522 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 523 MLX5_ESEG_MIN_INLINE_SIZE - 524 MLX5_WQE_CSEG_SIZE - 525 MLX5_WQE_ESEG_SIZE - 526 MLX5_WQE_DSEG_SIZE * 2); 527 nb_max = (MLX5_WQE_SIZE_MAX + 528 MLX5_ESEG_MIN_INLINE_SIZE - 529 MLX5_WQE_CSEG_SIZE - 530 MLX5_WQE_ESEG_SIZE - 531 MLX5_WQE_DSEG_SIZE - 532 inlen) / MLX5_WSEG_SIZE; 533 info->tx_desc_lim.nb_seg_max = nb_max; 534 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 535 } 536 537 /** 538 * DPDK callback to get information about the device. 539 * 540 * @param dev 541 * Pointer to Ethernet device structure. 542 * @param[out] info 543 * Info structure output buffer. 544 */ 545 int 546 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 547 { 548 struct mlx5_priv *priv = dev->data->dev_private; 549 struct mlx5_dev_config *config = &priv->config; 550 unsigned int max; 551 552 /* FIXME: we should ask the device for these values. */ 553 info->min_rx_bufsize = 32; 554 info->max_rx_pktlen = 65536; 555 /* 556 * Since we need one CQ per QP, the limit is the minimum number 557 * between the two values. 558 */ 559 max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq, 560 priv->sh->device_attr.orig_attr.max_qp); 561 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 562 if (max >= 65535) 563 max = 65535; 564 info->max_rx_queues = max; 565 info->max_tx_queues = max; 566 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 567 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 568 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 569 info->rx_queue_offload_capa); 570 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 571 info->if_index = mlx5_ifindex(dev); 572 info->reta_size = priv->reta_idx_n ? 
573 priv->reta_idx_n : config->ind_table_max_size; 574 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 575 info->speed_capa = priv->link_speed_capa; 576 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 577 mlx5_set_default_params(dev, info); 578 mlx5_set_txlimit_params(dev, info); 579 info->switch_info.name = dev->data->name; 580 info->switch_info.domain_id = priv->domain_id; 581 info->switch_info.port_id = priv->representor_id; 582 if (priv->representor) { 583 uint16_t port_id; 584 585 if (priv->pf_bond >= 0) { 586 /* 587 * Switch port ID is opaque value with driver defined 588 * format. Push the PF index in bonding configurations 589 * in upper four bits of port ID. If we get too many 590 * representors (more than 4K) or PFs (more than 15) 591 * this approach must be reconsidered. 592 */ 593 if ((info->switch_info.port_id >> 594 MLX5_PORT_ID_BONDING_PF_SHIFT) || 595 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 596 DRV_LOG(ERR, "can't update switch port ID" 597 " for bonding device"); 598 assert(false); 599 return -ENODEV; 600 } 601 info->switch_info.port_id |= 602 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 603 } 604 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 605 struct mlx5_priv *opriv = 606 rte_eth_devices[port_id].data->dev_private; 607 608 if (!opriv || 609 opriv->representor || 610 opriv->sh != priv->sh || 611 opriv->domain_id != priv->domain_id) 612 continue; 613 /* 614 * Override switch name with that of the master 615 * device. 616 */ 617 info->switch_info.name = opriv->dev_data->name; 618 break; 619 } 620 } 621 return 0; 622 } 623 624 /** 625 * Get device current raw clock counter 626 * 627 * @param dev 628 * Pointer to Ethernet device structure. 629 * @param[out] time 630 * Current raw clock counter of the device. 
631 * 632 * @return 633 * 0 if the clock has correctly been read 634 * The value of errno in case of error 635 */ 636 int 637 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock) 638 { 639 struct mlx5_priv *priv = dev->data->dev_private; 640 struct ibv_context *ctx = priv->sh->ctx; 641 struct ibv_values_ex values; 642 int err = 0; 643 644 values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK; 645 err = mlx5_glue->query_rt_values_ex(ctx, &values); 646 if (err != 0) { 647 DRV_LOG(WARNING, "Could not query the clock !"); 648 return err; 649 } 650 *clock = values.raw_clock.tv_nsec; 651 return 0; 652 } 653 654 /** 655 * Get firmware version of a device. 656 * 657 * @param dev 658 * Ethernet device port. 659 * @param fw_ver 660 * String output allocated by caller. 661 * @param fw_size 662 * Size of the output string, including terminating null byte. 663 * 664 * @return 665 * 0 on success, or the size of the non truncated string if too big. 666 */ 667 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 668 { 669 struct mlx5_priv *priv = dev->data->dev_private; 670 struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr; 671 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 672 673 if (fw_size < size) 674 return size; 675 if (fw_ver != NULL) 676 strlcpy(fw_ver, attr->fw_ver, fw_size); 677 return 0; 678 } 679 680 /** 681 * Get supported packet types. 682 * 683 * @param dev 684 * Pointer to Ethernet device structure. 685 * 686 * @return 687 * A pointer to the supported Packet types array. 
688 */ 689 const uint32_t * 690 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 691 { 692 static const uint32_t ptypes[] = { 693 /* refers to rxq_cq_to_pkt_type() */ 694 RTE_PTYPE_L2_ETHER, 695 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 696 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 697 RTE_PTYPE_L4_NONFRAG, 698 RTE_PTYPE_L4_FRAG, 699 RTE_PTYPE_L4_TCP, 700 RTE_PTYPE_L4_UDP, 701 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 702 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 703 RTE_PTYPE_INNER_L4_NONFRAG, 704 RTE_PTYPE_INNER_L4_FRAG, 705 RTE_PTYPE_INNER_L4_TCP, 706 RTE_PTYPE_INNER_L4_UDP, 707 RTE_PTYPE_UNKNOWN 708 }; 709 710 if (dev->rx_pkt_burst == mlx5_rx_burst || 711 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 712 dev->rx_pkt_burst == mlx5_rx_burst_vec) 713 return ptypes; 714 return NULL; 715 } 716 717 /** 718 * Retrieve the master device for representor in the same switch domain. 719 * 720 * @param dev 721 * Pointer to representor Ethernet device structure. 722 * 723 * @return 724 * Master device structure on success, NULL otherwise. 725 */ 726 727 static struct rte_eth_dev * 728 mlx5_find_master_dev(struct rte_eth_dev *dev) 729 { 730 struct mlx5_priv *priv; 731 uint16_t port_id; 732 uint16_t domain_id; 733 734 priv = dev->data->dev_private; 735 domain_id = priv->domain_id; 736 assert(priv->representor); 737 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 738 struct mlx5_priv *opriv = 739 rte_eth_devices[port_id].data->dev_private; 740 if (opriv && 741 opriv->master && 742 opriv->domain_id == domain_id && 743 opriv->sh == priv->sh) 744 return &rte_eth_devices[port_id]; 745 } 746 return NULL; 747 } 748 749 /** 750 * DPDK callback to retrieve physical link information. 751 * 752 * @param dev 753 * Pointer to Ethernet device structure. 754 * @param[out] link 755 * Storage for current link status. 756 * 757 * @return 758 * 0 on success, a negative errno value otherwise and rte_errno is set. 
759 */ 760 static int 761 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 762 struct rte_eth_link *link) 763 { 764 struct mlx5_priv *priv = dev->data->dev_private; 765 struct ethtool_cmd edata = { 766 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 767 }; 768 struct ifreq ifr; 769 struct rte_eth_link dev_link; 770 int link_speed = 0; 771 int ret; 772 773 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 774 if (ret) { 775 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 776 dev->data->port_id, strerror(rte_errno)); 777 return ret; 778 } 779 dev_link = (struct rte_eth_link) { 780 .link_status = ((ifr.ifr_flags & IFF_UP) && 781 (ifr.ifr_flags & IFF_RUNNING)), 782 }; 783 ifr = (struct ifreq) { 784 .ifr_data = (void *)&edata, 785 }; 786 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 787 if (ret) { 788 if (ret == -ENOTSUP && priv->representor) { 789 struct rte_eth_dev *master; 790 791 /* 792 * For representors we can try to inherit link 793 * settings from the master device. Actually 794 * link settings do not make a lot of sense 795 * for representors due to missing physical 796 * link. The old kernel drivers supported 797 * emulated settings query for representors, 798 * the new ones do not, so we have to add 799 * this code for compatibility issues. 
800 */ 801 master = mlx5_find_master_dev(dev); 802 if (master) { 803 ifr = (struct ifreq) { 804 .ifr_data = (void *)&edata, 805 }; 806 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr); 807 } 808 } 809 if (ret) { 810 DRV_LOG(WARNING, 811 "port %u ioctl(SIOCETHTOOL," 812 " ETHTOOL_GSET) failed: %s", 813 dev->data->port_id, strerror(rte_errno)); 814 return ret; 815 } 816 } 817 link_speed = ethtool_cmd_speed(&edata); 818 if (link_speed == -1) 819 dev_link.link_speed = ETH_SPEED_NUM_NONE; 820 else 821 dev_link.link_speed = link_speed; 822 priv->link_speed_capa = 0; 823 if (edata.supported & SUPPORTED_Autoneg) 824 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 825 if (edata.supported & (SUPPORTED_1000baseT_Full | 826 SUPPORTED_1000baseKX_Full)) 827 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 828 if (edata.supported & SUPPORTED_10000baseKR_Full) 829 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 830 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 831 SUPPORTED_40000baseCR4_Full | 832 SUPPORTED_40000baseSR4_Full | 833 SUPPORTED_40000baseLR4_Full)) 834 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 835 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 836 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 837 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 838 ETH_LINK_SPEED_FIXED); 839 if (((dev_link.link_speed && !dev_link.link_status) || 840 (!dev_link.link_speed && dev_link.link_status))) { 841 rte_errno = EAGAIN; 842 return -rte_errno; 843 } 844 *link = dev_link; 845 return 0; 846 } 847 848 /** 849 * Retrieve physical link information (unlocked version using new ioctl). 850 * 851 * @param dev 852 * Pointer to Ethernet device structure. 853 * @param[out] link 854 * Storage for current link status. 855 * 856 * @return 857 * 0 on success, a negative errno value otherwise and rte_errno is set. 
858 */ 859 static int 860 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 861 struct rte_eth_link *link) 862 863 { 864 struct mlx5_priv *priv = dev->data->dev_private; 865 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 866 struct ifreq ifr; 867 struct rte_eth_link dev_link; 868 struct rte_eth_dev *master = NULL; 869 uint64_t sc; 870 int ret; 871 872 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 873 if (ret) { 874 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 875 dev->data->port_id, strerror(rte_errno)); 876 return ret; 877 } 878 dev_link = (struct rte_eth_link) { 879 .link_status = ((ifr.ifr_flags & IFF_UP) && 880 (ifr.ifr_flags & IFF_RUNNING)), 881 }; 882 ifr = (struct ifreq) { 883 .ifr_data = (void *)&gcmd, 884 }; 885 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 886 if (ret) { 887 if (ret == -ENOTSUP && priv->representor) { 888 /* 889 * For representors we can try to inherit link 890 * settings from the master device. Actually 891 * link settings do not make a lot of sense 892 * for representors due to missing physical 893 * link. The old kernel drivers supported 894 * emulated settings query for representors, 895 * the new ones do not, so we have to add 896 * this code for compatibility issues. 
897 */ 898 master = mlx5_find_master_dev(dev); 899 if (master) { 900 ifr = (struct ifreq) { 901 .ifr_data = (void *)&gcmd, 902 }; 903 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr); 904 } 905 } 906 if (ret) { 907 DRV_LOG(DEBUG, 908 "port %u ioctl(SIOCETHTOOL," 909 " ETHTOOL_GLINKSETTINGS) failed: %s", 910 dev->data->port_id, strerror(rte_errno)); 911 return ret; 912 } 913 914 } 915 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 916 917 alignas(struct ethtool_link_settings) 918 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 919 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 920 struct ethtool_link_settings *ecmd = (void *)data; 921 922 *ecmd = gcmd; 923 ifr.ifr_data = (void *)ecmd; 924 ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr); 925 if (ret) { 926 DRV_LOG(DEBUG, 927 "port %u ioctl(SIOCETHTOOL," 928 "ETHTOOL_GLINKSETTINGS) failed: %s", 929 dev->data->port_id, strerror(rte_errno)); 930 return ret; 931 } 932 dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? 
ETH_SPEED_NUM_NONE : 933 ecmd->speed; 934 sc = ecmd->link_mode_masks[0] | 935 ((uint64_t)ecmd->link_mode_masks[1] << 32); 936 priv->link_speed_capa = 0; 937 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 938 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 939 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 940 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 941 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 942 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 943 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 944 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 945 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 946 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 947 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 948 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 949 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 950 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 951 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 952 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 953 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 954 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 955 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 956 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 957 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 958 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 959 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 960 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 961 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 962 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 963 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 964 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 965 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 966 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 967 
MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 968 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 969 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 970 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 971 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 972 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 973 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 974 ETH_LINK_SPEED_FIXED); 975 if (((dev_link.link_speed && !dev_link.link_status) || 976 (!dev_link.link_speed && dev_link.link_status))) { 977 rte_errno = EAGAIN; 978 return -rte_errno; 979 } 980 *link = dev_link; 981 return 0; 982 } 983 984 /** 985 * DPDK callback to retrieve physical link information. 986 * 987 * @param dev 988 * Pointer to Ethernet device structure. 989 * @param wait_to_complete 990 * Wait for request completion. 991 * 992 * @return 993 * 0 if link status was not updated, positive if it was, a negative errno 994 * value otherwise and rte_errno is set. 995 */ 996 int 997 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 998 { 999 int ret; 1000 struct rte_eth_link dev_link; 1001 time_t start_time = time(NULL); 1002 int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT; 1003 1004 do { 1005 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 1006 if (ret == -ENOTSUP) 1007 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 1008 if (ret == 0) 1009 break; 1010 /* Handle wait to complete situation. */ 1011 if ((wait_to_complete || retry) && ret == -EAGAIN) { 1012 if (abs((int)difftime(time(NULL), start_time)) < 1013 MLX5_LINK_STATUS_TIMEOUT) { 1014 usleep(0); 1015 continue; 1016 } else { 1017 rte_errno = EBUSY; 1018 return -rte_errno; 1019 } 1020 } else if (ret < 0) { 1021 return ret; 1022 } 1023 } while (wait_to_complete || retry-- > 0); 1024 ret = !!memcmp(&dev->data->dev_link, &dev_link, 1025 sizeof(struct rte_eth_link)); 1026 dev->data->dev_link = dev_link; 1027 return ret; 1028 } 1029 1030 /** 1031 * DPDK callback to change the MTU. 
1032 * 1033 * @param dev 1034 * Pointer to Ethernet device structure. 1035 * @param in_mtu 1036 * New MTU. 1037 * 1038 * @return 1039 * 0 on success, a negative errno value otherwise and rte_errno is set. 1040 */ 1041 int 1042 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 1043 { 1044 struct mlx5_priv *priv = dev->data->dev_private; 1045 uint16_t kern_mtu = 0; 1046 int ret; 1047 1048 ret = mlx5_get_mtu(dev, &kern_mtu); 1049 if (ret) 1050 return ret; 1051 /* Set kernel interface MTU first. */ 1052 ret = mlx5_set_mtu(dev, mtu); 1053 if (ret) 1054 return ret; 1055 ret = mlx5_get_mtu(dev, &kern_mtu); 1056 if (ret) 1057 return ret; 1058 if (kern_mtu == mtu) { 1059 priv->mtu = mtu; 1060 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 1061 dev->data->port_id, mtu); 1062 return 0; 1063 } 1064 rte_errno = EAGAIN; 1065 return -rte_errno; 1066 } 1067 1068 /** 1069 * DPDK callback to get flow control status. 1070 * 1071 * @param dev 1072 * Pointer to Ethernet device structure. 1073 * @param[out] fc_conf 1074 * Flow control output buffer. 1075 * 1076 * @return 1077 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1078 */ 1079 int 1080 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1081 { 1082 struct ifreq ifr; 1083 struct ethtool_pauseparam ethpause = { 1084 .cmd = ETHTOOL_GPAUSEPARAM 1085 }; 1086 int ret; 1087 1088 ifr.ifr_data = (void *)ðpause; 1089 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1090 if (ret) { 1091 DRV_LOG(WARNING, 1092 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 1093 " %s", 1094 dev->data->port_id, strerror(rte_errno)); 1095 return ret; 1096 } 1097 fc_conf->autoneg = ethpause.autoneg; 1098 if (ethpause.rx_pause && ethpause.tx_pause) 1099 fc_conf->mode = RTE_FC_FULL; 1100 else if (ethpause.rx_pause) 1101 fc_conf->mode = RTE_FC_RX_PAUSE; 1102 else if (ethpause.tx_pause) 1103 fc_conf->mode = RTE_FC_TX_PAUSE; 1104 else 1105 fc_conf->mode = RTE_FC_NONE; 1106 return 0; 1107 } 1108 1109 /** 1110 * DPDK callback to modify flow control parameters. 1111 * 1112 * @param dev 1113 * Pointer to Ethernet device structure. 1114 * @param[in] fc_conf 1115 * Flow control parameters. 1116 * 1117 * @return 1118 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1119 */ 1120 int 1121 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1122 { 1123 struct ifreq ifr; 1124 struct ethtool_pauseparam ethpause = { 1125 .cmd = ETHTOOL_SPAUSEPARAM 1126 }; 1127 int ret; 1128 1129 ifr.ifr_data = (void *)ðpause; 1130 ethpause.autoneg = fc_conf->autoneg; 1131 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1132 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1133 ethpause.rx_pause = 1; 1134 else 1135 ethpause.rx_pause = 0; 1136 1137 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1138 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1139 ethpause.tx_pause = 1; 1140 else 1141 ethpause.tx_pause = 0; 1142 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1143 if (ret) { 1144 DRV_LOG(WARNING, 1145 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1146 " failed: %s", 1147 dev->data->port_id, strerror(rte_errno)); 1148 return ret; 1149 } 1150 return 0; 1151 } 1152 1153 /** 1154 * Get PCI information by sysfs device path. 1155 * 1156 * @param dev_path 1157 * Pointer to device sysfs folder name. 1158 * @param[out] pci_addr 1159 * PCI bus address output buffer. 1160 * 1161 * @return 1162 * 0 on success, a negative errno value otherwise and rte_errno is set. 1163 */ 1164 int 1165 mlx5_dev_to_pci_addr(const char *dev_path, 1166 struct rte_pci_addr *pci_addr) 1167 { 1168 FILE *file; 1169 char line[32]; 1170 MKSTR(path, "%s/device/uevent", dev_path); 1171 1172 file = fopen(path, "rb"); 1173 if (file == NULL) { 1174 rte_errno = errno; 1175 return -rte_errno; 1176 } 1177 while (fgets(line, sizeof(line), file) == line) { 1178 size_t len = strlen(line); 1179 int ret; 1180 1181 /* Truncate long lines. */ 1182 if (len == (sizeof(line) - 1)) 1183 while (line[(len - 1)] != '\n') { 1184 ret = fgetc(file); 1185 if (ret == EOF) 1186 break; 1187 line[(len - 1)] = ret; 1188 } 1189 /* Extract information. 
*/ 1190 if (sscanf(line, 1191 "PCI_SLOT_NAME=" 1192 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1193 &pci_addr->domain, 1194 &pci_addr->bus, 1195 &pci_addr->devid, 1196 &pci_addr->function) == 4) { 1197 ret = 0; 1198 break; 1199 } 1200 } 1201 fclose(file); 1202 return 0; 1203 } 1204 1205 /** 1206 * Handle asynchronous removal event for entire multiport device. 1207 * 1208 * @param sh 1209 * Infiniband device shared context. 1210 */ 1211 static void 1212 mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) 1213 { 1214 uint32_t i; 1215 1216 for (i = 0; i < sh->max_port; ++i) { 1217 struct rte_eth_dev *dev; 1218 1219 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { 1220 /* 1221 * Or not existing port either no 1222 * handler installed for this port. 1223 */ 1224 continue; 1225 } 1226 dev = &rte_eth_devices[sh->port[i].ih_port_id]; 1227 assert(dev); 1228 if (dev->data->dev_conf.intr_conf.rmv) 1229 _rte_eth_dev_callback_process 1230 (dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1231 } 1232 } 1233 1234 /** 1235 * Handle shared asynchronous events the NIC (removal event 1236 * and link status change). Supports multiport IB device. 1237 * 1238 * @param cb_arg 1239 * Callback argument. 1240 */ 1241 void 1242 mlx5_dev_interrupt_handler(void *cb_arg) 1243 { 1244 struct mlx5_ibv_shared *sh = cb_arg; 1245 struct ibv_async_event event; 1246 1247 /* Read all message from the IB device and acknowledge them. */ 1248 for (;;) { 1249 struct rte_eth_dev *dev; 1250 uint32_t tmp; 1251 1252 if (mlx5_glue->get_async_event(sh->ctx, &event)) 1253 break; 1254 /* Retrieve and check IB port index. */ 1255 tmp = (uint32_t)event.element.port_num; 1256 if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) { 1257 /* 1258 * The DEVICE_FATAL event is called once for 1259 * entire device without port specifying. 1260 * We should notify all existing ports. 
1261 */ 1262 mlx5_glue->ack_async_event(&event); 1263 mlx5_dev_interrupt_device_fatal(sh); 1264 continue; 1265 } 1266 assert(tmp && (tmp <= sh->max_port)); 1267 if (!tmp) { 1268 /* Unsupported devive level event. */ 1269 mlx5_glue->ack_async_event(&event); 1270 DRV_LOG(DEBUG, 1271 "unsupported common event (type %d)", 1272 event.event_type); 1273 continue; 1274 } 1275 if (tmp > sh->max_port) { 1276 /* Invalid IB port index. */ 1277 mlx5_glue->ack_async_event(&event); 1278 DRV_LOG(DEBUG, 1279 "cannot handle an event (type %d)" 1280 "due to invalid IB port index (%u)", 1281 event.event_type, tmp); 1282 continue; 1283 } 1284 if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { 1285 /* No handler installed. */ 1286 mlx5_glue->ack_async_event(&event); 1287 DRV_LOG(DEBUG, 1288 "cannot handle an event (type %d)" 1289 "due to no handler installed for port %u", 1290 event.event_type, tmp); 1291 continue; 1292 } 1293 /* Retrieve ethernet device descriptor. */ 1294 tmp = sh->port[tmp - 1].ih_port_id; 1295 dev = &rte_eth_devices[tmp]; 1296 assert(dev); 1297 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1298 event.event_type == IBV_EVENT_PORT_ERR) && 1299 dev->data->dev_conf.intr_conf.lsc) { 1300 mlx5_glue->ack_async_event(&event); 1301 if (mlx5_link_update(dev, 0) == -EAGAIN) { 1302 usleep(0); 1303 continue; 1304 } 1305 _rte_eth_dev_callback_process 1306 (dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1307 continue; 1308 } 1309 DRV_LOG(DEBUG, 1310 "port %u cannot handle an unknown event (type %d)", 1311 dev->data->port_id, event.event_type); 1312 mlx5_glue->ack_async_event(&event); 1313 } 1314 } 1315 1316 /* 1317 * Unregister callback handler safely. The handler may be active 1318 * while we are trying to unregister it, in this case code -EAGAIN 1319 * is returned by rte_intr_callback_unregister(). This routine checks 1320 * the return code and tries to unregister handler again. 
1321 * 1322 * @param handle 1323 * interrupt handle 1324 * @param cb_fn 1325 * pointer to callback routine 1326 * @cb_arg 1327 * opaque callback parameter 1328 */ 1329 void 1330 mlx5_intr_callback_unregister(const struct rte_intr_handle *handle, 1331 rte_intr_callback_fn cb_fn, void *cb_arg) 1332 { 1333 /* 1334 * Try to reduce timeout management overhead by not calling 1335 * the timer related routines on the first iteration. If the 1336 * unregistering succeeds on first call there will be no 1337 * timer calls at all. 1338 */ 1339 uint64_t twait = 0; 1340 uint64_t start = 0; 1341 1342 do { 1343 int ret; 1344 1345 ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg); 1346 if (ret >= 0) 1347 return; 1348 if (ret != -EAGAIN) { 1349 DRV_LOG(INFO, "failed to unregister interrupt" 1350 " handler (error: %d)", ret); 1351 assert(false); 1352 return; 1353 } 1354 if (twait) { 1355 struct timespec onems; 1356 1357 /* Wait one millisecond and try again. */ 1358 onems.tv_sec = 0; 1359 onems.tv_nsec = NS_PER_S / MS_PER_S; 1360 nanosleep(&onems, 0); 1361 /* Check whether one second elapsed. */ 1362 if ((rte_get_timer_cycles() - start) <= twait) 1363 continue; 1364 } else { 1365 /* 1366 * We get the amount of timer ticks for one second. 1367 * If this amount elapsed it means we spent one 1368 * second in waiting. This branch is executed once 1369 * on first iteration. 1370 */ 1371 twait = rte_get_timer_hz(); 1372 assert(twait); 1373 } 1374 /* 1375 * Timeout elapsed, show message (once a second) and retry. 1376 * We have no other acceptable option here, if we ignore 1377 * the unregistering return code the handler will not 1378 * be unregistered, fd will be closed and we may get the 1379 * crush. Hanging and messaging in the loop seems not to be 1380 * the worst choice. 1381 */ 1382 DRV_LOG(INFO, "Retrying to unregister interrupt handler"); 1383 start = rte_get_timer_cycles(); 1384 } while (true); 1385 } 1386 1387 /** 1388 * Handle DEVX interrupts from the NIC. 
1389 * This function is probably called from the DPDK host thread. 1390 * 1391 * @param cb_arg 1392 * Callback argument. 1393 */ 1394 void 1395 mlx5_dev_interrupt_handler_devx(void *cb_arg) 1396 { 1397 #ifndef HAVE_IBV_DEVX_ASYNC 1398 (void)cb_arg; 1399 return; 1400 #else 1401 struct mlx5_ibv_shared *sh = cb_arg; 1402 union { 1403 struct mlx5dv_devx_async_cmd_hdr cmd_resp; 1404 uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) + 1405 MLX5_ST_SZ_BYTES(traffic_counter) + 1406 sizeof(struct mlx5dv_devx_async_cmd_hdr)]; 1407 } out; 1408 uint8_t *buf = out.buf + sizeof(out.cmd_resp); 1409 1410 while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp, 1411 &out.cmd_resp, 1412 sizeof(out.buf))) 1413 mlx5_flow_async_pool_query_handle 1414 (sh, (uint64_t)out.cmd_resp.wr_id, 1415 mlx5_devx_get_out_command_status(buf)); 1416 #endif /* HAVE_IBV_DEVX_ASYNC */ 1417 } 1418 1419 /** 1420 * Uninstall shared asynchronous device events handler. 1421 * This function is implemented to support event sharing 1422 * between multiple ports of single IB device. 1423 * 1424 * @param dev 1425 * Pointer to Ethernet device. 
1426 */ 1427 static void 1428 mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev) 1429 { 1430 struct mlx5_priv *priv = dev->data->dev_private; 1431 struct mlx5_ibv_shared *sh = priv->sh; 1432 1433 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1434 return; 1435 pthread_mutex_lock(&sh->intr_mutex); 1436 assert(priv->ibv_port); 1437 assert(priv->ibv_port <= sh->max_port); 1438 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1439 if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS) 1440 goto exit; 1441 assert(sh->port[priv->ibv_port - 1].ih_port_id == 1442 (uint32_t)dev->data->port_id); 1443 assert(sh->intr_cnt); 1444 sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS; 1445 if (!sh->intr_cnt || --sh->intr_cnt) 1446 goto exit; 1447 mlx5_intr_callback_unregister(&sh->intr_handle, 1448 mlx5_dev_interrupt_handler, sh); 1449 sh->intr_handle.fd = 0; 1450 sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1451 if (sh->intr_handle_devx.fd) { 1452 rte_intr_callback_unregister(&sh->intr_handle_devx, 1453 mlx5_dev_interrupt_handler_devx, 1454 sh); 1455 sh->intr_handle_devx.fd = 0; 1456 sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN; 1457 } 1458 if (sh->devx_comp) { 1459 mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp); 1460 sh->devx_comp = NULL; 1461 } 1462 exit: 1463 pthread_mutex_unlock(&sh->intr_mutex); 1464 } 1465 1466 /** 1467 * Install shared asynchronous device events handler. 1468 * This function is implemented to support event sharing 1469 * between multiple ports of single IB device. 1470 * 1471 * @param dev 1472 * Pointer to Ethernet device. 
1473 */ 1474 static void 1475 mlx5_dev_shared_handler_install(struct rte_eth_dev *dev) 1476 { 1477 struct mlx5_priv *priv = dev->data->dev_private; 1478 struct mlx5_ibv_shared *sh = priv->sh; 1479 int ret; 1480 int flags; 1481 1482 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1483 return; 1484 pthread_mutex_lock(&sh->intr_mutex); 1485 assert(priv->ibv_port); 1486 assert(priv->ibv_port <= sh->max_port); 1487 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1488 if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) { 1489 /* The handler is already installed for this port. */ 1490 assert(sh->intr_cnt); 1491 goto exit; 1492 } 1493 sh->port[priv->ibv_port - 1].ih_port_id = (uint32_t)dev->data->port_id; 1494 if (sh->intr_cnt) { 1495 sh->intr_cnt++; 1496 goto exit; 1497 } 1498 /* No shared handler installed. */ 1499 assert(sh->ctx->async_fd > 0); 1500 flags = fcntl(sh->ctx->async_fd, F_GETFL); 1501 ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1502 if (ret) { 1503 DRV_LOG(INFO, "failed to change file descriptor" 1504 " async event queue"); 1505 goto error; 1506 } 1507 sh->intr_handle.fd = sh->ctx->async_fd; 1508 sh->intr_handle.type = RTE_INTR_HANDLE_EXT; 1509 rte_intr_callback_register(&sh->intr_handle, 1510 mlx5_dev_interrupt_handler, sh); 1511 if (priv->config.devx) { 1512 #ifndef HAVE_IBV_DEVX_ASYNC 1513 goto error_unregister; 1514 #else 1515 sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx); 1516 if (sh->devx_comp) { 1517 flags = fcntl(sh->devx_comp->fd, F_GETFL); 1518 ret = fcntl(sh->devx_comp->fd, F_SETFL, 1519 flags | O_NONBLOCK); 1520 if (ret) { 1521 DRV_LOG(INFO, "failed to change file descriptor" 1522 " devx async event queue"); 1523 goto error_unregister; 1524 } 1525 sh->intr_handle_devx.fd = sh->devx_comp->fd; 1526 sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT; 1527 rte_intr_callback_register 1528 (&sh->intr_handle_devx, 1529 mlx5_dev_interrupt_handler_devx, sh); 1530 } else { 1531 DRV_LOG(INFO, "failed to create devx async 
command " 1532 "completion"); 1533 goto error_unregister; 1534 } 1535 #endif /* HAVE_IBV_DEVX_ASYNC */ 1536 } 1537 sh->intr_cnt++; 1538 goto exit; 1539 error_unregister: 1540 rte_intr_callback_unregister(&sh->intr_handle, 1541 mlx5_dev_interrupt_handler, sh); 1542 error: 1543 /* Indicate there will be no interrupts. */ 1544 dev->data->dev_conf.intr_conf.lsc = 0; 1545 dev->data->dev_conf.intr_conf.rmv = 0; 1546 sh->intr_handle.fd = 0; 1547 sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1548 sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS; 1549 exit: 1550 pthread_mutex_unlock(&sh->intr_mutex); 1551 } 1552 1553 /** 1554 * Uninstall interrupt handler. 1555 * 1556 * @param dev 1557 * Pointer to Ethernet device. 1558 */ 1559 void 1560 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 1561 { 1562 mlx5_dev_shared_handler_uninstall(dev); 1563 } 1564 1565 /** 1566 * Install interrupt handler. 1567 * 1568 * @param dev 1569 * Pointer to Ethernet device. 1570 */ 1571 void 1572 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1573 { 1574 mlx5_dev_shared_handler_install(dev); 1575 } 1576 1577 /** 1578 * DPDK callback to bring the link DOWN. 1579 * 1580 * @param dev 1581 * Pointer to Ethernet device structure. 1582 * 1583 * @return 1584 * 0 on success, a negative errno value otherwise and rte_errno is set. 1585 */ 1586 int 1587 mlx5_set_link_down(struct rte_eth_dev *dev) 1588 { 1589 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1590 } 1591 1592 /** 1593 * DPDK callback to bring the link UP. 1594 * 1595 * @param dev 1596 * Pointer to Ethernet device structure. 1597 * 1598 * @return 1599 * 0 on success, a negative errno value otherwise and rte_errno is set. 1600 */ 1601 int 1602 mlx5_set_link_up(struct rte_eth_dev *dev) 1603 { 1604 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1605 } 1606 1607 /** 1608 * Configure the RX function to use. 1609 * 1610 * @param dev 1611 * Pointer to private data structure. 
1612 * 1613 * @return 1614 * Pointer to selected Rx burst function. 1615 */ 1616 eth_rx_burst_t 1617 mlx5_select_rx_function(struct rte_eth_dev *dev) 1618 { 1619 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1620 1621 assert(dev != NULL); 1622 if (mlx5_check_vec_rx_support(dev) > 0) { 1623 rx_pkt_burst = mlx5_rx_burst_vec; 1624 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1625 dev->data->port_id); 1626 } else if (mlx5_mprq_enabled(dev)) { 1627 rx_pkt_burst = mlx5_rx_burst_mprq; 1628 } 1629 return rx_pkt_burst; 1630 } 1631 1632 /** 1633 * Check if mlx5 device was removed. 1634 * 1635 * @param dev 1636 * Pointer to Ethernet device structure. 1637 * 1638 * @return 1639 * 1 when device is removed, otherwise 0. 1640 */ 1641 int 1642 mlx5_is_removed(struct rte_eth_dev *dev) 1643 { 1644 struct ibv_device_attr device_attr; 1645 struct mlx5_priv *priv = dev->data->dev_private; 1646 1647 if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO) 1648 return 1; 1649 return 0; 1650 } 1651 1652 /** 1653 * Get the E-Switch parameters by port id. 1654 * 1655 * @param[in] port 1656 * Device port id. 1657 * @param[out] es_domain_id 1658 * E-Switch domain id. 1659 * @param[out] es_port_id 1660 * The port id of the port in the E-Switch. 1661 * 1662 * @return 1663 * pointer to device private data structure containing data needed 1664 * on success, NULL otherwise and rte_errno is set. 1665 */ 1666 struct mlx5_priv * 1667 mlx5_port_to_eswitch_info(uint16_t port) 1668 { 1669 struct rte_eth_dev *dev; 1670 struct mlx5_priv *priv; 1671 1672 if (port >= RTE_MAX_ETHPORTS) { 1673 rte_errno = EINVAL; 1674 return NULL; 1675 } 1676 if (!rte_eth_dev_is_valid_port(port)) { 1677 rte_errno = ENODEV; 1678 return NULL; 1679 } 1680 dev = &rte_eth_devices[port]; 1681 priv = dev->data->dev_private; 1682 if (!(priv->representor || priv->master)) { 1683 rte_errno = EINVAL; 1684 return NULL; 1685 } 1686 return priv; 1687 } 1688 1689 /** 1690 * Get the E-Switch parameters by device instance. 
1691 * 1692 * @param[in] port 1693 * Device port id. 1694 * @param[out] es_domain_id 1695 * E-Switch domain id. 1696 * @param[out] es_port_id 1697 * The port id of the port in the E-Switch. 1698 * 1699 * @return 1700 * pointer to device private data structure containing data needed 1701 * on success, NULL otherwise and rte_errno is set. 1702 */ 1703 struct mlx5_priv * 1704 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 1705 { 1706 struct mlx5_priv *priv; 1707 1708 priv = dev->data->dev_private; 1709 if (!(priv->representor || priv->master)) { 1710 rte_errno = EINVAL; 1711 return NULL; 1712 } 1713 return priv; 1714 } 1715 1716 /** 1717 * Get switch information associated with network interface. 1718 * 1719 * @param ifindex 1720 * Network interface index. 1721 * @param[out] info 1722 * Switch information object, populated in case of success. 1723 * 1724 * @return 1725 * 0 on success, a negative errno value otherwise and rte_errno is set. 1726 */ 1727 int 1728 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) 1729 { 1730 char ifname[IF_NAMESIZE]; 1731 char port_name[IF_NAMESIZE]; 1732 FILE *file; 1733 struct mlx5_switch_info data = { 1734 .master = 0, 1735 .representor = 0, 1736 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1737 .port_name = 0, 1738 .switch_id = 0, 1739 }; 1740 DIR *dir; 1741 bool port_switch_id_set = false; 1742 bool device_dir = false; 1743 char c; 1744 int ret; 1745 1746 if (!if_indextoname(ifindex, ifname)) { 1747 rte_errno = errno; 1748 return -rte_errno; 1749 } 1750 1751 MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name", 1752 ifname); 1753 MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", 1754 ifname); 1755 MKSTR(pci_device, "/sys/class/net/%s/device", 1756 ifname); 1757 1758 file = fopen(phys_port_name, "rb"); 1759 if (file != NULL) { 1760 ret = fscanf(file, "%s", port_name); 1761 fclose(file); 1762 if (ret == 1) 1763 mlx5_translate_port_name(port_name, &data); 1764 } 1765 file = fopen(phys_switch_id, 
"rb"); 1766 if (file == NULL) { 1767 rte_errno = errno; 1768 return -rte_errno; 1769 } 1770 port_switch_id_set = 1771 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && 1772 c == '\n'; 1773 fclose(file); 1774 dir = opendir(pci_device); 1775 if (dir != NULL) { 1776 closedir(dir); 1777 device_dir = true; 1778 } 1779 if (port_switch_id_set) { 1780 /* We have some E-Switch configuration. */ 1781 mlx5_sysfs_check_switch_info(device_dir, &data); 1782 } 1783 *info = data; 1784 assert(!(data.master && data.representor)); 1785 if (data.master && data.representor) { 1786 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1787 " and as representor", ifindex); 1788 rte_errno = ENODEV; 1789 return -rte_errno; 1790 } 1791 return 0; 1792 } 1793 1794 /** 1795 * Analyze gathered port parameters via Netlink to recognize master 1796 * and representor devices for E-Switch configuration. 1797 * 1798 * @param[in] num_vf_set 1799 * flag of presence of number of VFs port attribute. 1800 * @param[inout] switch_info 1801 * Port information, including port name as a number and port name 1802 * type if recognized 1803 * 1804 * @return 1805 * master and representor flags are set in switch_info according to 1806 * recognized parameters (if any). 1807 */ 1808 void 1809 mlx5_nl_check_switch_info(bool num_vf_set, 1810 struct mlx5_switch_info *switch_info) 1811 { 1812 switch (switch_info->name_type) { 1813 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1814 /* 1815 * Name is not recognized, assume the master, 1816 * check the number of VFs key presence. 1817 */ 1818 switch_info->master = num_vf_set; 1819 break; 1820 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1821 /* 1822 * Name is not set, this assumes the legacy naming 1823 * schema for master, just check if there is a 1824 * number of VFs key. 1825 */ 1826 switch_info->master = num_vf_set; 1827 break; 1828 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1829 /* New uplink naming schema recognized. 
*/ 1830 switch_info->master = 1; 1831 break; 1832 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1833 /* Legacy representors naming schema. */ 1834 switch_info->representor = !num_vf_set; 1835 break; 1836 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1837 /* New representors naming schema. */ 1838 switch_info->representor = 1; 1839 break; 1840 } 1841 } 1842 1843 /** 1844 * Analyze gathered port parameters via sysfs to recognize master 1845 * and representor devices for E-Switch configuration. 1846 * 1847 * @param[in] device_dir 1848 * flag of presence of "device" directory under port device key. 1849 * @param[inout] switch_info 1850 * Port information, including port name as a number and port name 1851 * type if recognized 1852 * 1853 * @return 1854 * master and representor flags are set in switch_info according to 1855 * recognized parameters (if any). 1856 */ 1857 void 1858 mlx5_sysfs_check_switch_info(bool device_dir, 1859 struct mlx5_switch_info *switch_info) 1860 { 1861 switch (switch_info->name_type) { 1862 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1863 /* 1864 * Name is not recognized, assume the master, 1865 * check the device directory presence. 1866 */ 1867 switch_info->master = device_dir; 1868 break; 1869 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1870 /* 1871 * Name is not set, this assumes the legacy naming 1872 * schema for master, just check if there is 1873 * a device directory. 1874 */ 1875 switch_info->master = device_dir; 1876 break; 1877 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1878 /* New uplink naming schema recognized. */ 1879 switch_info->master = 1; 1880 break; 1881 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1882 /* Legacy representors naming schema. */ 1883 switch_info->representor = !device_dir; 1884 break; 1885 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1886 /* New representors naming schema. */ 1887 switch_info->representor = 1; 1888 break; 1889 } 1890 } 1891 1892 /** 1893 * Extract port name, as a number, from sysfs or netlink information. 
1894 * 1895 * @param[in] port_name_in 1896 * String representing the port name. 1897 * @param[out] port_info_out 1898 * Port information, including port name as a number and port name 1899 * type if recognized 1900 * 1901 * @return 1902 * port_name field set according to recognized name format. 1903 */ 1904 void 1905 mlx5_translate_port_name(const char *port_name_in, 1906 struct mlx5_switch_info *port_info_out) 1907 { 1908 char pf_c1, pf_c2, vf_c1, vf_c2; 1909 char *end; 1910 int sc_items; 1911 1912 /* 1913 * Check for port-name as a string of the form pf0vf0 1914 * (support kernel ver >= 5.0 or OFED ver >= 4.6). 1915 */ 1916 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", 1917 &pf_c1, &pf_c2, &port_info_out->pf_num, 1918 &vf_c1, &vf_c2, &port_info_out->port_name); 1919 if (sc_items == 6 && 1920 pf_c1 == 'p' && pf_c2 == 'f' && 1921 vf_c1 == 'v' && vf_c2 == 'f') { 1922 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; 1923 return; 1924 } 1925 /* 1926 * Check for port-name as a string of the form p0 1927 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 1928 */ 1929 sc_items = sscanf(port_name_in, "%c%d", 1930 &pf_c1, &port_info_out->port_name); 1931 if (sc_items == 2 && pf_c1 == 'p') { 1932 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 1933 return; 1934 } 1935 /* Check for port-name as a number (support kernel ver < 5.0 */ 1936 errno = 0; 1937 port_info_out->port_name = strtol(port_name_in, &end, 0); 1938 if (!errno && 1939 (size_t)(end - port_name_in) == strlen(port_name_in)) { 1940 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 1941 return; 1942 } 1943 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 1944 return; 1945 } 1946 1947 /** 1948 * DPDK callback to retrieve plug-in module EEPROM information (type and size). 1949 * 1950 * @param dev 1951 * Pointer to Ethernet device structure. 1952 * @param[out] modinfo 1953 * Storage for plug-in module EEPROM information. 
1954 * 1955 * @return 1956 * 0 on success, a negative errno value otherwise and rte_errno is set. 1957 */ 1958 int 1959 mlx5_get_module_info(struct rte_eth_dev *dev, 1960 struct rte_eth_dev_module_info *modinfo) 1961 { 1962 struct ethtool_modinfo info = { 1963 .cmd = ETHTOOL_GMODULEINFO, 1964 }; 1965 struct ifreq ifr = (struct ifreq) { 1966 .ifr_data = (void *)&info, 1967 }; 1968 int ret = 0; 1969 1970 if (!dev || !modinfo) { 1971 DRV_LOG(WARNING, "missing argument, cannot get module info"); 1972 rte_errno = EINVAL; 1973 return -rte_errno; 1974 } 1975 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1976 if (ret) { 1977 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 1978 dev->data->port_id, strerror(rte_errno)); 1979 return ret; 1980 } 1981 modinfo->type = info.type; 1982 modinfo->eeprom_len = info.eeprom_len; 1983 return ret; 1984 } 1985 1986 /** 1987 * DPDK callback to retrieve plug-in module EEPROM data. 1988 * 1989 * @param dev 1990 * Pointer to Ethernet device structure. 1991 * @param[out] info 1992 * Storage for plug-in module EEPROM data. 1993 * 1994 * @return 1995 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1996 */ 1997 int mlx5_get_module_eeprom(struct rte_eth_dev *dev, 1998 struct rte_dev_eeprom_info *info) 1999 { 2000 struct ethtool_eeprom *eeprom; 2001 struct ifreq ifr; 2002 int ret = 0; 2003 2004 if (!dev || !info) { 2005 DRV_LOG(WARNING, "missing argument, cannot get module eeprom"); 2006 rte_errno = EINVAL; 2007 return -rte_errno; 2008 } 2009 eeprom = rte_calloc(__func__, 1, 2010 (sizeof(struct ethtool_eeprom) + info->length), 0); 2011 if (!eeprom) { 2012 DRV_LOG(WARNING, "port %u cannot allocate memory for " 2013 "eeprom data", dev->data->port_id); 2014 rte_errno = ENOMEM; 2015 return -rte_errno; 2016 } 2017 eeprom->cmd = ETHTOOL_GMODULEEEPROM; 2018 eeprom->offset = info->offset; 2019 eeprom->len = info->length; 2020 ifr = (struct ifreq) { 2021 .ifr_data = (void *)eeprom, 2022 }; 2023 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2024 if (ret) 2025 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 2026 dev->data->port_id, strerror(rte_errno)); 2027 else 2028 rte_memcpy(info->data, eeprom->data, info->length); 2029 rte_free(eeprom); 2030 return ret; 2031 } 2032