1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <inttypes.h> 9 #include <unistd.h> 10 #include <stdbool.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <string.h> 14 #include <stdlib.h> 15 #include <errno.h> 16 #include <dirent.h> 17 #include <net/if.h> 18 #include <sys/ioctl.h> 19 #include <sys/socket.h> 20 #include <netinet/in.h> 21 #include <linux/ethtool.h> 22 #include <linux/sockios.h> 23 #include <fcntl.h> 24 #include <stdalign.h> 25 #include <sys/un.h> 26 #include <time.h> 27 28 #include <rte_atomic.h> 29 #include <rte_ethdev_driver.h> 30 #include <rte_bus_pci.h> 31 #include <rte_mbuf.h> 32 #include <rte_common.h> 33 #include <rte_interrupts.h> 34 #include <rte_malloc.h> 35 #include <rte_string_fns.h> 36 #include <rte_rwlock.h> 37 #include <rte_cycles.h> 38 39 #include "mlx5.h" 40 #include "mlx5_glue.h" 41 #include "mlx5_rxtx.h" 42 #include "mlx5_utils.h" 43 44 /* Supported speed values found in /usr/include/linux/ethtool.h */ 45 #ifndef HAVE_SUPPORTED_40000baseKR4_Full 46 #define SUPPORTED_40000baseKR4_Full (1 << 23) 47 #endif 48 #ifndef HAVE_SUPPORTED_40000baseCR4_Full 49 #define SUPPORTED_40000baseCR4_Full (1 << 24) 50 #endif 51 #ifndef HAVE_SUPPORTED_40000baseSR4_Full 52 #define SUPPORTED_40000baseSR4_Full (1 << 25) 53 #endif 54 #ifndef HAVE_SUPPORTED_40000baseLR4_Full 55 #define SUPPORTED_40000baseLR4_Full (1 << 26) 56 #endif 57 #ifndef HAVE_SUPPORTED_56000baseKR4_Full 58 #define SUPPORTED_56000baseKR4_Full (1 << 27) 59 #endif 60 #ifndef HAVE_SUPPORTED_56000baseCR4_Full 61 #define SUPPORTED_56000baseCR4_Full (1 << 28) 62 #endif 63 #ifndef HAVE_SUPPORTED_56000baseSR4_Full 64 #define SUPPORTED_56000baseSR4_Full (1 << 29) 65 #endif 66 #ifndef HAVE_SUPPORTED_56000baseLR4_Full 67 #define SUPPORTED_56000baseLR4_Full (1 << 30) 68 #endif 69 70 /* Add defines in case the running kernel is not the same as user headers. 
 */
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Fallback declaration, compiled only when the user-space headers do not
 * provide ETHTOOL_GLINKSETTINGS.
 * NOTE(review): the layout is presumably expected to mirror the kernel's
 * own struct ethtool_link_settings - confirm against linux/ethtool.h.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	/* Flexible array member holding 32-bit link mode mask words. */
	uint32_t link_mode_masks[];
};

/* Fallback request number and link mode bit positions. */
#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
/* 25G link mode bit positions, when the build did not detect them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
/* 50G link mode bit positions, when the build did not detect them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
/* 100G link mode bit positions, when the build did not detect them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get master interface name from private structure.
123 * 124 * @param[in] dev 125 * Pointer to Ethernet device. 126 * @param[out] ifname 127 * Interface name output buffer. 128 * 129 * @return 130 * 0 on success, a negative errno value otherwise and rte_errno is set. 131 */ 132 int 133 mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE]) 134 { 135 DIR *dir; 136 struct dirent *dent; 137 unsigned int dev_type = 0; 138 unsigned int dev_port_prev = ~0u; 139 char match[IF_NAMESIZE] = ""; 140 141 assert(ibdev_path); 142 { 143 MKSTR(path, "%s/device/net", ibdev_path); 144 145 dir = opendir(path); 146 if (dir == NULL) { 147 rte_errno = errno; 148 return -rte_errno; 149 } 150 } 151 while ((dent = readdir(dir)) != NULL) { 152 char *name = dent->d_name; 153 FILE *file; 154 unsigned int dev_port; 155 int r; 156 157 if ((name[0] == '.') && 158 ((name[1] == '\0') || 159 ((name[1] == '.') && (name[2] == '\0')))) 160 continue; 161 162 MKSTR(path, "%s/device/net/%s/%s", 163 ibdev_path, name, 164 (dev_type ? "dev_id" : "dev_port")); 165 166 file = fopen(path, "rb"); 167 if (file == NULL) { 168 if (errno != ENOENT) 169 continue; 170 /* 171 * Switch to dev_id when dev_port does not exist as 172 * is the case with Linux kernel versions < 3.15. 173 */ 174 try_dev_id: 175 match[0] = '\0'; 176 if (dev_type) 177 break; 178 dev_type = 1; 179 dev_port_prev = ~0u; 180 rewinddir(dir); 181 continue; 182 } 183 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 184 fclose(file); 185 if (r != 1) 186 continue; 187 /* 188 * Switch to dev_id when dev_port returns the same value for 189 * all ports. May happen when using a MOFED release older than 190 * 3.0 with a Linux kernel >= 3.15. 
191 */ 192 if (dev_port == dev_port_prev) 193 goto try_dev_id; 194 dev_port_prev = dev_port; 195 if (dev_port == 0) 196 strlcpy(match, name, sizeof(match)); 197 } 198 closedir(dir); 199 if (match[0] == '\0') { 200 rte_errno = ENOENT; 201 return -rte_errno; 202 } 203 strncpy(*ifname, match, sizeof(*ifname)); 204 return 0; 205 } 206 207 /** 208 * Get interface name from private structure. 209 * 210 * This is a port representor-aware version of mlx5_get_master_ifname(). 211 * 212 * @param[in] dev 213 * Pointer to Ethernet device. 214 * @param[out] ifname 215 * Interface name output buffer. 216 * 217 * @return 218 * 0 on success, a negative errno value otherwise and rte_errno is set. 219 */ 220 int 221 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 222 { 223 struct mlx5_priv *priv = dev->data->dev_private; 224 unsigned int ifindex; 225 226 assert(priv); 227 assert(priv->sh); 228 ifindex = mlx5_ifindex(dev); 229 if (!ifindex) { 230 if (!priv->representor) 231 return mlx5_get_master_ifname(priv->sh->ibdev_path, 232 ifname); 233 rte_errno = ENXIO; 234 return -rte_errno; 235 } 236 if (if_indextoname(ifindex, &(*ifname)[0])) 237 return 0; 238 rte_errno = errno; 239 return -rte_errno; 240 } 241 242 /** 243 * Get the interface index from device name. 244 * 245 * @param[in] dev 246 * Pointer to Ethernet device. 247 * 248 * @return 249 * Nonzero interface index on success, zero otherwise and rte_errno is set. 250 */ 251 unsigned int 252 mlx5_ifindex(const struct rte_eth_dev *dev) 253 { 254 struct mlx5_priv *priv = dev->data->dev_private; 255 unsigned int ifindex; 256 257 assert(priv); 258 assert(priv->if_index); 259 ifindex = priv->if_index; 260 if (!ifindex) 261 rte_errno = ENXIO; 262 return ifindex; 263 } 264 265 /** 266 * Perform ifreq ioctl() on associated Ethernet device. 267 * 268 * @param[in] dev 269 * Pointer to Ethernet device. 270 * @param req 271 * Request number to pass to ioctl(). 
272 * @param[out] ifr 273 * Interface request structure output buffer. 274 * 275 * @return 276 * 0 on success, a negative errno value otherwise and rte_errno is set. 277 */ 278 int 279 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 280 { 281 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 282 int ret = 0; 283 284 if (sock == -1) { 285 rte_errno = errno; 286 return -rte_errno; 287 } 288 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 289 if (ret) 290 goto error; 291 ret = ioctl(sock, req, ifr); 292 if (ret == -1) { 293 rte_errno = errno; 294 goto error; 295 } 296 close(sock); 297 return 0; 298 error: 299 close(sock); 300 return -rte_errno; 301 } 302 303 /** 304 * Get device MTU. 305 * 306 * @param dev 307 * Pointer to Ethernet device. 308 * @param[out] mtu 309 * MTU value output buffer. 310 * 311 * @return 312 * 0 on success, a negative errno value otherwise and rte_errno is set. 313 */ 314 int 315 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 316 { 317 struct ifreq request; 318 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 319 320 if (ret) 321 return ret; 322 *mtu = request.ifr_mtu; 323 return 0; 324 } 325 326 /** 327 * Set device MTU. 328 * 329 * @param dev 330 * Pointer to Ethernet device. 331 * @param mtu 332 * MTU value to set. 333 * 334 * @return 335 * 0 on success, a negative errno value otherwise and rte_errno is set. 336 */ 337 static int 338 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 339 { 340 struct ifreq request = { .ifr_mtu = mtu, }; 341 342 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 343 } 344 345 /** 346 * Set device flags. 347 * 348 * @param dev 349 * Pointer to Ethernet device. 350 * @param keep 351 * Bitmask for flags that must remain untouched. 352 * @param flags 353 * Bitmask for flags to modify. 354 * 355 * @return 356 * 0 on success, a negative errno value otherwise and rte_errno is set. 
357 */ 358 int 359 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 360 { 361 struct ifreq request; 362 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 363 364 if (ret) 365 return ret; 366 request.ifr_flags &= keep; 367 request.ifr_flags |= flags & ~keep; 368 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 369 } 370 371 /** 372 * DPDK callback for Ethernet device configuration. 373 * 374 * @param dev 375 * Pointer to Ethernet device structure. 376 * 377 * @return 378 * 0 on success, a negative errno value otherwise and rte_errno is set. 379 */ 380 int 381 mlx5_dev_configure(struct rte_eth_dev *dev) 382 { 383 struct mlx5_priv *priv = dev->data->dev_private; 384 unsigned int rxqs_n = dev->data->nb_rx_queues; 385 unsigned int txqs_n = dev->data->nb_tx_queues; 386 unsigned int i; 387 unsigned int j; 388 unsigned int reta_idx_n; 389 const uint8_t use_app_rss_key = 390 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 391 int ret = 0; 392 393 if (use_app_rss_key && 394 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 395 MLX5_RSS_HASH_KEY_LEN)) { 396 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 397 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 398 rte_errno = EINVAL; 399 return -rte_errno; 400 } 401 priv->rss_conf.rss_key = 402 rte_realloc(priv->rss_conf.rss_key, 403 MLX5_RSS_HASH_KEY_LEN, 0); 404 if (!priv->rss_conf.rss_key) { 405 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 406 dev->data->port_id, rxqs_n); 407 rte_errno = ENOMEM; 408 return -rte_errno; 409 } 410 memcpy(priv->rss_conf.rss_key, 411 use_app_rss_key ? 
412 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 413 rss_hash_default_key, 414 MLX5_RSS_HASH_KEY_LEN); 415 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 416 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 417 priv->rxqs = (void *)dev->data->rx_queues; 418 priv->txqs = (void *)dev->data->tx_queues; 419 if (txqs_n != priv->txqs_n) { 420 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 421 dev->data->port_id, priv->txqs_n, txqs_n); 422 priv->txqs_n = txqs_n; 423 } 424 if (rxqs_n > priv->config.ind_table_max_size) { 425 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 426 dev->data->port_id, rxqs_n); 427 rte_errno = EINVAL; 428 return -rte_errno; 429 } 430 if (rxqs_n != priv->rxqs_n) { 431 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 432 dev->data->port_id, priv->rxqs_n, rxqs_n); 433 priv->rxqs_n = rxqs_n; 434 /* 435 * If the requested number of RX queues is not a power of two, 436 * use the maximum indirection table size for better balancing. 437 * The result is always rounded to the next power of two. 438 */ 439 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 440 priv->config.ind_table_max_size : 441 rxqs_n)); 442 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 443 if (ret) 444 return ret; 445 /* 446 * When the number of RX queues is not a power of two, 447 * the remaining table entries are padded with reused WQs 448 * and hashes are not spread uniformly. 449 */ 450 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 451 (*priv->reta_idx)[i] = j; 452 if (++j == rxqs_n) 453 j = 0; 454 } 455 } 456 ret = mlx5_proc_priv_init(dev); 457 if (ret) 458 return ret; 459 return 0; 460 } 461 462 /** 463 * Sets default tuning parameters. 464 * 465 * @param dev 466 * Pointer to Ethernet device. 467 * @param[out] info 468 * Info structure output buffer. 
469 */ 470 static void 471 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 472 { 473 struct mlx5_priv *priv = dev->data->dev_private; 474 475 /* Minimum CPU utilization. */ 476 info->default_rxportconf.ring_size = 256; 477 info->default_txportconf.ring_size = 256; 478 info->default_rxportconf.burst_size = 64; 479 info->default_txportconf.burst_size = 64; 480 if (priv->link_speed_capa & ETH_LINK_SPEED_100G) { 481 info->default_rxportconf.nb_queues = 16; 482 info->default_txportconf.nb_queues = 16; 483 if (dev->data->nb_rx_queues > 2 || 484 dev->data->nb_tx_queues > 2) { 485 /* Max Throughput. */ 486 info->default_rxportconf.ring_size = 2048; 487 info->default_txportconf.ring_size = 2048; 488 } 489 } else { 490 info->default_rxportconf.nb_queues = 8; 491 info->default_txportconf.nb_queues = 8; 492 if (dev->data->nb_rx_queues > 2 || 493 dev->data->nb_tx_queues > 2) { 494 /* Max Throughput. */ 495 info->default_rxportconf.ring_size = 4096; 496 info->default_txportconf.ring_size = 4096; 497 } 498 } 499 } 500 501 /** 502 * Sets tx mbuf limiting parameters. 503 * 504 * @param dev 505 * Pointer to Ethernet device. 506 * @param[out] info 507 * Info structure output buffer. 508 */ 509 static void 510 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 511 { 512 struct mlx5_priv *priv = dev->data->dev_private; 513 struct mlx5_dev_config *config = &priv->config; 514 unsigned int inlen; 515 uint16_t nb_max; 516 517 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 
518 MLX5_SEND_DEF_INLINE_LEN : 519 (unsigned int)config->txq_inline_max; 520 assert(config->txq_inline_min >= 0); 521 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 522 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 523 MLX5_ESEG_MIN_INLINE_SIZE - 524 MLX5_WQE_CSEG_SIZE - 525 MLX5_WQE_ESEG_SIZE - 526 MLX5_WQE_DSEG_SIZE * 2); 527 nb_max = (MLX5_WQE_SIZE_MAX + 528 MLX5_ESEG_MIN_INLINE_SIZE - 529 MLX5_WQE_CSEG_SIZE - 530 MLX5_WQE_ESEG_SIZE - 531 MLX5_WQE_DSEG_SIZE - 532 inlen) / MLX5_WSEG_SIZE; 533 info->tx_desc_lim.nb_seg_max = nb_max; 534 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 535 } 536 537 /** 538 * DPDK callback to get information about the device. 539 * 540 * @param dev 541 * Pointer to Ethernet device structure. 542 * @param[out] info 543 * Info structure output buffer. 544 */ 545 int 546 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 547 { 548 struct mlx5_priv *priv = dev->data->dev_private; 549 struct mlx5_dev_config *config = &priv->config; 550 unsigned int max; 551 552 /* FIXME: we should ask the device for these values. */ 553 info->min_rx_bufsize = 32; 554 info->max_rx_pktlen = 65536; 555 /* 556 * Since we need one CQ per QP, the limit is the minimum number 557 * between the two values. 558 */ 559 max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq, 560 priv->sh->device_attr.orig_attr.max_qp); 561 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 562 if (max >= 65535) 563 max = 65535; 564 info->max_rx_queues = max; 565 info->max_tx_queues = max; 566 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 567 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 568 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 569 info->rx_queue_offload_capa); 570 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 571 info->if_index = mlx5_ifindex(dev); 572 info->reta_size = priv->reta_idx_n ? 
573 priv->reta_idx_n : config->ind_table_max_size; 574 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 575 info->speed_capa = priv->link_speed_capa; 576 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 577 mlx5_set_default_params(dev, info); 578 mlx5_set_txlimit_params(dev, info); 579 info->switch_info.name = dev->data->name; 580 info->switch_info.domain_id = priv->domain_id; 581 info->switch_info.port_id = priv->representor_id; 582 if (priv->representor) { 583 uint16_t port_id; 584 585 if (priv->pf_bond >= 0) { 586 /* 587 * Switch port ID is opaque value with driver defined 588 * format. Push the PF index in bonding configurations 589 * in upper four bits of port ID. If we get too many 590 * representors (more than 4K) or PFs (more than 15) 591 * this approach must be reconsidered. 592 */ 593 if ((info->switch_info.port_id >> 594 MLX5_PORT_ID_BONDING_PF_SHIFT) || 595 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 596 DRV_LOG(ERR, "can't update switch port ID" 597 " for bonding device"); 598 assert(false); 599 return -ENODEV; 600 } 601 info->switch_info.port_id |= 602 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 603 } 604 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 605 struct mlx5_priv *opriv = 606 rte_eth_devices[port_id].data->dev_private; 607 608 if (!opriv || 609 opriv->representor || 610 opriv->sh != priv->sh || 611 opriv->domain_id != priv->domain_id) 612 continue; 613 /* 614 * Override switch name with that of the master 615 * device. 616 */ 617 info->switch_info.name = opriv->dev_data->name; 618 break; 619 } 620 } 621 return 0; 622 } 623 624 /** 625 * Get device current raw clock counter 626 * 627 * @param dev 628 * Pointer to Ethernet device structure. 629 * @param[out] time 630 * Current raw clock counter of the device. 
631 * 632 * @return 633 * 0 if the clock has correctly been read 634 * The value of errno in case of error 635 */ 636 int 637 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock) 638 { 639 struct mlx5_priv *priv = dev->data->dev_private; 640 struct ibv_context *ctx = priv->sh->ctx; 641 struct ibv_values_ex values; 642 int err = 0; 643 644 values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK; 645 err = mlx5_glue->query_rt_values_ex(ctx, &values); 646 if (err != 0) { 647 DRV_LOG(WARNING, "Could not query the clock !"); 648 return err; 649 } 650 *clock = values.raw_clock.tv_nsec; 651 return 0; 652 } 653 654 /** 655 * Get firmware version of a device. 656 * 657 * @param dev 658 * Ethernet device port. 659 * @param fw_ver 660 * String output allocated by caller. 661 * @param fw_size 662 * Size of the output string, including terminating null byte. 663 * 664 * @return 665 * 0 on success, or the size of the non truncated string if too big. 666 */ 667 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 668 { 669 struct mlx5_priv *priv = dev->data->dev_private; 670 struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr; 671 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 672 673 if (fw_size < size) 674 return size; 675 if (fw_ver != NULL) 676 strlcpy(fw_ver, attr->fw_ver, fw_size); 677 return 0; 678 } 679 680 /** 681 * Get supported packet types. 682 * 683 * @param dev 684 * Pointer to Ethernet device structure. 685 * 686 * @return 687 * A pointer to the supported Packet types array. 
688 */ 689 const uint32_t * 690 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 691 { 692 static const uint32_t ptypes[] = { 693 /* refers to rxq_cq_to_pkt_type() */ 694 RTE_PTYPE_L2_ETHER, 695 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 696 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 697 RTE_PTYPE_L4_NONFRAG, 698 RTE_PTYPE_L4_FRAG, 699 RTE_PTYPE_L4_TCP, 700 RTE_PTYPE_L4_UDP, 701 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 702 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 703 RTE_PTYPE_INNER_L4_NONFRAG, 704 RTE_PTYPE_INNER_L4_FRAG, 705 RTE_PTYPE_INNER_L4_TCP, 706 RTE_PTYPE_INNER_L4_UDP, 707 RTE_PTYPE_UNKNOWN 708 }; 709 710 if (dev->rx_pkt_burst == mlx5_rx_burst || 711 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 712 dev->rx_pkt_burst == mlx5_rx_burst_vec) 713 return ptypes; 714 return NULL; 715 } 716 717 /** 718 * Retrieve the master device for representor in the same switch domain. 719 * 720 * @param dev 721 * Pointer to representor Ethernet device structure. 722 * 723 * @return 724 * Master device structure on success, NULL otherwise. 725 */ 726 727 static struct rte_eth_dev * 728 mlx5_find_master_dev(struct rte_eth_dev *dev) 729 { 730 struct mlx5_priv *priv; 731 uint16_t port_id; 732 uint16_t domain_id; 733 734 priv = dev->data->dev_private; 735 domain_id = priv->domain_id; 736 assert(priv->representor); 737 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 738 struct mlx5_priv *opriv = 739 rte_eth_devices[port_id].data->dev_private; 740 if (opriv && 741 opriv->master && 742 opriv->domain_id == domain_id && 743 opriv->sh == priv->sh) 744 return &rte_eth_devices[port_id]; 745 } 746 return NULL; 747 } 748 749 /** 750 * DPDK callback to retrieve physical link information. 751 * 752 * @param dev 753 * Pointer to Ethernet device structure. 754 * @param[out] link 755 * Storage for current link status. 756 * 757 * @return 758 * 0 on success, a negative errno value otherwise and rte_errno is set. 
759 */ 760 static int 761 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 762 struct rte_eth_link *link) 763 { 764 struct mlx5_priv *priv = dev->data->dev_private; 765 struct ethtool_cmd edata = { 766 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 767 }; 768 struct ifreq ifr; 769 struct rte_eth_link dev_link; 770 int link_speed = 0; 771 int ret; 772 773 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 774 if (ret) { 775 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 776 dev->data->port_id, strerror(rte_errno)); 777 return ret; 778 } 779 dev_link = (struct rte_eth_link) { 780 .link_status = ((ifr.ifr_flags & IFF_UP) && 781 (ifr.ifr_flags & IFF_RUNNING)), 782 }; 783 ifr = (struct ifreq) { 784 .ifr_data = (void *)&edata, 785 }; 786 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 787 if (ret) { 788 if (ret == -ENOTSUP && priv->representor) { 789 struct rte_eth_dev *master; 790 791 /* 792 * For representors we can try to inherit link 793 * settings from the master device. Actually 794 * link settings do not make a lot of sense 795 * for representors due to missing physical 796 * link. The old kernel drivers supported 797 * emulated settings query for representors, 798 * the new ones do not, so we have to add 799 * this code for compatibility issues. 
800 */ 801 master = mlx5_find_master_dev(dev); 802 if (master) { 803 ifr = (struct ifreq) { 804 .ifr_data = (void *)&edata, 805 }; 806 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr); 807 } 808 } 809 if (ret) { 810 DRV_LOG(WARNING, 811 "port %u ioctl(SIOCETHTOOL," 812 " ETHTOOL_GSET) failed: %s", 813 dev->data->port_id, strerror(rte_errno)); 814 return ret; 815 } 816 } 817 link_speed = ethtool_cmd_speed(&edata); 818 if (link_speed == -1) 819 dev_link.link_speed = ETH_SPEED_NUM_NONE; 820 else 821 dev_link.link_speed = link_speed; 822 priv->link_speed_capa = 0; 823 if (edata.supported & SUPPORTED_Autoneg) 824 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 825 if (edata.supported & (SUPPORTED_1000baseT_Full | 826 SUPPORTED_1000baseKX_Full)) 827 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 828 if (edata.supported & SUPPORTED_10000baseKR_Full) 829 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 830 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 831 SUPPORTED_40000baseCR4_Full | 832 SUPPORTED_40000baseSR4_Full | 833 SUPPORTED_40000baseLR4_Full)) 834 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 835 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 836 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 837 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 838 ETH_LINK_SPEED_FIXED); 839 if (((dev_link.link_speed && !dev_link.link_status) || 840 (!dev_link.link_speed && dev_link.link_status))) { 841 rte_errno = EAGAIN; 842 return -rte_errno; 843 } 844 *link = dev_link; 845 return 0; 846 } 847 848 /** 849 * Retrieve physical link information (unlocked version using new ioctl). 850 * 851 * @param dev 852 * Pointer to Ethernet device structure. 853 * @param[out] link 854 * Storage for current link status. 855 * 856 * @return 857 * 0 on success, a negative errno value otherwise and rte_errno is set. 
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
			     struct rte_eth_link *link)

{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	/* Set only when the query had to be redirected to the master. */
	struct rte_eth_dev *master = NULL;
	uint64_t sc;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	/* The link is reported up only when both UP and RUNNING are set. */
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	/*
	 * First request: gcmd carries only the command, so
	 * link_mode_masks_nwords is zero.
	 */
	ifr = (struct ifreq) {
		.ifr_data = (void *)&gcmd,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		if (ret == -ENOTSUP && priv->representor) {
			/*
			 * For representors we can try to inherit link
			 * settings from the master device. Actually
			 * link settings do not make a lot of sense
			 * for representors due to missing physical
			 * link. The old kernel drivers supported
			 * emulated settings query for representors,
			 * the new ones do not, so we have to add
			 * this code for compatibility issues.
			 */
			master = mlx5_find_master_dev(dev);
			if (master) {
				ifr = (struct ifreq) {
					.ifr_data = (void *)&gcmd,
				};
				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
			}
		}
		if (ret) {
			DRV_LOG(DEBUG,
				"port %u ioctl(SIOCETHTOOL,"
				" ETHTOOL_GLINKSETTINGS) failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return ret;
		}

	}
	/*
	 * The word count written back by the first request is negated
	 * before use.
	 * NOTE(review): presumably the kernel answers the zero-sized probe
	 * with the negated expected count - confirm against the
	 * ETHTOOL_GLINKSETTINGS documentation.
	 */
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	/*
	 * Buffer for the second request: the fixed header followed by
	 * three mask arrays of link_mode_masks_nwords words each.
	 */
	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	/* Query the same device that answered the first request. */
	ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL,"
			"ETHTOOL_GLINKSETTINGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
				ecmd->speed;
	/*
	 * Combine the first two 32-bit mask words into one 64-bit value.
	 * NOTE(review): presumably these are the "supported" link modes -
	 * confirm against the kernel structure layout.
	 */
	sc = ecmd->link_mode_masks[0] |
		((uint64_t)ecmd->link_mode_masks[1] << 32);
	/* Translate link mode bits into speed capabilities. */
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	/* Speed and status must agree, otherwise ask the caller to retry. */
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * Tries the ETHTOOL_GLINKSETTINGS based query first and falls back to the
 * ETHTOOL_GSET based one when the former returns -ENOTSUP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion.
 *
 * @return
 *   0 if link status was not updated, positive if it was, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);

	do {
		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret == -ENOTSUP)
			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if (wait_to_complete && ret == -EAGAIN) {
			/* Retry on -EAGAIN until the timeout has elapsed. */
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0);
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete);
	/* Report whether the stored link information actually changed. */
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
1034 * @param in_mtu 1035 * New MTU. 1036 * 1037 * @return 1038 * 0 on success, a negative errno value otherwise and rte_errno is set. 1039 */ 1040 int 1041 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 1042 { 1043 struct mlx5_priv *priv = dev->data->dev_private; 1044 uint16_t kern_mtu = 0; 1045 int ret; 1046 1047 ret = mlx5_get_mtu(dev, &kern_mtu); 1048 if (ret) 1049 return ret; 1050 /* Set kernel interface MTU first. */ 1051 ret = mlx5_set_mtu(dev, mtu); 1052 if (ret) 1053 return ret; 1054 ret = mlx5_get_mtu(dev, &kern_mtu); 1055 if (ret) 1056 return ret; 1057 if (kern_mtu == mtu) { 1058 priv->mtu = mtu; 1059 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 1060 dev->data->port_id, mtu); 1061 return 0; 1062 } 1063 rte_errno = EAGAIN; 1064 return -rte_errno; 1065 } 1066 1067 /** 1068 * DPDK callback to get flow control status. 1069 * 1070 * @param dev 1071 * Pointer to Ethernet device structure. 1072 * @param[out] fc_conf 1073 * Flow control output buffer. 1074 * 1075 * @return 1076 * 0 on success, a negative errno value otherwise and rte_errno is set. 1077 */ 1078 int 1079 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1080 { 1081 struct ifreq ifr; 1082 struct ethtool_pauseparam ethpause = { 1083 .cmd = ETHTOOL_GPAUSEPARAM 1084 }; 1085 int ret; 1086 1087 ifr.ifr_data = (void *)ðpause; 1088 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1089 if (ret) { 1090 DRV_LOG(WARNING, 1091 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 1092 " %s", 1093 dev->data->port_id, strerror(rte_errno)); 1094 return ret; 1095 } 1096 fc_conf->autoneg = ethpause.autoneg; 1097 if (ethpause.rx_pause && ethpause.tx_pause) 1098 fc_conf->mode = RTE_FC_FULL; 1099 else if (ethpause.rx_pause) 1100 fc_conf->mode = RTE_FC_RX_PAUSE; 1101 else if (ethpause.tx_pause) 1102 fc_conf->mode = RTE_FC_TX_PAUSE; 1103 else 1104 fc_conf->mode = RTE_FC_NONE; 1105 return 0; 1106 } 1107 1108 /** 1109 * DPDK callback to modify flow control parameters. 
1110 * 1111 * @param dev 1112 * Pointer to Ethernet device structure. 1113 * @param[in] fc_conf 1114 * Flow control parameters. 1115 * 1116 * @return 1117 * 0 on success, a negative errno value otherwise and rte_errno is set. 1118 */ 1119 int 1120 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1121 { 1122 struct ifreq ifr; 1123 struct ethtool_pauseparam ethpause = { 1124 .cmd = ETHTOOL_SPAUSEPARAM 1125 }; 1126 int ret; 1127 1128 ifr.ifr_data = (void *)ðpause; 1129 ethpause.autoneg = fc_conf->autoneg; 1130 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1131 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1132 ethpause.rx_pause = 1; 1133 else 1134 ethpause.rx_pause = 0; 1135 1136 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1137 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1138 ethpause.tx_pause = 1; 1139 else 1140 ethpause.tx_pause = 0; 1141 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1142 if (ret) { 1143 DRV_LOG(WARNING, 1144 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1145 " failed: %s", 1146 dev->data->port_id, strerror(rte_errno)); 1147 return ret; 1148 } 1149 return 0; 1150 } 1151 1152 /** 1153 * Get PCI information by sysfs device path. 1154 * 1155 * @param dev_path 1156 * Pointer to device sysfs folder name. 1157 * @param[out] pci_addr 1158 * PCI bus address output buffer. 1159 * 1160 * @return 1161 * 0 on success, a negative errno value otherwise and rte_errno is set. 1162 */ 1163 int 1164 mlx5_dev_to_pci_addr(const char *dev_path, 1165 struct rte_pci_addr *pci_addr) 1166 { 1167 FILE *file; 1168 char line[32]; 1169 MKSTR(path, "%s/device/uevent", dev_path); 1170 1171 file = fopen(path, "rb"); 1172 if (file == NULL) { 1173 rte_errno = errno; 1174 return -rte_errno; 1175 } 1176 while (fgets(line, sizeof(line), file) == line) { 1177 size_t len = strlen(line); 1178 int ret; 1179 1180 /* Truncate long lines. 
*/ 1181 if (len == (sizeof(line) - 1)) 1182 while (line[(len - 1)] != '\n') { 1183 ret = fgetc(file); 1184 if (ret == EOF) 1185 break; 1186 line[(len - 1)] = ret; 1187 } 1188 /* Extract information. */ 1189 if (sscanf(line, 1190 "PCI_SLOT_NAME=" 1191 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1192 &pci_addr->domain, 1193 &pci_addr->bus, 1194 &pci_addr->devid, 1195 &pci_addr->function) == 4) { 1196 ret = 0; 1197 break; 1198 } 1199 } 1200 fclose(file); 1201 return 0; 1202 } 1203 1204 /** 1205 * Handle asynchronous removal event for entire multiport device. 1206 * 1207 * @param sh 1208 * Infiniband device shared context. 1209 */ 1210 static void 1211 mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) 1212 { 1213 uint32_t i; 1214 1215 for (i = 0; i < sh->max_port; ++i) { 1216 struct rte_eth_dev *dev; 1217 1218 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { 1219 /* 1220 * Or not existing port either no 1221 * handler installed for this port. 1222 */ 1223 continue; 1224 } 1225 dev = &rte_eth_devices[sh->port[i].ih_port_id]; 1226 assert(dev); 1227 if (dev->data->dev_conf.intr_conf.rmv) 1228 _rte_eth_dev_callback_process 1229 (dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1230 } 1231 } 1232 1233 /** 1234 * Handle shared asynchronous events the NIC (removal event 1235 * and link status change). Supports multiport IB device. 1236 * 1237 * @param cb_arg 1238 * Callback argument. 1239 */ 1240 void 1241 mlx5_dev_interrupt_handler(void *cb_arg) 1242 { 1243 struct mlx5_ibv_shared *sh = cb_arg; 1244 struct ibv_async_event event; 1245 1246 /* Read all message from the IB device and acknowledge them. */ 1247 for (;;) { 1248 struct rte_eth_dev *dev; 1249 uint32_t tmp; 1250 1251 if (mlx5_glue->get_async_event(sh->ctx, &event)) 1252 break; 1253 /* Retrieve and check IB port index. 
*/ 1254 tmp = (uint32_t)event.element.port_num; 1255 if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) { 1256 /* 1257 * The DEVICE_FATAL event is called once for 1258 * entire device without port specifying. 1259 * We should notify all existing ports. 1260 */ 1261 mlx5_glue->ack_async_event(&event); 1262 mlx5_dev_interrupt_device_fatal(sh); 1263 continue; 1264 } 1265 assert(tmp && (tmp <= sh->max_port)); 1266 if (!tmp) { 1267 /* Unsupported devive level event. */ 1268 mlx5_glue->ack_async_event(&event); 1269 DRV_LOG(DEBUG, 1270 "unsupported common event (type %d)", 1271 event.event_type); 1272 continue; 1273 } 1274 if (tmp > sh->max_port) { 1275 /* Invalid IB port index. */ 1276 mlx5_glue->ack_async_event(&event); 1277 DRV_LOG(DEBUG, 1278 "cannot handle an event (type %d)" 1279 "due to invalid IB port index (%u)", 1280 event.event_type, tmp); 1281 continue; 1282 } 1283 if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { 1284 /* No handler installed. */ 1285 mlx5_glue->ack_async_event(&event); 1286 DRV_LOG(DEBUG, 1287 "cannot handle an event (type %d)" 1288 "due to no handler installed for port %u", 1289 event.event_type, tmp); 1290 continue; 1291 } 1292 /* Retrieve ethernet device descriptor. */ 1293 tmp = sh->port[tmp - 1].ih_port_id; 1294 dev = &rte_eth_devices[tmp]; 1295 assert(dev); 1296 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1297 event.event_type == IBV_EVENT_PORT_ERR) && 1298 dev->data->dev_conf.intr_conf.lsc) { 1299 mlx5_glue->ack_async_event(&event); 1300 if (mlx5_link_update(dev, 0) == -EAGAIN) { 1301 usleep(0); 1302 continue; 1303 } 1304 _rte_eth_dev_callback_process 1305 (dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1306 continue; 1307 } 1308 DRV_LOG(DEBUG, 1309 "port %u cannot handle an unknown event (type %d)", 1310 dev->data->port_id, event.event_type); 1311 mlx5_glue->ack_async_event(&event); 1312 } 1313 } 1314 1315 /* 1316 * Unregister callback handler safely. 
The handler may be active 1317 * while we are trying to unregister it, in this case code -EAGAIN 1318 * is returned by rte_intr_callback_unregister(). This routine checks 1319 * the return code and tries to unregister handler again. 1320 * 1321 * @param handle 1322 * interrupt handle 1323 * @param cb_fn 1324 * pointer to callback routine 1325 * @cb_arg 1326 * opaque callback parameter 1327 */ 1328 void 1329 mlx5_intr_callback_unregister(const struct rte_intr_handle *handle, 1330 rte_intr_callback_fn cb_fn, void *cb_arg) 1331 { 1332 /* 1333 * Try to reduce timeout management overhead by not calling 1334 * the timer related routines on the first iteration. If the 1335 * unregistering succeeds on first call there will be no 1336 * timer calls at all. 1337 */ 1338 uint64_t twait = 0; 1339 uint64_t start = 0; 1340 1341 do { 1342 int ret; 1343 1344 ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg); 1345 if (ret >= 0) 1346 return; 1347 if (ret != -EAGAIN) { 1348 DRV_LOG(INFO, "failed to unregister interrupt" 1349 " handler (error: %d)", ret); 1350 assert(false); 1351 return; 1352 } 1353 if (twait) { 1354 struct timespec onems; 1355 1356 /* Wait one millisecond and try again. */ 1357 onems.tv_sec = 0; 1358 onems.tv_nsec = NS_PER_S / MS_PER_S; 1359 nanosleep(&onems, 0); 1360 /* Check whether one second elapsed. */ 1361 if ((rte_get_timer_cycles() - start) <= twait) 1362 continue; 1363 } else { 1364 /* 1365 * We get the amount of timer ticks for one second. 1366 * If this amount elapsed it means we spent one 1367 * second in waiting. This branch is executed once 1368 * on first iteration. 1369 */ 1370 twait = rte_get_timer_hz(); 1371 assert(twait); 1372 } 1373 /* 1374 * Timeout elapsed, show message (once a second) and retry. 1375 * We have no other acceptable option here, if we ignore 1376 * the unregistering return code the handler will not 1377 * be unregistered, fd will be closed and we may get the 1378 * crush. 
Hanging and messaging in the loop seems not to be 1379 * the worst choice. 1380 */ 1381 DRV_LOG(INFO, "Retrying to unregister interrupt handler"); 1382 start = rte_get_timer_cycles(); 1383 } while (true); 1384 } 1385 1386 /** 1387 * Handle DEVX interrupts from the NIC. 1388 * This function is probably called from the DPDK host thread. 1389 * 1390 * @param cb_arg 1391 * Callback argument. 1392 */ 1393 void 1394 mlx5_dev_interrupt_handler_devx(void *cb_arg) 1395 { 1396 #ifndef HAVE_IBV_DEVX_ASYNC 1397 (void)cb_arg; 1398 return; 1399 #else 1400 struct mlx5_ibv_shared *sh = cb_arg; 1401 union { 1402 struct mlx5dv_devx_async_cmd_hdr cmd_resp; 1403 uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) + 1404 MLX5_ST_SZ_BYTES(traffic_counter) + 1405 sizeof(struct mlx5dv_devx_async_cmd_hdr)]; 1406 } out; 1407 uint8_t *buf = out.buf + sizeof(out.cmd_resp); 1408 1409 while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp, 1410 &out.cmd_resp, 1411 sizeof(out.buf))) 1412 mlx5_flow_async_pool_query_handle 1413 (sh, (uint64_t)out.cmd_resp.wr_id, 1414 mlx5_devx_get_out_command_status(buf)); 1415 #endif /* HAVE_IBV_DEVX_ASYNC */ 1416 } 1417 1418 /** 1419 * Uninstall shared asynchronous device events handler. 1420 * This function is implemented to support event sharing 1421 * between multiple ports of single IB device. 1422 * 1423 * @param dev 1424 * Pointer to Ethernet device. 
1425 */ 1426 static void 1427 mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev) 1428 { 1429 struct mlx5_priv *priv = dev->data->dev_private; 1430 struct mlx5_ibv_shared *sh = priv->sh; 1431 1432 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1433 return; 1434 pthread_mutex_lock(&sh->intr_mutex); 1435 assert(priv->ibv_port); 1436 assert(priv->ibv_port <= sh->max_port); 1437 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1438 if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS) 1439 goto exit; 1440 assert(sh->port[priv->ibv_port - 1].ih_port_id == 1441 (uint32_t)dev->data->port_id); 1442 assert(sh->intr_cnt); 1443 sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS; 1444 if (!sh->intr_cnt || --sh->intr_cnt) 1445 goto exit; 1446 mlx5_intr_callback_unregister(&sh->intr_handle, 1447 mlx5_dev_interrupt_handler, sh); 1448 sh->intr_handle.fd = 0; 1449 sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1450 if (sh->intr_handle_devx.fd) { 1451 rte_intr_callback_unregister(&sh->intr_handle_devx, 1452 mlx5_dev_interrupt_handler_devx, 1453 sh); 1454 sh->intr_handle_devx.fd = 0; 1455 sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN; 1456 } 1457 if (sh->devx_comp) { 1458 mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp); 1459 sh->devx_comp = NULL; 1460 } 1461 exit: 1462 pthread_mutex_unlock(&sh->intr_mutex); 1463 } 1464 1465 /** 1466 * Install shared asynchronous device events handler. 1467 * This function is implemented to support event sharing 1468 * between multiple ports of single IB device. 1469 * 1470 * @param dev 1471 * Pointer to Ethernet device. 
1472 */ 1473 static void 1474 mlx5_dev_shared_handler_install(struct rte_eth_dev *dev) 1475 { 1476 struct mlx5_priv *priv = dev->data->dev_private; 1477 struct mlx5_ibv_shared *sh = priv->sh; 1478 int ret; 1479 int flags; 1480 1481 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1482 return; 1483 pthread_mutex_lock(&sh->intr_mutex); 1484 assert(priv->ibv_port); 1485 assert(priv->ibv_port <= sh->max_port); 1486 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1487 if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) { 1488 /* The handler is already installed for this port. */ 1489 assert(sh->intr_cnt); 1490 goto exit; 1491 } 1492 sh->port[priv->ibv_port - 1].ih_port_id = (uint32_t)dev->data->port_id; 1493 if (sh->intr_cnt) { 1494 sh->intr_cnt++; 1495 goto exit; 1496 } 1497 /* No shared handler installed. */ 1498 assert(sh->ctx->async_fd > 0); 1499 flags = fcntl(sh->ctx->async_fd, F_GETFL); 1500 ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1501 if (ret) { 1502 DRV_LOG(INFO, "failed to change file descriptor" 1503 " async event queue"); 1504 goto error; 1505 } 1506 sh->intr_handle.fd = sh->ctx->async_fd; 1507 sh->intr_handle.type = RTE_INTR_HANDLE_EXT; 1508 rte_intr_callback_register(&sh->intr_handle, 1509 mlx5_dev_interrupt_handler, sh); 1510 if (priv->config.devx) { 1511 #ifndef HAVE_IBV_DEVX_ASYNC 1512 goto error_unregister; 1513 #else 1514 sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx); 1515 if (sh->devx_comp) { 1516 flags = fcntl(sh->devx_comp->fd, F_GETFL); 1517 ret = fcntl(sh->devx_comp->fd, F_SETFL, 1518 flags | O_NONBLOCK); 1519 if (ret) { 1520 DRV_LOG(INFO, "failed to change file descriptor" 1521 " devx async event queue"); 1522 goto error_unregister; 1523 } 1524 sh->intr_handle_devx.fd = sh->devx_comp->fd; 1525 sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT; 1526 rte_intr_callback_register 1527 (&sh->intr_handle_devx, 1528 mlx5_dev_interrupt_handler_devx, sh); 1529 } else { 1530 DRV_LOG(INFO, "failed to create devx async 
command " 1531 "completion"); 1532 goto error_unregister; 1533 } 1534 #endif /* HAVE_IBV_DEVX_ASYNC */ 1535 } 1536 sh->intr_cnt++; 1537 goto exit; 1538 error_unregister: 1539 rte_intr_callback_unregister(&sh->intr_handle, 1540 mlx5_dev_interrupt_handler, sh); 1541 error: 1542 /* Indicate there will be no interrupts. */ 1543 dev->data->dev_conf.intr_conf.lsc = 0; 1544 dev->data->dev_conf.intr_conf.rmv = 0; 1545 sh->intr_handle.fd = 0; 1546 sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1547 sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS; 1548 exit: 1549 pthread_mutex_unlock(&sh->intr_mutex); 1550 } 1551 1552 /** 1553 * Uninstall interrupt handler. 1554 * 1555 * @param dev 1556 * Pointer to Ethernet device. 1557 */ 1558 void 1559 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 1560 { 1561 mlx5_dev_shared_handler_uninstall(dev); 1562 } 1563 1564 /** 1565 * Install interrupt handler. 1566 * 1567 * @param dev 1568 * Pointer to Ethernet device. 1569 */ 1570 void 1571 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1572 { 1573 mlx5_dev_shared_handler_install(dev); 1574 } 1575 1576 /** 1577 * DPDK callback to bring the link DOWN. 1578 * 1579 * @param dev 1580 * Pointer to Ethernet device structure. 1581 * 1582 * @return 1583 * 0 on success, a negative errno value otherwise and rte_errno is set. 1584 */ 1585 int 1586 mlx5_set_link_down(struct rte_eth_dev *dev) 1587 { 1588 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1589 } 1590 1591 /** 1592 * DPDK callback to bring the link UP. 1593 * 1594 * @param dev 1595 * Pointer to Ethernet device structure. 1596 * 1597 * @return 1598 * 0 on success, a negative errno value otherwise and rte_errno is set. 1599 */ 1600 int 1601 mlx5_set_link_up(struct rte_eth_dev *dev) 1602 { 1603 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1604 } 1605 1606 /** 1607 * Configure the RX function to use. 1608 * 1609 * @param dev 1610 * Pointer to private data structure. 
1611 * 1612 * @return 1613 * Pointer to selected Rx burst function. 1614 */ 1615 eth_rx_burst_t 1616 mlx5_select_rx_function(struct rte_eth_dev *dev) 1617 { 1618 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1619 1620 assert(dev != NULL); 1621 if (mlx5_check_vec_rx_support(dev) > 0) { 1622 rx_pkt_burst = mlx5_rx_burst_vec; 1623 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1624 dev->data->port_id); 1625 } else if (mlx5_mprq_enabled(dev)) { 1626 rx_pkt_burst = mlx5_rx_burst_mprq; 1627 } 1628 return rx_pkt_burst; 1629 } 1630 1631 /** 1632 * Check if mlx5 device was removed. 1633 * 1634 * @param dev 1635 * Pointer to Ethernet device structure. 1636 * 1637 * @return 1638 * 1 when device is removed, otherwise 0. 1639 */ 1640 int 1641 mlx5_is_removed(struct rte_eth_dev *dev) 1642 { 1643 struct ibv_device_attr device_attr; 1644 struct mlx5_priv *priv = dev->data->dev_private; 1645 1646 if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO) 1647 return 1; 1648 return 0; 1649 } 1650 1651 /** 1652 * Get the E-Switch parameters by port id. 1653 * 1654 * @param[in] port 1655 * Device port id. 1656 * @param[out] es_domain_id 1657 * E-Switch domain id. 1658 * @param[out] es_port_id 1659 * The port id of the port in the E-Switch. 1660 * 1661 * @return 1662 * pointer to device private data structure containing data needed 1663 * on success, NULL otherwise and rte_errno is set. 1664 */ 1665 struct mlx5_priv * 1666 mlx5_port_to_eswitch_info(uint16_t port) 1667 { 1668 struct rte_eth_dev *dev; 1669 struct mlx5_priv *priv; 1670 1671 if (port >= RTE_MAX_ETHPORTS) { 1672 rte_errno = EINVAL; 1673 return NULL; 1674 } 1675 if (!rte_eth_dev_is_valid_port(port)) { 1676 rte_errno = ENODEV; 1677 return NULL; 1678 } 1679 dev = &rte_eth_devices[port]; 1680 priv = dev->data->dev_private; 1681 if (!(priv->representor || priv->master)) { 1682 rte_errno = EINVAL; 1683 return NULL; 1684 } 1685 return priv; 1686 } 1687 1688 /** 1689 * Get the E-Switch parameters by device instance. 
1690 * 1691 * @param[in] port 1692 * Device port id. 1693 * @param[out] es_domain_id 1694 * E-Switch domain id. 1695 * @param[out] es_port_id 1696 * The port id of the port in the E-Switch. 1697 * 1698 * @return 1699 * pointer to device private data structure containing data needed 1700 * on success, NULL otherwise and rte_errno is set. 1701 */ 1702 struct mlx5_priv * 1703 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 1704 { 1705 struct mlx5_priv *priv; 1706 1707 priv = dev->data->dev_private; 1708 if (!(priv->representor || priv->master)) { 1709 rte_errno = EINVAL; 1710 return NULL; 1711 } 1712 return priv; 1713 } 1714 1715 /** 1716 * Get switch information associated with network interface. 1717 * 1718 * @param ifindex 1719 * Network interface index. 1720 * @param[out] info 1721 * Switch information object, populated in case of success. 1722 * 1723 * @return 1724 * 0 on success, a negative errno value otherwise and rte_errno is set. 1725 */ 1726 int 1727 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) 1728 { 1729 char ifname[IF_NAMESIZE]; 1730 char port_name[IF_NAMESIZE]; 1731 FILE *file; 1732 struct mlx5_switch_info data = { 1733 .master = 0, 1734 .representor = 0, 1735 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1736 .port_name = 0, 1737 .switch_id = 0, 1738 }; 1739 DIR *dir; 1740 bool port_switch_id_set = false; 1741 bool device_dir = false; 1742 char c; 1743 int ret; 1744 1745 if (!if_indextoname(ifindex, ifname)) { 1746 rte_errno = errno; 1747 return -rte_errno; 1748 } 1749 1750 MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name", 1751 ifname); 1752 MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", 1753 ifname); 1754 MKSTR(pci_device, "/sys/class/net/%s/device", 1755 ifname); 1756 1757 file = fopen(phys_port_name, "rb"); 1758 if (file != NULL) { 1759 ret = fscanf(file, "%s", port_name); 1760 fclose(file); 1761 if (ret == 1) 1762 mlx5_translate_port_name(port_name, &data); 1763 } 1764 file = fopen(phys_switch_id, 
"rb"); 1765 if (file == NULL) { 1766 rte_errno = errno; 1767 return -rte_errno; 1768 } 1769 port_switch_id_set = 1770 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && 1771 c == '\n'; 1772 fclose(file); 1773 dir = opendir(pci_device); 1774 if (dir != NULL) { 1775 closedir(dir); 1776 device_dir = true; 1777 } 1778 if (port_switch_id_set) { 1779 /* We have some E-Switch configuration. */ 1780 mlx5_sysfs_check_switch_info(device_dir, &data); 1781 } 1782 *info = data; 1783 assert(!(data.master && data.representor)); 1784 if (data.master && data.representor) { 1785 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1786 " and as representor", ifindex); 1787 rte_errno = ENODEV; 1788 return -rte_errno; 1789 } 1790 return 0; 1791 } 1792 1793 /** 1794 * Analyze gathered port parameters via Netlink to recognize master 1795 * and representor devices for E-Switch configuration. 1796 * 1797 * @param[in] num_vf_set 1798 * flag of presence of number of VFs port attribute. 1799 * @param[inout] switch_info 1800 * Port information, including port name as a number and port name 1801 * type if recognized 1802 * 1803 * @return 1804 * master and representor flags are set in switch_info according to 1805 * recognized parameters (if any). 1806 */ 1807 void 1808 mlx5_nl_check_switch_info(bool num_vf_set, 1809 struct mlx5_switch_info *switch_info) 1810 { 1811 switch (switch_info->name_type) { 1812 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1813 /* 1814 * Name is not recognized, assume the master, 1815 * check the number of VFs key presence. 1816 */ 1817 switch_info->master = num_vf_set; 1818 break; 1819 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1820 /* 1821 * Name is not set, this assumes the legacy naming 1822 * schema for master, just check if there is a 1823 * number of VFs key. 1824 */ 1825 switch_info->master = num_vf_set; 1826 break; 1827 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1828 /* New uplink naming schema recognized. 
*/ 1829 switch_info->master = 1; 1830 break; 1831 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1832 /* Legacy representors naming schema. */ 1833 switch_info->representor = !num_vf_set; 1834 break; 1835 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1836 /* New representors naming schema. */ 1837 switch_info->representor = 1; 1838 break; 1839 } 1840 } 1841 1842 /** 1843 * Analyze gathered port parameters via sysfs to recognize master 1844 * and representor devices for E-Switch configuration. 1845 * 1846 * @param[in] device_dir 1847 * flag of presence of "device" directory under port device key. 1848 * @param[inout] switch_info 1849 * Port information, including port name as a number and port name 1850 * type if recognized 1851 * 1852 * @return 1853 * master and representor flags are set in switch_info according to 1854 * recognized parameters (if any). 1855 */ 1856 void 1857 mlx5_sysfs_check_switch_info(bool device_dir, 1858 struct mlx5_switch_info *switch_info) 1859 { 1860 switch (switch_info->name_type) { 1861 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1862 /* 1863 * Name is not recognized, assume the master, 1864 * check the device directory presence. 1865 */ 1866 switch_info->master = device_dir; 1867 break; 1868 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1869 /* 1870 * Name is not set, this assumes the legacy naming 1871 * schema for master, just check if there is 1872 * a device directory. 1873 */ 1874 switch_info->master = device_dir; 1875 break; 1876 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1877 /* New uplink naming schema recognized. */ 1878 switch_info->master = 1; 1879 break; 1880 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1881 /* Legacy representors naming schema. */ 1882 switch_info->representor = !device_dir; 1883 break; 1884 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1885 /* New representors naming schema. */ 1886 switch_info->representor = 1; 1887 break; 1888 } 1889 } 1890 1891 /** 1892 * Extract port name, as a number, from sysfs or netlink information. 
1893 * 1894 * @param[in] port_name_in 1895 * String representing the port name. 1896 * @param[out] port_info_out 1897 * Port information, including port name as a number and port name 1898 * type if recognized 1899 * 1900 * @return 1901 * port_name field set according to recognized name format. 1902 */ 1903 void 1904 mlx5_translate_port_name(const char *port_name_in, 1905 struct mlx5_switch_info *port_info_out) 1906 { 1907 char pf_c1, pf_c2, vf_c1, vf_c2; 1908 char *end; 1909 int sc_items; 1910 1911 /* 1912 * Check for port-name as a string of the form pf0vf0 1913 * (support kernel ver >= 5.0 or OFED ver >= 4.6). 1914 */ 1915 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", 1916 &pf_c1, &pf_c2, &port_info_out->pf_num, 1917 &vf_c1, &vf_c2, &port_info_out->port_name); 1918 if (sc_items == 6 && 1919 pf_c1 == 'p' && pf_c2 == 'f' && 1920 vf_c1 == 'v' && vf_c2 == 'f') { 1921 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; 1922 return; 1923 } 1924 /* 1925 * Check for port-name as a string of the form p0 1926 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 1927 */ 1928 sc_items = sscanf(port_name_in, "%c%d", 1929 &pf_c1, &port_info_out->port_name); 1930 if (sc_items == 2 && pf_c1 == 'p') { 1931 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 1932 return; 1933 } 1934 /* Check for port-name as a number (support kernel ver < 5.0 */ 1935 errno = 0; 1936 port_info_out->port_name = strtol(port_name_in, &end, 0); 1937 if (!errno && 1938 (size_t)(end - port_name_in) == strlen(port_name_in)) { 1939 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 1940 return; 1941 } 1942 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 1943 return; 1944 } 1945 1946 /** 1947 * DPDK callback to retrieve plug-in module EEPROM information (type and size). 1948 * 1949 * @param dev 1950 * Pointer to Ethernet device structure. 1951 * @param[out] modinfo 1952 * Storage for plug-in module EEPROM information. 
1953 * 1954 * @return 1955 * 0 on success, a negative errno value otherwise and rte_errno is set. 1956 */ 1957 int 1958 mlx5_get_module_info(struct rte_eth_dev *dev, 1959 struct rte_eth_dev_module_info *modinfo) 1960 { 1961 struct ethtool_modinfo info = { 1962 .cmd = ETHTOOL_GMODULEINFO, 1963 }; 1964 struct ifreq ifr = (struct ifreq) { 1965 .ifr_data = (void *)&info, 1966 }; 1967 int ret = 0; 1968 1969 if (!dev || !modinfo) { 1970 DRV_LOG(WARNING, "missing argument, cannot get module info"); 1971 rte_errno = EINVAL; 1972 return -rte_errno; 1973 } 1974 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1975 if (ret) { 1976 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 1977 dev->data->port_id, strerror(rte_errno)); 1978 return ret; 1979 } 1980 modinfo->type = info.type; 1981 modinfo->eeprom_len = info.eeprom_len; 1982 return ret; 1983 } 1984 1985 /** 1986 * DPDK callback to retrieve plug-in module EEPROM data. 1987 * 1988 * @param dev 1989 * Pointer to Ethernet device structure. 1990 * @param[out] info 1991 * Storage for plug-in module EEPROM data. 1992 * 1993 * @return 1994 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1995 */ 1996 int mlx5_get_module_eeprom(struct rte_eth_dev *dev, 1997 struct rte_dev_eeprom_info *info) 1998 { 1999 struct ethtool_eeprom *eeprom; 2000 struct ifreq ifr; 2001 int ret = 0; 2002 2003 if (!dev || !info) { 2004 DRV_LOG(WARNING, "missing argument, cannot get module eeprom"); 2005 rte_errno = EINVAL; 2006 return -rte_errno; 2007 } 2008 eeprom = rte_calloc(__func__, 1, 2009 (sizeof(struct ethtool_eeprom) + info->length), 0); 2010 if (!eeprom) { 2011 DRV_LOG(WARNING, "port %u cannot allocate memory for " 2012 "eeprom data", dev->data->port_id); 2013 rte_errno = ENOMEM; 2014 return -rte_errno; 2015 } 2016 eeprom->cmd = ETHTOOL_GMODULEEEPROM; 2017 eeprom->offset = info->offset; 2018 eeprom->len = info->length; 2019 ifr = (struct ifreq) { 2020 .ifr_data = (void *)eeprom, 2021 }; 2022 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2023 if (ret) 2024 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 2025 dev->data->port_id, strerror(rte_errno)); 2026 else 2027 rte_memcpy(info->data, eeprom->data, info->length); 2028 rte_free(eeprom); 2029 return ret; 2030 } 2031