/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <assert.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <dirent.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <fcntl.h>
#include <stdalign.h>
#include <sys/un.h>
#include <time.h>

#include <rte_atomic.h>
#include <rte_ethdev_driver.h>
#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_rwlock.h>
#include <rte_cycles.h>

#include "mlx5.h"
#include "mlx5_glue.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"

/* Supported speed values found in /usr/include/linux/ethtool.h */
#ifndef HAVE_SUPPORTED_40000baseKR4_Full
#define SUPPORTED_40000baseKR4_Full (1 << 23)
#endif
#ifndef HAVE_SUPPORTED_40000baseCR4_Full
#define SUPPORTED_40000baseCR4_Full (1 << 24)
#endif
#ifndef HAVE_SUPPORTED_40000baseSR4_Full
#define SUPPORTED_40000baseSR4_Full (1 << 25)
#endif
#ifndef HAVE_SUPPORTED_40000baseLR4_Full
#define SUPPORTED_40000baseLR4_Full (1 << 26)
#endif
#ifndef HAVE_SUPPORTED_56000baseKR4_Full
#define SUPPORTED_56000baseKR4_Full (1 << 27)
#endif
#ifndef HAVE_SUPPORTED_56000baseCR4_Full
#define SUPPORTED_56000baseCR4_Full (1 << 28)
#endif
#ifndef HAVE_SUPPORTED_56000baseSR4_Full
#define SUPPORTED_56000baseSR4_Full (1 << 29)
#endif
#ifndef HAVE_SUPPORTED_56000baseLR4_Full
#define SUPPORTED_56000baseLR4_Full (1 << 30)
#endif

/* Add defines in case the running kernel is not the same as user headers.
 */
#ifndef ETHTOOL_GLINKSETTINGS
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_tp_mdix;
	uint8_t eth_tp_mdix_ctrl;
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get master interface name from IB device path.
 *
 * @param[in] ibdev_path
 *   Pointer to IB device path (sysfs).
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
{
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_type = 0;
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	assert(ibdev_path);
	{
		MKSTR(path, "%s/device/net", ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports.
		 * May happen when using a MOFED release older than 3.0 with a
		 * Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		if (dev_port == 0)
			strlcpy(match, name, sizeof(match));
	}
	closedir(dir);
	if (match[0] == '\0') {
		rte_errno = ENOENT;
		return -rte_errno;
	}
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Get interface name from private structure.
 *
 * This is a port representor-aware version of mlx5_get_master_ifname().
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int ifindex;

	assert(priv);
	assert(priv->sh);
	ifindex = mlx5_ifindex(dev);
	if (!ifindex) {
		if (!priv->representor)
			return mlx5_get_master_ifname(priv->sh->ibdev_path,
						      ifname);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	if (if_indextoname(ifindex, &(*ifname)[0]))
		return 0;
	rte_errno = errno;
	return -rte_errno;
}

/**
 * Get the interface index from device name.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Nonzero interface index on success, zero otherwise and rte_errno is set.
 */
unsigned int
mlx5_ifindex(const struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int ifindex;

	assert(priv);
	assert(priv->if_index);
	ifindex = priv->if_index;
	if (!ifindex)
		rte_errno = ENXIO;
	return ifindex;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param req
 *   Request number to pass to ioctl().
 * @param[out] ifr
 *   Interface request structure output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
{
	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	int ret = 0;

	if (sock == -1) {
		rte_errno = errno;
		return -rte_errno;
	}
	ret = mlx5_get_ifname(dev, &ifr->ifr_name);
	if (ret)
		goto error;
	ret = ioctl(sock, req, ifr);
	if (ret == -1) {
		rte_errno = errno;
		goto error;
	}
	close(sock);
	return 0;
error:
	close(sock);
	return -rte_errno;
}

/**
 * Get device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] mtu
 *   MTU value output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);

	if (ret)
		return ret;
	*mtu = request.ifr_mtu;
	return 0;
}

/**
 * Set device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mtu
 *   MTU value to set.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ifreq request = { .ifr_mtu = mtu, };

	return mlx5_ifreq(dev, SIOCSIFMTU, &request);
}

/**
 * Set device flags.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param keep
 *   Bitmask for flags that must remain untouched.
 * @param flags
 *   Bitmask for flags to modify.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);

	if (ret)
		return ret;
	request.ifr_flags &= keep;
	request.ifr_flags |= flags & ~keep;
	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
}

/**
 * DPDK callback for Ethernet device configuration.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int rxqs_n = dev->data->nb_rx_queues;
	unsigned int txqs_n = dev->data->nb_tx_queues;
	unsigned int i;
	unsigned int j;
	unsigned int reta_idx_n;
	const uint8_t use_app_rss_key =
		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
	int ret = 0;

	if (use_app_rss_key &&
	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
	     MLX5_RSS_HASH_KEY_LEN)) {
		DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long",
			dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN));
		rte_errno = EINVAL;
		return -rte_errno;
	}
	priv->rss_conf.rss_key =
		rte_realloc(priv->rss_conf.rss_key,
			    MLX5_RSS_HASH_KEY_LEN, 0);
	if (!priv->rss_conf.rss_key) {
		DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	memcpy(priv->rss_conf.rss_key,
	       use_app_rss_key ?
	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
	       rss_hash_default_key,
	       MLX5_RSS_HASH_KEY_LEN);
	priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN;
	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	priv->rxqs = (void *)dev->data->rx_queues;
	priv->txqs = (void *)dev->data->tx_queues;
	if (txqs_n != priv->txqs_n) {
		DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
			dev->data->port_id, priv->txqs_n, txqs_n);
		priv->txqs_n = txqs_n;
	}
	if (rxqs_n > priv->config.ind_table_max_size) {
		DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (rxqs_n != priv->rxqs_n) {
		DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
			dev->data->port_id, priv->rxqs_n, rxqs_n);
		priv->rxqs_n = rxqs_n;
		/*
		 * If the requested number of RX queues is not a power of two,
		 * use the maximum indirection table size for better balancing.
		 * The result is always rounded to the next power of two.
		 */
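		/*
		 * Illustrative example (hypothetical values): with rxqs_n = 6
		 * and ind_table_max_size = 512, 6 is not a power of two, so
		 * reta_idx_n below becomes 512 and the table is filled with
		 * queue indexes 0,1,2,3,4,5,0,1,... wrapping around.
		 */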
		reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
				      priv->config.ind_table_max_size :
				      rxqs_n));
		ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
		if (ret)
			return ret;
		/*
		 * When the number of RX queues is not a power of two,
		 * the remaining table entries are padded with reused WQs
		 * and hashes are not spread uniformly.
		 */
		for (i = 0, j = 0; (i != reta_idx_n); ++i) {
			(*priv->reta_idx)[i] = j;
			if (++j == rxqs_n)
				j = 0;
		}
	}
	ret = mlx5_proc_priv_init(dev);
	if (ret)
		return ret;
	return 0;
}

/**
 * Sets default tuning parameters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] info
 *   Info structure output buffer.
 */
static void
mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	/* Minimum CPU utilization. */
	info->default_rxportconf.ring_size = 256;
	info->default_txportconf.ring_size = 256;
	info->default_rxportconf.burst_size = 64;
	info->default_txportconf.burst_size = 64;
	if (priv->link_speed_capa & ETH_LINK_SPEED_100G) {
		info->default_rxportconf.nb_queues = 16;
		info->default_txportconf.nb_queues = 16;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 2048;
			info->default_txportconf.ring_size = 2048;
		}
	} else {
		info->default_rxportconf.nb_queues = 8;
		info->default_txportconf.nb_queues = 8;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 4096;
			info->default_txportconf.ring_size = 4096;
		}
	}
}

/**
 * Sets Tx mbuf limiting parameters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] info
 *   Info structure output buffer.
 */
static void
mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int inlen;
	uint16_t nb_max;

	inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		MLX5_SEND_DEF_INLINE_LEN :
		(unsigned int)config->txq_inline_max;
	assert(config->txq_inline_min >= 0);
	inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min);
	inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX +
			MLX5_ESEG_MIN_INLINE_SIZE -
			MLX5_WQE_CSEG_SIZE -
			MLX5_WQE_ESEG_SIZE -
			MLX5_WQE_DSEG_SIZE * 2);
	nb_max = (MLX5_WQE_SIZE_MAX +
		  MLX5_ESEG_MIN_INLINE_SIZE -
		  MLX5_WQE_CSEG_SIZE -
		  MLX5_WQE_ESEG_SIZE -
		  MLX5_WQE_DSEG_SIZE -
		  inlen) / MLX5_WSEG_SIZE;
	info->tx_desc_lim.nb_seg_max = nb_max;
	info->tx_desc_lim.nb_mtu_seg_max = nb_max;
}

/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
int
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max;

	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
	 */
	max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq,
		      priv->sh->device_attr.orig_attr.max_qp);
	/* max_rx_queues is uint16_t, clamp to avoid overflow. */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
	info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
				 info->rx_queue_offload_capa);
	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
	info->if_index = mlx5_ifindex(dev);
	info->reta_size = priv->reta_idx_n ?
		priv->reta_idx_n : config->ind_table_max_size;
	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
	info->speed_capa = priv->link_speed_capa;
	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
	mlx5_set_default_params(dev, info);
	mlx5_set_txlimit_params(dev, info);
	info->switch_info.name = dev->data->name;
	info->switch_info.domain_id = priv->domain_id;
	info->switch_info.port_id = priv->representor_id;
	if (priv->representor) {
		uint16_t port_id;

		if (priv->pf_bond >= 0) {
			/*
			 * Switch port ID is an opaque value with a driver
			 * defined format. Push the PF index in bonding
			 * configurations into the upper four bits of the
			 * port ID. If we get too many representors (more
			 * than 4K) or PFs (more than 15) this approach
			 * must be reconsidered.
			 */
			if ((info->switch_info.port_id >>
				MLX5_PORT_ID_BONDING_PF_SHIFT) ||
			    priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) {
				DRV_LOG(ERR, "can't update switch port ID"
					     " for bonding device");
				assert(false);
				return -ENODEV;
			}
			info->switch_info.port_id |=
				priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT;
		}
		MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
			struct mlx5_priv *opriv =
				rte_eth_devices[port_id].data->dev_private;

			if (!opriv ||
			    opriv->representor ||
			    opriv->sh != priv->sh ||
			    opriv->domain_id != priv->domain_id)
				continue;
			/*
			 * Override switch name with that of the master
			 * device.
			 */
			info->switch_info.name = opriv->dev_data->name;
			break;
		}
	}
	return 0;
}

/**
 * Get device current raw clock counter.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] clock
 *   Current raw clock counter of the device.
 *
 * @return
 *   0 if the clock has correctly been read,
 *   the value of errno in case of error.
 */
int
mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_values_ex values;
	int err = 0;

	values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK;
	err = mlx5_glue->query_rt_values_ex(ctx, &values);
	if (err != 0) {
		DRV_LOG(WARNING, "Could not query the clock!");
		return err;
	}
	*clock = values.raw_clock.tv_nsec;
	return 0;
}

/**
 * Get firmware version of a device.
 *
 * @param dev
 *   Ethernet device port.
 * @param fw_ver
 *   String output allocated by caller.
 * @param fw_size
 *   Size of the output string, including terminating null byte.
 *
 * @return
 *   0 on success, or the size of the non-truncated string if too big.
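 *
 *   For example (hypothetical usage), a caller may probe the required
 *   buffer size by invoking this function with fw_size == 0, then retry
 *   with a buffer of the returned size.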
 */
int
mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr;
	size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1;

	if (fw_size < size)
		return size;
	if (fw_ver != NULL)
		strlcpy(fw_ver, attr->fw_ver, fw_size);
	return 0;
}

/**
 * Get supported packet types.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   A pointer to the supported Packet types array.
 */
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * Retrieve the master device for representor in the same switch domain.
 *
 * @param dev
 *   Pointer to representor Ethernet device structure.
 *
 * @return
 *   Master device structure on success, NULL otherwise.
 */
static struct rte_eth_dev *
mlx5_find_master_dev(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv;
	uint16_t port_id;
	uint16_t domain_id;

	priv = dev->data->dev_private;
	domain_id = priv->domain_id;
	assert(priv->representor);
	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
		struct mlx5_priv *opriv =
			rte_eth_devices[port_id].data->dev_private;
		if (opriv &&
		    opriv->master &&
		    opriv->domain_id == domain_id &&
		    opriv->sh == priv->sh)
			return &rte_eth_devices[port_id];
	}
	return NULL;
}

/**
 * Retrieve physical link information (unlocked version using the legacy
 * ETHTOOL_GSET ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
			       struct rte_eth_link *link)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&edata,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		if (ret == -ENOTSUP && priv->representor) {
			struct rte_eth_dev *master;

			/*
			 * For representors we can try to inherit link
			 * settings from the master device.
			 * Actually link settings do not make a lot of sense
			 * for representors due to the missing physical link.
			 * The old kernel drivers supported an emulated
			 * settings query for representors, the new ones do
			 * not, so we keep this code for compatibility.
			 */
			master = mlx5_find_master_dev(dev);
			if (master) {
				ifr = (struct ifreq) {
					.ifr_data = (void *)&edata,
				};
				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
			}
		}
		if (ret) {
			DRV_LOG(WARNING,
				"port %u ioctl(SIOCETHTOOL,"
				" ETHTOOL_GSET) failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return ret;
		}
	}
	link_speed = ethtool_cmd_speed(&edata);
	if (link_speed == -1)
		dev_link.link_speed = ETH_SPEED_NUM_NONE;
	else
		dev_link.link_speed = link_speed;
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * Retrieve physical link information (unlocked version using the new
 * ETHTOOL_GLINKSETTINGS ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
			     struct rte_eth_link *link)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	struct rte_eth_dev *master = NULL;
	uint64_t sc;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&gcmd,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		if (ret == -ENOTSUP && priv->representor) {
			/*
			 * For representors we can try to inherit link
			 * settings from the master device. Actually link
			 * settings do not make a lot of sense for
			 * representors due to the missing physical link.
			 * The old kernel drivers supported an emulated
			 * settings query for representors, the new ones do
			 * not, so we keep this code for compatibility.
			 */
			master = mlx5_find_master_dev(dev);
			if (master) {
				ifr = (struct ifreq) {
					.ifr_data = (void *)&gcmd,
				};
				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
			}
		}
		if (ret) {
			DRV_LOG(DEBUG,
				"port %u ioctl(SIOCETHTOOL,"
				" ETHTOOL_GLINKSETTINGS) failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return ret;
		}
	}
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL,"
			" ETHTOOL_GLINKSETTINGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE :
							    ecmd->speed;
	sc = ecmd->link_mode_masks[0] |
		((uint64_t)ecmd->link_mode_masks[1] << 32);
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion.
 *
 * @return
 *   0 if link status was not updated, positive if it was, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);
	int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;

	do {
		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret == -ENOTSUP)
			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if ((wait_to_complete || retry) && ret == -EAGAIN) {
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0);
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete || retry-- > 0);
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t kern_mtu = 0;
	int ret;

	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	/* Set kernel interface MTU first. */
	ret = mlx5_set_mtu(dev, mtu);
	if (ret)
		return ret;
	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
			dev->data->port_id, mtu);
		return 0;
	}
	rte_errno = EAGAIN;
	return -rte_errno;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_GPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
			" %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	fc_conf->autoneg = ethpause.autoneg;
	if (ethpause.rx_pause && ethpause.tx_pause)
		fc_conf->mode = RTE_FC_FULL;
	else if (ethpause.rx_pause)
		fc_conf->mode = RTE_FC_RX_PAUSE;
	else if (ethpause.tx_pause)
		fc_conf->mode = RTE_FC_TX_PAUSE;
	else
		fc_conf->mode = RTE_FC_NONE;
	return 0;
}

/**
 * DPDK callback to modify flow control parameters.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] fc_conf
 *   Flow control parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_SPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ethpause.autoneg = fc_conf->autoneg;
	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_RX_PAUSE))
		ethpause.rx_pause = 1;
	else
		ethpause.rx_pause = 0;

	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_TX_PAUSE))
		ethpause.tx_pause = 1;
	else
		ethpause.tx_pause = 0;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	return 0;
}

/**
 * Get PCI information by sysfs device path.
 *
 * @param dev_path
 *   Pointer to device sysfs folder name.
 * @param[out] pci_addr
 *   PCI bus address output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_to_pci_addr(const char *dev_path,
		     struct rte_pci_addr *pci_addr)
{
	FILE *file;
	char line[32];
	int rc = -ENOENT;
	MKSTR(path, "%s/device/uevent", dev_path);

	file = fopen(path, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	while (fgets(line, sizeof(line), file) == line) {
		size_t len = strlen(line);
		int ret;

		/* Truncate long lines. */
		if (len == (sizeof(line) - 1))
			while (line[(len - 1)] != '\n') {
				ret = fgetc(file);
				if (ret == EOF)
					break;
				line[(len - 1)] = ret;
			}
		/* Extract information. */
		if (sscanf(line,
			   "PCI_SLOT_NAME="
			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
			   &pci_addr->domain,
			   &pci_addr->bus,
			   &pci_addr->devid,
			   &pci_addr->function) == 4) {
			rc = 0;
			break;
		}
	}
	fclose(file);
	if (rc) {
		/* No PCI_SLOT_NAME line found, report the address as missing. */
		rte_errno = -rc;
		return rc;
	}
	return 0;
}

/**
 * Handle asynchronous removal event for entire multiport device.
 *
 * @param sh
 *   Infiniband device shared context.
 */
static void
mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh)
{
	uint32_t i;

	for (i = 0; i < sh->max_port; ++i) {
		struct rte_eth_dev *dev;

		if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {
			/*
			 * Either the port does not exist or no handler
			 * is installed for it.
			 */
			continue;
		}
		dev = &rte_eth_devices[sh->port[i].ih_port_id];
		assert(dev);
		if (dev->data->dev_conf.intr_conf.rmv)
			_rte_eth_dev_callback_process
				(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
	}
}

/**
 * Handle shared asynchronous events from the NIC (removal event
 * and link status change). Supports multiport IB device.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler(void *cb_arg)
{
	struct mlx5_ibv_shared *sh = cb_arg;
	struct ibv_async_event event;

	/* Read all messages from the IB device and acknowledge them. */
	for (;;) {
		struct rte_eth_dev *dev;
		uint32_t tmp;

		if (mlx5_glue->get_async_event(sh->ctx, &event))
			break;
		/* Retrieve and check IB port index. */
		tmp = (uint32_t)event.element.port_num;
		if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {
			/*
			 * The DEVICE_FATAL event is delivered once for the
			 * entire device, without specifying a port.
			 * We should notify all existing ports.
			 */
			mlx5_glue->ack_async_event(&event);
			mlx5_dev_interrupt_device_fatal(sh);
			continue;
		}
		assert(tmp && (tmp <= sh->max_port));
		if (!tmp) {
			/* Unsupported device level event. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"unsupported common event (type %d)",
				event.event_type);
			continue;
		}
		if (tmp > sh->max_port) {
			/* Invalid IB port index. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"cannot handle an event (type %d)"
				" due to invalid IB port index (%u)",
				event.event_type, tmp);
			continue;
		}
		if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
			/* No handler installed. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"cannot handle an event (type %d)"
				" due to no handler installed for port %u",
				event.event_type, tmp);
			continue;
		}
		/* Retrieve Ethernet device descriptor. */
		tmp = sh->port[tmp - 1].ih_port_id;
		dev = &rte_eth_devices[tmp];
		assert(dev);
		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
		     event.event_type == IBV_EVENT_PORT_ERR) &&
		    dev->data->dev_conf.intr_conf.lsc) {
			mlx5_glue->ack_async_event(&event);
			if (mlx5_link_update(dev, 0) == -EAGAIN) {
				usleep(0);
				continue;
			}
			_rte_eth_dev_callback_process
				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
			continue;
		}
		DRV_LOG(DEBUG,
			"port %u cannot handle an unknown event (type %d)",
			dev->data->port_id, event.event_type);
		mlx5_glue->ack_async_event(&event);
	}
}

/*
 * Unregister callback handler safely. The handler may be active
 * while we are trying to unregister it, in this case code -EAGAIN
 * is returned by rte_intr_callback_unregister(). This routine checks
 * the return code and tries to unregister the handler again.
 *
 * @param handle
 *   Interrupt handle.
 * @param cb_fn
 *   Pointer to callback routine.
 * @param cb_arg
 *   Opaque callback parameter.
 */
void
mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
			      rte_intr_callback_fn cb_fn, void *cb_arg)
{
	/*
	 * Try to reduce timeout management overhead by not calling
	 * the timer related routines on the first iteration. If the
	 * unregistering succeeds on first call there will be no
	 * timer calls at all.
	 */
	uint64_t twait = 0;
	uint64_t start = 0;

	do {
		int ret;

		ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
		if (ret >= 0)
			return;
		if (ret != -EAGAIN) {
			DRV_LOG(INFO, "failed to unregister interrupt"
				      " handler (error: %d)", ret);
			assert(false);
			return;
		}
		if (twait) {
			struct timespec onems;

			/* Wait one millisecond and try again. */
			onems.tv_sec = 0;
			onems.tv_nsec = NS_PER_S / MS_PER_S;
			nanosleep(&onems, 0);
			/* Check whether one second elapsed. */
			if ((rte_get_timer_cycles() - start) <= twait)
				continue;
		} else {
			/*
			 * We get the amount of timer ticks for one second.
			 * If this amount elapsed it means we spent one
			 * second in waiting. This branch is executed once
			 * on first iteration.
			 */
			twait = rte_get_timer_hz();
			assert(twait);
		}
		/*
		 * Timeout elapsed, show message (once a second) and retry.
		 * We have no other acceptable option here, if we ignore
		 * the unregistering return code the handler will not
		 * be unregistered, fd will be closed and we may get a
		 * crash. Hanging and messaging in the loop seems not to be
		 * the worst choice.
		 */
		DRV_LOG(INFO, "Retrying to unregister interrupt handler");
		start = rte_get_timer_cycles();
	} while (true);
}

/**
 * Handle DEVX interrupts from the NIC.
 * This function is probably called from the DPDK host thread.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler_devx(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_ASYNC
	(void)cb_arg;
	return;
#else
	struct mlx5_ibv_shared *sh = cb_arg;
	union {
		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
			    MLX5_ST_SZ_BYTES(traffic_counter) +
			    sizeof(struct mlx5dv_devx_async_cmd_hdr)];
	} out;
	uint8_t *buf = out.buf + sizeof(out.cmd_resp);

	while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
						   &out.cmd_resp,
						   sizeof(out.buf)))
		mlx5_flow_async_pool_query_handle
			(sh, (uint64_t)out.cmd_resp.wr_id,
			 mlx5_devx_get_out_command_status(buf));
#endif /* HAVE_IBV_DEVX_ASYNC */
}

/**
 * Uninstall shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS)
		goto exit;
	assert(sh->port[priv->ibv_port - 1].ih_port_id ==
					(uint32_t)dev->data->port_id);
	assert(sh->intr_cnt);
	sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	if (!sh->intr_cnt || --sh->intr_cnt)
		goto exit;
	mlx5_intr_callback_unregister(&sh->intr_handle,
				      mlx5_dev_interrupt_handler, sh);
	sh->intr_handle.fd = 0;
	sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Uninstall devx shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_devx_uninstall(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].devx_ih_port_id >= RTE_MAX_ETHPORTS)
		goto exit;
	assert(sh->port[priv->ibv_port - 1].devx_ih_port_id ==
					(uint32_t)dev->data->port_id);
	sh->port[priv->ibv_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	if (!sh->devx_intr_cnt || --sh->devx_intr_cnt)
		goto exit;
	if (sh->intr_handle_devx.fd) {
		rte_intr_callback_unregister(&sh->intr_handle_devx,
					     mlx5_dev_interrupt_handler_devx,
					     sh);
		sh->intr_handle_devx.fd = 0;
		sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN;
	}
	if (sh->devx_comp) {
		mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
		sh->devx_comp = NULL;
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Install shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_install(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;
	int ret;
	int flags;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) {
		/* The handler is already installed for this port. */
		assert(sh->intr_cnt);
		goto exit;
	}
	if (sh->intr_cnt) {
		sh->port[priv->ibv_port - 1].ih_port_id =
				(uint32_t)dev->data->port_id;
		sh->intr_cnt++;
		goto exit;
	}
	/* No shared handler installed. */
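	/*
	 * First installation on this shared context: below the IB async
	 * event file descriptor is switched to non-blocking mode so the
	 * interrupt thread never stalls while draining events, then the
	 * shared handler is registered and accounted for this port.
	 */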
	assert(sh->ctx->async_fd > 0);
	flags = fcntl(sh->ctx->async_fd, F_GETFL);
	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(INFO, "failed to change file descriptor async event"
			      " queue");
		/* Indicate there will be no interrupts. */
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	} else {
		sh->intr_handle.fd = sh->ctx->async_fd;
		sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&sh->intr_handle,
					   mlx5_dev_interrupt_handler, sh);
		sh->intr_cnt++;
		sh->port[priv->ibv_port - 1].ih_port_id =
				(uint32_t)dev->data->port_id;
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Install devx shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_devx_install(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].devx_ih_port_id < RTE_MAX_ETHPORTS) {
		/* The handler is already installed for this port. */
		assert(sh->devx_intr_cnt);
		goto exit;
	}
	if (sh->devx_intr_cnt) {
		sh->devx_intr_cnt++;
		sh->port[priv->ibv_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
		goto exit;
	}
	if (priv->config.devx) {
#ifndef HAVE_IBV_DEVX_ASYNC
		goto exit;
#else
		sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
		if (sh->devx_comp) {
			int flags = fcntl(sh->devx_comp->fd, F_GETFL);
			int ret = fcntl(sh->devx_comp->fd, F_SETFL,
					flags | O_NONBLOCK);

			if (ret) {
				DRV_LOG(INFO, "failed to change file descriptor"
					      " devx async event queue");
			} else {
				sh->intr_handle_devx.fd = sh->devx_comp->fd;
				sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
				rte_intr_callback_register
					(&sh->intr_handle_devx,
					 mlx5_dev_interrupt_handler_devx, sh);
				sh->devx_intr_cnt++;
				sh->port[priv->ibv_port - 1].devx_ih_port_id =
						(uint32_t)dev->data->port_id;
			}
		}
#endif /* HAVE_IBV_DEVX_ASYNC */
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Uninstall interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_uninstall(dev);
}

/**
 * Install interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_install(dev);
}

/**
 * Devx uninstall interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_devx_uninstall(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_devx_uninstall(dev);
}

/**
 * Devx install interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_devx_install(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_devx_install(dev);
}

/**
 * DPDK callback to bring the link DOWN.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_down(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
}

/**
 * DPDK callback to bring the link UP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_up(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
}

/**
 * Configure the RX function to use.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Rx burst function.
 */
eth_rx_burst_t
mlx5_select_rx_function(struct rte_eth_dev *dev)
{
	eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;

	assert(dev != NULL);
	if (mlx5_check_vec_rx_support(dev) > 0) {
		rx_pkt_burst = mlx5_rx_burst_vec;
		DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
			dev->data->port_id);
	} else if (mlx5_mprq_enabled(dev)) {
		rx_pkt_burst = mlx5_rx_burst_mprq;
	}
	return rx_pkt_burst;
}

/**
 * Check if mlx5 device was removed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   1 when device is removed, otherwise 0.
 */
int
mlx5_is_removed(struct rte_eth_dev *dev)
{
	struct ibv_device_attr device_attr;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO)
		return 1;
	return 0;
}

/**
 * Get the E-Switch parameters by port id.
 *
 * @param[in] port
 *   Device port id.
 *
 * @return
 *   Pointer to device private data structure containing data needed
 *   on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_priv *
mlx5_port_to_eswitch_info(uint16_t port)
{
	struct rte_eth_dev *dev;
	struct mlx5_priv *priv;

	if (port >= RTE_MAX_ETHPORTS) {
		rte_errno = EINVAL;
		return NULL;
	}
	if (!rte_eth_dev_is_valid_port(port)) {
		rte_errno = ENODEV;
		return NULL;
	}
	dev = &rte_eth_devices[port];
	priv = dev->data->dev_private;
	if (!(priv->representor || priv->master)) {
		rte_errno = EINVAL;
		return NULL;
	}
	return priv;
}

/**
 * Get the E-Switch parameters by device instance.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Pointer to device private data structure containing data needed
 *   on success, NULL otherwise and rte_errno is set.
 */
struct mlx5_priv *
mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv;

	priv = dev->data->dev_private;
	if (!(priv->representor || priv->master)) {
		rte_errno = EINVAL;
		return NULL;
	}
	return priv;
}

/**
 * Get switch information associated with network interface.
 *
 * @param ifindex
 *   Network interface index.
 * @param[out] info
 *   Switch information object, populated in case of success.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
{
	char ifname[IF_NAMESIZE];
	char port_name[IF_NAMESIZE];
	FILE *file;
	struct mlx5_switch_info data = {
		.master = 0,
		.representor = 0,
		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
		.port_name = 0,
		.switch_id = 0,
	};
	DIR *dir;
	bool port_switch_id_set = false;
	bool device_dir = false;
	char c;
	int ret;

	if (!if_indextoname(ifindex, ifname)) {
		rte_errno = errno;
		return -rte_errno;
	}

	MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
	      ifname);
	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
	      ifname);
	MKSTR(pci_device, "/sys/class/net/%s/device",
	      ifname);

	file = fopen(phys_port_name, "rb");
	if (file != NULL) {
		ret = fscanf(file, "%s", port_name);
		fclose(file);
		if (ret == 1)
			mlx5_translate_port_name(port_name, &data);
	}
	file = fopen(phys_switch_id, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	port_switch_id_set =
		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
		c == '\n';
	fclose(file);
	dir = opendir(pci_device);
	if (dir != NULL) {
		closedir(dir);
		device_dir = true;
	}
	if (port_switch_id_set) {
		/* We have some E-Switch configuration. */
		mlx5_sysfs_check_switch_info(device_dir, &data);
	}
	*info = data;
	assert(!(data.master && data.representor));
	if (data.master && data.representor) {
		DRV_LOG(ERR, "ifindex %u device is recognized as master"
			     " and as representor", ifindex);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	return 0;
}

/**
 * Analyze gathered port parameters via Netlink to recognize master
 * and representor devices for E-Switch configuration.
 *
 * @param[in] num_vf_set
 *   Flag of presence of the number of VFs port attribute.
 * @param[inout] switch_info
 *   Port information, including port name as a number and port name
 *   type if recognized.
 *
 * @return
 *   master and representor flags are set in switch_info according to
 *   recognized parameters (if any).
 */
void
mlx5_nl_check_switch_info(bool num_vf_set,
			  struct mlx5_switch_info *switch_info)
{
	switch (switch_info->name_type) {
	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
		/*
		 * Name is not recognized, assume the master,
		 * check the number of VFs key presence.
		 */
		switch_info->master = num_vf_set;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
		/*
		 * Name is not set, this assumes the legacy naming
		 * schema for master, just check if there is a
		 * number of VFs key.
		 */
		switch_info->master = num_vf_set;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
		/* New uplink naming schema recognized. */
		switch_info->master = 1;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
		/* Legacy representors naming schema. */
		switch_info->representor = !num_vf_set;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
		/* New representors naming schema. */
		switch_info->representor = 1;
		break;
	}
}

/**
 * Analyze gathered port parameters via sysfs to recognize master
 * and representor devices for E-Switch configuration.
 *
 * @param[in] device_dir
 *   Flag of presence of "device" directory under port device key.
 * @param[inout] switch_info
 *   Port information, including port name as a number and port name
 *   type if recognized.
 *
 * @return
 *   master and representor flags are set in switch_info according to
 *   recognized parameters (if any).
 */
void
mlx5_sysfs_check_switch_info(bool device_dir,
			     struct mlx5_switch_info *switch_info)
{
	switch (switch_info->name_type) {
	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
		/*
		 * Name is not recognized, assume the master,
		 * check the device directory presence.
		 */
		switch_info->master = device_dir;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
		/*
		 * Name is not set, this assumes the legacy naming
		 * schema for master, just check if there is
		 * a device directory.
		 */
		switch_info->master = device_dir;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
		/* New uplink naming schema recognized. */
		switch_info->master = 1;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
		/* Legacy representors naming schema. */
		switch_info->representor = !device_dir;
		break;
	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
		/* New representors naming schema. */
		switch_info->representor = 1;
		break;
	}
}

/**
 * Extract port name, as a number, from sysfs or netlink information.
 *
 * @param[in] port_name_in
 *   String representing the port name.
 * @param[out] port_info_out
 *   Port information, including port name as a number and port name
 *   type if recognized.
 *
 * @return
 *   port_name field set according to recognized name format.
 */
void
mlx5_translate_port_name(const char *port_name_in,
			 struct mlx5_switch_info *port_info_out)
{
	char pf_c1, pf_c2, vf_c1, vf_c2;
	char *end;
	int sc_items;

	/*
	 * Check for port-name as a string of the form pf0vf0
	 * (support kernel ver >= 5.0 or OFED ver >= 4.6).
	 */
	sc_items = sscanf(port_name_in, "%c%c%d%c%c%d",
			  &pf_c1, &pf_c2, &port_info_out->pf_num,
			  &vf_c1, &vf_c2, &port_info_out->port_name);
	if (sc_items == 6 &&
	    pf_c1 == 'p' && pf_c2 == 'f' &&
	    vf_c1 == 'v' && vf_c2 == 'f') {
		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF;
		return;
	}
	/*
	 * Check for port-name as a string of the form p0
	 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
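	 *
	 * Illustrative (hypothetical) inputs: "p0" is recognized here as
	 * UPLINK with port_name 0, "pf0vf2" above is recognized as PFVF
	 * with pf_num 0 and port_name 2, and a bare "2" falls through to
	 * the LEGACY check below.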
	 */
	sc_items = sscanf(port_name_in, "%c%d",
			  &pf_c1, &port_info_out->port_name);
	if (sc_items == 2 && pf_c1 == 'p') {
		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
		return;
	}
	/* Check for port-name as a number (support kernel ver < 5.0). */
	errno = 0;
	port_info_out->port_name = strtol(port_name_in, &end, 0);
	if (!errno &&
	    (size_t)(end - port_name_in) == strlen(port_name_in)) {
		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
		return;
	}
	port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
	return;
}

/**
 * DPDK callback to retrieve plug-in module EEPROM information (type and size).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] modinfo
 *   Storage for plug-in module EEPROM information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_module_info(struct rte_eth_dev *dev,
		     struct rte_eth_dev_module_info *modinfo)
{
	struct ethtool_modinfo info = {
		.cmd = ETHTOOL_GMODULEINFO,
	};
	struct ifreq ifr = (struct ifreq) {
		.ifr_data = (void *)&info,
	};
	int ret = 0;

	if (!dev || !modinfo) {
		DRV_LOG(WARNING, "missing argument, cannot get module info");
		rte_errno = EINVAL;
		return -rte_errno;
	}
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	modinfo->type = info.type;
	modinfo->eeprom_len = info.eeprom_len;
	return ret;
}

/**
 * DPDK callback to retrieve plug-in module EEPROM data.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Storage for plug-in module EEPROM data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_module_eeprom(struct rte_eth_dev *dev,
		       struct rte_dev_eeprom_info *info)
{
	struct ethtool_eeprom *eeprom;
	struct ifreq ifr;
	int ret = 0;

	if (!dev || !info) {
		DRV_LOG(WARNING, "missing argument, cannot get module eeprom");
		rte_errno = EINVAL;
		return -rte_errno;
	}
	eeprom = rte_calloc(__func__, 1,
			    (sizeof(struct ethtool_eeprom) + info->length), 0);
	if (!eeprom) {
		DRV_LOG(WARNING, "port %u cannot allocate memory for "
			"eeprom data", dev->data->port_id);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	eeprom->cmd = ETHTOOL_GMODULEEEPROM;
	eeprom->offset = info->offset;
	eeprom->len = info->length;
	ifr = (struct ifreq) {
		.ifr_data = (void *)eeprom,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret)
		DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s",
			dev->data->port_id, strerror(rte_errno));
	else
		rte_memcpy(info->data, eeprom->data, info->length);
	rte_free(eeprom);
	return ret;
}