1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <inttypes.h> 9 #include <unistd.h> 10 #include <stdbool.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <string.h> 14 #include <stdlib.h> 15 #include <errno.h> 16 #include <dirent.h> 17 #include <net/if.h> 18 #include <sys/ioctl.h> 19 #include <sys/socket.h> 20 #include <netinet/in.h> 21 #include <linux/ethtool.h> 22 #include <linux/sockios.h> 23 #include <fcntl.h> 24 #include <stdalign.h> 25 #include <sys/un.h> 26 #include <time.h> 27 28 #include <rte_atomic.h> 29 #include <rte_ethdev_driver.h> 30 #include <rte_bus_pci.h> 31 #include <rte_mbuf.h> 32 #include <rte_common.h> 33 #include <rte_interrupts.h> 34 #include <rte_malloc.h> 35 #include <rte_string_fns.h> 36 #include <rte_rwlock.h> 37 #include <rte_cycles.h> 38 39 #include "mlx5.h" 40 #include "mlx5_glue.h" 41 #include "mlx5_devx_cmds.h" 42 #include "mlx5_rxtx.h" 43 #include "mlx5_utils.h" 44 45 /* Supported speed values found in /usr/include/linux/ethtool.h */ 46 #ifndef HAVE_SUPPORTED_40000baseKR4_Full 47 #define SUPPORTED_40000baseKR4_Full (1 << 23) 48 #endif 49 #ifndef HAVE_SUPPORTED_40000baseCR4_Full 50 #define SUPPORTED_40000baseCR4_Full (1 << 24) 51 #endif 52 #ifndef HAVE_SUPPORTED_40000baseSR4_Full 53 #define SUPPORTED_40000baseSR4_Full (1 << 25) 54 #endif 55 #ifndef HAVE_SUPPORTED_40000baseLR4_Full 56 #define SUPPORTED_40000baseLR4_Full (1 << 26) 57 #endif 58 #ifndef HAVE_SUPPORTED_56000baseKR4_Full 59 #define SUPPORTED_56000baseKR4_Full (1 << 27) 60 #endif 61 #ifndef HAVE_SUPPORTED_56000baseCR4_Full 62 #define SUPPORTED_56000baseCR4_Full (1 << 28) 63 #endif 64 #ifndef HAVE_SUPPORTED_56000baseSR4_Full 65 #define SUPPORTED_56000baseSR4_Full (1 << 29) 66 #endif 67 #ifndef HAVE_SUPPORTED_56000baseLR4_Full 68 #define SUPPORTED_56000baseLR4_Full (1 << 30) 69 #endif 70 71 /* Add defines in case the running kernel is not the same as user headers. */ 72 #ifndef ETHTOOL_GLINKSETTINGS 73 struct ethtool_link_settings { 74 uint32_t cmd; 75 uint32_t speed; 76 uint8_t duplex; 77 uint8_t port; 78 uint8_t phy_address; 79 uint8_t autoneg; 80 uint8_t mdio_support; 81 uint8_t eth_to_mdix; 82 uint8_t eth_tp_mdix_ctrl; 83 int8_t link_mode_masks_nwords; 84 uint32_t reserved[8]; 85 uint32_t link_mode_masks[]; 86 }; 87 88 #define ETHTOOL_GLINKSETTINGS 0x0000004c 89 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 90 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 91 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 92 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 93 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 94 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 95 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 96 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 97 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 98 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 99 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 100 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 101 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 102 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 103 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 104 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 105 #endif 106 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 107 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 108 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 109 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 110 #endif 111 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 112 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 113 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 114 #endif 115 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 116 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 117 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 118 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 119 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 120 #endif 121 122 /** 123 * Get master interface name from private structure. 124 * 125 * @param[in] dev 126 * Pointer to Ethernet device. 127 * @param[out] ifname 128 * Interface name output buffer. 129 * 130 * @return 131 * 0 on success, a negative errno value otherwise and rte_errno is set. 132 */ 133 int 134 mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE]) 135 { 136 DIR *dir; 137 struct dirent *dent; 138 unsigned int dev_type = 0; 139 unsigned int dev_port_prev = ~0u; 140 char match[IF_NAMESIZE] = ""; 141 142 assert(ibdev_path); 143 { 144 MKSTR(path, "%s/device/net", ibdev_path); 145 146 dir = opendir(path); 147 if (dir == NULL) { 148 rte_errno = errno; 149 return -rte_errno; 150 } 151 } 152 while ((dent = readdir(dir)) != NULL) { 153 char *name = dent->d_name; 154 FILE *file; 155 unsigned int dev_port; 156 int r; 157 158 if ((name[0] == '.') && 159 ((name[1] == '\0') || 160 ((name[1] == '.') && (name[2] == '\0')))) 161 continue; 162 163 MKSTR(path, "%s/device/net/%s/%s", 164 ibdev_path, name, 165 (dev_type ? "dev_id" : "dev_port")); 166 167 file = fopen(path, "rb"); 168 if (file == NULL) { 169 if (errno != ENOENT) 170 continue; 171 /* 172 * Switch to dev_id when dev_port does not exist as 173 * is the case with Linux kernel versions < 3.15. 174 */ 175 try_dev_id: 176 match[0] = '\0'; 177 if (dev_type) 178 break; 179 dev_type = 1; 180 dev_port_prev = ~0u; 181 rewinddir(dir); 182 continue; 183 } 184 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 185 fclose(file); 186 if (r != 1) 187 continue; 188 /* 189 * Switch to dev_id when dev_port returns the same value for 190 * all ports. May happen when using a MOFED release older than 191 * 3.0 with a Linux kernel >= 3.15. 192 */ 193 if (dev_port == dev_port_prev) 194 goto try_dev_id; 195 dev_port_prev = dev_port; 196 if (dev_port == 0) 197 strlcpy(match, name, sizeof(match)); 198 } 199 closedir(dir); 200 if (match[0] == '\0') { 201 rte_errno = ENOENT; 202 return -rte_errno; 203 } 204 strncpy(*ifname, match, sizeof(*ifname)); 205 return 0; 206 } 207 208 /** 209 * Get interface name from private structure. 210 * 211 * This is a port representor-aware version of mlx5_get_master_ifname(). 212 * 213 * @param[in] dev 214 * Pointer to Ethernet device. 215 * @param[out] ifname 216 * Interface name output buffer. 217 * 218 * @return 219 * 0 on success, a negative errno value otherwise and rte_errno is set. 220 */ 221 int 222 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 223 { 224 struct mlx5_priv *priv = dev->data->dev_private; 225 unsigned int ifindex; 226 227 assert(priv); 228 assert(priv->sh); 229 ifindex = mlx5_ifindex(dev); 230 if (!ifindex) { 231 if (!priv->representor) 232 return mlx5_get_master_ifname(priv->sh->ibdev_path, 233 ifname); 234 rte_errno = ENXIO; 235 return -rte_errno; 236 } 237 if (if_indextoname(ifindex, &(*ifname)[0])) 238 return 0; 239 rte_errno = errno; 240 return -rte_errno; 241 } 242 243 /** 244 * Get the interface index from device name. 245 * 246 * @param[in] dev 247 * Pointer to Ethernet device. 248 * 249 * @return 250 * Nonzero interface index on success, zero otherwise and rte_errno is set. 251 */ 252 unsigned int 253 mlx5_ifindex(const struct rte_eth_dev *dev) 254 { 255 struct mlx5_priv *priv = dev->data->dev_private; 256 unsigned int ifindex; 257 258 assert(priv); 259 assert(priv->if_index); 260 ifindex = priv->if_index; 261 if (!ifindex) 262 rte_errno = ENXIO; 263 return ifindex; 264 } 265 266 /** 267 * Perform ifreq ioctl() on associated Ethernet device. 268 * 269 * @param[in] dev 270 * Pointer to Ethernet device. 271 * @param req 272 * Request number to pass to ioctl(). 273 * @param[out] ifr 274 * Interface request structure output buffer. 275 * 276 * @return 277 * 0 on success, a negative errno value otherwise and rte_errno is set. 278 */ 279 int 280 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 281 { 282 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 283 int ret = 0; 284 285 if (sock == -1) { 286 rte_errno = errno; 287 return -rte_errno; 288 } 289 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 290 if (ret) 291 goto error; 292 ret = ioctl(sock, req, ifr); 293 if (ret == -1) { 294 rte_errno = errno; 295 goto error; 296 } 297 close(sock); 298 return 0; 299 error: 300 close(sock); 301 return -rte_errno; 302 } 303 304 /** 305 * Get device MTU. 306 * 307 * @param dev 308 * Pointer to Ethernet device. 309 * @param[out] mtu 310 * MTU value output buffer. 311 * 312 * @return 313 * 0 on success, a negative errno value otherwise and rte_errno is set. 314 */ 315 int 316 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 317 { 318 struct ifreq request; 319 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 320 321 if (ret) 322 return ret; 323 *mtu = request.ifr_mtu; 324 return 0; 325 } 326 327 /** 328 * Set device MTU. 329 * 330 * @param dev 331 * Pointer to Ethernet device. 332 * @param mtu 333 * MTU value to set. 334 * 335 * @return 336 * 0 on success, a negative errno value otherwise and rte_errno is set. 337 */ 338 static int 339 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 340 { 341 struct ifreq request = { .ifr_mtu = mtu, }; 342 343 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 344 } 345 346 /** 347 * Set device flags. 348 * 349 * @param dev 350 * Pointer to Ethernet device. 351 * @param keep 352 * Bitmask for flags that must remain untouched. 353 * @param flags 354 * Bitmask for flags to modify. 355 * 356 * @return 357 * 0 on success, a negative errno value otherwise and rte_errno is set. 358 */ 359 int 360 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 361 { 362 struct ifreq request; 363 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 364 365 if (ret) 366 return ret; 367 request.ifr_flags &= keep; 368 request.ifr_flags |= flags & ~keep; 369 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 370 } 371 372 /** 373 * DPDK callback for Ethernet device configuration. 374 * 375 * @param dev 376 * Pointer to Ethernet device structure. 377 * 378 * @return 379 * 0 on success, a negative errno value otherwise and rte_errno is set. 380 */ 381 int 382 mlx5_dev_configure(struct rte_eth_dev *dev) 383 { 384 struct mlx5_priv *priv = dev->data->dev_private; 385 unsigned int rxqs_n = dev->data->nb_rx_queues; 386 unsigned int txqs_n = dev->data->nb_tx_queues; 387 const uint8_t use_app_rss_key = 388 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 389 int ret = 0; 390 391 if (use_app_rss_key && 392 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 393 MLX5_RSS_HASH_KEY_LEN)) { 394 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 395 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 396 rte_errno = EINVAL; 397 return -rte_errno; 398 } 399 priv->rss_conf.rss_key = 400 rte_realloc(priv->rss_conf.rss_key, 401 MLX5_RSS_HASH_KEY_LEN, 0); 402 if (!priv->rss_conf.rss_key) { 403 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 404 dev->data->port_id, rxqs_n); 405 rte_errno = ENOMEM; 406 return -rte_errno; 407 } 408 409 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) 410 dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; 411 412 memcpy(priv->rss_conf.rss_key, 413 use_app_rss_key ? 414 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 415 rss_hash_default_key, 416 MLX5_RSS_HASH_KEY_LEN); 417 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 418 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 419 priv->rxqs = (void *)dev->data->rx_queues; 420 priv->txqs = (void *)dev->data->tx_queues; 421 if (txqs_n != priv->txqs_n) { 422 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 423 dev->data->port_id, priv->txqs_n, txqs_n); 424 priv->txqs_n = txqs_n; 425 } 426 if (rxqs_n > priv->config.ind_table_max_size) { 427 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 428 dev->data->port_id, rxqs_n); 429 rte_errno = EINVAL; 430 return -rte_errno; 431 } 432 if (rxqs_n != priv->rxqs_n) { 433 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 434 dev->data->port_id, priv->rxqs_n, rxqs_n); 435 priv->rxqs_n = rxqs_n; 436 } 437 priv->skip_default_rss_reta = 0; 438 ret = mlx5_proc_priv_init(dev); 439 if (ret) 440 return ret; 441 return 0; 442 } 443 444 /** 445 * Configure default RSS reta. 446 * 447 * @param dev 448 * Pointer to Ethernet device structure. 449 * 450 * @return 451 * 0 on success, a negative errno value otherwise and rte_errno is set. 452 */ 453 int 454 mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev) 455 { 456 struct mlx5_priv *priv = dev->data->dev_private; 457 unsigned int rxqs_n = dev->data->nb_rx_queues; 458 unsigned int i; 459 unsigned int j; 460 unsigned int reta_idx_n; 461 int ret = 0; 462 unsigned int *rss_queue_arr = NULL; 463 unsigned int rss_queue_n = 0; 464 465 if (priv->skip_default_rss_reta) 466 return ret; 467 rss_queue_arr = rte_malloc("", rxqs_n * sizeof(unsigned int), 0); 468 if (!rss_queue_arr) { 469 DRV_LOG(ERR, "port %u cannot allocate RSS queue list (%u)", 470 dev->data->port_id, rxqs_n); 471 rte_errno = ENOMEM; 472 return -rte_errno; 473 } 474 for (i = 0, j = 0; i < rxqs_n; i++) { 475 struct mlx5_rxq_data *rxq_data; 476 struct mlx5_rxq_ctrl *rxq_ctrl; 477 478 rxq_data = (*priv->rxqs)[i]; 479 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 480 if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) 481 rss_queue_arr[j++] = i; 482 } 483 rss_queue_n = j; 484 if (rss_queue_n > priv->config.ind_table_max_size) { 485 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 486 dev->data->port_id, rss_queue_n); 487 rte_errno = EINVAL; 488 rte_free(rss_queue_arr); 489 return -rte_errno; 490 } 491 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 492 dev->data->port_id, priv->rxqs_n, rxqs_n); 493 priv->rxqs_n = rxqs_n; 494 /* 495 * If the requested number of RX queues is not a power of two, 496 * use the maximum indirection table size for better balancing. 497 * The result is always rounded to the next power of two. 498 */ 499 reta_idx_n = (1 << log2above((rss_queue_n & (rss_queue_n - 1)) ? 500 priv->config.ind_table_max_size : 501 rss_queue_n)); 502 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 503 if (ret) { 504 rte_free(rss_queue_arr); 505 return ret; 506 } 507 /* 508 * When the number of RX queues is not a power of two, 509 * the remaining table entries are padded with reused WQs 510 * and hashes are not spread uniformly. 511 */ 512 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 513 (*priv->reta_idx)[i] = rss_queue_arr[j]; 514 if (++j == rss_queue_n) 515 j = 0; 516 } 517 rte_free(rss_queue_arr); 518 return ret; 519 } 520 521 /** 522 * Sets default tuning parameters. 523 * 524 * @param dev 525 * Pointer to Ethernet device. 526 * @param[out] info 527 * Info structure output buffer. 528 */ 529 static void 530 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 531 { 532 struct mlx5_priv *priv = dev->data->dev_private; 533 534 /* Minimum CPU utilization. */ 535 info->default_rxportconf.ring_size = 256; 536 info->default_txportconf.ring_size = 256; 537 info->default_rxportconf.burst_size = MLX5_RX_DEFAULT_BURST; 538 info->default_txportconf.burst_size = MLX5_TX_DEFAULT_BURST; 539 if (priv->link_speed_capa & ETH_LINK_SPEED_100G) { 540 info->default_rxportconf.nb_queues = 16; 541 info->default_txportconf.nb_queues = 16; 542 if (dev->data->nb_rx_queues > 2 || 543 dev->data->nb_tx_queues > 2) { 544 /* Max Throughput. */ 545 info->default_rxportconf.ring_size = 2048; 546 info->default_txportconf.ring_size = 2048; 547 } 548 } else { 549 info->default_rxportconf.nb_queues = 8; 550 info->default_txportconf.nb_queues = 8; 551 if (dev->data->nb_rx_queues > 2 || 552 dev->data->nb_tx_queues > 2) { 553 /* Max Throughput. */ 554 info->default_rxportconf.ring_size = 4096; 555 info->default_txportconf.ring_size = 4096; 556 } 557 } 558 } 559 560 /** 561 * Sets tx mbuf limiting parameters. 562 * 563 * @param dev 564 * Pointer to Ethernet device. 565 * @param[out] info 566 * Info structure output buffer. 567 */ 568 static void 569 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 570 { 571 struct mlx5_priv *priv = dev->data->dev_private; 572 struct mlx5_dev_config *config = &priv->config; 573 unsigned int inlen; 574 uint16_t nb_max; 575 576 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 577 MLX5_SEND_DEF_INLINE_LEN : 578 (unsigned int)config->txq_inline_max; 579 assert(config->txq_inline_min >= 0); 580 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 581 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 582 MLX5_ESEG_MIN_INLINE_SIZE - 583 MLX5_WQE_CSEG_SIZE - 584 MLX5_WQE_ESEG_SIZE - 585 MLX5_WQE_DSEG_SIZE * 2); 586 nb_max = (MLX5_WQE_SIZE_MAX + 587 MLX5_ESEG_MIN_INLINE_SIZE - 588 MLX5_WQE_CSEG_SIZE - 589 MLX5_WQE_ESEG_SIZE - 590 MLX5_WQE_DSEG_SIZE - 591 inlen) / MLX5_WSEG_SIZE; 592 info->tx_desc_lim.nb_seg_max = nb_max; 593 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 594 } 595 596 /** 597 * DPDK callback to get information about the device. 598 * 599 * @param dev 600 * Pointer to Ethernet device structure. 601 * @param[out] info 602 * Info structure output buffer. 603 */ 604 int 605 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 606 { 607 struct mlx5_priv *priv = dev->data->dev_private; 608 struct mlx5_dev_config *config = &priv->config; 609 unsigned int max; 610 611 /* FIXME: we should ask the device for these values. */ 612 info->min_rx_bufsize = 32; 613 info->max_rx_pktlen = 65536; 614 info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; 615 /* 616 * Since we need one CQ per QP, the limit is the minimum number 617 * between the two values. 618 */ 619 max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq, 620 priv->sh->device_attr.orig_attr.max_qp); 621 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 622 if (max >= 65535) 623 max = 65535; 624 info->max_rx_queues = max; 625 info->max_tx_queues = max; 626 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 627 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 628 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 629 info->rx_queue_offload_capa); 630 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 631 info->if_index = mlx5_ifindex(dev); 632 info->reta_size = priv->reta_idx_n ? 633 priv->reta_idx_n : config->ind_table_max_size; 634 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 635 info->speed_capa = priv->link_speed_capa; 636 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 637 mlx5_set_default_params(dev, info); 638 mlx5_set_txlimit_params(dev, info); 639 info->switch_info.name = dev->data->name; 640 info->switch_info.domain_id = priv->domain_id; 641 info->switch_info.port_id = priv->representor_id; 642 if (priv->representor) { 643 uint16_t port_id; 644 645 if (priv->pf_bond >= 0) { 646 /* 647 * Switch port ID is opaque value with driver defined 648 * format. Push the PF index in bonding configurations 649 * in upper four bits of port ID. If we get too many 650 * representors (more than 4K) or PFs (more than 15) 651 * this approach must be reconsidered. 652 */ 653 if ((info->switch_info.port_id >> 654 MLX5_PORT_ID_BONDING_PF_SHIFT) || 655 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 656 DRV_LOG(ERR, "can't update switch port ID" 657 " for bonding device"); 658 assert(false); 659 return -ENODEV; 660 } 661 info->switch_info.port_id |= 662 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 663 } 664 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 665 struct mlx5_priv *opriv = 666 rte_eth_devices[port_id].data->dev_private; 667 668 if (!opriv || 669 opriv->representor || 670 opriv->sh != priv->sh || 671 opriv->domain_id != priv->domain_id) 672 continue; 673 /* 674 * Override switch name with that of the master 675 * device. 676 */ 677 info->switch_info.name = opriv->dev_data->name; 678 break; 679 } 680 } 681 return 0; 682 } 683 684 /** 685 * Get device current raw clock counter 686 * 687 * @param dev 688 * Pointer to Ethernet device structure. 689 * @param[out] time 690 * Current raw clock counter of the device. 691 * 692 * @return 693 * 0 if the clock has correctly been read 694 * The value of errno in case of error 695 */ 696 int 697 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock) 698 { 699 struct mlx5_priv *priv = dev->data->dev_private; 700 struct ibv_context *ctx = priv->sh->ctx; 701 struct ibv_values_ex values; 702 int err = 0; 703 704 values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK; 705 err = mlx5_glue->query_rt_values_ex(ctx, &values); 706 if (err != 0) { 707 DRV_LOG(WARNING, "Could not query the clock !"); 708 return err; 709 } 710 *clock = values.raw_clock.tv_nsec; 711 return 0; 712 } 713 714 /** 715 * Get firmware version of a device. 716 * 717 * @param dev 718 * Ethernet device port. 719 * @param fw_ver 720 * String output allocated by caller. 721 * @param fw_size 722 * Size of the output string, including terminating null byte. 723 * 724 * @return 725 * 0 on success, or the size of the non truncated string if too big. 726 */ 727 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 728 { 729 struct mlx5_priv *priv = dev->data->dev_private; 730 struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr; 731 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 732 733 if (fw_size < size) 734 return size; 735 if (fw_ver != NULL) 736 strlcpy(fw_ver, attr->fw_ver, fw_size); 737 return 0; 738 } 739 740 /** 741 * Get supported packet types. 742 * 743 * @param dev 744 * Pointer to Ethernet device structure. 745 * 746 * @return 747 * A pointer to the supported Packet types array. 748 */ 749 const uint32_t * 750 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 751 { 752 static const uint32_t ptypes[] = { 753 /* refers to rxq_cq_to_pkt_type() */ 754 RTE_PTYPE_L2_ETHER, 755 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 756 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 757 RTE_PTYPE_L4_NONFRAG, 758 RTE_PTYPE_L4_FRAG, 759 RTE_PTYPE_L4_TCP, 760 RTE_PTYPE_L4_UDP, 761 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 762 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 763 RTE_PTYPE_INNER_L4_NONFRAG, 764 RTE_PTYPE_INNER_L4_FRAG, 765 RTE_PTYPE_INNER_L4_TCP, 766 RTE_PTYPE_INNER_L4_UDP, 767 RTE_PTYPE_UNKNOWN 768 }; 769 770 if (dev->rx_pkt_burst == mlx5_rx_burst || 771 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 772 dev->rx_pkt_burst == mlx5_rx_burst_vec) 773 return ptypes; 774 return NULL; 775 } 776 777 /** 778 * Retrieve the master device for representor in the same switch domain. 779 * 780 * @param dev 781 * Pointer to representor Ethernet device structure. 782 * 783 * @return 784 * Master device structure on success, NULL otherwise. 785 */ 786 787 static struct rte_eth_dev * 788 mlx5_find_master_dev(struct rte_eth_dev *dev) 789 { 790 struct mlx5_priv *priv; 791 uint16_t port_id; 792 uint16_t domain_id; 793 794 priv = dev->data->dev_private; 795 domain_id = priv->domain_id; 796 assert(priv->representor); 797 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 798 struct mlx5_priv *opriv = 799 rte_eth_devices[port_id].data->dev_private; 800 if (opriv && 801 opriv->master && 802 opriv->domain_id == domain_id && 803 opriv->sh == priv->sh) 804 return &rte_eth_devices[port_id]; 805 } 806 return NULL; 807 } 808 809 /** 810 * DPDK callback to retrieve physical link information. 811 * 812 * @param dev 813 * Pointer to Ethernet device structure. 814 * @param[out] link 815 * Storage for current link status. 816 * 817 * @return 818 * 0 on success, a negative errno value otherwise and rte_errno is set. 819 */ 820 static int 821 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 822 struct rte_eth_link *link) 823 { 824 struct mlx5_priv *priv = dev->data->dev_private; 825 struct ethtool_cmd edata = { 826 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 827 }; 828 struct ifreq ifr; 829 struct rte_eth_link dev_link; 830 int link_speed = 0; 831 int ret; 832 833 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 834 if (ret) { 835 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 836 dev->data->port_id, strerror(rte_errno)); 837 return ret; 838 } 839 dev_link = (struct rte_eth_link) { 840 .link_status = ((ifr.ifr_flags & IFF_UP) && 841 (ifr.ifr_flags & IFF_RUNNING)), 842 }; 843 ifr = (struct ifreq) { 844 .ifr_data = (void *)&edata, 845 }; 846 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 847 if (ret) { 848 if (ret == -ENOTSUP && priv->representor) { 849 struct rte_eth_dev *master; 850 851 /* 852 * For representors we can try to inherit link 853 * settings from the master device. Actually 854 * link settings do not make a lot of sense 855 * for representors due to missing physical 856 * link. The old kernel drivers supported 857 * emulated settings query for representors, 858 * the new ones do not, so we have to add 859 * this code for compatibility issues. 860 */ 861 master = mlx5_find_master_dev(dev); 862 if (master) { 863 ifr = (struct ifreq) { 864 .ifr_data = (void *)&edata, 865 }; 866 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr); 867 } 868 } 869 if (ret) { 870 DRV_LOG(WARNING, 871 "port %u ioctl(SIOCETHTOOL," 872 " ETHTOOL_GSET) failed: %s", 873 dev->data->port_id, strerror(rte_errno)); 874 return ret; 875 } 876 } 877 link_speed = ethtool_cmd_speed(&edata); 878 if (link_speed == -1) 879 dev_link.link_speed = ETH_SPEED_NUM_NONE; 880 else 881 dev_link.link_speed = link_speed; 882 priv->link_speed_capa = 0; 883 if (edata.supported & SUPPORTED_Autoneg) 884 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 885 if (edata.supported & (SUPPORTED_1000baseT_Full | 886 SUPPORTED_1000baseKX_Full)) 887 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 888 if (edata.supported & SUPPORTED_10000baseKR_Full) 889 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 890 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 891 SUPPORTED_40000baseCR4_Full | 892 SUPPORTED_40000baseSR4_Full | 893 SUPPORTED_40000baseLR4_Full)) 894 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 895 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 896 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 897 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 898 ETH_LINK_SPEED_FIXED); 899 if (((dev_link.link_speed && !dev_link.link_status) || 900 (!dev_link.link_speed && dev_link.link_status))) { 901 rte_errno = EAGAIN; 902 return -rte_errno; 903 } 904 *link = dev_link; 905 return 0; 906 } 907 908 /** 909 * Retrieve physical link information (unlocked version using new ioctl). 910 * 911 * @param dev 912 * Pointer to Ethernet device structure. 913 * @param[out] link 914 * Storage for current link status. 915 * 916 * @return 917 * 0 on success, a negative errno value otherwise and rte_errno is set. 918 */ 919 static int 920 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 921 struct rte_eth_link *link) 922 923 { 924 struct mlx5_priv *priv = dev->data->dev_private; 925 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 926 struct ifreq ifr; 927 struct rte_eth_link dev_link; 928 struct rte_eth_dev *master = NULL; 929 uint64_t sc; 930 int ret; 931 932 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 933 if (ret) { 934 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 935 dev->data->port_id, strerror(rte_errno)); 936 return ret; 937 } 938 dev_link = (struct rte_eth_link) { 939 .link_status = ((ifr.ifr_flags & IFF_UP) && 940 (ifr.ifr_flags & IFF_RUNNING)), 941 }; 942 ifr = (struct ifreq) { 943 .ifr_data = (void *)&gcmd, 944 }; 945 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 946 if (ret) { 947 if (ret == -ENOTSUP && priv->representor) { 948 /* 949 * For representors we can try to inherit link 950 * settings from the master device. Actually 951 * link settings do not make a lot of sense 952 * for representors due to missing physical 953 * link. The old kernel drivers supported 954 * emulated settings query for representors, 955 * the new ones do not, so we have to add 956 * this code for compatibility issues. 957 */ 958 master = mlx5_find_master_dev(dev); 959 if (master) { 960 ifr = (struct ifreq) { 961 .ifr_data = (void *)&gcmd, 962 }; 963 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr); 964 } 965 } 966 if (ret) { 967 DRV_LOG(DEBUG, 968 "port %u ioctl(SIOCETHTOOL," 969 " ETHTOOL_GLINKSETTINGS) failed: %s", 970 dev->data->port_id, strerror(rte_errno)); 971 return ret; 972 } 973 974 } 975 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 976 977 alignas(struct ethtool_link_settings) 978 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 979 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 980 struct ethtool_link_settings *ecmd = (void *)data; 981 982 *ecmd = gcmd; 983 ifr.ifr_data = (void *)ecmd; 984 ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr); 985 if (ret) { 986 DRV_LOG(DEBUG, 987 "port %u ioctl(SIOCETHTOOL," 988 "ETHTOOL_GLINKSETTINGS) failed: %s", 989 dev->data->port_id, strerror(rte_errno)); 990 return ret; 991 } 992 dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE : 993 ecmd->speed; 994 sc = ecmd->link_mode_masks[0] | 995 ((uint64_t)ecmd->link_mode_masks[1] << 32); 996 priv->link_speed_capa = 0; 997 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 998 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 999 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 1000 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 1001 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 1002 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 1003 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 1004 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 1005 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 1006 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 1007 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 1008 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 1009 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 1010 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 1011 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 1012 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 1013 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 1014 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 1015 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 1016 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 1017 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 1018 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 1019 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 1020 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 1021 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 1022 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 1023 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 1024 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 1025 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 1026 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 1027 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 1028 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 1029 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 1030 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 1031 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 1032 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 1033 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 1034 ETH_LINK_SPEED_FIXED); 1035 if (((dev_link.link_speed && !dev_link.link_status) || 1036 (!dev_link.link_speed && dev_link.link_status))) { 1037 rte_errno = EAGAIN; 1038 return -rte_errno; 1039 } 1040 *link = dev_link; 1041 return 0; 1042 } 1043 1044 /** 1045 * DPDK callback to retrieve physical link information. 1046 * 1047 * @param dev 1048 * Pointer to Ethernet device structure. 1049 * @param wait_to_complete 1050 * Wait for request completion. 1051 * 1052 * @return 1053 * 0 if link status was not updated, positive if it was, a negative errno 1054 * value otherwise and rte_errno is set. 1055 */ 1056 int 1057 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 1058 { 1059 int ret; 1060 struct rte_eth_link dev_link; 1061 time_t start_time = time(NULL); 1062 int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT; 1063 1064 do { 1065 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 1066 if (ret == -ENOTSUP) 1067 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 1068 if (ret == 0) 1069 break; 1070 /* Handle wait to complete situation. */ 1071 if ((wait_to_complete || retry) && ret == -EAGAIN) { 1072 if (abs((int)difftime(time(NULL), start_time)) < 1073 MLX5_LINK_STATUS_TIMEOUT) { 1074 usleep(0); 1075 continue; 1076 } else { 1077 rte_errno = EBUSY; 1078 return -rte_errno; 1079 } 1080 } else if (ret < 0) { 1081 return ret; 1082 } 1083 } while (wait_to_complete || retry-- > 0); 1084 ret = !!memcmp(&dev->data->dev_link, &dev_link, 1085 sizeof(struct rte_eth_link)); 1086 dev->data->dev_link = dev_link; 1087 return ret; 1088 } 1089 1090 /** 1091 * DPDK callback to change the MTU. 1092 * 1093 * @param dev 1094 * Pointer to Ethernet device structure. 1095 * @param in_mtu 1096 * New MTU. 1097 * 1098 * @return 1099 * 0 on success, a negative errno value otherwise and rte_errno is set. 1100 */ 1101 int 1102 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 1103 { 1104 struct mlx5_priv *priv = dev->data->dev_private; 1105 uint16_t kern_mtu = 0; 1106 int ret; 1107 1108 ret = mlx5_get_mtu(dev, &kern_mtu); 1109 if (ret) 1110 return ret; 1111 /* Set kernel interface MTU first. */ 1112 ret = mlx5_set_mtu(dev, mtu); 1113 if (ret) 1114 return ret; 1115 ret = mlx5_get_mtu(dev, &kern_mtu); 1116 if (ret) 1117 return ret; 1118 if (kern_mtu == mtu) { 1119 priv->mtu = mtu; 1120 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 1121 dev->data->port_id, mtu); 1122 return 0; 1123 } 1124 rte_errno = EAGAIN; 1125 return -rte_errno; 1126 } 1127 1128 /** 1129 * DPDK callback to get flow control status. 1130 * 1131 * @param dev 1132 * Pointer to Ethernet device structure. 1133 * @param[out] fc_conf 1134 * Flow control output buffer. 1135 * 1136 * @return 1137 * 0 on success, a negative errno value otherwise and rte_errno is set. 1138 */ 1139 int 1140 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1141 { 1142 struct ifreq ifr; 1143 struct ethtool_pauseparam ethpause = { 1144 .cmd = ETHTOOL_GPAUSEPARAM 1145 }; 1146 int ret; 1147 1148 ifr.ifr_data = (void *)ðpause; 1149 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1150 if (ret) { 1151 DRV_LOG(WARNING, 1152 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 1153 " %s", 1154 dev->data->port_id, strerror(rte_errno)); 1155 return ret; 1156 } 1157 fc_conf->autoneg = ethpause.autoneg; 1158 if (ethpause.rx_pause && ethpause.tx_pause) 1159 fc_conf->mode = RTE_FC_FULL; 1160 else if (ethpause.rx_pause) 1161 fc_conf->mode = RTE_FC_RX_PAUSE; 1162 else if (ethpause.tx_pause) 1163 fc_conf->mode = RTE_FC_TX_PAUSE; 1164 else 1165 fc_conf->mode = RTE_FC_NONE; 1166 return 0; 1167 } 1168 1169 /** 1170 * DPDK callback to modify flow control parameters. 1171 * 1172 * @param dev 1173 * Pointer to Ethernet device structure. 1174 * @param[in] fc_conf 1175 * Flow control parameters. 1176 * 1177 * @return 1178 * 0 on success, a negative errno value otherwise and rte_errno is set. 1179 */ 1180 int 1181 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1182 { 1183 struct ifreq ifr; 1184 struct ethtool_pauseparam ethpause = { 1185 .cmd = ETHTOOL_SPAUSEPARAM 1186 }; 1187 int ret; 1188 1189 ifr.ifr_data = (void *)ðpause; 1190 ethpause.autoneg = fc_conf->autoneg; 1191 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1192 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1193 ethpause.rx_pause = 1; 1194 else 1195 ethpause.rx_pause = 0; 1196 1197 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1198 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1199 ethpause.tx_pause = 1; 1200 else 1201 ethpause.tx_pause = 0; 1202 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1203 if (ret) { 1204 DRV_LOG(WARNING, 1205 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1206 " failed: %s", 1207 dev->data->port_id, strerror(rte_errno)); 1208 return ret; 1209 } 1210 return 0; 1211 } 1212 1213 /** 1214 * Get PCI information by sysfs device path. 1215 * 1216 * @param dev_path 1217 * Pointer to device sysfs folder name. 1218 * @param[out] pci_addr 1219 * PCI bus address output buffer. 1220 * 1221 * @return 1222 * 0 on success, a negative errno value otherwise and rte_errno is set. 1223 */ 1224 int 1225 mlx5_dev_to_pci_addr(const char *dev_path, 1226 struct rte_pci_addr *pci_addr) 1227 { 1228 FILE *file; 1229 char line[32]; 1230 MKSTR(path, "%s/device/uevent", dev_path); 1231 1232 file = fopen(path, "rb"); 1233 if (file == NULL) { 1234 rte_errno = errno; 1235 return -rte_errno; 1236 } 1237 while (fgets(line, sizeof(line), file) == line) { 1238 size_t len = strlen(line); 1239 int ret; 1240 1241 /* Truncate long lines. */ 1242 if (len == (sizeof(line) - 1)) 1243 while (line[(len - 1)] != '\n') { 1244 ret = fgetc(file); 1245 if (ret == EOF) 1246 break; 1247 line[(len - 1)] = ret; 1248 } 1249 /* Extract information. */ 1250 if (sscanf(line, 1251 "PCI_SLOT_NAME=" 1252 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1253 &pci_addr->domain, 1254 &pci_addr->bus, 1255 &pci_addr->devid, 1256 &pci_addr->function) == 4) { 1257 ret = 0; 1258 break; 1259 } 1260 } 1261 fclose(file); 1262 return 0; 1263 } 1264 1265 /** 1266 * Handle asynchronous removal event for entire multiport device. 1267 * 1268 * @param sh 1269 * Infiniband device shared context. 1270 */ 1271 static void 1272 mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) 1273 { 1274 uint32_t i; 1275 1276 for (i = 0; i < sh->max_port; ++i) { 1277 struct rte_eth_dev *dev; 1278 1279 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { 1280 /* 1281 * Or not existing port either no 1282 * handler installed for this port. 1283 */ 1284 continue; 1285 } 1286 dev = &rte_eth_devices[sh->port[i].ih_port_id]; 1287 assert(dev); 1288 if (dev->data->dev_conf.intr_conf.rmv) 1289 _rte_eth_dev_callback_process 1290 (dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1291 } 1292 } 1293 1294 /** 1295 * Handle shared asynchronous events the NIC (removal event 1296 * and link status change). Supports multiport IB device. 1297 * 1298 * @param cb_arg 1299 * Callback argument. 1300 */ 1301 void 1302 mlx5_dev_interrupt_handler(void *cb_arg) 1303 { 1304 struct mlx5_ibv_shared *sh = cb_arg; 1305 struct ibv_async_event event; 1306 1307 /* Read all message from the IB device and acknowledge them. */ 1308 for (;;) { 1309 struct rte_eth_dev *dev; 1310 uint32_t tmp; 1311 1312 if (mlx5_glue->get_async_event(sh->ctx, &event)) 1313 break; 1314 /* Retrieve and check IB port index. */ 1315 tmp = (uint32_t)event.element.port_num; 1316 if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) { 1317 /* 1318 * The DEVICE_FATAL event is called once for 1319 * entire device without port specifying. 1320 * We should notify all existing ports. 1321 */ 1322 mlx5_glue->ack_async_event(&event); 1323 mlx5_dev_interrupt_device_fatal(sh); 1324 continue; 1325 } 1326 assert(tmp && (tmp <= sh->max_port)); 1327 if (!tmp) { 1328 /* Unsupported devive level event. */ 1329 mlx5_glue->ack_async_event(&event); 1330 DRV_LOG(DEBUG, 1331 "unsupported common event (type %d)", 1332 event.event_type); 1333 continue; 1334 } 1335 if (tmp > sh->max_port) { 1336 /* Invalid IB port index. */ 1337 mlx5_glue->ack_async_event(&event); 1338 DRV_LOG(DEBUG, 1339 "cannot handle an event (type %d)" 1340 "due to invalid IB port index (%u)", 1341 event.event_type, tmp); 1342 continue; 1343 } 1344 if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { 1345 /* No handler installed. */ 1346 mlx5_glue->ack_async_event(&event); 1347 DRV_LOG(DEBUG, 1348 "cannot handle an event (type %d)" 1349 "due to no handler installed for port %u", 1350 event.event_type, tmp); 1351 continue; 1352 } 1353 /* Retrieve ethernet device descriptor. */ 1354 tmp = sh->port[tmp - 1].ih_port_id; 1355 dev = &rte_eth_devices[tmp]; 1356 assert(dev); 1357 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1358 event.event_type == IBV_EVENT_PORT_ERR) && 1359 dev->data->dev_conf.intr_conf.lsc) { 1360 mlx5_glue->ack_async_event(&event); 1361 if (mlx5_link_update(dev, 0) == -EAGAIN) { 1362 usleep(0); 1363 continue; 1364 } 1365 _rte_eth_dev_callback_process 1366 (dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1367 continue; 1368 } 1369 DRV_LOG(DEBUG, 1370 "port %u cannot handle an unknown event (type %d)", 1371 dev->data->port_id, event.event_type); 1372 mlx5_glue->ack_async_event(&event); 1373 } 1374 } 1375 1376 /* 1377 * Unregister callback handler safely. The handler may be active 1378 * while we are trying to unregister it, in this case code -EAGAIN 1379 * is returned by rte_intr_callback_unregister(). This routine checks 1380 * the return code and tries to unregister handler again. 1381 * 1382 * @param handle 1383 * interrupt handle 1384 * @param cb_fn 1385 * pointer to callback routine 1386 * @cb_arg 1387 * opaque callback parameter 1388 */ 1389 void 1390 mlx5_intr_callback_unregister(const struct rte_intr_handle *handle, 1391 rte_intr_callback_fn cb_fn, void *cb_arg) 1392 { 1393 /* 1394 * Try to reduce timeout management overhead by not calling 1395 * the timer related routines on the first iteration. If the 1396 * unregistering succeeds on first call there will be no 1397 * timer calls at all. 1398 */ 1399 uint64_t twait = 0; 1400 uint64_t start = 0; 1401 1402 do { 1403 int ret; 1404 1405 ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg); 1406 if (ret >= 0) 1407 return; 1408 if (ret != -EAGAIN) { 1409 DRV_LOG(INFO, "failed to unregister interrupt" 1410 " handler (error: %d)", ret); 1411 assert(false); 1412 return; 1413 } 1414 if (twait) { 1415 struct timespec onems; 1416 1417 /* Wait one millisecond and try again. */ 1418 onems.tv_sec = 0; 1419 onems.tv_nsec = NS_PER_S / MS_PER_S; 1420 nanosleep(&onems, 0); 1421 /* Check whether one second elapsed. */ 1422 if ((rte_get_timer_cycles() - start) <= twait) 1423 continue; 1424 } else { 1425 /* 1426 * We get the amount of timer ticks for one second. 1427 * If this amount elapsed it means we spent one 1428 * second in waiting. This branch is executed once 1429 * on first iteration. 1430 */ 1431 twait = rte_get_timer_hz(); 1432 assert(twait); 1433 } 1434 /* 1435 * Timeout elapsed, show message (once a second) and retry. 1436 * We have no other acceptable option here, if we ignore 1437 * the unregistering return code the handler will not 1438 * be unregistered, fd will be closed and we may get the 1439 * crush. Hanging and messaging in the loop seems not to be 1440 * the worst choice. 1441 */ 1442 DRV_LOG(INFO, "Retrying to unregister interrupt handler"); 1443 start = rte_get_timer_cycles(); 1444 } while (true); 1445 } 1446 1447 /** 1448 * Handle DEVX interrupts from the NIC. 1449 * This function is probably called from the DPDK host thread. 1450 * 1451 * @param cb_arg 1452 * Callback argument. 1453 */ 1454 void 1455 mlx5_dev_interrupt_handler_devx(void *cb_arg) 1456 { 1457 #ifndef HAVE_IBV_DEVX_ASYNC 1458 (void)cb_arg; 1459 return; 1460 #else 1461 struct mlx5_ibv_shared *sh = cb_arg; 1462 union { 1463 struct mlx5dv_devx_async_cmd_hdr cmd_resp; 1464 uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) + 1465 MLX5_ST_SZ_BYTES(traffic_counter) + 1466 sizeof(struct mlx5dv_devx_async_cmd_hdr)]; 1467 } out; 1468 uint8_t *buf = out.buf + sizeof(out.cmd_resp); 1469 1470 while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp, 1471 &out.cmd_resp, 1472 sizeof(out.buf))) 1473 mlx5_flow_async_pool_query_handle 1474 (sh, (uint64_t)out.cmd_resp.wr_id, 1475 mlx5_devx_get_out_command_status(buf)); 1476 #endif /* HAVE_IBV_DEVX_ASYNC */ 1477 } 1478 1479 /** 1480 * Uninstall shared asynchronous device events handler. 1481 * This function is implemented to support event sharing 1482 * between multiple ports of single IB device. 1483 * 1484 * @param dev 1485 * Pointer to Ethernet device. 1486 */ 1487 static void 1488 mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev) 1489 { 1490 struct mlx5_priv *priv = dev->data->dev_private; 1491 struct mlx5_ibv_shared *sh = priv->sh; 1492 1493 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1494 return; 1495 pthread_mutex_lock(&sh->intr_mutex); 1496 assert(priv->ibv_port); 1497 assert(priv->ibv_port <= sh->max_port); 1498 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1499 if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS) 1500 goto exit; 1501 assert(sh->port[priv->ibv_port - 1].ih_port_id == 1502 (uint32_t)dev->data->port_id); 1503 assert(sh->intr_cnt); 1504 sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS; 1505 if (!sh->intr_cnt || --sh->intr_cnt) 1506 goto exit; 1507 mlx5_intr_callback_unregister(&sh->intr_handle, 1508 mlx5_dev_interrupt_handler, sh); 1509 sh->intr_handle.fd = 0; 1510 sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1511 exit: 1512 pthread_mutex_unlock(&sh->intr_mutex); 1513 } 1514 1515 /** 1516 * Uninstall devx shared asynchronous device events handler. 1517 * This function is implemeted to support event sharing 1518 * between multiple ports of single IB device. 1519 * 1520 * @param dev 1521 * Pointer to Ethernet device. 1522 */ 1523 static void 1524 mlx5_dev_shared_handler_devx_uninstall(struct rte_eth_dev *dev) 1525 { 1526 struct mlx5_priv *priv = dev->data->dev_private; 1527 struct mlx5_ibv_shared *sh = priv->sh; 1528 1529 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1530 return; 1531 pthread_mutex_lock(&sh->intr_mutex); 1532 assert(priv->ibv_port); 1533 assert(priv->ibv_port <= sh->max_port); 1534 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1535 if (sh->port[priv->ibv_port - 1].devx_ih_port_id >= RTE_MAX_ETHPORTS) 1536 goto exit; 1537 assert(sh->port[priv->ibv_port - 1].devx_ih_port_id == 1538 (uint32_t)dev->data->port_id); 1539 sh->port[priv->ibv_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS; 1540 if (!sh->devx_intr_cnt || --sh->devx_intr_cnt) 1541 goto exit; 1542 if (sh->intr_handle_devx.fd) { 1543 rte_intr_callback_unregister(&sh->intr_handle_devx, 1544 mlx5_dev_interrupt_handler_devx, 1545 sh); 1546 sh->intr_handle_devx.fd = 0; 1547 sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN; 1548 } 1549 if (sh->devx_comp) { 1550 mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp); 1551 sh->devx_comp = NULL; 1552 } 1553 exit: 1554 pthread_mutex_unlock(&sh->intr_mutex); 1555 } 1556 1557 /** 1558 * Install shared asynchronous device events handler. 1559 * This function is implemented to support event sharing 1560 * between multiple ports of single IB device. 1561 * 1562 * @param dev 1563 * Pointer to Ethernet device. 1564 */ 1565 static void 1566 mlx5_dev_shared_handler_install(struct rte_eth_dev *dev) 1567 { 1568 struct mlx5_priv *priv = dev->data->dev_private; 1569 struct mlx5_ibv_shared *sh = priv->sh; 1570 int ret; 1571 int flags; 1572 1573 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1574 return; 1575 pthread_mutex_lock(&sh->intr_mutex); 1576 assert(priv->ibv_port); 1577 assert(priv->ibv_port <= sh->max_port); 1578 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1579 if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) { 1580 /* The handler is already installed for this port. */ 1581 assert(sh->intr_cnt); 1582 goto exit; 1583 } 1584 if (sh->intr_cnt) { 1585 sh->port[priv->ibv_port - 1].ih_port_id = 1586 (uint32_t)dev->data->port_id; 1587 sh->intr_cnt++; 1588 goto exit; 1589 } 1590 /* No shared handler installed. */ 1591 assert(sh->ctx->async_fd > 0); 1592 flags = fcntl(sh->ctx->async_fd, F_GETFL); 1593 ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1594 if (ret) { 1595 DRV_LOG(INFO, "failed to change file descriptor async event" 1596 " queue"); 1597 /* Indicate there will be no interrupts. */ 1598 dev->data->dev_conf.intr_conf.lsc = 0; 1599 dev->data->dev_conf.intr_conf.rmv = 0; 1600 } else { 1601 sh->intr_handle.fd = sh->ctx->async_fd; 1602 sh->intr_handle.type = RTE_INTR_HANDLE_EXT; 1603 rte_intr_callback_register(&sh->intr_handle, 1604 mlx5_dev_interrupt_handler, sh); 1605 sh->intr_cnt++; 1606 sh->port[priv->ibv_port - 1].ih_port_id = 1607 (uint32_t)dev->data->port_id; 1608 } 1609 exit: 1610 pthread_mutex_unlock(&sh->intr_mutex); 1611 } 1612 1613 /** 1614 * Install devx shared asyncronous device events handler. 1615 * This function is implemeted to support event sharing 1616 * between multiple ports of single IB device. 1617 * 1618 * @param dev 1619 * Pointer to Ethernet device. 1620 */ 1621 static void 1622 mlx5_dev_shared_handler_devx_install(struct rte_eth_dev *dev) 1623 { 1624 struct mlx5_priv *priv = dev->data->dev_private; 1625 struct mlx5_ibv_shared *sh = priv->sh; 1626 1627 if (rte_eal_process_type() != RTE_PROC_PRIMARY) 1628 return; 1629 pthread_mutex_lock(&sh->intr_mutex); 1630 assert(priv->ibv_port); 1631 assert(priv->ibv_port <= sh->max_port); 1632 assert(dev->data->port_id < RTE_MAX_ETHPORTS); 1633 if (sh->port[priv->ibv_port - 1].devx_ih_port_id < RTE_MAX_ETHPORTS) { 1634 /* The handler is already installed for this port. */ 1635 assert(sh->devx_intr_cnt); 1636 goto exit; 1637 } 1638 if (sh->devx_intr_cnt) { 1639 sh->devx_intr_cnt++; 1640 sh->port[priv->ibv_port - 1].devx_ih_port_id = 1641 (uint32_t)dev->data->port_id; 1642 goto exit; 1643 } 1644 if (priv->config.devx) { 1645 #ifndef HAVE_IBV_DEVX_ASYNC 1646 goto exit; 1647 #else 1648 sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx); 1649 if (sh->devx_comp) { 1650 int flags = fcntl(sh->devx_comp->fd, F_GETFL); 1651 int ret = fcntl(sh->devx_comp->fd, F_SETFL, 1652 flags | O_NONBLOCK); 1653 1654 if (ret) { 1655 DRV_LOG(INFO, "failed to change file descriptor" 1656 " devx async event queue"); 1657 } else { 1658 sh->intr_handle_devx.fd = sh->devx_comp->fd; 1659 sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT; 1660 rte_intr_callback_register 1661 (&sh->intr_handle_devx, 1662 mlx5_dev_interrupt_handler_devx, sh); 1663 sh->devx_intr_cnt++; 1664 sh->port[priv->ibv_port - 1].devx_ih_port_id = 1665 (uint32_t)dev->data->port_id; 1666 } 1667 } 1668 #endif /* HAVE_IBV_DEVX_ASYNC */ 1669 } 1670 exit: 1671 pthread_mutex_unlock(&sh->intr_mutex); 1672 } 1673 1674 /** 1675 * Uninstall interrupt handler. 1676 * 1677 * @param dev 1678 * Pointer to Ethernet device. 1679 */ 1680 void 1681 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 1682 { 1683 mlx5_dev_shared_handler_uninstall(dev); 1684 } 1685 1686 /** 1687 * Install interrupt handler. 1688 * 1689 * @param dev 1690 * Pointer to Ethernet device. 1691 */ 1692 void 1693 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1694 { 1695 mlx5_dev_shared_handler_install(dev); 1696 } 1697 1698 /** 1699 * Devx uninstall interrupt handler. 1700 * 1701 * @param dev 1702 * Pointer to Ethernet device. 1703 */ 1704 void 1705 mlx5_dev_interrupt_handler_devx_uninstall(struct rte_eth_dev *dev) 1706 { 1707 mlx5_dev_shared_handler_devx_uninstall(dev); 1708 } 1709 1710 /** 1711 * Devx install interrupt handler. 1712 * 1713 * @param dev 1714 * Pointer to Ethernet device. 1715 */ 1716 void 1717 mlx5_dev_interrupt_handler_devx_install(struct rte_eth_dev *dev) 1718 { 1719 mlx5_dev_shared_handler_devx_install(dev); 1720 } 1721 1722 /** 1723 * DPDK callback to bring the link DOWN. 1724 * 1725 * @param dev 1726 * Pointer to Ethernet device structure. 1727 * 1728 * @return 1729 * 0 on success, a negative errno value otherwise and rte_errno is set. 1730 */ 1731 int 1732 mlx5_set_link_down(struct rte_eth_dev *dev) 1733 { 1734 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1735 } 1736 1737 /** 1738 * DPDK callback to bring the link UP. 1739 * 1740 * @param dev 1741 * Pointer to Ethernet device structure. 1742 * 1743 * @return 1744 * 0 on success, a negative errno value otherwise and rte_errno is set. 1745 */ 1746 int 1747 mlx5_set_link_up(struct rte_eth_dev *dev) 1748 { 1749 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1750 } 1751 1752 /** 1753 * Configure the RX function to use. 1754 * 1755 * @param dev 1756 * Pointer to private data structure. 1757 * 1758 * @return 1759 * Pointer to selected Rx burst function. 1760 */ 1761 eth_rx_burst_t 1762 mlx5_select_rx_function(struct rte_eth_dev *dev) 1763 { 1764 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1765 1766 assert(dev != NULL); 1767 if (mlx5_check_vec_rx_support(dev) > 0) { 1768 rx_pkt_burst = mlx5_rx_burst_vec; 1769 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1770 dev->data->port_id); 1771 } else if (mlx5_mprq_enabled(dev)) { 1772 rx_pkt_burst = mlx5_rx_burst_mprq; 1773 } 1774 return rx_pkt_burst; 1775 } 1776 1777 /** 1778 * Check if mlx5 device was removed. 1779 * 1780 * @param dev 1781 * Pointer to Ethernet device structure. 1782 * 1783 * @return 1784 * 1 when device is removed, otherwise 0. 1785 */ 1786 int 1787 mlx5_is_removed(struct rte_eth_dev *dev) 1788 { 1789 struct ibv_device_attr device_attr; 1790 struct mlx5_priv *priv = dev->data->dev_private; 1791 1792 if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO) 1793 return 1; 1794 return 0; 1795 } 1796 1797 /** 1798 * Get the E-Switch parameters by port id. 1799 * 1800 * @param[in] port 1801 * Device port id. 1802 * @param[in] valid 1803 * Device port id is valid, skip check. This flag is useful 1804 * when trials are performed from probing and device is not 1805 * flagged as valid yet (in attaching process). 1806 * @param[out] es_domain_id 1807 * E-Switch domain id. 1808 * @param[out] es_port_id 1809 * The port id of the port in the E-Switch. 1810 * 1811 * @return 1812 * pointer to device private data structure containing data needed 1813 * on success, NULL otherwise and rte_errno is set. 1814 */ 1815 struct mlx5_priv * 1816 mlx5_port_to_eswitch_info(uint16_t port, bool valid) 1817 { 1818 struct rte_eth_dev *dev; 1819 struct mlx5_priv *priv; 1820 1821 if (port >= RTE_MAX_ETHPORTS) { 1822 rte_errno = EINVAL; 1823 return NULL; 1824 } 1825 if (!valid && !rte_eth_dev_is_valid_port(port)) { 1826 rte_errno = ENODEV; 1827 return NULL; 1828 } 1829 dev = &rte_eth_devices[port]; 1830 priv = dev->data->dev_private; 1831 if (!(priv->representor || priv->master)) { 1832 rte_errno = EINVAL; 1833 return NULL; 1834 } 1835 return priv; 1836 } 1837 1838 /** 1839 * Get the E-Switch parameters by device instance. 1840 * 1841 * @param[in] port 1842 * Device port id. 1843 * @param[out] es_domain_id 1844 * E-Switch domain id. 1845 * @param[out] es_port_id 1846 * The port id of the port in the E-Switch. 1847 * 1848 * @return 1849 * pointer to device private data structure containing data needed 1850 * on success, NULL otherwise and rte_errno is set. 1851 */ 1852 struct mlx5_priv * 1853 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 1854 { 1855 struct mlx5_priv *priv; 1856 1857 priv = dev->data->dev_private; 1858 if (!(priv->representor || priv->master)) { 1859 rte_errno = EINVAL; 1860 return NULL; 1861 } 1862 return priv; 1863 } 1864 1865 /** 1866 * Get switch information associated with network interface. 1867 * 1868 * @param ifindex 1869 * Network interface index. 1870 * @param[out] info 1871 * Switch information object, populated in case of success. 1872 * 1873 * @return 1874 * 0 on success, a negative errno value otherwise and rte_errno is set. 1875 */ 1876 int 1877 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) 1878 { 1879 char ifname[IF_NAMESIZE]; 1880 char port_name[IF_NAMESIZE]; 1881 FILE *file; 1882 struct mlx5_switch_info data = { 1883 .master = 0, 1884 .representor = 0, 1885 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1886 .port_name = 0, 1887 .switch_id = 0, 1888 }; 1889 DIR *dir; 1890 bool port_switch_id_set = false; 1891 bool device_dir = false; 1892 char c; 1893 int ret; 1894 1895 if (!if_indextoname(ifindex, ifname)) { 1896 rte_errno = errno; 1897 return -rte_errno; 1898 } 1899 1900 MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name", 1901 ifname); 1902 MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", 1903 ifname); 1904 MKSTR(pci_device, "/sys/class/net/%s/device", 1905 ifname); 1906 1907 file = fopen(phys_port_name, "rb"); 1908 if (file != NULL) { 1909 ret = fscanf(file, "%s", port_name); 1910 fclose(file); 1911 if (ret == 1) 1912 mlx5_translate_port_name(port_name, &data); 1913 } 1914 file = fopen(phys_switch_id, "rb"); 1915 if (file == NULL) { 1916 rte_errno = errno; 1917 return -rte_errno; 1918 } 1919 port_switch_id_set = 1920 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && 1921 c == '\n'; 1922 fclose(file); 1923 dir = opendir(pci_device); 1924 if (dir != NULL) { 1925 closedir(dir); 1926 device_dir = true; 1927 } 1928 if (port_switch_id_set) { 1929 /* We have some E-Switch configuration. */ 1930 mlx5_sysfs_check_switch_info(device_dir, &data); 1931 } 1932 *info = data; 1933 assert(!(data.master && data.representor)); 1934 if (data.master && data.representor) { 1935 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1936 " and as representor", ifindex); 1937 rte_errno = ENODEV; 1938 return -rte_errno; 1939 } 1940 return 0; 1941 } 1942 1943 /** 1944 * Analyze gathered port parameters via Netlink to recognize master 1945 * and representor devices for E-Switch configuration. 1946 * 1947 * @param[in] num_vf_set 1948 * flag of presence of number of VFs port attribute. 1949 * @param[inout] switch_info 1950 * Port information, including port name as a number and port name 1951 * type if recognized 1952 * 1953 * @return 1954 * master and representor flags are set in switch_info according to 1955 * recognized parameters (if any). 1956 */ 1957 void 1958 mlx5_nl_check_switch_info(bool num_vf_set, 1959 struct mlx5_switch_info *switch_info) 1960 { 1961 switch (switch_info->name_type) { 1962 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1963 /* 1964 * Name is not recognized, assume the master, 1965 * check the number of VFs key presence. 1966 */ 1967 switch_info->master = num_vf_set; 1968 break; 1969 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1970 /* 1971 * Name is not set, this assumes the legacy naming 1972 * schema for master, just check if there is a 1973 * number of VFs key. 1974 */ 1975 switch_info->master = num_vf_set; 1976 break; 1977 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1978 /* New uplink naming schema recognized. */ 1979 switch_info->master = 1; 1980 break; 1981 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1982 /* Legacy representors naming schema. */ 1983 switch_info->representor = !num_vf_set; 1984 break; 1985 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1986 /* New representors naming schema. */ 1987 switch_info->representor = 1; 1988 break; 1989 } 1990 } 1991 1992 /** 1993 * Analyze gathered port parameters via sysfs to recognize master 1994 * and representor devices for E-Switch configuration. 1995 * 1996 * @param[in] device_dir 1997 * flag of presence of "device" directory under port device key. 1998 * @param[inout] switch_info 1999 * Port information, including port name as a number and port name 2000 * type if recognized 2001 * 2002 * @return 2003 * master and representor flags are set in switch_info according to 2004 * recognized parameters (if any). 2005 */ 2006 void 2007 mlx5_sysfs_check_switch_info(bool device_dir, 2008 struct mlx5_switch_info *switch_info) 2009 { 2010 switch (switch_info->name_type) { 2011 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 2012 /* 2013 * Name is not recognized, assume the master, 2014 * check the device directory presence. 2015 */ 2016 switch_info->master = device_dir; 2017 break; 2018 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 2019 /* 2020 * Name is not set, this assumes the legacy naming 2021 * schema for master, just check if there is 2022 * a device directory. 2023 */ 2024 switch_info->master = device_dir; 2025 break; 2026 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 2027 /* New uplink naming schema recognized. */ 2028 switch_info->master = 1; 2029 break; 2030 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 2031 /* Legacy representors naming schema. */ 2032 switch_info->representor = !device_dir; 2033 break; 2034 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 2035 /* New representors naming schema. */ 2036 switch_info->representor = 1; 2037 break; 2038 } 2039 } 2040 2041 /** 2042 * Extract port name, as a number, from sysfs or netlink information. 2043 * 2044 * @param[in] port_name_in 2045 * String representing the port name. 2046 * @param[out] port_info_out 2047 * Port information, including port name as a number and port name 2048 * type if recognized 2049 * 2050 * @return 2051 * port_name field set according to recognized name format. 2052 */ 2053 void 2054 mlx5_translate_port_name(const char *port_name_in, 2055 struct mlx5_switch_info *port_info_out) 2056 { 2057 char pf_c1, pf_c2, vf_c1, vf_c2; 2058 char *end; 2059 int sc_items; 2060 2061 /* 2062 * Check for port-name as a string of the form pf0vf0 2063 * (support kernel ver >= 5.0 or OFED ver >= 4.6). 2064 */ 2065 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", 2066 &pf_c1, &pf_c2, &port_info_out->pf_num, 2067 &vf_c1, &vf_c2, &port_info_out->port_name); 2068 if (sc_items == 6 && 2069 pf_c1 == 'p' && pf_c2 == 'f' && 2070 vf_c1 == 'v' && vf_c2 == 'f') { 2071 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; 2072 return; 2073 } 2074 /* 2075 * Check for port-name as a string of the form p0 2076 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 2077 */ 2078 sc_items = sscanf(port_name_in, "%c%d", 2079 &pf_c1, &port_info_out->port_name); 2080 if (sc_items == 2 && pf_c1 == 'p') { 2081 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 2082 return; 2083 } 2084 /* Check for port-name as a number (support kernel ver < 5.0 */ 2085 errno = 0; 2086 port_info_out->port_name = strtol(port_name_in, &end, 0); 2087 if (!errno && 2088 (size_t)(end - port_name_in) == strlen(port_name_in)) { 2089 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 2090 return; 2091 } 2092 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 2093 return; 2094 } 2095 2096 /** 2097 * DPDK callback to retrieve plug-in module EEPROM information (type and size). 2098 * 2099 * @param dev 2100 * Pointer to Ethernet device structure. 2101 * @param[out] modinfo 2102 * Storage for plug-in module EEPROM information. 2103 * 2104 * @return 2105 * 0 on success, a negative errno value otherwise and rte_errno is set. 2106 */ 2107 int 2108 mlx5_get_module_info(struct rte_eth_dev *dev, 2109 struct rte_eth_dev_module_info *modinfo) 2110 { 2111 struct ethtool_modinfo info = { 2112 .cmd = ETHTOOL_GMODULEINFO, 2113 }; 2114 struct ifreq ifr = (struct ifreq) { 2115 .ifr_data = (void *)&info, 2116 }; 2117 int ret = 0; 2118 2119 if (!dev || !modinfo) { 2120 DRV_LOG(WARNING, "missing argument, cannot get module info"); 2121 rte_errno = EINVAL; 2122 return -rte_errno; 2123 } 2124 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2125 if (ret) { 2126 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 2127 dev->data->port_id, strerror(rte_errno)); 2128 return ret; 2129 } 2130 modinfo->type = info.type; 2131 modinfo->eeprom_len = info.eeprom_len; 2132 return ret; 2133 } 2134 2135 /** 2136 * DPDK callback to retrieve plug-in module EEPROM data. 2137 * 2138 * @param dev 2139 * Pointer to Ethernet device structure. 2140 * @param[out] info 2141 * Storage for plug-in module EEPROM data. 2142 * 2143 * @return 2144 * 0 on success, a negative errno value otherwise and rte_errno is set. 2145 */ 2146 int mlx5_get_module_eeprom(struct rte_eth_dev *dev, 2147 struct rte_dev_eeprom_info *info) 2148 { 2149 struct ethtool_eeprom *eeprom; 2150 struct ifreq ifr; 2151 int ret = 0; 2152 2153 if (!dev || !info) { 2154 DRV_LOG(WARNING, "missing argument, cannot get module eeprom"); 2155 rte_errno = EINVAL; 2156 return -rte_errno; 2157 } 2158 eeprom = rte_calloc(__func__, 1, 2159 (sizeof(struct ethtool_eeprom) + info->length), 0); 2160 if (!eeprom) { 2161 DRV_LOG(WARNING, "port %u cannot allocate memory for " 2162 "eeprom data", dev->data->port_id); 2163 rte_errno = ENOMEM; 2164 return -rte_errno; 2165 } 2166 eeprom->cmd = ETHTOOL_GMODULEEEPROM; 2167 eeprom->offset = info->offset; 2168 eeprom->len = info->length; 2169 ifr = (struct ifreq) { 2170 .ifr_data = (void *)eeprom, 2171 }; 2172 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2173 if (ret) 2174 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 2175 dev->data->port_id, strerror(rte_errno)); 2176 else 2177 rte_memcpy(info->data, eeprom->data, info->length); 2178 rte_free(eeprom); 2179 return ret; 2180 } 2181 2182 /** 2183 * DPDK callback to retrieve hairpin capabilities. 2184 * 2185 * @param dev 2186 * Pointer to Ethernet device structure. 2187 * @param[out] cap 2188 * Storage for hairpin capability data. 2189 * 2190 * @return 2191 * 0 on success, a negative errno value otherwise and rte_errno is set. 2192 */ 2193 int mlx5_hairpin_cap_get(struct rte_eth_dev *dev, 2194 struct rte_eth_hairpin_cap *cap) 2195 { 2196 struct mlx5_priv *priv = dev->data->dev_private; 2197 2198 if (priv->sh->devx == 0) { 2199 rte_errno = ENOTSUP; 2200 return -rte_errno; 2201 } 2202 cap->max_nb_queues = UINT16_MAX; 2203 cap->max_rx_2_tx = 1; 2204 cap->max_tx_2_rx = 1; 2205 cap->max_nb_desc = 8192; 2206 return 0; 2207 } 2208