1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <assert.h> 8 #include <inttypes.h> 9 #include <unistd.h> 10 #include <stdbool.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <string.h> 14 #include <stdlib.h> 15 #include <errno.h> 16 #include <dirent.h> 17 #include <net/if.h> 18 #include <sys/ioctl.h> 19 #include <sys/socket.h> 20 #include <netinet/in.h> 21 #include <linux/ethtool.h> 22 #include <linux/sockios.h> 23 #include <fcntl.h> 24 #include <stdalign.h> 25 #include <sys/un.h> 26 #include <time.h> 27 28 #include <rte_atomic.h> 29 #include <rte_ethdev_driver.h> 30 #include <rte_bus_pci.h> 31 #include <rte_mbuf.h> 32 #include <rte_common.h> 33 #include <rte_interrupts.h> 34 #include <rte_malloc.h> 35 #include <rte_string_fns.h> 36 #include <rte_rwlock.h> 37 #include <rte_cycles.h> 38 39 #include <mlx5_glue.h> 40 #include <mlx5_devx_cmds.h> 41 #include <mlx5_common.h> 42 43 #include "mlx5.h" 44 #include "mlx5_rxtx.h" 45 #include "mlx5_utils.h" 46 47 /* Supported speed values found in /usr/include/linux/ethtool.h */ 48 #ifndef HAVE_SUPPORTED_40000baseKR4_Full 49 #define SUPPORTED_40000baseKR4_Full (1 << 23) 50 #endif 51 #ifndef HAVE_SUPPORTED_40000baseCR4_Full 52 #define SUPPORTED_40000baseCR4_Full (1 << 24) 53 #endif 54 #ifndef HAVE_SUPPORTED_40000baseSR4_Full 55 #define SUPPORTED_40000baseSR4_Full (1 << 25) 56 #endif 57 #ifndef HAVE_SUPPORTED_40000baseLR4_Full 58 #define SUPPORTED_40000baseLR4_Full (1 << 26) 59 #endif 60 #ifndef HAVE_SUPPORTED_56000baseKR4_Full 61 #define SUPPORTED_56000baseKR4_Full (1 << 27) 62 #endif 63 #ifndef HAVE_SUPPORTED_56000baseCR4_Full 64 #define SUPPORTED_56000baseCR4_Full (1 << 28) 65 #endif 66 #ifndef HAVE_SUPPORTED_56000baseSR4_Full 67 #define SUPPORTED_56000baseSR4_Full (1 << 29) 68 #endif 69 #ifndef HAVE_SUPPORTED_56000baseLR4_Full 70 #define SUPPORTED_56000baseLR4_Full (1 << 30) 71 #endif 72 73 /* Add defines 
in case the running kernel is not the same as user headers. */
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Minimal local mirror of the kernel's struct ethtool_link_settings, used
 * only when the toolchain's ethtool.h predates ETHTOOL_GLINKSETTINGS.
 * Field layout must match the kernel ABI exactly.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	/*
	 * NOTE(review): the kernel header names this field eth_tp_mdix —
	 * looks like a typo here; layout-compatible either way and the field
	 * is not referenced by this file. Confirm before renaming.
	 */
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	/* Negative on the first handshake pass; see GLINKSETTINGS protocol. */
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	/* Flexible array: supported/advertising/lp_advertising bitmaps. */
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get master interface name from Infiniband device path.
 *
 * Scans the sysfs "device/net" directory of the IB device and picks the
 * netdev whose dev_port (or dev_id on old kernels) is 0.
 *
 * @param[in] ibdev_path
 *   Pointer to Infiniband device sysfs path, must not be NULL.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_master_ifname(const char *ibdev_path, char (*ifname)[IF_NAMESIZE])
{
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_type = 0;
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	assert(ibdev_path);
	{
		MKSTR(path, "%s/device/net", ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		/* Skip "." and ".." directory entries. */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		/* dev_id is hexadecimal, dev_port is decimal. */
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		if (dev_port == 0)
			strlcpy(match, name, sizeof(match));
	}
	closedir(dir);
	if (match[0] == '\0') {
		rte_errno = ENOENT;
		return -rte_errno;
	}
	/* match is NUL-terminated and shorter than IF_NAMESIZE here. */
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Get interface name from private structure.
 *
 * This is a port representor-aware version of mlx5_get_master_ifname().
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int ifindex;

	assert(priv);
	assert(priv->sh);
	ifindex = mlx5_ifindex(dev);
	if (!ifindex) {
		/* Fall back to sysfs lookup for non-representor ports. */
		if (!priv->representor)
			return mlx5_get_master_ifname(priv->sh->ibdev_path,
						      ifname);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	if (if_indextoname(ifindex, &(*ifname)[0]))
		return 0;
	rte_errno = errno;
	return -rte_errno;
}

/**
 * Get the interface index from device name.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Nonzero interface index on success, zero otherwise and rte_errno is set.
 */
unsigned int
mlx5_ifindex(const struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int ifindex;

	assert(priv);
	assert(priv->if_index);
	ifindex = priv->if_index;
	if (!ifindex)
		rte_errno = ENXIO;
	return ifindex;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param req
 *   Request number to pass to ioctl().
 * @param[out] ifr
 *   Interface request structure output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
{
	/* A throw-away datagram socket is enough to carry the ioctl. */
	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	int ret = 0;

	if (sock == -1) {
		rte_errno = errno;
		return -rte_errno;
	}
	ret = mlx5_get_ifname(dev, &ifr->ifr_name);
	if (ret)
		goto error;
	ret = ioctl(sock, req, ifr);
	if (ret == -1) {
		rte_errno = errno;
		goto error;
	}
	close(sock);
	return 0;
error:
	close(sock);
	return -rte_errno;
}

/**
 * Get device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] mtu
 *   MTU value output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);

	if (ret)
		return ret;
	*mtu = request.ifr_mtu;
	return 0;
}

/**
 * Set device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mtu
 *   MTU value to set.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ifreq request = { .ifr_mtu = mtu, };

	return mlx5_ifreq(dev, SIOCSIFMTU, &request);
}

/**
 * Set device flags.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param keep
 *   Bitmask for flags that must remain untouched.
 * @param flags
 *   Bitmask for flags to modify.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
{
	struct ifreq request;
	/* Read-modify-write of the kernel interface flags. */
	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);

	if (ret)
		return ret;
	request.ifr_flags &= keep;
	request.ifr_flags |= flags & ~keep;
	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
}

/**
 * DPDK callback for Ethernet device configuration.
 *
 * Validates the RSS key, (re)allocates the RSS configuration, records the
 * requested queue counts and initializes the per-process private data.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int rxqs_n = dev->data->nb_rx_queues;
	unsigned int txqs_n = dev->data->nb_tx_queues;
	const uint8_t use_app_rss_key =
		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
	int ret = 0;

	/* An application-provided RSS key must have the exact length. */
	if (use_app_rss_key &&
	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
	     MLX5_RSS_HASH_KEY_LEN)) {
		DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long",
			dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN));
		rte_errno = EINVAL;
		return -rte_errno;
	}
	priv->rss_conf.rss_key =
		rte_realloc(priv->rss_conf.rss_key,
			    MLX5_RSS_HASH_KEY_LEN, 0);
	if (!priv->rss_conf.rss_key) {
		DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)
		dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;

	/* Use the application key if given, the built-in default otherwise. */
	memcpy(priv->rss_conf.rss_key,
	       use_app_rss_key ?
	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
	       rss_hash_default_key,
	       MLX5_RSS_HASH_KEY_LEN);
	priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN;
	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	priv->rxqs = (void *)dev->data->rx_queues;
	priv->txqs = (void *)dev->data->tx_queues;
	if (txqs_n != priv->txqs_n) {
		DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
			dev->data->port_id, priv->txqs_n, txqs_n);
		priv->txqs_n = txqs_n;
	}
	if (rxqs_n > priv->config.ind_table_max_size) {
		DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (rxqs_n != priv->rxqs_n) {
		DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
			dev->data->port_id, priv->rxqs_n, rxqs_n);
		priv->rxqs_n = rxqs_n;
	}
	priv->skip_default_rss_reta = 0;
	ret = mlx5_proc_priv_init(dev);
	if (ret)
		return ret;
	return 0;
}

/**
 * Configure default RSS reta.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int rxqs_n = dev->data->nb_rx_queues;
	unsigned int i;
	unsigned int j;
	unsigned int reta_idx_n;
	int ret = 0;
	unsigned int *rss_queue_arr = NULL;
	unsigned int rss_queue_n = 0;

	if (priv->skip_default_rss_reta)
		return ret;
	rss_queue_arr = rte_malloc("", rxqs_n * sizeof(unsigned int), 0);
	if (!rss_queue_arr) {
		DRV_LOG(ERR, "port %u cannot allocate RSS queue list (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	/* Collect indices of standard (non-hairpin) Rx queues only. */
	for (i = 0, j = 0; i < rxqs_n; i++) {
		struct mlx5_rxq_data *rxq_data;
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_data = (*priv->rxqs)[i];
		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
		if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
			rss_queue_arr[j++] = i;
	}
	rss_queue_n = j;
	if (rss_queue_n > priv->config.ind_table_max_size) {
		DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
			dev->data->port_id, rss_queue_n);
		rte_errno = EINVAL;
		rte_free(rss_queue_arr);
		return -rte_errno;
	}
	DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
		dev->data->port_id, priv->rxqs_n, rxqs_n);
	priv->rxqs_n = rxqs_n;
	/*
	 * If the requested number of RX queues is not a power of two,
	 * use the maximum indirection table size for better balancing.
	 * The result is always rounded to the next power of two.
	 */
	reta_idx_n = (1 << log2above((rss_queue_n & (rss_queue_n - 1)) ?
				     priv->config.ind_table_max_size :
				     rss_queue_n));
	ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
	if (ret) {
		rte_free(rss_queue_arr);
		return ret;
	}
	/*
	 * When the number of RX queues is not a power of two,
	 * the remaining table entries are padded with reused WQs
	 * and hashes are not spread uniformly.
	 */
	for (i = 0, j = 0; (i != reta_idx_n); ++i) {
		(*priv->reta_idx)[i] = rss_queue_arr[j];
		if (++j == rss_queue_n)
			j = 0;
	}
	rte_free(rss_queue_arr);
	return ret;
}

/**
 * Sets default tuning parameters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] info
 *   Info structure output buffer.
 */
static void
mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	/* Minimum CPU utilization. */
	info->default_rxportconf.ring_size = 256;
	info->default_txportconf.ring_size = 256;
	info->default_rxportconf.burst_size = MLX5_RX_DEFAULT_BURST;
	info->default_txportconf.burst_size = MLX5_TX_DEFAULT_BURST;
	if (priv->link_speed_capa & ETH_LINK_SPEED_100G) {
		info->default_rxportconf.nb_queues = 16;
		info->default_txportconf.nb_queues = 16;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 2048;
			info->default_txportconf.ring_size = 2048;
		}
	} else {
		info->default_rxportconf.nb_queues = 8;
		info->default_txportconf.nb_queues = 8;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 4096;
			info->default_txportconf.ring_size = 4096;
		}
	}
}

/**
 * Sets tx mbuf limiting parameters.
 *
 * Derives the maximum number of Tx segments per packet from the configured
 * inline data length and the WQE size budget.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] info
 *   Info structure output buffer.
 */
static void
mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int inlen;
	uint16_t nb_max;

	inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ?
		MLX5_SEND_DEF_INLINE_LEN :
		(unsigned int)config->txq_inline_max;
	assert(config->txq_inline_min >= 0);
	inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min);
	/* Clamp inline length to what fits into the largest WQE. */
	inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX +
			MLX5_ESEG_MIN_INLINE_SIZE -
			MLX5_WQE_CSEG_SIZE -
			MLX5_WQE_ESEG_SIZE -
			MLX5_WQE_DSEG_SIZE * 2);
	nb_max = (MLX5_WQE_SIZE_MAX +
		  MLX5_ESEG_MIN_INLINE_SIZE -
		  MLX5_WQE_CSEG_SIZE -
		  MLX5_WQE_ESEG_SIZE -
		  MLX5_WQE_DSEG_SIZE -
		  inlen) / MLX5_WSEG_SIZE;
	info->tx_desc_lim.nb_seg_max = nb_max;
	info->tx_desc_lim.nb_mtu_seg_max = nb_max;
}

/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
int
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max;

	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
	 */
	max = RTE_MIN(priv->sh->device_attr.orig_attr.max_cq,
		      priv->sh->device_attr.orig_attr.max_qp);
	/* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
	info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
				 info->rx_queue_offload_capa);
	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
	info->if_index = mlx5_ifindex(dev);
	info->reta_size = priv->reta_idx_n ?
		priv->reta_idx_n : config->ind_table_max_size;
	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
	info->speed_capa = priv->link_speed_capa;
	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
	mlx5_set_default_params(dev, info);
	mlx5_set_txlimit_params(dev, info);
	info->switch_info.name = dev->data->name;
	info->switch_info.domain_id = priv->domain_id;
	info->switch_info.port_id = priv->representor_id;
	if (priv->representor) {
		uint16_t port_id;

		if (priv->pf_bond >= 0) {
			/*
			 * Switch port ID is opaque value with driver defined
			 * format. Push the PF index in bonding configurations
			 * in upper four bits of port ID. If we get too many
			 * representors (more than 4K) or PFs (more than 15)
			 * this approach must be reconsidered.
			 */
			if ((info->switch_info.port_id >>
				MLX5_PORT_ID_BONDING_PF_SHIFT) ||
			    priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) {
				DRV_LOG(ERR, "can't update switch port ID"
					     " for bonding device");
				assert(false);
				return -ENODEV;
			}
			info->switch_info.port_id |=
				priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT;
		}
		MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
			struct mlx5_priv *opriv =
				rte_eth_devices[port_id].data->dev_private;

			if (!opriv ||
			    opriv->representor ||
			    opriv->sh != priv->sh ||
			    opriv->domain_id != priv->domain_id)
				continue;
			/*
			 * Override switch name with that of the master
			 * device.
			 */
			info->switch_info.name = opriv->dev_data->name;
			break;
		}
	}
	return 0;
}

/**
 * Get device current raw clock counter
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] time
 *   Current raw clock counter of the device.
 *
 * @return
 *   0 if the clock has correctly been read
 *   The value of errno in case of error
 */
int
mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_context *ctx = priv->sh->ctx;
	struct ibv_values_ex values;
	int err = 0;

	/* Query the device raw clock through the verbs extended values API. */
	values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK;
	err = mlx5_glue->query_rt_values_ex(ctx, &values);
	if (err != 0) {
		DRV_LOG(WARNING, "Could not query the clock !");
		return err;
	}
	*clock = values.raw_clock.tv_nsec;
	return 0;
}

/**
 * Get firmware version of a device.
 *
 * @param dev
 *   Ethernet device port.
 * @param fw_ver
 *   String output allocated by caller.
 * @param fw_size
 *   Size of the output string, including terminating null byte.
 *
 * @return
 *   0 on success, or the size of the non truncated string if too big.
 */
int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ibv_device_attr *attr = &priv->sh->device_attr.orig_attr;
	/* +1 accounts for the terminating NUL byte. */
	size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1;

	if (fw_size < size)
		return size;
	if (fw_ver != NULL)
		strlcpy(fw_ver, attr->fw_ver, fw_size);
	return 0;
}

/**
 * Get supported packet types.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   A pointer to the supported Packet types array.
 */
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	/* Only the known Rx burst functions report these packet types. */
	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * Retrieve the master device for representor in the same switch domain.
 *
 * @param dev
 *   Pointer to representor Ethernet device structure.
 *
 * @return
 *   Master device structure on success, NULL otherwise.
 */

static struct rte_eth_dev *
mlx5_find_master_dev(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv;
	uint16_t port_id;
	uint16_t domain_id;

	priv = dev->data->dev_private;
	domain_id = priv->domain_id;
	assert(priv->representor);
	/* Scan sibling ports sharing the same IB context and domain. */
	MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) {
		struct mlx5_priv *opriv =
			rte_eth_devices[port_id].data->dev_private;
		if (opriv &&
		    opriv->master &&
		    opriv->domain_id == domain_id &&
		    opriv->sh == priv->sh)
			return &rte_eth_devices[port_id];
	}
	return NULL;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
			       struct rte_eth_link *link)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	/* Link is up only when the interface is both UP and RUNNING. */
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&edata,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		if (ret == -ENOTSUP && priv->representor) {
			struct rte_eth_dev *master;

			/*
			 * For representors we can try to inherit link
			 * settings from the master device. Actually
			 * link settings do not make a lot of sense
			 * for representors due to missing physical
			 * link. The old kernel drivers supported
			 * emulated settings query for representors,
			 * the new ones do not, so we have to add
			 * this code for compatibility issues.
			 */
			master = mlx5_find_master_dev(dev);
			if (master) {
				ifr = (struct ifreq) {
					.ifr_data = (void *)&edata,
				};
				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
			}
		}
		if (ret) {
			DRV_LOG(WARNING,
				"port %u ioctl(SIOCETHTOOL,"
				" ETHTOOL_GSET) failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return ret;
		}
	}
	link_speed = ethtool_cmd_speed(&edata);
	if (link_speed == -1)
		dev_link.link_speed = ETH_SPEED_NUM_NONE;
	else
		dev_link.link_speed = link_speed;
	/* Translate the legacy SUPPORTED_* bits into DPDK speed capa. */
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
			ETH_LINK_SPEED_FIXED);
	/* Speed and status must agree; otherwise ask the caller to retry. */
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * Retrieve physical link information (unlocked version using new ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
			     struct rte_eth_link *link)

{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	struct rte_eth_dev *master = NULL;
	uint64_t sc;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	/* Link is up only when the interface is both UP and RUNNING. */
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	/* First handshake pass: kernel reports the mask word count. */
	ifr = (struct ifreq) {
		.ifr_data = (void *)&gcmd,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		if (ret == -ENOTSUP && priv->representor) {
			/*
			 * For representors we can try to inherit link
			 * settings from the master device. Actually
			 * link settings do not make a lot of sense
			 * for representors due to missing physical
			 * link. The old kernel drivers supported
			 * emulated settings query for representors,
			 * the new ones do not, so we have to add
			 * this code for compatibility issues.
			 */
			master = mlx5_find_master_dev(dev);
			if (master) {
				ifr = (struct ifreq) {
					.ifr_data = (void *)&gcmd,
				};
				ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);
			}
		}
		if (ret) {
			DRV_LOG(DEBUG,
				"port %u ioctl(SIOCETHTOOL,"
				" ETHTOOL_GLINKSETTINGS) failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return ret;
		}

	}
	/*
	 * The kernel returns the word count negated on the handshake pass;
	 * flip it back for the real query.
	 */
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	/* Room for the three bitmaps (supported/advertising/lp_advertising). */
	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL,"
			"ETHTOOL_GLINKSETTINGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link.link_speed = (ecmd->speed == UINT32_MAX) ?
				ETH_SPEED_NUM_NONE : ecmd->speed;
	/* Fold the first two mask words into one 64-bit capability set. */
	sc = ecmd->link_mode_masks[0] |
		((uint64_t)ecmd->link_mode_masks[1] << 32);
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	/* Speed and status must agree; otherwise ask the caller to retry. */
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion.
 *
 * @return
 *   0 if link status was not updated, positive if it was, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);
	int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;

	do {
		/* Prefer the new GLINKSETTINGS ioctl, fall back to GSET. */
		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret == -ENOTSUP)
			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if ((wait_to_complete || retry) && ret == -EAGAIN) {
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0);
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete || retry-- > 0);
	/* Report whether the stored link status actually changed. */
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param in_mtu
 *   New MTU.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t kern_mtu = 0;
	int ret;

	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	/* Set kernel interface MTU first. */
	ret = mlx5_set_mtu(dev, mtu);
	if (ret)
		return ret;
	/* Read back to verify the kernel accepted the new value. */
	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
			dev->data->port_id, mtu);
		return 0;
	}
	rte_errno = EAGAIN;
	return -rte_errno;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
1140 */ 1141 int 1142 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1143 { 1144 struct ifreq ifr; 1145 struct ethtool_pauseparam ethpause = { 1146 .cmd = ETHTOOL_GPAUSEPARAM 1147 }; 1148 int ret; 1149 1150 ifr.ifr_data = (void *)ðpause; 1151 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1152 if (ret) { 1153 DRV_LOG(WARNING, 1154 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 1155 " %s", 1156 dev->data->port_id, strerror(rte_errno)); 1157 return ret; 1158 } 1159 fc_conf->autoneg = ethpause.autoneg; 1160 if (ethpause.rx_pause && ethpause.tx_pause) 1161 fc_conf->mode = RTE_FC_FULL; 1162 else if (ethpause.rx_pause) 1163 fc_conf->mode = RTE_FC_RX_PAUSE; 1164 else if (ethpause.tx_pause) 1165 fc_conf->mode = RTE_FC_TX_PAUSE; 1166 else 1167 fc_conf->mode = RTE_FC_NONE; 1168 return 0; 1169 } 1170 1171 /** 1172 * DPDK callback to modify flow control parameters. 1173 * 1174 * @param dev 1175 * Pointer to Ethernet device structure. 1176 * @param[in] fc_conf 1177 * Flow control parameters. 1178 * 1179 * @return 1180 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1181 */ 1182 int 1183 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1184 { 1185 struct ifreq ifr; 1186 struct ethtool_pauseparam ethpause = { 1187 .cmd = ETHTOOL_SPAUSEPARAM 1188 }; 1189 int ret; 1190 1191 ifr.ifr_data = (void *)ðpause; 1192 ethpause.autoneg = fc_conf->autoneg; 1193 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1194 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1195 ethpause.rx_pause = 1; 1196 else 1197 ethpause.rx_pause = 0; 1198 1199 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1200 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1201 ethpause.tx_pause = 1; 1202 else 1203 ethpause.tx_pause = 0; 1204 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1205 if (ret) { 1206 DRV_LOG(WARNING, 1207 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1208 " failed: %s", 1209 dev->data->port_id, strerror(rte_errno)); 1210 return ret; 1211 } 1212 return 0; 1213 } 1214 1215 /** 1216 * Handle asynchronous removal event for entire multiport device. 1217 * 1218 * @param sh 1219 * Infiniband device shared context. 1220 */ 1221 static void 1222 mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) 1223 { 1224 uint32_t i; 1225 1226 for (i = 0; i < sh->max_port; ++i) { 1227 struct rte_eth_dev *dev; 1228 1229 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { 1230 /* 1231 * Or not existing port either no 1232 * handler installed for this port. 1233 */ 1234 continue; 1235 } 1236 dev = &rte_eth_devices[sh->port[i].ih_port_id]; 1237 assert(dev); 1238 if (dev->data->dev_conf.intr_conf.rmv) 1239 _rte_eth_dev_callback_process 1240 (dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1241 } 1242 } 1243 1244 /** 1245 * Handle shared asynchronous events the NIC (removal event 1246 * and link status change). Supports multiport IB device. 1247 * 1248 * @param cb_arg 1249 * Callback argument. 
 */
void
mlx5_dev_interrupt_handler(void *cb_arg)
{
	struct mlx5_ibv_shared *sh = cb_arg;
	struct ibv_async_event event;

	/* Read all message from the IB device and acknowledge them. */
	for (;;) {
		struct rte_eth_dev *dev;
		uint32_t tmp;

		if (mlx5_glue->get_async_event(sh->ctx, &event))
			break;
		/* Retrieve and check IB port index. */
		tmp = (uint32_t)event.element.port_num;
		if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {
			/*
			 * The DEVICE_FATAL event is called once for
			 * entire device without port specifying.
			 * We should notify all existing ports.
			 */
			mlx5_glue->ack_async_event(&event);
			mlx5_dev_interrupt_device_fatal(sh);
			continue;
		}
		assert(tmp && (tmp <= sh->max_port));
		if (!tmp) {
			/* Unsupported device level event. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"unsupported common event (type %d)",
				event.event_type);
			continue;
		}
		if (tmp > sh->max_port) {
			/* Invalid IB port index. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"cannot handle an event (type %d)"
				"due to invalid IB port index (%u)",
				event.event_type, tmp);
			continue;
		}
		if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
			/* No handler installed. */
			mlx5_glue->ack_async_event(&event);
			DRV_LOG(DEBUG,
				"cannot handle an event (type %d)"
				"due to no handler installed for port %u",
				event.event_type, tmp);
			continue;
		}
		/* Retrieve ethernet device descriptor. */
		tmp = sh->port[tmp - 1].ih_port_id;
		dev = &rte_eth_devices[tmp];
		assert(dev);
		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
		     event.event_type == IBV_EVENT_PORT_ERR) &&
			dev->data->dev_conf.intr_conf.lsc) {
			/* Ack the event before the (possibly long) update. */
			mlx5_glue->ack_async_event(&event);
			/* -EAGAIN: link state not settled yet, do not notify. */
			if (mlx5_link_update(dev, 0) == -EAGAIN) {
				usleep(0);
				continue;
			}
			_rte_eth_dev_callback_process
				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
			continue;
		}
		DRV_LOG(DEBUG,
			"port %u cannot handle an unknown event (type %d)",
			dev->data->port_id, event.event_type);
		mlx5_glue->ack_async_event(&event);
	}
}

/**
 * Unregister callback handler safely. The handler may be active
 * while we are trying to unregister it, in this case code -EAGAIN
 * is returned by rte_intr_callback_unregister(). This routine checks
 * the return code and tries to unregister handler again.
 *
 * @param handle
 *   interrupt handle
 * @param cb_fn
 *   pointer to callback routine
 * @param cb_arg
 *   opaque callback parameter
 */
void
mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
			      rte_intr_callback_fn cb_fn, void *cb_arg)
{
	/*
	 * Try to reduce timeout management overhead by not calling
	 * the timer related routines on the first iteration. If the
	 * unregistering succeeds on first call there will be no
	 * timer calls at all.
	 */
	uint64_t twait = 0;
	uint64_t start = 0;

	do {
		int ret;

		ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
		if (ret >= 0)
			return;
		if (ret != -EAGAIN) {
			DRV_LOG(INFO, "failed to unregister interrupt"
				      " handler (error: %d)", ret);
			assert(false);
			return;
		}
		if (twait) {
			struct timespec onems;

			/* Wait one millisecond and try again. */
			onems.tv_sec = 0;
			onems.tv_nsec = NS_PER_S / MS_PER_S;
			nanosleep(&onems, 0);
			/* Check whether one second elapsed. */
			if ((rte_get_timer_cycles() - start) <= twait)
				continue;
		} else {
			/*
			 * We get the amount of timer ticks for one second.
			 * If this amount elapsed it means we spent one
			 * second in waiting. This branch is executed once
			 * on first iteration.
			 */
			twait = rte_get_timer_hz();
			assert(twait);
		}
		/*
		 * Timeout elapsed, show message (once a second) and retry.
		 * We have no other acceptable option here, if we ignore
		 * the unregistering return code the handler will not
		 * be unregistered, fd will be closed and we may get the
		 * crash. Hanging and messaging in the loop seems not to be
		 * the worst choice.
		 */
		DRV_LOG(INFO, "Retrying to unregister interrupt handler");
		start = rte_get_timer_cycles();
	} while (true);
}

/**
 * Handle DEVX interrupts from the NIC.
 * This function is probably called from the DPDK host thread.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler_devx(void *cb_arg)
{
#ifndef HAVE_IBV_DEVX_ASYNC
	(void)cb_arg;
	return;
#else
	struct mlx5_ibv_shared *sh = cb_arg;
	/* Buffer large enough for a flow counter query completion. */
	union {
		struct mlx5dv_devx_async_cmd_hdr cmd_resp;
		uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
			    MLX5_ST_SZ_BYTES(traffic_counter) +
			    sizeof(struct mlx5dv_devx_async_cmd_hdr)];
	} out;
	/* Command payload starts right after the async header. */
	uint8_t *buf = out.buf + sizeof(out.cmd_resp);

	/* Drain all pending async command completions. */
	while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,
						   &out.cmd_resp,
						   sizeof(out.buf)))
		mlx5_flow_async_pool_query_handle
			(sh, (uint64_t)out.cmd_resp.wr_id,
			 mlx5_devx_get_out_command_status(buf));
#endif /* HAVE_IBV_DEVX_ASYNC */
}

/**
 * Uninstall shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_uninstall(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	/* Nothing to do if no handler was registered for this port. */
	if (sh->port[priv->ibv_port - 1].ih_port_id >= RTE_MAX_ETHPORTS)
		goto exit;
	assert(sh->port[priv->ibv_port - 1].ih_port_id ==
					(uint32_t)dev->data->port_id);
	assert(sh->intr_cnt);
	/* RTE_MAX_ETHPORTS marks the slot as "no handler installed". */
	sh->port[priv->ibv_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	/* Unregister the shared handler only when the last user is gone. */
	if (!sh->intr_cnt || --sh->intr_cnt)
		goto exit;
	mlx5_intr_callback_unregister(&sh->intr_handle,
				      mlx5_dev_interrupt_handler, sh);
	sh->intr_handle.fd = 0;
	sh->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Uninstall devx shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_devx_uninstall(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	/* Nothing to do if no devx handler was registered for this port. */
	if (sh->port[priv->ibv_port - 1].devx_ih_port_id >= RTE_MAX_ETHPORTS)
		goto exit;
	assert(sh->port[priv->ibv_port - 1].devx_ih_port_id ==
	       (uint32_t)dev->data->port_id);
	sh->port[priv->ibv_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	/* Tear down shared resources only when the last user is gone. */
	if (!sh->devx_intr_cnt || --sh->devx_intr_cnt)
		goto exit;
	if (sh->intr_handle_devx.fd) {
		rte_intr_callback_unregister(&sh->intr_handle_devx,
					     mlx5_dev_interrupt_handler_devx,
					     sh);
		sh->intr_handle_devx.fd = 0;
		sh->intr_handle_devx.type = RTE_INTR_HANDLE_UNKNOWN;
	}
	if (sh->devx_comp) {
		mlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);
		sh->devx_comp = NULL;
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Install shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_install(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;
	int ret;
	int flags;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].ih_port_id < RTE_MAX_ETHPORTS) {
		/* The handler is already installed for this port. */
		assert(sh->intr_cnt);
		goto exit;
	}
	if (sh->intr_cnt) {
		/* Shared handler exists: just take a reference for this port. */
		sh->port[priv->ibv_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
		sh->intr_cnt++;
		goto exit;
	}
	/* No shared handler installed. */
	assert(sh->ctx->async_fd > 0);
	/* Events must be read non-blocking from the interrupt callback. */
	flags = fcntl(sh->ctx->async_fd, F_GETFL);
	ret = fcntl(sh->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(INFO, "failed to change file descriptor async event"
			" queue");
		/* Indicate there will be no interrupts. */
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	} else {
		sh->intr_handle.fd = sh->ctx->async_fd;
		sh->intr_handle.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&sh->intr_handle,
					   mlx5_dev_interrupt_handler, sh);
		sh->intr_cnt++;
		sh->port[priv->ibv_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Install devx shared asynchronous device events handler.
 * This function is implemented to support event sharing
 * between multiple ports of single IB device.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_dev_shared_handler_devx_install(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_ibv_shared *sh = priv->sh;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return;
	pthread_mutex_lock(&sh->intr_mutex);
	assert(priv->ibv_port);
	assert(priv->ibv_port <= sh->max_port);
	assert(dev->data->port_id < RTE_MAX_ETHPORTS);
	if (sh->port[priv->ibv_port - 1].devx_ih_port_id < RTE_MAX_ETHPORTS) {
		/* The handler is already installed for this port. */
		assert(sh->devx_intr_cnt);
		goto exit;
	}
	if (sh->devx_intr_cnt) {
		/* Shared handler exists: just take a reference for this port. */
		sh->devx_intr_cnt++;
		sh->port[priv->ibv_port - 1].devx_ih_port_id =
			(uint32_t)dev->data->port_id;
		goto exit;
	}
	if (priv->config.devx) {
#ifndef HAVE_IBV_DEVX_ASYNC
		goto exit;
#else
		sh->devx_comp = mlx5_glue->devx_create_cmd_comp(sh->ctx);
		if (sh->devx_comp) {
			/* The completion fd must be non-blocking as well. */
			int flags = fcntl(sh->devx_comp->fd, F_GETFL);
			int ret = fcntl(sh->devx_comp->fd, F_SETFL,
				    flags | O_NONBLOCK);

			if (ret) {
				DRV_LOG(INFO, "failed to change file descriptor"
					" devx async event queue");
			} else {
				sh->intr_handle_devx.fd = sh->devx_comp->fd;
				sh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;
				rte_intr_callback_register
					(&sh->intr_handle_devx,
					 mlx5_dev_interrupt_handler_devx, sh);
				sh->devx_intr_cnt++;
				sh->port[priv->ibv_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
			}
		}
#endif /* HAVE_IBV_DEVX_ASYNC */
	}
exit:
	pthread_mutex_unlock(&sh->intr_mutex);
}

/**
 * Uninstall interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
{
	mlx5_dev_shared_handler_uninstall(dev);
}

/**
 * Install interrupt handler.
1638 * 1639 * @param dev 1640 * Pointer to Ethernet device. 1641 */ 1642 void 1643 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1644 { 1645 mlx5_dev_shared_handler_install(dev); 1646 } 1647 1648 /** 1649 * Devx uninstall interrupt handler. 1650 * 1651 * @param dev 1652 * Pointer to Ethernet device. 1653 */ 1654 void 1655 mlx5_dev_interrupt_handler_devx_uninstall(struct rte_eth_dev *dev) 1656 { 1657 mlx5_dev_shared_handler_devx_uninstall(dev); 1658 } 1659 1660 /** 1661 * Devx install interrupt handler. 1662 * 1663 * @param dev 1664 * Pointer to Ethernet device. 1665 */ 1666 void 1667 mlx5_dev_interrupt_handler_devx_install(struct rte_eth_dev *dev) 1668 { 1669 mlx5_dev_shared_handler_devx_install(dev); 1670 } 1671 1672 /** 1673 * DPDK callback to bring the link DOWN. 1674 * 1675 * @param dev 1676 * Pointer to Ethernet device structure. 1677 * 1678 * @return 1679 * 0 on success, a negative errno value otherwise and rte_errno is set. 1680 */ 1681 int 1682 mlx5_set_link_down(struct rte_eth_dev *dev) 1683 { 1684 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1685 } 1686 1687 /** 1688 * DPDK callback to bring the link UP. 1689 * 1690 * @param dev 1691 * Pointer to Ethernet device structure. 1692 * 1693 * @return 1694 * 0 on success, a negative errno value otherwise and rte_errno is set. 1695 */ 1696 int 1697 mlx5_set_link_up(struct rte_eth_dev *dev) 1698 { 1699 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1700 } 1701 1702 /** 1703 * Configure the RX function to use. 1704 * 1705 * @param dev 1706 * Pointer to private data structure. 1707 * 1708 * @return 1709 * Pointer to selected Rx burst function. 
1710 */ 1711 eth_rx_burst_t 1712 mlx5_select_rx_function(struct rte_eth_dev *dev) 1713 { 1714 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1715 1716 assert(dev != NULL); 1717 if (mlx5_check_vec_rx_support(dev) > 0) { 1718 rx_pkt_burst = mlx5_rx_burst_vec; 1719 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1720 dev->data->port_id); 1721 } else if (mlx5_mprq_enabled(dev)) { 1722 rx_pkt_burst = mlx5_rx_burst_mprq; 1723 } 1724 return rx_pkt_burst; 1725 } 1726 1727 /** 1728 * Check if mlx5 device was removed. 1729 * 1730 * @param dev 1731 * Pointer to Ethernet device structure. 1732 * 1733 * @return 1734 * 1 when device is removed, otherwise 0. 1735 */ 1736 int 1737 mlx5_is_removed(struct rte_eth_dev *dev) 1738 { 1739 struct ibv_device_attr device_attr; 1740 struct mlx5_priv *priv = dev->data->dev_private; 1741 1742 if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO) 1743 return 1; 1744 return 0; 1745 } 1746 1747 /** 1748 * Get the E-Switch parameters by port id. 1749 * 1750 * @param[in] port 1751 * Device port id. 1752 * @param[in] valid 1753 * Device port id is valid, skip check. This flag is useful 1754 * when trials are performed from probing and device is not 1755 * flagged as valid yet (in attaching process). 1756 * @param[out] es_domain_id 1757 * E-Switch domain id. 1758 * @param[out] es_port_id 1759 * The port id of the port in the E-Switch. 1760 * 1761 * @return 1762 * pointer to device private data structure containing data needed 1763 * on success, NULL otherwise and rte_errno is set. 
1764 */ 1765 struct mlx5_priv * 1766 mlx5_port_to_eswitch_info(uint16_t port, bool valid) 1767 { 1768 struct rte_eth_dev *dev; 1769 struct mlx5_priv *priv; 1770 1771 if (port >= RTE_MAX_ETHPORTS) { 1772 rte_errno = EINVAL; 1773 return NULL; 1774 } 1775 if (!valid && !rte_eth_dev_is_valid_port(port)) { 1776 rte_errno = ENODEV; 1777 return NULL; 1778 } 1779 dev = &rte_eth_devices[port]; 1780 priv = dev->data->dev_private; 1781 if (!(priv->representor || priv->master)) { 1782 rte_errno = EINVAL; 1783 return NULL; 1784 } 1785 return priv; 1786 } 1787 1788 /** 1789 * Get the E-Switch parameters by device instance. 1790 * 1791 * @param[in] port 1792 * Device port id. 1793 * @param[out] es_domain_id 1794 * E-Switch domain id. 1795 * @param[out] es_port_id 1796 * The port id of the port in the E-Switch. 1797 * 1798 * @return 1799 * pointer to device private data structure containing data needed 1800 * on success, NULL otherwise and rte_errno is set. 1801 */ 1802 struct mlx5_priv * 1803 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 1804 { 1805 struct mlx5_priv *priv; 1806 1807 priv = dev->data->dev_private; 1808 if (!(priv->representor || priv->master)) { 1809 rte_errno = EINVAL; 1810 return NULL; 1811 } 1812 return priv; 1813 } 1814 1815 /** 1816 * Get switch information associated with network interface. 1817 * 1818 * @param ifindex 1819 * Network interface index. 1820 * @param[out] info 1821 * Switch information object, populated in case of success. 1822 * 1823 * @return 1824 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1825 */ 1826 int 1827 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) 1828 { 1829 char ifname[IF_NAMESIZE]; 1830 char port_name[IF_NAMESIZE]; 1831 FILE *file; 1832 struct mlx5_switch_info data = { 1833 .master = 0, 1834 .representor = 0, 1835 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1836 .port_name = 0, 1837 .switch_id = 0, 1838 }; 1839 DIR *dir; 1840 bool port_switch_id_set = false; 1841 bool device_dir = false; 1842 char c; 1843 int ret; 1844 1845 if (!if_indextoname(ifindex, ifname)) { 1846 rte_errno = errno; 1847 return -rte_errno; 1848 } 1849 1850 MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name", 1851 ifname); 1852 MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", 1853 ifname); 1854 MKSTR(pci_device, "/sys/class/net/%s/device", 1855 ifname); 1856 1857 file = fopen(phys_port_name, "rb"); 1858 if (file != NULL) { 1859 ret = fscanf(file, "%s", port_name); 1860 fclose(file); 1861 if (ret == 1) 1862 mlx5_translate_port_name(port_name, &data); 1863 } 1864 file = fopen(phys_switch_id, "rb"); 1865 if (file == NULL) { 1866 rte_errno = errno; 1867 return -rte_errno; 1868 } 1869 port_switch_id_set = 1870 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && 1871 c == '\n'; 1872 fclose(file); 1873 dir = opendir(pci_device); 1874 if (dir != NULL) { 1875 closedir(dir); 1876 device_dir = true; 1877 } 1878 if (port_switch_id_set) { 1879 /* We have some E-Switch configuration. */ 1880 mlx5_sysfs_check_switch_info(device_dir, &data); 1881 } 1882 *info = data; 1883 assert(!(data.master && data.representor)); 1884 if (data.master && data.representor) { 1885 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1886 " and as representor", ifindex); 1887 rte_errno = ENODEV; 1888 return -rte_errno; 1889 } 1890 return 0; 1891 } 1892 1893 /** 1894 * Analyze gathered port parameters via Netlink to recognize master 1895 * and representor devices for E-Switch configuration. 
1896 * 1897 * @param[in] num_vf_set 1898 * flag of presence of number of VFs port attribute. 1899 * @param[inout] switch_info 1900 * Port information, including port name as a number and port name 1901 * type if recognized 1902 * 1903 * @return 1904 * master and representor flags are set in switch_info according to 1905 * recognized parameters (if any). 1906 */ 1907 void 1908 mlx5_nl_check_switch_info(bool num_vf_set, 1909 struct mlx5_switch_info *switch_info) 1910 { 1911 switch (switch_info->name_type) { 1912 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1913 /* 1914 * Name is not recognized, assume the master, 1915 * check the number of VFs key presence. 1916 */ 1917 switch_info->master = num_vf_set; 1918 break; 1919 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1920 /* 1921 * Name is not set, this assumes the legacy naming 1922 * schema for master, just check if there is a 1923 * number of VFs key. 1924 */ 1925 switch_info->master = num_vf_set; 1926 break; 1927 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1928 /* New uplink naming schema recognized. */ 1929 switch_info->master = 1; 1930 break; 1931 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1932 /* Legacy representors naming schema. */ 1933 switch_info->representor = !num_vf_set; 1934 break; 1935 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1936 /* New representors naming schema. */ 1937 switch_info->representor = 1; 1938 break; 1939 } 1940 } 1941 1942 /** 1943 * Analyze gathered port parameters via sysfs to recognize master 1944 * and representor devices for E-Switch configuration. 1945 * 1946 * @param[in] device_dir 1947 * flag of presence of "device" directory under port device key. 1948 * @param[inout] switch_info 1949 * Port information, including port name as a number and port name 1950 * type if recognized 1951 * 1952 * @return 1953 * master and representor flags are set in switch_info according to 1954 * recognized parameters (if any). 
1955 */ 1956 void 1957 mlx5_sysfs_check_switch_info(bool device_dir, 1958 struct mlx5_switch_info *switch_info) 1959 { 1960 switch (switch_info->name_type) { 1961 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 1962 /* 1963 * Name is not recognized, assume the master, 1964 * check the device directory presence. 1965 */ 1966 switch_info->master = device_dir; 1967 break; 1968 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1969 /* 1970 * Name is not set, this assumes the legacy naming 1971 * schema for master, just check if there is 1972 * a device directory. 1973 */ 1974 switch_info->master = device_dir; 1975 break; 1976 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1977 /* New uplink naming schema recognized. */ 1978 switch_info->master = 1; 1979 break; 1980 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1981 /* Legacy representors naming schema. */ 1982 switch_info->representor = !device_dir; 1983 break; 1984 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1985 /* New representors naming schema. */ 1986 switch_info->representor = 1; 1987 break; 1988 } 1989 } 1990 1991 /** 1992 * Extract port name, as a number, from sysfs or netlink information. 1993 * 1994 * @param[in] port_name_in 1995 * String representing the port name. 1996 * @param[out] port_info_out 1997 * Port information, including port name as a number and port name 1998 * type if recognized 1999 * 2000 * @return 2001 * port_name field set according to recognized name format. 2002 */ 2003 void 2004 mlx5_translate_port_name(const char *port_name_in, 2005 struct mlx5_switch_info *port_info_out) 2006 { 2007 char pf_c1, pf_c2, vf_c1, vf_c2; 2008 char *end; 2009 int sc_items; 2010 2011 /* 2012 * Check for port-name as a string of the form pf0vf0 2013 * (support kernel ver >= 5.0 or OFED ver >= 4.6). 
2014 */ 2015 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", 2016 &pf_c1, &pf_c2, &port_info_out->pf_num, 2017 &vf_c1, &vf_c2, &port_info_out->port_name); 2018 if (sc_items == 6 && 2019 pf_c1 == 'p' && pf_c2 == 'f' && 2020 vf_c1 == 'v' && vf_c2 == 'f') { 2021 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; 2022 return; 2023 } 2024 /* 2025 * Check for port-name as a string of the form p0 2026 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 2027 */ 2028 sc_items = sscanf(port_name_in, "%c%d", 2029 &pf_c1, &port_info_out->port_name); 2030 if (sc_items == 2 && pf_c1 == 'p') { 2031 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 2032 return; 2033 } 2034 /* Check for port-name as a number (support kernel ver < 5.0 */ 2035 errno = 0; 2036 port_info_out->port_name = strtol(port_name_in, &end, 0); 2037 if (!errno && 2038 (size_t)(end - port_name_in) == strlen(port_name_in)) { 2039 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 2040 return; 2041 } 2042 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 2043 return; 2044 } 2045 2046 /** 2047 * DPDK callback to retrieve plug-in module EEPROM information (type and size). 2048 * 2049 * @param dev 2050 * Pointer to Ethernet device structure. 2051 * @param[out] modinfo 2052 * Storage for plug-in module EEPROM information. 2053 * 2054 * @return 2055 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2056 */ 2057 int 2058 mlx5_get_module_info(struct rte_eth_dev *dev, 2059 struct rte_eth_dev_module_info *modinfo) 2060 { 2061 struct ethtool_modinfo info = { 2062 .cmd = ETHTOOL_GMODULEINFO, 2063 }; 2064 struct ifreq ifr = (struct ifreq) { 2065 .ifr_data = (void *)&info, 2066 }; 2067 int ret = 0; 2068 2069 if (!dev || !modinfo) { 2070 DRV_LOG(WARNING, "missing argument, cannot get module info"); 2071 rte_errno = EINVAL; 2072 return -rte_errno; 2073 } 2074 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2075 if (ret) { 2076 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 2077 dev->data->port_id, strerror(rte_errno)); 2078 return ret; 2079 } 2080 modinfo->type = info.type; 2081 modinfo->eeprom_len = info.eeprom_len; 2082 return ret; 2083 } 2084 2085 /** 2086 * DPDK callback to retrieve plug-in module EEPROM data. 2087 * 2088 * @param dev 2089 * Pointer to Ethernet device structure. 2090 * @param[out] info 2091 * Storage for plug-in module EEPROM data. 2092 * 2093 * @return 2094 * 0 on success, a negative errno value otherwise and rte_errno is set. 
2095 */ 2096 int mlx5_get_module_eeprom(struct rte_eth_dev *dev, 2097 struct rte_dev_eeprom_info *info) 2098 { 2099 struct ethtool_eeprom *eeprom; 2100 struct ifreq ifr; 2101 int ret = 0; 2102 2103 if (!dev || !info) { 2104 DRV_LOG(WARNING, "missing argument, cannot get module eeprom"); 2105 rte_errno = EINVAL; 2106 return -rte_errno; 2107 } 2108 eeprom = rte_calloc(__func__, 1, 2109 (sizeof(struct ethtool_eeprom) + info->length), 0); 2110 if (!eeprom) { 2111 DRV_LOG(WARNING, "port %u cannot allocate memory for " 2112 "eeprom data", dev->data->port_id); 2113 rte_errno = ENOMEM; 2114 return -rte_errno; 2115 } 2116 eeprom->cmd = ETHTOOL_GMODULEEEPROM; 2117 eeprom->offset = info->offset; 2118 eeprom->len = info->length; 2119 ifr = (struct ifreq) { 2120 .ifr_data = (void *)eeprom, 2121 }; 2122 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 2123 if (ret) 2124 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 2125 dev->data->port_id, strerror(rte_errno)); 2126 else 2127 rte_memcpy(info->data, eeprom->data, info->length); 2128 rte_free(eeprom); 2129 return ret; 2130 } 2131 2132 /** 2133 * DPDK callback to retrieve hairpin capabilities. 2134 * 2135 * @param dev 2136 * Pointer to Ethernet device structure. 2137 * @param[out] cap 2138 * Storage for hairpin capability data. 2139 * 2140 * @return 2141 * 0 on success, a negative errno value otherwise and rte_errno is set. 2142 */ 2143 int mlx5_hairpin_cap_get(struct rte_eth_dev *dev, 2144 struct rte_eth_hairpin_cap *cap) 2145 { 2146 struct mlx5_priv *priv = dev->data->dev_private; 2147 2148 if (priv->sh->devx == 0) { 2149 rte_errno = ENOTSUP; 2150 return -rte_errno; 2151 } 2152 cap->max_nb_queues = UINT16_MAX; 2153 cap->max_rx_2_tx = 1; 2154 cap->max_tx_2_rx = 1; 2155 cap->max_nb_desc = 8192; 2156 return 0; 2157 } 2158