/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#define _GNU_SOURCE

#include <stddef.h>
#include <assert.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <dirent.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <fcntl.h>
#include <stdalign.h>
#include <sys/un.h>
#include <time.h>

#include <rte_atomic.h>
#include <rte_ethdev_driver.h>
#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_rwlock.h>

#include "mlx5.h"
#include "mlx5_glue.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"

/* Supported speed values found in /usr/include/linux/ethtool.h */
#ifndef HAVE_SUPPORTED_40000baseKR4_Full
#define SUPPORTED_40000baseKR4_Full (1 << 23)
#endif
#ifndef HAVE_SUPPORTED_40000baseCR4_Full
#define SUPPORTED_40000baseCR4_Full (1 << 24)
#endif
#ifndef HAVE_SUPPORTED_40000baseSR4_Full
#define SUPPORTED_40000baseSR4_Full (1 << 25)
#endif
#ifndef HAVE_SUPPORTED_40000baseLR4_Full
#define SUPPORTED_40000baseLR4_Full (1 << 26)
#endif
#ifndef HAVE_SUPPORTED_56000baseKR4_Full
#define SUPPORTED_56000baseKR4_Full (1 << 27)
#endif
#ifndef HAVE_SUPPORTED_56000baseCR4_Full
#define SUPPORTED_56000baseCR4_Full (1 << 28)
#endif
#ifndef HAVE_SUPPORTED_56000baseSR4_Full
#define SUPPORTED_56000baseSR4_Full (1 << 29)
#endif
#ifndef HAVE_SUPPORTED_56000baseLR4_Full
#define SUPPORTED_56000baseLR4_Full (1 << 30)
#endif

/* Add defines in case the running kernel is not the same as user headers.
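 * These mirror the linux/ethtool.h definitions so that the newer
 * ETHTOOL_GLINKSETTINGS request can still be issued when the PMD is built
 * against older kernel headers.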
 */
#ifndef ETHTOOL_GLINKSETTINGS
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get master interface name from private structure.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_master_ifname(const struct rte_eth_dev *dev,
		       char (*ifname)[IF_NAMESIZE])
{
	struct priv *priv = dev->data->dev_private;
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_type = 0;
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	{
		MKSTR(path, "%s/device/net", priv->ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		if (dev_port == 0)
			strlcpy(match, name, sizeof(match));
	}
	closedir(dir);
	if (match[0] == '\0') {
		rte_errno = ENOENT;
		return -rte_errno;
	}
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Get interface name from private structure.
 *
 * This is a port representor-aware version of mlx5_get_master_ifname().
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
{
	struct priv *priv = dev->data->dev_private;
	unsigned int ifindex =
		priv->nl_socket_rdma >= 0 ?
		mlx5_nl_ifindex(priv->nl_socket_rdma, priv->ibdev_name) : 0;

	if (!ifindex) {
		if (!priv->representor)
			return mlx5_get_master_ifname(dev, ifname);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	if (if_indextoname(ifindex, &(*ifname)[0]))
		return 0;
	rte_errno = errno;
	return -rte_errno;
}

/**
 * Get the interface index from device name.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Nonzero interface index on success, zero otherwise and rte_errno is set.
 */
unsigned int
mlx5_ifindex(const struct rte_eth_dev *dev)
{
	char ifname[IF_NAMESIZE];
	unsigned int ifindex;

	if (mlx5_get_ifname(dev, &ifname))
		return 0;
	ifindex = if_nametoindex(ifname);
	if (!ifindex)
		rte_errno = errno;
	return ifindex;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param req
 *   Request number to pass to ioctl().
 * @param[out] ifr
 *   Interface request structure output buffer.
 * @param master
 *   When device is a port representor, perform request on master device
 *   instead.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
	   int master)
{
	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	int ret = 0;

	if (sock == -1) {
		rte_errno = errno;
		return -rte_errno;
	}
	if (master)
		ret = mlx5_get_master_ifname(dev, &ifr->ifr_name);
	else
		ret = mlx5_get_ifname(dev, &ifr->ifr_name);
	if (ret)
		goto error;
	ret = ioctl(sock, req, ifr);
	if (ret == -1) {
		rte_errno = errno;
		goto error;
	}
	close(sock);
	return 0;
error:
	close(sock);
	return -rte_errno;
}

/**
 * Get device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] mtu
 *   MTU value output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request, 0);

	if (ret)
		return ret;
	*mtu = request.ifr_mtu;
	return 0;
}

/**
 * Set device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mtu
 *   MTU value to set.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ifreq request = { .ifr_mtu = mtu, };

	return mlx5_ifreq(dev, SIOCSIFMTU, &request, 0);
}

/**
 * Set device flags.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param keep
 *   Bitmask for flags that must remain untouched.
 * @param flags
 *   Bitmask for flags to modify.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request, 0);

	if (ret)
		return ret;
	request.ifr_flags &= keep;
	request.ifr_flags |= flags & ~keep;
	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request, 0);
}

/**
 * DPDK callback for Ethernet device configuration.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int rxqs_n = dev->data->nb_rx_queues;
	unsigned int txqs_n = dev->data->nb_tx_queues;
	unsigned int i;
	unsigned int j;
	unsigned int reta_idx_n;
	const uint8_t use_app_rss_key =
		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
	int ret = 0;

	if (use_app_rss_key &&
	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
	     MLX5_RSS_HASH_KEY_LEN)) {
		DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long",
			dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN));
		rte_errno = EINVAL;
		return -rte_errno;
	}
	priv->rss_conf.rss_key =
		rte_realloc(priv->rss_conf.rss_key,
			    MLX5_RSS_HASH_KEY_LEN, 0);
	if (!priv->rss_conf.rss_key) {
		DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
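	/* Use the application-provided RSS key, or fall back to the default. */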
	memcpy(priv->rss_conf.rss_key,
	       use_app_rss_key ?
	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
	       rss_hash_default_key,
	       MLX5_RSS_HASH_KEY_LEN);
	priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN;
	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	priv->rxqs = (void *)dev->data->rx_queues;
	priv->txqs = (void *)dev->data->tx_queues;
	if (txqs_n != priv->txqs_n) {
		DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
			dev->data->port_id, priv->txqs_n, txqs_n);
		priv->txqs_n = txqs_n;
	}
	if (rxqs_n > priv->config.ind_table_max_size) {
		DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (rxqs_n == priv->rxqs_n)
		return 0;
	DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
		dev->data->port_id, priv->rxqs_n, rxqs_n);
	priv->rxqs_n = rxqs_n;
	/* If the requested number of RX queues is not a power of two, use the
	 * maximum indirection table size for better balancing.
	 * The result is always rounded to the next power of two. */
	reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
				     priv->config.ind_table_max_size :
				     rxqs_n));
	ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
	if (ret)
		return ret;
	/* When the number of RX queues is not a power of two, the remaining
	 * table entries are padded with reused WQs and hashes are not spread
	 * uniformly. */
	for (i = 0, j = 0; (i != reta_idx_n); ++i) {
		(*priv->reta_idx)[i] = j;
		if (++j == rxqs_n)
			j = 0;
	}
	return 0;
}

/**
 * Sets default tuning parameters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] info
 *   Info structure output buffer.
 */
static void
mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct priv *priv = dev->data->dev_private;

	/* Minimum CPU utilization. */
	info->default_rxportconf.ring_size = 256;
	info->default_txportconf.ring_size = 256;
	info->default_rxportconf.burst_size = 64;
	info->default_txportconf.burst_size = 64;
	if (priv->link_speed_capa & ETH_LINK_SPEED_100G) {
		info->default_rxportconf.nb_queues = 16;
		info->default_txportconf.nb_queues = 16;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 2048;
			info->default_txportconf.ring_size = 2048;
		}
	} else {
		info->default_rxportconf.nb_queues = 8;
		info->default_txportconf.nb_queues = 8;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 4096;
			info->default_txportconf.ring_size = 4096;
		}
	}
}

/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
void
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max;
	char ifname[IF_NAMESIZE];

	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
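	 * (max_cq and max_qp as reported by the Verbs device attributes).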
	 */
	max = RTE_MIN(priv->device_attr.orig_attr.max_cq,
		      priv->device_attr.orig_attr.max_qp);
	/* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
	info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
				 info->rx_queue_offload_capa);
	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
	if (mlx5_get_ifname(dev, &ifname) == 0)
		info->if_index = if_nametoindex(ifname);
	info->reta_size = priv->reta_idx_n ?
		priv->reta_idx_n : config->ind_table_max_size;
	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
	info->speed_capa = priv->link_speed_capa;
	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
	mlx5_set_default_params(dev, info);
	info->switch_info.name = dev->data->name;
	info->switch_info.domain_id = priv->domain_id;
	info->switch_info.port_id = priv->representor_id;
	if (priv->representor) {
		unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0);
		uint16_t port_id[i];

		i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i), i);
		while (i--) {
			struct priv *opriv =
				rte_eth_devices[port_id[i]].data->dev_private;

			if (!opriv ||
			    opriv->representor ||
			    opriv->domain_id != priv->domain_id)
				continue;
			/*
			 * Override switch name with that of the master
			 * device.
			 */
			info->switch_info.name = opriv->dev_data->name;
			break;
		}
	}
}

/**
 * Get supported packet types.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   A pointer to the supported packet types array.
 */
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * Retrieve physical link information (unlocked version using legacy ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
			       struct rte_eth_link *link)
{
	struct priv *priv = dev->data->dev_private;
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5.
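				     * Superseded by ETHTOOL_GLINKSETTINGS,
				     * kept as a fallback for older kernels.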
				     */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 0);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&edata,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	link_speed = ethtool_cmd_speed(&edata);
	if (link_speed == -1)
		dev_link.link_speed = ETH_SPEED_NUM_NONE;
	else
		dev_link.link_speed = link_speed;
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (!priv->representor &&
	    ((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * Retrieve physical link information (unlocked version using new ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
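 *
 * @note Uses ETHTOOL_GLINKSETTINGS (Linux 4.5+), whose link mode masks extend
 *   beyond the 32 bits of the legacy ETHTOOL_GSET interface.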
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
			     struct rte_eth_link *link)
{
	struct priv *priv = dev->data->dev_private;
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	uint64_t sc;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 0);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&gcmd,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link.link_speed = ecmd->speed;
	sc = ecmd->link_mode_masks[0] |
	     ((uint64_t)ecmd->link_mode_masks[1] << 32);
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (!priv->representor &&
	    ((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion.
 *
 * @return
 *   0 if link status was not updated, positive if it was, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);

	do {
		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret)
			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if (wait_to_complete && ret == -EAGAIN) {
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0);
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete);
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct priv *priv = dev->data->dev_private;
	uint16_t kern_mtu = 0;
	int ret;

	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	/* Set kernel interface MTU first. */
	ret = mlx5_set_mtu(dev, mtu);
	if (ret)
		return ret;
	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
			dev->data->port_id, mtu);
		return 0;
	}
	rte_errno = EAGAIN;
	return -rte_errno;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
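 *
 * @note Pause frame settings are read with ETHTOOL_GPAUSEPARAM from the
 *   master netdev.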
 */
int
mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_GPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
			" %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	fc_conf->autoneg = ethpause.autoneg;
	if (ethpause.rx_pause && ethpause.tx_pause)
		fc_conf->mode = RTE_FC_FULL;
	else if (ethpause.rx_pause)
		fc_conf->mode = RTE_FC_RX_PAUSE;
	else if (ethpause.tx_pause)
		fc_conf->mode = RTE_FC_TX_PAUSE;
	else
		fc_conf->mode = RTE_FC_NONE;
	return 0;
}

/**
 * DPDK callback to modify flow control parameters.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] fc_conf
 *   Flow control parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_SPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ethpause.autoneg = fc_conf->autoneg;
	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_RX_PAUSE))
		ethpause.rx_pause = 1;
	else
		ethpause.rx_pause = 0;

	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_TX_PAUSE))
		ethpause.tx_pause = 1;
	else
		ethpause.tx_pause = 0;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 0);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	return 0;
}

/**
 * Get PCI information from struct ibv_device.
 *
 * @param device
 *   Pointer to the IB device (struct ibv_device).
 * @param[out] pci_addr
 *   PCI bus address output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
			    struct rte_pci_addr *pci_addr)
{
	FILE *file;
	char line[32];
	int found = 0;
	MKSTR(path, "%s/device/uevent", device->ibdev_path);

	file = fopen(path, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	while (fgets(line, sizeof(line), file) == line) {
		size_t len = strlen(line);
		int ret;

		/* Truncate long lines. */
		if (len == (sizeof(line) - 1))
			while (line[(len - 1)] != '\n') {
				ret = fgetc(file);
				if (ret == EOF)
					break;
				line[(len - 1)] = ret;
			}
		/* Extract information. */
		if (sscanf(line,
			   "PCI_SLOT_NAME="
			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
			   &pci_addr->domain,
			   &pci_addr->bus,
			   &pci_addr->devid,
			   &pci_addr->function) == 4) {
			found = 1;
			break;
		}
	}
	fclose(file);
	if (!found) {
		/* No PCI_SLOT_NAME entry, report failure as documented. */
		rte_errno = ENOENT;
		return -rte_errno;
	}
	return 0;
}

/**
 * Device status handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Bitmap of events which can be processed immediately.
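 *   (IBV_EVENT_PORT_ACTIVE/IBV_EVENT_PORT_ERR are reported as
 *   RTE_ETH_EVENT_INTR_LSC, IBV_EVENT_DEVICE_FATAL as RTE_ETH_EVENT_INTR_RMV.)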
 */
static uint32_t
mlx5_dev_status_handler(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct ibv_async_event event;
	uint32_t ret = 0;

	if (mlx5_link_update(dev, 0) == -EAGAIN) {
		usleep(0);
		return 0;
	}
	/* Read all messages and acknowledge them. */
	for (;;) {
		if (mlx5_glue->get_async_event(priv->ctx, &event))
			break;
		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
		     event.event_type == IBV_EVENT_PORT_ERR) &&
		    (dev->data->dev_conf.intr_conf.lsc == 1))
			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
			 dev->data->dev_conf.intr_conf.rmv == 1)
			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
		else
			DRV_LOG(DEBUG,
				"port %u unhandled event type %d",
				dev->data->port_id, event.event_type);
		mlx5_glue->ack_async_event(&event);
	}
	return ret;
}

/**
 * Handle interrupts from the NIC.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler(void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;
	uint32_t events;

	events = mlx5_dev_status_handler(dev);
	if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
	if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
}

/**
 * Handle interrupts from the socket.
 *
 * @param cb_arg
 *   Callback argument.
 */
static void
mlx5_dev_handler_socket(void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;

	mlx5_socket_handle(dev);
}

/**
 * Uninstall interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rmv)
		rte_intr_callback_unregister(&priv->intr_handle,
					     mlx5_dev_interrupt_handler, dev);
	if (priv->primary_socket)
		rte_intr_callback_unregister(&priv->intr_handle_socket,
					     mlx5_dev_handler_socket, dev);
	priv->intr_handle.fd = 0;
	priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
	priv->intr_handle_socket.fd = 0;
	priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}

/**
 * Install interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
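 *
 * @note The Verbs asynchronous event file descriptor is switched to
 *   non-blocking mode and registered with the EAL interrupt framework,
 *   along with the driver's inter-process request socket when available.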
 */
void
mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	int flags;

	assert(priv->ctx->async_fd > 0);
	flags = fcntl(priv->ctx->async_fd, F_GETFL);
	ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(INFO,
			"port %u failed to make the async event queue file"
			" descriptor non-blocking",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rmv) {
		priv->intr_handle.fd = priv->ctx->async_fd;
		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&priv->intr_handle,
					   mlx5_dev_interrupt_handler, dev);
	}
	ret = mlx5_socket_init(dev);
	if (ret)
		DRV_LOG(ERR, "port %u cannot initialise socket: %s",
			dev->data->port_id, strerror(rte_errno));
	else if (priv->primary_socket) {
		priv->intr_handle_socket.fd = priv->primary_socket;
		priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&priv->intr_handle_socket,
					   mlx5_dev_handler_socket, dev);
	}
}

/**
 * DPDK callback to bring the link DOWN.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_down(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
}

/**
 * DPDK callback to bring the link UP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_up(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
}

/**
 * Configure the TX function to use.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
				    DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				    DEV_TX_OFFLOAD_GRE_TNL_TSO |
				    DEV_TX_OFFLOAD_IP_TNL_TSO |
				    DEV_TX_OFFLOAD_UDP_TNL_TSO));
	int swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
				    DEV_TX_OFFLOAD_UDP_TNL_TSO |
				    DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM));
	int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);

	assert(priv != NULL);
	/*
	 * Select appropriate TX function.
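	 * TSO, software parser (SWP) offloads and VLAN insertion are only
	 * handled by the default burst function, so the MPW variants below
	 * are skipped whenever any of them is requested.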
	 */
	if (vlan_insert || tso || swp)
		return tx_pkt_burst;
	if (config->mps == MLX5_MPW_ENHANCED) {
		if (mlx5_check_vec_tx_support(dev) > 0) {
			if (mlx5_check_raw_vec_tx_support(dev) > 0)
				tx_pkt_burst = mlx5_tx_burst_raw_vec;
			else
				tx_pkt_burst = mlx5_tx_burst_vec;
			DRV_LOG(DEBUG,
				"port %u selected enhanced MPW Tx vectorized"
				" function",
				dev->data->port_id);
		} else {
			tx_pkt_burst = mlx5_tx_burst_empw;
			DRV_LOG(DEBUG,
				"port %u selected enhanced MPW Tx function",
				dev->data->port_id);
		}
	} else if (config->mps && (config->txq_inline > 0)) {
		tx_pkt_burst = mlx5_tx_burst_mpw_inline;
		DRV_LOG(DEBUG, "port %u selected MPW inline Tx function",
			dev->data->port_id);
	} else if (config->mps) {
		tx_pkt_burst = mlx5_tx_burst_mpw;
		DRV_LOG(DEBUG, "port %u selected MPW Tx function",
			dev->data->port_id);
	}
	return tx_pkt_burst;
}

/**
 * Configure the RX function to use.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Rx burst function.
 */
eth_rx_burst_t
mlx5_select_rx_function(struct rte_eth_dev *dev)
{
	eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;

	assert(dev != NULL);
	if (mlx5_check_vec_rx_support(dev) > 0) {
		rx_pkt_burst = mlx5_rx_burst_vec;
		DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
			dev->data->port_id);
	} else if (mlx5_mprq_enabled(dev)) {
		rx_pkt_burst = mlx5_rx_burst_mprq;
	}
	return rx_pkt_burst;
}

/**
 * Check if mlx5 device was removed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   1 when device is removed, otherwise 0.
 */
int
mlx5_is_removed(struct rte_eth_dev *dev)
{
	struct ibv_device_attr device_attr;
	struct priv *priv = dev->data->dev_private;

	if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO)
		return 1;
	return 0;
}

/**
 * Get port ID list of mlx5 instances sharing a common device.
 *
 * @param[in] dev
 *   Device to look for.
 * @param[out] port_list
 *   Result buffer for collected port IDs.
 * @param port_list_n
 *   Maximum number of entries in result buffer. If 0, @p port_list can be
 *   NULL.
 *
 * @return
 *   Number of matching instances regardless of the @p port_list_n
 *   parameter, 0 if none were found.
 */
unsigned int
mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
		    unsigned int port_list_n)
{
	uint16_t id;
	unsigned int n = 0;

	RTE_ETH_FOREACH_DEV(id) {
		struct rte_eth_dev *ldev = &rte_eth_devices[id];

		if (!ldev->device ||
		    !ldev->device->driver ||
		    strcmp(ldev->device->driver->name, MLX5_DRIVER_NAME) ||
		    ldev->device != dev)
			continue;
		if (n < port_list_n)
			port_list[n] = id;
		n++;
	}
	return n;
}

/**
 * Get switch information associated with network interface.
 *
 * @param ifindex
 *   Network interface index.
 * @param[out] info
 *   Switch information object, populated in case of success.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
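 *
 * @note The phys_port_name and phys_switch_id sysfs attributes of the
 *   interface are used to tell master and representor ports apart.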
 */
int
mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
{
	char ifname[IF_NAMESIZE];
	FILE *file;
	struct mlx5_switch_info data = { .master = 0, };
	bool port_name_set = false;
	bool port_switch_id_set = false;
	char c;

	if (!if_indextoname(ifindex, ifname)) {
		rte_errno = errno;
		return -rte_errno;
	}

	MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
	      ifname);
	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
	      ifname);

	file = fopen(phys_port_name, "rb");
	if (file != NULL) {
		port_name_set =
			fscanf(file, "%d%c", &data.port_name, &c) == 2 &&
			c == '\n';
		fclose(file);
	}
	file = fopen(phys_switch_id, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	port_switch_id_set =
		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
		c == '\n';
	fclose(file);
	data.master = port_switch_id_set && !port_name_set;
	data.representor = port_switch_id_set && port_name_set;
	*info = data;
	return 0;
}