/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#define _GNU_SOURCE

#include <stddef.h>
#include <assert.h>
#include <inttypes.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <dirent.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <fcntl.h>
#include <stdalign.h>
#include <sys/un.h>
#include <time.h>

#include <rte_atomic.h>
#include <rte_ethdev_driver.h>
#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_rwlock.h>

#include "mlx5.h"
#include "mlx5_glue.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"

/* Supported speed values found in /usr/include/linux/ethtool.h */
#ifndef HAVE_SUPPORTED_40000baseKR4_Full
#define SUPPORTED_40000baseKR4_Full (1 << 23)
#endif
#ifndef HAVE_SUPPORTED_40000baseCR4_Full
#define SUPPORTED_40000baseCR4_Full (1 << 24)
#endif
#ifndef HAVE_SUPPORTED_40000baseSR4_Full
#define SUPPORTED_40000baseSR4_Full (1 << 25)
#endif
#ifndef HAVE_SUPPORTED_40000baseLR4_Full
#define SUPPORTED_40000baseLR4_Full (1 << 26)
#endif
#ifndef HAVE_SUPPORTED_56000baseKR4_Full
#define SUPPORTED_56000baseKR4_Full (1 << 27)
#endif
#ifndef HAVE_SUPPORTED_56000baseCR4_Full
#define SUPPORTED_56000baseCR4_Full (1 << 28)
#endif
#ifndef HAVE_SUPPORTED_56000baseSR4_Full
#define SUPPORTED_56000baseSR4_Full (1 << 29)
#endif
#ifndef HAVE_SUPPORTED_56000baseLR4_Full
#define SUPPORTED_56000baseLR4_Full (1 << 30)
#endif

/* Add defines in case the running kernel is not the same as user headers. */
#ifndef ETHTOOL_GLINKSETTINGS
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_tp_mdix;
	uint8_t eth_tp_mdix_ctrl;
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get master interface name from private structure.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_get_master_ifname(const struct rte_eth_dev *dev,
		       char (*ifname)[IF_NAMESIZE])
{
	struct priv *priv = dev->data->dev_private;
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_type = 0;
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	{
		MKSTR(path, "%s/device/net", priv->ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		if (dev_port == 0)
			strlcpy(match, name, sizeof(match));
	}
	closedir(dir);
	if (match[0] == '\0') {
		rte_errno = ENOENT;
		return -rte_errno;
	}
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Get interface name from private structure.
 *
 * This is a port representor-aware version of mlx5_get_master_ifname().
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
{
	struct priv *priv = dev->data->dev_private;
	unsigned int ifindex =
		priv->nl_socket_rdma >= 0 ?
		mlx5_nl_ifindex(priv->nl_socket_rdma, priv->ibdev_name) : 0;

	if (!ifindex) {
		if (!priv->representor)
			return mlx5_get_master_ifname(dev, ifname);
		rte_errno = ENXIO;
		return -rte_errno;
	}
	if (if_indextoname(ifindex, &(*ifname)[0]))
		return 0;
	rte_errno = errno;
	return -rte_errno;
}

/**
 * Get the interface index from device name.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Nonzero interface index on success, zero otherwise and rte_errno is set.
 */
unsigned int
mlx5_ifindex(const struct rte_eth_dev *dev)
{
	char ifname[IF_NAMESIZE];
	unsigned int ifindex;

	if (mlx5_get_ifname(dev, &ifname))
		return 0;
	ifindex = if_nametoindex(ifname);
	if (!ifindex)
		rte_errno = errno;
	return ifindex;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param req
 *   Request number to pass to ioctl().
 * @param[out] ifr
 *   Interface request structure output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
{
	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	int ret = 0;

	if (sock == -1) {
		rte_errno = errno;
		return -rte_errno;
	}
	ret = mlx5_get_ifname(dev, &ifr->ifr_name);
	if (ret)
		goto error;
	ret = ioctl(sock, req, ifr);
	if (ret == -1) {
		rte_errno = errno;
		goto error;
	}
	close(sock);
	return 0;
error:
	close(sock);
	return -rte_errno;
}

/**
 * Get device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] mtu
 *   MTU value output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);

	if (ret)
		return ret;
	*mtu = request.ifr_mtu;
	return 0;
}

/**
 * Set device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mtu
 *   MTU value to set.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct ifreq request = { .ifr_mtu = mtu, };

	return mlx5_ifreq(dev, SIOCSIFMTU, &request);
}

/**
 * Set device flags.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param keep
 *   Bitmask for flags that must remain untouched.
 * @param flags
 *   Bitmask for flags to modify.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);

	if (ret)
		return ret;
	request.ifr_flags &= keep;
	request.ifr_flags |= flags & ~keep;
	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
}

/**
 * DPDK callback for Ethernet device configuration.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	unsigned int rxqs_n = dev->data->nb_rx_queues;
	unsigned int txqs_n = dev->data->nb_tx_queues;
	unsigned int i;
	unsigned int j;
	unsigned int reta_idx_n;
	const uint8_t use_app_rss_key =
		!!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
	int ret = 0;

	if (use_app_rss_key &&
	    (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
	     MLX5_RSS_HASH_KEY_LEN)) {
		DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long",
			dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN));
		rte_errno = EINVAL;
		return -rte_errno;
	}
	priv->rss_conf.rss_key =
		rte_realloc(priv->rss_conf.rss_key,
			    MLX5_RSS_HASH_KEY_LEN, 0);
	if (!priv->rss_conf.rss_key) {
		DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	memcpy(priv->rss_conf.rss_key,
	       use_app_rss_key ?
	       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
	       rss_hash_default_key,
	       MLX5_RSS_HASH_KEY_LEN);
	priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN;
	priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
	priv->rxqs = (void *)dev->data->rx_queues;
	priv->txqs = (void *)dev->data->tx_queues;
	if (txqs_n != priv->txqs_n) {
		DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u",
			dev->data->port_id, priv->txqs_n, txqs_n);
		priv->txqs_n = txqs_n;
	}
	if (rxqs_n > priv->config.ind_table_max_size) {
		DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)",
			dev->data->port_id, rxqs_n);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (rxqs_n == priv->rxqs_n)
		return 0;
	DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u",
		dev->data->port_id, priv->rxqs_n, rxqs_n);
	priv->rxqs_n = rxqs_n;
	/* If the requested number of RX queues is not a power of two, use the
	 * maximum indirection table size for better balancing.
	 * The result is always rounded to the next power of two. */
	reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
				     priv->config.ind_table_max_size :
				     rxqs_n));
	ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
	if (ret)
		return ret;
	/* When the number of RX queues is not a power of two, the remaining
	 * table entries are padded with reused WQs and hashes are not spread
	 * uniformly. */
	for (i = 0, j = 0; (i != reta_idx_n); ++i) {
		(*priv->reta_idx)[i] = j;
		if (++j == rxqs_n)
			j = 0;
	}
	return 0;
}
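
/*
 * Illustrative note (not from the original sources): worked example of the
 * indirection table padding performed by mlx5_dev_configure() above.
 * Assuming rxqs_n = 6 (not a power of two) and ind_table_max_size = 512,
 * reta_idx_n becomes 512 and the table is filled by cycling through the
 * queue indexes:
 *
 *   reta_idx[0..5]  = 0 1 2 3 4 5
 *   reta_idx[6..11] = 0 1 2 3 4 5
 *   ...and so on up to entry 511.
 *
 * Since 512 = 85 * 6 + 2, entries 510 and 511 map back to queues 0 and 1,
 * so those two queues receive a slightly larger share of the hash space;
 * this is the non-uniform spreading mentioned in the comment above.
 */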

/**
 * Sets default tuning parameters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] info
 *   Info structure output buffer.
 */
static void
mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct priv *priv = dev->data->dev_private;

	/* Minimum CPU utilization. */
	info->default_rxportconf.ring_size = 256;
	info->default_txportconf.ring_size = 256;
	info->default_rxportconf.burst_size = 64;
	info->default_txportconf.burst_size = 64;
	if (priv->link_speed_capa & ETH_LINK_SPEED_100G) {
		info->default_rxportconf.nb_queues = 16;
		info->default_txportconf.nb_queues = 16;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 2048;
			info->default_txportconf.ring_size = 2048;
		}
	} else {
		info->default_rxportconf.nb_queues = 8;
		info->default_txportconf.nb_queues = 8;
		if (dev->data->nb_rx_queues > 2 ||
		    dev->data->nb_tx_queues > 2) {
			/* Max Throughput. */
			info->default_rxportconf.ring_size = 4096;
			info->default_txportconf.ring_size = 4096;
		}
	}
}

/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
void
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	unsigned int max;
	char ifname[IF_NAMESIZE];

	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
	 */
	max = RTE_MIN(priv->device_attr.orig_attr.max_cq,
		      priv->device_attr.orig_attr.max_qp);
	/* max_rx_queues is uint16_t, cap the value to avoid overflow. */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES;
	info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev);
	info->rx_offload_capa = (mlx5_get_rx_port_offloads() |
				 info->rx_queue_offload_capa);
	info->tx_offload_capa = mlx5_get_tx_port_offloads(dev);
	if (mlx5_get_ifname(dev, &ifname) == 0)
		info->if_index = if_nametoindex(ifname);
	info->reta_size = priv->reta_idx_n ?
		priv->reta_idx_n : config->ind_table_max_size;
	info->hash_key_size = MLX5_RSS_HASH_KEY_LEN;
	info->speed_capa = priv->link_speed_capa;
	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
	mlx5_set_default_params(dev, info);
	info->switch_info.name = dev->data->name;
	info->switch_info.domain_id = priv->domain_id;
	info->switch_info.port_id = priv->representor_id;
	if (priv->representor) {
		unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0);
		uint16_t port_id[i];

		i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i), i);
		while (i--) {
			struct priv *opriv =
				rte_eth_devices[port_id[i]].data->dev_private;

			if (!opriv ||
			    opriv->representor ||
			    opriv->domain_id != priv->domain_id)
				continue;
			/*
			 * Override switch name with that of the master
			 * device.
			 */
			info->switch_info.name = opriv->dev_data->name;
			break;
		}
	}
}

/**
 * Get supported packet types.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   A pointer to the supported Packet types array.
 */
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * Retrieve physical link information (unlocked version using legacy ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
			       struct rte_eth_link *link)
{
	struct priv *priv = dev->data->dev_private;
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&edata,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	link_speed = ethtool_cmd_speed(&edata);
	if (link_speed == -1)
		dev_link.link_speed = ETH_SPEED_NUM_NONE;
	else
		dev_link.link_speed = link_speed;
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}
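
/*
 * Note (illustrative, not from the original sources): the legacy ETHTOOL_GSET
 * request used above carries a single 32-bit "supported" mask, so this path
 * only reports speed capabilities up to the 40G bits checked above. Newer
 * rates (25G/50G/100G) are only visible through ETHTOOL_GLINKSETTINGS, which
 * the function below uses and which mlx5_link_update() tries first.
 */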

/**
 * Retrieve physical link information (unlocked version using new ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
			     struct rte_eth_link *link)
{
	struct priv *priv = dev->data->dev_private;
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	uint64_t sc;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link = (struct rte_eth_link) {
		.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING)),
	};
	ifr = (struct ifreq) {
		.ifr_data = (void *)&gcmd,
	};
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	/*
	 * The kernel answers the handshake request with a negative number of
	 * 32-bit words per link mode mask; negate it to size the buffer for
	 * the complete request, which carries three masks (supported,
	 * advertising, link-partner advertising).
	 */
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link.link_speed = ecmd->speed;
	sc = ecmd->link_mode_masks[0] |
	     ((uint64_t)ecmd->link_mode_masks[1] << 32);
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	if (((dev_link.link_speed && !dev_link.link_status) ||
	     (!dev_link.link_speed && dev_link.link_status))) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion.
 *
 * @return
 *   0 if link status was not updated, positive if it was, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);

	do {
		ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret)
			ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if (wait_to_complete && ret == -EAGAIN) {
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0);
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete);
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct priv *priv = dev->data->dev_private;
	uint16_t kern_mtu = 0;
	int ret;

	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	/* Set kernel interface MTU first. */
	ret = mlx5_set_mtu(dev, mtu);
	if (ret)
		return ret;
	ret = mlx5_get_mtu(dev, &kern_mtu);
	if (ret)
		return ret;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DRV_LOG(DEBUG, "port %u adapter MTU set to %u",
			dev->data->port_id, mtu);
		return 0;
	}
	rte_errno = EAGAIN;
	return -rte_errno;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_GPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
			" %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	fc_conf->autoneg = ethpause.autoneg;
	if (ethpause.rx_pause && ethpause.tx_pause)
		fc_conf->mode = RTE_FC_FULL;
	else if (ethpause.rx_pause)
		fc_conf->mode = RTE_FC_RX_PAUSE;
	else if (ethpause.tx_pause)
		fc_conf->mode = RTE_FC_TX_PAUSE;
	else
		fc_conf->mode = RTE_FC_NONE;
	return 0;
}

/**
 * DPDK callback to modify flow control parameters.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] fc_conf
 *   Flow control parameters.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_SPAUSEPARAM
	};
	int ret;

	ifr.ifr_data = (void *)&ethpause;
	ethpause.autoneg = fc_conf->autoneg;
	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_RX_PAUSE))
		ethpause.rx_pause = 1;
	else
		ethpause.rx_pause = 0;

	if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
	    (fc_conf->mode & RTE_FC_TX_PAUSE))
		ethpause.tx_pause = 1;
	else
		ethpause.tx_pause = 0;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	return 0;
}

/**
 * Get PCI information from struct ibv_device.
 *
 * @param device
 *   Pointer to the IB device.
 * @param[out] pci_addr
 *   PCI bus address output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
			    struct rte_pci_addr *pci_addr)
{
	FILE *file;
	char line[32];
	MKSTR(path, "%s/device/uevent", device->ibdev_path);

	file = fopen(path, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	while (fgets(line, sizeof(line), file) == line) {
		size_t len = strlen(line);
		int ret;

		/* Truncate long lines. */
		if (len == (sizeof(line) - 1))
			while (line[(len - 1)] != '\n') {
				ret = fgetc(file);
				if (ret == EOF)
					break;
				line[(len - 1)] = ret;
			}
		/* Extract information. */
		if (sscanf(line,
			   "PCI_SLOT_NAME="
			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
			   &pci_addr->domain,
			   &pci_addr->bus,
			   &pci_addr->devid,
			   &pci_addr->function) == 4) {
			ret = 0;
			break;
		}
	}
	fclose(file);
	return 0;
}
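
/*
 * Illustrative note (not from the original sources): the sysfs uevent file
 * parsed by mlx5_ibv_device_to_pci_addr() above contains KEY=value pairs,
 * one per line, for example:
 *
 *   DRIVER=mlx5_core
 *   PCI_SLOT_NAME=0000:08:00.0
 *
 * The PCI_SLOT_NAME line above would be decoded as domain 0x0000, bus 0x08,
 * device 0x00 and function 0x0; all other keys are ignored. The exact values
 * shown are assumptions for the example.
 */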

/**
 * Device status handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Bitmap of events which can be reported to the callback process
 *   immediately.
 */
static uint32_t
mlx5_dev_status_handler(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	struct ibv_async_event event;
	uint32_t ret = 0;

	if (mlx5_link_update(dev, 0) == -EAGAIN) {
		usleep(0);
		return 0;
	}
	/* Read all messages and acknowledge them. */
	for (;;) {
		if (mlx5_glue->get_async_event(priv->ctx, &event))
			break;
		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
		     event.event_type == IBV_EVENT_PORT_ERR) &&
		    (dev->data->dev_conf.intr_conf.lsc == 1))
			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
			 dev->data->dev_conf.intr_conf.rmv == 1)
			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
		else
			DRV_LOG(DEBUG,
				"port %u event type %d not handled",
				dev->data->port_id, event.event_type);
		mlx5_glue->ack_async_event(&event);
	}
	return ret;
}

/**
 * Handle interrupts from the NIC.
 *
 * @param cb_arg
 *   Callback argument.
 */
void
mlx5_dev_interrupt_handler(void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;
	uint32_t events;

	events = mlx5_dev_status_handler(dev);
	if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
	if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
}

/**
 * Handle interrupts from the socket.
 *
 * @param cb_arg
 *   Callback argument.
 */
static void
mlx5_dev_handler_socket(void *cb_arg)
{
	struct rte_eth_dev *dev = cb_arg;

	mlx5_socket_handle(dev);
}

/**
 * Uninstall interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;

	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rmv)
		rte_intr_callback_unregister(&priv->intr_handle,
					     mlx5_dev_interrupt_handler, dev);
	if (priv->primary_socket)
		rte_intr_callback_unregister(&priv->intr_handle_socket,
					     mlx5_dev_handler_socket, dev);
	priv->intr_handle.fd = 0;
	priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
	priv->intr_handle_socket.fd = 0;
	priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}

/**
 * Install interrupt handler.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
void
mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	int flags;

	assert(priv->ctx->async_fd > 0);
	flags = fcntl(priv->ctx->async_fd, F_GETFL);
	ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
	if (ret) {
		DRV_LOG(INFO,
			"port %u failed to change the async event queue file"
			" descriptor",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (dev->data->dev_conf.intr_conf.lsc ||
	    dev->data->dev_conf.intr_conf.rmv) {
		priv->intr_handle.fd = priv->ctx->async_fd;
		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&priv->intr_handle,
					   mlx5_dev_interrupt_handler, dev);
	}
	ret = mlx5_socket_init(dev);
	if (ret)
		DRV_LOG(ERR, "port %u cannot initialise socket: %s",
			dev->data->port_id, strerror(rte_errno));
	else if (priv->primary_socket) {
		priv->intr_handle_socket.fd = priv->primary_socket;
		priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
		rte_intr_callback_register(&priv->intr_handle_socket,
					   mlx5_dev_handler_socket, dev);
	}
}

/**
 * DPDK callback to bring the link DOWN.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_down(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);
}

/**
 * DPDK callback to bring the link UP.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_set_link_up(struct rte_eth_dev *dev)
{
	return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
}

/**
 * Configure the TX function to use.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
				    DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				    DEV_TX_OFFLOAD_GRE_TNL_TSO |
				    DEV_TX_OFFLOAD_IP_TNL_TSO |
				    DEV_TX_OFFLOAD_UDP_TNL_TSO));
	int swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
				    DEV_TX_OFFLOAD_UDP_TNL_TSO |
				    DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM));
	int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);

	assert(priv != NULL);
	/* Select appropriate TX function. */
	if (vlan_insert || tso || swp)
		return tx_pkt_burst;
	if (config->mps == MLX5_MPW_ENHANCED) {
		if (mlx5_check_vec_tx_support(dev) > 0) {
			if (mlx5_check_raw_vec_tx_support(dev) > 0)
				tx_pkt_burst = mlx5_tx_burst_raw_vec;
			else
				tx_pkt_burst = mlx5_tx_burst_vec;
			DRV_LOG(DEBUG,
				"port %u selected enhanced MPW Tx vectorized"
				" function",
				dev->data->port_id);
		} else {
			tx_pkt_burst = mlx5_tx_burst_empw;
			DRV_LOG(DEBUG,
				"port %u selected enhanced MPW Tx function",
				dev->data->port_id);
		}
	} else if (config->mps && (config->txq_inline > 0)) {
		tx_pkt_burst = mlx5_tx_burst_mpw_inline;
		DRV_LOG(DEBUG, "port %u selected MPW inline Tx function",
			dev->data->port_id);
	} else if (config->mps) {
		tx_pkt_burst = mlx5_tx_burst_mpw;
		DRV_LOG(DEBUG, "port %u selected MPW Tx function",
			dev->data->port_id);
	}
	return tx_pkt_burst;
}

/**
 * Configure the RX function to use.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Rx burst function.
 */
eth_rx_burst_t
mlx5_select_rx_function(struct rte_eth_dev *dev)
{
	eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;

	assert(dev != NULL);
	if (mlx5_check_vec_rx_support(dev) > 0) {
		rx_pkt_burst = mlx5_rx_burst_vec;
		DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
			dev->data->port_id);
	} else if (mlx5_mprq_enabled(dev)) {
		rx_pkt_burst = mlx5_rx_burst_mprq;
	}
	return rx_pkt_burst;
}

/**
 * Check if mlx5 device was removed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   1 when device is removed, otherwise 0.
 */
int
mlx5_is_removed(struct rte_eth_dev *dev)
{
	struct ibv_device_attr device_attr;
	struct priv *priv = dev->data->dev_private;

	if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO)
		return 1;
	return 0;
}

/**
 * Get port ID list of mlx5 instances sharing a common device.
 *
 * @param[in] dev
 *   Device to look for.
 * @param[out] port_list
 *   Result buffer for collected port IDs.
 * @param port_list_n
 *   Maximum number of entries in result buffer. If 0, @p port_list can be
 *   NULL.
 *
 * @return
 *   Number of matching instances regardless of the @p port_list_n
 *   parameter, 0 if none were found.
 */
unsigned int
mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
		    unsigned int port_list_n)
{
	uint16_t id;
	unsigned int n = 0;

	RTE_ETH_FOREACH_DEV(id) {
		struct rte_eth_dev *ldev = &rte_eth_devices[id];

		if (!ldev->device ||
		    !ldev->device->driver ||
		    strcmp(ldev->device->driver->name, MLX5_DRIVER_NAME) ||
		    ldev->device != dev)
			continue;
		if (n < port_list_n)
			port_list[n] = id;
		n++;
	}
	return n;
}

/**
 * Get switch information associated with network interface.
 *
 * @param ifindex
 *   Network interface index.
 * @param[out] info
 *   Switch information object, populated in case of success.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
{
	char ifname[IF_NAMESIZE];
	FILE *file;
	struct mlx5_switch_info data = { .master = 0, };
	bool port_name_set = false;
	bool port_switch_id_set = false;
	char c;

	if (!if_indextoname(ifindex, ifname)) {
		rte_errno = errno;
		return -rte_errno;
	}

	MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name",
	      ifname);
	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
	      ifname);

	file = fopen(phys_port_name, "rb");
	if (file != NULL) {
		port_name_set =
			fscanf(file, "%d%c", &data.port_name, &c) == 2 &&
			c == '\n';
		fclose(file);
	}
	file = fopen(phys_switch_id, "rb");
	if (file == NULL) {
		rte_errno = errno;
		return -rte_errno;
	}
	port_switch_id_set =
		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
		c == '\n';
	fclose(file);
	data.master = port_switch_id_set && !port_name_set;
	data.representor = port_switch_id_set && port_name_set;
	*info = data;
	return 0;
}
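
/*
 * Illustrative usage sketch (not part of the driver): how an application
 * might exercise the link status and MTU callbacks implemented in this file
 * through the public ethdev API. The guard macro below is hypothetical and
 * never defined, so this block is not compiled; the example function name
 * and the 1500-byte MTU are assumptions.
 */
#ifdef MLX5_ETHDEV_USAGE_EXAMPLE
#include <rte_ethdev.h>

static void
example_query_port(uint16_t port_id)
{
	struct rte_eth_link link;

	/* Non-blocking query; ends up in mlx5_link_update(dev, 0). */
	rte_eth_link_get_nowait(port_id, &link);
	printf("port %u link %s at %u Mbps\n", port_id,
	       link.link_status ? "up" : "down", link.link_speed);
	/* Goes through mlx5_dev_set_mtu() and the kernel netdevice. */
	if (rte_eth_dev_set_mtu(port_id, 1500) != 0)
		printf("port %u failed to set MTU\n", port_id);
}
#endif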