1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <fcntl.h> 25 #include <stdalign.h> 26 #include <sys/un.h> 27 #include <time.h> 28 29 #include <rte_atomic.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_bus_pci.h> 32 #include <rte_mbuf.h> 33 #include <rte_common.h> 34 #include <rte_interrupts.h> 35 #include <rte_malloc.h> 36 #include <rte_string_fns.h> 37 #include <rte_rwlock.h> 38 39 #include "mlx5.h" 40 #include "mlx5_glue.h" 41 #include "mlx5_rxtx.h" 42 #include "mlx5_utils.h" 43 44 /* Supported speed values found in /usr/include/linux/ethtool.h */ 45 #ifndef HAVE_SUPPORTED_40000baseKR4_Full 46 #define SUPPORTED_40000baseKR4_Full (1 << 23) 47 #endif 48 #ifndef HAVE_SUPPORTED_40000baseCR4_Full 49 #define SUPPORTED_40000baseCR4_Full (1 << 24) 50 #endif 51 #ifndef HAVE_SUPPORTED_40000baseSR4_Full 52 #define SUPPORTED_40000baseSR4_Full (1 << 25) 53 #endif 54 #ifndef HAVE_SUPPORTED_40000baseLR4_Full 55 #define SUPPORTED_40000baseLR4_Full (1 << 26) 56 #endif 57 #ifndef HAVE_SUPPORTED_56000baseKR4_Full 58 #define SUPPORTED_56000baseKR4_Full (1 << 27) 59 #endif 60 #ifndef HAVE_SUPPORTED_56000baseCR4_Full 61 #define SUPPORTED_56000baseCR4_Full (1 << 28) 62 #endif 63 #ifndef HAVE_SUPPORTED_56000baseSR4_Full 64 #define SUPPORTED_56000baseSR4_Full (1 << 29) 65 #endif 66 #ifndef HAVE_SUPPORTED_56000baseLR4_Full 67 #define SUPPORTED_56000baseLR4_Full (1 << 30) 68 #endif 69 70 /* Add defines in case the running kernel is not the same as user headers. */ 71 #ifndef ETHTOOL_GLINKSETTINGS 72 struct ethtool_link_settings { 73 uint32_t cmd; 74 uint32_t speed; 75 uint8_t duplex; 76 uint8_t port; 77 uint8_t phy_address; 78 uint8_t autoneg; 79 uint8_t mdio_support; 80 uint8_t eth_to_mdix; 81 uint8_t eth_tp_mdix_ctrl; 82 int8_t link_mode_masks_nwords; 83 uint32_t reserved[8]; 84 uint32_t link_mode_masks[]; 85 }; 86 87 #define ETHTOOL_GLINKSETTINGS 0x0000004c 88 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 89 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 90 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 91 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 92 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 93 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 94 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 95 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 96 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 97 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 98 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 99 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 100 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 101 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 102 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 103 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 104 #endif 105 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 106 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 107 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 108 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 109 #endif 110 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 111 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 112 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 113 #endif 114 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 115 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 116 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 117 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 118 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 119 #endif 120 121 /** 122 * Get master interface name from private structure. 123 * 124 * @param[in] dev 125 * Pointer to Ethernet device. 126 * @param[out] ifname 127 * Interface name output buffer. 128 * 129 * @return 130 * 0 on success, a negative errno value otherwise and rte_errno is set. 131 */ 132 int 133 mlx5_get_master_ifname(const struct rte_eth_dev *dev, 134 char (*ifname)[IF_NAMESIZE]) 135 { 136 struct priv *priv = dev->data->dev_private; 137 DIR *dir; 138 struct dirent *dent; 139 unsigned int dev_type = 0; 140 unsigned int dev_port_prev = ~0u; 141 char match[IF_NAMESIZE] = ""; 142 143 { 144 MKSTR(path, "%s/device/net", priv->ibdev_path); 145 146 dir = opendir(path); 147 if (dir == NULL) { 148 rte_errno = errno; 149 return -rte_errno; 150 } 151 } 152 while ((dent = readdir(dir)) != NULL) { 153 char *name = dent->d_name; 154 FILE *file; 155 unsigned int dev_port; 156 int r; 157 158 if ((name[0] == '.') && 159 ((name[1] == '\0') || 160 ((name[1] == '.') && (name[2] == '\0')))) 161 continue; 162 163 MKSTR(path, "%s/device/net/%s/%s", 164 priv->ibdev_path, name, 165 (dev_type ? "dev_id" : "dev_port")); 166 167 file = fopen(path, "rb"); 168 if (file == NULL) { 169 if (errno != ENOENT) 170 continue; 171 /* 172 * Switch to dev_id when dev_port does not exist as 173 * is the case with Linux kernel versions < 3.15. 174 */ 175 try_dev_id: 176 match[0] = '\0'; 177 if (dev_type) 178 break; 179 dev_type = 1; 180 dev_port_prev = ~0u; 181 rewinddir(dir); 182 continue; 183 } 184 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 185 fclose(file); 186 if (r != 1) 187 continue; 188 /* 189 * Switch to dev_id when dev_port returns the same value for 190 * all ports. May happen when using a MOFED release older than 191 * 3.0 with a Linux kernel >= 3.15. 192 */ 193 if (dev_port == dev_port_prev) 194 goto try_dev_id; 195 dev_port_prev = dev_port; 196 if (dev_port == 0) 197 strlcpy(match, name, sizeof(match)); 198 } 199 closedir(dir); 200 if (match[0] == '\0') { 201 rte_errno = ENOENT; 202 return -rte_errno; 203 } 204 strncpy(*ifname, match, sizeof(*ifname)); 205 return 0; 206 } 207 208 /** 209 * Get interface name from private structure. 210 * 211 * This is a port representor-aware version of mlx5_get_master_ifname(). 212 * 213 * @param[in] dev 214 * Pointer to Ethernet device. 215 * @param[out] ifname 216 * Interface name output buffer. 217 * 218 * @return 219 * 0 on success, a negative errno value otherwise and rte_errno is set. 220 */ 221 int 222 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 223 { 224 struct priv *priv = dev->data->dev_private; 225 unsigned int ifindex = 226 priv->nl_socket_rdma >= 0 ? 227 mlx5_nl_ifindex(priv->nl_socket_rdma, priv->ibdev_name) : 0; 228 229 if (!ifindex) { 230 if (!priv->representor) 231 return mlx5_get_master_ifname(dev, ifname); 232 rte_errno = ENXIO; 233 return -rte_errno; 234 } 235 if (if_indextoname(ifindex, &(*ifname)[0])) 236 return 0; 237 rte_errno = errno; 238 return -rte_errno; 239 } 240 241 /** 242 * Get the interface index from device name. 243 * 244 * @param[in] dev 245 * Pointer to Ethernet device. 246 * 247 * @return 248 * Interface index on success, a negative errno value otherwise and 249 * rte_errno is set. 250 */ 251 int 252 mlx5_ifindex(const struct rte_eth_dev *dev) 253 { 254 char ifname[IF_NAMESIZE]; 255 unsigned int ret; 256 257 ret = mlx5_get_ifname(dev, &ifname); 258 if (ret) 259 return ret; 260 ret = if_nametoindex(ifname); 261 if (ret == 0) { 262 rte_errno = errno; 263 return -rte_errno; 264 } 265 return ret; 266 } 267 268 /** 269 * Perform ifreq ioctl() on associated Ethernet device. 270 * 271 * @param[in] dev 272 * Pointer to Ethernet device. 273 * @param req 274 * Request number to pass to ioctl(). 275 * @param[out] ifr 276 * Interface request structure output buffer. 277 * @param master 278 * When device is a port representor, perform request on master device 279 * instead. 280 * 281 * @return 282 * 0 on success, a negative errno value otherwise and rte_errno is set. 283 */ 284 int 285 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr, 286 int master) 287 { 288 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 289 int ret = 0; 290 291 if (sock == -1) { 292 rte_errno = errno; 293 return -rte_errno; 294 } 295 if (master) 296 ret = mlx5_get_master_ifname(dev, &ifr->ifr_name); 297 else 298 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 299 if (ret) 300 goto error; 301 ret = ioctl(sock, req, ifr); 302 if (ret == -1) { 303 rte_errno = errno; 304 goto error; 305 } 306 close(sock); 307 return 0; 308 error: 309 close(sock); 310 return -rte_errno; 311 } 312 313 /** 314 * Get device MTU. 315 * 316 * @param dev 317 * Pointer to Ethernet device. 318 * @param[out] mtu 319 * MTU value output buffer. 320 * 321 * @return 322 * 0 on success, a negative errno value otherwise and rte_errno is set. 323 */ 324 int 325 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 326 { 327 struct ifreq request; 328 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request, 0); 329 330 if (ret) 331 return ret; 332 *mtu = request.ifr_mtu; 333 return 0; 334 } 335 336 /** 337 * Set device MTU. 338 * 339 * @param dev 340 * Pointer to Ethernet device. 341 * @param mtu 342 * MTU value to set. 343 * 344 * @return 345 * 0 on success, a negative errno value otherwise and rte_errno is set. 346 */ 347 static int 348 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 349 { 350 struct ifreq request = { .ifr_mtu = mtu, }; 351 352 return mlx5_ifreq(dev, SIOCSIFMTU, &request, 0); 353 } 354 355 /** 356 * Set device flags. 357 * 358 * @param dev 359 * Pointer to Ethernet device. 360 * @param keep 361 * Bitmask for flags that must remain untouched. 362 * @param flags 363 * Bitmask for flags to modify. 364 * 365 * @return 366 * 0 on success, a negative errno value otherwise and rte_errno is set. 367 */ 368 int 369 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 370 { 371 struct ifreq request; 372 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request, 0); 373 374 if (ret) 375 return ret; 376 request.ifr_flags &= keep; 377 request.ifr_flags |= flags & ~keep; 378 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request, 0); 379 } 380 381 /** 382 * DPDK callback for Ethernet device configuration. 383 * 384 * @param dev 385 * Pointer to Ethernet device structure. 386 * 387 * @return 388 * 0 on success, a negative errno value otherwise and rte_errno is set. 389 */ 390 int 391 mlx5_dev_configure(struct rte_eth_dev *dev) 392 { 393 struct priv *priv = dev->data->dev_private; 394 unsigned int rxqs_n = dev->data->nb_rx_queues; 395 unsigned int txqs_n = dev->data->nb_tx_queues; 396 unsigned int i; 397 unsigned int j; 398 unsigned int reta_idx_n; 399 const uint8_t use_app_rss_key = 400 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 401 int ret = 0; 402 403 if (use_app_rss_key && 404 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 405 MLX5_RSS_HASH_KEY_LEN)) { 406 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 407 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 408 rte_errno = EINVAL; 409 return -rte_errno; 410 } 411 priv->rss_conf.rss_key = 412 rte_realloc(priv->rss_conf.rss_key, 413 MLX5_RSS_HASH_KEY_LEN, 0); 414 if (!priv->rss_conf.rss_key) { 415 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 416 dev->data->port_id, rxqs_n); 417 rte_errno = ENOMEM; 418 return -rte_errno; 419 } 420 memcpy(priv->rss_conf.rss_key, 421 use_app_rss_key ? 422 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 423 rss_hash_default_key, 424 MLX5_RSS_HASH_KEY_LEN); 425 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 426 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 427 priv->rxqs = (void *)dev->data->rx_queues; 428 priv->txqs = (void *)dev->data->tx_queues; 429 if (txqs_n != priv->txqs_n) { 430 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 431 dev->data->port_id, priv->txqs_n, txqs_n); 432 priv->txqs_n = txqs_n; 433 } 434 if (rxqs_n > priv->config.ind_table_max_size) { 435 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 436 dev->data->port_id, rxqs_n); 437 rte_errno = EINVAL; 438 return -rte_errno; 439 } 440 if (rxqs_n == priv->rxqs_n) 441 return 0; 442 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 443 dev->data->port_id, priv->rxqs_n, rxqs_n); 444 priv->rxqs_n = rxqs_n; 445 /* If the requested number of RX queues is not a power of two, use the 446 * maximum indirection table size for better balancing. 447 * The result is always rounded to the next power of two. */ 448 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 449 priv->config.ind_table_max_size : 450 rxqs_n)); 451 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 452 if (ret) 453 return ret; 454 /* When the number of RX queues is not a power of two, the remaining 455 * table entries are padded with reused WQs and hashes are not spread 456 * uniformly. */ 457 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 458 (*priv->reta_idx)[i] = j; 459 if (++j == rxqs_n) 460 j = 0; 461 } 462 return 0; 463 } 464 465 /** 466 * Sets default tuning parameters. 467 * 468 * @param dev 469 * Pointer to Ethernet device. 470 * @param[out] info 471 * Info structure output buffer. 472 */ 473 static void 474 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 475 { 476 struct priv *priv = dev->data->dev_private; 477 478 /* Minimum CPU utilization. */ 479 info->default_rxportconf.ring_size = 256; 480 info->default_txportconf.ring_size = 256; 481 info->default_rxportconf.burst_size = 64; 482 info->default_txportconf.burst_size = 64; 483 if (priv->link_speed_capa & ETH_LINK_SPEED_100G) { 484 info->default_rxportconf.nb_queues = 16; 485 info->default_txportconf.nb_queues = 16; 486 if (dev->data->nb_rx_queues > 2 || 487 dev->data->nb_tx_queues > 2) { 488 /* Max Throughput. */ 489 info->default_rxportconf.ring_size = 2048; 490 info->default_txportconf.ring_size = 2048; 491 } 492 } else { 493 info->default_rxportconf.nb_queues = 8; 494 info->default_txportconf.nb_queues = 8; 495 if (dev->data->nb_rx_queues > 2 || 496 dev->data->nb_tx_queues > 2) { 497 /* Max Throughput. */ 498 info->default_rxportconf.ring_size = 4096; 499 info->default_txportconf.ring_size = 4096; 500 } 501 } 502 } 503 504 /** 505 * DPDK callback to get information about the device. 506 * 507 * @param dev 508 * Pointer to Ethernet device structure. 509 * @param[out] info 510 * Info structure output buffer. 511 */ 512 void 513 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 514 { 515 struct priv *priv = dev->data->dev_private; 516 struct mlx5_dev_config *config = &priv->config; 517 unsigned int max; 518 char ifname[IF_NAMESIZE]; 519 520 /* FIXME: we should ask the device for these values. */ 521 info->min_rx_bufsize = 32; 522 info->max_rx_pktlen = 65536; 523 /* 524 * Since we need one CQ per QP, the limit is the minimum number 525 * between the two values. 526 */ 527 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 528 priv->device_attr.orig_attr.max_qp); 529 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 530 if (max >= 65535) 531 max = 65535; 532 info->max_rx_queues = max; 533 info->max_tx_queues = max; 534 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 535 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 536 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 537 info->rx_queue_offload_capa); 538 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 539 if (mlx5_get_ifname(dev, &ifname) == 0) 540 info->if_index = if_nametoindex(ifname); 541 info->reta_size = priv->reta_idx_n ? 542 priv->reta_idx_n : config->ind_table_max_size; 543 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 544 info->speed_capa = priv->link_speed_capa; 545 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 546 mlx5_set_default_params(dev, info); 547 info->switch_info.name = dev->data->name; 548 info->switch_info.domain_id = priv->domain_id; 549 info->switch_info.port_id = priv->representor_id; 550 if (priv->representor) { 551 unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0); 552 uint16_t port_id[i]; 553 554 i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i), i); 555 while (i--) { 556 struct priv *opriv = 557 rte_eth_devices[port_id[i]].data->dev_private; 558 559 if (!opriv || 560 opriv->representor || 561 opriv->domain_id != priv->domain_id) 562 continue; 563 /* 564 * Override switch name with that of the master 565 * device. 566 */ 567 info->switch_info.name = opriv->dev_data->name; 568 break; 569 } 570 } 571 } 572 573 /** 574 * Get supported packet types. 575 * 576 * @param dev 577 * Pointer to Ethernet device structure. 578 * 579 * @return 580 * A pointer to the supported Packet types array. 581 */ 582 const uint32_t * 583 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 584 { 585 static const uint32_t ptypes[] = { 586 /* refers to rxq_cq_to_pkt_type() */ 587 RTE_PTYPE_L2_ETHER, 588 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 589 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 590 RTE_PTYPE_L4_NONFRAG, 591 RTE_PTYPE_L4_FRAG, 592 RTE_PTYPE_L4_TCP, 593 RTE_PTYPE_L4_UDP, 594 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 595 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 596 RTE_PTYPE_INNER_L4_NONFRAG, 597 RTE_PTYPE_INNER_L4_FRAG, 598 RTE_PTYPE_INNER_L4_TCP, 599 RTE_PTYPE_INNER_L4_UDP, 600 RTE_PTYPE_UNKNOWN 601 }; 602 603 if (dev->rx_pkt_burst == mlx5_rx_burst || 604 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 605 dev->rx_pkt_burst == mlx5_rx_burst_vec) 606 return ptypes; 607 return NULL; 608 } 609 610 /** 611 * DPDK callback to retrieve physical link information. 612 * 613 * @param dev 614 * Pointer to Ethernet device structure. 615 * @param[out] link 616 * Storage for current link status. 617 * 618 * @return 619 * 0 on success, a negative errno value otherwise and rte_errno is set. 620 */ 621 static int 622 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 623 struct rte_eth_link *link) 624 { 625 struct priv *priv = dev->data->dev_private; 626 struct ethtool_cmd edata = { 627 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 628 }; 629 struct ifreq ifr; 630 struct rte_eth_link dev_link; 631 int link_speed = 0; 632 int ret; 633 634 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1); 635 if (ret) { 636 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 637 dev->data->port_id, strerror(rte_errno)); 638 return ret; 639 } 640 memset(&dev_link, 0, sizeof(dev_link)); 641 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 642 (ifr.ifr_flags & IFF_RUNNING)); 643 ifr.ifr_data = (void *)&edata; 644 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1); 645 if (ret) { 646 DRV_LOG(WARNING, 647 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 648 dev->data->port_id, strerror(rte_errno)); 649 return ret; 650 } 651 link_speed = ethtool_cmd_speed(&edata); 652 if (link_speed == -1) 653 dev_link.link_speed = ETH_SPEED_NUM_NONE; 654 else 655 dev_link.link_speed = link_speed; 656 priv->link_speed_capa = 0; 657 if (edata.supported & SUPPORTED_Autoneg) 658 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 659 if (edata.supported & (SUPPORTED_1000baseT_Full | 660 SUPPORTED_1000baseKX_Full)) 661 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 662 if (edata.supported & SUPPORTED_10000baseKR_Full) 663 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 664 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 665 SUPPORTED_40000baseCR4_Full | 666 SUPPORTED_40000baseSR4_Full | 667 SUPPORTED_40000baseLR4_Full)) 668 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 669 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 670 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 671 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 672 ETH_LINK_SPEED_FIXED); 673 if ((dev_link.link_speed && !dev_link.link_status) || 674 (!dev_link.link_speed && dev_link.link_status)) { 675 rte_errno = EAGAIN; 676 return -rte_errno; 677 } 678 *link = dev_link; 679 return 0; 680 } 681 682 /** 683 * Retrieve physical link information (unlocked version using new ioctl). 684 * 685 * @param dev 686 * Pointer to Ethernet device structure. 687 * @param[out] link 688 * Storage for current link status. 689 * 690 * @return 691 * 0 on success, a negative errno value otherwise and rte_errno is set. 692 */ 693 static int 694 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 695 struct rte_eth_link *link) 696 697 { 698 struct priv *priv = dev->data->dev_private; 699 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 700 struct ifreq ifr; 701 struct rte_eth_link dev_link; 702 uint64_t sc; 703 int ret; 704 705 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1); 706 if (ret) { 707 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 708 dev->data->port_id, strerror(rte_errno)); 709 return ret; 710 } 711 memset(&dev_link, 0, sizeof(dev_link)); 712 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 713 (ifr.ifr_flags & IFF_RUNNING)); 714 ifr.ifr_data = (void *)&gcmd; 715 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1); 716 if (ret) { 717 DRV_LOG(DEBUG, 718 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 719 " failed: %s", 720 dev->data->port_id, strerror(rte_errno)); 721 return ret; 722 } 723 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 724 725 alignas(struct ethtool_link_settings) 726 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 727 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 728 struct ethtool_link_settings *ecmd = (void *)data; 729 730 *ecmd = gcmd; 731 ifr.ifr_data = (void *)ecmd; 732 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1); 733 if (ret) { 734 DRV_LOG(DEBUG, 735 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 736 " failed: %s", 737 dev->data->port_id, strerror(rte_errno)); 738 return ret; 739 } 740 dev_link.link_speed = ecmd->speed; 741 sc = ecmd->link_mode_masks[0] | 742 ((uint64_t)ecmd->link_mode_masks[1] << 32); 743 priv->link_speed_capa = 0; 744 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 745 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 746 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 747 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 748 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 749 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 750 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 751 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 752 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 753 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 754 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 755 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 756 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 757 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 758 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 759 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 760 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 761 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 762 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 763 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 764 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 765 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 766 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 767 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 768 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 769 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 770 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 771 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 772 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 773 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 774 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 775 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 776 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 777 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 778 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 779 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 780 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 781 ETH_LINK_SPEED_FIXED); 782 if ((dev_link.link_speed && !dev_link.link_status) || 783 (!dev_link.link_speed && dev_link.link_status)) { 784 rte_errno = EAGAIN; 785 return -rte_errno; 786 } 787 *link = dev_link; 788 return 0; 789 } 790 791 /** 792 * DPDK callback to retrieve physical link information. 793 * 794 * @param dev 795 * Pointer to Ethernet device structure. 796 * @param wait_to_complete 797 * Wait for request completion. 798 * 799 * @return 800 * 0 if link status was not updated, positive if it was, a negative errno 801 * value otherwise and rte_errno is set. 802 */ 803 int 804 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 805 { 806 int ret; 807 struct rte_eth_link dev_link; 808 time_t start_time = time(NULL); 809 810 do { 811 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 812 if (ret) 813 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 814 if (ret == 0) 815 break; 816 /* Handle wait to complete situation. */ 817 if (wait_to_complete && ret == -EAGAIN) { 818 if (abs((int)difftime(time(NULL), start_time)) < 819 MLX5_LINK_STATUS_TIMEOUT) { 820 usleep(0); 821 continue; 822 } else { 823 rte_errno = EBUSY; 824 return -rte_errno; 825 } 826 } else if (ret < 0) { 827 return ret; 828 } 829 } while (wait_to_complete); 830 ret = !!memcmp(&dev->data->dev_link, &dev_link, 831 sizeof(struct rte_eth_link)); 832 dev->data->dev_link = dev_link; 833 return ret; 834 } 835 836 /** 837 * DPDK callback to change the MTU. 838 * 839 * @param dev 840 * Pointer to Ethernet device structure. 841 * @param in_mtu 842 * New MTU. 843 * 844 * @return 845 * 0 on success, a negative errno value otherwise and rte_errno is set. 846 */ 847 int 848 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 849 { 850 struct priv *priv = dev->data->dev_private; 851 uint16_t kern_mtu = 0; 852 int ret; 853 854 ret = mlx5_get_mtu(dev, &kern_mtu); 855 if (ret) 856 return ret; 857 /* Set kernel interface MTU first. */ 858 ret = mlx5_set_mtu(dev, mtu); 859 if (ret) 860 return ret; 861 ret = mlx5_get_mtu(dev, &kern_mtu); 862 if (ret) 863 return ret; 864 if (kern_mtu == mtu) { 865 priv->mtu = mtu; 866 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 867 dev->data->port_id, mtu); 868 return 0; 869 } 870 rte_errno = EAGAIN; 871 return -rte_errno; 872 } 873 874 /** 875 * DPDK callback to get flow control status. 876 * 877 * @param dev 878 * Pointer to Ethernet device structure. 879 * @param[out] fc_conf 880 * Flow control output buffer. 881 * 882 * @return 883 * 0 on success, a negative errno value otherwise and rte_errno is set. 884 */ 885 int 886 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 887 { 888 struct ifreq ifr; 889 struct ethtool_pauseparam ethpause = { 890 .cmd = ETHTOOL_GPAUSEPARAM 891 }; 892 int ret; 893 894 ifr.ifr_data = (void *)ðpause; 895 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1); 896 if (ret) { 897 DRV_LOG(WARNING, 898 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 899 " %s", 900 dev->data->port_id, strerror(rte_errno)); 901 return ret; 902 } 903 fc_conf->autoneg = ethpause.autoneg; 904 if (ethpause.rx_pause && ethpause.tx_pause) 905 fc_conf->mode = RTE_FC_FULL; 906 else if (ethpause.rx_pause) 907 fc_conf->mode = RTE_FC_RX_PAUSE; 908 else if (ethpause.tx_pause) 909 fc_conf->mode = RTE_FC_TX_PAUSE; 910 else 911 fc_conf->mode = RTE_FC_NONE; 912 return 0; 913 } 914 915 /** 916 * DPDK callback to modify flow control parameters. 917 * 918 * @param dev 919 * Pointer to Ethernet device structure. 920 * @param[in] fc_conf 921 * Flow control parameters. 922 * 923 * @return 924 * 0 on success, a negative errno value otherwise and rte_errno is set. 925 */ 926 int 927 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 928 { 929 struct ifreq ifr; 930 struct ethtool_pauseparam ethpause = { 931 .cmd = ETHTOOL_SPAUSEPARAM 932 }; 933 int ret; 934 935 ifr.ifr_data = (void *)ðpause; 936 ethpause.autoneg = fc_conf->autoneg; 937 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 938 (fc_conf->mode & RTE_FC_RX_PAUSE)) 939 ethpause.rx_pause = 1; 940 else 941 ethpause.rx_pause = 0; 942 943 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 944 (fc_conf->mode & RTE_FC_TX_PAUSE)) 945 ethpause.tx_pause = 1; 946 else 947 ethpause.tx_pause = 0; 948 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 0); 949 if (ret) { 950 DRV_LOG(WARNING, 951 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 952 " failed: %s", 953 dev->data->port_id, strerror(rte_errno)); 954 return ret; 955 } 956 return 0; 957 } 958 959 /** 960 * Get PCI information from struct ibv_device. 961 * 962 * @param device 963 * Pointer to Ethernet device structure. 964 * @param[out] pci_addr 965 * PCI bus address output buffer. 966 * 967 * @return 968 * 0 on success, a negative errno value otherwise and rte_errno is set. 969 */ 970 int 971 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 972 struct rte_pci_addr *pci_addr) 973 { 974 FILE *file; 975 char line[32]; 976 MKSTR(path, "%s/device/uevent", device->ibdev_path); 977 978 file = fopen(path, "rb"); 979 if (file == NULL) { 980 rte_errno = errno; 981 return -rte_errno; 982 } 983 while (fgets(line, sizeof(line), file) == line) { 984 size_t len = strlen(line); 985 int ret; 986 987 /* Truncate long lines. */ 988 if (len == (sizeof(line) - 1)) 989 while (line[(len - 1)] != '\n') { 990 ret = fgetc(file); 991 if (ret == EOF) 992 break; 993 line[(len - 1)] = ret; 994 } 995 /* Extract information. */ 996 if (sscanf(line, 997 "PCI_SLOT_NAME=" 998 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 999 &pci_addr->domain, 1000 &pci_addr->bus, 1001 &pci_addr->devid, 1002 &pci_addr->function) == 4) { 1003 ret = 0; 1004 break; 1005 } 1006 } 1007 fclose(file); 1008 return 0; 1009 } 1010 1011 /** 1012 * Device status handler. 1013 * 1014 * @param dev 1015 * Pointer to Ethernet device. 1016 * @param events 1017 * Pointer to event flags holder. 1018 * 1019 * @return 1020 * Events bitmap of callback process which can be called immediately. 1021 */ 1022 static uint32_t 1023 mlx5_dev_status_handler(struct rte_eth_dev *dev) 1024 { 1025 struct priv *priv = dev->data->dev_private; 1026 struct ibv_async_event event; 1027 uint32_t ret = 0; 1028 1029 if (mlx5_link_update(dev, 0) == -EAGAIN) { 1030 usleep(0); 1031 return 0; 1032 } 1033 /* Read all message and acknowledge them. */ 1034 for (;;) { 1035 if (mlx5_glue->get_async_event(priv->ctx, &event)) 1036 break; 1037 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1038 event.event_type == IBV_EVENT_PORT_ERR) && 1039 (dev->data->dev_conf.intr_conf.lsc == 1)) 1040 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 1041 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 1042 dev->data->dev_conf.intr_conf.rmv == 1) 1043 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 1044 else 1045 DRV_LOG(DEBUG, 1046 "port %u event type %d on not handled", 1047 dev->data->port_id, event.event_type); 1048 mlx5_glue->ack_async_event(&event); 1049 } 1050 return ret; 1051 } 1052 1053 /** 1054 * Handle interrupts from the NIC. 1055 * 1056 * @param[in] intr_handle 1057 * Interrupt handler. 1058 * @param cb_arg 1059 * Callback argument. 1060 */ 1061 void 1062 mlx5_dev_interrupt_handler(void *cb_arg) 1063 { 1064 struct rte_eth_dev *dev = cb_arg; 1065 uint32_t events; 1066 1067 events = mlx5_dev_status_handler(dev); 1068 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 1069 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1070 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 1071 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1072 } 1073 1074 /** 1075 * Handle interrupts from the socket. 1076 * 1077 * @param cb_arg 1078 * Callback argument. 1079 */ 1080 static void 1081 mlx5_dev_handler_socket(void *cb_arg) 1082 { 1083 struct rte_eth_dev *dev = cb_arg; 1084 1085 mlx5_socket_handle(dev); 1086 } 1087 1088 /** 1089 * Uninstall interrupt handler. 1090 * 1091 * @param dev 1092 * Pointer to Ethernet device. 1093 */ 1094 void 1095 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 1096 { 1097 struct priv *priv = dev->data->dev_private; 1098 1099 if (dev->data->dev_conf.intr_conf.lsc || 1100 dev->data->dev_conf.intr_conf.rmv) 1101 rte_intr_callback_unregister(&priv->intr_handle, 1102 mlx5_dev_interrupt_handler, dev); 1103 if (priv->primary_socket) 1104 rte_intr_callback_unregister(&priv->intr_handle_socket, 1105 mlx5_dev_handler_socket, dev); 1106 priv->intr_handle.fd = 0; 1107 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1108 priv->intr_handle_socket.fd = 0; 1109 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 1110 } 1111 1112 /** 1113 * Install interrupt handler. 1114 * 1115 * @param dev 1116 * Pointer to Ethernet device. 1117 */ 1118 void 1119 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1120 { 1121 struct priv *priv = dev->data->dev_private; 1122 int ret; 1123 int flags; 1124 1125 assert(priv->ctx->async_fd > 0); 1126 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1127 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1128 if (ret) { 1129 DRV_LOG(INFO, 1130 "port %u failed to change file descriptor async event" 1131 " queue", 1132 dev->data->port_id); 1133 dev->data->dev_conf.intr_conf.lsc = 0; 1134 dev->data->dev_conf.intr_conf.rmv = 0; 1135 } 1136 if (dev->data->dev_conf.intr_conf.lsc || 1137 dev->data->dev_conf.intr_conf.rmv) { 1138 priv->intr_handle.fd = priv->ctx->async_fd; 1139 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1140 rte_intr_callback_register(&priv->intr_handle, 1141 mlx5_dev_interrupt_handler, dev); 1142 } 1143 ret = mlx5_socket_init(dev); 1144 if (ret) 1145 DRV_LOG(ERR, "port %u cannot initialise socket: %s", 1146 dev->data->port_id, strerror(rte_errno)); 1147 else if (priv->primary_socket) { 1148 priv->intr_handle_socket.fd = priv->primary_socket; 1149 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1150 rte_intr_callback_register(&priv->intr_handle_socket, 1151 mlx5_dev_handler_socket, dev); 1152 } 1153 } 1154 1155 /** 1156 * DPDK callback to bring the link DOWN. 1157 * 1158 * @param dev 1159 * Pointer to Ethernet device structure. 1160 * 1161 * @return 1162 * 0 on success, a negative errno value otherwise and rte_errno is set. 1163 */ 1164 int 1165 mlx5_set_link_down(struct rte_eth_dev *dev) 1166 { 1167 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1168 } 1169 1170 /** 1171 * DPDK callback to bring the link UP. 1172 * 1173 * @param dev 1174 * Pointer to Ethernet device structure. 1175 * 1176 * @return 1177 * 0 on success, a negative errno value otherwise and rte_errno is set. 1178 */ 1179 int 1180 mlx5_set_link_up(struct rte_eth_dev *dev) 1181 { 1182 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1183 } 1184 1185 /** 1186 * Configure the TX function to use. 1187 * 1188 * @param dev 1189 * Pointer to private data structure. 1190 * 1191 * @return 1192 * Pointer to selected Tx burst function. 1193 */ 1194 eth_tx_burst_t 1195 mlx5_select_tx_function(struct rte_eth_dev *dev) 1196 { 1197 struct priv *priv = dev->data->dev_private; 1198 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1199 struct mlx5_dev_config *config = &priv->config; 1200 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1201 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1202 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1203 DEV_TX_OFFLOAD_GRE_TNL_TSO | 1204 DEV_TX_OFFLOAD_IP_TNL_TSO | 1205 DEV_TX_OFFLOAD_UDP_TNL_TSO)); 1206 int swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO | 1207 DEV_TX_OFFLOAD_UDP_TNL_TSO | 1208 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)); 1209 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1210 1211 assert(priv != NULL); 1212 /* Select appropriate TX function. */ 1213 if (vlan_insert || tso || swp) 1214 return tx_pkt_burst; 1215 if (config->mps == MLX5_MPW_ENHANCED) { 1216 if (mlx5_check_vec_tx_support(dev) > 0) { 1217 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1218 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1219 else 1220 tx_pkt_burst = mlx5_tx_burst_vec; 1221 DRV_LOG(DEBUG, 1222 "port %u selected enhanced MPW Tx vectorized" 1223 " function", 1224 dev->data->port_id); 1225 } else { 1226 tx_pkt_burst = mlx5_tx_burst_empw; 1227 DRV_LOG(DEBUG, 1228 "port %u selected enhanced MPW Tx function", 1229 dev->data->port_id); 1230 } 1231 } else if (config->mps && (config->txq_inline > 0)) { 1232 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1233 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", 1234 dev->data->port_id); 1235 } else if (config->mps) { 1236 tx_pkt_burst = mlx5_tx_burst_mpw; 1237 DRV_LOG(DEBUG, "port %u selected MPW Tx function", 1238 dev->data->port_id); 1239 } 1240 return tx_pkt_burst; 1241 } 1242 1243 /** 1244 * Configure the RX function to use. 1245 * 1246 * @param dev 1247 * Pointer to private data structure. 1248 * 1249 * @return 1250 * Pointer to selected Rx burst function. 1251 */ 1252 eth_rx_burst_t 1253 mlx5_select_rx_function(struct rte_eth_dev *dev) 1254 { 1255 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1256 1257 assert(dev != NULL); 1258 if (mlx5_check_vec_rx_support(dev) > 0) { 1259 rx_pkt_burst = mlx5_rx_burst_vec; 1260 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1261 dev->data->port_id); 1262 } else if (mlx5_mprq_enabled(dev)) { 1263 rx_pkt_burst = mlx5_rx_burst_mprq; 1264 } 1265 return rx_pkt_burst; 1266 } 1267 1268 /** 1269 * Check if mlx5 device was removed. 1270 * 1271 * @param dev 1272 * Pointer to Ethernet device structure. 1273 * 1274 * @return 1275 * 1 when device is removed, otherwise 0. 1276 */ 1277 int 1278 mlx5_is_removed(struct rte_eth_dev *dev) 1279 { 1280 struct ibv_device_attr device_attr; 1281 struct priv *priv = dev->data->dev_private; 1282 1283 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1284 return 1; 1285 return 0; 1286 } 1287 1288 /** 1289 * Get port ID list of mlx5 instances sharing a common device. 1290 * 1291 * @param[in] dev 1292 * Device to look for. 1293 * @param[out] port_list 1294 * Result buffer for collected port IDs. 1295 * @param port_list_n 1296 * Maximum number of entries in result buffer. If 0, @p port_list can be 1297 * NULL. 1298 * 1299 * @return 1300 * Number of matching instances regardless of the @p port_list_n 1301 * parameter, 0 if none were found. 1302 */ 1303 unsigned int 1304 mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list, 1305 unsigned int port_list_n) 1306 { 1307 uint16_t id; 1308 unsigned int n = 0; 1309 1310 RTE_ETH_FOREACH_DEV(id) { 1311 struct rte_eth_dev *ldev = &rte_eth_devices[id]; 1312 1313 if (!ldev->device || 1314 !ldev->device->driver || 1315 strcmp(ldev->device->driver->name, MLX5_DRIVER_NAME) || 1316 ldev->device != dev) 1317 continue; 1318 if (n < port_list_n) 1319 port_list[n] = id; 1320 n++; 1321 } 1322 return n; 1323 } 1324 1325 /** 1326 * Get switch information associated with network interface. 1327 * 1328 * @param ifindex 1329 * Network interface index. 1330 * @param[out] info 1331 * Switch information object, populated in case of success. 1332 * 1333 * @return 1334 * 0 on success, a negative errno value otherwise and rte_errno is set. 1335 */ 1336 int 1337 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) 1338 { 1339 char ifname[IF_NAMESIZE]; 1340 FILE *file; 1341 struct mlx5_switch_info data = { .master = 0, }; 1342 bool port_name_set = false; 1343 bool port_switch_id_set = false; 1344 char c; 1345 1346 if (!if_indextoname(ifindex, ifname)) { 1347 rte_errno = errno; 1348 return -rte_errno; 1349 } 1350 1351 MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name", 1352 ifname); 1353 MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", 1354 ifname); 1355 1356 file = fopen(phys_port_name, "rb"); 1357 if (file != NULL) { 1358 port_name_set = 1359 fscanf(file, "%d%c", &data.port_name, &c) == 2 && 1360 c == '\n'; 1361 fclose(file); 1362 } 1363 file = fopen(phys_switch_id, "rb"); 1364 if (file == NULL) { 1365 rte_errno = errno; 1366 return -rte_errno; 1367 } 1368 port_switch_id_set = 1369 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && 1370 c == '\n'; 1371 fclose(file); 1372 data.master = port_switch_id_set && !port_name_set; 1373 data.representor = port_switch_id_set && port_name_set; 1374 *info = data; 1375 return 0; 1376 } 1377