1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <fcntl.h> 25 #include <stdalign.h> 26 #include <sys/un.h> 27 #include <time.h> 28 29 #include <rte_atomic.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_bus_pci.h> 32 #include <rte_mbuf.h> 33 #include <rte_common.h> 34 #include <rte_interrupts.h> 35 #include <rte_malloc.h> 36 #include <rte_string_fns.h> 37 38 #include "mlx5.h" 39 #include "mlx5_glue.h" 40 #include "mlx5_rxtx.h" 41 #include "mlx5_utils.h" 42 43 /* Add defines in case the running kernel is not the same as user headers. */ 44 #ifndef ETHTOOL_GLINKSETTINGS 45 struct ethtool_link_settings { 46 uint32_t cmd; 47 uint32_t speed; 48 uint8_t duplex; 49 uint8_t port; 50 uint8_t phy_address; 51 uint8_t autoneg; 52 uint8_t mdio_support; 53 uint8_t eth_to_mdix; 54 uint8_t eth_tp_mdix_ctrl; 55 int8_t link_mode_masks_nwords; 56 uint32_t reserved[8]; 57 uint32_t link_mode_masks[]; 58 }; 59 60 #define ETHTOOL_GLINKSETTINGS 0x0000004c 61 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 62 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 63 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 64 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 65 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 66 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 67 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 68 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 69 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 70 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 71 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 72 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 73 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 74 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 75 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 76 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 77 #endif 78 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 79 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 80 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 81 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 82 #endif 83 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 84 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 85 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 86 #endif 87 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 88 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 89 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 90 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 91 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 92 #endif 93 94 /** 95 * Get interface name from private structure. 96 * 97 * @param[in] dev 98 * Pointer to Ethernet device. 99 * @param[out] ifname 100 * Interface name output buffer. 101 * 102 * @return 103 * 0 on success, a negative errno value otherwise and rte_errno is set. 104 */ 105 int 106 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 107 { 108 struct priv *priv = dev->data->dev_private; 109 DIR *dir; 110 struct dirent *dent; 111 unsigned int dev_type = 0; 112 unsigned int dev_port_prev = ~0u; 113 char match[IF_NAMESIZE] = ""; 114 115 { 116 MKSTR(path, "%s/device/net", priv->ibdev_path); 117 118 dir = opendir(path); 119 if (dir == NULL) { 120 rte_errno = errno; 121 return -rte_errno; 122 } 123 } 124 while ((dent = readdir(dir)) != NULL) { 125 char *name = dent->d_name; 126 FILE *file; 127 unsigned int dev_port; 128 int r; 129 130 if ((name[0] == '.') && 131 ((name[1] == '\0') || 132 ((name[1] == '.') && (name[2] == '\0')))) 133 continue; 134 135 MKSTR(path, "%s/device/net/%s/%s", 136 priv->ibdev_path, name, 137 (dev_type ? "dev_id" : "dev_port")); 138 139 file = fopen(path, "rb"); 140 if (file == NULL) { 141 if (errno != ENOENT) 142 continue; 143 /* 144 * Switch to dev_id when dev_port does not exist as 145 * is the case with Linux kernel versions < 3.15. 146 */ 147 try_dev_id: 148 match[0] = '\0'; 149 if (dev_type) 150 break; 151 dev_type = 1; 152 dev_port_prev = ~0u; 153 rewinddir(dir); 154 continue; 155 } 156 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 157 fclose(file); 158 if (r != 1) 159 continue; 160 /* 161 * Switch to dev_id when dev_port returns the same value for 162 * all ports. May happen when using a MOFED release older than 163 * 3.0 with a Linux kernel >= 3.15. 164 */ 165 if (dev_port == dev_port_prev) 166 goto try_dev_id; 167 dev_port_prev = dev_port; 168 if (dev_port == (priv->port - 1u)) 169 strlcpy(match, name, sizeof(match)); 170 } 171 closedir(dir); 172 if (match[0] == '\0') { 173 rte_errno = ENOENT; 174 return -rte_errno; 175 } 176 strncpy(*ifname, match, sizeof(*ifname)); 177 return 0; 178 } 179 180 /** 181 * Get the interface index from device name. 182 * 183 * @param[in] dev 184 * Pointer to Ethernet device. 185 * 186 * @return 187 * Interface index on success, a negative errno value otherwise and 188 * rte_errno is set. 189 */ 190 int 191 mlx5_ifindex(const struct rte_eth_dev *dev) 192 { 193 char ifname[IF_NAMESIZE]; 194 int ret; 195 196 ret = mlx5_get_ifname(dev, &ifname); 197 if (ret) 198 return ret; 199 ret = if_nametoindex(ifname); 200 if (ret == -1) { 201 rte_errno = errno; 202 return -rte_errno; 203 } 204 return ret; 205 } 206 207 /** 208 * Perform ifreq ioctl() on associated Ethernet device. 209 * 210 * @param[in] dev 211 * Pointer to Ethernet device. 212 * @param req 213 * Request number to pass to ioctl(). 214 * @param[out] ifr 215 * Interface request structure output buffer. 216 * 217 * @return 218 * 0 on success, a negative errno value otherwise and rte_errno is set. 219 */ 220 int 221 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 222 { 223 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 224 int ret = 0; 225 226 if (sock == -1) { 227 rte_errno = errno; 228 return -rte_errno; 229 } 230 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 231 if (ret) 232 goto error; 233 ret = ioctl(sock, req, ifr); 234 if (ret == -1) { 235 rte_errno = errno; 236 goto error; 237 } 238 close(sock); 239 return 0; 240 error: 241 close(sock); 242 return -rte_errno; 243 } 244 245 /** 246 * Get device MTU. 247 * 248 * @param dev 249 * Pointer to Ethernet device. 250 * @param[out] mtu 251 * MTU value output buffer. 252 * 253 * @return 254 * 0 on success, a negative errno value otherwise and rte_errno is set. 255 */ 256 int 257 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 258 { 259 struct ifreq request; 260 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 261 262 if (ret) 263 return ret; 264 *mtu = request.ifr_mtu; 265 return 0; 266 } 267 268 /** 269 * Set device MTU. 270 * 271 * @param dev 272 * Pointer to Ethernet device. 273 * @param mtu 274 * MTU value to set. 275 * 276 * @return 277 * 0 on success, a negative errno value otherwise and rte_errno is set. 278 */ 279 static int 280 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 281 { 282 struct ifreq request = { .ifr_mtu = mtu, }; 283 284 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 285 } 286 287 /** 288 * Set device flags. 289 * 290 * @param dev 291 * Pointer to Ethernet device. 292 * @param keep 293 * Bitmask for flags that must remain untouched. 294 * @param flags 295 * Bitmask for flags to modify. 296 * 297 * @return 298 * 0 on success, a negative errno value otherwise and rte_errno is set. 299 */ 300 int 301 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 302 { 303 struct ifreq request; 304 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 305 306 if (ret) 307 return ret; 308 request.ifr_flags &= keep; 309 request.ifr_flags |= flags & ~keep; 310 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 311 } 312 313 /** 314 * DPDK callback for Ethernet device configuration. 315 * 316 * @param dev 317 * Pointer to Ethernet device structure. 318 * 319 * @return 320 * 0 on success, a negative errno value otherwise and rte_errno is set. 321 */ 322 int 323 mlx5_dev_configure(struct rte_eth_dev *dev) 324 { 325 struct priv *priv = dev->data->dev_private; 326 unsigned int rxqs_n = dev->data->nb_rx_queues; 327 unsigned int txqs_n = dev->data->nb_tx_queues; 328 unsigned int i; 329 unsigned int j; 330 unsigned int reta_idx_n; 331 const uint8_t use_app_rss_key = 332 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 333 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev); 334 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 335 uint64_t supp_rx_offloads = 336 (mlx5_get_rx_port_offloads() | 337 mlx5_get_rx_queue_offloads(dev)); 338 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 339 int ret = 0; 340 341 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 342 DRV_LOG(ERR, 343 "port %u some Tx offloads are not supported requested" 344 " 0x%" PRIx64 " supported 0x%" PRIx64, 345 dev->data->port_id, tx_offloads, supp_tx_offloads); 346 rte_errno = ENOTSUP; 347 return -rte_errno; 348 } 349 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 350 DRV_LOG(ERR, 351 "port %u some Rx offloads are not supported requested" 352 " 0x%" PRIx64 " supported 0x%" PRIx64, 353 dev->data->port_id, rx_offloads, supp_rx_offloads); 354 rte_errno = ENOTSUP; 355 return -rte_errno; 356 } 357 if (use_app_rss_key && 358 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 359 rss_hash_default_key_len)) { 360 DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long", 361 dev->data->port_id, rss_hash_default_key_len); 362 rte_errno = EINVAL; 363 return -rte_errno; 364 } 365 priv->rss_conf.rss_key = 366 rte_realloc(priv->rss_conf.rss_key, 367 rss_hash_default_key_len, 0); 368 if (!priv->rss_conf.rss_key) { 369 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 370 dev->data->port_id, rxqs_n); 371 rte_errno = ENOMEM; 372 return -rte_errno; 373 } 374 memcpy(priv->rss_conf.rss_key, 375 use_app_rss_key ? 376 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 377 rss_hash_default_key, 378 rss_hash_default_key_len); 379 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 380 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 381 priv->rxqs = (void *)dev->data->rx_queues; 382 priv->txqs = (void *)dev->data->tx_queues; 383 if (txqs_n != priv->txqs_n) { 384 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 385 dev->data->port_id, priv->txqs_n, txqs_n); 386 priv->txqs_n = txqs_n; 387 } 388 if (rxqs_n > priv->config.ind_table_max_size) { 389 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 390 dev->data->port_id, rxqs_n); 391 rte_errno = EINVAL; 392 return -rte_errno; 393 } 394 if (rxqs_n == priv->rxqs_n) 395 return 0; 396 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 397 dev->data->port_id, priv->rxqs_n, rxqs_n); 398 priv->rxqs_n = rxqs_n; 399 /* If the requested number of RX queues is not a power of two, use the 400 * maximum indirection table size for better balancing. 401 * The result is always rounded to the next power of two. */ 402 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 403 priv->config.ind_table_max_size : 404 rxqs_n)); 405 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 406 if (ret) 407 return ret; 408 /* When the number of RX queues is not a power of two, the remaining 409 * table entries are padded with reused WQs and hashes are not spread 410 * uniformly. */ 411 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 412 (*priv->reta_idx)[i] = j; 413 if (++j == rxqs_n) 414 j = 0; 415 } 416 return 0; 417 } 418 419 /** 420 * DPDK callback to get information about the device. 421 * 422 * @param dev 423 * Pointer to Ethernet device structure. 424 * @param[out] info 425 * Info structure output buffer. 426 */ 427 void 428 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 429 { 430 struct priv *priv = dev->data->dev_private; 431 struct mlx5_dev_config *config = &priv->config; 432 unsigned int max; 433 char ifname[IF_NAMESIZE]; 434 435 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 436 /* FIXME: we should ask the device for these values. */ 437 info->min_rx_bufsize = 32; 438 info->max_rx_pktlen = 65536; 439 /* 440 * Since we need one CQ per QP, the limit is the minimum number 441 * between the two values. 442 */ 443 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 444 priv->device_attr.orig_attr.max_qp); 445 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 446 if (max >= 65535) 447 max = 65535; 448 info->max_rx_queues = max; 449 info->max_tx_queues = max; 450 info->max_mac_addrs = RTE_DIM(priv->mac); 451 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 452 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 453 info->rx_queue_offload_capa); 454 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 455 if (mlx5_get_ifname(dev, &ifname) == 0) 456 info->if_index = if_nametoindex(ifname); 457 info->reta_size = priv->reta_idx_n ? 458 priv->reta_idx_n : config->ind_table_max_size; 459 info->hash_key_size = rss_hash_default_key_len; 460 info->speed_capa = priv->link_speed_capa; 461 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 462 } 463 464 /** 465 * Get supported packet types. 466 * 467 * @param dev 468 * Pointer to Ethernet device structure. 469 * 470 * @return 471 * A pointer to the supported Packet types array. 472 */ 473 const uint32_t * 474 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 475 { 476 static const uint32_t ptypes[] = { 477 /* refers to rxq_cq_to_pkt_type() */ 478 RTE_PTYPE_L2_ETHER, 479 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 480 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 481 RTE_PTYPE_L4_NONFRAG, 482 RTE_PTYPE_L4_FRAG, 483 RTE_PTYPE_L4_TCP, 484 RTE_PTYPE_L4_UDP, 485 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 486 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 487 RTE_PTYPE_INNER_L4_NONFRAG, 488 RTE_PTYPE_INNER_L4_FRAG, 489 RTE_PTYPE_INNER_L4_TCP, 490 RTE_PTYPE_INNER_L4_UDP, 491 RTE_PTYPE_UNKNOWN 492 }; 493 494 if (dev->rx_pkt_burst == mlx5_rx_burst || 495 dev->rx_pkt_burst == mlx5_rx_burst_vec) 496 return ptypes; 497 return NULL; 498 } 499 500 /** 501 * DPDK callback to retrieve physical link information. 502 * 503 * @param dev 504 * Pointer to Ethernet device structure. 505 * @param[out] link 506 * Storage for current link status. 507 * 508 * @return 509 * 0 on success, a negative errno value otherwise and rte_errno is set. 510 */ 511 static int 512 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 513 struct rte_eth_link *link) 514 { 515 struct priv *priv = dev->data->dev_private; 516 struct ethtool_cmd edata = { 517 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 518 }; 519 struct ifreq ifr; 520 struct rte_eth_link dev_link; 521 int link_speed = 0; 522 int ret; 523 524 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 525 if (ret) { 526 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 527 dev->data->port_id, strerror(rte_errno)); 528 return ret; 529 } 530 memset(&dev_link, 0, sizeof(dev_link)); 531 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 532 (ifr.ifr_flags & IFF_RUNNING)); 533 ifr.ifr_data = (void *)&edata; 534 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 535 if (ret) { 536 DRV_LOG(WARNING, 537 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 538 dev->data->port_id, strerror(rte_errno)); 539 return ret; 540 } 541 link_speed = ethtool_cmd_speed(&edata); 542 if (link_speed == -1) 543 dev_link.link_speed = 0; 544 else 545 dev_link.link_speed = link_speed; 546 priv->link_speed_capa = 0; 547 if (edata.supported & SUPPORTED_Autoneg) 548 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 549 if (edata.supported & (SUPPORTED_1000baseT_Full | 550 SUPPORTED_1000baseKX_Full)) 551 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 552 if (edata.supported & SUPPORTED_10000baseKR_Full) 553 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 554 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 555 SUPPORTED_40000baseCR4_Full | 556 SUPPORTED_40000baseSR4_Full | 557 SUPPORTED_40000baseLR4_Full)) 558 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 559 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 560 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 561 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 562 ETH_LINK_SPEED_FIXED); 563 if ((dev_link.link_speed && !dev_link.link_status) || 564 (!dev_link.link_speed && dev_link.link_status)) { 565 rte_errno = EAGAIN; 566 return -rte_errno; 567 } 568 *link = dev_link; 569 return 0; 570 } 571 572 /** 573 * Retrieve physical link information (unlocked version using new ioctl). 574 * 575 * @param dev 576 * Pointer to Ethernet device structure. 577 * @param[out] link 578 * Storage for current link status. 579 * 580 * @return 581 * 0 on success, a negative errno value otherwise and rte_errno is set. 582 */ 583 static int 584 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 585 struct rte_eth_link *link) 586 587 { 588 struct priv *priv = dev->data->dev_private; 589 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 590 struct ifreq ifr; 591 struct rte_eth_link dev_link; 592 uint64_t sc; 593 int ret; 594 595 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 596 if (ret) { 597 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 598 dev->data->port_id, strerror(rte_errno)); 599 return ret; 600 } 601 memset(&dev_link, 0, sizeof(dev_link)); 602 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 603 (ifr.ifr_flags & IFF_RUNNING)); 604 ifr.ifr_data = (void *)&gcmd; 605 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 606 if (ret) { 607 DRV_LOG(DEBUG, 608 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 609 " failed: %s", 610 dev->data->port_id, strerror(rte_errno)); 611 return ret; 612 } 613 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 614 615 alignas(struct ethtool_link_settings) 616 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 617 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 618 struct ethtool_link_settings *ecmd = (void *)data; 619 620 *ecmd = gcmd; 621 ifr.ifr_data = (void *)ecmd; 622 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 623 if (ret) { 624 DRV_LOG(DEBUG, 625 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 626 " failed: %s", 627 dev->data->port_id, strerror(rte_errno)); 628 return ret; 629 } 630 dev_link.link_speed = ecmd->speed; 631 sc = ecmd->link_mode_masks[0] | 632 ((uint64_t)ecmd->link_mode_masks[1] << 32); 633 priv->link_speed_capa = 0; 634 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 635 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 636 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 637 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 638 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 639 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 640 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 641 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 642 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 643 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 644 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 645 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 646 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 647 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 648 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 649 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 650 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 651 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 652 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 653 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 654 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 655 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 656 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 657 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 658 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 659 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 660 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 661 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 662 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 663 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 664 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 665 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 666 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 667 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 668 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 669 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 670 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 671 ETH_LINK_SPEED_FIXED); 672 if ((dev_link.link_speed && !dev_link.link_status) || 673 (!dev_link.link_speed && dev_link.link_status)) { 674 rte_errno = EAGAIN; 675 return -rte_errno; 676 } 677 *link = dev_link; 678 return 0; 679 } 680 681 /** 682 * DPDK callback to retrieve physical link information. 683 * 684 * @param dev 685 * Pointer to Ethernet device structure. 686 * @param wait_to_complete 687 * Wait for request completion. 688 * 689 * @return 690 * 0 if link status was not updated, positive if it was, a negative errno 691 * value otherwise and rte_errno is set. 692 */ 693 int 694 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 695 { 696 int ret; 697 struct rte_eth_link dev_link; 698 time_t start_time = time(NULL); 699 700 do { 701 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 702 if (ret) 703 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 704 if (ret == 0) 705 break; 706 /* Handle wait to complete situation. */ 707 if (wait_to_complete && ret == -EAGAIN) { 708 if (abs((int)difftime(time(NULL), start_time)) < 709 MLX5_LINK_STATUS_TIMEOUT) { 710 usleep(0); 711 continue; 712 } else { 713 rte_errno = EBUSY; 714 return -rte_errno; 715 } 716 } else if (ret < 0) { 717 return ret; 718 } 719 } while (wait_to_complete); 720 ret = !!memcmp(&dev->data->dev_link, &dev_link, 721 sizeof(struct rte_eth_link)); 722 dev->data->dev_link = dev_link; 723 return ret; 724 } 725 726 /** 727 * DPDK callback to change the MTU. 728 * 729 * @param dev 730 * Pointer to Ethernet device structure. 731 * @param in_mtu 732 * New MTU. 733 * 734 * @return 735 * 0 on success, a negative errno value otherwise and rte_errno is set. 736 */ 737 int 738 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 739 { 740 struct priv *priv = dev->data->dev_private; 741 uint16_t kern_mtu = 0; 742 int ret; 743 744 ret = mlx5_get_mtu(dev, &kern_mtu); 745 if (ret) 746 return ret; 747 /* Set kernel interface MTU first. */ 748 ret = mlx5_set_mtu(dev, mtu); 749 if (ret) 750 return ret; 751 ret = mlx5_get_mtu(dev, &kern_mtu); 752 if (ret) 753 return ret; 754 if (kern_mtu == mtu) { 755 priv->mtu = mtu; 756 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 757 dev->data->port_id, mtu); 758 return 0; 759 } 760 rte_errno = EAGAIN; 761 return -rte_errno; 762 } 763 764 /** 765 * DPDK callback to get flow control status. 766 * 767 * @param dev 768 * Pointer to Ethernet device structure. 769 * @param[out] fc_conf 770 * Flow control output buffer. 771 * 772 * @return 773 * 0 on success, a negative errno value otherwise and rte_errno is set. 774 */ 775 int 776 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 777 { 778 struct ifreq ifr; 779 struct ethtool_pauseparam ethpause = { 780 .cmd = ETHTOOL_GPAUSEPARAM 781 }; 782 int ret; 783 784 ifr.ifr_data = (void *)ðpause; 785 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 786 if (ret) { 787 DRV_LOG(WARNING, 788 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 789 " %s", 790 dev->data->port_id, strerror(rte_errno)); 791 return ret; 792 } 793 fc_conf->autoneg = ethpause.autoneg; 794 if (ethpause.rx_pause && ethpause.tx_pause) 795 fc_conf->mode = RTE_FC_FULL; 796 else if (ethpause.rx_pause) 797 fc_conf->mode = RTE_FC_RX_PAUSE; 798 else if (ethpause.tx_pause) 799 fc_conf->mode = RTE_FC_TX_PAUSE; 800 else 801 fc_conf->mode = RTE_FC_NONE; 802 return 0; 803 } 804 805 /** 806 * DPDK callback to modify flow control parameters. 807 * 808 * @param dev 809 * Pointer to Ethernet device structure. 810 * @param[in] fc_conf 811 * Flow control parameters. 812 * 813 * @return 814 * 0 on success, a negative errno value otherwise and rte_errno is set. 815 */ 816 int 817 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 818 { 819 struct ifreq ifr; 820 struct ethtool_pauseparam ethpause = { 821 .cmd = ETHTOOL_SPAUSEPARAM 822 }; 823 int ret; 824 825 ifr.ifr_data = (void *)ðpause; 826 ethpause.autoneg = fc_conf->autoneg; 827 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 828 (fc_conf->mode & RTE_FC_RX_PAUSE)) 829 ethpause.rx_pause = 1; 830 else 831 ethpause.rx_pause = 0; 832 833 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 834 (fc_conf->mode & RTE_FC_TX_PAUSE)) 835 ethpause.tx_pause = 1; 836 else 837 ethpause.tx_pause = 0; 838 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 839 if (ret) { 840 DRV_LOG(WARNING, 841 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 842 " failed: %s", 843 dev->data->port_id, strerror(rte_errno)); 844 return ret; 845 } 846 return 0; 847 } 848 849 /** 850 * Get PCI information from struct ibv_device. 851 * 852 * @param device 853 * Pointer to Ethernet device structure. 854 * @param[out] pci_addr 855 * PCI bus address output buffer. 856 * 857 * @return 858 * 0 on success, a negative errno value otherwise and rte_errno is set. 859 */ 860 int 861 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 862 struct rte_pci_addr *pci_addr) 863 { 864 FILE *file; 865 char line[32]; 866 MKSTR(path, "%s/device/uevent", device->ibdev_path); 867 868 file = fopen(path, "rb"); 869 if (file == NULL) { 870 rte_errno = errno; 871 return -rte_errno; 872 } 873 while (fgets(line, sizeof(line), file) == line) { 874 size_t len = strlen(line); 875 int ret; 876 877 /* Truncate long lines. */ 878 if (len == (sizeof(line) - 1)) 879 while (line[(len - 1)] != '\n') { 880 ret = fgetc(file); 881 if (ret == EOF) 882 break; 883 line[(len - 1)] = ret; 884 } 885 /* Extract information. */ 886 if (sscanf(line, 887 "PCI_SLOT_NAME=" 888 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 889 &pci_addr->domain, 890 &pci_addr->bus, 891 &pci_addr->devid, 892 &pci_addr->function) == 4) { 893 ret = 0; 894 break; 895 } 896 } 897 fclose(file); 898 return 0; 899 } 900 901 /** 902 * Device status handler. 903 * 904 * @param dev 905 * Pointer to Ethernet device. 906 * @param events 907 * Pointer to event flags holder. 908 * 909 * @return 910 * Events bitmap of callback process which can be called immediately. 911 */ 912 static uint32_t 913 mlx5_dev_status_handler(struct rte_eth_dev *dev) 914 { 915 struct priv *priv = dev->data->dev_private; 916 struct ibv_async_event event; 917 uint32_t ret = 0; 918 919 if (mlx5_link_update(dev, 0) == -EAGAIN) { 920 usleep(0); 921 return 0; 922 } 923 /* Read all message and acknowledge them. */ 924 for (;;) { 925 if (mlx5_glue->get_async_event(priv->ctx, &event)) 926 break; 927 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 928 event.event_type == IBV_EVENT_PORT_ERR) && 929 (dev->data->dev_conf.intr_conf.lsc == 1)) 930 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 931 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 932 dev->data->dev_conf.intr_conf.rmv == 1) 933 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 934 else 935 DRV_LOG(DEBUG, 936 "port %u event type %d on not handled", 937 dev->data->port_id, event.event_type); 938 mlx5_glue->ack_async_event(&event); 939 } 940 return ret; 941 } 942 943 /** 944 * Handle interrupts from the NIC. 945 * 946 * @param[in] intr_handle 947 * Interrupt handler. 948 * @param cb_arg 949 * Callback argument. 950 */ 951 void 952 mlx5_dev_interrupt_handler(void *cb_arg) 953 { 954 struct rte_eth_dev *dev = cb_arg; 955 uint32_t events; 956 957 events = mlx5_dev_status_handler(dev); 958 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 959 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 960 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 961 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 962 } 963 964 /** 965 * Handle interrupts from the socket. 966 * 967 * @param cb_arg 968 * Callback argument. 969 */ 970 static void 971 mlx5_dev_handler_socket(void *cb_arg) 972 { 973 struct rte_eth_dev *dev = cb_arg; 974 975 mlx5_socket_handle(dev); 976 } 977 978 /** 979 * Uninstall interrupt handler. 980 * 981 * @param dev 982 * Pointer to Ethernet device. 983 */ 984 void 985 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 986 { 987 struct priv *priv = dev->data->dev_private; 988 989 if (dev->data->dev_conf.intr_conf.lsc || 990 dev->data->dev_conf.intr_conf.rmv) 991 rte_intr_callback_unregister(&priv->intr_handle, 992 mlx5_dev_interrupt_handler, dev); 993 if (priv->primary_socket) 994 rte_intr_callback_unregister(&priv->intr_handle_socket, 995 mlx5_dev_handler_socket, dev); 996 priv->intr_handle.fd = 0; 997 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 998 priv->intr_handle_socket.fd = 0; 999 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 1000 } 1001 1002 /** 1003 * Install interrupt handler. 1004 * 1005 * @param dev 1006 * Pointer to Ethernet device. 1007 */ 1008 void 1009 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1010 { 1011 struct priv *priv = dev->data->dev_private; 1012 int ret; 1013 int flags; 1014 1015 assert(priv->ctx->async_fd > 0); 1016 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1017 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1018 if (ret) { 1019 DRV_LOG(INFO, 1020 "port %u failed to change file descriptor async event" 1021 " queue", 1022 dev->data->port_id); 1023 dev->data->dev_conf.intr_conf.lsc = 0; 1024 dev->data->dev_conf.intr_conf.rmv = 0; 1025 } 1026 if (dev->data->dev_conf.intr_conf.lsc || 1027 dev->data->dev_conf.intr_conf.rmv) { 1028 priv->intr_handle.fd = priv->ctx->async_fd; 1029 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1030 rte_intr_callback_register(&priv->intr_handle, 1031 mlx5_dev_interrupt_handler, dev); 1032 } 1033 ret = mlx5_socket_init(dev); 1034 if (ret) 1035 DRV_LOG(ERR, "port %u cannot initialise socket: %s", 1036 dev->data->port_id, strerror(rte_errno)); 1037 else if (priv->primary_socket) { 1038 priv->intr_handle_socket.fd = priv->primary_socket; 1039 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1040 rte_intr_callback_register(&priv->intr_handle_socket, 1041 mlx5_dev_handler_socket, dev); 1042 } 1043 } 1044 1045 /** 1046 * DPDK callback to bring the link DOWN. 1047 * 1048 * @param dev 1049 * Pointer to Ethernet device structure. 1050 * 1051 * @return 1052 * 0 on success, a negative errno value otherwise and rte_errno is set. 1053 */ 1054 int 1055 mlx5_set_link_down(struct rte_eth_dev *dev) 1056 { 1057 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1058 } 1059 1060 /** 1061 * DPDK callback to bring the link UP. 1062 * 1063 * @param dev 1064 * Pointer to Ethernet device structure. 1065 * 1066 * @return 1067 * 0 on success, a negative errno value otherwise and rte_errno is set. 1068 */ 1069 int 1070 mlx5_set_link_up(struct rte_eth_dev *dev) 1071 { 1072 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1073 } 1074 1075 /** 1076 * Configure the TX function to use. 1077 * 1078 * @param dev 1079 * Pointer to private data structure. 1080 * 1081 * @return 1082 * Pointer to selected Tx burst function. 1083 */ 1084 eth_tx_burst_t 1085 mlx5_select_tx_function(struct rte_eth_dev *dev) 1086 { 1087 struct priv *priv = dev->data->dev_private; 1088 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1089 struct mlx5_dev_config *config = &priv->config; 1090 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1091 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1092 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1093 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1094 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1095 1096 assert(priv != NULL); 1097 /* Select appropriate TX function. */ 1098 if (vlan_insert || tso) 1099 return tx_pkt_burst; 1100 if (config->mps == MLX5_MPW_ENHANCED) { 1101 if (mlx5_check_vec_tx_support(dev) > 0) { 1102 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1103 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1104 else 1105 tx_pkt_burst = mlx5_tx_burst_vec; 1106 DRV_LOG(DEBUG, 1107 "port %u selected enhanced MPW Tx vectorized" 1108 " function", 1109 dev->data->port_id); 1110 } else { 1111 tx_pkt_burst = mlx5_tx_burst_empw; 1112 DRV_LOG(DEBUG, 1113 "port %u selected enhanced MPW Tx function", 1114 dev->data->port_id); 1115 } 1116 } else if (config->mps && (config->txq_inline > 0)) { 1117 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1118 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", 1119 dev->data->port_id); 1120 } else if (config->mps) { 1121 tx_pkt_burst = mlx5_tx_burst_mpw; 1122 DRV_LOG(DEBUG, "port %u selected MPW Tx function", 1123 dev->data->port_id); 1124 } 1125 return tx_pkt_burst; 1126 } 1127 1128 /** 1129 * Configure the RX function to use. 1130 * 1131 * @param dev 1132 * Pointer to private data structure. 1133 * 1134 * @return 1135 * Pointer to selected Rx burst function. 1136 */ 1137 eth_rx_burst_t 1138 mlx5_select_rx_function(struct rte_eth_dev *dev) 1139 { 1140 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1141 1142 assert(dev != NULL); 1143 if (mlx5_check_vec_rx_support(dev) > 0) { 1144 rx_pkt_burst = mlx5_rx_burst_vec; 1145 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1146 dev->data->port_id); 1147 } 1148 return rx_pkt_burst; 1149 } 1150 1151 /** 1152 * Check if mlx5 device was removed. 1153 * 1154 * @param dev 1155 * Pointer to Ethernet device structure. 1156 * 1157 * @return 1158 * 1 when device is removed, otherwise 0. 1159 */ 1160 int 1161 mlx5_is_removed(struct rte_eth_dev *dev) 1162 { 1163 struct ibv_device_attr device_attr; 1164 struct priv *priv = dev->data->dev_private; 1165 1166 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1167 return 1; 1168 return 0; 1169 } 1170