1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <fcntl.h> 25 #include <stdalign.h> 26 #include <sys/un.h> 27 #include <time.h> 28 29 #include <rte_atomic.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_bus_pci.h> 32 #include <rte_mbuf.h> 33 #include <rte_common.h> 34 #include <rte_interrupts.h> 35 #include <rte_malloc.h> 36 #include <rte_string_fns.h> 37 38 #include "mlx5.h" 39 #include "mlx5_glue.h" 40 #include "mlx5_rxtx.h" 41 #include "mlx5_utils.h" 42 43 /* Add defines in case the running kernel is not the same as user headers. 
*/
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Fallback layout of struct ethtool_link_settings for kernel headers that
 * predate the ETHTOOL_GLINKSETTINGS ioctl. Field layout mirrors the kernel
 * uapi definition so the ioctl can still be issued on newer kernels.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	/* Reported negative by the kernel on the initial size-probe call,
	 * see mlx5_link_update_unlocked_gs(). */
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	uint32_t link_mode_masks[]; /* Flexible array of link mode bitmaps. */
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get interface name from private structure.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE])
{
	struct priv *priv = dev->data->dev_private;
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_type = 0; /* 0: read "dev_port", 1: read "dev_id". */
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	{
		MKSTR(path, "%s/device/net", priv->ibdev_path);

		dir = opendir(path);
		if (dir == NULL) {
			rte_errno = errno;
			return -rte_errno;
		}
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		/* Skip "." and ".." directory entries. */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			/* Restart the scan with the other attribute. */
			rewinddir(dir);
			continue;
		}
		/* dev_id is hexadecimal, dev_port is decimal. */
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		if (dev_port == (priv->port - 1u))
			strlcpy(match, name, sizeof(match));
	}
	closedir(dir);
	if (match[0] == '\0') {
		rte_errno = ENOENT;
		return -rte_errno;
	}
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param req
 *   Request number to pass to ioctl().
 * @param[out] ifr
 *   Interface request structure output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
{
	/* A throwaway datagram socket is enough to carry the ioctl. */
	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	int ret = 0;

	if (sock == -1) {
		rte_errno = errno;
		return -rte_errno;
	}
	ret = mlx5_get_ifname(dev, &ifr->ifr_name);
	if (ret)
		goto error;
	ret = ioctl(sock, req, ifr);
	if (ret == -1) {
		rte_errno = errno;
		goto error;
	}
	close(sock);
	return 0;
error:
	close(sock);
	return -rte_errno;
}

/**
 * Get device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] mtu
 *   MTU value output buffer.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
{
	struct ifreq request;
	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);

	if (ret)
		return ret;
	*mtu = request.ifr_mtu;
	return 0;
}

/**
 * Set device MTU.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mtu
 *   MTU value to set.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
251 */ 252 static int 253 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 254 { 255 struct ifreq request = { .ifr_mtu = mtu, }; 256 257 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 258 } 259 260 /** 261 * Set device flags. 262 * 263 * @param dev 264 * Pointer to Ethernet device. 265 * @param keep 266 * Bitmask for flags that must remain untouched. 267 * @param flags 268 * Bitmask for flags to modify. 269 * 270 * @return 271 * 0 on success, a negative errno value otherwise and rte_errno is set. 272 */ 273 int 274 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 275 { 276 struct ifreq request; 277 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 278 279 if (ret) 280 return ret; 281 request.ifr_flags &= keep; 282 request.ifr_flags |= flags & ~keep; 283 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 284 } 285 286 /** 287 * DPDK callback for Ethernet device configuration. 288 * 289 * @param dev 290 * Pointer to Ethernet device structure. 291 * 292 * @return 293 * 0 on success, a negative errno value otherwise and rte_errno is set. 
294 */ 295 int 296 mlx5_dev_configure(struct rte_eth_dev *dev) 297 { 298 struct priv *priv = dev->data->dev_private; 299 unsigned int rxqs_n = dev->data->nb_rx_queues; 300 unsigned int txqs_n = dev->data->nb_tx_queues; 301 unsigned int i; 302 unsigned int j; 303 unsigned int reta_idx_n; 304 const uint8_t use_app_rss_key = 305 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 306 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev); 307 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 308 uint64_t supp_rx_offloads = 309 (mlx5_get_rx_port_offloads() | 310 mlx5_get_rx_queue_offloads(dev)); 311 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 312 int ret = 0; 313 314 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 315 DRV_LOG(ERR, 316 "port %u some Tx offloads are not supported requested" 317 " 0x%" PRIx64 " supported 0x%" PRIx64, 318 dev->data->port_id, tx_offloads, supp_tx_offloads); 319 rte_errno = ENOTSUP; 320 return -rte_errno; 321 } 322 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 323 DRV_LOG(ERR, 324 "port %u some Rx offloads are not supported requested" 325 " 0x%" PRIx64 " supported 0x%" PRIx64, 326 dev->data->port_id, rx_offloads, supp_rx_offloads); 327 rte_errno = ENOTSUP; 328 return -rte_errno; 329 } 330 if (use_app_rss_key && 331 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 332 rss_hash_default_key_len)) { 333 DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long", 334 dev->data->port_id, rss_hash_default_key_len); 335 rte_errno = EINVAL; 336 return -rte_errno; 337 } 338 priv->rss_conf.rss_key = 339 rte_realloc(priv->rss_conf.rss_key, 340 rss_hash_default_key_len, 0); 341 if (!priv->rss_conf.rss_key) { 342 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 343 dev->data->port_id, rxqs_n); 344 rte_errno = ENOMEM; 345 return -rte_errno; 346 } 347 memcpy(priv->rss_conf.rss_key, 348 use_app_rss_key ? 
349 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 350 rss_hash_default_key, 351 rss_hash_default_key_len); 352 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 353 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 354 priv->rxqs = (void *)dev->data->rx_queues; 355 priv->txqs = (void *)dev->data->tx_queues; 356 if (txqs_n != priv->txqs_n) { 357 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 358 dev->data->port_id, priv->txqs_n, txqs_n); 359 priv->txqs_n = txqs_n; 360 } 361 if (rxqs_n > priv->config.ind_table_max_size) { 362 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 363 dev->data->port_id, rxqs_n); 364 rte_errno = EINVAL; 365 return -rte_errno; 366 } 367 if (rxqs_n == priv->rxqs_n) 368 return 0; 369 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 370 dev->data->port_id, priv->rxqs_n, rxqs_n); 371 priv->rxqs_n = rxqs_n; 372 /* If the requested number of RX queues is not a power of two, use the 373 * maximum indirection table size for better balancing. 374 * The result is always rounded to the next power of two. */ 375 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 376 priv->config.ind_table_max_size : 377 rxqs_n)); 378 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 379 if (ret) 380 return ret; 381 /* When the number of RX queues is not a power of two, the remaining 382 * table entries are padded with reused WQs and hashes are not spread 383 * uniformly. */ 384 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 385 (*priv->reta_idx)[i] = j; 386 if (++j == rxqs_n) 387 j = 0; 388 } 389 return 0; 390 } 391 392 /** 393 * DPDK callback to get information about the device. 394 * 395 * @param dev 396 * Pointer to Ethernet device structure. 397 * @param[out] info 398 * Info structure output buffer. 
399 */ 400 void 401 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 402 { 403 struct priv *priv = dev->data->dev_private; 404 struct mlx5_dev_config *config = &priv->config; 405 unsigned int max; 406 char ifname[IF_NAMESIZE]; 407 408 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 409 /* FIXME: we should ask the device for these values. */ 410 info->min_rx_bufsize = 32; 411 info->max_rx_pktlen = 65536; 412 /* 413 * Since we need one CQ per QP, the limit is the minimum number 414 * between the two values. 415 */ 416 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 417 priv->device_attr.orig_attr.max_qp); 418 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 419 if (max >= 65535) 420 max = 65535; 421 info->max_rx_queues = max; 422 info->max_tx_queues = max; 423 info->max_mac_addrs = RTE_DIM(priv->mac); 424 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 425 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 426 info->rx_queue_offload_capa); 427 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 428 if (mlx5_get_ifname(dev, &ifname) == 0) 429 info->if_index = if_nametoindex(ifname); 430 info->reta_size = priv->reta_idx_n ? 431 priv->reta_idx_n : config->ind_table_max_size; 432 info->hash_key_size = rss_hash_default_key_len; 433 info->speed_capa = priv->link_speed_capa; 434 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 435 } 436 437 /** 438 * Get supported packet types. 439 * 440 * @param dev 441 * Pointer to Ethernet device structure. 442 * 443 * @return 444 * A pointer to the supported Packet types array. 
*/
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	/* Only the regular and vectorized Rx bursts fill these types. */
	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
			       struct rte_eth_link *link)
{
	struct priv *priv = dev->data->dev_private;
	struct ethtool_cmd edata = {
		.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */
	};
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	int link_speed = 0;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	memset(&dev_link, 0, sizeof(dev_link));
	/* Link is up only when administratively up AND carrier present. */
	dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING));
	ifr.ifr_data = (void *)&edata;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(WARNING,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	link_speed = ethtool_cmd_speed(&edata);
	/* -1 means the speed is unknown; report it as 0. */
	if (link_speed == -1)
		dev_link.link_speed = 0;
	else
		dev_link.link_speed = link_speed;
	priv->link_speed_capa = 0;
	if (edata.supported & SUPPORTED_Autoneg)
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (edata.supported & (SUPPORTED_1000baseT_Full |
			       SUPPORTED_1000baseKX_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (edata.supported & SUPPORTED_10000baseKR_Full)
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (edata.supported & (SUPPORTED_40000baseKR4_Full |
			       SUPPORTED_40000baseCR4_Full |
			       SUPPORTED_40000baseSR4_Full |
			       SUPPORTED_40000baseLR4_Full))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
			ETH_LINK_SPEED_FIXED);
	/* A speed without link (or link without speed) is a transient
	 * state: report EAGAIN so the caller retries. */
	if ((dev_link.link_speed && !dev_link.link_status) ||
	    (!dev_link.link_speed && dev_link.link_status)) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * Retrieve physical link information (unlocked version using new ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] link
 *   Storage for current link status.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
			     struct rte_eth_link *link)
{
	struct priv *priv = dev->data->dev_private;
	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
	struct ifreq ifr;
	struct rte_eth_link dev_link;
	uint64_t sc;
	int ret;

	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
	if (ret) {
		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	memset(&dev_link, 0, sizeof(dev_link));
	dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
				(ifr.ifr_flags & IFF_RUNNING));
	/* First handshake call: the kernel answers with the (negated)
	 * number of 32-bit words needed per link mode mask. */
	ifr.ifr_data = (void *)&gcmd;
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;

	/* VLA sized for the header plus the three link mode masks that
	 * follow it (factor 3 per the ethtool ABI). */
	alignas(struct ethtool_link_settings)
	uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
		     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
	struct ethtool_link_settings *ecmd = (void *)data;

	*ecmd = gcmd;
	ifr.ifr_data = (void *)ecmd;
	/* Second call with the properly sized buffer. */
	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
	if (ret) {
		DRV_LOG(DEBUG,
			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
			" failed: %s",
			dev->data->port_id, strerror(rte_errno));
		return ret;
	}
	dev_link.link_speed = ecmd->speed;
	/* Only the first 64 supported-link-mode bits are examined below. */
	sc = ecmd->link_mode_masks[0] |
		((uint64_t)ecmd->link_mode_masks[1] << 32);
	priv->link_speed_capa = 0;
	if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
		priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_1G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_10G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_20G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_40G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_56G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_25G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_50G;
	if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |
		  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))
		priv->link_speed_capa |= ETH_LINK_SPEED_100G;
	dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
				ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				  ETH_LINK_SPEED_FIXED);
	/* Inconsistent speed/status pair: transient state, let the caller
	 * retry via EAGAIN. */
	if ((dev_link.link_speed && !dev_link.link_status) ||
	    (!dev_link.link_speed && dev_link.link_status)) {
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	*link = dev_link;
	return 0;
}

/**
 * DPDK callback to retrieve physical link information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion.
 *
 * @return
 *   0 if link status was not updated, positive if it was, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	int ret;
	struct rte_eth_link dev_link;
	time_t start_time = time(NULL);

	do {
		/* Try the legacy ETHTOOL_GSET ioctl first, fall back to
		 * ETHTOOL_GLINKSETTINGS when it fails. */
		ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
		if (ret)
			ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
		if (ret == 0)
			break;
		/* Handle wait to complete situation. */
		if (wait_to_complete && ret == -EAGAIN) {
			if (abs((int)difftime(time(NULL), start_time)) <
			    MLX5_LINK_STATUS_TIMEOUT) {
				usleep(0);
				continue;
			} else {
				rte_errno = EBUSY;
				return -rte_errno;
			}
		} else if (ret < 0) {
			return ret;
		}
	} while (wait_to_complete);
	/* Nonzero return means the cached link information changed. */
	ret = !!memcmp(&dev->data->dev_link, &dev_link,
		       sizeof(struct rte_eth_link));
	dev->data->dev_link = dev_link;
	return ret;
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
709 */ 710 int 711 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 712 { 713 struct priv *priv = dev->data->dev_private; 714 uint16_t kern_mtu = 0; 715 int ret; 716 717 ret = mlx5_get_mtu(dev, &kern_mtu); 718 if (ret) 719 return ret; 720 /* Set kernel interface MTU first. */ 721 ret = mlx5_set_mtu(dev, mtu); 722 if (ret) 723 return ret; 724 ret = mlx5_get_mtu(dev, &kern_mtu); 725 if (ret) 726 return ret; 727 if (kern_mtu == mtu) { 728 priv->mtu = mtu; 729 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 730 dev->data->port_id, mtu); 731 return 0; 732 } 733 rte_errno = EAGAIN; 734 return -rte_errno; 735 } 736 737 /** 738 * DPDK callback to get flow control status. 739 * 740 * @param dev 741 * Pointer to Ethernet device structure. 742 * @param[out] fc_conf 743 * Flow control output buffer. 744 * 745 * @return 746 * 0 on success, a negative errno value otherwise and rte_errno is set. 747 */ 748 int 749 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 750 { 751 struct ifreq ifr; 752 struct ethtool_pauseparam ethpause = { 753 .cmd = ETHTOOL_GPAUSEPARAM 754 }; 755 int ret; 756 757 ifr.ifr_data = (void *)ðpause; 758 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 759 if (ret) { 760 DRV_LOG(WARNING, 761 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 762 " %s", 763 dev->data->port_id, strerror(rte_errno)); 764 return ret; 765 } 766 fc_conf->autoneg = ethpause.autoneg; 767 if (ethpause.rx_pause && ethpause.tx_pause) 768 fc_conf->mode = RTE_FC_FULL; 769 else if (ethpause.rx_pause) 770 fc_conf->mode = RTE_FC_RX_PAUSE; 771 else if (ethpause.tx_pause) 772 fc_conf->mode = RTE_FC_TX_PAUSE; 773 else 774 fc_conf->mode = RTE_FC_NONE; 775 return 0; 776 } 777 778 /** 779 * DPDK callback to modify flow control parameters. 780 * 781 * @param dev 782 * Pointer to Ethernet device structure. 783 * @param[in] fc_conf 784 * Flow control parameters. 785 * 786 * @return 787 * 0 on success, a negative errno value otherwise and rte_errno is set. 
788 */ 789 int 790 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 791 { 792 struct ifreq ifr; 793 struct ethtool_pauseparam ethpause = { 794 .cmd = ETHTOOL_SPAUSEPARAM 795 }; 796 int ret; 797 798 ifr.ifr_data = (void *)ðpause; 799 ethpause.autoneg = fc_conf->autoneg; 800 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 801 (fc_conf->mode & RTE_FC_RX_PAUSE)) 802 ethpause.rx_pause = 1; 803 else 804 ethpause.rx_pause = 0; 805 806 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 807 (fc_conf->mode & RTE_FC_TX_PAUSE)) 808 ethpause.tx_pause = 1; 809 else 810 ethpause.tx_pause = 0; 811 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 812 if (ret) { 813 DRV_LOG(WARNING, 814 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 815 " failed: %s", 816 dev->data->port_id, strerror(rte_errno)); 817 return ret; 818 } 819 return 0; 820 } 821 822 /** 823 * Get PCI information from struct ibv_device. 824 * 825 * @param device 826 * Pointer to Ethernet device structure. 827 * @param[out] pci_addr 828 * PCI bus address output buffer. 829 * 830 * @return 831 * 0 on success, a negative errno value otherwise and rte_errno is set. 832 */ 833 int 834 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 835 struct rte_pci_addr *pci_addr) 836 { 837 FILE *file; 838 char line[32]; 839 MKSTR(path, "%s/device/uevent", device->ibdev_path); 840 841 file = fopen(path, "rb"); 842 if (file == NULL) { 843 rte_errno = errno; 844 return -rte_errno; 845 } 846 while (fgets(line, sizeof(line), file) == line) { 847 size_t len = strlen(line); 848 int ret; 849 850 /* Truncate long lines. */ 851 if (len == (sizeof(line) - 1)) 852 while (line[(len - 1)] != '\n') { 853 ret = fgetc(file); 854 if (ret == EOF) 855 break; 856 line[(len - 1)] = ret; 857 } 858 /* Extract information. 
*/ 859 if (sscanf(line, 860 "PCI_SLOT_NAME=" 861 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 862 &pci_addr->domain, 863 &pci_addr->bus, 864 &pci_addr->devid, 865 &pci_addr->function) == 4) { 866 ret = 0; 867 break; 868 } 869 } 870 fclose(file); 871 return 0; 872 } 873 874 /** 875 * Device status handler. 876 * 877 * @param dev 878 * Pointer to Ethernet device. 879 * @param events 880 * Pointer to event flags holder. 881 * 882 * @return 883 * Events bitmap of callback process which can be called immediately. 884 */ 885 static uint32_t 886 mlx5_dev_status_handler(struct rte_eth_dev *dev) 887 { 888 struct priv *priv = dev->data->dev_private; 889 struct ibv_async_event event; 890 uint32_t ret = 0; 891 892 if (mlx5_link_update(dev, 0) == -EAGAIN) { 893 usleep(0); 894 return 0; 895 } 896 /* Read all message and acknowledge them. */ 897 for (;;) { 898 if (mlx5_glue->get_async_event(priv->ctx, &event)) 899 break; 900 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 901 event.event_type == IBV_EVENT_PORT_ERR) && 902 (dev->data->dev_conf.intr_conf.lsc == 1)) 903 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 904 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 905 dev->data->dev_conf.intr_conf.rmv == 1) 906 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 907 else 908 DRV_LOG(DEBUG, 909 "port %u event type %d on not handled", 910 dev->data->port_id, event.event_type); 911 mlx5_glue->ack_async_event(&event); 912 } 913 return ret; 914 } 915 916 /** 917 * Handle interrupts from the NIC. 918 * 919 * @param[in] intr_handle 920 * Interrupt handler. 921 * @param cb_arg 922 * Callback argument. 
923 */ 924 void 925 mlx5_dev_interrupt_handler(void *cb_arg) 926 { 927 struct rte_eth_dev *dev = cb_arg; 928 uint32_t events; 929 930 events = mlx5_dev_status_handler(dev); 931 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 932 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 933 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 934 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 935 } 936 937 /** 938 * Handle interrupts from the socket. 939 * 940 * @param cb_arg 941 * Callback argument. 942 */ 943 static void 944 mlx5_dev_handler_socket(void *cb_arg) 945 { 946 struct rte_eth_dev *dev = cb_arg; 947 948 mlx5_socket_handle(dev); 949 } 950 951 /** 952 * Uninstall interrupt handler. 953 * 954 * @param dev 955 * Pointer to Ethernet device. 956 */ 957 void 958 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 959 { 960 struct priv *priv = dev->data->dev_private; 961 962 if (dev->data->dev_conf.intr_conf.lsc || 963 dev->data->dev_conf.intr_conf.rmv) 964 rte_intr_callback_unregister(&priv->intr_handle, 965 mlx5_dev_interrupt_handler, dev); 966 if (priv->primary_socket) 967 rte_intr_callback_unregister(&priv->intr_handle_socket, 968 mlx5_dev_handler_socket, dev); 969 priv->intr_handle.fd = 0; 970 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 971 priv->intr_handle_socket.fd = 0; 972 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 973 } 974 975 /** 976 * Install interrupt handler. 977 * 978 * @param dev 979 * Pointer to Ethernet device. 
980 */ 981 void 982 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 983 { 984 struct priv *priv = dev->data->dev_private; 985 int ret; 986 int flags; 987 988 assert(priv->ctx->async_fd > 0); 989 flags = fcntl(priv->ctx->async_fd, F_GETFL); 990 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 991 if (ret) { 992 DRV_LOG(INFO, 993 "port %u failed to change file descriptor async event" 994 " queue", 995 dev->data->port_id); 996 dev->data->dev_conf.intr_conf.lsc = 0; 997 dev->data->dev_conf.intr_conf.rmv = 0; 998 } 999 if (dev->data->dev_conf.intr_conf.lsc || 1000 dev->data->dev_conf.intr_conf.rmv) { 1001 priv->intr_handle.fd = priv->ctx->async_fd; 1002 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1003 rte_intr_callback_register(&priv->intr_handle, 1004 mlx5_dev_interrupt_handler, dev); 1005 } 1006 ret = mlx5_socket_init(dev); 1007 if (ret) 1008 DRV_LOG(ERR, "port %u cannot initialise socket: %s", 1009 dev->data->port_id, strerror(rte_errno)); 1010 else if (priv->primary_socket) { 1011 priv->intr_handle_socket.fd = priv->primary_socket; 1012 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1013 rte_intr_callback_register(&priv->intr_handle_socket, 1014 mlx5_dev_handler_socket, dev); 1015 } 1016 } 1017 1018 /** 1019 * DPDK callback to bring the link DOWN. 1020 * 1021 * @param dev 1022 * Pointer to Ethernet device structure. 1023 * 1024 * @return 1025 * 0 on success, a negative errno value otherwise and rte_errno is set. 1026 */ 1027 int 1028 mlx5_set_link_down(struct rte_eth_dev *dev) 1029 { 1030 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1031 } 1032 1033 /** 1034 * DPDK callback to bring the link UP. 1035 * 1036 * @param dev 1037 * Pointer to Ethernet device structure. 1038 * 1039 * @return 1040 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1041 */ 1042 int 1043 mlx5_set_link_up(struct rte_eth_dev *dev) 1044 { 1045 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1046 } 1047 1048 /** 1049 * Configure the TX function to use. 1050 * 1051 * @param dev 1052 * Pointer to private data structure. 1053 * 1054 * @return 1055 * Pointer to selected Tx burst function. 1056 */ 1057 eth_tx_burst_t 1058 mlx5_select_tx_function(struct rte_eth_dev *dev) 1059 { 1060 struct priv *priv = dev->data->dev_private; 1061 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1062 struct mlx5_dev_config *config = &priv->config; 1063 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1064 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1065 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1066 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1067 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1068 1069 assert(priv != NULL); 1070 /* Select appropriate TX function. */ 1071 if (vlan_insert || tso) 1072 return tx_pkt_burst; 1073 if (config->mps == MLX5_MPW_ENHANCED) { 1074 if (mlx5_check_vec_tx_support(dev) > 0) { 1075 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1076 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1077 else 1078 tx_pkt_burst = mlx5_tx_burst_vec; 1079 DRV_LOG(DEBUG, 1080 "port %u selected enhanced MPW Tx vectorized" 1081 " function", 1082 dev->data->port_id); 1083 } else { 1084 tx_pkt_burst = mlx5_tx_burst_empw; 1085 DRV_LOG(DEBUG, 1086 "port %u selected enhanced MPW Tx function", 1087 dev->data->port_id); 1088 } 1089 } else if (config->mps && (config->txq_inline > 0)) { 1090 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1091 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", 1092 dev->data->port_id); 1093 } else if (config->mps) { 1094 tx_pkt_burst = mlx5_tx_burst_mpw; 1095 DRV_LOG(DEBUG, "port %u selected MPW Tx function", 1096 dev->data->port_id); 1097 } 1098 return tx_pkt_burst; 1099 } 1100 1101 /** 1102 * Configure the RX function to use. 1103 * 1104 * @param dev 1105 * Pointer to private data structure. 
1106 * 1107 * @return 1108 * Pointer to selected Rx burst function. 1109 */ 1110 eth_rx_burst_t 1111 mlx5_select_rx_function(struct rte_eth_dev *dev) 1112 { 1113 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1114 1115 assert(dev != NULL); 1116 if (mlx5_check_vec_rx_support(dev) > 0) { 1117 rx_pkt_burst = mlx5_rx_burst_vec; 1118 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1119 dev->data->port_id); 1120 } 1121 return rx_pkt_burst; 1122 } 1123 1124 /** 1125 * Check if mlx5 device was removed. 1126 * 1127 * @param dev 1128 * Pointer to Ethernet device structure. 1129 * 1130 * @return 1131 * 1 when device is removed, otherwise 0. 1132 */ 1133 int 1134 mlx5_is_removed(struct rte_eth_dev *dev) 1135 { 1136 struct ibv_device_attr device_attr; 1137 struct priv *priv = dev->data->dev_private; 1138 1139 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1140 return 1; 1141 return 0; 1142 } 1143