1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox. 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <fcntl.h> 25 #include <stdalign.h> 26 #include <sys/un.h> 27 #include <time.h> 28 29 #include <rte_atomic.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_bus_pci.h> 32 #include <rte_mbuf.h> 33 #include <rte_common.h> 34 #include <rte_interrupts.h> 35 #include <rte_malloc.h> 36 37 #include "mlx5.h" 38 #include "mlx5_glue.h" 39 #include "mlx5_rxtx.h" 40 #include "mlx5_utils.h" 41 42 /* Add defines in case the running kernel is not the same as user headers. */ 43 #ifndef ETHTOOL_GLINKSETTINGS 44 struct ethtool_link_settings { 45 uint32_t cmd; 46 uint32_t speed; 47 uint8_t duplex; 48 uint8_t port; 49 uint8_t phy_address; 50 uint8_t autoneg; 51 uint8_t mdio_support; 52 uint8_t eth_to_mdix; 53 uint8_t eth_tp_mdix_ctrl; 54 int8_t link_mode_masks_nwords; 55 uint32_t reserved[8]; 56 uint32_t link_mode_masks[]; 57 }; 58 59 #define ETHTOOL_GLINKSETTINGS 0x0000004c 60 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 61 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 62 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 63 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 64 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 65 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 66 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 67 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 68 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 69 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 70 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 71 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 72 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 73 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 74 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 75 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 76 #endif 77 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 78 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 79 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 80 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 81 #endif 82 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 83 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 84 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 85 #endif 86 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 87 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 88 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 89 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 90 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 91 #endif 92 93 /** 94 * Get interface name from private structure. 95 * 96 * @param[in] dev 97 * Pointer to Ethernet device. 98 * @param[out] ifname 99 * Interface name output buffer. 100 * 101 * @return 102 * 0 on success, a negative errno value otherwise and rte_errno is set. 103 */ 104 int 105 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 106 { 107 struct priv *priv = dev->data->dev_private; 108 DIR *dir; 109 struct dirent *dent; 110 unsigned int dev_type = 0; 111 unsigned int dev_port_prev = ~0u; 112 char match[IF_NAMESIZE] = ""; 113 114 { 115 MKSTR(path, "%s/device/net", priv->ibdev_path); 116 117 dir = opendir(path); 118 if (dir == NULL) { 119 rte_errno = errno; 120 return -rte_errno; 121 } 122 } 123 while ((dent = readdir(dir)) != NULL) { 124 char *name = dent->d_name; 125 FILE *file; 126 unsigned int dev_port; 127 int r; 128 129 if ((name[0] == '.') && 130 ((name[1] == '\0') || 131 ((name[1] == '.') && (name[2] == '\0')))) 132 continue; 133 134 MKSTR(path, "%s/device/net/%s/%s", 135 priv->ibdev_path, name, 136 (dev_type ? "dev_id" : "dev_port")); 137 138 file = fopen(path, "rb"); 139 if (file == NULL) { 140 if (errno != ENOENT) 141 continue; 142 /* 143 * Switch to dev_id when dev_port does not exist as 144 * is the case with Linux kernel versions < 3.15. 145 */ 146 try_dev_id: 147 match[0] = '\0'; 148 if (dev_type) 149 break; 150 dev_type = 1; 151 dev_port_prev = ~0u; 152 rewinddir(dir); 153 continue; 154 } 155 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 156 fclose(file); 157 if (r != 1) 158 continue; 159 /* 160 * Switch to dev_id when dev_port returns the same value for 161 * all ports. May happen when using a MOFED release older than 162 * 3.0 with a Linux kernel >= 3.15. 163 */ 164 if (dev_port == dev_port_prev) 165 goto try_dev_id; 166 dev_port_prev = dev_port; 167 if (dev_port == (priv->port - 1u)) 168 snprintf(match, sizeof(match), "%s", name); 169 } 170 closedir(dir); 171 if (match[0] == '\0') { 172 rte_errno = ENOENT; 173 return -rte_errno; 174 } 175 strncpy(*ifname, match, sizeof(*ifname)); 176 return 0; 177 } 178 179 /** 180 * Perform ifreq ioctl() on associated Ethernet device. 181 * 182 * @param[in] dev 183 * Pointer to Ethernet device. 184 * @param req 185 * Request number to pass to ioctl(). 186 * @param[out] ifr 187 * Interface request structure output buffer. 188 * 189 * @return 190 * 0 on success, a negative errno value otherwise and rte_errno is set. 191 */ 192 int 193 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 194 { 195 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 196 int ret = 0; 197 198 if (sock == -1) { 199 rte_errno = errno; 200 return -rte_errno; 201 } 202 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 203 if (ret) 204 goto error; 205 ret = ioctl(sock, req, ifr); 206 if (ret == -1) { 207 rte_errno = errno; 208 goto error; 209 } 210 close(sock); 211 return 0; 212 error: 213 close(sock); 214 return -rte_errno; 215 } 216 217 /** 218 * Get device MTU. 219 * 220 * @param dev 221 * Pointer to Ethernet device. 222 * @param[out] mtu 223 * MTU value output buffer. 224 * 225 * @return 226 * 0 on success, a negative errno value otherwise and rte_errno is set. 227 */ 228 int 229 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 230 { 231 struct ifreq request; 232 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 233 234 if (ret) 235 return ret; 236 *mtu = request.ifr_mtu; 237 return 0; 238 } 239 240 /** 241 * Set device MTU. 242 * 243 * @param dev 244 * Pointer to Ethernet device. 245 * @param mtu 246 * MTU value to set. 247 * 248 * @return 249 * 0 on success, a negative errno value otherwise and rte_errno is set. 250 */ 251 static int 252 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 253 { 254 struct ifreq request = { .ifr_mtu = mtu, }; 255 256 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 257 } 258 259 /** 260 * Set device flags. 261 * 262 * @param dev 263 * Pointer to Ethernet device. 264 * @param keep 265 * Bitmask for flags that must remain untouched. 266 * @param flags 267 * Bitmask for flags to modify. 268 * 269 * @return 270 * 0 on success, a negative errno value otherwise and rte_errno is set. 271 */ 272 int 273 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 274 { 275 struct ifreq request; 276 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 277 278 if (ret) 279 return ret; 280 request.ifr_flags &= keep; 281 request.ifr_flags |= flags & ~keep; 282 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 283 } 284 285 /** 286 * DPDK callback for Ethernet device configuration. 287 * 288 * @param dev 289 * Pointer to Ethernet device structure. 290 * 291 * @return 292 * 0 on success, a negative errno value otherwise and rte_errno is set. 293 */ 294 int 295 mlx5_dev_configure(struct rte_eth_dev *dev) 296 { 297 struct priv *priv = dev->data->dev_private; 298 unsigned int rxqs_n = dev->data->nb_rx_queues; 299 unsigned int txqs_n = dev->data->nb_tx_queues; 300 unsigned int i; 301 unsigned int j; 302 unsigned int reta_idx_n; 303 const uint8_t use_app_rss_key = 304 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 305 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev); 306 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 307 uint64_t supp_rx_offloads = 308 (mlx5_get_rx_port_offloads() | 309 mlx5_get_rx_queue_offloads(dev)); 310 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 311 int ret = 0; 312 313 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 314 DRV_LOG(ERR, 315 "port %u some Tx offloads are not supported requested" 316 " 0x%" PRIx64 " supported 0x%" PRIx64, 317 dev->data->port_id, tx_offloads, supp_tx_offloads); 318 rte_errno = ENOTSUP; 319 return -rte_errno; 320 } 321 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 322 DRV_LOG(ERR, 323 "port %u some Rx offloads are not supported requested" 324 " 0x%" PRIx64 " supported 0x%" PRIx64, 325 dev->data->port_id, rx_offloads, supp_rx_offloads); 326 rte_errno = ENOTSUP; 327 return -rte_errno; 328 } 329 if (use_app_rss_key && 330 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 331 rss_hash_default_key_len)) { 332 /* MLX5 RSS only support 40bytes key. */ 333 rte_errno = EINVAL; 334 return -rte_errno; 335 } 336 priv->rss_conf.rss_key = 337 rte_realloc(priv->rss_conf.rss_key, 338 rss_hash_default_key_len, 0); 339 if (!priv->rss_conf.rss_key) { 340 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 341 dev->data->port_id, rxqs_n); 342 rte_errno = ENOMEM; 343 return -rte_errno; 344 } 345 memcpy(priv->rss_conf.rss_key, 346 use_app_rss_key ? 347 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 348 rss_hash_default_key, 349 rss_hash_default_key_len); 350 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 351 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 352 priv->rxqs = (void *)dev->data->rx_queues; 353 priv->txqs = (void *)dev->data->tx_queues; 354 if (txqs_n != priv->txqs_n) { 355 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 356 dev->data->port_id, priv->txqs_n, txqs_n); 357 priv->txqs_n = txqs_n; 358 } 359 if (rxqs_n > priv->config.ind_table_max_size) { 360 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 361 dev->data->port_id, rxqs_n); 362 rte_errno = EINVAL; 363 return -rte_errno; 364 } 365 if (rxqs_n == priv->rxqs_n) 366 return 0; 367 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 368 dev->data->port_id, priv->rxqs_n, rxqs_n); 369 priv->rxqs_n = rxqs_n; 370 /* If the requested number of RX queues is not a power of two, use the 371 * maximum indirection table size for better balancing. 372 * The result is always rounded to the next power of two. */ 373 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 374 priv->config.ind_table_max_size : 375 rxqs_n)); 376 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 377 if (ret) 378 return ret; 379 /* When the number of RX queues is not a power of two, the remaining 380 * table entries are padded with reused WQs and hashes are not spread 381 * uniformly. */ 382 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 383 (*priv->reta_idx)[i] = j; 384 if (++j == rxqs_n) 385 j = 0; 386 } 387 return 0; 388 } 389 390 /** 391 * DPDK callback to get information about the device. 392 * 393 * @param dev 394 * Pointer to Ethernet device structure. 395 * @param[out] info 396 * Info structure output buffer. 397 */ 398 void 399 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 400 { 401 struct priv *priv = dev->data->dev_private; 402 struct mlx5_dev_config *config = &priv->config; 403 unsigned int max; 404 char ifname[IF_NAMESIZE]; 405 406 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 407 /* FIXME: we should ask the device for these values. */ 408 info->min_rx_bufsize = 32; 409 info->max_rx_pktlen = 65536; 410 /* 411 * Since we need one CQ per QP, the limit is the minimum number 412 * between the two values. 413 */ 414 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 415 priv->device_attr.orig_attr.max_qp); 416 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 417 if (max >= 65535) 418 max = 65535; 419 info->max_rx_queues = max; 420 info->max_tx_queues = max; 421 info->max_mac_addrs = RTE_DIM(priv->mac); 422 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 423 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 424 info->rx_queue_offload_capa); 425 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 426 if (mlx5_get_ifname(dev, &ifname) == 0) 427 info->if_index = if_nametoindex(ifname); 428 info->reta_size = priv->reta_idx_n ? 429 priv->reta_idx_n : config->ind_table_max_size; 430 info->hash_key_size = priv->rss_conf.rss_key_len; 431 info->speed_capa = priv->link_speed_capa; 432 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 433 } 434 435 /** 436 * Get supported packet types. 437 * 438 * @param dev 439 * Pointer to Ethernet device structure. 440 * 441 * @return 442 * A pointer to the supported Packet types array. 443 */ 444 const uint32_t * 445 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 446 { 447 static const uint32_t ptypes[] = { 448 /* refers to rxq_cq_to_pkt_type() */ 449 RTE_PTYPE_L2_ETHER, 450 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 451 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 452 RTE_PTYPE_L4_NONFRAG, 453 RTE_PTYPE_L4_FRAG, 454 RTE_PTYPE_L4_TCP, 455 RTE_PTYPE_L4_UDP, 456 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 457 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 458 RTE_PTYPE_INNER_L4_NONFRAG, 459 RTE_PTYPE_INNER_L4_FRAG, 460 RTE_PTYPE_INNER_L4_TCP, 461 RTE_PTYPE_INNER_L4_UDP, 462 RTE_PTYPE_UNKNOWN 463 }; 464 465 if (dev->rx_pkt_burst == mlx5_rx_burst || 466 dev->rx_pkt_burst == mlx5_rx_burst_vec) 467 return ptypes; 468 return NULL; 469 } 470 471 /** 472 * DPDK callback to retrieve physical link information. 473 * 474 * @param dev 475 * Pointer to Ethernet device structure. 476 * @param[out] link 477 * Storage for current link status. 478 * 479 * @return 480 * 0 on success, a negative errno value otherwise and rte_errno is set. 481 */ 482 static int 483 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 484 struct rte_eth_link *link) 485 { 486 struct priv *priv = dev->data->dev_private; 487 struct ethtool_cmd edata = { 488 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 489 }; 490 struct ifreq ifr; 491 struct rte_eth_link dev_link; 492 int link_speed = 0; 493 int ret; 494 495 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 496 if (ret) { 497 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 498 dev->data->port_id, strerror(rte_errno)); 499 return ret; 500 } 501 memset(&dev_link, 0, sizeof(dev_link)); 502 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 503 (ifr.ifr_flags & IFF_RUNNING)); 504 ifr.ifr_data = (void *)&edata; 505 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 506 if (ret) { 507 DRV_LOG(WARNING, 508 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 509 dev->data->port_id, strerror(rte_errno)); 510 return ret; 511 } 512 link_speed = ethtool_cmd_speed(&edata); 513 if (link_speed == -1) 514 dev_link.link_speed = 0; 515 else 516 dev_link.link_speed = link_speed; 517 priv->link_speed_capa = 0; 518 if (edata.supported & SUPPORTED_Autoneg) 519 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 520 if (edata.supported & (SUPPORTED_1000baseT_Full | 521 SUPPORTED_1000baseKX_Full)) 522 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 523 if (edata.supported & SUPPORTED_10000baseKR_Full) 524 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 525 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 526 SUPPORTED_40000baseCR4_Full | 527 SUPPORTED_40000baseSR4_Full | 528 SUPPORTED_40000baseLR4_Full)) 529 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 530 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 531 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 532 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 533 ETH_LINK_SPEED_FIXED); 534 if ((dev_link.link_speed && !dev_link.link_status) || 535 (!dev_link.link_speed && dev_link.link_status)) { 536 rte_errno = EAGAIN; 537 return -rte_errno; 538 } 539 *link = dev_link; 540 return 0; 541 } 542 543 /** 544 * Retrieve physical link information (unlocked version using new ioctl). 545 * 546 * @param dev 547 * Pointer to Ethernet device structure. 548 * @param[out] link 549 * Storage for current link status. 550 * 551 * @return 552 * 0 on success, a negative errno value otherwise and rte_errno is set. 553 */ 554 static int 555 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 556 struct rte_eth_link *link) 557 558 { 559 struct priv *priv = dev->data->dev_private; 560 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 561 struct ifreq ifr; 562 struct rte_eth_link dev_link; 563 uint64_t sc; 564 int ret; 565 566 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 567 if (ret) { 568 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 569 dev->data->port_id, strerror(rte_errno)); 570 return ret; 571 } 572 memset(&dev_link, 0, sizeof(dev_link)); 573 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 574 (ifr.ifr_flags & IFF_RUNNING)); 575 ifr.ifr_data = (void *)&gcmd; 576 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 577 if (ret) { 578 DRV_LOG(DEBUG, 579 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 580 " failed: %s", 581 dev->data->port_id, strerror(rte_errno)); 582 return ret; 583 } 584 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 585 586 alignas(struct ethtool_link_settings) 587 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 588 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 589 struct ethtool_link_settings *ecmd = (void *)data; 590 591 *ecmd = gcmd; 592 ifr.ifr_data = (void *)ecmd; 593 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 594 if (ret) { 595 DRV_LOG(DEBUG, 596 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 597 " failed: %s", 598 dev->data->port_id, strerror(rte_errno)); 599 return ret; 600 } 601 dev_link.link_speed = ecmd->speed; 602 sc = ecmd->link_mode_masks[0] | 603 ((uint64_t)ecmd->link_mode_masks[1] << 32); 604 priv->link_speed_capa = 0; 605 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 606 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 607 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 608 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 609 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 610 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 611 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 612 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 613 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 614 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 615 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 616 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 617 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 618 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 619 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 620 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 621 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 622 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 623 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 624 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 625 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 626 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 627 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 628 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 629 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 630 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 631 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 632 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 633 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 634 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 635 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 636 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 637 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 638 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 639 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 640 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 641 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 642 ETH_LINK_SPEED_FIXED); 643 if ((dev_link.link_speed && !dev_link.link_status) || 644 (!dev_link.link_speed && dev_link.link_status)) { 645 rte_errno = EAGAIN; 646 return -rte_errno; 647 } 648 *link = dev_link; 649 return 0; 650 } 651 652 /** 653 * DPDK callback to retrieve physical link information. 654 * 655 * @param dev 656 * Pointer to Ethernet device structure. 657 * @param wait_to_complete 658 * Wait for request completion. 659 * 660 * @return 661 * 0 if link status was not updated, positive if it was, a negative errno 662 * value otherwise and rte_errno is set. 663 */ 664 int 665 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 666 { 667 int ret; 668 struct rte_eth_link dev_link; 669 time_t start_time = time(NULL); 670 671 do { 672 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 673 if (ret) 674 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 675 if (ret == 0) 676 break; 677 /* Handle wait to complete situation. */ 678 if (wait_to_complete && ret == -EAGAIN) { 679 if (abs((int)difftime(time(NULL), start_time)) < 680 MLX5_LINK_STATUS_TIMEOUT) { 681 usleep(0); 682 continue; 683 } else { 684 rte_errno = EBUSY; 685 return -rte_errno; 686 } 687 } else if (ret < 0) { 688 return ret; 689 } 690 } while (wait_to_complete); 691 ret = !!memcmp(&dev->data->dev_link, &dev_link, 692 sizeof(struct rte_eth_link)); 693 dev->data->dev_link = dev_link; 694 return ret; 695 } 696 697 /** 698 * DPDK callback to change the MTU. 699 * 700 * @param dev 701 * Pointer to Ethernet device structure. 702 * @param in_mtu 703 * New MTU. 704 * 705 * @return 706 * 0 on success, a negative errno value otherwise and rte_errno is set. 707 */ 708 int 709 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 710 { 711 struct priv *priv = dev->data->dev_private; 712 uint16_t kern_mtu = 0; 713 int ret; 714 715 ret = mlx5_get_mtu(dev, &kern_mtu); 716 if (ret) 717 return ret; 718 /* Set kernel interface MTU first. */ 719 ret = mlx5_set_mtu(dev, mtu); 720 if (ret) 721 return ret; 722 ret = mlx5_get_mtu(dev, &kern_mtu); 723 if (ret) 724 return ret; 725 if (kern_mtu == mtu) { 726 priv->mtu = mtu; 727 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 728 dev->data->port_id, mtu); 729 return 0; 730 } 731 rte_errno = EAGAIN; 732 return -rte_errno; 733 } 734 735 /** 736 * DPDK callback to get flow control status. 737 * 738 * @param dev 739 * Pointer to Ethernet device structure. 740 * @param[out] fc_conf 741 * Flow control output buffer. 742 * 743 * @return 744 * 0 on success, a negative errno value otherwise and rte_errno is set. 745 */ 746 int 747 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 748 { 749 struct ifreq ifr; 750 struct ethtool_pauseparam ethpause = { 751 .cmd = ETHTOOL_GPAUSEPARAM 752 }; 753 int ret; 754 755 ifr.ifr_data = (void *)ðpause; 756 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 757 if (ret) { 758 DRV_LOG(WARNING, 759 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 760 " %s", 761 dev->data->port_id, strerror(rte_errno)); 762 return ret; 763 } 764 fc_conf->autoneg = ethpause.autoneg; 765 if (ethpause.rx_pause && ethpause.tx_pause) 766 fc_conf->mode = RTE_FC_FULL; 767 else if (ethpause.rx_pause) 768 fc_conf->mode = RTE_FC_RX_PAUSE; 769 else if (ethpause.tx_pause) 770 fc_conf->mode = RTE_FC_TX_PAUSE; 771 else 772 fc_conf->mode = RTE_FC_NONE; 773 return 0; 774 } 775 776 /** 777 * DPDK callback to modify flow control parameters. 778 * 779 * @param dev 780 * Pointer to Ethernet device structure. 781 * @param[in] fc_conf 782 * Flow control parameters. 783 * 784 * @return 785 * 0 on success, a negative errno value otherwise and rte_errno is set. 786 */ 787 int 788 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 789 { 790 struct ifreq ifr; 791 struct ethtool_pauseparam ethpause = { 792 .cmd = ETHTOOL_SPAUSEPARAM 793 }; 794 int ret; 795 796 ifr.ifr_data = (void *)ðpause; 797 ethpause.autoneg = fc_conf->autoneg; 798 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 799 (fc_conf->mode & RTE_FC_RX_PAUSE)) 800 ethpause.rx_pause = 1; 801 else 802 ethpause.rx_pause = 0; 803 804 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 805 (fc_conf->mode & RTE_FC_TX_PAUSE)) 806 ethpause.tx_pause = 1; 807 else 808 ethpause.tx_pause = 0; 809 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 810 if (ret) { 811 DRV_LOG(WARNING, 812 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 813 " failed: %s", 814 dev->data->port_id, strerror(rte_errno)); 815 return ret; 816 } 817 return 0; 818 } 819 820 /** 821 * Get PCI information from struct ibv_device. 822 * 823 * @param device 824 * Pointer to Ethernet device structure. 825 * @param[out] pci_addr 826 * PCI bus address output buffer. 827 * 828 * @return 829 * 0 on success, a negative errno value otherwise and rte_errno is set. 830 */ 831 int 832 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 833 struct rte_pci_addr *pci_addr) 834 { 835 FILE *file; 836 char line[32]; 837 MKSTR(path, "%s/device/uevent", device->ibdev_path); 838 839 file = fopen(path, "rb"); 840 if (file == NULL) { 841 rte_errno = errno; 842 return -rte_errno; 843 } 844 while (fgets(line, sizeof(line), file) == line) { 845 size_t len = strlen(line); 846 int ret; 847 848 /* Truncate long lines. */ 849 if (len == (sizeof(line) - 1)) 850 while (line[(len - 1)] != '\n') { 851 ret = fgetc(file); 852 if (ret == EOF) 853 break; 854 line[(len - 1)] = ret; 855 } 856 /* Extract information. */ 857 if (sscanf(line, 858 "PCI_SLOT_NAME=" 859 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 860 &pci_addr->domain, 861 &pci_addr->bus, 862 &pci_addr->devid, 863 &pci_addr->function) == 4) { 864 ret = 0; 865 break; 866 } 867 } 868 fclose(file); 869 return 0; 870 } 871 872 /** 873 * Device status handler. 874 * 875 * @param dev 876 * Pointer to Ethernet device. 877 * @param events 878 * Pointer to event flags holder. 879 * 880 * @return 881 * Events bitmap of callback process which can be called immediately. 882 */ 883 static uint32_t 884 mlx5_dev_status_handler(struct rte_eth_dev *dev) 885 { 886 struct priv *priv = dev->data->dev_private; 887 struct ibv_async_event event; 888 uint32_t ret = 0; 889 890 if (mlx5_link_update(dev, 0) == -EAGAIN) { 891 usleep(0); 892 return 0; 893 } 894 /* Read all message and acknowledge them. */ 895 for (;;) { 896 if (mlx5_glue->get_async_event(priv->ctx, &event)) 897 break; 898 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 899 event.event_type == IBV_EVENT_PORT_ERR) && 900 (dev->data->dev_conf.intr_conf.lsc == 1)) 901 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 902 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 903 dev->data->dev_conf.intr_conf.rmv == 1) 904 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 905 else 906 DRV_LOG(DEBUG, 907 "port %u event type %d on not handled", 908 dev->data->port_id, event.event_type); 909 mlx5_glue->ack_async_event(&event); 910 } 911 return ret; 912 } 913 914 /** 915 * Handle interrupts from the NIC. 916 * 917 * @param[in] intr_handle 918 * Interrupt handler. 919 * @param cb_arg 920 * Callback argument. 921 */ 922 void 923 mlx5_dev_interrupt_handler(void *cb_arg) 924 { 925 struct rte_eth_dev *dev = cb_arg; 926 uint32_t events; 927 928 events = mlx5_dev_status_handler(dev); 929 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 930 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 931 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 932 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 933 } 934 935 /** 936 * Handle interrupts from the socket. 937 * 938 * @param cb_arg 939 * Callback argument. 940 */ 941 static void 942 mlx5_dev_handler_socket(void *cb_arg) 943 { 944 struct rte_eth_dev *dev = cb_arg; 945 946 mlx5_socket_handle(dev); 947 } 948 949 /** 950 * Uninstall interrupt handler. 951 * 952 * @param dev 953 * Pointer to Ethernet device. 954 */ 955 void 956 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 957 { 958 struct priv *priv = dev->data->dev_private; 959 960 if (dev->data->dev_conf.intr_conf.lsc || 961 dev->data->dev_conf.intr_conf.rmv) 962 rte_intr_callback_unregister(&priv->intr_handle, 963 mlx5_dev_interrupt_handler, dev); 964 if (priv->primary_socket) 965 rte_intr_callback_unregister(&priv->intr_handle_socket, 966 mlx5_dev_handler_socket, dev); 967 priv->intr_handle.fd = 0; 968 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 969 priv->intr_handle_socket.fd = 0; 970 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 971 } 972 973 /** 974 * Install interrupt handler. 975 * 976 * @param dev 977 * Pointer to Ethernet device. 978 */ 979 void 980 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 981 { 982 struct priv *priv = dev->data->dev_private; 983 int ret; 984 int flags; 985 986 assert(priv->ctx->async_fd > 0); 987 flags = fcntl(priv->ctx->async_fd, F_GETFL); 988 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 989 if (ret) { 990 DRV_LOG(INFO, 991 "port %u failed to change file descriptor async event" 992 " queue", 993 dev->data->port_id); 994 dev->data->dev_conf.intr_conf.lsc = 0; 995 dev->data->dev_conf.intr_conf.rmv = 0; 996 } 997 if (dev->data->dev_conf.intr_conf.lsc || 998 dev->data->dev_conf.intr_conf.rmv) { 999 priv->intr_handle.fd = priv->ctx->async_fd; 1000 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1001 rte_intr_callback_register(&priv->intr_handle, 1002 mlx5_dev_interrupt_handler, dev); 1003 } 1004 ret = mlx5_socket_init(dev); 1005 if (ret) 1006 DRV_LOG(ERR, "port %u cannot initialise socket: %s", 1007 dev->data->port_id, strerror(rte_errno)); 1008 else if (priv->primary_socket) { 1009 priv->intr_handle_socket.fd = priv->primary_socket; 1010 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1011 rte_intr_callback_register(&priv->intr_handle_socket, 1012 mlx5_dev_handler_socket, dev); 1013 } 1014 } 1015 1016 /** 1017 * DPDK callback to bring the link DOWN. 1018 * 1019 * @param dev 1020 * Pointer to Ethernet device structure. 1021 * 1022 * @return 1023 * 0 on success, a negative errno value otherwise and rte_errno is set. 1024 */ 1025 int 1026 mlx5_set_link_down(struct rte_eth_dev *dev) 1027 { 1028 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1029 } 1030 1031 /** 1032 * DPDK callback to bring the link UP. 1033 * 1034 * @param dev 1035 * Pointer to Ethernet device structure. 1036 * 1037 * @return 1038 * 0 on success, a negative errno value otherwise and rte_errno is set. 1039 */ 1040 int 1041 mlx5_set_link_up(struct rte_eth_dev *dev) 1042 { 1043 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1044 } 1045 1046 /** 1047 * Configure the TX function to use. 1048 * 1049 * @param dev 1050 * Pointer to private data structure. 1051 * 1052 * @return 1053 * Pointer to selected Tx burst function. 1054 */ 1055 eth_tx_burst_t 1056 mlx5_select_tx_function(struct rte_eth_dev *dev) 1057 { 1058 struct priv *priv = dev->data->dev_private; 1059 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1060 struct mlx5_dev_config *config = &priv->config; 1061 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1062 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1063 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1064 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1065 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1066 1067 assert(priv != NULL); 1068 /* Select appropriate TX function. */ 1069 if (vlan_insert || tso) 1070 return tx_pkt_burst; 1071 if (config->mps == MLX5_MPW_ENHANCED) { 1072 if (mlx5_check_vec_tx_support(dev) > 0) { 1073 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1074 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1075 else 1076 tx_pkt_burst = mlx5_tx_burst_vec; 1077 DRV_LOG(DEBUG, 1078 "port %u selected enhanced MPW Tx vectorized" 1079 " function", 1080 dev->data->port_id); 1081 } else { 1082 tx_pkt_burst = mlx5_tx_burst_empw; 1083 DRV_LOG(DEBUG, 1084 "port %u selected enhanced MPW Tx function", 1085 dev->data->port_id); 1086 } 1087 } else if (config->mps && (config->txq_inline > 0)) { 1088 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1089 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", 1090 dev->data->port_id); 1091 } else if (config->mps) { 1092 tx_pkt_burst = mlx5_tx_burst_mpw; 1093 DRV_LOG(DEBUG, "port %u selected MPW Tx function", 1094 dev->data->port_id); 1095 } 1096 return tx_pkt_burst; 1097 } 1098 1099 /** 1100 * Configure the RX function to use. 1101 * 1102 * @param dev 1103 * Pointer to private data structure. 1104 * 1105 * @return 1106 * Pointer to selected Rx burst function. 1107 */ 1108 eth_rx_burst_t 1109 mlx5_select_rx_function(struct rte_eth_dev *dev) 1110 { 1111 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1112 1113 assert(dev != NULL); 1114 if (mlx5_check_vec_rx_support(dev) > 0) { 1115 rx_pkt_burst = mlx5_rx_burst_vec; 1116 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1117 dev->data->port_id); 1118 } 1119 return rx_pkt_burst; 1120 } 1121 1122 /** 1123 * Check if mlx5 device was removed. 1124 * 1125 * @param dev 1126 * Pointer to Ethernet device structure. 1127 * 1128 * @return 1129 * 1 when device is removed, otherwise 0. 1130 */ 1131 int 1132 mlx5_is_removed(struct rte_eth_dev *dev) 1133 { 1134 struct ibv_device_attr device_attr; 1135 struct priv *priv = dev->data->dev_private; 1136 1137 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1138 return 1; 1139 return 0; 1140 } 1141