1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox. 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <fcntl.h> 25 #include <stdalign.h> 26 #include <sys/un.h> 27 #include <time.h> 28 29 #include <rte_atomic.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_bus_pci.h> 32 #include <rte_mbuf.h> 33 #include <rte_common.h> 34 #include <rte_interrupts.h> 35 #include <rte_malloc.h> 36 37 #include "mlx5.h" 38 #include "mlx5_glue.h" 39 #include "mlx5_rxtx.h" 40 #include "mlx5_utils.h" 41 42 /* Add defines in case the running kernel is not the same as user headers. */ 43 #ifndef ETHTOOL_GLINKSETTINGS 44 struct ethtool_link_settings { 45 uint32_t cmd; 46 uint32_t speed; 47 uint8_t duplex; 48 uint8_t port; 49 uint8_t phy_address; 50 uint8_t autoneg; 51 uint8_t mdio_support; 52 uint8_t eth_to_mdix; 53 uint8_t eth_tp_mdix_ctrl; 54 int8_t link_mode_masks_nwords; 55 uint32_t reserved[8]; 56 uint32_t link_mode_masks[]; 57 }; 58 59 #define ETHTOOL_GLINKSETTINGS 0x0000004c 60 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 61 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 62 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 63 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 64 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 65 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 66 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 67 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 68 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 69 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 70 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 71 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 72 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 73 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 74 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 75 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 76 #endif 77 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 78 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 79 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 80 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 81 #endif 82 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 83 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 84 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 85 #endif 86 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 87 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 88 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 89 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 90 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 91 #endif 92 93 /** 94 * Get interface name from private structure. 95 * 96 * @param[in] dev 97 * Pointer to Ethernet device. 98 * @param[out] ifname 99 * Interface name output buffer. 100 * 101 * @return 102 * 0 on success, a negative errno value otherwise and rte_errno is set. 103 */ 104 int 105 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 106 { 107 struct priv *priv = dev->data->dev_private; 108 DIR *dir; 109 struct dirent *dent; 110 unsigned int dev_type = 0; 111 unsigned int dev_port_prev = ~0u; 112 char match[IF_NAMESIZE] = ""; 113 114 { 115 MKSTR(path, "%s/device/net", priv->ibdev_path); 116 117 dir = opendir(path); 118 if (dir == NULL) { 119 rte_errno = errno; 120 return -rte_errno; 121 } 122 } 123 while ((dent = readdir(dir)) != NULL) { 124 char *name = dent->d_name; 125 FILE *file; 126 unsigned int dev_port; 127 int r; 128 129 if ((name[0] == '.') && 130 ((name[1] == '\0') || 131 ((name[1] == '.') && (name[2] == '\0')))) 132 continue; 133 134 MKSTR(path, "%s/device/net/%s/%s", 135 priv->ibdev_path, name, 136 (dev_type ? "dev_id" : "dev_port")); 137 138 file = fopen(path, "rb"); 139 if (file == NULL) { 140 if (errno != ENOENT) 141 continue; 142 /* 143 * Switch to dev_id when dev_port does not exist as 144 * is the case with Linux kernel versions < 3.15. 145 */ 146 try_dev_id: 147 match[0] = '\0'; 148 if (dev_type) 149 break; 150 dev_type = 1; 151 dev_port_prev = ~0u; 152 rewinddir(dir); 153 continue; 154 } 155 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 156 fclose(file); 157 if (r != 1) 158 continue; 159 /* 160 * Switch to dev_id when dev_port returns the same value for 161 * all ports. May happen when using a MOFED release older than 162 * 3.0 with a Linux kernel >= 3.15. 163 */ 164 if (dev_port == dev_port_prev) 165 goto try_dev_id; 166 dev_port_prev = dev_port; 167 if (dev_port == (priv->port - 1u)) 168 snprintf(match, sizeof(match), "%s", name); 169 } 170 closedir(dir); 171 if (match[0] == '\0') { 172 rte_errno = ENOENT; 173 return -rte_errno; 174 } 175 strncpy(*ifname, match, sizeof(*ifname)); 176 return 0; 177 } 178 179 /** 180 * Perform ifreq ioctl() on associated Ethernet device. 181 * 182 * @param[in] dev 183 * Pointer to Ethernet device. 184 * @param req 185 * Request number to pass to ioctl(). 186 * @param[out] ifr 187 * Interface request structure output buffer. 188 * 189 * @return 190 * 0 on success, a negative errno value otherwise and rte_errno is set. 191 */ 192 int 193 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 194 { 195 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 196 int ret = 0; 197 198 if (sock == -1) { 199 rte_errno = errno; 200 return -rte_errno; 201 } 202 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 203 if (ret) 204 goto error; 205 ret = ioctl(sock, req, ifr); 206 if (ret == -1) { 207 rte_errno = errno; 208 goto error; 209 } 210 close(sock); 211 return 0; 212 error: 213 close(sock); 214 return -rte_errno; 215 } 216 217 /** 218 * Get device MTU. 219 * 220 * @param dev 221 * Pointer to Ethernet device. 222 * @param[out] mtu 223 * MTU value output buffer. 224 * 225 * @return 226 * 0 on success, a negative errno value otherwise and rte_errno is set. 227 */ 228 int 229 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 230 { 231 struct ifreq request; 232 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 233 234 if (ret) 235 return ret; 236 *mtu = request.ifr_mtu; 237 return 0; 238 } 239 240 /** 241 * Set device MTU. 242 * 243 * @param dev 244 * Pointer to Ethernet device. 245 * @param mtu 246 * MTU value to set. 247 * 248 * @return 249 * 0 on success, a negative errno value otherwise and rte_errno is set. 250 */ 251 static int 252 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 253 { 254 struct ifreq request = { .ifr_mtu = mtu, }; 255 256 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 257 } 258 259 /** 260 * Set device flags. 261 * 262 * @param dev 263 * Pointer to Ethernet device. 264 * @param keep 265 * Bitmask for flags that must remain untouched. 266 * @param flags 267 * Bitmask for flags to modify. 268 * 269 * @return 270 * 0 on success, a negative errno value otherwise and rte_errno is set. 271 */ 272 int 273 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 274 { 275 struct ifreq request; 276 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 277 278 if (ret) 279 return ret; 280 request.ifr_flags &= keep; 281 request.ifr_flags |= flags & ~keep; 282 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 283 } 284 285 /** 286 * DPDK callback for Ethernet device configuration. 287 * 288 * @param dev 289 * Pointer to Ethernet device structure. 290 * 291 * @return 292 * 0 on success, a negative errno value otherwise and rte_errno is set. 293 */ 294 int 295 mlx5_dev_configure(struct rte_eth_dev *dev) 296 { 297 struct priv *priv = dev->data->dev_private; 298 unsigned int rxqs_n = dev->data->nb_rx_queues; 299 unsigned int txqs_n = dev->data->nb_tx_queues; 300 unsigned int i; 301 unsigned int j; 302 unsigned int reta_idx_n; 303 const uint8_t use_app_rss_key = 304 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 305 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev); 306 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 307 uint64_t supp_rx_offloads = 308 (mlx5_get_rx_port_offloads() | 309 mlx5_get_rx_queue_offloads(dev)); 310 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 311 int ret = 0; 312 313 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 314 DRV_LOG(ERR, 315 "port %u some Tx offloads are not supported requested" 316 " 0x%" PRIx64 " supported 0x%" PRIx64, 317 dev->data->port_id, tx_offloads, supp_tx_offloads); 318 rte_errno = ENOTSUP; 319 return -rte_errno; 320 } 321 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 322 DRV_LOG(ERR, 323 "port %u some Rx offloads are not supported requested" 324 " 0x%" PRIx64 " supported 0x%" PRIx64, 325 dev->data->port_id, rx_offloads, supp_rx_offloads); 326 rte_errno = ENOTSUP; 327 return -rte_errno; 328 } 329 if (use_app_rss_key && 330 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 331 rss_hash_default_key_len)) { 332 DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long", 333 dev->data->port_id, rss_hash_default_key_len); 334 rte_errno = EINVAL; 335 return -rte_errno; 336 } 337 priv->rss_conf.rss_key = 338 rte_realloc(priv->rss_conf.rss_key, 339 rss_hash_default_key_len, 0); 340 if (!priv->rss_conf.rss_key) { 341 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 342 dev->data->port_id, rxqs_n); 343 rte_errno = ENOMEM; 344 return -rte_errno; 345 } 346 memcpy(priv->rss_conf.rss_key, 347 use_app_rss_key ? 348 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 349 rss_hash_default_key, 350 rss_hash_default_key_len); 351 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 352 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 353 priv->rxqs = (void *)dev->data->rx_queues; 354 priv->txqs = (void *)dev->data->tx_queues; 355 if (txqs_n != priv->txqs_n) { 356 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 357 dev->data->port_id, priv->txqs_n, txqs_n); 358 priv->txqs_n = txqs_n; 359 } 360 if (rxqs_n > priv->config.ind_table_max_size) { 361 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 362 dev->data->port_id, rxqs_n); 363 rte_errno = EINVAL; 364 return -rte_errno; 365 } 366 if (rxqs_n == priv->rxqs_n) 367 return 0; 368 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 369 dev->data->port_id, priv->rxqs_n, rxqs_n); 370 priv->rxqs_n = rxqs_n; 371 /* If the requested number of RX queues is not a power of two, use the 372 * maximum indirection table size for better balancing. 373 * The result is always rounded to the next power of two. */ 374 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 375 priv->config.ind_table_max_size : 376 rxqs_n)); 377 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 378 if (ret) 379 return ret; 380 /* When the number of RX queues is not a power of two, the remaining 381 * table entries are padded with reused WQs and hashes are not spread 382 * uniformly. */ 383 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 384 (*priv->reta_idx)[i] = j; 385 if (++j == rxqs_n) 386 j = 0; 387 } 388 return 0; 389 } 390 391 /** 392 * DPDK callback to get information about the device. 393 * 394 * @param dev 395 * Pointer to Ethernet device structure. 396 * @param[out] info 397 * Info structure output buffer. 398 */ 399 void 400 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 401 { 402 struct priv *priv = dev->data->dev_private; 403 struct mlx5_dev_config *config = &priv->config; 404 unsigned int max; 405 char ifname[IF_NAMESIZE]; 406 407 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 408 /* FIXME: we should ask the device for these values. */ 409 info->min_rx_bufsize = 32; 410 info->max_rx_pktlen = 65536; 411 /* 412 * Since we need one CQ per QP, the limit is the minimum number 413 * between the two values. 414 */ 415 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 416 priv->device_attr.orig_attr.max_qp); 417 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 418 if (max >= 65535) 419 max = 65535; 420 info->max_rx_queues = max; 421 info->max_tx_queues = max; 422 info->max_mac_addrs = RTE_DIM(priv->mac); 423 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 424 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 425 info->rx_queue_offload_capa); 426 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 427 if (mlx5_get_ifname(dev, &ifname) == 0) 428 info->if_index = if_nametoindex(ifname); 429 info->reta_size = priv->reta_idx_n ? 430 priv->reta_idx_n : config->ind_table_max_size; 431 info->hash_key_size = rss_hash_default_key_len; 432 info->speed_capa = priv->link_speed_capa; 433 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 434 } 435 436 /** 437 * Get supported packet types. 438 * 439 * @param dev 440 * Pointer to Ethernet device structure. 441 * 442 * @return 443 * A pointer to the supported Packet types array. 444 */ 445 const uint32_t * 446 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 447 { 448 static const uint32_t ptypes[] = { 449 /* refers to rxq_cq_to_pkt_type() */ 450 RTE_PTYPE_L2_ETHER, 451 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 452 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 453 RTE_PTYPE_L4_NONFRAG, 454 RTE_PTYPE_L4_FRAG, 455 RTE_PTYPE_L4_TCP, 456 RTE_PTYPE_L4_UDP, 457 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 458 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 459 RTE_PTYPE_INNER_L4_NONFRAG, 460 RTE_PTYPE_INNER_L4_FRAG, 461 RTE_PTYPE_INNER_L4_TCP, 462 RTE_PTYPE_INNER_L4_UDP, 463 RTE_PTYPE_UNKNOWN 464 }; 465 466 if (dev->rx_pkt_burst == mlx5_rx_burst || 467 dev->rx_pkt_burst == mlx5_rx_burst_vec) 468 return ptypes; 469 return NULL; 470 } 471 472 /** 473 * DPDK callback to retrieve physical link information. 474 * 475 * @param dev 476 * Pointer to Ethernet device structure. 477 * @param[out] link 478 * Storage for current link status. 479 * 480 * @return 481 * 0 on success, a negative errno value otherwise and rte_errno is set. 482 */ 483 static int 484 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 485 struct rte_eth_link *link) 486 { 487 struct priv *priv = dev->data->dev_private; 488 struct ethtool_cmd edata = { 489 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 490 }; 491 struct ifreq ifr; 492 struct rte_eth_link dev_link; 493 int link_speed = 0; 494 int ret; 495 496 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 497 if (ret) { 498 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 499 dev->data->port_id, strerror(rte_errno)); 500 return ret; 501 } 502 memset(&dev_link, 0, sizeof(dev_link)); 503 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 504 (ifr.ifr_flags & IFF_RUNNING)); 505 ifr.ifr_data = (void *)&edata; 506 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 507 if (ret) { 508 DRV_LOG(WARNING, 509 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 510 dev->data->port_id, strerror(rte_errno)); 511 return ret; 512 } 513 link_speed = ethtool_cmd_speed(&edata); 514 if (link_speed == -1) 515 dev_link.link_speed = 0; 516 else 517 dev_link.link_speed = link_speed; 518 priv->link_speed_capa = 0; 519 if (edata.supported & SUPPORTED_Autoneg) 520 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 521 if (edata.supported & (SUPPORTED_1000baseT_Full | 522 SUPPORTED_1000baseKX_Full)) 523 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 524 if (edata.supported & SUPPORTED_10000baseKR_Full) 525 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 526 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 527 SUPPORTED_40000baseCR4_Full | 528 SUPPORTED_40000baseSR4_Full | 529 SUPPORTED_40000baseLR4_Full)) 530 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 531 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 532 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 533 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 534 ETH_LINK_SPEED_FIXED); 535 if ((dev_link.link_speed && !dev_link.link_status) || 536 (!dev_link.link_speed && dev_link.link_status)) { 537 rte_errno = EAGAIN; 538 return -rte_errno; 539 } 540 *link = dev_link; 541 return 0; 542 } 543 544 /** 545 * Retrieve physical link information (unlocked version using new ioctl). 546 * 547 * @param dev 548 * Pointer to Ethernet device structure. 549 * @param[out] link 550 * Storage for current link status. 551 * 552 * @return 553 * 0 on success, a negative errno value otherwise and rte_errno is set. 554 */ 555 static int 556 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 557 struct rte_eth_link *link) 558 559 { 560 struct priv *priv = dev->data->dev_private; 561 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 562 struct ifreq ifr; 563 struct rte_eth_link dev_link; 564 uint64_t sc; 565 int ret; 566 567 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 568 if (ret) { 569 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 570 dev->data->port_id, strerror(rte_errno)); 571 return ret; 572 } 573 memset(&dev_link, 0, sizeof(dev_link)); 574 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 575 (ifr.ifr_flags & IFF_RUNNING)); 576 ifr.ifr_data = (void *)&gcmd; 577 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 578 if (ret) { 579 DRV_LOG(DEBUG, 580 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 581 " failed: %s", 582 dev->data->port_id, strerror(rte_errno)); 583 return ret; 584 } 585 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 586 587 alignas(struct ethtool_link_settings) 588 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 589 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 590 struct ethtool_link_settings *ecmd = (void *)data; 591 592 *ecmd = gcmd; 593 ifr.ifr_data = (void *)ecmd; 594 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 595 if (ret) { 596 DRV_LOG(DEBUG, 597 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 598 " failed: %s", 599 dev->data->port_id, strerror(rte_errno)); 600 return ret; 601 } 602 dev_link.link_speed = ecmd->speed; 603 sc = ecmd->link_mode_masks[0] | 604 ((uint64_t)ecmd->link_mode_masks[1] << 32); 605 priv->link_speed_capa = 0; 606 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 607 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 608 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 609 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 610 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 611 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 612 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 613 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 614 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 615 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 616 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 617 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 618 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 619 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 620 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 621 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 622 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 623 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 624 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 625 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 626 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 627 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 628 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 629 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 630 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 631 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 632 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 633 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 634 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 635 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 636 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 637 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 638 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 639 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 640 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 641 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 642 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 643 ETH_LINK_SPEED_FIXED); 644 if ((dev_link.link_speed && !dev_link.link_status) || 645 (!dev_link.link_speed && dev_link.link_status)) { 646 rte_errno = EAGAIN; 647 return -rte_errno; 648 } 649 *link = dev_link; 650 return 0; 651 } 652 653 /** 654 * DPDK callback to retrieve physical link information. 655 * 656 * @param dev 657 * Pointer to Ethernet device structure. 658 * @param wait_to_complete 659 * Wait for request completion. 660 * 661 * @return 662 * 0 if link status was not updated, positive if it was, a negative errno 663 * value otherwise and rte_errno is set. 664 */ 665 int 666 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 667 { 668 int ret; 669 struct rte_eth_link dev_link; 670 time_t start_time = time(NULL); 671 672 do { 673 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 674 if (ret) 675 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 676 if (ret == 0) 677 break; 678 /* Handle wait to complete situation. */ 679 if (wait_to_complete && ret == -EAGAIN) { 680 if (abs((int)difftime(time(NULL), start_time)) < 681 MLX5_LINK_STATUS_TIMEOUT) { 682 usleep(0); 683 continue; 684 } else { 685 rte_errno = EBUSY; 686 return -rte_errno; 687 } 688 } else if (ret < 0) { 689 return ret; 690 } 691 } while (wait_to_complete); 692 ret = !!memcmp(&dev->data->dev_link, &dev_link, 693 sizeof(struct rte_eth_link)); 694 dev->data->dev_link = dev_link; 695 return ret; 696 } 697 698 /** 699 * DPDK callback to change the MTU. 700 * 701 * @param dev 702 * Pointer to Ethernet device structure. 703 * @param in_mtu 704 * New MTU. 705 * 706 * @return 707 * 0 on success, a negative errno value otherwise and rte_errno is set. 708 */ 709 int 710 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 711 { 712 struct priv *priv = dev->data->dev_private; 713 uint16_t kern_mtu = 0; 714 int ret; 715 716 ret = mlx5_get_mtu(dev, &kern_mtu); 717 if (ret) 718 return ret; 719 /* Set kernel interface MTU first. */ 720 ret = mlx5_set_mtu(dev, mtu); 721 if (ret) 722 return ret; 723 ret = mlx5_get_mtu(dev, &kern_mtu); 724 if (ret) 725 return ret; 726 if (kern_mtu == mtu) { 727 priv->mtu = mtu; 728 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 729 dev->data->port_id, mtu); 730 return 0; 731 } 732 rte_errno = EAGAIN; 733 return -rte_errno; 734 } 735 736 /** 737 * DPDK callback to get flow control status. 738 * 739 * @param dev 740 * Pointer to Ethernet device structure. 741 * @param[out] fc_conf 742 * Flow control output buffer. 743 * 744 * @return 745 * 0 on success, a negative errno value otherwise and rte_errno is set. 746 */ 747 int 748 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 749 { 750 struct ifreq ifr; 751 struct ethtool_pauseparam ethpause = { 752 .cmd = ETHTOOL_GPAUSEPARAM 753 }; 754 int ret; 755 756 ifr.ifr_data = (void *)ðpause; 757 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 758 if (ret) { 759 DRV_LOG(WARNING, 760 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 761 " %s", 762 dev->data->port_id, strerror(rte_errno)); 763 return ret; 764 } 765 fc_conf->autoneg = ethpause.autoneg; 766 if (ethpause.rx_pause && ethpause.tx_pause) 767 fc_conf->mode = RTE_FC_FULL; 768 else if (ethpause.rx_pause) 769 fc_conf->mode = RTE_FC_RX_PAUSE; 770 else if (ethpause.tx_pause) 771 fc_conf->mode = RTE_FC_TX_PAUSE; 772 else 773 fc_conf->mode = RTE_FC_NONE; 774 return 0; 775 } 776 777 /** 778 * DPDK callback to modify flow control parameters. 779 * 780 * @param dev 781 * Pointer to Ethernet device structure. 782 * @param[in] fc_conf 783 * Flow control parameters. 784 * 785 * @return 786 * 0 on success, a negative errno value otherwise and rte_errno is set. 787 */ 788 int 789 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 790 { 791 struct ifreq ifr; 792 struct ethtool_pauseparam ethpause = { 793 .cmd = ETHTOOL_SPAUSEPARAM 794 }; 795 int ret; 796 797 ifr.ifr_data = (void *)ðpause; 798 ethpause.autoneg = fc_conf->autoneg; 799 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 800 (fc_conf->mode & RTE_FC_RX_PAUSE)) 801 ethpause.rx_pause = 1; 802 else 803 ethpause.rx_pause = 0; 804 805 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 806 (fc_conf->mode & RTE_FC_TX_PAUSE)) 807 ethpause.tx_pause = 1; 808 else 809 ethpause.tx_pause = 0; 810 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 811 if (ret) { 812 DRV_LOG(WARNING, 813 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 814 " failed: %s", 815 dev->data->port_id, strerror(rte_errno)); 816 return ret; 817 } 818 return 0; 819 } 820 821 /** 822 * Get PCI information from struct ibv_device. 823 * 824 * @param device 825 * Pointer to Ethernet device structure. 826 * @param[out] pci_addr 827 * PCI bus address output buffer. 828 * 829 * @return 830 * 0 on success, a negative errno value otherwise and rte_errno is set. 831 */ 832 int 833 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 834 struct rte_pci_addr *pci_addr) 835 { 836 FILE *file; 837 char line[32]; 838 MKSTR(path, "%s/device/uevent", device->ibdev_path); 839 840 file = fopen(path, "rb"); 841 if (file == NULL) { 842 rte_errno = errno; 843 return -rte_errno; 844 } 845 while (fgets(line, sizeof(line), file) == line) { 846 size_t len = strlen(line); 847 int ret; 848 849 /* Truncate long lines. */ 850 if (len == (sizeof(line) - 1)) 851 while (line[(len - 1)] != '\n') { 852 ret = fgetc(file); 853 if (ret == EOF) 854 break; 855 line[(len - 1)] = ret; 856 } 857 /* Extract information. */ 858 if (sscanf(line, 859 "PCI_SLOT_NAME=" 860 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 861 &pci_addr->domain, 862 &pci_addr->bus, 863 &pci_addr->devid, 864 &pci_addr->function) == 4) { 865 ret = 0; 866 break; 867 } 868 } 869 fclose(file); 870 return 0; 871 } 872 873 /** 874 * Device status handler. 875 * 876 * @param dev 877 * Pointer to Ethernet device. 878 * @param events 879 * Pointer to event flags holder. 880 * 881 * @return 882 * Events bitmap of callback process which can be called immediately. 883 */ 884 static uint32_t 885 mlx5_dev_status_handler(struct rte_eth_dev *dev) 886 { 887 struct priv *priv = dev->data->dev_private; 888 struct ibv_async_event event; 889 uint32_t ret = 0; 890 891 if (mlx5_link_update(dev, 0) == -EAGAIN) { 892 usleep(0); 893 return 0; 894 } 895 /* Read all message and acknowledge them. */ 896 for (;;) { 897 if (mlx5_glue->get_async_event(priv->ctx, &event)) 898 break; 899 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 900 event.event_type == IBV_EVENT_PORT_ERR) && 901 (dev->data->dev_conf.intr_conf.lsc == 1)) 902 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 903 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 904 dev->data->dev_conf.intr_conf.rmv == 1) 905 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 906 else 907 DRV_LOG(DEBUG, 908 "port %u event type %d on not handled", 909 dev->data->port_id, event.event_type); 910 mlx5_glue->ack_async_event(&event); 911 } 912 return ret; 913 } 914 915 /** 916 * Handle interrupts from the NIC. 917 * 918 * @param[in] intr_handle 919 * Interrupt handler. 920 * @param cb_arg 921 * Callback argument. 922 */ 923 void 924 mlx5_dev_interrupt_handler(void *cb_arg) 925 { 926 struct rte_eth_dev *dev = cb_arg; 927 uint32_t events; 928 929 events = mlx5_dev_status_handler(dev); 930 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 931 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 932 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 933 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 934 } 935 936 /** 937 * Handle interrupts from the socket. 938 * 939 * @param cb_arg 940 * Callback argument. 941 */ 942 static void 943 mlx5_dev_handler_socket(void *cb_arg) 944 { 945 struct rte_eth_dev *dev = cb_arg; 946 947 mlx5_socket_handle(dev); 948 } 949 950 /** 951 * Uninstall interrupt handler. 952 * 953 * @param dev 954 * Pointer to Ethernet device. 955 */ 956 void 957 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 958 { 959 struct priv *priv = dev->data->dev_private; 960 961 if (dev->data->dev_conf.intr_conf.lsc || 962 dev->data->dev_conf.intr_conf.rmv) 963 rte_intr_callback_unregister(&priv->intr_handle, 964 mlx5_dev_interrupt_handler, dev); 965 if (priv->primary_socket) 966 rte_intr_callback_unregister(&priv->intr_handle_socket, 967 mlx5_dev_handler_socket, dev); 968 priv->intr_handle.fd = 0; 969 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 970 priv->intr_handle_socket.fd = 0; 971 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 972 } 973 974 /** 975 * Install interrupt handler. 976 * 977 * @param dev 978 * Pointer to Ethernet device. 979 */ 980 void 981 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 982 { 983 struct priv *priv = dev->data->dev_private; 984 int ret; 985 int flags; 986 987 assert(priv->ctx->async_fd > 0); 988 flags = fcntl(priv->ctx->async_fd, F_GETFL); 989 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 990 if (ret) { 991 DRV_LOG(INFO, 992 "port %u failed to change file descriptor async event" 993 " queue", 994 dev->data->port_id); 995 dev->data->dev_conf.intr_conf.lsc = 0; 996 dev->data->dev_conf.intr_conf.rmv = 0; 997 } 998 if (dev->data->dev_conf.intr_conf.lsc || 999 dev->data->dev_conf.intr_conf.rmv) { 1000 priv->intr_handle.fd = priv->ctx->async_fd; 1001 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1002 rte_intr_callback_register(&priv->intr_handle, 1003 mlx5_dev_interrupt_handler, dev); 1004 } 1005 ret = mlx5_socket_init(dev); 1006 if (ret) 1007 DRV_LOG(ERR, "port %u cannot initialise socket: %s", 1008 dev->data->port_id, strerror(rte_errno)); 1009 else if (priv->primary_socket) { 1010 priv->intr_handle_socket.fd = priv->primary_socket; 1011 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1012 rte_intr_callback_register(&priv->intr_handle_socket, 1013 mlx5_dev_handler_socket, dev); 1014 } 1015 } 1016 1017 /** 1018 * DPDK callback to bring the link DOWN. 1019 * 1020 * @param dev 1021 * Pointer to Ethernet device structure. 1022 * 1023 * @return 1024 * 0 on success, a negative errno value otherwise and rte_errno is set. 1025 */ 1026 int 1027 mlx5_set_link_down(struct rte_eth_dev *dev) 1028 { 1029 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1030 } 1031 1032 /** 1033 * DPDK callback to bring the link UP. 1034 * 1035 * @param dev 1036 * Pointer to Ethernet device structure. 1037 * 1038 * @return 1039 * 0 on success, a negative errno value otherwise and rte_errno is set. 1040 */ 1041 int 1042 mlx5_set_link_up(struct rte_eth_dev *dev) 1043 { 1044 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1045 } 1046 1047 /** 1048 * Configure the TX function to use. 1049 * 1050 * @param dev 1051 * Pointer to private data structure. 1052 * 1053 * @return 1054 * Pointer to selected Tx burst function. 1055 */ 1056 eth_tx_burst_t 1057 mlx5_select_tx_function(struct rte_eth_dev *dev) 1058 { 1059 struct priv *priv = dev->data->dev_private; 1060 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1061 struct mlx5_dev_config *config = &priv->config; 1062 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1063 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1064 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1065 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1066 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1067 1068 assert(priv != NULL); 1069 /* Select appropriate TX function. */ 1070 if (vlan_insert || tso) 1071 return tx_pkt_burst; 1072 if (config->mps == MLX5_MPW_ENHANCED) { 1073 if (mlx5_check_vec_tx_support(dev) > 0) { 1074 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1075 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1076 else 1077 tx_pkt_burst = mlx5_tx_burst_vec; 1078 DRV_LOG(DEBUG, 1079 "port %u selected enhanced MPW Tx vectorized" 1080 " function", 1081 dev->data->port_id); 1082 } else { 1083 tx_pkt_burst = mlx5_tx_burst_empw; 1084 DRV_LOG(DEBUG, 1085 "port %u selected enhanced MPW Tx function", 1086 dev->data->port_id); 1087 } 1088 } else if (config->mps && (config->txq_inline > 0)) { 1089 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1090 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", 1091 dev->data->port_id); 1092 } else if (config->mps) { 1093 tx_pkt_burst = mlx5_tx_burst_mpw; 1094 DRV_LOG(DEBUG, "port %u selected MPW Tx function", 1095 dev->data->port_id); 1096 } 1097 return tx_pkt_burst; 1098 } 1099 1100 /** 1101 * Configure the RX function to use. 1102 * 1103 * @param dev 1104 * Pointer to private data structure. 1105 * 1106 * @return 1107 * Pointer to selected Rx burst function. 1108 */ 1109 eth_rx_burst_t 1110 mlx5_select_rx_function(struct rte_eth_dev *dev) 1111 { 1112 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1113 1114 assert(dev != NULL); 1115 if (mlx5_check_vec_rx_support(dev) > 0) { 1116 rx_pkt_burst = mlx5_rx_burst_vec; 1117 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1118 dev->data->port_id); 1119 } 1120 return rx_pkt_burst; 1121 } 1122 1123 /** 1124 * Check if mlx5 device was removed. 1125 * 1126 * @param dev 1127 * Pointer to Ethernet device structure. 1128 * 1129 * @return 1130 * 1 when device is removed, otherwise 0. 1131 */ 1132 int 1133 mlx5_is_removed(struct rte_eth_dev *dev) 1134 { 1135 struct ibv_device_attr device_attr; 1136 struct priv *priv = dev->data->dev_private; 1137 1138 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1139 return 1; 1140 return 0; 1141 } 1142