1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <fcntl.h> 25 #include <stdalign.h> 26 #include <sys/un.h> 27 #include <time.h> 28 29 #include <rte_atomic.h> 30 #include <rte_ethdev_driver.h> 31 #include <rte_bus_pci.h> 32 #include <rte_mbuf.h> 33 #include <rte_common.h> 34 #include <rte_interrupts.h> 35 #include <rte_malloc.h> 36 #include <rte_string_fns.h> 37 38 #include "mlx5.h" 39 #include "mlx5_glue.h" 40 #include "mlx5_rxtx.h" 41 #include "mlx5_utils.h" 42 43 /* Add defines in case the running kernel is not the same as user headers. 
*/
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Local fallback for the ETHTOOL_GLINKSETTINGS request: this structure and
 * the link mode bit indexes that follow are only compiled when the user
 * headers do not define ETHTOOL_GLINKSETTINGS themselves.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed;
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	/* Signed: the link update code in this file negates the value read back. */
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	/* Flexible array of 32-bit link mode mask words. */
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
/* Link mode bit indexes (tested against the 64-bit mask built at run time). */
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
/* 25G link mode bit indexes, defined here unless the build reports them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
/* 50G link mode bit indexes, defined here unless the build reports them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
/* 100G link mode bit indexes, defined here unless the build reports them. */
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get interface name from private structure.
96 * 97 * @param[in] dev 98 * Pointer to Ethernet device. 99 * @param[out] ifname 100 * Interface name output buffer. 101 * 102 * @return 103 * 0 on success, a negative errno value otherwise and rte_errno is set. 104 */ 105 int 106 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 107 { 108 struct priv *priv = dev->data->dev_private; 109 DIR *dir; 110 struct dirent *dent; 111 unsigned int dev_type = 0; 112 unsigned int dev_port_prev = ~0u; 113 char match[IF_NAMESIZE] = ""; 114 115 { 116 MKSTR(path, "%s/device/net", priv->ibdev_path); 117 118 dir = opendir(path); 119 if (dir == NULL) { 120 rte_errno = errno; 121 return -rte_errno; 122 } 123 } 124 while ((dent = readdir(dir)) != NULL) { 125 char *name = dent->d_name; 126 FILE *file; 127 unsigned int dev_port; 128 int r; 129 130 if ((name[0] == '.') && 131 ((name[1] == '\0') || 132 ((name[1] == '.') && (name[2] == '\0')))) 133 continue; 134 135 MKSTR(path, "%s/device/net/%s/%s", 136 priv->ibdev_path, name, 137 (dev_type ? "dev_id" : "dev_port")); 138 139 file = fopen(path, "rb"); 140 if (file == NULL) { 141 if (errno != ENOENT) 142 continue; 143 /* 144 * Switch to dev_id when dev_port does not exist as 145 * is the case with Linux kernel versions < 3.15. 146 */ 147 try_dev_id: 148 match[0] = '\0'; 149 if (dev_type) 150 break; 151 dev_type = 1; 152 dev_port_prev = ~0u; 153 rewinddir(dir); 154 continue; 155 } 156 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 157 fclose(file); 158 if (r != 1) 159 continue; 160 /* 161 * Switch to dev_id when dev_port returns the same value for 162 * all ports. May happen when using a MOFED release older than 163 * 3.0 with a Linux kernel >= 3.15. 
164 */ 165 if (dev_port == dev_port_prev) 166 goto try_dev_id; 167 dev_port_prev = dev_port; 168 if (dev_port == (priv->port - 1u)) 169 strlcpy(match, name, sizeof(match)); 170 } 171 closedir(dir); 172 if (match[0] == '\0') { 173 rte_errno = ENOENT; 174 return -rte_errno; 175 } 176 strncpy(*ifname, match, sizeof(*ifname)); 177 return 0; 178 } 179 180 /** 181 * Get the interface index from device name. 182 * 183 * @param[in] dev 184 * Pointer to Ethernet device. 185 * 186 * @return 187 * Interface index on success, a negative errno value otherwise and 188 * rte_errno is set. 189 */ 190 int 191 mlx5_ifindex(const struct rte_eth_dev *dev) 192 { 193 char ifname[IF_NAMESIZE]; 194 int ret; 195 196 ret = mlx5_get_ifname(dev, &ifname); 197 if (ret) 198 return ret; 199 ret = if_nametoindex(ifname); 200 if (ret == -1) { 201 rte_errno = errno; 202 return -rte_errno; 203 } 204 return ret; 205 } 206 207 /** 208 * Perform ifreq ioctl() on associated Ethernet device. 209 * 210 * @param[in] dev 211 * Pointer to Ethernet device. 212 * @param req 213 * Request number to pass to ioctl(). 214 * @param[out] ifr 215 * Interface request structure output buffer. 216 * 217 * @return 218 * 0 on success, a negative errno value otherwise and rte_errno is set. 219 */ 220 int 221 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 222 { 223 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 224 int ret = 0; 225 226 if (sock == -1) { 227 rte_errno = errno; 228 return -rte_errno; 229 } 230 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 231 if (ret) 232 goto error; 233 ret = ioctl(sock, req, ifr); 234 if (ret == -1) { 235 rte_errno = errno; 236 goto error; 237 } 238 close(sock); 239 return 0; 240 error: 241 close(sock); 242 return -rte_errno; 243 } 244 245 /** 246 * Get device MTU. 247 * 248 * @param dev 249 * Pointer to Ethernet device. 250 * @param[out] mtu 251 * MTU value output buffer. 
252 * 253 * @return 254 * 0 on success, a negative errno value otherwise and rte_errno is set. 255 */ 256 int 257 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 258 { 259 struct ifreq request; 260 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 261 262 if (ret) 263 return ret; 264 *mtu = request.ifr_mtu; 265 return 0; 266 } 267 268 /** 269 * Set device MTU. 270 * 271 * @param dev 272 * Pointer to Ethernet device. 273 * @param mtu 274 * MTU value to set. 275 * 276 * @return 277 * 0 on success, a negative errno value otherwise and rte_errno is set. 278 */ 279 static int 280 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 281 { 282 struct ifreq request = { .ifr_mtu = mtu, }; 283 284 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 285 } 286 287 /** 288 * Set device flags. 289 * 290 * @param dev 291 * Pointer to Ethernet device. 292 * @param keep 293 * Bitmask for flags that must remain untouched. 294 * @param flags 295 * Bitmask for flags to modify. 296 * 297 * @return 298 * 0 on success, a negative errno value otherwise and rte_errno is set. 299 */ 300 int 301 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 302 { 303 struct ifreq request; 304 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 305 306 if (ret) 307 return ret; 308 request.ifr_flags &= keep; 309 request.ifr_flags |= flags & ~keep; 310 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 311 } 312 313 /** 314 * DPDK callback for Ethernet device configuration. 315 * 316 * @param dev 317 * Pointer to Ethernet device structure. 318 * 319 * @return 320 * 0 on success, a negative errno value otherwise and rte_errno is set. 
321 */ 322 int 323 mlx5_dev_configure(struct rte_eth_dev *dev) 324 { 325 struct priv *priv = dev->data->dev_private; 326 unsigned int rxqs_n = dev->data->nb_rx_queues; 327 unsigned int txqs_n = dev->data->nb_tx_queues; 328 unsigned int i; 329 unsigned int j; 330 unsigned int reta_idx_n; 331 const uint8_t use_app_rss_key = 332 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 333 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev); 334 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 335 uint64_t supp_rx_offloads = 336 (mlx5_get_rx_port_offloads() | 337 mlx5_get_rx_queue_offloads(dev)); 338 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 339 int ret = 0; 340 341 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 342 DRV_LOG(ERR, 343 "port %u some Tx offloads are not supported requested" 344 " 0x%" PRIx64 " supported 0x%" PRIx64, 345 dev->data->port_id, tx_offloads, supp_tx_offloads); 346 rte_errno = ENOTSUP; 347 return -rte_errno; 348 } 349 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 350 DRV_LOG(ERR, 351 "port %u some Rx offloads are not supported requested" 352 " 0x%" PRIx64 " supported 0x%" PRIx64, 353 dev->data->port_id, rx_offloads, supp_rx_offloads); 354 rte_errno = ENOTSUP; 355 return -rte_errno; 356 } 357 if (use_app_rss_key && 358 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 359 rss_hash_default_key_len)) { 360 DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long", 361 dev->data->port_id, rss_hash_default_key_len); 362 rte_errno = EINVAL; 363 return -rte_errno; 364 } 365 priv->rss_conf.rss_key = 366 rte_realloc(priv->rss_conf.rss_key, 367 rss_hash_default_key_len, 0); 368 if (!priv->rss_conf.rss_key) { 369 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 370 dev->data->port_id, rxqs_n); 371 rte_errno = ENOMEM; 372 return -rte_errno; 373 } 374 memcpy(priv->rss_conf.rss_key, 375 use_app_rss_key ? 
376 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 377 rss_hash_default_key, 378 rss_hash_default_key_len); 379 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 380 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 381 priv->rxqs = (void *)dev->data->rx_queues; 382 priv->txqs = (void *)dev->data->tx_queues; 383 if (txqs_n != priv->txqs_n) { 384 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 385 dev->data->port_id, priv->txqs_n, txqs_n); 386 priv->txqs_n = txqs_n; 387 } 388 if (rxqs_n > priv->config.ind_table_max_size) { 389 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 390 dev->data->port_id, rxqs_n); 391 rte_errno = EINVAL; 392 return -rte_errno; 393 } 394 if (rxqs_n == priv->rxqs_n) 395 return 0; 396 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 397 dev->data->port_id, priv->rxqs_n, rxqs_n); 398 priv->rxqs_n = rxqs_n; 399 /* If the requested number of RX queues is not a power of two, use the 400 * maximum indirection table size for better balancing. 401 * The result is always rounded to the next power of two. */ 402 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 403 priv->config.ind_table_max_size : 404 rxqs_n)); 405 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 406 if (ret) 407 return ret; 408 /* When the number of RX queues is not a power of two, the remaining 409 * table entries are padded with reused WQs and hashes are not spread 410 * uniformly. */ 411 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 412 (*priv->reta_idx)[i] = j; 413 if (++j == rxqs_n) 414 j = 0; 415 } 416 return 0; 417 } 418 419 /** 420 * DPDK callback to get information about the device. 421 * 422 * @param dev 423 * Pointer to Ethernet device structure. 424 * @param[out] info 425 * Info structure output buffer. 
426 */ 427 void 428 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 429 { 430 struct priv *priv = dev->data->dev_private; 431 struct mlx5_dev_config *config = &priv->config; 432 unsigned int max; 433 char ifname[IF_NAMESIZE]; 434 435 /* FIXME: we should ask the device for these values. */ 436 info->min_rx_bufsize = 32; 437 info->max_rx_pktlen = 65536; 438 /* 439 * Since we need one CQ per QP, the limit is the minimum number 440 * between the two values. 441 */ 442 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 443 priv->device_attr.orig_attr.max_qp); 444 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 445 if (max >= 65535) 446 max = 65535; 447 info->max_rx_queues = max; 448 info->max_tx_queues = max; 449 info->max_mac_addrs = RTE_DIM(priv->mac); 450 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 451 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 452 info->rx_queue_offload_capa); 453 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 454 if (mlx5_get_ifname(dev, &ifname) == 0) 455 info->if_index = if_nametoindex(ifname); 456 info->reta_size = priv->reta_idx_n ? 457 priv->reta_idx_n : config->ind_table_max_size; 458 info->hash_key_size = rss_hash_default_key_len; 459 info->speed_capa = priv->link_speed_capa; 460 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 461 } 462 463 /** 464 * Get supported packet types. 465 * 466 * @param dev 467 * Pointer to Ethernet device structure. 468 * 469 * @return 470 * A pointer to the supported Packet types array. 
471 */ 472 const uint32_t * 473 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 474 { 475 static const uint32_t ptypes[] = { 476 /* refers to rxq_cq_to_pkt_type() */ 477 RTE_PTYPE_L2_ETHER, 478 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 479 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 480 RTE_PTYPE_L4_NONFRAG, 481 RTE_PTYPE_L4_FRAG, 482 RTE_PTYPE_L4_TCP, 483 RTE_PTYPE_L4_UDP, 484 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 485 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 486 RTE_PTYPE_INNER_L4_NONFRAG, 487 RTE_PTYPE_INNER_L4_FRAG, 488 RTE_PTYPE_INNER_L4_TCP, 489 RTE_PTYPE_INNER_L4_UDP, 490 RTE_PTYPE_UNKNOWN 491 }; 492 493 if (dev->rx_pkt_burst == mlx5_rx_burst || 494 dev->rx_pkt_burst == mlx5_rx_burst_vec) 495 return ptypes; 496 return NULL; 497 } 498 499 /** 500 * DPDK callback to retrieve physical link information. 501 * 502 * @param dev 503 * Pointer to Ethernet device structure. 504 * @param[out] link 505 * Storage for current link status. 506 * 507 * @return 508 * 0 on success, a negative errno value otherwise and rte_errno is set. 509 */ 510 static int 511 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 512 struct rte_eth_link *link) 513 { 514 struct priv *priv = dev->data->dev_private; 515 struct ethtool_cmd edata = { 516 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. 
*/ 517 }; 518 struct ifreq ifr; 519 struct rte_eth_link dev_link; 520 int link_speed = 0; 521 int ret; 522 523 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 524 if (ret) { 525 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 526 dev->data->port_id, strerror(rte_errno)); 527 return ret; 528 } 529 memset(&dev_link, 0, sizeof(dev_link)); 530 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 531 (ifr.ifr_flags & IFF_RUNNING)); 532 ifr.ifr_data = (void *)&edata; 533 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 534 if (ret) { 535 DRV_LOG(WARNING, 536 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 537 dev->data->port_id, strerror(rte_errno)); 538 return ret; 539 } 540 link_speed = ethtool_cmd_speed(&edata); 541 if (link_speed == -1) 542 dev_link.link_speed = 0; 543 else 544 dev_link.link_speed = link_speed; 545 priv->link_speed_capa = 0; 546 if (edata.supported & SUPPORTED_Autoneg) 547 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 548 if (edata.supported & (SUPPORTED_1000baseT_Full | 549 SUPPORTED_1000baseKX_Full)) 550 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 551 if (edata.supported & SUPPORTED_10000baseKR_Full) 552 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 553 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 554 SUPPORTED_40000baseCR4_Full | 555 SUPPORTED_40000baseSR4_Full | 556 SUPPORTED_40000baseLR4_Full)) 557 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 558 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 559 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 560 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 561 ETH_LINK_SPEED_FIXED); 562 if ((dev_link.link_speed && !dev_link.link_status) || 563 (!dev_link.link_speed && dev_link.link_status)) { 564 rte_errno = EAGAIN; 565 return -rte_errno; 566 } 567 *link = dev_link; 568 return 0; 569 } 570 571 /** 572 * Retrieve physical link information (unlocked version using new ioctl). 573 * 574 * @param dev 575 * Pointer to Ethernet device structure. 
576 * @param[out] link 577 * Storage for current link status. 578 * 579 * @return 580 * 0 on success, a negative errno value otherwise and rte_errno is set. 581 */ 582 static int 583 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 584 struct rte_eth_link *link) 585 586 { 587 struct priv *priv = dev->data->dev_private; 588 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 589 struct ifreq ifr; 590 struct rte_eth_link dev_link; 591 uint64_t sc; 592 int ret; 593 594 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 595 if (ret) { 596 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 597 dev->data->port_id, strerror(rte_errno)); 598 return ret; 599 } 600 memset(&dev_link, 0, sizeof(dev_link)); 601 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 602 (ifr.ifr_flags & IFF_RUNNING)); 603 ifr.ifr_data = (void *)&gcmd; 604 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 605 if (ret) { 606 DRV_LOG(DEBUG, 607 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 608 " failed: %s", 609 dev->data->port_id, strerror(rte_errno)); 610 return ret; 611 } 612 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 613 614 alignas(struct ethtool_link_settings) 615 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 616 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 617 struct ethtool_link_settings *ecmd = (void *)data; 618 619 *ecmd = gcmd; 620 ifr.ifr_data = (void *)ecmd; 621 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 622 if (ret) { 623 DRV_LOG(DEBUG, 624 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 625 " failed: %s", 626 dev->data->port_id, strerror(rte_errno)); 627 return ret; 628 } 629 dev_link.link_speed = ecmd->speed; 630 sc = ecmd->link_mode_masks[0] | 631 ((uint64_t)ecmd->link_mode_masks[1] << 32); 632 priv->link_speed_capa = 0; 633 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 634 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 635 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 636 
MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 637 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 638 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 639 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 640 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 641 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 642 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 643 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 644 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 645 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 646 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 647 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 648 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 649 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 650 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 651 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 652 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 653 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 654 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 655 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 656 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 657 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 658 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 659 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 660 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 661 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 662 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 663 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 664 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 665 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 666 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 667 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 
668 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 669 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 670 ETH_LINK_SPEED_FIXED); 671 if ((dev_link.link_speed && !dev_link.link_status) || 672 (!dev_link.link_speed && dev_link.link_status)) { 673 rte_errno = EAGAIN; 674 return -rte_errno; 675 } 676 *link = dev_link; 677 return 0; 678 } 679 680 /** 681 * DPDK callback to retrieve physical link information. 682 * 683 * @param dev 684 * Pointer to Ethernet device structure. 685 * @param wait_to_complete 686 * Wait for request completion. 687 * 688 * @return 689 * 0 if link status was not updated, positive if it was, a negative errno 690 * value otherwise and rte_errno is set. 691 */ 692 int 693 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 694 { 695 int ret; 696 struct rte_eth_link dev_link; 697 time_t start_time = time(NULL); 698 699 do { 700 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 701 if (ret) 702 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 703 if (ret == 0) 704 break; 705 /* Handle wait to complete situation. */ 706 if (wait_to_complete && ret == -EAGAIN) { 707 if (abs((int)difftime(time(NULL), start_time)) < 708 MLX5_LINK_STATUS_TIMEOUT) { 709 usleep(0); 710 continue; 711 } else { 712 rte_errno = EBUSY; 713 return -rte_errno; 714 } 715 } else if (ret < 0) { 716 return ret; 717 } 718 } while (wait_to_complete); 719 ret = !!memcmp(&dev->data->dev_link, &dev_link, 720 sizeof(struct rte_eth_link)); 721 dev->data->dev_link = dev_link; 722 return ret; 723 } 724 725 /** 726 * DPDK callback to change the MTU. 727 * 728 * @param dev 729 * Pointer to Ethernet device structure. 730 * @param in_mtu 731 * New MTU. 732 * 733 * @return 734 * 0 on success, a negative errno value otherwise and rte_errno is set. 
735 */ 736 int 737 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 738 { 739 struct priv *priv = dev->data->dev_private; 740 uint16_t kern_mtu = 0; 741 int ret; 742 743 ret = mlx5_get_mtu(dev, &kern_mtu); 744 if (ret) 745 return ret; 746 /* Set kernel interface MTU first. */ 747 ret = mlx5_set_mtu(dev, mtu); 748 if (ret) 749 return ret; 750 ret = mlx5_get_mtu(dev, &kern_mtu); 751 if (ret) 752 return ret; 753 if (kern_mtu == mtu) { 754 priv->mtu = mtu; 755 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 756 dev->data->port_id, mtu); 757 return 0; 758 } 759 rte_errno = EAGAIN; 760 return -rte_errno; 761 } 762 763 /** 764 * DPDK callback to get flow control status. 765 * 766 * @param dev 767 * Pointer to Ethernet device structure. 768 * @param[out] fc_conf 769 * Flow control output buffer. 770 * 771 * @return 772 * 0 on success, a negative errno value otherwise and rte_errno is set. 773 */ 774 int 775 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 776 { 777 struct ifreq ifr; 778 struct ethtool_pauseparam ethpause = { 779 .cmd = ETHTOOL_GPAUSEPARAM 780 }; 781 int ret; 782 783 ifr.ifr_data = (void *)ðpause; 784 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 785 if (ret) { 786 DRV_LOG(WARNING, 787 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 788 " %s", 789 dev->data->port_id, strerror(rte_errno)); 790 return ret; 791 } 792 fc_conf->autoneg = ethpause.autoneg; 793 if (ethpause.rx_pause && ethpause.tx_pause) 794 fc_conf->mode = RTE_FC_FULL; 795 else if (ethpause.rx_pause) 796 fc_conf->mode = RTE_FC_RX_PAUSE; 797 else if (ethpause.tx_pause) 798 fc_conf->mode = RTE_FC_TX_PAUSE; 799 else 800 fc_conf->mode = RTE_FC_NONE; 801 return 0; 802 } 803 804 /** 805 * DPDK callback to modify flow control parameters. 806 * 807 * @param dev 808 * Pointer to Ethernet device structure. 809 * @param[in] fc_conf 810 * Flow control parameters. 811 * 812 * @return 813 * 0 on success, a negative errno value otherwise and rte_errno is set. 
814 */ 815 int 816 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 817 { 818 struct ifreq ifr; 819 struct ethtool_pauseparam ethpause = { 820 .cmd = ETHTOOL_SPAUSEPARAM 821 }; 822 int ret; 823 824 ifr.ifr_data = (void *)ðpause; 825 ethpause.autoneg = fc_conf->autoneg; 826 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 827 (fc_conf->mode & RTE_FC_RX_PAUSE)) 828 ethpause.rx_pause = 1; 829 else 830 ethpause.rx_pause = 0; 831 832 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 833 (fc_conf->mode & RTE_FC_TX_PAUSE)) 834 ethpause.tx_pause = 1; 835 else 836 ethpause.tx_pause = 0; 837 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 838 if (ret) { 839 DRV_LOG(WARNING, 840 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 841 " failed: %s", 842 dev->data->port_id, strerror(rte_errno)); 843 return ret; 844 } 845 return 0; 846 } 847 848 /** 849 * Get PCI information from struct ibv_device. 850 * 851 * @param device 852 * Pointer to Ethernet device structure. 853 * @param[out] pci_addr 854 * PCI bus address output buffer. 855 * 856 * @return 857 * 0 on success, a negative errno value otherwise and rte_errno is set. 858 */ 859 int 860 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 861 struct rte_pci_addr *pci_addr) 862 { 863 FILE *file; 864 char line[32]; 865 MKSTR(path, "%s/device/uevent", device->ibdev_path); 866 867 file = fopen(path, "rb"); 868 if (file == NULL) { 869 rte_errno = errno; 870 return -rte_errno; 871 } 872 while (fgets(line, sizeof(line), file) == line) { 873 size_t len = strlen(line); 874 int ret; 875 876 /* Truncate long lines. */ 877 if (len == (sizeof(line) - 1)) 878 while (line[(len - 1)] != '\n') { 879 ret = fgetc(file); 880 if (ret == EOF) 881 break; 882 line[(len - 1)] = ret; 883 } 884 /* Extract information. 
*/ 885 if (sscanf(line, 886 "PCI_SLOT_NAME=" 887 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 888 &pci_addr->domain, 889 &pci_addr->bus, 890 &pci_addr->devid, 891 &pci_addr->function) == 4) { 892 ret = 0; 893 break; 894 } 895 } 896 fclose(file); 897 return 0; 898 } 899 900 /** 901 * Device status handler. 902 * 903 * @param dev 904 * Pointer to Ethernet device. 905 * @param events 906 * Pointer to event flags holder. 907 * 908 * @return 909 * Events bitmap of callback process which can be called immediately. 910 */ 911 static uint32_t 912 mlx5_dev_status_handler(struct rte_eth_dev *dev) 913 { 914 struct priv *priv = dev->data->dev_private; 915 struct ibv_async_event event; 916 uint32_t ret = 0; 917 918 if (mlx5_link_update(dev, 0) == -EAGAIN) { 919 usleep(0); 920 return 0; 921 } 922 /* Read all message and acknowledge them. */ 923 for (;;) { 924 if (mlx5_glue->get_async_event(priv->ctx, &event)) 925 break; 926 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 927 event.event_type == IBV_EVENT_PORT_ERR) && 928 (dev->data->dev_conf.intr_conf.lsc == 1)) 929 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 930 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 931 dev->data->dev_conf.intr_conf.rmv == 1) 932 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 933 else 934 DRV_LOG(DEBUG, 935 "port %u event type %d on not handled", 936 dev->data->port_id, event.event_type); 937 mlx5_glue->ack_async_event(&event); 938 } 939 return ret; 940 } 941 942 /** 943 * Handle interrupts from the NIC. 944 * 945 * @param[in] intr_handle 946 * Interrupt handler. 947 * @param cb_arg 948 * Callback argument. 
949 */ 950 void 951 mlx5_dev_interrupt_handler(void *cb_arg) 952 { 953 struct rte_eth_dev *dev = cb_arg; 954 uint32_t events; 955 956 events = mlx5_dev_status_handler(dev); 957 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 958 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 959 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 960 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 961 } 962 963 /** 964 * Handle interrupts from the socket. 965 * 966 * @param cb_arg 967 * Callback argument. 968 */ 969 static void 970 mlx5_dev_handler_socket(void *cb_arg) 971 { 972 struct rte_eth_dev *dev = cb_arg; 973 974 mlx5_socket_handle(dev); 975 } 976 977 /** 978 * Uninstall interrupt handler. 979 * 980 * @param dev 981 * Pointer to Ethernet device. 982 */ 983 void 984 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 985 { 986 struct priv *priv = dev->data->dev_private; 987 988 if (dev->data->dev_conf.intr_conf.lsc || 989 dev->data->dev_conf.intr_conf.rmv) 990 rte_intr_callback_unregister(&priv->intr_handle, 991 mlx5_dev_interrupt_handler, dev); 992 if (priv->primary_socket) 993 rte_intr_callback_unregister(&priv->intr_handle_socket, 994 mlx5_dev_handler_socket, dev); 995 priv->intr_handle.fd = 0; 996 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 997 priv->intr_handle_socket.fd = 0; 998 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 999 } 1000 1001 /** 1002 * Install interrupt handler. 1003 * 1004 * @param dev 1005 * Pointer to Ethernet device. 
1006 */ 1007 void 1008 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1009 { 1010 struct priv *priv = dev->data->dev_private; 1011 int ret; 1012 int flags; 1013 1014 assert(priv->ctx->async_fd > 0); 1015 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1016 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1017 if (ret) { 1018 DRV_LOG(INFO, 1019 "port %u failed to change file descriptor async event" 1020 " queue", 1021 dev->data->port_id); 1022 dev->data->dev_conf.intr_conf.lsc = 0; 1023 dev->data->dev_conf.intr_conf.rmv = 0; 1024 } 1025 if (dev->data->dev_conf.intr_conf.lsc || 1026 dev->data->dev_conf.intr_conf.rmv) { 1027 priv->intr_handle.fd = priv->ctx->async_fd; 1028 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1029 rte_intr_callback_register(&priv->intr_handle, 1030 mlx5_dev_interrupt_handler, dev); 1031 } 1032 ret = mlx5_socket_init(dev); 1033 if (ret) 1034 DRV_LOG(ERR, "port %u cannot initialise socket: %s", 1035 dev->data->port_id, strerror(rte_errno)); 1036 else if (priv->primary_socket) { 1037 priv->intr_handle_socket.fd = priv->primary_socket; 1038 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1039 rte_intr_callback_register(&priv->intr_handle_socket, 1040 mlx5_dev_handler_socket, dev); 1041 } 1042 } 1043 1044 /** 1045 * DPDK callback to bring the link DOWN. 1046 * 1047 * @param dev 1048 * Pointer to Ethernet device structure. 1049 * 1050 * @return 1051 * 0 on success, a negative errno value otherwise and rte_errno is set. 1052 */ 1053 int 1054 mlx5_set_link_down(struct rte_eth_dev *dev) 1055 { 1056 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1057 } 1058 1059 /** 1060 * DPDK callback to bring the link UP. 1061 * 1062 * @param dev 1063 * Pointer to Ethernet device structure. 1064 * 1065 * @return 1066 * 0 on success, a negative errno value otherwise and rte_errno is set. 
1067 */ 1068 int 1069 mlx5_set_link_up(struct rte_eth_dev *dev) 1070 { 1071 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1072 } 1073 1074 /** 1075 * Configure the TX function to use. 1076 * 1077 * @param dev 1078 * Pointer to private data structure. 1079 * 1080 * @return 1081 * Pointer to selected Tx burst function. 1082 */ 1083 eth_tx_burst_t 1084 mlx5_select_tx_function(struct rte_eth_dev *dev) 1085 { 1086 struct priv *priv = dev->data->dev_private; 1087 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1088 struct mlx5_dev_config *config = &priv->config; 1089 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1090 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1091 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1092 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1093 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1094 1095 assert(priv != NULL); 1096 /* Select appropriate TX function. */ 1097 if (vlan_insert || tso) 1098 return tx_pkt_burst; 1099 if (config->mps == MLX5_MPW_ENHANCED) { 1100 if (mlx5_check_vec_tx_support(dev) > 0) { 1101 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1102 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1103 else 1104 tx_pkt_burst = mlx5_tx_burst_vec; 1105 DRV_LOG(DEBUG, 1106 "port %u selected enhanced MPW Tx vectorized" 1107 " function", 1108 dev->data->port_id); 1109 } else { 1110 tx_pkt_burst = mlx5_tx_burst_empw; 1111 DRV_LOG(DEBUG, 1112 "port %u selected enhanced MPW Tx function", 1113 dev->data->port_id); 1114 } 1115 } else if (config->mps && (config->txq_inline > 0)) { 1116 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1117 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", 1118 dev->data->port_id); 1119 } else if (config->mps) { 1120 tx_pkt_burst = mlx5_tx_burst_mpw; 1121 DRV_LOG(DEBUG, "port %u selected MPW Tx function", 1122 dev->data->port_id); 1123 } 1124 return tx_pkt_burst; 1125 } 1126 1127 /** 1128 * Configure the RX function to use. 1129 * 1130 * @param dev 1131 * Pointer to private data structure. 
1132 * 1133 * @return 1134 * Pointer to selected Rx burst function. 1135 */ 1136 eth_rx_burst_t 1137 mlx5_select_rx_function(struct rte_eth_dev *dev) 1138 { 1139 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1140 1141 assert(dev != NULL); 1142 if (mlx5_check_vec_rx_support(dev) > 0) { 1143 rx_pkt_burst = mlx5_rx_burst_vec; 1144 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1145 dev->data->port_id); 1146 } 1147 return rx_pkt_burst; 1148 } 1149 1150 /** 1151 * Check if mlx5 device was removed. 1152 * 1153 * @param dev 1154 * Pointer to Ethernet device structure. 1155 * 1156 * @return 1157 * 1 when device is removed, otherwise 0. 1158 */ 1159 int 1160 mlx5_is_removed(struct rte_eth_dev *dev) 1161 { 1162 struct ibv_device_attr device_attr; 1163 struct priv *priv = dev->data->dev_private; 1164 1165 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1166 return 1; 1167 return 0; 1168 } 1169