1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox. 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <netinet/in.h> 22 #include <linux/ethtool.h> 23 #include <linux/sockios.h> 24 #include <fcntl.h> 25 #include <stdalign.h> 26 #include <sys/un.h> 27 28 #include <rte_atomic.h> 29 #include <rte_ethdev_driver.h> 30 #include <rte_bus_pci.h> 31 #include <rte_mbuf.h> 32 #include <rte_common.h> 33 #include <rte_interrupts.h> 34 #include <rte_alarm.h> 35 #include <rte_malloc.h> 36 37 #include "mlx5.h" 38 #include "mlx5_glue.h" 39 #include "mlx5_rxtx.h" 40 #include "mlx5_utils.h" 41 42 /* Add defines in case the running kernel is not the same as user headers. */ 43 #ifndef ETHTOOL_GLINKSETTINGS 44 struct ethtool_link_settings { 45 uint32_t cmd; 46 uint32_t speed; 47 uint8_t duplex; 48 uint8_t port; 49 uint8_t phy_address; 50 uint8_t autoneg; 51 uint8_t mdio_support; 52 uint8_t eth_to_mdix; 53 uint8_t eth_tp_mdix_ctrl; 54 int8_t link_mode_masks_nwords; 55 uint32_t reserved[8]; 56 uint32_t link_mode_masks[]; 57 }; 58 59 #define ETHTOOL_GLINKSETTINGS 0x0000004c 60 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 61 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 62 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 63 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 64 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 65 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 66 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 67 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 68 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 69 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 70 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 71 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 72 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 73 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 74 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 75 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 76 #endif 77 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 78 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 79 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 80 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 81 #endif 82 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 83 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 84 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 85 #endif 86 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 87 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 88 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 89 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 90 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 91 #endif 92 93 /** 94 * Get interface name from private structure. 95 * 96 * @param[in] dev 97 * Pointer to Ethernet device. 98 * @param[out] ifname 99 * Interface name output buffer. 100 * 101 * @return 102 * 0 on success, a negative errno value otherwise and rte_errno is set. 103 */ 104 int 105 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 106 { 107 struct priv *priv = dev->data->dev_private; 108 DIR *dir; 109 struct dirent *dent; 110 unsigned int dev_type = 0; 111 unsigned int dev_port_prev = ~0u; 112 char match[IF_NAMESIZE] = ""; 113 114 { 115 MKSTR(path, "%s/device/net", priv->ibdev_path); 116 117 dir = opendir(path); 118 if (dir == NULL) { 119 rte_errno = errno; 120 return -rte_errno; 121 } 122 } 123 while ((dent = readdir(dir)) != NULL) { 124 char *name = dent->d_name; 125 FILE *file; 126 unsigned int dev_port; 127 int r; 128 129 if ((name[0] == '.') && 130 ((name[1] == '\0') || 131 ((name[1] == '.') && (name[2] == '\0')))) 132 continue; 133 134 MKSTR(path, "%s/device/net/%s/%s", 135 priv->ibdev_path, name, 136 (dev_type ? "dev_id" : "dev_port")); 137 138 file = fopen(path, "rb"); 139 if (file == NULL) { 140 if (errno != ENOENT) 141 continue; 142 /* 143 * Switch to dev_id when dev_port does not exist as 144 * is the case with Linux kernel versions < 3.15. 145 */ 146 try_dev_id: 147 match[0] = '\0'; 148 if (dev_type) 149 break; 150 dev_type = 1; 151 dev_port_prev = ~0u; 152 rewinddir(dir); 153 continue; 154 } 155 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 156 fclose(file); 157 if (r != 1) 158 continue; 159 /* 160 * Switch to dev_id when dev_port returns the same value for 161 * all ports. May happen when using a MOFED release older than 162 * 3.0 with a Linux kernel >= 3.15. 163 */ 164 if (dev_port == dev_port_prev) 165 goto try_dev_id; 166 dev_port_prev = dev_port; 167 if (dev_port == (priv->port - 1u)) 168 snprintf(match, sizeof(match), "%s", name); 169 } 170 closedir(dir); 171 if (match[0] == '\0') { 172 rte_errno = ENOENT; 173 return -rte_errno; 174 } 175 strncpy(*ifname, match, sizeof(*ifname)); 176 return 0; 177 } 178 179 /** 180 * Perform ifreq ioctl() on associated Ethernet device. 181 * 182 * @param[in] dev 183 * Pointer to Ethernet device. 184 * @param req 185 * Request number to pass to ioctl(). 186 * @param[out] ifr 187 * Interface request structure output buffer. 188 * 189 * @return 190 * 0 on success, a negative errno value otherwise and rte_errno is set. 191 */ 192 int 193 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 194 { 195 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 196 int ret = 0; 197 198 if (sock == -1) { 199 rte_errno = errno; 200 return -rte_errno; 201 } 202 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 203 if (ret) 204 goto error; 205 ret = ioctl(sock, req, ifr); 206 if (ret == -1) { 207 rte_errno = errno; 208 goto error; 209 } 210 close(sock); 211 return 0; 212 error: 213 close(sock); 214 return -rte_errno; 215 } 216 217 /** 218 * Get device MTU. 219 * 220 * @param dev 221 * Pointer to Ethernet device. 222 * @param[out] mtu 223 * MTU value output buffer. 224 * 225 * @return 226 * 0 on success, a negative errno value otherwise and rte_errno is set. 227 */ 228 int 229 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 230 { 231 struct ifreq request; 232 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 233 234 if (ret) 235 return ret; 236 *mtu = request.ifr_mtu; 237 return 0; 238 } 239 240 /** 241 * Set device MTU. 242 * 243 * @param dev 244 * Pointer to Ethernet device. 245 * @param mtu 246 * MTU value to set. 247 * 248 * @return 249 * 0 on success, a negative errno value otherwise and rte_errno is set. 250 */ 251 static int 252 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 253 { 254 struct ifreq request = { .ifr_mtu = mtu, }; 255 256 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 257 } 258 259 /** 260 * Set device flags. 261 * 262 * @param dev 263 * Pointer to Ethernet device. 264 * @param keep 265 * Bitmask for flags that must remain untouched. 266 * @param flags 267 * Bitmask for flags to modify. 268 * 269 * @return 270 * 0 on success, a negative errno value otherwise and rte_errno is set. 271 */ 272 int 273 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 274 { 275 struct ifreq request; 276 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 277 278 if (ret) 279 return ret; 280 request.ifr_flags &= keep; 281 request.ifr_flags |= flags & ~keep; 282 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 283 } 284 285 /** 286 * DPDK callback for Ethernet device configuration. 287 * 288 * @param dev 289 * Pointer to Ethernet device structure. 290 * 291 * @return 292 * 0 on success, a negative errno value otherwise and rte_errno is set. 293 */ 294 int 295 mlx5_dev_configure(struct rte_eth_dev *dev) 296 { 297 struct priv *priv = dev->data->dev_private; 298 unsigned int rxqs_n = dev->data->nb_rx_queues; 299 unsigned int txqs_n = dev->data->nb_tx_queues; 300 unsigned int i; 301 unsigned int j; 302 unsigned int reta_idx_n; 303 const uint8_t use_app_rss_key = 304 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 305 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev); 306 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 307 uint64_t supp_rx_offloads = 308 (mlx5_get_rx_port_offloads() | 309 mlx5_get_rx_queue_offloads(dev)); 310 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 311 int ret = 0; 312 313 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 314 DRV_LOG(ERR, 315 "port %u some Tx offloads are not supported requested" 316 " 0x%" PRIx64 " supported 0x%" PRIx64, 317 dev->data->port_id, tx_offloads, supp_tx_offloads); 318 rte_errno = ENOTSUP; 319 return -rte_errno; 320 } 321 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 322 DRV_LOG(ERR, 323 "port %u some Rx offloads are not supported requested" 324 " 0x%" PRIx64 " supported 0x%" PRIx64, 325 dev->data->port_id, rx_offloads, supp_rx_offloads); 326 rte_errno = ENOTSUP; 327 return -rte_errno; 328 } 329 if (use_app_rss_key && 330 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 331 rss_hash_default_key_len)) { 332 /* MLX5 RSS only support 40bytes key. */ 333 rte_errno = EINVAL; 334 return -rte_errno; 335 } 336 priv->rss_conf.rss_key = 337 rte_realloc(priv->rss_conf.rss_key, 338 rss_hash_default_key_len, 0); 339 if (!priv->rss_conf.rss_key) { 340 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 341 dev->data->port_id, rxqs_n); 342 rte_errno = ENOMEM; 343 return -rte_errno; 344 } 345 memcpy(priv->rss_conf.rss_key, 346 use_app_rss_key ? 347 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 348 rss_hash_default_key, 349 rss_hash_default_key_len); 350 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 351 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 352 priv->rxqs = (void *)dev->data->rx_queues; 353 priv->txqs = (void *)dev->data->tx_queues; 354 if (txqs_n != priv->txqs_n) { 355 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 356 dev->data->port_id, priv->txqs_n, txqs_n); 357 priv->txqs_n = txqs_n; 358 } 359 if (rxqs_n > priv->config.ind_table_max_size) { 360 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 361 dev->data->port_id, rxqs_n); 362 rte_errno = EINVAL; 363 return -rte_errno; 364 } 365 if (rxqs_n == priv->rxqs_n) 366 return 0; 367 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 368 dev->data->port_id, priv->rxqs_n, rxqs_n); 369 priv->rxqs_n = rxqs_n; 370 /* If the requested number of RX queues is not a power of two, use the 371 * maximum indirection table size for better balancing. 372 * The result is always rounded to the next power of two. */ 373 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 374 priv->config.ind_table_max_size : 375 rxqs_n)); 376 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 377 if (ret) 378 return ret; 379 /* When the number of RX queues is not a power of two, the remaining 380 * table entries are padded with reused WQs and hashes are not spread 381 * uniformly. */ 382 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 383 (*priv->reta_idx)[i] = j; 384 if (++j == rxqs_n) 385 j = 0; 386 } 387 return 0; 388 } 389 390 /** 391 * DPDK callback to get information about the device. 392 * 393 * @param dev 394 * Pointer to Ethernet device structure. 395 * @param[out] info 396 * Info structure output buffer. 397 */ 398 void 399 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 400 { 401 struct priv *priv = dev->data->dev_private; 402 struct mlx5_dev_config *config = &priv->config; 403 unsigned int max; 404 char ifname[IF_NAMESIZE]; 405 406 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 407 /* FIXME: we should ask the device for these values. */ 408 info->min_rx_bufsize = 32; 409 info->max_rx_pktlen = 65536; 410 /* 411 * Since we need one CQ per QP, the limit is the minimum number 412 * between the two values. 413 */ 414 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 415 priv->device_attr.orig_attr.max_qp); 416 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 417 if (max >= 65535) 418 max = 65535; 419 info->max_rx_queues = max; 420 info->max_tx_queues = max; 421 info->max_mac_addrs = RTE_DIM(priv->mac); 422 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 423 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 424 info->rx_queue_offload_capa); 425 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 426 if (mlx5_get_ifname(dev, &ifname) == 0) 427 info->if_index = if_nametoindex(ifname); 428 info->reta_size = priv->reta_idx_n ? 429 priv->reta_idx_n : config->ind_table_max_size; 430 info->hash_key_size = priv->rss_conf.rss_key_len; 431 info->speed_capa = priv->link_speed_capa; 432 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 433 } 434 435 /** 436 * Get supported packet types. 437 * 438 * @param dev 439 * Pointer to Ethernet device structure. 440 * 441 * @return 442 * A pointer to the supported Packet types array. 443 */ 444 const uint32_t * 445 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 446 { 447 static const uint32_t ptypes[] = { 448 /* refers to rxq_cq_to_pkt_type() */ 449 RTE_PTYPE_L2_ETHER, 450 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 451 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 452 RTE_PTYPE_L4_NONFRAG, 453 RTE_PTYPE_L4_FRAG, 454 RTE_PTYPE_L4_TCP, 455 RTE_PTYPE_L4_UDP, 456 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 457 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 458 RTE_PTYPE_INNER_L4_NONFRAG, 459 RTE_PTYPE_INNER_L4_FRAG, 460 RTE_PTYPE_INNER_L4_TCP, 461 RTE_PTYPE_INNER_L4_UDP, 462 RTE_PTYPE_UNKNOWN 463 }; 464 465 if (dev->rx_pkt_burst == mlx5_rx_burst || 466 dev->rx_pkt_burst == mlx5_rx_burst_vec) 467 return ptypes; 468 return NULL; 469 } 470 471 /** 472 * DPDK callback to retrieve physical link information. 473 * 474 * @param dev 475 * Pointer to Ethernet device structure. 476 * 477 * @return 478 * 0 on success, a negative errno value otherwise and rte_errno is set. 479 */ 480 static int 481 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev) 482 { 483 struct priv *priv = dev->data->dev_private; 484 struct ethtool_cmd edata = { 485 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 486 }; 487 struct ifreq ifr; 488 struct rte_eth_link dev_link; 489 int link_speed = 0; 490 int ret; 491 492 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 493 if (ret) { 494 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 495 dev->data->port_id, strerror(rte_errno)); 496 return ret; 497 } 498 memset(&dev_link, 0, sizeof(dev_link)); 499 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 500 (ifr.ifr_flags & IFF_RUNNING)); 501 ifr.ifr_data = (void *)&edata; 502 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 503 if (ret) { 504 DRV_LOG(WARNING, 505 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 506 dev->data->port_id, strerror(rte_errno)); 507 return ret; 508 } 509 link_speed = ethtool_cmd_speed(&edata); 510 if (link_speed == -1) 511 dev_link.link_speed = 0; 512 else 513 dev_link.link_speed = link_speed; 514 priv->link_speed_capa = 0; 515 if (edata.supported & SUPPORTED_Autoneg) 516 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 517 if (edata.supported & (SUPPORTED_1000baseT_Full | 518 SUPPORTED_1000baseKX_Full)) 519 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 520 if (edata.supported & SUPPORTED_10000baseKR_Full) 521 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 522 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 523 SUPPORTED_40000baseCR4_Full | 524 SUPPORTED_40000baseSR4_Full | 525 SUPPORTED_40000baseLR4_Full)) 526 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 527 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 528 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 529 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 530 ETH_LINK_SPEED_FIXED); 531 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 532 /* Link status changed. */ 533 dev->data->dev_link = dev_link; 534 return 0; 535 } 536 /* Link status is still the same. */ 537 rte_errno = EAGAIN; 538 return -rte_errno; 539 } 540 541 /** 542 * Retrieve physical link information (unlocked version using new ioctl). 543 * 544 * @param dev 545 * Pointer to Ethernet device structure. 546 * 547 * @return 548 * 0 on success, a negative errno value otherwise and rte_errno is set. 549 */ 550 static int 551 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev) 552 { 553 struct priv *priv = dev->data->dev_private; 554 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 555 struct ifreq ifr; 556 struct rte_eth_link dev_link; 557 uint64_t sc; 558 int ret; 559 560 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 561 if (ret) { 562 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 563 dev->data->port_id, strerror(rte_errno)); 564 return ret; 565 } 566 memset(&dev_link, 0, sizeof(dev_link)); 567 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 568 (ifr.ifr_flags & IFF_RUNNING)); 569 ifr.ifr_data = (void *)&gcmd; 570 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 571 if (ret) { 572 DRV_LOG(DEBUG, 573 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 574 " failed: %s", 575 dev->data->port_id, strerror(rte_errno)); 576 return ret; 577 } 578 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 579 580 alignas(struct ethtool_link_settings) 581 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 582 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 583 struct ethtool_link_settings *ecmd = (void *)data; 584 585 *ecmd = gcmd; 586 ifr.ifr_data = (void *)ecmd; 587 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 588 if (ret) { 589 DRV_LOG(DEBUG, 590 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)" 591 " failed: %s", 592 dev->data->port_id, strerror(rte_errno)); 593 return ret; 594 } 595 dev_link.link_speed = ecmd->speed; 596 sc = ecmd->link_mode_masks[0] | 597 ((uint64_t)ecmd->link_mode_masks[1] << 32); 598 priv->link_speed_capa = 0; 599 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 600 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 601 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 602 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 603 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 604 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 605 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 606 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 607 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 608 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 609 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 610 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 611 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 612 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 613 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 614 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 615 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 616 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 617 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 618 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 619 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 620 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 621 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 622 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 623 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 624 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 625 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 626 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 627 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 628 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 629 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 630 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 631 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 632 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 633 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 634 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 635 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 636 ETH_LINK_SPEED_FIXED); 637 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 638 /* Link status changed. */ 639 dev->data->dev_link = dev_link; 640 return 0; 641 } 642 /* Link status is still the same. */ 643 rte_errno = EAGAIN; 644 return -rte_errno; 645 } 646 647 /** 648 * DPDK callback to retrieve physical link information. 649 * 650 * @param dev 651 * Pointer to Ethernet device structure. 652 * @param wait_to_complete 653 * Wait for request completion (ignored). 654 * 655 * @return 656 * 0 on success, a negative errno value otherwise and rte_errno is set. 657 */ 658 int 659 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused) 660 { 661 int ret; 662 663 ret = mlx5_link_update_unlocked_gset(dev); 664 if (ret) 665 ret = mlx5_link_update_unlocked_gs(dev); 666 return 0; 667 } 668 669 /** 670 * DPDK callback to change the MTU. 671 * 672 * @param dev 673 * Pointer to Ethernet device structure. 674 * @param in_mtu 675 * New MTU. 676 * 677 * @return 678 * 0 on success, a negative errno value otherwise and rte_errno is set. 679 */ 680 int 681 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 682 { 683 struct priv *priv = dev->data->dev_private; 684 uint16_t kern_mtu = 0; 685 int ret; 686 687 ret = mlx5_get_mtu(dev, &kern_mtu); 688 if (ret) 689 return ret; 690 /* Set kernel interface MTU first. */ 691 ret = mlx5_set_mtu(dev, mtu); 692 if (ret) 693 return ret; 694 ret = mlx5_get_mtu(dev, &kern_mtu); 695 if (ret) 696 return ret; 697 if (kern_mtu == mtu) { 698 priv->mtu = mtu; 699 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 700 dev->data->port_id, mtu); 701 return 0; 702 } 703 rte_errno = EAGAIN; 704 return -rte_errno; 705 } 706 707 /** 708 * DPDK callback to get flow control status. 709 * 710 * @param dev 711 * Pointer to Ethernet device structure. 712 * @param[out] fc_conf 713 * Flow control output buffer. 714 * 715 * @return 716 * 0 on success, a negative errno value otherwise and rte_errno is set. 717 */ 718 int 719 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 720 { 721 struct ifreq ifr; 722 struct ethtool_pauseparam ethpause = { 723 .cmd = ETHTOOL_GPAUSEPARAM 724 }; 725 int ret; 726 727 ifr.ifr_data = (void *)ðpause; 728 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 729 if (ret) { 730 DRV_LOG(WARNING, 731 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 732 " %s", 733 dev->data->port_id, strerror(rte_errno)); 734 return ret; 735 } 736 fc_conf->autoneg = ethpause.autoneg; 737 if (ethpause.rx_pause && ethpause.tx_pause) 738 fc_conf->mode = RTE_FC_FULL; 739 else if (ethpause.rx_pause) 740 fc_conf->mode = RTE_FC_RX_PAUSE; 741 else if (ethpause.tx_pause) 742 fc_conf->mode = RTE_FC_TX_PAUSE; 743 else 744 fc_conf->mode = RTE_FC_NONE; 745 return 0; 746 } 747 748 /** 749 * DPDK callback to modify flow control parameters. 750 * 751 * @param dev 752 * Pointer to Ethernet device structure. 753 * @param[in] fc_conf 754 * Flow control parameters. 755 * 756 * @return 757 * 0 on success, a negative errno value otherwise and rte_errno is set. 758 */ 759 int 760 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 761 { 762 struct ifreq ifr; 763 struct ethtool_pauseparam ethpause = { 764 .cmd = ETHTOOL_SPAUSEPARAM 765 }; 766 int ret; 767 768 ifr.ifr_data = (void *)ðpause; 769 ethpause.autoneg = fc_conf->autoneg; 770 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 771 (fc_conf->mode & RTE_FC_RX_PAUSE)) 772 ethpause.rx_pause = 1; 773 else 774 ethpause.rx_pause = 0; 775 776 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 777 (fc_conf->mode & RTE_FC_TX_PAUSE)) 778 ethpause.tx_pause = 1; 779 else 780 ethpause.tx_pause = 0; 781 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 782 if (ret) { 783 DRV_LOG(WARNING, 784 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 785 " failed: %s", 786 dev->data->port_id, strerror(rte_errno)); 787 return ret; 788 } 789 return 0; 790 } 791 792 /** 793 * Get PCI information from struct ibv_device. 794 * 795 * @param device 796 * Pointer to Ethernet device structure. 797 * @param[out] pci_addr 798 * PCI bus address output buffer. 799 * 800 * @return 801 * 0 on success, a negative errno value otherwise and rte_errno is set. 802 */ 803 int 804 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 805 struct rte_pci_addr *pci_addr) 806 { 807 FILE *file; 808 char line[32]; 809 MKSTR(path, "%s/device/uevent", device->ibdev_path); 810 811 file = fopen(path, "rb"); 812 if (file == NULL) { 813 rte_errno = errno; 814 return -rte_errno; 815 } 816 while (fgets(line, sizeof(line), file) == line) { 817 size_t len = strlen(line); 818 int ret; 819 820 /* Truncate long lines. */ 821 if (len == (sizeof(line) - 1)) 822 while (line[(len - 1)] != '\n') { 823 ret = fgetc(file); 824 if (ret == EOF) 825 break; 826 line[(len - 1)] = ret; 827 } 828 /* Extract information. */ 829 if (sscanf(line, 830 "PCI_SLOT_NAME=" 831 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 832 &pci_addr->domain, 833 &pci_addr->bus, 834 &pci_addr->devid, 835 &pci_addr->function) == 4) { 836 ret = 0; 837 break; 838 } 839 } 840 fclose(file); 841 return 0; 842 } 843 844 /** 845 * Update the link status. 846 * 847 * @param dev 848 * Pointer to Ethernet device. 849 * 850 * @return 851 * Zero if the callback process can be called immediately, negative errno 852 * value otherwise and rte_errno is set. 853 */ 854 static int 855 mlx5_link_status_update(struct rte_eth_dev *dev) 856 { 857 struct priv *priv = dev->data->dev_private; 858 struct rte_eth_link *link = &dev->data->dev_link; 859 int ret; 860 861 ret = mlx5_link_update(dev, 0); 862 if (ret) 863 return ret; 864 if (((link->link_speed == 0) && link->link_status) || 865 ((link->link_speed != 0) && !link->link_status)) { 866 /* 867 * Inconsistent status. Event likely occurred before the 868 * kernel netdevice exposes the new status. 869 */ 870 if (!priv->pending_alarm) { 871 priv->pending_alarm = 1; 872 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 873 mlx5_dev_link_status_handler, 874 priv->dev); 875 } 876 return 1; 877 } else if (unlikely(priv->pending_alarm)) { 878 /* Link interrupt occurred while alarm is already scheduled. */ 879 priv->pending_alarm = 0; 880 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev); 881 } 882 return 0; 883 } 884 885 /** 886 * Device status handler. 887 * 888 * @param dev 889 * Pointer to Ethernet device. 890 * @param events 891 * Pointer to event flags holder. 892 * 893 * @return 894 * Events bitmap of callback process which can be called immediately. 895 */ 896 static uint32_t 897 mlx5_dev_status_handler(struct rte_eth_dev *dev) 898 { 899 struct priv *priv = dev->data->dev_private; 900 struct ibv_async_event event; 901 uint32_t ret = 0; 902 903 /* Read all message and acknowledge them. */ 904 for (;;) { 905 if (mlx5_glue->get_async_event(priv->ctx, &event)) 906 break; 907 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 908 event.event_type == IBV_EVENT_PORT_ERR) && 909 (dev->data->dev_conf.intr_conf.lsc == 1)) 910 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 911 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 912 dev->data->dev_conf.intr_conf.rmv == 1) 913 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 914 else 915 DRV_LOG(DEBUG, 916 "port %u event type %d on not handled", 917 dev->data->port_id, event.event_type); 918 mlx5_glue->ack_async_event(&event); 919 } 920 if (ret & (1 << RTE_ETH_EVENT_INTR_LSC)) 921 if (mlx5_link_status_update(dev)) 922 ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC); 923 return ret; 924 } 925 926 /** 927 * Handle delayed link status event. 928 * 929 * @param arg 930 * Registered argument. 931 */ 932 void 933 mlx5_dev_link_status_handler(void *arg) 934 { 935 struct rte_eth_dev *dev = arg; 936 struct priv *priv = dev->data->dev_private; 937 int ret; 938 939 priv->pending_alarm = 0; 940 ret = mlx5_link_status_update(dev); 941 if (!ret) 942 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 943 } 944 945 /** 946 * Handle interrupts from the NIC. 947 * 948 * @param[in] intr_handle 949 * Interrupt handler. 950 * @param cb_arg 951 * Callback argument. 952 */ 953 void 954 mlx5_dev_interrupt_handler(void *cb_arg) 955 { 956 struct rte_eth_dev *dev = cb_arg; 957 uint32_t events; 958 959 events = mlx5_dev_status_handler(dev); 960 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 961 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 962 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 963 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 964 } 965 966 /** 967 * Handle interrupts from the socket. 968 * 969 * @param cb_arg 970 * Callback argument. 971 */ 972 static void 973 mlx5_dev_handler_socket(void *cb_arg) 974 { 975 struct rte_eth_dev *dev = cb_arg; 976 977 mlx5_socket_handle(dev); 978 } 979 980 /** 981 * Uninstall interrupt handler. 982 * 983 * @param dev 984 * Pointer to Ethernet device. 985 */ 986 void 987 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 988 { 989 struct priv *priv = dev->data->dev_private; 990 991 if (dev->data->dev_conf.intr_conf.lsc || 992 dev->data->dev_conf.intr_conf.rmv) 993 rte_intr_callback_unregister(&priv->intr_handle, 994 mlx5_dev_interrupt_handler, dev); 995 if (priv->primary_socket) 996 rte_intr_callback_unregister(&priv->intr_handle_socket, 997 mlx5_dev_handler_socket, dev); 998 if (priv->pending_alarm) { 999 priv->pending_alarm = 0; 1000 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 1001 } 1002 priv->intr_handle.fd = 0; 1003 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1004 priv->intr_handle_socket.fd = 0; 1005 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 1006 } 1007 1008 /** 1009 * Install interrupt handler. 1010 * 1011 * @param dev 1012 * Pointer to Ethernet device. 1013 */ 1014 void 1015 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1016 { 1017 struct priv *priv = dev->data->dev_private; 1018 int ret; 1019 int flags; 1020 1021 assert(priv->ctx->async_fd > 0); 1022 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1023 ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1024 if (ret) { 1025 DRV_LOG(INFO, 1026 "port %u failed to change file descriptor async event" 1027 " queue", 1028 dev->data->port_id); 1029 dev->data->dev_conf.intr_conf.lsc = 0; 1030 dev->data->dev_conf.intr_conf.rmv = 0; 1031 } 1032 if (dev->data->dev_conf.intr_conf.lsc || 1033 dev->data->dev_conf.intr_conf.rmv) { 1034 priv->intr_handle.fd = priv->ctx->async_fd; 1035 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1036 rte_intr_callback_register(&priv->intr_handle, 1037 mlx5_dev_interrupt_handler, dev); 1038 } 1039 ret = mlx5_socket_init(dev); 1040 if (ret) 1041 DRV_LOG(ERR, "port %u cannot initialise socket: %s", 1042 dev->data->port_id, strerror(rte_errno)); 1043 else if (priv->primary_socket) { 1044 priv->intr_handle_socket.fd = priv->primary_socket; 1045 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1046 rte_intr_callback_register(&priv->intr_handle_socket, 1047 mlx5_dev_handler_socket, dev); 1048 } 1049 } 1050 1051 /** 1052 * DPDK callback to bring the link DOWN. 1053 * 1054 * @param dev 1055 * Pointer to Ethernet device structure. 1056 * 1057 * @return 1058 * 0 on success, a negative errno value otherwise and rte_errno is set. 1059 */ 1060 int 1061 mlx5_set_link_down(struct rte_eth_dev *dev) 1062 { 1063 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1064 } 1065 1066 /** 1067 * DPDK callback to bring the link UP. 1068 * 1069 * @param dev 1070 * Pointer to Ethernet device structure. 1071 * 1072 * @return 1073 * 0 on success, a negative errno value otherwise and rte_errno is set. 1074 */ 1075 int 1076 mlx5_set_link_up(struct rte_eth_dev *dev) 1077 { 1078 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1079 } 1080 1081 /** 1082 * Configure the TX function to use. 1083 * 1084 * @param dev 1085 * Pointer to private data structure. 1086 * 1087 * @return 1088 * Pointer to selected Tx burst function. 1089 */ 1090 eth_tx_burst_t 1091 mlx5_select_tx_function(struct rte_eth_dev *dev) 1092 { 1093 struct priv *priv = dev->data->dev_private; 1094 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1095 struct mlx5_dev_config *config = &priv->config; 1096 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1097 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1098 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1099 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1100 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1101 1102 assert(priv != NULL); 1103 /* Select appropriate TX function. */ 1104 if (vlan_insert || tso) 1105 return tx_pkt_burst; 1106 if (config->mps == MLX5_MPW_ENHANCED) { 1107 if (mlx5_check_vec_tx_support(dev) > 0) { 1108 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1109 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1110 else 1111 tx_pkt_burst = mlx5_tx_burst_vec; 1112 DRV_LOG(DEBUG, 1113 "port %u selected enhanced MPW Tx vectorized" 1114 " function", 1115 dev->data->port_id); 1116 } else { 1117 tx_pkt_burst = mlx5_tx_burst_empw; 1118 DRV_LOG(DEBUG, 1119 "port %u selected enhanced MPW Tx function", 1120 dev->data->port_id); 1121 } 1122 } else if (config->mps && (config->txq_inline > 0)) { 1123 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1124 DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", 1125 dev->data->port_id); 1126 } else if (config->mps) { 1127 tx_pkt_burst = mlx5_tx_burst_mpw; 1128 DRV_LOG(DEBUG, "port %u selected MPW Tx function", 1129 dev->data->port_id); 1130 } 1131 return tx_pkt_burst; 1132 } 1133 1134 /** 1135 * Configure the RX function to use. 1136 * 1137 * @param dev 1138 * Pointer to private data structure. 1139 * 1140 * @return 1141 * Pointer to selected Rx burst function. 1142 */ 1143 eth_rx_burst_t 1144 mlx5_select_rx_function(struct rte_eth_dev *dev) 1145 { 1146 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1147 1148 assert(dev != NULL); 1149 if (mlx5_check_vec_rx_support(dev) > 0) { 1150 rx_pkt_burst = mlx5_rx_burst_vec; 1151 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 1152 dev->data->port_id); 1153 } 1154 return rx_pkt_burst; 1155 } 1156 1157 /** 1158 * Check if mlx5 device was removed. 1159 * 1160 * @param dev 1161 * Pointer to Ethernet device structure. 1162 * 1163 * @return 1164 * 1 when device is removed, otherwise 0. 1165 */ 1166 int 1167 mlx5_is_removed(struct rte_eth_dev *dev) 1168 { 1169 struct ibv_device_attr device_attr; 1170 struct priv *priv = dev->data->dev_private; 1171 1172 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1173 return 1; 1174 return 0; 1175 } 1176