1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox. 4 */ 5 6 #define _GNU_SOURCE 7 8 #include <stddef.h> 9 #include <assert.h> 10 #include <inttypes.h> 11 #include <unistd.h> 12 #include <stdint.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <stdlib.h> 16 #include <errno.h> 17 #include <dirent.h> 18 #include <net/if.h> 19 #include <sys/ioctl.h> 20 #include <sys/socket.h> 21 #include <sys/utsname.h> 22 #include <netinet/in.h> 23 #include <linux/ethtool.h> 24 #include <linux/sockios.h> 25 #include <linux/version.h> 26 #include <fcntl.h> 27 #include <stdalign.h> 28 #include <sys/un.h> 29 30 #include <rte_atomic.h> 31 #include <rte_ethdev_driver.h> 32 #include <rte_bus_pci.h> 33 #include <rte_mbuf.h> 34 #include <rte_common.h> 35 #include <rte_interrupts.h> 36 #include <rte_alarm.h> 37 #include <rte_malloc.h> 38 39 #include "mlx5.h" 40 #include "mlx5_glue.h" 41 #include "mlx5_rxtx.h" 42 #include "mlx5_utils.h" 43 44 /* Add defines in case the running kernel is not the same as user headers. 
*/
/*
 * Fallback declarations, compiled only when the included headers do not
 * already define ETHTOOL_GLINKSETTINGS.
 */
#ifndef ETHTOOL_GLINKSETTINGS
struct ethtool_link_settings {
	uint32_t cmd; /* Request code; this file sets ETHTOOL_GLINKSETTINGS. */
	uint32_t speed; /* Copied into the reported link speed. */
	uint8_t duplex; /* Compared against DUPLEX_HALF by this file. */
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	/*
	 * Number of 32-bit words per link mode mask. This file negates the
	 * value returned by a first query before sizing the mask storage.
	 */
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	/* This file allocates room for three masks of nwords words each. */
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
/* Bit indexes tested against the link mode mask through MLX5_BITSHIFT(). */
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
/*
 * 25G link mode bits, provided here unless HAVE_ETHTOOL_LINK_MODE_25G is
 * defined (presumably by the build system when the headers already have
 * them -- TODO confirm).
 */
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
/* 50G link mode bits, same guard scheme with HAVE_ETHTOOL_LINK_MODE_50G. */
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
/* 100G link mode bits, same guard scheme with HAVE_ETHTOOL_LINK_MODE_100G. */
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif

/**
 * Get interface name from private structure.
97 * 98 * @param[in] dev 99 * Pointer to Ethernet device. 100 * @param[out] ifname 101 * Interface name output buffer. 102 * 103 * @return 104 * 0 on success, -1 on failure and errno is set. 105 */ 106 int 107 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 108 { 109 struct priv *priv = dev->data->dev_private; 110 DIR *dir; 111 struct dirent *dent; 112 unsigned int dev_type = 0; 113 unsigned int dev_port_prev = ~0u; 114 char match[IF_NAMESIZE] = ""; 115 116 { 117 MKSTR(path, "%s/device/net", priv->ibdev_path); 118 119 dir = opendir(path); 120 if (dir == NULL) 121 return -1; 122 } 123 while ((dent = readdir(dir)) != NULL) { 124 char *name = dent->d_name; 125 FILE *file; 126 unsigned int dev_port; 127 int r; 128 129 if ((name[0] == '.') && 130 ((name[1] == '\0') || 131 ((name[1] == '.') && (name[2] == '\0')))) 132 continue; 133 134 MKSTR(path, "%s/device/net/%s/%s", 135 priv->ibdev_path, name, 136 (dev_type ? "dev_id" : "dev_port")); 137 138 file = fopen(path, "rb"); 139 if (file == NULL) { 140 if (errno != ENOENT) 141 continue; 142 /* 143 * Switch to dev_id when dev_port does not exist as 144 * is the case with Linux kernel versions < 3.15. 145 */ 146 try_dev_id: 147 match[0] = '\0'; 148 if (dev_type) 149 break; 150 dev_type = 1; 151 dev_port_prev = ~0u; 152 rewinddir(dir); 153 continue; 154 } 155 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 156 fclose(file); 157 if (r != 1) 158 continue; 159 /* 160 * Switch to dev_id when dev_port returns the same value for 161 * all ports. May happen when using a MOFED release older than 162 * 3.0 with a Linux kernel >= 3.15. 
163 */ 164 if (dev_port == dev_port_prev) 165 goto try_dev_id; 166 dev_port_prev = dev_port; 167 if (dev_port == (priv->port - 1u)) 168 snprintf(match, sizeof(match), "%s", name); 169 } 170 closedir(dir); 171 if (match[0] == '\0') 172 return -1; 173 strncpy(*ifname, match, sizeof(*ifname)); 174 return 0; 175 } 176 177 /** 178 * Perform ifreq ioctl() on associated Ethernet device. 179 * 180 * @param[in] dev 181 * Pointer to Ethernet device. 182 * @param req 183 * Request number to pass to ioctl(). 184 * @param[out] ifr 185 * Interface request structure output buffer. 186 * 187 * @return 188 * 0 on success, -1 on failure and errno is set. 189 */ 190 int 191 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 192 { 193 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 194 int ret = -1; 195 196 if (sock == -1) 197 return ret; 198 if (mlx5_get_ifname(dev, &ifr->ifr_name) == 0) 199 ret = ioctl(sock, req, ifr); 200 close(sock); 201 return ret; 202 } 203 204 /** 205 * Get device MTU. 206 * 207 * @param dev 208 * Pointer to Ethernet device. 209 * @param[out] mtu 210 * MTU value output buffer. 211 * 212 * @return 213 * 0 on success, -1 on failure and errno is set. 214 */ 215 int 216 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 217 { 218 struct ifreq request; 219 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 220 221 if (ret) 222 return ret; 223 *mtu = request.ifr_mtu; 224 return 0; 225 } 226 227 /** 228 * Set device MTU. 229 * 230 * @param dev 231 * Pointer to Ethernet device. 232 * @param mtu 233 * MTU value to set. 234 * 235 * @return 236 * 0 on success, -1 on failure and errno is set. 237 */ 238 static int 239 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 240 { 241 struct ifreq request = { .ifr_mtu = mtu, }; 242 243 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 244 } 245 246 /** 247 * Set device flags. 248 * 249 * @param dev 250 * Pointer to Ethernet device. 251 * @param keep 252 * Bitmask for flags that must remain untouched. 
253 * @param flags 254 * Bitmask for flags to modify. 255 * 256 * @return 257 * 0 on success, -1 on failure and errno is set. 258 */ 259 int 260 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 261 { 262 struct ifreq request; 263 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 264 265 if (ret) 266 return ret; 267 request.ifr_flags &= keep; 268 request.ifr_flags |= flags & ~keep; 269 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 270 } 271 272 /** 273 * DPDK callback for Ethernet device configuration. 274 * 275 * @param dev 276 * Pointer to Ethernet device structure. 277 * 278 * @return 279 * 0 on success, negative errno value on failure. 280 */ 281 int 282 mlx5_dev_configure(struct rte_eth_dev *dev) 283 { 284 struct priv *priv = dev->data->dev_private; 285 unsigned int rxqs_n = dev->data->nb_rx_queues; 286 unsigned int txqs_n = dev->data->nb_tx_queues; 287 unsigned int i; 288 unsigned int j; 289 unsigned int reta_idx_n; 290 const uint8_t use_app_rss_key = 291 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 292 uint64_t supp_tx_offloads = mlx5_get_tx_port_offloads(dev); 293 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 294 uint64_t supp_rx_offloads = 295 (mlx5_get_rx_port_offloads() | 296 mlx5_get_rx_queue_offloads(dev)); 297 uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; 298 299 if ((tx_offloads & supp_tx_offloads) != tx_offloads) { 300 ERROR("Some Tx offloads are not supported " 301 "requested 0x%" PRIx64 " supported 0x%" PRIx64, 302 tx_offloads, supp_tx_offloads); 303 return ENOTSUP; 304 } 305 if ((rx_offloads & supp_rx_offloads) != rx_offloads) { 306 ERROR("Some Rx offloads are not supported " 307 "requested 0x%" PRIx64 " supported 0x%" PRIx64, 308 rx_offloads, supp_rx_offloads); 309 return ENOTSUP; 310 } 311 if (use_app_rss_key && 312 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 313 rss_hash_default_key_len)) { 314 /* MLX5 RSS only support 40bytes key. 
*/ 315 return EINVAL; 316 } 317 priv->rss_conf.rss_key = 318 rte_realloc(priv->rss_conf.rss_key, 319 rss_hash_default_key_len, 0); 320 if (!priv->rss_conf.rss_key) { 321 ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n); 322 return ENOMEM; 323 } 324 memcpy(priv->rss_conf.rss_key, 325 use_app_rss_key ? 326 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 327 rss_hash_default_key, 328 rss_hash_default_key_len); 329 priv->rss_conf.rss_key_len = rss_hash_default_key_len; 330 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 331 priv->rxqs = (void *)dev->data->rx_queues; 332 priv->txqs = (void *)dev->data->tx_queues; 333 if (txqs_n != priv->txqs_n) { 334 INFO("%p: TX queues number update: %u -> %u", 335 (void *)dev, priv->txqs_n, txqs_n); 336 priv->txqs_n = txqs_n; 337 } 338 if (rxqs_n > priv->config.ind_table_max_size) { 339 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 340 return EINVAL; 341 } 342 if (rxqs_n == priv->rxqs_n) 343 return 0; 344 INFO("%p: RX queues number update: %u -> %u", 345 (void *)dev, priv->rxqs_n, rxqs_n); 346 priv->rxqs_n = rxqs_n; 347 /* If the requested number of RX queues is not a power of two, use the 348 * maximum indirection table size for better balancing. 349 * The result is always rounded to the next power of two. */ 350 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 351 priv->config.ind_table_max_size : 352 rxqs_n)); 353 if (mlx5_rss_reta_index_resize(dev, reta_idx_n)) 354 return ENOMEM; 355 /* When the number of RX queues is not a power of two, the remaining 356 * table entries are padded with reused WQs and hashes are not spread 357 * uniformly. */ 358 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 359 (*priv->reta_idx)[i] = j; 360 if (++j == rxqs_n) 361 j = 0; 362 } 363 return 0; 364 365 } 366 367 /** 368 * DPDK callback to get information about the device. 369 * 370 * @param dev 371 * Pointer to Ethernet device structure. 372 * @param[out] info 373 * Info structure output buffer. 
374 */ 375 void 376 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 377 { 378 struct priv *priv = dev->data->dev_private; 379 struct mlx5_dev_config *config = &priv->config; 380 unsigned int max; 381 char ifname[IF_NAMESIZE]; 382 383 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 384 /* FIXME: we should ask the device for these values. */ 385 info->min_rx_bufsize = 32; 386 info->max_rx_pktlen = 65536; 387 /* 388 * Since we need one CQ per QP, the limit is the minimum number 389 * between the two values. 390 */ 391 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 392 priv->device_attr.orig_attr.max_qp); 393 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 394 if (max >= 65535) 395 max = 65535; 396 info->max_rx_queues = max; 397 info->max_tx_queues = max; 398 info->max_mac_addrs = RTE_DIM(priv->mac); 399 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 400 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 401 info->rx_queue_offload_capa); 402 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 403 if (mlx5_get_ifname(dev, &ifname) == 0) 404 info->if_index = if_nametoindex(ifname); 405 info->reta_size = priv->reta_idx_n ? 406 priv->reta_idx_n : config->ind_table_max_size; 407 info->hash_key_size = priv->rss_conf.rss_key_len; 408 info->speed_capa = priv->link_speed_capa; 409 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 410 } 411 412 /** 413 * Get supported packet types. 414 * 415 * @param dev 416 * Pointer to Ethernet device structure. 417 * 418 * @return 419 * A pointer to the supported Packet types array. 
420 */ 421 const uint32_t * 422 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 423 { 424 static const uint32_t ptypes[] = { 425 /* refers to rxq_cq_to_pkt_type() */ 426 RTE_PTYPE_L2_ETHER, 427 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 428 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 429 RTE_PTYPE_L4_NONFRAG, 430 RTE_PTYPE_L4_FRAG, 431 RTE_PTYPE_L4_TCP, 432 RTE_PTYPE_L4_UDP, 433 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 434 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 435 RTE_PTYPE_INNER_L4_NONFRAG, 436 RTE_PTYPE_INNER_L4_FRAG, 437 RTE_PTYPE_INNER_L4_TCP, 438 RTE_PTYPE_INNER_L4_UDP, 439 RTE_PTYPE_UNKNOWN 440 }; 441 442 if (dev->rx_pkt_burst == mlx5_rx_burst || 443 dev->rx_pkt_burst == mlx5_rx_burst_vec) 444 return ptypes; 445 return NULL; 446 } 447 448 /** 449 * DPDK callback to retrieve physical link information. 450 * 451 * @param dev 452 * Pointer to Ethernet device structure. 453 * 454 * @return 455 * 0 on success, -1 on error. 456 */ 457 static int 458 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev) 459 { 460 struct priv *priv = dev->data->dev_private; 461 struct ethtool_cmd edata = { 462 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. 
*/ 463 }; 464 struct ifreq ifr; 465 struct rte_eth_link dev_link; 466 int link_speed = 0; 467 468 if (mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr)) { 469 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 470 return -1; 471 } 472 memset(&dev_link, 0, sizeof(dev_link)); 473 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 474 (ifr.ifr_flags & IFF_RUNNING)); 475 ifr.ifr_data = (void *)&edata; 476 if (mlx5_ifreq(dev, SIOCETHTOOL, &ifr)) { 477 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 478 strerror(errno)); 479 return -1; 480 } 481 link_speed = ethtool_cmd_speed(&edata); 482 if (link_speed == -1) 483 dev_link.link_speed = 0; 484 else 485 dev_link.link_speed = link_speed; 486 priv->link_speed_capa = 0; 487 if (edata.supported & SUPPORTED_Autoneg) 488 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 489 if (edata.supported & (SUPPORTED_1000baseT_Full | 490 SUPPORTED_1000baseKX_Full)) 491 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 492 if (edata.supported & SUPPORTED_10000baseKR_Full) 493 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 494 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 495 SUPPORTED_40000baseCR4_Full | 496 SUPPORTED_40000baseSR4_Full | 497 SUPPORTED_40000baseLR4_Full)) 498 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 499 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 500 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 501 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 502 ETH_LINK_SPEED_FIXED); 503 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 504 /* Link status changed. */ 505 dev->data->dev_link = dev_link; 506 return 0; 507 } 508 /* Link status is still the same. */ 509 return -1; 510 } 511 512 /** 513 * Retrieve physical link information (unlocked version using new ioctl). 514 * 515 * @param dev 516 * Pointer to Ethernet device structure. 517 * 518 * @return 519 * 0 on success, -1 on error. 
520 */ 521 static int 522 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev) 523 { 524 struct priv *priv = dev->data->dev_private; 525 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 526 struct ifreq ifr; 527 struct rte_eth_link dev_link; 528 uint64_t sc; 529 530 if (mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr)) { 531 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 532 return -1; 533 } 534 memset(&dev_link, 0, sizeof(dev_link)); 535 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 536 (ifr.ifr_flags & IFF_RUNNING)); 537 ifr.ifr_data = (void *)&gcmd; 538 if (mlx5_ifreq(dev, SIOCETHTOOL, &ifr)) { 539 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 540 strerror(errno)); 541 return -1; 542 } 543 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 544 545 alignas(struct ethtool_link_settings) 546 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 547 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 548 struct ethtool_link_settings *ecmd = (void *)data; 549 550 *ecmd = gcmd; 551 ifr.ifr_data = (void *)ecmd; 552 if (mlx5_ifreq(dev, SIOCETHTOOL, &ifr)) { 553 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 554 strerror(errno)); 555 return -1; 556 } 557 dev_link.link_speed = ecmd->speed; 558 sc = ecmd->link_mode_masks[0] | 559 ((uint64_t)ecmd->link_mode_masks[1] << 32); 560 priv->link_speed_capa = 0; 561 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 562 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 563 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 564 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 565 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 566 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 567 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 568 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 569 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 570 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 
571 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 572 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 573 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 574 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 575 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 576 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 577 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 578 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 579 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 580 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 581 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 582 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 583 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 584 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 585 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 586 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 587 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 588 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 589 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 590 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 591 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 592 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 593 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 594 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 595 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 596 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 597 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 598 ETH_LINK_SPEED_FIXED); 599 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 600 /* Link status changed. */ 601 dev->data->dev_link = dev_link; 602 return 0; 603 } 604 /* Link status is still the same. */ 605 return -1; 606 } 607 608 /** 609 * Enable receiving and transmitting traffic. 610 * 611 * @param dev 612 * Pointer to Ethernet device. 
613 */ 614 static void 615 mlx5_link_start(struct rte_eth_dev *dev) 616 { 617 struct priv *priv = dev->data->dev_private; 618 int err; 619 620 dev->tx_pkt_burst = mlx5_select_tx_function(dev); 621 dev->rx_pkt_burst = mlx5_select_rx_function(dev); 622 err = mlx5_traffic_enable(dev); 623 if (err) 624 ERROR("%p: error occurred while configuring control flows: %s", 625 (void *)dev, strerror(err)); 626 err = mlx5_flow_start(dev, &priv->flows); 627 if (err) 628 ERROR("%p: error occurred while configuring flows: %s", 629 (void *)dev, strerror(err)); 630 } 631 632 /** 633 * Disable receiving and transmitting traffic. 634 * 635 * @param dev 636 * Pointer to Ethernet device. 637 */ 638 static void 639 mlx5_link_stop(struct rte_eth_dev *dev) 640 { 641 struct priv *priv = dev->data->dev_private; 642 643 mlx5_flow_stop(dev, &priv->flows); 644 mlx5_traffic_disable(dev); 645 dev->rx_pkt_burst = removed_rx_burst; 646 dev->tx_pkt_burst = removed_tx_burst; 647 } 648 649 /** 650 * Querying the link status till it changes to the desired state. 651 * Number of query attempts is bounded by MLX5_MAX_LINK_QUERY_ATTEMPTS. 652 * 653 * @param dev 654 * Pointer to Ethernet device. 655 * @param status 656 * Link desired status. 657 * 658 * @return 659 * 0 on success, negative errno value on failure. 660 */ 661 int 662 mlx5_force_link_status_change(struct rte_eth_dev *dev, int status) 663 { 664 int try = 0; 665 666 while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) { 667 mlx5_link_update(dev, 0); 668 if (dev->data->dev_link.link_status == status) 669 return 0; 670 try++; 671 sleep(1); 672 } 673 return -EAGAIN; 674 } 675 676 /** 677 * DPDK callback to retrieve physical link information. 678 * 679 * @param dev 680 * Pointer to Ethernet device structure. 681 * @param wait_to_complete 682 * Wait for request completion (ignored). 683 * 684 * @return 685 * 0 on success, -1 on error. 
686 */ 687 int 688 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused) 689 { 690 struct utsname utsname; 691 int ver[3]; 692 int ret; 693 struct rte_eth_link dev_link = dev->data->dev_link; 694 695 if (uname(&utsname) == -1 || 696 sscanf(utsname.release, "%d.%d.%d", 697 &ver[0], &ver[1], &ver[2]) != 3 || 698 KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) 699 ret = mlx5_link_update_unlocked_gset(dev); 700 else 701 ret = mlx5_link_update_unlocked_gs(dev); 702 /* If lsc interrupt is disabled, should always be ready for traffic. */ 703 if (!dev->data->dev_conf.intr_conf.lsc) { 704 mlx5_link_start(dev); 705 return ret; 706 } 707 /* Re-select burst callbacks only if link status has been changed. */ 708 if (!ret && dev_link.link_status != dev->data->dev_link.link_status) { 709 if (dev->data->dev_link.link_status == ETH_LINK_UP) 710 mlx5_link_start(dev); 711 else 712 mlx5_link_stop(dev); 713 } 714 return ret; 715 } 716 717 /** 718 * DPDK callback to change the MTU. 719 * 720 * @param dev 721 * Pointer to Ethernet device structure. 722 * @param in_mtu 723 * New MTU. 724 * 725 * @return 726 * 0 on success, negative errno value on failure. 727 */ 728 int 729 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 730 { 731 struct priv *priv = dev->data->dev_private; 732 uint16_t kern_mtu; 733 int ret = 0; 734 735 ret = mlx5_get_mtu(dev, &kern_mtu); 736 if (ret) 737 goto out; 738 /* Set kernel interface MTU first. */ 739 ret = mlx5_set_mtu(dev, mtu); 740 if (ret) 741 goto out; 742 ret = mlx5_get_mtu(dev, &kern_mtu); 743 if (ret) 744 goto out; 745 if (kern_mtu == mtu) { 746 priv->mtu = mtu; 747 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 748 } 749 return 0; 750 out: 751 ret = errno; 752 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 753 strerror(ret)); 754 assert(ret >= 0); 755 return -ret; 756 } 757 758 /** 759 * DPDK callback to get flow control status. 
760 * 761 * @param dev 762 * Pointer to Ethernet device structure. 763 * @param[out] fc_conf 764 * Flow control output buffer. 765 * 766 * @return 767 * 0 on success, negative errno value on failure. 768 */ 769 int 770 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 771 { 772 struct ifreq ifr; 773 struct ethtool_pauseparam ethpause = { 774 .cmd = ETHTOOL_GPAUSEPARAM 775 }; 776 int ret; 777 778 ifr.ifr_data = (void *)ðpause; 779 if (mlx5_ifreq(dev, SIOCETHTOOL, &ifr)) { 780 ret = errno; 781 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed: %s", 782 strerror(ret)); 783 goto out; 784 } 785 fc_conf->autoneg = ethpause.autoneg; 786 if (ethpause.rx_pause && ethpause.tx_pause) 787 fc_conf->mode = RTE_FC_FULL; 788 else if (ethpause.rx_pause) 789 fc_conf->mode = RTE_FC_RX_PAUSE; 790 else if (ethpause.tx_pause) 791 fc_conf->mode = RTE_FC_TX_PAUSE; 792 else 793 fc_conf->mode = RTE_FC_NONE; 794 ret = 0; 795 out: 796 assert(ret >= 0); 797 return -ret; 798 } 799 800 /** 801 * DPDK callback to modify flow control parameters. 802 * 803 * @param dev 804 * Pointer to Ethernet device structure. 805 * @param[in] fc_conf 806 * Flow control parameters. 807 * 808 * @return 809 * 0 on success, negative errno value on failure. 
810 */ 811 int 812 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 813 { 814 struct ifreq ifr; 815 struct ethtool_pauseparam ethpause = { 816 .cmd = ETHTOOL_SPAUSEPARAM 817 }; 818 int ret; 819 820 ifr.ifr_data = (void *)ðpause; 821 ethpause.autoneg = fc_conf->autoneg; 822 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 823 (fc_conf->mode & RTE_FC_RX_PAUSE)) 824 ethpause.rx_pause = 1; 825 else 826 ethpause.rx_pause = 0; 827 828 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 829 (fc_conf->mode & RTE_FC_TX_PAUSE)) 830 ethpause.tx_pause = 1; 831 else 832 ethpause.tx_pause = 0; 833 if (mlx5_ifreq(dev, SIOCETHTOOL, &ifr)) { 834 ret = errno; 835 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 836 " failed: %s", 837 strerror(ret)); 838 goto out; 839 } 840 ret = 0; 841 out: 842 assert(ret >= 0); 843 return -ret; 844 } 845 846 /** 847 * Get PCI information from struct ibv_device. 848 * 849 * @param device 850 * Pointer to Ethernet device structure. 851 * @param[out] pci_addr 852 * PCI bus address output buffer. 853 * 854 * @return 855 * 0 on success, -1 on failure and errno is set. 856 */ 857 int 858 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 859 struct rte_pci_addr *pci_addr) 860 { 861 FILE *file; 862 char line[32]; 863 MKSTR(path, "%s/device/uevent", device->ibdev_path); 864 865 file = fopen(path, "rb"); 866 if (file == NULL) 867 return -1; 868 while (fgets(line, sizeof(line), file) == line) { 869 size_t len = strlen(line); 870 int ret; 871 872 /* Truncate long lines. */ 873 if (len == (sizeof(line) - 1)) 874 while (line[(len - 1)] != '\n') { 875 ret = fgetc(file); 876 if (ret == EOF) 877 break; 878 line[(len - 1)] = ret; 879 } 880 /* Extract information. 
*/ 881 if (sscanf(line, 882 "PCI_SLOT_NAME=" 883 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 884 &pci_addr->domain, 885 &pci_addr->bus, 886 &pci_addr->devid, 887 &pci_addr->function) == 4) { 888 ret = 0; 889 break; 890 } 891 } 892 fclose(file); 893 return 0; 894 } 895 896 /** 897 * Update the link status. 898 * 899 * @param dev 900 * Pointer to Ethernet device. 901 * 902 * @return 903 * Zero if the callback process can be called immediately. 904 */ 905 static int 906 mlx5_link_status_update(struct rte_eth_dev *dev) 907 { 908 struct priv *priv = dev->data->dev_private; 909 struct rte_eth_link *link = &dev->data->dev_link; 910 911 mlx5_link_update(dev, 0); 912 if (((link->link_speed == 0) && link->link_status) || 913 ((link->link_speed != 0) && !link->link_status)) { 914 /* 915 * Inconsistent status. Event likely occurred before the 916 * kernel netdevice exposes the new status. 917 */ 918 if (!priv->pending_alarm) { 919 priv->pending_alarm = 1; 920 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 921 mlx5_dev_link_status_handler, 922 priv->dev); 923 } 924 return 1; 925 } else if (unlikely(priv->pending_alarm)) { 926 /* Link interrupt occurred while alarm is already scheduled. */ 927 priv->pending_alarm = 0; 928 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev); 929 } 930 return 0; 931 } 932 933 /** 934 * Device status handler. 935 * 936 * @param dev 937 * Pointer to Ethernet device. 938 * @param events 939 * Pointer to event flags holder. 940 * 941 * @return 942 * Events bitmap of callback process which can be called immediately. 943 */ 944 static uint32_t 945 mlx5_dev_status_handler(struct rte_eth_dev *dev) 946 { 947 struct priv *priv = dev->data->dev_private; 948 struct ibv_async_event event; 949 uint32_t ret = 0; 950 951 /* Read all message and acknowledge them. 
*/ 952 for (;;) { 953 if (mlx5_glue->get_async_event(priv->ctx, &event)) 954 break; 955 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 956 event.event_type == IBV_EVENT_PORT_ERR) && 957 (dev->data->dev_conf.intr_conf.lsc == 1)) 958 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 959 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 960 dev->data->dev_conf.intr_conf.rmv == 1) 961 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 962 else 963 DEBUG("event type %d on port %d not handled", 964 event.event_type, event.element.port_num); 965 mlx5_glue->ack_async_event(&event); 966 } 967 if (ret & (1 << RTE_ETH_EVENT_INTR_LSC)) 968 if (mlx5_link_status_update(dev)) 969 ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC); 970 return ret; 971 } 972 973 /** 974 * Handle delayed link status event. 975 * 976 * @param arg 977 * Registered argument. 978 */ 979 void 980 mlx5_dev_link_status_handler(void *arg) 981 { 982 struct rte_eth_dev *dev = arg; 983 struct priv *priv = dev->data->dev_private; 984 int ret; 985 986 priv->pending_alarm = 0; 987 ret = mlx5_link_status_update(dev); 988 if (!ret) 989 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 990 } 991 992 /** 993 * Handle interrupts from the NIC. 994 * 995 * @param[in] intr_handle 996 * Interrupt handler. 997 * @param cb_arg 998 * Callback argument. 999 */ 1000 void 1001 mlx5_dev_interrupt_handler(void *cb_arg) 1002 { 1003 struct rte_eth_dev *dev = cb_arg; 1004 uint32_t events; 1005 1006 events = mlx5_dev_status_handler(dev); 1007 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 1008 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); 1009 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 1010 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); 1011 } 1012 1013 /** 1014 * Handle interrupts from the socket. 1015 * 1016 * @param cb_arg 1017 * Callback argument. 
1018 */ 1019 static void 1020 mlx5_dev_handler_socket(void *cb_arg) 1021 { 1022 struct rte_eth_dev *dev = cb_arg; 1023 1024 mlx5_socket_handle(dev); 1025 } 1026 1027 /** 1028 * Uninstall interrupt handler. 1029 * 1030 * @param dev 1031 * Pointer to Ethernet device. 1032 */ 1033 void 1034 mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) 1035 { 1036 struct priv *priv = dev->data->dev_private; 1037 1038 if (dev->data->dev_conf.intr_conf.lsc || 1039 dev->data->dev_conf.intr_conf.rmv) 1040 rte_intr_callback_unregister(&priv->intr_handle, 1041 mlx5_dev_interrupt_handler, dev); 1042 if (priv->primary_socket) 1043 rte_intr_callback_unregister(&priv->intr_handle_socket, 1044 mlx5_dev_handler_socket, dev); 1045 if (priv->pending_alarm) { 1046 priv->pending_alarm = 0; 1047 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 1048 } 1049 priv->intr_handle.fd = 0; 1050 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1051 priv->intr_handle_socket.fd = 0; 1052 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 1053 } 1054 1055 /** 1056 * Install interrupt handler. 1057 * 1058 * @param dev 1059 * Pointer to Ethernet device. 
1060 */ 1061 void 1062 mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) 1063 { 1064 struct priv *priv = dev->data->dev_private; 1065 int rc, flags; 1066 1067 assert(priv->ctx->async_fd > 0); 1068 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1069 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1070 if (rc < 0) { 1071 INFO("failed to change file descriptor async event queue"); 1072 dev->data->dev_conf.intr_conf.lsc = 0; 1073 dev->data->dev_conf.intr_conf.rmv = 0; 1074 } 1075 if (dev->data->dev_conf.intr_conf.lsc || 1076 dev->data->dev_conf.intr_conf.rmv) { 1077 priv->intr_handle.fd = priv->ctx->async_fd; 1078 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1079 rte_intr_callback_register(&priv->intr_handle, 1080 mlx5_dev_interrupt_handler, dev); 1081 } 1082 rc = mlx5_socket_init(dev); 1083 if (!rc && priv->primary_socket) { 1084 priv->intr_handle_socket.fd = priv->primary_socket; 1085 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1086 rte_intr_callback_register(&priv->intr_handle_socket, 1087 mlx5_dev_handler_socket, dev); 1088 } 1089 } 1090 1091 /** 1092 * DPDK callback to bring the link DOWN. 1093 * 1094 * @param dev 1095 * Pointer to Ethernet device structure. 1096 * 1097 * @return 1098 * 0 on success, errno value on failure. 1099 */ 1100 int 1101 mlx5_set_link_down(struct rte_eth_dev *dev) 1102 { 1103 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 1104 } 1105 1106 /** 1107 * DPDK callback to bring the link UP. 1108 * 1109 * @param dev 1110 * Pointer to Ethernet device structure. 1111 * 1112 * @return 1113 * 0 on success, errno value on failure. 1114 */ 1115 int 1116 mlx5_set_link_up(struct rte_eth_dev *dev) 1117 { 1118 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 1119 } 1120 1121 /** 1122 * Configure the TX function to use. 1123 * 1124 * @param dev 1125 * Pointer to private data structure. 1126 * 1127 * @return 1128 * Pointer to selected Tx burst function. 
1129 */ 1130 eth_tx_burst_t 1131 mlx5_select_tx_function(struct rte_eth_dev *dev) 1132 { 1133 struct priv *priv = dev->data->dev_private; 1134 eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; 1135 struct mlx5_dev_config *config = &priv->config; 1136 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 1137 int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 1138 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 1139 DEV_TX_OFFLOAD_GRE_TNL_TSO)); 1140 int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); 1141 1142 assert(priv != NULL); 1143 /* Select appropriate TX function. */ 1144 if (vlan_insert || tso) 1145 return tx_pkt_burst; 1146 if (config->mps == MLX5_MPW_ENHANCED) { 1147 if (mlx5_check_vec_tx_support(dev) > 0) { 1148 if (mlx5_check_raw_vec_tx_support(dev) > 0) 1149 tx_pkt_burst = mlx5_tx_burst_raw_vec; 1150 else 1151 tx_pkt_burst = mlx5_tx_burst_vec; 1152 DEBUG("selected Enhanced MPW TX vectorized function"); 1153 } else { 1154 tx_pkt_burst = mlx5_tx_burst_empw; 1155 DEBUG("selected Enhanced MPW TX function"); 1156 } 1157 } else if (config->mps && (config->txq_inline > 0)) { 1158 tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1159 DEBUG("selected MPW inline TX function"); 1160 } else if (config->mps) { 1161 tx_pkt_burst = mlx5_tx_burst_mpw; 1162 DEBUG("selected MPW TX function"); 1163 } 1164 return tx_pkt_burst; 1165 } 1166 1167 /** 1168 * Configure the RX function to use. 1169 * 1170 * @param dev 1171 * Pointer to private data structure. 1172 * 1173 * @return 1174 * Pointer to selected Rx burst function. 1175 */ 1176 eth_rx_burst_t 1177 mlx5_select_rx_function(struct rte_eth_dev *dev) 1178 { 1179 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 1180 1181 assert(dev != NULL); 1182 if (mlx5_check_vec_rx_support(dev) > 0) { 1183 rx_pkt_burst = mlx5_rx_burst_vec; 1184 DEBUG("selected RX vectorized function"); 1185 } 1186 return rx_pkt_burst; 1187 } 1188 1189 /** 1190 * Check if mlx5 device was removed. 1191 * 1192 * @param dev 1193 * Pointer to Ethernet device structure. 
1194 * 1195 * @return 1196 * 1 when device is removed, otherwise 0. 1197 */ 1198 int 1199 mlx5_is_removed(struct rte_eth_dev *dev) 1200 { 1201 struct ibv_device_attr device_attr; 1202 struct priv *priv = dev->data->dev_private; 1203 1204 if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) 1205 return 1; 1206 return 0; 1207 } 1208