1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #define _GNU_SOURCE 35 36 #include <stddef.h> 37 #include <assert.h> 38 #include <unistd.h> 39 #include <stdint.h> 40 #include <stdio.h> 41 #include <string.h> 42 #include <stdlib.h> 43 #include <errno.h> 44 #include <dirent.h> 45 #include <net/if.h> 46 #include <sys/ioctl.h> 47 #include <sys/socket.h> 48 #include <sys/utsname.h> 49 #include <netinet/in.h> 50 #include <linux/ethtool.h> 51 #include <linux/sockios.h> 52 #include <linux/version.h> 53 #include <fcntl.h> 54 #include <stdalign.h> 55 #include <sys/un.h> 56 57 #include <rte_atomic.h> 58 #include <rte_ethdev.h> 59 #include <rte_mbuf.h> 60 #include <rte_common.h> 61 #include <rte_interrupts.h> 62 #include <rte_alarm.h> 63 #include <rte_malloc.h> 64 65 #include "mlx5.h" 66 #include "mlx5_rxtx.h" 67 #include "mlx5_utils.h" 68 69 /* Add defines in case the running kernel is not the same as user headers. */ 70 #ifndef ETHTOOL_GLINKSETTINGS 71 struct ethtool_link_settings { 72 uint32_t cmd; 73 uint32_t speed; 74 uint8_t duplex; 75 uint8_t port; 76 uint8_t phy_address; 77 uint8_t autoneg; 78 uint8_t mdio_support; 79 uint8_t eth_to_mdix; 80 uint8_t eth_tp_mdix_ctrl; 81 int8_t link_mode_masks_nwords; 82 uint32_t reserved[8]; 83 uint32_t link_mode_masks[]; 84 }; 85 86 #define ETHTOOL_GLINKSETTINGS 0x0000004c 87 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 88 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 89 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 90 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 91 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 92 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 93 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 94 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 95 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 96 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 97 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 98 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 99 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 100 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 101 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 102 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 103 #endif 104 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 105 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 106 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 107 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 108 #endif 109 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 110 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 111 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 112 #endif 113 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 114 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 115 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 116 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 117 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 118 #endif 119 120 /** 121 * Return private structure associated with an Ethernet device. 122 * 123 * @param dev 124 * Pointer to Ethernet device structure. 125 * 126 * @return 127 * Pointer to private structure. 128 */ 129 struct priv * 130 mlx5_get_priv(struct rte_eth_dev *dev) 131 { 132 return dev->data->dev_private; 133 } 134 135 /** 136 * Check if running as a secondary process. 137 * 138 * @return 139 * Nonzero if running as a secondary process. 140 */ 141 inline int 142 mlx5_is_secondary(void) 143 { 144 return rte_eal_process_type() == RTE_PROC_SECONDARY; 145 } 146 147 /** 148 * Get interface name from private structure. 149 * 150 * @param[in] priv 151 * Pointer to private structure. 152 * @param[out] ifname 153 * Interface name output buffer. 154 * 155 * @return 156 * 0 on success, -1 on failure and errno is set. 157 */ 158 int 159 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 160 { 161 DIR *dir; 162 struct dirent *dent; 163 unsigned int dev_type = 0; 164 unsigned int dev_port_prev = ~0u; 165 char match[IF_NAMESIZE] = ""; 166 167 { 168 MKSTR(path, "%s/device/net", priv->ibdev_path); 169 170 dir = opendir(path); 171 if (dir == NULL) 172 return -1; 173 } 174 while ((dent = readdir(dir)) != NULL) { 175 char *name = dent->d_name; 176 FILE *file; 177 unsigned int dev_port; 178 int r; 179 180 if ((name[0] == '.') && 181 ((name[1] == '\0') || 182 ((name[1] == '.') && (name[2] == '\0')))) 183 continue; 184 185 MKSTR(path, "%s/device/net/%s/%s", 186 priv->ibdev_path, name, 187 (dev_type ? "dev_id" : "dev_port")); 188 189 file = fopen(path, "rb"); 190 if (file == NULL) { 191 if (errno != ENOENT) 192 continue; 193 /* 194 * Switch to dev_id when dev_port does not exist as 195 * is the case with Linux kernel versions < 3.15. 196 */ 197 try_dev_id: 198 match[0] = '\0'; 199 if (dev_type) 200 break; 201 dev_type = 1; 202 dev_port_prev = ~0u; 203 rewinddir(dir); 204 continue; 205 } 206 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 207 fclose(file); 208 if (r != 1) 209 continue; 210 /* 211 * Switch to dev_id when dev_port returns the same value for 212 * all ports. May happen when using a MOFED release older than 213 * 3.0 with a Linux kernel >= 3.15. 214 */ 215 if (dev_port == dev_port_prev) 216 goto try_dev_id; 217 dev_port_prev = dev_port; 218 if (dev_port == (priv->port - 1u)) 219 snprintf(match, sizeof(match), "%s", name); 220 } 221 closedir(dir); 222 if (match[0] == '\0') 223 return -1; 224 strncpy(*ifname, match, sizeof(*ifname)); 225 return 0; 226 } 227 228 /** 229 * Check if the counter is located on ib counters file. 230 * 231 * @param[in] cntr 232 * Counter name. 233 * 234 * @return 235 * 1 if counter is located on ib counters file , 0 otherwise. 236 */ 237 int 238 priv_is_ib_cntr(const char *cntr) 239 { 240 if (!strcmp(cntr, "out_of_buffer")) 241 return 1; 242 return 0; 243 } 244 245 /** 246 * Read from sysfs entry. 247 * 248 * @param[in] priv 249 * Pointer to private structure. 250 * @param[in] entry 251 * Entry name relative to sysfs path. 252 * @param[out] buf 253 * Data output buffer. 254 * @param size 255 * Buffer size. 256 * 257 * @return 258 * 0 on success, -1 on failure and errno is set. 259 */ 260 static int 261 priv_sysfs_read(const struct priv *priv, const char *entry, 262 char *buf, size_t size) 263 { 264 char ifname[IF_NAMESIZE]; 265 FILE *file; 266 int ret; 267 int err; 268 269 if (priv_get_ifname(priv, &ifname)) 270 return -1; 271 272 if (priv_is_ib_cntr(entry)) { 273 MKSTR(path, "%s/ports/1/hw_counters/%s", 274 priv->ibdev_path, entry); 275 file = fopen(path, "rb"); 276 } else { 277 MKSTR(path, "%s/device/net/%s/%s", 278 priv->ibdev_path, ifname, entry); 279 file = fopen(path, "rb"); 280 } 281 if (file == NULL) 282 return -1; 283 ret = fread(buf, 1, size, file); 284 err = errno; 285 if (((size_t)ret < size) && (ferror(file))) 286 ret = -1; 287 else 288 ret = size; 289 fclose(file); 290 errno = err; 291 return ret; 292 } 293 294 /** 295 * Write to sysfs entry. 296 * 297 * @param[in] priv 298 * Pointer to private structure. 299 * @param[in] entry 300 * Entry name relative to sysfs path. 301 * @param[in] buf 302 * Data buffer. 303 * @param size 304 * Buffer size. 305 * 306 * @return 307 * 0 on success, -1 on failure and errno is set. 308 */ 309 static int 310 priv_sysfs_write(const struct priv *priv, const char *entry, 311 char *buf, size_t size) 312 { 313 char ifname[IF_NAMESIZE]; 314 FILE *file; 315 int ret; 316 int err; 317 318 if (priv_get_ifname(priv, &ifname)) 319 return -1; 320 321 MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry); 322 323 file = fopen(path, "wb"); 324 if (file == NULL) 325 return -1; 326 ret = fwrite(buf, 1, size, file); 327 err = errno; 328 if (((size_t)ret < size) || (ferror(file))) 329 ret = -1; 330 else 331 ret = size; 332 fclose(file); 333 errno = err; 334 return ret; 335 } 336 337 /** 338 * Get unsigned long sysfs property. 339 * 340 * @param priv 341 * Pointer to private structure. 342 * @param[in] name 343 * Entry name relative to sysfs path. 344 * @param[out] value 345 * Value output buffer. 346 * 347 * @return 348 * 0 on success, -1 on failure and errno is set. 349 */ 350 static int 351 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 352 { 353 int ret; 354 unsigned long value_ret; 355 char value_str[32]; 356 357 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 358 if (ret == -1) { 359 DEBUG("cannot read %s value from sysfs: %s", 360 name, strerror(errno)); 361 return -1; 362 } 363 value_str[ret] = '\0'; 364 errno = 0; 365 value_ret = strtoul(value_str, NULL, 0); 366 if (errno) { 367 DEBUG("invalid %s value `%s': %s", name, value_str, 368 strerror(errno)); 369 return -1; 370 } 371 *value = value_ret; 372 return 0; 373 } 374 375 /** 376 * Set unsigned long sysfs property. 377 * 378 * @param priv 379 * Pointer to private structure. 380 * @param[in] name 381 * Entry name relative to sysfs path. 382 * @param value 383 * Value to set. 384 * 385 * @return 386 * 0 on success, -1 on failure and errno is set. 387 */ 388 static int 389 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 390 { 391 int ret; 392 MKSTR(value_str, "%lu", value); 393 394 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 395 if (ret == -1) { 396 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 397 name, value_str, value, strerror(errno)); 398 return -1; 399 } 400 return 0; 401 } 402 403 /** 404 * Perform ifreq ioctl() on associated Ethernet device. 405 * 406 * @param[in] priv 407 * Pointer to private structure. 408 * @param req 409 * Request number to pass to ioctl(). 410 * @param[out] ifr 411 * Interface request structure output buffer. 412 * 413 * @return 414 * 0 on success, -1 on failure and errno is set. 415 */ 416 int 417 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 418 { 419 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 420 int ret = -1; 421 422 if (sock == -1) 423 return ret; 424 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 425 ret = ioctl(sock, req, ifr); 426 close(sock); 427 return ret; 428 } 429 430 /** 431 * Return the number of active VFs for the current device. 432 * 433 * @param[in] priv 434 * Pointer to private structure. 435 * @param[out] num_vfs 436 * Number of active VFs. 437 * 438 * @return 439 * 0 on success, -1 on failure and errno is set. 440 */ 441 int 442 priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs) 443 { 444 /* The sysfs entry name depends on the operating system. */ 445 const char **name = (const char *[]){ 446 "device/sriov_numvfs", 447 "device/mlx5_num_vfs", 448 NULL, 449 }; 450 int ret; 451 452 do { 453 unsigned long ulong_num_vfs; 454 455 ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs); 456 if (!ret) 457 *num_vfs = ulong_num_vfs; 458 } while (*(++name) && ret); 459 return ret; 460 } 461 462 /** 463 * Get device MTU. 464 * 465 * @param priv 466 * Pointer to private structure. 467 * @param[out] mtu 468 * MTU value output buffer. 469 * 470 * @return 471 * 0 on success, -1 on failure and errno is set. 472 */ 473 int 474 priv_get_mtu(struct priv *priv, uint16_t *mtu) 475 { 476 unsigned long ulong_mtu; 477 478 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 479 return -1; 480 *mtu = ulong_mtu; 481 return 0; 482 } 483 484 /** 485 * Read device counter from sysfs. 486 * 487 * @param priv 488 * Pointer to private structure. 489 * @param name 490 * Counter name. 491 * @param[out] cntr 492 * Counter output buffer. 493 * 494 * @return 495 * 0 on success, -1 on failure and errno is set. 496 */ 497 int 498 priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) 499 { 500 unsigned long ulong_ctr; 501 502 if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1) 503 return -1; 504 *cntr = ulong_ctr; 505 return 0; 506 } 507 508 /** 509 * Set device MTU. 510 * 511 * @param priv 512 * Pointer to private structure. 513 * @param mtu 514 * MTU value to set. 515 * 516 * @return 517 * 0 on success, -1 on failure and errno is set. 518 */ 519 static int 520 priv_set_mtu(struct priv *priv, uint16_t mtu) 521 { 522 uint16_t new_mtu; 523 524 if (priv_set_sysfs_ulong(priv, "mtu", mtu) || 525 priv_get_mtu(priv, &new_mtu)) 526 return -1; 527 if (new_mtu == mtu) 528 return 0; 529 errno = EINVAL; 530 return -1; 531 } 532 533 /** 534 * Set device flags. 535 * 536 * @param priv 537 * Pointer to private structure. 538 * @param keep 539 * Bitmask for flags that must remain untouched. 540 * @param flags 541 * Bitmask for flags to modify. 542 * 543 * @return 544 * 0 on success, -1 on failure and errno is set. 545 */ 546 int 547 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 548 { 549 unsigned long tmp; 550 551 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 552 return -1; 553 tmp &= keep; 554 tmp |= (flags & (~keep)); 555 return priv_set_sysfs_ulong(priv, "flags", tmp); 556 } 557 558 /** 559 * Ethernet device configuration. 560 * 561 * Prepare the driver for a given number of TX and RX queues. 562 * 563 * @param dev 564 * Pointer to Ethernet device structure. 565 * 566 * @return 567 * 0 on success, errno value on failure. 568 */ 569 static int 570 dev_configure(struct rte_eth_dev *dev) 571 { 572 struct priv *priv = dev->data->dev_private; 573 unsigned int rxqs_n = dev->data->nb_rx_queues; 574 unsigned int txqs_n = dev->data->nb_tx_queues; 575 unsigned int i; 576 unsigned int j; 577 unsigned int reta_idx_n; 578 579 priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 580 priv->rxqs = (void *)dev->data->rx_queues; 581 priv->txqs = (void *)dev->data->tx_queues; 582 if (txqs_n != priv->txqs_n) { 583 INFO("%p: TX queues number update: %u -> %u", 584 (void *)dev, priv->txqs_n, txqs_n); 585 priv->txqs_n = txqs_n; 586 } 587 if (rxqs_n > priv->ind_table_max_size) { 588 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 589 return EINVAL; 590 } 591 if (rxqs_n == priv->rxqs_n) 592 return 0; 593 INFO("%p: RX queues number update: %u -> %u", 594 (void *)dev, priv->rxqs_n, rxqs_n); 595 priv->rxqs_n = rxqs_n; 596 /* If the requested number of RX queues is not a power of two, use the 597 * maximum indirection table size for better balancing. 598 * The result is always rounded to the next power of two. */ 599 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 600 priv->ind_table_max_size : 601 rxqs_n)); 602 if (priv_rss_reta_index_resize(priv, reta_idx_n)) 603 return ENOMEM; 604 /* When the number of RX queues is not a power of two, the remaining 605 * table entries are padded with reused WQs and hashes are not spread 606 * uniformly. */ 607 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 608 (*priv->reta_idx)[i] = j; 609 if (++j == rxqs_n) 610 j = 0; 611 } 612 return 0; 613 } 614 615 /** 616 * DPDK callback for Ethernet device configuration. 617 * 618 * @param dev 619 * Pointer to Ethernet device structure. 620 * 621 * @return 622 * 0 on success, negative errno value on failure. 623 */ 624 int 625 mlx5_dev_configure(struct rte_eth_dev *dev) 626 { 627 struct priv *priv = dev->data->dev_private; 628 int ret; 629 630 if (mlx5_is_secondary()) 631 return -E_RTE_SECONDARY; 632 633 priv_lock(priv); 634 ret = dev_configure(dev); 635 assert(ret >= 0); 636 priv_unlock(priv); 637 return -ret; 638 } 639 640 /** 641 * DPDK callback to get information about the device. 642 * 643 * @param dev 644 * Pointer to Ethernet device structure. 645 * @param[out] info 646 * Info structure output buffer. 647 */ 648 void 649 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 650 { 651 struct priv *priv = mlx5_get_priv(dev); 652 unsigned int max; 653 char ifname[IF_NAMESIZE]; 654 655 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 656 657 priv_lock(priv); 658 /* FIXME: we should ask the device for these values. */ 659 info->min_rx_bufsize = 32; 660 info->max_rx_pktlen = 65536; 661 /* 662 * Since we need one CQ per QP, the limit is the minimum number 663 * between the two values. 664 */ 665 max = RTE_MIN(priv->device_attr.orig_attr.max_cq, 666 priv->device_attr.orig_attr.max_qp); 667 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 668 if (max >= 65535) 669 max = 65535; 670 info->max_rx_queues = max; 671 info->max_tx_queues = max; 672 info->max_mac_addrs = RTE_DIM(priv->mac); 673 info->rx_offload_capa = 674 (priv->hw_csum ? 675 (DEV_RX_OFFLOAD_IPV4_CKSUM | 676 DEV_RX_OFFLOAD_UDP_CKSUM | 677 DEV_RX_OFFLOAD_TCP_CKSUM) : 678 0) | 679 (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0); 680 if (!priv->mps) 681 info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; 682 if (priv->hw_csum) 683 info->tx_offload_capa |= 684 (DEV_TX_OFFLOAD_IPV4_CKSUM | 685 DEV_TX_OFFLOAD_UDP_CKSUM | 686 DEV_TX_OFFLOAD_TCP_CKSUM); 687 if (priv->tso) 688 info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; 689 if (priv->tunnel_en) 690 info->tx_offload_capa |= (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | 691 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 692 DEV_TX_OFFLOAD_GRE_TNL_TSO); 693 if (priv_get_ifname(priv, &ifname) == 0) 694 info->if_index = if_nametoindex(ifname); 695 info->reta_size = priv->reta_idx_n ? 696 priv->reta_idx_n : priv->ind_table_max_size; 697 info->hash_key_size = ((*priv->rss_conf) ? 698 (*priv->rss_conf)[0]->rss_key_len : 699 0); 700 info->speed_capa = priv->link_speed_capa; 701 priv_unlock(priv); 702 } 703 704 const uint32_t * 705 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 706 { 707 static const uint32_t ptypes[] = { 708 /* refers to rxq_cq_to_pkt_type() */ 709 RTE_PTYPE_L2_ETHER, 710 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 711 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 712 RTE_PTYPE_L4_NONFRAG, 713 RTE_PTYPE_L4_FRAG, 714 RTE_PTYPE_L4_TCP, 715 RTE_PTYPE_L4_UDP, 716 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 717 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 718 RTE_PTYPE_INNER_L4_NONFRAG, 719 RTE_PTYPE_INNER_L4_FRAG, 720 RTE_PTYPE_INNER_L4_TCP, 721 RTE_PTYPE_INNER_L4_UDP, 722 RTE_PTYPE_UNKNOWN 723 }; 724 725 if (dev->rx_pkt_burst == mlx5_rx_burst || 726 dev->rx_pkt_burst == mlx5_rx_burst_vec) 727 return ptypes; 728 return NULL; 729 } 730 731 /** 732 * DPDK callback to retrieve physical link information. 733 * 734 * @param dev 735 * Pointer to Ethernet device structure. 736 * @param wait_to_complete 737 * Wait for request completion (ignored). 738 */ 739 static int 740 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) 741 { 742 struct priv *priv = mlx5_get_priv(dev); 743 struct ethtool_cmd edata = { 744 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 745 }; 746 struct ifreq ifr; 747 struct rte_eth_link dev_link; 748 int link_speed = 0; 749 750 /* priv_lock() is not taken to allow concurrent calls. */ 751 752 (void)wait_to_complete; 753 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 754 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 755 return -1; 756 } 757 memset(&dev_link, 0, sizeof(dev_link)); 758 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 759 (ifr.ifr_flags & IFF_RUNNING)); 760 ifr.ifr_data = (void *)&edata; 761 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 762 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 763 strerror(errno)); 764 return -1; 765 } 766 link_speed = ethtool_cmd_speed(&edata); 767 if (link_speed == -1) 768 dev_link.link_speed = 0; 769 else 770 dev_link.link_speed = link_speed; 771 priv->link_speed_capa = 0; 772 if (edata.supported & SUPPORTED_Autoneg) 773 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 774 if (edata.supported & (SUPPORTED_1000baseT_Full | 775 SUPPORTED_1000baseKX_Full)) 776 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 777 if (edata.supported & SUPPORTED_10000baseKR_Full) 778 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 779 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 780 SUPPORTED_40000baseCR4_Full | 781 SUPPORTED_40000baseSR4_Full | 782 SUPPORTED_40000baseLR4_Full)) 783 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 784 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 785 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 786 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 787 ETH_LINK_SPEED_FIXED); 788 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 789 /* Link status changed. */ 790 dev->data->dev_link = dev_link; 791 return 0; 792 } 793 /* Link status is still the same. */ 794 return -1; 795 } 796 797 /** 798 * Retrieve physical link information (unlocked version using new ioctl). 799 * 800 * @param dev 801 * Pointer to Ethernet device structure. 802 * @param wait_to_complete 803 * Wait for request completion (ignored). 804 */ 805 static int 806 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) 807 { 808 struct priv *priv = mlx5_get_priv(dev); 809 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 810 struct ifreq ifr; 811 struct rte_eth_link dev_link; 812 uint64_t sc; 813 814 (void)wait_to_complete; 815 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 816 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 817 return -1; 818 } 819 memset(&dev_link, 0, sizeof(dev_link)); 820 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 821 (ifr.ifr_flags & IFF_RUNNING)); 822 ifr.ifr_data = (void *)&gcmd; 823 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 824 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 825 strerror(errno)); 826 return -1; 827 } 828 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 829 830 alignas(struct ethtool_link_settings) 831 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 832 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 833 struct ethtool_link_settings *ecmd = (void *)data; 834 835 *ecmd = gcmd; 836 ifr.ifr_data = (void *)ecmd; 837 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 838 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 839 strerror(errno)); 840 return -1; 841 } 842 dev_link.link_speed = ecmd->speed; 843 sc = ecmd->link_mode_masks[0] | 844 ((uint64_t)ecmd->link_mode_masks[1] << 32); 845 priv->link_speed_capa = 0; 846 if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT) 847 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 848 if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT | 849 ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)) 850 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 851 if (sc & (ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT | 852 ETHTOOL_LINK_MODE_10000baseKR_Full_BIT | 853 ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)) 854 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 855 if (sc & (ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT | 856 ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)) 857 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 858 if (sc & (ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT | 859 ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT | 860 ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT | 861 ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)) 862 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 863 if (sc & (ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT | 864 ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT | 865 ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT | 866 ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)) 867 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 868 if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT | 869 ETHTOOL_LINK_MODE_25000baseKR_Full_BIT | 870 ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)) 871 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 872 if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT | 873 ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)) 874 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 875 if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT | 876 ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT | 877 ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT | 878 ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)) 879 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 880 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 881 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 882 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 883 ETH_LINK_SPEED_FIXED); 884 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 885 /* Link status changed. */ 886 dev->data->dev_link = dev_link; 887 return 0; 888 } 889 /* Link status is still the same. */ 890 return -1; 891 } 892 893 /** 894 * DPDK callback to retrieve physical link information. 895 * 896 * @param dev 897 * Pointer to Ethernet device structure. 898 * @param wait_to_complete 899 * Wait for request completion (ignored). 900 */ 901 int 902 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 903 { 904 struct utsname utsname; 905 int ver[3]; 906 907 if (uname(&utsname) == -1 || 908 sscanf(utsname.release, "%d.%d.%d", 909 &ver[0], &ver[1], &ver[2]) != 3 || 910 KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) 911 return mlx5_link_update_unlocked_gset(dev, wait_to_complete); 912 return mlx5_link_update_unlocked_gs(dev, wait_to_complete); 913 } 914 915 /** 916 * DPDK callback to change the MTU. 917 * 918 * @param dev 919 * Pointer to Ethernet device structure. 920 * @param in_mtu 921 * New MTU. 922 * 923 * @return 924 * 0 on success, negative errno value on failure. 925 */ 926 int 927 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 928 { 929 struct priv *priv = dev->data->dev_private; 930 uint16_t kern_mtu; 931 int ret = 0; 932 933 if (mlx5_is_secondary()) 934 return -E_RTE_SECONDARY; 935 936 priv_lock(priv); 937 ret = priv_get_mtu(priv, &kern_mtu); 938 if (ret) 939 goto out; 940 /* Set kernel interface MTU first. */ 941 ret = priv_set_mtu(priv, mtu); 942 if (ret) 943 goto out; 944 ret = priv_get_mtu(priv, &kern_mtu); 945 if (ret) 946 goto out; 947 if (kern_mtu == mtu) { 948 priv->mtu = mtu; 949 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 950 } 951 priv_unlock(priv); 952 return 0; 953 out: 954 ret = errno; 955 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 956 strerror(ret)); 957 priv_unlock(priv); 958 assert(ret >= 0); 959 return -ret; 960 } 961 962 /** 963 * DPDK callback to get flow control status. 964 * 965 * @param dev 966 * Pointer to Ethernet device structure. 967 * @param[out] fc_conf 968 * Flow control output buffer. 969 * 970 * @return 971 * 0 on success, negative errno value on failure. 972 */ 973 int 974 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 975 { 976 struct priv *priv = dev->data->dev_private; 977 struct ifreq ifr; 978 struct ethtool_pauseparam ethpause = { 979 .cmd = ETHTOOL_GPAUSEPARAM 980 }; 981 int ret; 982 983 if (mlx5_is_secondary()) 984 return -E_RTE_SECONDARY; 985 986 ifr.ifr_data = (void *)ðpause; 987 priv_lock(priv); 988 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 989 ret = errno; 990 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 991 " failed: %s", 992 strerror(ret)); 993 goto out; 994 } 995 996 fc_conf->autoneg = ethpause.autoneg; 997 if (ethpause.rx_pause && ethpause.tx_pause) 998 fc_conf->mode = RTE_FC_FULL; 999 else if (ethpause.rx_pause) 1000 fc_conf->mode = RTE_FC_RX_PAUSE; 1001 else if (ethpause.tx_pause) 1002 fc_conf->mode = RTE_FC_TX_PAUSE; 1003 else 1004 fc_conf->mode = RTE_FC_NONE; 1005 ret = 0; 1006 1007 out: 1008 priv_unlock(priv); 1009 assert(ret >= 0); 1010 return -ret; 1011 } 1012 1013 /** 1014 * DPDK callback to modify flow control parameters. 1015 * 1016 * @param dev 1017 * Pointer to Ethernet device structure. 1018 * @param[in] fc_conf 1019 * Flow control parameters. 1020 * 1021 * @return 1022 * 0 on success, negative errno value on failure. 1023 */ 1024 int 1025 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1026 { 1027 struct priv *priv = dev->data->dev_private; 1028 struct ifreq ifr; 1029 struct ethtool_pauseparam ethpause = { 1030 .cmd = ETHTOOL_SPAUSEPARAM 1031 }; 1032 int ret; 1033 1034 if (mlx5_is_secondary()) 1035 return -E_RTE_SECONDARY; 1036 1037 ifr.ifr_data = (void *)ðpause; 1038 ethpause.autoneg = fc_conf->autoneg; 1039 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1040 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1041 ethpause.rx_pause = 1; 1042 else 1043 ethpause.rx_pause = 0; 1044 1045 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1046 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1047 ethpause.tx_pause = 1; 1048 else 1049 ethpause.tx_pause = 0; 1050 1051 priv_lock(priv); 1052 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 1053 ret = errno; 1054 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1055 " failed: %s", 1056 strerror(ret)); 1057 goto out; 1058 } 1059 ret = 0; 1060 1061 out: 1062 priv_unlock(priv); 1063 assert(ret >= 0); 1064 return -ret; 1065 } 1066 1067 /** 1068 * Get PCI information from struct ibv_device. 1069 * 1070 * @param device 1071 * Pointer to Ethernet device structure. 1072 * @param[out] pci_addr 1073 * PCI bus address output buffer. 1074 * 1075 * @return 1076 * 0 on success, -1 on failure and errno is set. 1077 */ 1078 int 1079 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 1080 struct rte_pci_addr *pci_addr) 1081 { 1082 FILE *file; 1083 char line[32]; 1084 MKSTR(path, "%s/device/uevent", device->ibdev_path); 1085 1086 file = fopen(path, "rb"); 1087 if (file == NULL) 1088 return -1; 1089 while (fgets(line, sizeof(line), file) == line) { 1090 size_t len = strlen(line); 1091 int ret; 1092 1093 /* Truncate long lines. */ 1094 if (len == (sizeof(line) - 1)) 1095 while (line[(len - 1)] != '\n') { 1096 ret = fgetc(file); 1097 if (ret == EOF) 1098 break; 1099 line[(len - 1)] = ret; 1100 } 1101 /* Extract information. */ 1102 if (sscanf(line, 1103 "PCI_SLOT_NAME=" 1104 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1105 &pci_addr->domain, 1106 &pci_addr->bus, 1107 &pci_addr->devid, 1108 &pci_addr->function) == 4) { 1109 ret = 0; 1110 break; 1111 } 1112 } 1113 fclose(file); 1114 return 0; 1115 } 1116 1117 /** 1118 * Update the link status. 1119 * 1120 * @param priv 1121 * Pointer to private structure. 1122 * 1123 * @return 1124 * Zero if the callback process can be called immediately. 1125 */ 1126 static int 1127 priv_link_status_update(struct priv *priv) 1128 { 1129 struct rte_eth_link *link = &priv->dev->data->dev_link; 1130 1131 mlx5_link_update(priv->dev, 0); 1132 if (((link->link_speed == 0) && link->link_status) || 1133 ((link->link_speed != 0) && !link->link_status)) { 1134 /* 1135 * Inconsistent status. Event likely occurred before the 1136 * kernel netdevice exposes the new status. 1137 */ 1138 if (!priv->pending_alarm) { 1139 priv->pending_alarm = 1; 1140 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 1141 mlx5_dev_link_status_handler, 1142 priv->dev); 1143 } 1144 return 1; 1145 } else if (unlikely(priv->pending_alarm)) { 1146 /* Link interrupt occurred while alarm is already scheduled. */ 1147 priv->pending_alarm = 0; 1148 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev); 1149 } 1150 return 0; 1151 } 1152 1153 /** 1154 * Device status handler. 1155 * 1156 * @param priv 1157 * Pointer to private structure. 1158 * @param events 1159 * Pointer to event flags holder. 1160 * 1161 * @return 1162 * Events bitmap of callback process which can be called immediately. 1163 */ 1164 static uint32_t 1165 priv_dev_status_handler(struct priv *priv) 1166 { 1167 struct ibv_async_event event; 1168 uint32_t ret = 0; 1169 1170 /* Read all message and acknowledge them. */ 1171 for (;;) { 1172 if (ibv_get_async_event(priv->ctx, &event)) 1173 break; 1174 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 1175 event.event_type == IBV_EVENT_PORT_ERR) && 1176 (priv->dev->data->dev_conf.intr_conf.lsc == 1)) 1177 ret |= (1 << RTE_ETH_EVENT_INTR_LSC); 1178 else if (event.event_type == IBV_EVENT_DEVICE_FATAL && 1179 priv->dev->data->dev_conf.intr_conf.rmv == 1) 1180 ret |= (1 << RTE_ETH_EVENT_INTR_RMV); 1181 else 1182 DEBUG("event type %d on port %d not handled", 1183 event.event_type, event.element.port_num); 1184 ibv_ack_async_event(&event); 1185 } 1186 if (ret & (1 << RTE_ETH_EVENT_INTR_LSC)) 1187 if (priv_link_status_update(priv)) 1188 ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC); 1189 return ret; 1190 } 1191 1192 /** 1193 * Handle delayed link status event. 1194 * 1195 * @param arg 1196 * Registered argument. 1197 */ 1198 void 1199 mlx5_dev_link_status_handler(void *arg) 1200 { 1201 struct rte_eth_dev *dev = arg; 1202 struct priv *priv = dev->data->dev_private; 1203 int ret; 1204 1205 priv_lock(priv); 1206 assert(priv->pending_alarm == 1); 1207 priv->pending_alarm = 0; 1208 ret = priv_link_status_update(priv); 1209 priv_unlock(priv); 1210 if (!ret) 1211 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1212 NULL); 1213 } 1214 1215 /** 1216 * Handle interrupts from the NIC. 1217 * 1218 * @param[in] intr_handle 1219 * Interrupt handler. 1220 * @param cb_arg 1221 * Callback argument. 1222 */ 1223 void 1224 mlx5_dev_interrupt_handler(void *cb_arg) 1225 { 1226 struct rte_eth_dev *dev = cb_arg; 1227 struct priv *priv = dev->data->dev_private; 1228 uint32_t events; 1229 1230 priv_lock(priv); 1231 events = priv_dev_status_handler(priv); 1232 priv_unlock(priv); 1233 if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) 1234 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1235 NULL); 1236 if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) 1237 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL, 1238 NULL); 1239 } 1240 1241 /** 1242 * Handle interrupts from the socket. 1243 * 1244 * @param cb_arg 1245 * Callback argument. 1246 */ 1247 static void 1248 mlx5_dev_handler_socket(void *cb_arg) 1249 { 1250 struct rte_eth_dev *dev = cb_arg; 1251 struct priv *priv = dev->data->dev_private; 1252 1253 priv_lock(priv); 1254 priv_socket_handle(priv); 1255 priv_unlock(priv); 1256 } 1257 1258 /** 1259 * Uninstall interrupt handler. 1260 * 1261 * @param priv 1262 * Pointer to private structure. 1263 * @param dev 1264 * Pointer to the rte_eth_dev structure. 1265 */ 1266 void 1267 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 1268 { 1269 if (dev->data->dev_conf.intr_conf.lsc || 1270 dev->data->dev_conf.intr_conf.rmv) 1271 rte_intr_callback_unregister(&priv->intr_handle, 1272 mlx5_dev_interrupt_handler, dev); 1273 if (priv->primary_socket) 1274 rte_intr_callback_unregister(&priv->intr_handle_socket, 1275 mlx5_dev_handler_socket, dev); 1276 if (priv->pending_alarm) 1277 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 1278 priv->pending_alarm = 0; 1279 priv->intr_handle.fd = 0; 1280 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1281 priv->intr_handle_socket.fd = 0; 1282 priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN; 1283 } 1284 1285 /** 1286 * Install interrupt handler. 1287 * 1288 * @param priv 1289 * Pointer to private structure. 1290 * @param dev 1291 * Pointer to the rte_eth_dev structure. 1292 */ 1293 void 1294 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 1295 { 1296 int rc, flags; 1297 1298 assert(!mlx5_is_secondary()); 1299 assert(priv->ctx->async_fd > 0); 1300 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1301 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1302 if (rc < 0) { 1303 INFO("failed to change file descriptor async event queue"); 1304 dev->data->dev_conf.intr_conf.lsc = 0; 1305 dev->data->dev_conf.intr_conf.rmv = 0; 1306 } 1307 if (dev->data->dev_conf.intr_conf.lsc || 1308 dev->data->dev_conf.intr_conf.rmv) { 1309 priv->intr_handle.fd = priv->ctx->async_fd; 1310 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1311 rte_intr_callback_register(&priv->intr_handle, 1312 mlx5_dev_interrupt_handler, dev); 1313 } 1314 1315 rc = priv_socket_init(priv); 1316 if (!rc && priv->primary_socket) { 1317 priv->intr_handle_socket.fd = priv->primary_socket; 1318 priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; 1319 rte_intr_callback_register(&priv->intr_handle_socket, 1320 mlx5_dev_handler_socket, dev); 1321 } 1322 } 1323 1324 /** 1325 * Change the link state (UP / DOWN). 1326 * 1327 * @param priv 1328 * Pointer to private data structure. 1329 * @param dev 1330 * Pointer to rte_eth_dev structure. 1331 * @param up 1332 * Nonzero for link up, otherwise link down. 1333 * 1334 * @return 1335 * 0 on success, errno value on failure. 1336 */ 1337 static int 1338 priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up) 1339 { 1340 int err; 1341 1342 if (up) { 1343 err = priv_set_flags(priv, ~IFF_UP, IFF_UP); 1344 if (err) 1345 return err; 1346 priv_dev_select_tx_function(priv, dev); 1347 priv_dev_select_rx_function(priv, dev); 1348 } else { 1349 err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP); 1350 if (err) 1351 return err; 1352 dev->rx_pkt_burst = removed_rx_burst; 1353 dev->tx_pkt_burst = removed_tx_burst; 1354 } 1355 return 0; 1356 } 1357 1358 /** 1359 * DPDK callback to bring the link DOWN. 1360 * 1361 * @param dev 1362 * Pointer to Ethernet device structure. 1363 * 1364 * @return 1365 * 0 on success, errno value on failure. 1366 */ 1367 int 1368 mlx5_set_link_down(struct rte_eth_dev *dev) 1369 { 1370 struct priv *priv = dev->data->dev_private; 1371 int err; 1372 1373 priv_lock(priv); 1374 err = priv_dev_set_link(priv, dev, 0); 1375 priv_unlock(priv); 1376 return err; 1377 } 1378 1379 /** 1380 * DPDK callback to bring the link UP. 1381 * 1382 * @param dev 1383 * Pointer to Ethernet device structure. 1384 * 1385 * @return 1386 * 0 on success, errno value on failure. 1387 */ 1388 int 1389 mlx5_set_link_up(struct rte_eth_dev *dev) 1390 { 1391 struct priv *priv = dev->data->dev_private; 1392 int err; 1393 1394 priv_lock(priv); 1395 err = priv_dev_set_link(priv, dev, 1); 1396 priv_unlock(priv); 1397 return err; 1398 } 1399 1400 /** 1401 * Configure the TX function to use. 1402 * 1403 * @param priv 1404 * Pointer to private data structure. 1405 * @param dev 1406 * Pointer to rte_eth_dev structure. 1407 */ 1408 void 1409 priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev) 1410 { 1411 assert(priv != NULL); 1412 assert(dev != NULL); 1413 dev->tx_pkt_burst = mlx5_tx_burst; 1414 /* Select appropriate TX function. */ 1415 if (priv->mps == MLX5_MPW_ENHANCED) { 1416 if (priv_check_vec_tx_support(priv) > 0) { 1417 if (priv_check_raw_vec_tx_support(priv) > 0) 1418 dev->tx_pkt_burst = mlx5_tx_burst_raw_vec; 1419 else 1420 dev->tx_pkt_burst = mlx5_tx_burst_vec; 1421 DEBUG("selected Enhanced MPW TX vectorized function"); 1422 } else { 1423 dev->tx_pkt_burst = mlx5_tx_burst_empw; 1424 DEBUG("selected Enhanced MPW TX function"); 1425 } 1426 } else if (priv->mps && priv->txq_inline) { 1427 dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1428 DEBUG("selected MPW inline TX function"); 1429 } else if (priv->mps) { 1430 dev->tx_pkt_burst = mlx5_tx_burst_mpw; 1431 DEBUG("selected MPW TX function"); 1432 } 1433 } 1434 1435 /** 1436 * Configure the RX function to use. 1437 * 1438 * @param priv 1439 * Pointer to private data structure. 1440 * @param dev 1441 * Pointer to rte_eth_dev structure. 1442 */ 1443 void 1444 priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev) 1445 { 1446 assert(priv != NULL); 1447 assert(dev != NULL); 1448 if (priv_check_vec_rx_support(priv) > 0) { 1449 dev->rx_pkt_burst = mlx5_rx_burst_vec; 1450 DEBUG("selected RX vectorized function"); 1451 } else { 1452 dev->rx_pkt_burst = mlx5_rx_burst; 1453 } 1454 } 1455