1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <unistd.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <dirent.h> 43 #include <net/if.h> 44 #include <sys/ioctl.h> 45 #include <sys/socket.h> 46 #include <sys/utsname.h> 47 #include <netinet/in.h> 48 #include <linux/ethtool.h> 49 #include <linux/sockios.h> 50 #include <linux/version.h> 51 #include <fcntl.h> 52 53 #include <rte_atomic.h> 54 #include <rte_ethdev.h> 55 #include <rte_mbuf.h> 56 #include <rte_common.h> 57 #include <rte_interrupts.h> 58 #include <rte_alarm.h> 59 #include <rte_malloc.h> 60 61 #include "mlx5.h" 62 #include "mlx5_rxtx.h" 63 #include "mlx5_utils.h" 64 65 /* Add defines in case the running kernel is not the same as user headers. */ 66 #ifndef ETHTOOL_GLINKSETTINGS 67 struct ethtool_link_settings { 68 uint32_t cmd; 69 uint32_t speed; 70 uint8_t duplex; 71 uint8_t port; 72 uint8_t phy_address; 73 uint8_t autoneg; 74 uint8_t mdio_support; 75 uint8_t eth_to_mdix; 76 uint8_t eth_tp_mdix_ctrl; 77 int8_t link_mode_masks_nwords; 78 uint32_t reserved[8]; 79 uint32_t link_mode_masks[]; 80 }; 81 82 #define ETHTOOL_GLINKSETTINGS 0x0000004c 83 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 84 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 85 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 86 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 87 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 88 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 89 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 90 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 91 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 92 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 93 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 94 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 95 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 96 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 97 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 98 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 99 #endif 100 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 101 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 102 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 103 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 104 #endif 105 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 106 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 107 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 108 #endif 109 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 110 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 111 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 112 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 113 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 114 #endif 115 #define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX) 116 117 /** 118 * Return private structure associated with an Ethernet device. 119 * 120 * @param dev 121 * Pointer to Ethernet device structure. 122 * 123 * @return 124 * Pointer to private structure. 125 */ 126 struct priv * 127 mlx5_get_priv(struct rte_eth_dev *dev) 128 { 129 struct mlx5_secondary_data *sd; 130 131 if (!mlx5_is_secondary()) 132 return dev->data->dev_private; 133 sd = &mlx5_secondary_data[dev->data->port_id]; 134 return sd->data.dev_private; 135 } 136 137 /** 138 * Check if running as a secondary process. 139 * 140 * @return 141 * Nonzero if running as a secondary process. 142 */ 143 inline int 144 mlx5_is_secondary(void) 145 { 146 return rte_eal_process_type() != RTE_PROC_PRIMARY; 147 } 148 149 /** 150 * Get interface name from private structure. 151 * 152 * @param[in] priv 153 * Pointer to private structure. 154 * @param[out] ifname 155 * Interface name output buffer. 156 * 157 * @return 158 * 0 on success, -1 on failure and errno is set. 159 */ 160 int 161 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 162 { 163 DIR *dir; 164 struct dirent *dent; 165 unsigned int dev_type = 0; 166 unsigned int dev_port_prev = ~0u; 167 char match[IF_NAMESIZE] = ""; 168 169 { 170 MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path); 171 172 dir = opendir(path); 173 if (dir == NULL) 174 return -1; 175 } 176 while ((dent = readdir(dir)) != NULL) { 177 char *name = dent->d_name; 178 FILE *file; 179 unsigned int dev_port; 180 int r; 181 182 if ((name[0] == '.') && 183 ((name[1] == '\0') || 184 ((name[1] == '.') && (name[2] == '\0')))) 185 continue; 186 187 MKSTR(path, "%s/device/net/%s/%s", 188 priv->ctx->device->ibdev_path, name, 189 (dev_type ? "dev_id" : "dev_port")); 190 191 file = fopen(path, "rb"); 192 if (file == NULL) { 193 if (errno != ENOENT) 194 continue; 195 /* 196 * Switch to dev_id when dev_port does not exist as 197 * is the case with Linux kernel versions < 3.15. 198 */ 199 try_dev_id: 200 match[0] = '\0'; 201 if (dev_type) 202 break; 203 dev_type = 1; 204 dev_port_prev = ~0u; 205 rewinddir(dir); 206 continue; 207 } 208 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 209 fclose(file); 210 if (r != 1) 211 continue; 212 /* 213 * Switch to dev_id when dev_port returns the same value for 214 * all ports. May happen when using a MOFED release older than 215 * 3.0 with a Linux kernel >= 3.15. 216 */ 217 if (dev_port == dev_port_prev) 218 goto try_dev_id; 219 dev_port_prev = dev_port; 220 if (dev_port == (priv->port - 1u)) 221 snprintf(match, sizeof(match), "%s", name); 222 } 223 closedir(dir); 224 if (match[0] == '\0') 225 return -1; 226 strncpy(*ifname, match, sizeof(*ifname)); 227 return 0; 228 } 229 230 /** 231 * Check if the counter is located on ib counters file. 232 * 233 * @param[in] cntr 234 * Counter name. 235 * 236 * @return 237 * 1 if counter is located on ib counters file , 0 otherwise. 238 */ 239 int 240 priv_is_ib_cntr(const char *cntr) 241 { 242 if (!strcmp(cntr, "out_of_buffer")) 243 return 1; 244 return 0; 245 } 246 247 /** 248 * Read from sysfs entry. 249 * 250 * @param[in] priv 251 * Pointer to private structure. 252 * @param[in] entry 253 * Entry name relative to sysfs path. 254 * @param[out] buf 255 * Data output buffer. 256 * @param size 257 * Buffer size. 258 * 259 * @return 260 * 0 on success, -1 on failure and errno is set. 261 */ 262 static int 263 priv_sysfs_read(const struct priv *priv, const char *entry, 264 char *buf, size_t size) 265 { 266 char ifname[IF_NAMESIZE]; 267 FILE *file; 268 int ret; 269 int err; 270 271 if (priv_get_ifname(priv, &ifname)) 272 return -1; 273 274 if (priv_is_ib_cntr(entry)) { 275 MKSTR(path, "%s/ports/1/hw_counters/%s", 276 priv->ctx->device->ibdev_path, entry); 277 file = fopen(path, "rb"); 278 } else { 279 MKSTR(path, "%s/device/net/%s/%s", 280 priv->ctx->device->ibdev_path, ifname, entry); 281 file = fopen(path, "rb"); 282 } 283 if (file == NULL) 284 return -1; 285 ret = fread(buf, 1, size, file); 286 err = errno; 287 if (((size_t)ret < size) && (ferror(file))) 288 ret = -1; 289 else 290 ret = size; 291 fclose(file); 292 errno = err; 293 return ret; 294 } 295 296 /** 297 * Write to sysfs entry. 298 * 299 * @param[in] priv 300 * Pointer to private structure. 301 * @param[in] entry 302 * Entry name relative to sysfs path. 303 * @param[in] buf 304 * Data buffer. 305 * @param size 306 * Buffer size. 307 * 308 * @return 309 * 0 on success, -1 on failure and errno is set. 310 */ 311 static int 312 priv_sysfs_write(const struct priv *priv, const char *entry, 313 char *buf, size_t size) 314 { 315 char ifname[IF_NAMESIZE]; 316 FILE *file; 317 int ret; 318 int err; 319 320 if (priv_get_ifname(priv, &ifname)) 321 return -1; 322 323 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 324 ifname, entry); 325 326 file = fopen(path, "wb"); 327 if (file == NULL) 328 return -1; 329 ret = fwrite(buf, 1, size, file); 330 err = errno; 331 if (((size_t)ret < size) || (ferror(file))) 332 ret = -1; 333 else 334 ret = size; 335 fclose(file); 336 errno = err; 337 return ret; 338 } 339 340 /** 341 * Get unsigned long sysfs property. 342 * 343 * @param priv 344 * Pointer to private structure. 345 * @param[in] name 346 * Entry name relative to sysfs path. 347 * @param[out] value 348 * Value output buffer. 349 * 350 * @return 351 * 0 on success, -1 on failure and errno is set. 352 */ 353 static int 354 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 355 { 356 int ret; 357 unsigned long value_ret; 358 char value_str[32]; 359 360 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 361 if (ret == -1) { 362 DEBUG("cannot read %s value from sysfs: %s", 363 name, strerror(errno)); 364 return -1; 365 } 366 value_str[ret] = '\0'; 367 errno = 0; 368 value_ret = strtoul(value_str, NULL, 0); 369 if (errno) { 370 DEBUG("invalid %s value `%s': %s", name, value_str, 371 strerror(errno)); 372 return -1; 373 } 374 *value = value_ret; 375 return 0; 376 } 377 378 /** 379 * Set unsigned long sysfs property. 380 * 381 * @param priv 382 * Pointer to private structure. 383 * @param[in] name 384 * Entry name relative to sysfs path. 385 * @param value 386 * Value to set. 387 * 388 * @return 389 * 0 on success, -1 on failure and errno is set. 390 */ 391 static int 392 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 393 { 394 int ret; 395 MKSTR(value_str, "%lu", value); 396 397 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 398 if (ret == -1) { 399 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 400 name, value_str, value, strerror(errno)); 401 return -1; 402 } 403 return 0; 404 } 405 406 /** 407 * Perform ifreq ioctl() on associated Ethernet device. 408 * 409 * @param[in] priv 410 * Pointer to private structure. 411 * @param req 412 * Request number to pass to ioctl(). 413 * @param[out] ifr 414 * Interface request structure output buffer. 415 * 416 * @return 417 * 0 on success, -1 on failure and errno is set. 418 */ 419 int 420 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 421 { 422 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 423 int ret = -1; 424 425 if (sock == -1) 426 return ret; 427 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 428 ret = ioctl(sock, req, ifr); 429 close(sock); 430 return ret; 431 } 432 433 /** 434 * Return the number of active VFs for the current device. 435 * 436 * @param[in] priv 437 * Pointer to private structure. 438 * @param[out] num_vfs 439 * Number of active VFs. 440 * 441 * @return 442 * 0 on success, -1 on failure and errno is set. 443 */ 444 int 445 priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs) 446 { 447 /* The sysfs entry name depends on the operating system. */ 448 const char **name = (const char *[]){ 449 "device/sriov_numvfs", 450 "device/mlx5_num_vfs", 451 NULL, 452 }; 453 int ret; 454 455 do { 456 unsigned long ulong_num_vfs; 457 458 ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs); 459 if (!ret) 460 *num_vfs = ulong_num_vfs; 461 } while (*(++name) && ret); 462 return ret; 463 } 464 465 /** 466 * Get device MTU. 467 * 468 * @param priv 469 * Pointer to private structure. 470 * @param[out] mtu 471 * MTU value output buffer. 472 * 473 * @return 474 * 0 on success, -1 on failure and errno is set. 475 */ 476 int 477 priv_get_mtu(struct priv *priv, uint16_t *mtu) 478 { 479 unsigned long ulong_mtu; 480 481 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 482 return -1; 483 *mtu = ulong_mtu; 484 return 0; 485 } 486 487 /** 488 * Read device counter from sysfs. 489 * 490 * @param priv 491 * Pointer to private structure. 492 * @param name 493 * Counter name. 494 * @param[out] cntr 495 * Counter output buffer. 496 * 497 * @return 498 * 0 on success, -1 on failure and errno is set. 499 */ 500 int 501 priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) 502 { 503 unsigned long ulong_ctr; 504 505 if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1) 506 return -1; 507 *cntr = ulong_ctr; 508 return 0; 509 } 510 511 /** 512 * Set device MTU. 513 * 514 * @param priv 515 * Pointer to private structure. 516 * @param mtu 517 * MTU value to set. 518 * 519 * @return 520 * 0 on success, -1 on failure and errno is set. 521 */ 522 static int 523 priv_set_mtu(struct priv *priv, uint16_t mtu) 524 { 525 uint16_t new_mtu; 526 527 if (priv_set_sysfs_ulong(priv, "mtu", mtu) || 528 priv_get_mtu(priv, &new_mtu)) 529 return -1; 530 if (new_mtu == mtu) 531 return 0; 532 errno = EINVAL; 533 return -1; 534 } 535 536 /** 537 * Set device flags. 538 * 539 * @param priv 540 * Pointer to private structure. 541 * @param keep 542 * Bitmask for flags that must remain untouched. 543 * @param flags 544 * Bitmask for flags to modify. 545 * 546 * @return 547 * 0 on success, -1 on failure and errno is set. 548 */ 549 int 550 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 551 { 552 unsigned long tmp; 553 554 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 555 return -1; 556 tmp &= keep; 557 tmp |= (flags & (~keep)); 558 return priv_set_sysfs_ulong(priv, "flags", tmp); 559 } 560 561 /** 562 * Ethernet device configuration. 563 * 564 * Prepare the driver for a given number of TX and RX queues. 565 * 566 * @param dev 567 * Pointer to Ethernet device structure. 568 * 569 * @return 570 * 0 on success, errno value on failure. 571 */ 572 static int 573 dev_configure(struct rte_eth_dev *dev) 574 { 575 struct priv *priv = dev->data->dev_private; 576 unsigned int rxqs_n = dev->data->nb_rx_queues; 577 unsigned int txqs_n = dev->data->nb_tx_queues; 578 unsigned int i; 579 unsigned int j; 580 unsigned int reta_idx_n; 581 582 priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 583 priv->rxqs = (void *)dev->data->rx_queues; 584 priv->txqs = (void *)dev->data->tx_queues; 585 if (txqs_n != priv->txqs_n) { 586 INFO("%p: TX queues number update: %u -> %u", 587 (void *)dev, priv->txqs_n, txqs_n); 588 priv->txqs_n = txqs_n; 589 } 590 if (rxqs_n > priv->ind_table_max_size) { 591 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 592 return EINVAL; 593 } 594 if (rxqs_n == priv->rxqs_n) 595 return 0; 596 INFO("%p: RX queues number update: %u -> %u", 597 (void *)dev, priv->rxqs_n, rxqs_n); 598 priv->rxqs_n = rxqs_n; 599 /* If the requested number of RX queues is not a power of two, use the 600 * maximum indirection table size for better balancing. 601 * The result is always rounded to the next power of two. */ 602 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 603 priv->ind_table_max_size : 604 rxqs_n)); 605 if (priv_rss_reta_index_resize(priv, reta_idx_n)) 606 return ENOMEM; 607 /* When the number of RX queues is not a power of two, the remaining 608 * table entries are padded with reused WQs and hashes are not spread 609 * uniformly. */ 610 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 611 (*priv->reta_idx)[i] = j; 612 if (++j == rxqs_n) 613 j = 0; 614 } 615 return 0; 616 } 617 618 /** 619 * DPDK callback for Ethernet device configuration. 620 * 621 * @param dev 622 * Pointer to Ethernet device structure. 623 * 624 * @return 625 * 0 on success, negative errno value on failure. 626 */ 627 int 628 mlx5_dev_configure(struct rte_eth_dev *dev) 629 { 630 struct priv *priv = dev->data->dev_private; 631 int ret; 632 633 if (mlx5_is_secondary()) 634 return -E_RTE_SECONDARY; 635 636 priv_lock(priv); 637 ret = dev_configure(dev); 638 assert(ret >= 0); 639 priv_unlock(priv); 640 return -ret; 641 } 642 643 /** 644 * DPDK callback to get information about the device. 645 * 646 * @param dev 647 * Pointer to Ethernet device structure. 648 * @param[out] info 649 * Info structure output buffer. 650 */ 651 void 652 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 653 { 654 struct priv *priv = mlx5_get_priv(dev); 655 unsigned int max; 656 char ifname[IF_NAMESIZE]; 657 658 info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); 659 660 priv_lock(priv); 661 /* FIXME: we should ask the device for these values. */ 662 info->min_rx_bufsize = 32; 663 info->max_rx_pktlen = 65536; 664 /* 665 * Since we need one CQ per QP, the limit is the minimum number 666 * between the two values. 667 */ 668 max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ? 669 priv->device_attr.max_qp : priv->device_attr.max_cq); 670 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 671 if (max >= 65535) 672 max = 65535; 673 info->max_rx_queues = max; 674 info->max_tx_queues = max; 675 info->max_mac_addrs = RTE_DIM(priv->mac); 676 info->rx_offload_capa = 677 (priv->hw_csum ? 678 (DEV_RX_OFFLOAD_IPV4_CKSUM | 679 DEV_RX_OFFLOAD_UDP_CKSUM | 680 DEV_RX_OFFLOAD_TCP_CKSUM) : 681 0) | 682 (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0); 683 if (!priv->mps) 684 info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; 685 if (priv->hw_csum) 686 info->tx_offload_capa |= 687 (DEV_TX_OFFLOAD_IPV4_CKSUM | 688 DEV_TX_OFFLOAD_UDP_CKSUM | 689 DEV_TX_OFFLOAD_TCP_CKSUM); 690 if (priv->tso) 691 info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; 692 if (priv->tunnel_en) 693 info->tx_offload_capa |= (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | 694 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 695 DEV_TX_OFFLOAD_GRE_TNL_TSO); 696 if (priv_get_ifname(priv, &ifname) == 0) 697 info->if_index = if_nametoindex(ifname); 698 info->reta_size = priv->reta_idx_n ? 699 priv->reta_idx_n : priv->ind_table_max_size; 700 info->hash_key_size = ((*priv->rss_conf) ? 701 (*priv->rss_conf)[0]->rss_key_len : 702 0); 703 info->speed_capa = priv->link_speed_capa; 704 priv_unlock(priv); 705 } 706 707 const uint32_t * 708 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 709 { 710 static const uint32_t ptypes[] = { 711 /* refers to rxq_cq_to_pkt_type() */ 712 RTE_PTYPE_L2_ETHER, 713 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 714 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 715 RTE_PTYPE_L4_NONFRAG, 716 RTE_PTYPE_L4_FRAG, 717 RTE_PTYPE_L4_TCP, 718 RTE_PTYPE_L4_UDP, 719 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 720 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 721 RTE_PTYPE_INNER_L4_NONFRAG, 722 RTE_PTYPE_INNER_L4_FRAG, 723 RTE_PTYPE_INNER_L4_TCP, 724 RTE_PTYPE_INNER_L4_UDP, 725 RTE_PTYPE_UNKNOWN 726 }; 727 728 if (dev->rx_pkt_burst == mlx5_rx_burst || 729 dev->rx_pkt_burst == mlx5_rx_burst_vec) 730 return ptypes; 731 return NULL; 732 } 733 734 /** 735 * DPDK callback to retrieve physical link information. 736 * 737 * @param dev 738 * Pointer to Ethernet device structure. 739 * @param wait_to_complete 740 * Wait for request completion (ignored). 741 */ 742 static int 743 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) 744 { 745 struct priv *priv = mlx5_get_priv(dev); 746 struct ethtool_cmd edata = { 747 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 748 }; 749 struct ifreq ifr; 750 struct rte_eth_link dev_link; 751 int link_speed = 0; 752 753 /* priv_lock() is not taken to allow concurrent calls. */ 754 755 (void)wait_to_complete; 756 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 757 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 758 return -1; 759 } 760 memset(&dev_link, 0, sizeof(dev_link)); 761 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 762 (ifr.ifr_flags & IFF_RUNNING)); 763 ifr.ifr_data = (void *)&edata; 764 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 765 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 766 strerror(errno)); 767 return -1; 768 } 769 link_speed = ethtool_cmd_speed(&edata); 770 if (link_speed == -1) 771 dev_link.link_speed = 0; 772 else 773 dev_link.link_speed = link_speed; 774 priv->link_speed_capa = 0; 775 if (edata.supported & SUPPORTED_Autoneg) 776 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 777 if (edata.supported & (SUPPORTED_1000baseT_Full | 778 SUPPORTED_1000baseKX_Full)) 779 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 780 if (edata.supported & SUPPORTED_10000baseKR_Full) 781 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 782 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 783 SUPPORTED_40000baseCR4_Full | 784 SUPPORTED_40000baseSR4_Full | 785 SUPPORTED_40000baseLR4_Full)) 786 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 787 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 788 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 789 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 790 ETH_LINK_SPEED_FIXED); 791 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 792 /* Link status changed. */ 793 dev->data->dev_link = dev_link; 794 return 0; 795 } 796 /* Link status is still the same. */ 797 return -1; 798 } 799 800 /** 801 * Retrieve physical link information (unlocked version using new ioctl). 802 * 803 * @param dev 804 * Pointer to Ethernet device structure. 805 * @param wait_to_complete 806 * Wait for request completion (ignored). 807 */ 808 static int 809 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) 810 { 811 struct priv *priv = mlx5_get_priv(dev); 812 __extension__ struct { 813 struct ethtool_link_settings edata; 814 uint32_t link_mode_data[3 * 815 ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; 816 } ecmd; 817 818 struct ifreq ifr; 819 struct rte_eth_link dev_link; 820 uint64_t sc; 821 822 (void)wait_to_complete; 823 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 824 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 825 return -1; 826 } 827 memset(&dev_link, 0, sizeof(dev_link)); 828 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 829 (ifr.ifr_flags & IFF_RUNNING)); 830 memset(&ecmd, 0, sizeof(ecmd)); 831 ecmd.edata.cmd = ETHTOOL_GLINKSETTINGS; 832 ifr.ifr_data = (void *)&ecmd; 833 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 834 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 835 strerror(errno)); 836 return -1; 837 } 838 ecmd.edata.link_mode_masks_nwords = -ecmd.edata.link_mode_masks_nwords; 839 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 840 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 841 strerror(errno)); 842 return -1; 843 } 844 dev_link.link_speed = ecmd.edata.speed; 845 sc = ecmd.edata.link_mode_masks[0] | 846 ((uint64_t)ecmd.edata.link_mode_masks[1] << 32); 847 priv->link_speed_capa = 0; 848 if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT) 849 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 850 if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT | 851 ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)) 852 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 853 if (sc & (ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT | 854 ETHTOOL_LINK_MODE_10000baseKR_Full_BIT | 855 ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)) 856 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 857 if (sc & (ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT | 858 ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)) 859 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 860 if (sc & (ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT | 861 ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT | 862 ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT | 863 ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)) 864 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 865 if (sc & (ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT | 866 ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT | 867 ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT | 868 ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)) 869 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 870 if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT | 871 ETHTOOL_LINK_MODE_25000baseKR_Full_BIT | 872 ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)) 873 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 874 if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT | 875 ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)) 876 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 877 if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT | 878 ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT | 879 ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT | 880 ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)) 881 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 882 dev_link.link_duplex = ((ecmd.edata.duplex == DUPLEX_HALF) ? 883 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 884 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 885 ETH_LINK_SPEED_FIXED); 886 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 887 /* Link status changed. */ 888 dev->data->dev_link = dev_link; 889 return 0; 890 } 891 /* Link status is still the same. */ 892 return -1; 893 } 894 895 /** 896 * DPDK callback to retrieve physical link information. 897 * 898 * @param dev 899 * Pointer to Ethernet device structure. 900 * @param wait_to_complete 901 * Wait for request completion (ignored). 902 */ 903 int 904 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 905 { 906 struct utsname utsname; 907 int ver[3]; 908 909 if (uname(&utsname) == -1 || 910 sscanf(utsname.release, "%d.%d.%d", 911 &ver[0], &ver[1], &ver[2]) != 3 || 912 KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) 913 return mlx5_link_update_unlocked_gset(dev, wait_to_complete); 914 return mlx5_link_update_unlocked_gs(dev, wait_to_complete); 915 } 916 917 /** 918 * DPDK callback to change the MTU. 919 * 920 * @param dev 921 * Pointer to Ethernet device structure. 922 * @param in_mtu 923 * New MTU. 924 * 925 * @return 926 * 0 on success, negative errno value on failure. 927 */ 928 int 929 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 930 { 931 struct priv *priv = dev->data->dev_private; 932 uint16_t kern_mtu; 933 int ret = 0; 934 935 if (mlx5_is_secondary()) 936 return -E_RTE_SECONDARY; 937 938 priv_lock(priv); 939 ret = priv_get_mtu(priv, &kern_mtu); 940 if (ret) 941 goto out; 942 /* Set kernel interface MTU first. */ 943 ret = priv_set_mtu(priv, mtu); 944 if (ret) 945 goto out; 946 ret = priv_get_mtu(priv, &kern_mtu); 947 if (ret) 948 goto out; 949 if (kern_mtu == mtu) { 950 priv->mtu = mtu; 951 DEBUG("adapter port %u MTU set to %u", priv->port, mtu); 952 } 953 priv_unlock(priv); 954 return 0; 955 out: 956 ret = errno; 957 WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, 958 strerror(ret)); 959 priv_unlock(priv); 960 assert(ret >= 0); 961 return -ret; 962 } 963 964 /** 965 * DPDK callback to get flow control status. 966 * 967 * @param dev 968 * Pointer to Ethernet device structure. 969 * @param[out] fc_conf 970 * Flow control output buffer. 971 * 972 * @return 973 * 0 on success, negative errno value on failure. 974 */ 975 int 976 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 977 { 978 struct priv *priv = dev->data->dev_private; 979 struct ifreq ifr; 980 struct ethtool_pauseparam ethpause = { 981 .cmd = ETHTOOL_GPAUSEPARAM 982 }; 983 int ret; 984 985 if (mlx5_is_secondary()) 986 return -E_RTE_SECONDARY; 987 988 ifr.ifr_data = (void *)ðpause; 989 priv_lock(priv); 990 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 991 ret = errno; 992 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" 993 " failed: %s", 994 strerror(ret)); 995 goto out; 996 } 997 998 fc_conf->autoneg = ethpause.autoneg; 999 if (ethpause.rx_pause && ethpause.tx_pause) 1000 fc_conf->mode = RTE_FC_FULL; 1001 else if (ethpause.rx_pause) 1002 fc_conf->mode = RTE_FC_RX_PAUSE; 1003 else if (ethpause.tx_pause) 1004 fc_conf->mode = RTE_FC_TX_PAUSE; 1005 else 1006 fc_conf->mode = RTE_FC_NONE; 1007 ret = 0; 1008 1009 out: 1010 priv_unlock(priv); 1011 assert(ret >= 0); 1012 return -ret; 1013 } 1014 1015 /** 1016 * DPDK callback to modify flow control parameters. 1017 * 1018 * @param dev 1019 * Pointer to Ethernet device structure. 1020 * @param[in] fc_conf 1021 * Flow control parameters. 1022 * 1023 * @return 1024 * 0 on success, negative errno value on failure. 1025 */ 1026 int 1027 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1028 { 1029 struct priv *priv = dev->data->dev_private; 1030 struct ifreq ifr; 1031 struct ethtool_pauseparam ethpause = { 1032 .cmd = ETHTOOL_SPAUSEPARAM 1033 }; 1034 int ret; 1035 1036 if (mlx5_is_secondary()) 1037 return -E_RTE_SECONDARY; 1038 1039 ifr.ifr_data = (void *)ðpause; 1040 ethpause.autoneg = fc_conf->autoneg; 1041 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1042 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1043 ethpause.rx_pause = 1; 1044 else 1045 ethpause.rx_pause = 0; 1046 1047 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1048 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1049 ethpause.tx_pause = 1; 1050 else 1051 ethpause.tx_pause = 0; 1052 1053 priv_lock(priv); 1054 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 1055 ret = errno; 1056 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1057 " failed: %s", 1058 strerror(ret)); 1059 goto out; 1060 } 1061 ret = 0; 1062 1063 out: 1064 priv_unlock(priv); 1065 assert(ret >= 0); 1066 return -ret; 1067 } 1068 1069 /** 1070 * Get PCI information from struct ibv_device. 1071 * 1072 * @param device 1073 * Pointer to Ethernet device structure. 1074 * @param[out] pci_addr 1075 * PCI bus address output buffer. 1076 * 1077 * @return 1078 * 0 on success, -1 on failure and errno is set. 1079 */ 1080 int 1081 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 1082 struct rte_pci_addr *pci_addr) 1083 { 1084 FILE *file; 1085 char line[32]; 1086 MKSTR(path, "%s/device/uevent", device->ibdev_path); 1087 1088 file = fopen(path, "rb"); 1089 if (file == NULL) 1090 return -1; 1091 while (fgets(line, sizeof(line), file) == line) { 1092 size_t len = strlen(line); 1093 int ret; 1094 1095 /* Truncate long lines. */ 1096 if (len == (sizeof(line) - 1)) 1097 while (line[(len - 1)] != '\n') { 1098 ret = fgetc(file); 1099 if (ret == EOF) 1100 break; 1101 line[(len - 1)] = ret; 1102 } 1103 /* Extract information. */ 1104 if (sscanf(line, 1105 "PCI_SLOT_NAME=" 1106 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1107 &pci_addr->domain, 1108 &pci_addr->bus, 1109 &pci_addr->devid, 1110 &pci_addr->function) == 4) { 1111 ret = 0; 1112 break; 1113 } 1114 } 1115 fclose(file); 1116 return 0; 1117 } 1118 1119 /** 1120 * Link status handler. 1121 * 1122 * @param priv 1123 * Pointer to private structure. 1124 * @param dev 1125 * Pointer to the rte_eth_dev structure. 1126 * 1127 * @return 1128 * Nonzero if the callback process can be called immediately. 1129 */ 1130 static int 1131 priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) 1132 { 1133 struct ibv_async_event event; 1134 struct rte_eth_link *link = &dev->data->dev_link; 1135 int ret = 0; 1136 1137 /* Read all message and acknowledge them. */ 1138 for (;;) { 1139 if (ibv_get_async_event(priv->ctx, &event)) 1140 break; 1141 1142 if (event.event_type != IBV_EVENT_PORT_ACTIVE && 1143 event.event_type != IBV_EVENT_PORT_ERR) 1144 DEBUG("event type %d on port %d not handled", 1145 event.event_type, event.element.port_num); 1146 ibv_ack_async_event(&event); 1147 } 1148 mlx5_link_update(dev, 0); 1149 if (((link->link_speed == 0) && link->link_status) || 1150 ((link->link_speed != 0) && !link->link_status)) { 1151 if (!priv->pending_alarm) { 1152 /* Inconsistent status, check again later. */ 1153 priv->pending_alarm = 1; 1154 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 1155 mlx5_dev_link_status_handler, 1156 dev); 1157 } 1158 } else { 1159 ret = 1; 1160 } 1161 return ret; 1162 } 1163 1164 /** 1165 * Handle delayed link status event. 1166 * 1167 * @param arg 1168 * Registered argument. 1169 */ 1170 void 1171 mlx5_dev_link_status_handler(void *arg) 1172 { 1173 struct rte_eth_dev *dev = arg; 1174 struct priv *priv = dev->data->dev_private; 1175 int ret; 1176 1177 priv_lock(priv); 1178 assert(priv->pending_alarm == 1); 1179 priv->pending_alarm = 0; 1180 ret = priv_dev_link_status_handler(priv, dev); 1181 priv_unlock(priv); 1182 if (ret) 1183 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1184 NULL); 1185 } 1186 1187 /** 1188 * Handle interrupts from the NIC. 1189 * 1190 * @param[in] intr_handle 1191 * Interrupt handler. 1192 * @param cb_arg 1193 * Callback argument. 1194 */ 1195 void 1196 mlx5_dev_interrupt_handler(void *cb_arg) 1197 { 1198 struct rte_eth_dev *dev = cb_arg; 1199 struct priv *priv = dev->data->dev_private; 1200 int ret; 1201 1202 priv_lock(priv); 1203 ret = priv_dev_link_status_handler(priv, dev); 1204 priv_unlock(priv); 1205 if (ret) 1206 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1207 NULL); 1208 } 1209 1210 /** 1211 * Uninstall interrupt handler. 1212 * 1213 * @param priv 1214 * Pointer to private structure. 1215 * @param dev 1216 * Pointer to the rte_eth_dev structure. 1217 */ 1218 void 1219 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 1220 { 1221 if (!dev->data->dev_conf.intr_conf.lsc) 1222 return; 1223 rte_intr_callback_unregister(&priv->intr_handle, 1224 mlx5_dev_interrupt_handler, 1225 dev); 1226 if (priv->pending_alarm) 1227 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 1228 priv->pending_alarm = 0; 1229 priv->intr_handle.fd = 0; 1230 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1231 } 1232 1233 /** 1234 * Install interrupt handler. 1235 * 1236 * @param priv 1237 * Pointer to private structure. 1238 * @param dev 1239 * Pointer to the rte_eth_dev structure. 1240 */ 1241 void 1242 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 1243 { 1244 int rc, flags; 1245 1246 if (!dev->data->dev_conf.intr_conf.lsc) 1247 return; 1248 assert(priv->ctx->async_fd > 0); 1249 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1250 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1251 if (rc < 0) { 1252 INFO("failed to change file descriptor async event queue"); 1253 dev->data->dev_conf.intr_conf.lsc = 0; 1254 } else { 1255 priv->intr_handle.fd = priv->ctx->async_fd; 1256 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1257 rte_intr_callback_register(&priv->intr_handle, 1258 mlx5_dev_interrupt_handler, 1259 dev); 1260 } 1261 } 1262 1263 /** 1264 * Change the link state (UP / DOWN). 1265 * 1266 * @param priv 1267 * Pointer to Ethernet device structure. 1268 * @param up 1269 * Nonzero for link up, otherwise link down. 1270 * 1271 * @return 1272 * 0 on success, errno value on failure. 1273 */ 1274 static int 1275 priv_set_link(struct priv *priv, int up) 1276 { 1277 struct rte_eth_dev *dev = priv->dev; 1278 int err; 1279 1280 if (up) { 1281 err = priv_set_flags(priv, ~IFF_UP, IFF_UP); 1282 if (err) 1283 return err; 1284 priv_select_tx_function(priv); 1285 priv_select_rx_function(priv); 1286 } else { 1287 err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP); 1288 if (err) 1289 return err; 1290 dev->rx_pkt_burst = removed_rx_burst; 1291 dev->tx_pkt_burst = removed_tx_burst; 1292 } 1293 return 0; 1294 } 1295 1296 /** 1297 * DPDK callback to bring the link DOWN. 1298 * 1299 * @param dev 1300 * Pointer to Ethernet device structure. 1301 * 1302 * @return 1303 * 0 on success, errno value on failure. 1304 */ 1305 int 1306 mlx5_set_link_down(struct rte_eth_dev *dev) 1307 { 1308 struct priv *priv = dev->data->dev_private; 1309 int err; 1310 1311 priv_lock(priv); 1312 err = priv_set_link(priv, 0); 1313 priv_unlock(priv); 1314 return err; 1315 } 1316 1317 /** 1318 * DPDK callback to bring the link UP. 1319 * 1320 * @param dev 1321 * Pointer to Ethernet device structure. 1322 * 1323 * @return 1324 * 0 on success, errno value on failure. 1325 */ 1326 int 1327 mlx5_set_link_up(struct rte_eth_dev *dev) 1328 { 1329 struct priv *priv = dev->data->dev_private; 1330 int err; 1331 1332 priv_lock(priv); 1333 err = priv_set_link(priv, 1); 1334 priv_unlock(priv); 1335 return err; 1336 } 1337 1338 /** 1339 * Configure secondary process queues from a private data pointer (primary 1340 * or secondary) and update burst callbacks. Can take place only once. 1341 * 1342 * All queues must have been previously created by the primary process to 1343 * avoid undefined behavior. 1344 * 1345 * @param priv 1346 * Private data pointer from either primary or secondary process. 1347 * 1348 * @return 1349 * Private data pointer from secondary process, NULL in case of error. 1350 */ 1351 struct priv * 1352 mlx5_secondary_data_setup(struct priv *priv) 1353 { 1354 unsigned int port_id = 0; 1355 struct mlx5_secondary_data *sd; 1356 void **tx_queues; 1357 void **rx_queues; 1358 unsigned int nb_tx_queues; 1359 unsigned int nb_rx_queues; 1360 unsigned int i; 1361 1362 /* priv must be valid at this point. */ 1363 assert(priv != NULL); 1364 /* priv->dev must also be valid but may point to local memory from 1365 * another process, possibly with the same address and must not 1366 * be dereferenced yet. */ 1367 assert(priv->dev != NULL); 1368 /* Determine port ID by finding out where priv comes from. */ 1369 while (1) { 1370 sd = &mlx5_secondary_data[port_id]; 1371 rte_spinlock_lock(&sd->lock); 1372 /* Primary process? */ 1373 if (sd->primary_priv == priv) 1374 break; 1375 /* Secondary process? */ 1376 if (sd->data.dev_private == priv) 1377 break; 1378 rte_spinlock_unlock(&sd->lock); 1379 if (++port_id == RTE_DIM(mlx5_secondary_data)) 1380 port_id = 0; 1381 } 1382 /* Switch to secondary private structure. If private data has already 1383 * been updated by another thread, there is nothing else to do. */ 1384 priv = sd->data.dev_private; 1385 if (priv->dev->data == &sd->data) 1386 goto end; 1387 /* Sanity checks. Secondary private structure is supposed to point 1388 * to local eth_dev, itself still pointing to the shared device data 1389 * structure allocated by the primary process. */ 1390 assert(sd->shared_dev_data != &sd->data); 1391 assert(sd->data.nb_tx_queues == 0); 1392 assert(sd->data.tx_queues == NULL); 1393 assert(sd->data.nb_rx_queues == 0); 1394 assert(sd->data.rx_queues == NULL); 1395 assert(priv != sd->primary_priv); 1396 assert(priv->dev->data == sd->shared_dev_data); 1397 assert(priv->txqs_n == 0); 1398 assert(priv->txqs == NULL); 1399 assert(priv->rxqs_n == 0); 1400 assert(priv->rxqs == NULL); 1401 nb_tx_queues = sd->shared_dev_data->nb_tx_queues; 1402 nb_rx_queues = sd->shared_dev_data->nb_rx_queues; 1403 /* Allocate local storage for queues. */ 1404 tx_queues = rte_zmalloc("secondary ethdev->tx_queues", 1405 sizeof(sd->data.tx_queues[0]) * nb_tx_queues, 1406 RTE_CACHE_LINE_SIZE); 1407 rx_queues = rte_zmalloc("secondary ethdev->rx_queues", 1408 sizeof(sd->data.rx_queues[0]) * nb_rx_queues, 1409 RTE_CACHE_LINE_SIZE); 1410 if (tx_queues == NULL || rx_queues == NULL) 1411 goto error; 1412 /* Lock to prevent control operations during setup. */ 1413 priv_lock(priv); 1414 /* TX queues. */ 1415 for (i = 0; i != nb_tx_queues; ++i) { 1416 struct txq *primary_txq = (*sd->primary_priv->txqs)[i]; 1417 struct txq_ctrl *primary_txq_ctrl; 1418 struct txq_ctrl *txq_ctrl; 1419 1420 if (primary_txq == NULL) 1421 continue; 1422 primary_txq_ctrl = container_of(primary_txq, 1423 struct txq_ctrl, txq); 1424 txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) + 1425 (1 << primary_txq->elts_n) * 1426 sizeof(struct rte_mbuf *), 0, 1427 primary_txq_ctrl->socket); 1428 if (txq_ctrl != NULL) { 1429 if (txq_ctrl_setup(priv->dev, 1430 txq_ctrl, 1431 1 << primary_txq->elts_n, 1432 primary_txq_ctrl->socket, 1433 NULL) == 0) { 1434 txq_ctrl->txq.stats.idx = 1435 primary_txq->stats.idx; 1436 tx_queues[i] = &txq_ctrl->txq; 1437 continue; 1438 } 1439 rte_free(txq_ctrl); 1440 } 1441 while (i) { 1442 txq_ctrl = tx_queues[--i]; 1443 txq_cleanup(txq_ctrl); 1444 rte_free(txq_ctrl); 1445 } 1446 goto error; 1447 } 1448 /* RX queues. */ 1449 for (i = 0; i != nb_rx_queues; ++i) { 1450 struct rxq_ctrl *primary_rxq = 1451 container_of((*sd->primary_priv->rxqs)[i], 1452 struct rxq_ctrl, rxq); 1453 1454 if (primary_rxq == NULL) 1455 continue; 1456 /* Not supported yet. */ 1457 rx_queues[i] = NULL; 1458 } 1459 /* Update everything. */ 1460 priv->txqs = (void *)tx_queues; 1461 priv->txqs_n = nb_tx_queues; 1462 priv->rxqs = (void *)rx_queues; 1463 priv->rxqs_n = nb_rx_queues; 1464 sd->data.rx_queues = rx_queues; 1465 sd->data.tx_queues = tx_queues; 1466 sd->data.nb_rx_queues = nb_rx_queues; 1467 sd->data.nb_tx_queues = nb_tx_queues; 1468 sd->data.dev_link = sd->shared_dev_data->dev_link; 1469 sd->data.mtu = sd->shared_dev_data->mtu; 1470 memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state, 1471 sizeof(sd->data.rx_queue_state)); 1472 memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state, 1473 sizeof(sd->data.tx_queue_state)); 1474 sd->data.dev_flags = sd->shared_dev_data->dev_flags; 1475 /* Use local data from now on. */ 1476 rte_mb(); 1477 priv->dev->data = &sd->data; 1478 rte_mb(); 1479 priv_select_tx_function(priv); 1480 priv_select_rx_function(priv); 1481 priv_unlock(priv); 1482 end: 1483 /* More sanity checks. */ 1484 assert(priv->dev->data == &sd->data); 1485 rte_spinlock_unlock(&sd->lock); 1486 return priv; 1487 error: 1488 priv_unlock(priv); 1489 rte_free(tx_queues); 1490 rte_free(rx_queues); 1491 rte_spinlock_unlock(&sd->lock); 1492 return NULL; 1493 } 1494 1495 /** 1496 * Configure the TX function to use. 1497 * 1498 * @param priv 1499 * Pointer to private structure. 1500 */ 1501 void 1502 priv_select_tx_function(struct priv *priv) 1503 { 1504 priv->dev->tx_pkt_burst = mlx5_tx_burst; 1505 /* Select appropriate TX function. */ 1506 if (priv->mps == MLX5_MPW_ENHANCED) { 1507 if (priv_check_vec_tx_support(priv) > 0) { 1508 if (priv_check_raw_vec_tx_support(priv) > 0) 1509 priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec; 1510 else 1511 priv->dev->tx_pkt_burst = mlx5_tx_burst_vec; 1512 DEBUG("selected Enhanced MPW TX vectorized function"); 1513 } else { 1514 priv->dev->tx_pkt_burst = mlx5_tx_burst_empw; 1515 DEBUG("selected Enhanced MPW TX function"); 1516 } 1517 } else if (priv->mps && priv->txq_inline) { 1518 priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1519 DEBUG("selected MPW inline TX function"); 1520 } else if (priv->mps) { 1521 priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw; 1522 DEBUG("selected MPW TX function"); 1523 } 1524 } 1525 1526 /** 1527 * Configure the RX function to use. 1528 * 1529 * @param priv 1530 * Pointer to private structure. 1531 */ 1532 void 1533 priv_select_rx_function(struct priv *priv) 1534 { 1535 if (priv_check_vec_rx_support(priv) > 0) { 1536 priv->dev->rx_pkt_burst = mlx5_rx_burst_vec; 1537 DEBUG("selected RX vectorized function"); 1538 } else { 1539 priv->dev->rx_pkt_burst = mlx5_rx_burst; 1540 } 1541 } 1542