1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <unistd.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <dirent.h> 43 #include <net/if.h> 44 #include <sys/ioctl.h> 45 #include <sys/socket.h> 46 #include <sys/utsname.h> 47 #include <netinet/in.h> 48 #include <linux/ethtool.h> 49 #include <linux/sockios.h> 50 #include <linux/version.h> 51 #include <fcntl.h> 52 53 /* DPDK headers don't like -pedantic. */ 54 #ifdef PEDANTIC 55 #pragma GCC diagnostic ignored "-Wpedantic" 56 #endif 57 #include <rte_atomic.h> 58 #include <rte_ethdev.h> 59 #include <rte_mbuf.h> 60 #include <rte_common.h> 61 #include <rte_interrupts.h> 62 #include <rte_alarm.h> 63 #include <rte_malloc.h> 64 #ifdef PEDANTIC 65 #pragma GCC diagnostic error "-Wpedantic" 66 #endif 67 68 #include "mlx5.h" 69 #include "mlx5_rxtx.h" 70 #include "mlx5_utils.h" 71 72 /* Add defines in case the running kernel is not the same as user headers. 
 */
#ifndef ETHTOOL_GLINKSETTINGS
/*
 * Local mirror of the kernel's struct ethtool_link_settings (introduced in
 * Linux 4.6), provided here so the ETHTOOL_GLINKSETTINGS ioctl can be used
 * even when building against older toolchain headers.
 */
struct ethtool_link_settings {
	uint32_t cmd;
	uint32_t speed; /* Link speed in Mb/s. */
	uint8_t duplex;
	uint8_t port;
	uint8_t phy_address;
	uint8_t autoneg;
	uint8_t mdio_support;
	uint8_t eth_to_mdix;
	uint8_t eth_tp_mdix_ctrl;
	/* Number of 32-bit words per link mode mask, negotiated with the
	 * kernel through a first "handshake" ioctl call. */
	int8_t link_mode_masks_nwords;
	uint32_t reserved[8];
	/* Flexible array: supported, advertising and lp_advertising masks,
	 * link_mode_masks_nwords words each. */
	uint32_t link_mode_masks[];
};

#define ETHTOOL_GLINKSETTINGS 0x0000004c
/*
 * The ETHTOOL_LINK_MODE_*_BIT macros below are BIT NUMBERS (indexes into
 * link_mode_masks), not bit masks.
 */
#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5
#define ETHTOOL_LINK_MODE_Autoneg_BIT 6
#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17
#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18
#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19
#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20
#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21
#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22
#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23
#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24
#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25
#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26
#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27
#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28
#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29
#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_25G
#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31
#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32
#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_50G
#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34
#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35
#endif
#ifndef HAVE_ETHTOOL_LINK_MODE_100G
#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36
#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif
/* Upper bound on link_mode_masks_nwords (it is an int8_t). */
#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX)

/**
 * Return private structure associated with an Ethernet device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to private structure.
 */
struct priv *
mlx5_get_priv(struct rte_eth_dev *dev)
{
	struct mlx5_secondary_data *sd;

	/* Primary process: private data hangs directly off the device. */
	if (!mlx5_is_secondary())
		return dev->data->dev_private;
	/* Secondary process: look it up in the shared per-port table. */
	sd = &mlx5_secondary_data[dev->data->port_id];
	return sd->data.dev_private;
}

/**
 * Check if running as a secondary process.
 *
 * @return
 *   Nonzero if running as a secondary process.
 */
inline int
mlx5_is_secondary(void)
{
	return rte_eal_process_type() != RTE_PROC_PRIMARY;
}

/**
 * Get interface name from private structure.
 *
 * The netdev name is resolved by scanning the device's sysfs "net"
 * directory and matching each entry's "dev_port" (or, as a fallback on
 * older kernels, "dev_id") attribute against the IB port owned by this
 * instance.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param[out] ifname
 *   Interface name output buffer.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
int
priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
{
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_type = 0; /* 0: read "dev_port", 1: read "dev_id". */
	unsigned int dev_port_prev = ~0u;
	char match[IF_NAMESIZE] = "";

	{
		MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);

		dir = opendir(path);
		if (dir == NULL)
			return -1;
	}
	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		unsigned int dev_port;
		int r;

		/* Skip "." and "..". */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ctx->device->ibdev_path, name,
		      (dev_type ? "dev_id" : "dev_port"));

		file = fopen(path, "rb");
		if (file == NULL) {
			if (errno != ENOENT)
				continue;
			/*
			 * Switch to dev_id when dev_port does not exist as
			 * is the case with Linux kernel versions < 3.15.
			 */
try_dev_id:
			/* Restart the whole scan with the other attribute. */
			match[0] = '\0';
			if (dev_type)
				break;
			dev_type = 1;
			dev_port_prev = ~0u;
			rewinddir(dir);
			continue;
		}
		/* dev_id is hexadecimal, dev_port is decimal. */
		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
		fclose(file);
		if (r != 1)
			continue;
		/*
		 * Switch to dev_id when dev_port returns the same value for
		 * all ports. May happen when using a MOFED release older than
		 * 3.0 with a Linux kernel >= 3.15.
		 */
		if (dev_port == dev_port_prev)
			goto try_dev_id;
		dev_port_prev = dev_port;
		if (dev_port == (priv->port - 1u))
			snprintf(match, sizeof(match), "%s", name);
	}
	closedir(dir);
	if (match[0] == '\0')
		return -1;
	/* match is NUL-terminated and fits; strncpy copies the NUL too. */
	strncpy(*ifname, match, sizeof(*ifname));
	return 0;
}

/**
 * Check if the counter is located on ib counters file.
 *
 * @param[in] cntr
 *   Counter name.
 *
 * @return
 *   1 if counter is located on ib counters file, 0 otherwise.
 */
int
priv_is_ib_cntr(const char *cntr)
{
	if (!strcmp(cntr, "out_of_buffer"))
		return 1;
	return 0;
}

/**
 * Read from sysfs entry.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param[in] entry
 *   Entry name relative to sysfs path.
 * @param[out] buf
 *   Data output buffer.
 * @param size
 *   Buffer size.
 *
 * @return
 *   `size` on success (even if fewer bytes were actually read before
 *   EOF), -1 on failure and errno is set.
 */
static int
priv_sysfs_read(const struct priv *priv, const char *entry,
		char *buf, size_t size)
{
	char ifname[IF_NAMESIZE];
	FILE *file;
	int ret;
	int err;

	if (priv_get_ifname(priv, &ifname))
		return -1;

	if (priv_is_ib_cntr(entry)) {
		/* IB counters live under ports/1/hw_counters instead of the
		 * netdev sysfs tree. */
		MKSTR(path, "%s/ports/1/hw_counters/%s",
		      priv->ctx->device->ibdev_path, entry);
		file = fopen(path, "rb");
	} else {
		MKSTR(path, "%s/device/net/%s/%s",
		      priv->ctx->device->ibdev_path, ifname, entry);
		file = fopen(path, "rb");
	}
	if (file == NULL)
		return -1;
	ret = fread(buf, 1, size, file);
	/* Save errno before fclose() can overwrite it. */
	err = errno;
	if (((size_t)ret < size) && (ferror(file)))
		ret = -1;
	else
		ret = size;
	fclose(file);
	errno = err;
	return ret;
}

/**
 * Write to sysfs entry.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param[in] entry
 *   Entry name relative to sysfs path.
 * @param[in] buf
 *   Data buffer.
 * @param size
 *   Buffer size.
 *
 * @return
 *   `size` on success, -1 on failure and errno is set.
 */
static int
priv_sysfs_write(const struct priv *priv, const char *entry,
		 char *buf, size_t size)
{
	char ifname[IF_NAMESIZE];
	FILE *file;
	int ret;
	int err;

	if (priv_get_ifname(priv, &ifname))
		return -1;

	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
	      ifname, entry);

	file = fopen(path, "wb");
	if (file == NULL)
		return -1;
	ret = fwrite(buf, 1, size, file);
	/* Save errno before fclose() can overwrite it. */
	err = errno;
	if (((size_t)ret < size) || (ferror(file)))
		ret = -1;
	else
		ret = size;
	fclose(file);
	errno = err;
	return ret;
}

/**
 * Get unsigned long sysfs property.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] name
 *   Entry name relative to sysfs path.
 * @param[out] value
 *   Value output buffer.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value)
{
	int ret;
	unsigned long value_ret;
	char value_str[32];

	/* Leave room for the terminating NUL appended below. */
	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
	if (ret == -1) {
		DEBUG("cannot read %s value from sysfs: %s",
		      name, strerror(errno));
		return -1;
	}
	value_str[ret] = '\0';
	errno = 0;
	value_ret = strtoul(value_str, NULL, 0);
	if (errno) {
		DEBUG("invalid %s value `%s': %s", name, value_str,
		      strerror(errno));
		return -1;
	}
	*value = value_ret;
	return 0;
}

/**
 * Set unsigned long sysfs property.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] name
 *   Entry name relative to sysfs path.
 * @param value
 *   Value to set.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
 */
static int
priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value)
{
	int ret;
	/* MKSTR sizes value_str exactly to the formatted number. */
	MKSTR(value_str, "%lu", value);

	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
	if (ret == -1) {
		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
		      name, value_str, value, strerror(errno));
		return -1;
	}
	return 0;
}

/**
 * Perform ifreq ioctl() on associated Ethernet device.
 *
 * @param[in] priv
 *   Pointer to private structure.
 * @param req
 *   Request number to pass to ioctl().
 * @param[out] ifr
 *   Interface request structure output buffer.
 *
 * @return
 *   0 on success, -1 on failure and errno is set.
425 */ 426 int 427 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 428 { 429 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 430 int ret = -1; 431 432 if (sock == -1) 433 return ret; 434 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 435 ret = ioctl(sock, req, ifr); 436 close(sock); 437 return ret; 438 } 439 440 /** 441 * Return the number of active VFs for the current device. 442 * 443 * @param[in] priv 444 * Pointer to private structure. 445 * @param[out] num_vfs 446 * Number of active VFs. 447 * 448 * @return 449 * 0 on success, -1 on failure and errno is set. 450 */ 451 int 452 priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs) 453 { 454 /* The sysfs entry name depends on the operating system. */ 455 const char **name = (const char *[]){ 456 "device/sriov_numvfs", 457 "device/mlx5_num_vfs", 458 NULL, 459 }; 460 int ret; 461 462 do { 463 unsigned long ulong_num_vfs; 464 465 ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs); 466 if (!ret) 467 *num_vfs = ulong_num_vfs; 468 } while (*(++name) && ret); 469 return ret; 470 } 471 472 /** 473 * Get device MTU. 474 * 475 * @param priv 476 * Pointer to private structure. 477 * @param[out] mtu 478 * MTU value output buffer. 479 * 480 * @return 481 * 0 on success, -1 on failure and errno is set. 482 */ 483 int 484 priv_get_mtu(struct priv *priv, uint16_t *mtu) 485 { 486 unsigned long ulong_mtu; 487 488 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 489 return -1; 490 *mtu = ulong_mtu; 491 return 0; 492 } 493 494 /** 495 * Read device counter from sysfs. 496 * 497 * @param priv 498 * Pointer to private structure. 499 * @param name 500 * Counter name. 501 * @param[out] cntr 502 * Counter output buffer. 503 * 504 * @return 505 * 0 on success, -1 on failure and errno is set. 
506 */ 507 int 508 priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) 509 { 510 unsigned long ulong_ctr; 511 512 if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1) 513 return -1; 514 *cntr = ulong_ctr; 515 return 0; 516 } 517 518 /** 519 * Set device MTU. 520 * 521 * @param priv 522 * Pointer to private structure. 523 * @param mtu 524 * MTU value to set. 525 * 526 * @return 527 * 0 on success, -1 on failure and errno is set. 528 */ 529 static int 530 priv_set_mtu(struct priv *priv, uint16_t mtu) 531 { 532 uint16_t new_mtu; 533 534 if (priv_set_sysfs_ulong(priv, "mtu", mtu) || 535 priv_get_mtu(priv, &new_mtu)) 536 return -1; 537 if (new_mtu == mtu) 538 return 0; 539 errno = EINVAL; 540 return -1; 541 } 542 543 /** 544 * Set device flags. 545 * 546 * @param priv 547 * Pointer to private structure. 548 * @param keep 549 * Bitmask for flags that must remain untouched. 550 * @param flags 551 * Bitmask for flags to modify. 552 * 553 * @return 554 * 0 on success, -1 on failure and errno is set. 555 */ 556 int 557 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 558 { 559 unsigned long tmp; 560 561 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 562 return -1; 563 tmp &= keep; 564 tmp |= (flags & (~keep)); 565 return priv_set_sysfs_ulong(priv, "flags", tmp); 566 } 567 568 /** 569 * Ethernet device configuration. 570 * 571 * Prepare the driver for a given number of TX and RX queues. 572 * 573 * @param dev 574 * Pointer to Ethernet device structure. 575 * 576 * @return 577 * 0 on success, errno value on failure. 
578 */ 579 static int 580 dev_configure(struct rte_eth_dev *dev) 581 { 582 struct priv *priv = dev->data->dev_private; 583 unsigned int rxqs_n = dev->data->nb_rx_queues; 584 unsigned int txqs_n = dev->data->nb_tx_queues; 585 unsigned int i; 586 unsigned int j; 587 unsigned int reta_idx_n; 588 589 priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 590 priv->rxqs = (void *)dev->data->rx_queues; 591 priv->txqs = (void *)dev->data->tx_queues; 592 if (txqs_n != priv->txqs_n) { 593 INFO("%p: TX queues number update: %u -> %u", 594 (void *)dev, priv->txqs_n, txqs_n); 595 priv->txqs_n = txqs_n; 596 } 597 if (rxqs_n > priv->ind_table_max_size) { 598 ERROR("cannot handle this many RX queues (%u)", rxqs_n); 599 return EINVAL; 600 } 601 if (rxqs_n == priv->rxqs_n) 602 return 0; 603 INFO("%p: RX queues number update: %u -> %u", 604 (void *)dev, priv->rxqs_n, rxqs_n); 605 priv->rxqs_n = rxqs_n; 606 /* If the requested number of RX queues is not a power of two, use the 607 * maximum indirection table size for better balancing. 608 * The result is always rounded to the next power of two. */ 609 reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? 610 priv->ind_table_max_size : 611 rxqs_n)); 612 if (priv_rss_reta_index_resize(priv, reta_idx_n)) 613 return ENOMEM; 614 /* When the number of RX queues is not a power of two, the remaining 615 * table entries are padded with reused WQs and hashes are not spread 616 * uniformly. */ 617 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 618 (*priv->reta_idx)[i] = j; 619 if (++j == rxqs_n) 620 j = 0; 621 } 622 return 0; 623 } 624 625 /** 626 * DPDK callback for Ethernet device configuration. 627 * 628 * @param dev 629 * Pointer to Ethernet device structure. 630 * 631 * @return 632 * 0 on success, negative errno value on failure. 
 */
int
mlx5_dev_configure(struct rte_eth_dev *dev)
{
	struct priv *priv = dev->data->dev_private;
	int ret;

	/* Configuration is only performed by the primary process. */
	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;

	priv_lock(priv);
	ret = dev_configure(dev);
	assert(ret >= 0);
	priv_unlock(priv);
	/* dev_configure() returns a positive errno value; negate it. */
	return -ret;
}

/**
 * DPDK callback to get information about the device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] info
 *   Info structure output buffer.
 */
void
mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
{
	struct priv *priv = mlx5_get_priv(dev);
	unsigned int max;
	char ifname[IF_NAMESIZE];

	info->pci_dev = RTE_ETH_DEV_TO_PCI(dev);

	priv_lock(priv);
	/* FIXME: we should ask the device for these values. */
	info->min_rx_bufsize = 32;
	info->max_rx_pktlen = 65536;
	/*
	 * Since we need one CQ per QP, the limit is the minimum number
	 * between the two values.
	 */
	max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
	       priv->device_attr.max_qp : priv->device_attr.max_cq);
	/* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
	if (max >= 65535)
		max = 65535;
	info->max_rx_queues = max;
	info->max_tx_queues = max;
	info->max_mac_addrs = RTE_DIM(priv->mac);
	/* Rx checksum offloads depend on hardware support. */
	info->rx_offload_capa =
		(priv->hw_csum ?
		 (DEV_RX_OFFLOAD_IPV4_CKSUM |
		  DEV_RX_OFFLOAD_UDP_CKSUM |
		  DEV_RX_OFFLOAD_TCP_CKSUM) :
		 0) |
		(priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0);
	/* VLAN insertion is not available with multi-packet send. */
	if (!priv->mps)
		info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
	if (priv->hw_csum)
		info->tx_offload_capa |=
			(DEV_TX_OFFLOAD_IPV4_CKSUM |
			 DEV_TX_OFFLOAD_UDP_CKSUM |
			 DEV_TX_OFFLOAD_TCP_CKSUM);
	if (priv->tso)
		info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
	if (priv->tunnel_en)
		info->tx_offload_capa |= (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
					  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
					  DEV_TX_OFFLOAD_GRE_TNL_TSO);
	if (priv_get_ifname(priv, &ifname) == 0)
		info->if_index = if_nametoindex(ifname);
	info->reta_size = priv->reta_idx_n ?
		priv->reta_idx_n : priv->ind_table_max_size;
	info->hash_key_size = ((*priv->rss_conf) ?
			       (*priv->rss_conf)[0]->rss_key_len :
			       0);
	info->speed_capa = priv->link_speed_capa;
	priv_unlock(priv);
}

/*
 * Return the list of Rx packet types supported when one of the known Rx
 * burst functions is in use, NULL otherwise.
 */
const uint32_t *
mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	static const uint32_t ptypes[] = {
		/* refers to rxq_cq_to_pkt_type() */
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_NONFRAG,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_NONFRAG,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	if (dev->rx_pkt_burst == mlx5_rx_burst ||
	    dev->rx_pkt_burst == mlx5_rx_burst_vec)
		return ptypes;
	return NULL;
}

/**
 * Retrieve physical link information (unlocked version using the legacy,
 * deprecated ETHTOOL_GSET ioctl).
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param wait_to_complete
 *   Wait for request completion (ignored).
748 */ 749 static int 750 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) 751 { 752 struct priv *priv = mlx5_get_priv(dev); 753 struct ethtool_cmd edata = { 754 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 755 }; 756 struct ifreq ifr; 757 struct rte_eth_link dev_link; 758 int link_speed = 0; 759 760 /* priv_lock() is not taken to allow concurrent calls. */ 761 762 (void)wait_to_complete; 763 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 764 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 765 return -1; 766 } 767 memset(&dev_link, 0, sizeof(dev_link)); 768 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 769 (ifr.ifr_flags & IFF_RUNNING)); 770 ifr.ifr_data = (void *)&edata; 771 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 772 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", 773 strerror(errno)); 774 return -1; 775 } 776 link_speed = ethtool_cmd_speed(&edata); 777 if (link_speed == -1) 778 dev_link.link_speed = 0; 779 else 780 dev_link.link_speed = link_speed; 781 priv->link_speed_capa = 0; 782 if (edata.supported & SUPPORTED_Autoneg) 783 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 784 if (edata.supported & (SUPPORTED_1000baseT_Full | 785 SUPPORTED_1000baseKX_Full)) 786 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 787 if (edata.supported & SUPPORTED_10000baseKR_Full) 788 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 789 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 790 SUPPORTED_40000baseCR4_Full | 791 SUPPORTED_40000baseSR4_Full | 792 SUPPORTED_40000baseLR4_Full)) 793 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 794 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 795 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 796 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 797 ETH_LINK_SPEED_FIXED); 798 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 799 /* Link status changed. */ 800 dev->data->dev_link = dev_link; 801 return 0; 802 } 803 /* Link status is still the same. 
*/ 804 return -1; 805 } 806 807 /** 808 * Retrieve physical link information (unlocked version using new ioctl). 809 * 810 * @param dev 811 * Pointer to Ethernet device structure. 812 * @param wait_to_complete 813 * Wait for request completion (ignored). 814 */ 815 static int 816 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) 817 { 818 struct priv *priv = mlx5_get_priv(dev); 819 __extension__ struct { 820 struct ethtool_link_settings edata; 821 uint32_t link_mode_data[3 * 822 ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32]; 823 } ecmd; 824 825 struct ifreq ifr; 826 struct rte_eth_link dev_link; 827 uint64_t sc; 828 829 (void)wait_to_complete; 830 if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { 831 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); 832 return -1; 833 } 834 memset(&dev_link, 0, sizeof(dev_link)); 835 dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && 836 (ifr.ifr_flags & IFF_RUNNING)); 837 memset(&ecmd, 0, sizeof(ecmd)); 838 ecmd.edata.cmd = ETHTOOL_GLINKSETTINGS; 839 ifr.ifr_data = (void *)&ecmd; 840 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 841 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 842 strerror(errno)); 843 return -1; 844 } 845 ecmd.edata.link_mode_masks_nwords = -ecmd.edata.link_mode_masks_nwords; 846 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 847 DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", 848 strerror(errno)); 849 return -1; 850 } 851 dev_link.link_speed = ecmd.edata.speed; 852 sc = ecmd.edata.link_mode_masks[0] | 853 ((uint64_t)ecmd.edata.link_mode_masks[1] << 32); 854 priv->link_speed_capa = 0; 855 if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT) 856 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 857 if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT | 858 ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)) 859 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 860 if (sc & (ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT | 861 ETHTOOL_LINK_MODE_10000baseKR_Full_BIT | 862 ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)) 863 
priv->link_speed_capa |= ETH_LINK_SPEED_10G; 864 if (sc & (ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT | 865 ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)) 866 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 867 if (sc & (ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT | 868 ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT | 869 ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT | 870 ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)) 871 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 872 if (sc & (ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT | 873 ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT | 874 ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT | 875 ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)) 876 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 877 if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT | 878 ETHTOOL_LINK_MODE_25000baseKR_Full_BIT | 879 ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)) 880 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 881 if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT | 882 ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)) 883 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 884 if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT | 885 ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT | 886 ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT | 887 ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)) 888 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 889 dev_link.link_duplex = ((ecmd.edata.duplex == DUPLEX_HALF) ? 890 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 891 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 892 ETH_LINK_SPEED_FIXED); 893 if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { 894 /* Link status changed. */ 895 dev->data->dev_link = dev_link; 896 return 0; 897 } 898 /* Link status is still the same. */ 899 return -1; 900 } 901 902 /** 903 * DPDK callback to retrieve physical link information. 904 * 905 * @param dev 906 * Pointer to Ethernet device structure. 907 * @param wait_to_complete 908 * Wait for request completion (ignored). 
 */
int
mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	struct utsname utsname;
	int ver[3];

	/* ETHTOOL_GLINKSETTINGS is only used on Linux >= 4.9; fall back to
	 * the legacy ETHTOOL_GSET ioctl on older kernels or when the kernel
	 * version cannot be determined. */
	if (uname(&utsname) == -1 ||
	    sscanf(utsname.release, "%d.%d.%d",
		   &ver[0], &ver[1], &ver[2]) != 3 ||
	    KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0))
		return mlx5_link_update_unlocked_gset(dev, wait_to_complete);
	return mlx5_link_update_unlocked_gs(dev, wait_to_complete);
}

/**
 * DPDK callback to change the MTU.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mtu
 *   New MTU.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct priv *priv = dev->data->dev_private;
	uint16_t kern_mtu;
	int ret = 0;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;

	priv_lock(priv);
	ret = priv_get_mtu(priv, &kern_mtu);
	if (ret)
		goto out;
	/* Set kernel interface MTU first. */
	ret = priv_set_mtu(priv, mtu);
	if (ret)
		goto out;
	/* Read it back to confirm what the kernel actually applied. */
	ret = priv_get_mtu(priv, &kern_mtu);
	if (ret)
		goto out;
	if (kern_mtu == mtu) {
		priv->mtu = mtu;
		DEBUG("adapter port %u MTU set to %u", priv->port, mtu);
	}
	priv_unlock(priv);
	return 0;
out:
	/* Convert the errno left by the failing priv_* call. */
	ret = errno;
	WARN("cannot set port %u MTU to %u: %s", priv->port, mtu,
	     strerror(ret));
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}

/**
 * DPDK callback to get flow control status.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[out] fc_conf
 *   Flow control output buffer.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
{
	struct priv *priv = dev->data->dev_private;
	struct ifreq ifr;
	struct ethtool_pauseparam ethpause = {
		.cmd = ETHTOOL_GPAUSEPARAM
	};
	int ret;

	if (mlx5_is_secondary())
		return -E_RTE_SECONDARY;

	ifr.ifr_data = (void *)&ethpause;
	priv_lock(priv);
	if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
		ret = errno;
		WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
		     " failed: %s",
		     strerror(ret));
		goto out;
	}

	/* Translate ethtool pause flags into the DPDK flow control mode. */
	fc_conf->autoneg = ethpause.autoneg;
	if (ethpause.rx_pause && ethpause.tx_pause)
		fc_conf->mode = RTE_FC_FULL;
	else if (ethpause.rx_pause)
		fc_conf->mode = RTE_FC_RX_PAUSE;
	else if (ethpause.tx_pause)
		fc_conf->mode = RTE_FC_TX_PAUSE;
	else
		fc_conf->mode = RTE_FC_NONE;
	ret = 0;

out:
	priv_unlock(priv);
	assert(ret >= 0);
	return -ret;
}

/**
 * DPDK callback to modify flow control parameters.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param[in] fc_conf
 *   Flow control parameters.
 *
 * @return
 *   0 on success, negative errno value on failure.
1032 */ 1033 int 1034 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 1035 { 1036 struct priv *priv = dev->data->dev_private; 1037 struct ifreq ifr; 1038 struct ethtool_pauseparam ethpause = { 1039 .cmd = ETHTOOL_SPAUSEPARAM 1040 }; 1041 int ret; 1042 1043 if (mlx5_is_secondary()) 1044 return -E_RTE_SECONDARY; 1045 1046 ifr.ifr_data = (void *)ðpause; 1047 ethpause.autoneg = fc_conf->autoneg; 1048 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1049 (fc_conf->mode & RTE_FC_RX_PAUSE)) 1050 ethpause.rx_pause = 1; 1051 else 1052 ethpause.rx_pause = 0; 1053 1054 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 1055 (fc_conf->mode & RTE_FC_TX_PAUSE)) 1056 ethpause.tx_pause = 1; 1057 else 1058 ethpause.tx_pause = 0; 1059 1060 priv_lock(priv); 1061 if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { 1062 ret = errno; 1063 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 1064 " failed: %s", 1065 strerror(ret)); 1066 goto out; 1067 } 1068 ret = 0; 1069 1070 out: 1071 priv_unlock(priv); 1072 assert(ret >= 0); 1073 return -ret; 1074 } 1075 1076 /** 1077 * Get PCI information from struct ibv_device. 1078 * 1079 * @param device 1080 * Pointer to Ethernet device structure. 1081 * @param[out] pci_addr 1082 * PCI bus address output buffer. 1083 * 1084 * @return 1085 * 0 on success, -1 on failure and errno is set. 1086 */ 1087 int 1088 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 1089 struct rte_pci_addr *pci_addr) 1090 { 1091 FILE *file; 1092 char line[32]; 1093 MKSTR(path, "%s/device/uevent", device->ibdev_path); 1094 1095 file = fopen(path, "rb"); 1096 if (file == NULL) 1097 return -1; 1098 while (fgets(line, sizeof(line), file) == line) { 1099 size_t len = strlen(line); 1100 int ret; 1101 1102 /* Truncate long lines. */ 1103 if (len == (sizeof(line) - 1)) 1104 while (line[(len - 1)] != '\n') { 1105 ret = fgetc(file); 1106 if (ret == EOF) 1107 break; 1108 line[(len - 1)] = ret; 1109 } 1110 /* Extract information. 
*/ 1111 if (sscanf(line, 1112 "PCI_SLOT_NAME=" 1113 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 1114 &pci_addr->domain, 1115 &pci_addr->bus, 1116 &pci_addr->devid, 1117 &pci_addr->function) == 4) { 1118 ret = 0; 1119 break; 1120 } 1121 } 1122 fclose(file); 1123 return 0; 1124 } 1125 1126 /** 1127 * Link status handler. 1128 * 1129 * @param priv 1130 * Pointer to private structure. 1131 * @param dev 1132 * Pointer to the rte_eth_dev structure. 1133 * 1134 * @return 1135 * Nonzero if the callback process can be called immediately. 1136 */ 1137 static int 1138 priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) 1139 { 1140 struct ibv_async_event event; 1141 struct rte_eth_link *link = &dev->data->dev_link; 1142 int ret = 0; 1143 1144 /* Read all message and acknowledge them. */ 1145 for (;;) { 1146 if (ibv_get_async_event(priv->ctx, &event)) 1147 break; 1148 1149 if (event.event_type != IBV_EVENT_PORT_ACTIVE && 1150 event.event_type != IBV_EVENT_PORT_ERR) 1151 DEBUG("event type %d on port %d not handled", 1152 event.event_type, event.element.port_num); 1153 ibv_ack_async_event(&event); 1154 } 1155 mlx5_link_update(dev, 0); 1156 if (((link->link_speed == 0) && link->link_status) || 1157 ((link->link_speed != 0) && !link->link_status)) { 1158 if (!priv->pending_alarm) { 1159 /* Inconsistent status, check again later. */ 1160 priv->pending_alarm = 1; 1161 rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, 1162 mlx5_dev_link_status_handler, 1163 dev); 1164 } 1165 } else { 1166 ret = 1; 1167 } 1168 return ret; 1169 } 1170 1171 /** 1172 * Handle delayed link status event. 1173 * 1174 * @param arg 1175 * Registered argument. 
1176 */ 1177 void 1178 mlx5_dev_link_status_handler(void *arg) 1179 { 1180 struct rte_eth_dev *dev = arg; 1181 struct priv *priv = dev->data->dev_private; 1182 int ret; 1183 1184 priv_lock(priv); 1185 assert(priv->pending_alarm == 1); 1186 priv->pending_alarm = 0; 1187 ret = priv_dev_link_status_handler(priv, dev); 1188 priv_unlock(priv); 1189 if (ret) 1190 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1191 NULL); 1192 } 1193 1194 /** 1195 * Handle interrupts from the NIC. 1196 * 1197 * @param[in] intr_handle 1198 * Interrupt handler. 1199 * @param cb_arg 1200 * Callback argument. 1201 */ 1202 void 1203 mlx5_dev_interrupt_handler(void *cb_arg) 1204 { 1205 struct rte_eth_dev *dev = cb_arg; 1206 struct priv *priv = dev->data->dev_private; 1207 int ret; 1208 1209 priv_lock(priv); 1210 ret = priv_dev_link_status_handler(priv, dev); 1211 priv_unlock(priv); 1212 if (ret) 1213 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, 1214 NULL); 1215 } 1216 1217 /** 1218 * Uninstall interrupt handler. 1219 * 1220 * @param priv 1221 * Pointer to private structure. 1222 * @param dev 1223 * Pointer to the rte_eth_dev structure. 1224 */ 1225 void 1226 priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) 1227 { 1228 if (!dev->data->dev_conf.intr_conf.lsc) 1229 return; 1230 rte_intr_callback_unregister(&priv->intr_handle, 1231 mlx5_dev_interrupt_handler, 1232 dev); 1233 if (priv->pending_alarm) 1234 rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); 1235 priv->pending_alarm = 0; 1236 priv->intr_handle.fd = 0; 1237 priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; 1238 } 1239 1240 /** 1241 * Install interrupt handler. 1242 * 1243 * @param priv 1244 * Pointer to private structure. 1245 * @param dev 1246 * Pointer to the rte_eth_dev structure. 
1247 */ 1248 void 1249 priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) 1250 { 1251 int rc, flags; 1252 1253 if (!dev->data->dev_conf.intr_conf.lsc) 1254 return; 1255 assert(priv->ctx->async_fd > 0); 1256 flags = fcntl(priv->ctx->async_fd, F_GETFL); 1257 rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); 1258 if (rc < 0) { 1259 INFO("failed to change file descriptor async event queue"); 1260 dev->data->dev_conf.intr_conf.lsc = 0; 1261 } else { 1262 priv->intr_handle.fd = priv->ctx->async_fd; 1263 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1264 rte_intr_callback_register(&priv->intr_handle, 1265 mlx5_dev_interrupt_handler, 1266 dev); 1267 } 1268 } 1269 1270 /** 1271 * Change the link state (UP / DOWN). 1272 * 1273 * @param priv 1274 * Pointer to Ethernet device structure. 1275 * @param up 1276 * Nonzero for link up, otherwise link down. 1277 * 1278 * @return 1279 * 0 on success, errno value on failure. 1280 */ 1281 static int 1282 priv_set_link(struct priv *priv, int up) 1283 { 1284 struct rte_eth_dev *dev = priv->dev; 1285 int err; 1286 1287 if (up) { 1288 err = priv_set_flags(priv, ~IFF_UP, IFF_UP); 1289 if (err) 1290 return err; 1291 priv_select_tx_function(priv); 1292 priv_select_rx_function(priv); 1293 } else { 1294 err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP); 1295 if (err) 1296 return err; 1297 dev->rx_pkt_burst = removed_rx_burst; 1298 dev->tx_pkt_burst = removed_tx_burst; 1299 } 1300 return 0; 1301 } 1302 1303 /** 1304 * DPDK callback to bring the link DOWN. 1305 * 1306 * @param dev 1307 * Pointer to Ethernet device structure. 1308 * 1309 * @return 1310 * 0 on success, errno value on failure. 1311 */ 1312 int 1313 mlx5_set_link_down(struct rte_eth_dev *dev) 1314 { 1315 struct priv *priv = dev->data->dev_private; 1316 int err; 1317 1318 priv_lock(priv); 1319 err = priv_set_link(priv, 0); 1320 priv_unlock(priv); 1321 return err; 1322 } 1323 1324 /** 1325 * DPDK callback to bring the link UP. 
1326 * 1327 * @param dev 1328 * Pointer to Ethernet device structure. 1329 * 1330 * @return 1331 * 0 on success, errno value on failure. 1332 */ 1333 int 1334 mlx5_set_link_up(struct rte_eth_dev *dev) 1335 { 1336 struct priv *priv = dev->data->dev_private; 1337 int err; 1338 1339 priv_lock(priv); 1340 err = priv_set_link(priv, 1); 1341 priv_unlock(priv); 1342 return err; 1343 } 1344 1345 /** 1346 * Configure secondary process queues from a private data pointer (primary 1347 * or secondary) and update burst callbacks. Can take place only once. 1348 * 1349 * All queues must have been previously created by the primary process to 1350 * avoid undefined behavior. 1351 * 1352 * @param priv 1353 * Private data pointer from either primary or secondary process. 1354 * 1355 * @return 1356 * Private data pointer from secondary process, NULL in case of error. 1357 */ 1358 struct priv * 1359 mlx5_secondary_data_setup(struct priv *priv) 1360 { 1361 unsigned int port_id = 0; 1362 struct mlx5_secondary_data *sd; 1363 void **tx_queues; 1364 void **rx_queues; 1365 unsigned int nb_tx_queues; 1366 unsigned int nb_rx_queues; 1367 unsigned int i; 1368 1369 /* priv must be valid at this point. */ 1370 assert(priv != NULL); 1371 /* priv->dev must also be valid but may point to local memory from 1372 * another process, possibly with the same address and must not 1373 * be dereferenced yet. */ 1374 assert(priv->dev != NULL); 1375 /* Determine port ID by finding out where priv comes from. */ 1376 while (1) { 1377 sd = &mlx5_secondary_data[port_id]; 1378 rte_spinlock_lock(&sd->lock); 1379 /* Primary process? */ 1380 if (sd->primary_priv == priv) 1381 break; 1382 /* Secondary process? */ 1383 if (sd->data.dev_private == priv) 1384 break; 1385 rte_spinlock_unlock(&sd->lock); 1386 if (++port_id == RTE_DIM(mlx5_secondary_data)) 1387 port_id = 0; 1388 } 1389 /* Switch to secondary private structure. If private data has already 1390 * been updated by another thread, there is nothing else to do. 
*/ 1391 priv = sd->data.dev_private; 1392 if (priv->dev->data == &sd->data) 1393 goto end; 1394 /* Sanity checks. Secondary private structure is supposed to point 1395 * to local eth_dev, itself still pointing to the shared device data 1396 * structure allocated by the primary process. */ 1397 assert(sd->shared_dev_data != &sd->data); 1398 assert(sd->data.nb_tx_queues == 0); 1399 assert(sd->data.tx_queues == NULL); 1400 assert(sd->data.nb_rx_queues == 0); 1401 assert(sd->data.rx_queues == NULL); 1402 assert(priv != sd->primary_priv); 1403 assert(priv->dev->data == sd->shared_dev_data); 1404 assert(priv->txqs_n == 0); 1405 assert(priv->txqs == NULL); 1406 assert(priv->rxqs_n == 0); 1407 assert(priv->rxqs == NULL); 1408 nb_tx_queues = sd->shared_dev_data->nb_tx_queues; 1409 nb_rx_queues = sd->shared_dev_data->nb_rx_queues; 1410 /* Allocate local storage for queues. */ 1411 tx_queues = rte_zmalloc("secondary ethdev->tx_queues", 1412 sizeof(sd->data.tx_queues[0]) * nb_tx_queues, 1413 RTE_CACHE_LINE_SIZE); 1414 rx_queues = rte_zmalloc("secondary ethdev->rx_queues", 1415 sizeof(sd->data.rx_queues[0]) * nb_rx_queues, 1416 RTE_CACHE_LINE_SIZE); 1417 if (tx_queues == NULL || rx_queues == NULL) 1418 goto error; 1419 /* Lock to prevent control operations during setup. */ 1420 priv_lock(priv); 1421 /* TX queues. 
*/ 1422 for (i = 0; i != nb_tx_queues; ++i) { 1423 struct txq *primary_txq = (*sd->primary_priv->txqs)[i]; 1424 struct txq_ctrl *primary_txq_ctrl; 1425 struct txq_ctrl *txq_ctrl; 1426 1427 if (primary_txq == NULL) 1428 continue; 1429 primary_txq_ctrl = container_of(primary_txq, 1430 struct txq_ctrl, txq); 1431 txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) + 1432 (1 << primary_txq->elts_n) * 1433 sizeof(struct rte_mbuf *), 0, 1434 primary_txq_ctrl->socket); 1435 if (txq_ctrl != NULL) { 1436 if (txq_ctrl_setup(priv->dev, 1437 txq_ctrl, 1438 1 << primary_txq->elts_n, 1439 primary_txq_ctrl->socket, 1440 NULL) == 0) { 1441 txq_ctrl->txq.stats.idx = 1442 primary_txq->stats.idx; 1443 tx_queues[i] = &txq_ctrl->txq; 1444 continue; 1445 } 1446 rte_free(txq_ctrl); 1447 } 1448 while (i) { 1449 txq_ctrl = tx_queues[--i]; 1450 txq_cleanup(txq_ctrl); 1451 rte_free(txq_ctrl); 1452 } 1453 goto error; 1454 } 1455 /* RX queues. */ 1456 for (i = 0; i != nb_rx_queues; ++i) { 1457 struct rxq_ctrl *primary_rxq = 1458 container_of((*sd->primary_priv->rxqs)[i], 1459 struct rxq_ctrl, rxq); 1460 1461 if (primary_rxq == NULL) 1462 continue; 1463 /* Not supported yet. */ 1464 rx_queues[i] = NULL; 1465 } 1466 /* Update everything. */ 1467 priv->txqs = (void *)tx_queues; 1468 priv->txqs_n = nb_tx_queues; 1469 priv->rxqs = (void *)rx_queues; 1470 priv->rxqs_n = nb_rx_queues; 1471 sd->data.rx_queues = rx_queues; 1472 sd->data.tx_queues = tx_queues; 1473 sd->data.nb_rx_queues = nb_rx_queues; 1474 sd->data.nb_tx_queues = nb_tx_queues; 1475 sd->data.dev_link = sd->shared_dev_data->dev_link; 1476 sd->data.mtu = sd->shared_dev_data->mtu; 1477 memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state, 1478 sizeof(sd->data.rx_queue_state)); 1479 memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state, 1480 sizeof(sd->data.tx_queue_state)); 1481 sd->data.dev_flags = sd->shared_dev_data->dev_flags; 1482 /* Use local data from now on. 
*/ 1483 rte_mb(); 1484 priv->dev->data = &sd->data; 1485 rte_mb(); 1486 priv_select_tx_function(priv); 1487 priv_select_rx_function(priv); 1488 priv_unlock(priv); 1489 end: 1490 /* More sanity checks. */ 1491 assert(priv->dev->data == &sd->data); 1492 rte_spinlock_unlock(&sd->lock); 1493 return priv; 1494 error: 1495 priv_unlock(priv); 1496 rte_free(tx_queues); 1497 rte_free(rx_queues); 1498 rte_spinlock_unlock(&sd->lock); 1499 return NULL; 1500 } 1501 1502 /** 1503 * Configure the TX function to use. 1504 * 1505 * @param priv 1506 * Pointer to private structure. 1507 */ 1508 void 1509 priv_select_tx_function(struct priv *priv) 1510 { 1511 priv->dev->tx_pkt_burst = mlx5_tx_burst; 1512 /* Select appropriate TX function. */ 1513 if (priv->mps == MLX5_MPW_ENHANCED) { 1514 if (priv_check_vec_tx_support(priv) > 0) { 1515 if (priv_check_raw_vec_tx_support(priv) > 0) 1516 priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec; 1517 else 1518 priv->dev->tx_pkt_burst = mlx5_tx_burst_vec; 1519 DEBUG("selected Enhanced MPW TX vectorized function"); 1520 } else { 1521 priv->dev->tx_pkt_burst = mlx5_tx_burst_empw; 1522 DEBUG("selected Enhanced MPW TX function"); 1523 } 1524 } else if (priv->mps && priv->txq_inline) { 1525 priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; 1526 DEBUG("selected MPW inline TX function"); 1527 } else if (priv->mps) { 1528 priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw; 1529 DEBUG("selected MPW TX function"); 1530 } 1531 } 1532 1533 /** 1534 * Configure the RX function to use. 1535 * 1536 * @param priv 1537 * Pointer to private structure. 1538 */ 1539 void 1540 priv_select_rx_function(struct priv *priv) 1541 { 1542 if (priv_check_vec_rx_support(priv) > 0) { 1543 priv_prep_vec_rx_function(priv); 1544 priv->dev->rx_pkt_burst = mlx5_rx_burst_vec; 1545 DEBUG("selected RX vectorized function"); 1546 } else { 1547 priv->dev->rx_pkt_burst = mlx5_rx_burst; 1548 } 1549 } 1550