/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <errno.h>
#include <net/if.h>

/* Verbs headers. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_pci.h>
#include <rte_common.h>
#include <rte_kvargs.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"

/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

/* Device parameter to configure inline send. */
#define MLX5_TXQ_INLINE "txq_inline"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling inline send.
 */
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"

/* Device parameter to include 2 dsegs in the title WQEBB. */
#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"

/* Device parameter to limit the maximum size of an inlined packet. */
#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"

/* Device parameter to enable hardware TSO offload. */
#define MLX5_TSO "tso"

/* Device parameter to enable hardware Tx vector. */
#define MLX5_TX_VEC_EN "tx_vec_en"

/* Device parameter to enable hardware Rx vector. */
#define MLX5_RX_VEC_EN "rx_vec_en"
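
/*
 * These keys are passed as EAL device arguments, one comma-separated
 * list per PCI device, e.g. (hypothetical PCI address):
 *
 *   testpmd -w 0000:03:00.0,txq_inline=128,txq_mpw_en=1 -- ...
 *
 * Unknown keys are rejected by mlx5_args_check().
 */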

/* Default value marking a PMD-specific parameter as unset. */
#define MLX5_ARG_UNSET (-1)

#ifndef HAVE_IBV_MLX5_MOD_MPW
#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
#endif

struct mlx5_args {
	int cqe_comp;
	int txq_inline;
	int txqs_inline;
	int mps;
	int mpw_hdr_dseg;
	int inline_max_packet_sz;
	int tso;
	int tx_vec_en;
	int rx_vec_en;
};
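
/*
 * Each field above mirrors one of the device parameter keys defined
 * earlier; MLX5_ARG_UNSET marks parameters the user did not provide so
 * that mlx5_args_assign() leaves the built-in default untouched.
 */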

/**
 * Retrieve integer value from environment variable.
 *
 * @param[in] name
 *   Environment variable name.
 *
 * @return
 *   Integer value, 0 if the variable is not set.
 */
int
mlx5_getenv_int(const char *name)
{
	const char *val = getenv(name);

	if (val == NULL)
		return 0;
	return atoi(val);
}

/**
 * DPDK callback to close the device.
 *
 * Destroy all queues and objects, free memory.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_dev_close(struct rte_eth_dev *dev)
{
	struct priv *priv = mlx5_get_priv(dev);
	unsigned int i;

	priv_lock(priv);
	DEBUG("%p: closing device \"%s\"",
	      (void *)dev,
	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
	/* In case mlx5_dev_stop() has not been called. */
	priv_dev_interrupt_handler_uninstall(priv, dev);
	priv_special_flow_disable_all(priv);
	priv_mac_addrs_disable(priv);
	priv_destroy_hash_rxqs(priv);

	/* Remove flow director elements. */
	priv_fdir_disable(priv);
	priv_fdir_delete_filters_list(priv);

	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	if (priv->rxqs != NULL) {
		/* XXX race condition if mlx5_rx_burst() is still running. */
		usleep(1000);
		for (i = 0; (i != priv->rxqs_n); ++i) {
			struct rxq *rxq = (*priv->rxqs)[i];
			struct rxq_ctrl *rxq_ctrl;

			if (rxq == NULL)
				continue;
			rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
			(*priv->rxqs)[i] = NULL;
			rxq_cleanup(rxq_ctrl);
			rte_free(rxq_ctrl);
		}
		priv->rxqs_n = 0;
		priv->rxqs = NULL;
	}
	if (priv->txqs != NULL) {
		/* XXX race condition if mlx5_tx_burst() is still running. */
		usleep(1000);
		for (i = 0; (i != priv->txqs_n); ++i) {
			struct txq *txq = (*priv->txqs)[i];
			struct txq_ctrl *txq_ctrl;

			if (txq == NULL)
				continue;
			txq_ctrl = container_of(txq, struct txq_ctrl, txq);
			(*priv->txqs)[i] = NULL;
			txq_cleanup(txq_ctrl);
			rte_free(txq_ctrl);
		}
		priv->txqs_n = 0;
		priv->txqs = NULL;
	}
	if (priv->pd != NULL) {
		assert(priv->ctx != NULL);
		claim_zero(ibv_dealloc_pd(priv->pd));
		claim_zero(ibv_close_device(priv->ctx));
	} else
		assert(priv->ctx == NULL);
	if (priv->rss_conf != NULL) {
		for (i = 0; (i != hash_rxq_init_n); ++i)
			rte_free((*priv->rss_conf)[i]);
		rte_free(priv->rss_conf);
	}
	if (priv->reta_idx != NULL)
		rte_free(priv->reta_idx);
	priv_unlock(priv);
	memset(priv, 0, sizeof(*priv));
}

static const struct eth_dev_ops mlx5_dev_ops = {
	.dev_configure = mlx5_dev_configure,
	.dev_start = mlx5_dev_start,
	.dev_stop = mlx5_dev_stop,
	.dev_set_link_down = mlx5_set_link_down,
	.dev_set_link_up = mlx5_set_link_up,
	.dev_close = mlx5_dev_close,
	.promiscuous_enable = mlx5_promiscuous_enable,
	.promiscuous_disable = mlx5_promiscuous_disable,
	.allmulticast_enable = mlx5_allmulticast_enable,
	.allmulticast_disable = mlx5_allmulticast_disable,
	.link_update = mlx5_link_update,
	.stats_get = mlx5_stats_get,
	.stats_reset = mlx5_stats_reset,
	.xstats_get = mlx5_xstats_get,
	.xstats_reset = mlx5_xstats_reset,
	.xstats_get_names = mlx5_xstats_get_names,
	.dev_infos_get = mlx5_dev_infos_get,
	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
	.vlan_filter_set = mlx5_vlan_filter_set,
	.rx_queue_setup = mlx5_rx_queue_setup,
	.tx_queue_setup = mlx5_tx_queue_setup,
	.rx_queue_release = mlx5_rx_queue_release,
	.tx_queue_release = mlx5_tx_queue_release,
	.flow_ctrl_get = mlx5_dev_get_flow_ctrl,
	.flow_ctrl_set = mlx5_dev_set_flow_ctrl,
	.mac_addr_remove = mlx5_mac_addr_remove,
	.mac_addr_add = mlx5_mac_addr_add,
	.mac_addr_set = mlx5_mac_addr_set,
	.mtu_set = mlx5_dev_set_mtu,
	.vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
	.vlan_offload_set = mlx5_vlan_offload_set,
	.reta_update = mlx5_dev_rss_reta_update,
	.reta_query = mlx5_dev_rss_reta_query,
	.rss_hash_update = mlx5_rss_hash_update,
	.rss_hash_conf_get = mlx5_rss_hash_conf_get,
	.filter_ctrl = mlx5_dev_filter_ctrl,
	.rx_descriptor_status = mlx5_rx_descriptor_status,
	.tx_descriptor_status = mlx5_tx_descriptor_status,
	.rx_queue_intr_enable = mlx5_rx_intr_enable,
	.rx_queue_intr_disable = mlx5_rx_intr_disable,
};

static struct {
	struct rte_pci_addr pci_addr; /* Associated PCI address. */
	uint32_t ports; /* Physical ports bitfield. */
} mlx5_dev[32];
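
/*
 * A slot in mlx5_dev[] is claimed by mlx5_pci_probe() by setting one bit
 * per physical port in its ports bitfield; a slot whose bitfield is still
 * zero is considered free and may be reused.
 */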

/**
 * Get device index in mlx5_dev[] from PCI bus address.
 *
 * @param[in] pci_addr
 *   PCI bus address to look for.
 *
 * @return
 *   mlx5_dev[] index on success, -1 on failure.
 */
static int
mlx5_dev_idx(struct rte_pci_addr *pci_addr)
{
	unsigned int i;
	int ret = -1;

	assert(pci_addr != NULL);
	for (i = 0; (i != RTE_DIM(mlx5_dev)); ++i) {
		if ((mlx5_dev[i].pci_addr.domain == pci_addr->domain) &&
		    (mlx5_dev[i].pci_addr.bus == pci_addr->bus) &&
		    (mlx5_dev[i].pci_addr.devid == pci_addr->devid) &&
		    (mlx5_dev[i].pci_addr.function == pci_addr->function))
			return i;
		if ((mlx5_dev[i].ports == 0) && (ret == -1))
			ret = i;
	}
	return ret;
}

/**
 * Verify and store value for device argument.
 *
 * @param[in] key
 *   Key argument to verify.
 * @param[in] val
 *   Value associated with key.
 * @param opaque
 *   User data.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
static int
mlx5_args_check(const char *key, const char *val, void *opaque)
{
	struct mlx5_args *args = opaque;
	unsigned long tmp;

	errno = 0;
	tmp = strtoul(val, NULL, 0);
	if (errno) {
		WARN("%s: \"%s\" is not a valid integer", key, val);
		/* Negative so rte_kvargs_process() reports the failure. */
		return -errno;
	}
	if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
		args->cqe_comp = !!tmp;
	} else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
		args->txq_inline = tmp;
	} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
		args->txqs_inline = tmp;
	} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
		args->mps = !!tmp;
	} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
		args->mpw_hdr_dseg = !!tmp;
	} else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
		args->inline_max_packet_sz = tmp;
	} else if (strcmp(MLX5_TSO, key) == 0) {
		args->tso = !!tmp;
	} else if (strcmp(MLX5_TX_VEC_EN, key) == 0) {
		args->tx_vec_en = !!tmp;
	} else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
		args->rx_vec_en = !!tmp;
	} else {
		WARN("%s: unknown parameter", key);
		return -EINVAL;
	}
	return 0;
}

/**
 * Parse device parameters.
 *
 * @param[out] args
 *   Pointer to arguments structure.
 * @param devargs
 *   Device arguments structure.
 *
 * @return
 *   0 on success, a negative value otherwise.
 */
static int
mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs)
{
	const char **params = (const char *[]){
		MLX5_RXQ_CQE_COMP_EN,
		MLX5_TXQ_INLINE,
		MLX5_TXQS_MIN_INLINE,
		MLX5_TXQ_MPW_EN,
		MLX5_TXQ_MPW_HDR_DSEG_EN,
		MLX5_TXQ_MAX_INLINE_LEN,
		MLX5_TSO,
		MLX5_TX_VEC_EN,
		MLX5_RX_VEC_EN,
		NULL,
	};
	struct rte_kvargs *kvlist;
	int ret = 0;
	int i;

	if (devargs == NULL)
		return 0;
	kvlist = rte_kvargs_parse(devargs->args, params);
	if (kvlist == NULL)
		return 0;
	/* Process parameters. */
	for (i = 0; (params[i] != NULL); ++i) {
		if (rte_kvargs_count(kvlist, params[i])) {
			ret = rte_kvargs_process(kvlist, params[i],
						 mlx5_args_check, args);
			if (ret != 0) {
				rte_kvargs_free(kvlist);
				return ret;
			}
		}
	}
	rte_kvargs_free(kvlist);
	return 0;
}
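
/*
 * Example: given devargs "rxq_cqe_comp_en=0,txq_inline=256",
 * rte_kvargs_parse() yields two key/value pairs and mlx5_args_check()
 * stores 0 in args->cqe_comp and 256 in args->txq_inline; every other
 * field keeps MLX5_ARG_UNSET.
 */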

static struct rte_pci_driver mlx5_driver;

/**
 * Assign parameters from args into priv; only non-default values are
 * considered.
 *
 * @param[out] priv
 *   Pointer to private structure.
 * @param[in] args
 *   Pointer to arguments structure.
 */
static void
mlx5_args_assign(struct priv *priv, struct mlx5_args *args)
{
	if (args->cqe_comp != MLX5_ARG_UNSET)
		priv->cqe_comp = args->cqe_comp;
	if (args->txq_inline != MLX5_ARG_UNSET)
		priv->txq_inline = args->txq_inline;
	if (args->txqs_inline != MLX5_ARG_UNSET)
		priv->txqs_inline = args->txqs_inline;
	if (args->mps != MLX5_ARG_UNSET)
		/* MPW can only be disabled or left as detected; it cannot
		 * be forced on a device which lacks the capability. */
		priv->mps = args->mps ? priv->mps : 0;
	if (args->mpw_hdr_dseg != MLX5_ARG_UNSET)
		priv->mpw_hdr_dseg = args->mpw_hdr_dseg;
	if (args->inline_max_packet_sz != MLX5_ARG_UNSET)
		priv->inline_max_packet_sz = args->inline_max_packet_sz;
	if (args->tso != MLX5_ARG_UNSET)
		priv->tso = args->tso;
	if (args->tx_vec_en != MLX5_ARG_UNSET)
		priv->tx_vec_en = args->tx_vec_en;
	if (args->rx_vec_en != MLX5_ARG_UNSET)
		priv->rx_vec_en = args->rx_vec_en;
}

/**
 * DPDK callback to register a PCI device.
 *
 * This function creates an Ethernet device for each port of a given
 * PCI device.
 *
 * @param[in] pci_drv
 *   PCI driver structure (mlx5_driver).
 * @param[in] pci_dev
 *   PCI device information.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
static int
mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	struct ibv_device **list;
	struct ibv_device *ibv_dev;
	int err = 0;
	struct ibv_context *attr_ctx = NULL;
	struct ibv_device_attr_ex device_attr;
	unsigned int sriov;
	unsigned int mps;
	unsigned int tunnel_en = 0;
	int idx;
	int i;
	struct mlx5dv_context attrs_out;

	(void)pci_drv;
	assert(pci_drv == &mlx5_driver);
	/* Get mlx5_dev[] index. */
	idx = mlx5_dev_idx(&pci_dev->addr);
	if (idx == -1) {
		ERROR("this driver cannot support any more adapters");
		return -ENOMEM;
	}
	DEBUG("using driver device index %d", idx);

	/* Save PCI address. */
	mlx5_dev[idx].pci_addr = pci_dev->addr;
	list = ibv_get_device_list(&i);
	if (list == NULL) {
		assert(errno);
		if (errno == ENOSYS)
			ERROR("cannot list devices, is ib_uverbs loaded?");
		return -errno;
	}
	assert(i >= 0);
	/*
	 * For each listed device, check related sysfs entry against
	 * the provided PCI ID.
	 */
	while (i != 0) {
		struct rte_pci_addr pci_addr;

		--i;
		DEBUG("checking device \"%s\"", list[i]->name);
		if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
			continue;
		if ((pci_dev->addr.domain != pci_addr.domain) ||
		    (pci_dev->addr.bus != pci_addr.bus) ||
		    (pci_dev->addr.devid != pci_addr.devid) ||
		    (pci_dev->addr.function != pci_addr.function))
			continue;
		sriov = ((pci_dev->id.device_id ==
			  PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
			 (pci_dev->id.device_id ==
			  PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) ||
			 (pci_dev->id.device_id ==
			  PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) ||
			 (pci_dev->id.device_id ==
			  PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF));
		switch (pci_dev->id.device_id) {
		case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
		case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
		case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
		case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
		case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
		case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
			tunnel_en = 1;
			break;
		default:
			break;
		}
		INFO("PCI information matches, using device \"%s\""
		     " (SR-IOV: %s)",
		     list[i]->name,
		     sriov ? "true" : "false");
		attr_ctx = ibv_open_device(list[i]);
		err = errno;
		break;
	}
	if (attr_ctx == NULL) {
		ibv_free_device_list(list);
		switch (err) {
		case 0:
			ERROR("cannot access device, is mlx5_ib loaded?");
			return -ENODEV;
		case EINVAL:
			ERROR("cannot use device, are drivers up to date?");
			return -EINVAL;
		}
		assert(err > 0);
		return -err;
	}
	ibv_dev = list[i];

	DEBUG("device opened");
	/*
	 * Multi-packet send is supported by ConnectX-4 Lx PF as well
	 * as all ConnectX-5 devices.
	 */
	mlx5dv_query_device(attr_ctx, &attrs_out);
	/* Enhanced MPW is only meaningful when MPW is allowed at all;
	 * test both flags, otherwise legacy-MPW devices would be
	 * misreported as enhanced. */
	if ((attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) &&
	    (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW)) {
		INFO("Enhanced MPW is detected");
		mps = MLX5_MPW_ENHANCED;
	} else if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
		INFO("MPW is detected");
		mps = MLX5_MPW;
	} else {
		INFO("MPW is disabled");
		mps = MLX5_MPW_DISABLED;
	}
	if (ibv_query_device_ex(attr_ctx, NULL, &device_attr))
		goto error;
	INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt);

	for (i = 0; i < device_attr.orig_attr.phys_port_cnt; i++) {
		uint32_t port = i + 1; /* ports are indexed from one */
		uint32_t test = (1 << i);
		struct ibv_context *ctx = NULL;
		struct ibv_port_attr port_attr;
		struct ibv_pd *pd = NULL;
		struct priv *priv = NULL;
		struct rte_eth_dev *eth_dev;
		struct ibv_device_attr_ex device_attr_ex;
		struct ether_addr mac;
		uint16_t num_vfs = 0;
		struct mlx5_args args = {
			.cqe_comp = MLX5_ARG_UNSET,
			.txq_inline = MLX5_ARG_UNSET,
			.txqs_inline = MLX5_ARG_UNSET,
			.mps = MLX5_ARG_UNSET,
			.mpw_hdr_dseg = MLX5_ARG_UNSET,
			.inline_max_packet_sz = MLX5_ARG_UNSET,
			.tso = MLX5_ARG_UNSET,
			.tx_vec_en = MLX5_ARG_UNSET,
			.rx_vec_en = MLX5_ARG_UNSET,
		};

		DEBUG("using port %u (%08" PRIx32 ")", port, test);

		ctx = ibv_open_device(ibv_dev);
		if (ctx == NULL) {
			err = ENODEV;
			goto port_error;
		}

		/* Check port status. */
		err = ibv_query_port(ctx, port, &port_attr);
		if (err) {
			ERROR("port query failed: %s", strerror(err));
			goto port_error;
		}

		if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
			ERROR("port %d is not configured in Ethernet mode",
			      port);
			err = EINVAL;
			goto port_error;
		}

		if (port_attr.state != IBV_PORT_ACTIVE)
			DEBUG("port %d is not active: \"%s\" (%d)",
			      port, ibv_port_state_str(port_attr.state),
			      port_attr.state);

		/* Allocate protection domain. */
		pd = ibv_alloc_pd(ctx);
		if (pd == NULL) {
			ERROR("PD allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		mlx5_dev[idx].ports |= test;

		/* from rte_ethdev.c */
		priv = rte_zmalloc("ethdev private structure",
				   sizeof(*priv),
				   RTE_CACHE_LINE_SIZE);
		if (priv == NULL) {
			ERROR("priv allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		priv->ctx = ctx;
		priv->device_attr = device_attr;
		priv->port = port;
		priv->pd = pd;
		priv->mtu = ETHER_MTU;
		priv->mps = mps; /* Enable MPW by default if supported. */
		priv->cqe_comp = 1; /* Enable compression by default. */
		priv->tunnel_en = tunnel_en;
		/* Enable vector paths by default if supported. */
		priv->tx_vec_en = 1;
		priv->rx_vec_en = 1;
		err = mlx5_args(&args, pci_dev->device.devargs);
		if (err) {
			/* rte_kvargs_process() folds handler errors into -1;
			 * normalize to a positive errno for the error path. */
			err = EINVAL;
			ERROR("failed to process device arguments: %s",
			      strerror(err));
			goto port_error;
		}
		mlx5_args_assign(priv, &args);
		if (ibv_query_device_ex(ctx, NULL, &device_attr_ex)) {
			ERROR("ibv_query_device_ex() failed");
			/* Ensure a non-zero errno reaches the error path. */
			err = ENODEV;
			goto port_error;
		}

		priv->hw_csum =
			!!(device_attr_ex.device_cap_flags_ex &
			   IBV_DEVICE_RAW_IP_CSUM);
		DEBUG("checksum offloading is %ssupported",
		      (priv->hw_csum ? "" : "not "));

#ifdef HAVE_IBV_DEVICE_VXLAN_SUPPORT
		/* Uses device_attr_ex; the former exp_device_attr reference
		 * was a leftover from the experimental verbs API. */
		priv->hw_csum_l2tun = !!(device_attr_ex.device_cap_flags_ex &
					 IBV_DEVICE_VXLAN_SUPPORT);
#endif
		DEBUG("L2 tunnel checksum offloads are %ssupported",
		      (priv->hw_csum_l2tun ? "" : "not "));

		priv->ind_table_max_size =
			device_attr_ex.rss_caps.max_rwq_indirection_table_size;
		/* Remove this check once DPDK supports larger/variable
		 * indirection tables. */
		if (priv->ind_table_max_size >
		    (unsigned int)ETH_RSS_RETA_SIZE_512)
			priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
		DEBUG("maximum RX indirection table size is %u",
		      priv->ind_table_max_size);
		priv->hw_vlan_strip = !!(device_attr_ex.raw_packet_caps &
					 IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
		DEBUG("VLAN stripping is %ssupported",
		      (priv->hw_vlan_strip ? "" : "not "));

		/* FCS scatter support is a raw packet capability, not a
		 * generic device capability flag. */
		priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps &
					IBV_RAW_PACKET_CAP_SCATTER_FCS);
		DEBUG("FCS stripping configuration is %ssupported",
		      (priv->hw_fcs_strip ? "" : "not "));

#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
		priv->hw_padding = !!device_attr_ex.rx_pad_end_addr_align;
#endif
		DEBUG("hardware RX end alignment padding is %ssupported",
		      (priv->hw_padding ? "" : "not "));

		priv_get_num_vfs(priv, &num_vfs);
		priv->sriov = (num_vfs || sriov);
		priv->tso = ((priv->tso) &&
			     (device_attr_ex.tso_caps.max_tso > 0) &&
			     (device_attr_ex.tso_caps.supported_qpts &
			      (1 << IBV_QPT_RAW_PACKET)));
		if (priv->tso)
			priv->max_tso_payload_sz =
				device_attr_ex.tso_caps.max_tso;
		if (priv->mps && !mps) {
			ERROR("multi-packet send not supported on this device"
			      " (" MLX5_TXQ_MPW_EN ")");
			err = ENOTSUP;
			goto port_error;
		} else if (priv->mps && priv->tso) {
			WARN("multi-packet send not supported in conjunction"
			     " with TSO. MPS disabled");
			priv->mps = 0;
		}
		INFO("%sMPS is %s",
		     priv->mps == MLX5_MPW_ENHANCED ? "Enhanced " : "",
		     priv->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
		/* Set default values for Enhanced MPW, a.k.a. MPWv2. */
		if (priv->mps == MLX5_MPW_ENHANCED) {
			if (args.txqs_inline == MLX5_ARG_UNSET)
				priv->txqs_inline = MLX5_EMPW_MIN_TXQS;
			if (args.inline_max_packet_sz == MLX5_ARG_UNSET)
				priv->inline_max_packet_sz =
					MLX5_EMPW_MAX_INLINE_LEN;
			if (args.txq_inline == MLX5_ARG_UNSET)
				priv->txq_inline = MLX5_WQE_SIZE_MAX -
						   MLX5_WQE_SIZE;
		}
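
		/*
		 * Note: with Enhanced MPW, leaving txq_inline unset selects
		 * the largest inline length a single WQE can hold; explicit
		 * device arguments always take precedence.
		 */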
		/* Allocate and register default RSS hash keys. */
		priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
					    sizeof((*priv->rss_conf)[0]), 0);
		if (priv->rss_conf == NULL) {
			err = ENOMEM;
			goto port_error;
		}
		err = rss_hash_rss_conf_new_key(priv,
						rss_hash_default_key,
						rss_hash_default_key_len,
						ETH_RSS_PROTO_MASK);
		if (err)
			goto port_error;
		/* Configure the first MAC address by default. */
		if (priv_get_mac(priv, &mac.addr_bytes)) {
			ERROR("cannot get MAC address, is mlx5_en loaded?"
			      " (errno: %s)", strerror(errno));
			err = ENODEV;
			goto port_error;
		}
		INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
		     priv->port,
		     mac.addr_bytes[0], mac.addr_bytes[1],
		     mac.addr_bytes[2], mac.addr_bytes[3],
		     mac.addr_bytes[4], mac.addr_bytes[5]);
		/* Register MAC address. */
		claim_zero(priv_mac_addr_add(priv, 0,
					     (const uint8_t (*)[ETHER_ADDR_LEN])
					     mac.addr_bytes));
		/* Initialize FD filters list. */
		err = fdir_init_filters_list(priv);
		if (err)
			goto port_error;
#ifndef NDEBUG
		{
			char ifname[IF_NAMESIZE];

			if (priv_get_ifname(priv, &ifname) == 0)
				DEBUG("port %u ifname is \"%s\"",
				      priv->port, ifname);
			else
				DEBUG("port %u ifname is unknown", priv->port);
		}
#endif
		/* Get actual MTU if possible. */
		priv_get_mtu(priv, &priv->mtu);
		DEBUG("port %u MTU is %u", priv->port, priv->mtu);

		/* from rte_ethdev.c */
		{
			char name[RTE_ETH_NAME_MAX_LEN];

			snprintf(name, sizeof(name), "%s port %u",
				 ibv_get_device_name(ibv_dev), port);
			eth_dev = rte_eth_dev_allocate(name);
		}
		if (eth_dev == NULL) {
			ERROR("cannot allocate rte ethdev");
			err = ENOMEM;
			goto port_error;
		}
		eth_dev->data->dev_private = priv;
		eth_dev->data->mac_addrs = priv->mac;
		eth_dev->device = &pci_dev->device;
		rte_eth_copy_pci_info(eth_dev, pci_dev);
		eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
		eth_dev->device->driver = &mlx5_driver.driver;
		priv->dev = eth_dev;
		eth_dev->dev_ops = &mlx5_dev_ops;
		TAILQ_INIT(&priv->flows);

		/* Bring Ethernet device up. */
		DEBUG("forcing Ethernet interface up");
		priv_set_flags(priv, ~IFF_UP, IFF_UP);
		mlx5_link_update(priv->dev, 1);
		continue;

port_error:
		if (priv) {
			rte_free(priv->rss_conf);
			rte_free(priv);
		}
		if (pd)
			claim_zero(ibv_dealloc_pd(pd));
		if (ctx)
			claim_zero(ibv_close_device(ctx));
		break;
	}

	/*
	 * XXX if something went wrong in the loop above, there is a resource
	 * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as
	 * long as the dpdk does not provide a way to deallocate a ethdev and a
	 * way to enumerate the registered ethdevs to free the previous ones.
	 */

	/* No port found, complain. */
	if (!mlx5_dev[idx].ports) {
		err = ENODEV;
		goto error;
	}

error:
	if (attr_ctx)
		claim_zero(ibv_close_device(attr_ctx));
	if (list)
		ibv_free_device_list(list);
	assert(err >= 0);
	return -err;
}

static const struct rte_pci_id mlx5_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5VF)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5EX)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
	},
	{
		.vendor_id = 0
	}
};

static struct rte_pci_driver mlx5_driver = {
	.driver = {
		.name = MLX5_DRIVER_NAME
	},
	.id_table = mlx5_pci_id_map,
	.probe = mlx5_pci_probe,
	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
};

/**
 * Driver initialization routine.
 */
RTE_INIT(rte_mlx5_pmd_init);
static void
rte_mlx5_pmd_init(void)
{
	/* Build the static table for ptype conversion. */
	mlx5_set_ptype_table();
	/*
	 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
	 * huge pages. Calling ibv_fork_init() during init allows
	 * applications to use fork() safely for purposes other than
	 * using this PMD, which is not supported in forked processes.
	 */
	setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
	/* Don't map UAR to WC if BlueFlame is not used. */
	setenv("MLX5_SHUT_UP_BF", "1", 1);
	ibv_fork_init();
	rte_pci_register(&mlx5_driver);
}

RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib");