1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <unistd.h> 36 #include <string.h> 37 #include <assert.h> 38 #include <stdint.h> 39 #include <stdlib.h> 40 #include <errno.h> 41 #include <net/if.h> 42 43 /* Verbs header. */ 44 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 45 #ifdef PEDANTIC 46 #pragma GCC diagnostic ignored "-Wpedantic" 47 #endif 48 #include <infiniband/verbs.h> 49 #ifdef PEDANTIC 50 #pragma GCC diagnostic error "-Wpedantic" 51 #endif 52 53 /* DPDK headers don't like -pedantic. */ 54 #ifdef PEDANTIC 55 #pragma GCC diagnostic ignored "-Wpedantic" 56 #endif 57 #include <rte_malloc.h> 58 #include <rte_ethdev.h> 59 #include <rte_pci.h> 60 #include <rte_common.h> 61 #include <rte_kvargs.h> 62 #ifdef PEDANTIC 63 #pragma GCC diagnostic error "-Wpedantic" 64 #endif 65 66 #include "mlx5.h" 67 #include "mlx5_utils.h" 68 #include "mlx5_rxtx.h" 69 #include "mlx5_autoconf.h" 70 #include "mlx5_defs.h" 71 72 /* Device parameter to enable RX completion queue compression. */ 73 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" 74 75 /* Device parameter to configure inline send. */ 76 #define MLX5_TXQ_INLINE "txq_inline" 77 78 /* 79 * Device parameter to configure the number of TX queues threshold for 80 * enabling inline send. 81 */ 82 #define MLX5_TXQS_MIN_INLINE "txqs_min_inline" 83 84 /* Device parameter to enable multi-packet send WQEs. */ 85 #define MLX5_TXQ_MPW_EN "txq_mpw_en" 86 87 /** 88 * Retrieve integer value from environment variable. 89 * 90 * @param[in] name 91 * Environment variable name. 92 * 93 * @return 94 * Integer value, 0 if the variable is not set. 95 */ 96 int 97 mlx5_getenv_int(const char *name) 98 { 99 const char *val = getenv(name); 100 101 if (val == NULL) 102 return 0; 103 return atoi(val); 104 } 105 106 /** 107 * DPDK callback to close the device. 108 * 109 * Destroy all queues and objects, free memory. 110 * 111 * @param dev 112 * Pointer to Ethernet device structure. 113 */ 114 static void 115 mlx5_dev_close(struct rte_eth_dev *dev) 116 { 117 struct priv *priv = mlx5_get_priv(dev); 118 unsigned int i; 119 120 priv_lock(priv); 121 DEBUG("%p: closing device \"%s\"", 122 (void *)dev, 123 ((priv->ctx != NULL) ? priv->ctx->device->name : "")); 124 /* In case mlx5_dev_stop() has not been called. */ 125 priv_dev_interrupt_handler_uninstall(priv, dev); 126 priv_special_flow_disable_all(priv); 127 priv_mac_addrs_disable(priv); 128 priv_destroy_hash_rxqs(priv); 129 130 /* Remove flow director elements. */ 131 priv_fdir_disable(priv); 132 priv_fdir_delete_filters_list(priv); 133 134 /* Prevent crashes when queues are still in use. */ 135 dev->rx_pkt_burst = removed_rx_burst; 136 dev->tx_pkt_burst = removed_tx_burst; 137 if (priv->rxqs != NULL) { 138 /* XXX race condition if mlx5_rx_burst() is still running. */ 139 usleep(1000); 140 for (i = 0; (i != priv->rxqs_n); ++i) { 141 struct rxq *rxq = (*priv->rxqs)[i]; 142 struct rxq_ctrl *rxq_ctrl; 143 144 if (rxq == NULL) 145 continue; 146 rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq); 147 (*priv->rxqs)[i] = NULL; 148 rxq_cleanup(rxq_ctrl); 149 rte_free(rxq_ctrl); 150 } 151 priv->rxqs_n = 0; 152 priv->rxqs = NULL; 153 } 154 if (priv->txqs != NULL) { 155 /* XXX race condition if mlx5_tx_burst() is still running. */ 156 usleep(1000); 157 for (i = 0; (i != priv->txqs_n); ++i) { 158 struct txq *txq = (*priv->txqs)[i]; 159 struct txq_ctrl *txq_ctrl; 160 161 if (txq == NULL) 162 continue; 163 txq_ctrl = container_of(txq, struct txq_ctrl, txq); 164 (*priv->txqs)[i] = NULL; 165 txq_cleanup(txq_ctrl); 166 rte_free(txq_ctrl); 167 } 168 priv->txqs_n = 0; 169 priv->txqs = NULL; 170 } 171 if (priv->pd != NULL) { 172 assert(priv->ctx != NULL); 173 claim_zero(ibv_dealloc_pd(priv->pd)); 174 claim_zero(ibv_close_device(priv->ctx)); 175 } else 176 assert(priv->ctx == NULL); 177 if (priv->rss_conf != NULL) { 178 for (i = 0; (i != hash_rxq_init_n); ++i) 179 rte_free((*priv->rss_conf)[i]); 180 rte_free(priv->rss_conf); 181 } 182 if (priv->reta_idx != NULL) 183 rte_free(priv->reta_idx); 184 priv_unlock(priv); 185 memset(priv, 0, sizeof(*priv)); 186 } 187 188 static const struct eth_dev_ops mlx5_dev_ops = { 189 .dev_configure = mlx5_dev_configure, 190 .dev_start = mlx5_dev_start, 191 .dev_stop = mlx5_dev_stop, 192 .dev_set_link_down = mlx5_set_link_down, 193 .dev_set_link_up = mlx5_set_link_up, 194 .dev_close = mlx5_dev_close, 195 .promiscuous_enable = mlx5_promiscuous_enable, 196 .promiscuous_disable = mlx5_promiscuous_disable, 197 .allmulticast_enable = mlx5_allmulticast_enable, 198 .allmulticast_disable = mlx5_allmulticast_disable, 199 .link_update = mlx5_link_update, 200 .stats_get = mlx5_stats_get, 201 .stats_reset = mlx5_stats_reset, 202 .xstats_get = mlx5_xstats_get, 203 .xstats_reset = mlx5_xstats_reset, 204 .xstats_get_names = mlx5_xstats_get_names, 205 .dev_infos_get = mlx5_dev_infos_get, 206 .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, 207 .vlan_filter_set = mlx5_vlan_filter_set, 208 .rx_queue_setup = mlx5_rx_queue_setup, 209 .tx_queue_setup = mlx5_tx_queue_setup, 210 .rx_queue_release = mlx5_rx_queue_release, 211 .tx_queue_release = mlx5_tx_queue_release, 212 .flow_ctrl_get = mlx5_dev_get_flow_ctrl, 213 .flow_ctrl_set = mlx5_dev_set_flow_ctrl, 214 .mac_addr_remove = mlx5_mac_addr_remove, 215 .mac_addr_add = mlx5_mac_addr_add, 216 .mac_addr_set = mlx5_mac_addr_set, 217 .mtu_set = mlx5_dev_set_mtu, 218 .vlan_strip_queue_set = mlx5_vlan_strip_queue_set, 219 .vlan_offload_set = mlx5_vlan_offload_set, 220 .reta_update = mlx5_dev_rss_reta_update, 221 .reta_query = mlx5_dev_rss_reta_query, 222 .rss_hash_update = mlx5_rss_hash_update, 223 .rss_hash_conf_get = mlx5_rss_hash_conf_get, 224 .filter_ctrl = mlx5_dev_filter_ctrl, 225 }; 226 227 static struct { 228 struct rte_pci_addr pci_addr; /* associated PCI address */ 229 uint32_t ports; /* physical ports bitfield. */ 230 } mlx5_dev[32]; 231 232 /** 233 * Get device index in mlx5_dev[] from PCI bus address. 234 * 235 * @param[in] pci_addr 236 * PCI bus address to look for. 237 * 238 * @return 239 * mlx5_dev[] index on success, -1 on failure. 240 */ 241 static int 242 mlx5_dev_idx(struct rte_pci_addr *pci_addr) 243 { 244 unsigned int i; 245 int ret = -1; 246 247 assert(pci_addr != NULL); 248 for (i = 0; (i != RTE_DIM(mlx5_dev)); ++i) { 249 if ((mlx5_dev[i].pci_addr.domain == pci_addr->domain) && 250 (mlx5_dev[i].pci_addr.bus == pci_addr->bus) && 251 (mlx5_dev[i].pci_addr.devid == pci_addr->devid) && 252 (mlx5_dev[i].pci_addr.function == pci_addr->function)) 253 return i; 254 if ((mlx5_dev[i].ports == 0) && (ret == -1)) 255 ret = i; 256 } 257 return ret; 258 } 259 260 /** 261 * Verify and store value for device argument. 262 * 263 * @param[in] key 264 * Key argument to verify. 265 * @param[in] val 266 * Value associated with key. 267 * @param opaque 268 * User data. 269 * 270 * @return 271 * 0 on success, negative errno value on failure. 272 */ 273 static int 274 mlx5_args_check(const char *key, const char *val, void *opaque) 275 { 276 struct priv *priv = opaque; 277 unsigned long tmp; 278 279 errno = 0; 280 tmp = strtoul(val, NULL, 0); 281 if (errno) { 282 WARN("%s: \"%s\" is not a valid integer", key, val); 283 return errno; 284 } 285 if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { 286 priv->cqe_comp = !!tmp; 287 } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { 288 priv->txq_inline = tmp; 289 } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { 290 priv->txqs_inline = tmp; 291 } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { 292 priv->mps &= !!tmp; /* Enable MPW only if HW supports */ 293 } else { 294 WARN("%s: unknown parameter", key); 295 return -EINVAL; 296 } 297 return 0; 298 } 299 300 /** 301 * Parse device parameters. 302 * 303 * @param priv 304 * Pointer to private structure. 305 * @param devargs 306 * Device arguments structure. 307 * 308 * @return 309 * 0 on success, errno value on failure. 310 */ 311 static int 312 mlx5_args(struct priv *priv, struct rte_devargs *devargs) 313 { 314 const char **params = (const char *[]){ 315 MLX5_RXQ_CQE_COMP_EN, 316 MLX5_TXQ_INLINE, 317 MLX5_TXQS_MIN_INLINE, 318 MLX5_TXQ_MPW_EN, 319 NULL, 320 }; 321 struct rte_kvargs *kvlist; 322 int ret = 0; 323 int i; 324 325 if (devargs == NULL) 326 return 0; 327 /* Following UGLY cast is done to pass checkpatch. */ 328 kvlist = rte_kvargs_parse(devargs->args, params); 329 if (kvlist == NULL) 330 return 0; 331 /* Process parameters. */ 332 for (i = 0; (params[i] != NULL); ++i) { 333 if (rte_kvargs_count(kvlist, params[i])) { 334 ret = rte_kvargs_process(kvlist, params[i], 335 mlx5_args_check, priv); 336 if (ret != 0) { 337 rte_kvargs_free(kvlist); 338 return ret; 339 } 340 } 341 } 342 rte_kvargs_free(kvlist); 343 return 0; 344 } 345 346 static struct eth_driver mlx5_driver; 347 348 /** 349 * DPDK callback to register a PCI device. 350 * 351 * This function creates an Ethernet device for each port of a given 352 * PCI device. 353 * 354 * @param[in] pci_drv 355 * PCI driver structure (mlx5_driver). 356 * @param[in] pci_dev 357 * PCI device information. 358 * 359 * @return 360 * 0 on success, negative errno value on failure. 361 */ 362 static int 363 mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 364 { 365 struct ibv_device **list; 366 struct ibv_device *ibv_dev; 367 int err = 0; 368 struct ibv_context *attr_ctx = NULL; 369 struct ibv_device_attr device_attr; 370 unsigned int sriov; 371 unsigned int mps; 372 int idx; 373 int i; 374 375 (void)pci_drv; 376 assert(pci_drv == &mlx5_driver.pci_drv); 377 /* Get mlx5_dev[] index. */ 378 idx = mlx5_dev_idx(&pci_dev->addr); 379 if (idx == -1) { 380 ERROR("this driver cannot support any more adapters"); 381 return -ENOMEM; 382 } 383 DEBUG("using driver device index %d", idx); 384 385 /* Save PCI address. */ 386 mlx5_dev[idx].pci_addr = pci_dev->addr; 387 list = ibv_get_device_list(&i); 388 if (list == NULL) { 389 assert(errno); 390 if (errno == ENOSYS) { 391 WARN("cannot list devices, is ib_uverbs loaded?"); 392 return 0; 393 } 394 return -errno; 395 } 396 assert(i >= 0); 397 /* 398 * For each listed device, check related sysfs entry against 399 * the provided PCI ID. 400 */ 401 while (i != 0) { 402 struct rte_pci_addr pci_addr; 403 404 --i; 405 DEBUG("checking device \"%s\"", list[i]->name); 406 if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr)) 407 continue; 408 if ((pci_dev->addr.domain != pci_addr.domain) || 409 (pci_dev->addr.bus != pci_addr.bus) || 410 (pci_dev->addr.devid != pci_addr.devid) || 411 (pci_dev->addr.function != pci_addr.function)) 412 continue; 413 sriov = ((pci_dev->id.device_id == 414 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || 415 (pci_dev->id.device_id == 416 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) || 417 (pci_dev->id.device_id == 418 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) || 419 (pci_dev->id.device_id == 420 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)); 421 /* 422 * Multi-packet send is supported by ConnectX-4 Lx PF as well 423 * as all ConnectX-5 devices. 424 */ 425 switch (pci_dev->id.device_id) { 426 case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX: 427 case PCI_DEVICE_ID_MELLANOX_CONNECTX5: 428 case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF: 429 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX: 430 case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF: 431 mps = 1; 432 break; 433 default: 434 mps = 0; 435 } 436 INFO("PCI information matches, using device \"%s\"" 437 " (SR-IOV: %s, MPS: %s)", 438 list[i]->name, 439 sriov ? "true" : "false", 440 mps ? "true" : "false"); 441 attr_ctx = ibv_open_device(list[i]); 442 err = errno; 443 break; 444 } 445 if (attr_ctx == NULL) { 446 ibv_free_device_list(list); 447 switch (err) { 448 case 0: 449 WARN("cannot access device, is mlx5_ib loaded?"); 450 return 0; 451 case EINVAL: 452 WARN("cannot use device, are drivers up to date?"); 453 return 0; 454 } 455 assert(err > 0); 456 return -err; 457 } 458 ibv_dev = list[i]; 459 460 DEBUG("device opened"); 461 if (ibv_query_device(attr_ctx, &device_attr)) 462 goto error; 463 INFO("%u port(s) detected", device_attr.phys_port_cnt); 464 465 for (i = 0; i < device_attr.phys_port_cnt; i++) { 466 uint32_t port = i + 1; /* ports are indexed from one */ 467 uint32_t test = (1 << i); 468 struct ibv_context *ctx = NULL; 469 struct ibv_port_attr port_attr; 470 struct ibv_pd *pd = NULL; 471 struct priv *priv = NULL; 472 struct rte_eth_dev *eth_dev; 473 struct ibv_exp_device_attr exp_device_attr; 474 struct ether_addr mac; 475 uint16_t num_vfs = 0; 476 477 exp_device_attr.comp_mask = 478 IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | 479 IBV_EXP_DEVICE_ATTR_RX_HASH | 480 IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS | 481 IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN | 482 0; 483 484 DEBUG("using port %u (%08" PRIx32 ")", port, test); 485 486 ctx = ibv_open_device(ibv_dev); 487 if (ctx == NULL) 488 goto port_error; 489 490 /* Check port status. */ 491 err = ibv_query_port(ctx, port, &port_attr); 492 if (err) { 493 ERROR("port query failed: %s", strerror(err)); 494 goto port_error; 495 } 496 497 if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { 498 ERROR("port %d is not configured in Ethernet mode", 499 port); 500 goto port_error; 501 } 502 503 if (port_attr.state != IBV_PORT_ACTIVE) 504 DEBUG("port %d is not active: \"%s\" (%d)", 505 port, ibv_port_state_str(port_attr.state), 506 port_attr.state); 507 508 /* Allocate protection domain. */ 509 pd = ibv_alloc_pd(ctx); 510 if (pd == NULL) { 511 ERROR("PD allocation failure"); 512 err = ENOMEM; 513 goto port_error; 514 } 515 516 mlx5_dev[idx].ports |= test; 517 518 /* from rte_ethdev.c */ 519 priv = rte_zmalloc("ethdev private structure", 520 sizeof(*priv), 521 RTE_CACHE_LINE_SIZE); 522 if (priv == NULL) { 523 ERROR("priv allocation failure"); 524 err = ENOMEM; 525 goto port_error; 526 } 527 528 priv->ctx = ctx; 529 priv->device_attr = device_attr; 530 priv->port = port; 531 priv->pd = pd; 532 priv->mtu = ETHER_MTU; 533 priv->mps = mps; /* Enable MPW by default if supported. */ 534 priv->cqe_comp = 1; /* Enable compression by default. */ 535 err = mlx5_args(priv, pci_dev->device.devargs); 536 if (err) { 537 ERROR("failed to process device arguments: %s", 538 strerror(err)); 539 goto port_error; 540 } 541 if (ibv_exp_query_device(ctx, &exp_device_attr)) { 542 ERROR("ibv_exp_query_device() failed"); 543 goto port_error; 544 } 545 546 priv->hw_csum = 547 ((exp_device_attr.exp_device_cap_flags & 548 IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) && 549 (exp_device_attr.exp_device_cap_flags & 550 IBV_EXP_DEVICE_RX_CSUM_IP_PKT)); 551 DEBUG("checksum offloading is %ssupported", 552 (priv->hw_csum ? "" : "not ")); 553 554 priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & 555 IBV_EXP_DEVICE_VXLAN_SUPPORT); 556 DEBUG("L2 tunnel checksum offloads are %ssupported", 557 (priv->hw_csum_l2tun ? "" : "not ")); 558 559 priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; 560 /* Remove this check once DPDK supports larger/variable 561 * indirection tables. */ 562 if (priv->ind_table_max_size > 563 (unsigned int)ETH_RSS_RETA_SIZE_512) 564 priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512; 565 DEBUG("maximum RX indirection table size is %u", 566 priv->ind_table_max_size); 567 priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & 568 IBV_EXP_RECEIVE_WQ_CVLAN_STRIP); 569 DEBUG("VLAN stripping is %ssupported", 570 (priv->hw_vlan_strip ? "" : "not ")); 571 572 priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags & 573 IBV_EXP_DEVICE_SCATTER_FCS); 574 DEBUG("FCS stripping configuration is %ssupported", 575 (priv->hw_fcs_strip ? "" : "not ")); 576 577 priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align; 578 DEBUG("hardware RX end alignment padding is %ssupported", 579 (priv->hw_padding ? "" : "not ")); 580 581 priv_get_num_vfs(priv, &num_vfs); 582 priv->sriov = (num_vfs || sriov); 583 if (priv->mps && !mps) { 584 ERROR("multi-packet send not supported on this device" 585 " (" MLX5_TXQ_MPW_EN ")"); 586 err = ENOTSUP; 587 goto port_error; 588 } 589 /* Allocate and register default RSS hash keys. */ 590 priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n, 591 sizeof((*priv->rss_conf)[0]), 0); 592 if (priv->rss_conf == NULL) { 593 err = ENOMEM; 594 goto port_error; 595 } 596 err = rss_hash_rss_conf_new_key(priv, 597 rss_hash_default_key, 598 rss_hash_default_key_len, 599 ETH_RSS_PROTO_MASK); 600 if (err) 601 goto port_error; 602 /* Configure the first MAC address by default. */ 603 if (priv_get_mac(priv, &mac.addr_bytes)) { 604 ERROR("cannot get MAC address, is mlx5_en loaded?" 605 " (errno: %s)", strerror(errno)); 606 goto port_error; 607 } 608 INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", 609 priv->port, 610 mac.addr_bytes[0], mac.addr_bytes[1], 611 mac.addr_bytes[2], mac.addr_bytes[3], 612 mac.addr_bytes[4], mac.addr_bytes[5]); 613 /* Register MAC address. */ 614 claim_zero(priv_mac_addr_add(priv, 0, 615 (const uint8_t (*)[ETHER_ADDR_LEN]) 616 mac.addr_bytes)); 617 /* Initialize FD filters list. */ 618 err = fdir_init_filters_list(priv); 619 if (err) 620 goto port_error; 621 #ifndef NDEBUG 622 { 623 char ifname[IF_NAMESIZE]; 624 625 if (priv_get_ifname(priv, &ifname) == 0) 626 DEBUG("port %u ifname is \"%s\"", 627 priv->port, ifname); 628 else 629 DEBUG("port %u ifname is unknown", priv->port); 630 } 631 #endif 632 /* Get actual MTU if possible. */ 633 priv_get_mtu(priv, &priv->mtu); 634 DEBUG("port %u MTU is %u", priv->port, priv->mtu); 635 636 /* from rte_ethdev.c */ 637 { 638 char name[RTE_ETH_NAME_MAX_LEN]; 639 640 snprintf(name, sizeof(name), "%s port %u", 641 ibv_get_device_name(ibv_dev), port); 642 eth_dev = rte_eth_dev_allocate(name); 643 } 644 if (eth_dev == NULL) { 645 ERROR("can not allocate rte ethdev"); 646 err = ENOMEM; 647 goto port_error; 648 } 649 650 /* Secondary processes have to use local storage for their 651 * private data as well as a copy of eth_dev->data, but this 652 * pointer must not be modified before burst functions are 653 * actually called. */ 654 if (mlx5_is_secondary()) { 655 struct mlx5_secondary_data *sd = 656 &mlx5_secondary_data[eth_dev->data->port_id]; 657 sd->primary_priv = eth_dev->data->dev_private; 658 if (sd->primary_priv == NULL) { 659 ERROR("no private data for port %u", 660 eth_dev->data->port_id); 661 err = EINVAL; 662 goto port_error; 663 } 664 sd->shared_dev_data = eth_dev->data; 665 rte_spinlock_init(&sd->lock); 666 memcpy(sd->data.name, sd->shared_dev_data->name, 667 sizeof(sd->data.name)); 668 sd->data.dev_private = priv; 669 sd->data.rx_mbuf_alloc_failed = 0; 670 sd->data.mtu = ETHER_MTU; 671 sd->data.port_id = sd->shared_dev_data->port_id; 672 sd->data.mac_addrs = priv->mac; 673 eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup; 674 eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup; 675 } else { 676 eth_dev->data->dev_private = priv; 677 eth_dev->data->mac_addrs = priv->mac; 678 } 679 680 eth_dev->device = &pci_dev->device; 681 rte_eth_copy_pci_info(eth_dev, pci_dev); 682 eth_dev->driver = &mlx5_driver; 683 priv->dev = eth_dev; 684 eth_dev->dev_ops = &mlx5_dev_ops; 685 686 /* Bring Ethernet device up. */ 687 DEBUG("forcing Ethernet interface up"); 688 priv_set_flags(priv, ~IFF_UP, IFF_UP); 689 mlx5_link_update(priv->dev, 1); 690 continue; 691 692 port_error: 693 if (priv) { 694 rte_free(priv->rss_conf); 695 rte_free(priv); 696 } 697 if (pd) 698 claim_zero(ibv_dealloc_pd(pd)); 699 if (ctx) 700 claim_zero(ibv_close_device(ctx)); 701 break; 702 } 703 704 /* 705 * XXX if something went wrong in the loop above, there is a resource 706 * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as 707 * long as the dpdk does not provide a way to deallocate a ethdev and a 708 * way to enumerate the registered ethdevs to free the previous ones. 709 */ 710 711 /* no port found, complain */ 712 if (!mlx5_dev[idx].ports) { 713 err = ENODEV; 714 goto error; 715 } 716 717 error: 718 if (attr_ctx) 719 claim_zero(ibv_close_device(attr_ctx)); 720 if (list) 721 ibv_free_device_list(list); 722 assert(err >= 0); 723 return -err; 724 } 725 726 static const struct rte_pci_id mlx5_pci_id_map[] = { 727 { 728 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 729 PCI_DEVICE_ID_MELLANOX_CONNECTX4) 730 }, 731 { 732 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 733 PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) 734 }, 735 { 736 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 737 PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) 738 }, 739 { 740 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 741 PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) 742 }, 743 { 744 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 745 PCI_DEVICE_ID_MELLANOX_CONNECTX5) 746 }, 747 { 748 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 749 PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) 750 }, 751 { 752 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 753 PCI_DEVICE_ID_MELLANOX_CONNECTX5EX) 754 }, 755 { 756 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 757 PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) 758 }, 759 { 760 .vendor_id = 0 761 } 762 }; 763 764 static struct eth_driver mlx5_driver = { 765 .pci_drv = { 766 .driver = { 767 .name = MLX5_DRIVER_NAME 768 }, 769 .id_table = mlx5_pci_id_map, 770 .probe = mlx5_pci_probe, 771 .drv_flags = RTE_PCI_DRV_INTR_LSC, 772 }, 773 .dev_private_size = sizeof(struct priv) 774 }; 775 776 /** 777 * Driver initialization routine. 778 */ 779 RTE_INIT(rte_mlx5_pmd_init); 780 static void 781 rte_mlx5_pmd_init(void) 782 { 783 /* 784 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 785 * huge pages. Calling ibv_fork_init() during init allows 786 * applications to use fork() safely for purposes other than 787 * using this PMD, which is not supported in forked processes. 788 */ 789 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 790 ibv_fork_init(); 791 rte_eal_pci_register(&mlx5_driver.pci_drv); 792 } 793 794 RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__); 795 RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map); 796 RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib"); 797