1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2012 6WIND S.A. 3 * Copyright 2012 Mellanox Technologies, Ltd 4 */ 5 6 /** 7 * @file 8 * mlx4 driver initialization. 9 */ 10 11 #include <errno.h> 12 #include <inttypes.h> 13 #include <stddef.h> 14 #include <stdint.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <sys/mman.h> 19 #include <unistd.h> 20 #ifdef RTE_IBVERBS_LINK_DLOPEN 21 #include <dlfcn.h> 22 #endif 23 24 /* Verbs headers do not support -pedantic. */ 25 #ifdef PEDANTIC 26 #pragma GCC diagnostic ignored "-Wpedantic" 27 #endif 28 #include <infiniband/verbs.h> 29 #ifdef PEDANTIC 30 #pragma GCC diagnostic error "-Wpedantic" 31 #endif 32 33 #include <rte_common.h> 34 #include <dev_driver.h> 35 #include <rte_errno.h> 36 #include <ethdev_driver.h> 37 #include <ethdev_pci.h> 38 #include <rte_ether.h> 39 #include <rte_flow.h> 40 #include <rte_interrupts.h> 41 #include <rte_kvargs.h> 42 #include <rte_malloc.h> 43 #include <rte_mbuf.h> 44 45 #include "mlx4.h" 46 #include "mlx4_glue.h" 47 #include "mlx4_flow.h" 48 #include "mlx4_mr.h" 49 #include "mlx4_rxtx.h" 50 #include "mlx4_utils.h" 51 52 #ifdef MLX4_GLUE 53 const struct mlx4_glue *mlx4_glue; 54 #endif 55 56 static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data"; 57 58 /* Shared memory between primary and secondary processes. */ 59 struct mlx4_shared_data *mlx4_shared_data; 60 61 /* Spinlock for mlx4_shared_data allocation. */ 62 static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER; 63 64 /* Process local data for secondary processes. */ 65 static struct mlx4_local_data mlx4_local_data; 66 67 /** Configuration structure for device arguments. */ 68 struct mlx4_conf { 69 struct { 70 uint32_t present; /**< Bit-field for existing ports. */ 71 uint32_t enabled; /**< Bit-field for user-enabled ports. */ 72 } ports; 73 int mr_ext_memseg_en; 74 /** Whether memseg should be extended for MR creation. */ 75 }; 76 77 /* Available parameters list. */ 78 const char *pmd_mlx4_init_params[] = { 79 MLX4_PMD_PORT_KVARG, 80 MLX4_MR_EXT_MEMSEG_EN_KVARG, 81 NULL, 82 }; 83 84 static int mlx4_dev_stop(struct rte_eth_dev *dev); 85 86 /** 87 * Initialize shared data between primary and secondary process. 88 * 89 * A memzone is reserved by primary process and secondary processes attach to 90 * the memzone. 91 * 92 * @return 93 * 0 on success, a negative errno value otherwise and rte_errno is set. 94 */ 95 static int 96 mlx4_init_shared_data(void) 97 { 98 const struct rte_memzone *mz; 99 int ret = 0; 100 101 rte_spinlock_lock(&mlx4_shared_data_lock); 102 if (mlx4_shared_data == NULL) { 103 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 104 /* Allocate shared memory. */ 105 mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA, 106 sizeof(*mlx4_shared_data), 107 SOCKET_ID_ANY, 0); 108 if (mz == NULL) { 109 ERROR("Cannot allocate mlx4 shared data"); 110 ret = -rte_errno; 111 goto error; 112 } 113 mlx4_shared_data = mz->addr; 114 memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data)); 115 rte_spinlock_init(&mlx4_shared_data->lock); 116 } else { 117 /* Lookup allocated shared memory. */ 118 mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA); 119 if (mz == NULL) { 120 ERROR("Cannot attach mlx4 shared data"); 121 ret = -rte_errno; 122 goto error; 123 } 124 mlx4_shared_data = mz->addr; 125 memset(&mlx4_local_data, 0, sizeof(mlx4_local_data)); 126 } 127 } 128 error: 129 rte_spinlock_unlock(&mlx4_shared_data_lock); 130 return ret; 131 } 132 133 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 134 /** 135 * Verbs callback to allocate a memory. This function should allocate the space 136 * according to the size provided residing inside a huge page. 137 * Please note that all allocation must respect the alignment from libmlx4 138 * (i.e. currently sysconf(_SC_PAGESIZE)). 139 * 140 * @param[in] size 141 * The size in bytes of the memory to allocate. 142 * @param[in] data 143 * A pointer to the callback data. 144 * 145 * @return 146 * Allocated buffer, NULL otherwise and rte_errno is set. 147 */ 148 static void * 149 mlx4_alloc_verbs_buf(size_t size, void *data) 150 { 151 struct mlx4_priv *priv = data; 152 void *ret; 153 size_t alignment = sysconf(_SC_PAGESIZE); 154 unsigned int socket = SOCKET_ID_ANY; 155 156 if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) { 157 const struct txq *txq = priv->verbs_alloc_ctx.obj; 158 159 socket = txq->socket; 160 } else if (priv->verbs_alloc_ctx.type == 161 MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) { 162 const struct rxq *rxq = priv->verbs_alloc_ctx.obj; 163 164 socket = rxq->socket; 165 } 166 MLX4_ASSERT(data != NULL); 167 ret = rte_malloc_socket(__func__, size, alignment, socket); 168 if (!ret && size) 169 rte_errno = ENOMEM; 170 return ret; 171 } 172 173 /** 174 * Verbs callback to free a memory. 175 * 176 * @param[in] ptr 177 * A pointer to the memory to free. 178 * @param[in] data 179 * A pointer to the callback data. 180 */ 181 static void 182 mlx4_free_verbs_buf(void *ptr, void *data __rte_unused) 183 { 184 MLX4_ASSERT(data != NULL); 185 rte_free(ptr); 186 } 187 #endif 188 189 /** 190 * Initialize process private data structure. 191 * 192 * @param dev 193 * Pointer to Ethernet device structure. 194 * 195 * @return 196 * 0 on success, a negative errno value otherwise and rte_errno is set. 197 */ 198 int 199 mlx4_proc_priv_init(struct rte_eth_dev *dev) 200 { 201 struct mlx4_proc_priv *ppriv; 202 size_t ppriv_size; 203 204 mlx4_proc_priv_uninit(dev); 205 /* 206 * UAR register table follows the process private structure. BlueFlame 207 * registers for Tx queues are stored in the table. 208 */ 209 ppriv_size = sizeof(struct mlx4_proc_priv) + 210 dev->data->nb_tx_queues * sizeof(void *); 211 ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size, 212 RTE_CACHE_LINE_SIZE, dev->device->numa_node); 213 if (!ppriv) { 214 rte_errno = ENOMEM; 215 return -rte_errno; 216 } 217 ppriv->uar_table_sz = dev->data->nb_tx_queues; 218 dev->process_private = ppriv; 219 return 0; 220 } 221 222 /** 223 * Un-initialize process private data structure. 224 * 225 * @param dev 226 * Pointer to Ethernet device structure. 227 */ 228 void 229 mlx4_proc_priv_uninit(struct rte_eth_dev *dev) 230 { 231 if (!dev->process_private) 232 return; 233 rte_free(dev->process_private); 234 dev->process_private = NULL; 235 } 236 237 /** 238 * DPDK callback for Ethernet device configuration. 239 * 240 * @param dev 241 * Pointer to Ethernet device structure. 242 * 243 * @return 244 * 0 on success, negative errno value otherwise and rte_errno is set. 245 */ 246 static int 247 mlx4_dev_configure(struct rte_eth_dev *dev) 248 { 249 struct mlx4_priv *priv = dev->data->dev_private; 250 struct rte_flow_error error; 251 int ret; 252 253 /* Prepare internal flow rules. */ 254 ret = mlx4_flow_sync(priv, &error); 255 if (ret) { 256 ERROR("cannot set up internal flow rules (code %d, \"%s\")," 257 " flow error type %d, cause %p, message: %s", 258 -ret, strerror(-ret), error.type, error.cause, 259 error.message ? error.message : "(unspecified)"); 260 goto exit; 261 } 262 ret = mlx4_intr_install(priv); 263 if (ret) { 264 ERROR("%p: interrupt handler installation failed", 265 (void *)dev); 266 goto exit; 267 } 268 ret = mlx4_proc_priv_init(dev); 269 if (ret) { 270 ERROR("%p: process private data allocation failed", 271 (void *)dev); 272 goto exit; 273 } 274 exit: 275 return ret; 276 } 277 278 /** 279 * DPDK callback to start the device. 280 * 281 * Simulate device start by initializing common RSS resources and attaching 282 * all configured flows. 283 * 284 * @param dev 285 * Pointer to Ethernet device structure. 286 * 287 * @return 288 * 0 on success, negative errno value otherwise and rte_errno is set. 289 */ 290 static int 291 mlx4_dev_start(struct rte_eth_dev *dev) 292 { 293 struct mlx4_priv *priv = dev->data->dev_private; 294 struct rte_flow_error error; 295 uint16_t i; 296 int ret; 297 298 if (priv->started) 299 return 0; 300 DEBUG("%p: attaching configured flows to all RX queues", (void *)dev); 301 priv->started = 1; 302 ret = mlx4_rss_init(priv); 303 if (ret) { 304 ERROR("%p: cannot initialize RSS resources: %s", 305 (void *)dev, strerror(-ret)); 306 goto err; 307 } 308 #ifdef RTE_LIBRTE_MLX4_DEBUG 309 mlx4_mr_dump_dev(dev); 310 #endif 311 ret = mlx4_rxq_intr_enable(priv); 312 if (ret) { 313 ERROR("%p: interrupt handler installation failed", 314 (void *)dev); 315 goto err; 316 } 317 ret = mlx4_flow_sync(priv, &error); 318 if (ret) { 319 ERROR("%p: cannot attach flow rules (code %d, \"%s\")," 320 " flow error type %d, cause %p, message: %s", 321 (void *)dev, 322 -ret, strerror(-ret), error.type, error.cause, 323 error.message ? error.message : "(unspecified)"); 324 goto err; 325 } 326 rte_wmb(); 327 dev->tx_pkt_burst = mlx4_tx_burst; 328 dev->rx_pkt_burst = mlx4_rx_burst; 329 /* Enable datapath on secondary process. */ 330 mlx4_mp_req_start_rxtx(dev); 331 332 for (i = 0; i < dev->data->nb_rx_queues; i++) 333 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 334 for (i = 0; i < dev->data->nb_tx_queues; i++) 335 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED; 336 337 return 0; 338 err: 339 mlx4_dev_stop(dev); 340 return ret; 341 } 342 343 /** 344 * DPDK callback to stop the device. 345 * 346 * Simulate device stop by detaching all configured flows. 347 * 348 * @param dev 349 * Pointer to Ethernet device structure. 350 */ 351 static int 352 mlx4_dev_stop(struct rte_eth_dev *dev) 353 { 354 struct mlx4_priv *priv = dev->data->dev_private; 355 uint16_t i; 356 357 if (!priv->started) 358 return 0; 359 DEBUG("%p: detaching flows from all RX queues", (void *)dev); 360 priv->started = 0; 361 dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 362 dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 363 rte_wmb(); 364 /* Disable datapath on secondary process. */ 365 mlx4_mp_req_stop_rxtx(dev); 366 mlx4_flow_sync(priv, NULL); 367 mlx4_rxq_intr_disable(priv); 368 mlx4_rss_deinit(priv); 369 370 for (i = 0; i < dev->data->nb_rx_queues; i++) 371 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 372 for (i = 0; i < dev->data->nb_tx_queues; i++) 373 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED; 374 375 return 0; 376 } 377 378 /** 379 * DPDK callback to close the device. 380 * 381 * Destroy all queues and objects, free memory. 382 * 383 * @param dev 384 * Pointer to Ethernet device structure. 385 */ 386 static int 387 mlx4_dev_close(struct rte_eth_dev *dev) 388 { 389 struct mlx4_priv *priv = dev->data->dev_private; 390 unsigned int i; 391 392 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 393 rte_eth_dev_release_port(dev); 394 return 0; 395 } 396 DEBUG("%p: closing device \"%s\"", 397 (void *)dev, 398 ((priv->ctx != NULL) ? priv->ctx->device->name : "")); 399 dev->rx_pkt_burst = rte_eth_pkt_burst_dummy; 400 dev->tx_pkt_burst = rte_eth_pkt_burst_dummy; 401 rte_wmb(); 402 /* Disable datapath on secondary process. */ 403 mlx4_mp_req_stop_rxtx(dev); 404 mlx4_flow_clean(priv); 405 mlx4_rss_deinit(priv); 406 for (i = 0; i != dev->data->nb_rx_queues; ++i) 407 mlx4_rx_queue_release(dev, i); 408 for (i = 0; i != dev->data->nb_tx_queues; ++i) 409 mlx4_tx_queue_release(dev, i); 410 mlx4_proc_priv_uninit(dev); 411 mlx4_mr_release(dev); 412 if (priv->pd != NULL) { 413 MLX4_ASSERT(priv->ctx != NULL); 414 claim_zero(mlx4_glue->dealloc_pd(priv->pd)); 415 claim_zero(mlx4_glue->close_device(priv->ctx)); 416 } else 417 MLX4_ASSERT(priv->ctx == NULL); 418 mlx4_intr_uninstall(priv); 419 memset(priv, 0, sizeof(*priv)); 420 /* mac_addrs must not be freed because part of dev_private */ 421 dev->data->mac_addrs = NULL; 422 return 0; 423 } 424 425 static const struct eth_dev_ops mlx4_dev_ops = { 426 .dev_configure = mlx4_dev_configure, 427 .dev_start = mlx4_dev_start, 428 .dev_stop = mlx4_dev_stop, 429 .dev_set_link_down = mlx4_dev_set_link_down, 430 .dev_set_link_up = mlx4_dev_set_link_up, 431 .dev_close = mlx4_dev_close, 432 .link_update = mlx4_link_update, 433 .promiscuous_enable = mlx4_promiscuous_enable, 434 .promiscuous_disable = mlx4_promiscuous_disable, 435 .allmulticast_enable = mlx4_allmulticast_enable, 436 .allmulticast_disable = mlx4_allmulticast_disable, 437 .mac_addr_remove = mlx4_mac_addr_remove, 438 .mac_addr_add = mlx4_mac_addr_add, 439 .mac_addr_set = mlx4_mac_addr_set, 440 .set_mc_addr_list = mlx4_set_mc_addr_list, 441 .stats_get = mlx4_stats_get, 442 .stats_reset = mlx4_stats_reset, 443 .fw_version_get = mlx4_fw_version_get, 444 .dev_infos_get = mlx4_dev_infos_get, 445 .dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get, 446 .vlan_filter_set = mlx4_vlan_filter_set, 447 .rx_queue_setup = mlx4_rx_queue_setup, 448 .tx_queue_setup = mlx4_tx_queue_setup, 449 .rx_queue_release = mlx4_rx_queue_release, 450 .tx_queue_release = mlx4_tx_queue_release, 451 .flow_ctrl_get = mlx4_flow_ctrl_get, 452 .flow_ctrl_set = mlx4_flow_ctrl_set, 453 .mtu_set = mlx4_mtu_set, 454 .flow_ops_get = mlx4_flow_ops_get, 455 .rx_queue_intr_enable = mlx4_rx_intr_enable, 456 .rx_queue_intr_disable = mlx4_rx_intr_disable, 457 .is_removed = mlx4_is_removed, 458 }; 459 460 /* Available operations from secondary process. */ 461 static const struct eth_dev_ops mlx4_dev_sec_ops = { 462 .stats_get = mlx4_stats_get, 463 .stats_reset = mlx4_stats_reset, 464 .fw_version_get = mlx4_fw_version_get, 465 .dev_infos_get = mlx4_dev_infos_get, 466 }; 467 468 /** 469 * Get PCI information from struct ibv_device. 470 * 471 * @param device 472 * Pointer to Ethernet device structure. 473 * @param[out] pci_addr 474 * PCI bus address output buffer. 475 * 476 * @return 477 * 0 on success, negative errno value otherwise and rte_errno is set. 478 */ 479 static int 480 mlx4_ibv_device_to_pci_addr(const struct ibv_device *device, 481 struct rte_pci_addr *pci_addr) 482 { 483 FILE *file; 484 char line[32]; 485 MKSTR(path, "%s/device/uevent", device->ibdev_path); 486 487 file = fopen(path, "rb"); 488 if (file == NULL) { 489 rte_errno = errno; 490 return -rte_errno; 491 } 492 while (fgets(line, sizeof(line), file) == line) { 493 size_t len = strlen(line); 494 int ret; 495 496 /* Truncate long lines. */ 497 if (len == (sizeof(line) - 1)) 498 while (line[(len - 1)] != '\n') { 499 ret = fgetc(file); 500 if (ret == EOF) 501 break; 502 line[(len - 1)] = ret; 503 } 504 /* Extract information. */ 505 if (sscanf(line, 506 "PCI_SLOT_NAME=" 507 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 508 &pci_addr->domain, 509 &pci_addr->bus, 510 &pci_addr->devid, 511 &pci_addr->function) == 4) { 512 break; 513 } 514 } 515 fclose(file); 516 return 0; 517 } 518 519 /** 520 * Verify and store value for device argument. 521 * 522 * @param[in] key 523 * Key argument to verify. 524 * @param[in] val 525 * Value associated with key. 526 * @param[in, out] conf 527 * Shared configuration data. 528 * 529 * @return 530 * 0 on success, negative errno value otherwise and rte_errno is set. 531 */ 532 static int 533 mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf) 534 { 535 unsigned long tmp; 536 537 errno = 0; 538 tmp = strtoul(val, NULL, 0); 539 if (errno) { 540 rte_errno = errno; 541 WARN("%s: \"%s\" is not a valid integer", key, val); 542 return -rte_errno; 543 } 544 if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) { 545 uint32_t ports = rte_log2_u32(conf->ports.present + 1); 546 547 if (tmp >= ports) { 548 ERROR("port index %lu outside range [0,%" PRIu32 ")", 549 tmp, ports); 550 return -EINVAL; 551 } 552 if (!(conf->ports.present & (1 << tmp))) { 553 rte_errno = EINVAL; 554 ERROR("invalid port index %lu", tmp); 555 return -rte_errno; 556 } 557 conf->ports.enabled |= 1 << tmp; 558 } else if (strcmp(MLX4_MR_EXT_MEMSEG_EN_KVARG, key) == 0) { 559 conf->mr_ext_memseg_en = !!tmp; 560 } else { 561 rte_errno = EINVAL; 562 WARN("%s: unknown parameter", key); 563 return -rte_errno; 564 } 565 return 0; 566 } 567 568 /** 569 * Parse device parameters. 570 * 571 * @param devargs 572 * Device arguments structure. 573 * 574 * @return 575 * 0 on success, negative errno value otherwise and rte_errno is set. 576 */ 577 static int 578 mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf) 579 { 580 struct rte_kvargs *kvlist; 581 unsigned int arg_count; 582 int ret = 0; 583 int i; 584 585 if (devargs == NULL) 586 return 0; 587 kvlist = rte_kvargs_parse(devargs->args, pmd_mlx4_init_params); 588 if (kvlist == NULL) { 589 rte_errno = EINVAL; 590 ERROR("failed to parse kvargs"); 591 return -rte_errno; 592 } 593 /* Process parameters. */ 594 for (i = 0; pmd_mlx4_init_params[i]; ++i) { 595 arg_count = rte_kvargs_count(kvlist, pmd_mlx4_init_params[i]); 596 while (arg_count-- > 0) { 597 ret = rte_kvargs_process(kvlist, 598 pmd_mlx4_init_params[i], 599 (int (*)(const char *, 600 const char *, 601 void *)) 602 mlx4_arg_parse, 603 conf); 604 if (ret != 0) 605 goto free_kvlist; 606 } 607 } 608 free_kvlist: 609 rte_kvargs_free(kvlist); 610 return ret; 611 } 612 613 /** 614 * Interpret RSS capabilities reported by device. 615 * 616 * This function returns the set of usable Verbs RSS hash fields, kernel 617 * quirks taken into account. 618 * 619 * @param ctx 620 * Verbs context. 621 * @param pd 622 * Verbs protection domain. 623 * @param device_attr_ex 624 * Extended device attributes to interpret. 625 * 626 * @return 627 * Usable RSS hash fields mask in Verbs format. 628 */ 629 static uint64_t 630 mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, 631 struct ibv_device_attr_ex *device_attr_ex) 632 { 633 uint64_t hw_rss_sup = device_attr_ex->rss_caps.rx_hash_fields_mask; 634 struct ibv_cq *cq = NULL; 635 struct ibv_wq *wq = NULL; 636 struct ibv_rwq_ind_table *ind = NULL; 637 struct ibv_qp *qp = NULL; 638 639 if (!hw_rss_sup) { 640 WARN("no RSS capabilities reported; disabling support for UDP" 641 " RSS and inner VXLAN RSS"); 642 return IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 | 643 IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6 | 644 IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP; 645 } 646 if (!(hw_rss_sup & IBV_RX_HASH_INNER)) 647 return hw_rss_sup; 648 /* 649 * Although reported as supported, missing code in some Linux 650 * versions (v4.15, v4.16) prevents the creation of hash QPs with 651 * inner capability. 652 * 653 * There is no choice but to attempt to instantiate a temporary RSS 654 * context in order to confirm its support. 655 */ 656 cq = mlx4_glue->create_cq(ctx, 1, NULL, NULL, 0); 657 wq = cq ? mlx4_glue->create_wq 658 (ctx, 659 &(struct ibv_wq_init_attr){ 660 .wq_type = IBV_WQT_RQ, 661 .max_wr = 1, 662 .max_sge = 1, 663 .pd = pd, 664 .cq = cq, 665 }) : NULL; 666 ind = wq ? mlx4_glue->create_rwq_ind_table 667 (ctx, 668 &(struct ibv_rwq_ind_table_init_attr){ 669 .log_ind_tbl_size = 0, 670 .ind_tbl = &wq, 671 .comp_mask = 0, 672 }) : NULL; 673 qp = ind ? mlx4_glue->create_qp_ex 674 (ctx, 675 &(struct ibv_qp_init_attr_ex){ 676 .comp_mask = 677 (IBV_QP_INIT_ATTR_PD | 678 IBV_QP_INIT_ATTR_RX_HASH | 679 IBV_QP_INIT_ATTR_IND_TABLE), 680 .qp_type = IBV_QPT_RAW_PACKET, 681 .pd = pd, 682 .rwq_ind_tbl = ind, 683 .rx_hash_conf = { 684 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 685 .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE, 686 .rx_hash_key = mlx4_rss_hash_key_default, 687 .rx_hash_fields_mask = hw_rss_sup, 688 }, 689 }) : NULL; 690 if (!qp) { 691 WARN("disabling unusable inner RSS capability due to kernel" 692 " quirk"); 693 hw_rss_sup &= ~IBV_RX_HASH_INNER; 694 } else { 695 claim_zero(mlx4_glue->destroy_qp(qp)); 696 } 697 if (ind) 698 claim_zero(mlx4_glue->destroy_rwq_ind_table(ind)); 699 if (wq) 700 claim_zero(mlx4_glue->destroy_wq(wq)); 701 if (cq) 702 claim_zero(mlx4_glue->destroy_cq(cq)); 703 return hw_rss_sup; 704 } 705 706 static struct rte_pci_driver mlx4_driver; 707 708 /** 709 * PMD global initialization. 710 * 711 * Independent from individual device, this function initializes global 712 * per-PMD data structures distinguishing primary and secondary processes. 713 * Hence, each initialization is called once per a process. 714 * 715 * @return 716 * 0 on success, a negative errno value otherwise and rte_errno is set. 717 */ 718 static int 719 mlx4_init_once(void) 720 { 721 struct mlx4_shared_data *sd; 722 struct mlx4_local_data *ld = &mlx4_local_data; 723 int ret = 0; 724 725 if (mlx4_init_shared_data()) 726 return -rte_errno; 727 sd = mlx4_shared_data; 728 MLX4_ASSERT(sd); 729 rte_spinlock_lock(&sd->lock); 730 switch (rte_eal_process_type()) { 731 case RTE_PROC_PRIMARY: 732 if (sd->init_done) 733 break; 734 LIST_INIT(&sd->mem_event_cb_list); 735 rte_rwlock_init(&sd->mem_event_rwlock); 736 rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", 737 mlx4_mr_mem_event_cb, NULL); 738 ret = mlx4_mp_init_primary(); 739 if (ret) 740 goto out; 741 sd->init_done = 1; 742 break; 743 case RTE_PROC_SECONDARY: 744 if (ld->init_done) 745 break; 746 ret = mlx4_mp_init_secondary(); 747 if (ret) 748 goto out; 749 ++sd->secondary_cnt; 750 ld->init_done = 1; 751 break; 752 default: 753 break; 754 } 755 out: 756 rte_spinlock_unlock(&sd->lock); 757 return ret; 758 } 759 760 /** 761 * DPDK callback to register a PCI device. 762 * 763 * This function creates an Ethernet device for each port of a given 764 * PCI device. 765 * 766 * @param[in] pci_drv 767 * PCI driver structure (mlx4_driver). 768 * @param[in] pci_dev 769 * PCI device information. 770 * 771 * @return 772 * 0 on success, negative errno value otherwise and rte_errno is set. 773 */ 774 static int 775 mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 776 { 777 struct ibv_device **list; 778 struct ibv_device *ibv_dev; 779 int err = 0; 780 struct ibv_context *attr_ctx = NULL; 781 struct ibv_device_attr device_attr; 782 struct ibv_device_attr_ex device_attr_ex; 783 struct rte_eth_dev *prev_dev = NULL; 784 struct mlx4_conf conf = { 785 .ports.present = 0, 786 .mr_ext_memseg_en = 1, 787 }; 788 unsigned int vf; 789 int i; 790 char ifname[IF_NAMESIZE]; 791 792 (void)pci_drv; 793 err = mlx4_init_once(); 794 if (err) { 795 ERROR("unable to init PMD global data: %s", 796 strerror(rte_errno)); 797 return -rte_errno; 798 } 799 MLX4_ASSERT(pci_drv == &mlx4_driver); 800 list = mlx4_glue->get_device_list(&i); 801 if (list == NULL) { 802 rte_errno = errno; 803 MLX4_ASSERT(rte_errno); 804 if (rte_errno == ENOSYS) 805 ERROR("cannot list devices, is ib_uverbs loaded?"); 806 return -rte_errno; 807 } 808 MLX4_ASSERT(i >= 0); 809 /* 810 * For each listed device, check related sysfs entry against 811 * the provided PCI ID. 812 */ 813 while (i != 0) { 814 struct rte_pci_addr pci_addr; 815 816 --i; 817 DEBUG("checking device \"%s\"", list[i]->name); 818 if (mlx4_ibv_device_to_pci_addr(list[i], &pci_addr)) 819 continue; 820 if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0) 821 continue; 822 vf = (pci_dev->id.device_id == 823 PCI_DEVICE_ID_MELLANOX_CONNECTX3VF); 824 INFO("PCI information matches, using device \"%s\" (VF: %s)", 825 list[i]->name, (vf ? "true" : "false")); 826 attr_ctx = mlx4_glue->open_device(list[i]); 827 err = errno; 828 break; 829 } 830 if (attr_ctx == NULL) { 831 mlx4_glue->free_device_list(list); 832 switch (err) { 833 case 0: 834 rte_errno = ENODEV; 835 ERROR("cannot access device, is mlx4_ib loaded?"); 836 return -rte_errno; 837 case EINVAL: 838 rte_errno = EINVAL; 839 ERROR("cannot use device, are drivers up to date?"); 840 return -rte_errno; 841 } 842 MLX4_ASSERT(err > 0); 843 rte_errno = err; 844 return -rte_errno; 845 } 846 ibv_dev = list[i]; 847 DEBUG("device opened"); 848 if (mlx4_glue->query_device(attr_ctx, &device_attr)) { 849 err = ENODEV; 850 goto error; 851 } 852 INFO("%u port(s) detected", device_attr.phys_port_cnt); 853 conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1; 854 if (mlx4_args(pci_dev->device.devargs, &conf)) { 855 ERROR("failed to process device arguments"); 856 err = EINVAL; 857 goto error; 858 } 859 /* Use all ports when none are defined */ 860 if (!conf.ports.enabled) 861 conf.ports.enabled = conf.ports.present; 862 /* Retrieve extended device attributes. */ 863 if (mlx4_glue->query_device_ex(attr_ctx, NULL, &device_attr_ex)) { 864 err = ENODEV; 865 goto error; 866 } 867 MLX4_ASSERT(device_attr.max_sge >= MLX4_MAX_SGE); 868 for (i = 0; i < device_attr.phys_port_cnt; i++) { 869 uint32_t port = i + 1; /* ports are indexed from one */ 870 struct ibv_context *ctx = NULL; 871 struct ibv_port_attr port_attr; 872 struct ibv_pd *pd = NULL; 873 struct mlx4_priv *priv = NULL; 874 struct rte_eth_dev *eth_dev = NULL; 875 struct rte_ether_addr mac; 876 char name[RTE_ETH_NAME_MAX_LEN]; 877 878 /* If port is not enabled, skip. */ 879 if (!(conf.ports.enabled & (1 << i))) 880 continue; 881 DEBUG("using port %u", port); 882 ctx = mlx4_glue->open_device(ibv_dev); 883 if (ctx == NULL) { 884 err = ENODEV; 885 goto port_error; 886 } 887 snprintf(name, sizeof(name), "%s port %u", 888 mlx4_glue->get_device_name(ibv_dev), port); 889 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 890 int fd; 891 892 eth_dev = rte_eth_dev_attach_secondary(name); 893 if (eth_dev == NULL) { 894 ERROR("can not attach rte ethdev"); 895 rte_errno = ENOMEM; 896 err = rte_errno; 897 goto err_secondary; 898 } 899 priv = eth_dev->data->dev_private; 900 if (!priv->verbs_alloc_ctx.enabled) { 901 ERROR("secondary process is not supported" 902 " due to lack of external allocator" 903 " from Verbs"); 904 rte_errno = ENOTSUP; 905 err = rte_errno; 906 goto err_secondary; 907 } 908 eth_dev->device = &pci_dev->device; 909 eth_dev->dev_ops = &mlx4_dev_sec_ops; 910 err = mlx4_proc_priv_init(eth_dev); 911 if (err) 912 goto err_secondary; 913 /* Receive command fd from primary process. */ 914 fd = mlx4_mp_req_verbs_cmd_fd(eth_dev); 915 if (fd < 0) { 916 err = rte_errno; 917 goto err_secondary; 918 } 919 /* Remap UAR for Tx queues. */ 920 err = mlx4_tx_uar_init_secondary(eth_dev, fd); 921 close(fd); 922 if (err) { 923 err = rte_errno; 924 goto err_secondary; 925 } 926 /* 927 * Ethdev pointer is still required as input since 928 * the primary device is not accessible from the 929 * secondary process. 930 */ 931 eth_dev->tx_pkt_burst = mlx4_tx_burst; 932 eth_dev->rx_pkt_burst = mlx4_rx_burst; 933 claim_zero(mlx4_glue->close_device(ctx)); 934 rte_eth_copy_pci_info(eth_dev, pci_dev); 935 rte_eth_dev_probing_finish(eth_dev); 936 prev_dev = eth_dev; 937 continue; 938 err_secondary: 939 claim_zero(mlx4_glue->close_device(ctx)); 940 rte_eth_dev_release_port(eth_dev); 941 if (prev_dev) 942 rte_eth_dev_release_port(prev_dev); 943 break; 944 } 945 /* Check port status. */ 946 err = mlx4_glue->query_port(ctx, port, &port_attr); 947 if (err) { 948 err = ENODEV; 949 ERROR("port query failed: %s", strerror(err)); 950 goto port_error; 951 } 952 if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { 953 err = ENOTSUP; 954 ERROR("port %d is not configured in Ethernet mode", 955 port); 956 goto port_error; 957 } 958 if (port_attr.state != IBV_PORT_ACTIVE) 959 DEBUG("port %d is not active: \"%s\" (%d)", 960 port, mlx4_glue->port_state_str(port_attr.state), 961 port_attr.state); 962 /* Make asynchronous FD non-blocking to handle interrupts. */ 963 err = mlx4_fd_set_non_blocking(ctx->async_fd); 964 if (err) { 965 ERROR("cannot make asynchronous FD non-blocking: %s", 966 strerror(err)); 967 goto port_error; 968 } 969 /* Allocate protection domain. */ 970 pd = mlx4_glue->alloc_pd(ctx); 971 if (pd == NULL) { 972 err = ENOMEM; 973 ERROR("PD allocation failure"); 974 goto port_error; 975 } 976 /* from rte_ethdev.c */ 977 priv = rte_zmalloc("ethdev private structure", 978 sizeof(*priv), 979 RTE_CACHE_LINE_SIZE); 980 if (priv == NULL) { 981 err = ENOMEM; 982 ERROR("priv allocation failure"); 983 goto port_error; 984 } 985 priv->ctx = ctx; 986 priv->device_attr = device_attr; 987 priv->port = port; 988 priv->pd = pd; 989 priv->mtu = RTE_ETHER_MTU; 990 priv->vf = vf; 991 priv->hw_csum = !!(device_attr.device_cap_flags & 992 IBV_DEVICE_RAW_IP_CSUM); 993 DEBUG("checksum offloading is %ssupported", 994 (priv->hw_csum ? "" : "not ")); 995 /* Only ConnectX-3 Pro supports tunneling. */ 996 priv->hw_csum_l2tun = 997 priv->hw_csum && 998 (device_attr.vendor_part_id == 999 PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO); 1000 DEBUG("L2 tunnel checksum offloads are %ssupported", 1001 priv->hw_csum_l2tun ? "" : "not "); 1002 priv->hw_rss_sup = mlx4_hw_rss_sup(priv->ctx, priv->pd, 1003 &device_attr_ex); 1004 DEBUG("supported RSS hash fields mask: %016" PRIx64, 1005 priv->hw_rss_sup); 1006 priv->hw_rss_max_qps = 1007 device_attr_ex.rss_caps.max_rwq_indirection_table_size; 1008 DEBUG("MAX RSS queues %d", priv->hw_rss_max_qps); 1009 priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps & 1010 IBV_RAW_PACKET_CAP_SCATTER_FCS); 1011 DEBUG("FCS stripping toggling is %ssupported", 1012 priv->hw_fcs_strip ? "" : "not "); 1013 priv->tso = 1014 ((device_attr_ex.tso_caps.max_tso > 0) && 1015 (device_attr_ex.tso_caps.supported_qpts & 1016 (1 << IBV_QPT_RAW_PACKET))); 1017 if (priv->tso) 1018 priv->tso_max_payload_sz = 1019 device_attr_ex.tso_caps.max_tso; 1020 DEBUG("TSO is %ssupported", 1021 priv->tso ? "" : "not "); 1022 priv->mr_ext_memseg_en = conf.mr_ext_memseg_en; 1023 /* Configure the first MAC address by default. */ 1024 err = mlx4_get_mac(priv, &mac.addr_bytes); 1025 if (err) { 1026 ERROR("cannot get MAC address, is mlx4_en loaded?" 1027 " (error: %s)", strerror(err)); 1028 goto port_error; 1029 } 1030 INFO("port %u MAC address is " RTE_ETHER_ADDR_PRT_FMT, 1031 priv->port, RTE_ETHER_ADDR_BYTES(&mac)); 1032 /* Register MAC address. */ 1033 priv->mac[0] = mac; 1034 1035 if (mlx4_get_ifname(priv, &ifname) == 0) { 1036 DEBUG("port %u ifname is \"%s\"", 1037 priv->port, ifname); 1038 priv->if_index = if_nametoindex(ifname); 1039 } else { 1040 DEBUG("port %u ifname is unknown", priv->port); 1041 } 1042 1043 /* Get actual MTU if possible. */ 1044 mlx4_mtu_get(priv, &priv->mtu); 1045 DEBUG("port %u MTU is %u", priv->port, priv->mtu); 1046 eth_dev = rte_eth_dev_allocate(name); 1047 if (eth_dev == NULL) { 1048 err = ENOMEM; 1049 ERROR("can not allocate rte ethdev"); 1050 goto port_error; 1051 } 1052 eth_dev->data->dev_private = priv; 1053 eth_dev->data->mac_addrs = priv->mac; 1054 eth_dev->device = &pci_dev->device; 1055 rte_eth_copy_pci_info(eth_dev, pci_dev); 1056 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1057 /* Initialize local interrupt handle for current port. */ 1058 priv->intr_handle = 1059 rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED); 1060 if (priv->intr_handle == NULL) { 1061 ERROR("can not allocate intr_handle"); 1062 goto port_error; 1063 } 1064 1065 if (rte_intr_fd_set(priv->intr_handle, -1)) 1066 goto port_error; 1067 1068 if (rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT)) 1069 goto port_error; 1070 1071 /* 1072 * Override ethdev interrupt handle pointer with private 1073 * handle instead of that of the parent PCI device used by 1074 * default. This prevents it from being shared between all 1075 * ports of the same PCI device since each of them is 1076 * associated its own Verbs context. 1077 * 1078 * Rx interrupts in particular require this as the PMD has 1079 * no control over the registration of queue interrupts 1080 * besides setting up eth_dev->intr_handle, the rest is 1081 * handled by rte_intr_rx_ctl(). 1082 */ 1083 eth_dev->intr_handle = priv->intr_handle; 1084 priv->dev_data = eth_dev->data; 1085 eth_dev->dev_ops = &mlx4_dev_ops; 1086 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 1087 /* Hint libmlx4 to use PMD allocator for data plane resources */ 1088 err = mlx4_glue->dv_set_context_attr 1089 (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS, 1090 (void *)((uintptr_t)&(struct mlx4dv_ctx_allocators){ 1091 .alloc = &mlx4_alloc_verbs_buf, 1092 .free = &mlx4_free_verbs_buf, 1093 .data = priv, 1094 })); 1095 if (err) 1096 WARN("Verbs external allocator is not supported"); 1097 else 1098 priv->verbs_alloc_ctx.enabled = 1; 1099 #endif 1100 /* Bring Ethernet device up. */ 1101 DEBUG("forcing Ethernet interface up"); 1102 mlx4_dev_set_link_up(eth_dev); 1103 /* Update link status once if waiting for LSC. */ 1104 if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1105 mlx4_link_update(eth_dev, 0); 1106 /* 1107 * Once the device is added to the list of memory event 1108 * callback, its global MR cache table cannot be expanded 1109 * on the fly because of deadlock. If it overflows, lookup 1110 * should be done by searching MR list linearly, which is slow. 1111 */ 1112 err = mlx4_mr_btree_init(&priv->mr.cache, 1113 MLX4_MR_BTREE_CACHE_N * 2, 1114 eth_dev->device->numa_node); 1115 if (err) { 1116 /* rte_errno is already set. */ 1117 goto port_error; 1118 } 1119 /* Add device to memory callback list. */ 1120 rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock); 1121 LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list, 1122 priv, mem_event_cb); 1123 rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock); 1124 rte_eth_dev_probing_finish(eth_dev); 1125 prev_dev = eth_dev; 1126 continue; 1127 port_error: 1128 if (priv != NULL) 1129 rte_intr_instance_free(priv->intr_handle); 1130 rte_free(priv); 1131 if (eth_dev != NULL) 1132 eth_dev->data->dev_private = NULL; 1133 if (pd) 1134 claim_zero(mlx4_glue->dealloc_pd(pd)); 1135 if (ctx) 1136 claim_zero(mlx4_glue->close_device(ctx)); 1137 if (eth_dev != NULL) { 1138 /* mac_addrs must not be freed because part of dev_private */ 1139 eth_dev->data->mac_addrs = NULL; 1140 rte_eth_dev_release_port(eth_dev); 1141 } 1142 if (prev_dev) 1143 mlx4_dev_close(prev_dev); 1144 break; 1145 } 1146 error: 1147 if (attr_ctx) 1148 claim_zero(mlx4_glue->close_device(attr_ctx)); 1149 if (list) 1150 mlx4_glue->free_device_list(list); 1151 if (err) 1152 rte_errno = err; 1153 return -err; 1154 } 1155 1156 /** 1157 * DPDK callback to remove a PCI device. 1158 * 1159 * This function removes all Ethernet devices belong to a given PCI device. 1160 * 1161 * @param[in] pci_dev 1162 * Pointer to the PCI device. 1163 * 1164 * @return 1165 * 0 on success, the function cannot fail. 1166 */ 1167 static int 1168 mlx4_pci_remove(struct rte_pci_device *pci_dev) 1169 { 1170 uint16_t port_id; 1171 int ret = 0; 1172 1173 RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) { 1174 /* 1175 * mlx4_dev_close() is not registered to secondary process, 1176 * call the close function explicitly for secondary process. 1177 */ 1178 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 1179 ret |= mlx4_dev_close(&rte_eth_devices[port_id]); 1180 else 1181 ret |= rte_eth_dev_close(port_id); 1182 } 1183 return ret == 0 ? 0 : -EIO; 1184 } 1185 1186 static const struct rte_pci_id mlx4_pci_id_map[] = { 1187 { 1188 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1189 PCI_DEVICE_ID_MELLANOX_CONNECTX3) 1190 }, 1191 { 1192 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1193 PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO) 1194 }, 1195 { 1196 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1197 PCI_DEVICE_ID_MELLANOX_CONNECTX3VF) 1198 }, 1199 { 1200 .vendor_id = 0 1201 } 1202 }; 1203 1204 static struct rte_pci_driver mlx4_driver = { 1205 .driver = { 1206 .name = MLX4_DRIVER_NAME 1207 }, 1208 .id_table = mlx4_pci_id_map, 1209 .probe = mlx4_pci_probe, 1210 .remove = mlx4_pci_remove, 1211 .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, 1212 }; 1213 1214 #ifdef RTE_IBVERBS_LINK_DLOPEN 1215 1216 /** 1217 * Suffix RTE_EAL_PMD_PATH with "-glue". 1218 * 1219 * This function performs a sanity check on RTE_EAL_PMD_PATH before 1220 * suffixing its last component. 1221 * 1222 * @param buf[out] 1223 * Output buffer, should be large enough otherwise NULL is returned. 1224 * @param size 1225 * Size of @p out. 1226 * 1227 * @return 1228 * Pointer to @p buf or @p NULL in case suffix cannot be appended. 1229 */ 1230 static char * 1231 mlx4_glue_path(char *buf, size_t size) 1232 { 1233 static const char *const bad[] = { "/", ".", "..", NULL }; 1234 const char *path = RTE_EAL_PMD_PATH; 1235 size_t len = strlen(path); 1236 size_t off; 1237 int i; 1238 1239 while (len && path[len - 1] == '/') 1240 --len; 1241 for (off = len; off && path[off - 1] != '/'; --off) 1242 ; 1243 for (i = 0; bad[i]; ++i) 1244 if (!strncmp(path + off, bad[i], (int)(len - off))) 1245 goto error; 1246 i = snprintf(buf, size, "%.*s-glue", (int)len, path); 1247 if (i == -1 || (size_t)i >= size) 1248 goto error; 1249 return buf; 1250 error: 1251 ERROR("unable to append \"-glue\" to last component of" 1252 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\")," 1253 " please re-configure DPDK"); 1254 return NULL; 1255 } 1256 1257 /** 1258 * Initialization routine for run-time dependency on rdma-core. 1259 */ 1260 static int 1261 mlx4_glue_init(void) 1262 { 1263 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 1264 const char *path[] = { 1265 /* 1266 * A basic security check is necessary before trusting 1267 * MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 1268 */ 1269 (geteuid() == getuid() && getegid() == getgid() ? 1270 getenv("MLX4_GLUE_PATH") : NULL), 1271 /* 1272 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 1273 * variant, otherwise let dlopen() look up libraries on its 1274 * own. 1275 */ 1276 (*RTE_EAL_PMD_PATH ? 1277 mlx4_glue_path(glue_path, sizeof(glue_path)) : ""), 1278 }; 1279 unsigned int i = 0; 1280 void *handle = NULL; 1281 void **sym; 1282 const char *dlmsg; 1283 1284 while (!handle && i != RTE_DIM(path)) { 1285 const char *end; 1286 size_t len; 1287 int ret; 1288 1289 if (!path[i]) { 1290 ++i; 1291 continue; 1292 } 1293 end = strpbrk(path[i], ":;"); 1294 if (!end) 1295 end = path[i] + strlen(path[i]); 1296 len = end - path[i]; 1297 ret = 0; 1298 do { 1299 char name[ret + 1]; 1300 1301 ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE, 1302 (int)len, path[i], 1303 (!len || *(end - 1) == '/') ? "" : "/"); 1304 if (ret == -1) 1305 break; 1306 if (sizeof(name) != (size_t)ret + 1) 1307 continue; 1308 DEBUG("looking for rdma-core glue as \"%s\"", name); 1309 handle = dlopen(name, RTLD_LAZY); 1310 break; 1311 } while (1); 1312 path[i] = end + 1; 1313 if (!*end) 1314 ++i; 1315 } 1316 if (!handle) { 1317 rte_errno = EINVAL; 1318 dlmsg = dlerror(); 1319 if (dlmsg) 1320 WARN("cannot load glue library: %s", dlmsg); 1321 goto glue_error; 1322 } 1323 sym = dlsym(handle, "mlx4_glue"); 1324 if (!sym || !*sym) { 1325 rte_errno = EINVAL; 1326 dlmsg = dlerror(); 1327 if (dlmsg) 1328 ERROR("cannot resolve glue symbol: %s", dlmsg); 1329 goto glue_error; 1330 } 1331 mlx4_glue = *sym; 1332 return 0; 1333 glue_error: 1334 if (handle) 1335 dlclose(handle); 1336 WARN("cannot initialize PMD due to missing run-time" 1337 " dependency on rdma-core libraries (libibverbs," 1338 " libmlx4)"); 1339 return -rte_errno; 1340 } 1341 1342 #endif 1343 1344 /* Initialize driver log type. */ 1345 RTE_LOG_REGISTER_DEFAULT(mlx4_logtype, NOTICE) 1346 1347 /** 1348 * Driver initialization routine. 1349 */ 1350 RTE_INIT(rte_mlx4_pmd_init) 1351 { 1352 /* 1353 * MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we 1354 * want to get success errno value in case of calling them 1355 * when the device was removed. 1356 */ 1357 setenv("MLX4_DEVICE_FATAL_CLEANUP", "1", 1); 1358 /* 1359 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 1360 * huge pages. Calling ibv_fork_init() during init allows 1361 * applications to use fork() safely for purposes other than 1362 * using this PMD, which is not supported in forked processes. 1363 */ 1364 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 1365 #ifdef RTE_IBVERBS_LINK_DLOPEN 1366 if (mlx4_glue_init()) 1367 return; 1368 MLX4_ASSERT(mlx4_glue); 1369 #endif 1370 #ifdef RTE_LIBRTE_MLX4_DEBUG 1371 /* Glue structure must not contain any NULL pointers. */ 1372 { 1373 unsigned int i; 1374 1375 for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i) 1376 MLX4_ASSERT(((const void *const *)mlx4_glue)[i]); 1377 } 1378 #endif 1379 if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) { 1380 ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", 1381 mlx4_glue->version, MLX4_GLUE_VERSION); 1382 return; 1383 } 1384 mlx4_glue->fork_init(); 1385 rte_pci_register(&mlx4_driver); 1386 } 1387 1388 RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__); 1389 RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map); 1390 RTE_PMD_REGISTER_KMOD_DEP(net_mlx4, 1391 "* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib"); 1392