1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2012 6WIND S.A. 3 * Copyright 2012 Mellanox Technologies, Ltd 4 */ 5 6 /** 7 * @file 8 * mlx4 driver initialization. 9 */ 10 11 #include <errno.h> 12 #include <inttypes.h> 13 #include <stddef.h> 14 #include <stdint.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <sys/mman.h> 19 #include <unistd.h> 20 #ifdef RTE_IBVERBS_LINK_DLOPEN 21 #include <dlfcn.h> 22 #endif 23 24 /* Verbs headers do not support -pedantic. */ 25 #ifdef PEDANTIC 26 #pragma GCC diagnostic ignored "-Wpedantic" 27 #endif 28 #include <infiniband/verbs.h> 29 #ifdef PEDANTIC 30 #pragma GCC diagnostic error "-Wpedantic" 31 #endif 32 33 #include <rte_common.h> 34 #include <rte_dev.h> 35 #include <rte_errno.h> 36 #include <ethdev_driver.h> 37 #include <ethdev_pci.h> 38 #include <rte_ether.h> 39 #include <rte_flow.h> 40 #include <rte_interrupts.h> 41 #include <rte_kvargs.h> 42 #include <rte_malloc.h> 43 #include <rte_mbuf.h> 44 45 #include "mlx4.h" 46 #include "mlx4_glue.h" 47 #include "mlx4_flow.h" 48 #include "mlx4_mr.h" 49 #include "mlx4_rxtx.h" 50 #include "mlx4_utils.h" 51 52 #ifdef MLX4_GLUE 53 const struct mlx4_glue *mlx4_glue; 54 #endif 55 56 static const char *MZ_MLX4_PMD_SHARED_DATA = "mlx4_pmd_shared_data"; 57 58 /* Shared memory between primary and secondary processes. */ 59 struct mlx4_shared_data *mlx4_shared_data; 60 61 /* Spinlock for mlx4_shared_data allocation. */ 62 static rte_spinlock_t mlx4_shared_data_lock = RTE_SPINLOCK_INITIALIZER; 63 64 /* Process local data for secondary processes. */ 65 static struct mlx4_local_data mlx4_local_data; 66 67 /** Configuration structure for device arguments. */ 68 struct mlx4_conf { 69 struct { 70 uint32_t present; /**< Bit-field for existing ports. */ 71 uint32_t enabled; /**< Bit-field for user-enabled ports. */ 72 } ports; 73 int mr_ext_memseg_en; 74 /** Whether memseg should be extended for MR creation. */ 75 }; 76 77 /* Available parameters list. */ 78 const char *pmd_mlx4_init_params[] = { 79 MLX4_PMD_PORT_KVARG, 80 MLX4_MR_EXT_MEMSEG_EN_KVARG, 81 NULL, 82 }; 83 84 static int mlx4_dev_stop(struct rte_eth_dev *dev); 85 86 /** 87 * Initialize shared data between primary and secondary process. 88 * 89 * A memzone is reserved by primary process and secondary processes attach to 90 * the memzone. 91 * 92 * @return 93 * 0 on success, a negative errno value otherwise and rte_errno is set. 94 */ 95 static int 96 mlx4_init_shared_data(void) 97 { 98 const struct rte_memzone *mz; 99 int ret = 0; 100 101 rte_spinlock_lock(&mlx4_shared_data_lock); 102 if (mlx4_shared_data == NULL) { 103 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 104 /* Allocate shared memory. */ 105 mz = rte_memzone_reserve(MZ_MLX4_PMD_SHARED_DATA, 106 sizeof(*mlx4_shared_data), 107 SOCKET_ID_ANY, 0); 108 if (mz == NULL) { 109 ERROR("Cannot allocate mlx4 shared data\n"); 110 ret = -rte_errno; 111 goto error; 112 } 113 mlx4_shared_data = mz->addr; 114 memset(mlx4_shared_data, 0, sizeof(*mlx4_shared_data)); 115 rte_spinlock_init(&mlx4_shared_data->lock); 116 } else { 117 /* Lookup allocated shared memory. */ 118 mz = rte_memzone_lookup(MZ_MLX4_PMD_SHARED_DATA); 119 if (mz == NULL) { 120 ERROR("Cannot attach mlx4 shared data\n"); 121 ret = -rte_errno; 122 goto error; 123 } 124 mlx4_shared_data = mz->addr; 125 memset(&mlx4_local_data, 0, sizeof(mlx4_local_data)); 126 } 127 } 128 error: 129 rte_spinlock_unlock(&mlx4_shared_data_lock); 130 return ret; 131 } 132 133 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 134 /** 135 * Verbs callback to allocate a memory. This function should allocate the space 136 * according to the size provided residing inside a huge page. 137 * Please note that all allocation must respect the alignment from libmlx4 138 * (i.e. currently sysconf(_SC_PAGESIZE)). 139 * 140 * @param[in] size 141 * The size in bytes of the memory to allocate. 142 * @param[in] data 143 * A pointer to the callback data. 144 * 145 * @return 146 * Allocated buffer, NULL otherwise and rte_errno is set. 147 */ 148 static void * 149 mlx4_alloc_verbs_buf(size_t size, void *data) 150 { 151 struct mlx4_priv *priv = data; 152 void *ret; 153 size_t alignment = sysconf(_SC_PAGESIZE); 154 unsigned int socket = SOCKET_ID_ANY; 155 156 if (priv->verbs_alloc_ctx.type == MLX4_VERBS_ALLOC_TYPE_TX_QUEUE) { 157 const struct txq *txq = priv->verbs_alloc_ctx.obj; 158 159 socket = txq->socket; 160 } else if (priv->verbs_alloc_ctx.type == 161 MLX4_VERBS_ALLOC_TYPE_RX_QUEUE) { 162 const struct rxq *rxq = priv->verbs_alloc_ctx.obj; 163 164 socket = rxq->socket; 165 } 166 MLX4_ASSERT(data != NULL); 167 ret = rte_malloc_socket(__func__, size, alignment, socket); 168 if (!ret && size) 169 rte_errno = ENOMEM; 170 return ret; 171 } 172 173 /** 174 * Verbs callback to free a memory. 175 * 176 * @param[in] ptr 177 * A pointer to the memory to free. 178 * @param[in] data 179 * A pointer to the callback data. 180 */ 181 static void 182 mlx4_free_verbs_buf(void *ptr, void *data __rte_unused) 183 { 184 MLX4_ASSERT(data != NULL); 185 rte_free(ptr); 186 } 187 #endif 188 189 /** 190 * Initialize process private data structure. 191 * 192 * @param dev 193 * Pointer to Ethernet device structure. 194 * 195 * @return 196 * 0 on success, a negative errno value otherwise and rte_errno is set. 197 */ 198 int 199 mlx4_proc_priv_init(struct rte_eth_dev *dev) 200 { 201 struct mlx4_proc_priv *ppriv; 202 size_t ppriv_size; 203 204 /* 205 * UAR register table follows the process private structure. BlueFlame 206 * registers for Tx queues are stored in the table. 207 */ 208 ppriv_size = sizeof(struct mlx4_proc_priv) + 209 dev->data->nb_tx_queues * sizeof(void *); 210 ppriv = rte_zmalloc_socket("mlx4_proc_priv", ppriv_size, 211 RTE_CACHE_LINE_SIZE, dev->device->numa_node); 212 if (!ppriv) { 213 rte_errno = ENOMEM; 214 return -rte_errno; 215 } 216 ppriv->uar_table_sz = dev->data->nb_tx_queues; 217 dev->process_private = ppriv; 218 return 0; 219 } 220 221 /** 222 * Un-initialize process private data structure. 223 * 224 * @param dev 225 * Pointer to Ethernet device structure. 226 */ 227 void 228 mlx4_proc_priv_uninit(struct rte_eth_dev *dev) 229 { 230 if (!dev->process_private) 231 return; 232 rte_free(dev->process_private); 233 dev->process_private = NULL; 234 } 235 236 /** 237 * DPDK callback for Ethernet device configuration. 238 * 239 * @param dev 240 * Pointer to Ethernet device structure. 241 * 242 * @return 243 * 0 on success, negative errno value otherwise and rte_errno is set. 244 */ 245 static int 246 mlx4_dev_configure(struct rte_eth_dev *dev) 247 { 248 struct mlx4_priv *priv = dev->data->dev_private; 249 struct rte_flow_error error; 250 int ret; 251 252 /* Prepare internal flow rules. */ 253 ret = mlx4_flow_sync(priv, &error); 254 if (ret) { 255 ERROR("cannot set up internal flow rules (code %d, \"%s\")," 256 " flow error type %d, cause %p, message: %s", 257 -ret, strerror(-ret), error.type, error.cause, 258 error.message ? error.message : "(unspecified)"); 259 goto exit; 260 } 261 ret = mlx4_intr_install(priv); 262 if (ret) { 263 ERROR("%p: interrupt handler installation failed", 264 (void *)dev); 265 goto exit; 266 } 267 ret = mlx4_proc_priv_init(dev); 268 if (ret) { 269 ERROR("%p: process private data allocation failed", 270 (void *)dev); 271 goto exit; 272 } 273 exit: 274 return ret; 275 } 276 277 /** 278 * DPDK callback to start the device. 279 * 280 * Simulate device start by initializing common RSS resources and attaching 281 * all configured flows. 282 * 283 * @param dev 284 * Pointer to Ethernet device structure. 285 * 286 * @return 287 * 0 on success, negative errno value otherwise and rte_errno is set. 288 */ 289 static int 290 mlx4_dev_start(struct rte_eth_dev *dev) 291 { 292 struct mlx4_priv *priv = dev->data->dev_private; 293 struct rte_flow_error error; 294 int ret; 295 296 if (priv->started) 297 return 0; 298 DEBUG("%p: attaching configured flows to all RX queues", (void *)dev); 299 priv->started = 1; 300 ret = mlx4_rss_init(priv); 301 if (ret) { 302 ERROR("%p: cannot initialize RSS resources: %s", 303 (void *)dev, strerror(-ret)); 304 goto err; 305 } 306 #ifdef RTE_LIBRTE_MLX4_DEBUG 307 mlx4_mr_dump_dev(dev); 308 #endif 309 ret = mlx4_rxq_intr_enable(priv); 310 if (ret) { 311 ERROR("%p: interrupt handler installation failed", 312 (void *)dev); 313 goto err; 314 } 315 ret = mlx4_flow_sync(priv, &error); 316 if (ret) { 317 ERROR("%p: cannot attach flow rules (code %d, \"%s\")," 318 " flow error type %d, cause %p, message: %s", 319 (void *)dev, 320 -ret, strerror(-ret), error.type, error.cause, 321 error.message ? error.message : "(unspecified)"); 322 goto err; 323 } 324 rte_wmb(); 325 dev->tx_pkt_burst = mlx4_tx_burst; 326 dev->rx_pkt_burst = mlx4_rx_burst; 327 /* Enable datapath on secondary process. */ 328 mlx4_mp_req_start_rxtx(dev); 329 return 0; 330 err: 331 mlx4_dev_stop(dev); 332 return ret; 333 } 334 335 /** 336 * DPDK callback to stop the device. 337 * 338 * Simulate device stop by detaching all configured flows. 339 * 340 * @param dev 341 * Pointer to Ethernet device structure. 342 */ 343 static int 344 mlx4_dev_stop(struct rte_eth_dev *dev) 345 { 346 struct mlx4_priv *priv = dev->data->dev_private; 347 348 if (!priv->started) 349 return 0; 350 DEBUG("%p: detaching flows from all RX queues", (void *)dev); 351 priv->started = 0; 352 dev->tx_pkt_burst = mlx4_tx_burst_removed; 353 dev->rx_pkt_burst = mlx4_rx_burst_removed; 354 rte_wmb(); 355 /* Disable datapath on secondary process. */ 356 mlx4_mp_req_stop_rxtx(dev); 357 mlx4_flow_sync(priv, NULL); 358 mlx4_rxq_intr_disable(priv); 359 mlx4_rss_deinit(priv); 360 361 return 0; 362 } 363 364 /** 365 * DPDK callback to close the device. 366 * 367 * Destroy all queues and objects, free memory. 368 * 369 * @param dev 370 * Pointer to Ethernet device structure. 371 */ 372 static int 373 mlx4_dev_close(struct rte_eth_dev *dev) 374 { 375 struct mlx4_priv *priv = dev->data->dev_private; 376 unsigned int i; 377 378 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 379 rte_eth_dev_release_port(dev); 380 return 0; 381 } 382 DEBUG("%p: closing device \"%s\"", 383 (void *)dev, 384 ((priv->ctx != NULL) ? priv->ctx->device->name : "")); 385 dev->rx_pkt_burst = mlx4_rx_burst_removed; 386 dev->tx_pkt_burst = mlx4_tx_burst_removed; 387 rte_wmb(); 388 /* Disable datapath on secondary process. */ 389 mlx4_mp_req_stop_rxtx(dev); 390 mlx4_flow_clean(priv); 391 mlx4_rss_deinit(priv); 392 for (i = 0; i != dev->data->nb_rx_queues; ++i) 393 mlx4_rx_queue_release(dev->data->rx_queues[i]); 394 for (i = 0; i != dev->data->nb_tx_queues; ++i) 395 mlx4_tx_queue_release(dev->data->tx_queues[i]); 396 mlx4_proc_priv_uninit(dev); 397 mlx4_mr_release(dev); 398 if (priv->pd != NULL) { 399 MLX4_ASSERT(priv->ctx != NULL); 400 claim_zero(mlx4_glue->dealloc_pd(priv->pd)); 401 claim_zero(mlx4_glue->close_device(priv->ctx)); 402 } else 403 MLX4_ASSERT(priv->ctx == NULL); 404 mlx4_intr_uninstall(priv); 405 memset(priv, 0, sizeof(*priv)); 406 /* mac_addrs must not be freed because part of dev_private */ 407 dev->data->mac_addrs = NULL; 408 return 0; 409 } 410 411 static const struct eth_dev_ops mlx4_dev_ops = { 412 .dev_configure = mlx4_dev_configure, 413 .dev_start = mlx4_dev_start, 414 .dev_stop = mlx4_dev_stop, 415 .dev_set_link_down = mlx4_dev_set_link_down, 416 .dev_set_link_up = mlx4_dev_set_link_up, 417 .dev_close = mlx4_dev_close, 418 .link_update = mlx4_link_update, 419 .promiscuous_enable = mlx4_promiscuous_enable, 420 .promiscuous_disable = mlx4_promiscuous_disable, 421 .allmulticast_enable = mlx4_allmulticast_enable, 422 .allmulticast_disable = mlx4_allmulticast_disable, 423 .mac_addr_remove = mlx4_mac_addr_remove, 424 .mac_addr_add = mlx4_mac_addr_add, 425 .mac_addr_set = mlx4_mac_addr_set, 426 .set_mc_addr_list = mlx4_set_mc_addr_list, 427 .stats_get = mlx4_stats_get, 428 .stats_reset = mlx4_stats_reset, 429 .fw_version_get = mlx4_fw_version_get, 430 .dev_infos_get = mlx4_dev_infos_get, 431 .dev_supported_ptypes_get = mlx4_dev_supported_ptypes_get, 432 .vlan_filter_set = mlx4_vlan_filter_set, 433 .rx_queue_setup = mlx4_rx_queue_setup, 434 .tx_queue_setup = mlx4_tx_queue_setup, 435 .rx_queue_release = mlx4_rx_queue_release, 436 .tx_queue_release = mlx4_tx_queue_release, 437 .flow_ctrl_get = mlx4_flow_ctrl_get, 438 .flow_ctrl_set = mlx4_flow_ctrl_set, 439 .mtu_set = mlx4_mtu_set, 440 .flow_ops_get = mlx4_flow_ops_get, 441 .rx_queue_intr_enable = mlx4_rx_intr_enable, 442 .rx_queue_intr_disable = mlx4_rx_intr_disable, 443 .is_removed = mlx4_is_removed, 444 }; 445 446 /* Available operations from secondary process. */ 447 static const struct eth_dev_ops mlx4_dev_sec_ops = { 448 .stats_get = mlx4_stats_get, 449 .stats_reset = mlx4_stats_reset, 450 .fw_version_get = mlx4_fw_version_get, 451 .dev_infos_get = mlx4_dev_infos_get, 452 }; 453 454 /** 455 * Get PCI information from struct ibv_device. 456 * 457 * @param device 458 * Pointer to Ethernet device structure. 459 * @param[out] pci_addr 460 * PCI bus address output buffer. 461 * 462 * @return 463 * 0 on success, negative errno value otherwise and rte_errno is set. 464 */ 465 static int 466 mlx4_ibv_device_to_pci_addr(const struct ibv_device *device, 467 struct rte_pci_addr *pci_addr) 468 { 469 FILE *file; 470 char line[32]; 471 MKSTR(path, "%s/device/uevent", device->ibdev_path); 472 473 file = fopen(path, "rb"); 474 if (file == NULL) { 475 rte_errno = errno; 476 return -rte_errno; 477 } 478 while (fgets(line, sizeof(line), file) == line) { 479 size_t len = strlen(line); 480 int ret; 481 482 /* Truncate long lines. */ 483 if (len == (sizeof(line) - 1)) 484 while (line[(len - 1)] != '\n') { 485 ret = fgetc(file); 486 if (ret == EOF) 487 break; 488 line[(len - 1)] = ret; 489 } 490 /* Extract information. */ 491 if (sscanf(line, 492 "PCI_SLOT_NAME=" 493 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 494 &pci_addr->domain, 495 &pci_addr->bus, 496 &pci_addr->devid, 497 &pci_addr->function) == 4) { 498 break; 499 } 500 } 501 fclose(file); 502 return 0; 503 } 504 505 /** 506 * Verify and store value for device argument. 507 * 508 * @param[in] key 509 * Key argument to verify. 510 * @param[in] val 511 * Value associated with key. 512 * @param[in, out] conf 513 * Shared configuration data. 514 * 515 * @return 516 * 0 on success, negative errno value otherwise and rte_errno is set. 517 */ 518 static int 519 mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf) 520 { 521 unsigned long tmp; 522 523 errno = 0; 524 tmp = strtoul(val, NULL, 0); 525 if (errno) { 526 rte_errno = errno; 527 WARN("%s: \"%s\" is not a valid integer", key, val); 528 return -rte_errno; 529 } 530 if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) { 531 uint32_t ports = rte_log2_u32(conf->ports.present + 1); 532 533 if (tmp >= ports) { 534 ERROR("port index %lu outside range [0,%" PRIu32 ")", 535 tmp, ports); 536 return -EINVAL; 537 } 538 if (!(conf->ports.present & (1 << tmp))) { 539 rte_errno = EINVAL; 540 ERROR("invalid port index %lu", tmp); 541 return -rte_errno; 542 } 543 conf->ports.enabled |= 1 << tmp; 544 } else if (strcmp(MLX4_MR_EXT_MEMSEG_EN_KVARG, key) == 0) { 545 conf->mr_ext_memseg_en = !!tmp; 546 } else { 547 rte_errno = EINVAL; 548 WARN("%s: unknown parameter", key); 549 return -rte_errno; 550 } 551 return 0; 552 } 553 554 /** 555 * Parse device parameters. 556 * 557 * @param devargs 558 * Device arguments structure. 559 * 560 * @return 561 * 0 on success, negative errno value otherwise and rte_errno is set. 562 */ 563 static int 564 mlx4_args(struct rte_devargs *devargs, struct mlx4_conf *conf) 565 { 566 struct rte_kvargs *kvlist; 567 unsigned int arg_count; 568 int ret = 0; 569 int i; 570 571 if (devargs == NULL) 572 return 0; 573 kvlist = rte_kvargs_parse(devargs->args, pmd_mlx4_init_params); 574 if (kvlist == NULL) { 575 rte_errno = EINVAL; 576 ERROR("failed to parse kvargs"); 577 return -rte_errno; 578 } 579 /* Process parameters. */ 580 for (i = 0; pmd_mlx4_init_params[i]; ++i) { 581 arg_count = rte_kvargs_count(kvlist, pmd_mlx4_init_params[i]); 582 while (arg_count-- > 0) { 583 ret = rte_kvargs_process(kvlist, 584 pmd_mlx4_init_params[i], 585 (int (*)(const char *, 586 const char *, 587 void *)) 588 mlx4_arg_parse, 589 conf); 590 if (ret != 0) 591 goto free_kvlist; 592 } 593 } 594 free_kvlist: 595 rte_kvargs_free(kvlist); 596 return ret; 597 } 598 599 /** 600 * Interpret RSS capabilities reported by device. 601 * 602 * This function returns the set of usable Verbs RSS hash fields, kernel 603 * quirks taken into account. 604 * 605 * @param ctx 606 * Verbs context. 607 * @param pd 608 * Verbs protection domain. 609 * @param device_attr_ex 610 * Extended device attributes to interpret. 611 * 612 * @return 613 * Usable RSS hash fields mask in Verbs format. 614 */ 615 static uint64_t 616 mlx4_hw_rss_sup(struct ibv_context *ctx, struct ibv_pd *pd, 617 struct ibv_device_attr_ex *device_attr_ex) 618 { 619 uint64_t hw_rss_sup = device_attr_ex->rss_caps.rx_hash_fields_mask; 620 struct ibv_cq *cq = NULL; 621 struct ibv_wq *wq = NULL; 622 struct ibv_rwq_ind_table *ind = NULL; 623 struct ibv_qp *qp = NULL; 624 625 if (!hw_rss_sup) { 626 WARN("no RSS capabilities reported; disabling support for UDP" 627 " RSS and inner VXLAN RSS"); 628 return IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 | 629 IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6 | 630 IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP; 631 } 632 if (!(hw_rss_sup & IBV_RX_HASH_INNER)) 633 return hw_rss_sup; 634 /* 635 * Although reported as supported, missing code in some Linux 636 * versions (v4.15, v4.16) prevents the creation of hash QPs with 637 * inner capability. 638 * 639 * There is no choice but to attempt to instantiate a temporary RSS 640 * context in order to confirm its support. 641 */ 642 cq = mlx4_glue->create_cq(ctx, 1, NULL, NULL, 0); 643 wq = cq ? mlx4_glue->create_wq 644 (ctx, 645 &(struct ibv_wq_init_attr){ 646 .wq_type = IBV_WQT_RQ, 647 .max_wr = 1, 648 .max_sge = 1, 649 .pd = pd, 650 .cq = cq, 651 }) : NULL; 652 ind = wq ? mlx4_glue->create_rwq_ind_table 653 (ctx, 654 &(struct ibv_rwq_ind_table_init_attr){ 655 .log_ind_tbl_size = 0, 656 .ind_tbl = &wq, 657 .comp_mask = 0, 658 }) : NULL; 659 qp = ind ? mlx4_glue->create_qp_ex 660 (ctx, 661 &(struct ibv_qp_init_attr_ex){ 662 .comp_mask = 663 (IBV_QP_INIT_ATTR_PD | 664 IBV_QP_INIT_ATTR_RX_HASH | 665 IBV_QP_INIT_ATTR_IND_TABLE), 666 .qp_type = IBV_QPT_RAW_PACKET, 667 .pd = pd, 668 .rwq_ind_tbl = ind, 669 .rx_hash_conf = { 670 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 671 .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE, 672 .rx_hash_key = mlx4_rss_hash_key_default, 673 .rx_hash_fields_mask = hw_rss_sup, 674 }, 675 }) : NULL; 676 if (!qp) { 677 WARN("disabling unusable inner RSS capability due to kernel" 678 " quirk"); 679 hw_rss_sup &= ~IBV_RX_HASH_INNER; 680 } else { 681 claim_zero(mlx4_glue->destroy_qp(qp)); 682 } 683 if (ind) 684 claim_zero(mlx4_glue->destroy_rwq_ind_table(ind)); 685 if (wq) 686 claim_zero(mlx4_glue->destroy_wq(wq)); 687 if (cq) 688 claim_zero(mlx4_glue->destroy_cq(cq)); 689 return hw_rss_sup; 690 } 691 692 static struct rte_pci_driver mlx4_driver; 693 694 /** 695 * PMD global initialization. 696 * 697 * Independent from individual device, this function initializes global 698 * per-PMD data structures distinguishing primary and secondary processes. 699 * Hence, each initialization is called once per a process. 700 * 701 * @return 702 * 0 on success, a negative errno value otherwise and rte_errno is set. 703 */ 704 static int 705 mlx4_init_once(void) 706 { 707 struct mlx4_shared_data *sd; 708 struct mlx4_local_data *ld = &mlx4_local_data; 709 int ret = 0; 710 711 if (mlx4_init_shared_data()) 712 return -rte_errno; 713 sd = mlx4_shared_data; 714 MLX4_ASSERT(sd); 715 rte_spinlock_lock(&sd->lock); 716 switch (rte_eal_process_type()) { 717 case RTE_PROC_PRIMARY: 718 if (sd->init_done) 719 break; 720 LIST_INIT(&sd->mem_event_cb_list); 721 rte_rwlock_init(&sd->mem_event_rwlock); 722 rte_mem_event_callback_register("MLX4_MEM_EVENT_CB", 723 mlx4_mr_mem_event_cb, NULL); 724 ret = mlx4_mp_init_primary(); 725 if (ret) 726 goto out; 727 sd->init_done = 1; 728 break; 729 case RTE_PROC_SECONDARY: 730 if (ld->init_done) 731 break; 732 ret = mlx4_mp_init_secondary(); 733 if (ret) 734 goto out; 735 ++sd->secondary_cnt; 736 ld->init_done = 1; 737 break; 738 default: 739 break; 740 } 741 out: 742 rte_spinlock_unlock(&sd->lock); 743 return ret; 744 } 745 746 /** 747 * DPDK callback to register a PCI device. 748 * 749 * This function creates an Ethernet device for each port of a given 750 * PCI device. 751 * 752 * @param[in] pci_drv 753 * PCI driver structure (mlx4_driver). 754 * @param[in] pci_dev 755 * PCI device information. 756 * 757 * @return 758 * 0 on success, negative errno value otherwise and rte_errno is set. 759 */ 760 static int 761 mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) 762 { 763 struct ibv_device **list; 764 struct ibv_device *ibv_dev; 765 int err = 0; 766 struct ibv_context *attr_ctx = NULL; 767 struct ibv_device_attr device_attr; 768 struct ibv_device_attr_ex device_attr_ex; 769 struct rte_eth_dev *prev_dev = NULL; 770 struct mlx4_conf conf = { 771 .ports.present = 0, 772 .mr_ext_memseg_en = 1, 773 }; 774 unsigned int vf; 775 int i; 776 char ifname[IF_NAMESIZE]; 777 778 (void)pci_drv; 779 err = mlx4_init_once(); 780 if (err) { 781 ERROR("unable to init PMD global data: %s", 782 strerror(rte_errno)); 783 return -rte_errno; 784 } 785 MLX4_ASSERT(pci_drv == &mlx4_driver); 786 list = mlx4_glue->get_device_list(&i); 787 if (list == NULL) { 788 rte_errno = errno; 789 MLX4_ASSERT(rte_errno); 790 if (rte_errno == ENOSYS) 791 ERROR("cannot list devices, is ib_uverbs loaded?"); 792 return -rte_errno; 793 } 794 MLX4_ASSERT(i >= 0); 795 /* 796 * For each listed device, check related sysfs entry against 797 * the provided PCI ID. 798 */ 799 while (i != 0) { 800 struct rte_pci_addr pci_addr; 801 802 --i; 803 DEBUG("checking device \"%s\"", list[i]->name); 804 if (mlx4_ibv_device_to_pci_addr(list[i], &pci_addr)) 805 continue; 806 if ((pci_dev->addr.domain != pci_addr.domain) || 807 (pci_dev->addr.bus != pci_addr.bus) || 808 (pci_dev->addr.devid != pci_addr.devid) || 809 (pci_dev->addr.function != pci_addr.function)) 810 continue; 811 vf = (pci_dev->id.device_id == 812 PCI_DEVICE_ID_MELLANOX_CONNECTX3VF); 813 INFO("PCI information matches, using device \"%s\" (VF: %s)", 814 list[i]->name, (vf ? "true" : "false")); 815 attr_ctx = mlx4_glue->open_device(list[i]); 816 err = errno; 817 break; 818 } 819 if (attr_ctx == NULL) { 820 mlx4_glue->free_device_list(list); 821 switch (err) { 822 case 0: 823 rte_errno = ENODEV; 824 ERROR("cannot access device, is mlx4_ib loaded?"); 825 return -rte_errno; 826 case EINVAL: 827 rte_errno = EINVAL; 828 ERROR("cannot use device, are drivers up to date?"); 829 return -rte_errno; 830 } 831 MLX4_ASSERT(err > 0); 832 rte_errno = err; 833 return -rte_errno; 834 } 835 ibv_dev = list[i]; 836 DEBUG("device opened"); 837 if (mlx4_glue->query_device(attr_ctx, &device_attr)) { 838 err = ENODEV; 839 goto error; 840 } 841 INFO("%u port(s) detected", device_attr.phys_port_cnt); 842 conf.ports.present |= (UINT64_C(1) << device_attr.phys_port_cnt) - 1; 843 if (mlx4_args(pci_dev->device.devargs, &conf)) { 844 ERROR("failed to process device arguments"); 845 err = EINVAL; 846 goto error; 847 } 848 /* Use all ports when none are defined */ 849 if (!conf.ports.enabled) 850 conf.ports.enabled = conf.ports.present; 851 /* Retrieve extended device attributes. */ 852 if (mlx4_glue->query_device_ex(attr_ctx, NULL, &device_attr_ex)) { 853 err = ENODEV; 854 goto error; 855 } 856 MLX4_ASSERT(device_attr.max_sge >= MLX4_MAX_SGE); 857 for (i = 0; i < device_attr.phys_port_cnt; i++) { 858 uint32_t port = i + 1; /* ports are indexed from one */ 859 struct ibv_context *ctx = NULL; 860 struct ibv_port_attr port_attr; 861 struct ibv_pd *pd = NULL; 862 struct mlx4_priv *priv = NULL; 863 struct rte_eth_dev *eth_dev = NULL; 864 struct rte_ether_addr mac; 865 char name[RTE_ETH_NAME_MAX_LEN]; 866 867 /* If port is not enabled, skip. */ 868 if (!(conf.ports.enabled & (1 << i))) 869 continue; 870 DEBUG("using port %u", port); 871 ctx = mlx4_glue->open_device(ibv_dev); 872 if (ctx == NULL) { 873 err = ENODEV; 874 goto port_error; 875 } 876 snprintf(name, sizeof(name), "%s port %u", 877 mlx4_glue->get_device_name(ibv_dev), port); 878 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 879 eth_dev = rte_eth_dev_attach_secondary(name); 880 if (eth_dev == NULL) { 881 ERROR("can not attach rte ethdev"); 882 rte_errno = ENOMEM; 883 err = rte_errno; 884 goto err_secondary; 885 } 886 priv = eth_dev->data->dev_private; 887 if (!priv->verbs_alloc_ctx.enabled) { 888 ERROR("secondary process is not supported" 889 " due to lack of external allocator" 890 " from Verbs"); 891 rte_errno = ENOTSUP; 892 err = rte_errno; 893 goto err_secondary; 894 } 895 eth_dev->device = &pci_dev->device; 896 eth_dev->dev_ops = &mlx4_dev_sec_ops; 897 err = mlx4_proc_priv_init(eth_dev); 898 if (err) 899 goto err_secondary; 900 /* Receive command fd from primary process. */ 901 err = mlx4_mp_req_verbs_cmd_fd(eth_dev); 902 if (err < 0) { 903 err = rte_errno; 904 goto err_secondary; 905 } 906 /* Remap UAR for Tx queues. */ 907 err = mlx4_tx_uar_init_secondary(eth_dev, err); 908 if (err) { 909 err = rte_errno; 910 goto err_secondary; 911 } 912 /* 913 * Ethdev pointer is still required as input since 914 * the primary device is not accessible from the 915 * secondary process. 916 */ 917 eth_dev->tx_pkt_burst = mlx4_tx_burst; 918 eth_dev->rx_pkt_burst = mlx4_rx_burst; 919 claim_zero(mlx4_glue->close_device(ctx)); 920 rte_eth_copy_pci_info(eth_dev, pci_dev); 921 rte_eth_dev_probing_finish(eth_dev); 922 prev_dev = eth_dev; 923 continue; 924 err_secondary: 925 claim_zero(mlx4_glue->close_device(ctx)); 926 rte_eth_dev_release_port(eth_dev); 927 if (prev_dev) 928 rte_eth_dev_release_port(prev_dev); 929 break; 930 } 931 /* Check port status. */ 932 err = mlx4_glue->query_port(ctx, port, &port_attr); 933 if (err) { 934 err = ENODEV; 935 ERROR("port query failed: %s", strerror(err)); 936 goto port_error; 937 } 938 if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { 939 err = ENOTSUP; 940 ERROR("port %d is not configured in Ethernet mode", 941 port); 942 goto port_error; 943 } 944 if (port_attr.state != IBV_PORT_ACTIVE) 945 DEBUG("port %d is not active: \"%s\" (%d)", 946 port, mlx4_glue->port_state_str(port_attr.state), 947 port_attr.state); 948 /* Make asynchronous FD non-blocking to handle interrupts. */ 949 err = mlx4_fd_set_non_blocking(ctx->async_fd); 950 if (err) { 951 ERROR("cannot make asynchronous FD non-blocking: %s", 952 strerror(err)); 953 goto port_error; 954 } 955 /* Allocate protection domain. */ 956 pd = mlx4_glue->alloc_pd(ctx); 957 if (pd == NULL) { 958 err = ENOMEM; 959 ERROR("PD allocation failure"); 960 goto port_error; 961 } 962 /* from rte_ethdev.c */ 963 priv = rte_zmalloc("ethdev private structure", 964 sizeof(*priv), 965 RTE_CACHE_LINE_SIZE); 966 if (priv == NULL) { 967 err = ENOMEM; 968 ERROR("priv allocation failure"); 969 goto port_error; 970 } 971 priv->ctx = ctx; 972 priv->device_attr = device_attr; 973 priv->port = port; 974 priv->pd = pd; 975 priv->mtu = RTE_ETHER_MTU; 976 priv->vf = vf; 977 priv->hw_csum = !!(device_attr.device_cap_flags & 978 IBV_DEVICE_RAW_IP_CSUM); 979 DEBUG("checksum offloading is %ssupported", 980 (priv->hw_csum ? "" : "not ")); 981 /* Only ConnectX-3 Pro supports tunneling. */ 982 priv->hw_csum_l2tun = 983 priv->hw_csum && 984 (device_attr.vendor_part_id == 985 PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO); 986 DEBUG("L2 tunnel checksum offloads are %ssupported", 987 priv->hw_csum_l2tun ? "" : "not "); 988 priv->hw_rss_sup = mlx4_hw_rss_sup(priv->ctx, priv->pd, 989 &device_attr_ex); 990 DEBUG("supported RSS hash fields mask: %016" PRIx64, 991 priv->hw_rss_sup); 992 priv->hw_rss_max_qps = 993 device_attr_ex.rss_caps.max_rwq_indirection_table_size; 994 DEBUG("MAX RSS queues %d", priv->hw_rss_max_qps); 995 priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps & 996 IBV_RAW_PACKET_CAP_SCATTER_FCS); 997 DEBUG("FCS stripping toggling is %ssupported", 998 priv->hw_fcs_strip ? "" : "not "); 999 priv->tso = 1000 ((device_attr_ex.tso_caps.max_tso > 0) && 1001 (device_attr_ex.tso_caps.supported_qpts & 1002 (1 << IBV_QPT_RAW_PACKET))); 1003 if (priv->tso) 1004 priv->tso_max_payload_sz = 1005 device_attr_ex.tso_caps.max_tso; 1006 DEBUG("TSO is %ssupported", 1007 priv->tso ? "" : "not "); 1008 priv->mr_ext_memseg_en = conf.mr_ext_memseg_en; 1009 /* Configure the first MAC address by default. */ 1010 err = mlx4_get_mac(priv, &mac.addr_bytes); 1011 if (err) { 1012 ERROR("cannot get MAC address, is mlx4_en loaded?" 1013 " (error: %s)", strerror(err)); 1014 goto port_error; 1015 } 1016 INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", 1017 priv->port, 1018 mac.addr_bytes[0], mac.addr_bytes[1], 1019 mac.addr_bytes[2], mac.addr_bytes[3], 1020 mac.addr_bytes[4], mac.addr_bytes[5]); 1021 /* Register MAC address. */ 1022 priv->mac[0] = mac; 1023 1024 if (mlx4_get_ifname(priv, &ifname) == 0) { 1025 DEBUG("port %u ifname is \"%s\"", 1026 priv->port, ifname); 1027 priv->if_index = if_nametoindex(ifname); 1028 } else { 1029 DEBUG("port %u ifname is unknown", priv->port); 1030 } 1031 1032 /* Get actual MTU if possible. */ 1033 mlx4_mtu_get(priv, &priv->mtu); 1034 DEBUG("port %u MTU is %u", priv->port, priv->mtu); 1035 eth_dev = rte_eth_dev_allocate(name); 1036 if (eth_dev == NULL) { 1037 err = ENOMEM; 1038 ERROR("can not allocate rte ethdev"); 1039 goto port_error; 1040 } 1041 eth_dev->data->dev_private = priv; 1042 eth_dev->data->mac_addrs = priv->mac; 1043 eth_dev->device = &pci_dev->device; 1044 rte_eth_copy_pci_info(eth_dev, pci_dev); 1045 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; 1046 /* Initialize local interrupt handle for current port. */ 1047 memset(&priv->intr_handle, 0, sizeof(struct rte_intr_handle)); 1048 priv->intr_handle.fd = -1; 1049 priv->intr_handle.type = RTE_INTR_HANDLE_EXT; 1050 /* 1051 * Override ethdev interrupt handle pointer with private 1052 * handle instead of that of the parent PCI device used by 1053 * default. This prevents it from being shared between all 1054 * ports of the same PCI device since each of them is 1055 * associated its own Verbs context. 1056 * 1057 * Rx interrupts in particular require this as the PMD has 1058 * no control over the registration of queue interrupts 1059 * besides setting up eth_dev->intr_handle, the rest is 1060 * handled by rte_intr_rx_ctl(). 1061 */ 1062 eth_dev->intr_handle = &priv->intr_handle; 1063 priv->dev_data = eth_dev->data; 1064 eth_dev->dev_ops = &mlx4_dev_ops; 1065 #ifdef HAVE_IBV_MLX4_BUF_ALLOCATORS 1066 /* Hint libmlx4 to use PMD allocator for data plane resources */ 1067 err = mlx4_glue->dv_set_context_attr 1068 (ctx, MLX4DV_SET_CTX_ATTR_BUF_ALLOCATORS, 1069 (void *)((uintptr_t)&(struct mlx4dv_ctx_allocators){ 1070 .alloc = &mlx4_alloc_verbs_buf, 1071 .free = &mlx4_free_verbs_buf, 1072 .data = priv, 1073 })); 1074 if (err) 1075 WARN("Verbs external allocator is not supported"); 1076 else 1077 priv->verbs_alloc_ctx.enabled = 1; 1078 #endif 1079 /* Bring Ethernet device up. */ 1080 DEBUG("forcing Ethernet interface up"); 1081 mlx4_dev_set_link_up(eth_dev); 1082 /* Update link status once if waiting for LSC. */ 1083 if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) 1084 mlx4_link_update(eth_dev, 0); 1085 /* 1086 * Once the device is added to the list of memory event 1087 * callback, its global MR cache table cannot be expanded 1088 * on the fly because of deadlock. If it overflows, lookup 1089 * should be done by searching MR list linearly, which is slow. 1090 */ 1091 err = mlx4_mr_btree_init(&priv->mr.cache, 1092 MLX4_MR_BTREE_CACHE_N * 2, 1093 eth_dev->device->numa_node); 1094 if (err) { 1095 /* rte_errno is already set. */ 1096 goto port_error; 1097 } 1098 /* Add device to memory callback list. */ 1099 rte_rwlock_write_lock(&mlx4_shared_data->mem_event_rwlock); 1100 LIST_INSERT_HEAD(&mlx4_shared_data->mem_event_cb_list, 1101 priv, mem_event_cb); 1102 rte_rwlock_write_unlock(&mlx4_shared_data->mem_event_rwlock); 1103 rte_eth_dev_probing_finish(eth_dev); 1104 prev_dev = eth_dev; 1105 continue; 1106 port_error: 1107 rte_free(priv); 1108 if (eth_dev != NULL) 1109 eth_dev->data->dev_private = NULL; 1110 if (pd) 1111 claim_zero(mlx4_glue->dealloc_pd(pd)); 1112 if (ctx) 1113 claim_zero(mlx4_glue->close_device(ctx)); 1114 if (eth_dev != NULL) { 1115 /* mac_addrs must not be freed because part of dev_private */ 1116 eth_dev->data->mac_addrs = NULL; 1117 rte_eth_dev_release_port(eth_dev); 1118 } 1119 if (prev_dev) 1120 mlx4_dev_close(prev_dev); 1121 break; 1122 } 1123 error: 1124 if (attr_ctx) 1125 claim_zero(mlx4_glue->close_device(attr_ctx)); 1126 if (list) 1127 mlx4_glue->free_device_list(list); 1128 if (err) 1129 rte_errno = err; 1130 return -err; 1131 } 1132 1133 /** 1134 * DPDK callback to remove a PCI device. 1135 * 1136 * This function removes all Ethernet devices belong to a given PCI device. 1137 * 1138 * @param[in] pci_dev 1139 * Pointer to the PCI device. 1140 * 1141 * @return 1142 * 0 on success, the function cannot fail. 1143 */ 1144 static int 1145 mlx4_pci_remove(struct rte_pci_device *pci_dev) 1146 { 1147 uint16_t port_id; 1148 int ret = 0; 1149 1150 RTE_ETH_FOREACH_DEV_OF(port_id, &pci_dev->device) { 1151 /* 1152 * mlx4_dev_close() is not registered to secondary process, 1153 * call the close function explicitly for secondary process. 1154 */ 1155 if (rte_eal_process_type() == RTE_PROC_SECONDARY) 1156 ret |= mlx4_dev_close(&rte_eth_devices[port_id]); 1157 else 1158 ret |= rte_eth_dev_close(port_id); 1159 } 1160 return ret == 0 ? 0 : -EIO; 1161 } 1162 1163 static const struct rte_pci_id mlx4_pci_id_map[] = { 1164 { 1165 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1166 PCI_DEVICE_ID_MELLANOX_CONNECTX3) 1167 }, 1168 { 1169 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1170 PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO) 1171 }, 1172 { 1173 RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, 1174 PCI_DEVICE_ID_MELLANOX_CONNECTX3VF) 1175 }, 1176 { 1177 .vendor_id = 0 1178 } 1179 }; 1180 1181 static struct rte_pci_driver mlx4_driver = { 1182 .driver = { 1183 .name = MLX4_DRIVER_NAME 1184 }, 1185 .id_table = mlx4_pci_id_map, 1186 .probe = mlx4_pci_probe, 1187 .remove = mlx4_pci_remove, 1188 .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, 1189 }; 1190 1191 #ifdef RTE_IBVERBS_LINK_DLOPEN 1192 1193 /** 1194 * Suffix RTE_EAL_PMD_PATH with "-glue". 1195 * 1196 * This function performs a sanity check on RTE_EAL_PMD_PATH before 1197 * suffixing its last component. 1198 * 1199 * @param buf[out] 1200 * Output buffer, should be large enough otherwise NULL is returned. 1201 * @param size 1202 * Size of @p out. 1203 * 1204 * @return 1205 * Pointer to @p buf or @p NULL in case suffix cannot be appended. 1206 */ 1207 static char * 1208 mlx4_glue_path(char *buf, size_t size) 1209 { 1210 static const char *const bad[] = { "/", ".", "..", NULL }; 1211 const char *path = RTE_EAL_PMD_PATH; 1212 size_t len = strlen(path); 1213 size_t off; 1214 int i; 1215 1216 while (len && path[len - 1] == '/') 1217 --len; 1218 for (off = len; off && path[off - 1] != '/'; --off) 1219 ; 1220 for (i = 0; bad[i]; ++i) 1221 if (!strncmp(path + off, bad[i], (int)(len - off))) 1222 goto error; 1223 i = snprintf(buf, size, "%.*s-glue", (int)len, path); 1224 if (i == -1 || (size_t)i >= size) 1225 goto error; 1226 return buf; 1227 error: 1228 ERROR("unable to append \"-glue\" to last component of" 1229 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\")," 1230 " please re-configure DPDK"); 1231 return NULL; 1232 } 1233 1234 /** 1235 * Initialization routine for run-time dependency on rdma-core. 1236 */ 1237 static int 1238 mlx4_glue_init(void) 1239 { 1240 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 1241 const char *path[] = { 1242 /* 1243 * A basic security check is necessary before trusting 1244 * MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 1245 */ 1246 (geteuid() == getuid() && getegid() == getgid() ? 1247 getenv("MLX4_GLUE_PATH") : NULL), 1248 /* 1249 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 1250 * variant, otherwise let dlopen() look up libraries on its 1251 * own. 1252 */ 1253 (*RTE_EAL_PMD_PATH ? 1254 mlx4_glue_path(glue_path, sizeof(glue_path)) : ""), 1255 }; 1256 unsigned int i = 0; 1257 void *handle = NULL; 1258 void **sym; 1259 const char *dlmsg; 1260 1261 while (!handle && i != RTE_DIM(path)) { 1262 const char *end; 1263 size_t len; 1264 int ret; 1265 1266 if (!path[i]) { 1267 ++i; 1268 continue; 1269 } 1270 end = strpbrk(path[i], ":;"); 1271 if (!end) 1272 end = path[i] + strlen(path[i]); 1273 len = end - path[i]; 1274 ret = 0; 1275 do { 1276 char name[ret + 1]; 1277 1278 ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE, 1279 (int)len, path[i], 1280 (!len || *(end - 1) == '/') ? "" : "/"); 1281 if (ret == -1) 1282 break; 1283 if (sizeof(name) != (size_t)ret + 1) 1284 continue; 1285 DEBUG("looking for rdma-core glue as \"%s\"", name); 1286 handle = dlopen(name, RTLD_LAZY); 1287 break; 1288 } while (1); 1289 path[i] = end + 1; 1290 if (!*end) 1291 ++i; 1292 } 1293 if (!handle) { 1294 rte_errno = EINVAL; 1295 dlmsg = dlerror(); 1296 if (dlmsg) 1297 WARN("cannot load glue library: %s", dlmsg); 1298 goto glue_error; 1299 } 1300 sym = dlsym(handle, "mlx4_glue"); 1301 if (!sym || !*sym) { 1302 rte_errno = EINVAL; 1303 dlmsg = dlerror(); 1304 if (dlmsg) 1305 ERROR("cannot resolve glue symbol: %s", dlmsg); 1306 goto glue_error; 1307 } 1308 mlx4_glue = *sym; 1309 return 0; 1310 glue_error: 1311 if (handle) 1312 dlclose(handle); 1313 WARN("cannot initialize PMD due to missing run-time" 1314 " dependency on rdma-core libraries (libibverbs," 1315 " libmlx4)"); 1316 return -rte_errno; 1317 } 1318 1319 #endif 1320 1321 /* Initialize driver log type. */ 1322 RTE_LOG_REGISTER(mlx4_logtype, pmd.net.mlx4, NOTICE) 1323 1324 /** 1325 * Driver initialization routine. 1326 */ 1327 RTE_INIT(rte_mlx4_pmd_init) 1328 { 1329 /* 1330 * MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we 1331 * want to get success errno value in case of calling them 1332 * when the device was removed. 1333 */ 1334 setenv("MLX4_DEVICE_FATAL_CLEANUP", "1", 1); 1335 /* 1336 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 1337 * huge pages. Calling ibv_fork_init() during init allows 1338 * applications to use fork() safely for purposes other than 1339 * using this PMD, which is not supported in forked processes. 1340 */ 1341 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 1342 #ifdef RTE_IBVERBS_LINK_DLOPEN 1343 if (mlx4_glue_init()) 1344 return; 1345 MLX4_ASSERT(mlx4_glue); 1346 #endif 1347 #ifdef RTE_LIBRTE_MLX4_DEBUG 1348 /* Glue structure must not contain any NULL pointers. */ 1349 { 1350 unsigned int i; 1351 1352 for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i) 1353 MLX4_ASSERT(((const void *const *)mlx4_glue)[i]); 1354 } 1355 #endif 1356 if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) { 1357 ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", 1358 mlx4_glue->version, MLX4_GLUE_VERSION); 1359 return; 1360 } 1361 mlx4_glue->fork_init(); 1362 rte_pci_register(&mlx4_driver); 1363 } 1364 1365 RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__); 1366 RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map); 1367 RTE_PMD_REGISTER_KMOD_DEP(net_mlx4, 1368 "* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib"); 1369