/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <net/if.h>

#include <ethdev_driver.h>
#include <ethdev_pci.h>
#include <rte_kvargs.h>
#include <rte_eal_paging.h>
#include <rte_pci.h>

#include <infiniband/verbs.h>
#include <infiniband/manadv.h>

#include <assert.h>

#include "mana.h"

/* Shared memory between primary/secondary processes, per driver */
/* Data to track primary/secondary usage */
struct mana_shared_data *mana_shared_data;
static struct mana_shared_data mana_local_data;

/* The memory region for the above data */
static const struct rte_memzone *mana_shared_mz;
static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";

/* Spinlock for mana_shared_data */
static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;

/* Allocate a buffer on the stack and fill it with a printf format string. */
#define MANA_MKSTR(name, ...) \
	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
	char name[mkstr_size_##name + 1]; \
	\
	memset(name, 0, mkstr_size_##name + 1); \
	snprintf(name, sizeof(name), "" __VA_ARGS__)

int mana_logtype_driver;
int mana_logtype_init;

/*
 * Callback from rdma-core to allocate a buffer for a queue.
 */
void *
mana_alloc_verbs_buf(size_t size, void *data)
{
	void *ret;
	size_t alignment = rte_mem_page_size();
	int socket = (int)(uintptr_t)data;

	DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);

	if (alignment == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return NULL;
	}

	ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
	if (!ret && size)
		rte_errno = ENOMEM;
	return ret;
}

void
mana_free_verbs_buf(void *ptr, void *data __rte_unused)
{
	rte_free(ptr);
}

static int
mana_dev_configure(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;

	if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
		dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;

	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
		DRV_LOG(ERR, "Only support equal number of rx/tx queues");
		return -EINVAL;
	}

	if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
		DRV_LOG(ERR, "number of TX/RX queues must be power of 2");
		return -EINVAL;
	}

	priv->num_queues = dev->data->nb_rx_queues;

	manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
				(void *)((uintptr_t)&(struct manadv_ctx_allocators){
					.alloc = &mana_alloc_verbs_buf,
					.free = &mana_free_verbs_buf,
					.data = 0,
				}));

	return 0;
}

static void
rx_intr_vec_disable(struct mana_priv *priv)
{
	struct rte_intr_handle *intr_handle = priv->intr_handle;

	rte_intr_free_epoll_fd(intr_handle);
	rte_intr_vec_list_free(intr_handle);
	rte_intr_nb_efd_set(intr_handle, 0);
}

static int
rx_intr_vec_enable(struct mana_priv *priv)
{
	unsigned int i;
	unsigned int rxqs_n = priv->dev_data->nb_rx_queues;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	struct rte_intr_handle *intr_handle = priv->intr_handle;
	int ret;

	rx_intr_vec_disable(priv);

	if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
		DRV_LOG(ERR, "Failed to allocate memory for interrupt vector");
		return -ENOMEM;
	}

	for (i = 0; i < n; i++) {
		struct mana_rxq *rxq = priv->dev_data->rx_queues[i];

		ret = rte_intr_vec_list_index_set(intr_handle, i,
						  RTE_INTR_VEC_RXTX_OFFSET + i);
		if (ret) {
			DRV_LOG(ERR, "Failed to set intr vec %u", i);
			return ret;
		}

		ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd);
		if (ret) {
			DRV_LOG(ERR, "Failed to set FD at intr %u", i);
			return ret;
		}
	}

	return rte_intr_nb_efd_set(intr_handle, n);
}

static void
rxq_intr_disable(struct mana_priv *priv)
{
	int err = rte_errno;

	rx_intr_vec_disable(priv);
	rte_errno = err;
}

static int
rxq_intr_enable(struct mana_priv *priv)
{
	const struct rte_eth_intr_conf *const intr_conf =
		&priv->dev_data->dev_conf.intr_conf;

	if (!intr_conf->rxq)
		return 0;

	return rx_intr_vec_enable(priv);
}

static int
mana_dev_start(struct rte_eth_dev *dev)
{
	int ret;
	struct mana_priv *priv = dev->data->dev_private;

	rte_spinlock_init(&priv->mr_btree_lock);
	ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
				 dev->device->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to init device MR btree %d", ret);
		return ret;
	}

	ret = mana_start_tx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to start tx queues %d", ret);
		goto failed_tx;
	}

	ret = mana_start_rx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to start rx queues %d", ret);
		goto failed_rx;
	}

	rte_wmb();

	dev->tx_pkt_burst = mana_tx_burst;
	dev->rx_pkt_burst = mana_rx_burst;

	DRV_LOG(INFO, "TX/RX queues have started");

	/* Enable datapath for secondary processes */
	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);

	ret = rxq_intr_enable(priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to enable RX interrupts");
		goto failed_intr;
	}

	return 0;

failed_intr:
	mana_stop_rx_queues(dev);

failed_rx:
	mana_stop_tx_queues(dev);

failed_tx:
	mana_mr_btree_free(&priv->mr_btree);

	return ret;
}

static int
mana_dev_stop(struct rte_eth_dev *dev)
{
	int ret;
	struct mana_priv *priv = dev->data->dev_private;

	rxq_intr_disable(priv);

	dev->tx_pkt_burst = mana_tx_burst_removed;
	dev->rx_pkt_burst = mana_rx_burst_removed;

	/* Stop datapath on secondary processes */
	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX);

	rte_wmb();

	ret = mana_stop_tx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to stop tx queues");
		return ret;
	}

	ret = mana_stop_rx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to stop rx queues");
		return ret;
	}

	return 0;
}

static int mana_intr_uninstall(struct mana_priv *priv);

static int
mana_dev_close(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret;

	mana_remove_all_mr(priv);

	ret = mana_intr_uninstall(priv);
	if (ret)
		return ret;

	ret = ibv_close_device(priv->ib_ctx);
	if (ret) {
		ret = errno;
		return ret;
	}

	return 0;
}

static int
mana_dev_info_get(struct rte_eth_dev *dev,
		  struct rte_eth_dev_info *dev_info)
{
	struct mana_priv *priv = dev->data->dev_private;
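
	/* MTU range supported by the device */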
	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
	dev_info->max_mtu = MANA_MAX_MTU;

	/* RX params */
	dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
	dev_info->max_rx_pktlen = MANA_MAX_MTU + RTE_ETHER_HDR_LEN;

	dev_info->max_rx_queues = RTE_MIN(priv->max_rx_queues, UINT16_MAX);
	dev_info->max_tx_queues = RTE_MIN(priv->max_tx_queues, UINT16_MAX);

	dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
	dev_info->max_hash_mac_addrs = 0;

	dev_info->max_vfs = 1;

	/* Offload params */
	dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;

	dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;

	/* RSS */
	dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
	dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
	dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;

	/* Thresholds */
	dev_info->default_rxconf = (struct rte_eth_rxconf){
		.rx_thresh = {
			.pthresh = 8,
			.hthresh = 8,
			.wthresh = 0,
		},
		.rx_free_thresh = 32,
		/* If no descriptors available, pkts are dropped by default */
		.rx_drop_en = 1,
	};

	dev_info->default_txconf = (struct rte_eth_txconf){
		.tx_thresh = {
			.pthresh = 32,
			.hthresh = 0,
			.wthresh = 0,
		},
		.tx_rs_thresh = 32,
		.tx_free_thresh = 32,
	};

	/* Buffer limits */
	dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
	dev_info->rx_desc_lim.nb_max = RTE_MIN(priv->max_rx_desc, UINT16_MAX);
	dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
	dev_info->rx_desc_lim.nb_seg_max =
		RTE_MIN(priv->max_recv_sge, UINT16_MAX);
	dev_info->rx_desc_lim.nb_mtu_seg_max =
		RTE_MIN(priv->max_recv_sge, UINT16_MAX);

	dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
	dev_info->tx_desc_lim.nb_max = RTE_MIN(priv->max_tx_desc, UINT16_MAX);
	dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
	dev_info->tx_desc_lim.nb_seg_max =
		RTE_MIN(priv->max_send_sge, UINT16_MAX);
	dev_info->tx_desc_lim.nb_mtu_seg_max =
		RTE_MIN(priv->max_send_sge, UINT16_MAX);

	/* Speed */
	dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;

	/* RX params */
	dev_info->default_rxportconf.burst_size = 1;
	dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
	dev_info->default_rxportconf.nb_queues = 1;

	/* TX params */
	dev_info->default_txportconf.burst_size = 1;
	dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
	dev_info->default_txportconf.nb_queues = 1;

	return 0;
}

static void
mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_txq_info *qinfo)
{
	struct mana_txq *txq = dev->data->tx_queues[queue_id];

	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
	qinfo->nb_desc = txq->num_desc;
}

static void
mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_rxq_info *qinfo)
{
	struct mana_rxq *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mp;
	qinfo->nb_desc = rxq->num_desc;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

static const uint32_t *
mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused,
		      size_t *no_of_elements)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
	};

	*no_of_elements = RTE_DIM(ptypes);
	return ptypes;
}

static int
mana_rss_hash_update(struct rte_eth_dev *dev,
		     struct rte_eth_rss_conf *rss_conf)
{
	struct mana_priv *priv = dev->data->dev_private;

	/* Currently can only update RSS hash when device is stopped */
	if (dev->data->dev_started) {
		DRV_LOG(ERR, "Can't update RSS after device has started");
		return -ENODEV;
	}

	if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
		DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
			dev->data->port_id, rss_conf->rss_hf);
		return -EINVAL;
	}

	if (rss_conf->rss_key && rss_conf->rss_key_len) {
		if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
			DRV_LOG(ERR, "Port %u key length must be %u bytes",
				dev->data->port_id,
				TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
			return -EINVAL;
		}

		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
		priv->rss_conf.rss_key =
			rte_zmalloc("mana_rss", rss_conf->rss_key_len,
				    RTE_CACHE_LINE_SIZE);
		if (!priv->rss_conf.rss_key)
			return -ENOMEM;
		memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
		       rss_conf->rss_key_len);
	}
	priv->rss_conf.rss_hf = rss_conf->rss_hf;

	return 0;
}

static int
mana_rss_hash_conf_get(struct rte_eth_dev *dev,
		       struct rte_eth_rss_conf *rss_conf)
{
	struct mana_priv *priv = dev->data->dev_private;

	if (!rss_conf)
		return -EINVAL;

	if (rss_conf->rss_key &&
	    rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
		memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
		       priv->rss_conf.rss_key_len);
	}

	rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
	rss_conf->rss_hf = priv->rss_conf.rss_hf;

	return 0;
}

static int
mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			uint16_t nb_desc, unsigned int socket_id,
			const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct mana_txq *txq;
	int ret;

	txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id);
	if (!txq) {
		DRV_LOG(ERR, "failed to allocate txq");
		return -ENOMEM;
	}

	txq->socket = socket_id;

	txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring",
					   sizeof(struct mana_txq_desc) *
						nb_desc,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->desc_ring) {
		DRV_LOG(ERR, "failed to allocate txq desc_ring");
		ret = -ENOMEM;
		goto fail;
	}

	txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp",
			sizeof(*txq->gdma_comp_buf) * nb_desc,
			RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->gdma_comp_buf) {
		DRV_LOG(ERR, "failed to allocate txq comp");
		ret = -ENOMEM;
		goto fail;
	}

	ret = mana_mr_btree_init(&txq->mr_btree,
				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
	if (ret) {
		DRV_LOG(ERR, "Failed to init TXQ MR btree");
		goto fail;
	}

	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p",
		queue_idx, nb_desc, socket_id, txq->desc_ring);

	txq->desc_ring_head = 0;
	txq->desc_ring_tail = 0;
	txq->priv = priv;
	txq->num_desc = nb_desc;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;

fail:
	rte_free(txq->gdma_comp_buf);
	rte_free(txq->desc_ring);
	rte_free(txq);
	return ret;
}

static void
mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mana_txq *txq = dev->data->tx_queues[qid];
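
	/* Free the MR btree and rings allocated in mana_dev_tx_queue_setup */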
	mana_mr_btree_free(&txq->mr_btree);

	rte_free(txq->gdma_comp_buf);
	rte_free(txq->desc_ring);
	rte_free(txq);
}

static int
mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			uint16_t nb_desc, unsigned int socket_id,
			const struct rte_eth_rxconf *rx_conf __rte_unused,
			struct rte_mempool *mp)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct mana_rxq *rxq;
	int ret;

	rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
	if (!rxq) {
		DRV_LOG(ERR, "failed to allocate rxq");
		return -ENOMEM;
	}

	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
		queue_idx, nb_desc, socket_id);

	rxq->socket = socket_id;

	rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
					    sizeof(struct mana_rxq_desc) *
						nb_desc,
					    RTE_CACHE_LINE_SIZE, socket_id);

	if (!rxq->desc_ring) {
		DRV_LOG(ERR, "failed to allocate rxq desc_ring");
		ret = -ENOMEM;
		goto fail;
	}

	rxq->desc_ring_head = 0;
	rxq->desc_ring_tail = 0;

	rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp",
			sizeof(*rxq->gdma_comp_buf) * nb_desc,
			RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->gdma_comp_buf) {
		DRV_LOG(ERR, "failed to allocate rxq comp");
		ret = -ENOMEM;
		goto fail;
	}

	ret = mana_mr_btree_init(&rxq->mr_btree,
				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
	if (ret) {
		DRV_LOG(ERR, "Failed to init RXQ MR btree");
		goto fail;
	}

	rxq->priv = priv;
	rxq->num_desc = nb_desc;
	rxq->mp = mp;
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;

fail:
	rte_free(rxq->gdma_comp_buf);
	rte_free(rxq->desc_ring);
	rte_free(rxq);
	return ret;
}

static void
mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mana_rxq *rxq = dev->data->rx_queues[qid];

	mana_mr_btree_free(&rxq->mr_btree);

	rte_free(rxq->gdma_comp_buf);
	rte_free(rxq->desc_ring);
	rte_free(rxq);
}

static int
mana_dev_link_update(struct rte_eth_dev *dev,
		     int wait_to_complete __rte_unused)
{
	struct rte_eth_link link;

	/* MANA has no concept of carrier state, always reporting UP */
	link = (struct rte_eth_link) {
		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
		.link_autoneg = RTE_ETH_LINK_FIXED,
		.link_speed = RTE_ETH_SPEED_NUM_100G,
		.link_status = RTE_ETH_LINK_UP,
	};

	return rte_eth_linkstatus_set(dev, &link);
}

static int
mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct mana_txq *txq = dev->data->tx_queues[i];

		if (!txq)
			continue;

		stats->opackets += txq->stats.packets;
		stats->obytes += txq->stats.bytes;
		stats->oerrors += txq->stats.errors;

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_opackets[i] = txq->stats.packets;
			stats->q_obytes[i] = txq->stats.bytes;
		}
	}

	stats->rx_nombuf = 0;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (!rxq)
			continue;

		stats->ipackets += rxq->stats.packets;
		stats->ibytes += rxq->stats.bytes;
		stats->ierrors += rxq->stats.errors;

		/* There is no good way to get stats->imissed, not setting it */

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_ipackets[i] = rxq->stats.packets;
			stats->q_ibytes[i] = rxq->stats.bytes;
		}

		stats->rx_nombuf += rxq->stats.nombuf;
	}

	return 0;
}

static int
mana_dev_stats_reset(struct rte_eth_dev *dev)
{
	unsigned int i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct mana_txq *txq = dev->data->tx_queues[i];

		if (!txq)
			continue;

		memset(&txq->stats, 0, sizeof(txq->stats));
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (!rxq)
			continue;

		memset(&rxq->stats, 0, sizeof(rxq->stats));
	}

	return 0;
}

static int
mana_get_ifname(const struct mana_priv *priv, char (*ifname)[IF_NAMESIZE])
{
	int ret;
	DIR *dir;
	struct dirent *dent;

	MANA_MKSTR(dirpath, "%s/device/net", priv->ib_ctx->device->ibdev_path);

	dir = opendir(dirpath);
	if (dir == NULL)
		return -ENODEV;

	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		struct rte_ether_addr addr;
		char *mac = NULL;

		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MANA_MKSTR(path, "%s/%s/address", dirpath, name);

		file = fopen(path, "r");
		if (!file) {
			ret = -ENODEV;
			break;
		}

		ret = fscanf(file, "%ms", &mac);
		fclose(file);

		if (ret <= 0) {
			ret = -EINVAL;
			break;
		}

		ret = rte_ether_unformat_addr(mac, &addr);
		free(mac);
		if (ret)
			break;

		if (rte_is_same_ether_addr(&addr, priv->dev_data->mac_addrs)) {
			strlcpy(*ifname, name, sizeof(*ifname));
			ret = 0;
			break;
		}
	}

	closedir(dir);
	return ret;
}

static int
mana_ifreq(const struct mana_priv *priv, int req, struct ifreq *ifr)
{
	int sock, ret;

	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (sock == -1)
		return -errno;

	ret = mana_get_ifname(priv, &ifr->ifr_name);
	if (ret) {
		close(sock);
		return ret;
	}

	if (ioctl(sock, req, ifr) == -1)
		ret = -errno;

	close(sock);

	return ret;
}

static int
mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct ifreq request = { .ifr_mtu = mtu, };

	return mana_ifreq(priv, SIOCSIFMTU, &request);
}

static const struct eth_dev_ops mana_dev_ops = {
	.dev_configure = mana_dev_configure,
	.dev_start = mana_dev_start,
	.dev_stop = mana_dev_stop,
	.dev_close = mana_dev_close,
	.dev_infos_get = mana_dev_info_get,
	.txq_info_get = mana_dev_tx_queue_info,
	.rxq_info_get = mana_dev_rx_queue_info,
	.dev_supported_ptypes_get = mana_supported_ptypes,
	.rss_hash_update = mana_rss_hash_update,
	.rss_hash_conf_get = mana_rss_hash_conf_get,
	.tx_queue_setup = mana_dev_tx_queue_setup,
	.tx_queue_release = mana_dev_tx_queue_release,
	.rx_queue_setup = mana_dev_rx_queue_setup,
	.rx_queue_release = mana_dev_rx_queue_release,
	.rx_queue_intr_enable = mana_rx_intr_enable,
	.rx_queue_intr_disable = mana_rx_intr_disable,
	.link_update = mana_dev_link_update,
	.stats_get = mana_dev_stats_get,
	.stats_reset = mana_dev_stats_reset,
	.mtu_set = mana_mtu_set,
};

static const struct eth_dev_ops mana_dev_secondary_ops = {
	.stats_get = mana_dev_stats_get,
	.stats_reset = mana_dev_stats_reset,
	.dev_infos_get = mana_dev_info_get,
};

uint16_t
mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

uint16_t
mana_tx_burst_removed(void *dpdk_txq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

#define ETH_MANA_MAC_ARG "mac"
static const char * const mana_init_args[] = {
	ETH_MANA_MAC_ARG,
	NULL,
};

/* Support parsing up to 8 MAC addresses from the EAL command line */
#define MAX_NUM_ADDRESS 8
struct mana_conf {
	struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
	unsigned int index;
};

static int
mana_arg_parse_callback(const char *key, const char *val, void *private)
{
	struct mana_conf *conf = (struct mana_conf *)private;
	int ret;

	DRV_LOG(INFO, "key=%s value=%s index=%u", key, val, conf->index);

	if (conf->index >= MAX_NUM_ADDRESS) {
		DRV_LOG(ERR, "Exceeding the maximum number of MAC addresses");
		return 1;
	}

	ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
	if (ret) {
		DRV_LOG(ERR, "Invalid MAC address %s", val);
		return ret;
	}

	conf->index++;

	return 0;
}

static int
mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
{
	struct rte_kvargs *kvlist;
	unsigned int arg_count;
	int ret = 0;

	kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
	if (!kvlist) {
		DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
		return -EINVAL;
	}

	arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
	if (arg_count > MAX_NUM_ADDRESS) {
		ret = -EINVAL;
		goto free_kvlist;
	}
	ret = rte_kvargs_process(kvlist, mana_init_args[0],
				 mana_arg_parse_callback, conf);
	if (ret) {
		DRV_LOG(ERR, "error parsing args");
		goto free_kvlist;
	}

free_kvlist:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
get_port_mac(struct ibv_device *device, unsigned int port,
	     struct rte_ether_addr *addr)
{
	FILE *file;
	int ret = 0;
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_port;

	MANA_MKSTR(path, "%s/device/net", device->ibdev_path);

	dir = opendir(path);
	if (!dir)
		return -ENOENT;

	while ((dent = readdir(dir))) {
		char *name = dent->d_name;
		char *mac = NULL;

		MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);

		/* Ignore . and .. */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		file = fopen(port_path, "r");
		if (!file)
			continue;

		ret = fscanf(file, "%u", &dev_port);
		fclose(file);

		if (ret != 1)
			continue;

		/* Ethernet ports start at 0, IB ports start at 1 */
		if (dev_port == port - 1) {
			MANA_MKSTR(address_path, "%s/%s/address", path, name);

			file = fopen(address_path, "r");
			if (!file)
				continue;

			ret = fscanf(file, "%ms", &mac);
			fclose(file);

			if (ret < 0)
				break;

			ret = rte_ether_unformat_addr(mac, addr);
			if (ret)
				DRV_LOG(ERR, "unrecognized mac addr %s", mac);

			free(mac);
			break;
		}
	}

	closedir(dir);
	return ret;
}

static int
mana_ibv_device_to_pci_addr(const struct ibv_device *device,
			    struct rte_pci_addr *pci_addr)
{
	FILE *file;
	char *line = NULL;
	size_t len = 0;

	MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);

	file = fopen(path, "r");
	if (!file)
		return -errno;

	while (getline(&line, &len, file) != -1) {
		/* Extract information. */
		if (sscanf(line,
			   "PCI_SLOT_NAME="
			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
			   &pci_addr->domain,
			   &pci_addr->bus,
			   &pci_addr->devid,
			   &pci_addr->function) == 4) {
			break;
		}
	}

	free(line);
	fclose(file);
	return 0;
}

/*
 * Interrupt handler from the IB layer to notify that this device is being
 * removed.
 */
static void
mana_intr_handler(void *arg)
{
	struct mana_priv *priv = arg;
	struct ibv_context *ctx = priv->ib_ctx;
	struct ibv_async_event event;

	/* Read and ack all messages from IB device */
	while (true) {
		if (ibv_get_async_event(ctx, &event))
			break;

		if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
			struct rte_eth_dev *dev;

			dev = &rte_eth_devices[priv->port_id];
			if (dev->data->dev_conf.intr_conf.rmv)
				rte_eth_dev_callback_process(dev,
					RTE_ETH_EVENT_INTR_RMV, NULL);
		}

		ibv_ack_async_event(&event);
	}
}

static int
mana_intr_uninstall(struct mana_priv *priv)
{
	int ret;

	ret = rte_intr_callback_unregister(priv->intr_handle,
					   mana_intr_handler, priv);
	if (ret <= 0) {
		DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
		return ret;
	}

	rte_intr_instance_free(priv->intr_handle);

	return 0;
}

int
mana_fd_set_non_blocking(int fd)
{
	int ret = fcntl(fd, F_GETFL);

	if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
		return 0;

	rte_errno = errno;
	return -rte_errno;
}

static int
mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
{
	int ret;
	struct ibv_context *ctx = priv->ib_ctx;

	priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
	if (!priv->intr_handle) {
		DRV_LOG(ERR, "Failed to allocate intr_handle");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}

	ret = rte_intr_fd_set(priv->intr_handle, -1);
	if (ret)
		goto free_intr;

	ret = mana_fd_set_non_blocking(ctx->async_fd);
	if (ret) {
		DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
		goto free_intr;
	}

	ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
	if (ret)
		goto free_intr;
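
	/* Register the verbs async fd as an externally managed interrupt handle */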
	ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
	if (ret)
		goto free_intr;

	ret = rte_intr_callback_register(priv->intr_handle,
					 mana_intr_handler, priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to register intr callback");
		rte_intr_fd_set(priv->intr_handle, -1);
		goto free_intr;
	}

	eth_dev->intr_handle = priv->intr_handle;
	return 0;

free_intr:
	rte_intr_instance_free(priv->intr_handle);
	priv->intr_handle = NULL;

	return ret;
}

static int
mana_proc_priv_init(struct rte_eth_dev *dev)
{
	struct mana_process_priv *priv;

	priv = rte_zmalloc_socket("mana_proc_priv",
				  sizeof(struct mana_process_priv),
				  RTE_CACHE_LINE_SIZE,
				  dev->device->numa_node);
	if (!priv)
		return -ENOMEM;

	dev->process_private = priv;
	return 0;
}

/*
 * Map the doorbell page for the secondary process through the IB device
 * handle.
 */
static int
mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
{
	struct mana_process_priv *priv = eth_dev->process_private;

	void *addr;

	addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
			eth_dev->data->port_id);
		return -ENOMEM;
	}

	DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);

	priv->db_page = addr;

	return 0;
}

/* Initialize shared data for the driver (all devices) */
static int
mana_init_shared_data(void)
{
	int ret = 0;
	const struct rte_memzone *secondary_mz;

	rte_spinlock_lock(&mana_shared_data_lock);

	/* Skip if shared data is already initialized */
	if (mana_shared_data)
		goto exit;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
						     sizeof(*mana_shared_data),
						     SOCKET_ID_ANY, 0);
		if (!mana_shared_mz) {
			DRV_LOG(ERR, "Cannot allocate mana shared data");
			ret = -rte_errno;
			goto exit;
		}

		mana_shared_data = mana_shared_mz->addr;
		memset(mana_shared_data, 0, sizeof(*mana_shared_data));
		rte_spinlock_init(&mana_shared_data->lock);
	} else {
		secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
		if (!secondary_mz) {
			DRV_LOG(ERR, "Cannot attach mana shared data");
			ret = -rte_errno;
			goto exit;
		}

		mana_shared_data = secondary_mz->addr;
		memset(&mana_local_data, 0, sizeof(mana_local_data));
	}

exit:
	rte_spinlock_unlock(&mana_shared_data_lock);

	return ret;
}

/*
 * Init the data structures for use in primary and secondary processes.
 */
static int
mana_init_once(void)
{
	int ret;

	ret = mana_init_shared_data();
	if (ret)
		return ret;

	rte_spinlock_lock(&mana_shared_data->lock);

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		if (mana_shared_data->init_done)
			break;

		ret = mana_mp_init_primary();
		if (ret)
			break;
		DRV_LOG(ERR, "MP INIT PRIMARY");

		mana_shared_data->init_done = 1;
		break;

	case RTE_PROC_SECONDARY:

		if (mana_local_data.init_done)
			break;

		ret = mana_mp_init_secondary();
		if (ret)
			break;

		DRV_LOG(ERR, "MP INIT SECONDARY");

		mana_local_data.init_done = 1;
		break;

	default:
		/* Impossible, internal error */
		ret = -EPROTO;
		break;
	}

	rte_spinlock_unlock(&mana_shared_data->lock);

	return ret;
}

/*
 * Probe an IB port
 * Return value:
 * positive value: successfully probed port
 * 0: port not matching specified MAC address
 * negative value: error code
 */
static int
mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
		uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr)
{
	struct mana_priv *priv = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct ibv_parent_domain_init_attr attr = {0};
	char address[64];
	char name[RTE_ETH_NAME_MAX_LEN];
	int ret;
	struct ibv_context *ctx = NULL;

	rte_ether_format_addr(address, sizeof(address), addr);
	DRV_LOG(INFO, "device located port %u address %s", port, address);

	priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
				  SOCKET_ID_ANY);
	if (!priv)
		return -ENOMEM;

	snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		int fd;

		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			DRV_LOG(ERR, "Can't attach to dev %s", name);
			ret = -ENOMEM;
			goto failed;
		}

		eth_dev->device = &pci_dev->device;
		eth_dev->dev_ops = &mana_dev_secondary_ops;
		ret = mana_proc_priv_init(eth_dev);
		if (ret)
			goto failed;
		priv->process_priv = eth_dev->process_private;

		/* Get the IB FD from the primary process */
		fd = mana_mp_req_verbs_cmd_fd(eth_dev);
		if (fd < 0) {
			DRV_LOG(ERR, "Failed to get FD %d", fd);
			ret = -ENODEV;
			goto failed;
		}

		ret = mana_map_doorbell_secondary(eth_dev, fd);
		if (ret) {
			DRV_LOG(ERR, "Failed secondary map %d", fd);
			goto failed;
		}

		/* fd is not used after mapping doorbell */
		close(fd);

		eth_dev->tx_pkt_burst = mana_tx_burst;
		eth_dev->rx_pkt_burst = mana_rx_burst;

		rte_spinlock_lock(&mana_shared_data->lock);
		mana_shared_data->secondary_cnt++;
		mana_local_data.secondary_cnt++;
		rte_spinlock_unlock(&mana_shared_data->lock);

		rte_eth_copy_pci_info(eth_dev, pci_dev);
		rte_eth_dev_probing_finish(eth_dev);

		return 0;
	}

	ctx = ibv_open_device(ibdev);
	if (!ctx) {
		DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
		ret = -ENODEV;
		goto failed;
	}

	eth_dev = rte_eth_dev_allocate(name);
	if (!eth_dev) {
		ret = -ENOMEM;
		goto failed;
	}

	eth_dev->data->mac_addrs =
		rte_calloc("mana_mac", 1,
			   sizeof(struct rte_ether_addr), 0);
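	/* A single MAC address is registered per port */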
	if (!eth_dev->data->mac_addrs) {
		ret = -ENOMEM;
		goto failed;
	}

	rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);

	priv->ib_pd = ibv_alloc_pd(ctx);
	if (!priv->ib_pd) {
		DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
		ret = -ENOMEM;
		goto failed;
	}

	/* Create a parent domain with the port number */
	attr.pd = priv->ib_pd;
	attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
	attr.pd_context = (void *)(uintptr_t)port;
	priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
	if (!priv->ib_parent_pd) {
		DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
		ret = -ENOMEM;
		goto failed;
	}

	priv->ib_ctx = ctx;
	priv->port_id = eth_dev->data->port_id;
	priv->dev_port = port;
	eth_dev->data->dev_private = priv;
	priv->dev_data = eth_dev->data;

	priv->max_rx_queues = dev_attr->orig_attr.max_qp;
	priv->max_tx_queues = dev_attr->orig_attr.max_qp;

	priv->max_rx_desc =
		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
			dev_attr->orig_attr.max_cqe);
	priv->max_tx_desc =
		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
			dev_attr->orig_attr.max_cqe);

	priv->max_send_sge = dev_attr->orig_attr.max_sge;
	priv->max_recv_sge = dev_attr->orig_attr.max_sge;

	priv->max_mr = dev_attr->orig_attr.max_mr;
	priv->max_mr_size = dev_attr->orig_attr.max_mr_size;

	DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d mr %" PRIu64,
		name, priv->max_rx_queues, priv->max_rx_desc,
		priv->max_send_sge, priv->max_mr_size);

	rte_eth_copy_pci_info(eth_dev, pci_dev);

	/* Create async interrupt handler */
	ret = mana_intr_install(eth_dev, priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to install intr handler");
		goto failed;
	}

	rte_spinlock_lock(&mana_shared_data->lock);
	mana_shared_data->primary_cnt++;
	rte_spinlock_unlock(&mana_shared_data->lock);

	eth_dev->device = &pci_dev->device;

	DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);

	eth_dev->rx_pkt_burst = mana_rx_burst_removed;
	eth_dev->tx_pkt_burst = mana_tx_burst_removed;
	eth_dev->dev_ops = &mana_dev_ops;

	rte_eth_dev_probing_finish(eth_dev);

	return 0;

failed:
	/* Free the resources for the failed port */
	if (priv) {
		if (priv->ib_parent_pd)
			ibv_dealloc_pd(priv->ib_parent_pd);

		if (priv->ib_pd)
			ibv_dealloc_pd(priv->ib_pd);
	}

	if (eth_dev)
		rte_eth_dev_release_port(eth_dev);

	rte_free(priv);

	if (ctx)
		ibv_close_device(ctx);

	return ret;
}

/*
 * Goes through the IB device list to look for the IB port matching the
 * mac_addr. If found, creates a rte_eth_dev for it.
 * Return value: number of successfully probed devices
 */
static int
mana_pci_probe_mac(struct rte_pci_device *pci_dev,
		   struct rte_ether_addr *mac_addr)
{
	struct ibv_device **ibv_list;
	int ibv_idx;
	struct ibv_context *ctx;
	int num_devices;
	int ret;
	uint8_t port;
	int count = 0;

	ibv_list = ibv_get_device_list(&num_devices);
	for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
		struct ibv_device *ibdev = ibv_list[ibv_idx];
		struct rte_pci_addr pci_addr;
		struct ibv_device_attr_ex dev_attr;

		DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
			ibdev->name, ibdev->dev_name, ibdev->ibdev_path);

		if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
			continue;

		/* Ignore if this IB device is not this PCI device */
		if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0)
			continue;

		ctx = ibv_open_device(ibdev);
		if (!ctx) {
			DRV_LOG(ERR, "Failed to open IB device %s",
				ibdev->name);
			continue;
		}
		ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
		ibv_close_device(ctx);

		if (ret) {
			DRV_LOG(ERR, "Failed to query IB device %s",
				ibdev->name);
			continue;
		}

		for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
		     port++) {
			struct rte_ether_addr addr;
			ret = get_port_mac(ibdev, port, &addr);
			if (ret)
				continue;

			if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
				continue;

			ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
			if (ret) {
				DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
			} else {
				count++;
				DRV_LOG(INFO, "Successfully probed on IB port %u", port);
			}
		}
	}

	ibv_free_device_list(ibv_list);
	return count;
}

/*
 * Main callback function from PCI bus to probe a device.
 */
static int
mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	       struct rte_pci_device *pci_dev)
{
	struct rte_devargs *args = pci_dev->device.devargs;
	struct mana_conf conf = {0};
	unsigned int i;
	int ret;
	int count = 0;

	if (args && args->drv_str) {
		ret = mana_parse_args(args, &conf);
		if (ret) {
			DRV_LOG(ERR, "Failed to parse parameters args = %s",
				args->drv_str);
			return ret;
		}
	}

	ret = mana_init_once();
	if (ret) {
		DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
		return ret;
	}

	/* Probe the specified MACs; if none are given, probe on all ports */
	if (conf.index) {
		for (i = 0; i < conf.index; i++)
			count += mana_pci_probe_mac(pci_dev,
						    &conf.mac_array[i]);
	} else {
		count = mana_pci_probe_mac(pci_dev, NULL);
	}

	if (!count) {
		rte_memzone_free(mana_shared_mz);
		mana_shared_mz = NULL;
		ret = -ENODEV;
	}

	return ret;
}

static int
mana_dev_uninit(struct rte_eth_dev *dev)
{
	return mana_dev_close(dev);
}

/*
 * Callback from PCI to remove this device.
 */
static int
mana_pci_remove(struct rte_pci_device *pci_dev)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		rte_spinlock_lock(&mana_shared_data_lock);

		rte_spinlock_lock(&mana_shared_data->lock);

		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
		mana_shared_data->primary_cnt--;
		if (!mana_shared_data->primary_cnt) {
			DRV_LOG(DEBUG, "mp uninit primary");
			mana_mp_uninit_primary();
		}

		rte_spinlock_unlock(&mana_shared_data->lock);

		/* Also free the shared memory if this is the last */
		if (!mana_shared_data->primary_cnt) {
			DRV_LOG(DEBUG, "free shared memzone data");
			rte_memzone_free(mana_shared_mz);
			mana_shared_mz = NULL;
		}

		rte_spinlock_unlock(&mana_shared_data_lock);
	} else {
		rte_spinlock_lock(&mana_shared_data_lock);

		rte_spinlock_lock(&mana_shared_data->lock);
		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
		mana_shared_data->secondary_cnt--;
		rte_spinlock_unlock(&mana_shared_data->lock);

		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
		mana_local_data.secondary_cnt--;
		if (!mana_local_data.secondary_cnt) {
			DRV_LOG(DEBUG, "mp uninit secondary");
			mana_mp_uninit_secondary();
		}

		rte_spinlock_unlock(&mana_shared_data_lock);
	}

	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
}

static const struct rte_pci_id mana_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
			       PCI_DEVICE_ID_MICROSOFT_MANA)
	},
	{
		.vendor_id = 0
	},
};

static struct rte_pci_driver mana_pci_driver = {
	.id_table = mana_pci_id_map,
	.probe = mana_pci_probe,
	.remove = mana_pci_remove,
	.drv_flags = RTE_PCI_DRV_INTR_RMV,
};

RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");