1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2022 Microsoft Corporation 3 */ 4 5 #include <unistd.h> 6 #include <dirent.h> 7 #include <fcntl.h> 8 #include <sys/mman.h> 9 10 #include <ethdev_driver.h> 11 #include <ethdev_pci.h> 12 #include <rte_kvargs.h> 13 #include <rte_eal_paging.h> 14 15 #include <infiniband/verbs.h> 16 #include <infiniband/manadv.h> 17 18 #include <assert.h> 19 20 #include "mana.h" 21 22 /* Shared memory between primary/secondary processes, per driver */ 23 /* Data to track primary/secondary usage */ 24 struct mana_shared_data *mana_shared_data; 25 static struct mana_shared_data mana_local_data; 26 27 /* The memory region for the above data */ 28 static const struct rte_memzone *mana_shared_mz; 29 static const char *MZ_MANA_SHARED_DATA = "mana_shared_data"; 30 31 /* Spinlock for mana_shared_data */ 32 static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER; 33 34 /* Allocate a buffer on the stack and fill it with a printf format string. */ 35 #define MANA_MKSTR(name, ...) \ 36 int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \ 37 char name[mkstr_size_##name + 1]; \ 38 \ 39 memset(name, 0, mkstr_size_##name + 1); \ 40 snprintf(name, sizeof(name), "" __VA_ARGS__) 41 42 int mana_logtype_driver; 43 int mana_logtype_init; 44 45 /* 46 * Callback from rdma-core to allocate a buffer for a queue. 47 */ 48 void * 49 mana_alloc_verbs_buf(size_t size, void *data) 50 { 51 void *ret; 52 size_t alignment = rte_mem_page_size(); 53 int socket = (int)(uintptr_t)data; 54 55 DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket); 56 57 if (alignment == (size_t)-1) { 58 DRV_LOG(ERR, "Failed to get mem page size"); 59 rte_errno = ENOMEM; 60 return NULL; 61 } 62 63 ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket); 64 if (!ret && size) 65 rte_errno = ENOMEM; 66 return ret; 67 } 68 69 void 70 mana_free_verbs_buf(void *ptr, void *data __rte_unused) 71 { 72 rte_free(ptr); 73 } 74 75 static int 76 mana_dev_configure(struct rte_eth_dev *dev) 77 { 78 struct mana_priv *priv = dev->data->dev_private; 79 struct rte_eth_conf *dev_conf = &dev->data->dev_conf; 80 81 if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) 82 dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH; 83 84 if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) { 85 DRV_LOG(ERR, "Only support equal number of rx/tx queues"); 86 return -EINVAL; 87 } 88 89 if (!rte_is_power_of_2(dev->data->nb_rx_queues)) { 90 DRV_LOG(ERR, "number of TX/RX queues must be power of 2"); 91 return -EINVAL; 92 } 93 94 priv->num_queues = dev->data->nb_rx_queues; 95 96 manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS, 97 (void *)((uintptr_t)&(struct manadv_ctx_allocators){ 98 .alloc = &mana_alloc_verbs_buf, 99 .free = &mana_free_verbs_buf, 100 .data = 0, 101 })); 102 103 return 0; 104 } 105 106 static void 107 rx_intr_vec_disable(struct mana_priv *priv) 108 { 109 struct rte_intr_handle *intr_handle = priv->intr_handle; 110 111 rte_intr_free_epoll_fd(intr_handle); 112 rte_intr_vec_list_free(intr_handle); 113 rte_intr_nb_efd_set(intr_handle, 0); 114 } 115 116 static int 117 rx_intr_vec_enable(struct mana_priv *priv) 118 { 119 unsigned int i; 120 unsigned int rxqs_n = priv->dev_data->nb_rx_queues; 121 unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); 122 struct rte_intr_handle *intr_handle = priv->intr_handle; 123 int ret; 124 125 rx_intr_vec_disable(priv); 126 127 if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) { 128 DRV_LOG(ERR, "Failed to allocate memory for interrupt vector"); 129 return -ENOMEM; 130 } 131 132 for (i = 0; i < n; i++) { 133 struct mana_rxq *rxq = priv->dev_data->rx_queues[i]; 134 135 ret = rte_intr_vec_list_index_set(intr_handle, i, 136 RTE_INTR_VEC_RXTX_OFFSET + i); 137 if (ret) { 138 DRV_LOG(ERR, "Failed to set intr vec %u", i); 139 return ret; 140 } 141 142 ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd); 143 if (ret) { 144 DRV_LOG(ERR, "Failed to set FD at intr %u", i); 145 return ret; 146 } 147 } 148 149 return rte_intr_nb_efd_set(intr_handle, n); 150 } 151 152 static void 153 rxq_intr_disable(struct mana_priv *priv) 154 { 155 int err = rte_errno; 156 157 rx_intr_vec_disable(priv); 158 rte_errno = err; 159 } 160 161 static int 162 rxq_intr_enable(struct mana_priv *priv) 163 { 164 const struct rte_eth_intr_conf *const intr_conf = 165 &priv->dev_data->dev_conf.intr_conf; 166 167 if (!intr_conf->rxq) 168 return 0; 169 170 return rx_intr_vec_enable(priv); 171 } 172 173 static int 174 mana_dev_start(struct rte_eth_dev *dev) 175 { 176 int ret; 177 struct mana_priv *priv = dev->data->dev_private; 178 179 rte_spinlock_init(&priv->mr_btree_lock); 180 ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N, 181 dev->device->numa_node); 182 if (ret) { 183 DRV_LOG(ERR, "Failed to init device MR btree %d", ret); 184 return ret; 185 } 186 187 ret = mana_start_tx_queues(dev); 188 if (ret) { 189 DRV_LOG(ERR, "failed to start tx queues %d", ret); 190 goto failed_tx; 191 } 192 193 ret = mana_start_rx_queues(dev); 194 if (ret) { 195 DRV_LOG(ERR, "failed to start rx queues %d", ret); 196 goto failed_rx; 197 } 198 199 rte_wmb(); 200 201 dev->tx_pkt_burst = mana_tx_burst; 202 dev->rx_pkt_burst = mana_rx_burst; 203 204 DRV_LOG(INFO, "TX/RX queues have started"); 205 206 /* Enable datapath for secondary processes */ 207 mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX); 208 209 ret = rxq_intr_enable(priv); 210 if (ret) { 211 DRV_LOG(ERR, "Failed to enable RX interrupts"); 212 goto failed_intr; 213 } 214 215 return 0; 216 217 failed_intr: 218 mana_stop_rx_queues(dev); 219 220 failed_rx: 221 mana_stop_tx_queues(dev); 222 223 failed_tx: 224 mana_mr_btree_free(&priv->mr_btree); 225 226 return ret; 227 } 228 229 static int 230 mana_dev_stop(struct rte_eth_dev *dev) 231 { 232 int ret; 233 struct mana_priv *priv = dev->data->dev_private; 234 235 rxq_intr_disable(priv); 236 237 dev->tx_pkt_burst = mana_tx_burst_removed; 238 dev->rx_pkt_burst = mana_rx_burst_removed; 239 240 /* Stop datapath on secondary processes */ 241 mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX); 242 243 rte_wmb(); 244 245 ret = mana_stop_tx_queues(dev); 246 if (ret) { 247 DRV_LOG(ERR, "failed to stop tx queues"); 248 return ret; 249 } 250 251 ret = mana_stop_rx_queues(dev); 252 if (ret) { 253 DRV_LOG(ERR, "failed to stop tx queues"); 254 return ret; 255 } 256 257 return 0; 258 } 259 260 static int mana_intr_uninstall(struct mana_priv *priv); 261 262 static int 263 mana_dev_close(struct rte_eth_dev *dev) 264 { 265 struct mana_priv *priv = dev->data->dev_private; 266 int ret; 267 268 mana_remove_all_mr(priv); 269 270 ret = mana_intr_uninstall(priv); 271 if (ret) 272 return ret; 273 274 ret = ibv_close_device(priv->ib_ctx); 275 if (ret) { 276 ret = errno; 277 return ret; 278 } 279 280 return 0; 281 } 282 283 static int 284 mana_dev_info_get(struct rte_eth_dev *dev, 285 struct rte_eth_dev_info *dev_info) 286 { 287 struct mana_priv *priv = dev->data->dev_private; 288 289 dev_info->max_mtu = RTE_ETHER_MTU; 290 291 /* RX params */ 292 dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE; 293 dev_info->max_rx_pktlen = MAX_FRAME_SIZE; 294 295 dev_info->max_rx_queues = priv->max_rx_queues; 296 dev_info->max_tx_queues = priv->max_tx_queues; 297 298 dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR; 299 dev_info->max_hash_mac_addrs = 0; 300 301 dev_info->max_vfs = 1; 302 303 /* Offload params */ 304 dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT; 305 306 dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT; 307 308 /* RSS */ 309 dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS; 310 dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES; 311 dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT; 312 313 /* Thresholds */ 314 dev_info->default_rxconf = (struct rte_eth_rxconf){ 315 .rx_thresh = { 316 .pthresh = 8, 317 .hthresh = 8, 318 .wthresh = 0, 319 }, 320 .rx_free_thresh = 32, 321 /* If no descriptors available, pkts are dropped by default */ 322 .rx_drop_en = 1, 323 }; 324 325 dev_info->default_txconf = (struct rte_eth_txconf){ 326 .tx_thresh = { 327 .pthresh = 32, 328 .hthresh = 0, 329 .wthresh = 0, 330 }, 331 .tx_rs_thresh = 32, 332 .tx_free_thresh = 32, 333 }; 334 335 /* Buffer limits */ 336 dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE; 337 dev_info->rx_desc_lim.nb_max = priv->max_rx_desc; 338 dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE; 339 dev_info->rx_desc_lim.nb_seg_max = priv->max_recv_sge; 340 dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge; 341 342 dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE; 343 dev_info->tx_desc_lim.nb_max = priv->max_tx_desc; 344 dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE; 345 dev_info->tx_desc_lim.nb_seg_max = priv->max_send_sge; 346 dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge; 347 348 /* Speed */ 349 dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G; 350 351 /* RX params */ 352 dev_info->default_rxportconf.burst_size = 1; 353 dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE; 354 dev_info->default_rxportconf.nb_queues = 1; 355 356 /* TX params */ 357 dev_info->default_txportconf.burst_size = 1; 358 dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE; 359 dev_info->default_txportconf.nb_queues = 1; 360 361 return 0; 362 } 363 364 static void 365 mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id, 366 struct rte_eth_txq_info *qinfo) 367 { 368 struct mana_txq *txq = dev->data->tx_queues[queue_id]; 369 370 qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; 371 qinfo->nb_desc = txq->num_desc; 372 } 373 374 static void 375 mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id, 376 struct rte_eth_rxq_info *qinfo) 377 { 378 struct mana_rxq *rxq = dev->data->rx_queues[queue_id]; 379 380 qinfo->mp = rxq->mp; 381 qinfo->nb_desc = rxq->num_desc; 382 qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; 383 } 384 385 static const uint32_t * 386 mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused) 387 { 388 static const uint32_t ptypes[] = { 389 RTE_PTYPE_L2_ETHER, 390 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 391 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 392 RTE_PTYPE_L4_FRAG, 393 RTE_PTYPE_L4_TCP, 394 RTE_PTYPE_L4_UDP, 395 RTE_PTYPE_UNKNOWN 396 }; 397 398 return ptypes; 399 } 400 401 static int 402 mana_rss_hash_update(struct rte_eth_dev *dev, 403 struct rte_eth_rss_conf *rss_conf) 404 { 405 struct mana_priv *priv = dev->data->dev_private; 406 407 /* Currently can only update RSS hash when device is stopped */ 408 if (dev->data->dev_started) { 409 DRV_LOG(ERR, "Can't update RSS after device has started"); 410 return -ENODEV; 411 } 412 413 if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) { 414 DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64, 415 dev->data->port_id, rss_conf->rss_hf); 416 return -EINVAL; 417 } 418 419 if (rss_conf->rss_key && rss_conf->rss_key_len) { 420 if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) { 421 DRV_LOG(ERR, "Port %u key len must be %u long", 422 dev->data->port_id, 423 TOEPLITZ_HASH_KEY_SIZE_IN_BYTES); 424 return -EINVAL; 425 } 426 427 priv->rss_conf.rss_key_len = rss_conf->rss_key_len; 428 priv->rss_conf.rss_key = 429 rte_zmalloc("mana_rss", rss_conf->rss_key_len, 430 RTE_CACHE_LINE_SIZE); 431 if (!priv->rss_conf.rss_key) 432 return -ENOMEM; 433 memcpy(priv->rss_conf.rss_key, rss_conf->rss_key, 434 rss_conf->rss_key_len); 435 } 436 priv->rss_conf.rss_hf = rss_conf->rss_hf; 437 438 return 0; 439 } 440 441 static int 442 mana_rss_hash_conf_get(struct rte_eth_dev *dev, 443 struct rte_eth_rss_conf *rss_conf) 444 { 445 struct mana_priv *priv = dev->data->dev_private; 446 447 if (!rss_conf) 448 return -EINVAL; 449 450 if (rss_conf->rss_key && 451 rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) { 452 memcpy(rss_conf->rss_key, priv->rss_conf.rss_key, 453 priv->rss_conf.rss_key_len); 454 } 455 456 rss_conf->rss_key_len = priv->rss_conf.rss_key_len; 457 rss_conf->rss_hf = priv->rss_conf.rss_hf; 458 459 return 0; 460 } 461 462 static int 463 mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 464 uint16_t nb_desc, unsigned int socket_id, 465 const struct rte_eth_txconf *tx_conf __rte_unused) 466 467 { 468 struct mana_priv *priv = dev->data->dev_private; 469 struct mana_txq *txq; 470 int ret; 471 472 txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id); 473 if (!txq) { 474 DRV_LOG(ERR, "failed to allocate txq"); 475 return -ENOMEM; 476 } 477 478 txq->socket = socket_id; 479 480 txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring", 481 sizeof(struct mana_txq_desc) * 482 nb_desc, 483 RTE_CACHE_LINE_SIZE, socket_id); 484 if (!txq->desc_ring) { 485 DRV_LOG(ERR, "failed to allocate txq desc_ring"); 486 ret = -ENOMEM; 487 goto fail; 488 } 489 490 txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp", 491 sizeof(*txq->gdma_comp_buf) * nb_desc, 492 RTE_CACHE_LINE_SIZE, socket_id); 493 if (!txq->gdma_comp_buf) { 494 DRV_LOG(ERR, "failed to allocate txq comp"); 495 ret = -ENOMEM; 496 goto fail; 497 } 498 499 ret = mana_mr_btree_init(&txq->mr_btree, 500 MANA_MR_BTREE_PER_QUEUE_N, socket_id); 501 if (ret) { 502 DRV_LOG(ERR, "Failed to init TXQ MR btree"); 503 goto fail; 504 } 505 506 DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p", 507 queue_idx, nb_desc, socket_id, txq->desc_ring); 508 509 txq->desc_ring_head = 0; 510 txq->desc_ring_tail = 0; 511 txq->priv = priv; 512 txq->num_desc = nb_desc; 513 dev->data->tx_queues[queue_idx] = txq; 514 515 return 0; 516 517 fail: 518 rte_free(txq->gdma_comp_buf); 519 rte_free(txq->desc_ring); 520 rte_free(txq); 521 return ret; 522 } 523 524 static void 525 mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 526 { 527 struct mana_txq *txq = dev->data->tx_queues[qid]; 528 529 mana_mr_btree_free(&txq->mr_btree); 530 531 rte_free(txq->gdma_comp_buf); 532 rte_free(txq->desc_ring); 533 rte_free(txq); 534 } 535 536 static int 537 mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, 538 uint16_t nb_desc, unsigned int socket_id, 539 const struct rte_eth_rxconf *rx_conf __rte_unused, 540 struct rte_mempool *mp) 541 { 542 struct mana_priv *priv = dev->data->dev_private; 543 struct mana_rxq *rxq; 544 int ret; 545 546 rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id); 547 if (!rxq) { 548 DRV_LOG(ERR, "failed to allocate rxq"); 549 return -ENOMEM; 550 } 551 552 DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u", 553 queue_idx, nb_desc, socket_id); 554 555 rxq->socket = socket_id; 556 557 rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring", 558 sizeof(struct mana_rxq_desc) * 559 nb_desc, 560 RTE_CACHE_LINE_SIZE, socket_id); 561 562 if (!rxq->desc_ring) { 563 DRV_LOG(ERR, "failed to allocate rxq desc_ring"); 564 ret = -ENOMEM; 565 goto fail; 566 } 567 568 rxq->desc_ring_head = 0; 569 rxq->desc_ring_tail = 0; 570 571 rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp", 572 sizeof(*rxq->gdma_comp_buf) * nb_desc, 573 RTE_CACHE_LINE_SIZE, socket_id); 574 if (!rxq->gdma_comp_buf) { 575 DRV_LOG(ERR, "failed to allocate rxq comp"); 576 ret = -ENOMEM; 577 goto fail; 578 } 579 580 ret = mana_mr_btree_init(&rxq->mr_btree, 581 MANA_MR_BTREE_PER_QUEUE_N, socket_id); 582 if (ret) { 583 DRV_LOG(ERR, "Failed to init RXQ MR btree"); 584 goto fail; 585 } 586 587 rxq->priv = priv; 588 rxq->num_desc = nb_desc; 589 rxq->mp = mp; 590 dev->data->rx_queues[queue_idx] = rxq; 591 592 return 0; 593 594 fail: 595 rte_free(rxq->gdma_comp_buf); 596 rte_free(rxq->desc_ring); 597 rte_free(rxq); 598 return ret; 599 } 600 601 static void 602 mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid) 603 { 604 struct mana_rxq *rxq = dev->data->rx_queues[qid]; 605 606 mana_mr_btree_free(&rxq->mr_btree); 607 608 rte_free(rxq->gdma_comp_buf); 609 rte_free(rxq->desc_ring); 610 rte_free(rxq); 611 } 612 613 static int 614 mana_dev_link_update(struct rte_eth_dev *dev, 615 int wait_to_complete __rte_unused) 616 { 617 struct rte_eth_link link; 618 619 /* MANA has no concept of carrier state, always reporting UP */ 620 link = (struct rte_eth_link) { 621 .link_duplex = RTE_ETH_LINK_FULL_DUPLEX, 622 .link_autoneg = RTE_ETH_LINK_SPEED_FIXED, 623 .link_speed = RTE_ETH_SPEED_NUM_100G, 624 .link_status = RTE_ETH_LINK_UP, 625 }; 626 627 return rte_eth_linkstatus_set(dev, &link); 628 } 629 630 static int 631 mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) 632 { 633 unsigned int i; 634 635 for (i = 0; i < dev->data->nb_tx_queues; i++) { 636 struct mana_txq *txq = dev->data->tx_queues[i]; 637 638 if (!txq) 639 continue; 640 641 stats->opackets += txq->stats.packets; 642 stats->obytes += txq->stats.bytes; 643 stats->oerrors += txq->stats.errors; 644 645 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 646 stats->q_opackets[i] = txq->stats.packets; 647 stats->q_obytes[i] = txq->stats.bytes; 648 } 649 } 650 651 stats->rx_nombuf = 0; 652 for (i = 0; i < dev->data->nb_rx_queues; i++) { 653 struct mana_rxq *rxq = dev->data->rx_queues[i]; 654 655 if (!rxq) 656 continue; 657 658 stats->ipackets += rxq->stats.packets; 659 stats->ibytes += rxq->stats.bytes; 660 stats->ierrors += rxq->stats.errors; 661 662 /* There is no good way to get stats->imissed, not setting it */ 663 664 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { 665 stats->q_ipackets[i] = rxq->stats.packets; 666 stats->q_ibytes[i] = rxq->stats.bytes; 667 } 668 669 stats->rx_nombuf += rxq->stats.nombuf; 670 } 671 672 return 0; 673 } 674 675 static int 676 mana_dev_stats_reset(struct rte_eth_dev *dev __rte_unused) 677 { 678 unsigned int i; 679 680 PMD_INIT_FUNC_TRACE(); 681 682 for (i = 0; i < dev->data->nb_tx_queues; i++) { 683 struct mana_txq *txq = dev->data->tx_queues[i]; 684 685 if (!txq) 686 continue; 687 688 memset(&txq->stats, 0, sizeof(txq->stats)); 689 } 690 691 for (i = 0; i < dev->data->nb_rx_queues; i++) { 692 struct mana_rxq *rxq = dev->data->rx_queues[i]; 693 694 if (!rxq) 695 continue; 696 697 memset(&rxq->stats, 0, sizeof(rxq->stats)); 698 } 699 700 return 0; 701 } 702 703 static const struct eth_dev_ops mana_dev_ops = { 704 .dev_configure = mana_dev_configure, 705 .dev_start = mana_dev_start, 706 .dev_stop = mana_dev_stop, 707 .dev_close = mana_dev_close, 708 .dev_infos_get = mana_dev_info_get, 709 .txq_info_get = mana_dev_tx_queue_info, 710 .rxq_info_get = mana_dev_rx_queue_info, 711 .dev_supported_ptypes_get = mana_supported_ptypes, 712 .rss_hash_update = mana_rss_hash_update, 713 .rss_hash_conf_get = mana_rss_hash_conf_get, 714 .tx_queue_setup = mana_dev_tx_queue_setup, 715 .tx_queue_release = mana_dev_tx_queue_release, 716 .rx_queue_setup = mana_dev_rx_queue_setup, 717 .rx_queue_release = mana_dev_rx_queue_release, 718 .rx_queue_intr_enable = mana_rx_intr_enable, 719 .rx_queue_intr_disable = mana_rx_intr_disable, 720 .link_update = mana_dev_link_update, 721 .stats_get = mana_dev_stats_get, 722 .stats_reset = mana_dev_stats_reset, 723 }; 724 725 static const struct eth_dev_ops mana_dev_secondary_ops = { 726 .stats_get = mana_dev_stats_get, 727 .stats_reset = mana_dev_stats_reset, 728 .dev_infos_get = mana_dev_info_get, 729 }; 730 731 uint16_t 732 mana_rx_burst_removed(void *dpdk_rxq __rte_unused, 733 struct rte_mbuf **pkts __rte_unused, 734 uint16_t pkts_n __rte_unused) 735 { 736 rte_mb(); 737 return 0; 738 } 739 740 uint16_t 741 mana_tx_burst_removed(void *dpdk_rxq __rte_unused, 742 struct rte_mbuf **pkts __rte_unused, 743 uint16_t pkts_n __rte_unused) 744 { 745 rte_mb(); 746 return 0; 747 } 748 749 #define ETH_MANA_MAC_ARG "mac" 750 static const char * const mana_init_args[] = { 751 ETH_MANA_MAC_ARG, 752 NULL, 753 }; 754 755 /* Support of parsing up to 8 mac address from EAL command line */ 756 #define MAX_NUM_ADDRESS 8 757 struct mana_conf { 758 struct rte_ether_addr mac_array[MAX_NUM_ADDRESS]; 759 unsigned int index; 760 }; 761 762 static int 763 mana_arg_parse_callback(const char *key, const char *val, void *private) 764 { 765 struct mana_conf *conf = (struct mana_conf *)private; 766 int ret; 767 768 DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index); 769 770 if (conf->index >= MAX_NUM_ADDRESS) { 771 DRV_LOG(ERR, "Exceeding max MAC address"); 772 return 1; 773 } 774 775 ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]); 776 if (ret) { 777 DRV_LOG(ERR, "Invalid MAC address %s", val); 778 return ret; 779 } 780 781 conf->index++; 782 783 return 0; 784 } 785 786 static int 787 mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf) 788 { 789 struct rte_kvargs *kvlist; 790 unsigned int arg_count; 791 int ret = 0; 792 793 kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args); 794 if (!kvlist) { 795 DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str); 796 return -EINVAL; 797 } 798 799 arg_count = rte_kvargs_count(kvlist, mana_init_args[0]); 800 if (arg_count > MAX_NUM_ADDRESS) { 801 ret = -EINVAL; 802 goto free_kvlist; 803 } 804 ret = rte_kvargs_process(kvlist, mana_init_args[0], 805 mana_arg_parse_callback, conf); 806 if (ret) { 807 DRV_LOG(ERR, "error parsing args"); 808 goto free_kvlist; 809 } 810 811 free_kvlist: 812 rte_kvargs_free(kvlist); 813 return ret; 814 } 815 816 static int 817 get_port_mac(struct ibv_device *device, unsigned int port, 818 struct rte_ether_addr *addr) 819 { 820 FILE *file; 821 int ret = 0; 822 DIR *dir; 823 struct dirent *dent; 824 unsigned int dev_port; 825 char mac[20]; 826 827 MANA_MKSTR(path, "%s/device/net", device->ibdev_path); 828 829 dir = opendir(path); 830 if (!dir) 831 return -ENOENT; 832 833 while ((dent = readdir(dir))) { 834 char *name = dent->d_name; 835 836 MANA_MKSTR(port_path, "%s/%s/dev_port", path, name); 837 838 /* Ignore . and .. */ 839 if ((name[0] == '.') && 840 ((name[1] == '\0') || 841 ((name[1] == '.') && (name[2] == '\0')))) 842 continue; 843 844 file = fopen(port_path, "r"); 845 if (!file) 846 continue; 847 848 ret = fscanf(file, "%u", &dev_port); 849 fclose(file); 850 851 if (ret != 1) 852 continue; 853 854 /* Ethernet ports start at 0, IB port start at 1 */ 855 if (dev_port == port - 1) { 856 MANA_MKSTR(address_path, "%s/%s/address", path, name); 857 858 file = fopen(address_path, "r"); 859 if (!file) 860 continue; 861 862 ret = fscanf(file, "%s", mac); 863 fclose(file); 864 865 if (ret < 0) 866 break; 867 868 ret = rte_ether_unformat_addr(mac, addr); 869 if (ret) 870 DRV_LOG(ERR, "unrecognized mac addr %s", mac); 871 break; 872 } 873 } 874 875 closedir(dir); 876 return ret; 877 } 878 879 static int 880 mana_ibv_device_to_pci_addr(const struct ibv_device *device, 881 struct rte_pci_addr *pci_addr) 882 { 883 FILE *file; 884 char *line = NULL; 885 size_t len = 0; 886 887 MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path); 888 889 file = fopen(path, "r"); 890 if (!file) 891 return -errno; 892 893 while (getline(&line, &len, file) != -1) { 894 /* Extract information. */ 895 if (sscanf(line, 896 "PCI_SLOT_NAME=" 897 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 898 &pci_addr->domain, 899 &pci_addr->bus, 900 &pci_addr->devid, 901 &pci_addr->function) == 4) { 902 break; 903 } 904 } 905 906 free(line); 907 fclose(file); 908 return 0; 909 } 910 911 /* 912 * Interrupt handler from IB layer to notify this device is being removed. 913 */ 914 static void 915 mana_intr_handler(void *arg) 916 { 917 struct mana_priv *priv = arg; 918 struct ibv_context *ctx = priv->ib_ctx; 919 struct ibv_async_event event; 920 921 /* Read and ack all messages from IB device */ 922 while (true) { 923 if (ibv_get_async_event(ctx, &event)) 924 break; 925 926 if (event.event_type == IBV_EVENT_DEVICE_FATAL) { 927 struct rte_eth_dev *dev; 928 929 dev = &rte_eth_devices[priv->port_id]; 930 if (dev->data->dev_conf.intr_conf.rmv) 931 rte_eth_dev_callback_process(dev, 932 RTE_ETH_EVENT_INTR_RMV, NULL); 933 } 934 935 ibv_ack_async_event(&event); 936 } 937 } 938 939 static int 940 mana_intr_uninstall(struct mana_priv *priv) 941 { 942 int ret; 943 944 ret = rte_intr_callback_unregister(priv->intr_handle, 945 mana_intr_handler, priv); 946 if (ret <= 0) { 947 DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret); 948 return ret; 949 } 950 951 rte_intr_instance_free(priv->intr_handle); 952 953 return 0; 954 } 955 956 int 957 mana_fd_set_non_blocking(int fd) 958 { 959 int ret = fcntl(fd, F_GETFL); 960 961 if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK)) 962 return 0; 963 964 rte_errno = errno; 965 return -rte_errno; 966 } 967 968 static int 969 mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv) 970 { 971 int ret; 972 struct ibv_context *ctx = priv->ib_ctx; 973 974 priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED); 975 if (!priv->intr_handle) { 976 DRV_LOG(ERR, "Failed to allocate intr_handle"); 977 rte_errno = ENOMEM; 978 return -ENOMEM; 979 } 980 981 ret = rte_intr_fd_set(priv->intr_handle, -1); 982 if (ret) 983 goto free_intr; 984 985 ret = mana_fd_set_non_blocking(ctx->async_fd); 986 if (ret) { 987 DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK"); 988 goto free_intr; 989 } 990 991 ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd); 992 if (ret) 993 goto free_intr; 994 995 ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT); 996 if (ret) 997 goto free_intr; 998 999 ret = rte_intr_callback_register(priv->intr_handle, 1000 mana_intr_handler, priv); 1001 if (ret) { 1002 DRV_LOG(ERR, "Failed to register intr callback"); 1003 rte_intr_fd_set(priv->intr_handle, -1); 1004 goto free_intr; 1005 } 1006 1007 eth_dev->intr_handle = priv->intr_handle; 1008 return 0; 1009 1010 free_intr: 1011 rte_intr_instance_free(priv->intr_handle); 1012 priv->intr_handle = NULL; 1013 1014 return ret; 1015 } 1016 1017 static int 1018 mana_proc_priv_init(struct rte_eth_dev *dev) 1019 { 1020 struct mana_process_priv *priv; 1021 1022 priv = rte_zmalloc_socket("mana_proc_priv", 1023 sizeof(struct mana_process_priv), 1024 RTE_CACHE_LINE_SIZE, 1025 dev->device->numa_node); 1026 if (!priv) 1027 return -ENOMEM; 1028 1029 dev->process_private = priv; 1030 return 0; 1031 } 1032 1033 /* 1034 * Map the doorbell page for the secondary process through IB device handle. 1035 */ 1036 static int 1037 mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd) 1038 { 1039 struct mana_process_priv *priv = eth_dev->process_private; 1040 1041 void *addr; 1042 1043 addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0); 1044 if (addr == MAP_FAILED) { 1045 DRV_LOG(ERR, "Failed to map secondary doorbell port %u", 1046 eth_dev->data->port_id); 1047 return -ENOMEM; 1048 } 1049 1050 DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr); 1051 1052 priv->db_page = addr; 1053 1054 return 0; 1055 } 1056 1057 /* Initialize shared data for the driver (all devices) */ 1058 static int 1059 mana_init_shared_data(void) 1060 { 1061 int ret = 0; 1062 const struct rte_memzone *secondary_mz; 1063 1064 rte_spinlock_lock(&mana_shared_data_lock); 1065 1066 /* Skip if shared data is already initialized */ 1067 if (mana_shared_data) 1068 goto exit; 1069 1070 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1071 mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA, 1072 sizeof(*mana_shared_data), 1073 SOCKET_ID_ANY, 0); 1074 if (!mana_shared_mz) { 1075 DRV_LOG(ERR, "Cannot allocate mana shared data"); 1076 ret = -rte_errno; 1077 goto exit; 1078 } 1079 1080 mana_shared_data = mana_shared_mz->addr; 1081 memset(mana_shared_data, 0, sizeof(*mana_shared_data)); 1082 rte_spinlock_init(&mana_shared_data->lock); 1083 } else { 1084 secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA); 1085 if (!secondary_mz) { 1086 DRV_LOG(ERR, "Cannot attach mana shared data"); 1087 ret = -rte_errno; 1088 goto exit; 1089 } 1090 1091 mana_shared_data = secondary_mz->addr; 1092 memset(&mana_local_data, 0, sizeof(mana_local_data)); 1093 } 1094 1095 exit: 1096 rte_spinlock_unlock(&mana_shared_data_lock); 1097 1098 return ret; 1099 } 1100 1101 /* 1102 * Init the data structures for use in primary and secondary processes. 1103 */ 1104 static int 1105 mana_init_once(void) 1106 { 1107 int ret; 1108 1109 ret = mana_init_shared_data(); 1110 if (ret) 1111 return ret; 1112 1113 rte_spinlock_lock(&mana_shared_data->lock); 1114 1115 switch (rte_eal_process_type()) { 1116 case RTE_PROC_PRIMARY: 1117 if (mana_shared_data->init_done) 1118 break; 1119 1120 ret = mana_mp_init_primary(); 1121 if (ret) 1122 break; 1123 DRV_LOG(ERR, "MP INIT PRIMARY"); 1124 1125 mana_shared_data->init_done = 1; 1126 break; 1127 1128 case RTE_PROC_SECONDARY: 1129 1130 if (mana_local_data.init_done) 1131 break; 1132 1133 ret = mana_mp_init_secondary(); 1134 if (ret) 1135 break; 1136 1137 DRV_LOG(ERR, "MP INIT SECONDARY"); 1138 1139 mana_local_data.init_done = 1; 1140 break; 1141 1142 default: 1143 /* Impossible, internal error */ 1144 ret = -EPROTO; 1145 break; 1146 } 1147 1148 rte_spinlock_unlock(&mana_shared_data->lock); 1149 1150 return ret; 1151 } 1152 1153 /* 1154 * Probe an IB port 1155 * Return value: 1156 * positive value: successfully probed port 1157 * 0: port not matching specified MAC address 1158 * negative value: error code 1159 */ 1160 static int 1161 mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr, 1162 uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr) 1163 { 1164 struct mana_priv *priv = NULL; 1165 struct rte_eth_dev *eth_dev = NULL; 1166 struct ibv_parent_domain_init_attr attr = {0}; 1167 char address[64]; 1168 char name[RTE_ETH_NAME_MAX_LEN]; 1169 int ret; 1170 struct ibv_context *ctx = NULL; 1171 1172 rte_ether_format_addr(address, sizeof(address), addr); 1173 DRV_LOG(INFO, "device located port %u address %s", port, address); 1174 1175 priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE, 1176 SOCKET_ID_ANY); 1177 if (!priv) 1178 return -ENOMEM; 1179 1180 snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port); 1181 1182 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 1183 int fd; 1184 1185 eth_dev = rte_eth_dev_attach_secondary(name); 1186 if (!eth_dev) { 1187 DRV_LOG(ERR, "Can't attach to dev %s", name); 1188 ret = -ENOMEM; 1189 goto failed; 1190 } 1191 1192 eth_dev->device = &pci_dev->device; 1193 eth_dev->dev_ops = &mana_dev_secondary_ops; 1194 ret = mana_proc_priv_init(eth_dev); 1195 if (ret) 1196 goto failed; 1197 priv->process_priv = eth_dev->process_private; 1198 1199 /* Get the IB FD from the primary process */ 1200 fd = mana_mp_req_verbs_cmd_fd(eth_dev); 1201 if (fd < 0) { 1202 DRV_LOG(ERR, "Failed to get FD %d", fd); 1203 ret = -ENODEV; 1204 goto failed; 1205 } 1206 1207 ret = mana_map_doorbell_secondary(eth_dev, fd); 1208 if (ret) { 1209 DRV_LOG(ERR, "Failed secondary map %d", fd); 1210 goto failed; 1211 } 1212 1213 /* fd is no not used after mapping doorbell */ 1214 close(fd); 1215 1216 eth_dev->tx_pkt_burst = mana_tx_burst_removed; 1217 eth_dev->rx_pkt_burst = mana_rx_burst_removed; 1218 1219 rte_spinlock_lock(&mana_shared_data->lock); 1220 mana_shared_data->secondary_cnt++; 1221 mana_local_data.secondary_cnt++; 1222 rte_spinlock_unlock(&mana_shared_data->lock); 1223 1224 rte_eth_copy_pci_info(eth_dev, pci_dev); 1225 rte_eth_dev_probing_finish(eth_dev); 1226 1227 return 0; 1228 } 1229 1230 ctx = ibv_open_device(ibdev); 1231 if (!ctx) { 1232 DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name); 1233 ret = -ENODEV; 1234 goto failed; 1235 } 1236 1237 eth_dev = rte_eth_dev_allocate(name); 1238 if (!eth_dev) { 1239 ret = -ENOMEM; 1240 goto failed; 1241 } 1242 1243 eth_dev->data->mac_addrs = 1244 rte_calloc("mana_mac", 1, 1245 sizeof(struct rte_ether_addr), 0); 1246 if (!eth_dev->data->mac_addrs) { 1247 ret = -ENOMEM; 1248 goto failed; 1249 } 1250 1251 rte_ether_addr_copy(addr, eth_dev->data->mac_addrs); 1252 1253 priv->ib_pd = ibv_alloc_pd(ctx); 1254 if (!priv->ib_pd) { 1255 DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port); 1256 ret = -ENOMEM; 1257 goto failed; 1258 } 1259 1260 /* Create a parent domain with the port number */ 1261 attr.pd = priv->ib_pd; 1262 attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT; 1263 attr.pd_context = (void *)(uint64_t)port; 1264 priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr); 1265 if (!priv->ib_parent_pd) { 1266 DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port); 1267 ret = -ENOMEM; 1268 goto failed; 1269 } 1270 1271 priv->ib_ctx = ctx; 1272 priv->port_id = eth_dev->data->port_id; 1273 priv->dev_port = port; 1274 eth_dev->data->dev_private = priv; 1275 priv->dev_data = eth_dev->data; 1276 1277 priv->max_rx_queues = dev_attr->orig_attr.max_qp; 1278 priv->max_tx_queues = dev_attr->orig_attr.max_qp; 1279 1280 priv->max_rx_desc = 1281 RTE_MIN(dev_attr->orig_attr.max_qp_wr, 1282 dev_attr->orig_attr.max_cqe); 1283 priv->max_tx_desc = 1284 RTE_MIN(dev_attr->orig_attr.max_qp_wr, 1285 dev_attr->orig_attr.max_cqe); 1286 1287 priv->max_send_sge = dev_attr->orig_attr.max_sge; 1288 priv->max_recv_sge = dev_attr->orig_attr.max_sge; 1289 1290 priv->max_mr = dev_attr->orig_attr.max_mr; 1291 priv->max_mr_size = dev_attr->orig_attr.max_mr_size; 1292 1293 DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d", 1294 name, priv->max_rx_queues, priv->max_rx_desc, 1295 priv->max_send_sge); 1296 1297 rte_eth_copy_pci_info(eth_dev, pci_dev); 1298 1299 /* Create async interrupt handler */ 1300 ret = mana_intr_install(eth_dev, priv); 1301 if (ret) { 1302 DRV_LOG(ERR, "Failed to install intr handler"); 1303 goto failed; 1304 } 1305 1306 rte_spinlock_lock(&mana_shared_data->lock); 1307 mana_shared_data->primary_cnt++; 1308 rte_spinlock_unlock(&mana_shared_data->lock); 1309 1310 eth_dev->device = &pci_dev->device; 1311 1312 DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id); 1313 1314 eth_dev->rx_pkt_burst = mana_rx_burst_removed; 1315 eth_dev->tx_pkt_burst = mana_tx_burst_removed; 1316 eth_dev->dev_ops = &mana_dev_ops; 1317 1318 rte_eth_dev_probing_finish(eth_dev); 1319 1320 return 0; 1321 1322 failed: 1323 /* Free the resource for the port failed */ 1324 if (priv) { 1325 if (priv->ib_parent_pd) 1326 ibv_dealloc_pd(priv->ib_parent_pd); 1327 1328 if (priv->ib_pd) 1329 ibv_dealloc_pd(priv->ib_pd); 1330 } 1331 1332 if (eth_dev) 1333 rte_eth_dev_release_port(eth_dev); 1334 1335 rte_free(priv); 1336 1337 if (ctx) 1338 ibv_close_device(ctx); 1339 1340 return ret; 1341 } 1342 1343 /* 1344 * Goes through the IB device list to look for the IB port matching the 1345 * mac_addr. If found, create a rte_eth_dev for it. 1346 * Return value: number of successfully probed devices 1347 */ 1348 static int 1349 mana_pci_probe_mac(struct rte_pci_device *pci_dev, 1350 struct rte_ether_addr *mac_addr) 1351 { 1352 struct ibv_device **ibv_list; 1353 int ibv_idx; 1354 struct ibv_context *ctx; 1355 int num_devices; 1356 int ret; 1357 uint8_t port; 1358 int count = 0; 1359 1360 ibv_list = ibv_get_device_list(&num_devices); 1361 for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) { 1362 struct ibv_device *ibdev = ibv_list[ibv_idx]; 1363 struct rte_pci_addr pci_addr; 1364 struct ibv_device_attr_ex dev_attr; 1365 1366 DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s", 1367 ibdev->name, ibdev->dev_name, ibdev->ibdev_path); 1368 1369 if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr)) 1370 continue; 1371 1372 /* Ignore if this IB device is not this PCI device */ 1373 if (pci_dev->addr.domain != pci_addr.domain || 1374 pci_dev->addr.bus != pci_addr.bus || 1375 pci_dev->addr.devid != pci_addr.devid || 1376 pci_dev->addr.function != pci_addr.function) 1377 continue; 1378 1379 ctx = ibv_open_device(ibdev); 1380 if (!ctx) { 1381 DRV_LOG(ERR, "Failed to open IB device %s", 1382 ibdev->name); 1383 continue; 1384 } 1385 ret = ibv_query_device_ex(ctx, NULL, &dev_attr); 1386 ibv_close_device(ctx); 1387 1388 if (ret) { 1389 DRV_LOG(ERR, "Failed to query IB device %s", 1390 ibdev->name); 1391 continue; 1392 } 1393 1394 for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt; 1395 port++) { 1396 struct rte_ether_addr addr; 1397 ret = get_port_mac(ibdev, port, &addr); 1398 if (ret) 1399 continue; 1400 1401 if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr)) 1402 continue; 1403 1404 ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr); 1405 if (ret) { 1406 DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret); 1407 } else { 1408 count++; 1409 DRV_LOG(INFO, "Successfully probed on IB port %u", port); 1410 } 1411 } 1412 } 1413 1414 ibv_free_device_list(ibv_list); 1415 return count; 1416 } 1417 1418 /* 1419 * Main callback function from PCI bus to probe a device. 1420 */ 1421 static int 1422 mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, 1423 struct rte_pci_device *pci_dev) 1424 { 1425 struct rte_devargs *args = pci_dev->device.devargs; 1426 struct mana_conf conf = {0}; 1427 unsigned int i; 1428 int ret; 1429 int count = 0; 1430 1431 if (args && args->drv_str) { 1432 ret = mana_parse_args(args, &conf); 1433 if (ret) { 1434 DRV_LOG(ERR, "Failed to parse parameters args = %s", 1435 args->drv_str); 1436 return ret; 1437 } 1438 } 1439 1440 ret = mana_init_once(); 1441 if (ret) { 1442 DRV_LOG(ERR, "Failed to init PMD global data %d", ret); 1443 return ret; 1444 } 1445 1446 /* If there are no driver parameters, probe on all ports */ 1447 if (conf.index) { 1448 for (i = 0; i < conf.index; i++) 1449 count += mana_pci_probe_mac(pci_dev, 1450 &conf.mac_array[i]); 1451 } else { 1452 count = mana_pci_probe_mac(pci_dev, NULL); 1453 } 1454 1455 if (!count) { 1456 rte_memzone_free(mana_shared_mz); 1457 mana_shared_mz = NULL; 1458 ret = -ENODEV; 1459 } 1460 1461 return ret; 1462 } 1463 1464 static int 1465 mana_dev_uninit(struct rte_eth_dev *dev) 1466 { 1467 return mana_dev_close(dev); 1468 } 1469 1470 /* 1471 * Callback from PCI to remove this device. 1472 */ 1473 static int 1474 mana_pci_remove(struct rte_pci_device *pci_dev) 1475 { 1476 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 1477 rte_spinlock_lock(&mana_shared_data_lock); 1478 1479 rte_spinlock_lock(&mana_shared_data->lock); 1480 1481 RTE_VERIFY(mana_shared_data->primary_cnt > 0); 1482 mana_shared_data->primary_cnt--; 1483 if (!mana_shared_data->primary_cnt) { 1484 DRV_LOG(DEBUG, "mp uninit primary"); 1485 mana_mp_uninit_primary(); 1486 } 1487 1488 rte_spinlock_unlock(&mana_shared_data->lock); 1489 1490 /* Also free the shared memory if this is the last */ 1491 if (!mana_shared_data->primary_cnt) { 1492 DRV_LOG(DEBUG, "free shared memezone data"); 1493 rte_memzone_free(mana_shared_mz); 1494 mana_shared_mz = NULL; 1495 } 1496 1497 rte_spinlock_unlock(&mana_shared_data_lock); 1498 } else { 1499 rte_spinlock_lock(&mana_shared_data_lock); 1500 1501 rte_spinlock_lock(&mana_shared_data->lock); 1502 RTE_VERIFY(mana_shared_data->secondary_cnt > 0); 1503 mana_shared_data->secondary_cnt--; 1504 rte_spinlock_unlock(&mana_shared_data->lock); 1505 1506 RTE_VERIFY(mana_local_data.secondary_cnt > 0); 1507 mana_local_data.secondary_cnt--; 1508 if (!mana_local_data.secondary_cnt) { 1509 DRV_LOG(DEBUG, "mp uninit secondary"); 1510 mana_mp_uninit_secondary(); 1511 } 1512 1513 rte_spinlock_unlock(&mana_shared_data_lock); 1514 } 1515 1516 return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit); 1517 } 1518 1519 static const struct rte_pci_id mana_pci_id_map[] = { 1520 { 1521 RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 1522 PCI_DEVICE_ID_MICROSOFT_MANA) 1523 }, 1524 { 1525 .vendor_id = 0 1526 }, 1527 }; 1528 1529 static struct rte_pci_driver mana_pci_driver = { 1530 .id_table = mana_pci_id_map, 1531 .probe = mana_pci_probe, 1532 .remove = mana_pci_remove, 1533 .drv_flags = RTE_PCI_DRV_INTR_RMV, 1534 }; 1535 1536 RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver); 1537 RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map); 1538 RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib"); 1539 RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE); 1540 RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE); 1541 RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>"); 1542