/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <net/if.h>

#include <ethdev_driver.h>
#include <ethdev_pci.h>
#include <rte_kvargs.h>
#include <rte_eal_paging.h>
#include <rte_pci.h>

#include <infiniband/verbs.h>
#include <infiniband/manadv.h>

#include <assert.h>

#include "mana.h"

/* Shared memory between primary/secondary processes, per driver */
/* Data to track primary/secondary usage */
struct mana_shared_data *mana_shared_data;
static struct mana_shared_data mana_local_data;

/* The memory region for the above data */
static const struct rte_memzone *mana_shared_mz;
static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";

/* Spinlock for mana_shared_data */
static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;

/* Allocate a buffer on the stack and fill it with a printf format string. */
#define MANA_MKSTR(name, ...) \
	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
	char name[mkstr_size_##name + 1]; \
	\
	memset(name, 0, mkstr_size_##name + 1); \
	snprintf(name, sizeof(name), "" __VA_ARGS__)

int mana_logtype_driver;
int mana_logtype_init;

/*
 * Callback from rdma-core to allocate a buffer for a queue.
 */
void *
mana_alloc_verbs_buf(size_t size, void *data)
{
	void *ret;
	size_t alignment = rte_mem_page_size();
	int socket = (int)(uintptr_t)data;

	DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);

	if (alignment == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return NULL;
	}

	ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
	if (!ret && size)
		rte_errno = ENOMEM;
	return ret;
}

void
mana_free_verbs_buf(void *ptr, void *data __rte_unused)
{
	rte_free(ptr);
}

static int
mana_dev_configure(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;

	if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
		dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;

	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
		DRV_LOG(ERR, "Only support equal number of rx/tx queues");
		return -EINVAL;
	}

	if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
		DRV_LOG(ERR, "number of TX/RX queues must be power of 2");
		return -EINVAL;
	}

	priv->vlan_strip = !!(dev_conf->rxmode.offloads &
			      RTE_ETH_RX_OFFLOAD_VLAN_STRIP);

	priv->num_queues = dev->data->nb_rx_queues;

	manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
		(void *)((uintptr_t)&(struct manadv_ctx_allocators){
			.alloc = &mana_alloc_verbs_buf,
			.free = &mana_free_verbs_buf,
			.data = 0,
		}));

	return 0;
}

static void
rx_intr_vec_disable(struct mana_priv *priv)
{
	struct rte_intr_handle *intr_handle = priv->intr_handle;

	rte_intr_free_epoll_fd(intr_handle);
	rte_intr_vec_list_free(intr_handle);
	rte_intr_nb_efd_set(intr_handle, 0);
}

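/*
 * Build the Rx interrupt vector list, mapping each Rx queue's completion
 * channel FD to an interrupt vector so applications can wait for Rx events
 * through the ethdev Rx interrupt API.
 */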
static int
rx_intr_vec_enable(struct mana_priv *priv)
{
	unsigned int i;
	unsigned int rxqs_n = priv->dev_data->nb_rx_queues;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	struct rte_intr_handle *intr_handle = priv->intr_handle;
	int ret;

	rx_intr_vec_disable(priv);

	if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
		DRV_LOG(ERR, "Failed to allocate memory for interrupt vector");
		return -ENOMEM;
	}

	for (i = 0; i < n; i++) {
		struct mana_rxq *rxq = priv->dev_data->rx_queues[i];

		ret = rte_intr_vec_list_index_set(intr_handle, i,
						  RTE_INTR_VEC_RXTX_OFFSET + i);
		if (ret) {
			DRV_LOG(ERR, "Failed to set intr vec %u", i);
			return ret;
		}

		ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd);
		if (ret) {
			DRV_LOG(ERR, "Failed to set FD at intr %u", i);
			return ret;
		}
	}

	return rte_intr_nb_efd_set(intr_handle, n);
}

static void
rxq_intr_disable(struct mana_priv *priv)
{
	int err = rte_errno;

	rx_intr_vec_disable(priv);
	rte_errno = err;
}

static int
rxq_intr_enable(struct mana_priv *priv)
{
	const struct rte_eth_intr_conf *const intr_conf =
		&priv->dev_data->dev_conf.intr_conf;

	if (!intr_conf->rxq)
		return 0;

	return rx_intr_vec_enable(priv);
}

static int
mana_dev_start(struct rte_eth_dev *dev)
{
	int ret;
	struct mana_priv *priv = dev->data->dev_private;

	rte_spinlock_init(&priv->mr_btree_lock);
	ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
				 dev->device->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to init device MR btree %d", ret);
		return ret;
	}

	ret = mana_start_tx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to start tx queues %d", ret);
		goto failed_tx;
	}

	ret = mana_start_rx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to start rx queues %d", ret);
		goto failed_rx;
	}

	rte_wmb();

	dev->tx_pkt_burst = mana_tx_burst;
	dev->rx_pkt_burst = mana_rx_burst;

	DRV_LOG(INFO, "TX/RX queues have started");

	/* Enable datapath for secondary processes */
	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);

	ret = rxq_intr_enable(priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to enable RX interrupts");
		goto failed_intr;
	}

	return 0;

failed_intr:
	mana_stop_rx_queues(dev);

failed_rx:
	mana_stop_tx_queues(dev);

failed_tx:
	mana_mr_btree_free(&priv->mr_btree);

	return ret;
}

static int
mana_dev_stop(struct rte_eth_dev *dev)
{
	int ret;
	struct mana_priv *priv = dev->data->dev_private;

	rxq_intr_disable(priv);

	dev->tx_pkt_burst = mana_tx_burst_removed;
	dev->rx_pkt_burst = mana_rx_burst_removed;

	/* Stop datapath on secondary processes */
	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX);

	rte_wmb();

	ret = mana_stop_tx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to stop tx queues");
		return ret;
	}

	ret = mana_stop_rx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to stop rx queues");
		return ret;
	}

	return 0;
}

static int mana_intr_uninstall(struct mana_priv *priv);

static int
mana_dev_close(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret;

	mana_remove_all_mr(priv);

	ret = mana_intr_uninstall(priv);
	if (ret)
		return ret;

	ret = ibv_close_device(priv->ib_ctx);
	if (ret) {
		ret = errno;
		return ret;
	}

	return 0;
}

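/*
 * Report device capabilities, limits and default configuration. The queue,
 * descriptor and SGE limits reflect the values queried from the IB device
 * at probe time.
 */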
static int
mana_dev_info_get(struct rte_eth_dev *dev,
		  struct rte_eth_dev_info *dev_info)
{
	struct mana_priv *priv = dev->data->dev_private;

	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
	dev_info->max_mtu = MANA_MAX_MTU;

	/* RX params */
	dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
	dev_info->max_rx_pktlen = MANA_MAX_MTU + RTE_ETHER_HDR_LEN;

	dev_info->max_rx_queues = RTE_MIN(priv->max_rx_queues, UINT16_MAX);
	dev_info->max_tx_queues = RTE_MIN(priv->max_tx_queues, UINT16_MAX);

	dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
	dev_info->max_hash_mac_addrs = 0;

	dev_info->max_vfs = 1;

	/* Offload params */
	dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;

	dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;

	/* RSS */
	dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
	dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
	dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;

	/* Thresholds */
	dev_info->default_rxconf = (struct rte_eth_rxconf){
		.rx_thresh = {
			.pthresh = 8,
			.hthresh = 8,
			.wthresh = 0,
		},
		.rx_free_thresh = 32,
		/* If no descriptors available, pkts are dropped by default */
		.rx_drop_en = 1,
	};

	dev_info->default_txconf = (struct rte_eth_txconf){
		.tx_thresh = {
			.pthresh = 32,
			.hthresh = 0,
			.wthresh = 0,
		},
		.tx_rs_thresh = 32,
		.tx_free_thresh = 32,
	};

	/* Buffer limits */
	dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
	dev_info->rx_desc_lim.nb_max = RTE_MIN(priv->max_rx_desc, UINT16_MAX);
	dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
	dev_info->rx_desc_lim.nb_seg_max =
		RTE_MIN(priv->max_recv_sge, UINT16_MAX);
	dev_info->rx_desc_lim.nb_mtu_seg_max =
		RTE_MIN(priv->max_recv_sge, UINT16_MAX);

	dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
	dev_info->tx_desc_lim.nb_max = RTE_MIN(priv->max_tx_desc, UINT16_MAX);
	dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
	dev_info->tx_desc_lim.nb_seg_max =
		RTE_MIN(priv->max_send_sge, UINT16_MAX);
	dev_info->tx_desc_lim.nb_mtu_seg_max =
		RTE_MIN(priv->max_send_sge, UINT16_MAX);

	/* Speed */
	dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;

	/* RX params */
	dev_info->default_rxportconf.burst_size = 1;
	dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
	dev_info->default_rxportconf.nb_queues = 1;

	/* TX params */
	dev_info->default_txportconf.burst_size = 1;
	dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
	dev_info->default_txportconf.nb_queues = 1;

	return 0;
}

static void
mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_txq_info *qinfo)
{
	struct mana_txq *txq = dev->data->tx_queues[queue_id];

	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
	qinfo->nb_desc = txq->num_desc;
}

static void
mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_rxq_info *qinfo)
{
	struct mana_rxq *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mp;
	qinfo->nb_desc = rxq->num_desc;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

static const uint32_t *
mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused,
		      size_t *no_of_elements)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
	};

	*no_of_elements = RTE_DIM(ptypes);
	return ptypes;
}

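/*
 * Validate and cache the RSS hash fields and Toeplitz key in the device
 * private data. Updates are only accepted while the device is stopped.
 */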
static int
mana_rss_hash_update(struct rte_eth_dev *dev,
		     struct rte_eth_rss_conf *rss_conf)
{
	struct mana_priv *priv = dev->data->dev_private;

	/* Currently can only update RSS hash when device is stopped */
	if (dev->data->dev_started) {
		DRV_LOG(ERR, "Can't update RSS after device has started");
		return -ENODEV;
	}

	if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
		DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
			dev->data->port_id, rss_conf->rss_hf);
		return -EINVAL;
	}

	if (rss_conf->rss_key && rss_conf->rss_key_len) {
		if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
			DRV_LOG(ERR, "Port %u key len must be %u long",
				dev->data->port_id,
				TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
			return -EINVAL;
		}

		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
		priv->rss_conf.rss_key =
			rte_zmalloc("mana_rss", rss_conf->rss_key_len,
				    RTE_CACHE_LINE_SIZE);
		if (!priv->rss_conf.rss_key)
			return -ENOMEM;
		memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
		       rss_conf->rss_key_len);
	}
	priv->rss_conf.rss_hf = rss_conf->rss_hf;

	return 0;
}

static int
mana_rss_hash_conf_get(struct rte_eth_dev *dev,
		       struct rte_eth_rss_conf *rss_conf)
{
	struct mana_priv *priv = dev->data->dev_private;

	if (!rss_conf)
		return -EINVAL;

	if (rss_conf->rss_key &&
	    rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
		memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
		       priv->rss_conf.rss_key_len);
	}

	rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
	rss_conf->rss_hf = priv->rss_conf.rss_hf;

	return 0;
}

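/*
 * Queue setup only allocates the software state (shadow descriptor ring,
 * GDMA completion buffer and per-queue MR btree cache) on the requested
 * socket; the underlying queues are created later when the port is started
 * (mana_start_tx_queues()/mana_start_rx_queues()).
 */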
static int
mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			uint16_t nb_desc, unsigned int socket_id,
			const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct mana_txq *txq;
	int ret;

	txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id);
	if (!txq) {
		DRV_LOG(ERR, "failed to allocate txq");
		return -ENOMEM;
	}

	txq->socket = socket_id;

	txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring",
					   sizeof(struct mana_txq_desc) *
						nb_desc,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->desc_ring) {
		DRV_LOG(ERR, "failed to allocate txq desc_ring");
		ret = -ENOMEM;
		goto fail;
	}

	txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp",
			sizeof(*txq->gdma_comp_buf) * nb_desc,
			RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->gdma_comp_buf) {
		DRV_LOG(ERR, "failed to allocate txq comp");
		ret = -ENOMEM;
		goto fail;
	}

	ret = mana_mr_btree_init(&txq->mr_btree,
				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
	if (ret) {
		DRV_LOG(ERR, "Failed to init TXQ MR btree");
		goto fail;
	}

	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p",
		queue_idx, nb_desc, socket_id, txq->desc_ring);

	txq->desc_ring_head = 0;
	txq->desc_ring_tail = 0;
	txq->priv = priv;
	txq->num_desc = nb_desc;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;

fail:
	rte_free(txq->gdma_comp_buf);
	rte_free(txq->desc_ring);
	rte_free(txq);
	return ret;
}

static void
mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mana_txq *txq = dev->data->tx_queues[qid];

	mana_mr_btree_free(&txq->mr_btree);

	rte_free(txq->gdma_comp_buf);
	rte_free(txq->desc_ring);
	rte_free(txq);
}

static int
mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			uint16_t nb_desc, unsigned int socket_id,
			const struct rte_eth_rxconf *rx_conf __rte_unused,
			struct rte_mempool *mp)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct mana_rxq *rxq;
	int ret;

	rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
	if (!rxq) {
		DRV_LOG(ERR, "failed to allocate rxq");
		return -ENOMEM;
	}

	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
		queue_idx, nb_desc, socket_id);

	rxq->socket = socket_id;

	rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
					    sizeof(struct mana_rxq_desc) *
						nb_desc,
					    RTE_CACHE_LINE_SIZE, socket_id);

	if (!rxq->desc_ring) {
		DRV_LOG(ERR, "failed to allocate rxq desc_ring");
		ret = -ENOMEM;
		goto fail;
	}

	rxq->desc_ring_head = 0;
	rxq->desc_ring_tail = 0;

	rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp",
			sizeof(*rxq->gdma_comp_buf) * nb_desc,
			RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->gdma_comp_buf) {
		DRV_LOG(ERR, "failed to allocate rxq comp");
		ret = -ENOMEM;
		goto fail;
	}

	ret = mana_mr_btree_init(&rxq->mr_btree,
				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
	if (ret) {
		DRV_LOG(ERR, "Failed to init RXQ MR btree");
		goto fail;
	}

	rxq->priv = priv;
	rxq->num_desc = nb_desc;
	rxq->mp = mp;
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;

fail:
	rte_free(rxq->gdma_comp_buf);
	rte_free(rxq->desc_ring);
	rte_free(rxq);
	return ret;
}

static void
mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mana_rxq *rxq = dev->data->rx_queues[qid];

	mana_mr_btree_free(&rxq->mr_btree);

	rte_free(rxq->gdma_comp_buf);
	rte_free(rxq->desc_ring);
	rte_free(rxq);
}

static int
mana_dev_link_update(struct rte_eth_dev *dev,
		     int wait_to_complete __rte_unused)
{
	struct rte_eth_link link;

	/* MANA has no concept of carrier state, always reporting UP */
	link = (struct rte_eth_link) {
		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
		.link_autoneg = RTE_ETH_LINK_SPEED_FIXED,
		.link_speed = RTE_ETH_SPEED_NUM_100G,
		.link_status = RTE_ETH_LINK_UP,
	};

	return rte_eth_linkstatus_set(dev, &link);
}

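/*
 * Aggregate the per-queue counters kept by the driver into the ethdev stats.
 * Per-queue entries are only filled for the first RTE_ETHDEV_QUEUE_STAT_CNTRS
 * queues.
 */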
static int
mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct mana_txq *txq = dev->data->tx_queues[i];

		if (!txq)
			continue;

		stats->opackets += txq->stats.packets;
		stats->obytes += txq->stats.bytes;
		stats->oerrors += txq->stats.errors;

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_opackets[i] = txq->stats.packets;
			stats->q_obytes[i] = txq->stats.bytes;
		}
	}

	stats->rx_nombuf = 0;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (!rxq)
			continue;

		stats->ipackets += rxq->stats.packets;
		stats->ibytes += rxq->stats.bytes;
		stats->ierrors += rxq->stats.errors;

		/* There is no good way to get stats->imissed, not setting it */

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_ipackets[i] = rxq->stats.packets;
			stats->q_ibytes[i] = rxq->stats.bytes;
		}

		stats->rx_nombuf += rxq->stats.nombuf;
	}

	return 0;
}

static int
mana_dev_stats_reset(struct rte_eth_dev *dev)
{
	unsigned int i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct mana_txq *txq = dev->data->tx_queues[i];

		if (!txq)
			continue;

		memset(&txq->stats, 0, sizeof(txq->stats));
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (!rxq)
			continue;

		memset(&rxq->stats, 0, sizeof(rxq->stats));
	}

	return 0;
}

static int
mana_get_ifname(const struct mana_priv *priv, char (*ifname)[IF_NAMESIZE])
{
	int ret = -ENODEV;
	DIR *dir;
	struct dirent *dent;

	MANA_MKSTR(dirpath, "%s/device/net", priv->ib_ctx->device->ibdev_path);

	dir = opendir(dirpath);
	if (dir == NULL)
		return -ENODEV;

	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		struct rte_ether_addr addr;
		char *mac = NULL;

		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MANA_MKSTR(path, "%s/%s/address", dirpath, name);

		file = fopen(path, "r");
		if (!file) {
			ret = -ENODEV;
			break;
		}

		ret = fscanf(file, "%ms", &mac);
		fclose(file);

		if (ret <= 0) {
			ret = -EINVAL;
			break;
		}

		ret = rte_ether_unformat_addr(mac, &addr);
		free(mac);
		if (ret)
			break;

		if (rte_is_same_ether_addr(&addr, priv->dev_data->mac_addrs)) {
			strlcpy(*ifname, name, sizeof(*ifname));
			ret = 0;
			break;
		}
	}

	closedir(dir);
	return ret;
}

static int
mana_ifreq(const struct mana_priv *priv, int req, struct ifreq *ifr)
{
	int sock, ret;

	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (sock == -1)
		return -errno;

	ret = mana_get_ifname(priv, &ifr->ifr_name);
	if (ret) {
		close(sock);
		return ret;
	}

	if (ioctl(sock, req, ifr) == -1)
		ret = -errno;

	close(sock);

	return ret;
}

static int
mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct ifreq request = { .ifr_mtu = mtu, };

	return mana_ifreq(priv, SIOCSIFMTU, &request);
}

static const struct eth_dev_ops mana_dev_ops = {
	.dev_configure = mana_dev_configure,
	.dev_start = mana_dev_start,
	.dev_stop = mana_dev_stop,
	.dev_close = mana_dev_close,
	.dev_infos_get = mana_dev_info_get,
	.txq_info_get = mana_dev_tx_queue_info,
	.rxq_info_get = mana_dev_rx_queue_info,
	.dev_supported_ptypes_get = mana_supported_ptypes,
	.rss_hash_update = mana_rss_hash_update,
	.rss_hash_conf_get = mana_rss_hash_conf_get,
	.tx_queue_setup = mana_dev_tx_queue_setup,
	.tx_queue_release = mana_dev_tx_queue_release,
	.rx_queue_setup = mana_dev_rx_queue_setup,
	.rx_queue_release = mana_dev_rx_queue_release,
	.rx_queue_intr_enable = mana_rx_intr_enable,
	.rx_queue_intr_disable = mana_rx_intr_disable,
	.link_update = mana_dev_link_update,
	.stats_get = mana_dev_stats_get,
	.stats_reset = mana_dev_stats_reset,
	.mtu_set = mana_mtu_set,
};

static const struct eth_dev_ops mana_dev_secondary_ops = {
	.stats_get = mana_dev_stats_get,
	.stats_reset = mana_dev_stats_reset,
	.dev_infos_get = mana_dev_info_get,
};

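/*
 * Dummy burst functions installed while the datapath is not started, so
 * that concurrent datapath calls are harmless and return zero packets.
 */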
uint16_t
mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

uint16_t
mana_tx_burst_removed(void *dpdk_rxq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

#define ETH_MANA_MAC_ARG "mac"
static const char * const mana_init_args[] = {
	ETH_MANA_MAC_ARG,
	NULL,
};

/* Support parsing up to 8 MAC addresses from the EAL command line */
#define MAX_NUM_ADDRESS 8
struct mana_conf {
	struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
	unsigned int index;
};

static int
mana_arg_parse_callback(const char *key, const char *val, void *private)
{
	struct mana_conf *conf = (struct mana_conf *)private;
	int ret;

	DRV_LOG(INFO, "key=%s value=%s index=%d", key, val, conf->index);

	if (conf->index >= MAX_NUM_ADDRESS) {
		DRV_LOG(ERR, "Exceeding max MAC address");
		return 1;
	}

	ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
	if (ret) {
		DRV_LOG(ERR, "Invalid MAC address %s", val);
		return ret;
	}

	conf->index++;

	return 0;
}

static int
mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
{
	struct rte_kvargs *kvlist;
	unsigned int arg_count;
	int ret = 0;

	kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
	if (!kvlist) {
		DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
		return -EINVAL;
	}

	arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
	if (arg_count > MAX_NUM_ADDRESS) {
		ret = -EINVAL;
		goto free_kvlist;
	}
	ret = rte_kvargs_process(kvlist, mana_init_args[0],
				 mana_arg_parse_callback, conf);
	if (ret) {
		DRV_LOG(ERR, "error parsing args");
		goto free_kvlist;
	}

free_kvlist:
	rte_kvargs_free(kvlist);
	return ret;
}

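/*
 * Look up the MAC address of an IB port by scanning the netdev entries under
 * <ibdev_path>/device/net in sysfs and matching on the dev_port attribute.
 */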
static int
get_port_mac(struct ibv_device *device, unsigned int port,
	     struct rte_ether_addr *addr)
{
	FILE *file;
	int ret = 0;
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_port;

	MANA_MKSTR(path, "%s/device/net", device->ibdev_path);

	dir = opendir(path);
	if (!dir)
		return -ENOENT;

	while ((dent = readdir(dir))) {
		char *name = dent->d_name;
		char *mac = NULL;

		MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);

		/* Ignore . and .. */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		file = fopen(port_path, "r");
		if (!file)
			continue;

		ret = fscanf(file, "%u", &dev_port);
		fclose(file);

		if (ret != 1)
			continue;

		/* Ethernet ports start at 0, IB ports start at 1 */
		if (dev_port == port - 1) {
			MANA_MKSTR(address_path, "%s/%s/address", path, name);

			file = fopen(address_path, "r");
			if (!file)
				continue;

			ret = fscanf(file, "%ms", &mac);
			fclose(file);

			if (ret < 0)
				break;

			ret = rte_ether_unformat_addr(mac, addr);
			if (ret)
				DRV_LOG(ERR, "unrecognized mac addr %s", mac);

			free(mac);
			break;
		}
	}

	closedir(dir);
	return ret;
}

static int
mana_ibv_device_to_pci_addr(const struct ibv_device *device,
			    struct rte_pci_addr *pci_addr)
{
	FILE *file;
	char *line = NULL;
	size_t len = 0;

	MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);

	file = fopen(path, "r");
	if (!file)
		return -errno;

	while (getline(&line, &len, file) != -1) {
		/* Extract information. */
		if (sscanf(line,
			   "PCI_SLOT_NAME="
			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
			   &pci_addr->domain,
			   &pci_addr->bus,
			   &pci_addr->devid,
			   &pci_addr->function) == 4) {
			break;
		}
	}

	free(line);
	fclose(file);
	return 0;
}

/*
 * Interrupt handler from IB layer to notify this device is being removed.
 */
static void
mana_intr_handler(void *arg)
{
	struct mana_priv *priv = arg;
	struct ibv_context *ctx = priv->ib_ctx;
	struct ibv_async_event event;

	/* Read and ack all messages from IB device */
	while (true) {
		if (ibv_get_async_event(ctx, &event))
			break;

		if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
			struct rte_eth_dev *dev;

			dev = &rte_eth_devices[priv->port_id];
			if (dev->data->dev_conf.intr_conf.rmv)
				rte_eth_dev_callback_process(dev,
						RTE_ETH_EVENT_INTR_RMV, NULL);
		}

		ibv_ack_async_event(&event);
	}
}

static int
mana_intr_uninstall(struct mana_priv *priv)
{
	int ret;

	ret = rte_intr_callback_unregister(priv->intr_handle,
					   mana_intr_handler, priv);
	if (ret <= 0) {
		DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
		return ret;
	}

	rte_intr_instance_free(priv->intr_handle);

	return 0;
}

int
mana_fd_set_non_blocking(int fd)
{
	int ret = fcntl(fd, F_GETFL);

	if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
		return 0;

	rte_errno = errno;
	return -rte_errno;
}

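/*
 * Register an external interrupt handler on the verbs async FD. The FD is
 * switched to non-blocking mode so the handler can drain all pending events,
 * and device-fatal events are forwarded to applications as
 * RTE_ETH_EVENT_INTR_RMV (see mana_intr_handler).
 */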
static int
mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
{
	int ret;
	struct ibv_context *ctx = priv->ib_ctx;

	priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
	if (!priv->intr_handle) {
		DRV_LOG(ERR, "Failed to allocate intr_handle");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}

	ret = rte_intr_fd_set(priv->intr_handle, -1);
	if (ret)
		goto free_intr;

	ret = mana_fd_set_non_blocking(ctx->async_fd);
	if (ret) {
		DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
		goto free_intr;
	}

	ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
	if (ret)
		goto free_intr;

	ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
	if (ret)
		goto free_intr;

	ret = rte_intr_callback_register(priv->intr_handle,
					 mana_intr_handler, priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to register intr callback");
		rte_intr_fd_set(priv->intr_handle, -1);
		goto free_intr;
	}

	eth_dev->intr_handle = priv->intr_handle;
	return 0;

free_intr:
	rte_intr_instance_free(priv->intr_handle);
	priv->intr_handle = NULL;

	return ret;
}

static int
mana_proc_priv_init(struct rte_eth_dev *dev)
{
	struct mana_process_priv *priv;

	priv = rte_zmalloc_socket("mana_proc_priv",
				  sizeof(struct mana_process_priv),
				  RTE_CACHE_LINE_SIZE,
				  dev->device->numa_node);
	if (!priv)
		return -ENOMEM;

	dev->process_private = priv;
	return 0;
}

/*
 * Map the doorbell page for the secondary process through IB device handle.
 */
static int
mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
{
	struct mana_process_priv *priv = eth_dev->process_private;

	void *addr;

	addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
			eth_dev->data->port_id);
		return -ENOMEM;
	}

	DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);

	priv->db_page = addr;

	return 0;
}

/* Initialize shared data for the driver (all devices) */
static int
mana_init_shared_data(void)
{
	int ret = 0;
	const struct rte_memzone *secondary_mz;

	rte_spinlock_lock(&mana_shared_data_lock);

	/* Skip if shared data is already initialized */
	if (mana_shared_data)
		goto exit;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
						     sizeof(*mana_shared_data),
						     SOCKET_ID_ANY, 0);
		if (!mana_shared_mz) {
			DRV_LOG(ERR, "Cannot allocate mana shared data");
			ret = -rte_errno;
			goto exit;
		}

		mana_shared_data = mana_shared_mz->addr;
		memset(mana_shared_data, 0, sizeof(*mana_shared_data));
		rte_spinlock_init(&mana_shared_data->lock);
	} else {
		secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
		if (!secondary_mz) {
			DRV_LOG(ERR, "Cannot attach mana shared data");
			ret = -rte_errno;
			goto exit;
		}

		mana_shared_data = secondary_mz->addr;
		memset(&mana_local_data, 0, sizeof(mana_local_data));
	}

exit:
	rte_spinlock_unlock(&mana_shared_data_lock);

	return ret;
}

/*
 * Init the data structures for use in primary and secondary processes.
 */
static int
mana_init_once(void)
{
	int ret;

	ret = mana_init_shared_data();
	if (ret)
		return ret;

	rte_spinlock_lock(&mana_shared_data->lock);

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		if (mana_shared_data->init_done)
			break;

		ret = mana_mp_init_primary();
		if (ret)
			break;
		DRV_LOG(ERR, "MP INIT PRIMARY");

		mana_shared_data->init_done = 1;
		break;

	case RTE_PROC_SECONDARY:

		if (mana_local_data.init_done)
			break;

		ret = mana_mp_init_secondary();
		if (ret)
			break;

		DRV_LOG(ERR, "MP INIT SECONDARY");

		mana_local_data.init_done = 1;
		break;

	default:
		/* Impossible, internal error */
		ret = -EPROTO;
		break;
	}

	rte_spinlock_unlock(&mana_shared_data->lock);

	return ret;
}

/*
 * Probe an IB port.
 * Return value:
 * 0: port successfully probed
 * negative value: error code
 */
static int
mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
		uint8_t port, struct rte_pci_device *pci_dev,
		struct rte_ether_addr *addr)
{
	struct mana_priv *priv = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct ibv_parent_domain_init_attr attr = {0};
	char address[64];
	char name[RTE_ETH_NAME_MAX_LEN];
	int ret;
	struct ibv_context *ctx = NULL;

	rte_ether_format_addr(address, sizeof(address), addr);
	DRV_LOG(INFO, "device located port %u address %s", port, address);

	priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
				  SOCKET_ID_ANY);
	if (!priv)
		return -ENOMEM;

	snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		int fd;

		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			DRV_LOG(ERR, "Can't attach to dev %s", name);
			ret = -ENOMEM;
			goto failed;
		}

		eth_dev->device = &pci_dev->device;
		eth_dev->dev_ops = &mana_dev_secondary_ops;
		ret = mana_proc_priv_init(eth_dev);
		if (ret)
			goto failed;
		priv->process_priv = eth_dev->process_private;

		/* Get the IB FD from the primary process */
		fd = mana_mp_req_verbs_cmd_fd(eth_dev);
		if (fd < 0) {
			DRV_LOG(ERR, "Failed to get FD %d", fd);
			ret = -ENODEV;
			goto failed;
		}

		ret = mana_map_doorbell_secondary(eth_dev, fd);
		if (ret) {
			DRV_LOG(ERR, "Failed secondary map %d", fd);
			goto failed;
		}

		/* fd is not used after mapping the doorbell */
		close(fd);

		eth_dev->tx_pkt_burst = mana_tx_burst;
		eth_dev->rx_pkt_burst = mana_rx_burst;

		rte_spinlock_lock(&mana_shared_data->lock);
		mana_shared_data->secondary_cnt++;
		mana_local_data.secondary_cnt++;
		rte_spinlock_unlock(&mana_shared_data->lock);

		rte_eth_copy_pci_info(eth_dev, pci_dev);
		rte_eth_dev_probing_finish(eth_dev);

		return 0;
	}

	ctx = ibv_open_device(ibdev);
	if (!ctx) {
		DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
		ret = -ENODEV;
		goto failed;
	}

	eth_dev = rte_eth_dev_allocate(name);
	if (!eth_dev) {
		ret = -ENOMEM;
		goto failed;
	}

	eth_dev->data->mac_addrs =
		rte_calloc("mana_mac", 1,
			   sizeof(struct rte_ether_addr), 0);
	if (!eth_dev->data->mac_addrs) {
		ret = -ENOMEM;
		goto failed;
	}

	rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);

	priv->ib_pd = ibv_alloc_pd(ctx);
	if (!priv->ib_pd) {
		DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
		ret = -ENOMEM;
		goto failed;
	}

	/* Create a parent domain with the port number */
	attr.pd = priv->ib_pd;
	attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
	attr.pd_context = (void *)(uintptr_t)port;
	priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
	if (!priv->ib_parent_pd) {
		DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
		ret = -ENOMEM;
		goto failed;
	}

	priv->ib_ctx = ctx;
	priv->port_id = eth_dev->data->port_id;
	priv->dev_port = port;
	eth_dev->data->dev_private = priv;
	priv->dev_data = eth_dev->data;

	priv->max_rx_queues = dev_attr->orig_attr.max_qp;
	priv->max_tx_queues = dev_attr->orig_attr.max_qp;

	priv->max_rx_desc =
		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
			dev_attr->orig_attr.max_cqe);
	priv->max_tx_desc =
		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
			dev_attr->orig_attr.max_cqe);

	priv->max_send_sge = dev_attr->orig_attr.max_sge;
	priv->max_recv_sge = dev_attr->orig_attr.max_sge;

	priv->max_mr = dev_attr->orig_attr.max_mr;
	priv->max_mr_size = dev_attr->orig_attr.max_mr_size;

	DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d mr %" PRIu64,
		name, priv->max_rx_queues, priv->max_rx_desc,
		priv->max_send_sge, priv->max_mr_size);

	rte_eth_copy_pci_info(eth_dev, pci_dev);

	/* Create async interrupt handler */
	ret = mana_intr_install(eth_dev, priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to install intr handler");
		goto failed;
	}

	rte_spinlock_lock(&mana_shared_data->lock);
	mana_shared_data->primary_cnt++;
	rte_spinlock_unlock(&mana_shared_data->lock);

	eth_dev->device = &pci_dev->device;

	DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);

	eth_dev->rx_pkt_burst = mana_rx_burst_removed;
	eth_dev->tx_pkt_burst = mana_tx_burst_removed;
	eth_dev->dev_ops = &mana_dev_ops;

	rte_eth_dev_probing_finish(eth_dev);

	return 0;

failed:
	/* Free the resources allocated for the failed port */
	if (priv) {
		if (priv->ib_parent_pd)
			ibv_dealloc_pd(priv->ib_parent_pd);

		if (priv->ib_pd)
			ibv_dealloc_pd(priv->ib_pd);
	}

	if (eth_dev)
		rte_eth_dev_release_port(eth_dev);

	rte_free(priv);

	if (ctx)
		ibv_close_device(ctx);

	return ret;
}

/*
 * Go through the IB device list to look for the IB port matching mac_addr.
 * If found, create a rte_eth_dev for it.
 * Return value: number of successfully probed devices
 */
static int
mana_pci_probe_mac(struct rte_pci_device *pci_dev,
		   struct rte_ether_addr *mac_addr)
{
	struct ibv_device **ibv_list;
	int ibv_idx;
	struct ibv_context *ctx;
	int num_devices;
	int ret;
	uint8_t port;
	int count = 0;

	ibv_list = ibv_get_device_list(&num_devices);
	for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
		struct ibv_device *ibdev = ibv_list[ibv_idx];
		struct rte_pci_addr pci_addr;
		struct ibv_device_attr_ex dev_attr;

		DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
			ibdev->name, ibdev->dev_name, ibdev->ibdev_path);

		if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
			continue;

		/* Ignore if this IB device is not this PCI device */
		if (rte_pci_addr_cmp(&pci_dev->addr, &pci_addr) != 0)
			continue;

		ctx = ibv_open_device(ibdev);
		if (!ctx) {
			DRV_LOG(ERR, "Failed to open IB device %s",
				ibdev->name);
			continue;
		}
		ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
		ibv_close_device(ctx);

		if (ret) {
			DRV_LOG(ERR, "Failed to query IB device %s",
				ibdev->name);
			continue;
		}

		for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
		     port++) {
			struct rte_ether_addr addr;
			ret = get_port_mac(ibdev, port, &addr);
			if (ret)
				continue;

			if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
				continue;

			ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
			if (ret) {
				DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
			} else {
				count++;
				DRV_LOG(INFO, "Successfully probed on IB port %u", port);
			}
		}
	}

	ibv_free_device_list(ibv_list);
	return count;
}

/*
 * Main callback function from PCI bus to probe a device.
 */
static int
mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	       struct rte_pci_device *pci_dev)
{
	struct rte_devargs *args = pci_dev->device.devargs;
	struct mana_conf conf = {0};
	unsigned int i;
	int ret;
	int count = 0;

	if (args && args->drv_str) {
		ret = mana_parse_args(args, &conf);
		if (ret) {
			DRV_LOG(ERR, "Failed to parse parameters args = %s",
				args->drv_str);
			return ret;
		}
	}

	ret = mana_init_once();
	if (ret) {
		DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
		return ret;
	}

	/* Probe ports matching the specified MACs, or all ports if none given */
	if (conf.index) {
		for (i = 0; i < conf.index; i++)
			count += mana_pci_probe_mac(pci_dev,
						    &conf.mac_array[i]);
	} else {
		count = mana_pci_probe_mac(pci_dev, NULL);
	}

	if (!count) {
		rte_memzone_free(mana_shared_mz);
		mana_shared_mz = NULL;
		ret = -ENODEV;
	}

	return ret;
}

static int
mana_dev_uninit(struct rte_eth_dev *dev)
{
	return mana_dev_close(dev);
}

/*
 * Callback from PCI to remove this device.
 */
static int
mana_pci_remove(struct rte_pci_device *pci_dev)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		rte_spinlock_lock(&mana_shared_data_lock);

		rte_spinlock_lock(&mana_shared_data->lock);

		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
		mana_shared_data->primary_cnt--;
		if (!mana_shared_data->primary_cnt) {
			DRV_LOG(DEBUG, "mp uninit primary");
			mana_mp_uninit_primary();
		}

		rte_spinlock_unlock(&mana_shared_data->lock);

		/* Also free the shared memory if this is the last */
		if (!mana_shared_data->primary_cnt) {
			DRV_LOG(DEBUG, "free shared memzone data");
			rte_memzone_free(mana_shared_mz);
			mana_shared_mz = NULL;
		}

		rte_spinlock_unlock(&mana_shared_data_lock);
	} else {
		rte_spinlock_lock(&mana_shared_data_lock);

		rte_spinlock_lock(&mana_shared_data->lock);
		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
		mana_shared_data->secondary_cnt--;
		rte_spinlock_unlock(&mana_shared_data->lock);

		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
		mana_local_data.secondary_cnt--;
		if (!mana_local_data.secondary_cnt) {
			DRV_LOG(DEBUG, "mp uninit secondary");
			mana_mp_uninit_secondary();
		}

		rte_spinlock_unlock(&mana_shared_data_lock);
	}

	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
}

static const struct rte_pci_id mana_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
			       PCI_DEVICE_ID_MICROSOFT_MANA)
	},
	{
		.vendor_id = 0
	},
};

static struct rte_pci_driver mana_pci_driver = {
	.id_table = mana_pci_id_map,
	.probe = mana_pci_probe,
	.remove = mana_pci_remove,
	.drv_flags = RTE_PCI_DRV_INTR_RMV,
};

RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");