/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */

#include <unistd.h>
#include <dirent.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <net/if.h>

#include <ethdev_driver.h>
#include <ethdev_pci.h>
#include <rte_kvargs.h>
#include <rte_eal_paging.h>

#include <infiniband/verbs.h>
#include <infiniband/manadv.h>

#include <assert.h>

#include "mana.h"

/* Shared memory between primary/secondary processes, per driver */
/* Data to track primary/secondary usage */
struct mana_shared_data *mana_shared_data;
static struct mana_shared_data mana_local_data;

/* The memory region for the above data */
static const struct rte_memzone *mana_shared_mz;
static const char *MZ_MANA_SHARED_DATA = "mana_shared_data";

/* Spinlock for mana_shared_data */
static rte_spinlock_t mana_shared_data_lock = RTE_SPINLOCK_INITIALIZER;

/* Allocate a buffer on the stack and fill it with a printf format string. */
#define MANA_MKSTR(name, ...) \
	int mkstr_size_##name = snprintf(NULL, 0, "" __VA_ARGS__); \
	char name[mkstr_size_##name + 1]; \
	\
	memset(name, 0, mkstr_size_##name + 1); \
	snprintf(name, sizeof(name), "" __VA_ARGS__)

int mana_logtype_driver;
int mana_logtype_init;

/*
 * Callback from rdma-core to allocate a buffer for a queue.
 */
void *
mana_alloc_verbs_buf(size_t size, void *data)
{
	void *ret;
	size_t alignment = rte_mem_page_size();
	int socket = (int)(uintptr_t)data;

	DRV_LOG(DEBUG, "size=%zu socket=%d", size, socket);

	if (alignment == (size_t)-1) {
		DRV_LOG(ERR, "Failed to get mem page size");
		rte_errno = ENOMEM;
		return NULL;
	}

	ret = rte_zmalloc_socket("mana_verb_buf", size, alignment, socket);
	if (!ret && size)
		rte_errno = ENOMEM;
	return ret;
}

void
mana_free_verbs_buf(void *ptr, void *data __rte_unused)
{
	rte_free(ptr);
}

static int
mana_dev_configure(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;

	if (dev_conf->rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG)
		dev_conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;

	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues) {
		DRV_LOG(ERR, "Only support equal number of rx/tx queues");
		return -EINVAL;
	}

	if (!rte_is_power_of_2(dev->data->nb_rx_queues)) {
		DRV_LOG(ERR, "number of TX/RX queues must be power of 2");
		return -EINVAL;
	}

	priv->num_queues = dev->data->nb_rx_queues;

	manadv_set_context_attr(priv->ib_ctx, MANADV_CTX_ATTR_BUF_ALLOCATORS,
				(void *)((uintptr_t)&(struct manadv_ctx_allocators){
					.alloc = &mana_alloc_verbs_buf,
					.free = &mana_free_verbs_buf,
					.data = 0,
				}));

	return 0;
}

static void
rx_intr_vec_disable(struct mana_priv *priv)
{
	struct rte_intr_handle *intr_handle = priv->intr_handle;

	rte_intr_free_epoll_fd(intr_handle);
	rte_intr_vec_list_free(intr_handle);
	rte_intr_nb_efd_set(intr_handle, 0);
}

static int
rx_intr_vec_enable(struct mana_priv *priv)
{
	unsigned int i;
	unsigned int rxqs_n = priv->dev_data->nb_rx_queues;
	unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
	struct rte_intr_handle *intr_handle = priv->intr_handle;
	int ret;

	rx_intr_vec_disable(priv);

	if (rte_intr_vec_list_alloc(intr_handle, NULL, n)) {
		DRV_LOG(ERR, "Failed to allocate memory for interrupt vector");
		return -ENOMEM;
	}

	for (i = 0; i < n; i++) {
		struct mana_rxq *rxq = priv->dev_data->rx_queues[i];

		ret = rte_intr_vec_list_index_set(intr_handle, i,
						  RTE_INTR_VEC_RXTX_OFFSET + i);
		if (ret) {
			DRV_LOG(ERR, "Failed to set intr vec %u", i);
			return ret;
		}

		ret = rte_intr_efds_index_set(intr_handle, i, rxq->channel->fd);
		if (ret) {
			DRV_LOG(ERR, "Failed to set FD at intr %u", i);
			return ret;
		}
	}

	return rte_intr_nb_efd_set(intr_handle, n);
}

static void
rxq_intr_disable(struct mana_priv *priv)
{
	int err = rte_errno;

	rx_intr_vec_disable(priv);
	rte_errno = err;
}

static int
rxq_intr_enable(struct mana_priv *priv)
{
	const struct rte_eth_intr_conf *const intr_conf =
		&priv->dev_data->dev_conf.intr_conf;

	if (!intr_conf->rxq)
		return 0;

	return rx_intr_vec_enable(priv);
}

static int
mana_dev_start(struct rte_eth_dev *dev)
{
	int ret;
	struct mana_priv *priv = dev->data->dev_private;

	rte_spinlock_init(&priv->mr_btree_lock);
	ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
				 dev->device->numa_node);
	if (ret) {
		DRV_LOG(ERR, "Failed to init device MR btree %d", ret);
		return ret;
	}

	ret = mana_start_tx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to start tx queues %d", ret);
		goto failed_tx;
	}

	ret = mana_start_rx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to start rx queues %d", ret);
		goto failed_rx;
	}

	rte_wmb();

	dev->tx_pkt_burst = mana_tx_burst;
	dev->rx_pkt_burst = mana_rx_burst;

	DRV_LOG(INFO, "TX/RX queues have started");

	/* Enable datapath for secondary processes */
	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);

	ret = rxq_intr_enable(priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to enable RX interrupts");
		goto failed_intr;
	}

	return 0;

failed_intr:
	mana_stop_rx_queues(dev);

failed_rx:
	mana_stop_tx_queues(dev);

failed_tx:
	mana_mr_btree_free(&priv->mr_btree);

	return ret;
}

static int
mana_dev_stop(struct rte_eth_dev *dev)
{
	int ret;
	struct mana_priv *priv = dev->data->dev_private;

	rxq_intr_disable(priv);

	dev->tx_pkt_burst = mana_tx_burst_removed;
	dev->rx_pkt_burst = mana_rx_burst_removed;

	/* Stop datapath on secondary processes */
	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_STOP_RXTX);

	rte_wmb();

	ret = mana_stop_tx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to stop tx queues");
		return ret;
	}

	ret = mana_stop_rx_queues(dev);
	if (ret) {
		DRV_LOG(ERR, "failed to stop rx queues");
		return ret;
	}

	return 0;
}

static int mana_intr_uninstall(struct mana_priv *priv);

static int
mana_dev_close(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret;

	mana_remove_all_mr(priv);

	ret = mana_intr_uninstall(priv);
	if (ret)
		return ret;

	ret = ibv_close_device(priv->ib_ctx);
	if (ret) {
		ret = errno;
		return ret;
	}

	return 0;
}

static int
mana_dev_info_get(struct rte_eth_dev *dev,
		  struct rte_eth_dev_info *dev_info)
{
	struct mana_priv *priv = dev->data->dev_private;

	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
	dev_info->max_mtu = MANA_MAX_MTU;

	/* RX params */
	dev_info->min_rx_bufsize = MIN_RX_BUF_SIZE;
	dev_info->max_rx_pktlen = MANA_MAX_MTU + RTE_ETHER_HDR_LEN;

	dev_info->max_rx_queues = priv->max_rx_queues;
	dev_info->max_tx_queues = priv->max_tx_queues;

	dev_info->max_mac_addrs = MANA_MAX_MAC_ADDR;
	dev_info->max_hash_mac_addrs = 0;

	dev_info->max_vfs = 1;

	/* Offload params */
	dev_info->rx_offload_capa = MANA_DEV_RX_OFFLOAD_SUPPORT;

	dev_info->tx_offload_capa = MANA_DEV_TX_OFFLOAD_SUPPORT;

	/* RSS */
	dev_info->reta_size = INDIRECTION_TABLE_NUM_ELEMENTS;
	dev_info->hash_key_size = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES;
	dev_info->flow_type_rss_offloads = MANA_ETH_RSS_SUPPORT;

	/* Thresholds */
	dev_info->default_rxconf = (struct rte_eth_rxconf){
		.rx_thresh = {
			.pthresh = 8,
			.hthresh = 8,
			.wthresh = 0,
		},
		.rx_free_thresh = 32,
		/* If no descriptors available, pkts are dropped by default */
		.rx_drop_en = 1,
	};

	dev_info->default_txconf = (struct rte_eth_txconf){
		.tx_thresh = {
			.pthresh = 32,
			.hthresh = 0,
			.wthresh = 0,
		},
		.tx_rs_thresh = 32,
		.tx_free_thresh = 32,
	};

	/* Buffer limits */
	dev_info->rx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
	dev_info->rx_desc_lim.nb_max = priv->max_rx_desc;
	dev_info->rx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
	dev_info->rx_desc_lim.nb_seg_max = priv->max_recv_sge;
	dev_info->rx_desc_lim.nb_mtu_seg_max = priv->max_recv_sge;

	dev_info->tx_desc_lim.nb_min = MIN_BUFFERS_PER_QUEUE;
	dev_info->tx_desc_lim.nb_max = priv->max_tx_desc;
	dev_info->tx_desc_lim.nb_align = MIN_BUFFERS_PER_QUEUE;
	dev_info->tx_desc_lim.nb_seg_max = priv->max_send_sge;
	dev_info->tx_desc_lim.nb_mtu_seg_max = priv->max_send_sge;

	/* Speed */
	dev_info->speed_capa = RTE_ETH_LINK_SPEED_100G;

	/* RX params */
	dev_info->default_rxportconf.burst_size = 1;
	dev_info->default_rxportconf.ring_size = MAX_RECEIVE_BUFFERS_PER_QUEUE;
	dev_info->default_rxportconf.nb_queues = 1;

	/* TX params */
	dev_info->default_txportconf.burst_size = 1;
	dev_info->default_txportconf.ring_size = MAX_SEND_BUFFERS_PER_QUEUE;
	dev_info->default_txportconf.nb_queues = 1;

	return 0;
}

static void
mana_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_txq_info *qinfo)
{
	struct mana_txq *txq = dev->data->tx_queues[queue_id];

	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
	qinfo->nb_desc = txq->num_desc;
}

static void
mana_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_rxq_info *qinfo)
{
	struct mana_rxq *rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mp;
	qinfo->nb_desc = rxq->num_desc;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
}

static const uint32_t *
mana_supported_ptypes(struct rte_eth_dev *dev __rte_unused)
{
	static const uint32_t ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_UNKNOWN
	};

	return ptypes;
}

static int
mana_rss_hash_update(struct rte_eth_dev *dev,
		     struct rte_eth_rss_conf *rss_conf)
{
	struct mana_priv *priv = dev->data->dev_private;

	/* Currently can only update RSS hash when device is stopped */
	if (dev->data->dev_started) {
		DRV_LOG(ERR, "Can't update RSS after device has started");
		return -ENODEV;
	}

	if (rss_conf->rss_hf & ~MANA_ETH_RSS_SUPPORT) {
		DRV_LOG(ERR, "Port %u invalid RSS HF 0x%" PRIx64,
			dev->data->port_id, rss_conf->rss_hf);
		return -EINVAL;
	}

	if (rss_conf->rss_key && rss_conf->rss_key_len) {
		if (rss_conf->rss_key_len != TOEPLITZ_HASH_KEY_SIZE_IN_BYTES) {
			DRV_LOG(ERR, "Port %u key len must be %u long",
				dev->data->port_id,
				TOEPLITZ_HASH_KEY_SIZE_IN_BYTES);
			return -EINVAL;
		}

		priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
		priv->rss_conf.rss_key =
			rte_zmalloc("mana_rss", rss_conf->rss_key_len,
				    RTE_CACHE_LINE_SIZE);
		if (!priv->rss_conf.rss_key)
			return -ENOMEM;
		memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
		       rss_conf->rss_key_len);
	}
	priv->rss_conf.rss_hf = rss_conf->rss_hf;

	return 0;
}

static int
mana_rss_hash_conf_get(struct rte_eth_dev *dev,
		       struct rte_eth_rss_conf *rss_conf)
{
	struct mana_priv *priv = dev->data->dev_private;

	if (!rss_conf)
		return -EINVAL;

	if (rss_conf->rss_key &&
	    rss_conf->rss_key_len >= priv->rss_conf.rss_key_len) {
		memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
		       priv->rss_conf.rss_key_len);
	}

	rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
	rss_conf->rss_hf = priv->rss_conf.rss_hf;

	return 0;
}

static int
mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			uint16_t nb_desc, unsigned int socket_id,
			const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct mana_txq *txq;
	int ret;

	txq = rte_zmalloc_socket("mana_txq", sizeof(*txq), 0, socket_id);
	if (!txq) {
		DRV_LOG(ERR, "failed to allocate txq");
		return -ENOMEM;
	}

	txq->socket = socket_id;

	txq->desc_ring = rte_malloc_socket("mana_tx_desc_ring",
					   sizeof(struct mana_txq_desc) *
					   nb_desc,
					   RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->desc_ring) {
		DRV_LOG(ERR, "failed to allocate txq desc_ring");
		ret = -ENOMEM;
		goto fail;
	}

	txq->gdma_comp_buf = rte_malloc_socket("mana_txq_comp",
					       sizeof(*txq->gdma_comp_buf) * nb_desc,
					       RTE_CACHE_LINE_SIZE, socket_id);
	if (!txq->gdma_comp_buf) {
		DRV_LOG(ERR, "failed to allocate txq comp");
		ret = -ENOMEM;
		goto fail;
	}

	ret = mana_mr_btree_init(&txq->mr_btree,
				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
	if (ret) {
		DRV_LOG(ERR, "Failed to init TXQ MR btree");
		goto fail;
	}

	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u txq->desc_ring %p",
		queue_idx, nb_desc, socket_id, txq->desc_ring);

	txq->desc_ring_head = 0;
	txq->desc_ring_tail = 0;
	txq->priv = priv;
	txq->num_desc = nb_desc;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;

fail:
	rte_free(txq->gdma_comp_buf);
	rte_free(txq->desc_ring);
	rte_free(txq);
	return ret;
}

static void
mana_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mana_txq *txq = dev->data->tx_queues[qid];

	mana_mr_btree_free(&txq->mr_btree);

	rte_free(txq->gdma_comp_buf);
	rte_free(txq->desc_ring);
	rte_free(txq);
}

static int
mana_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
			uint16_t nb_desc, unsigned int socket_id,
			const struct rte_eth_rxconf *rx_conf __rte_unused,
			struct rte_mempool *mp)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct mana_rxq *rxq;
	int ret;

	rxq = rte_zmalloc_socket("mana_rxq", sizeof(*rxq), 0, socket_id);
	if (!rxq) {
		DRV_LOG(ERR, "failed to allocate rxq");
		return -ENOMEM;
	}

	DRV_LOG(DEBUG, "idx %u nb_desc %u socket %u",
		queue_idx, nb_desc, socket_id);

	rxq->socket = socket_id;

	rxq->desc_ring = rte_zmalloc_socket("mana_rx_mbuf_ring",
					    sizeof(struct mana_rxq_desc) *
					    nb_desc,
					    RTE_CACHE_LINE_SIZE, socket_id);

	if (!rxq->desc_ring) {
		DRV_LOG(ERR, "failed to allocate rxq desc_ring");
		ret = -ENOMEM;
		goto fail;
	}

	rxq->desc_ring_head = 0;
	rxq->desc_ring_tail = 0;

	rxq->gdma_comp_buf = rte_malloc_socket("mana_rxq_comp",
					       sizeof(*rxq->gdma_comp_buf) * nb_desc,
					       RTE_CACHE_LINE_SIZE, socket_id);
	if (!rxq->gdma_comp_buf) {
		DRV_LOG(ERR, "failed to allocate rxq comp");
		ret = -ENOMEM;
		goto fail;
	}

	ret = mana_mr_btree_init(&rxq->mr_btree,
				 MANA_MR_BTREE_PER_QUEUE_N, socket_id);
	if (ret) {
		DRV_LOG(ERR, "Failed to init RXQ MR btree");
		goto fail;
	}

	rxq->priv = priv;
	rxq->num_desc = nb_desc;
	rxq->mp = mp;
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;

fail:
	rte_free(rxq->gdma_comp_buf);
	rte_free(rxq->desc_ring);
	rte_free(rxq);
	return ret;
}

static void
mana_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	struct mana_rxq *rxq = dev->data->rx_queues[qid];

	mana_mr_btree_free(&rxq->mr_btree);

	rte_free(rxq->gdma_comp_buf);
	rte_free(rxq->desc_ring);
	rte_free(rxq);
}

static int
mana_dev_link_update(struct rte_eth_dev *dev,
		     int wait_to_complete __rte_unused)
{
	struct rte_eth_link link;

	/* MANA has no concept of carrier state, always reporting UP */
	link = (struct rte_eth_link) {
		.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
		.link_autoneg = RTE_ETH_LINK_FIXED,
		.link_speed = RTE_ETH_SPEED_NUM_100G,
		.link_status = RTE_ETH_LINK_UP,
	};

	return rte_eth_linkstatus_set(dev, &link);
}

static int
mana_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned int i;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct mana_txq *txq = dev->data->tx_queues[i];

		if (!txq)
			continue;

		stats->opackets += txq->stats.packets;
		stats->obytes += txq->stats.bytes;
		stats->oerrors += txq->stats.errors;

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_opackets[i] = txq->stats.packets;
			stats->q_obytes[i] = txq->stats.bytes;
		}
	}

	stats->rx_nombuf = 0;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (!rxq)
			continue;

		stats->ipackets += rxq->stats.packets;
		stats->ibytes += rxq->stats.bytes;
		stats->ierrors += rxq->stats.errors;

		/* There is no good way to get stats->imissed, not setting it */

		if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
			stats->q_ipackets[i] = rxq->stats.packets;
			stats->q_ibytes[i] = rxq->stats.bytes;
		}

		stats->rx_nombuf += rxq->stats.nombuf;
	}

	return 0;
}

static int
mana_dev_stats_reset(struct rte_eth_dev *dev)
{
	unsigned int i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct mana_txq *txq = dev->data->tx_queues[i];

		if (!txq)
			continue;

		memset(&txq->stats, 0, sizeof(txq->stats));
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (!rxq)
			continue;

		memset(&rxq->stats, 0, sizeof(rxq->stats));
	}

	return 0;
}

static int
mana_get_ifname(const struct mana_priv *priv, char (*ifname)[IF_NAMESIZE])
{
	int ret = -ENODEV;	/* in case the directory is empty */
	DIR *dir;
	struct dirent *dent;

	MANA_MKSTR(dirpath, "%s/device/net", priv->ib_ctx->device->ibdev_path);

	dir = opendir(dirpath);
	if (dir == NULL)
		return -ENODEV;

	while ((dent = readdir(dir)) != NULL) {
		char *name = dent->d_name;
		FILE *file;
		struct rte_ether_addr addr;
		char *mac = NULL;

		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		MANA_MKSTR(path, "%s/%s/address", dirpath, name);

		file = fopen(path, "r");
		if (!file) {
			ret = -ENODEV;
			break;
		}

		ret = fscanf(file, "%ms", &mac);
		fclose(file);

		if (ret <= 0) {
			ret = -EINVAL;
			break;
		}

		ret = rte_ether_unformat_addr(mac, &addr);
		free(mac);
		if (ret)
			break;

		if (rte_is_same_ether_addr(&addr, priv->dev_data->mac_addrs)) {
			strlcpy(*ifname, name, sizeof(*ifname));
			ret = 0;
			break;
		}
	}

	closedir(dir);
	return ret;
}

static int
mana_ifreq(const struct mana_priv *priv, int req, struct ifreq *ifr)
{
	int sock, ret;

	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (sock == -1)
		return -errno;

	ret = mana_get_ifname(priv, &ifr->ifr_name);
	if (ret) {
		close(sock);
		return ret;
	}

	if (ioctl(sock, req, ifr) == -1)
		ret = -errno;

	close(sock);

	return ret;
}

static int
mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct mana_priv *priv = dev->data->dev_private;
	struct ifreq request = { .ifr_mtu = mtu, };

	return mana_ifreq(priv, SIOCSIFMTU, &request);
}

static const struct eth_dev_ops mana_dev_ops = {
	.dev_configure = mana_dev_configure,
	.dev_start = mana_dev_start,
	.dev_stop = mana_dev_stop,
	.dev_close = mana_dev_close,
	.dev_infos_get = mana_dev_info_get,
	.txq_info_get = mana_dev_tx_queue_info,
	.rxq_info_get = mana_dev_rx_queue_info,
	.dev_supported_ptypes_get = mana_supported_ptypes,
	.rss_hash_update = mana_rss_hash_update,
	.rss_hash_conf_get = mana_rss_hash_conf_get,
	.tx_queue_setup = mana_dev_tx_queue_setup,
	.tx_queue_release = mana_dev_tx_queue_release,
	.rx_queue_setup = mana_dev_rx_queue_setup,
	.rx_queue_release = mana_dev_rx_queue_release,
	.rx_queue_intr_enable = mana_rx_intr_enable,
	.rx_queue_intr_disable = mana_rx_intr_disable,
	.link_update = mana_dev_link_update,
	.stats_get = mana_dev_stats_get,
	.stats_reset = mana_dev_stats_reset,
	.mtu_set = mana_mtu_set,
};

static const struct eth_dev_ops mana_dev_secondary_ops = {
	.stats_get = mana_dev_stats_get,
	.stats_reset = mana_dev_stats_reset,
	.dev_infos_get = mana_dev_info_get,
};

uint16_t
mana_rx_burst_removed(void *dpdk_rxq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

uint16_t
mana_tx_burst_removed(void *dpdk_txq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

#define ETH_MANA_MAC_ARG "mac"
static const char * const mana_init_args[] = {
	ETH_MANA_MAC_ARG,
	NULL,
};

/* Support parsing up to 8 MAC addresses from the EAL command line */
#define MAX_NUM_ADDRESS 8
struct mana_conf {
	struct rte_ether_addr mac_array[MAX_NUM_ADDRESS];
	unsigned int index;
};

static int
mana_arg_parse_callback(const char *key, const char *val, void *private)
{
	struct mana_conf *conf = (struct mana_conf *)private;
	int ret;

	DRV_LOG(INFO, "key=%s value=%s index=%u", key, val, conf->index);

	if (conf->index >= MAX_NUM_ADDRESS) {
		DRV_LOG(ERR, "Exceeding max MAC address");
		return 1;
	}

	ret = rte_ether_unformat_addr(val, &conf->mac_array[conf->index]);
	if (ret) {
		DRV_LOG(ERR, "Invalid MAC address %s", val);
		return ret;
	}

	conf->index++;

	return 0;
}

static int
mana_parse_args(struct rte_devargs *devargs, struct mana_conf *conf)
{
	struct rte_kvargs *kvlist;
	unsigned int arg_count;
	int ret = 0;

	kvlist = rte_kvargs_parse(devargs->drv_str, mana_init_args);
	if (!kvlist) {
		DRV_LOG(ERR, "failed to parse kvargs args=%s", devargs->drv_str);
		return -EINVAL;
	}

	arg_count = rte_kvargs_count(kvlist, mana_init_args[0]);
	if (arg_count > MAX_NUM_ADDRESS) {
		ret = -EINVAL;
		goto free_kvlist;
	}
	ret = rte_kvargs_process(kvlist, mana_init_args[0],
				 mana_arg_parse_callback, conf);
	if (ret) {
		DRV_LOG(ERR, "error parsing args");
		goto free_kvlist;
	}

free_kvlist:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
get_port_mac(struct ibv_device *device, unsigned int port,
	     struct rte_ether_addr *addr)
{
	FILE *file;
	int ret = 0;
	DIR *dir;
	struct dirent *dent;
	unsigned int dev_port;

	MANA_MKSTR(path, "%s/device/net", device->ibdev_path);

	dir = opendir(path);
	if (!dir)
		return -ENOENT;

	while ((dent = readdir(dir))) {
		char *name = dent->d_name;
		char *mac = NULL;

		MANA_MKSTR(port_path, "%s/%s/dev_port", path, name);

		/* Ignore . and .. */
		if ((name[0] == '.') &&
		    ((name[1] == '\0') ||
		     ((name[1] == '.') && (name[2] == '\0'))))
			continue;

		file = fopen(port_path, "r");
		if (!file)
			continue;

		ret = fscanf(file, "%u", &dev_port);
		fclose(file);

		if (ret != 1)
			continue;

		/* Ethernet ports start at 0, IB ports start at 1 */
		if (dev_port == port - 1) {
			MANA_MKSTR(address_path, "%s/%s/address", path, name);

			file = fopen(address_path, "r");
			if (!file)
				continue;

			ret = fscanf(file, "%ms", &mac);
			fclose(file);

			if (ret < 0)
				break;

			ret = rte_ether_unformat_addr(mac, addr);
			if (ret)
				DRV_LOG(ERR, "unrecognized mac addr %s", mac);

			free(mac);
			break;
		}
	}

	closedir(dir);
	return ret;
}

static int
mana_ibv_device_to_pci_addr(const struct ibv_device *device,
			    struct rte_pci_addr *pci_addr)
{
	FILE *file;
	char *line = NULL;
	size_t len = 0;

	MANA_MKSTR(path, "%s/device/uevent", device->ibdev_path);

	file = fopen(path, "r");
	if (!file)
		return -errno;

	while (getline(&line, &len, file) != -1) {
		/* Extract the PCI address from the PCI_SLOT_NAME line */
		if (sscanf(line,
			   "PCI_SLOT_NAME="
			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
			   &pci_addr->domain,
			   &pci_addr->bus,
			   &pci_addr->devid,
			   &pci_addr->function) == 4) {
			break;
		}
	}

	free(line);
	fclose(file);
	return 0;
}

/*
 * Interrupt handler from IB layer to notify this device is being removed.
 */
static void
mana_intr_handler(void *arg)
{
	struct mana_priv *priv = arg;
	struct ibv_context *ctx = priv->ib_ctx;
	struct ibv_async_event event;

	/* Read and ack all messages from IB device */
	while (true) {
		if (ibv_get_async_event(ctx, &event))
			break;

		if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
			struct rte_eth_dev *dev;

			dev = &rte_eth_devices[priv->port_id];
			if (dev->data->dev_conf.intr_conf.rmv)
				rte_eth_dev_callback_process(dev,
					RTE_ETH_EVENT_INTR_RMV, NULL);
		}

		ibv_ack_async_event(&event);
	}
}

static int
mana_intr_uninstall(struct mana_priv *priv)
{
	int ret;

	ret = rte_intr_callback_unregister(priv->intr_handle,
					   mana_intr_handler, priv);
	if (ret <= 0) {
		DRV_LOG(ERR, "Failed to unregister intr callback ret %d", ret);
		return ret;
	}

	rte_intr_instance_free(priv->intr_handle);

	return 0;
}

int
mana_fd_set_non_blocking(int fd)
{
	int ret = fcntl(fd, F_GETFL);

	if (ret != -1 && !fcntl(fd, F_SETFL, ret | O_NONBLOCK))
		return 0;

	rte_errno = errno;
	return -rte_errno;
}

static int
mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
{
	int ret;
	struct ibv_context *ctx = priv->ib_ctx;

	priv->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
	if (!priv->intr_handle) {
		DRV_LOG(ERR, "Failed to allocate intr_handle");
		rte_errno = ENOMEM;
		return -ENOMEM;
	}

	ret = rte_intr_fd_set(priv->intr_handle, -1);
	if (ret)
		goto free_intr;

	ret = mana_fd_set_non_blocking(ctx->async_fd);
	if (ret) {
		DRV_LOG(ERR, "Failed to change async_fd to NONBLOCK");
		goto free_intr;
	}

	ret = rte_intr_fd_set(priv->intr_handle, ctx->async_fd);
	if (ret)
		goto free_intr;

	ret = rte_intr_type_set(priv->intr_handle, RTE_INTR_HANDLE_EXT);
	if (ret)
		goto free_intr;

	ret = rte_intr_callback_register(priv->intr_handle,
					 mana_intr_handler, priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to register intr callback");
		rte_intr_fd_set(priv->intr_handle, -1);
		goto free_intr;
	}

	eth_dev->intr_handle = priv->intr_handle;
	return 0;

free_intr:
	rte_intr_instance_free(priv->intr_handle);
	priv->intr_handle = NULL;

	return ret;
}

static int
mana_proc_priv_init(struct rte_eth_dev *dev)
{
	struct mana_process_priv *priv;

	priv = rte_zmalloc_socket("mana_proc_priv",
				  sizeof(struct mana_process_priv),
				  RTE_CACHE_LINE_SIZE,
				  dev->device->numa_node);
	if (!priv)
		return -ENOMEM;

	dev->process_private = priv;
	return 0;
}

/*
 * Map the doorbell page for the secondary process through IB device handle.
 */
static int
mana_map_doorbell_secondary(struct rte_eth_dev *eth_dev, int fd)
{
	struct mana_process_priv *priv = eth_dev->process_private;
	void *addr;

	addr = mmap(NULL, rte_mem_page_size(), PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		DRV_LOG(ERR, "Failed to map secondary doorbell port %u",
			eth_dev->data->port_id);
		return -ENOMEM;
	}

	DRV_LOG(INFO, "Secondary doorbell mapped to %p", addr);

	priv->db_page = addr;

	return 0;
}

/* Initialize shared data for the driver (all devices) */
static int
mana_init_shared_data(void)
{
	int ret = 0;
	const struct rte_memzone *secondary_mz;

	rte_spinlock_lock(&mana_shared_data_lock);

	/* Skip if shared data is already initialized */
	if (mana_shared_data)
		goto exit;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		mana_shared_mz = rte_memzone_reserve(MZ_MANA_SHARED_DATA,
						     sizeof(*mana_shared_data),
						     SOCKET_ID_ANY, 0);
		if (!mana_shared_mz) {
			DRV_LOG(ERR, "Cannot allocate mana shared data");
			ret = -rte_errno;
			goto exit;
		}

		mana_shared_data = mana_shared_mz->addr;
		memset(mana_shared_data, 0, sizeof(*mana_shared_data));
		rte_spinlock_init(&mana_shared_data->lock);
	} else {
		secondary_mz = rte_memzone_lookup(MZ_MANA_SHARED_DATA);
		if (!secondary_mz) {
			DRV_LOG(ERR, "Cannot attach mana shared data");
			ret = -rte_errno;
			goto exit;
		}

		mana_shared_data = secondary_mz->addr;
		memset(&mana_local_data, 0, sizeof(mana_local_data));
	}

exit:
	rte_spinlock_unlock(&mana_shared_data_lock);

	return ret;
}

/*
 * Init the data structures for use in primary and secondary processes.
 */
static int
mana_init_once(void)
{
	int ret;

	ret = mana_init_shared_data();
	if (ret)
		return ret;

	rte_spinlock_lock(&mana_shared_data->lock);

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		if (mana_shared_data->init_done)
			break;

		ret = mana_mp_init_primary();
		if (ret)
			break;
		DRV_LOG(DEBUG, "MP INIT PRIMARY");

		mana_shared_data->init_done = 1;
		break;

	case RTE_PROC_SECONDARY:
		if (mana_local_data.init_done)
			break;

		ret = mana_mp_init_secondary();
		if (ret)
			break;

		DRV_LOG(DEBUG, "MP INIT SECONDARY");

		mana_local_data.init_done = 1;
		break;

	default:
		/* Impossible, internal error */
		ret = -EPROTO;
		break;
	}

	rte_spinlock_unlock(&mana_shared_data->lock);

	return ret;
}

/*
 * Probe an IB port
 * Return value:
 * 0: port successfully probed
 * negative value: error code
 */
static int
mana_probe_port(struct ibv_device *ibdev, struct ibv_device_attr_ex *dev_attr,
		uint8_t port, struct rte_pci_device *pci_dev, struct rte_ether_addr *addr)
{
	struct mana_priv *priv = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct ibv_parent_domain_init_attr attr = {0};
	char address[64];
	char name[RTE_ETH_NAME_MAX_LEN];
	int ret;
	struct ibv_context *ctx = NULL;

	rte_ether_format_addr(address, sizeof(address), addr);
	DRV_LOG(INFO, "device located port %u address %s", port, address);

	priv = rte_zmalloc_socket(NULL, sizeof(*priv), RTE_CACHE_LINE_SIZE,
				  SOCKET_ID_ANY);
	if (!priv)
		return -ENOMEM;

	snprintf(name, sizeof(name), "%s_port%d", pci_dev->device.name, port);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		int fd;

		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			DRV_LOG(ERR, "Can't attach to dev %s", name);
			ret = -ENOMEM;
			goto failed;
		}

		eth_dev->device = &pci_dev->device;
		eth_dev->dev_ops = &mana_dev_secondary_ops;
		ret = mana_proc_priv_init(eth_dev);
		if (ret)
			goto failed;
		priv->process_priv = eth_dev->process_private;

		/* Get the IB FD from the primary process */
		fd = mana_mp_req_verbs_cmd_fd(eth_dev);
		if (fd < 0) {
			DRV_LOG(ERR, "Failed to get FD %d", fd);
			ret = -ENODEV;
			goto failed;
		}

		ret = mana_map_doorbell_secondary(eth_dev, fd);
		if (ret) {
			DRV_LOG(ERR, "Failed secondary map %d", fd);
			goto failed;
		}

		/* fd is not used after mapping doorbell */
		close(fd);

		eth_dev->tx_pkt_burst = mana_tx_burst_removed;
		eth_dev->rx_pkt_burst = mana_rx_burst_removed;

		rte_spinlock_lock(&mana_shared_data->lock);
		mana_shared_data->secondary_cnt++;
		mana_local_data.secondary_cnt++;
		rte_spinlock_unlock(&mana_shared_data->lock);

		rte_eth_copy_pci_info(eth_dev, pci_dev);
		rte_eth_dev_probing_finish(eth_dev);

		return 0;
	}

	ctx = ibv_open_device(ibdev);
	if (!ctx) {
		DRV_LOG(ERR, "Failed to open IB device %s", ibdev->name);
		ret = -ENODEV;
		goto failed;
	}

	eth_dev = rte_eth_dev_allocate(name);
	if (!eth_dev) {
		ret = -ENOMEM;
		goto failed;
	}

	eth_dev->data->mac_addrs =
		rte_calloc("mana_mac", 1,
			   sizeof(struct rte_ether_addr), 0);
	if (!eth_dev->data->mac_addrs) {
		ret = -ENOMEM;
		goto failed;
	}

	rte_ether_addr_copy(addr, eth_dev->data->mac_addrs);

	priv->ib_pd = ibv_alloc_pd(ctx);
	if (!priv->ib_pd) {
		DRV_LOG(ERR, "ibv_alloc_pd failed port %d", port);
		ret = -ENOMEM;
		goto failed;
	}

	/* Create a parent domain with the port number */
	attr.pd = priv->ib_pd;
	attr.comp_mask = IBV_PARENT_DOMAIN_INIT_ATTR_PD_CONTEXT;
	attr.pd_context = (void *)(uintptr_t)port;
	priv->ib_parent_pd = ibv_alloc_parent_domain(ctx, &attr);
	if (!priv->ib_parent_pd) {
		DRV_LOG(ERR, "ibv_alloc_parent_domain failed port %d", port);
		ret = -ENOMEM;
		goto failed;
	}

	priv->ib_ctx = ctx;
	priv->port_id = eth_dev->data->port_id;
	priv->dev_port = port;
	eth_dev->data->dev_private = priv;
	priv->dev_data = eth_dev->data;

	priv->max_rx_queues = dev_attr->orig_attr.max_qp;
	priv->max_tx_queues = dev_attr->orig_attr.max_qp;

	priv->max_rx_desc =
		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
			dev_attr->orig_attr.max_cqe);
	priv->max_tx_desc =
		RTE_MIN(dev_attr->orig_attr.max_qp_wr,
			dev_attr->orig_attr.max_cqe);

	priv->max_send_sge = dev_attr->orig_attr.max_sge;
	priv->max_recv_sge = dev_attr->orig_attr.max_sge;

	priv->max_mr = dev_attr->orig_attr.max_mr;
	priv->max_mr_size = dev_attr->orig_attr.max_mr_size;

	DRV_LOG(INFO, "dev %s max queues %d desc %d sge %d",
		name, priv->max_rx_queues, priv->max_rx_desc,
		priv->max_send_sge);

	rte_eth_copy_pci_info(eth_dev, pci_dev);

	/* Create async interrupt handler */
	ret = mana_intr_install(eth_dev, priv);
	if (ret) {
		DRV_LOG(ERR, "Failed to install intr handler");
		goto failed;
	}

	rte_spinlock_lock(&mana_shared_data->lock);
	mana_shared_data->primary_cnt++;
	rte_spinlock_unlock(&mana_shared_data->lock);

	eth_dev->device = &pci_dev->device;

	DRV_LOG(INFO, "device %s at port %u", name, eth_dev->data->port_id);

	eth_dev->rx_pkt_burst = mana_rx_burst_removed;
	eth_dev->tx_pkt_burst = mana_tx_burst_removed;
	eth_dev->dev_ops = &mana_dev_ops;

	rte_eth_dev_probing_finish(eth_dev);

	return 0;

failed:
	/* Free the resources for the failed port */
	if (priv) {
		if (priv->ib_parent_pd)
			ibv_dealloc_pd(priv->ib_parent_pd);

		if (priv->ib_pd)
			ibv_dealloc_pd(priv->ib_pd);
	}

	if (eth_dev)
		rte_eth_dev_release_port(eth_dev);

	rte_free(priv);

	if (ctx)
		ibv_close_device(ctx);

	return ret;
}

/*
 * Go through the IB device list to find the IB port matching mac_addr.
 * If found, create an rte_eth_dev for it.
 * Return value: number of successfully probed devices
 */
static int
mana_pci_probe_mac(struct rte_pci_device *pci_dev,
		   struct rte_ether_addr *mac_addr)
{
	struct ibv_device **ibv_list;
	int ibv_idx;
	struct ibv_context *ctx;
	int num_devices;
	int ret;
	uint8_t port;
	int count = 0;

	ibv_list = ibv_get_device_list(&num_devices);
	for (ibv_idx = 0; ibv_idx < num_devices; ibv_idx++) {
		struct ibv_device *ibdev = ibv_list[ibv_idx];
		struct rte_pci_addr pci_addr;
		struct ibv_device_attr_ex dev_attr;

		DRV_LOG(INFO, "Probe device name %s dev_name %s ibdev_path %s",
			ibdev->name, ibdev->dev_name, ibdev->ibdev_path);

		if (mana_ibv_device_to_pci_addr(ibdev, &pci_addr))
			continue;

		/* Ignore if this IB device is not this PCI device */
		if (pci_dev->addr.domain != pci_addr.domain ||
		    pci_dev->addr.bus != pci_addr.bus ||
		    pci_dev->addr.devid != pci_addr.devid ||
		    pci_dev->addr.function != pci_addr.function)
			continue;

		ctx = ibv_open_device(ibdev);
		if (!ctx) {
			DRV_LOG(ERR, "Failed to open IB device %s",
				ibdev->name);
			continue;
		}
		ret = ibv_query_device_ex(ctx, NULL, &dev_attr);
		ibv_close_device(ctx);

		if (ret) {
			DRV_LOG(ERR, "Failed to query IB device %s",
				ibdev->name);
			continue;
		}

		for (port = 1; port <= dev_attr.orig_attr.phys_port_cnt;
		     port++) {
			struct rte_ether_addr addr;

			ret = get_port_mac(ibdev, port, &addr);
			if (ret)
				continue;

			if (mac_addr && !rte_is_same_ether_addr(&addr, mac_addr))
				continue;

			ret = mana_probe_port(ibdev, &dev_attr, port, pci_dev, &addr);
			if (ret) {
				DRV_LOG(ERR, "Probe on IB port %u failed %d", port, ret);
			} else {
				count++;
				DRV_LOG(INFO, "Successfully probed on IB port %u", port);
			}
		}
	}

	ibv_free_device_list(ibv_list);
	return count;
}

/*
 * Main callback function from PCI bus to probe a device.
 */
static int
mana_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	       struct rte_pci_device *pci_dev)
{
	struct rte_devargs *args = pci_dev->device.devargs;
	struct mana_conf conf = {0};
	unsigned int i;
	int ret;
	int count = 0;

	if (args && args->drv_str) {
		ret = mana_parse_args(args, &conf);
		if (ret) {
			DRV_LOG(ERR, "Failed to parse parameters args = %s",
				args->drv_str);
			return ret;
		}
	}

	ret = mana_init_once();
	if (ret) {
		DRV_LOG(ERR, "Failed to init PMD global data %d", ret);
		return ret;
	}

	/* Probe only the specified MAC addresses; with no driver parameters, probe on all ports */
	if (conf.index) {
		for (i = 0; i < conf.index; i++)
			count += mana_pci_probe_mac(pci_dev,
						    &conf.mac_array[i]);
	} else {
		count = mana_pci_probe_mac(pci_dev, NULL);
	}

	if (!count) {
		rte_memzone_free(mana_shared_mz);
		mana_shared_mz = NULL;
		ret = -ENODEV;
	}

	return ret;
}

static int
mana_dev_uninit(struct rte_eth_dev *dev)
{
	return mana_dev_close(dev);
}

/*
 * Callback from PCI to remove this device.
 */
static int
mana_pci_remove(struct rte_pci_device *pci_dev)
{
	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		rte_spinlock_lock(&mana_shared_data_lock);

		rte_spinlock_lock(&mana_shared_data->lock);

		RTE_VERIFY(mana_shared_data->primary_cnt > 0);
		mana_shared_data->primary_cnt--;
		if (!mana_shared_data->primary_cnt) {
			DRV_LOG(DEBUG, "mp uninit primary");
			mana_mp_uninit_primary();
		}

		rte_spinlock_unlock(&mana_shared_data->lock);

		/* Also free the shared memory if this is the last device */
		if (!mana_shared_data->primary_cnt) {
			DRV_LOG(DEBUG, "free shared memzone data");
			rte_memzone_free(mana_shared_mz);
			mana_shared_mz = NULL;
		}

		rte_spinlock_unlock(&mana_shared_data_lock);
	} else {
		rte_spinlock_lock(&mana_shared_data_lock);

		rte_spinlock_lock(&mana_shared_data->lock);
		RTE_VERIFY(mana_shared_data->secondary_cnt > 0);
		mana_shared_data->secondary_cnt--;
		rte_spinlock_unlock(&mana_shared_data->lock);

		RTE_VERIFY(mana_local_data.secondary_cnt > 0);
		mana_local_data.secondary_cnt--;
		if (!mana_local_data.secondary_cnt) {
			DRV_LOG(DEBUG, "mp uninit secondary");
			mana_mp_uninit_secondary();
		}

		rte_spinlock_unlock(&mana_shared_data_lock);
	}

	return rte_eth_dev_pci_generic_remove(pci_dev, mana_dev_uninit);
}

static const struct rte_pci_id mana_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT,
			       PCI_DEVICE_ID_MICROSOFT_MANA)
	},
	{
		.vendor_id = 0
	},
};

static struct rte_pci_driver mana_pci_driver = {
	.id_table = mana_pci_id_map,
	.probe = mana_pci_probe,
	.remove = mana_pci_remove,
	.drv_flags = RTE_PCI_DRV_INTR_RMV,
};

RTE_PMD_REGISTER_PCI(net_mana, mana_pci_driver);
RTE_PMD_REGISTER_PCI_TABLE(net_mana, mana_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_mana, "* ib_uverbs & mana_ib");
RTE_LOG_REGISTER_SUFFIX(mana_logtype_init, init, NOTICE);
RTE_LOG_REGISTER_SUFFIX(mana_logtype_driver, driver, NOTICE);
RTE_PMD_REGISTER_PARAM_STRING(net_mana, ETH_MANA_MAC_ARG "=<mac_addr>");