1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2022 Microsoft Corporation 3 */ 4 #include <ethdev_driver.h> 5 6 #include <infiniband/verbs.h> 7 #include <infiniband/manadv.h> 8 9 #include "mana.h" 10 11 static uint8_t mana_rss_hash_key_default[TOEPLITZ_HASH_KEY_SIZE_IN_BYTES] = { 12 0x2c, 0xc6, 0x81, 0xd1, 13 0x5b, 0xdb, 0xf4, 0xf7, 14 0xfc, 0xa2, 0x83, 0x19, 15 0xdb, 0x1a, 0x3e, 0x94, 16 0x6b, 0x9e, 0x38, 0xd9, 17 0x2c, 0x9c, 0x03, 0xd1, 18 0xad, 0x99, 0x44, 0xa7, 19 0xd9, 0x56, 0x3d, 0x59, 20 0x06, 0x3c, 0x25, 0xf3, 21 0xfc, 0x1f, 0xdc, 0x2a, 22 }; 23 24 int 25 mana_rq_ring_doorbell(struct mana_rxq *rxq) 26 { 27 struct mana_priv *priv = rxq->priv; 28 int ret; 29 void *db_page = priv->db_page; 30 31 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 32 struct rte_eth_dev *dev = 33 &rte_eth_devices[priv->dev_data->port_id]; 34 struct mana_process_priv *process_priv = dev->process_private; 35 36 db_page = process_priv->db_page; 37 } 38 39 /* Hardware Spec specifies that software client should set 0 for 40 * wqe_cnt for Receive Queues. 41 */ 42 #ifdef RTE_ARCH_32 43 ret = mana_ring_short_doorbell(db_page, GDMA_QUEUE_RECEIVE, 44 rxq->gdma_rq.id, 45 rxq->wqe_cnt_to_short_db * 46 GDMA_WQE_ALIGNMENT_UNIT_SIZE, 47 0); 48 #else 49 ret = mana_ring_doorbell(db_page, GDMA_QUEUE_RECEIVE, 50 rxq->gdma_rq.id, 51 rxq->gdma_rq.head * GDMA_WQE_ALIGNMENT_UNIT_SIZE, 52 0); 53 #endif 54 55 if (ret) 56 DP_LOG(ERR, "failed to ring RX doorbell ret %d", ret); 57 58 return ret; 59 } 60 61 static int 62 mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq) 63 { 64 struct rte_mbuf *mbuf = NULL; 65 struct gdma_sgl_element sgl[1]; 66 struct gdma_work_request request; 67 uint32_t wqe_size_in_bu; 68 struct mana_priv *priv = rxq->priv; 69 int ret; 70 struct mana_mr_cache *mr; 71 72 mbuf = rte_pktmbuf_alloc(rxq->mp); 73 if (!mbuf) { 74 rxq->stats.nombuf++; 75 return -ENOMEM; 76 } 77 78 mr = mana_find_pmd_mr(&rxq->mr_btree, priv, mbuf); 79 if (!mr) { 80 DP_LOG(ERR, "failed to register RX MR"); 81 rte_pktmbuf_free(mbuf); 82 return -ENOMEM; 83 } 84 85 request.gdma_header.struct_size = sizeof(request); 86 87 sgl[0].address = rte_cpu_to_le_64(rte_pktmbuf_mtod(mbuf, uint64_t)); 88 sgl[0].memory_key = mr->lkey; 89 sgl[0].size = 90 rte_pktmbuf_data_room_size(rxq->mp) - 91 RTE_PKTMBUF_HEADROOM; 92 93 request.sgl = sgl; 94 request.num_sgl_elements = 1; 95 request.inline_oob_data = NULL; 96 request.inline_oob_size_in_bytes = 0; 97 request.flags = 0; 98 request.client_data_unit = NOT_USING_CLIENT_DATA_UNIT; 99 100 ret = gdma_post_work_request(&rxq->gdma_rq, &request, &wqe_size_in_bu); 101 if (!ret) { 102 struct mana_rxq_desc *desc = 103 &rxq->desc_ring[rxq->desc_ring_head]; 104 105 /* update queue for tracking pending packets */ 106 desc->pkt = mbuf; 107 desc->wqe_size_in_bu = wqe_size_in_bu; 108 #ifdef RTE_ARCH_32 109 rxq->wqe_cnt_to_short_db += wqe_size_in_bu; 110 #endif 111 rxq->desc_ring_head = (rxq->desc_ring_head + 1) % rxq->num_desc; 112 } else { 113 DP_LOG(DEBUG, "failed to post recv ret %d", ret); 114 return ret; 115 } 116 117 return 0; 118 } 119 120 /* 121 * Post work requests for a Rx queue. 122 */ 123 static int 124 mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq) 125 { 126 int ret; 127 uint32_t i; 128 129 #ifdef RTE_ARCH_32 130 rxq->wqe_cnt_to_short_db = 0; 131 #endif 132 for (i = 0; i < rxq->num_desc; i++) { 133 ret = mana_alloc_and_post_rx_wqe(rxq); 134 if (ret) { 135 DP_LOG(ERR, "failed to post RX ret = %d", ret); 136 return ret; 137 } 138 139 #ifdef RTE_ARCH_32 140 if (rxq->wqe_cnt_to_short_db > RX_WQE_SHORT_DB_THRESHOLD) { 141 mana_rq_ring_doorbell(rxq); 142 rxq->wqe_cnt_to_short_db = 0; 143 } 144 #endif 145 } 146 147 mana_rq_ring_doorbell(rxq); 148 149 return ret; 150 } 151 152 int 153 mana_stop_rx_queues(struct rte_eth_dev *dev) 154 { 155 struct mana_priv *priv = dev->data->dev_private; 156 int ret, i; 157 158 if (priv->rwq_qp) { 159 ret = ibv_destroy_qp(priv->rwq_qp); 160 if (ret) 161 DRV_LOG(ERR, "rx_queue destroy_qp failed %d", ret); 162 priv->rwq_qp = NULL; 163 } 164 165 if (priv->ind_table) { 166 ret = ibv_destroy_rwq_ind_table(priv->ind_table); 167 if (ret) 168 DRV_LOG(ERR, "destroy rwq ind table failed %d", ret); 169 priv->ind_table = NULL; 170 } 171 172 for (i = 0; i < priv->num_queues; i++) { 173 struct mana_rxq *rxq = dev->data->rx_queues[i]; 174 175 if (rxq->wq) { 176 ret = ibv_destroy_wq(rxq->wq); 177 if (ret) 178 DRV_LOG(ERR, 179 "rx_queue destroy_wq failed %d", ret); 180 rxq->wq = NULL; 181 } 182 183 if (rxq->cq) { 184 ret = ibv_destroy_cq(rxq->cq); 185 if (ret) 186 DRV_LOG(ERR, 187 "rx_queue destroy_cq failed %d", ret); 188 rxq->cq = NULL; 189 190 if (rxq->channel) { 191 ret = ibv_destroy_comp_channel(rxq->channel); 192 if (ret) 193 DRV_LOG(ERR, "failed destroy comp %d", 194 ret); 195 rxq->channel = NULL; 196 } 197 } 198 199 /* Drain and free posted WQEs */ 200 while (rxq->desc_ring_tail != rxq->desc_ring_head) { 201 struct mana_rxq_desc *desc = 202 &rxq->desc_ring[rxq->desc_ring_tail]; 203 204 rte_pktmbuf_free(desc->pkt); 205 206 rxq->desc_ring_tail = 207 (rxq->desc_ring_tail + 1) % rxq->num_desc; 208 } 209 rxq->desc_ring_head = 0; 210 rxq->desc_ring_tail = 0; 211 212 memset(&rxq->gdma_rq, 0, sizeof(rxq->gdma_rq)); 213 memset(&rxq->gdma_cq, 0, sizeof(rxq->gdma_cq)); 214 } 215 return 0; 216 } 217 218 int 219 mana_start_rx_queues(struct rte_eth_dev *dev) 220 { 221 struct mana_priv *priv = dev->data->dev_private; 222 int ret, i; 223 struct ibv_wq *ind_tbl[priv->num_queues]; 224 225 DRV_LOG(INFO, "start rx queues"); 226 for (i = 0; i < priv->num_queues; i++) { 227 struct mana_rxq *rxq = dev->data->rx_queues[i]; 228 struct ibv_wq_init_attr wq_attr = {}; 229 230 manadv_set_context_attr(priv->ib_ctx, 231 MANADV_CTX_ATTR_BUF_ALLOCATORS, 232 (void *)((uintptr_t)&(struct manadv_ctx_allocators){ 233 .alloc = &mana_alloc_verbs_buf, 234 .free = &mana_free_verbs_buf, 235 .data = (void *)(uintptr_t)rxq->socket, 236 })); 237 238 if (dev->data->dev_conf.intr_conf.rxq) { 239 rxq->channel = ibv_create_comp_channel(priv->ib_ctx); 240 if (!rxq->channel) { 241 ret = -errno; 242 DRV_LOG(ERR, "Queue %d comp channel failed", i); 243 goto fail; 244 } 245 246 ret = mana_fd_set_non_blocking(rxq->channel->fd); 247 if (ret) { 248 DRV_LOG(ERR, "Failed to set comp non-blocking"); 249 goto fail; 250 } 251 } 252 253 rxq->cq = ibv_create_cq(priv->ib_ctx, rxq->num_desc, 254 NULL, rxq->channel, 255 rxq->channel ? i : 0); 256 if (!rxq->cq) { 257 ret = -errno; 258 DRV_LOG(ERR, "failed to create rx cq queue %d", i); 259 goto fail; 260 } 261 262 wq_attr.wq_type = IBV_WQT_RQ; 263 wq_attr.max_wr = rxq->num_desc; 264 wq_attr.max_sge = 1; 265 wq_attr.pd = priv->ib_parent_pd; 266 wq_attr.cq = rxq->cq; 267 268 rxq->wq = ibv_create_wq(priv->ib_ctx, &wq_attr); 269 if (!rxq->wq) { 270 ret = -errno; 271 DRV_LOG(ERR, "failed to create rx wq %d", i); 272 goto fail; 273 } 274 275 ind_tbl[i] = rxq->wq; 276 } 277 278 struct ibv_rwq_ind_table_init_attr ind_table_attr = { 279 .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)), 280 .ind_tbl = ind_tbl, 281 .comp_mask = 0, 282 }; 283 284 priv->ind_table = ibv_create_rwq_ind_table(priv->ib_ctx, 285 &ind_table_attr); 286 if (!priv->ind_table) { 287 ret = -errno; 288 DRV_LOG(ERR, "failed to create ind_table ret %d", ret); 289 goto fail; 290 } 291 292 DRV_LOG(INFO, "ind_table handle %d num %d", 293 priv->ind_table->ind_tbl_handle, 294 priv->ind_table->ind_tbl_num); 295 296 struct ibv_qp_init_attr_ex qp_attr_ex = { 297 .comp_mask = IBV_QP_INIT_ATTR_PD | 298 IBV_QP_INIT_ATTR_RX_HASH | 299 IBV_QP_INIT_ATTR_IND_TABLE, 300 .qp_type = IBV_QPT_RAW_PACKET, 301 .pd = priv->ib_parent_pd, 302 .rwq_ind_tbl = priv->ind_table, 303 .rx_hash_conf = { 304 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 305 .rx_hash_key_len = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES, 306 .rx_hash_key = mana_rss_hash_key_default, 307 .rx_hash_fields_mask = 308 IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4, 309 }, 310 311 }; 312 313 /* overwrite default if rss key is set */ 314 if (priv->rss_conf.rss_key_len && priv->rss_conf.rss_key) 315 qp_attr_ex.rx_hash_conf.rx_hash_key = 316 priv->rss_conf.rss_key; 317 318 /* overwrite default if rss hash fields are set */ 319 if (priv->rss_conf.rss_hf) { 320 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask = 0; 321 322 if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV4) 323 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 324 IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4; 325 326 if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV6) 327 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 328 IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_SRC_IPV6; 329 330 if (priv->rss_conf.rss_hf & 331 (RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV6_TCP)) 332 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 333 IBV_RX_HASH_SRC_PORT_TCP | 334 IBV_RX_HASH_DST_PORT_TCP; 335 336 if (priv->rss_conf.rss_hf & 337 (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV6_UDP)) 338 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 339 IBV_RX_HASH_SRC_PORT_UDP | 340 IBV_RX_HASH_DST_PORT_UDP; 341 } 342 343 priv->rwq_qp = ibv_create_qp_ex(priv->ib_ctx, &qp_attr_ex); 344 if (!priv->rwq_qp) { 345 ret = -errno; 346 DRV_LOG(ERR, "rx ibv_create_qp_ex failed"); 347 goto fail; 348 } 349 350 for (i = 0; i < priv->num_queues; i++) { 351 struct mana_rxq *rxq = dev->data->rx_queues[i]; 352 struct manadv_obj obj = {}; 353 struct manadv_cq dv_cq; 354 struct manadv_rwq dv_wq; 355 356 obj.cq.in = rxq->cq; 357 obj.cq.out = &dv_cq; 358 obj.rwq.in = rxq->wq; 359 obj.rwq.out = &dv_wq; 360 ret = manadv_init_obj(&obj, MANADV_OBJ_CQ | MANADV_OBJ_RWQ); 361 if (ret) { 362 DRV_LOG(ERR, "manadv_init_obj failed ret %d", ret); 363 goto fail; 364 } 365 366 rxq->gdma_cq.buffer = obj.cq.out->buf; 367 rxq->gdma_cq.count = obj.cq.out->count; 368 rxq->gdma_cq.size = rxq->gdma_cq.count * COMP_ENTRY_SIZE; 369 rxq->gdma_cq.id = obj.cq.out->cq_id; 370 371 /* CQ head starts with count */ 372 rxq->gdma_cq.head = rxq->gdma_cq.count; 373 374 DRV_LOG(INFO, "rxq cq id %u buf %p count %u size %u", 375 rxq->gdma_cq.id, rxq->gdma_cq.buffer, 376 rxq->gdma_cq.count, rxq->gdma_cq.size); 377 378 priv->db_page = obj.rwq.out->db_page; 379 380 rxq->gdma_rq.buffer = obj.rwq.out->buf; 381 rxq->gdma_rq.count = obj.rwq.out->count; 382 rxq->gdma_rq.size = obj.rwq.out->size; 383 rxq->gdma_rq.id = obj.rwq.out->wq_id; 384 385 DRV_LOG(INFO, "rxq rq id %u buf %p count %u size %u", 386 rxq->gdma_rq.id, rxq->gdma_rq.buffer, 387 rxq->gdma_rq.count, rxq->gdma_rq.size); 388 389 rxq->comp_buf_len = 0; 390 rxq->comp_buf_idx = 0; 391 rxq->backlog_idx = 0; 392 } 393 394 for (i = 0; i < priv->num_queues; i++) { 395 ret = mana_alloc_and_post_rx_wqes(dev->data->rx_queues[i]); 396 if (ret) 397 goto fail; 398 } 399 400 return 0; 401 402 fail: 403 mana_stop_rx_queues(dev); 404 return ret; 405 } 406 407 uint16_t 408 mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 409 { 410 uint16_t pkt_received = 0; 411 uint16_t wqe_posted = 0; 412 struct mana_rxq *rxq = dpdk_rxq; 413 struct mana_priv *priv = rxq->priv; 414 struct rte_mbuf *mbuf; 415 int ret; 416 uint32_t pkt_idx = rxq->backlog_idx; 417 uint32_t pkt_len; 418 uint32_t i; 419 int polled = 0; 420 421 #ifdef RTE_ARCH_32 422 rxq->wqe_cnt_to_short_db = 0; 423 #endif 424 425 repoll: 426 /* Polling on new completions if we have no backlog */ 427 if (rxq->comp_buf_idx == rxq->comp_buf_len) { 428 RTE_ASSERT(!pkt_idx); 429 rxq->comp_buf_len = 430 gdma_poll_completion_queue(&rxq->gdma_cq, 431 rxq->gdma_comp_buf, pkts_n); 432 rxq->comp_buf_idx = 0; 433 polled = 1; 434 } 435 436 i = rxq->comp_buf_idx; 437 while (i < rxq->comp_buf_len) { 438 struct mana_rx_comp_oob *oob = (struct mana_rx_comp_oob *) 439 rxq->gdma_comp_buf[i].cqe_data; 440 struct mana_rxq_desc *desc = 441 &rxq->desc_ring[rxq->desc_ring_tail]; 442 443 mbuf = desc->pkt; 444 445 switch (oob->cqe_hdr.cqe_type) { 446 case CQE_RX_OKAY: 447 case CQE_RX_COALESCED_4: 448 /* Proceed to process mbuf */ 449 break; 450 451 case CQE_RX_TRUNCATED: 452 default: 453 DP_LOG(ERR, "RX CQE type %d client %d vendor %d", 454 oob->cqe_hdr.cqe_type, oob->cqe_hdr.client_type, 455 oob->cqe_hdr.vendor_err); 456 457 rxq->stats.errors++; 458 rte_pktmbuf_free(mbuf); 459 460 i++; 461 goto drop; 462 } 463 464 DP_LOG(DEBUG, "mana_rx_comp_oob type %d rxq %p", 465 oob->cqe_hdr.cqe_type, rxq); 466 467 pkt_len = oob->packet_info[pkt_idx].packet_length; 468 if (!pkt_len) { 469 /* Move on to the next completion */ 470 pkt_idx = 0; 471 i++; 472 continue; 473 } 474 475 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 476 mbuf->nb_segs = 1; 477 mbuf->next = NULL; 478 mbuf->data_len = pkt_len; 479 mbuf->pkt_len = pkt_len; 480 mbuf->port = priv->port_id; 481 482 if (oob->rx_ip_header_checksum_succeeded) 483 mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 484 485 if (oob->rx_ip_header_checksum_failed) 486 mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 487 488 if (oob->rx_outer_ip_header_checksum_failed) 489 mbuf->ol_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; 490 491 if (oob->rx_tcp_checksum_succeeded || 492 oob->rx_udp_checksum_succeeded) 493 mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 494 495 if (oob->rx_tcp_checksum_failed || 496 oob->rx_udp_checksum_failed) 497 mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 498 499 if (oob->rx_hash_type == MANA_HASH_L3 || 500 oob->rx_hash_type == MANA_HASH_L4) { 501 mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 502 mbuf->hash.rss = oob->packet_info[pkt_idx].packet_hash; 503 } 504 505 pkts[pkt_received++] = mbuf; 506 rxq->stats.packets++; 507 rxq->stats.bytes += mbuf->data_len; 508 509 pkt_idx++; 510 /* Move on the next completion if all packets are processed */ 511 if (pkt_idx >= RX_COM_OOB_NUM_PACKETINFO_SEGMENTS) { 512 pkt_idx = 0; 513 i++; 514 } 515 516 drop: 517 rxq->desc_ring_tail++; 518 if (rxq->desc_ring_tail >= rxq->num_desc) 519 rxq->desc_ring_tail = 0; 520 521 rxq->gdma_rq.tail += desc->wqe_size_in_bu; 522 523 /* Consume this request and post another request */ 524 ret = mana_alloc_and_post_rx_wqe(rxq); 525 if (ret) { 526 DP_LOG(ERR, "failed to post rx wqe ret=%d", ret); 527 break; 528 } 529 530 wqe_posted++; 531 if (pkt_received == pkts_n) 532 break; 533 534 #ifdef RTE_ARCH_32 535 /* Ring short doorbell if approaching the wqe increment 536 * limit. 537 */ 538 if (rxq->wqe_cnt_to_short_db > RX_WQE_SHORT_DB_THRESHOLD) { 539 mana_rq_ring_doorbell(rxq); 540 rxq->wqe_cnt_to_short_db = 0; 541 } 542 #endif 543 } 544 545 rxq->backlog_idx = pkt_idx; 546 rxq->comp_buf_idx = i; 547 548 /* If all CQEs are processed but there are more packets to read, poll the 549 * completion queue again because we may have not polled on the completion 550 * queue due to CQE not fully processed in the previous rx_burst 551 */ 552 if (pkt_received < pkts_n && !polled) { 553 polled = 1; 554 goto repoll; 555 } 556 557 if (wqe_posted) 558 mana_rq_ring_doorbell(rxq); 559 560 return pkt_received; 561 } 562 563 #ifdef RTE_ARCH_32 564 static int 565 mana_arm_cq(struct mana_rxq *rxq __rte_unused, uint8_t arm __rte_unused) 566 { 567 DP_LOG(ERR, "Do not support in 32 bit"); 568 569 return -ENODEV; 570 } 571 #else 572 static int 573 mana_arm_cq(struct mana_rxq *rxq, uint8_t arm) 574 { 575 struct mana_priv *priv = rxq->priv; 576 uint32_t head = rxq->gdma_cq.head % 577 (rxq->gdma_cq.count << COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE); 578 579 DP_LOG(DEBUG, "Ringing completion queue ID %u head %u arm %d", 580 rxq->gdma_cq.id, head, arm); 581 582 return mana_ring_doorbell(priv->db_page, GDMA_QUEUE_COMPLETION, 583 rxq->gdma_cq.id, head, arm); 584 } 585 #endif 586 587 int 588 mana_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 589 { 590 struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id]; 591 592 return mana_arm_cq(rxq, 1); 593 } 594 595 int 596 mana_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 597 { 598 struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id]; 599 struct ibv_cq *ev_cq; 600 void *ev_ctx; 601 int ret; 602 603 ret = ibv_get_cq_event(rxq->channel, &ev_cq, &ev_ctx); 604 if (ret) 605 ret = errno; 606 else if (ev_cq != rxq->cq) 607 ret = EINVAL; 608 609 if (ret) { 610 if (ret != EAGAIN) 611 DP_LOG(ERR, "Can't disable RX intr queue %d", 612 rx_queue_id); 613 } else { 614 ibv_ack_cq_events(rxq->cq, 1); 615 } 616 617 return -ret; 618 } 619