/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */
#include <ethdev_driver.h>

#include <infiniband/verbs.h>
#include <infiniband/manadv.h>

#include "mana.h"

static uint8_t mana_rss_hash_key_default[TOEPLITZ_HASH_KEY_SIZE_IN_BYTES] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

int
mana_rq_ring_doorbell(struct mana_rxq *rxq, uint8_t arm)
{
	struct mana_priv *priv = rxq->priv;
	int ret;
	void *db_page = priv->db_page;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[priv->dev_data->port_id];
		struct mana_process_priv *process_priv = dev->process_private;

		db_page = process_priv->db_page;
	}

	ret = mana_ring_doorbell(db_page, GDMA_QUEUE_RECEIVE,
				 rxq->gdma_rq.id,
				 rxq->gdma_rq.head * GDMA_WQE_ALIGNMENT_UNIT_SIZE,
				 arm);

	if (ret)
		DP_LOG(ERR, "failed to ring RX doorbell ret %d", ret);

	return ret;
}

static int
mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
{
	struct rte_mbuf *mbuf = NULL;
	struct gdma_sgl_element sgl[1];
	struct gdma_work_request request;
	uint32_t wqe_size_in_bu;
	struct mana_priv *priv = rxq->priv;
	int ret;
	struct mana_mr_cache *mr;

	mbuf = rte_pktmbuf_alloc(rxq->mp);
	if (!mbuf) {
		rxq->stats.nombuf++;
		return -ENOMEM;
	}

	mr = mana_find_pmd_mr(&rxq->mr_btree, priv, mbuf);
	if (!mr) {
		DP_LOG(ERR, "failed to register RX MR");
		rte_pktmbuf_free(mbuf);
		return -ENOMEM;
	}

	request.gdma_header.struct_size = sizeof(request);

	sgl[0].address = rte_cpu_to_le_64(rte_pktmbuf_mtod(mbuf, uint64_t));
	sgl[0].memory_key = mr->lkey;
	sgl[0].size =
		rte_pktmbuf_data_room_size(rxq->mp) -
		RTE_PKTMBUF_HEADROOM;

	request.sgl = sgl;
	request.num_sgl_elements = 1;
	request.inline_oob_data = NULL;
	request.inline_oob_size_in_bytes = 0;
	request.flags = 0;
	request.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;

	ret = gdma_post_work_request(&rxq->gdma_rq, &request, &wqe_size_in_bu);
	if (!ret) {
		struct mana_rxq_desc *desc =
			&rxq->desc_ring[rxq->desc_ring_head];

		/* update queue for tracking pending packets */
		desc->pkt = mbuf;
		desc->wqe_size_in_bu = wqe_size_in_bu;
		rxq->desc_ring_head = (rxq->desc_ring_head + 1) % rxq->num_desc;
	} else {
		DP_LOG(DEBUG, "failed to post recv ret %d", ret);
		return ret;
	}

	return 0;
}
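
/*
 * Illustration only (not driver logic): each posted WQE is tracked in the
 * slot at desc_ring_head, which wraps modulo num_desc; with num_desc = 4
 * the head advances 0 -> 1 -> 2 -> 3 -> 0, since (3 + 1) % 4 == 0.
 * mana_rx_burst() advances desc_ring_tail the same way as the completions
 * for those WQEs are consumed.
 */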

/*
 * Post work requests for a Rx queue.
 */
static int
mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq)
{
	int ret;
	uint32_t i;

	for (i = 0; i < rxq->num_desc; i++) {
		ret = mana_alloc_and_post_rx_wqe(rxq);
		if (ret) {
			DP_LOG(ERR, "failed to post RX ret = %d", ret);
			return ret;
		}
	}

	mana_rq_ring_doorbell(rxq, rxq->num_desc);

	return 0;
}

int
mana_stop_rx_queues(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret, i;

	if (priv->rwq_qp) {
		ret = ibv_destroy_qp(priv->rwq_qp);
		if (ret)
			DRV_LOG(ERR, "rx_queue destroy_qp failed %d", ret);
		priv->rwq_qp = NULL;
	}

	if (priv->ind_table) {
		ret = ibv_destroy_rwq_ind_table(priv->ind_table);
		if (ret)
			DRV_LOG(ERR, "destroy rwq ind table failed %d", ret);
		priv->ind_table = NULL;
	}

	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (rxq->wq) {
			ret = ibv_destroy_wq(rxq->wq);
			if (ret)
				DRV_LOG(ERR,
					"rx_queue destroy_wq failed %d", ret);
			rxq->wq = NULL;
		}

		if (rxq->cq) {
			ret = ibv_destroy_cq(rxq->cq);
			if (ret)
				DRV_LOG(ERR,
					"rx_queue destroy_cq failed %d", ret);
			rxq->cq = NULL;

			if (rxq->channel) {
				ret = ibv_destroy_comp_channel(rxq->channel);
				if (ret)
					DRV_LOG(ERR, "failed destroy comp %d",
						ret);
				rxq->channel = NULL;
			}
		}

		/* Drain and free posted WQEs */
		while (rxq->desc_ring_tail != rxq->desc_ring_head) {
			struct mana_rxq_desc *desc =
				&rxq->desc_ring[rxq->desc_ring_tail];

			rte_pktmbuf_free(desc->pkt);

			rxq->desc_ring_tail =
				(rxq->desc_ring_tail + 1) % rxq->num_desc;
		}
		rxq->desc_ring_head = 0;
		rxq->desc_ring_tail = 0;

		memset(&rxq->gdma_rq, 0, sizeof(rxq->gdma_rq));
		memset(&rxq->gdma_cq, 0, sizeof(rxq->gdma_cq));
	}
	return 0;
}

int
mana_start_rx_queues(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret, i;
	struct ibv_wq *ind_tbl[priv->num_queues];

	DRV_LOG(INFO, "start rx queues");
	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];
		struct ibv_wq_init_attr wq_attr = {};

		manadv_set_context_attr(priv->ib_ctx,
			MANADV_CTX_ATTR_BUF_ALLOCATORS,
			(void *)((uintptr_t)&(struct manadv_ctx_allocators){
				.alloc = &mana_alloc_verbs_buf,
				.free = &mana_free_verbs_buf,
				.data = (void *)(uintptr_t)rxq->socket,
			}));

		if (dev->data->dev_conf.intr_conf.rxq) {
			rxq->channel = ibv_create_comp_channel(priv->ib_ctx);
			if (!rxq->channel) {
				ret = -errno;
				DRV_LOG(ERR, "Queue %d comp channel failed", i);
				goto fail;
			}

			ret = mana_fd_set_non_blocking(rxq->channel->fd);
			if (ret) {
				DRV_LOG(ERR, "Failed to set comp non-blocking");
				goto fail;
			}
		}
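
		/*
		 * Descriptive note: the CQ below is sized for num_desc
		 * entries and, when Rx interrupts are configured, is bound
		 * to the completion channel created above so that
		 * mana_rx_intr_enable()/mana_rx_intr_disable() can arm and
		 * acknowledge CQ events. The last ibv_create_cq() argument
		 * is the completion vector; the queue index is used only
		 * when a channel is present.
		 */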

		rxq->cq = ibv_create_cq(priv->ib_ctx, rxq->num_desc,
					NULL, rxq->channel,
					rxq->channel ? i : 0);
		if (!rxq->cq) {
			ret = -errno;
			DRV_LOG(ERR, "failed to create rx cq queue %d", i);
			goto fail;
		}

		wq_attr.wq_type = IBV_WQT_RQ;
		wq_attr.max_wr = rxq->num_desc;
		wq_attr.max_sge = 1;
		wq_attr.pd = priv->ib_parent_pd;
		wq_attr.cq = rxq->cq;

		rxq->wq = ibv_create_wq(priv->ib_ctx, &wq_attr);
		if (!rxq->wq) {
			ret = -errno;
			DRV_LOG(ERR, "failed to create rx wq %d", i);
			goto fail;
		}

		ind_tbl[i] = rxq->wq;
	}

	struct ibv_rwq_ind_table_init_attr ind_table_attr = {
		.log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
		.ind_tbl = ind_tbl,
		.comp_mask = 0,
	};

	priv->ind_table = ibv_create_rwq_ind_table(priv->ib_ctx,
						   &ind_table_attr);
	if (!priv->ind_table) {
		ret = -errno;
		DRV_LOG(ERR, "failed to create ind_table ret %d", ret);
		goto fail;
	}

	DRV_LOG(INFO, "ind_table handle %d num %d",
		priv->ind_table->ind_tbl_handle,
		priv->ind_table->ind_tbl_num);

	struct ibv_qp_init_attr_ex qp_attr_ex = {
		.comp_mask = IBV_QP_INIT_ATTR_PD |
			     IBV_QP_INIT_ATTR_RX_HASH |
			     IBV_QP_INIT_ATTR_IND_TABLE,
		.qp_type = IBV_QPT_RAW_PACKET,
		.pd = priv->ib_parent_pd,
		.rwq_ind_tbl = priv->ind_table,
		.rx_hash_conf = {
			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES,
			.rx_hash_key = mana_rss_hash_key_default,
			.rx_hash_fields_mask =
				IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
		},
	};

	/* overwrite default if rss key is set */
	if (priv->rss_conf.rss_key_len && priv->rss_conf.rss_key)
		qp_attr_ex.rx_hash_conf.rx_hash_key =
			priv->rss_conf.rss_key;

	/* overwrite default if rss hash fields are set */
	if (priv->rss_conf.rss_hf) {
		qp_attr_ex.rx_hash_conf.rx_hash_fields_mask = 0;

		if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV4)
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4;

		if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV6)
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6;

		if (priv->rss_conf.rss_hf &
		    (RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV6_TCP))
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP;

		if (priv->rss_conf.rss_hf &
		    (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV6_UDP))
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP;
	}

	priv->rwq_qp = ibv_create_qp_ex(priv->ib_ctx, &qp_attr_ex);
	if (!priv->rwq_qp) {
		ret = -errno;
		DRV_LOG(ERR, "rx ibv_create_qp_ex failed");
		goto fail;
	}

	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];
		struct manadv_obj obj = {};
		struct manadv_cq dv_cq;
		struct manadv_rwq dv_wq;

		obj.cq.in = rxq->cq;
		obj.cq.out = &dv_cq;
		obj.rwq.in = rxq->wq;
		obj.rwq.out = &dv_wq;
		ret = manadv_init_obj(&obj, MANADV_OBJ_CQ | MANADV_OBJ_RWQ);
		if (ret) {
			DRV_LOG(ERR, "manadv_init_obj failed ret %d", ret);
			goto fail;
		}

		rxq->gdma_cq.buffer = obj.cq.out->buf;
		rxq->gdma_cq.count = obj.cq.out->count;
		rxq->gdma_cq.size = rxq->gdma_cq.count * COMP_ENTRY_SIZE;
		rxq->gdma_cq.id = obj.cq.out->cq_id;

		/* CQ head starts with count */
		rxq->gdma_cq.head = rxq->gdma_cq.count;
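
		/*
		 * Sketch of the intent (an assumption about how the poller
		 * uses it, not taken from the hardware spec): head keeps
		 * growing monotonically, so starting it at count lets the
		 * completion path derive the expected CQE owner/generation
		 * bits from head / count, e.g. head 256..511 during the
		 * first pass over a 256-entry CQ.
		 */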

		DRV_LOG(INFO, "rxq cq id %u buf %p count %u size %u",
			rxq->gdma_cq.id, rxq->gdma_cq.buffer,
			rxq->gdma_cq.count, rxq->gdma_cq.size);

		priv->db_page = obj.rwq.out->db_page;

		rxq->gdma_rq.buffer = obj.rwq.out->buf;
		rxq->gdma_rq.count = obj.rwq.out->count;
		rxq->gdma_rq.size = obj.rwq.out->size;
		rxq->gdma_rq.id = obj.rwq.out->wq_id;

		DRV_LOG(INFO, "rxq rq id %u buf %p count %u size %u",
			rxq->gdma_rq.id, rxq->gdma_rq.buffer,
			rxq->gdma_rq.count, rxq->gdma_rq.size);

		rxq->comp_buf_len = 0;
		rxq->comp_buf_idx = 0;
		rxq->backlog_idx = 0;
	}

	for (i = 0; i < priv->num_queues; i++) {
		ret = mana_alloc_and_post_rx_wqes(dev->data->rx_queues[i]);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	mana_stop_rx_queues(dev);
	return ret;
}

uint16_t
mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	uint16_t pkt_received = 0;
	uint16_t wqe_posted = 0;
	struct mana_rxq *rxq = dpdk_rxq;
	struct mana_priv *priv = rxq->priv;
	struct rte_mbuf *mbuf;
	int ret;
	uint32_t pkt_idx = rxq->backlog_idx;
	uint32_t pkt_len;
	uint32_t i;
	int polled = 0;

repoll:
	/* Poll for new completions if there is no backlog */
	if (rxq->comp_buf_idx == rxq->comp_buf_len) {
		RTE_ASSERT(!pkt_idx);
		rxq->comp_buf_len =
			gdma_poll_completion_queue(&rxq->gdma_cq,
						   rxq->gdma_comp_buf, pkts_n);
		rxq->comp_buf_idx = 0;
		polled = 1;
	}

	i = rxq->comp_buf_idx;
	while (i < rxq->comp_buf_len) {
		struct mana_rx_comp_oob *oob = (struct mana_rx_comp_oob *)
			rxq->gdma_comp_buf[i].cqe_data;
		struct mana_rxq_desc *desc =
			&rxq->desc_ring[rxq->desc_ring_tail];

		mbuf = desc->pkt;

		switch (oob->cqe_hdr.cqe_type) {
		case CQE_RX_OKAY:
		case CQE_RX_COALESCED_4:
			/* Proceed to process mbuf */
			break;

		case CQE_RX_TRUNCATED:
		default:
			DP_LOG(ERR, "RX CQE type %d client %d vendor %d",
			       oob->cqe_hdr.cqe_type, oob->cqe_hdr.client_type,
			       oob->cqe_hdr.vendor_err);

			rxq->stats.errors++;
			rte_pktmbuf_free(mbuf);

			i++;
			goto drop;
		}

		DP_LOG(DEBUG, "mana_rx_comp_oob type %d rxq %p",
		       oob->cqe_hdr.cqe_type, rxq);

		pkt_len = oob->packet_info[pkt_idx].packet_length;
		if (!pkt_len) {
			/* Move on to the next completion */
			pkt_idx = 0;
			i++;
			continue;
		}

		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
		mbuf->nb_segs = 1;
		mbuf->next = NULL;
		mbuf->data_len = pkt_len;
		mbuf->pkt_len = pkt_len;
		mbuf->port = priv->port_id;

		if (oob->rx_ip_header_checksum_succeeded)
			mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

		if (oob->rx_ip_header_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;

		if (oob->rx_outer_ip_header_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;

		if (oob->rx_tcp_checksum_succeeded ||
		    oob->rx_udp_checksum_succeeded)
			mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;

		if (oob->rx_tcp_checksum_failed ||
		    oob->rx_udp_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;

		if (oob->rx_hash_type == MANA_HASH_L3 ||
		    oob->rx_hash_type == MANA_HASH_L4) {
			mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			mbuf->hash.rss = oob->packet_info[pkt_idx].packet_hash;
		}

		pkts[pkt_received++] = mbuf;
		rxq->stats.packets++;
		rxq->stats.bytes += mbuf->data_len;

		pkt_idx++;
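
		/*
		 * Descriptive note: a CQE_RX_COALESCED_4 completion carries
		 * several packet_info[] entries (bounded by
		 * RX_COM_OOB_NUM_PACKETINFO_SEGMENTS); pkt_idx walks them,
		 * and rxq->backlog_idx below preserves the position when
		 * pkts_n is reached before the entry is fully drained.
		 */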
		/* Move on to the next completion if all its packets are processed */
		if (pkt_idx >= RX_COM_OOB_NUM_PACKETINFO_SEGMENTS) {
			pkt_idx = 0;
			i++;
		}

drop:
		rxq->desc_ring_tail++;
		if (rxq->desc_ring_tail >= rxq->num_desc)
			rxq->desc_ring_tail = 0;

		rxq->gdma_rq.tail += desc->wqe_size_in_bu;

		/* Consume this request and post another request */
		ret = mana_alloc_and_post_rx_wqe(rxq);
		if (ret) {
			DP_LOG(ERR, "failed to post rx wqe ret=%d", ret);
			break;
		}

		wqe_posted++;
		if (pkt_received == pkts_n)
			break;
	}

	rxq->backlog_idx = pkt_idx;
	rxq->comp_buf_idx = i;

	/*
	 * If all CQEs are processed but there are more packets to read,
	 * poll the completion queue again: we may not have polled it this
	 * time because the previous rx_burst left a CQE only partially
	 * processed.
	 */
	if (pkt_received < pkts_n && !polled) {
		polled = 1;
		goto repoll;
	}

	if (wqe_posted)
		mana_rq_ring_doorbell(rxq, wqe_posted);

	return pkt_received;
}

static int
mana_arm_cq(struct mana_rxq *rxq, uint8_t arm)
{
	struct mana_priv *priv = rxq->priv;
	uint32_t head = rxq->gdma_cq.head %
		(rxq->gdma_cq.count << COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE);

	DP_LOG(DEBUG, "Ringing completion queue ID %u head %u arm %d",
	       rxq->gdma_cq.id, head, arm);

	return mana_ring_doorbell(priv->db_page, GDMA_QUEUE_COMPLETION,
				  rxq->gdma_cq.id, head, arm);
}

int
mana_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id];

	return mana_arm_cq(rxq, 1);
}

int
mana_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id];
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret;

	ret = ibv_get_cq_event(rxq->channel, &ev_cq, &ev_ctx);
	if (ret)
		ret = errno;
	else if (ev_cq != rxq->cq)
		ret = EINVAL;

	if (ret) {
		if (ret != EAGAIN)
			DP_LOG(ERR, "Can't disable RX intr queue %d",
			       rx_queue_id);
	} else {
		ibv_ack_cq_events(rxq->cq, 1);
	}

	return -ret;
}
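
/*
 * Usage sketch (application side; rte_eth_dev_rx_intr_enable/disable and
 * rte_epoll_wait are generic DPDK APIs, not part of this driver): with
 * dev_conf.intr_conf.rxq set, the completion channels created in
 * mana_start_rx_queues() let an application do roughly
 *
 *     rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *     ... wait for the queue interrupt, e.g. with rte_epoll_wait() ...
 *     rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *
 * which ends up in mana_rx_intr_enable()/mana_rx_intr_disable() above.
 */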