1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2022 Microsoft Corporation 3 */ 4 #include <ethdev_driver.h> 5 6 #include <infiniband/verbs.h> 7 #include <infiniband/manadv.h> 8 9 #include "mana.h" 10 11 static uint8_t mana_rss_hash_key_default[TOEPLITZ_HASH_KEY_SIZE_IN_BYTES] = { 12 0x2c, 0xc6, 0x81, 0xd1, 13 0x5b, 0xdb, 0xf4, 0xf7, 14 0xfc, 0xa2, 0x83, 0x19, 15 0xdb, 0x1a, 0x3e, 0x94, 16 0x6b, 0x9e, 0x38, 0xd9, 17 0x2c, 0x9c, 0x03, 0xd1, 18 0xad, 0x99, 0x44, 0xa7, 19 0xd9, 0x56, 0x3d, 0x59, 20 0x06, 0x3c, 0x25, 0xf3, 21 0xfc, 0x1f, 0xdc, 0x2a, 22 }; 23 24 int 25 mana_rq_ring_doorbell(struct mana_rxq *rxq, uint8_t arm) 26 { 27 struct mana_priv *priv = rxq->priv; 28 int ret; 29 void *db_page = priv->db_page; 30 31 if (rte_eal_process_type() == RTE_PROC_SECONDARY) { 32 struct rte_eth_dev *dev = 33 &rte_eth_devices[priv->dev_data->port_id]; 34 struct mana_process_priv *process_priv = dev->process_private; 35 36 db_page = process_priv->db_page; 37 } 38 39 ret = mana_ring_doorbell(db_page, GDMA_QUEUE_RECEIVE, 40 rxq->gdma_rq.id, 41 rxq->gdma_rq.head * GDMA_WQE_ALIGNMENT_UNIT_SIZE, 42 arm); 43 44 if (ret) 45 DRV_LOG(ERR, "failed to ring RX doorbell ret %d", ret); 46 47 return ret; 48 } 49 50 static int 51 mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq) 52 { 53 struct rte_mbuf *mbuf = NULL; 54 struct gdma_sgl_element sgl[1]; 55 struct gdma_work_request request = {0}; 56 struct gdma_posted_wqe_info wqe_info = {0}; 57 struct mana_priv *priv = rxq->priv; 58 int ret; 59 struct mana_mr_cache *mr; 60 61 mbuf = rte_pktmbuf_alloc(rxq->mp); 62 if (!mbuf) { 63 rxq->stats.nombuf++; 64 return -ENOMEM; 65 } 66 67 mr = mana_find_pmd_mr(&rxq->mr_btree, priv, mbuf); 68 if (!mr) { 69 DRV_LOG(ERR, "failed to register RX MR"); 70 rte_pktmbuf_free(mbuf); 71 return -ENOMEM; 72 } 73 74 request.gdma_header.struct_size = sizeof(request); 75 wqe_info.gdma_header.struct_size = sizeof(wqe_info); 76 77 sgl[0].address = rte_cpu_to_le_64(rte_pktmbuf_mtod(mbuf, uint64_t)); 78 sgl[0].memory_key = mr->lkey; 79 sgl[0].size = 80 rte_pktmbuf_data_room_size(rxq->mp) - 81 RTE_PKTMBUF_HEADROOM; 82 83 request.sgl = sgl; 84 request.num_sgl_elements = 1; 85 request.inline_oob_data = NULL; 86 request.inline_oob_size_in_bytes = 0; 87 request.flags = 0; 88 request.client_data_unit = NOT_USING_CLIENT_DATA_UNIT; 89 90 ret = gdma_post_work_request(&rxq->gdma_rq, &request, &wqe_info); 91 if (!ret) { 92 struct mana_rxq_desc *desc = 93 &rxq->desc_ring[rxq->desc_ring_head]; 94 95 /* update queue for tracking pending packets */ 96 desc->pkt = mbuf; 97 desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu; 98 rxq->desc_ring_head = (rxq->desc_ring_head + 1) % rxq->num_desc; 99 } else { 100 DRV_LOG(ERR, "failed to post recv ret %d", ret); 101 return ret; 102 } 103 104 return 0; 105 } 106 107 /* 108 * Post work requests for a Rx queue. 109 */ 110 static int 111 mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq) 112 { 113 int ret; 114 uint32_t i; 115 116 for (i = 0; i < rxq->num_desc; i++) { 117 ret = mana_alloc_and_post_rx_wqe(rxq); 118 if (ret) { 119 DRV_LOG(ERR, "failed to post RX ret = %d", ret); 120 return ret; 121 } 122 } 123 124 mana_rq_ring_doorbell(rxq, rxq->num_desc); 125 126 return ret; 127 } 128 129 int 130 mana_stop_rx_queues(struct rte_eth_dev *dev) 131 { 132 struct mana_priv *priv = dev->data->dev_private; 133 int ret, i; 134 135 if (priv->rwq_qp) { 136 ret = ibv_destroy_qp(priv->rwq_qp); 137 if (ret) 138 DRV_LOG(ERR, "rx_queue destroy_qp failed %d", ret); 139 priv->rwq_qp = NULL; 140 } 141 142 if (priv->ind_table) { 143 ret = ibv_destroy_rwq_ind_table(priv->ind_table); 144 if (ret) 145 DRV_LOG(ERR, "destroy rwq ind table failed %d", ret); 146 priv->ind_table = NULL; 147 } 148 149 for (i = 0; i < priv->num_queues; i++) { 150 struct mana_rxq *rxq = dev->data->rx_queues[i]; 151 152 if (rxq->wq) { 153 ret = ibv_destroy_wq(rxq->wq); 154 if (ret) 155 DRV_LOG(ERR, 156 "rx_queue destroy_wq failed %d", ret); 157 rxq->wq = NULL; 158 } 159 160 if (rxq->cq) { 161 ret = ibv_destroy_cq(rxq->cq); 162 if (ret) 163 DRV_LOG(ERR, 164 "rx_queue destroy_cq failed %d", ret); 165 rxq->cq = NULL; 166 167 if (rxq->channel) { 168 ret = ibv_destroy_comp_channel(rxq->channel); 169 if (ret) 170 DRV_LOG(ERR, "failed destroy comp %d", 171 ret); 172 rxq->channel = NULL; 173 } 174 } 175 176 /* Drain and free posted WQEs */ 177 while (rxq->desc_ring_tail != rxq->desc_ring_head) { 178 struct mana_rxq_desc *desc = 179 &rxq->desc_ring[rxq->desc_ring_tail]; 180 181 rte_pktmbuf_free(desc->pkt); 182 183 rxq->desc_ring_tail = 184 (rxq->desc_ring_tail + 1) % rxq->num_desc; 185 } 186 rxq->desc_ring_head = 0; 187 rxq->desc_ring_tail = 0; 188 189 memset(&rxq->gdma_rq, 0, sizeof(rxq->gdma_rq)); 190 memset(&rxq->gdma_cq, 0, sizeof(rxq->gdma_cq)); 191 } 192 return 0; 193 } 194 195 int 196 mana_start_rx_queues(struct rte_eth_dev *dev) 197 { 198 struct mana_priv *priv = dev->data->dev_private; 199 int ret, i; 200 struct ibv_wq *ind_tbl[priv->num_queues]; 201 202 DRV_LOG(INFO, "start rx queues"); 203 for (i = 0; i < priv->num_queues; i++) { 204 struct mana_rxq *rxq = dev->data->rx_queues[i]; 205 struct ibv_wq_init_attr wq_attr = {}; 206 207 manadv_set_context_attr(priv->ib_ctx, 208 MANADV_CTX_ATTR_BUF_ALLOCATORS, 209 (void *)((uintptr_t)&(struct manadv_ctx_allocators){ 210 .alloc = &mana_alloc_verbs_buf, 211 .free = &mana_free_verbs_buf, 212 .data = (void *)(uintptr_t)rxq->socket, 213 })); 214 215 if (dev->data->dev_conf.intr_conf.rxq) { 216 rxq->channel = ibv_create_comp_channel(priv->ib_ctx); 217 if (!rxq->channel) { 218 ret = -errno; 219 DRV_LOG(ERR, "Queue %d comp channel failed", i); 220 goto fail; 221 } 222 223 ret = mana_fd_set_non_blocking(rxq->channel->fd); 224 if (ret) { 225 DRV_LOG(ERR, "Failed to set comp non-blocking"); 226 goto fail; 227 } 228 } 229 230 rxq->cq = ibv_create_cq(priv->ib_ctx, rxq->num_desc, 231 NULL, rxq->channel, 232 rxq->channel ? i : 0); 233 if (!rxq->cq) { 234 ret = -errno; 235 DRV_LOG(ERR, "failed to create rx cq queue %d", i); 236 goto fail; 237 } 238 239 wq_attr.wq_type = IBV_WQT_RQ; 240 wq_attr.max_wr = rxq->num_desc; 241 wq_attr.max_sge = 1; 242 wq_attr.pd = priv->ib_parent_pd; 243 wq_attr.cq = rxq->cq; 244 245 rxq->wq = ibv_create_wq(priv->ib_ctx, &wq_attr); 246 if (!rxq->wq) { 247 ret = -errno; 248 DRV_LOG(ERR, "failed to create rx wq %d", i); 249 goto fail; 250 } 251 252 ind_tbl[i] = rxq->wq; 253 } 254 255 struct ibv_rwq_ind_table_init_attr ind_table_attr = { 256 .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)), 257 .ind_tbl = ind_tbl, 258 .comp_mask = 0, 259 }; 260 261 priv->ind_table = ibv_create_rwq_ind_table(priv->ib_ctx, 262 &ind_table_attr); 263 if (!priv->ind_table) { 264 ret = -errno; 265 DRV_LOG(ERR, "failed to create ind_table ret %d", ret); 266 goto fail; 267 } 268 269 DRV_LOG(INFO, "ind_table handle %d num %d", 270 priv->ind_table->ind_tbl_handle, 271 priv->ind_table->ind_tbl_num); 272 273 struct ibv_qp_init_attr_ex qp_attr_ex = { 274 .comp_mask = IBV_QP_INIT_ATTR_PD | 275 IBV_QP_INIT_ATTR_RX_HASH | 276 IBV_QP_INIT_ATTR_IND_TABLE, 277 .qp_type = IBV_QPT_RAW_PACKET, 278 .pd = priv->ib_parent_pd, 279 .rwq_ind_tbl = priv->ind_table, 280 .rx_hash_conf = { 281 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ, 282 .rx_hash_key_len = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES, 283 .rx_hash_key = mana_rss_hash_key_default, 284 .rx_hash_fields_mask = 285 IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4, 286 }, 287 288 }; 289 290 /* overwrite default if rss key is set */ 291 if (priv->rss_conf.rss_key_len && priv->rss_conf.rss_key) 292 qp_attr_ex.rx_hash_conf.rx_hash_key = 293 priv->rss_conf.rss_key; 294 295 /* overwrite default if rss hash fields are set */ 296 if (priv->rss_conf.rss_hf) { 297 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask = 0; 298 299 if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV4) 300 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 301 IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4; 302 303 if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV6) 304 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 305 IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_SRC_IPV6; 306 307 if (priv->rss_conf.rss_hf & 308 (RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV6_TCP)) 309 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 310 IBV_RX_HASH_SRC_PORT_TCP | 311 IBV_RX_HASH_DST_PORT_TCP; 312 313 if (priv->rss_conf.rss_hf & 314 (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV6_UDP)) 315 qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |= 316 IBV_RX_HASH_SRC_PORT_UDP | 317 IBV_RX_HASH_DST_PORT_UDP; 318 } 319 320 priv->rwq_qp = ibv_create_qp_ex(priv->ib_ctx, &qp_attr_ex); 321 if (!priv->rwq_qp) { 322 ret = -errno; 323 DRV_LOG(ERR, "rx ibv_create_qp_ex failed"); 324 goto fail; 325 } 326 327 for (i = 0; i < priv->num_queues; i++) { 328 struct mana_rxq *rxq = dev->data->rx_queues[i]; 329 struct manadv_obj obj = {}; 330 struct manadv_cq dv_cq; 331 struct manadv_rwq dv_wq; 332 333 obj.cq.in = rxq->cq; 334 obj.cq.out = &dv_cq; 335 obj.rwq.in = rxq->wq; 336 obj.rwq.out = &dv_wq; 337 ret = manadv_init_obj(&obj, MANADV_OBJ_CQ | MANADV_OBJ_RWQ); 338 if (ret) { 339 DRV_LOG(ERR, "manadv_init_obj failed ret %d", ret); 340 goto fail; 341 } 342 343 rxq->gdma_cq.buffer = obj.cq.out->buf; 344 rxq->gdma_cq.count = obj.cq.out->count; 345 rxq->gdma_cq.size = rxq->gdma_cq.count * COMP_ENTRY_SIZE; 346 rxq->gdma_cq.id = obj.cq.out->cq_id; 347 348 /* CQ head starts with count */ 349 rxq->gdma_cq.head = rxq->gdma_cq.count; 350 351 DRV_LOG(INFO, "rxq cq id %u buf %p count %u size %u", 352 rxq->gdma_cq.id, rxq->gdma_cq.buffer, 353 rxq->gdma_cq.count, rxq->gdma_cq.size); 354 355 priv->db_page = obj.rwq.out->db_page; 356 357 rxq->gdma_rq.buffer = obj.rwq.out->buf; 358 rxq->gdma_rq.count = obj.rwq.out->count; 359 rxq->gdma_rq.size = obj.rwq.out->size; 360 rxq->gdma_rq.id = obj.rwq.out->wq_id; 361 362 DRV_LOG(INFO, "rxq rq id %u buf %p count %u size %u", 363 rxq->gdma_rq.id, rxq->gdma_rq.buffer, 364 rxq->gdma_rq.count, rxq->gdma_rq.size); 365 } 366 367 for (i = 0; i < priv->num_queues; i++) { 368 ret = mana_alloc_and_post_rx_wqes(dev->data->rx_queues[i]); 369 if (ret) 370 goto fail; 371 } 372 373 return 0; 374 375 fail: 376 mana_stop_rx_queues(dev); 377 return ret; 378 } 379 380 uint16_t 381 mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 382 { 383 uint16_t pkt_received = 0; 384 uint8_t wqe_posted = 0; 385 struct mana_rxq *rxq = dpdk_rxq; 386 struct mana_priv *priv = rxq->priv; 387 struct gdma_comp comp; 388 struct rte_mbuf *mbuf; 389 int ret; 390 391 while (pkt_received < pkts_n && 392 gdma_poll_completion_queue(&rxq->gdma_cq, &comp) == 1) { 393 struct mana_rxq_desc *desc; 394 struct mana_rx_comp_oob *oob = 395 (struct mana_rx_comp_oob *)&comp.completion_data[0]; 396 397 if (comp.work_queue_number != rxq->gdma_rq.id) { 398 DRV_LOG(ERR, "rxq comp id mismatch wqid=0x%x rcid=0x%x", 399 comp.work_queue_number, rxq->gdma_rq.id); 400 rxq->stats.errors++; 401 break; 402 } 403 404 desc = &rxq->desc_ring[rxq->desc_ring_tail]; 405 rxq->gdma_rq.tail += desc->wqe_size_in_bu; 406 mbuf = desc->pkt; 407 408 switch (oob->cqe_hdr.cqe_type) { 409 case CQE_RX_OKAY: 410 /* Proceed to process mbuf */ 411 break; 412 413 case CQE_RX_TRUNCATED: 414 DRV_LOG(ERR, "Drop a truncated packet"); 415 rxq->stats.errors++; 416 rte_pktmbuf_free(mbuf); 417 goto drop; 418 419 case CQE_RX_COALESCED_4: 420 DRV_LOG(ERR, "RX coalescing is not supported"); 421 continue; 422 423 default: 424 DRV_LOG(ERR, "Unknown RX CQE type %d", 425 oob->cqe_hdr.cqe_type); 426 continue; 427 } 428 429 DRV_LOG(DEBUG, "mana_rx_comp_oob CQE_RX_OKAY rxq %p", rxq); 430 431 mbuf->data_off = RTE_PKTMBUF_HEADROOM; 432 mbuf->nb_segs = 1; 433 mbuf->next = NULL; 434 mbuf->pkt_len = oob->packet_info[0].packet_length; 435 mbuf->data_len = oob->packet_info[0].packet_length; 436 mbuf->port = priv->port_id; 437 438 if (oob->rx_ip_header_checksum_succeeded) 439 mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 440 441 if (oob->rx_ip_header_checksum_failed) 442 mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 443 444 if (oob->rx_outer_ip_header_checksum_failed) 445 mbuf->ol_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD; 446 447 if (oob->rx_tcp_checksum_succeeded || 448 oob->rx_udp_checksum_succeeded) 449 mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 450 451 if (oob->rx_tcp_checksum_failed || 452 oob->rx_udp_checksum_failed) 453 mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 454 455 if (oob->rx_hash_type == MANA_HASH_L3 || 456 oob->rx_hash_type == MANA_HASH_L4) { 457 mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 458 mbuf->hash.rss = oob->packet_info[0].packet_hash; 459 } 460 461 pkts[pkt_received++] = mbuf; 462 rxq->stats.packets++; 463 rxq->stats.bytes += mbuf->data_len; 464 465 drop: 466 rxq->desc_ring_tail++; 467 if (rxq->desc_ring_tail >= rxq->num_desc) 468 rxq->desc_ring_tail = 0; 469 470 /* Post another request */ 471 ret = mana_alloc_and_post_rx_wqe(rxq); 472 if (ret) { 473 DRV_LOG(ERR, "failed to post rx wqe ret=%d", ret); 474 break; 475 } 476 477 wqe_posted++; 478 } 479 480 if (wqe_posted) 481 mana_rq_ring_doorbell(rxq, wqe_posted); 482 483 return pkt_received; 484 } 485 486 static int 487 mana_arm_cq(struct mana_rxq *rxq, uint8_t arm) 488 { 489 struct mana_priv *priv = rxq->priv; 490 uint32_t head = rxq->gdma_cq.head % 491 (rxq->gdma_cq.count << COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE); 492 493 DRV_LOG(ERR, "Ringing completion queue ID %u head %u arm %d", 494 rxq->gdma_cq.id, head, arm); 495 496 return mana_ring_doorbell(priv->db_page, GDMA_QUEUE_COMPLETION, 497 rxq->gdma_cq.id, head, arm); 498 } 499 500 int 501 mana_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 502 { 503 struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id]; 504 505 return mana_arm_cq(rxq, 1); 506 } 507 508 int 509 mana_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) 510 { 511 struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id]; 512 struct ibv_cq *ev_cq; 513 void *ev_ctx; 514 int ret; 515 516 ret = ibv_get_cq_event(rxq->channel, &ev_cq, &ev_ctx); 517 if (ret) 518 ret = errno; 519 else if (ev_cq != rxq->cq) 520 ret = EINVAL; 521 522 if (ret) { 523 if (ret != EAGAIN) 524 DRV_LOG(ERR, "Can't disable RX intr queue %d", 525 rx_queue_id); 526 } else { 527 ibv_ack_cq_events(rxq->cq, 1); 528 } 529 530 return -ret; 531 } 532