/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */
#include <ethdev_driver.h>

#include <infiniband/verbs.h>
#include <infiniband/manadv.h>

#include "mana.h"

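/*
 * Default Toeplitz key used for RSS hashing when the application does not
 * supply its own (see the rx_hash_conf setup in mana_start_rx_queues).
 */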
static uint8_t mana_rss_hash_key_default[TOEPLITZ_HASH_KEY_SIZE_IN_BYTES] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

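/*
 * Ring the RQ doorbell to tell hardware about newly posted receive WQEs.
 * Secondary processes must use their own mapping of the doorbell page.
 */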
int
mana_rq_ring_doorbell(struct mana_rxq *rxq, uint8_t arm)
{
	struct mana_priv *priv = rxq->priv;
	int ret;
	void *db_page = priv->db_page;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[priv->dev_data->port_id];
		struct mana_process_priv *process_priv = dev->process_private;

		db_page = process_priv->db_page;
	}

	ret = mana_ring_doorbell(db_page, GDMA_QUEUE_RECEIVE,
			 rxq->gdma_rq.id,
			 rxq->gdma_rq.head * GDMA_WQE_ALIGNMENT_UNIT_SIZE,
			 arm);

	if (ret)
		DP_LOG(ERR, "failed to ring RX doorbell ret %d", ret);

	return ret;
}

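/*
 * Allocate an mbuf from the Rx mempool and post it to the receive queue as
 * a single-SGE work request. The mbuf is tracked in the descriptor ring so
 * it can be handed to the application (or freed) when a completion arrives.
 */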
static int
mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
{
	struct rte_mbuf *mbuf = NULL;
	struct gdma_sgl_element sgl[1];
	struct gdma_work_request request;
	uint32_t wqe_size_in_bu;
	struct mana_priv *priv = rxq->priv;
	int ret;
	struct mana_mr_cache *mr;

	mbuf = rte_pktmbuf_alloc(rxq->mp);
	if (!mbuf) {
		rxq->stats.nombuf++;
		return -ENOMEM;
	}

	mr = mana_find_pmd_mr(&rxq->mr_btree, priv, mbuf);
	if (!mr) {
		DP_LOG(ERR, "failed to register RX MR");
		rte_pktmbuf_free(mbuf);
		return -ENOMEM;
	}

	request.gdma_header.struct_size = sizeof(request);

	sgl[0].address = rte_cpu_to_le_64(rte_pktmbuf_mtod(mbuf, uint64_t));
	sgl[0].memory_key = mr->lkey;
	sgl[0].size =
		rte_pktmbuf_data_room_size(rxq->mp) -
		RTE_PKTMBUF_HEADROOM;

	request.sgl = sgl;
	request.num_sgl_elements = 1;
	request.inline_oob_data = NULL;
	request.inline_oob_size_in_bytes = 0;
	request.flags = 0;
	request.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;

	ret = gdma_post_work_request(&rxq->gdma_rq, &request, &wqe_size_in_bu);
	if (!ret) {
		struct mana_rxq_desc *desc =
			&rxq->desc_ring[rxq->desc_ring_head];

		/* update queue for tracking pending packets */
		desc->pkt = mbuf;
		desc->wqe_size_in_bu = wqe_size_in_bu;
		rxq->desc_ring_head = (rxq->desc_ring_head + 1) % rxq->num_desc;
	} else {
		DP_LOG(DEBUG, "failed to post recv ret %d", ret);
		/* Don't leak the mbuf if posting the WQE failed */
		rte_pktmbuf_free(mbuf);
		return ret;
	}

	return 0;
}

/*
 * Post the initial batch of work requests for a Rx queue and ring its
 * doorbell once for the whole batch.
 */
static int
mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq)
{
	int ret;
	uint32_t i;

	for (i = 0; i < rxq->num_desc; i++) {
		ret = mana_alloc_and_post_rx_wqe(rxq);
		if (ret) {
			DP_LOG(ERR, "failed to post RX ret = %d", ret);
			return ret;
		}
	}

	mana_rq_ring_doorbell(rxq, rxq->num_desc);

	return 0;
}

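/*
 * Release all Rx resources: the RSS QP, the RWQ indirection table and, for
 * each queue, its WQ, CQ and completion channel, plus any mbufs still
 * posted to the hardware.
 */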
int
mana_stop_rx_queues(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret, i;

	if (priv->rwq_qp) {
		ret = ibv_destroy_qp(priv->rwq_qp);
		if (ret)
			DRV_LOG(ERR, "rx_queue destroy_qp failed %d", ret);
		priv->rwq_qp = NULL;
	}

	if (priv->ind_table) {
		ret = ibv_destroy_rwq_ind_table(priv->ind_table);
		if (ret)
			DRV_LOG(ERR, "destroy rwq ind table failed %d", ret);
		priv->ind_table = NULL;
	}

	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (rxq->wq) {
			ret = ibv_destroy_wq(rxq->wq);
			if (ret)
				DRV_LOG(ERR,
					"rx_queue destroy_wq failed %d", ret);
			rxq->wq = NULL;
		}

		if (rxq->cq) {
			ret = ibv_destroy_cq(rxq->cq);
			if (ret)
				DRV_LOG(ERR,
					"rx_queue destroy_cq failed %d", ret);
			rxq->cq = NULL;

			if (rxq->channel) {
				ret = ibv_destroy_comp_channel(rxq->channel);
				if (ret)
					DRV_LOG(ERR, "failed destroy comp %d",
						ret);
				rxq->channel = NULL;
			}
		}

		/* Drain and free posted WQEs */
		while (rxq->desc_ring_tail != rxq->desc_ring_head) {
			struct mana_rxq_desc *desc =
				&rxq->desc_ring[rxq->desc_ring_tail];

			rte_pktmbuf_free(desc->pkt);

			rxq->desc_ring_tail =
				(rxq->desc_ring_tail + 1) % rxq->num_desc;
		}
		rxq->desc_ring_head = 0;
		rxq->desc_ring_tail = 0;

		memset(&rxq->gdma_rq, 0, sizeof(rxq->gdma_rq));
		memset(&rxq->gdma_cq, 0, sizeof(rxq->gdma_cq));
	}
	return 0;
}

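/*
 * Set up the Rx data path: create a CQ and WQ for every queue, build the
 * RWQ indirection table, create the RSS QP with Toeplitz hashing, query the
 * underlying GDMA queue objects through manadv and post the initial WQEs.
 */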
int
mana_start_rx_queues(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret, i;
	struct ibv_wq *ind_tbl[priv->num_queues];

	DRV_LOG(INFO, "start rx queues");
	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];
		struct ibv_wq_init_attr wq_attr = {};

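		/*
		 * Route verbs buffer allocations through the PMD allocator
		 * so queue memory can be placed on this queue's socket.
		 */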
		manadv_set_context_attr(priv->ib_ctx,
			MANADV_CTX_ATTR_BUF_ALLOCATORS,
			(void *)((uintptr_t)&(struct manadv_ctx_allocators){
				.alloc = &mana_alloc_verbs_buf,
				.free = &mana_free_verbs_buf,
				.data = (void *)(uintptr_t)rxq->socket,
			}));

		if (dev->data->dev_conf.intr_conf.rxq) {
			rxq->channel = ibv_create_comp_channel(priv->ib_ctx);
			if (!rxq->channel) {
				ret = -errno;
				DRV_LOG(ERR, "Queue %d comp channel failed", i);
				goto fail;
			}

			ret = mana_fd_set_non_blocking(rxq->channel->fd);
			if (ret) {
				DRV_LOG(ERR, "Failed to set comp non-blocking");
				goto fail;
			}
		}

		rxq->cq = ibv_create_cq(priv->ib_ctx, rxq->num_desc,
					NULL, rxq->channel,
					rxq->channel ? i : 0);
		if (!rxq->cq) {
			ret = -errno;
			DRV_LOG(ERR, "failed to create rx cq queue %d", i);
			goto fail;
		}

		wq_attr.wq_type = IBV_WQT_RQ;
		wq_attr.max_wr = rxq->num_desc;
		wq_attr.max_sge = 1;
		wq_attr.pd = priv->ib_parent_pd;
		wq_attr.cq = rxq->cq;

		rxq->wq = ibv_create_wq(priv->ib_ctx, &wq_attr);
		if (!rxq->wq) {
			ret = -errno;
			DRV_LOG(ERR, "failed to create rx wq %d", i);
			goto fail;
		}

		ind_tbl[i] = rxq->wq;
	}

	struct ibv_rwq_ind_table_init_attr ind_table_attr = {
		.log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
		.ind_tbl = ind_tbl,
		.comp_mask = 0,
	};

	priv->ind_table = ibv_create_rwq_ind_table(priv->ib_ctx,
						   &ind_table_attr);
	if (!priv->ind_table) {
		ret = -errno;
		DRV_LOG(ERR, "failed to create ind_table ret %d", ret);
		goto fail;
	}

	DRV_LOG(INFO, "ind_table handle %d num %d",
		priv->ind_table->ind_tbl_handle,
		priv->ind_table->ind_tbl_num);

	struct ibv_qp_init_attr_ex qp_attr_ex = {
		.comp_mask = IBV_QP_INIT_ATTR_PD |
			     IBV_QP_INIT_ATTR_RX_HASH |
			     IBV_QP_INIT_ATTR_IND_TABLE,
		.qp_type = IBV_QPT_RAW_PACKET,
		.pd = priv->ib_parent_pd,
		.rwq_ind_tbl = priv->ind_table,
		.rx_hash_conf = {
			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES,
			.rx_hash_key = mana_rss_hash_key_default,
			.rx_hash_fields_mask =
				IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
		},
	};

	/* Overwrite the default RSS key if the application has set one */
	if (priv->rss_conf.rss_key_len && priv->rss_conf.rss_key)
		qp_attr_ex.rx_hash_conf.rx_hash_key =
			priv->rss_conf.rss_key;

	/* Overwrite the default hash fields if the application has set them */
	if (priv->rss_conf.rss_hf) {
		qp_attr_ex.rx_hash_conf.rx_hash_fields_mask = 0;

		if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV4)
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4;

		if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV6)
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6;

		if (priv->rss_conf.rss_hf &
		    (RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV6_TCP))
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP;

		if (priv->rss_conf.rss_hf &
		    (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV6_UDP))
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP;
	}

	priv->rwq_qp = ibv_create_qp_ex(priv->ib_ctx, &qp_attr_ex);
	if (!priv->rwq_qp) {
		ret = -errno;
		DRV_LOG(ERR, "rx ibv_create_qp_ex failed");
		goto fail;
	}

	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];
		struct manadv_obj obj = {};
		struct manadv_cq dv_cq;
		struct manadv_rwq dv_wq;

		obj.cq.in = rxq->cq;
		obj.cq.out = &dv_cq;
		obj.rwq.in = rxq->wq;
		obj.rwq.out = &dv_wq;
		ret = manadv_init_obj(&obj, MANADV_OBJ_CQ | MANADV_OBJ_RWQ);
		if (ret) {
			DRV_LOG(ERR, "manadv_init_obj failed ret %d", ret);
			goto fail;
		}

		rxq->gdma_cq.buffer = obj.cq.out->buf;
		rxq->gdma_cq.count = obj.cq.out->count;
		rxq->gdma_cq.size = rxq->gdma_cq.count * COMP_ENTRY_SIZE;
		rxq->gdma_cq.id = obj.cq.out->cq_id;

		/* CQ head starts with count */
		rxq->gdma_cq.head = rxq->gdma_cq.count;

		DRV_LOG(INFO, "rxq cq id %u buf %p count %u size %u",
			rxq->gdma_cq.id, rxq->gdma_cq.buffer,
			rxq->gdma_cq.count, rxq->gdma_cq.size);

		priv->db_page = obj.rwq.out->db_page;

		rxq->gdma_rq.buffer = obj.rwq.out->buf;
		rxq->gdma_rq.count = obj.rwq.out->count;
		rxq->gdma_rq.size = obj.rwq.out->size;
		rxq->gdma_rq.id = obj.rwq.out->wq_id;

		DRV_LOG(INFO, "rxq rq id %u buf %p count %u size %u",
			rxq->gdma_rq.id, rxq->gdma_rq.buffer,
			rxq->gdma_rq.count, rxq->gdma_rq.size);

		rxq->comp_buf_len = 0;
		rxq->comp_buf_idx = 0;
		rxq->backlog_idx = 0;
	}

	for (i = 0; i < priv->num_queues; i++) {
		ret = mana_alloc_and_post_rx_wqes(dev->data->rx_queues[i]);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	mana_stop_rx_queues(dev);
	return ret;
}

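/*
 * Receive burst: process pending completions, hand received mbufs to the
 * application, and replenish the receive queue with fresh mbufs, ringing
 * the doorbell once for all WQEs posted in this burst.
 */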
uint16_t
mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	uint16_t pkt_received = 0;
	uint16_t wqe_posted = 0;
	struct mana_rxq *rxq = dpdk_rxq;
	struct mana_priv *priv = rxq->priv;
	struct rte_mbuf *mbuf;
	int ret;
	uint32_t pkt_idx = rxq->backlog_idx;
	uint32_t pkt_len;
	uint32_t i;
	int polled = 0;

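	/*
	 * A single CQE can describe several packets (CQE_RX_COALESCED_4).
	 * backlog_idx records how far into the current CQE the previous
	 * burst got, so processing resumes there before polling for new
	 * completions.
	 */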
repoll:
	/* Poll for new completions if there is no backlog of CQEs */
	if (rxq->comp_buf_idx == rxq->comp_buf_len) {
		RTE_ASSERT(!pkt_idx);
		rxq->comp_buf_len =
			gdma_poll_completion_queue(&rxq->gdma_cq,
						   rxq->gdma_comp_buf, pkts_n);
		rxq->comp_buf_idx = 0;
		polled = 1;
	}

	i = rxq->comp_buf_idx;
	while (i < rxq->comp_buf_len) {
		struct mana_rx_comp_oob *oob = (struct mana_rx_comp_oob *)
			rxq->gdma_comp_buf[i].cqe_data;
		struct mana_rxq_desc *desc =
			&rxq->desc_ring[rxq->desc_ring_tail];

		mbuf = desc->pkt;

		switch (oob->cqe_hdr.cqe_type) {
		case CQE_RX_OKAY:
		case CQE_RX_COALESCED_4:
			/* Proceed to process mbuf */
			break;

		case CQE_RX_TRUNCATED:
		default:
			DP_LOG(ERR, "RX CQE type %d client %d vendor %d",
			       oob->cqe_hdr.cqe_type, oob->cqe_hdr.client_type,
			       oob->cqe_hdr.vendor_err);

			rxq->stats.errors++;
			rte_pktmbuf_free(mbuf);

			i++;
			goto drop;
		}

		DP_LOG(DEBUG, "mana_rx_comp_oob type %d rxq %p",
		       oob->cqe_hdr.cqe_type, rxq);

		pkt_len = oob->packet_info[pkt_idx].packet_length;
		if (!pkt_len) {
			/* Move on to the next completion */
			pkt_idx = 0;
			i++;
			continue;
		}

		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
		mbuf->nb_segs = 1;
		mbuf->next = NULL;
		mbuf->data_len = pkt_len;
		mbuf->pkt_len = pkt_len;
		mbuf->port = priv->port_id;

		if (oob->rx_ip_header_checksum_succeeded)
			mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

		if (oob->rx_ip_header_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;

		if (oob->rx_outer_ip_header_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;

		if (oob->rx_tcp_checksum_succeeded ||
		    oob->rx_udp_checksum_succeeded)
			mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;

		if (oob->rx_tcp_checksum_failed ||
		    oob->rx_udp_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;

		if (oob->rx_hash_type == MANA_HASH_L3 ||
		    oob->rx_hash_type == MANA_HASH_L4) {
			mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			mbuf->hash.rss = oob->packet_info[pkt_idx].packet_hash;
		}

		pkts[pkt_received++] = mbuf;
		rxq->stats.packets++;
		rxq->stats.bytes += mbuf->data_len;

		pkt_idx++;
		/* Move on to the next completion once all of its packets are processed */
		if (pkt_idx >= RX_COM_OOB_NUM_PACKETINFO_SEGMENTS) {
			pkt_idx = 0;
			i++;
		}

drop:
		rxq->desc_ring_tail++;
		if (rxq->desc_ring_tail >= rxq->num_desc)
			rxq->desc_ring_tail = 0;

		rxq->gdma_rq.tail += desc->wqe_size_in_bu;

		/* Consume this request and post another request */
		ret = mana_alloc_and_post_rx_wqe(rxq);
		if (ret) {
			DP_LOG(ERR, "failed to post rx wqe ret=%d", ret);
			break;
		}

		wqe_posted++;
		if (pkt_received == pkts_n)
			break;
	}

	rxq->backlog_idx = pkt_idx;
	rxq->comp_buf_idx = i;

	/* If all buffered CQEs have been processed but the burst is not yet
	 * full, poll the completion queue again: the poll above may have been
	 * skipped because CQEs left over from the previous rx_burst were
	 * still being processed.
	 */
	if (pkt_received < pkts_n && !polled) {
		polled = 1;
		goto repoll;
	}

	if (wqe_posted)
		mana_rq_ring_doorbell(rxq, wqe_posted);

	return pkt_received;
}

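/*
 * Arm the completion queue so the hardware signals the next completion on
 * the queue's event channel. The head written to the doorbell is taken
 * modulo the queue size extended by the owner-bit space.
 */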
static int
mana_arm_cq(struct mana_rxq *rxq, uint8_t arm)
{
	struct mana_priv *priv = rxq->priv;
	uint32_t head = rxq->gdma_cq.head %
		(rxq->gdma_cq.count << COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE);

	DP_LOG(DEBUG, "Ringing completion queue ID %u head %u arm %d",
	       rxq->gdma_cq.id, head, arm);

	return mana_ring_doorbell(priv->db_page, GDMA_QUEUE_COMPLETION,
				  rxq->gdma_cq.id, head, arm);
}

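/* Enable Rx interrupts on a queue by arming its completion queue. */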
int
mana_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id];

	return mana_arm_cq(rxq, 1);
}

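/*
 * Disable Rx interrupts on a queue by draining the pending event from its
 * completion channel and acknowledging it, without re-arming the CQ.
 */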
int
mana_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id];
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret;

	ret = ibv_get_cq_event(rxq->channel, &ev_cq, &ev_ctx);
	if (ret)
		ret = errno;
	else if (ev_cq != rxq->cq)
		ret = EINVAL;

	if (ret) {
		if (ret != EAGAIN)
			DP_LOG(ERR, "Can't disable RX intr queue %d",
			       rx_queue_id);
	} else {
		ibv_ack_cq_events(rxq->cq, 1);
	}

	return -ret;
}