xref: /dpdk/drivers/net/mana/rx.c (revision afd5d170727e94d8c3dda5e7fd21c14f94d107bf)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2022 Microsoft Corporation
 */
#include <ethdev_driver.h>

#include <infiniband/verbs.h>
#include <infiniband/manadv.h>

#include "mana.h"

static uint8_t mana_rss_hash_key_default[TOEPLITZ_HASH_KEY_SIZE_IN_BYTES] = {
	0x2c, 0xc6, 0x81, 0xd1,
	0x5b, 0xdb, 0xf4, 0xf7,
	0xfc, 0xa2, 0x83, 0x19,
	0xdb, 0x1a, 0x3e, 0x94,
	0x6b, 0x9e, 0x38, 0xd9,
	0x2c, 0x9c, 0x03, 0xd1,
	0xad, 0x99, 0x44, 0xa7,
	0xd9, 0x56, 0x3d, 0x59,
	0x06, 0x3c, 0x25, 0xf3,
	0xfc, 0x1f, 0xdc, 0x2a,
};

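/*
 * Ring the hardware doorbell for a receive queue so the device picks up
 * newly posted WQEs. In a secondary process the per-process mapping of the
 * doorbell page is used instead of the primary's.
 */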
int
mana_rq_ring_doorbell(struct mana_rxq *rxq, uint8_t arm)
{
	struct mana_priv *priv = rxq->priv;
	int ret;
	void *db_page = priv->db_page;

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct rte_eth_dev *dev =
			&rte_eth_devices[priv->dev_data->port_id];
		struct mana_process_priv *process_priv = dev->process_private;

		db_page = process_priv->db_page;
	}

	ret = mana_ring_doorbell(db_page, GDMA_QUEUE_RECEIVE,
			 rxq->gdma_rq.id,
			 rxq->gdma_rq.head * GDMA_WQE_ALIGNMENT_UNIT_SIZE,
			 arm);

	if (ret)
		DRV_LOG(ERR, "failed to ring RX doorbell ret %d", ret);

	return ret;
}

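/*
 * Allocate one mbuf from the Rx mempool, look up (or register) its memory
 * region, and post a single-SGE receive WQE to the GDMA receive queue.
 * The mbuf is recorded in the descriptor ring so it can be handed to the
 * application when its completion arrives.
 */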
static int
mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
{
	struct rte_mbuf *mbuf = NULL;
	struct gdma_sgl_element sgl[1];
	struct gdma_work_request request = {0};
	struct gdma_posted_wqe_info wqe_info = {0};
	struct mana_priv *priv = rxq->priv;
	int ret;
	struct mana_mr_cache *mr;

	mbuf = rte_pktmbuf_alloc(rxq->mp);
	if (!mbuf) {
		rxq->stats.nombuf++;
		return -ENOMEM;
	}

	mr = mana_find_pmd_mr(&rxq->mr_btree, priv, mbuf);
	if (!mr) {
		DRV_LOG(ERR, "failed to register RX MR");
		rte_pktmbuf_free(mbuf);
		return -ENOMEM;
	}

	request.gdma_header.struct_size = sizeof(request);
	wqe_info.gdma_header.struct_size = sizeof(wqe_info);

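	/*
	 * A single SGE covers the whole mbuf data buffer: the device may
	 * write up to the data room size minus the reserved headroom.
	 */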
	sgl[0].address = rte_cpu_to_le_64(rte_pktmbuf_mtod(mbuf, uint64_t));
	sgl[0].memory_key = mr->lkey;
	sgl[0].size =
		rte_pktmbuf_data_room_size(rxq->mp) -
		RTE_PKTMBUF_HEADROOM;

	request.sgl = sgl;
	request.num_sgl_elements = 1;
	request.inline_oob_data = NULL;
	request.inline_oob_size_in_bytes = 0;
	request.flags = 0;
	request.client_data_unit = NOT_USING_CLIENT_DATA_UNIT;

	ret = gdma_post_work_request(&rxq->gdma_rq, &request, &wqe_info);
	if (!ret) {
		struct mana_rxq_desc *desc =
			&rxq->desc_ring[rxq->desc_ring_head];

		/* update queue for tracking pending packets */
		desc->pkt = mbuf;
		desc->wqe_size_in_bu = wqe_info.wqe_size_in_bu;
		rxq->desc_ring_head = (rxq->desc_ring_head + 1) % rxq->num_desc;
	} else {
		DRV_LOG(ERR, "failed to post recv ret %d", ret);
		rte_pktmbuf_free(mbuf);
		return ret;
	}

	return 0;
}

/*
 * Post work requests for a Rx queue and ring the doorbell once for the
 * whole batch.
 */
static int
mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq)
{
	int ret;
	uint32_t i;

	for (i = 0; i < rxq->num_desc; i++) {
		ret = mana_alloc_and_post_rx_wqe(rxq);
		if (ret) {
			DRV_LOG(ERR, "failed to post RX ret = %d", ret);
			return ret;
		}
	}

	mana_rq_ring_doorbell(rxq, rxq->num_desc);

	return 0;
}

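/*
 * Tear down all Rx resources: the RSS QP, the indirection table and, per
 * queue, the work queue, completion queue and completion channel. Any mbufs
 * still posted to the hardware are drained from the descriptor ring and
 * freed.
 */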
int
mana_stop_rx_queues(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret, i;

	if (priv->rwq_qp) {
		ret = ibv_destroy_qp(priv->rwq_qp);
		if (ret)
			DRV_LOG(ERR, "rx_queue destroy_qp failed %d", ret);
		priv->rwq_qp = NULL;
	}

	if (priv->ind_table) {
		ret = ibv_destroy_rwq_ind_table(priv->ind_table);
		if (ret)
			DRV_LOG(ERR, "destroy rwq ind table failed %d", ret);
		priv->ind_table = NULL;
	}

	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];

		if (rxq->wq) {
			ret = ibv_destroy_wq(rxq->wq);
			if (ret)
				DRV_LOG(ERR,
					"rx_queue destroy_wq failed %d", ret);
			rxq->wq = NULL;
		}

		if (rxq->cq) {
			ret = ibv_destroy_cq(rxq->cq);
			if (ret)
				DRV_LOG(ERR,
					"rx_queue destroy_cq failed %d", ret);
			rxq->cq = NULL;

			if (rxq->channel) {
				ret = ibv_destroy_comp_channel(rxq->channel);
				if (ret)
					DRV_LOG(ERR, "failed destroy comp %d",
						ret);
				rxq->channel = NULL;
			}
		}

		/* Drain and free posted WQEs */
		while (rxq->desc_ring_tail != rxq->desc_ring_head) {
			struct mana_rxq_desc *desc =
				&rxq->desc_ring[rxq->desc_ring_tail];

			rte_pktmbuf_free(desc->pkt);

			rxq->desc_ring_tail =
				(rxq->desc_ring_tail + 1) % rxq->num_desc;
		}
		rxq->desc_ring_head = 0;
		rxq->desc_ring_tail = 0;

		memset(&rxq->gdma_rq, 0, sizeof(rxq->gdma_rq));
		memset(&rxq->gdma_cq, 0, sizeof(rxq->gdma_cq));
	}
	return 0;
}

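/*
 * Bring up all Rx queues: create a completion queue and work queue per
 * queue, aggregate the work queues into an RSS indirection table behind a
 * RAW_PACKET hash QP, query the underlying GDMA queue layout through
 * manadv, and finally post the initial receive WQEs.
 */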
int
mana_start_rx_queues(struct rte_eth_dev *dev)
{
	struct mana_priv *priv = dev->data->dev_private;
	int ret, i;
	struct ibv_wq *ind_tbl[priv->num_queues];

	DRV_LOG(INFO, "start rx queues");
	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];
		struct ibv_wq_init_attr wq_attr = {};

		manadv_set_context_attr(priv->ib_ctx,
			MANADV_CTX_ATTR_BUF_ALLOCATORS,
			(void *)((uintptr_t)&(struct manadv_ctx_allocators){
				.alloc = &mana_alloc_verbs_buf,
				.free = &mana_free_verbs_buf,
				.data = (void *)(uintptr_t)rxq->socket,
			}));

		if (dev->data->dev_conf.intr_conf.rxq) {
			rxq->channel = ibv_create_comp_channel(priv->ib_ctx);
			if (!rxq->channel) {
				ret = -errno;
				DRV_LOG(ERR, "Queue %d comp channel failed", i);
				goto fail;
			}

			ret = mana_fd_set_non_blocking(rxq->channel->fd);
			if (ret) {
				DRV_LOG(ERR, "Failed to set comp non-blocking");
				goto fail;
			}
		}

		rxq->cq = ibv_create_cq(priv->ib_ctx, rxq->num_desc,
					NULL, rxq->channel,
					rxq->channel ? i : 0);
		if (!rxq->cq) {
			ret = -errno;
			DRV_LOG(ERR, "failed to create rx cq queue %d", i);
			goto fail;
		}

		wq_attr.wq_type = IBV_WQT_RQ;
		wq_attr.max_wr = rxq->num_desc;
		wq_attr.max_sge = 1;
		wq_attr.pd = priv->ib_parent_pd;
		wq_attr.cq = rxq->cq;

		rxq->wq = ibv_create_wq(priv->ib_ctx, &wq_attr);
		if (!rxq->wq) {
			ret = -errno;
			DRV_LOG(ERR, "failed to create rx wq %d", i);
			goto fail;
		}

		ind_tbl[i] = rxq->wq;
	}

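	/*
	 * Aggregate the per-queue work queues into an RSS indirection table
	 * so the hash QP below can spread incoming traffic across them.
	 */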
	struct ibv_rwq_ind_table_init_attr ind_table_attr = {
		.log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
		.ind_tbl = ind_tbl,
		.comp_mask = 0,
	};

	priv->ind_table = ibv_create_rwq_ind_table(priv->ib_ctx,
						   &ind_table_attr);
	if (!priv->ind_table) {
		ret = -errno;
		DRV_LOG(ERR, "failed to create ind_table ret %d", ret);
		goto fail;
	}

	DRV_LOG(INFO, "ind_table handle %d num %d",
		priv->ind_table->ind_tbl_handle,
		priv->ind_table->ind_tbl_num);

	struct ibv_qp_init_attr_ex qp_attr_ex = {
		.comp_mask = IBV_QP_INIT_ATTR_PD |
			     IBV_QP_INIT_ATTR_RX_HASH |
			     IBV_QP_INIT_ATTR_IND_TABLE,
		.qp_type = IBV_QPT_RAW_PACKET,
		.pd = priv->ib_parent_pd,
		.rwq_ind_tbl = priv->ind_table,
		.rx_hash_conf = {
			.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
			.rx_hash_key_len = TOEPLITZ_HASH_KEY_SIZE_IN_BYTES,
			.rx_hash_key = mana_rss_hash_key_default,
			.rx_hash_fields_mask =
				IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
		},
	};

	/* overwrite default if rss key is set */
	if (priv->rss_conf.rss_key_len && priv->rss_conf.rss_key)
		qp_attr_ex.rx_hash_conf.rx_hash_key =
			priv->rss_conf.rss_key;

	/* overwrite default if rss hash fields are set */
	if (priv->rss_conf.rss_hf) {
		qp_attr_ex.rx_hash_conf.rx_hash_fields_mask = 0;

		if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV4)
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4;

		if (priv->rss_conf.rss_hf & RTE_ETH_RSS_IPV6)
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6;

		if (priv->rss_conf.rss_hf &
		    (RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV6_TCP))
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP;

		if (priv->rss_conf.rss_hf &
		    (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV6_UDP))
			qp_attr_ex.rx_hash_conf.rx_hash_fields_mask |=
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP;
	}

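	/*
	 * Create the RAW_PACKET QP that applies the Toeplitz hash and steers
	 * received packets to the per-queue WQs through the indirection table.
	 */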
	priv->rwq_qp = ibv_create_qp_ex(priv->ib_ctx, &qp_attr_ex);
	if (!priv->rwq_qp) {
		ret = -errno;
		DRV_LOG(ERR, "rx ibv_create_qp_ex failed");
		goto fail;
	}

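	/*
	 * Use manadv to expose the GDMA queue internals (ring buffers, sizes,
	 * hardware IDs and the doorbell page) so the PMD can process
	 * completions and post WQEs directly in the data path.
	 */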
	for (i = 0; i < priv->num_queues; i++) {
		struct mana_rxq *rxq = dev->data->rx_queues[i];
		struct manadv_obj obj = {};
		struct manadv_cq dv_cq;
		struct manadv_rwq dv_wq;

		obj.cq.in = rxq->cq;
		obj.cq.out = &dv_cq;
		obj.rwq.in = rxq->wq;
		obj.rwq.out = &dv_wq;
		ret = manadv_init_obj(&obj, MANADV_OBJ_CQ | MANADV_OBJ_RWQ);
		if (ret) {
			DRV_LOG(ERR, "manadv_init_obj failed ret %d", ret);
			goto fail;
		}

		rxq->gdma_cq.buffer = obj.cq.out->buf;
		rxq->gdma_cq.count = obj.cq.out->count;
		rxq->gdma_cq.size = rxq->gdma_cq.count * COMP_ENTRY_SIZE;
		rxq->gdma_cq.id = obj.cq.out->cq_id;

		/* CQ head starts with count */
		rxq->gdma_cq.head = rxq->gdma_cq.count;

		DRV_LOG(INFO, "rxq cq id %u buf %p count %u size %u",
			rxq->gdma_cq.id, rxq->gdma_cq.buffer,
			rxq->gdma_cq.count, rxq->gdma_cq.size);

		priv->db_page = obj.rwq.out->db_page;

		rxq->gdma_rq.buffer = obj.rwq.out->buf;
		rxq->gdma_rq.count = obj.rwq.out->count;
		rxq->gdma_rq.size = obj.rwq.out->size;
		rxq->gdma_rq.id = obj.rwq.out->wq_id;

		DRV_LOG(INFO, "rxq rq id %u buf %p count %u size %u",
			rxq->gdma_rq.id, rxq->gdma_rq.buffer,
			rxq->gdma_rq.count, rxq->gdma_rq.size);
	}

	for (i = 0; i < priv->num_queues; i++) {
		ret = mana_alloc_and_post_rx_wqes(dev->data->rx_queues[i]);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	mana_stop_rx_queues(dev);
	return ret;
}

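/*
 * Burst receive: poll the completion queue, turn each RX completion into an
 * mbuf with offload flags and RSS hash filled in, immediately repost a
 * replacement WQE, and ring the RQ doorbell once at the end of the burst.
 */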
uint16_t
mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	uint16_t pkt_received = 0;
	uint8_t wqe_posted = 0;
	struct mana_rxq *rxq = dpdk_rxq;
	struct mana_priv *priv = rxq->priv;
	struct gdma_comp comp;
	struct rte_mbuf *mbuf;
	int ret;

	while (pkt_received < pkts_n &&
	       gdma_poll_completion_queue(&rxq->gdma_cq, &comp) == 1) {
		struct mana_rxq_desc *desc;
		struct mana_rx_comp_oob *oob =
			(struct mana_rx_comp_oob *)&comp.completion_data[0];

		if (comp.work_queue_number != rxq->gdma_rq.id) {
			DRV_LOG(ERR, "rxq comp id mismatch wqid=0x%x rcid=0x%x",
				comp.work_queue_number, rxq->gdma_rq.id);
			rxq->stats.errors++;
			break;
		}

		desc = &rxq->desc_ring[rxq->desc_ring_tail];
		rxq->gdma_rq.tail += desc->wqe_size_in_bu;
		mbuf = desc->pkt;

		switch (oob->cqe_hdr.cqe_type) {
		case CQE_RX_OKAY:
			/* Proceed to process mbuf */
			break;

		case CQE_RX_TRUNCATED:
			DRV_LOG(ERR, "Drop a truncated packet");
			rxq->stats.errors++;
			rte_pktmbuf_free(mbuf);
			goto drop;

		case CQE_RX_COALESCED_4:
			DRV_LOG(ERR, "RX coalescing is not supported");
			rxq->stats.errors++;
			rte_pktmbuf_free(mbuf);
			goto drop;

		default:
			DRV_LOG(ERR, "Unknown RX CQE type %d",
				oob->cqe_hdr.cqe_type);
			rxq->stats.errors++;
			rte_pktmbuf_free(mbuf);
			goto drop;
		}

		DRV_LOG(DEBUG, "mana_rx_comp_oob CQE_RX_OKAY rxq %p", rxq);

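		/*
		 * Populate the mbuf from the out-of-band completion data:
		 * packet length, checksum results and the RSS hash.
		 */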
		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
		mbuf->nb_segs = 1;
		mbuf->next = NULL;
		mbuf->pkt_len = oob->packet_info[0].packet_length;
		mbuf->data_len = oob->packet_info[0].packet_length;
		mbuf->port = priv->port_id;

		if (oob->rx_ip_header_checksum_succeeded)
			mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

		if (oob->rx_ip_header_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;

		if (oob->rx_outer_ip_header_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;

		if (oob->rx_tcp_checksum_succeeded ||
		    oob->rx_udp_checksum_succeeded)
			mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;

		if (oob->rx_tcp_checksum_failed ||
		    oob->rx_udp_checksum_failed)
			mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;

		if (oob->rx_hash_type == MANA_HASH_L3 ||
		    oob->rx_hash_type == MANA_HASH_L4) {
			mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			mbuf->hash.rss = oob->packet_info[0].packet_hash;
		}

		pkts[pkt_received++] = mbuf;
		rxq->stats.packets++;
		rxq->stats.bytes += mbuf->data_len;

drop:
		rxq->desc_ring_tail++;
		if (rxq->desc_ring_tail >= rxq->num_desc)
			rxq->desc_ring_tail = 0;

		/* Post another request */
		ret = mana_alloc_and_post_rx_wqe(rxq);
		if (ret) {
			DRV_LOG(ERR, "failed to post rx wqe ret=%d", ret);
			break;
		}

		wqe_posted++;
	}

	if (wqe_posted)
		mana_rq_ring_doorbell(rxq, wqe_posted);

	return pkt_received;
}

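/*
 * Arm the completion queue so the device signals the completion channel on
 * the next completion. The head value written to the doorbell wraps at
 * count << COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE so the wrap (owner) state
 * is preserved for the hardware.
 */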
static int
mana_arm_cq(struct mana_rxq *rxq, uint8_t arm)
{
	struct mana_priv *priv = rxq->priv;
	uint32_t head = rxq->gdma_cq.head %
		(rxq->gdma_cq.count << COMPLETION_QUEUE_ENTRY_OWNER_BITS_SIZE);

	DRV_LOG(DEBUG, "Ringing completion queue ID %u head %u arm %d",
		rxq->gdma_cq.id, head, arm);

	return mana_ring_doorbell(priv->db_page, GDMA_QUEUE_COMPLETION,
				  rxq->gdma_cq.id, head, arm);
}

int
mana_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id];

	return mana_arm_cq(rxq, 1);
}

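/*
 * Disable the Rx interrupt by consuming and acknowledging the pending CQ
 * event on the completion channel. EAGAIN from a non-blocking channel means
 * no event was pending and is not reported as an error.
 */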
int
mana_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mana_rxq *rxq = dev->data->rx_queues[rx_queue_id];
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret;

	ret = ibv_get_cq_event(rxq->channel, &ev_cq, &ev_ctx);
	if (ret)
		ret = errno;
	else if (ev_cq != rxq->cq)
		ret = EINVAL;

	if (ret) {
		if (ret != EAGAIN)
			DRV_LOG(ERR, "Can't disable RX intr queue %d",
				rx_queue_id);
	} else {
		ibv_ack_cq_events(rxq->cq, 1);
	}

	return -ret;
}