xref: /dpdk/drivers/net/hinic/hinic_pmd_rx.c (revision 089e5ed727a15da2729cfee9b63533dd120bd04c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4 
5 #include <rte_ether.h>
6 #include <rte_mbuf.h>
7 #ifdef __ARM64_NEON__
8 #include <arm_neon.h>
9 #endif
10 
11 #include "base/hinic_compat.h"
12 #include "base/hinic_pmd_hwdev.h"
13 #include "base/hinic_pmd_wq.h"
14 #include "base/hinic_pmd_niccfg.h"
15 #include "base/hinic_pmd_nicio.h"
16 #include "hinic_pmd_ethdev.h"
17 #include "hinic_pmd_rx.h"
18 
19 /* rxq wq operations */
20 #define HINIC_GET_RQ_WQE_MASK(rxq)	\
21 	((rxq)->wq->mask)
22 
23 #define HINIC_GET_RQ_LOCAL_CI(rxq)	\
24 	(((rxq)->wq->cons_idx) & HINIC_GET_RQ_WQE_MASK(rxq))
25 
26 #define HINIC_GET_RQ_LOCAL_PI(rxq)	\
27 	(((rxq)->wq->prod_idx) & HINIC_GET_RQ_WQE_MASK(rxq))
28 
29 #define HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt)	\
30 	do {						\
31 		(rxq)->wq->cons_idx += (wqebb_cnt);	\
32 		(rxq)->wq->delta += (wqebb_cnt);	\
33 	} while (0)
34 
35 #define HINIC_UPDATE_RQ_HW_PI(rxq, pi)	\
36 	(*((rxq)->pi_virt_addr) =	\
37 		cpu_to_be16((pi) & HINIC_GET_RQ_WQE_MASK(rxq)))
38 
39 #define HINIC_GET_RQ_FREE_WQEBBS(rxq)	((rxq)->wq->delta - 1)
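
/* The RQ work queue uses free-running prod_idx/cons_idx counters that are
 * folded into ring positions with the wq mask (depth - 1, the depth being a
 * power of two): e.g. with a q_depth of 1024 a cons_idx of 1030 maps to
 * local ci 6.  'delta' counts the wqebbs that are free to be (re)posted;
 * one entry is kept back (hence "delta - 1"), presumably so that a full
 * ring can be told apart from an empty one.
 */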
40 
41 /* rxq cqe done and status bits */
42 #define HINIC_GET_RX_DONE_BE(status)	\
43 	((status) & 0x80U)
44 
45 #define HINIC_RX_CSUM_OFFLOAD_EN	0xFFF
46 
47 #define RQ_CQE_SGE_VLAN_SHIFT			0
48 #define RQ_CQE_SGE_LEN_SHIFT			16
49 
50 #define RQ_CQE_SGE_VLAN_MASK			0xFFFFU
51 #define RQ_CQE_SGE_LEN_MASK			0xFFFFU
52 
53 #define RQ_CQE_SGE_GET(val, member)		\
54 	(((val) >> RQ_CQE_SGE_##member##_SHIFT) & RQ_CQE_SGE_##member##_MASK)
55 
56 #define HINIC_GET_RX_VLAN_TAG(vlan_len)	\
57 		RQ_CQE_SGE_GET(vlan_len, VLAN)
58 
59 #define HINIC_GET_RX_PKT_LEN(vlan_len)	\
60 		RQ_CQE_SGE_GET(vlan_len, LEN)
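
/* The cqe vlan_len dword packs the stripped VLAN tag in bits 0..15 and the
 * packet length in bits 16..31: e.g. vlan_len == 0x004a0064 decodes to
 * HINIC_GET_RX_PKT_LEN() == 74 and HINIC_GET_RX_VLAN_TAG() == 100.
 */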
61 
62 #define RQ_CQE_STATUS_CSUM_ERR_SHIFT		0
63 #define RQ_CQE_STATUS_NUM_LRO_SHIFT		16
64 #define RQ_CQE_STATUS_LRO_PUSH_SHIFT		25
65 #define RQ_CQE_STATUS_LRO_ENTER_SHIFT		26
66 #define RQ_CQE_STATUS_LRO_INTR_SHIFT		27
67 
68 #define RQ_CQE_STATUS_BP_EN_SHIFT		30
69 #define RQ_CQE_STATUS_RXDONE_SHIFT		31
70 #define RQ_CQE_STATUS_FLUSH_SHIFT		28
71 
72 #define RQ_CQE_STATUS_CSUM_ERR_MASK		0xFFFFU
73 #define RQ_CQE_STATUS_NUM_LRO_MASK		0xFFU
74 #define RQ_CQE_STATUS_LRO_PUSH_MASK		0x1U
75 #define RQ_CQE_STATUS_LRO_ENTER_MASK		0x1U
76 #define RQ_CQE_STATUS_LRO_INTR_MASK		0x1U
77 #define RQ_CQE_STATUS_BP_EN_MASK		0x1U
78 #define RQ_CQE_STATUS_RXDONE_MASK		0x1U
79 #define RQ_CQE_STATUS_FLUSH_MASK		0x1U
80 
81 #define RQ_CQE_STATUS_GET(val, member)		\
82 		(((val) >> RQ_CQE_STATUS_##member##_SHIFT) & \
83 				RQ_CQE_STATUS_##member##_MASK)
84 
85 #define RQ_CQE_STATUS_CLEAR(val, member)	\
86 		((val) & (~(RQ_CQE_STATUS_##member##_MASK << \
87 				RQ_CQE_STATUS_##member##_SHIFT)))
88 
89 #define HINIC_GET_RX_CSUM_ERR(status)	\
90 		RQ_CQE_STATUS_GET(status, CSUM_ERR)
91 
92 #define HINIC_GET_RX_DONE(status)	\
93 		RQ_CQE_STATUS_GET(status, RXDONE)
94 
95 #define HINIC_GET_RX_FLUSH(status)	\
96 		RQ_CQE_STATUS_GET(status, FLUSH)
97 
98 #define HINIC_GET_RX_BP_EN(status)	\
99 		RQ_CQE_STATUS_GET(status, BP_EN)
100 
101 #define HINIC_GET_RX_NUM_LRO(status)	\
102 		RQ_CQE_STATUS_GET(status, NUM_LRO)
103 
104 /* RQ_CTRL */
105 #define	RQ_CTRL_BUFDESC_SECT_LEN_SHIFT		0
106 #define	RQ_CTRL_COMPLETE_FORMAT_SHIFT		15
107 #define RQ_CTRL_COMPLETE_LEN_SHIFT		27
108 #define RQ_CTRL_LEN_SHIFT			29
109 
110 #define	RQ_CTRL_BUFDESC_SECT_LEN_MASK		0xFFU
111 #define	RQ_CTRL_COMPLETE_FORMAT_MASK		0x1U
112 #define RQ_CTRL_COMPLETE_LEN_MASK		0x3U
113 #define RQ_CTRL_LEN_MASK			0x3U
114 
115 #define RQ_CTRL_SET(val, member)		\
116 	(((val) & RQ_CTRL_##member##_MASK) << RQ_CTRL_##member##_SHIFT)
117 
118 #define RQ_CTRL_GET(val, member)		\
119 	(((val) >> RQ_CTRL_##member##_SHIFT) & RQ_CTRL_##member##_MASK)
120 
121 #define RQ_CTRL_CLEAR(val, member)		\
122 	((val) & (~(RQ_CTRL_##member##_MASK << RQ_CTRL_##member##_SHIFT)))
123 
124 #define RQ_CQE_PKT_NUM_SHIFT			1
125 #define RQ_CQE_PKT_FIRST_LEN_SHIFT		19
126 #define RQ_CQE_PKT_LAST_LEN_SHIFT		6
127 #define RQ_CQE_SUPER_CQE_EN_SHIFT		0
128 
129 #define RQ_CQE_PKT_FIRST_LEN_MASK		0x1FFFU
130 #define RQ_CQE_PKT_LAST_LEN_MASK		0x1FFFU
131 #define RQ_CQE_PKT_NUM_MASK			0x1FU
132 #define RQ_CQE_SUPER_CQE_EN_MASK		0x1U
133 
134 #define RQ_CQE_PKT_NUM_GET(val, member)		\
135 	(((val) >> RQ_CQE_PKT_##member##_SHIFT) & RQ_CQE_PKT_##member##_MASK)
136 
137 #define HINIC_GET_RQ_CQE_PKT_NUM(pkt_info) RQ_CQE_PKT_NUM_GET(pkt_info, NUM)
138 
139 #define RQ_CQE_SUPER_CQE_EN_GET(val, member)	\
140 	(((val) >> RQ_CQE_##member##_SHIFT) & RQ_CQE_##member##_MASK)
141 
142 #define HINIC_GET_SUPER_CQE_EN(pkt_info)	\
143 	RQ_CQE_SUPER_CQE_EN_GET(pkt_info, SUPER_CQE_EN)
144 
145 #define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT		21
146 #define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK		0x1U
147 
148 #define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT		0
149 #define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK		0xFFFU
150 
151 #define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_SHIFT		19
152 #define RQ_CQE_OFFOLAD_TYPE_PKT_UMBCAST_MASK		0x3U
153 
154 #define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_SHIFT		24
155 #define RQ_CQE_OFFOLAD_TYPE_RSS_TYPE_MASK		0xFFU
156 
157 #define RQ_CQE_OFFOLAD_TYPE_GET(val, member)		(((val) >> \
158 				RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \
159 				RQ_CQE_OFFOLAD_TYPE_##member##_MASK)
160 
161 #define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)	\
162 		RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN)
163 
164 #define HINIC_GET_RSS_TYPES(offload_type)	\
165 		RQ_CQE_OFFOLAD_TYPE_GET(offload_type, RSS_TYPE)
166 
167 #define HINIC_GET_RX_PKT_TYPE(offload_type)	\
168 		RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE)
169 
170 #define HINIC_GET_RX_PKT_UMBCAST(offload_type)	\
171 		RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_UMBCAST)
172 
173 #define RQ_CQE_STATUS_CSUM_BYPASS_VAL			0x80U
174 #define RQ_CQE_STATUS_CSUM_ERR_IP_MASK			0x39U
175 #define RQ_CQE_STATUS_CSUM_ERR_L4_MASK			0x46U
176 #define RQ_CQE_STATUS_CSUM_ERR_OTHER			0x100U
177 
178 #define HINIC_CSUM_ERR_BYPASSED(csum_err)	 \
179 	((csum_err) == RQ_CQE_STATUS_CSUM_BYPASS_VAL)
180 
181 #define HINIC_CSUM_ERR_IP(csum_err)	 \
182 	((csum_err) & RQ_CQE_STATUS_CSUM_ERR_IP_MASK)
183 
184 #define HINIC_CSUM_ERR_L4(csum_err)	 \
185 	((csum_err) & RQ_CQE_STATUS_CSUM_ERR_L4_MASK)
186 
187 #define HINIC_CSUM_ERR_OTHER(csum_err)	 \
188 	((csum_err) == RQ_CQE_STATUS_CSUM_ERR_OTHER)
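
/* csum_err groups several hardware checksum error bits.  Judging by the
 * masks, 0x39 covers the IP-level error bits and 0x46 the L4-level ones,
 * the special value 0x80 means the hardware bypassed checksum validation,
 * and 0x100 flags some other checksum problem; the exact per-bit meaning
 * is hardware defined.
 */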
189 
190 
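/* Record the smallest rx buffer length among the configured rxqs in
 * nic_io->rq_buf_size; taking the minimum presumably keeps the hardware
 * receive buffer size within every queue's mbuf data room.
 */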
191 void hinic_get_func_rx_buf_size(struct hinic_nic_dev *nic_dev)
192 {
193 	struct hinic_rxq *rxq;
194 	u16 q_id;
195 	u16 buf_size = 0;
196 
197 	for (q_id = 0; q_id < nic_dev->num_rq; q_id++) {
198 		rxq = nic_dev->rxqs[q_id];
199 
200 		if (rxq == NULL)
201 			continue;
202 
203 		if (q_id == 0)
204 			buf_size = rxq->buf_len;
205 
206 		buf_size = buf_size > rxq->buf_len ? rxq->buf_len : buf_size;
207 	}
208 
209 	nic_dev->hwdev->nic_io->rq_buf_size = buf_size;
210 }
211 
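/* Create one receive queue: allocate its wqebb ring and a DMA-coherent page
 * holding the producer index that the driver publishes for the hardware
 * (HINIC_UPDATE_RQ_HW_PI() writes it there in big endian).
 */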
212 int hinic_create_rq(struct hinic_hwdev *hwdev, u16 q_id, u16 rq_depth)
213 {
214 	int err;
215 	struct hinic_nic_io *nic_io = hwdev->nic_io;
216 	struct hinic_qp *qp = &nic_io->qps[q_id];
217 	struct hinic_rq *rq = &qp->rq;
218 
219 	/* in case the hardware still generates interrupts, do not use msix entry 0 */
220 	rq->msix_entry_idx = 1;
221 	rq->q_id = q_id;
222 	rq->rq_depth = rq_depth;
223 	nic_io->rq_depth = rq_depth;
224 
225 	err = hinic_wq_allocate(hwdev, &nic_io->rq_wq[q_id],
226 				HINIC_RQ_WQEBB_SHIFT, nic_io->rq_depth);
227 	if (err) {
228 		PMD_DRV_LOG(ERR, "Failed to allocate WQ for RQ");
229 		return err;
230 	}
231 	rq->wq = &nic_io->rq_wq[q_id];
232 
233 	rq->pi_virt_addr =
234 		(volatile u16 *)dma_zalloc_coherent(hwdev, HINIC_PAGE_SIZE,
235 						    &rq->pi_dma_addr,
236 						    GFP_KERNEL);
237 	if (!rq->pi_virt_addr) {
238 		PMD_DRV_LOG(ERR, "Failed to allocate rq pi virt addr");
239 		err = -ENOMEM;
240 		goto rq_pi_alloc_err;
241 	}
242 
243 	return HINIC_OK;
244 
245 rq_pi_alloc_err:
246 	hinic_wq_free(hwdev, &nic_io->rq_wq[q_id]);
247 
248 	return err;
249 }
250 
251 void hinic_destroy_rq(struct hinic_hwdev *hwdev, u16 q_id)
252 {
253 	struct hinic_nic_io *nic_io = hwdev->nic_io;
254 	struct hinic_qp *qp = &nic_io->qps[q_id];
255 	struct hinic_rq *rq = &qp->rq;
256 
257 	if (qp->rq.wq == NULL)
258 		return;
259 
260 	dma_free_coherent_volatile(hwdev, HINIC_PAGE_SIZE,
261 				   (volatile void *)rq->pi_virt_addr,
262 				   rq->pi_dma_addr);
263 	hinic_wq_free(nic_io->hwdev, qp->rq.wq);
264 	qp->rq.wq = NULL;
265 }
266 
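/* Pre-format one rq wqe: the ctrl word carries the section lengths in
 * 8-byte units, the completion section points at this entry's rq cqe, and
 * the buffer descriptor holds the packet buffer DMA address (replaced with
 * a real mbuf address later by the mbuf alloc/rearm paths).
 */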
267 static void
268 hinic_prepare_rq_wqe(void *wqe, __rte_unused u16 pi, dma_addr_t buf_addr,
269 			dma_addr_t cqe_dma)
270 {
271 	struct hinic_rq_wqe *rq_wqe = wqe;
272 	struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
273 	struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
274 	struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
275 	u32 rq_ceq_len = sizeof(struct hinic_rq_cqe);
276 
277 	ctrl->ctrl_fmt =
278 		RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
279 		RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)), COMPLETE_LEN) |
280 		RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)), BUFDESC_SECT_LEN) |
281 		RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);
282 
283 	hinic_set_sge(&cqe_sect->sge, cqe_dma, rq_ceq_len);
284 
285 	buf_desc->addr_high = upper_32_bits(buf_addr);
286 	buf_desc->addr_low = lower_32_bits(buf_addr);
287 }
288 
289 void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
290 {
291 	if (!rxq || !stats)
292 		return;
293 
294 	memcpy(stats, &rxq->rxq_stats, sizeof(rxq->rxq_stats));
295 }
296 
297 void hinic_rxq_stats_reset(struct hinic_rxq *rxq)
298 {
299 	struct hinic_rxq_stats *rxq_stats;
300 
301 	if (rxq == NULL)
302 		return;
303 
304 	rxq_stats = &rxq->rxq_stats;
305 	memset(rxq_stats, 0, sizeof(*rxq_stats));
306 }
307 
308 static int hinic_rx_alloc_cqe(struct hinic_rxq *rxq)
309 {
310 	size_t cqe_mem_size;
311 
312 	/* allocate contiguous cqe memory to reduce the number of memory zones */
313 	cqe_mem_size = sizeof(struct hinic_rq_cqe) * rxq->q_depth;
314 	rxq->cqe_start_vaddr =
315 		dma_zalloc_coherent(rxq->nic_dev->hwdev,
316 				    cqe_mem_size, &rxq->cqe_start_paddr,
317 				    GFP_KERNEL);
318 	if (!rxq->cqe_start_vaddr) {
319 		PMD_DRV_LOG(ERR, "Allocate cqe dma memory failed");
320 		return -ENOMEM;
321 	}
322 
323 	rxq->rx_cqe = (struct hinic_rq_cqe *)rxq->cqe_start_vaddr;
324 
325 	return HINIC_OK;
326 }
327 
328 static void hinic_rx_free_cqe(struct hinic_rxq *rxq)
329 {
330 	size_t cqe_mem_size;
331 
332 	cqe_mem_size = sizeof(struct hinic_rq_cqe) * rxq->q_depth;
333 	dma_free_coherent(rxq->nic_dev->hwdev, cqe_mem_size,
334 			  rxq->cqe_start_vaddr, rxq->cqe_start_paddr);
335 	rxq->cqe_start_vaddr = NULL;
336 }
337 
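/* Walk the whole ring once at setup: write every wqe's control fields and
 * per-entry cqe address, convert them to big endian, then return the wqebbs
 * to the wq so the fast path only has to patch buffer addresses when it
 * posts mbufs.
 */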
338 static int hinic_rx_fill_wqe(struct hinic_rxq *rxq)
339 {
340 	struct hinic_nic_dev *nic_dev = rxq->nic_dev;
341 	struct hinic_rq_wqe *rq_wqe;
342 	dma_addr_t buf_dma_addr, cqe_dma_addr;
343 	u16 pi = 0;
344 	int i;
345 
346 	buf_dma_addr = 0;
347 	cqe_dma_addr = rxq->cqe_start_paddr;
348 	for (i = 0; i < rxq->q_depth; i++) {
349 		rq_wqe = hinic_get_rq_wqe(nic_dev->hwdev, rxq->q_id, &pi);
350 		if (!rq_wqe) {
351 			PMD_DRV_LOG(ERR, "Get rq wqe failed");
352 			break;
353 		}
354 
355 		hinic_prepare_rq_wqe(rq_wqe, pi, buf_dma_addr, cqe_dma_addr);
356 		cqe_dma_addr += sizeof(struct hinic_rq_cqe);
357 
358 		hinic_cpu_to_be32(rq_wqe, sizeof(struct hinic_rq_wqe));
359 	}
360 
361 	hinic_return_rq_wqe(nic_dev->hwdev, rxq->q_id, i);
362 
363 	return i;
364 }
365 
366 /* allocate cqes and prepare rq wqes */
367 int hinic_setup_rx_resources(struct hinic_rxq *rxq)
368 {
369 	u64 rx_info_sz;
370 	int err, pkts;
371 
372 	rx_info_sz = rxq->q_depth * sizeof(*rxq->rx_info);
373 	rxq->rx_info = kzalloc_aligned(rx_info_sz, GFP_KERNEL);
374 	if (!rxq->rx_info)
375 		return -ENOMEM;
376 
377 	err = hinic_rx_alloc_cqe(rxq);
378 	if (err) {
379 		PMD_DRV_LOG(ERR, "Allocate rx cqe failed");
380 		goto rx_cqe_err;
381 	}
382 
383 	pkts = hinic_rx_fill_wqe(rxq);
384 	if (pkts != rxq->q_depth) {
385 		PMD_DRV_LOG(ERR, "Fill rx wqe failed");
386 		err = -ENOMEM;
387 		goto rx_fill_err;
388 	}
389 
390 	return 0;
391 
392 rx_fill_err:
393 	hinic_rx_free_cqe(rxq);
394 
395 rx_cqe_err:
396 	kfree(rxq->rx_info);
397 	rxq->rx_info = NULL;
398 
399 	return err;
400 }
401 
402 void hinic_free_rx_resources(struct hinic_rxq *rxq)
403 {
404 	if (rxq->rx_info == NULL)
405 		return;
406 
407 	hinic_rx_free_cqe(rxq);
408 	kfree(rxq->rx_info);
409 	rxq->rx_info = NULL;
410 }
411 
412 void hinic_free_all_rx_resources(struct rte_eth_dev *eth_dev)
413 {
414 	u16 q_id;
415 	struct hinic_nic_dev *nic_dev =
416 				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
417 
418 	for (q_id = 0; q_id < nic_dev->num_rq; q_id++) {
419 		eth_dev->data->rx_queues[q_id] = NULL;
420 
421 		if (nic_dev->rxqs[q_id] == NULL)
422 			continue;
423 
424 		hinic_free_all_rx_skbs(nic_dev->rxqs[q_id]);
425 		hinic_free_rx_resources(nic_dev->rxqs[q_id]);
426 		kfree(nic_dev->rxqs[q_id]);
427 		nic_dev->rxqs[q_id] = NULL;
428 	}
429 }
430 
431 void hinic_free_all_rx_mbuf(struct rte_eth_dev *eth_dev)
432 {
433 	struct hinic_nic_dev *nic_dev =
434 				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
435 	u16 q_id;
436 
437 	for (q_id = 0; q_id < nic_dev->num_rq; q_id++)
438 		hinic_free_all_rx_skbs(nic_dev->rxqs[q_id]);
439 }
440 
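/* A frame larger than one rx buffer is spread by hardware over consecutive
 * wqebbs: walk the extra entries, chain their mbufs behind head_skb while
 * growing pkt_len/nb_segs, and consume one local ci per segment.
 */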
441 static void hinic_recv_jumbo_pkt(struct hinic_rxq *rxq,
442 				 struct rte_mbuf *head_skb,
443 				 u32 remain_pkt_len)
444 {
445 	struct hinic_nic_dev *nic_dev = rxq->nic_dev;
446 	struct rte_mbuf *cur_mbuf, *rxm = NULL;
447 	struct hinic_rx_info *rx_info;
448 	u16 sw_ci, rx_buf_len = rxq->buf_len;
449 	u32 pkt_len;
450 
451 	while (remain_pkt_len > 0) {
452 		sw_ci = hinic_get_rq_local_ci(nic_dev->hwdev, rxq->q_id);
453 		rx_info = &rxq->rx_info[sw_ci];
454 
455 		hinic_update_rq_local_ci(nic_dev->hwdev, rxq->q_id, 1);
456 
457 		pkt_len = remain_pkt_len > rx_buf_len ?
458 			rx_buf_len : remain_pkt_len;
459 		remain_pkt_len -= pkt_len;
460 
461 		cur_mbuf = rx_info->mbuf;
462 		cur_mbuf->data_len = (u16)pkt_len;
463 		cur_mbuf->next = NULL;
464 
465 		head_skb->pkt_len += cur_mbuf->data_len;
466 		head_skb->nb_segs++;
467 
468 		if (!rxm)
469 			head_skb->next = cur_mbuf;
470 		else
471 			rxm->next = cur_mbuf;
472 
473 		rxm = cur_mbuf;
474 	}
475 }
476 
477 static void hinic_rss_deinit(struct hinic_nic_dev *nic_dev)
478 {
479 	u8 prio_tc[HINIC_DCB_UP_MAX] = {0};
480 	(void)hinic_rss_cfg(nic_dev->hwdev, 0,
481 			    nic_dev->rss_tmpl_idx, 0, prio_tc);
482 }
483 
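/* Program the RSS hash key of this function's rss template.  When the
 * application supplies no key, fall back to the 40-byte default below,
 * which appears to be the commonly used default Toeplitz key.
 */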
484 static int hinic_rss_key_init(struct hinic_nic_dev *nic_dev,
485 			      struct rte_eth_rss_conf *rss_conf)
486 {
487 	u8 default_rss_key[HINIC_RSS_KEY_SIZE] = {
488 			 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
489 			 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
490 			 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
491 			 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
492 			 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa};
493 	u8 hashkey[HINIC_RSS_KEY_SIZE] = {0};
494 	u8 tmpl_idx = nic_dev->rss_tmpl_idx;
495 
496 	if (rss_conf->rss_key == NULL)
497 		memcpy(hashkey, default_rss_key, HINIC_RSS_KEY_SIZE);
498 	else
499 		memcpy(hashkey, rss_conf->rss_key, rss_conf->rss_key_len);
500 
501 	return hinic_rss_set_template_tbl(nic_dev->hwdev, tmpl_idx, hashkey);
502 }
503 
504 static void hinic_fill_rss_type(struct nic_rss_type *rss_type,
505 				struct rte_eth_rss_conf *rss_conf)
506 {
507 	u64 rss_hf = rss_conf->rss_hf;
508 
509 	rss_type->ipv4 = (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4)) ? 1 : 0;
510 	rss_type->tcp_ipv4 = (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) ? 1 : 0;
511 	rss_type->ipv6 = (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6)) ? 1 : 0;
512 	rss_type->ipv6_ext = (rss_hf & ETH_RSS_IPV6_EX) ? 1 : 0;
513 	rss_type->tcp_ipv6 = (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) ? 1 : 0;
514 	rss_type->tcp_ipv6_ext = (rss_hf & ETH_RSS_IPV6_TCP_EX) ? 1 : 0;
515 	rss_type->udp_ipv4 = (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) ? 1 : 0;
516 	rss_type->udp_ipv6 = (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP) ? 1 : 0;
517 }
518 
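/* Fill the HINIC_RSS_INDIR_SIZE-entry indirection table by repeating the
 * configured rx queues round robin, e.g. with 4 rss queues the entries read
 * 0,1,2,3,0,1,2,3,...; with no rss queue every entry is set to the invalid
 * value 0xFF.
 */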
519 static void hinic_fillout_indir_tbl(struct hinic_nic_dev *nic_dev, u32 *indir)
520 {
521 	u8 rss_queue_count = nic_dev->num_rss;
522 	int i = 0, j;
523 
524 	if (rss_queue_count == 0) {
525 		/* no rss queue configured: fill the indir tbl with invalid entries */
526 		for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++)
527 			indir[i] = 0xFF;	/* Invalid value in indir tbl */
528 	} else {
529 		while (i < HINIC_RSS_INDIR_SIZE)
530 			for (j = 0; (j < rss_queue_count) &&
531 			     (i < HINIC_RSS_INDIR_SIZE); j++)
532 				indir[i++] = nic_dev->rx_queue_list[j];
533 	}
534 }
535 
536 static int hinic_rss_init(struct hinic_nic_dev *nic_dev,
537 			  __attribute__((unused)) u8 *rq2iq_map,
538 			  struct rte_eth_rss_conf *rss_conf)
539 {
540 	u32 indir_tbl[HINIC_RSS_INDIR_SIZE] = {0};
541 	struct nic_rss_type rss_type = {0};
542 	u8 prio_tc[HINIC_DCB_UP_MAX] = {0};
543 	u8 tmpl_idx = 0xFF, num_tc = 0;
544 	int err;
545 
546 	tmpl_idx = nic_dev->rss_tmpl_idx;
547 
548 	err = hinic_rss_key_init(nic_dev, rss_conf);
549 	if (err)
550 		return err;
551 
552 	if (!nic_dev->rss_indir_flag) {
553 		hinic_fillout_indir_tbl(nic_dev, indir_tbl);
554 		err = hinic_rss_set_indir_tbl(nic_dev->hwdev, tmpl_idx,
555 					      indir_tbl);
556 		if (err)
557 			return err;
558 	}
559 
560 	hinic_fill_rss_type(&rss_type, rss_conf);
561 	err = hinic_set_rss_type(nic_dev->hwdev, tmpl_idx, rss_type);
562 	if (err)
563 		return err;
564 
565 	err = hinic_rss_set_hash_engine(nic_dev->hwdev, tmpl_idx,
566 					HINIC_RSS_HASH_ENGINE_TYPE_TOEP);
567 	if (err)
568 		return err;
569 
570 	return hinic_rss_cfg(nic_dev->hwdev, 1, tmpl_idx, num_tc, prio_tc);
571 }
572 
573 static void
574 hinic_add_rq_to_rx_queue_list(struct hinic_nic_dev *nic_dev, u16 queue_id)
575 {
576 	u8 rss_queue_count = nic_dev->num_rss;
577 
578 	RTE_ASSERT(rss_queue_count <= (RTE_DIM(nic_dev->rx_queue_list) - 1));
579 
580 	nic_dev->rx_queue_list[rss_queue_count] = queue_id;
581 	nic_dev->num_rss++;
582 }
583 
584 /**
585  * hinic_setup_num_qps - set up the RSS queue list and allocate an rss template when more than one RQ is configured
586  * @nic_dev: pointer to the private ethernet device
587  * Return: 0 on Success, error code otherwise.
588  **/
589 static int hinic_setup_num_qps(struct hinic_nic_dev *nic_dev)
590 {
591 	int err, i;
592 
593 	if (!(nic_dev->flags & ETH_MQ_RX_RSS_FLAG)) {
594 		nic_dev->flags &= ~ETH_MQ_RX_RSS_FLAG;
595 		nic_dev->num_rss = 0;
596 		if (nic_dev->num_rq > 1) {
597 			/* get rss template id */
598 			err = hinic_rss_template_alloc(nic_dev->hwdev,
599 						       &nic_dev->rss_tmpl_idx);
600 			if (err) {
601 				PMD_DRV_LOG(WARNING, "Alloc rss template failed");
602 				return err;
603 			}
604 			nic_dev->flags |= ETH_MQ_RX_RSS_FLAG;
605 			for (i = 0; i < nic_dev->num_rq; i++)
606 				hinic_add_rq_to_rx_queue_list(nic_dev, i);
607 		}
608 	}
609 
610 	return 0;
611 }
612 
613 static void hinic_destroy_num_qps(struct hinic_nic_dev *nic_dev)
614 {
615 	if (nic_dev->flags & ETH_MQ_RX_RSS_FLAG) {
616 		if (hinic_rss_template_free(nic_dev->hwdev,
617 					    nic_dev->rss_tmpl_idx))
618 			PMD_DRV_LOG(WARNING, "Free rss template failed");
619 
620 		nic_dev->flags &= ~ETH_MQ_RX_RSS_FLAG;
621 	}
622 }
623 
624 static int hinic_config_mq_rx_rss(struct hinic_nic_dev *nic_dev, bool on)
625 {
626 	int ret = 0;
627 
628 	if (on) {
629 		ret = hinic_setup_num_qps(nic_dev);
630 		if (ret)
631 			PMD_DRV_LOG(ERR, "Setup num_qps failed");
632 	} else {
633 		hinic_destroy_num_qps(nic_dev);
634 	}
635 
636 	return ret;
637 }
638 
639 int hinic_config_mq_mode(struct rte_eth_dev *dev, bool on)
640 {
641 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
642 	struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
643 	int ret = 0;
644 
645 	switch (dev_conf->rxmode.mq_mode) {
646 	case ETH_MQ_RX_RSS:
647 		ret = hinic_config_mq_rx_rss(nic_dev, on);
648 		break;
649 	default:
650 		break;
651 	}
652 
653 	return ret;
654 }
655 
656 int hinic_rx_configure(struct rte_eth_dev *dev)
657 {
658 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
659 	struct rte_eth_rss_conf rss_conf =
660 		dev->data->dev_conf.rx_adv_conf.rss_conf;
661 	u32 csum_en = 0;
662 	int err;
663 
664 	if (nic_dev->flags & ETH_MQ_RX_RSS_FLAG) {
665 		if (rss_conf.rss_hf == 0) {
666 			rss_conf.rss_hf = HINIC_RSS_OFFLOAD_ALL;
667 		} else if ((rss_conf.rss_hf & HINIC_RSS_OFFLOAD_ALL) == 0) {
668 			PMD_DRV_LOG(ERR, "Do not support rss offload all");
669 			goto rss_config_err;
670 		}
671 
672 		err = hinic_rss_init(nic_dev, NULL, &rss_conf);
673 		if (err) {
674 			PMD_DRV_LOG(ERR, "Init rss failed");
675 			goto rss_config_err;
676 		}
677 	}
678 
679 	/* Enable both L3/L4 rx checksum offload */
680 	if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_CHECKSUM)
681 		csum_en = HINIC_RX_CSUM_OFFLOAD_EN;
682 
683 	err = hinic_set_rx_csum_offload(nic_dev->hwdev, csum_en);
684 	if (err)
685 		goto rx_csum_ofl_err;
686 
687 	return 0;
688 
689 rx_csum_ofl_err:
690 rss_config_err:
691 	hinic_destroy_num_qps(nic_dev);
692 
693 	return HINIC_ERROR;
694 }
695 
696 void hinic_rx_remove_configure(struct rte_eth_dev *dev)
697 {
698 	struct hinic_nic_dev *nic_dev = HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(dev);
699 
700 	if (nic_dev->flags & ETH_MQ_RX_RSS_FLAG) {
701 		hinic_rss_deinit(nic_dev);
702 		hinic_destroy_num_qps(nic_dev);
703 	}
704 }
705 
706 void hinic_free_all_rx_skbs(struct hinic_rxq *rxq)
707 {
708 	struct hinic_nic_dev *nic_dev = rxq->nic_dev;
709 	struct hinic_rx_info *rx_info;
710 	int free_wqebbs =
711 		hinic_get_rq_free_wqebbs(nic_dev->hwdev, rxq->q_id) + 1;
712 	volatile struct hinic_rq_cqe *rx_cqe;
713 	u16 ci;
714 
715 	while (free_wqebbs++ < rxq->q_depth) {
716 		ci = hinic_get_rq_local_ci(nic_dev->hwdev, rxq->q_id);
717 
718 		rx_cqe = &rxq->rx_cqe[ci];
719 
720 		/* clear done bit */
721 		rx_cqe->status = 0;
722 
723 		rx_info = &rxq->rx_info[ci];
724 		rte_pktmbuf_free(rx_info->mbuf);
725 		rx_info->mbuf = NULL;
726 
727 		hinic_update_rq_local_ci(nic_dev->hwdev, rxq->q_id, 1);
728 	}
729 }
730 
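/* The hardware writes the rq cqe in big endian; convert the first 16 bytes
 * (the only part the l2nic datapath consumes) to host order.  On x86 and
 * ARM64 a single 128-bit byte shuffle swaps all four dwords at once,
 * otherwise a scalar loop is used.
 */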
731 static inline void hinic_rq_cqe_be_to_cpu32(void *dst_le32,
732 					    volatile void *src_be32)
733 {
734 #if defined(__X86_64_SSE__)
735 	volatile __m128i *wqe_be = (volatile __m128i *)src_be32;
736 	__m128i *wqe_le = (__m128i *)dst_le32;
737 	__m128i shuf_mask =  _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
738 					11, 4, 5, 6, 7, 0, 1, 2, 3);
739 
740 	/* l2nic only uses the first 128 bits */
741 	wqe_le[0] = _mm_shuffle_epi8(wqe_be[0], shuf_mask);
742 #elif defined(__ARM64_NEON__)
743 	volatile uint8x16_t *wqe_be = (volatile uint8x16_t *)src_be32;
744 	uint8x16_t *wqe_le = (uint8x16_t *)dst_le32;
745 	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
746 					9, 8, 15, 14, 13, 12};
747 
748 	/* l2nic only uses the first 128 bits */
749 	wqe_le[0] = vqtbl1q_u8(wqe_be[0], shuf_mask);
750 #else
751 	u32 i;
752 	volatile u32 *wqe_be = (volatile u32 *)src_be32;
753 	u32 *wqe_le = (u32 *)dst_le32;
754 
755 #define HINIC_L2NIC_RQ_CQE_USED		4 /* 4Bytes unit */
756 
757 	for (i = 0; i < HINIC_L2NIC_RQ_CQE_USED; i++) {
758 		*wqe_le = rte_be_to_cpu_32(*wqe_be);
759 		wqe_be++;
760 		wqe_le++;
761 	}
762 #endif
763 }
764 
765 static inline uint64_t hinic_rx_rss_hash(uint32_t offload_type,
766 					 uint32_t cqe_hash_val,
767 					 uint32_t *rss_hash)
768 {
769 	uint32_t rss_type;
770 
771 	rss_type = HINIC_GET_RSS_TYPES(offload_type);
772 	if (likely(rss_type != 0)) {
773 		*rss_hash = cqe_hash_val;
774 		return PKT_RX_RSS_HASH;
775 	}
776 
777 	return 0;
778 }
779 
780 static inline uint64_t hinic_rx_csum(uint32_t status, struct hinic_rxq *rxq)
781 {
782 	uint32_t checksum_err;
783 	uint64_t flags;
784 
785 	/* in most cases the checksum is ok */
786 	checksum_err = HINIC_GET_RX_CSUM_ERR(status);
787 	if (likely(checksum_err == 0))
788 		return (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
789 
790 	/* If BYPASS bit set, all other status indications should be ignored */
791 	if (unlikely(HINIC_CSUM_ERR_BYPASSED(checksum_err)))
792 		return PKT_RX_IP_CKSUM_UNKNOWN;
793 
794 	flags = 0;
795 
796 	/* IP checksum error */
797 	if (HINIC_CSUM_ERR_IP(checksum_err))
798 		flags |= PKT_RX_IP_CKSUM_BAD;
799 	else
800 		flags |= PKT_RX_IP_CKSUM_GOOD;
801 
802 	/* L4 checksum error */
803 	if (HINIC_CSUM_ERR_L4(checksum_err))
804 		flags |= PKT_RX_L4_CKSUM_BAD;
805 	else
806 		flags |= PKT_RX_L4_CKSUM_GOOD;
807 
808 	if (unlikely(HINIC_CSUM_ERR_OTHER(checksum_err)))
809 		flags = PKT_RX_L4_CKSUM_NONE;
810 
811 	rxq->rxq_stats.errors++;
812 
813 	return flags;
814 }
815 
816 static inline uint64_t hinic_rx_vlan(uint32_t offload_type, uint32_t vlan_len,
817 				     uint16_t *vlan_tci)
818 {
819 	uint16_t vlan_tag;
820 
821 	vlan_tag = HINIC_GET_RX_VLAN_TAG(vlan_len);
822 	if (!HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) || 0 == vlan_tag) {
823 		*vlan_tci = 0;
824 		return 0;
825 	}
826 
827 	*vlan_tci = vlan_tag;
828 
829 	return PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
830 }
831 
832 static inline u32 hinic_rx_alloc_mbuf_bulk(struct hinic_rxq *rxq,
833 					   struct rte_mbuf **mbufs,
834 					   u32 exp_mbuf_cnt)
835 {
836 	int rc;
837 	u32 avail_cnt;
838 
839 	rc = rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, exp_mbuf_cnt);
840 	if (likely(rc == HINIC_OK)) {
841 		avail_cnt = exp_mbuf_cnt;
842 	} else {
843 		avail_cnt = 0;
844 		rxq->rxq_stats.rx_nombuf += exp_mbuf_cnt;
845 	}
846 
847 	return avail_cnt;
848 }
849 
850 static struct rte_mbuf *hinic_rx_alloc_mbuf(struct hinic_rxq *rxq,
851 					dma_addr_t *dma_addr)
852 {
853 	struct rte_mbuf *mbuf;
854 
855 	mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
856 	if (unlikely(!mbuf))
857 		return NULL;
858 
859 	*dma_addr = rte_mbuf_data_iova_default(mbuf);
860 
861 	return mbuf;
862 }
863 
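/* Bulk-refill the rq with fresh mbufs: run only when at least rx_free_thresh
 * wqebbs are free and never rearm past the end of the ring in one call (the
 * next call continues from the wrapped pi).  The allocated mbufs are written
 * straight into rx_info[] (which apparently keeps the mbuf pointer as its
 * first field); only the wqe buffer addresses are rewritten since the rest
 * of the wqe was initialized at setup time, and the hardware pi is updated
 * after a write barrier.
 */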
864 static inline void hinic_rearm_rxq_mbuf(struct hinic_rxq *rxq)
865 {
866 	u16 pi;
867 	u32 i, free_wqebbs, rearm_wqebbs, exp_wqebbs;
868 	dma_addr_t dma_addr;
869 	struct hinic_rq_wqe *rq_wqe;
870 	struct rte_mbuf **rearm_mbufs;
871 
872 	/* check for free wqebbs to rearm */
873 	free_wqebbs = HINIC_GET_RQ_FREE_WQEBBS(rxq);
874 	if (unlikely(free_wqebbs < rxq->rx_free_thresh))
875 		return;
876 
877 	/* get rearm mbuf array */
878 	pi = HINIC_GET_RQ_LOCAL_PI(rxq);
879 	rearm_mbufs = (struct rte_mbuf **)(&rxq->rx_info[pi]);
880 
881 	/* limit rearm to the wqebbs left before the ring wraps around */
882 	exp_wqebbs = rxq->q_depth - pi;
883 	if (free_wqebbs < exp_wqebbs)
884 		exp_wqebbs = free_wqebbs;
885 
886 	/* alloc mbuf in bulk */
887 	rearm_wqebbs = hinic_rx_alloc_mbuf_bulk(rxq, rearm_mbufs, exp_wqebbs);
888 	if (unlikely(rearm_wqebbs == 0))
889 		return;
890 
891 	/* rearm rx mbuf */
892 	rq_wqe = WQ_WQE_ADDR(rxq->wq, (u32)pi);
893 	for (i = 0; i < rearm_wqebbs; i++) {
894 		dma_addr = rte_mbuf_data_iova_default(rearm_mbufs[i]);
895 		rq_wqe->buf_desc.addr_high =
896 					cpu_to_be32(upper_32_bits(dma_addr));
897 		rq_wqe->buf_desc.addr_low =
898 					cpu_to_be32(lower_32_bits(dma_addr));
899 		rq_wqe++;
900 	}
901 	rxq->wq->prod_idx += rearm_wqebbs;
902 	rxq->wq->delta -= rearm_wqebbs;
903 
904 	/* update rq hw_pi */
905 	rte_wmb();
906 	HINIC_UPDATE_RQ_HW_PI(rxq, pi + rearm_wqebbs);
907 }
908 
909 void hinic_rx_alloc_pkts(struct hinic_rxq *rxq)
910 {
911 	struct hinic_nic_dev *nic_dev = rxq->nic_dev;
912 	struct hinic_rq_wqe *rq_wqe;
913 	struct hinic_rx_info *rx_info;
914 	struct rte_mbuf *mb;
915 	dma_addr_t dma_addr;
916 	u16 pi = 0;
917 	int i, free_wqebbs;
918 
919 	free_wqebbs = HINIC_GET_RQ_FREE_WQEBBS(rxq);
920 	for (i = 0; i < free_wqebbs; i++) {
921 		mb = hinic_rx_alloc_mbuf(rxq, &dma_addr);
922 		if (unlikely(!mb)) {
923 			rxq->rxq_stats.rx_nombuf++;
924 			break;
925 		}
926 
927 		rq_wqe = hinic_get_rq_wqe(nic_dev->hwdev, rxq->q_id, &pi);
928 		if (unlikely(!rq_wqe)) {
929 			rte_pktmbuf_free(mb);
930 			break;
931 		}
932 
933 		/* fill buffer address only */
934 		rq_wqe->buf_desc.addr_high =
935 				cpu_to_be32(upper_32_bits(dma_addr));
936 		rq_wqe->buf_desc.addr_low =
937 				cpu_to_be32(lower_32_bits(dma_addr));
938 
939 		rx_info = &rxq->rx_info[pi];
940 		rx_info->mbuf = mb;
941 	}
942 
943 	if (likely(i > 0)) {
944 		rte_wmb();
945 		HINIC_UPDATE_RQ_HW_PI(rxq, pi + 1);
946 	}
947 }
948 
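/* Burst receive path.  For each completed cqe: check the done bit, convert
 * the cqe to host order, hand multi-buffer (jumbo) frames to
 * hinic_recv_jumbo_pkt(), fill in vlan/checksum/RSS offload flags, clear
 * the done bit so the entry can be reused, then advance the local ci and
 * try to rearm free wqebbs with new mbufs.
 */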
949 u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
950 {
951 	struct rte_mbuf *rxm;
952 	struct hinic_rxq *rxq = rx_queue;
953 	struct hinic_rx_info *rx_info;
954 	volatile struct hinic_rq_cqe *rx_cqe;
955 	u16 rx_buf_len, pkts = 0;
956 	u16 sw_ci, ci_mask, wqebb_cnt = 0;
957 	u32 pkt_len, status, vlan_len;
958 	u64 rx_bytes = 0;
959 	struct hinic_rq_cqe cqe;
960 	u32 offload_type, rss_hash;
961 
962 	rx_buf_len = rxq->buf_len;
963 
964 	/* 1. get polling start ci */
965 	ci_mask = HINIC_GET_RQ_WQE_MASK(rxq);
966 	sw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);
967 
968 	while (pkts < nb_pkts) {
969 		/* 2. check whether the cqe at the current ci is done */
970 		rx_cqe = &rxq->rx_cqe[sw_ci];
971 		status = rx_cqe->status;
972 		if (!HINIC_GET_RX_DONE_BE(status))
973 			break;
974 
975 		/* read other cqe members only after status */
976 		rte_rmb();
977 
978 		/* convert cqe and get packet length */
979 		hinic_rq_cqe_be_to_cpu32(&cqe, (volatile void *)rx_cqe);
980 		vlan_len = cqe.vlan_len;
981 
982 		rx_info = &rxq->rx_info[sw_ci];
983 		rxm = rx_info->mbuf;
984 
985 		/* 3. next ci point and prefetch */
986 		sw_ci++;
987 		sw_ci &= ci_mask;
988 
989 		/* prefetch the first 64B of the next mbuf */
990 		rte_prefetch0(rxq->rx_info[sw_ci].mbuf);
991 
992 		/* 4. jumbo frame process */
993 		pkt_len = HINIC_GET_RX_PKT_LEN(vlan_len);
994 		if (likely(pkt_len <= rx_buf_len)) {
995 			rxm->data_len = pkt_len;
996 			rxm->pkt_len = pkt_len;
997 			wqebb_cnt++;
998 		} else {
999 			rxm->data_len = rx_buf_len;
1000 			rxm->pkt_len = rx_buf_len;
1001 
1002 			/* a jumbo frame spans multiple wqebbs: update ci here;
1003 			 * hinic_recv_jumbo_pkt() advances ci for the rest
1004 			 */
1005 			HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt + 1);
1006 			wqebb_cnt = 0;
1007 			hinic_recv_jumbo_pkt(rxq, rxm, pkt_len - rx_buf_len);
1008 			sw_ci = HINIC_GET_RQ_LOCAL_CI(rxq);
1009 		}
1010 
1011 		/* 5. vlan/checksum/rss/pkt_type/gro offload */
1012 		rxm->data_off = RTE_PKTMBUF_HEADROOM;
1013 		rxm->port = rxq->port_id;
1014 		offload_type = cqe.offload_type;
1015 
1016 		/* vlan offload */
1017 		rxm->ol_flags |= hinic_rx_vlan(offload_type, vlan_len,
1018 					       &rxm->vlan_tci);
1019 
1020 		/* checksum offload */
1021 		rxm->ol_flags |= hinic_rx_csum(cqe.status, rxq);
1022 
1023 		/* rss hash offload */
1024 		rss_hash = cqe.rss_hash;
1025 		rxm->ol_flags |= hinic_rx_rss_hash(offload_type, rss_hash,
1026 						   &rxm->hash.rss);
1027 
1028 		/* 6. clear done bit */
1029 		rx_cqe->status = 0;
1030 
1031 		rx_bytes += pkt_len;
1032 		rx_pkts[pkts++] = rxm;
1033 	}
1034 
1035 	if (pkts) {
1036 		/* 7. update ci */
1037 		HINIC_UPDATE_RQ_LOCAL_CI(rxq, wqebb_cnt);
1038 
1039 		/* do packet stats */
1040 		rxq->rxq_stats.packets += pkts;
1041 		rxq->rxq_stats.bytes += rx_bytes;
1042 	}
1043 	rxq->rxq_stats.burst_pkts = pkts;
1044 
1045 	/* 8. rearm mbuf to rxq */
1046 	hinic_rearm_rxq_mbuf(rxq);
1047 
1048 	return pkts;
1049 }
1050