xref: /dpdk/drivers/net/hinic/hinic_pmd_tx.c (revision 089e5ed727a15da2729cfee9b63533dd120bd04c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4 
5 #include <rte_mbuf.h>
6 #include <rte_tcp.h>
7 #include <rte_sctp.h>
8 #include <rte_udp.h>
9 #include <rte_ip.h>
10 #ifdef __ARM64_NEON__
11 #include <arm_neon.h>
12 #endif
13 
14 #include "base/hinic_compat.h"
15 #include "base/hinic_pmd_hwdev.h"
16 #include "base/hinic_pmd_hwif.h"
17 #include "base/hinic_pmd_wq.h"
18 #include "base/hinic_pmd_nicio.h"
19 #include "hinic_pmd_ethdev.h"
20 #include "hinic_pmd_tx.h"
21 
22 /* packet header and tx offload info */
23 #define VXLANLEN			8
24 #define MAX_PLD_OFFSET			221
25 #define MAX_SINGLE_SGE_SIZE		65536
26 #define TSO_ENABLE			1
27 #define TX_MSS_DEFAULT			0x3E00
28 #define TX_MSS_MIN			0x50
29 
30 #define HINIC_NONTSO_PKT_MAX_SGE		17	/* non-tso max sge 17 */
31 #define HINIC_NONTSO_SEG_NUM_INVALID(num)	\
32 			((num) > HINIC_NONTSO_PKT_MAX_SGE)
33 
34 #define HINIC_TSO_PKT_MAX_SGE			127	/* tso max sge 127 */
35 #define HINIC_TSO_SEG_NUM_INVALID(num)		((num) > HINIC_TSO_PKT_MAX_SGE)
36 
37 /* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */
38 #define HINIC_BUF_DESC_SIZE(nr_descs)	(SIZE_8BYTES(((u32)nr_descs) << 4))
39 
40 #define MASKED_SQ_IDX(sq, idx)		((idx) & (sq)->wq->mask)
41 
42 /* SQ_CTRL */
43 #define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT		0
44 #define SQ_CTRL_TASKSECT_LEN_SHIFT		16
45 #define SQ_CTRL_DATA_FORMAT_SHIFT		22
46 #define SQ_CTRL_LEN_SHIFT			29
47 #define SQ_CTRL_OWNER_SHIFT			31
48 
49 #define SQ_CTRL_BUFDESC_SECT_LEN_MASK		0xFFU
50 #define SQ_CTRL_TASKSECT_LEN_MASK		0x1FU
51 #define SQ_CTRL_DATA_FORMAT_MASK		0x1U
52 #define SQ_CTRL_LEN_MASK			0x3U
53 #define SQ_CTRL_OWNER_MASK			0x1U
54 
55 #define SQ_CTRL_SET(val, member)	\
56 	(((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)
57 
58 #define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT		2
59 #define SQ_CTRL_QUEUE_INFO_UFO_SHIFT		10
60 #define SQ_CTRL_QUEUE_INFO_TSO_SHIFT		11
61 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT	12
62 #define SQ_CTRL_QUEUE_INFO_MSS_SHIFT		13
63 #define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT		27
64 #define SQ_CTRL_QUEUE_INFO_UC_SHIFT		28
65 #define SQ_CTRL_QUEUE_INFO_PRI_SHIFT		29
66 
67 #define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK		0xFFU
68 #define SQ_CTRL_QUEUE_INFO_UFO_MASK		0x1U
69 #define SQ_CTRL_QUEUE_INFO_TSO_MASK		0x1U
70 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	0x1U
71 #define SQ_CTRL_QUEUE_INFO_MSS_MASK		0x3FFFU
72 #define SQ_CTRL_QUEUE_INFO_SCTP_MASK		0x1U
73 #define SQ_CTRL_QUEUE_INFO_UC_MASK		0x1U
74 #define SQ_CTRL_QUEUE_INFO_PRI_MASK		0x7U
75 
76 #define SQ_CTRL_QUEUE_INFO_SET(val, member)	\
77 	(((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<	\
78 			SQ_CTRL_QUEUE_INFO_##member##_SHIFT)
79 
80 #define SQ_CTRL_QUEUE_INFO_GET(val, member)	\
81 	(((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &	\
82 			SQ_CTRL_QUEUE_INFO_##member##_MASK)
83 
84 #define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)	\
85 	((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
86 			SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))
87 
88 #define	SQ_TASK_INFO0_L2HDR_LEN_SHIFT		0
89 #define	SQ_TASK_INFO0_L4OFFLOAD_SHIFT		8
90 #define	SQ_TASK_INFO0_INNER_L3TYPE_SHIFT	10
91 #define	SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT	12
92 #define	SQ_TASK_INFO0_PARSE_FLAG_SHIFT		13
93 #define	SQ_TASK_INFO0_UFO_AVD_SHIFT		14
94 #define	SQ_TASK_INFO0_TSO_UFO_SHIFT		15
95 #define SQ_TASK_INFO0_VLAN_TAG_SHIFT		16
96 
97 #define	SQ_TASK_INFO0_L2HDR_LEN_MASK		0xFFU
98 #define	SQ_TASK_INFO0_L4OFFLOAD_MASK		0x3U
99 #define	SQ_TASK_INFO0_INNER_L3TYPE_MASK		0x3U
100 #define	SQ_TASK_INFO0_VLAN_OFFLOAD_MASK		0x1U
101 #define	SQ_TASK_INFO0_PARSE_FLAG_MASK		0x1U
102 #define	SQ_TASK_INFO0_UFO_AVD_MASK		0x1U
103 #define SQ_TASK_INFO0_TSO_UFO_MASK		0x1U
104 #define SQ_TASK_INFO0_VLAN_TAG_MASK		0xFFFFU
105 
106 #define SQ_TASK_INFO0_SET(val, member)			\
107 	(((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<	\
108 			SQ_TASK_INFO0_##member##_SHIFT)
109 
110 #define	SQ_TASK_INFO1_MD_TYPE_SHIFT		8
111 #define SQ_TASK_INFO1_INNER_L4LEN_SHIFT		16
112 #define SQ_TASK_INFO1_INNER_L3LEN_SHIFT		24
113 
114 #define	SQ_TASK_INFO1_MD_TYPE_MASK		0xFFU
115 #define SQ_TASK_INFO1_INNER_L4LEN_MASK		0xFFU
116 #define SQ_TASK_INFO1_INNER_L3LEN_MASK		0xFFU
117 
118 #define SQ_TASK_INFO1_SET(val, member)			\
119 	(((val) & SQ_TASK_INFO1_##member##_MASK) <<	\
120 			SQ_TASK_INFO1_##member##_SHIFT)
121 
122 #define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT	0
123 #define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT		8
124 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT	16
125 #define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT	24
126 
127 #define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK		0xFFU
128 #define SQ_TASK_INFO2_OUTER_L3LEN_MASK		0xFFU
129 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK	0x7U
130 #define SQ_TASK_INFO2_OUTER_L3TYPE_MASK		0x3U
131 
132 #define SQ_TASK_INFO2_SET(val, member)			\
133 	(((val) & SQ_TASK_INFO2_##member##_MASK) <<	\
134 			SQ_TASK_INFO2_##member##_SHIFT)
135 
136 #define	SQ_TASK_INFO4_L2TYPE_SHIFT		31
137 
138 #define	SQ_TASK_INFO4_L2TYPE_MASK		0x1U
139 
140 #define SQ_TASK_INFO4_SET(val, member)		\
141 	(((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
142 			SQ_TASK_INFO4_##member##_SHIFT)
143 
144 /* SQ_DB */
145 #define SQ_DB_OFF				0x00000800
146 #define SQ_DB_INFO_HI_PI_SHIFT			0
147 #define SQ_DB_INFO_QID_SHIFT			8
148 #define SQ_DB_INFO_CFLAG_SHIFT			23
149 #define SQ_DB_INFO_COS_SHIFT			24
150 #define SQ_DB_INFO_TYPE_SHIFT			27
151 
152 #define SQ_DB_INFO_HI_PI_MASK			0xFFU
153 #define SQ_DB_INFO_QID_MASK			0x3FFU
154 #define SQ_DB_INFO_CFLAG_MASK			0x1U
155 #define SQ_DB_INFO_COS_MASK			0x7U
156 #define SQ_DB_INFO_TYPE_MASK			0x1FU
157 #define SQ_DB_INFO_SET(val, member)		\
158 	(((u32)(val) & SQ_DB_INFO_##member##_MASK) <<	\
159 			SQ_DB_INFO_##member##_SHIFT)
160 
161 #define SQ_DB					1
162 #define SQ_CFLAG_DP				0	/* CFLAG_DATA_PATH */
163 
164 #define SQ_DB_PI_LOW_MASK			0xFF
165 #define SQ_DB_PI_LOW(pi)			((pi) & SQ_DB_PI_LOW_MASK)
166 #define SQ_DB_PI_HI_SHIFT			8
167 #define SQ_DB_PI_HIGH(pi)			((pi) >> SQ_DB_PI_HI_SHIFT)
168 #define SQ_DB_ADDR(sq, pi)		\
169 	((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))
170 
171 /* txq wq operations */
172 #define HINIC_GET_SQ_WQE_MASK(txq)		((txq)->wq->mask)
173 
174 #define HINIC_GET_SQ_HW_CI(txq)	\
175 	((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))
176 
177 #define HINIC_GET_SQ_LOCAL_CI(txq)	\
178 	(((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))
179 
180 #define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)	\
181 	do {						\
182 		(txq)->wq->cons_idx += wqebb_cnt;	\
183 		(txq)->wq->delta += wqebb_cnt;		\
184 	} while (0)
185 
186 #define HINIC_GET_SQ_FREE_WQEBBS(txq)	((txq)->wq->delta - 1)
187 
188 #define HINIC_IS_SQ_EMPTY(txq)	(((txq)->wq->delta) == ((txq)->q_depth))
189 
190 #define BUF_DESC_SIZE_SHIFT		4
191 
192 #define HINIC_SQ_WQE_SIZE(num_sge)		\
193 	(sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +  \
194 			(unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))
195 
196 #define HINIC_SQ_WQEBB_CNT(num_sge)	\
197 	(int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge), \
198 			HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)
199 
200 
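/*
 * Byte-swap a WQE from CPU to big-endian byte order in 32-bit words, one
 * 64-byte WQEBB at a time. SSE or NEON shuffles are used when available;
 * otherwise the generic hinic_cpu_to_be32() helper performs the conversion.
 */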
201 static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
202 {
203 #if defined(__X86_64_SSE__)
204 	int i;
205 	__m128i *wqe_line = (__m128i *)data;
206 	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
207 					11, 4, 5, 6, 7, 0, 1, 2, 3);
208 
209 	for (i = 0; i < nr_wqebb; i++) {
210 		/* convert 64B wqebb using 4 SSE instructions */
211 		wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
212 		wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
213 		wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
214 		wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
215 		wqe_line += 4;
216 	}
217 #elif defined(__ARM64_NEON__)
218 	int i;
219 	uint8x16_t *wqe_line = (uint8x16_t *)data;
220 	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
221 					9, 8, 15, 14, 13, 12};
222 
223 	for (i = 0; i < nr_wqebb; i++) {
224 		wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
225 		wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
226 		wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
227 		wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
228 		wqe_line += 4;
229 	}
230 #else
231 	hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
232 #endif
233 }
234 
235 static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
236 {
237 #if defined(__X86_64_SSE__)
238 	int i;
239 	__m128i *sge_line = (__m128i *)data;
240 	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
241 					11, 4, 5, 6, 7, 0, 1, 2, 3);
242 
243 	for (i = 0; i < nr_sge; i++) {
244 		/* convert 16B sge using one SSE instruction */
245 		*sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
246 		sge_line++;
247 	}
248 #elif defined(__ARM64_NEON__)
249 	int i;
250 	uint8x16_t *sge_line = (uint8x16_t *)data;
251 	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
252 					9, 8, 15, 14, 13, 12};
253 
254 	for (i = 0; i < nr_sge; i++) {
255 		*sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
256 		sge_line++;
257 	}
258 #else
259 	hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
260 #endif
261 }
262 
263 void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
264 {
265 	if (!txq || !stats) {
266 		PMD_DRV_LOG(ERR, "Txq or stats is NULL");
267 		return;
268 	}
269 
270 	memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
271 }
272 
273 void hinic_txq_stats_reset(struct hinic_txq *txq)
274 {
275 	struct hinic_txq_stats *txq_stats;
276 
277 	if (txq == NULL)
278 		return;
279 
280 	txq_stats = &txq->txq_stats;
281 	memset(txq_stats, 0, sizeof(*txq_stats));
282 }
283 
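/*
 * Copy 'sge_cnt' mbuf segments into a single mbuf allocated from the
 * per-device copy mempool. Used when a packet carries more segments than
 * the hardware SGE limit allows; callers ensure the copied data fits into
 * one copy-mempool buffer.
 */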
284 static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
285 						  struct rte_mbuf *mbuf,
286 						  u16 sge_cnt)
287 {
288 	struct rte_mbuf *dst_mbuf;
289 	u32 offset = 0;
290 	u16 i;
291 
292 	if (unlikely(!nic_dev->cpy_mpool))
293 		return NULL;
294 
295 	dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
296 	if (unlikely(!dst_mbuf))
297 		return NULL;
298 
299 	dst_mbuf->data_off = 0;
300 	for (i = 0; i < sge_cnt; i++) {
301 		rte_memcpy((char *)dst_mbuf->buf_addr + offset,
302 			   (char *)mbuf->buf_addr + mbuf->data_off,
303 			   mbuf->data_len);
304 		dst_mbuf->data_len += mbuf->data_len;
305 		offset += mbuf->data_len;
306 		mbuf = mbuf->next;
307 	}
308 
309 	return dst_mbuf;
310 }
311 
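/*
 * Fill the SGE buffer descriptors of a SQ WQE from the mbuf segment chain.
 * When the WQE wraps past the end of the send queue, the wrapped SGEs are
 * written at the queue head and converted to big endian here; segments
 * beyond the hardware SGE limit are first merged into one copy mbuf.
 */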
312 static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
313 					  struct rte_mbuf *mbuf,
314 					  struct hinic_sq_bufdesc *sges,
315 					  struct hinic_wqe_info *sqe_info)
316 {
317 	dma_addr_t dma_addr;
318 	u16 i, around_sges;
319 	u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
320 	u16 real_nb_segs = mbuf->nb_segs;
321 	struct hinic_sq_bufdesc *sge_idx = sges;
322 
323 	if (unlikely(sqe_info->around)) {
324 		/* part of the wqe is at the sq bottom while the rest
325 		 * wraps around to the sq head
326 		 */
327 		i = 0;
328 		for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
329 		     sge_idx++) {
330 			dma_addr = rte_mbuf_data_iova(mbuf);
331 			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
332 				      mbuf->data_len);
333 			mbuf = mbuf->next;
334 			i++;
335 		}
336 
337 		around_sges = nb_segs - i;
338 		sge_idx = (struct hinic_sq_bufdesc *)
339 				((void *)txq->sq_head_addr);
340 		for (; i < nb_segs; i++) {
341 			dma_addr = rte_mbuf_data_iova(mbuf);
342 			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
343 				      mbuf->data_len);
344 			mbuf = mbuf->next;
345 			sge_idx++;
346 		}
347 
348 		/* convert the sges at the head to big endian */
349 		hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
350 	} else {
351 		/* wqe is in continuous space */
352 		for (i = 0; i < nb_segs; i++) {
353 			dma_addr = rte_mbuf_data_iova(mbuf);
354 			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
355 				      mbuf->data_len);
356 			mbuf = mbuf->next;
357 			sge_idx++;
358 		}
359 	}
360 
361 	/* for now: for non-tso over 17 sges, copy the remaining mbuf segs */
362 	if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
363 		/* copy excess mbuf segs to a valid buffer; costs performance */
364 		txq->txq_stats.cpy_pkts += 1;
365 		mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
366 					  real_nb_segs - nb_segs);
367 		if (unlikely(!mbuf))
368 			return false;
369 
370 		txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
371 
372 		/* deal with the last mbuf */
373 		dma_addr = rte_mbuf_data_iova(mbuf);
374 		hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
375 			      mbuf->data_len);
376 		if (unlikely(sqe_info->around))
377 			hinic_sge_cpu_to_be32((void *)sge_idx, 1);
378 	}
379 
380 	return true;
381 }
382 
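/*
 * Build the WQE control section: section lengths, data format and owner bit,
 * plus the queue_info word. The MSS field is forced into a valid range
 * (default value when unset, clamped to TX_MSS_MIN otherwise).
 */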
383 static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
384 					    u32 queue_info, int nr_descs,
385 					    u8 owner)
386 {
387 	u32 ctrl_size, task_size, bufdesc_size;
388 
389 	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
390 	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
391 	bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);
392 
393 	ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
394 			SQ_CTRL_SET(task_size, TASKSECT_LEN)	|
395 			SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)	|
396 			SQ_CTRL_SET(ctrl_size, LEN)		|
397 			SQ_CTRL_SET(owner, OWNER);
398 
399 	ctrl->queue_info = queue_info;
400 	ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);
401 
402 	if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
403 		ctrl->queue_info |=
404 			SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
405 	} else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
406 		/* mss should not be less than 80 */
407 		ctrl->queue_info =
408 				SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
409 		ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
410 	}
411 }
412 
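/*
 * Validate the segment layout of a TSO packet with 17 or more sges: roughly,
 * each run of up to 17 segments must accumulate at least one MSS of payload.
 * If not, the trailing data is scheduled to be copied into a single buffer
 * (cpy_mbuf_cnt = 1), provided it fits into a copy mbuf.
 */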
413 static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
414 					  struct hinic_tx_offload_info
415 					  *poff_info,
416 					  struct hinic_wqe_info *sqe_info)
417 {
418 	u32 total_len, limit_len, checked_len, left_len;
419 	u32 i, first_mss_sges, left_sges;
420 	struct rte_mbuf *mbuf_head, *mbuf_pre;
421 
422 	left_sges = mbuf->nb_segs;
423 	mbuf_head = mbuf;
424 
425 	/* tso sge number validation */
426 	if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
427 		checked_len = 0;
428 		limit_len = mbuf->tso_segsz + poff_info->payload_offset;
429 		first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
430 
431 		/* each run of 17 contiguous mbuf segs must be checked once */
432 		while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
433 			/* the total len of the first 16 mbufs must be
434 			 * equal to or greater than limit_len
435 			 */
436 			total_len = 0;
437 			for (i = 0; i < first_mss_sges; i++) {
438 				total_len += mbuf->data_len;
439 				mbuf_pre = mbuf;
440 				mbuf = mbuf->next;
441 				if (total_len >= limit_len) {
442 					limit_len = mbuf_head->tso_segsz;
443 					break;
444 				}
445 			}
446 
447 			checked_len += total_len;
448 
449 			/* try to copy if not valid */
450 			if (unlikely(first_mss_sges == i)) {
451 				left_sges -= first_mss_sges;
452 				checked_len -= mbuf_pre->data_len;
453 
454 				left_len = mbuf_head->pkt_len - checked_len;
455 				if (left_len > HINIC_COPY_MBUF_SIZE)
456 					return false;
457 
458 				sqe_info->sge_cnt = mbuf_head->nb_segs -
459 							left_sges;
460 				sqe_info->cpy_mbuf_cnt = 1;
461 
462 				return true;
463 			}
464 			first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
465 
466 			/* continue with the next 16 mbufs */
467 			left_sges -= (i + 1);
468 		} /* end of while */
469 	}
470 
471 	sqe_info->sge_cnt = mbuf_head->nb_segs;
472 	return true;
473 }
474 
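/*
 * Program the task section and queue_info fields needed for L3/L4 checksum
 * offload: header lengths and types for the inner (and, for VXLAN, outer)
 * headers, the payload offset, and the TCP/UDP or SCTP checksum flags.
 */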
475 static inline void
476 hinic_set_l4_csum_info(struct hinic_sq_task *task,
477 		u32 *queue_info, struct hinic_tx_offload_info *poff_info)
478 {
479 	u32 tcp_udp_cs, sctp;
480 	u16 l2hdr_len;
481 
482 	sctp = 0;
483 	if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
484 		sctp = 1;
485 
486 	tcp_udp_cs = poff_info->inner_l4_tcp_udp;
487 
488 	if (poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
489 		l2hdr_len = poff_info->outer_l2_len;
490 
491 		task->pkt_info2 |=
492 		SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
493 		SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
494 		task->pkt_info2 |=
495 		SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
496 		SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
497 	} else {
498 		l2hdr_len = poff_info->inner_l2_len;
499 	}
500 
501 	task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
502 	task->pkt_info1 |=
503 		SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
504 	task->pkt_info0 |=
505 		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
506 	task->pkt_info1 |=
507 		SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
508 	task->pkt_info0 |=
509 		SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
510 	*queue_info |=
511 		SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
512 		SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
513 		SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
514 }
515 
516 static inline void
517 hinic_set_tso_info(struct hinic_sq_task *task,
518 		u32 *queue_info, struct rte_mbuf *mbuf,
519 		struct hinic_tx_offload_info *poff_info)
520 {
521 	hinic_set_l4_csum_info(task, queue_info, poff_info);
522 
523 	/* wqe for tso */
524 	task->pkt_info0 |=
525 		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
526 	task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
527 	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
528 	/* qsf was initialized in prepare_sq_wqe */
529 	*queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
530 	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
531 }
532 
533 static inline void
534 hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
535 			u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
536 {
537 	task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
538 				SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
539 
540 	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
541 }
542 
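/*
 * Translate mbuf ol_flags into the WQE task section and queue_info word:
 * VLAN insertion first, then either TSO or plain L4 checksum offload.
 */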
543 static inline void
544 hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
545 		struct hinic_sq_task *task, u32 *queue_info,
546 		struct hinic_tx_offload_info *tx_off_info)
547 {
548 	u16 vlan_tag;
549 	uint64_t ol_flags = mbuf->ol_flags;
550 
551 	/* clear DW0~2 of task section for offload */
552 	task->pkt_info0 = 0;
553 	task->pkt_info1 = 0;
554 	task->pkt_info2 = 0;
555 
556 	/* Base VLAN */
557 	if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
558 		vlan_tag = mbuf->vlan_tci;
559 		hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
560 					  vlan_tag >> VLAN_PRIO_SHIFT);
561 	}
562 
563 	/* non checksum or tso */
564 	if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
565 		return;
566 
567 	if ((ol_flags & PKT_TX_TCP_SEG))
568 		/* set tso info for task and qsf */
569 		hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
570 	else /* just support l4 checksum offload */
571 		hinic_set_l4_csum_info(task, queue_info, tx_off_info);
572 }
573 
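/*
 * Reclaim completed tx descriptors, up to tx_free_thresh entries: walk from
 * the local consumer index towards the hardware consumer index, free any
 * temporary copy mbufs, and return single-segment mbufs to their mempool in
 * bulk for speed.
 */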
574 static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
575 {
576 	struct hinic_tx_info *tx_info;
577 	struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
578 	int i, nb_free = 0;
579 	u16 hw_ci, sw_ci, sq_mask;
580 	int wqebb_cnt = 0;
581 
582 	hw_ci = HINIC_GET_SQ_HW_CI(txq);
583 	sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
584 	sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
585 
586 	for (i = 0; i < txq->tx_free_thresh; ++i) {
587 		tx_info = &txq->tx_info[sw_ci];
588 		if (hw_ci == sw_ci ||
589 			(((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
590 			break;
591 
592 		sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
593 
594 		if (unlikely(tx_info->cpy_mbuf != NULL)) {
595 			rte_pktmbuf_free(tx_info->cpy_mbuf);
596 			tx_info->cpy_mbuf = NULL;
597 		}
598 
599 		wqebb_cnt += tx_info->wqebb_cnt;
600 		mbuf = tx_info->mbuf;
601 
602 		if (likely(mbuf->nb_segs == 1)) {
603 			m = rte_pktmbuf_prefree_seg(mbuf);
604 			tx_info->mbuf = NULL;
605 
606 			if (unlikely(m == NULL))
607 				continue;
608 
609 			mbuf_free[nb_free++] = m;
610 			if (unlikely(m->pool != mbuf_free[0]->pool ||
611 				nb_free >= HINIC_MAX_TX_FREE_BULK)) {
612 				rte_mempool_put_bulk(mbuf_free[0]->pool,
613 					(void **)mbuf_free, (nb_free - 1));
614 				nb_free = 0;
615 				mbuf_free[nb_free++] = m;
616 			}
617 		} else {
618 			rte_pktmbuf_free(mbuf);
619 			tx_info->mbuf = NULL;
620 		}
621 	}
622 
623 	if (nb_free > 0)
624 		rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
625 				     nb_free);
626 
627 	HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
628 }
629 
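/*
 * Reserve 'wqebb_cnt' WQE building blocks at the current producer index and
 * advance the queue state. wqe_info records the pi, the owner bit and, when
 * the allocation wraps past the end of the queue, the number of sequential
 * wqebbs located before the wrap point.
 */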
630 static inline struct hinic_sq_wqe *
631 hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
632 		struct hinic_wqe_info *wqe_info)
633 {
634 	u32 cur_pi, end_pi;
635 	u16 remain_wqebbs;
636 	struct hinic_sq *sq = txq->sq;
637 	struct hinic_wq *wq = txq->wq;
638 
639 	/* record current pi */
640 	cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
641 	end_pi = cur_pi + wqebb_cnt;
642 
643 	/* update next pi and delta */
644 	wq->prod_idx += wqebb_cnt;
645 	wq->delta -= wqebb_cnt;
646 
647 	/* return current pi and owner */
648 	wqe_info->pi = cur_pi;
649 	wqe_info->owner = sq->owner;
650 	wqe_info->around = 0;
651 	wqe_info->seq_wqebbs = wqebb_cnt;
652 
653 	if (unlikely(end_pi >= txq->q_depth)) {
654 		/* update owner of next prod_idx */
655 		sq->owner = !sq->owner;
656 
657 		/* turn around to head */
658 		if (unlikely(end_pi > txq->q_depth)) {
659 			wqe_info->around = 1;
660 			remain_wqebbs = txq->q_depth - cur_pi;
661 			wqe_info->seq_wqebbs = remain_wqebbs;
662 		}
663 	}
664 
665 	return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
666 }
667 
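/*
 * Sanity-check the requested tx offloads against what is supported
 * (VXLAN-only tunnelling, contiguous headers, consistent IPv4/IPv6 and TSO
 * flag combinations). Returns 0 on success, -EINVAL or -ENOTSUP otherwise.
 */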
668 static inline int
669 hinic_validate_tx_offload(const struct rte_mbuf *m)
670 {
671 	uint64_t ol_flags = m->ol_flags;
672 	uint64_t inner_l3_offset = m->l2_len;
673 
674 	/* only vxlan tunnel offload is supported */
675 	if ((ol_flags & PKT_TX_TUNNEL_MASK) &&
676 	    !(ol_flags & PKT_TX_TUNNEL_VXLAN))
677 		return -ENOTSUP;
678 
679 	if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
680 		inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
681 
682 	/* Headers are fragmented */
683 	if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)
684 		return -ENOTSUP;
685 
686 	/* IP checksum can be computed only for IPv4 packets */
687 	if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
688 		return -EINVAL;
689 
690 	/* IP type not set when required */
691 	if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) {
692 		if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
693 			return -EINVAL;
694 	}
695 
696 	/* Check requirements for TSO packet */
697 	if (ol_flags & PKT_TX_TCP_SEG) {
698 		if (m->tso_segsz == 0 ||
699 			((ol_flags & PKT_TX_IPV4) &&
700 			!(ol_flags & PKT_TX_IP_CKSUM)))
701 			return -EINVAL;
702 	}
703 
704 	/* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
705 	if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&
706 		!(ol_flags & PKT_TX_OUTER_IPV4))
707 		return -EINVAL;
708 
709 	return 0;
710 }
711 
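/*
 * Compute the IPv4 pseudo-header checksum used to seed the L4 checksum field
 * before the packet is handed to hardware checksum offload. For TSO the
 * length field is left as zero, matching the rte_ipv4_phdr_cksum() behavior.
 */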
712 static inline uint16_t
713 hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
714 {
715 	struct ipv4_psd_header {
716 		uint32_t src_addr; /* IP address of source host. */
717 		uint32_t dst_addr; /* IP address of destination host. */
718 		uint8_t  zero;     /* zero. */
719 		uint8_t  proto;    /* L4 protocol type. */
720 		uint16_t len;      /* L4 length. */
721 	} psd_hdr;
722 	uint8_t ihl;
723 
724 	psd_hdr.src_addr = ipv4_hdr->src_addr;
725 	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
726 	psd_hdr.zero = 0;
727 	psd_hdr.proto = ipv4_hdr->next_proto_id;
728 	if (ol_flags & PKT_TX_TCP_SEG) {
729 		psd_hdr.len = 0;
730 	} else {
731 		/* ipv4_hdr->version_ihl is a big-endian uint8_t; ihl occupies
732 		 * the lower 4 bits, in units of 4 bytes
733 		 */
734 		ihl = (ipv4_hdr->version_ihl & 0xF) << 2;
735 		psd_hdr.len =
736 		rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
737 				 ihl);
738 	}
739 	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
740 }
741 
742 static inline uint16_t
743 hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
744 {
745 	uint32_t sum;
746 	struct {
747 		uint32_t len;   /* L4 length. */
748 		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
749 	} psd_hdr;
750 
751 	psd_hdr.proto = (ipv6_hdr->proto << 24);
752 	if (ol_flags & PKT_TX_TCP_SEG)
753 		psd_hdr.len = 0;
754 	else
755 		psd_hdr.len = ipv6_hdr->payload_len;
756 
757 	sum = __rte_raw_cksum(ipv6_hdr->src_addr,
758 		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
759 	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
760 	return __rte_raw_cksum_reduce(sum);
761 }
762 
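/*
 * Parse the mbuf offload requests into off_info and prepare the packet
 * headers for hardware offload: record header lengths/types, compute the
 * payload offset, and seed the inner (and outer, for VXLAN) L4 checksum
 * fields with the pseudo-header checksum.
 */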
763 static inline int
764 hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
765 				struct hinic_tx_offload_info *off_info)
766 {
767 	struct rte_ipv4_hdr *ipv4_hdr;
768 	struct rte_ipv6_hdr *ipv6_hdr;
769 	struct rte_tcp_hdr *tcp_hdr;
770 	struct rte_udp_hdr *udp_hdr;
771 	struct rte_ether_hdr *eth_hdr;
772 	struct rte_vlan_hdr *vlan_hdr;
773 	u16 eth_type = 0;
774 	uint64_t inner_l3_offset = m->l2_len;
775 	uint64_t ol_flags = m->ol_flags;
776 
777 	/* Check whether the packet requests any of the available offloads */
778 	if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
779 		return 0;
780 
781 	if (unlikely(hinic_validate_tx_offload(m)))
782 		return -EINVAL;
783 
784 	if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
785 			(ol_flags & PKT_TX_OUTER_IPV6) ||
786 			(ol_flags & PKT_TX_TUNNEL_VXLAN)) {
787 		inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
788 		off_info->outer_l2_len = m->outer_l2_len;
789 		off_info->outer_l3_len = m->outer_l3_len;
790 		/* only vxlan tunneled packets are supported */
791 		off_info->inner_l2_len = m->l2_len - VXLANLEN -
792 						sizeof(struct rte_udp_hdr);
793 		off_info->inner_l3_len = m->l3_len;
794 		off_info->inner_l4_len = m->l4_len;
795 		off_info->tunnel_length = m->l2_len;
796 		off_info->payload_offset = m->outer_l2_len +
797 				m->outer_l3_len + m->l2_len + m->l3_len;
798 		off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
799 	} else {
800 		off_info->inner_l2_len = m->l2_len;
801 		off_info->inner_l3_len = m->l3_len;
802 		off_info->inner_l4_len = m->l4_len;
803 		off_info->tunnel_type = NOT_TUNNEL;
804 		off_info->payload_offset = m->l2_len + m->l3_len;
805 	}
806 
807 	if (((ol_flags & PKT_TX_L4_MASK) != PKT_TX_SCTP_CKSUM) &&
808 	    ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_UDP_CKSUM))
809 		off_info->payload_offset += m->l4_len;
810 
811 	/* invalid udp or tcp header */
812 	if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
813 		return -EINVAL;
814 
815 	/* Process the outer udp pseudo-header checksum */
816 	if ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||
817 			(ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
818 			(ol_flags & PKT_TX_OUTER_IPV6))) {
819 		off_info->tunnel_type = TUNNEL_UDP_CSUM;
820 
821 		/* inner_l4_tcp_udp csum must be set to calculate the outer
822 		 * udp checksum for vxlan packets without inner l3 and l4
823 		 */
824 		off_info->inner_l4_tcp_udp = 1;
825 
826 		eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
827 		eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
828 
829 		if (eth_type == RTE_ETHER_TYPE_VLAN) {
830 			vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
831 			eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
832 		}
833 
834 		if (eth_type == RTE_ETHER_TYPE_IPV4) {
835 			ipv4_hdr =
836 			rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
837 						m->outer_l2_len);
838 			off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
839 			ipv4_hdr->hdr_checksum = 0;
840 
841 			udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
842 							m->outer_l3_len);
843 			udp_hdr->dgram_cksum =
844 				hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
845 		} else if (eth_type == RTE_ETHER_TYPE_IPV6) {
846 			off_info->outer_l3_type = IPV6_PKT;
847 			ipv6_hdr =
848 			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
849 						m->outer_l2_len);
850 
851 			udp_hdr =
852 			rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
853 						(m->outer_l2_len +
854 						m->outer_l3_len));
855 			udp_hdr->dgram_cksum =
856 				hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
857 		}
858 	}
859 
860 	if (ol_flags & PKT_TX_IPV4)
861 		off_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
862 					IPV4_PKT_WITH_CHKSUM_OFFLOAD :
863 					IPV4_PKT_NO_CHKSUM_OFFLOAD;
864 	else if (ol_flags & PKT_TX_IPV6)
865 		off_info->inner_l3_type = IPV6_PKT;
866 
867 	/* Process the pseudo-header checksum */
868 	if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
869 		if (ol_flags & PKT_TX_IPV4) {
870 			ipv4_hdr =
871 			rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
872 						inner_l3_offset);
873 
874 			if (ol_flags & PKT_TX_IP_CKSUM)
875 				ipv4_hdr->hdr_checksum = 0;
876 
877 			udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
878 								m->l3_len);
879 			udp_hdr->dgram_cksum =
880 				hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
881 		} else {
882 			ipv6_hdr =
883 			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
884 						inner_l3_offset);
885 
886 			udp_hdr =
887 			rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
888 						(inner_l3_offset + m->l3_len));
889 			udp_hdr->dgram_cksum =
890 				hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
891 		}
892 
893 		off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
894 		off_info->inner_l4_tcp_udp = 1;
895 		off_info->inner_l4_len = sizeof(struct rte_udp_hdr);
896 	} else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
897 			(ol_flags & PKT_TX_TCP_SEG)) {
898 		if (ol_flags & PKT_TX_IPV4) {
899 			ipv4_hdr =
900 			rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
901 						inner_l3_offset);
902 
903 			if (ol_flags & PKT_TX_IP_CKSUM)
904 				ipv4_hdr->hdr_checksum = 0;
905 
906 			/* non-TSO tcp */
907 			tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
908 								m->l3_len);
909 			tcp_hdr->cksum =
910 				hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
911 		} else {
912 			ipv6_hdr =
913 			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
914 						inner_l3_offset);
915 			/* non-TSO tcp */
916 			tcp_hdr =
917 			rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
918 						(inner_l3_offset + m->l3_len));
919 			tcp_hdr->cksum =
920 				hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
921 		}
922 
923 		off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
924 		off_info->inner_l4_tcp_udp = 1;
925 	} else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
926 		off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
927 		off_info->inner_l4_tcp_udp = 0;
928 		off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
929 	}
930 
931 	return 0;
932 }
933 
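/*
 * Work out the SGE count and offload info for one packet. Non-TSO packets
 * are limited to 64KB and 17 sges (extra segments are merged into one copy
 * mbuf); TSO packets are validated by hinic_is_tso_sge_valid(). Returns
 * false if the packet cannot be sent as-is.
 */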
934 static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
935 					    struct hinic_wqe_info *sqe_info,
936 					    struct hinic_tx_offload_info
937 					    *off_info)
938 {
939 	u16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;
940 	struct rte_mbuf *mbuf;
941 	int ret;
942 
943 	memset(off_info, 0, sizeof(*off_info));
944 
945 	ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
946 	if (unlikely(ret))
947 		return false;
948 
949 	sqe_info->cpy_mbuf_cnt = 0;
950 
951 	/* non tso mbuf */
952 	if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
953 		if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
954 			/* non-tso packet len must be less than 64KB */
955 			return false;
956 		} else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
957 			/* a non-tso packet must not exceed 17 buffers;
958 			 * mbuf segs beyond that limit are copied to one buffer
959 			 */
960 			total_len = 0;
961 			mbuf = mbuf_pkt;
962 			for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
963 				total_len += mbuf->data_len;
964 				mbuf = mbuf->next;
965 			}
966 
967 			/* by default, up to 4KB of mbuf segs can be copied */
968 			if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
969 				  mbuf_pkt->pkt_len)
970 				return false;
971 
972 			sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
973 			sqe_info->cpy_mbuf_cnt = 1;
974 			return true;
975 		}
976 
977 		/* valid non tso mbuf */
978 		sqe_info->sge_cnt = sge_cnt;
979 	} else {
980 		/* tso mbuf */
981 		if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
982 			/* too many mbuf segs */
983 			return false;
984 
985 		/* check whether the tso mbuf segs are valid */
986 		if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
987 			     off_info, sqe_info)))
988 			return false;
989 	}
990 
991 	return true;
992 }
993 
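/*
 * Ring the send queue doorbell: the low 8 bits of the producer index select
 * the doorbell address and the high bits go into the doorbell data, together
 * with the queue id and CoS, written in big endian after a write barrier.
 */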
994 static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
995 {
996 	u16 prod_idx;
997 	u32 hi_prod_idx;
998 	struct hinic_sq_db sq_db;
999 
1000 	prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
1001 	hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);
1002 
1003 	sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
1004 			SQ_DB_INFO_SET(SQ_DB, TYPE) |
1005 			SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
1006 			SQ_DB_INFO_SET(cos, COS) |
1007 			SQ_DB_INFO_SET(sq->q_id, QID);
1008 
1009 	/* Data should be written to HW in Big Endian Format */
1010 	sq_db.db_info = cpu_to_be32(sq_db.db_info);
1011 
1012 	/* Write all before the doorbell */
1013 	rte_wmb();
1014 	writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
1015 }
1016 
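/*
 * Burst transmit entry point. For each mbuf: parse offload/SGE info, reserve
 * wqebbs (reclaiming completed ones when short), fill the SGE, task and
 * control sections, convert the WQE to big endian, and finally ring the
 * doorbell once for the whole burst.
 */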
1017 u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
1018 {
1019 	int free_wqebb_cnt, wqe_wqebb_cnt;
1020 	u32 queue_info, tx_bytes = 0;
1021 	u16 nb_tx;
1022 	struct hinic_wqe_info sqe_info;
1023 	struct hinic_tx_offload_info off_info;
1024 	struct rte_mbuf *mbuf_pkt;
1025 	struct hinic_txq *txq = tx_queue;
1026 	struct hinic_tx_info *tx_info;
1027 	struct hinic_sq_wqe *sq_wqe;
1028 	struct hinic_sq_task *task;
1029 
1030 	/* reclaim tx mbufs before transmitting new packets */
1031 	if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
1032 		hinic_xmit_mbuf_cleanup(txq);
1033 
1034 	/* tx loop routine */
1035 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1036 		mbuf_pkt = *tx_pkts++;
1037 		queue_info = 0;
1038 
1039 		/* 1. parse sge and tx offload info from mbuf */
1040 		if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
1041 						       &sqe_info, &off_info))) {
1042 			txq->txq_stats.off_errs++;
1043 			break;
1044 		}
1045 
1046 		/* 2. try to get enough wqebb */
1047 		wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
1048 		free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1049 		if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1050 			/* reclaim again */
1051 			hinic_xmit_mbuf_cleanup(txq);
1052 			free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1053 			if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1054 				txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
1055 				break;
1056 			}
1057 		}
1058 
1059 		/* 3. get the sq tail wqe address from the wqe page;
1060 		 * the sq has enough wqebbs for this packet
1061 		 */
1062 		sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
1063 
1064 		/* 4. fill sq wqe sge section */
1065 		if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
1066 						     sq_wqe->buf_descs,
1067 						     &sqe_info))) {
1068 			hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
1069 					    wqe_wqebb_cnt, sqe_info.owner);
1070 			txq->txq_stats.off_errs++;
1071 			break;
1072 		}
1073 
1074 		/* 5. fill sq wqe task section and queue info */
1075 		task = &sq_wqe->task;
1076 
1077 		/* tx packet offload configure */
1078 		hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
1079 					   &off_info);
1080 
1081 		/* 6. record tx info */
1082 		tx_info = &txq->tx_info[sqe_info.pi];
1083 		tx_info->mbuf = mbuf_pkt;
1084 		tx_info->wqebb_cnt = wqe_wqebb_cnt;
1085 
1086 		/* 7. fill sq wqe header section */
1087 		hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
1088 					 sqe_info.sge_cnt, sqe_info.owner);
1089 
1090 		/* 8. convert contiguous or bottom wqe byte order to big endian */
1091 		hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
1092 
1093 		tx_bytes += mbuf_pkt->pkt_len;
1094 	}
1095 
1096 	/* 9. write sq doorbell in burst mode */
1097 	if (nb_tx) {
1098 		hinic_sq_write_db(txq->sq, txq->cos);
1099 
1100 		txq->txq_stats.packets += nb_tx;
1101 		txq->txq_stats.bytes += tx_bytes;
1102 	}
1103 	txq->txq_stats.burst_pkts = nb_tx;
1104 
1105 	return nb_tx;
1106 }
1107 
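/*
 * Drop every mbuf still held by the send queue (used when the queue is being
 * stopped or torn down), advancing the local consumer index past each
 * recorded WQE.
 */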
1108 void hinic_free_all_tx_skbs(struct hinic_txq *txq)
1109 {
1110 	u16 ci;
1111 	struct hinic_nic_dev *nic_dev = txq->nic_dev;
1112 	struct hinic_tx_info *tx_info;
1113 	int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
1114 						   txq->q_id) + 1;
1115 
1116 	while (free_wqebbs < txq->q_depth) {
1117 		ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);
1118 
1119 		tx_info = &txq->tx_info[ci];
1120 
1121 		if (unlikely(tx_info->cpy_mbuf != NULL)) {
1122 			rte_pktmbuf_free(tx_info->cpy_mbuf);
1123 			tx_info->cpy_mbuf = NULL;
1124 		}
1125 
1126 		rte_pktmbuf_free(tx_info->mbuf);
1127 		hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
1128 					 tx_info->wqebb_cnt);
1129 
1130 		free_wqebbs += tx_info->wqebb_cnt;
1131 		tx_info->mbuf = NULL;
1132 	}
1133 }
1134 
1135 void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
1136 {
1137 	u16 q_id;
1138 	struct hinic_nic_dev *nic_dev =
1139 				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1140 
1141 	for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
1142 		eth_dev->data->tx_queues[q_id] = NULL;
1143 
1144 		if (nic_dev->txqs[q_id] == NULL)
1145 			continue;
1146 
1147 		/* the tx queue is stopped; free its tx mbufs */
1148 		hinic_free_all_tx_skbs(nic_dev->txqs[q_id]);
1149 		hinic_free_tx_resources(nic_dev->txqs[q_id]);
1150 
1151 		/* free txq */
1152 		kfree(nic_dev->txqs[q_id]);
1153 		nic_dev->txqs[q_id] = NULL;
1154 	}
1155 }
1156 
1157 void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
1158 {
1159 	u16 q_id;
1160 	struct hinic_nic_dev *nic_dev =
1161 				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1162 
1163 	for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
1164 		/* the tx queue is stopped; free its tx mbufs */
1165 		hinic_free_all_tx_skbs(nic_dev->txqs[q_id]);
1166 }
1167 
1168 int hinic_setup_tx_resources(struct hinic_txq *txq)
1169 {
1170 	u64 tx_info_sz;
1171 
1172 	tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
1173 	txq->tx_info = kzalloc_aligned(tx_info_sz, GFP_KERNEL);
1174 	if (!txq->tx_info)
1175 		return -ENOMEM;
1176 
1177 	return HINIC_OK;
1178 }
1179 
1180 void hinic_free_tx_resources(struct hinic_txq *txq)
1181 {
1182 	if (txq->tx_info == NULL)
1183 		return;
1184 
1185 	kfree(txq->tx_info);
1186 	txq->tx_info = NULL;
1187 }
1188 
1189 int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, u16 sq_depth)
1190 {
1191 	int err;
1192 	struct hinic_nic_io *nic_io = hwdev->nic_io;
1193 	struct hinic_qp *qp = &nic_io->qps[q_id];
1194 	struct hinic_sq *sq = &qp->sq;
1195 	void __iomem *db_addr;
1196 	volatile u32 *ci_addr;
1197 
1198 	sq->sq_depth = sq_depth;
1199 	nic_io->sq_depth = sq_depth;
1200 
1201 	/* alloc wq */
1202 	err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
1203 				HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth);
1204 	if (err) {
1205 		PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
1206 		return err;
1207 	}
1208 
1209 	/* alloc sq doorbell space */
1210 	err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
1211 	if (err) {
1212 		PMD_DRV_LOG(ERR, "Failed to init db addr");
1213 		goto alloc_db_err;
1214 	}
1215 
1216 	/* clear hardware ci */
1217 	ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
1218 	*ci_addr = 0;
1219 
1220 	sq->q_id = q_id;
1221 	sq->wq = &nic_io->sq_wq[q_id];
1222 	sq->owner = 1;
1223 	sq->cons_idx_addr = (volatile u16 *)ci_addr;
1224 	sq->db_addr = db_addr;
1225 
1226 	return HINIC_OK;
1227 
1228 alloc_db_err:
1229 	hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);
1230 
1231 	return err;
1232 }
1233 
1234 void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
1235 {
1236 	struct hinic_nic_io *nic_io;
1237 	struct hinic_qp *qp;
1238 
1239 	nic_io = hwdev->nic_io;
1240 	qp = &nic_io->qps[q_id];
1241 
1242 	if (qp->sq.wq == NULL)
1243 		return;
1244 
1245 	hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
1246 	hinic_wq_free(nic_io->hwdev, qp->sq.wq);
1247 	qp->sq.wq = NULL;
1248 }
1249