xref: /dpdk/drivers/net/hinic/hinic_pmd_tx.c (revision 89b5642d0d45c22c0ceab57efe3fab3b49ff4324)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Huawei Technologies Co., Ltd
3  */
4 
5 #include <rte_mbuf.h>
6 #include <rte_tcp.h>
7 #include <rte_sctp.h>
8 #include <rte_udp.h>
9 #include <rte_ip.h>
10 #ifdef RTE_ARCH_ARM64
11 #include <arm_neon.h>
12 #endif
13 
14 #include "base/hinic_compat.h"
15 #include "base/hinic_pmd_hwdev.h"
16 #include "base/hinic_pmd_hwif.h"
17 #include "base/hinic_pmd_wq.h"
18 #include "base/hinic_pmd_nicio.h"
19 #include "base/hinic_pmd_niccfg.h"
20 #include "hinic_pmd_ethdev.h"
21 #include "hinic_pmd_tx.h"
22 
23 /* packet header and tx offload info */
24 #define ETHER_LEN_NO_VLAN		14
25 #define ETHER_LEN_WITH_VLAN		18
26 #define VXLANLEN			8
27 #define MAX_PLD_OFFSET			221
28 #define MAX_SINGLE_SGE_SIZE		65536
29 #define TSO_ENABLE			1
30 #define TX_MSS_DEFAULT			0x3E00
31 #define TX_MSS_MIN			0x50
32 
33 #define HINIC_NONTSO_PKT_MAX_SGE		17	/* non-tso max sge 17 */
34 #define HINIC_NONTSO_SEG_NUM_INVALID(num)	\
35 			((num) > HINIC_NONTSO_PKT_MAX_SGE)
36 
37 #define HINIC_TSO_PKT_MAX_SGE			127	/* tso max sge 127 */
38 #define HINIC_TSO_SEG_NUM_INVALID(num)		((num) > HINIC_TSO_PKT_MAX_SGE)
39 
40 /* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */
41 #define HINIC_BUF_DESC_SIZE(nr_descs)	(SIZE_8BYTES(((u32)nr_descs) << 4))
42 
43 #define MASKED_SQ_IDX(sq, idx)		((idx) & (sq)->wq->mask)
44 
45 /* SQ_CTRL */
46 #define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT		0
47 #define SQ_CTRL_TASKSECT_LEN_SHIFT		16
48 #define SQ_CTRL_DATA_FORMAT_SHIFT		22
49 #define SQ_CTRL_LEN_SHIFT			29
50 #define SQ_CTRL_OWNER_SHIFT			31
51 
52 #define SQ_CTRL_BUFDESC_SECT_LEN_MASK		0xFFU
53 #define SQ_CTRL_TASKSECT_LEN_MASK		0x1FU
54 #define SQ_CTRL_DATA_FORMAT_MASK		0x1U
55 #define SQ_CTRL_LEN_MASK			0x3U
56 #define SQ_CTRL_OWNER_MASK			0x1U
57 
58 #define SQ_CTRL_SET(val, member)	\
59 	(((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)
60 
61 #define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT		2
62 #define SQ_CTRL_QUEUE_INFO_UFO_SHIFT		10
63 #define SQ_CTRL_QUEUE_INFO_TSO_SHIFT		11
64 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT	12
65 #define SQ_CTRL_QUEUE_INFO_MSS_SHIFT		13
66 #define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT		27
67 #define SQ_CTRL_QUEUE_INFO_UC_SHIFT		28
68 #define SQ_CTRL_QUEUE_INFO_PRI_SHIFT		29
69 
70 #define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK		0xFFU
71 #define SQ_CTRL_QUEUE_INFO_UFO_MASK		0x1U
72 #define SQ_CTRL_QUEUE_INFO_TSO_MASK		0x1U
73 #define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	0x1U
74 #define SQ_CTRL_QUEUE_INFO_MSS_MASK		0x3FFFU
75 #define SQ_CTRL_QUEUE_INFO_SCTP_MASK		0x1U
76 #define SQ_CTRL_QUEUE_INFO_UC_MASK		0x1U
77 #define SQ_CTRL_QUEUE_INFO_PRI_MASK		0x7U
78 
79 #define SQ_CTRL_QUEUE_INFO_SET(val, member)	\
80 	(((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<	\
81 			SQ_CTRL_QUEUE_INFO_##member##_SHIFT)
82 
83 #define SQ_CTRL_QUEUE_INFO_GET(val, member)	\
84 	(((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &	\
85 			SQ_CTRL_QUEUE_INFO_##member##_MASK)
86 
87 #define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)	\
88 	((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK << \
89 			SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))
90 
91 #define	SQ_TASK_INFO0_L2HDR_LEN_SHIFT		0
92 #define	SQ_TASK_INFO0_L4OFFLOAD_SHIFT		8
93 #define	SQ_TASK_INFO0_INNER_L3TYPE_SHIFT	10
94 #define	SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT	12
95 #define	SQ_TASK_INFO0_PARSE_FLAG_SHIFT		13
96 #define	SQ_TASK_INFO0_UFO_AVD_SHIFT		14
97 #define	SQ_TASK_INFO0_TSO_UFO_SHIFT		15
98 #define SQ_TASK_INFO0_VLAN_TAG_SHIFT		16
99 
100 #define	SQ_TASK_INFO0_L2HDR_LEN_MASK		0xFFU
101 #define	SQ_TASK_INFO0_L4OFFLOAD_MASK		0x3U
102 #define	SQ_TASK_INFO0_INNER_L3TYPE_MASK		0x3U
103 #define	SQ_TASK_INFO0_VLAN_OFFLOAD_MASK		0x1U
104 #define	SQ_TASK_INFO0_PARSE_FLAG_MASK		0x1U
105 #define	SQ_TASK_INFO0_UFO_AVD_MASK		0x1U
106 #define SQ_TASK_INFO0_TSO_UFO_MASK		0x1U
107 #define SQ_TASK_INFO0_VLAN_TAG_MASK		0xFFFFU
108 
109 #define SQ_TASK_INFO0_SET(val, member)			\
110 	(((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<	\
111 			SQ_TASK_INFO0_##member##_SHIFT)
112 
113 #define	SQ_TASK_INFO1_MD_TYPE_SHIFT		8
114 #define SQ_TASK_INFO1_INNER_L4LEN_SHIFT		16
115 #define SQ_TASK_INFO1_INNER_L3LEN_SHIFT		24
116 
117 #define	SQ_TASK_INFO1_MD_TYPE_MASK		0xFFU
118 #define SQ_TASK_INFO1_INNER_L4LEN_MASK		0xFFU
119 #define SQ_TASK_INFO1_INNER_L3LEN_MASK		0xFFU
120 
121 #define SQ_TASK_INFO1_SET(val, member)			\
122 	(((val) & SQ_TASK_INFO1_##member##_MASK) <<	\
123 			SQ_TASK_INFO1_##member##_SHIFT)
124 
125 #define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT	0
126 #define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT		8
127 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT	16
128 #define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT	24
129 
130 #define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK		0xFFU
131 #define SQ_TASK_INFO2_OUTER_L3LEN_MASK		0xFFU
132 #define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK	0x7U
133 #define SQ_TASK_INFO2_OUTER_L3TYPE_MASK		0x3U
134 
135 #define SQ_TASK_INFO2_SET(val, member)			\
136 	(((val) & SQ_TASK_INFO2_##member##_MASK) <<	\
137 			SQ_TASK_INFO2_##member##_SHIFT)
138 
139 #define	SQ_TASK_INFO4_L2TYPE_SHIFT		31
140 
141 #define	SQ_TASK_INFO4_L2TYPE_MASK		0x1U
142 
143 #define SQ_TASK_INFO4_SET(val, member)		\
144 	(((u32)(val) & SQ_TASK_INFO4_##member##_MASK) << \
145 			SQ_TASK_INFO4_##member##_SHIFT)
146 
147 /* SQ_DB */
148 #define SQ_DB_OFF				0x00000800
149 #define SQ_DB_INFO_HI_PI_SHIFT			0
150 #define SQ_DB_INFO_QID_SHIFT			8
151 #define SQ_DB_INFO_CFLAG_SHIFT			23
152 #define SQ_DB_INFO_COS_SHIFT			24
153 #define SQ_DB_INFO_TYPE_SHIFT			27
154 
155 #define SQ_DB_INFO_HI_PI_MASK			0xFFU
156 #define SQ_DB_INFO_QID_MASK			0x3FFU
157 #define SQ_DB_INFO_CFLAG_MASK			0x1U
158 #define SQ_DB_INFO_COS_MASK			0x7U
159 #define SQ_DB_INFO_TYPE_MASK			0x1FU
160 #define SQ_DB_INFO_SET(val, member)		\
161 	(((u32)(val) & SQ_DB_INFO_##member##_MASK) <<	\
162 			SQ_DB_INFO_##member##_SHIFT)
163 
164 #define SQ_DB					1
165 #define SQ_CFLAG_DP				0	/* CFLAG_DATA_PATH */
166 
167 #define SQ_DB_PI_LOW_MASK			0xFF
168 #define SQ_DB_PI_LOW(pi)			((pi) & SQ_DB_PI_LOW_MASK)
169 #define SQ_DB_PI_HI_SHIFT			8
170 #define SQ_DB_PI_HIGH(pi)			((pi) >> SQ_DB_PI_HI_SHIFT)
171 #define SQ_DB_ADDR(sq, pi)		\
172 	((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))
173 
174 /* txq wq operations */
175 #define HINIC_GET_SQ_WQE_MASK(txq)		((txq)->wq->mask)
176 
177 #define HINIC_GET_SQ_HW_CI(txq)	\
178 	((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))
179 
180 #define HINIC_GET_SQ_LOCAL_CI(txq)	\
181 	(((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))
182 
183 #define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)	\
184 	do {						\
185 		(txq)->wq->cons_idx += wqebb_cnt;	\
186 		(txq)->wq->delta += wqebb_cnt;		\
187 	} while (0)
188 
189 #define HINIC_GET_SQ_FREE_WQEBBS(txq)	((txq)->wq->delta - 1)
190 
191 #define HINIC_IS_SQ_EMPTY(txq)	(((txq)->wq->delta) == ((txq)->q_depth))
192 
193 #define BUF_DESC_SIZE_SHIFT		4
194 
195 #define HINIC_SQ_WQE_SIZE(num_sge)		\
196 	(sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +  \
197 			(unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))
198 
199 #define HINIC_SQ_WQEBB_CNT(num_sge)	\
200 	(int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge), \
201 			HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)
202 
203 
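/* Convert a WQE to big-endian 32-bit words in place.
 * Each 64B WQEBB is handled as four 16B lanes; the SSE/NEON shuffle
 * masks reverse the bytes within every 4-byte word, which is the same
 * transform the generic hinic_cpu_to_be32() fallback performs.
 */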
204 static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
205 {
206 #if defined(RTE_ARCH_X86_64)
207 	int i;
208 	__m128i *wqe_line = (__m128i *)data;
209 	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
210 					11, 4, 5, 6, 7, 0, 1, 2, 3);
211 
212 	for (i = 0; i < nr_wqebb; i++) {
213 		/* convert 64B wqebb using 4 SSE instructions */
214 		wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
215 		wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
216 		wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
217 		wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
218 		wqe_line += 4;
219 	}
220 #elif defined(RTE_ARCH_ARM64)
221 	int i;
222 	uint8x16_t *wqe_line = (uint8x16_t *)data;
223 	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
224 					9, 8, 15, 14, 13, 12};
225 
226 	for (i = 0; i < nr_wqebb; i++) {
227 		wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
228 		wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
229 		wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
230 		wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
231 		wqe_line += 4;
232 	}
233 #else
234 	hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
235 #endif
236 }
237 
238 static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
239 {
240 #if defined(RTE_ARCH_X86_64)
241 	int i;
242 	__m128i *sge_line = (__m128i *)data;
243 	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
244 					11, 4, 5, 6, 7, 0, 1, 2, 3);
245 
246 	for (i = 0; i < nr_sge; i++) {
247 		/* convert 16B sge using 1 SSE instruction */
248 		*sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
249 		sge_line++;
250 	}
251 #elif defined(RTE_ARCH_ARM64)
252 	int i;
253 	uint8x16_t *sge_line = (uint8x16_t *)data;
254 	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
255 					9, 8, 15, 14, 13, 12};
256 
257 	for (i = 0; i < nr_sge; i++) {
258 		*sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
259 		sge_line++;
260 	}
261 #else
262 	hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
263 #endif
264 }
265 
266 void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
267 {
268 	if (!txq || !stats) {
269 		PMD_DRV_LOG(ERR, "Txq or stats is NULL");
270 		return;
271 	}
272 
273 	memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
274 }
275 
276 void hinic_txq_stats_reset(struct hinic_txq *txq)
277 {
278 	struct hinic_txq_stats *txq_stats;
279 
280 	if (txq == NULL)
281 		return;
282 
283 	txq_stats = &txq->txq_stats;
284 	memset(txq_stats, 0, sizeof(*txq_stats));
285 }
286 
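/* Coalesce the first sge_cnt segments of a chained mbuf into a single
 * mbuf allocated from the per-device copy mempool (cpy_mpool). Used when
 * a packet carries more segments than the hardware sge limit allows.
 */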
287 static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
288 						  struct rte_mbuf *mbuf,
289 						  u16 sge_cnt)
290 {
291 	struct rte_mbuf *dst_mbuf;
292 	u32 offset = 0;
293 	u16 i;
294 
295 	if (unlikely(!nic_dev->cpy_mpool))
296 		return NULL;
297 
298 	dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
299 	if (unlikely(!dst_mbuf))
300 		return NULL;
301 
302 	dst_mbuf->data_off = 0;
303 	for (i = 0; i < sge_cnt; i++) {
304 		rte_memcpy((char *)dst_mbuf->buf_addr + offset,
305 			   (char *)mbuf->buf_addr + mbuf->data_off,
306 			   mbuf->data_len);
307 		dst_mbuf->data_len += mbuf->data_len;
308 		offset += mbuf->data_len;
309 		mbuf = mbuf->next;
310 	}
311 
312 	dst_mbuf->pkt_len = dst_mbuf->data_len;
313 
314 	return dst_mbuf;
315 }
316 
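/* Fill the SQ buffer descriptors with the IOVA and length of every mbuf
 * segment. Handles the case where the WQE wraps from the bottom of the
 * SQ ring back to its head, and the case where trailing segments must
 * first be coalesced into one copy mbuf.
 */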
317 static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
318 					  struct rte_mbuf *mbuf,
319 					  struct hinic_sq_bufdesc *sges,
320 					  struct hinic_wqe_info *sqe_info)
321 {
322 	dma_addr_t dma_addr;
323 	u16 i, around_sges;
324 	u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
325 	u16 real_nb_segs = mbuf->nb_segs;
326 	struct hinic_sq_bufdesc *sge_idx = sges;
327 
328 	if (unlikely(sqe_info->around)) {
329 		/* part of the wqe is at the sq bottom while the rest
330 		 * of the wqe is at the sq head
331 		 */
332 		i = 0;
333 		for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
334 		     sge_idx++) {
335 			if (unlikely(mbuf == NULL)) {
336 				txq->txq_stats.mbuf_null++;
337 				return false;
338 			}
339 
340 			dma_addr = rte_mbuf_data_iova(mbuf);
341 			if (unlikely(mbuf->data_len == 0)) {
342 				txq->txq_stats.sge_len0++;
343 				return false;
344 			}
345 			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
346 				      mbuf->data_len);
347 			mbuf = mbuf->next;
348 			i++;
349 		}
350 
351 		around_sges = nb_segs - i;
352 		sge_idx = (struct hinic_sq_bufdesc *)
353 				((void *)txq->sq_head_addr);
354 		for (; i < nb_segs; i++) {
355 			if (unlikely(mbuf == NULL)) {
356 				txq->txq_stats.mbuf_null++;
357 				return false;
358 			}
359 
360 			dma_addr = rte_mbuf_data_iova(mbuf);
361 			if (unlikely(mbuf->data_len == 0)) {
362 				txq->txq_stats.sge_len0++;
363 				return false;
364 			}
365 			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
366 				      mbuf->data_len);
367 			mbuf = mbuf->next;
368 			sge_idx++;
369 		}
370 
371 		/* convert sges at head to big endian */
372 		hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
373 	} else {
374 		/* wqe is in continuous space */
375 		for (i = 0; i < nb_segs; i++) {
376 			if (unlikely(mbuf == NULL)) {
377 				txq->txq_stats.mbuf_null++;
378 				return false;
379 			}
380 
381 			dma_addr = rte_mbuf_data_iova(mbuf);
382 			if (unlikely(mbuf->data_len == 0)) {
383 				txq->txq_stats.sge_len0++;
384 				return false;
385 			}
386 			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
387 				      mbuf->data_len);
388 			mbuf = mbuf->next;
389 			sge_idx++;
390 		}
391 	}
392 
393 	/* for now: support non-tso over 17 sges, copy the trailing mbufs */
394 	if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
395 		/* copy excess mbuf segs into a valid buffer, at a performance cost */
396 		txq->txq_stats.cpy_pkts += 1;
397 		mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
398 					  real_nb_segs - nb_segs);
399 		if (unlikely(!mbuf))
400 			return false;
401 
402 		txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;
403 
404 		/* deal with the last mbuf */
405 		dma_addr = rte_mbuf_data_iova(mbuf);
406 		if (unlikely(mbuf->data_len == 0)) {
407 			txq->txq_stats.sge_len0++;
408 			return false;
409 		}
410 		hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
411 			      mbuf->data_len);
412 		if (unlikely(sqe_info->around))
413 			hinic_sge_cpu_to_be32((void *)sge_idx, 1);
414 	}
415 
416 	return true;
417 }
418 
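/* Build the WQE control word: section lengths are encoded in 8-byte
 * units together with the data format and owner bit. An MSS of zero in
 * queue_info defaults to TX_MSS_DEFAULT and anything below TX_MSS_MIN
 * is raised to TX_MSS_MIN.
 */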
419 static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
420 					    u32 queue_info, int nr_descs,
421 					    u8 owner)
422 {
423 	u32 ctrl_size, task_size, bufdesc_size;
424 
425 	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
426 	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
427 	bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);
428 
429 	ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
430 			SQ_CTRL_SET(task_size, TASKSECT_LEN)	|
431 			SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)	|
432 			SQ_CTRL_SET(ctrl_size, LEN)		|
433 			SQ_CTRL_SET(owner, OWNER);
434 
435 	ctrl->queue_info = queue_info;
436 	ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);
437 
438 	if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
439 		ctrl->queue_info |=
440 			SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
441 	} else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
442 		/* mss should not be less than 80 */
443 		ctrl->queue_info =
444 				SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
445 		ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
446 	}
447 }
448 
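/* Validate the segment layout of a TSO packet: within every run of 17
 * consecutive mbuf segs at least one MSS worth of payload must be
 * present. Otherwise the remaining data (up to HINIC_COPY_MBUF_SIZE) is
 * scheduled to be coalesced into a single copy mbuf.
 */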
449 static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
450 					  struct hinic_tx_offload_info
451 					  *poff_info,
452 					  struct hinic_wqe_info *sqe_info)
453 {
454 	u32 total_len, limit_len, checked_len, left_len, adjust_mss;
455 	u32 i, first_mss_sges, left_sges;
456 	struct rte_mbuf *mbuf_head, *mbuf_pre;
457 
458 	left_sges = mbuf->nb_segs;
459 	mbuf_head = mbuf;
460 
461 	/* tso sge number validation */
462 	if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
463 		checked_len = 0;
464 		adjust_mss = mbuf->tso_segsz >= TX_MSS_MIN ?
465 				mbuf->tso_segsz : TX_MSS_MIN;
466 		limit_len = adjust_mss + poff_info->payload_offset;
467 		first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;
468 
469 		/* each continuous run of 17 mbuf segs must be checked once */
470 		while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
471 			/* total len of the first 16 mbufs must be equal
472 			 * to or greater than limit_len
473 			 */
474 			total_len = 0;
475 			for (i = 0; i < first_mss_sges; i++) {
476 				total_len += mbuf->data_len;
477 				mbuf_pre = mbuf;
478 				mbuf = mbuf->next;
479 				if (total_len >= limit_len) {
480 					limit_len = adjust_mss;
481 					break;
482 				}
483 			}
484 
485 			checked_len += total_len;
486 
487 			/* try to copy if not valid */
488 			if (unlikely(first_mss_sges == i)) {
489 				left_sges -= first_mss_sges;
490 				checked_len -= mbuf_pre->data_len;
491 
492 				left_len = mbuf_head->pkt_len - checked_len;
493 				if (left_len > HINIC_COPY_MBUF_SIZE)
494 					return false;
495 
496 				sqe_info->sge_cnt = mbuf_head->nb_segs -
497 							left_sges;
498 				sqe_info->cpy_mbuf_cnt = 1;
499 
500 				return true;
501 			}
502 			first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);
503 
504 			/* continue next 16 mbufs */
505 			left_sges -= (i + 1);
506 		} /* end of while */
507 	}
508 
509 	sqe_info->sge_cnt = mbuf_head->nb_segs;
510 	return true;
511 }
512 
513 static inline void
514 hinic_set_l4_csum_info(struct hinic_sq_task *task,
515 		u32 *queue_info, struct hinic_tx_offload_info *poff_info)
516 {
517 	u32 tcp_udp_cs, sctp = 0;
518 	u16 l2hdr_len;
519 
520 	if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
521 		sctp = 1;
522 
523 	tcp_udp_cs = poff_info->inner_l4_tcp_udp;
524 
525 	if (poff_info->tunnel_type == TUNNEL_UDP_CSUM ||
526 	    poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
527 		l2hdr_len =  poff_info->outer_l2_len;
528 
529 		task->pkt_info2 |=
530 		SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
531 		SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
532 		task->pkt_info2 |=
533 		SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
534 		SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
535 	} else {
536 		l2hdr_len = poff_info->inner_l2_len;
537 	}
538 
539 	task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
540 	task->pkt_info1 |=
541 		SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
542 	task->pkt_info0 |=
543 		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
544 	task->pkt_info1 |=
545 		SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
546 	task->pkt_info0 |=
547 		SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
548 	*queue_info |=
549 		SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
550 		SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
551 		SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
552 }
553 
554 static inline void
555 hinic_set_tso_info(struct hinic_sq_task *task,
556 		u32 *queue_info, struct rte_mbuf *mbuf,
557 		struct hinic_tx_offload_info *poff_info)
558 {
559 	hinic_set_l4_csum_info(task, queue_info, poff_info);
560 
561 	/* wqe for tso */
562 	task->pkt_info0 |=
563 		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
564 	task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
565 	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
566 	/* qsf was initialized in prepare_sq_wqe */
567 	*queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
568 	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
569 }
570 
571 static inline void
572 hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
573 			u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
574 {
575 	task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
576 				SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);
577 
578 	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
579 }
580 
581 static inline void
582 hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
583 		struct hinic_sq_task *task, u32 *queue_info,
584 		struct hinic_tx_offload_info *tx_off_info)
585 {
586 	u16 vlan_tag;
587 	uint64_t ol_flags = mbuf->ol_flags;
588 
589 	/* clear DW0~2 of task section for offload */
590 	task->pkt_info0 = 0;
591 	task->pkt_info1 = 0;
592 	task->pkt_info2 = 0;
593 
594 	/* Base VLAN */
595 	if (unlikely(ol_flags & RTE_MBUF_F_TX_VLAN)) {
596 		vlan_tag = mbuf->vlan_tci;
597 		hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
598 					  vlan_tag >> VLAN_PRIO_SHIFT);
599 	}
600 
601 	/* neither checksum nor tso offload requested */
602 	if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
603 		return;
604 
605 	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG))
606 		/* set tso info for task and qsf */
607 		hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
608 	else /* just support l4 checksum offload */
609 		hinic_set_l4_csum_info(task, queue_info, tx_off_info);
610 }
611 
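/* Reclaim completed TX descriptors: walk from the local consumer index
 * up to the hardware consumer index, free any temporary copy mbufs, and
 * bulk-return single-segment mbufs from the same mempool to speed up
 * the free path.
 */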
612 static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
613 {
614 	struct hinic_tx_info *tx_info;
615 	struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
616 	int i, nb_free = 0;
617 	u16 hw_ci, sw_ci, sq_mask;
618 	int wqebb_cnt = 0;
619 
620 	hw_ci = HINIC_GET_SQ_HW_CI(txq);
621 	sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
622 	sq_mask = HINIC_GET_SQ_WQE_MASK(txq);
623 
624 	for (i = 0; i < txq->tx_free_thresh; ++i) {
625 		tx_info = &txq->tx_info[sw_ci];
626 		if (hw_ci == sw_ci ||
627 			(((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
628 			break;
629 
630 		sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;
631 
632 		if (unlikely(tx_info->cpy_mbuf != NULL)) {
633 			rte_pktmbuf_free(tx_info->cpy_mbuf);
634 			tx_info->cpy_mbuf = NULL;
635 		}
636 
637 		wqebb_cnt += tx_info->wqebb_cnt;
638 		mbuf = tx_info->mbuf;
639 
640 		if (likely(mbuf->nb_segs == 1)) {
641 			m = rte_pktmbuf_prefree_seg(mbuf);
642 			tx_info->mbuf = NULL;
643 
644 			if (unlikely(m == NULL))
645 				continue;
646 
647 			mbuf_free[nb_free++] = m;
648 			if (unlikely(m->pool != mbuf_free[0]->pool ||
649 				nb_free >= HINIC_MAX_TX_FREE_BULK)) {
650 				rte_mempool_put_bulk(mbuf_free[0]->pool,
651 					(void **)mbuf_free, (nb_free - 1));
652 				nb_free = 0;
653 				mbuf_free[nb_free++] = m;
654 			}
655 		} else {
656 			rte_pktmbuf_free(mbuf);
657 			tx_info->mbuf = NULL;
658 		}
659 	}
660 
661 	if (nb_free > 0)
662 		rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
663 				     nb_free);
664 
665 	HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
666 }
667 
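/* Reserve wqebb_cnt WQEBBs in the SQ: advance the producer index, record
 * the current owner bit, and detect whether the WQE wraps past the end
 * of the ring (wqe_info->around), in which case only the contiguous part
 * (seq_wqebbs) lies at the tail.
 */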
668 static inline struct hinic_sq_wqe *
669 hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
670 		 struct hinic_wqe_info *wqe_info)
671 {
672 	u32 cur_pi, end_pi;
673 	u16 remain_wqebbs;
674 	struct hinic_sq *sq = txq->sq;
675 	struct hinic_wq *wq = txq->wq;
676 
677 	/* record current pi */
678 	cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
679 	end_pi = cur_pi + wqebb_cnt;
680 
681 	/* update next pi and delta */
682 	wq->prod_idx += wqebb_cnt;
683 	wq->delta -= wqebb_cnt;
684 
685 	/* return current pi and owner */
686 	wqe_info->pi = cur_pi;
687 	wqe_info->owner = sq->owner;
688 	wqe_info->around = 0;
689 	wqe_info->seq_wqebbs = wqebb_cnt;
690 
691 	if (unlikely(end_pi >= txq->q_depth)) {
692 		/* update owner of next prod_idx */
693 		sq->owner = !sq->owner;
694 
695 		/* turn around to head */
696 		if (unlikely(end_pi > txq->q_depth)) {
697 			wqe_info->around = 1;
698 			remain_wqebbs = txq->q_depth - cur_pi;
699 			wqe_info->seq_wqebbs = remain_wqebbs;
700 		}
701 	}
702 
703 	return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
704 }
705 
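/* Pseudo-header checksum over the IPv4 addresses, protocol and L4
 * length, as required before handing the packet to hardware L4 checksum
 * offload. For TSO the length field is left as zero.
 */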
706 static inline uint16_t
707 hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
708 {
709 	struct ipv4_psd_header {
710 		uint32_t src_addr; /* IP address of source host. */
711 		uint32_t dst_addr; /* IP address of destination host. */
712 		uint8_t  zero;     /* zero. */
713 		uint8_t  proto;    /* L4 protocol type. */
714 		uint16_t len;      /* L4 length. */
715 	} psd_hdr;
716 
717 	psd_hdr.src_addr = ipv4_hdr->src_addr;
718 	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
719 	psd_hdr.zero = 0;
720 	psd_hdr.proto = ipv4_hdr->next_proto_id;
721 	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
722 		psd_hdr.len = 0;
723 	} else {
724 		psd_hdr.len =
725 		rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
726 				 rte_ipv4_hdr_len(ipv4_hdr));
727 	}
728 	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
729 }
730 
731 static inline uint16_t
732 hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
733 {
734 	uint32_t sum;
735 	struct {
736 		uint32_t len;   /* L4 length. */
737 		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
738 	} psd_hdr;
739 
740 	psd_hdr.proto = (ipv6_hdr->proto << 24);
741 	if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
742 		psd_hdr.len = 0;
743 	else
744 		psd_hdr.len = ipv6_hdr->payload_len;
745 
746 	sum = __rte_raw_cksum(&ipv6_hdr->src_addr,
747 		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
748 	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
749 	return __rte_raw_cksum_reduce(sum);
750 }
751 
752 static inline void hinic_get_outer_cs_pld_offset(struct rte_mbuf *m,
753 					struct hinic_tx_offload_info *off_info)
754 {
755 	uint64_t ol_flags = m->ol_flags;
756 
757 	if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM)
758 		off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
759 					   m->l2_len + m->l3_len;
760 	else if ((ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) || (ol_flags & RTE_MBUF_F_TX_TCP_SEG))
761 		off_info->payload_offset = m->outer_l2_len + m->outer_l3_len +
762 					   m->l2_len + m->l3_len + m->l4_len;
763 }
764 
765 static inline void hinic_get_pld_offset(struct rte_mbuf *m,
766 					struct hinic_tx_offload_info *off_info)
767 {
768 	uint64_t ol_flags = m->ol_flags;
769 
770 	if (((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_UDP_CKSUM) ||
771 	    ((ol_flags & RTE_MBUF_F_TX_L4_MASK) == RTE_MBUF_F_TX_SCTP_CKSUM))
772 		off_info->payload_offset = m->l2_len + m->l3_len;
773 	else if ((ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) || (ol_flags & RTE_MBUF_F_TX_TCP_SEG))
774 		off_info->payload_offset = m->l2_len + m->l3_len +
775 					   m->l4_len;
776 }
777 
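/* Parse the outer Ethernet (optionally VLAN tagged) and IP headers of a
 * tunnelled packet to derive outer_l2_len and outer_l3_len when they are
 * not taken from the mbuf offload fields.
 */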
778 static inline void hinic_analyze_tx_info(struct rte_mbuf *mbuf,
779 					 struct hinic_tx_offload_info *off_info)
780 {
781 	struct rte_ether_hdr *eth_hdr;
782 	struct rte_vlan_hdr *vlan_hdr;
783 	struct rte_ipv4_hdr *ipv4_hdr;
784 	u16 eth_type;
785 
786 	eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
787 	eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
788 
789 	if (eth_type == RTE_ETHER_TYPE_VLAN) {
790 		off_info->outer_l2_len = ETHER_LEN_WITH_VLAN;
791 		vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
792 		eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
793 	} else {
794 		off_info->outer_l2_len = ETHER_LEN_NO_VLAN;
795 	}
796 
797 	if (eth_type == RTE_ETHER_TYPE_IPV4) {
798 		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
799 						   off_info->outer_l2_len);
800 		off_info->outer_l3_len = rte_ipv4_hdr_len(ipv4_hdr);
801 	} else if (eth_type == RTE_ETHER_TYPE_IPV6) {
802 		/* not support ipv6 extension header */
803 		off_info->outer_l3_len = sizeof(struct rte_ipv6_hdr);
804 	}
805 }
806 
807 static inline void hinic_analyze_outer_ip_vxlan(struct rte_mbuf *mbuf,
808 					struct hinic_tx_offload_info *off_info)
809 {
810 	struct rte_ether_hdr *eth_hdr;
811 	struct rte_vlan_hdr *vlan_hdr;
812 	struct rte_ipv4_hdr *ipv4_hdr;
813 	struct rte_udp_hdr *udp_hdr;
814 	u16 eth_type = 0;
815 
816 	eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
817 	eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
818 
819 	if (eth_type == RTE_ETHER_TYPE_VLAN) {
820 		vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
821 		eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
822 	}
823 
824 	if (eth_type == RTE_ETHER_TYPE_IPV4) {
825 		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
826 						   mbuf->outer_l2_len);
827 		off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
828 		ipv4_hdr->hdr_checksum = 0;
829 
830 		udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
831 						 mbuf->outer_l3_len);
832 		udp_hdr->dgram_cksum = 0;
833 	} else if (eth_type == RTE_ETHER_TYPE_IPV6) {
834 		off_info->outer_l3_type = IPV6_PKT;
835 
836 		udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
837 						  (mbuf->outer_l2_len +
838 						   mbuf->outer_l3_len));
839 		udp_hdr->dgram_cksum = 0;
840 	}
841 }
842 
843 static inline uint8_t hinic_analyze_l3_type(struct rte_mbuf *mbuf)
844 {
845 	uint8_t l3_type;
846 	uint64_t ol_flags = mbuf->ol_flags;
847 
848 	if (ol_flags & RTE_MBUF_F_TX_IPV4)
849 		l3_type = (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) ?
850 			  IPV4_PKT_WITH_CHKSUM_OFFLOAD :
851 			  IPV4_PKT_NO_CHKSUM_OFFLOAD;
852 	else if (ol_flags & RTE_MBUF_F_TX_IPV6)
853 		l3_type = IPV6_PKT;
854 	else
855 		l3_type = UNKNOWN_L3TYPE;
856 
857 	return l3_type;
858 }
859 
860 static inline void hinic_calculate_tcp_checksum(struct rte_mbuf *mbuf,
861 					struct hinic_tx_offload_info *off_info,
862 					uint64_t inner_l3_offset)
863 {
864 	struct rte_ipv4_hdr *ipv4_hdr;
865 	struct rte_ipv6_hdr *ipv6_hdr;
866 	struct rte_tcp_hdr *tcp_hdr;
867 	uint64_t ol_flags = mbuf->ol_flags;
868 
869 	if (ol_flags & RTE_MBUF_F_TX_IPV4) {
870 		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
871 						   inner_l3_offset);
872 
873 		if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
874 			ipv4_hdr->hdr_checksum = 0;
875 
876 		tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
877 						 mbuf->l3_len);
878 		tcp_hdr->cksum = hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
879 	} else {
880 		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
881 						   inner_l3_offset);
882 		tcp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_tcp_hdr *,
883 						  (inner_l3_offset +
884 						   mbuf->l3_len));
885 		tcp_hdr->cksum = hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
886 	}
887 
888 	off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
889 	off_info->inner_l4_tcp_udp = 1;
890 }
891 
892 static inline void hinic_calculate_udp_checksum(struct rte_mbuf *mbuf,
893 					struct hinic_tx_offload_info *off_info,
894 					uint64_t inner_l3_offset)
895 {
896 	struct rte_ipv4_hdr *ipv4_hdr;
897 	struct rte_ipv6_hdr *ipv6_hdr;
898 	struct rte_udp_hdr *udp_hdr;
899 	uint64_t ol_flags = mbuf->ol_flags;
900 
901 	if (ol_flags & RTE_MBUF_F_TX_IPV4) {
902 		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
903 						   inner_l3_offset);
904 
905 		if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
906 			ipv4_hdr->hdr_checksum = 0;
907 
908 		udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
909 						 mbuf->l3_len);
910 		udp_hdr->dgram_cksum = hinic_ipv4_phdr_cksum(ipv4_hdr,
911 							     ol_flags);
912 	} else {
913 		ipv6_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv6_hdr *,
914 						   inner_l3_offset);
915 
916 		udp_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_udp_hdr *,
917 						  (inner_l3_offset +
918 						   mbuf->l3_len));
919 		udp_hdr->dgram_cksum = hinic_ipv6_phdr_cksum(ipv6_hdr,
920 							     ol_flags);
921 	}
922 
923 	off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
924 	off_info->inner_l4_tcp_udp = 1;
925 }
926 
927 static inline void
928 hinic_calculate_sctp_checksum(struct hinic_tx_offload_info *off_info)
929 {
930 	off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
931 	off_info->inner_l4_tcp_udp = 0;
932 	off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
933 }
934 
935 static inline void hinic_calculate_checksum(struct rte_mbuf *mbuf,
936 					struct hinic_tx_offload_info *off_info,
937 					uint64_t inner_l3_offset)
938 {
939 	uint64_t ol_flags = mbuf->ol_flags;
940 
941 	switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
942 	case RTE_MBUF_F_TX_UDP_CKSUM:
943 		hinic_calculate_udp_checksum(mbuf, off_info, inner_l3_offset);
944 		break;
945 
946 	case RTE_MBUF_F_TX_TCP_CKSUM:
947 		hinic_calculate_tcp_checksum(mbuf, off_info, inner_l3_offset);
948 		break;
949 
950 	case RTE_MBUF_F_TX_SCTP_CKSUM:
951 		hinic_calculate_sctp_checksum(off_info);
952 		break;
953 
954 	default:
955 		if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
956 			hinic_calculate_tcp_checksum(mbuf, off_info,
957 						     inner_l3_offset);
958 		break;
959 	}
960 }
961 
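/* Validate the requested TX offloads (only VXLAN tunnels are supported)
 * and fill hinic_tx_offload_info with header lengths, payload offset and
 * L3/L4 types; the L4 pseudo-header checksum is pre-computed in the
 * packet so hardware can finalize it.
 */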
962 static inline int hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
963 					struct hinic_tx_offload_info *off_info)
964 {
965 	uint64_t inner_l3_offset;
966 	uint64_t ol_flags = m->ol_flags;
967 
968 	/* Check whether the packet sets any supported offload flags */
969 	if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
970 		return 0;
971 
972 	/* Support only vxlan offload */
973 	if (unlikely((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) &&
974 		     !(ol_flags & RTE_MBUF_F_TX_TUNNEL_VXLAN)))
975 		return -ENOTSUP;
976 
977 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
978 	if (rte_validate_tx_offload(m) != 0)
979 		return -EINVAL;
980 #endif
981 
982 	if (ol_flags & RTE_MBUF_F_TX_TUNNEL_VXLAN) {
983 		off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
984 
985 		/* inner_l4_tcp_udp must be set so the outer udp checksum is
986 		 * calculated even for vxlan packets without inner l3 and l4
987 		 */
988 		off_info->inner_l4_tcp_udp = 1;
989 
990 		if ((ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) ||
991 		    (ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) ||
992 		    (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
993 			inner_l3_offset = m->l2_len + m->outer_l2_len +
994 					  m->outer_l3_len;
995 			off_info->outer_l2_len = m->outer_l2_len;
996 			off_info->outer_l3_len = m->outer_l3_len;
997 			/* just support vxlan tunneling pkt */
998 			off_info->inner_l2_len = m->l2_len - VXLANLEN -
999 						 sizeof(struct rte_udp_hdr);
1000 			off_info->tunnel_length = m->l2_len;
1001 
1002 			hinic_analyze_outer_ip_vxlan(m, off_info);
1003 
1004 			hinic_get_outer_cs_pld_offset(m, off_info);
1005 		} else {
1006 			inner_l3_offset = m->l2_len;
1007 			hinic_analyze_tx_info(m, off_info);
1008 			/* just support vxlan tunneling pkt */
1009 			off_info->inner_l2_len = m->l2_len - VXLANLEN -
1010 						 sizeof(struct rte_udp_hdr) -
1011 						 off_info->outer_l2_len -
1012 						 off_info->outer_l3_len;
1013 			off_info->tunnel_length = m->l2_len -
1014 						  off_info->outer_l2_len -
1015 						  off_info->outer_l3_len;
1016 			off_info->outer_l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
1017 
1018 			hinic_get_pld_offset(m, off_info);
1019 		}
1020 	} else {
1021 		inner_l3_offset = m->l2_len;
1022 		off_info->inner_l2_len = m->l2_len;
1023 		off_info->tunnel_type = NOT_TUNNEL;
1024 
1025 		hinic_get_pld_offset(m, off_info);
1026 	}
1027 
1028 	/* invalid udp or tcp header */
1029 	if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
1030 		return -EINVAL;
1031 
1032 	off_info->inner_l3_len = m->l3_len;
1033 	off_info->inner_l4_len = m->l4_len;
1034 	off_info->inner_l3_type = hinic_analyze_l3_type(m);
1035 
1036 	/* Process the pseudo-header checksum */
1037 	hinic_calculate_checksum(m, off_info, inner_l3_offset);
1038 
1039 	return 0;
1040 }
1041 
1042 static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
1043 					    struct hinic_wqe_info *sqe_info,
1044 					    struct hinic_tx_offload_info
1045 					    *off_info)
1046 {
1047 	u16  i, total_len, sge_cnt = mbuf_pkt->nb_segs;
1048 	struct rte_mbuf *mbuf;
1049 	int ret;
1050 
1051 	memset(off_info, 0, sizeof(*off_info));
1052 
1053 	ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
1054 	if (unlikely(ret))
1055 		return false;
1056 
1057 	sqe_info->cpy_mbuf_cnt = 0;
1058 
1059 	/* non tso mbuf */
1060 	if (likely(!(mbuf_pkt->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) {
1061 		if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
1062 			/* non-tso packet len must be less than 64KB */
1063 			return false;
1064 		} else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
1065 			/* a non-tso packet must not use more than 17 sges;
1066 			 * mbuf segs beyond that are copied into one buffer
1067 			 */
1068 			total_len = 0;
1069 			mbuf = mbuf_pkt;
1070 			for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
1071 				total_len += mbuf->data_len;
1072 				mbuf = mbuf->next;
1073 			}
1074 
1075 			/* at most 4KB of trailing mbuf segs may be copied by default */
1076 			if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
1077 				  mbuf_pkt->pkt_len)
1078 				return false;
1079 
1080 			sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
1081 			sqe_info->cpy_mbuf_cnt = 1;
1082 			return true;
1083 		}
1084 
1085 		/* valid non tso mbuf */
1086 		sqe_info->sge_cnt = sge_cnt;
1087 	} else {
1088 		/* tso mbuf */
1089 		if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
1090 			/* too many mbuf segs */
1091 			return false;
1092 
1093 		/* check tso mbuf segs are valid or not */
1094 		if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
1095 			     off_info, sqe_info)))
1096 			return false;
1097 	}
1098 
1099 	return true;
1100 }
1101 
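/* Ring the SQ doorbell: the low byte of the producer index selects the
 * doorbell address within the doorbell page, while the remaining PI bits,
 * queue id and CoS are encoded into db_info, which is written in big
 * endian after a write barrier.
 */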
1102 static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
1103 {
1104 	u16 prod_idx;
1105 	u32 hi_prod_idx;
1106 	struct hinic_sq_db sq_db;
1107 
1108 	prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
1109 	hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);
1110 
1111 	sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
1112 			SQ_DB_INFO_SET(SQ_DB, TYPE) |
1113 			SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
1114 			SQ_DB_INFO_SET(cos, COS) |
1115 			SQ_DB_INFO_SET(sq->q_id, QID);
1116 
1117 	/* Data should be written to HW in Big Endian Format */
1118 	sq_db.db_info = cpu_to_be32(sq_db.db_info);
1119 
1120 	/* Write all before the doorbell */
1121 	rte_wmb();
1122 	writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
1123 }
1124 
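/* Burst transmit entry point: for each mbuf the offload info and sge
 * layout are parsed, WQEBBs are reserved, the WQE (sge, task and control
 * sections) is filled and byte-swapped, and a single doorbell is written
 * for the whole burst.
 */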
1125 u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
1126 {
1127 	int free_wqebb_cnt, wqe_wqebb_cnt;
1128 	u32 queue_info, tx_bytes = 0;
1129 	u16 nb_tx;
1130 	struct hinic_wqe_info sqe_info;
1131 	struct hinic_tx_offload_info off_info;
1132 	struct rte_mbuf *mbuf_pkt;
1133 	struct hinic_txq *txq = tx_queue;
1134 	struct hinic_tx_info *tx_info;
1135 	struct hinic_sq_wqe *sq_wqe;
1136 	struct hinic_sq_task *task;
1137 
1138 	/* reclaim tx mbufs before xmitting new packets */
1139 	if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
1140 		hinic_xmit_mbuf_cleanup(txq);
1141 
1142 	/* tx loop routine */
1143 	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1144 		mbuf_pkt = *tx_pkts++;
1145 		queue_info = 0;
1146 
1147 		/* 1. parse sge and tx offload info from mbuf */
1148 		if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
1149 						       &sqe_info, &off_info))) {
1150 			txq->txq_stats.off_errs++;
1151 			break;
1152 		}
1153 
1154 		/* 2. try to get enough wqebb */
1155 		wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
1156 		free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1157 		if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1158 			/* reclaim again */
1159 			hinic_xmit_mbuf_cleanup(txq);
1160 			free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
1161 			if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
1162 				txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
1163 				break;
1164 			}
1165 		}
1166 
1167 		/* 3. get sq tail wqe address from wqe_page;
1168 		 * the sq has enough wqebbs for this packet
1169 		 */
1170 		sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);
1171 
1172 		/* 4. fill sq wqe sge section */
1173 		if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
1174 						     sq_wqe->buf_descs,
1175 						     &sqe_info))) {
1176 			hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
1177 					    wqe_wqebb_cnt, sqe_info.owner);
1178 			txq->txq_stats.off_errs++;
1179 			break;
1180 		}
1181 
1182 		/* 5. fill sq wqe task section and queue info */
1183 		task = &sq_wqe->task;
1184 
1185 		/* tx packet offload configure */
1186 		hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
1187 					   &off_info);
1188 
1189 		/* 6. record tx info */
1190 		tx_info = &txq->tx_info[sqe_info.pi];
1191 		tx_info->mbuf = mbuf_pkt;
1192 		tx_info->wqebb_cnt = wqe_wqebb_cnt;
1193 
1194 		/* 7. fill sq wqe header section */
1195 		hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
1196 					 sqe_info.sge_cnt, sqe_info.owner);
1197 
1198 		/* 8. convert contiguous or bottom wqe byteorder to big endian */
1199 		hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);
1200 
1201 		tx_bytes += mbuf_pkt->pkt_len;
1202 	}
1203 
1204 	/* 9. write sq doorbell in burst mode */
1205 	if (nb_tx) {
1206 		hinic_sq_write_db(txq->sq, txq->cos);
1207 
1208 		txq->txq_stats.packets += nb_tx;
1209 		txq->txq_stats.bytes += tx_bytes;
1210 	}
1211 	txq->txq_stats.burst_pkts = nb_tx;
1212 
1213 	return nb_tx;
1214 }
1215 
1216 void hinic_free_all_tx_mbufs(struct hinic_txq *txq)
1217 {
1218 	u16 ci;
1219 	struct hinic_nic_dev *nic_dev = txq->nic_dev;
1220 	struct hinic_tx_info *tx_info;
1221 	int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
1222 						   txq->q_id) + 1;
1223 
1224 	while (free_wqebbs < txq->q_depth) {
1225 		ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);
1226 
1227 		tx_info = &txq->tx_info[ci];
1228 
1229 		if (unlikely(tx_info->cpy_mbuf != NULL)) {
1230 			rte_pktmbuf_free(tx_info->cpy_mbuf);
1231 			tx_info->cpy_mbuf = NULL;
1232 		}
1233 
1234 		rte_pktmbuf_free(tx_info->mbuf);
1235 		hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
1236 					 tx_info->wqebb_cnt);
1237 
1238 		free_wqebbs += tx_info->wqebb_cnt;
1239 		tx_info->mbuf = NULL;
1240 	}
1241 }
1242 
1243 void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
1244 {
1245 	u16 q_id;
1246 	struct hinic_nic_dev *nic_dev =
1247 				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1248 
1249 	for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
1250 		if (eth_dev->data->tx_queues != NULL)
1251 			eth_dev->data->tx_queues[q_id] = NULL;
1252 
1253 		if (nic_dev->txqs[q_id] == NULL)
1254 			continue;
1255 
1256 		/* tx queue has been stopped, free its tx mbufs */
1257 		hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1258 		hinic_free_tx_resources(nic_dev->txqs[q_id]);
1259 
1260 		/* free txq */
1261 		kfree(nic_dev->txqs[q_id]);
1262 		nic_dev->txqs[q_id] = NULL;
1263 	}
1264 }
1265 
1266 void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
1267 {
1268 	u16 q_id;
1269 	struct hinic_nic_dev *nic_dev =
1270 				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);
1271 
1272 	for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
1273 		/* tx queue has been stopped, free its tx mbufs */
1274 		hinic_free_all_tx_mbufs(nic_dev->txqs[q_id]);
1275 }
1276 
1277 int hinic_setup_tx_resources(struct hinic_txq *txq)
1278 {
1279 	u64 tx_info_sz;
1280 
1281 	tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
1282 	txq->tx_info = rte_zmalloc_socket("tx_info", tx_info_sz,
1283 			RTE_CACHE_LINE_SIZE, txq->socket_id);
1284 	if (!txq->tx_info)
1285 		return -ENOMEM;
1286 
1287 	return HINIC_OK;
1288 }
1289 
1290 void hinic_free_tx_resources(struct hinic_txq *txq)
1291 {
1292 	if (txq->tx_info == NULL)
1293 		return;
1294 
1295 	rte_free(txq->tx_info);
1296 	txq->tx_info = NULL;
1297 }
1298 
1299 int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id,
1300 			u16 sq_depth, unsigned int socket_id)
1301 {
1302 	int err;
1303 	struct hinic_nic_io *nic_io = hwdev->nic_io;
1304 	struct hinic_qp *qp = &nic_io->qps[q_id];
1305 	struct hinic_sq *sq = &qp->sq;
1306 	void __iomem *db_addr;
1307 	volatile u32 *ci_addr;
1308 
1309 	sq->sq_depth = sq_depth;
1310 	nic_io->sq_depth = sq_depth;
1311 
1312 	/* alloc wq */
1313 	err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
1314 				HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth,
1315 				socket_id);
1316 	if (err) {
1317 		PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
1318 		return err;
1319 	}
1320 
1321 	/* alloc sq doorbell space */
1322 	err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
1323 	if (err) {
1324 		PMD_DRV_LOG(ERR, "Failed to init db addr");
1325 		goto alloc_db_err;
1326 	}
1327 
1328 	/* clear hardware ci */
1329 	ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
1330 	*ci_addr = 0;
1331 
1332 	sq->q_id = q_id;
1333 	sq->wq = &nic_io->sq_wq[q_id];
1334 	sq->owner = 1;
1335 	sq->cons_idx_addr = (volatile u16 *)ci_addr;
1336 	sq->db_addr = db_addr;
1337 
1338 	return HINIC_OK;
1339 
1340 alloc_db_err:
1341 	hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);
1342 
1343 	return err;
1344 }
1345 
1346 void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
1347 {
1348 	struct hinic_nic_io *nic_io;
1349 	struct hinic_qp *qp;
1350 
1351 	nic_io = hwdev->nic_io;
1352 	qp = &nic_io->qps[q_id];
1353 
1354 	if (qp->sq.wq == NULL)
1355 		return;
1356 
1357 	hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
1358 	hinic_wq_free(nic_io->hwdev, qp->sq.wq);
1359 	qp->sq.wq = NULL;
1360 }
1361