1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2021 Marvell.
3  */
4 #ifndef __CN9K_TX_H__
5 #define __CN9K_TX_H__
6 
7 #include <rte_vect.h>
8 
9 #define NIX_TX_OFFLOAD_NONE	      (0)
10 #define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
11 #define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
12 #define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
13 #define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
14 #define NIX_TX_OFFLOAD_TSO_F	      BIT(4)
15 #define NIX_TX_OFFLOAD_TSTAMP_F	      BIT(5)
16 #define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
17 #define NIX_TX_OFFLOAD_MAX	      (NIX_TX_OFFLOAD_SECURITY_F << 1)
18 
19 /* Flags to control xmit_prepare function.
20  * Defined from the MSB end to denote that they are
21  * not used as offload flags to pick the function.
22  */
23 #define NIX_TX_MULTI_SEG_F BIT(15)
24 
25 #define NIX_TX_NEED_SEND_HDR_W1                                                \
26 	(NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
27 	 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
28 
29 #define NIX_TX_NEED_EXT_HDR                                                    \
30 	(NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
31 	 NIX_TX_OFFLOAD_TSO_F)
32 
33 #define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
34 	do {                                                                   \
35 		int64_t avail;                                                 \
36 		/* Cached value is low, update the fc_cache_pkts */            \
37 		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
38 			avail = txq->nb_sqb_bufs_adj - *txq->fc_mem;           \
39 			/* Multiply with (sqes_per_sqb - 1) to express in pkts */ \
40 			(txq)->fc_cache_pkts =                                 \
41 				(avail << (txq)->sqes_per_sqb_log2) - avail;   \
42 			/* Check again whether there is room */                \
43 			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
44 				return 0;                                      \
45 		}                                                              \
46 	} while (0)
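/* Example (illustrative values): with sqes_per_sqb_log2 = 5 and
 * avail = 10 free SQBs, fc_cache_pkts becomes (10 << 5) - 10 = 310,
 * i.e. avail * (sqes_per_sqb - 1) packets worth of room.
 */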
47 
48 /* Function to determine the number of Tx sub-descriptors required when
49  * the extended sub-descriptor is enabled.
50  */
51 static __rte_always_inline int
52 cn9k_nix_tx_ext_subs(const uint16_t flags)
53 {
54 	return (flags & NIX_TX_OFFLOAD_TSTAMP_F)
55 		       ? 2
56 		       : ((flags &
57 			   (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F))
58 				  ? 1
59 				  : 0);
60 }
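/* cn9k_nix_tx_ext_subs() returns the number of extension sub-descriptors:
 * 2 when timestamping is enabled (SEND_EXT + SEND_MEM), 1 when only
 * VLAN/QinQ insertion or TSO is enabled (SEND_EXT), 0 otherwise.
 */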
61 
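/* Build the constant part of the Tx command: cmd[0..1] hold SEND_HDR,
 * followed by SEND_EXT in cmd[2..3] (with the timestamp bit set when the
 * TSTAMP offload is enabled) and SEND_SG in cmd[4] when an extension
 * header is needed, otherwise SEND_SG directly in cmd[2].
 */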
62 static __rte_always_inline void
63 cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
64 		     const uint16_t flags, const uint16_t static_sz)
65 {
66 	if (static_sz)
67 		cmd[0] = txq->send_hdr_w0;
68 	else
69 		cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
70 			 ((uint64_t)(cn9k_nix_tx_ext_subs(flags) + 1) << 40);
71 	cmd[1] = 0;
72 
73 	if (flags & NIX_TX_NEED_EXT_HDR) {
74 		if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
75 			cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
76 		else
77 			cmd[2] = NIX_SUBDC_EXT << 60;
78 		cmd[3] = 0;
79 		cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
80 	} else {
81 		cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
82 	}
83 }
84 
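/* Free the chain of external-buffer mbufs collected on the 'extm' list
 * by cn9k_nix_prefree_seg() when Tx completions are disabled.
 */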
85 static __rte_always_inline void
86 cn9k_nix_free_extmbuf(struct rte_mbuf *m)
87 {
88 	struct rte_mbuf *m_next;
89 	while (m != NULL) {
90 		m_next = m->next;
91 		rte_pktmbuf_free_seg(m);
92 		m = m_next;
93 	}
94 }
95 
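/* Decide whether the NIX hardware may free the mbuf. Returns 1 (DF set,
 * i.e. hardware must not free) for external-buffer mbufs, which are
 * either queued on 'extm' or parked in tx_compl.ptr[] for the Tx
 * completion handler; all other mbufs defer to cnxk_nix_prefree_seg().
 */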
96 static __rte_always_inline uint64_t
97 cn9k_nix_prefree_seg(struct rte_mbuf *m, struct rte_mbuf **extm, struct cn9k_eth_txq *txq,
98 		     struct nix_send_hdr_s *send_hdr, uint64_t *aura)
99 {
100 	struct rte_mbuf *prev;
101 	uint32_t sqe_id;
102 
103 	if (RTE_MBUF_HAS_EXTBUF(m)) {
104 		if (unlikely(txq->tx_compl.ena == 0)) {
105 			m->next = *extm;
106 			*extm = m;
107 			return 1;
108 		}
109 		if (send_hdr->w0.pnc) {
110 			sqe_id = send_hdr->w1.sqe_id;
111 			prev = txq->tx_compl.ptr[sqe_id];
112 			m->next = prev;
113 			txq->tx_compl.ptr[sqe_id] = m;
114 		} else {
115 			sqe_id = __atomic_fetch_add(&txq->tx_compl.sqe_id, 1, __ATOMIC_RELAXED);
116 			send_hdr->w0.pnc = 1;
117 			send_hdr->w1.sqe_id = sqe_id &
118 				txq->tx_compl.nb_desc_mask;
119 			txq->tx_compl.ptr[send_hdr->w1.sqe_id] = m;
120 			m->next = NULL;
121 		}
122 		return 1;
123 	} else {
124 		return cnxk_nix_prefree_seg(m, aura);
125 	}
126 }
127 
128 #if defined(RTE_ARCH_ARM64)
129 /* Called only for the first segment of single-segment mbufs */
130 static __rte_always_inline void
131 cn9k_nix_prefree_seg_vec(struct rte_mbuf **mbufs, struct rte_mbuf **extm, struct cn9k_eth_txq *txq,
132 			 uint64x2_t *senddesc01_w0, uint64x2_t *senddesc23_w0,
133 			 uint64x2_t *senddesc01_w1, uint64x2_t *senddesc23_w1)
134 {
135 	struct rte_mbuf **tx_compl_ptr = txq->tx_compl.ptr;
136 	uint32_t nb_desc_mask = txq->tx_compl.nb_desc_mask;
137 	bool tx_compl_ena = txq->tx_compl.ena;
138 	struct rte_mbuf *m0, *m1, *m2, *m3;
139 	struct rte_mbuf *cookie;
140 	uint64_t w0, w1, aura;
141 	uint64_t sqe_id;
142 
143 	m0 = mbufs[0];
144 	m1 = mbufs[1];
145 	m2 = mbufs[2];
146 	m3 = mbufs[3];
147 
148 	/* mbuf 0 */
149 	w0 = vgetq_lane_u64(*senddesc01_w0, 0);
150 	if (RTE_MBUF_HAS_EXTBUF(m0)) {
151 		w0 |= BIT_ULL(19);
152 		w1 = vgetq_lane_u64(*senddesc01_w1, 0);
153 		w1 &= ~0xFFFF000000000000UL;
154 		if (unlikely(!tx_compl_ena)) {
155 			m0->next = *extm;
156 			*extm = m0;
157 		} else {
158 			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
159 							       rte_memory_order_relaxed);
160 			sqe_id = sqe_id & nb_desc_mask;
161 			/* Set PNC */
162 			w0 |= BIT_ULL(43);
163 			w1 |= sqe_id << 48;
164 			tx_compl_ptr[sqe_id] = m0;
165 			*senddesc01_w1 = vsetq_lane_u64(w1, *senddesc01_w1, 0);
166 		}
167 	} else {
168 		cookie = RTE_MBUF_DIRECT(m0) ? m0 : rte_mbuf_from_indirect(m0);
169 		aura = (w0 >> 20) & 0xFFFFF;
170 		w0 &= ~0xFFFFF00000UL;
171 		w0 |= cnxk_nix_prefree_seg(m0, &aura) << 19;
172 		w0 |= aura << 20;
173 
174 		if ((w0 & BIT_ULL(19)) == 0)
175 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
176 	}
177 	*senddesc01_w0 = vsetq_lane_u64(w0, *senddesc01_w0, 0);
178 
179 	/* mbuf1 */
180 	w0 = vgetq_lane_u64(*senddesc01_w0, 1);
181 	if (RTE_MBUF_HAS_EXTBUF(m1)) {
182 		w0 |= BIT_ULL(19);
183 		w1 = vgetq_lane_u64(*senddesc01_w1, 1);
184 		w1 &= ~0xFFFF000000000000UL;
185 		if (unlikely(!tx_compl_ena)) {
186 			m1->next = *extm;
187 			*extm = m1;
188 		} else {
189 			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
190 							       rte_memory_order_relaxed);
191 			sqe_id = sqe_id & nb_desc_mask;
192 			/* Set PNC */
193 			w0 |= BIT_ULL(43);
194 			w1 |= sqe_id << 48;
195 			tx_compl_ptr[sqe_id] = m1;
196 			*senddesc01_w1 = vsetq_lane_u64(w1, *senddesc01_w1, 1);
197 		}
198 	} else {
199 		cookie = RTE_MBUF_DIRECT(m1) ? m1 : rte_mbuf_from_indirect(m1);
200 		aura = (w0 >> 20) & 0xFFFFF;
201 		w0 &= ~0xFFFFF00000UL;
202 		w0 |= cnxk_nix_prefree_seg(m1, &aura) << 19;
203 		w0 |= aura << 20;
204 
205 		if ((w0 & BIT_ULL(19)) == 0)
206 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
207 	}
208 	*senddesc01_w0 = vsetq_lane_u64(w0, *senddesc01_w0, 1);
209 
210 	/* mbuf 2 */
211 	w0 = vgetq_lane_u64(*senddesc23_w0, 0);
212 	if (RTE_MBUF_HAS_EXTBUF(m2)) {
213 		w0 |= BIT_ULL(19);
214 		w1 = vgetq_lane_u64(*senddesc23_w1, 0);
215 		w1 &= ~0xFFFF000000000000UL;
216 		if (unlikely(!tx_compl_ena)) {
217 			m2->next = *extm;
218 			*extm = m2;
219 		} else {
220 			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
221 							       rte_memory_order_relaxed);
222 			sqe_id = sqe_id & nb_desc_mask;
223 			/* Set PNC */
224 			w0 |= BIT_ULL(43);
225 			w1 |= sqe_id << 48;
226 			tx_compl_ptr[sqe_id] = m2;
227 			*senddesc23_w1 = vsetq_lane_u64(w1, *senddesc23_w1, 0);
228 		}
229 	} else {
230 		cookie = RTE_MBUF_DIRECT(m2) ? m2 : rte_mbuf_from_indirect(m2);
231 		aura = (w0 >> 20) & 0xFFFFF;
232 		w0 &= ~0xFFFFF00000UL;
233 		w0 |= cnxk_nix_prefree_seg(m2, &aura) << 19;
234 		w0 |= aura << 20;
235 
236 		if ((w0 & BIT_ULL(19)) == 0)
237 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
238 	}
239 	*senddesc23_w0 = vsetq_lane_u64(w0, *senddesc23_w0, 0);
240 
241 	/* mbuf3 */
242 	w0 = vgetq_lane_u64(*senddesc23_w0, 1);
243 	if (RTE_MBUF_HAS_EXTBUF(m3)) {
244 		w0 |= BIT_ULL(19);
245 		w1 = vgetq_lane_u64(*senddesc23_w1, 1);
246 		w1 &= ~0xFFFF000000000000UL;
247 		if (unlikely(!tx_compl_ena)) {
248 			m3->next = *extm;
249 			*extm = m3;
250 		} else {
251 			sqe_id = rte_atomic_fetch_add_explicit(&txq->tx_compl.sqe_id, 1,
252 							       rte_memory_order_relaxed);
253 			sqe_id = sqe_id & nb_desc_mask;
254 			/* Set PNC */
255 			w0 |= BIT_ULL(43);
256 			w1 |= sqe_id << 48;
257 			tx_compl_ptr[sqe_id] = m3;
258 			*senddesc23_w1 = vsetq_lane_u64(w1, *senddesc23_w1, 1);
259 		}
260 	} else {
261 		cookie = RTE_MBUF_DIRECT(m3) ? m3 : rte_mbuf_from_indirect(m3);
262 		aura = (w0 >> 20) & 0xFFFFF;
263 		w0 &= ~0xFFFFF00000UL;
264 		w0 |= cnxk_nix_prefree_seg(m3, &aura) << 19;
265 		w0 |= aura << 20;
266 
267 		if ((w0 & BIT_ULL(19)) == 0)
268 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
269 	}
270 	*senddesc23_w0 = vsetq_lane_u64(w0, *senddesc23_w0, 1);
271 #ifndef RTE_LIBRTE_MEMPOOL_DEBUG
272 	RTE_SET_USED(cookie);
273 #endif
274 }
275 #endif
276 
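/* Pre-adjust length fields for TSO: subtract the payload length from the
 * inner IP total length (and from the outer IP and outer UDP lengths for
 * tunneled packets) so that only the header bytes remain in those fields
 * before the hardware performs segmentation.
 */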
277 static __rte_always_inline void
278 cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
279 {
280 	uint64_t mask, ol_flags = m->ol_flags;
281 
282 	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
283 		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
284 		uint16_t *iplen, *oiplen, *oudplen;
285 		uint16_t lso_sb, paylen;
286 
287 		mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
288 		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
289 			 m->l2_len + m->l3_len + m->l4_len;
290 
291 		/* Payload length is the packet length minus the base headers */
292 		paylen = m->pkt_len - lso_sb;
293 
294 		/* Get iplen position assuming no tunnel hdr */
295 		iplen = (uint16_t *)(mdata + m->l2_len +
296 				     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
297 		/* Handle tunnel tso */
298 		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
299 		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
300 			const uint8_t is_udp_tun =
301 				(CNXK_NIX_UDP_TUN_BITMASK >>
302 				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
303 				0x1;
304 
305 			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
306 					      (2 << !!(ol_flags &
307 						       RTE_MBUF_F_TX_OUTER_IPV6)));
308 			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
309 						   paylen);
310 
311 			/* Update outer UDP length for UDP tunneled packet */
312 			if (is_udp_tun) {
313 				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
314 						       m->outer_l3_len + 4);
315 				*oudplen = rte_cpu_to_be_16(
316 					rte_be_to_cpu_16(*oudplen) - paylen);
317 			}
318 
319 			/* Update iplen position to inner ip hdr */
320 			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
321 					     m->l4_len +
322 					     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
323 		}
324 
325 		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
326 	}
327 }
328 
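/* Fill the per-packet Tx descriptor words: SEND_HDR W0/W1 (total length,
 * aura, checksum types and pointers), optional VLAN insertion and packet
 * marking, LSO fields, and, for single-segment packets, the SEND_SG entry
 * along with the mbuf pre-free decision.
 */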
329 static __rte_always_inline void
330 cn9k_nix_xmit_prepare(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
331 		      uint64_t *cmd, const uint16_t flags, const uint64_t lso_tun_fmt,
332 		      uint8_t mark_flag, uint64_t mark_fmt)
333 {
334 	uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
335 	struct nix_send_ext_s *send_hdr_ext;
336 	struct nix_send_hdr_s *send_hdr;
337 	uint64_t ol_flags = 0, mask;
338 	union nix_send_hdr_w1_u w1;
339 	union nix_send_sg_s *sg;
340 	uint16_t mark_form = 0;
341 
342 	send_hdr = (struct nix_send_hdr_s *)cmd;
343 	if (flags & NIX_TX_NEED_EXT_HDR) {
344 		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
345 		sg = (union nix_send_sg_s *)(cmd + 4);
346 		/* Clear previous markings */
347 		send_hdr_ext->w0.lso = 0;
348 		send_hdr_ext->w0.mark_en = 0;
349 		send_hdr_ext->w1.u = 0;
350 		ol_flags = m->ol_flags;
351 	} else {
352 		sg = (union nix_send_sg_s *)(cmd + 2);
353 	}
354 
355 	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
356 		ol_flags = m->ol_flags;
357 		w1.u = 0;
358 	}
359 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
360 		send_hdr->w0.pnc = 0;
361 
362 	if (!(flags & NIX_TX_MULTI_SEG_F))
363 		send_hdr->w0.total = m->data_len;
364 	else
365 		send_hdr->w0.total = m->pkt_len;
366 	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
367 
368 	/*
369 	 * L3type:  2 => IPV4
370 	 *          3 => IPV4 with csum
371 	 *          4 => IPV6
372 	 * L3type and L3ptr need to be set for either
373 	 * L3 csum or L4 csum or LSO
374 	 *
375 	 */
376 
377 	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
378 	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
379 		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
380 		const uint8_t ol3type =
381 			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
382 			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
383 			!!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
384 
385 		/* Outer L3 */
386 		w1.ol3type = ol3type;
387 		mask = 0xffffull << ((!!ol3type) << 4);
388 		w1.ol3ptr = ~mask & m->outer_l2_len;
389 		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);
390 
391 		/* Outer L4 */
392 		w1.ol4type = csum + (csum << 1);
393 
394 		/* Inner L3 */
395 		w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
396 			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
397 		w1.il3ptr = w1.ol4ptr + m->l2_len;
398 		w1.il4ptr = w1.il3ptr + m->l3_len;
399 		/* Increment by 1 if it is IPv4, as 3 means IPv4 with csum */
400 		w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
401 
402 		/* Inner L4 */
403 		w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
404 
405 		/* In case of no tunnel header, shift the IL3/IL4
406 		 * fields down so that OL3/OL4 are used for the
407 		 * header checksum.
408 		 */
409 		mask = !ol3type;
410 		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
411 		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));
412 
413 	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
414 		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
415 		const uint8_t outer_l2_len = m->outer_l2_len;
416 
417 		/* Outer L3 */
418 		w1.ol3ptr = outer_l2_len;
419 		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
420 		/* Increment by 1 if it is IPv4, as 3 means IPv4 with csum */
421 		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
422 			     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
423 			     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);
424 
425 		/* Outer L4 */
426 		w1.ol4type = csum + (csum << 1);
427 
428 	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
429 		const uint8_t l2_len = m->l2_len;
430 
431 		/* Always use OLXPTR and OLXTYPE when only
432 		 * one header is present.
433 		 */
434 
435 		/* Inner L3 */
436 		w1.ol3ptr = l2_len;
437 		w1.ol4ptr = l2_len + m->l3_len;
438 		/* Increment by 1 if it is IPv4, as 3 means IPv4 with csum */
439 		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
440 			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
441 			     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
442 
443 		/* Inner L4 */
444 		w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
445 	}
446 
447 	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
448 		const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
449 		const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
450 						  RTE_MBUF_F_TX_IPV6));
451 
452 		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
453 		/* HW will update ptr after vlan0 update */
454 		send_hdr_ext->w1.vlan1_ins_ptr = 12;
455 		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;
456 
457 		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
458 		/* 2B before end of l2 header */
459 		send_hdr_ext->w1.vlan0_ins_ptr = 12;
460 		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
461 		/* Fill for VLAN marking only when VLAN insertion enabled */
462 		mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
463 			     (send_hdr_ext->w1.vlan1_ins_ena ||
464 			      send_hdr_ext->w1.vlan0_ins_ena));
465 		/* Mask requested flags with packet data information */
466 		mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
467 		mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);
468 
469 		mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
470 		mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
471 		markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);
472 
473 		send_hdr_ext->w0.mark_en = !!mark_off;
474 		send_hdr_ext->w0.markform = mark_form & 0x7F;
475 		send_hdr_ext->w0.markptr = markptr;
476 	}
477 
478 	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
479 		uint16_t lso_sb;
480 		uint64_t mask;
481 
482 		mask = -(!w1.il3type);
483 		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;
484 
485 		send_hdr_ext->w0.lso_sb = lso_sb;
486 		send_hdr_ext->w0.lso = 1;
487 		send_hdr_ext->w0.lso_mps = m->tso_segsz;
488 		send_hdr_ext->w0.lso_format =
489 			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
490 		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
491 
492 		/* Handle tunnel tso */
493 		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
494 		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
495 			const uint8_t is_udp_tun =
496 				(CNXK_NIX_UDP_TUN_BITMASK >>
497 				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
498 				0x1;
499 			uint8_t shift = is_udp_tun ? 32 : 0;
500 
501 			shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
502 			shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
503 
504 			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
505 			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
506 			/* Update format for UDP tunneled packet */
507 			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
508 		}
509 	}
510 
511 	if (flags & NIX_TX_NEED_SEND_HDR_W1)
512 		send_hdr->w1.u = w1.u;
513 
514 	if (!(flags & NIX_TX_MULTI_SEG_F)) {
515 		struct rte_mbuf *cookie;
516 
517 		sg->seg1_size = m->data_len;
518 		*(rte_iova_t *)(++sg) = rte_mbuf_data_iova(m);
519 		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
520 
521 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
522 			uint64_t aura;
523 			/* DF bit = 1 if refcount of current mbuf or parent mbuf
524 			 *		is greater than 1
525 			 * DF bit = 0 otherwise
526 			 */
527 			aura = send_hdr->w0.aura;
528 			send_hdr->w0.df = cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura);
529 			send_hdr->w0.aura = aura;
530 			/* Ensuring mbuf fields which got updated in
531 			 * cnxk_nix_prefree_seg are written before LMTST.
532 			 */
533 			rte_io_wmb();
534 		}
535 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
536 		/* Mark mempool object as "put" since it is freed by NIX */
537 		if (!send_hdr->w0.df)
538 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
539 #else
540 		RTE_SET_USED(cookie);
541 #endif
542 	} else {
543 		sg->seg1_size = m->data_len;
544 		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
545 
546 		/* NOFF is handled later for multi-seg */
547 	}
548 }
549 
550 static __rte_always_inline void
551 cn9k_nix_xmit_prepare_tstamp(struct cn9k_eth_txq *txq, uint64_t *cmd,
552 			     const uint64_t ol_flags, const uint16_t no_segdw,
553 			     const uint16_t flags)
554 {
555 	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
556 		struct nix_send_mem_s *send_mem;
557 		uint16_t off = (no_segdw - 1) << 1;
558 		const uint8_t is_ol_tstamp =
559 			!(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
560 
561 		send_mem = (struct nix_send_mem_s *)(cmd + off);
562 
563 		/* For packets that do not have RTE_MBUF_F_TX_IEEE1588_TMST set, the Tx
564 		 * tstamp should not be recorded. Hence, change the alg type to
565 		 * NIX_SENDMEMALG_SUB and move the send mem addr field to the
566 		 * next 8 bytes so that the actual registered Tx tstamp address
567 		 * is not corrupted.
568 		 */
569 		send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
570 		send_mem->w0.cn9k.alg =
571 			NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
572 
573 		send_mem->addr = (rte_iova_t)(((uint64_t *)txq->ts_mem) +
574 				(is_ol_tstamp));
575 	}
576 }
577 
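/* Copy the command to the LMT line and submit it with LDEOR; a zero
 * status means the LMTST did not complete, so the copy and submit are
 * retried until it succeeds.
 */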
578 static __rte_always_inline void
579 cn9k_nix_xmit_one(uint64_t *cmd, void *lmt_addr, const rte_iova_t io_addr,
580 		  const uint32_t flags)
581 {
582 	uint64_t lmt_status;
583 
584 	do {
585 		roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
586 		lmt_status = roc_lmt_submit_ldeor(io_addr);
587 	} while (lmt_status == 0);
588 }
589 
590 static __rte_always_inline void
591 cn9k_nix_xmit_prep_lmt(uint64_t *cmd, void *lmt_addr, const uint32_t flags)
592 {
593 	roc_lmt_mov(lmt_addr, cmd, cn9k_nix_tx_ext_subs(flags));
594 }
595 
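/* Busy-wait until the CPT queue has room, i.e. until the flow-control
 * count drops below the number of CPT descriptors.
 */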
596 static __rte_always_inline void
597 cn9k_nix_sec_fc_wait_one(const struct cn9k_eth_txq *txq)
598 {
599 	uint64_t nb_desc = txq->cpt_desc;
600 	uint64_t *fc = txq->cpt_fc;
601 
602 	while (nb_desc <= __atomic_load_n(fc, __ATOMIC_RELAXED))
603 		;
604 }
605 
606 static __rte_always_inline uint64_t
607 cn9k_nix_xmit_submit_lmt(const rte_iova_t io_addr)
608 {
609 	return roc_lmt_submit_ldeor(io_addr);
610 }
611 
612 static __rte_always_inline uint64_t
613 cn9k_nix_xmit_submit_lmt_release(const rte_iova_t io_addr)
614 {
615 	return roc_lmt_submit_ldeorl(io_addr);
616 }
617 
618 static __rte_always_inline uint16_t
619 cn9k_nix_prepare_mseg(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
620 		      uint64_t *cmd, const uint16_t flags)
621 {
622 	struct nix_send_hdr_s *send_hdr;
623 	uint64_t prefree = 0, aura;
624 	struct rte_mbuf *cookie;
625 	union nix_send_sg_s *sg;
626 	struct rte_mbuf *m_next;
627 	uint64_t *slist, sg_u;
628 	uint64_t nb_segs;
629 	uint64_t segdw;
630 	uint8_t off, i;
631 
632 	send_hdr = (struct nix_send_hdr_s *)cmd;
633 
634 	if (flags & NIX_TX_NEED_EXT_HDR)
635 		off = 2;
636 	else
637 		off = 0;
638 
639 	sg = (union nix_send_sg_s *)&cmd[2 + off];
640 
641 	/* Start from second segment, first segment is already there */
642 	i = 1;
643 	sg_u = sg->u;
644 	sg_u &= 0xFC0000000000FFFF;
645 	nb_segs = m->nb_segs - 1;
646 	m_next = m->next;
647 	slist = &cmd[3 + off + 1];
648 
649 	cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
650 	/* Set invert df if buffer is not to be freed by H/W */
651 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
652 		aura = send_hdr->w0.aura;
653 		prefree = (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << 55);
654 		send_hdr->w0.aura = aura;
655 		sg_u |= prefree;
656 		rte_io_wmb();
657 	}
658 
659 	/* Mark mempool object as "put" since it is freed by NIX */
660 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
661 	if (!(sg_u & (1ULL << 55)))
662 		RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
663 	rte_io_wmb();
664 #else
665 	RTE_SET_USED(cookie);
666 #endif
667 #ifdef RTE_ENABLE_ASSERT
668 	m->next = NULL;
669 	m->nb_segs = 1;
670 #endif
671 	m = m_next;
672 	if (!m)
673 		goto done;
674 
675 	/* Fill mbuf segments */
676 	do {
677 		m_next = m->next;
678 		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
679 		*slist = rte_mbuf_data_iova(m);
680 		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
681 		/* Set invert df if buffer is not to be freed by H/W */
682 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
683 			sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, NULL) << (i + 55));
684 			/* Commit changes to mbuf */
685 			rte_io_wmb();
686 		}
687 		/* Mark mempool object as "put" since it is freed by NIX */
688 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
689 		if (!(sg_u & (1ULL << (i + 55))))
690 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
691 		rte_io_wmb();
692 #endif
693 		slist++;
694 		i++;
695 		nb_segs--;
696 		if (i > 2 && nb_segs) {
697 			i = 0;
698 			/* Next SG subdesc */
699 			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
700 			sg->u = sg_u;
701 			sg->segs = 3;
702 			sg = (union nix_send_sg_s *)slist;
703 			sg_u = sg->u;
704 			slist++;
705 		}
706 #ifdef RTE_ENABLE_ASSERT
707 		m->next = NULL;
708 #endif
709 		m = m_next;
710 	} while (nb_segs);
711 
712 done:
713 	sg->u = sg_u;
714 	sg->segs = i;
715 	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
716 	/* Roundup extra dwords to multiple of 2 */
717 	segdw = (segdw >> 1) + (segdw & 0x1);
718 	/* Default dwords */
719 	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
720 	send_hdr->w0.sizem1 = segdw - 1;
721 
722 #ifdef RTE_ENABLE_ASSERT
723 	rte_io_wmb();
724 #endif
725 	return segdw;
726 }
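/* Illustrative segdw accounting: with an extension header (off = 2) and a
 * 3-segment mbuf, the SG area spans cmd[4..7] (SG word plus 3 pointers),
 * i.e. 2 dwords after round-up, plus 1 dword for SEND_HDR and 1 for
 * SEND_EXT, giving segdw = 4 and sizem1 = 3 when no timestamp is used.
 */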
727 
728 static __rte_always_inline void
729 cn9k_nix_xmit_mseg_prep_lmt(uint64_t *cmd, void *lmt_addr, uint16_t segdw)
730 {
731 	roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
732 }
733 
734 static __rte_always_inline void
735 cn9k_nix_xmit_mseg_one(uint64_t *cmd, void *lmt_addr, rte_iova_t io_addr,
736 		       uint16_t segdw)
737 {
738 	uint64_t lmt_status;
739 
740 	do {
741 		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
742 		lmt_status = roc_lmt_submit_ldeor(io_addr);
743 	} while (lmt_status == 0);
744 }
745 
746 static __rte_always_inline void
747 cn9k_nix_xmit_mseg_one_release(uint64_t *cmd, void *lmt_addr,
748 			       rte_iova_t io_addr, uint16_t segdw)
749 {
750 	uint64_t lmt_status;
751 
752 	rte_io_wmb();
753 	do {
754 		roc_lmt_mov_seg(lmt_addr, (const void *)cmd, segdw);
755 		lmt_status = roc_lmt_submit_ldeor(io_addr);
756 	} while (lmt_status == 0);
757 }
758 
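/* Scalar Tx burst: reap pending Tx completions if enabled, check flow
 * control, build the command skeleton once, then for each packet fill the
 * descriptor, add the optional timestamp sub-descriptor and issue one
 * LMTST.
 */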
759 static __rte_always_inline uint16_t
760 cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
761 		   uint64_t *cmd, const uint16_t flags)
762 {
763 	struct cn9k_eth_txq *txq = tx_queue;
764 	const rte_iova_t io_addr = txq->io_addr;
765 	uint64_t lso_tun_fmt = 0, mark_fmt = 0;
766 	void *lmt_addr = txq->lmt_addr;
767 	struct rte_mbuf *extm = NULL;
768 	uint8_t mark_flag = 0;
769 	uint16_t i;
770 
771 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
772 		handle_tx_completion_pkts(txq, 0);
773 
774 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
775 
776 	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
777 
778 	/* Perform header writes before barrier for TSO */
779 	if (flags & NIX_TX_OFFLOAD_TSO_F) {
780 		lso_tun_fmt = txq->lso_tun_fmt;
781 
782 		for (i = 0; i < pkts; i++)
783 			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
784 	}
785 
786 	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
787 		mark_fmt = txq->mark_fmt;
788 		mark_flag = txq->mark_flag;
789 	}
790 
791 	/* Commit any changes in the packet here, as no further changes will
792 	 * be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F (no fast free) is set.
793 	 */
794 	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
795 		rte_io_wmb();
796 
797 	for (i = 0; i < pkts; i++) {
798 		cn9k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
799 				      mark_flag, mark_fmt);
800 		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
801 					     flags);
802 		cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
803 	}
804 
805 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
806 		cn9k_nix_free_extmbuf(extm);
807 
808 	/* Reduce the cached count */
809 	txq->fc_cache_pkts -= pkts;
810 
811 	return pkts;
812 }
813 
814 static __rte_always_inline uint16_t
815 cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
816 			uint16_t pkts, uint64_t *cmd, const uint16_t flags)
817 {
818 	struct cn9k_eth_txq *txq = tx_queue;
819 	const rte_iova_t io_addr = txq->io_addr;
820 	uint64_t lso_tun_fmt = 0, mark_fmt = 0;
821 	void *lmt_addr = txq->lmt_addr;
822 	struct rte_mbuf *extm = NULL;
823 	uint8_t mark_flag = 0;
824 	uint16_t segdw;
825 	uint64_t i;
826 
827 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
828 		handle_tx_completion_pkts(txq, 0);
829 
830 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
831 
832 	cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
833 
834 	/* Perform header writes before barrier for TSO */
835 	if (flags & NIX_TX_OFFLOAD_TSO_F) {
836 		lso_tun_fmt = txq->lso_tun_fmt;
837 
838 		for (i = 0; i < pkts; i++)
839 			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
840 	}
841 
842 	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
843 		mark_fmt = txq->mark_fmt;
844 		mark_flag = txq->mark_flag;
845 	}
846 
847 	/* Commit any changes in the packet here, as no further changes will
848 	 * be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F (no fast free) is set.
849 	 */
850 	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
851 		rte_io_wmb();
852 
853 	for (i = 0; i < pkts; i++) {
854 		cn9k_nix_xmit_prepare(txq, tx_pkts[i], &extm, cmd, flags, lso_tun_fmt,
855 				      mark_flag, mark_fmt);
856 		segdw = cn9k_nix_prepare_mseg(txq, tx_pkts[i], &extm, cmd, flags);
857 		cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
858 					     segdw, flags);
859 		cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
860 	}
861 
862 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
863 		cn9k_nix_free_extmbuf(extm);
864 
865 	/* Reduce the cached count */
866 	txq->fc_cache_pkts -= pkts;
867 
868 	return pkts;
869 }
870 
871 #if defined(RTE_ARCH_ARM64)
872 
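/* Vector-path counterpart of the TSO setup in cn9k_nix_xmit_prepare():
 * operates on SEND_HDR W1 and SEND_EXT W0 values already extracted from
 * the NEON lanes.
 */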
873 static __rte_always_inline void
874 cn9k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
875 		     union nix_send_ext_w0_u *w0, uint64_t ol_flags,
876 		     uint64_t flags)
877 {
878 	uint16_t lso_sb;
879 	uint64_t mask;
880 
881 	if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
882 		return;
883 
884 	mask = -(!w1->il3type);
885 	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
886 
887 	w0->u |= BIT(14);
888 	w0->lso_sb = lso_sb;
889 	w0->lso_mps = m->tso_segsz;
890 	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
891 	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
892 
893 	/* Handle tunnel tso */
894 	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
895 	    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
896 		const uint8_t is_udp_tun =
897 			(CNXK_NIX_UDP_TUN_BITMASK >>
898 			 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
899 			0x1;
900 
901 		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
902 		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
903 		/* Update format for UDP tunneled packet */
904 		w0->lso_format += is_udp_tun ? 2 : 6;
905 
906 		w0->lso_format += !!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 1;
907 	}
908 }
909 
910 static __rte_always_inline uint8_t
911 cn9k_nix_prepare_mseg_vec_list(struct cn9k_eth_txq *txq,
912 			       struct rte_mbuf *m, struct rte_mbuf **extm, uint64_t *cmd,
913 			       struct nix_send_hdr_s *send_hdr,
914 			       union nix_send_sg_s *sg, const uint32_t flags)
915 {
916 	struct rte_mbuf *m_next, *cookie;
917 	uint64_t *slist, sg_u, aura;
918 	uint16_t nb_segs;
919 	uint64_t segdw;
920 	int i = 1;
921 
922 	send_hdr->w0.total = m->pkt_len;
923 	/* Clear sg->u header before use */
924 	sg->u &= 0xFC00000000000000;
925 	sg_u = sg->u;
926 	slist = &cmd[0];
927 
928 	sg_u = sg_u | ((uint64_t)m->data_len);
929 
930 	nb_segs = m->nb_segs - 1;
931 	m_next = m->next;
932 
933 	/* Set invert df if buffer is not to be freed by H/W */
934 	cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
935 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
936 		aura = send_hdr->w0.aura;
937 		sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << 55);
938 		send_hdr->w0.aura = aura;
939 	}
940 	/* Mark mempool object as "put" since it is freed by NIX */
941 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
942 	if (!(sg_u & (1ULL << 55)))
943 		RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
944 	rte_io_wmb();
945 #else
946 	RTE_SET_USED(cookie);
947 #endif
948 
949 #ifdef RTE_ENABLE_ASSERT
950 	m->next = NULL;
951 	m->nb_segs = 1;
952 #endif
953 	m = m_next;
954 	/* Fill mbuf segments */
955 	do {
956 		m_next = m->next;
957 		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
958 		*slist = rte_mbuf_data_iova(m);
959 		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
960 		/* Set invert df if buffer is not to be freed by H/W */
961 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
962 			sg_u |= (cn9k_nix_prefree_seg(m, extm, txq, send_hdr, &aura) << (i + 55));
963 		/* Mark mempool object as "put" since it is freed by NIX
964 		 */
965 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
966 		if (!(sg_u & (1ULL << (i + 55))))
967 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
968 		rte_io_wmb();
969 #endif
970 		slist++;
971 		i++;
972 		nb_segs--;
973 		if (i > 2 && nb_segs) {
974 			i = 0;
975 			/* Next SG subdesc */
976 			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
977 			sg->u = sg_u;
978 			sg->segs = 3;
979 			sg = (union nix_send_sg_s *)slist;
980 			sg_u = sg->u;
981 			slist++;
982 		}
983 #ifdef RTE_ENABLE_ASSERT
984 		m->next = NULL;
985 #endif
986 		m = m_next;
987 	} while (nb_segs);
988 
989 	sg->u = sg_u;
990 	sg->segs = i;
991 	segdw = (uint64_t *)slist - (uint64_t *)&cmd[0];
992 
993 	segdw += 2;
994 	/* Roundup extra dwords to multiple of 2 */
995 	segdw = (segdw >> 1) + (segdw & 0x1);
996 	/* Default dwords */
997 	segdw += 1 + !!(flags & NIX_TX_NEED_EXT_HDR) +
998 		 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
999 	send_hdr->w0.sizem1 = segdw - 1;
1000 
1001 #ifdef RTE_ENABLE_ASSERT
1002 	rte_io_wmb();
1003 #endif
1004 	return segdw;
1005 }
1006 
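/* Multi-segment handling for the vector path: single-segment mbufs only
 * need the pre-free/aura fix-up on the already built cmd0/cmd1 lanes,
 * while chained mbufs fall back to cn9k_nix_prepare_mseg_vec_list().
 */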
1007 static __rte_always_inline uint8_t
1008 cn9k_nix_prepare_mseg_vec(struct cn9k_eth_txq *txq, struct rte_mbuf *m, struct rte_mbuf **extm,
1009 			  uint64_t *cmd, uint64x2_t *cmd0, uint64x2_t *cmd1, const uint32_t flags)
1010 {
1011 	struct nix_send_hdr_s send_hdr;
1012 	struct rte_mbuf *cookie;
1013 	union nix_send_sg_s sg;
1014 	uint64_t aura;
1015 	uint8_t ret;
1016 
1017 	if (m->nb_segs == 1) {
1018 		cookie = RTE_MBUF_DIRECT(m) ? m : rte_mbuf_from_indirect(m);
1019 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1020 			send_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);
1021 			send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
1022 			sg.u = vgetq_lane_u64(cmd1[0], 0);
1023 			aura = send_hdr.w0.aura;
1024 			sg.u |= (cn9k_nix_prefree_seg(m, extm, txq, &send_hdr, &aura) << 55);
1025 			send_hdr.w0.aura = aura;
1026 			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1027 			cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
1028 			cmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);
1029 		}
1030 
1031 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1032 		sg.u = vgetq_lane_u64(cmd1[0], 0);
1033 		if (!(sg.u & (1ULL << 55)))
1034 			RTE_MEMPOOL_CHECK_COOKIES(cookie->pool, (void **)&cookie, 1, 0);
1035 		rte_io_wmb();
1036 #else
1037 		RTE_SET_USED(cookie);
1038 #endif
1039 		return 2 + !!(flags & NIX_TX_NEED_EXT_HDR) +
1040 		       !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1041 	}
1042 
1043 	send_hdr.w0.u = vgetq_lane_u64(cmd0[0], 0);
1044 	send_hdr.w1.u = vgetq_lane_u64(cmd0[0], 1);
1045 	sg.u = vgetq_lane_u64(cmd1[0], 0);
1046 
1047 	ret = cn9k_nix_prepare_mseg_vec_list(txq, m, extm, cmd, &send_hdr, &sg, flags);
1048 
1049 	cmd0[0] = vsetq_lane_u64(send_hdr.w0.u, cmd0[0], 0);
1050 	cmd0[0] = vsetq_lane_u64(send_hdr.w1.u, cmd0[0], 1);
1051 	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1052 	return ret;
1053 }
1054 
1055 #define NIX_DESCS_PER_LOOP 4
1056 
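/* Flush up to NIX_DESCS_PER_LOOP prepared packets: when two (or all four)
 * packets fit within the 8-dword LMT line they are coalesced into a single
 * LMTST, otherwise each packet is submitted on its own.
 */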
1057 static __rte_always_inline void
1058 cn9k_nix_xmit_pkts_mseg_vector(uint64x2_t *cmd0, uint64x2_t *cmd1,
1059 			       uint64x2_t *cmd2, uint64x2_t *cmd3,
1060 			       uint8_t *segdw,
1061 			       uint64_t slist[][CNXK_NIX_TX_MSEG_SG_DWORDS - 2],
1062 			       uint64_t *lmt_addr, rte_iova_t io_addr,
1063 			       const uint32_t flags)
1064 {
1065 	uint64_t lmt_status;
1066 	uint8_t j, off;
1067 
1068 	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1069 	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1070 		/* No multi-segment packets among the 4 consecutive packets. */
1071 		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1072 			do {
1073 				vst1q_u64(lmt_addr, cmd0[0]);
1074 				vst1q_u64(lmt_addr + 2, cmd1[0]);
1075 				vst1q_u64(lmt_addr + 4, cmd0[1]);
1076 				vst1q_u64(lmt_addr + 6, cmd1[1]);
1077 				vst1q_u64(lmt_addr + 8, cmd0[2]);
1078 				vst1q_u64(lmt_addr + 10, cmd1[2]);
1079 				vst1q_u64(lmt_addr + 12, cmd0[3]);
1080 				vst1q_u64(lmt_addr + 14, cmd1[3]);
1081 				lmt_status = roc_lmt_submit_ldeor(io_addr);
1082 			} while (lmt_status == 0);
1083 
1084 			return;
1085 		}
1086 	}
1087 
1088 	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1089 		/* Fit two consecutive packets in the same LMTLINE. */
1090 		if ((segdw[j] + segdw[j + 1]) <= 8) {
1091 again0:
1092 			if ((flags & NIX_TX_NEED_EXT_HDR) &&
1093 			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1094 				vst1q_u64(lmt_addr, cmd0[j]);
1095 				vst1q_u64(lmt_addr + 2, cmd2[j]);
1096 				vst1q_u64(lmt_addr + 4, cmd1[j]);
1097 				/* Copy segs */
1098 				off = segdw[j] - 4;
1099 				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
1100 				off <<= 1;
1101 				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1102 
1103 				vst1q_u64(lmt_addr + 8 + off, cmd0[j + 1]);
1104 				vst1q_u64(lmt_addr + 10 + off, cmd2[j + 1]);
1105 				vst1q_u64(lmt_addr + 12 + off, cmd1[j + 1]);
1106 				roc_lmt_mov_seg(lmt_addr + 14 + off,
1107 						slist[j + 1], segdw[j + 1] - 4);
1108 				off += ((segdw[j + 1] - 4) << 1);
1109 				vst1q_u64(lmt_addr + 14 + off, cmd3[j + 1]);
1110 			} else if (flags & NIX_TX_NEED_EXT_HDR) {
1111 				vst1q_u64(lmt_addr, cmd0[j]);
1112 				vst1q_u64(lmt_addr + 2, cmd2[j]);
1113 				vst1q_u64(lmt_addr + 4, cmd1[j]);
1114 				/* Copy segs */
1115 				off = segdw[j] - 3;
1116 				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
1117 				off <<= 1;
1118 				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1119 				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1120 				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1121 				roc_lmt_mov_seg(lmt_addr + 12 + off,
1122 						slist[j + 1], segdw[j + 1] - 3);
1123 			} else {
1124 				vst1q_u64(lmt_addr, cmd0[j]);
1125 				vst1q_u64(lmt_addr + 2, cmd1[j]);
1126 				/* Copy segs */
1127 				off = segdw[j] - 2;
1128 				roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
1129 				off <<= 1;
1130 				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1131 				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1132 				roc_lmt_mov_seg(lmt_addr + 8 + off,
1133 						slist[j + 1], segdw[j + 1] - 2);
1134 			}
1135 			lmt_status = roc_lmt_submit_ldeor(io_addr);
1136 			if (lmt_status == 0)
1137 				goto again0;
1138 			j += 2;
1139 		} else {
1140 again1:
1141 			if ((flags & NIX_TX_NEED_EXT_HDR) &&
1142 			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1143 				vst1q_u64(lmt_addr, cmd0[j]);
1144 				vst1q_u64(lmt_addr + 2, cmd2[j]);
1145 				vst1q_u64(lmt_addr + 4, cmd1[j]);
1146 				/* Copy segs */
1147 				off = segdw[j] - 4;
1148 				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
1149 				off <<= 1;
1150 				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1151 			} else if (flags & NIX_TX_NEED_EXT_HDR) {
1152 				vst1q_u64(lmt_addr, cmd0[j]);
1153 				vst1q_u64(lmt_addr + 2, cmd2[j]);
1154 				vst1q_u64(lmt_addr + 4, cmd1[j]);
1155 				/* Copy segs */
1156 				off = segdw[j] - 3;
1157 				roc_lmt_mov_seg(lmt_addr + 6, slist[j], off);
1158 			} else {
1159 				vst1q_u64(lmt_addr, cmd0[j]);
1160 				vst1q_u64(lmt_addr + 2, cmd1[j]);
1161 				/* Copy segs */
1162 				off = segdw[j] - 2;
1163 				roc_lmt_mov_seg(lmt_addr + 4, slist[j], off);
1164 			}
1165 			lmt_status = roc_lmt_submit_ldeor(io_addr);
1166 			if (lmt_status == 0)
1167 				goto again1;
1168 			j += 1;
1169 		}
1170 	}
1171 }
1172 
1173 static __rte_always_inline uint16_t
1174 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
1175 			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
1176 {
1177 	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1178 	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1179 	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1180 		cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1181 	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
1182 	uint64x2_t senddesc01_w0, senddesc23_w0;
1183 	uint64x2_t senddesc01_w1, senddesc23_w1;
1184 	uint64x2_t sendext01_w0, sendext23_w0;
1185 	uint64x2_t sendext01_w1, sendext23_w1;
1186 	uint64x2_t sendmem01_w0, sendmem23_w0;
1187 	uint64x2_t sendmem01_w1, sendmem23_w1;
1188 	uint64x2_t sgdesc01_w0, sgdesc23_w0;
1189 	uint64x2_t sgdesc01_w1, sgdesc23_w1;
1190 	struct cn9k_eth_txq *txq = tx_queue;
1191 	uint64_t *lmt_addr = txq->lmt_addr;
1192 	rte_iova_t io_addr = txq->io_addr;
1193 	uint64x2_t ltypes01, ltypes23;
1194 	struct rte_mbuf *extm = NULL;
1195 	uint64x2_t xtmp128, ytmp128;
1196 	uint64x2_t xmask01, xmask23;
1197 	uint64_t lmt_status, i;
1198 	uint16_t pkts_left;
1199 
1200 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
1201 		handle_tx_completion_pkts(txq, 0);
1202 
1203 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
1204 
1205 	pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
1206 	pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1207 
1208 	/* Reduce the cached count */
1209 	txq->fc_cache_pkts -= pkts;
1210 
1211 	/* Perform header writes before barrier for TSO */
1212 	if (flags & NIX_TX_OFFLOAD_TSO_F) {
1213 		for (i = 0; i < pkts; i++)
1214 			cn9k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1215 	}
1216 
1217 	/* Commit any changes in the packet here, as no further changes will
1218 	 * be made unless NIX_TX_OFFLOAD_MBUF_NOFF_F (no fast free) is set.
1219 	 */
1220 	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
1221 		rte_io_wmb();
1222 
1223 	senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1224 	senddesc23_w0 = senddesc01_w0;
1225 
1226 	senddesc01_w1 = vdupq_n_u64(0);
1227 	senddesc23_w1 = senddesc01_w1;
1228 	sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1229 	sgdesc23_w0 = sgdesc01_w0;
1230 
1231 	if (flags & NIX_TX_NEED_EXT_HDR) {
1232 		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1233 			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1234 						   BIT_ULL(15));
1235 			sendmem01_w0 =
1236 				vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1237 					    (NIX_SENDMEMALG_SETTSTMP << 56));
1238 			sendmem23_w0 = sendmem01_w0;
1239 			sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1240 			sendmem23_w1 = sendmem01_w1;
1241 		} else {
1242 			sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1243 		}
1244 		sendext23_w0 = sendext01_w0;
1245 
1246 		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
1247 			sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1248 		else
1249 			sendext01_w1 = vdupq_n_u64(0);
1250 		sendext23_w1 = sendext01_w1;
1251 	}
1252 
1253 	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
1254 		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1255 		senddesc01_w0 =
1256 			vbicq_u64(senddesc01_w0, vdupq_n_u64(0x800FFFFFFFF));
1257 		sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1258 
1259 		senddesc23_w0 = senddesc01_w0;
1260 		sgdesc23_w0 = sgdesc01_w0;
1261 
1262 		/* Clear vlan enables. */
1263 		if (flags & NIX_TX_NEED_EXT_HDR) {
1264 			sendext01_w1 = vbicq_u64(sendext01_w1,
1265 						 vdupq_n_u64(0x3FFFF00FFFF00));
1266 			sendext23_w1 = sendext01_w1;
1267 		}
1268 
1269 		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1270 			/* Reset send mem alg to SETTSTMP from SUB */
1271 			sendmem01_w0 = vbicq_u64(sendmem01_w0,
1272 						 vdupq_n_u64(BIT_ULL(59)));
1273 			/* Reset send mem address to default. */
1274 			sendmem01_w1 =
1275 				vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1276 			sendmem23_w0 = sendmem01_w0;
1277 			sendmem23_w1 = sendmem01_w1;
1278 		}
1279 
1280 		if (flags & NIX_TX_OFFLOAD_TSO_F) {
1281 			/* Clear the LSO enable bit. */
1282 			sendext01_w0 = vbicq_u64(sendext01_w0,
1283 						 vdupq_n_u64(BIT_ULL(14)));
1284 			sendext23_w0 = sendext01_w0;
1285 		}
1286 
1287 		/* Move mbufs to iova */
1288 		mbuf0 = (uint64_t *)tx_pkts[0];
1289 		mbuf1 = (uint64_t *)tx_pkts[1];
1290 		mbuf2 = (uint64_t *)tx_pkts[2];
1291 		mbuf3 = (uint64_t *)tx_pkts[3];
1292 
1293 		/*
1294 		 * Get mbuf's ol_flags, iova, pktlen, data_off
1295 		 * dataoff_iovaX.D[0] = iova,
1296 		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1297 		 * len_olflagsX.D[0] = ol_flags,
1298 		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1299 		 */
1300 		dataoff_iova0 =
1301 			vsetq_lane_u64(((struct rte_mbuf *)mbuf0)->data_off, vld1q_u64(mbuf0), 1);
1302 		len_olflags0 = vld1q_u64(mbuf0 + 3);
1303 		dataoff_iova1 =
1304 			vsetq_lane_u64(((struct rte_mbuf *)mbuf1)->data_off, vld1q_u64(mbuf1), 1);
1305 		len_olflags1 = vld1q_u64(mbuf1 + 3);
1306 		dataoff_iova2 =
1307 			vsetq_lane_u64(((struct rte_mbuf *)mbuf2)->data_off, vld1q_u64(mbuf2), 1);
1308 		len_olflags2 = vld1q_u64(mbuf2 + 3);
1309 		dataoff_iova3 =
1310 			vsetq_lane_u64(((struct rte_mbuf *)mbuf3)->data_off, vld1q_u64(mbuf3), 1);
1311 		len_olflags3 = vld1q_u64(mbuf3 + 3);
1312 
1313 		/* Move mbuf pointers to point to the pool field */
1314 		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 + offsetof(struct rte_mbuf, pool));
1315 		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 + offsetof(struct rte_mbuf, pool));
1316 		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 + offsetof(struct rte_mbuf, pool));
1317 		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 + offsetof(struct rte_mbuf, pool));
1318 
1319 		if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1320 			     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1321 			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
1322 			/*
1323 			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1324 			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1325 			 */
1326 
1327 			asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1328 				     : [a] "+w"(senddesc01_w1)
1329 				     : [in] "r"(mbuf0 + 2)
1330 				     : "memory");
1331 
1332 			asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1333 				     : [a] "+w"(senddesc01_w1)
1334 				     : [in] "r"(mbuf1 + 2)
1335 				     : "memory");
1336 
1337 			asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1338 				     : [b] "+w"(senddesc23_w1)
1339 				     : [in] "r"(mbuf2 + 2)
1340 				     : "memory");
1341 
1342 			asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1343 				     : [b] "+w"(senddesc23_w1)
1344 				     : [in] "r"(mbuf3 + 2)
1345 				     : "memory");
1346 
1347 			/* Get pool pointer alone */
1348 			mbuf0 = (uint64_t *)*mbuf0;
1349 			mbuf1 = (uint64_t *)*mbuf1;
1350 			mbuf2 = (uint64_t *)*mbuf2;
1351 			mbuf3 = (uint64_t *)*mbuf3;
1352 		} else {
1353 			/* Get pool pointer alone */
1354 			mbuf0 = (uint64_t *)*mbuf0;
1355 			mbuf1 = (uint64_t *)*mbuf1;
1356 			mbuf2 = (uint64_t *)*mbuf2;
1357 			mbuf3 = (uint64_t *)*mbuf3;
1358 		}
1359 
1360 		const uint8x16_t shuf_mask2 = {
1361 			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1362 			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1363 		};
1364 		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1365 		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1366 
1367 		/*
1368 		 * Pick only 16 bits of pktlen present at bits 63:32
1369 		 * and place them at bits 15:0.
1370 		 */
1371 		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1372 		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1373 
1374 		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
1375 		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1376 		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1377 
1378 		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1379 		 * pktlen at 15:0 position.
1380 		 */
1381 		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1382 		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1383 		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1384 		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1385 
1386 		/* Move mbuf to point to pool_id. */
1387 		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1388 				     offsetof(struct rte_mempool, pool_id));
1389 		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1390 				     offsetof(struct rte_mempool, pool_id));
1391 		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1392 				     offsetof(struct rte_mempool, pool_id));
1393 		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1394 				     offsetof(struct rte_mempool, pool_id));
1395 
1396 		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1397 		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1398 			/*
1399 			 * Lookup table to translate ol_flags to
1400 			 * il3/il4 types. But we still use ol3/ol4 types in
1401 			 * senddesc_w1 as only one header processing is enabled.
1402 			 */
1403 			const uint8x16_t tbl = {
1404 				/* [0-15] = il4type:il3type */
1405 				0x04, /* none (IPv6 assumed) */
1406 				0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1407 				0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1408 				0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1409 				0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1410 				0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1411 				0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1412 				0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1413 				0x02, /* RTE_MBUF_F_TX_IPV4  */
1414 				0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1415 				0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1416 				0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1417 				0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1418 				0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1419 				       * RTE_MBUF_F_TX_TCP_CKSUM
1420 				       */
1421 				0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1422 				       * RTE_MBUF_F_TX_SCTP_CKSUM
1423 				       */
1424 				0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1425 				       * RTE_MBUF_F_TX_UDP_CKSUM
1426 				       */
1427 			};
1428 
1429 			/* Extract olflags to translate to iltypes */
1430 			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1431 			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1432 
1433 			/*
1434 			 * E(47):L3_LEN(9):L2_LEN(7+z)
1435 			 * E(47):L3_LEN(9):L2_LEN(7+z)
1436 			 */
1437 			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1438 			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1439 
1440 			/* Move OLFLAGS bits 55:52 to 51:48
1441 			 * with zeros prepended on the byte; the rest
1442 			 * are don't care.
1443 			 */
1444 			xtmp128 = vshrq_n_u8(xtmp128, 4);
1445 			ytmp128 = vshrq_n_u8(ytmp128, 4);
1446 			/*
1447 			 * E(48):L3_LEN(8):L2_LEN(z+7)
1448 			 * E(48):L3_LEN(8):L2_LEN(z+7)
1449 			 */
1450 			const int8x16_t tshft3 = {
1451 				-1, 0, 8, 8, 8, 8, 8, 8,
1452 				-1, 0, 8, 8, 8, 8, 8, 8,
1453 			};
1454 
1455 			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1456 			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1457 
1458 			/* Do the lookup */
1459 			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1460 			ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1461 
1462 			/* Pick only the relevant fields, i.e. bits 48:55 of iltype,
1463 			 * and place them in ol3/ol4type of senddesc_w1.
1464 			 */
1465 			const uint8x16_t shuf_mask0 = {
1466 				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1467 				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1468 			};
1469 
1470 			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1471 			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1472 
1473 			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1474 			 * a [E(32):E(16):OL3(8):OL2(8)]
1475 			 * a = a + (a << 8)
1476 			 * a [E(32):E(16):(OL3+OL2):OL2]
1477 			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1478 			 */
1479 			senddesc01_w1 = vaddq_u8(senddesc01_w1,
1480 						 vshlq_n_u16(senddesc01_w1, 8));
1481 			senddesc23_w1 = vaddq_u8(senddesc23_w1,
1482 						 vshlq_n_u16(senddesc23_w1, 8));
1483 
1484 			/* Move ltypes to senddesc*_w1 */
1485 			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1486 			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1487 		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1488 			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1489 			/*
1490 			 * Lookup table to translate ol_flags to
1491 			 * ol3/ol4 types.
1492 			 */
1493 
1494 			const uint8x16_t tbl = {
1495 				/* [0-15] = ol4type:ol3type */
1496 				0x00, /* none */
1497 				0x03, /* OUTER_IP_CKSUM */
1498 				0x02, /* OUTER_IPV4 */
1499 				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1500 				0x04, /* OUTER_IPV6 */
1501 				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1502 				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1503 				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1504 				       * OUTER_IP_CKSUM
1505 				       */
1506 				0x00, /* OUTER_UDP_CKSUM */
1507 				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
1508 				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
1509 				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
1510 				       * OUTER_IP_CKSUM
1511 				       */
1512 				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
1513 				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1514 				       * OUTER_IP_CKSUM
1515 				       */
1516 				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1517 				       * OUTER_IPV4
1518 				       */
1519 				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1520 				       * OUTER_IPV4 | OUTER_IP_CKSUM
1521 				       */
1522 			};
1523 
1524 			/* Extract olflags to translate to iltypes */
1525 			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1526 			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1527 
1528 			/*
1529 			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1530 			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
1531 			 */
1532 			const uint8x16_t shuf_mask5 = {
1533 				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1534 				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1535 			};
1536 			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1537 			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1538 
1539 			/* Extract outer ol flags only */
1540 			const uint64x2_t o_cksum_mask = {
1541 				0x1C00020000000000,
1542 				0x1C00020000000000,
1543 			};
1544 
1545 			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
1546 			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
1547 
1548 			/* Extract OUTER_UDP_CKSUM bit 41 and
1549 			 * move it to bit 61
1550 			 */
1551 
1552 			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1553 			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1554 
1555 			/* Shift oltype by 2 to start nibble from BIT(56)
1556 			 * instead of BIT(58)
1557 			 */
1558 			xtmp128 = vshrq_n_u8(xtmp128, 2);
1559 			ytmp128 = vshrq_n_u8(ytmp128, 2);
1560 			/*
1561 			 * E(48):L3_LEN(8):L2_LEN(z+7)
1562 			 * E(48):L3_LEN(8):L2_LEN(z+7)
1563 			 */
1564 			const int8x16_t tshft3 = {
1565 				-1, 0, 8, 8, 8, 8, 8, 8,
1566 				-1, 0, 8, 8, 8, 8, 8, 8,
1567 			};
1568 
1569 			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1570 			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1571 
1572 			/* Do the lookup */
1573 			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1574 			ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1575 
1576 			/* Pick only the relevant fields, i.e. bits 56:63 of oltype,
1577 			 * and place them in ol3/ol4type of senddesc_w1.
1578 			 */
1579 			const uint8x16_t shuf_mask0 = {
1580 				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
1581 				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
1582 			};
1583 
1584 			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1585 			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1586 
1587 			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1588 			 * a [E(32):E(16):OL3(8):OL2(8)]
1589 			 * a = a + (a << 8)
1590 			 * a [E(32):E(16):(OL3+OL2):OL2]
1591 			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1592 			 */
1593 			senddesc01_w1 = vaddq_u8(senddesc01_w1,
1594 						 vshlq_n_u16(senddesc01_w1, 8));
1595 			senddesc23_w1 = vaddq_u8(senddesc23_w1,
1596 						 vshlq_n_u16(senddesc23_w1, 8));
1597 
1598 			/* Move ltypes to senddesc*_w1 */
1599 			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1600 			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1601 		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1602 			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1603 			/* Lookup table to translate ol_flags to
1604 			 * ol4type, ol3type, il4type, il3type of senddesc_w1
1605 			 */
1606 			const uint8x16x2_t tbl = {{
1607 				{
1608 					/* [0-15] = il4type:il3type */
1609 					0x04, /* none (IPv6) */
1610 					0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
1611 					0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
1612 					0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
1613 					0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1614 					0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
1615 					       * RTE_MBUF_F_TX_TCP_CKSUM
1616 					       */
1617 					0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
1618 					       * RTE_MBUF_F_TX_SCTP_CKSUM
1619 					       */
1620 					0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
1621 					       * RTE_MBUF_F_TX_UDP_CKSUM
1622 					       */
1623 					0x02, /* RTE_MBUF_F_TX_IPV4 */
1624 					0x12, /* RTE_MBUF_F_TX_IPV4 |
1625 					       * RTE_MBUF_F_TX_TCP_CKSUM
1626 					       */
1627 					0x22, /* RTE_MBUF_F_TX_IPV4 |
1628 					       * RTE_MBUF_F_TX_SCTP_CKSUM
1629 					       */
1630 					0x32, /* RTE_MBUF_F_TX_IPV4 |
1631 					       * RTE_MBUF_F_TX_UDP_CKSUM
1632 					       */
1633 					0x03, /* RTE_MBUF_F_TX_IPV4 |
1634 					       * RTE_MBUF_F_TX_IP_CKSUM
1635 					       */
1636 					0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1637 					       * RTE_MBUF_F_TX_TCP_CKSUM
1638 					       */
1639 					0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1640 					       * RTE_MBUF_F_TX_SCTP_CKSUM
1641 					       */
1642 					0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1643 					       * RTE_MBUF_F_TX_UDP_CKSUM
1644 					       */
1645 				},
1646 
1647 				{
1648 					/* [16-31] = ol4type:ol3type */
1649 					0x00, /* none */
1650 					0x03, /* OUTER_IP_CKSUM */
1651 					0x02, /* OUTER_IPV4 */
1652 					0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
1653 					0x04, /* OUTER_IPV6 */
1654 					0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
1655 					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
1656 					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
1657 					       * OUTER_IP_CKSUM
1658 					       */
1659 					0x00, /* OUTER_UDP_CKSUM */
1660 					0x33, /* OUTER_UDP_CKSUM |
1661 					       * OUTER_IP_CKSUM
1662 					       */
1663 					0x32, /* OUTER_UDP_CKSUM |
1664 					       * OUTER_IPV4
1665 					       */
1666 					0x33, /* OUTER_UDP_CKSUM |
1667 					       * OUTER_IPV4 | OUTER_IP_CKSUM
1668 					       */
1669 					0x34, /* OUTER_UDP_CKSUM |
1670 					       * OUTER_IPV6
1671 					       */
1672 					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1673 					       * OUTER_IP_CKSUM
1674 					       */
1675 					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1676 					       * OUTER_IPV4
1677 					       */
1678 					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
1679 					       * OUTER_IPV4 | OUTER_IP_CKSUM
1680 					       */
1681 				},
1682 			}};
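			/* Illustrative lookup (hypothetical packet): for inner
			 * RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
			 * RTE_MBUF_F_TX_TCP_CKSUM the inner nibble is 0xD, so
			 * tbl[0][13] = 0x13 (il4type=TCP, il3type=IP4 w/csum).
			 * For outer OUTER_IPV4 | OUTER_IP_CKSUM the outer
			 * nibble is 0x3 and, with BIT(4) set below, index 19
			 * selects tbl[1][3] = 0x03 (ol4type=none,
			 * ol3type=IP4 w/csum).
			 */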
1683 
1684 			/* Extract olflags to translate to oltype & iltype */
1685 			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1686 			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1687 
1688 			/*
1689 			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1690 			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
1691 			 */
1692 			const uint32x4_t tshft_4 = {
1693 				1,
1694 				0,
1695 				1,
1696 				0,
1697 			};
1698 			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
1699 			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
1700 
1701 			/*
1702 			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1703 			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
1704 			 */
1705 			const uint8x16_t shuf_mask5 = {
1706 				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
1707 				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1708 			};
1709 			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
1710 			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
1711 
1712 			/* Extract outer and inner header ol_flags */
1713 			const uint64x2_t oi_cksum_mask = {
1714 				0x1CF0020000000000,
1715 				0x1CF0020000000000,
1716 			};
1717 
1718 			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
1719 			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
1720 
1721 			/* Extract OUTER_UDP_CKSUM bit 41 and
1722 			 * move it to bit 61
1723 			 */
1724 
1725 			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
1726 			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
1727 
1728 			/* Shift right oltype by 2 and iltype by 4
1729 			 * to start oltype nibble from BIT(56)
1730 			 * instead of BIT(58) and iltype nibble from BIT(48)
1731 			 * instead of BIT(52).
1732 			 */
1733 			const int8x16_t tshft5 = {
1734 				8, 8, 8, 8, 8, 8, -4, -2,
1735 				8, 8, 8, 8, 8, 8, -4, -2,
1736 			};
1737 
1738 			xtmp128 = vshlq_u8(xtmp128, tshft5);
1739 			ytmp128 = vshlq_u8(ytmp128, tshft5);
1740 			/*
1741 			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1742 			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
1743 			 */
1744 			const int8x16_t tshft3 = {
1745 				-1, 0, -1, 0, 0, 0, 0, 0,
1746 				-1, 0, -1, 0, 0, 0, 0, 0,
1747 			};
1748 
1749 			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1750 			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1751 
1752 			/* Set BIT(4) of oltype to select the ol4type:ol3type table */
1753 			const uint64x2_t oi_cksum_mask2 = {
1754 				0x1000000000000000,
1755 				0x1000000000000000,
1756 			};
1757 
1758 			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
1759 			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
1760 
1761 			/* Do the lookup */
1762 			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
1763 			ltypes23 = vqtbl2q_u8(tbl, ytmp128);
1764 
1765 			/* Pick only the relevant fields, i.e. bits 48:55 of
1766 			 * iltype and bits 56:63 of oltype, and place them in
1767 			 * the corresponding fields of senddesc_w1.
1768 			 */
1769 			const uint8x16_t shuf_mask0 = {
1770 				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
1771 				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
1772 			};
1773 
1774 			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1775 			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1776 
1777 			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
1778 			 * l3len, l2len, ol3len, ol2len.
1779 			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
1780 			 * a = a + (a << 8)
1781 			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
1782 			 * a = a + (a << 16)
1783 			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
1784 			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
1785 			 */
1786 			senddesc01_w1 = vaddq_u8(senddesc01_w1,
1787 						 vshlq_n_u32(senddesc01_w1, 8));
1788 			senddesc23_w1 = vaddq_u8(senddesc23_w1,
1789 						 vshlq_n_u32(senddesc23_w1, 8));
1790 
1791 			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
1792 			senddesc01_w1 = vaddq_u8(
1793 				senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
1794 			senddesc23_w1 = vaddq_u8(
1795 				senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
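			/* Illustrative example (assumed header sizes):
			 * ol2=14, ol3=20, l2=30, l3=20 gives OL3PTR=14,
			 * OL4PTR=34, IL3PTR=64 and IL4PTR=84 after the two
			 * add-with-shift steps above.
			 */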
1796 
1797 			/* Move ltypes to senddesc*_w1 */
1798 			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1799 			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1800 		}
1801 
1802 		xmask01 = vdupq_n_u64(0);
1803 		xmask23 = xmask01;
1804 		asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
1805 			     : [a] "+w"(xmask01)
1806 			     : [in] "r"(mbuf0)
1807 			     : "memory");
1808 
1809 		asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
1810 			     : [a] "+w"(xmask01)
1811 			     : [in] "r"(mbuf1)
1812 			     : "memory");
1813 
1814 		asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
1815 			     : [b] "+w"(xmask23)
1816 			     : [in] "r"(mbuf2)
1817 			     : "memory");
1818 
1819 		asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
1820 			     : [b] "+w"(xmask23)
1821 			     : [in] "r"(mbuf3)
1822 			     : "memory");
1823 		xmask01 = vshlq_n_u64(xmask01, 20);
1824 		xmask23 = vshlq_n_u64(xmask23, 20);
1825 
1826 		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
1827 		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
1828 
1829 		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
1830 			/* Tx ol_flag for vlan. */
1831 			const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
1832 			/* Bit enable for VLAN1 */
1833 			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
1834 			/* Tx ol_flag for QnQ. */
1835 			const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
1836 			/* Bit enable for VLAN0 */
1837 			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
1838 			/* Load VLAN values from the packet; outer TCI is VLAN0 */
1839 			uint64x2_t ext01 = {
1840 				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
1841 					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
1842 				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
1843 					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
1844 			};
1845 			uint64x2_t ext23 = {
1846 				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
1847 					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
1848 				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
1849 					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
1850 			};
1851 
1852 			/* Get ol_flags of the packets. */
1853 			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1854 			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1855 
1856 			/* ORR vlan outer/inner values into cmd. */
1857 			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
1858 			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
1859 
1860 			/* Test for offload enable bits and generate masks. */
1861 			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
1862 						      mlv),
1863 					    vandq_u64(vtstq_u64(xtmp128, olq),
1864 						      mlq));
1865 			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
1866 						      mlv),
1867 					    vandq_u64(vtstq_u64(ytmp128, olq),
1868 						      mlq));
1869 
1870 			/* Set vlan enable bits into cmd based on mask. */
1871 			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
1872 			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
1873 		}
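		/* Illustrative example (hypothetical TCIs): a packet with
		 * RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ, vlan_tci = 0x65
		 * and vlan_tci_outer = 0xC8 ends up with sendext_w1 |=
		 * (0xC8 << 8) | (0x65ULL << 32) | BIT(48) | BIT(49), i.e.
		 * VLAN0 (outer) and VLAN1 (inner) both inserted.
		 */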
1874 
1875 		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1876 			/* Tx ol_flag for timestamp. */
1877 			const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
1878 						RTE_MBUF_F_TX_IEEE1588_TMST};
1879 			/* Set send mem alg to SUB. */
1880 			const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
1881 			/* Increment send mem address by 8. */
1882 			const uint64x2_t addr = {0x8, 0x8};
1883 
1884 			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1885 			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1886 
1887 			/* Check if a timestamp is requested and generate an
1888 			 * inverted mask, since the default cmd value needs no
1889 			 * change when it is not.
1890 			 */
1891 			xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
1892 			ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
1893 
1894 			/* Change send mem address to an 8-byte offset when
1895 			 * TSTMP is disabled.
1896 			 */
1897 			sendmem01_w1 = vaddq_u64(sendmem01_w1,
1898 						 vandq_u64(xtmp128, addr));
1899 			sendmem23_w1 = vaddq_u64(sendmem23_w1,
1900 						 vandq_u64(ytmp128, addr));
1901 			/* Change send mem alg to SUB when TSTMP is disabled. */
1902 			sendmem01_w0 = vorrq_u64(sendmem01_w0,
1903 						 vandq_u64(xtmp128, alg));
1904 			sendmem23_w0 = vorrq_u64(sendmem23_w0,
1905 						 vandq_u64(ytmp128, alg));
1906 
1907 			cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
1908 			cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
1909 			cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
1910 			cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
1911 		}
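		/* Note: vtstq_u64() yields an all-ones lane when the
		 * RTE_MBUF_F_TX_IEEE1588_TMST flag is set, so the inverted
		 * mask is all-ones only for packets without it; only those
		 * lanes receive the +8 address and ALG=SUB tweak (presumably
		 * so the SEND_MEM write does not clobber the live timestamp
		 * slot).
		 */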
1912 
1913 		if (flags & NIX_TX_OFFLOAD_TSO_F) {
1914 			uint64_t sx_w0[NIX_DESCS_PER_LOOP];
1915 			uint64_t sd_w1[NIX_DESCS_PER_LOOP];
1916 
1917 			/* Extract SD W1 as we need to set L4 types. */
1918 			vst1q_u64(sd_w1, senddesc01_w1);
1919 			vst1q_u64(sd_w1 + 2, senddesc23_w1);
1920 
1921 			/* Extract SX W0 as we need to set LSO fields. */
1922 			vst1q_u64(sx_w0, sendext01_w0);
1923 			vst1q_u64(sx_w0 + 2, sendext23_w0);
1924 
1925 			/* Extract ol_flags. */
1926 			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1927 			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1928 
1929 			/* Prepare individual mbufs. */
1930 			cn9k_nix_prepare_tso(tx_pkts[0],
1931 				(union nix_send_hdr_w1_u *)&sd_w1[0],
1932 				(union nix_send_ext_w0_u *)&sx_w0[0],
1933 				vgetq_lane_u64(xtmp128, 0), flags);
1934 
1935 			cn9k_nix_prepare_tso(tx_pkts[1],
1936 				(union nix_send_hdr_w1_u *)&sd_w1[1],
1937 				(union nix_send_ext_w0_u *)&sx_w0[1],
1938 				vgetq_lane_u64(xtmp128, 1), flags);
1939 
1940 			cn9k_nix_prepare_tso(tx_pkts[2],
1941 				(union nix_send_hdr_w1_u *)&sd_w1[2],
1942 				(union nix_send_ext_w0_u *)&sx_w0[2],
1943 				vgetq_lane_u64(ytmp128, 0), flags);
1944 
1945 			cn9k_nix_prepare_tso(tx_pkts[3],
1946 				(union nix_send_hdr_w1_u *)&sd_w1[3],
1947 				(union nix_send_ext_w0_u *)&sx_w0[3],
1948 				vgetq_lane_u64(ytmp128, 1), flags);
1949 
1950 			senddesc01_w1 = vld1q_u64(sd_w1);
1951 			senddesc23_w1 = vld1q_u64(sd_w1 + 2);
1952 
1953 			sendext01_w0 = vld1q_u64(sx_w0);
1954 			sendext23_w0 = vld1q_u64(sx_w0 + 2);
1955 		}
1956 
1957 		if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1958 		    !(flags & NIX_TX_MULTI_SEG_F)) {
1959 			/* Set don't free bit if reference count > 1 */
1960 			cn9k_nix_prefree_seg_vec(tx_pkts, &extm, txq, &senddesc01_w0,
1961 						 &senddesc23_w0, &senddesc01_w1, &senddesc23_w1);
1962 			/* Ensure that mbuf fields updated in
1963 			 * cn9k_nix_prefree_seg_vec are written before LMTST.
1964 			 */
1965 			rte_io_wmb();
1966 		} else if (!(flags & NIX_TX_MULTI_SEG_F)) {
1967 			/* Move mbufs to iova */
1968 			mbuf0 = (uint64_t *)tx_pkts[0];
1969 			mbuf1 = (uint64_t *)tx_pkts[1];
1970 			mbuf2 = (uint64_t *)tx_pkts[2];
1971 			mbuf3 = (uint64_t *)tx_pkts[3];
1972 
1973 			/* Mark mempool object as "put" since
1974 			 * it is freed by NIX
1975 			 */
1976 			RTE_MEMPOOL_CHECK_COOKIES(
1977 				((struct rte_mbuf *)mbuf0)->pool,
1978 				(void **)&mbuf0, 1, 0);
1979 
1980 			RTE_MEMPOOL_CHECK_COOKIES(
1981 				((struct rte_mbuf *)mbuf1)->pool,
1982 				(void **)&mbuf1, 1, 0);
1983 
1984 			RTE_MEMPOOL_CHECK_COOKIES(
1985 				((struct rte_mbuf *)mbuf2)->pool,
1986 				(void **)&mbuf2, 1, 0);
1987 
1988 			RTE_MEMPOOL_CHECK_COOKIES(
1989 				((struct rte_mbuf *)mbuf3)->pool,
1990 				(void **)&mbuf3, 1, 0);
1991 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1992 			rte_io_wmb();
1993 #endif
1994 		}
1995 
1996 		/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
1997 		cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
1998 		cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
1999 		cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2000 		cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2001 
2002 		cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2003 		cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2004 		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2005 		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2006 
2007 		if (flags & NIX_TX_NEED_EXT_HDR) {
2008 			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2009 			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2010 			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2011 			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2012 		}
2013 
2014 		if (flags & NIX_TX_MULTI_SEG_F) {
2015 			uint64_t seg_list[NIX_DESCS_PER_LOOP]
2016 					 [CNXK_NIX_TX_MSEG_SG_DWORDS - 2];
2017 			uint8_t j, segdw[NIX_DESCS_PER_LOOP + 1];
2018 
2019 			/* Build mseg list for each packet individually. */
2020 			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
2021 				segdw[j] = cn9k_nix_prepare_mseg_vec(txq,
2022 							tx_pkts[j], &extm,
2023 							seg_list[j], &cmd0[j],
2024 							&cmd1[j], flags);
2025 			segdw[4] = 8;
2026 
2027 			/* Commit all changes to mbuf before LMTST. */
2028 			if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
2029 				rte_io_wmb();
2030 
2031 			cn9k_nix_xmit_pkts_mseg_vector(cmd0, cmd1, cmd2, cmd3,
2032 						       segdw, seg_list,
2033 						       lmt_addr, io_addr,
2034 						       flags);
2035 		} else if (flags & NIX_TX_NEED_EXT_HDR) {
2036 			/* With an ext header in the command we can no longer
2037 			 * send all 4 packets together since the LMTLINE is
2038 			 * 128 bytes. Split and Tx twice.
2039 			 */
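			/* Sizing: each vst1q_u64() below writes 16 bytes, so a
			 * packet with ext header needs 48 bytes (64 with
			 * TSTAMP); two packets take 96 or 128 bytes, i.e. at
			 * most one LMTLINE, hence the two LMTSTs. Without ext
			 * header a packet needs only 32 bytes and all four fit
			 * in one LMTLINE (see the else branch).
			 */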
2040 			do {
2041 				if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2042 					vst1q_u64(lmt_addr, cmd0[0]);
2043 					vst1q_u64(lmt_addr + 2, cmd2[0]);
2044 					vst1q_u64(lmt_addr + 4, cmd1[0]);
2045 					vst1q_u64(lmt_addr + 6, cmd3[0]);
2046 					vst1q_u64(lmt_addr + 8, cmd0[1]);
2047 					vst1q_u64(lmt_addr + 10, cmd2[1]);
2048 					vst1q_u64(lmt_addr + 12, cmd1[1]);
2049 					vst1q_u64(lmt_addr + 14, cmd3[1]);
2050 				} else {
2051 					vst1q_u64(lmt_addr, cmd0[0]);
2052 					vst1q_u64(lmt_addr + 2, cmd2[0]);
2053 					vst1q_u64(lmt_addr + 4, cmd1[0]);
2054 					vst1q_u64(lmt_addr + 6, cmd0[1]);
2055 					vst1q_u64(lmt_addr + 8, cmd2[1]);
2056 					vst1q_u64(lmt_addr + 10, cmd1[1]);
2057 				}
2058 				lmt_status = roc_lmt_submit_ldeor(io_addr);
2059 			} while (lmt_status == 0);
2060 
2061 			do {
2062 				if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2063 					vst1q_u64(lmt_addr, cmd0[2]);
2064 					vst1q_u64(lmt_addr + 2, cmd2[2]);
2065 					vst1q_u64(lmt_addr + 4, cmd1[2]);
2066 					vst1q_u64(lmt_addr + 6, cmd3[2]);
2067 					vst1q_u64(lmt_addr + 8, cmd0[3]);
2068 					vst1q_u64(lmt_addr + 10, cmd2[3]);
2069 					vst1q_u64(lmt_addr + 12, cmd1[3]);
2070 					vst1q_u64(lmt_addr + 14, cmd3[3]);
2071 				} else {
2072 					vst1q_u64(lmt_addr, cmd0[2]);
2073 					vst1q_u64(lmt_addr + 2, cmd2[2]);
2074 					vst1q_u64(lmt_addr + 4, cmd1[2]);
2075 					vst1q_u64(lmt_addr + 6, cmd0[3]);
2076 					vst1q_u64(lmt_addr + 8, cmd2[3]);
2077 					vst1q_u64(lmt_addr + 10, cmd1[3]);
2078 				}
2079 				lmt_status = roc_lmt_submit_ldeor(io_addr);
2080 			} while (lmt_status == 0);
2081 		} else {
2082 			do {
2083 				vst1q_u64(lmt_addr, cmd0[0]);
2084 				vst1q_u64(lmt_addr + 2, cmd1[0]);
2085 				vst1q_u64(lmt_addr + 4, cmd0[1]);
2086 				vst1q_u64(lmt_addr + 6, cmd1[1]);
2087 				vst1q_u64(lmt_addr + 8, cmd0[2]);
2088 				vst1q_u64(lmt_addr + 10, cmd1[2]);
2089 				vst1q_u64(lmt_addr + 12, cmd0[3]);
2090 				vst1q_u64(lmt_addr + 14, cmd1[3]);
2091 				lmt_status = roc_lmt_submit_ldeor(io_addr);
2092 			} while (lmt_status == 0);
2093 		}
2094 		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2095 	}
2096 
2097 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && !txq->tx_compl.ena)
2098 		cn9k_nix_free_extmbuf(extm);
2099 
2100 	if (unlikely(pkts_left)) {
2101 		if (flags & NIX_TX_MULTI_SEG_F)
2102 			pkts += cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
2103 							pkts_left, cmd, flags);
2104 		else
2105 			pkts += cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts_left,
2106 						   cmd, flags);
2107 	}
2108 
2109 	return pkts;
2110 }
2111 
2112 #else
2113 static __rte_always_inline uint16_t
2114 cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
2115 			  uint16_t pkts, uint64_t *cmd, const uint16_t flags)
2116 {
2117 	RTE_SET_USED(tx_queue);
2118 	RTE_SET_USED(tx_pkts);
2119 	RTE_SET_USED(pkts);
2120 	RTE_SET_USED(cmd);
2121 	RTE_SET_USED(flags);
2122 	return 0;
2123 }
2124 #endif
2125 
2126 #define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
2127 #define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2128 #define VLAN_F	     NIX_TX_OFFLOAD_VLAN_QINQ_F
2129 #define NOFF_F	     NIX_TX_OFFLOAD_MBUF_NOFF_F
2130 #define TSO_F	     NIX_TX_OFFLOAD_TSO_F
2131 #define TSP_F	     NIX_TX_OFFLOAD_TSTAMP_F
2132 #define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
2133 
2134 /* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
2135 #define NIX_TX_FASTPATH_MODES_0_15                                             \
2136 	T(no_offload, 6, NIX_TX_OFFLOAD_NONE)                                  \
2137 	T(l3l4csum, 6, L3L4CSUM_F)                                             \
2138 	T(ol3ol4csum, 6, OL3OL4CSUM_F)                                         \
2139 	T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F)                   \
2140 	T(vlan, 6, VLAN_F)                                                     \
2141 	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
2142 	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
2143 	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2144 	T(noff, 6, NOFF_F)                                                     \
2145 	T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F)                               \
2146 	T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F)                           \
2147 	T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2148 	T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
2149 	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
2150 	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
2151 	T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
2152 	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2153 
2154 #define NIX_TX_FASTPATH_MODES_16_31                                            \
2155 	T(tso, 6, TSO_F)                                                       \
2156 	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
2157 	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
2158 	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
2159 	T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
2160 	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
2161 	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
2162 	T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2163 	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2164 	T(tso_noff, 6, TSO_F | NOFF_F)                                         \
2165 	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
2166 	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
2167 	T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
2168 	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2169 	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
2170 	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
2171 	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2172 	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2173 	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2174 
2175 #define NIX_TX_FASTPATH_MODES_32_47                                            \
2176 	T(ts, 8, TSP_F)                                                        \
2177 	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
2178 	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
2179 	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2180 	T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
2181 	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
2182 	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
2183 	T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
2184 	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2185 	T(ts_noff, 8, TSP_F | NOFF_F)                                          \
2186 	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
2187 	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
2188 	T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
2189 	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2190 	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
2191 	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
2192 	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
2193 	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
2194 	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2195 
2196 #define NIX_TX_FASTPATH_MODES_48_63                                            \
2197 	T(ts_tso, 8, TSP_F | TSO_F)                                            \
2198 	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
2199 	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
2200 	T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
2201 	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
2202 	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
2203 	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
2204 	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
2205 	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2206 	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2207 	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
2208 	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
2209 	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
2210 	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
2211 	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2212 	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
2213 	T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
2214 	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
2215 	T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
2216 	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
2217 	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2218 	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2219 
2220 #define NIX_TX_FASTPATH_MODES_64_79                                            \
2221 	T(sec, 6, T_SEC_F)                                                     \
2222 	T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F)                               \
2223 	T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F)                           \
2224 	T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2225 	T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
2226 	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
2227 	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
2228 	T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2229 	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2230 	T(sec_noff, 6, T_SEC_F | NOFF_F)                                       \
2231 	T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
2232 	T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
2233 	T(sec_noff_ol3ol4csum_l3l4csum, 6,                                     \
2234 	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2235 	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
2236 	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
2237 	T(sec_noff_vlan_ol3ol4csum, 6,                                         \
2238 	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
2239 	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2240 	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2241 
2242 #define NIX_TX_FASTPATH_MODES_80_95                                            \
2243 	T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
2244 	T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
2245 	T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
2246 	T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
2247 	  T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2248 	T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
2249 	T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
2250 	T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2251 	T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
2252 	  T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2253 	T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
2254 	T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
2255 	T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2256 	T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
2257 	  T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2258 	T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
2259 	T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
2260 	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2261 	T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
2262 	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2263 	T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
2264 	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2265 
2266 #define NIX_TX_FASTPATH_MODES_96_111                                           \
2267 	T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
2268 	T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
2269 	T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
2270 	T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
2271 	  T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2272 	T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
2273 	T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
2274 	T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
2275 	T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2276 	  T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2277 	T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
2278 	T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
2279 	T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
2280 	T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
2281 	  T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2282 	T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
2283 	T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
2284 	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2285 	T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
2286 	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2287 	T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2288 	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2289 
2290 #define NIX_TX_FASTPATH_MODES_112_127                                          \
2291 	T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
2292 	T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
2293 	T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
2294 	T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
2295 	  T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
2296 	T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
2297 	T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
2298 	  T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
2299 	T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
2300 	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
2301 	T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
2302 	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2303 	T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
2304 	T(sec_ts_tso_noff_l3l4csum, 8,                                         \
2305 	  T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
2306 	T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
2307 	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
2308 	T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2309 	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2310 	T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2311 	T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2312 	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2313 	T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2314 	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2315 	T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2316 	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2317 		  L3L4CSUM_F)
2318 
2319 #define NIX_TX_FASTPATH_MODES                                                  \
2320 	NIX_TX_FASTPATH_MODES_0_15                                             \
2321 	NIX_TX_FASTPATH_MODES_16_31                                            \
2322 	NIX_TX_FASTPATH_MODES_32_47                                            \
2323 	NIX_TX_FASTPATH_MODES_48_63                                            \
2324 	NIX_TX_FASTPATH_MODES_64_79                                            \
2325 	NIX_TX_FASTPATH_MODES_80_95                                            \
2326 	NIX_TX_FASTPATH_MODES_96_111                                           \
2327 	NIX_TX_FASTPATH_MODES_112_127
2328 
2329 #define T(name, sz, flags)                                                     \
2330 	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_##name(           \
2331 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2332 	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_mseg_##name(      \
2333 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2334 	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_##name(       \
2335 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
2336 	uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_mseg_##name(  \
2337 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
2338 
2339 NIX_TX_FASTPATH_MODES
2340 #undef T
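/* For reference, a single T() entry above expands to four declarations, e.g.
 * T(no_offload, 6, NIX_TX_OFFLOAD_NONE) declares:
 *   cn9k_nix_xmit_pkts_no_offload()
 *   cn9k_nix_xmit_pkts_mseg_no_offload()
 *   cn9k_nix_xmit_pkts_vec_no_offload()
 *   cn9k_nix_xmit_pkts_vec_mseg_no_offload()
 * each with the (void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)
 * prototype.
 */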
2341 
2342 #define NIX_TX_XMIT(fn, sz, flags)                                             \
2343 	uint16_t __rte_noinline __rte_hot fn(                                  \
2344 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2345 	{                                                                      \
2346 		uint64_t cmd[sz];                                              \
2347 		/* For TSO inner checksum is a must */                         \
2348 		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2349 		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2350 			return 0;                                              \
2351 		return cn9k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd,        \
2352 					  flags);                              \
2353 	}
2354 
2355 #define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
2356 	uint16_t __rte_noinline __rte_hot fn(                                  \
2357 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2358 	{                                                                      \
2359 		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2360 		/* For TSO inner checksum is a must */                         \
2361 		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2362 		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2363 			return 0;                                              \
2364 		return cn9k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,   \
2365 					       (flags) | NIX_TX_MULTI_SEG_F);  \
2366 	}
2367 
2368 #define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
2369 	uint16_t __rte_noinline __rte_hot fn(                                  \
2370 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2371 	{                                                                      \
2372 		uint64_t cmd[sz];                                              \
2373 		/* For TSO inner checksum is a must */                         \
2374 		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2375 		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2376 			return 0;                                              \
2377 		return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2378 						 (flags));                     \
2379 	}
2380 
2381 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
2382 	uint16_t __rte_noinline __rte_hot fn(                                  \
2383 		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
2384 	{                                                                      \
2385 		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
2386 		/* For TSO inner checksum is a must */                         \
2387 		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
2388 		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
2389 			return 0;                                              \
2390 		return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
2391 						 (flags) |                     \
2392 							 NIX_TX_MULTI_SEG_F);  \
2393 	}
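/* A minimal usage sketch (assumed to mirror how the per-mode build units of
 * this driver instantiate these helpers; illustrative only):
 *
 *   #define T(name, sz, flags) \
 *           NIX_TX_XMIT(cn9k_nix_xmit_pkts_##name, sz, flags)
 *
 *   NIX_TX_FASTPATH_MODES_0_15
 *   #undef T
 *
 * which emits the non-inlined cn9k_nix_xmit_pkts_<mode>() definitions whose
 * prototypes are declared via NIX_TX_FASTPATH_MODES above.
 */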
2394 
2395 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_all_offload(void *tx_queue,
2396 								 struct rte_mbuf **tx_pkts,
2397 								 uint16_t pkts);
2398 
2399 uint16_t __rte_noinline __rte_hot cn9k_nix_xmit_pkts_vec_all_offload(void *tx_queue,
2400 								     struct rte_mbuf **tx_pkts,
2401 								     uint16_t pkts);
2402 
2403 #endif /* __CN9K_TX_H__ */
2404