xref: /dpdk/drivers/net/enic/enic_rxtx_vec_avx2.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
18a6ff33dSHyong Youb Kim /* SPDX-License-Identifier: BSD-3-Clause
28a6ff33dSHyong Youb Kim  * Copyright 2008-2018 Cisco Systems, Inc.  All rights reserved.
38a6ff33dSHyong Youb Kim  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
48a6ff33dSHyong Youb Kim  */
58a6ff33dSHyong Youb Kim 
68a6ff33dSHyong Youb Kim #include <rte_mbuf.h>
7df96fd0dSBruce Richardson #include <ethdev_driver.h>
8ac61aa64SCiara Power #include <rte_vect.h>
98a6ff33dSHyong Youb Kim 
108a6ff33dSHyong Youb Kim #include "enic_compat.h"
118a6ff33dSHyong Youb Kim #include "rq_enet_desc.h"
128a6ff33dSHyong Youb Kim #include "enic.h"
138a6ff33dSHyong Youb Kim #include "enic_rxtx_common.h"
148a6ff33dSHyong Youb Kim 
158a6ff33dSHyong Youb Kim #include <x86intrin.h>
168a6ff33dSHyong Youb Kim 
178a6ff33dSHyong Youb Kim static struct rte_mbuf *
rx_one(struct cq_enet_rq_desc * cqd,struct rte_mbuf * mb,struct enic * enic)188a6ff33dSHyong Youb Kim rx_one(struct cq_enet_rq_desc *cqd, struct rte_mbuf *mb, struct enic *enic)
198a6ff33dSHyong Youb Kim {
208a6ff33dSHyong Youb Kim 	bool tnl;
218a6ff33dSHyong Youb Kim 
228a6ff33dSHyong Youb Kim 	*(uint64_t *)&mb->rearm_data = enic->mbuf_initializer;
238a6ff33dSHyong Youb Kim 	mb->data_len = cqd->bytes_written_flags &
248a6ff33dSHyong Youb Kim 		CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
258a6ff33dSHyong Youb Kim 	mb->pkt_len = mb->data_len;
268a6ff33dSHyong Youb Kim 	tnl = enic->overlay_offload && (cqd->completed_index_flags &
278a6ff33dSHyong Youb Kim 					CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0;
288a6ff33dSHyong Youb Kim 	mb->packet_type =
298a6ff33dSHyong Youb Kim 		enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, tnl);
308a6ff33dSHyong Youb Kim 	enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb);
318a6ff33dSHyong Youb Kim 	/* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */
328a6ff33dSHyong Youb Kim 	if (tnl) {
338a6ff33dSHyong Youb Kim 		mb->packet_type &= ~(RTE_PTYPE_L3_MASK |
348a6ff33dSHyong Youb Kim 				     RTE_PTYPE_L4_MASK);
358a6ff33dSHyong Youb Kim 	}
368a6ff33dSHyong Youb Kim 	return mb;
378a6ff33dSHyong Youb Kim }
388a6ff33dSHyong Youb Kim 
398a6ff33dSHyong Youb Kim static uint16_t
enic_noscatter_vec_recv_pkts(void * rx_queue,struct rte_mbuf ** rx_pkts,uint16_t nb_pkts)408a6ff33dSHyong Youb Kim enic_noscatter_vec_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
418a6ff33dSHyong Youb Kim 			     uint16_t nb_pkts)
428a6ff33dSHyong Youb Kim {
438a6ff33dSHyong Youb Kim 	struct rte_mbuf **rx, **rxmb;
448a6ff33dSHyong Youb Kim 	uint16_t cq_idx, nb_rx, max_rx;
458a6ff33dSHyong Youb Kim 	struct cq_enet_rq_desc *cqd;
468a6ff33dSHyong Youb Kim 	struct rq_enet_desc *rqd;
478a6ff33dSHyong Youb Kim 	struct vnic_cq *cq;
488a6ff33dSHyong Youb Kim 	struct vnic_rq *rq;
498a6ff33dSHyong Youb Kim 	struct enic *enic;
508a6ff33dSHyong Youb Kim 	uint8_t color;
518a6ff33dSHyong Youb Kim 
528a6ff33dSHyong Youb Kim 	rq = rx_queue;
538a6ff33dSHyong Youb Kim 	enic = vnic_dev_priv(rq->vdev);
548a6ff33dSHyong Youb Kim 	cq = &enic->cq[enic_cq_rq(enic, rq->index)];
558a6ff33dSHyong Youb Kim 	cq_idx = cq->to_clean;
568a6ff33dSHyong Youb Kim 
578a6ff33dSHyong Youb Kim 	/*
588a6ff33dSHyong Youb Kim 	 * Fill up the reserve of free mbufs. Below, we restock the receive
598a6ff33dSHyong Youb Kim 	 * ring with these mbufs to avoid allocation failures.
608a6ff33dSHyong Youb Kim 	 */
618a6ff33dSHyong Youb Kim 	if (rq->num_free_mbufs == 0) {
628a6ff33dSHyong Youb Kim 		if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs,
638a6ff33dSHyong Youb Kim 					 ENIC_RX_BURST_MAX))
648a6ff33dSHyong Youb Kim 			return 0;
658a6ff33dSHyong Youb Kim 		rq->num_free_mbufs = ENIC_RX_BURST_MAX;
668a6ff33dSHyong Youb Kim 	}
678a6ff33dSHyong Youb Kim 	/* Receive until the end of the ring, at most. */
688a6ff33dSHyong Youb Kim 	max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs);
698a6ff33dSHyong Youb Kim 	max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx);
708a6ff33dSHyong Youb Kim 
718a6ff33dSHyong Youb Kim 	rxmb = rq->mbuf_ring + cq_idx;
728a6ff33dSHyong Youb Kim 	color = cq->last_color;
738a6ff33dSHyong Youb Kim 	cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx;
748a6ff33dSHyong Youb Kim 	rx = rx_pkts;
758a6ff33dSHyong Youb Kim 	if (max_rx == 0 ||
768a6ff33dSHyong Youb Kim 	    (cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
778a6ff33dSHyong Youb Kim 		return 0;
788a6ff33dSHyong Youb Kim 
798a6ff33dSHyong Youb Kim 	/* Step 1: Process one packet to do aligned 256-bit load below */
808a6ff33dSHyong Youb Kim 	if (cq_idx & 0x1) {
818a6ff33dSHyong Youb Kim 		if (unlikely(cqd->bytes_written_flags &
828a6ff33dSHyong Youb Kim 			     CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
838a6ff33dSHyong Youb Kim 			rte_pktmbuf_free(*rxmb++);
848a6ff33dSHyong Youb Kim 			rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
858a6ff33dSHyong Youb Kim 		} else {
868a6ff33dSHyong Youb Kim 			*rx++ = rx_one(cqd, *rxmb++, enic);
878a6ff33dSHyong Youb Kim 		}
888a6ff33dSHyong Youb Kim 		cqd++;
898a6ff33dSHyong Youb Kim 		max_rx--;
908a6ff33dSHyong Youb Kim 	}
918a6ff33dSHyong Youb Kim 
928a6ff33dSHyong Youb Kim 	const __m256i mask =
938a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* Second descriptor */
948a6ff33dSHyong Youb Kim 			0xff, /* type_color */
958a6ff33dSHyong Youb Kim 			(CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
968a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_IPV4 |
978a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_IPV6 |
988a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_TCP |
998a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
1008a6ff33dSHyong Youb Kim 			0, 0, /* checksum_fcoe */
1018a6ff33dSHyong Youb Kim 			0xff, 0xff, /* vlan */
1028a6ff33dSHyong Youb Kim 			0x3f, 0xff, /* bytes_written_flags */
1038a6ff33dSHyong Youb Kim 			0xff, 0xff, 0xff, 0xff, /* rss_hash */
1048a6ff33dSHyong Youb Kim 			0xff, 0xff, /* q_number_rss_type_flags */
1058a6ff33dSHyong Youb Kim 			0, 0, /* completed_index_flags */
1068a6ff33dSHyong Youb Kim 			/* First descriptor */
1078a6ff33dSHyong Youb Kim 			0xff, /* type_color */
1088a6ff33dSHyong Youb Kim 			(CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
1098a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_IPV4 |
1108a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_IPV6 |
1118a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_TCP |
1128a6ff33dSHyong Youb Kim 			 CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
1138a6ff33dSHyong Youb Kim 			0, 0, /* checksum_fcoe */
1148a6ff33dSHyong Youb Kim 			0xff, 0xff, /* vlan */
1158a6ff33dSHyong Youb Kim 			0x3f, 0xff, /* bytes_written_flags */
1168a6ff33dSHyong Youb Kim 			0xff, 0xff, 0xff, 0xff, /* rss_hash */
1178a6ff33dSHyong Youb Kim 			0xff, 0xff, /* q_number_rss_type_flags */
1188a6ff33dSHyong Youb Kim 			0, 0 /* completed_index_flags */
1198a6ff33dSHyong Youb Kim 			);
1208a6ff33dSHyong Youb Kim 	const __m256i shuffle_mask =
1218a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* Second descriptor */
1228a6ff33dSHyong Youb Kim 			7, 6, 5, 4,             /* rss = rss_hash */
1238a6ff33dSHyong Youb Kim 			11, 10,                 /* vlan_tci = vlan */
1248a6ff33dSHyong Youb Kim 			9, 8,                   /* data_len = bytes_written */
1258a6ff33dSHyong Youb Kim 			0x80, 0x80, 9, 8,       /* pkt_len = bytes_written */
1268a6ff33dSHyong Youb Kim 			0x80, 0x80, 0x80, 0x80, /* packet_type = 0 */
1278a6ff33dSHyong Youb Kim 			/* First descriptor */
1288a6ff33dSHyong Youb Kim 			7, 6, 5, 4,             /* rss = rss_hash */
1298a6ff33dSHyong Youb Kim 			11, 10,                 /* vlan_tci = vlan */
1308a6ff33dSHyong Youb Kim 			9, 8,                   /* data_len = bytes_written */
1318a6ff33dSHyong Youb Kim 			0x80, 0x80, 9, 8,       /* pkt_len = bytes_written */
1328a6ff33dSHyong Youb Kim 			0x80, 0x80, 0x80, 0x80  /* packet_type = 0 */
1338a6ff33dSHyong Youb Kim 			);
1348a6ff33dSHyong Youb Kim 	/* Used to collect 8 flags from 8 desc into one register */
1358a6ff33dSHyong Youb Kim 	const __m256i flags_shuffle_mask =
1368a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* Second descriptor */
1378a6ff33dSHyong Youb Kim 			1, 3, 9, 14,
1388a6ff33dSHyong Youb Kim 			1, 3, 9, 14,
1398a6ff33dSHyong Youb Kim 			1, 3, 9, 14,
1408a6ff33dSHyong Youb Kim 			1, 3, 9, 14,
1418a6ff33dSHyong Youb Kim 			/* First descriptor */
1428a6ff33dSHyong Youb Kim 			1, 3, 9, 14,
1438a6ff33dSHyong Youb Kim 			1, 3, 9, 14,
1448a6ff33dSHyong Youb Kim 			1, 3, 9, 14,
1458a6ff33dSHyong Youb Kim 			/*
1468a6ff33dSHyong Youb Kim 			 * Byte 3: upper byte of completed_index_flags
1478a6ff33dSHyong Youb Kim 			 *         bit 5 = fcoe (tunnel)
1488a6ff33dSHyong Youb Kim 			 * Byte 2: upper byte of q_number_rss_type_flags
1498a6ff33dSHyong Youb Kim 			 *         bits 2,3,4,5 = rss type
1508a6ff33dSHyong Youb Kim 			 *         bit 6 = csum_not_calc
1518a6ff33dSHyong Youb Kim 			 * Byte 1: upper byte of bytes_written_flags
1528a6ff33dSHyong Youb Kim 			 *         bit 6 = truncated
1538a6ff33dSHyong Youb Kim 			 *         bit 7 = vlan stripped
1548a6ff33dSHyong Youb Kim 			 * Byte 0: flags
1558a6ff33dSHyong Youb Kim 			 */
1568a6ff33dSHyong Youb Kim 			1, 3, 9, 14
1578a6ff33dSHyong Youb Kim 			);
1588a6ff33dSHyong Youb Kim 	/* Used to collect 8 VLAN IDs from 8 desc into one register */
1598a6ff33dSHyong Youb Kim 	const __m256i vlan_shuffle_mask =
1608a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* Second descriptor */
1618a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10,
1628a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10,
1638a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10,
1648a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10,
1658a6ff33dSHyong Youb Kim 			/* First descriptor */
1668a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10,
1678a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10,
1688a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10,
1698a6ff33dSHyong Youb Kim 			0x80, 0x80, 11, 10);
170*daa02b5cSOlivier Matz 	/* RTE_MBUF_F_RX_RSS_HASH is 1<<1 so fits in 8-bit integer */
1718a6ff33dSHyong Youb Kim 	const __m256i rss_shuffle =
1728a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* second 128 bits */
173*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
174*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
175*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
176*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
177*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
1788a6ff33dSHyong Youb Kim 			0, /* rss_types = 0 */
1798a6ff33dSHyong Youb Kim 			/* first 128 bits */
180*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
181*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
182*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
183*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
184*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
1858a6ff33dSHyong Youb Kim 			0 /* rss_types = 0 */);
1868a6ff33dSHyong Youb Kim 	/*
1878a6ff33dSHyong Youb Kim 	 * VLAN offload flags.
1888a6ff33dSHyong Youb Kim 	 * shuffle index:
1898a6ff33dSHyong Youb Kim 	 * vlan_stripped => bit 0
1908a6ff33dSHyong Youb Kim 	 * vlan_id == 0  => bit 1
1918a6ff33dSHyong Youb Kim 	 */
1928a6ff33dSHyong Youb Kim 	const __m256i vlan_shuffle =
1938a6ff33dSHyong Youb Kim 		_mm256_set_epi32(0, 0, 0, 0,
194*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED, 0,
195*daa02b5cSOlivier Matz 			RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED, RTE_MBUF_F_RX_VLAN);
1968a6ff33dSHyong Youb Kim 	/* Use the same shuffle index as vlan_shuffle */
1978a6ff33dSHyong Youb Kim 	const __m256i vlan_ptype_shuffle =
1988a6ff33dSHyong Youb Kim 		_mm256_set_epi32(0, 0, 0, 0,
1998a6ff33dSHyong Youb Kim 				 RTE_PTYPE_L2_ETHER,
2008a6ff33dSHyong Youb Kim 				 RTE_PTYPE_L2_ETHER,
2018a6ff33dSHyong Youb Kim 				 RTE_PTYPE_L2_ETHER,
2028a6ff33dSHyong Youb Kim 				 RTE_PTYPE_L2_ETHER_VLAN);
2038a6ff33dSHyong Youb Kim 	/*
	 * CKSUM flags. Shift right so they fit in 8-bit integers.
2058a6ff33dSHyong Youb Kim 	 * shuffle index:
2068a6ff33dSHyong Youb Kim 	 * ipv4_csum_ok    => bit 3
2078a6ff33dSHyong Youb Kim 	 * ip4             => bit 2
2088a6ff33dSHyong Youb Kim 	 * tcp_or_udp      => bit 1
2098a6ff33dSHyong Youb Kim 	 * tcp_udp_csum_ok => bit 0
2108a6ff33dSHyong Youb Kim 	 */
2118a6ff33dSHyong Youb Kim 	const __m256i csum_shuffle =
2128a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* second 128 bits */
2138a6ff33dSHyong Youb Kim 			/* 1111 ip4+ip4_ok+l4+l4_ok */
214*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
2158a6ff33dSHyong Youb Kim 			/* 1110 ip4_ok+ip4+l4+!l4_ok */
216*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
217*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1), /* 1101 ip4+ip4_ok */
218*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1), /* 1100 ip4_ok+ip4 */
219*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1), /* 1011 l4+l4_ok */
220*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1),  /* 1010 l4+!l4_ok */
2218a6ff33dSHyong Youb Kim 			0, /* 1001 */
2228a6ff33dSHyong Youb Kim 			0, /* 1000 */
2238a6ff33dSHyong Youb Kim 			/* 0111 !ip4_ok+ip4+l4+l4_ok */
224*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
2258a6ff33dSHyong Youb Kim 			/* 0110 !ip4_ok+ip4+l4+!l4_ok */
226*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
227*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1),  /* 0101 !ip4_ok+ip4 */
228*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1),  /* 0100 !ip4_ok+ip4 */
229*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1), /* 0011 l4+l4_ok */
230*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1),  /* 0010 l4+!l4_ok */
2318a6ff33dSHyong Youb Kim 			0, /* 0001 */
2328a6ff33dSHyong Youb Kim 			0, /* 0000 */
2338a6ff33dSHyong Youb Kim 			/* first 128 bits */
234*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
235*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
236*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1),
237*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1),
238*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1),
239*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1),
2408a6ff33dSHyong Youb Kim 			0, 0,
241*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
242*daa02b5cSOlivier Matz 			((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
243*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1),
244*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1),
245*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1),
246*daa02b5cSOlivier Matz 			(RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1),
2478a6ff33dSHyong Youb Kim 			0, 0);
2488a6ff33dSHyong Youb Kim 	/*
2498a6ff33dSHyong Youb Kim 	 * Non-fragment PTYPEs.
2508a6ff33dSHyong Youb Kim 	 * Shuffle 4-bit index:
2518a6ff33dSHyong Youb Kim 	 * ip6 => bit 0
2528a6ff33dSHyong Youb Kim 	 * ip4 => bit 1
2538a6ff33dSHyong Youb Kim 	 * udp => bit 2
2548a6ff33dSHyong Youb Kim 	 * tcp => bit 3
2558a6ff33dSHyong Youb Kim 	 *   bit
2568a6ff33dSHyong Youb Kim 	 * 3 2 1 0
2578a6ff33dSHyong Youb Kim 	 * -------
2588a6ff33dSHyong Youb Kim 	 * 0 0 0 0 unknown
2598a6ff33dSHyong Youb Kim 	 * 0 0 0 1 ip6 | nonfrag
2608a6ff33dSHyong Youb Kim 	 * 0 0 1 0 ip4 | nonfrag
2618a6ff33dSHyong Youb Kim 	 * 0 0 1 1 unknown
2628a6ff33dSHyong Youb Kim 	 * 0 1 0 0 unknown
2638a6ff33dSHyong Youb Kim 	 * 0 1 0 1 ip6 | udp
2648a6ff33dSHyong Youb Kim 	 * 0 1 1 0 ip4 | udp
2658a6ff33dSHyong Youb Kim 	 * 0 1 1 1 unknown
2668a6ff33dSHyong Youb Kim 	 * 1 0 0 0 unknown
2678a6ff33dSHyong Youb Kim 	 * 1 0 0 1 ip6 | tcp
2688a6ff33dSHyong Youb Kim 	 * 1 0 1 0 ip4 | tcp
2698a6ff33dSHyong Youb Kim 	 * 1 0 1 1 unknown
2708a6ff33dSHyong Youb Kim 	 * 1 1 0 0 unknown
2718a6ff33dSHyong Youb Kim 	 * 1 1 0 1 unknown
2728a6ff33dSHyong Youb Kim 	 * 1 1 1 0 unknown
2738a6ff33dSHyong Youb Kim 	 * 1 1 1 1 unknown
2748a6ff33dSHyong Youb Kim 	 *
	 * PTYPEs do not fit in 8 bits, so shift right by 4.
2768a6ff33dSHyong Youb Kim 	 */
2778a6ff33dSHyong Youb Kim 	const __m256i nonfrag_ptype_shuffle =
2788a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* second 128 bits */
2798a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
2808a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2818a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2828a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2838a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2848a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2858a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
2868a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
2878a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2888a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
2898a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_NONFRAG) >> 4,
2908a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
2918a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_NONFRAG) >> 4,
2928a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
2938a6ff33dSHyong Youb Kim 			/* first 128 bits */
2948a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
2958a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2968a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2978a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2988a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2998a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3008a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
3018a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
3028a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3038a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3048a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_NONFRAG) >> 4,
3058a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3068a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_NONFRAG) >> 4,
3078a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN);
3088a6ff33dSHyong Youb Kim 	/* Fragment PTYPEs. Use the same shuffle index as above. */
3098a6ff33dSHyong Youb Kim 	const __m256i frag_ptype_shuffle =
3108a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* second 128 bits */
3118a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
3128a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3138a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3148a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3158a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3168a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3178a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3188a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3198a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3208a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3218a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3228a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3238a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3248a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3258a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3268a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3278a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3288a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
3298a6ff33dSHyong Youb Kim 			/* first 128 bits */
3308a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
3318a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3328a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3338a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3348a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3358a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3368a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3378a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3388a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3398a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3408a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3418a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3428a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3438a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3448a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3458a6ff33dSHyong Youb Kim 			(RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3468a6ff33dSHyong Youb Kim 			 RTE_PTYPE_L4_FRAG) >> 4,
3478a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN);
3488a6ff33dSHyong Youb Kim 	/*
3498a6ff33dSHyong Youb Kim 	 * Tunnel PTYPEs. Use the same shuffle index as above.
3508a6ff33dSHyong Youb Kim 	 * L4 types are not part of this table. They come from non-tunnel
3518a6ff33dSHyong Youb Kim 	 * types above.
3528a6ff33dSHyong Youb Kim 	 */
3538a6ff33dSHyong Youb Kim 	const __m256i tnl_l3_ptype_shuffle =
3548a6ff33dSHyong Youb Kim 		_mm256_set_epi8(/* second 128 bits */
3558a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
3568a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3578a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3588a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3598a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3608a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3618a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3628a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3638a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3648a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3658a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3668a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
3678a6ff33dSHyong Youb Kim 			/* first 128 bits */
3688a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN,
3698a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3708a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3718a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3728a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3738a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3748a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3758a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3768a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3778a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3788a6ff33dSHyong Youb Kim 			RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3798a6ff33dSHyong Youb Kim 			RTE_PTYPE_UNKNOWN);
3808a6ff33dSHyong Youb Kim 
3818a6ff33dSHyong Youb Kim 	const __m256i mbuf_init = _mm256_set_epi64x(0, enic->mbuf_initializer,
3828a6ff33dSHyong Youb Kim 						    0, enic->mbuf_initializer);
3838a6ff33dSHyong Youb Kim 
3848a6ff33dSHyong Youb Kim 	/*
3858a6ff33dSHyong Youb Kim 	 * --- cq desc fields ---    offset
3868a6ff33dSHyong Youb Kim 	 * completed_index_flags    - 0   use: fcoe
3878a6ff33dSHyong Youb Kim 	 * q_number_rss_type_flags  - 2   use: rss types, csum_not_calc
3888a6ff33dSHyong Youb Kim 	 * rss_hash                 - 4   ==> mbuf.hash.rss
3898a6ff33dSHyong Youb Kim 	 * bytes_written_flags      - 8   ==> mbuf.pkt_len,data_len
3908a6ff33dSHyong Youb Kim 	 *                                use: truncated, vlan_stripped
3918a6ff33dSHyong Youb Kim 	 * vlan                     - 10  ==> mbuf.vlan_tci
3928a6ff33dSHyong Youb Kim 	 * checksum_fcoe            - 12  (unused)
3938a6ff33dSHyong Youb Kim 	 * flags                    - 14  use: all bits
3948a6ff33dSHyong Youb Kim 	 * type_color               - 15  (unused)
3958a6ff33dSHyong Youb Kim 	 *
3968a6ff33dSHyong Youb Kim 	 * --- mbuf fields ---       offset
3978a6ff33dSHyong Youb Kim 	 * rearm_data              ---- 16
3988a6ff33dSHyong Youb Kim 	 * data_off    - 0      (mbuf_init) -+
3998a6ff33dSHyong Youb Kim 	 * refcnt      - 2      (mbuf_init)  |
4008a6ff33dSHyong Youb Kim 	 * nb_segs     - 4      (mbuf_init)  | 16B 128b
4018a6ff33dSHyong Youb Kim 	 * port        - 6      (mbuf_init)  |
4028a6ff33dSHyong Youb Kim 	 * ol_flag     - 8      (from cqd)  -+
4038a6ff33dSHyong Youb Kim 	 * rx_descriptor_fields1   ---- 32
4048a6ff33dSHyong Youb Kim 	 * packet_type - 0      (from cqd)  -+
4058a6ff33dSHyong Youb Kim 	 * pkt_len     - 4      (from cqd)   |
4068a6ff33dSHyong Youb Kim 	 * data_len    - 8      (from cqd)   | 16B 128b
4078a6ff33dSHyong Youb Kim 	 * vlan_tci    - 10     (from cqd)   |
4088a6ff33dSHyong Youb Kim 	 * rss         - 12     (from cqd)  -+
4098a6ff33dSHyong Youb Kim 	 */
4108a6ff33dSHyong Youb Kim 
4118a6ff33dSHyong Youb Kim 	__m256i overlay_enabled =
4128a6ff33dSHyong Youb Kim 		_mm256_set1_epi32((uint32_t)enic->overlay_offload);
4138a6ff33dSHyong Youb Kim 
4148a6ff33dSHyong Youb Kim 	/* Step 2: Process 8 packets per loop using SIMD */
4158a6ff33dSHyong Youb Kim 	while (max_rx > 7 && (((cqd + 7)->type_color &
4168a6ff33dSHyong Youb Kim 			       CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
4178a6ff33dSHyong Youb Kim 		/* Load 8 16B CQ descriptors */
4188a6ff33dSHyong Youb Kim 		__m256i cqd01 = _mm256_load_si256((void *)cqd);
4198a6ff33dSHyong Youb Kim 		__m256i cqd23 = _mm256_load_si256((void *)(cqd + 2));
4208a6ff33dSHyong Youb Kim 		__m256i cqd45 = _mm256_load_si256((void *)(cqd + 4));
4218a6ff33dSHyong Youb Kim 		__m256i cqd67 = _mm256_load_si256((void *)(cqd + 6));
4228a6ff33dSHyong Youb Kim 		/* Copy 8 mbuf pointers to rx_pkts */
4238a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((void *)rx,
4248a6ff33dSHyong Youb Kim 				    _mm256_loadu_si256((void *)rxmb));
4258a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((void *)(rx + 4),
4268a6ff33dSHyong Youb Kim 				    _mm256_loadu_si256((void *)(rxmb + 4)));
4278a6ff33dSHyong Youb Kim 
4288a6ff33dSHyong Youb Kim 		/*
4298a6ff33dSHyong Youb Kim 		 * Collect 8 flags (each 32 bits) into one register.
4308a6ff33dSHyong Youb Kim 		 * 4 shuffles, 3 blends, 1 permute for 8 desc: 1 inst/desc
4318a6ff33dSHyong Youb Kim 		 */
4328a6ff33dSHyong Youb Kim 		__m256i flags01 =
4338a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(cqd01, flags_shuffle_mask);
4348a6ff33dSHyong Youb Kim 		/*
4358a6ff33dSHyong Youb Kim 		 * Shuffle above produces 8 x 32-bit flags for 8 descriptors
4368a6ff33dSHyong Youb Kim 		 * in this order: 0, 0, 0, 0, 1, 1, 1, 1
		 * The duplicates in each 128-bit lane simplify blending
4388a6ff33dSHyong Youb Kim 		 * below.
4398a6ff33dSHyong Youb Kim 		 */
4408a6ff33dSHyong Youb Kim 		__m256i flags23 =
4418a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(cqd23, flags_shuffle_mask);
4428a6ff33dSHyong Youb Kim 		__m256i flags45 =
4438a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(cqd45, flags_shuffle_mask);
4448a6ff33dSHyong Youb Kim 		__m256i flags67 =
4458a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(cqd67, flags_shuffle_mask);
4468a6ff33dSHyong Youb Kim 		/* 1st blend produces flags for desc: 0, 2, 0, 0, 1, 3, 1, 1 */
4478a6ff33dSHyong Youb Kim 		__m256i flags0_3 = _mm256_blend_epi32(flags01, flags23, 0x22);
4488a6ff33dSHyong Youb Kim 		/* 2nd blend produces flags for desc: 4, 4, 4, 6, 5, 5, 5, 7 */
4498a6ff33dSHyong Youb Kim 		__m256i flags4_7 = _mm256_blend_epi32(flags45, flags67, 0x88);
4508a6ff33dSHyong Youb Kim 		/* 3rd blend produces flags for desc: 0, 2, 4, 6, 1, 3, 5, 7 */
4518a6ff33dSHyong Youb Kim 		__m256i flags0_7 = _mm256_blend_epi32(flags0_3, flags4_7, 0xcc);
4528a6ff33dSHyong Youb Kim 		/*
4538a6ff33dSHyong Youb Kim 		 * Swap to reorder flags in this order: 1, 3, 5, 7, 0, 2, 4, 6
4548a6ff33dSHyong Youb Kim 		 * This order simplifies blend operations way below that
4558a6ff33dSHyong Youb Kim 		 * produce 'rearm' data for each mbuf.
4568a6ff33dSHyong Youb Kim 		 */
4578a6ff33dSHyong Youb Kim 		flags0_7 = _mm256_permute4x64_epi64(flags0_7,
4588a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2);
4598a6ff33dSHyong Youb Kim 
4608a6ff33dSHyong Youb Kim 		/*
4618a6ff33dSHyong Youb Kim 		 * Check truncated bits and bail out early on.
4628a6ff33dSHyong Youb Kim 		 * 6 avx inst, 1 or, 1 if-then-else for 8 desc: 1 inst/desc
4638a6ff33dSHyong Youb Kim 		 */
4648a6ff33dSHyong Youb Kim 		__m256i trunc =
4658a6ff33dSHyong Youb Kim 			_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 17), 31);
4668a6ff33dSHyong Youb Kim 		trunc = _mm256_add_epi64(trunc, _mm256_permute4x64_epi64(trunc,
4678a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2));
4688a6ff33dSHyong Youb Kim 		/* 0:63 contains 1+3+0+2 and 64:127 contains 5+7+4+6 */
4698a6ff33dSHyong Youb Kim 		if (_mm256_extract_epi64(trunc, 0) ||
4708a6ff33dSHyong Youb Kim 		    _mm256_extract_epi64(trunc, 1))
4718a6ff33dSHyong Youb Kim 			break;
4728a6ff33dSHyong Youb Kim 
4738a6ff33dSHyong Youb Kim 		/*
474*daa02b5cSOlivier Matz 		 * Compute RTE_MBUF_F_RX_RSS_HASH.
4758a6ff33dSHyong Youb Kim 		 * Use 2 shifts and 1 shuffle for 8 desc: 0.375 inst/desc
4768a6ff33dSHyong Youb Kim 		 * RSS types in byte 0, 4, 8, 12, 16, 20, 24, 28
4778a6ff33dSHyong Youb Kim 		 * Everything else is zero.
4788a6ff33dSHyong Youb Kim 		 */
4798a6ff33dSHyong Youb Kim 		__m256i rss_types =
4808a6ff33dSHyong Youb Kim 			_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 10), 28);
4818a6ff33dSHyong Youb Kim 		/*
482*daa02b5cSOlivier Matz 		 * RSS flags (RTE_MBUF_F_RX_RSS_HASH) are in
4838a6ff33dSHyong Youb Kim 		 * byte 0, 4, 8, 12, 16, 20, 24, 28
4848a6ff33dSHyong Youb Kim 		 * Everything else is zero.
4858a6ff33dSHyong Youb Kim 		 */
4868a6ff33dSHyong Youb Kim 		__m256i rss_flags = _mm256_shuffle_epi8(rss_shuffle, rss_types);
4878a6ff33dSHyong Youb Kim 
4888a6ff33dSHyong Youb Kim 		/*
4898a6ff33dSHyong Youb Kim 		 * Compute CKSUM flags. First build the index and then
4908a6ff33dSHyong Youb Kim 		 * use it to shuffle csum_shuffle.
4918a6ff33dSHyong Youb Kim 		 * 20 instructions including const loads: 2.5 inst/desc
4928a6ff33dSHyong Youb Kim 		 */
4938a6ff33dSHyong Youb Kim 		/*
4948a6ff33dSHyong Youb Kim 		 * csum_not_calc (bit 22)
4958a6ff33dSHyong Youb Kim 		 * csum_not_calc (0) => 0xffffffff
4968a6ff33dSHyong Youb Kim 		 * csum_not_calc (1) => 0x0
4978a6ff33dSHyong Youb Kim 		 */
4988a6ff33dSHyong Youb Kim 		const __m256i zero4 = _mm256_setzero_si256();
4998a6ff33dSHyong Youb Kim 		const __m256i mask22 = _mm256_set1_epi32(0x400000);
5008a6ff33dSHyong Youb Kim 		__m256i csum_not_calc = _mm256_cmpeq_epi32(zero4,
5018a6ff33dSHyong Youb Kim 			_mm256_and_si256(flags0_7, mask22));
5028a6ff33dSHyong Youb Kim 		/*
5038a6ff33dSHyong Youb Kim 		 * (tcp|udp) && !fragment => bit 1
5048a6ff33dSHyong Youb Kim 		 * tcp = bit 2, udp = bit 1, frag = bit 6
5058a6ff33dSHyong Youb Kim 		 */
5068a6ff33dSHyong Youb Kim 		const __m256i mask1 = _mm256_set1_epi32(0x2);
5078a6ff33dSHyong Youb Kim 		__m256i tcp_udp =
5088a6ff33dSHyong Youb Kim 			_mm256_andnot_si256(_mm256_srli_epi32(flags0_7, 5),
5098a6ff33dSHyong Youb Kim 				_mm256_or_si256(flags0_7,
5108a6ff33dSHyong Youb Kim 					_mm256_srli_epi32(flags0_7, 1)));
5118a6ff33dSHyong Youb Kim 		tcp_udp = _mm256_and_si256(tcp_udp, mask1);
5128a6ff33dSHyong Youb Kim 		/* ipv4 (bit 5) => bit 2 */
5138a6ff33dSHyong Youb Kim 		const __m256i mask2 = _mm256_set1_epi32(0x4);
5148a6ff33dSHyong Youb Kim 		__m256i ipv4 = _mm256_and_si256(mask2,
5158a6ff33dSHyong Youb Kim 			_mm256_srli_epi32(flags0_7, 3));
5168a6ff33dSHyong Youb Kim 		/*
5178a6ff33dSHyong Youb Kim 		 * ipv4_csum_ok (bit 3) => bit 3
5188a6ff33dSHyong Youb Kim 		 * tcp_udp_csum_ok (bit 0) => bit 0
5198a6ff33dSHyong Youb Kim 		 * 0x9
5208a6ff33dSHyong Youb Kim 		 */
5218a6ff33dSHyong Youb Kim 		const __m256i mask0_3 = _mm256_set1_epi32(0x9);
5228a6ff33dSHyong Youb Kim 		__m256i csum_idx = _mm256_and_si256(flags0_7, mask0_3);
5238a6ff33dSHyong Youb Kim 		csum_idx = _mm256_and_si256(csum_not_calc,
5248a6ff33dSHyong Youb Kim 			_mm256_or_si256(_mm256_or_si256(csum_idx, ipv4),
5258a6ff33dSHyong Youb Kim 				tcp_udp));
5268a6ff33dSHyong Youb Kim 		__m256i csum_flags =
5278a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(csum_shuffle, csum_idx);
5288a6ff33dSHyong Youb Kim 		/* Shift left to restore CKSUM flags. See csum_shuffle. */
5298a6ff33dSHyong Youb Kim 		csum_flags = _mm256_slli_epi32(csum_flags, 1);
5308a6ff33dSHyong Youb Kim 		/* Combine csum flags and offload flags: 0.125 inst/desc */
5318a6ff33dSHyong Youb Kim 		rss_flags = _mm256_or_si256(rss_flags, csum_flags);
5328a6ff33dSHyong Youb Kim 
5338a6ff33dSHyong Youb Kim 		/*
5348a6ff33dSHyong Youb Kim 		 * Collect 8 VLAN IDs and compute vlan_id != 0 on each.
5358a6ff33dSHyong Youb Kim 		 * 4 shuffles, 3 blends, 1 permute, 1 cmp, 1 sub for 8 desc:
5368a6ff33dSHyong Youb Kim 		 * 1.25 inst/desc
5378a6ff33dSHyong Youb Kim 		 */
5388a6ff33dSHyong Youb Kim 		__m256i vlan01 = _mm256_shuffle_epi8(cqd01, vlan_shuffle_mask);
5398a6ff33dSHyong Youb Kim 		__m256i vlan23 = _mm256_shuffle_epi8(cqd23, vlan_shuffle_mask);
5408a6ff33dSHyong Youb Kim 		__m256i vlan45 = _mm256_shuffle_epi8(cqd45, vlan_shuffle_mask);
5418a6ff33dSHyong Youb Kim 		__m256i vlan67 = _mm256_shuffle_epi8(cqd67, vlan_shuffle_mask);
5428a6ff33dSHyong Youb Kim 		__m256i vlan0_3 = _mm256_blend_epi32(vlan01, vlan23, 0x22);
5438a6ff33dSHyong Youb Kim 		__m256i vlan4_7 = _mm256_blend_epi32(vlan45, vlan67, 0x88);
5448a6ff33dSHyong Youb Kim 		/* desc: 0, 2, 4, 6, 1, 3, 5, 7 */
5458a6ff33dSHyong Youb Kim 		__m256i vlan0_7 = _mm256_blend_epi32(vlan0_3, vlan4_7, 0xcc);
5468a6ff33dSHyong Youb Kim 		/* desc: 1, 3, 5, 7, 0, 2, 4, 6 */
5478a6ff33dSHyong Youb Kim 		vlan0_7 = _mm256_permute4x64_epi64(vlan0_7,
5488a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2);
5498a6ff33dSHyong Youb Kim 		/*
5508a6ff33dSHyong Youb Kim 		 * Compare 0 == vlan_id produces 0xffffffff (-1) if
5518a6ff33dSHyong Youb Kim 		 * vlan 0 and 0 if vlan non-0. Then subtracting the
5528a6ff33dSHyong Youb Kim 		 * result from 0 produces 0 - (-1) = 1 for vlan 0, and
5538a6ff33dSHyong Youb Kim 		 * 0 - 0 = 0 for vlan non-0.
5548a6ff33dSHyong Youb Kim 		 */
5558a6ff33dSHyong Youb Kim 		vlan0_7 = _mm256_cmpeq_epi32(zero4, vlan0_7);
5568a6ff33dSHyong Youb Kim 		/* vlan_id != 0 => 0, vlan_id == 0 => 1 */
5578a6ff33dSHyong Youb Kim 		vlan0_7 = _mm256_sub_epi32(zero4, vlan0_7);
5588a6ff33dSHyong Youb Kim 
5598a6ff33dSHyong Youb Kim 		/*
560*daa02b5cSOlivier Matz 		 * Compute RTE_MBUF_F_RX_VLAN and RTE_MBUF_F_RX_VLAN_STRIPPED.
5618a6ff33dSHyong Youb Kim 		 * Use 3 shifts, 1 or,  1 shuffle for 8 desc: 0.625 inst/desc
5628a6ff33dSHyong Youb Kim 		 * VLAN offload flags in byte 0, 4, 8, 12, 16, 20, 24, 28
5638a6ff33dSHyong Youb Kim 		 * Everything else is zero.
5648a6ff33dSHyong Youb Kim 		 */
5658a6ff33dSHyong Youb Kim 		__m256i vlan_idx =
5668a6ff33dSHyong Youb Kim 			_mm256_or_si256(/* vlan_stripped => bit 0 */
5678a6ff33dSHyong Youb Kim 				_mm256_srli_epi32(_mm256_slli_epi32(flags0_7,
5688a6ff33dSHyong Youb Kim 					16), 31),
5698a6ff33dSHyong Youb Kim 				/* (vlan_id == 0) => bit 1 */
5708a6ff33dSHyong Youb Kim 				_mm256_slli_epi32(vlan0_7, 1));
5718a6ff33dSHyong Youb Kim 		/*
5728a6ff33dSHyong Youb Kim 		 * The index captures 4 cases.
5738a6ff33dSHyong Youb Kim 		 * stripped, id = 0   ==> 11b = 3
5748a6ff33dSHyong Youb Kim 		 * stripped, id != 0  ==> 01b = 1
5758a6ff33dSHyong Youb Kim 		 * not strip, id == 0 ==> 10b = 2
5768a6ff33dSHyong Youb Kim 		 * not strip, id != 0 ==> 00b = 0
5778a6ff33dSHyong Youb Kim 		 */
5788a6ff33dSHyong Youb Kim 		__m256i vlan_flags = _mm256_permutevar8x32_epi32(vlan_shuffle,
5798a6ff33dSHyong Youb Kim 			vlan_idx);
5808a6ff33dSHyong Youb Kim 		/* Combine vlan and offload flags: 0.125 inst/desc */
5818a6ff33dSHyong Youb Kim 		rss_flags = _mm256_or_si256(rss_flags, vlan_flags);
5828a6ff33dSHyong Youb Kim 
5838a6ff33dSHyong Youb Kim 		/*
5848a6ff33dSHyong Youb Kim 		 * Compute non-tunnel PTYPEs.
5858a6ff33dSHyong Youb Kim 		 * 17 inst / 8 desc = 2.125 inst/desc
5868a6ff33dSHyong Youb Kim 		 */
5878a6ff33dSHyong Youb Kim 		/* ETHER and ETHER_VLAN */
5888a6ff33dSHyong Youb Kim 		__m256i vlan_ptype =
5898a6ff33dSHyong Youb Kim 			_mm256_permutevar8x32_epi32(vlan_ptype_shuffle,
5908a6ff33dSHyong Youb Kim 				vlan_idx);
5918a6ff33dSHyong Youb Kim 		/* Build the ptype index from flags */
5928a6ff33dSHyong Youb Kim 		tcp_udp = _mm256_slli_epi32(flags0_7, 29);
5938a6ff33dSHyong Youb Kim 		tcp_udp = _mm256_slli_epi32(_mm256_srli_epi32(tcp_udp, 30), 2);
5948a6ff33dSHyong Youb Kim 		__m256i ip4_ip6 =
5958a6ff33dSHyong Youb Kim 			_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 26), 30);
5968a6ff33dSHyong Youb Kim 		__m256i ptype_idx = _mm256_or_si256(tcp_udp, ip4_ip6);
5978a6ff33dSHyong Youb Kim 		__m256i frag_bit =
5988a6ff33dSHyong Youb Kim 			_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 25), 31);
5998a6ff33dSHyong Youb Kim 		__m256i nonfrag_ptype =
6008a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(nonfrag_ptype_shuffle, ptype_idx);
6018a6ff33dSHyong Youb Kim 		__m256i frag_ptype =
6028a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(frag_ptype_shuffle, ptype_idx);
6038a6ff33dSHyong Youb Kim 		/*
6048a6ff33dSHyong Youb Kim 		 * Zero out the unwanted types and combine the remaining bits.
6058a6ff33dSHyong Youb Kim 		 * The effect is same as selecting non-frag or frag types
6068a6ff33dSHyong Youb Kim 		 * depending on the frag bit.
6078a6ff33dSHyong Youb Kim 		 */
6088a6ff33dSHyong Youb Kim 		nonfrag_ptype = _mm256_and_si256(nonfrag_ptype,
6098a6ff33dSHyong Youb Kim 			_mm256_cmpeq_epi32(zero4, frag_bit));
6108a6ff33dSHyong Youb Kim 		frag_ptype = _mm256_and_si256(frag_ptype,
6118a6ff33dSHyong Youb Kim 			_mm256_cmpgt_epi32(frag_bit, zero4));
6128a6ff33dSHyong Youb Kim 		__m256i ptype = _mm256_or_si256(nonfrag_ptype, frag_ptype);
6138a6ff33dSHyong Youb Kim 		ptype = _mm256_slli_epi32(ptype, 4);
6148a6ff33dSHyong Youb Kim 		/*
6158a6ff33dSHyong Youb Kim 		 * Compute tunnel PTYPEs.
6168a6ff33dSHyong Youb Kim 		 * 15 inst / 8 desc = 1.875 inst/desc
6178a6ff33dSHyong Youb Kim 		 */
6188a6ff33dSHyong Youb Kim 		__m256i tnl_l3_ptype =
6198a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi8(tnl_l3_ptype_shuffle, ptype_idx);
6208a6ff33dSHyong Youb Kim 		tnl_l3_ptype = _mm256_slli_epi32(tnl_l3_ptype, 16);
6218a6ff33dSHyong Youb Kim 		/*
6228a6ff33dSHyong Youb Kim 		 * Shift non-tunnel L4 types to make them tunnel types.
6238a6ff33dSHyong Youb Kim 		 * RTE_PTYPE_L4_TCP << 16 == RTE_PTYPE_INNER_L4_TCP
6248a6ff33dSHyong Youb Kim 		 */
6258a6ff33dSHyong Youb Kim 		__m256i tnl_l4_ptype =
6268a6ff33dSHyong Youb Kim 			_mm256_slli_epi32(_mm256_and_si256(ptype,
6278a6ff33dSHyong Youb Kim 				_mm256_set1_epi32(RTE_PTYPE_L4_MASK)), 16);
6288a6ff33dSHyong Youb Kim 		__m256i tnl_ptype =
6298a6ff33dSHyong Youb Kim 			_mm256_or_si256(tnl_l3_ptype, tnl_l4_ptype);
6308a6ff33dSHyong Youb Kim 		tnl_ptype = _mm256_or_si256(tnl_ptype,
6318a6ff33dSHyong Youb Kim 			_mm256_set1_epi32(RTE_PTYPE_TUNNEL_GRENAT |
6328a6ff33dSHyong Youb Kim 				RTE_PTYPE_INNER_L2_ETHER));
6338a6ff33dSHyong Youb Kim 		/*
6348a6ff33dSHyong Youb Kim 		 * Select non-tunnel or tunnel types by zeroing out the
6358a6ff33dSHyong Youb Kim 		 * unwanted ones.
6368a6ff33dSHyong Youb Kim 		 */
6378a6ff33dSHyong Youb Kim 		__m256i tnl_flags = _mm256_and_si256(overlay_enabled,
6388a6ff33dSHyong Youb Kim 			_mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 2), 31));
6398a6ff33dSHyong Youb Kim 		tnl_ptype = _mm256_and_si256(tnl_ptype,
6408a6ff33dSHyong Youb Kim 			_mm256_sub_epi32(zero4, tnl_flags));
6418a6ff33dSHyong Youb Kim 		ptype =	_mm256_and_si256(ptype,
6428a6ff33dSHyong Youb Kim 			_mm256_cmpeq_epi32(zero4, tnl_flags));
6438a6ff33dSHyong Youb Kim 		/*
6448a6ff33dSHyong Youb Kim 		 * Combine types and swap to have ptypes in the same order
6458a6ff33dSHyong Youb Kim 		 * as desc.
6468a6ff33dSHyong Youb Kim 		 * desc: 0 2 4 6 1 3 5 7
6478a6ff33dSHyong Youb Kim 		 * 3 inst / 8 desc = 0.375 inst/desc
6488a6ff33dSHyong Youb Kim 		 */
6498a6ff33dSHyong Youb Kim 		ptype = _mm256_or_si256(ptype, tnl_ptype);
6508a6ff33dSHyong Youb Kim 		ptype = _mm256_or_si256(ptype, vlan_ptype);
6518a6ff33dSHyong Youb Kim 		ptype = _mm256_permute4x64_epi64(ptype,
6528a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2);
6538a6ff33dSHyong Youb Kim 
6548a6ff33dSHyong Youb Kim 		/*
6558a6ff33dSHyong Youb Kim 		 * Mask packet length.
6568a6ff33dSHyong Youb Kim 		 * Use 4 ands: 0.5 instructions/desc
6578a6ff33dSHyong Youb Kim 		 */
6588a6ff33dSHyong Youb Kim 		cqd01 = _mm256_and_si256(cqd01, mask);
6598a6ff33dSHyong Youb Kim 		cqd23 = _mm256_and_si256(cqd23, mask);
6608a6ff33dSHyong Youb Kim 		cqd45 = _mm256_and_si256(cqd45, mask);
6618a6ff33dSHyong Youb Kim 		cqd67 = _mm256_and_si256(cqd67, mask);
6628a6ff33dSHyong Youb Kim 		/*
6638a6ff33dSHyong Youb Kim 		 * Shuffle. Two 16B sets of the mbuf fields.
6648a6ff33dSHyong Youb Kim 		 * packet_type, pkt_len, data_len, vlan_tci, rss
6658a6ff33dSHyong Youb Kim 		 */
6668a6ff33dSHyong Youb Kim 		__m256i rearm01 = _mm256_shuffle_epi8(cqd01, shuffle_mask);
6678a6ff33dSHyong Youb Kim 		__m256i rearm23 = _mm256_shuffle_epi8(cqd23, shuffle_mask);
6688a6ff33dSHyong Youb Kim 		__m256i rearm45 = _mm256_shuffle_epi8(cqd45, shuffle_mask);
6698a6ff33dSHyong Youb Kim 		__m256i rearm67 = _mm256_shuffle_epi8(cqd67, shuffle_mask);
6708a6ff33dSHyong Youb Kim 
6718a6ff33dSHyong Youb Kim 		/*
6728a6ff33dSHyong Youb Kim 		 * Blend in ptypes
6738a6ff33dSHyong Youb Kim 		 * 4 blends and 3 shuffles for 8 desc: 0.875 inst/desc
6748a6ff33dSHyong Youb Kim 		 */
6758a6ff33dSHyong Youb Kim 		rearm01 = _mm256_blend_epi32(rearm01, ptype, 0x11);
6768a6ff33dSHyong Youb Kim 		rearm23 = _mm256_blend_epi32(rearm23,
6778a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi32(ptype, 1), 0x11);
6788a6ff33dSHyong Youb Kim 		rearm45 = _mm256_blend_epi32(rearm45,
6798a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi32(ptype, 2), 0x11);
6808a6ff33dSHyong Youb Kim 		rearm67 = _mm256_blend_epi32(rearm67,
6818a6ff33dSHyong Youb Kim 			_mm256_shuffle_epi32(ptype, 3), 0x11);
6828a6ff33dSHyong Youb Kim 
6838a6ff33dSHyong Youb Kim 		/*
6848a6ff33dSHyong Youb Kim 		 * Move rss_flags into ol_flags in mbuf_init.
6858a6ff33dSHyong Youb Kim 		 * Use 1 shift and 1 blend for each desc: 2 inst/desc
6868a6ff33dSHyong Youb Kim 		 */
6878a6ff33dSHyong Youb Kim 		__m256i mbuf_init4_5 = _mm256_blend_epi32(mbuf_init,
6888a6ff33dSHyong Youb Kim 			rss_flags, 0x44);
6898a6ff33dSHyong Youb Kim 		__m256i mbuf_init2_3 = _mm256_blend_epi32(mbuf_init,
6908a6ff33dSHyong Youb Kim 			_mm256_slli_si256(rss_flags, 4), 0x44);
6918a6ff33dSHyong Youb Kim 		__m256i mbuf_init0_1 = _mm256_blend_epi32(mbuf_init,
6928a6ff33dSHyong Youb Kim 			_mm256_slli_si256(rss_flags, 8), 0x44);
6938a6ff33dSHyong Youb Kim 		__m256i mbuf_init6_7 = _mm256_blend_epi32(mbuf_init,
6948a6ff33dSHyong Youb Kim 			_mm256_srli_si256(rss_flags, 4), 0x44);
6958a6ff33dSHyong Youb Kim 
6968a6ff33dSHyong Youb Kim 		/*
6978a6ff33dSHyong Youb Kim 		 * Build rearm, one per desc.
6988a6ff33dSHyong Youb Kim 		 * 8 blends and 4 permutes: 1.5 inst/desc
6998a6ff33dSHyong Youb Kim 		 */
7008a6ff33dSHyong Youb Kim 		__m256i rearm0 = _mm256_blend_epi32(rearm01,
7018a6ff33dSHyong Youb Kim 			mbuf_init0_1, 0xf0);
7028a6ff33dSHyong Youb Kim 		__m256i rearm1 = _mm256_blend_epi32(mbuf_init0_1,
7038a6ff33dSHyong Youb Kim 			rearm01, 0xf0);
7048a6ff33dSHyong Youb Kim 		__m256i rearm2 = _mm256_blend_epi32(rearm23,
7058a6ff33dSHyong Youb Kim 			mbuf_init2_3, 0xf0);
7068a6ff33dSHyong Youb Kim 		__m256i rearm3 = _mm256_blend_epi32(mbuf_init2_3,
7078a6ff33dSHyong Youb Kim 			rearm23, 0xf0);
7088a6ff33dSHyong Youb Kim 		/* Swap upper and lower 64 bits */
7098a6ff33dSHyong Youb Kim 		rearm0 = _mm256_permute4x64_epi64(rearm0,
7108a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2);
7118a6ff33dSHyong Youb Kim 		rearm2 = _mm256_permute4x64_epi64(rearm2,
7128a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2);
7138a6ff33dSHyong Youb Kim 		/* Second set of 4 descriptors */
7148a6ff33dSHyong Youb Kim 		__m256i rearm4 = _mm256_blend_epi32(rearm45,
7158a6ff33dSHyong Youb Kim 			mbuf_init4_5, 0xf0);
7168a6ff33dSHyong Youb Kim 		__m256i rearm5 = _mm256_blend_epi32(mbuf_init4_5,
7178a6ff33dSHyong Youb Kim 			rearm45, 0xf0);
7188a6ff33dSHyong Youb Kim 		__m256i rearm6 = _mm256_blend_epi32(rearm67,
7198a6ff33dSHyong Youb Kim 			mbuf_init6_7, 0xf0);
7208a6ff33dSHyong Youb Kim 		__m256i rearm7 = _mm256_blend_epi32(mbuf_init6_7,
7218a6ff33dSHyong Youb Kim 			rearm67, 0xf0);
7228a6ff33dSHyong Youb Kim 		rearm4 = _mm256_permute4x64_epi64(rearm4,
7238a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2);
7248a6ff33dSHyong Youb Kim 		rearm6 = _mm256_permute4x64_epi64(rearm6,
7258a6ff33dSHyong Youb Kim 			(1 << 6) + (0 << 4) + (3 << 2) + 2);
7268a6ff33dSHyong Youb Kim 
7278a6ff33dSHyong Youb Kim 		/*
7288a6ff33dSHyong Youb Kim 		 * Write out 32B of mbuf fields.
7298a6ff33dSHyong Youb Kim 		 * data_off    - off 0  (mbuf_init)
7308a6ff33dSHyong Youb Kim 		 * refcnt      - 2      (mbuf_init)
7318a6ff33dSHyong Youb Kim 		 * nb_segs     - 4      (mbuf_init)
7328a6ff33dSHyong Youb Kim 		 * port        - 6      (mbuf_init)
7338a6ff33dSHyong Youb Kim 		 * ol_flag     - 8      (from cqd)
7348a6ff33dSHyong Youb Kim 		 * packet_type - 16     (from cqd)
7358a6ff33dSHyong Youb Kim 		 * pkt_len     - 20     (from cqd)
7368a6ff33dSHyong Youb Kim 		 * data_len    - 24     (from cqd)
7378a6ff33dSHyong Youb Kim 		 * vlan_tci    - 26     (from cqd)
7388a6ff33dSHyong Youb Kim 		 * rss         - 28     (from cqd)
7398a6ff33dSHyong Youb Kim 		 */
7408a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[0]->rearm_data, rearm0);
7418a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[1]->rearm_data, rearm1);
7428a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[2]->rearm_data, rearm2);
7438a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[3]->rearm_data, rearm3);
7448a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[4]->rearm_data, rearm4);
7458a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[5]->rearm_data, rearm5);
7468a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[6]->rearm_data, rearm6);
7478a6ff33dSHyong Youb Kim 		_mm256_storeu_si256((__m256i *)&rxmb[7]->rearm_data, rearm7);
7488a6ff33dSHyong Youb Kim 
7498a6ff33dSHyong Youb Kim 		max_rx -= 8;
7508a6ff33dSHyong Youb Kim 		cqd += 8;
7518a6ff33dSHyong Youb Kim 		rx += 8;
7528a6ff33dSHyong Youb Kim 		rxmb += 8;
7538a6ff33dSHyong Youb Kim 	}
7548a6ff33dSHyong Youb Kim 
7558a6ff33dSHyong Youb Kim 	/*
7568a6ff33dSHyong Youb Kim 	 * Step 3: Slow path to handle a small (<8) number of packets and
7578a6ff33dSHyong Youb Kim 	 * occasional truncated packets.
7588a6ff33dSHyong Youb Kim 	 */
7598a6ff33dSHyong Youb Kim 	while (max_rx && ((cqd->type_color &
7608a6ff33dSHyong Youb Kim 			   CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
7618a6ff33dSHyong Youb Kim 		if (unlikely(cqd->bytes_written_flags &
7628a6ff33dSHyong Youb Kim 			     CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
7638a6ff33dSHyong Youb Kim 			rte_pktmbuf_free(*rxmb++);
7648a6ff33dSHyong Youb Kim 			rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
7658a6ff33dSHyong Youb Kim 		} else {
7668a6ff33dSHyong Youb Kim 			*rx++ = rx_one(cqd, *rxmb++, enic);
7678a6ff33dSHyong Youb Kim 		}
7688a6ff33dSHyong Youb Kim 		cqd++;
7698a6ff33dSHyong Youb Kim 		max_rx--;
7708a6ff33dSHyong Youb Kim 	}
7718a6ff33dSHyong Youb Kim 
7728a6ff33dSHyong Youb Kim 	/* Number of descriptors visited */
7738a6ff33dSHyong Youb Kim 	nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx;
7748a6ff33dSHyong Youb Kim 	if (nb_rx == 0)
7758a6ff33dSHyong Youb Kim 		return 0;
7768a6ff33dSHyong Youb Kim 	rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx;
7778a6ff33dSHyong Youb Kim 	rxmb = rq->mbuf_ring + cq_idx;
7788a6ff33dSHyong Youb Kim 	cq_idx += nb_rx;
7798a6ff33dSHyong Youb Kim 	rq->rx_nb_hold += nb_rx;
7808a6ff33dSHyong Youb Kim 	if (unlikely(cq_idx == cq->ring.desc_count)) {
7818a6ff33dSHyong Youb Kim 		cq_idx = 0;
7828a6ff33dSHyong Youb Kim 		cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT;
7838a6ff33dSHyong Youb Kim 	}
7848a6ff33dSHyong Youb Kim 	cq->to_clean = cq_idx;
7858a6ff33dSHyong Youb Kim 
7868a6ff33dSHyong Youb Kim 	/* Step 4: Restock RQ with new mbufs */
7878a6ff33dSHyong Youb Kim 	memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs,
7888a6ff33dSHyong Youb Kim 	       sizeof(struct rte_mbuf *) * nb_rx);
7898a6ff33dSHyong Youb Kim 	rq->num_free_mbufs -= nb_rx;
7908a6ff33dSHyong Youb Kim 	while (nb_rx) {
7918a6ff33dSHyong Youb Kim 		rqd->address = (*rxmb)->buf_iova + RTE_PKTMBUF_HEADROOM;
7928a6ff33dSHyong Youb Kim 		nb_rx--;
7938a6ff33dSHyong Youb Kim 		rqd++;
7948a6ff33dSHyong Youb Kim 		rxmb++;
7958a6ff33dSHyong Youb Kim 	}
7968a6ff33dSHyong Youb Kim 	if (rq->rx_nb_hold > rq->rx_free_thresh) {
7978a6ff33dSHyong Youb Kim 		rq->posted_index = enic_ring_add(rq->ring.desc_count,
7988a6ff33dSHyong Youb Kim 						 rq->posted_index,
7998a6ff33dSHyong Youb Kim 						 rq->rx_nb_hold);
8008a6ff33dSHyong Youb Kim 		rq->rx_nb_hold = 0;
8018a6ff33dSHyong Youb Kim 		rte_wmb();
8028a6ff33dSHyong Youb Kim 		iowrite32_relaxed(rq->posted_index,
8038a6ff33dSHyong Youb Kim 				  &rq->ctrl->posted_index);
8048a6ff33dSHyong Youb Kim 	}
8058a6ff33dSHyong Youb Kim 
8068a6ff33dSHyong Youb Kim 	return rx - rx_pkts;
8078a6ff33dSHyong Youb Kim }
8088a6ff33dSHyong Youb Kim 
8098a6ff33dSHyong Youb Kim bool
enic_use_vector_rx_handler(struct rte_eth_dev * eth_dev)810e92a4b41SHyong Youb Kim enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev)
8118a6ff33dSHyong Youb Kim {
812e92a4b41SHyong Youb Kim 	struct enic *enic = pmd_priv(eth_dev);
8138a6ff33dSHyong Youb Kim 
8148a6ff33dSHyong Youb Kim 	/* User needs to request for the avx2 handler */
8158a6ff33dSHyong Youb Kim 	if (!enic->enable_avx2_rx)
8168a6ff33dSHyong Youb Kim 		return false;
8178a6ff33dSHyong Youb Kim 	/* Do not support scatter Rx */
8188a6ff33dSHyong Youb Kim 	if (!(enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0))
8198a6ff33dSHyong Youb Kim 		return false;
820ac61aa64SCiara Power 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
821ac61aa64SCiara Power 			rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) {
822bbd8ecc0SJohn Daley 		ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
8238a6ff33dSHyong Youb Kim 		eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
824f011fa0aSHyong Youb Kim 		enic->use_noscatter_vec_rx_handler = 1;
8258a6ff33dSHyong Youb Kim 		return true;
8268a6ff33dSHyong Youb Kim 	}
8278a6ff33dSHyong Youb Kim 	return false;
8288a6ff33dSHyong Youb Kim }
829