xref: /dpdk/drivers/net/mlx5/mlx5_rxtx_vec_sse.h (revision 43fd3624fdfe3a33904a9b64d94306dd3d4f2c13)
18fd92a66SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause
23c2ddbd4SYongseok Koh  * Copyright 2017 6WIND S.A.
35feecc57SShahaf Shuler  * Copyright 2017 Mellanox Technologies, Ltd
43c2ddbd4SYongseok Koh  */
53c2ddbd4SYongseok Koh 
63c2ddbd4SYongseok Koh #ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
73c2ddbd4SYongseok Koh #define RTE_PMD_MLX5_RXTX_VEC_SSE_H_
83c2ddbd4SYongseok Koh 
93c2ddbd4SYongseok Koh #include <stdint.h>
103c2ddbd4SYongseok Koh #include <string.h>
113c2ddbd4SYongseok Koh #include <stdlib.h>
12393ff728SBruce Richardson #include <rte_vect.h>
133c2ddbd4SYongseok Koh 
143c2ddbd4SYongseok Koh #include <rte_mbuf.h>
153c2ddbd4SYongseok Koh #include <rte_mempool.h>
163c2ddbd4SYongseok Koh #include <rte_prefetch.h>
173c2ddbd4SYongseok Koh 
187b4f1e6bSMatan Azrad #include <mlx5_prm.h>
197b4f1e6bSMatan Azrad 
207b4f1e6bSMatan Azrad #include "mlx5_defs.h"
213c2ddbd4SYongseok Koh #include "mlx5.h"
223c2ddbd4SYongseok Koh #include "mlx5_utils.h"
233c2ddbd4SYongseok Koh #include "mlx5_rxtx.h"
243c2ddbd4SYongseok Koh #include "mlx5_rxtx_vec.h"
253c2ddbd4SYongseok Koh #include "mlx5_autoconf.h"
263c2ddbd4SYongseok Koh 
/**
 * Store free buffers to RX SW ring.
 *
 * @param elts
 *   Pointer to SW ring to be filled.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param n
 *   Number of packets to be stored.
 */
static inline void
rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
{
	const uint16_t paired = n - (n & 1);
	unsigned int idx = 0;

	/* Move two mbuf pointers (16 bytes) per iteration. */
	while (idx < paired) {
		const __m128i pair =
			_mm_loadu_si128((__m128i *)&elts[idx]);

		_mm_storeu_si128((__m128i *)&pkts[idx], pair);
		idx += 2;
	}
	/* An odd count leaves one trailing pointer to copy scalar-wise. */
	if (n & 1)
		pkts[idx] = elts[idx];
}
523c2ddbd4SYongseok Koh 
/**
 * Decompress a compressed completion and fill in mbufs in RX SW ring with data
 * extracted from the title completion descriptor.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param cq
 *   Pointer to completion array having a compressed completion at first.
 * @param elts
 *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
 *   the title completion descriptor to be copied to the rest of mbufs.
 * @param keep
 *   Keep unzipping if the next CQE is the miniCQE array.
 *
 * @return
 *   Number of mini-CQEs successfully decompressed.
 */
static inline uint16_t
rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
		    struct rte_mbuf **elts, bool keep)
{
	/*
	 * MiniCQE array location depends on the compression layout:
	 * legacy layout puts it in the CQE slot right after the title CQE,
	 * enhanced layout (cqe_comp_layout) overlays the title CQE itself.
	 */
	volatile struct mlx5_mini_cqe8 *mcq =
		(volatile struct mlx5_mini_cqe8 *)(cq + !rxq->cqe_comp_layout);
	/* Title packet is pre-built. */
	struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
	unsigned int pos;
	unsigned int i;
	unsigned int inv = 0;	/* Count of CQE slots already invalidated. */
	/* Mask to shuffle from extracted mini CQE to mbuf. */
	const __m128i shuf_mask1 =
		_mm_set_epi8(0,  1,  2,  3, /* rss, bswap32 */
			    -1, -1,         /* skip vlan_tci */
			     6,  7,         /* data_len, bswap16 */
			    -1, -1,  6,  7, /* pkt_len, bswap16 */
			    -1, -1, -1, -1  /* skip packet_type */);
	const __m128i shuf_mask2 =
		_mm_set_epi8(8,  9, 10, 11, /* rss, bswap32 */
			    -1, -1,         /* skip vlan_tci */
			    14, 15,         /* data_len, bswap16 */
			    -1, -1, 14, 15, /* pkt_len, bswap16 */
			    -1, -1, -1, -1  /* skip packet_type */);
	/* Restore the compressed count. Must be 16 bits. */
	uint16_t mcqe_n = (rxq->cqe_comp_layout) ?
		(MLX5_CQE_NUM_MINIS(cq->op_own) + 1U) : rte_be_to_cpu_32(cq->byte_cnt);
	/* Total mbufs produced, accumulated across chained miniCQE arrays. */
	uint16_t pkts_n = mcqe_n;
	/* Template fields taken from the pre-built title packet. */
	const __m128i rearm =
		_mm_loadu_si128((__m128i *)&t_pkt->rearm_data);
	const __m128i rxdf =
		_mm_loadu_si128((__m128i *)&t_pkt->rx_descriptor_fields1);
	/* Subtract CRC length from pkt_len/data_len if HW keeps the CRC. */
	const __m128i crc_adj =
		_mm_set_epi16(0, 0, 0,
			      rxq->crc_present * RTE_ETHER_CRC_LEN,
			      0,
			      rxq->crc_present * RTE_ETHER_CRC_LEN,
			      0, 0);
	__m128i ol_flags = _mm_setzero_si128();
	__m128i ol_flags_mask = _mm_setzero_si128();
#ifdef MLX5_PMD_SOFT_COUNTERS
	/*
	 * NOTE(review): 'zero' is declared only under MLX5_PMD_SOFT_COUNTERS
	 * but is also referenced in the rxq->mark path below — confirm the
	 * macro is unconditionally defined for this driver's builds.
	 */
	const __m128i zero = _mm_setzero_si128();
	const __m128i ones = _mm_cmpeq_epi32(zero, zero);
	uint32_t rcvd_byte = 0;
	/* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
	const __m128i len_shuf_mask =
		_mm_set_epi8(-1, -1, -1, -1,
			     -1, -1, -1, -1,
			     14, 15,  6,  7,
			     10, 11,  2,  3);
#endif
	/*
	 * A. load mCQEs into a 128bit register.
	 * B. store rearm data to mbuf.
	 * C. combine data from mCQEs with rx_descriptor_fields1.
	 * D. store rx_descriptor_fields1.
	 * E. store flow tag (rte_flow mark).
	 */
cycle:
	if (rxq->cqe_comp_layout)
		rte_prefetch0((volatile void *)(cq + mcqe_n));
	/* Process 4 miniCQEs (MLX5_VPMD_DESCS_PER_LOOP) per iteration. */
	for (pos = 0; pos < mcqe_n; ) {
		__m128i mcqe1, mcqe2;
		__m128i rxdf1, rxdf2;
#ifdef MLX5_PMD_SOFT_COUNTERS
		__m128i byte_cnt, invalid_mask;
#endif

		if (!rxq->cqe_comp_layout)
			for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
				if (likely(pos + i < mcqe_n))
					rte_prefetch0((volatile void *)(cq + pos + i));
		/* A.1 load mCQEs into a 128bit register. */
		/* 8 miniCQEs per CQE slot; mcq is rebased every 8 entries. */
		mcqe1 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, &mcq[pos % 8]));
		mcqe2 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, &mcq[pos % 8 + 2]));
		/* B.1 store rearm data to mbuf. */
		_mm_storeu_si128((__m128i *)&elts[pos]->rearm_data, rearm);
		_mm_storeu_si128((__m128i *)&elts[pos + 1]->rearm_data, rearm);
		/* C.1 combine data from mCQEs with rx_descriptor_fields1. */
		rxdf1 = _mm_shuffle_epi8(mcqe1, shuf_mask1);
		rxdf2 = _mm_shuffle_epi8(mcqe1, shuf_mask2);
		rxdf1 = _mm_sub_epi16(rxdf1, crc_adj);
		rxdf2 = _mm_sub_epi16(rxdf2, crc_adj);
		/* Keep title-packet fields in the lanes not fed by the mCQE. */
		rxdf1 = _mm_blend_epi16(rxdf1, rxdf, 0x23);
		rxdf2 = _mm_blend_epi16(rxdf2, rxdf, 0x23);
		/* D.1 store rx_descriptor_fields1. */
		_mm_storeu_si128((__m128i *)
				  &elts[pos]->rx_descriptor_fields1,
				 rxdf1);
		_mm_storeu_si128((__m128i *)
				  &elts[pos + 1]->rx_descriptor_fields1,
				 rxdf2);
		/* B.1 store rearm data to mbuf. */
		_mm_storeu_si128((__m128i *)&elts[pos + 2]->rearm_data, rearm);
		_mm_storeu_si128((__m128i *)&elts[pos + 3]->rearm_data, rearm);
		/* C.1 combine data from mCQEs with rx_descriptor_fields1. */
		rxdf1 = _mm_shuffle_epi8(mcqe2, shuf_mask1);
		rxdf2 = _mm_shuffle_epi8(mcqe2, shuf_mask2);
		rxdf1 = _mm_sub_epi16(rxdf1, crc_adj);
		rxdf2 = _mm_sub_epi16(rxdf2, crc_adj);
		rxdf1 = _mm_blend_epi16(rxdf1, rxdf, 0x23);
		rxdf2 = _mm_blend_epi16(rxdf2, rxdf, 0x23);
		/* D.1 store rx_descriptor_fields1. */
		_mm_storeu_si128((__m128i *)
				  &elts[pos + 2]->rx_descriptor_fields1,
				 rxdf1);
		_mm_storeu_si128((__m128i *)
				  &elts[pos + 3]->rx_descriptor_fields1,
				 rxdf2);
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Mask out byte counts of lanes past the last valid mCQE. */
		invalid_mask = _mm_set_epi64x(0,
					      (mcqe_n - pos) *
					      sizeof(uint16_t) * 8);
		invalid_mask = _mm_sll_epi64(ones, invalid_mask);
		byte_cnt = _mm_blend_epi16(_mm_srli_si128(mcqe1, 4),
					   mcqe2, 0xcc);
		byte_cnt = _mm_shuffle_epi8(byte_cnt, len_shuf_mask);
		byte_cnt = _mm_andnot_si128(invalid_mask, byte_cnt);
		byte_cnt = _mm_hadd_epi16(byte_cnt, zero);
		rcvd_byte += _mm_cvtsi128_si64(_mm_hadd_epi16(byte_cnt, zero));
#endif
		if (rxq->mark) {
			if (rxq->mcqe_format !=
				MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
				/* No per-mCQE tag: replicate the title's. */
				const uint32_t flow_tag = t_pkt->hash.fdir.hi;

				/* E.1 store flow tag (rte_flow mark). */
				elts[pos]->hash.fdir.hi = flow_tag;
				elts[pos + 1]->hash.fdir.hi = flow_tag;
				elts[pos + 2]->hash.fdir.hi = flow_tag;
				elts[pos + 3]->hash.fdir.hi = flow_tag;
			} else {
				const __m128i flow_mark_adj =
					_mm_set_epi32(-1, -1, -1, -1);
				const __m128i flow_mark_shuf =
					_mm_set_epi8(-1,  9,  8, 12,
						     -1,  1,  0,  4,
						     -1, -1, -1, -1,
						     -1, -1, -1, -1);
				const __m128i ft_mask =
					_mm_set1_epi32(0xffffff00);
				const __m128i fdir_flags =
					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
				const __m128i fdir_all_flags =
					_mm_set1_epi32(RTE_MBUF_F_RX_FDIR |
						       rxq->mark_flag);
				__m128i fdir_id_flags =
					_mm_set1_epi32(rxq->mark_flag);

				/* Extract flow_tag field. */
				__m128i ftag0 =
					_mm_shuffle_epi8(mcqe1, flow_mark_shuf);
				__m128i ftag1 =
					_mm_shuffle_epi8(mcqe2, flow_mark_shuf);
				__m128i ftag =
					_mm_unpackhi_epi64(ftag0, ftag1);
				__m128i invalid_mask =
					_mm_cmpeq_epi32(ftag, zero);

				ol_flags_mask = _mm_or_si128(ol_flags_mask,
							     fdir_all_flags);
				/* Set RTE_MBUF_F_RX_FDIR if flow tag is non-zero. */
				ol_flags = _mm_or_si128(ol_flags,
					_mm_andnot_si128(invalid_mask,
							 fdir_flags));
				/* Mask out invalid entries. */
				fdir_id_flags = _mm_andnot_si128(invalid_mask,
								 fdir_id_flags);
				/* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
				ol_flags = _mm_or_si128(ol_flags,
					_mm_andnot_si128(_mm_cmpeq_epi32(ftag,
							 ft_mask),
					fdir_id_flags));
				ftag = _mm_add_epi32(ftag, flow_mark_adj);
				elts[pos]->hash.fdir.hi =
						_mm_extract_epi32(ftag, 0);
				elts[pos + 1]->hash.fdir.hi =
						_mm_extract_epi32(ftag, 1);
				elts[pos + 2]->hash.fdir.hi =
						_mm_extract_epi32(ftag, 2);
				elts[pos + 3]->hash.fdir.hi =
						_mm_extract_epi32(ftag, 3);
			}
		}
		if (unlikely(rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)) {
			if (rxq->mcqe_format ==
			    MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
				/* Rebuild packet_type from per-mCQE headers. */
				const uint8_t pkt_info =
					(cq->pkt_info & 0x3) << 6;
				const uint8_t pkt_hdr0 =
					_mm_extract_epi8(mcqe1, 0);
				const uint8_t pkt_hdr1 =
					_mm_extract_epi8(mcqe1, 8);
				const uint8_t pkt_hdr2 =
					_mm_extract_epi8(mcqe2, 0);
				const uint8_t pkt_hdr3 =
					_mm_extract_epi8(mcqe2, 8);
				const __m128i vlan_mask =
					_mm_set1_epi32(RTE_MBUF_F_RX_VLAN |
						       RTE_MBUF_F_RX_VLAN_STRIPPED);
				const __m128i cv_mask =
					_mm_set1_epi32(MLX5_CQE_VLAN_STRIPPED);
				const __m128i pkt_cv =
					_mm_set_epi32(pkt_hdr0 & 0x1,
						      pkt_hdr1 & 0x1,
						      pkt_hdr2 & 0x1,
						      pkt_hdr3 & 0x1);

				ol_flags_mask = _mm_or_si128(ol_flags_mask,
							     vlan_mask);
				ol_flags = _mm_or_si128(ol_flags,
					_mm_and_si128(_mm_cmpeq_epi32(pkt_cv,
					cv_mask), vlan_mask));
				elts[pos]->packet_type =
					mlx5_ptype_table[(pkt_hdr0 >> 2) |
							 pkt_info];
				elts[pos + 1]->packet_type =
					mlx5_ptype_table[(pkt_hdr1 >> 2) |
							 pkt_info];
				elts[pos + 2]->packet_type =
					mlx5_ptype_table[(pkt_hdr2 >> 2) |
							 pkt_info];
				elts[pos + 3]->packet_type =
					mlx5_ptype_table[(pkt_hdr3 >> 2) |
							 pkt_info];
				if (rxq->tunnel) {
					/* Bit 6 of the ptype index flags a tunneled packet. */
					elts[pos]->packet_type |=
						!!(((pkt_hdr0 >> 2) |
						pkt_info) & (1 << 6));
					elts[pos + 1]->packet_type |=
						!!(((pkt_hdr1 >> 2) |
						pkt_info) & (1 << 6));
					elts[pos + 2]->packet_type |=
						!!(((pkt_hdr2 >> 2) |
						pkt_info) & (1 << 6));
					elts[pos + 3]->packet_type |=
						!!(((pkt_hdr3 >> 2) |
						pkt_info) & (1 << 6));
				}
			}
			/* Fill remaining flag lanes from the title packet. */
			const __m128i hash_flags =
				_mm_set1_epi32(RTE_MBUF_F_RX_RSS_HASH);
			const __m128i rearm_flags =
				_mm_set1_epi32((uint32_t)t_pkt->ol_flags);

			ol_flags_mask = _mm_or_si128(ol_flags_mask, hash_flags);
			ol_flags = _mm_or_si128(ol_flags,
				_mm_andnot_si128(ol_flags_mask, rearm_flags));
			elts[pos]->ol_flags =
				_mm_extract_epi32(ol_flags, 0);
			elts[pos + 1]->ol_flags =
				_mm_extract_epi32(ol_flags, 1);
			elts[pos + 2]->ol_flags =
				_mm_extract_epi32(ol_flags, 2);
			elts[pos + 3]->ol_flags =
				_mm_extract_epi32(ol_flags, 3);
			elts[pos]->hash.rss = 0;
			elts[pos + 1]->hash.rss = 0;
			elts[pos + 2]->hash.rss = 0;
			elts[pos + 3]->hash.rss = 0;
		}
		if (rxq->dynf_meta) {
			int32_t offs = rxq->flow_meta_offset;
			const uint32_t meta =
				*RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);

			/* Check if title packet has valid metadata. */
			if (meta) {
				MLX5_ASSERT(t_pkt->ol_flags &
					    rxq->flow_meta_mask);
				*RTE_MBUF_DYNFIELD(elts[pos], offs,
							uint32_t *) = meta;
				*RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
							uint32_t *) = meta;
				*RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
							uint32_t *) = meta;
				*RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
							uint32_t *) = meta;
			}
		}
		pos += MLX5_VPMD_DESCS_PER_LOOP;
		/* Move to next CQE and invalidate consumed CQEs. */
		if (!rxq->cqe_comp_layout) {
			if (!(pos & 0x7) && pos < mcqe_n) {
				if (pos + 8 < mcqe_n)
					rte_prefetch0((volatile void *)(cq + pos + 8));
				/* Rebase mcq onto the next 8-miniCQE slot. */
				mcq = (volatile struct mlx5_mini_cqe8 *)(cq + pos);
				for (i = 0; i < 8; ++i)
					cq[inv++].op_own = MLX5_CQE_INVALIDATE;
			}
		}
	}
	if (rxq->cqe_comp_layout && keep) {
		int ret;
		/* Keep unzipping if the next CQE is the miniCQE array. */
		cq = &cq[mcqe_n];
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci + pkts_n);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED) {
			pos = 0;
			elts = &elts[mcqe_n];
			mcq = (volatile struct mlx5_mini_cqe8 *)cq;
			mcqe_n = MLX5_CQE_NUM_MINIS(cq->op_own) + 1;
			pkts_n += mcqe_n;
			goto cycle;
		}
	} else {
		/* Invalidate the rest of CQEs. */
		for (; inv < pkts_n; ++inv)
			cq[inv].op_own = MLX5_CQE_INVALIDATE;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	rxq->stats.ipackets += pkts_n;
	rxq->stats.ibytes += rcvd_byte;
#endif
	return pkts_n;
}
3873c2ddbd4SYongseok Koh 
/**
 * Calculate packet type and offload flag for mbuf and store it.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param cqes[4]
 *   Array of four 16bytes completions extracted from the original completion
 *   descriptor.
 * @param op_err
 *   Opcode vector having responder error status. Each field is 4B.
 * @param pkts
 *   Pointer to array of packets to be filled.
 */
static inline void
rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
			 __m128i op_err, struct rte_mbuf **pkts)
{
	__m128i pinfo0, pinfo1;
	__m128i pinfo, ptype;
	/* Seed ol_flags with queue-wide RSS-hash/timestamp flags. */
	__m128i ol_flags = _mm_set1_epi32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH |
					  rxq->hw_timestamp * rxq->timestamp_rx_flag);
	__m128i cv_flags;
	const __m128i zero = _mm_setzero_si128();
	const __m128i ptype_mask = _mm_set1_epi32(0xfd06);
	const __m128i ptype_ol_mask = _mm_set1_epi32(0x106);
	const __m128i pinfo_mask = _mm_set1_epi32(0x3);
	/* Byte LUT: l3_ok/l4_ok/cv bits select the matching mbuf flags. */
	const __m128i cv_flag_sel =
		_mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0,
			     (uint8_t)((RTE_MBUF_F_RX_IP_CKSUM_GOOD |
					RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
			     0,
			     (uint8_t)(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1),
			     0,
			     (uint8_t)(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1),
			     (uint8_t)(RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED),
			     0);
	const __m128i cv_mask =
		_mm_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
			       RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
	const __m128i mbuf_init =
		_mm_load_si128((__m128i *)&rxq->mbuf_initializer);
	__m128i rearm0, rearm1, rearm2, rearm3;
	uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;

	/* Extract pkt_info field. */
	pinfo0 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
	pinfo1 = _mm_unpacklo_epi32(cqes[2], cqes[3]);
	pinfo = _mm_unpacklo_epi64(pinfo0, pinfo1);
	/* Extract hdr_type_etc field. */
	pinfo0 = _mm_unpackhi_epi32(cqes[0], cqes[1]);
	pinfo1 = _mm_unpackhi_epi32(cqes[2], cqes[3]);
	ptype = _mm_unpacklo_epi64(pinfo0, pinfo1);
	if (rxq->mark) {
		const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
		const __m128i fdir_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
		__m128i fdir_id_flags = _mm_set1_epi32(rxq->mark_flag);
		__m128i flow_tag, invalid_mask;

		flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
		/* Check if flow tag is non-zero then set RTE_MBUF_F_RX_FDIR. */
		invalid_mask = _mm_cmpeq_epi32(flow_tag, zero);
		ol_flags = _mm_or_si128(ol_flags,
					_mm_andnot_si128(invalid_mask,
							 fdir_flags));
		/* Mask out invalid entries. */
		fdir_id_flags = _mm_andnot_si128(invalid_mask, fdir_id_flags);
		/* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
		ol_flags = _mm_or_si128(ol_flags,
					_mm_andnot_si128(
						_mm_cmpeq_epi32(flow_tag,
								pinfo_ft_mask),
						fdir_id_flags));
	}
	/*
	 * Merge the two fields to generate the following:
	 * bit[1]     = l3_ok
	 * bit[2]     = l4_ok
	 * bit[8]     = cv
	 * bit[11:10] = l3_hdr_type
	 * bit[14:12] = l4_hdr_type
	 * bit[15]    = ip_frag
	 * bit[16]    = tunneled
	 * bit[17]    = outer_l3_type
	 */
	ptype = _mm_and_si128(ptype, ptype_mask);
	pinfo = _mm_and_si128(pinfo, pinfo_mask);
	pinfo = _mm_slli_epi32(pinfo, 16);
	/* Make pinfo has merged fields for ol_flags calculation. */
	pinfo = _mm_or_si128(ptype, pinfo);
	ptype = _mm_srli_epi32(pinfo, 10);
	ptype = _mm_packs_epi32(ptype, zero);
	/* Errored packets will have RTE_PTYPE_ALL_MASK. */
	op_err = _mm_srli_epi16(op_err, 8);
	ptype = _mm_or_si128(ptype, op_err);
	pt_idx0 = _mm_extract_epi8(ptype, 0);
	pt_idx1 = _mm_extract_epi8(ptype, 2);
	pt_idx2 = _mm_extract_epi8(ptype, 4);
	pt_idx3 = _mm_extract_epi8(ptype, 6);
	/* Bit 6 of the LUT index marks a tunneled packet type. */
	pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
			       !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
	pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
			       !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
	pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
			       !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
	pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
			       !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
	/* Fill flags for checksum and VLAN. */
	pinfo = _mm_and_si128(pinfo, ptype_ol_mask);
	pinfo = _mm_shuffle_epi8(cv_flag_sel, pinfo);
	/* Locate checksum flags at byte[2:1] and merge with VLAN flags. */
	cv_flags = _mm_slli_epi32(pinfo, 9);
	cv_flags = _mm_or_si128(pinfo, cv_flags);
	/* Move back flags to start from byte[0]. */
	cv_flags = _mm_srli_epi32(cv_flags, 8);
	/* Mask out garbage bits. */
	cv_flags = _mm_and_si128(cv_flags, cv_mask);
	/* Merge to ol_flags. */
	ol_flags = _mm_or_si128(ol_flags, cv_flags);
	/* Merge mbuf_init and ol_flags. */
	/* Blend 0x30 inserts each packet's 32-bit ol_flags at bytes 8-11. */
	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 8), 0x30);
	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 4), 0x30);
	rearm2 = _mm_blend_epi16(mbuf_init, ol_flags, 0x30);
	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(ol_flags, 4), 0x30);
	/* Write 8B rearm_data and 8B ol_flags. */
	_mm_store_si128((__m128i *)&pkts[0]->rearm_data, rearm0);
	_mm_store_si128((__m128i *)&pkts[1]->rearm_data, rearm1);
	_mm_store_si128((__m128i *)&pkts[2]->rearm_data, rearm2);
	_mm_store_si128((__m128i *)&pkts[3]->rearm_data, rearm3);
}
5173c2ddbd4SYongseok Koh 
/**
 * Process a non-compressed completion and fill in mbufs in RX SW ring
 * with data extracted from the title completion descriptor.
 *
 * Processes up to @p pkts_n CQEs in groups of MLX5_VPMD_DESCS_PER_LOOP,
 * copying mbuf pointers from the SW ring to @p pkts and filling their
 * rx_descriptor_fields1/rearm_data from the CQE contents. Stops early at
 * the first invalid, HW-owned, or compressed CQE.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param cq
 *   Pointer to completion array having a non-compressed completion at first.
 * @param elts
 *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
 *   the title completion descriptor to be copied to the rest of mbufs.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] comp
 *   Pointer to an index. Set it to the first compressed completion if any.
 *
 * @return
 *   Number of CQEs successfully processed.
 */
static inline uint16_t
rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
		 struct rte_mbuf **elts, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, uint64_t *comp)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	unsigned int pos, adj;
	uint64_t n = 0;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	/*
	 * Expected validity iteration count (vic) and ownership bit (own) for
	 * the current pass over the CQ ring. Which of the two is checked
	 * against the CQEs depends on rxq->cqe_comp_layout (see step E.2).
	 */
	const uint8_t vic = rxq->cq_ci >> rxq->cqe_n;
	const uint8_t own = !(rxq->cq_ci & (q_mask + 1));
	/* Byte-lane masks selecting the vic / owner / opcode / format fields
	 * of two packed op_own dwords at a time.
	 */
	const __m128i vic_check = _mm_set1_epi64x(0x00ff000000ff0000LL);
	const __m128i owner_check =	_mm_set1_epi64x(0x0100000001000000LL);
	const __m128i opcode_check = _mm_set1_epi64x(0xf0000000f0000000LL);
	const __m128i format_check = _mm_set1_epi64x(0x0c0000000c000000LL);
	const __m128i resp_err_check = _mm_set1_epi64x(0xe0000000e0000000LL);
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t rcvd_byte = 0;
	/* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
	const __m128i len_shuf_mask =
		_mm_set_epi8(-1, -1, -1, -1,
			     -1, -1, -1, -1,
			     12, 13,  8,  9,
			      4,  5,  0,  1);
#endif
	/* Replicated expected validity iteration count, one per CQE lane. */
	const __m128i validity =
		_mm_set_epi8(0, vic, 0, 0,
			     0, vic, 0, 0,
			     0, vic, 0, 0,
			     0, vic, 0, 0);
	/* Replicated expected ownership bit, one per CQE lane. */
	const __m128i ownership =
		_mm_set_epi8(own, 0, 0, 0,
			     own, 0, 0, 0,
			     own, 0, 0, 0,
			     own, 0, 0, 0);
	/* Mask to shuffle from extracted CQE to mbuf. */
	const __m128i shuf_mask =
		_mm_set_epi8(-1,  3,  2,  1, /* fdir.hi */
			     12, 13, 14, 15, /* rss, bswap32 */
			     10, 11,         /* vlan_tci, bswap16 */
			      4,  5,         /* data_len, bswap16 */
			     -1, -1,         /* zero out 2nd half of pkt_len */
			      4,  5          /* pkt_len, bswap16 */);
	/* Mask to blend from the last Qword to the first DQword. */
	const __m128i blend_mask =
		_mm_set_epi8(-1, -1, -1, -1,
			     -1, -1, -1, -1,
			      0,  0,  0,  0,
			      0,  0,  0, -1);
	const __m128i zero = _mm_setzero_si128();
	const __m128i ones = _mm_cmpeq_epi32(zero, zero);
	/* Length adjustment to strip FCS from pkt_len/data_len if present. */
	const __m128i crc_adj =
		_mm_set_epi16(0, 0, 0, 0, 0,
			      rxq->crc_present * RTE_ETHER_CRC_LEN,
			      0,
			      rxq->crc_present * RTE_ETHER_CRC_LEN);
	/* Converts flow tag 0xffffff -> -1 (RTE_MBUF_F_RX_FDIR without ID). */
	const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
	/*
	 * A. load first Qword (8bytes) in one loop.
	 * B. copy 4 mbuf pointers from elts ring to returning pkts.
	 * C. load remained CQE data and extract necessary fields.
	 *    Final 16bytes cqes[] extracted from original 64bytes CQE has the
	 *    following structure:
	 *        struct {
	 *          uint8_t  pkt_info;
	 *          uint8_t  flow_tag[3];
	 *          uint16_t byte_cnt;
	 *          uint8_t  validity_iteration_count;
	 *          uint8_t  op_own;
	 *          uint16_t hdr_type_etc;
	 *          uint16_t vlan_info;
	 *          uint32_t rx_has_res;
	 *        } c;
	 * D. fill in mbuf.
	 * E. get valid CQEs.
	 * F. find compressed CQE.
	 */
	for (pos = 0;
	     pos < pkts_n;
	     pos += MLX5_VPMD_DESCS_PER_LOOP) {
		__m128i cqes[MLX5_VPMD_DESCS_PER_LOOP];
		__m128i cqe_tmp1, cqe_tmp2;
		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
		__m128i op_own, op_own_tmp1, op_own_tmp2;
		__m128i opcode, owner_mask, invalid_mask;
		__m128i comp_mask, mini_mask;
		__m128i mask;
#ifdef MLX5_PMD_SOFT_COUNTERS
		__m128i byte_cnt;
#endif
		__m128i mbp1, mbp2;
		/* Per-lane CQE offsets 0..3; clamped to 0 beyond pkts_n. */
		__m128i p = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
		unsigned int p1, p2, p3;

		/* Prefetch next 4 CQEs. */
		if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
			rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP]);
			rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 1]);
			rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 2]);
			rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 3]);
		}
		/* A.0 do not cross the end of CQ. */
		mask = _mm_set_epi64x(0, (pkts_n - pos) * sizeof(uint16_t) * 8);
		mask = _mm_sll_epi64(ones, mask);
		p = _mm_andnot_si128(mask, p);
		/*
		 * A.1 load cqes. Loads are done in descending lane order with
		 * compiler barriers in between so op_own (which carries the
		 * validity/ownership indication) is read before the rest of
		 * each CQE is trusted.
		 */
		p3 = _mm_extract_epi16(p, 3);
		cqes[3] = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *,
					   &cq[pos + p3].sop_drop_qpn));
		rte_compiler_barrier();
		p2 = _mm_extract_epi16(p, 2);
		cqes[2] = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *,
					   &cq[pos + p2].sop_drop_qpn));
		rte_compiler_barrier();
		/* B.1 load mbuf pointers. */
		mbp1 = _mm_loadu_si128((__m128i *)&elts[pos]);
		mbp2 = _mm_loadu_si128((__m128i *)&elts[pos + 2]);
		/* A.1 load a block having op_own. */
		p1 = _mm_extract_epi16(p, 1);
		cqes[1] = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *,
					   &cq[pos + p1].sop_drop_qpn));
		rte_compiler_barrier();
		cqes[0] = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *,
					   &cq[pos].sop_drop_qpn));
		/* B.2 copy mbuf pointers. */
		_mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
		_mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
		/* Ensure op_own reads complete before reading CQE payload. */
		rte_io_rmb();
		/* C.1 load remained CQE data and extract necessary fields. */
		cqe_tmp2 = _mm_load_si128(RTE_CAST_PTR(const __m128i *, &cq[pos + p3]));
		cqe_tmp1 = _mm_load_si128(RTE_CAST_PTR(const __m128i *, &cq[pos + p2]));
		cqes[3] = _mm_blendv_epi8(cqes[3], cqe_tmp2, blend_mask);
		cqes[2] = _mm_blendv_epi8(cqes[2], cqe_tmp1, blend_mask);
		cqe_tmp2 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, &cq[pos + p3].csum));
		cqe_tmp1 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, &cq[pos + p2].csum));
		cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x30);
		cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x30);
		cqe_tmp2 = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *, &cq[pos + p3].rsvd4[2]));
		cqe_tmp1 = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *, &cq[pos + p2].rsvd4[2]));
		cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x04);
		cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x04);
		/* C.2 generate final structure for mbuf with swapping bytes. */
		pkt_mb3 = _mm_shuffle_epi8(cqes[3], shuf_mask);
		pkt_mb2 = _mm_shuffle_epi8(cqes[2], shuf_mask);
		/* C.3 adjust CRC length. */
		pkt_mb3 = _mm_sub_epi16(pkt_mb3, crc_adj);
		pkt_mb2 = _mm_sub_epi16(pkt_mb2, crc_adj);
		/* C.4 adjust flow mark. */
		pkt_mb3 = _mm_add_epi32(pkt_mb3, flow_mark_adj);
		pkt_mb2 = _mm_add_epi32(pkt_mb2, flow_mark_adj);
		/* D.1 fill in mbuf - rx_descriptor_fields1. */
		_mm_storeu_si128((void *)&pkts[pos + 3]->pkt_len, pkt_mb3);
		_mm_storeu_si128((void *)&pkts[pos + 2]->pkt_len, pkt_mb2);
		/* E.1 extract op_own field. */
		op_own_tmp2 = _mm_unpacklo_epi32(cqes[2], cqes[3]);
		/* C.1 load remained CQE data and extract necessary fields. */
		cqe_tmp2 = _mm_load_si128(RTE_CAST_PTR(const __m128i *, &cq[pos + p1]));
		cqe_tmp1 = _mm_load_si128(RTE_CAST_PTR(const __m128i *, &cq[pos]));
		cqes[1] = _mm_blendv_epi8(cqes[1], cqe_tmp2, blend_mask);
		cqes[0] = _mm_blendv_epi8(cqes[0], cqe_tmp1, blend_mask);
		cqe_tmp2 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, &cq[pos + p1].csum));
		cqe_tmp1 = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, &cq[pos].csum));
		cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x30);
		cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x30);
		cqe_tmp2 = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *, &cq[pos + p1].rsvd4[2]));
		cqe_tmp1 = _mm_loadl_epi64(RTE_CAST_PTR(const __m128i *, &cq[pos].rsvd4[2]));
		cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x04);
		cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x04);
		/* C.2 generate final structure for mbuf with swapping bytes. */
		pkt_mb1 = _mm_shuffle_epi8(cqes[1], shuf_mask);
		pkt_mb0 = _mm_shuffle_epi8(cqes[0], shuf_mask);
		/* C.3 adjust CRC length. */
		pkt_mb1 = _mm_sub_epi16(pkt_mb1, crc_adj);
		pkt_mb0 = _mm_sub_epi16(pkt_mb0, crc_adj);
		/* C.4 adjust flow mark. */
		pkt_mb1 = _mm_add_epi32(pkt_mb1, flow_mark_adj);
		pkt_mb0 = _mm_add_epi32(pkt_mb0, flow_mark_adj);
		/* E.1 extract op_own byte. */
		op_own_tmp1 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
		op_own = _mm_unpackhi_epi64(op_own_tmp1, op_own_tmp2);
		/* D.1 fill in mbuf - rx_descriptor_fields1. */
		_mm_storeu_si128((void *)&pkts[pos + 1]->pkt_len, pkt_mb1);
		_mm_storeu_si128((void *)&pkts[pos]->pkt_len, pkt_mb0);
		/* E.2 mask out CQEs belonging to HW. */
		if (rxq->cqe_comp_layout) {
			/* Layout with validity iteration count in the CQE. */
			owner_mask = _mm_and_si128(op_own, vic_check);
			owner_mask = _mm_cmpeq_epi32(owner_mask, validity);
			owner_mask = _mm_xor_si128(owner_mask, ones);
		} else {
			/* Classic layout with the ownership bit in op_own. */
			owner_mask = _mm_and_si128(op_own, owner_check);
			owner_mask = _mm_cmpeq_epi32(owner_mask, ownership);
		}
		owner_mask = _mm_packs_epi32(owner_mask, zero);
		/* E.3 get mask for invalidated CQEs. */
		opcode = _mm_and_si128(op_own, opcode_check);
		invalid_mask = _mm_cmpeq_epi32(opcode_check, opcode);
		invalid_mask = _mm_packs_epi32(invalid_mask, zero);
		/* E.4 mask out beyond boundary. */
		invalid_mask = _mm_or_si128(invalid_mask, mask);
		/* E.5 merge invalid_mask with invalid owner. */
		invalid_mask = _mm_or_si128(invalid_mask, owner_mask);
		/* F.1 find compressed CQE format. */
		comp_mask = _mm_and_si128(op_own, format_check);
		comp_mask = _mm_cmpeq_epi32(comp_mask, format_check);
		comp_mask = _mm_packs_epi32(comp_mask, zero);
		/* F.2 mask out invalid entries. */
		comp_mask = _mm_andnot_si128(invalid_mask, comp_mask);
		comp_idx = _mm_cvtsi128_si64(comp_mask);
		/* F.3 get the first compressed CQE. */
		comp_idx = comp_idx ?
				rte_ctz64(comp_idx) /
					(sizeof(uint16_t) * 8) :
				MLX5_VPMD_DESCS_PER_LOOP;
		/* E.6 mask out entries after the compressed CQE. */
		mask = _mm_set_epi64x(0, comp_idx * sizeof(uint16_t) * 8);
		mask = _mm_sll_epi64(ones, mask);
		invalid_mask = _mm_or_si128(invalid_mask, mask);
		/* E.7 count non-compressed valid CQEs. */
		n = _mm_cvtsi128_si64(invalid_mask);
		n = n ? rte_ctz64(n) / (sizeof(uint16_t) * 8) :
			MLX5_VPMD_DESCS_PER_LOOP;
		nocmp_n += n;
		/* D.2 get the final invalid mask. */
		mask = _mm_set_epi64x(0, n * sizeof(uint16_t) * 8);
		mask = _mm_sll_epi64(ones, mask);
		invalid_mask = _mm_or_si128(invalid_mask, mask);
		/*
		 * D.3 check error in opcode. When a compressed CQE directly
		 * follows the valid ones (classic layout only), widen the
		 * valid window by one slot so its error bit is considered too.
		 */
		adj = (!rxq->cqe_comp_layout &&
		       comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
		mask = _mm_set_epi64x(0, adj * sizeof(uint16_t) * 8);
		mini_mask = _mm_sll_epi64(invalid_mask, mask);
		opcode = _mm_cmpeq_epi32(resp_err_check, opcode);
		opcode = _mm_packs_epi32(opcode, zero);
		opcode = _mm_andnot_si128(mini_mask, opcode);
		/* D.4 mark if any error is set */
		*err |= _mm_cvtsi128_si64(opcode);
		/* D.5 fill in mbuf - rearm_data and packet_type. */
		rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
		/* Shared RxQ: take the port id from each CQE. */
		if (unlikely(rxq->shared)) {
			pkts[pos]->port = cq[pos].user_index_low;
			pkts[pos + 1]->port = cq[pos + p1].user_index_low;
			pkts[pos + 2]->port = cq[pos + p2].user_index_low;
			pkts[pos + 3]->port = cq[pos + p3].user_index_low;
		}
		if (unlikely(rxq->hw_timestamp)) {
			int offset = rxq->timestamp_offset;
			if (rxq->rt_timestamp) {
				/* Convert raw HW clocks to realtime ns. */
				struct mlx5_dev_ctx_shared *sh = rxq->sh;
				uint64_t ts;

				ts = rte_be_to_cpu_64(cq[pos].timestamp);
				mlx5_timestamp_set(pkts[pos], offset,
					mlx5_txpp_convert_rx_ts(sh, ts));
				ts = rte_be_to_cpu_64(cq[pos + p1].timestamp);
				mlx5_timestamp_set(pkts[pos + 1], offset,
					mlx5_txpp_convert_rx_ts(sh, ts));
				ts = rte_be_to_cpu_64(cq[pos + p2].timestamp);
				mlx5_timestamp_set(pkts[pos + 2], offset,
					mlx5_txpp_convert_rx_ts(sh, ts));
				ts = rte_be_to_cpu_64(cq[pos + p3].timestamp);
				mlx5_timestamp_set(pkts[pos + 3], offset,
					mlx5_txpp_convert_rx_ts(sh, ts));
			} else {
				mlx5_timestamp_set(pkts[pos], offset,
					rte_be_to_cpu_64(cq[pos].timestamp));
				mlx5_timestamp_set(pkts[pos + 1], offset,
					rte_be_to_cpu_64(cq[pos + p1].timestamp));
				mlx5_timestamp_set(pkts[pos + 2], offset,
					rte_be_to_cpu_64(cq[pos + p2].timestamp));
				mlx5_timestamp_set(pkts[pos + 3], offset,
					rte_be_to_cpu_64(cq[pos + p3].timestamp));
			}
		}
		if (rxq->dynf_meta) {
			/* This code is subject for further optimization. */
			int32_t offs = rxq->flow_meta_offset;
			/* NB: shadows the __m128i 'mask' declared above. */
			uint32_t mask = rxq->flow_meta_port_mask;

			*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
				rte_be_to_cpu_32
				(cq[pos].flow_table_metadata) &	mask;
			*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
				rte_be_to_cpu_32
				(cq[pos + p1].flow_table_metadata) & mask;
			*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
				rte_be_to_cpu_32
				(cq[pos + p2].flow_table_metadata) & mask;
			*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
				rte_be_to_cpu_32
				(cq[pos + p3].flow_table_metadata) & mask;
			if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
				pkts[pos]->ol_flags |= rxq->flow_meta_mask;
			if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
				pkts[pos + 1]->ol_flags |= rxq->flow_meta_mask;
			if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *))
				pkts[pos + 2]->ol_flags |= rxq->flow_meta_mask;
			if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *))
				pkts[pos + 3]->ol_flags |= rxq->flow_meta_mask;
		}
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Add up received bytes count. */
		byte_cnt = _mm_shuffle_epi8(op_own, len_shuf_mask);
		byte_cnt = _mm_andnot_si128(invalid_mask, byte_cnt);
		byte_cnt = _mm_hadd_epi16(byte_cnt, zero);
		rcvd_byte += _mm_cvtsi128_si64(_mm_hadd_epi16(byte_cnt, zero));
#endif
		/*
		 * Break the loop unless more valid CQE is expected, or if
		 * there's a compressed CQE.
		 */
		if (n != MLX5_VPMD_DESCS_PER_LOOP)
			break;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	rxq->stats.ipackets += nocmp_n;
	rxq->stats.ibytes += rcvd_byte;
#endif
	/*
	 * Report the compressed CQE position only when it immediately
	 * follows the processed non-compressed CQEs of the last iteration.
	 */
	if (comp_idx == n)
		*comp = comp_idx;
	return nocmp_n;
}
8653c2ddbd4SYongseok Koh 
8663c2ddbd4SYongseok Koh #endif /* RTE_PMD_MLX5_RXTX_VEC_SSE_H_ */
867