xref: /dpdk/drivers/net/octeon_ep/cnxk_ep_rx_sse.c (revision f2b1510f19d7bfd386d130fa38123d6e2152cf80)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2023 Marvell.
 */
4ab09499eSPavan Nikhilesh 
5ab09499eSPavan Nikhilesh #include "cnxk_ep_rx.h"
6ab09499eSPavan Nikhilesh 
7ab09499eSPavan Nikhilesh static __rte_always_inline uint32_t
8ab09499eSPavan Nikhilesh hadd(__m128i x)
9ab09499eSPavan Nikhilesh {
10ab09499eSPavan Nikhilesh 	__m128i hi64 = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
11ab09499eSPavan Nikhilesh 	__m128i sum64 = _mm_add_epi32(hi64, x);
12ab09499eSPavan Nikhilesh 	__m128i hi32 = _mm_shufflelo_epi16(sum64, _MM_SHUFFLE(1, 0, 3, 2));
13ab09499eSPavan Nikhilesh 	__m128i sum32 = _mm_add_epi32(sum64, hi32);
14ab09499eSPavan Nikhilesh 	return _mm_cvtsi128_si32(sum32);
15ab09499eSPavan Nikhilesh }
16ab09499eSPavan Nikhilesh 
17ab09499eSPavan Nikhilesh static __rte_always_inline void
18ab09499eSPavan Nikhilesh cnxk_ep_process_pkts_vec_sse(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq, uint16_t new_pkts)
19ab09499eSPavan Nikhilesh {
20ab09499eSPavan Nikhilesh 	struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
21bdfb48bfSPavan Nikhilesh 	uint32_t read_idx = droq->read_idx;
22ab09499eSPavan Nikhilesh 	struct rte_mbuf *m0, *m1, *m2, *m3;
23ab09499eSPavan Nikhilesh 	uint16_t nb_desc = droq->nb_desc;
24bdfb48bfSPavan Nikhilesh 	uint32_t idx0, idx1, idx2, idx3;
25ab09499eSPavan Nikhilesh 	uint16_t pkts = 0;
26bdfb48bfSPavan Nikhilesh 	__m128i bytes;
27ab09499eSPavan Nikhilesh 
28ab09499eSPavan Nikhilesh 	idx0 = read_idx;
29bdfb48bfSPavan Nikhilesh 	bytes = _mm_setzero_si128();
30ab09499eSPavan Nikhilesh 	while (pkts < new_pkts) {
31ab09499eSPavan Nikhilesh 		const __m128i bswap_mask = _mm_set_epi8(0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 8, 9, 0xFF,
32ab09499eSPavan Nikhilesh 							0xFF, 4, 5, 0xFF, 0xFF, 0, 1);
33ab09499eSPavan Nikhilesh 		const __m128i cpy_mask = _mm_set_epi8(0xFF, 0xFF, 9, 8, 0xFF, 0xFF, 9, 8, 0xFF,
34ab09499eSPavan Nikhilesh 						      0xFF, 1, 0, 0xFF, 0xFF, 1, 0);
35ab09499eSPavan Nikhilesh 		__m128i s01, s23;
36ab09499eSPavan Nikhilesh 
37ab09499eSPavan Nikhilesh 		idx1 = otx_ep_incr_index(idx0, 1, nb_desc);
38ab09499eSPavan Nikhilesh 		idx2 = otx_ep_incr_index(idx1, 1, nb_desc);
39ab09499eSPavan Nikhilesh 		idx3 = otx_ep_incr_index(idx2, 1, nb_desc);
40ab09499eSPavan Nikhilesh 
41ab09499eSPavan Nikhilesh 		m0 = recv_buf_list[idx0];
42ab09499eSPavan Nikhilesh 		m1 = recv_buf_list[idx1];
43ab09499eSPavan Nikhilesh 		m2 = recv_buf_list[idx2];
44ab09499eSPavan Nikhilesh 		m3 = recv_buf_list[idx3];
45ab09499eSPavan Nikhilesh 
46ab09499eSPavan Nikhilesh 		/* Load packet size big-endian. */
47bdfb48bfSPavan Nikhilesh 		s01 = _mm_set_epi32(cnxk_pktmbuf_mtod(m3, struct otx_ep_droq_info *)->length >> 48,
48bdfb48bfSPavan Nikhilesh 				    cnxk_pktmbuf_mtod(m1, struct otx_ep_droq_info *)->length >> 48,
49bdfb48bfSPavan Nikhilesh 				    cnxk_pktmbuf_mtod(m2, struct otx_ep_droq_info *)->length >> 48,
50bdfb48bfSPavan Nikhilesh 				    cnxk_pktmbuf_mtod(m0, struct otx_ep_droq_info *)->length >> 48);
51ab09499eSPavan Nikhilesh 		/* Convert to little-endian. */
52ab09499eSPavan Nikhilesh 		s01 = _mm_shuffle_epi8(s01, bswap_mask);
53bdfb48bfSPavan Nikhilesh 		/* Vertical add, consolidate outside loop */
54bdfb48bfSPavan Nikhilesh 		bytes = _mm_add_epi32(bytes, s01);
55*f2b1510fSStephen Hemminger 		/* Separate into packet length and data length. */
56ab09499eSPavan Nikhilesh 		s23 = _mm_shuffle_epi32(s01, _MM_SHUFFLE(3, 3, 1, 1));
57ab09499eSPavan Nikhilesh 		s01 = _mm_shuffle_epi8(s01, cpy_mask);
58ab09499eSPavan Nikhilesh 		s23 = _mm_shuffle_epi8(s23, cpy_mask);
59ab09499eSPavan Nikhilesh 
60ab09499eSPavan Nikhilesh 		/* Store packet length and data length to mbuf. */
61ab09499eSPavan Nikhilesh 		*(uint64_t *)&m0->pkt_len = ((rte_xmm_t)s01).u64[0];
62ab09499eSPavan Nikhilesh 		*(uint64_t *)&m1->pkt_len = ((rte_xmm_t)s01).u64[1];
63ab09499eSPavan Nikhilesh 		*(uint64_t *)&m2->pkt_len = ((rte_xmm_t)s23).u64[0];
64ab09499eSPavan Nikhilesh 		*(uint64_t *)&m3->pkt_len = ((rte_xmm_t)s23).u64[1];
65ab09499eSPavan Nikhilesh 
66ab09499eSPavan Nikhilesh 		/* Reset rearm data. */
67ab09499eSPavan Nikhilesh 		*(uint64_t *)&m0->rearm_data = droq->rearm_data;
68ab09499eSPavan Nikhilesh 		*(uint64_t *)&m1->rearm_data = droq->rearm_data;
69ab09499eSPavan Nikhilesh 		*(uint64_t *)&m2->rearm_data = droq->rearm_data;
70ab09499eSPavan Nikhilesh 		*(uint64_t *)&m3->rearm_data = droq->rearm_data;
71ab09499eSPavan Nikhilesh 
72ab09499eSPavan Nikhilesh 		rx_pkts[pkts++] = m0;
73ab09499eSPavan Nikhilesh 		rx_pkts[pkts++] = m1;
74ab09499eSPavan Nikhilesh 		rx_pkts[pkts++] = m2;
75ab09499eSPavan Nikhilesh 		rx_pkts[pkts++] = m3;
76ab09499eSPavan Nikhilesh 		idx0 = otx_ep_incr_index(idx3, 1, nb_desc);
77ab09499eSPavan Nikhilesh 	}
78ab09499eSPavan Nikhilesh 	droq->read_idx = idx0;
79ab09499eSPavan Nikhilesh 
80ab09499eSPavan Nikhilesh 	droq->refill_count += new_pkts;
81ab09499eSPavan Nikhilesh 	droq->pkts_pending -= new_pkts;
82ab09499eSPavan Nikhilesh 	/* Stats */
83ab09499eSPavan Nikhilesh 	droq->stats.pkts_received += new_pkts;
84bdfb48bfSPavan Nikhilesh 	droq->stats.bytes_received += hadd(bytes);
85ab09499eSPavan Nikhilesh }
86ab09499eSPavan Nikhilesh 
87ab09499eSPavan Nikhilesh uint16_t __rte_noinline __rte_hot
88ab09499eSPavan Nikhilesh cnxk_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
89ab09499eSPavan Nikhilesh {
90ab09499eSPavan Nikhilesh 	struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
91ab09499eSPavan Nikhilesh 	uint16_t new_pkts, vpkts;
92ab09499eSPavan Nikhilesh 
93bdfb48bfSPavan Nikhilesh 	/* Refill RX buffers */
94bdfb48bfSPavan Nikhilesh 	if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
95bdfb48bfSPavan Nikhilesh 		cnxk_ep_rx_refill(droq);
96bdfb48bfSPavan Nikhilesh 
97ab09499eSPavan Nikhilesh 	new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
98ab09499eSPavan Nikhilesh 	vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE);
99ab09499eSPavan Nikhilesh 	cnxk_ep_process_pkts_vec_sse(rx_pkts, droq, vpkts);
100ab09499eSPavan Nikhilesh 	cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);
101ab09499eSPavan Nikhilesh 
102ab09499eSPavan Nikhilesh 	return new_pkts;
103ab09499eSPavan Nikhilesh }
104ab09499eSPavan Nikhilesh 
105ab09499eSPavan Nikhilesh uint16_t __rte_noinline __rte_hot
106ab09499eSPavan Nikhilesh cn9k_ep_recv_pkts_sse(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
107ab09499eSPavan Nikhilesh {
108ab09499eSPavan Nikhilesh 	struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
109ab09499eSPavan Nikhilesh 	uint16_t new_pkts, vpkts;
110ab09499eSPavan Nikhilesh 
111ab09499eSPavan Nikhilesh 	/* Refill RX buffers */
112ab09499eSPavan Nikhilesh 	if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
113ab09499eSPavan Nikhilesh 		cnxk_ep_rx_refill(droq);
114ab09499eSPavan Nikhilesh 	} else {
115ab09499eSPavan Nikhilesh 		/* SDP output goes into DROP state when output doorbell count
116ab09499eSPavan Nikhilesh 		 * goes below drop count. When door bell count is written with
117ab09499eSPavan Nikhilesh 		 * a value greater than drop count SDP output should come out
118ab09499eSPavan Nikhilesh 		 * of DROP state. Due to a race condition this is not happening.
119ab09499eSPavan Nikhilesh 		 * Writing doorbell register with 0 again may make SDP output
120ab09499eSPavan Nikhilesh 		 * come out of this state.
121ab09499eSPavan Nikhilesh 		 */
122ab09499eSPavan Nikhilesh 
123ab09499eSPavan Nikhilesh 		rte_write32(0, droq->pkts_credit_reg);
124ab09499eSPavan Nikhilesh 	}
125ab09499eSPavan Nikhilesh 
126bdfb48bfSPavan Nikhilesh 	new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
127bdfb48bfSPavan Nikhilesh 	vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE);
128bdfb48bfSPavan Nikhilesh 	cnxk_ep_process_pkts_vec_sse(rx_pkts, droq, vpkts);
129bdfb48bfSPavan Nikhilesh 	cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);
130bdfb48bfSPavan Nikhilesh 
131ab09499eSPavan Nikhilesh 	return new_pkts;
132ab09499eSPavan Nikhilesh }
133