xref: /dpdk/drivers/net/octeon_ep/cnxk_ep_rx_neon.c (revision f2b1510f19d7bfd386d130fa38123d6e2152cf80)
1c9e4dc02SPavan Nikhilesh /* SPDX-License-Identifier: BSD-3-Clause
2c9e4dc02SPavan Nikhilesh  * Copyright(C) 2023 Marvell.
3c9e4dc02SPavan Nikhilesh  */
4c9e4dc02SPavan Nikhilesh 
5c9e4dc02SPavan Nikhilesh #include "cnxk_ep_rx.h"
6c9e4dc02SPavan Nikhilesh 
7c9e4dc02SPavan Nikhilesh static __rte_always_inline void
8c9e4dc02SPavan Nikhilesh cnxk_ep_process_pkts_vec_neon(struct rte_mbuf **rx_pkts, struct otx_ep_droq *droq,
9c9e4dc02SPavan Nikhilesh 			      uint16_t new_pkts)
10c9e4dc02SPavan Nikhilesh {
11c9e4dc02SPavan Nikhilesh 	const uint8x16_t mask0 = {0, 1, 0xff, 0xff, 0, 1, 0xff, 0xff,
12c9e4dc02SPavan Nikhilesh 				  4, 5, 0xff, 0xff, 4, 5, 0xff, 0xff};
13c9e4dc02SPavan Nikhilesh 	const uint8x16_t mask1 = {8,  9,  0xff, 0xff, 8,  9,  0xff, 0xff,
14c9e4dc02SPavan Nikhilesh 				  12, 13, 0xff, 0xff, 12, 13, 0xff, 0xff};
15c9e4dc02SPavan Nikhilesh 	struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
16c9e4dc02SPavan Nikhilesh 	uint32_t pidx0, pidx1, pidx2, pidx3;
17c9e4dc02SPavan Nikhilesh 	struct rte_mbuf *m0, *m1, *m2, *m3;
18c9e4dc02SPavan Nikhilesh 	uint32_t read_idx = droq->read_idx;
19c9e4dc02SPavan Nikhilesh 	uint16_t nb_desc = droq->nb_desc;
20c9e4dc02SPavan Nikhilesh 	uint32_t idx0, idx1, idx2, idx3;
21c9e4dc02SPavan Nikhilesh 	uint64x2_t s01, s23;
22c9e4dc02SPavan Nikhilesh 	uint32x4_t bytes;
23c9e4dc02SPavan Nikhilesh 	uint16_t pkts = 0;
24c9e4dc02SPavan Nikhilesh 
25c9e4dc02SPavan Nikhilesh 	idx0 = read_idx;
26c9e4dc02SPavan Nikhilesh 	s01 = vdupq_n_u64(0);
27c9e4dc02SPavan Nikhilesh 	bytes = vdupq_n_u32(0);
28c9e4dc02SPavan Nikhilesh 	while (pkts < new_pkts) {
29c9e4dc02SPavan Nikhilesh 		idx1 = otx_ep_incr_index(idx0, 1, nb_desc);
30c9e4dc02SPavan Nikhilesh 		idx2 = otx_ep_incr_index(idx1, 1, nb_desc);
31c9e4dc02SPavan Nikhilesh 		idx3 = otx_ep_incr_index(idx2, 1, nb_desc);
32c9e4dc02SPavan Nikhilesh 
33c9e4dc02SPavan Nikhilesh 		if (new_pkts - pkts > 4) {
34c9e4dc02SPavan Nikhilesh 			pidx0 = otx_ep_incr_index(idx3, 1, nb_desc);
35c9e4dc02SPavan Nikhilesh 			pidx1 = otx_ep_incr_index(pidx0, 1, nb_desc);
36c9e4dc02SPavan Nikhilesh 			pidx2 = otx_ep_incr_index(pidx1, 1, nb_desc);
37c9e4dc02SPavan Nikhilesh 			pidx3 = otx_ep_incr_index(pidx2, 1, nb_desc);
38c9e4dc02SPavan Nikhilesh 
39c9e4dc02SPavan Nikhilesh 			rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx0], void *));
40c9e4dc02SPavan Nikhilesh 			rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx1], void *));
41c9e4dc02SPavan Nikhilesh 			rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx2], void *));
42c9e4dc02SPavan Nikhilesh 			rte_prefetch_non_temporal(cnxk_pktmbuf_mtod(recv_buf_list[pidx3], void *));
43c9e4dc02SPavan Nikhilesh 		}
44c9e4dc02SPavan Nikhilesh 
45c9e4dc02SPavan Nikhilesh 		m0 = recv_buf_list[idx0];
46c9e4dc02SPavan Nikhilesh 		m1 = recv_buf_list[idx1];
47c9e4dc02SPavan Nikhilesh 		m2 = recv_buf_list[idx2];
48c9e4dc02SPavan Nikhilesh 		m3 = recv_buf_list[idx3];
49c9e4dc02SPavan Nikhilesh 
50c9e4dc02SPavan Nikhilesh 		/* Load packet size big-endian. */
51c9e4dc02SPavan Nikhilesh 		s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m0, struct otx_ep_droq_info *)->length >> 48,
52c9e4dc02SPavan Nikhilesh 				     s01, 0);
53c9e4dc02SPavan Nikhilesh 		s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m1, struct otx_ep_droq_info *)->length >> 48,
54c9e4dc02SPavan Nikhilesh 				     s01, 1);
55c9e4dc02SPavan Nikhilesh 		s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m2, struct otx_ep_droq_info *)->length >> 48,
56c9e4dc02SPavan Nikhilesh 				     s01, 2);
57c9e4dc02SPavan Nikhilesh 		s01 = vsetq_lane_u32(cnxk_pktmbuf_mtod(m3, struct otx_ep_droq_info *)->length >> 48,
58c9e4dc02SPavan Nikhilesh 				     s01, 3);
59c9e4dc02SPavan Nikhilesh 		/* Convert to little-endian. */
60c9e4dc02SPavan Nikhilesh 		s01 = vrev16q_u8(s01);
61c9e4dc02SPavan Nikhilesh 
62c9e4dc02SPavan Nikhilesh 		/* Vertical add, consolidate outside the loop. */
63c9e4dc02SPavan Nikhilesh 		bytes += vaddq_u32(bytes, s01);
64*f2b1510fSStephen Hemminger 		/* Separate into packet length and data length. */
65c9e4dc02SPavan Nikhilesh 		s23 = vqtbl1q_u8(s01, mask1);
66c9e4dc02SPavan Nikhilesh 		s01 = vqtbl1q_u8(s01, mask0);
67c9e4dc02SPavan Nikhilesh 
68c9e4dc02SPavan Nikhilesh 		/* Store packet length and data length to mbuf. */
69c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m0->pkt_len = vgetq_lane_u64(s01, 0);
70c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m1->pkt_len = vgetq_lane_u64(s01, 1);
71c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m2->pkt_len = vgetq_lane_u64(s23, 0);
72c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m3->pkt_len = vgetq_lane_u64(s23, 1);
73c9e4dc02SPavan Nikhilesh 
74c9e4dc02SPavan Nikhilesh 		/* Reset rearm data. */
75c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m0->rearm_data = droq->rearm_data;
76c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m1->rearm_data = droq->rearm_data;
77c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m2->rearm_data = droq->rearm_data;
78c9e4dc02SPavan Nikhilesh 		*(uint64_t *)&m3->rearm_data = droq->rearm_data;
79c9e4dc02SPavan Nikhilesh 
80c9e4dc02SPavan Nikhilesh 		rx_pkts[pkts++] = m0;
81c9e4dc02SPavan Nikhilesh 		rx_pkts[pkts++] = m1;
82c9e4dc02SPavan Nikhilesh 		rx_pkts[pkts++] = m2;
83c9e4dc02SPavan Nikhilesh 		rx_pkts[pkts++] = m3;
84c9e4dc02SPavan Nikhilesh 		idx0 = otx_ep_incr_index(idx3, 1, nb_desc);
85c9e4dc02SPavan Nikhilesh 	}
86c9e4dc02SPavan Nikhilesh 	droq->read_idx = idx0;
87c9e4dc02SPavan Nikhilesh 
88c9e4dc02SPavan Nikhilesh 	droq->refill_count += new_pkts;
89c9e4dc02SPavan Nikhilesh 	droq->pkts_pending -= new_pkts;
90c9e4dc02SPavan Nikhilesh 	/* Stats */
91c9e4dc02SPavan Nikhilesh 	droq->stats.pkts_received += new_pkts;
92c9e4dc02SPavan Nikhilesh #if defined(RTE_ARCH_32)
93c9e4dc02SPavan Nikhilesh 	droq->stats.bytes_received += vgetq_lane_u32(bytes, 0);
94c9e4dc02SPavan Nikhilesh 	droq->stats.bytes_received += vgetq_lane_u32(bytes, 1);
95c9e4dc02SPavan Nikhilesh 	droq->stats.bytes_received += vgetq_lane_u32(bytes, 2);
96c9e4dc02SPavan Nikhilesh 	droq->stats.bytes_received += vgetq_lane_u32(bytes, 3);
97c9e4dc02SPavan Nikhilesh #else
98c9e4dc02SPavan Nikhilesh 	droq->stats.bytes_received += vaddvq_u32(bytes);
99c9e4dc02SPavan Nikhilesh #endif
100c9e4dc02SPavan Nikhilesh }
101c9e4dc02SPavan Nikhilesh 
102c9e4dc02SPavan Nikhilesh uint16_t __rte_noinline __rte_hot
103c9e4dc02SPavan Nikhilesh cnxk_ep_recv_pkts_neon(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
104c9e4dc02SPavan Nikhilesh {
105c9e4dc02SPavan Nikhilesh 	struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
106c9e4dc02SPavan Nikhilesh 	uint16_t new_pkts, vpkts;
107c9e4dc02SPavan Nikhilesh 
108c9e4dc02SPavan Nikhilesh 	/* Refill RX buffers */
109c9e4dc02SPavan Nikhilesh 	if (droq->refill_count >= DROQ_REFILL_THRESHOLD)
110c9e4dc02SPavan Nikhilesh 		cnxk_ep_rx_refill(droq);
111c9e4dc02SPavan Nikhilesh 
112c9e4dc02SPavan Nikhilesh 	new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
113c9e4dc02SPavan Nikhilesh 	vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE);
114c9e4dc02SPavan Nikhilesh 	cnxk_ep_process_pkts_vec_neon(rx_pkts, droq, vpkts);
115c9e4dc02SPavan Nikhilesh 	cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);
116c9e4dc02SPavan Nikhilesh 
117c9e4dc02SPavan Nikhilesh 	return new_pkts;
118c9e4dc02SPavan Nikhilesh }
119c9e4dc02SPavan Nikhilesh 
120c9e4dc02SPavan Nikhilesh uint16_t __rte_noinline __rte_hot
121c9e4dc02SPavan Nikhilesh cn9k_ep_recv_pkts_neon(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
122c9e4dc02SPavan Nikhilesh {
123c9e4dc02SPavan Nikhilesh 	struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
124c9e4dc02SPavan Nikhilesh 	uint16_t new_pkts, vpkts;
125c9e4dc02SPavan Nikhilesh 
126c9e4dc02SPavan Nikhilesh 	/* Refill RX buffers */
127c9e4dc02SPavan Nikhilesh 	if (droq->refill_count >= DROQ_REFILL_THRESHOLD) {
128c9e4dc02SPavan Nikhilesh 		cnxk_ep_rx_refill(droq);
129c9e4dc02SPavan Nikhilesh 	} else {
130c9e4dc02SPavan Nikhilesh 		/* SDP output goes into DROP state when output doorbell count
131c9e4dc02SPavan Nikhilesh 		 * goes below drop count. When door bell count is written with
132c9e4dc02SPavan Nikhilesh 		 * a value greater than drop count SDP output should come out
133c9e4dc02SPavan Nikhilesh 		 * of DROP state. Due to a race condition this is not happening.
134c9e4dc02SPavan Nikhilesh 		 * Writing doorbell register with 0 again may make SDP output
135c9e4dc02SPavan Nikhilesh 		 * come out of this state.
136c9e4dc02SPavan Nikhilesh 		 */
137c9e4dc02SPavan Nikhilesh 
138c9e4dc02SPavan Nikhilesh 		rte_write32(0, droq->pkts_credit_reg);
139c9e4dc02SPavan Nikhilesh 	}
140c9e4dc02SPavan Nikhilesh 
141c9e4dc02SPavan Nikhilesh 	new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
142c9e4dc02SPavan Nikhilesh 	vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_SSE);
143c9e4dc02SPavan Nikhilesh 	cnxk_ep_process_pkts_vec_neon(rx_pkts, droq, vpkts);
144c9e4dc02SPavan Nikhilesh 	cnxk_ep_process_pkts_scalar(&rx_pkts[vpkts], droq, new_pkts - vpkts);
145c9e4dc02SPavan Nikhilesh 
146c9e4dc02SPavan Nikhilesh 	return new_pkts;
147c9e4dc02SPavan Nikhilesh }
148