xref: /dpdk/lib/node/ip4_lookup_neon.h (revision be4c0cb4901fc0703786e0d3da4e0123306e4539)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef __INCLUDE_IP4_LOOKUP_NEON_H__
#define __INCLUDE_IP4_LOOKUP_NEON_H__

/* ARM64 NEON */
static uint16_t
ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
			void **objs, uint16_t nb_objs)
{
	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
	struct rte_ipv4_hdr *ipv4_hdr;
	void **to_next, **from;
	uint16_t last_spec = 0;
	rte_edge_t next_index;
	uint16_t n_left_from;
	uint16_t held = 0;
	uint32_t drop_nh;
	rte_xmm_t result;
	rte_xmm_t priv01;
	rte_xmm_t priv23;
	int32x4_t dip;
	int rc, i;

	/* Speculative next */
	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
	/* Drop node */
	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
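	/*
	 * LPM results are encoded as (next edge << 16) | next-hop id; routes
	 * are normally installed in that form, e.g. via rte_node_ip4_route_add().
	 */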

	pkts = (struct rte_mbuf **)objs;
	from = objs;
	n_left_from = nb_objs;

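	/* Prefetch the mbuf pointer array beyond its first cache line */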
	for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE)
		rte_prefetch0(&objs[i]);

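	/* Prefetch the IPv4 headers of the first four packets */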
	for (i = 0; i < 4 && i < n_left_from; i++)
		rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,
						sizeof(struct rte_ether_hdr)));

	dip = vdupq_n_s32(0);
	/* Get stream for the speculated next node */
	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
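	/* Vector loop: four packets per iteration, scalar tail loop below */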
	while (n_left_from >= 4) {
#if RTE_GRAPH_BURST_SIZE > 64
		/* Prefetch next-next mbufs */
		if (likely(n_left_from > 11)) {
			rte_prefetch0(pkts[8]);
			rte_prefetch0(pkts[9]);
			rte_prefetch0(pkts[10]);
			rte_prefetch0(pkts[11]);
		}
#endif
		/* Prefetch next mbuf data */
		if (likely(n_left_from > 7)) {
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *,
						sizeof(struct rte_ether_hdr)));
			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *,
						sizeof(struct rte_ether_hdr)));
		}

		mbuf0 = pkts[0];
		mbuf1 = pkts[1];
		mbuf2 = pkts[2];
		mbuf3 = pkts[3];

		pkts += 4;
		n_left_from -= 4;

		/* Extract DIP of mbuf0 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
						sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 0);
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv01.u16[1] = ipv4_hdr->time_to_live;
		priv01.u32[1] = ipv4_hdr->hdr_checksum;

		/* Extract DIP of mbuf1 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *,
						sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 1);
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv01.u16[5] = ipv4_hdr->time_to_live;
		priv01.u32[3] = ipv4_hdr->hdr_checksum;

		/* Extract DIP of mbuf2 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *,
						sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 2);
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv23.u16[1] = ipv4_hdr->time_to_live;
		priv23.u32[1] = ipv4_hdr->hdr_checksum;

		/* Extract DIP of mbuf3 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *,
						sizeof(struct rte_ether_hdr));
		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 3);

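		/*
		 * dst_addr was loaded in network byte order; byte-swap each
		 * 32-bit lane so the LPM lookup sees host byte order.
		 */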
		dip = vreinterpretq_s32_u8(
			vrev32q_u8(vreinterpretq_u8_s32(dip)));
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		priv23.u16[5] = ipv4_hdr->time_to_live;
		priv23.u32[3] = ipv4_hdr->hdr_checksum;

		/* Perform LPM lookup to get NH and next node */
		rte_lpm_lookupx4(lpm, dip, result.u32, drop_nh);
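		/* Lower 16 bits of each result carry the next-hop id for the mbuf private area */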
		priv01.u16[0] = result.u16[0];
		priv01.u16[4] = result.u16[2];
		priv23.u16[0] = result.u16[4];
		priv23.u16[4] = result.u16[6];
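		/* Bump xstat 0 for every packet that resolved to the drop edge (lookup miss) */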
		NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[1] == (drop_nh >> 16), 1);
		NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[3] == (drop_nh >> 16), 1);
		NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[5] == (drop_nh >> 16), 1);
		NODE_INCREMENT_XSTAT_ID(node, 0, result.u16[7] == (drop_nh >> 16), 1);

		node_mbuf_priv1(mbuf0, dyn)->u = priv01.u64[0];
		node_mbuf_priv1(mbuf1, dyn)->u = priv01.u64[1];
		node_mbuf_priv1(mbuf2, dyn)->u = priv23.u64[0];
		node_mbuf_priv1(mbuf3, dyn)->u = priv23.u64[1];

		/* Enqueue four to next node */
		rte_edge_t fix_spec = ((next_index == result.u16[1]) &&
				       (result.u16[1] == result.u16[3]) &&
				       (result.u16[3] == result.u16[5]) &&
				       (result.u16[5] == result.u16[7]));

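		/*
		 * If any of the four packets missed the speculated edge, flush
		 * the correctly speculated run and steer each packet explicitly.
		 */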
		if (unlikely(fix_spec == 0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			/* Next0 */
			if (next_index == result.u16[1]) {
				to_next[0] = from[0];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[1],
						    from[0]);
			}

			/* Next1 */
			if (next_index == result.u16[3]) {
				to_next[0] = from[1];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[3],
						    from[1]);
			}

			/* Next2 */
			if (next_index == result.u16[5]) {
				to_next[0] = from[2];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[5],
						    from[2]);
			}

			/* Next3 */
			if (next_index == result.u16[7]) {
				to_next[0] = from[3];
				to_next++;
				held++;
			} else {
				rte_node_enqueue_x1(graph, node, result.u16[7],
						    from[3]);
			}

			from += 4;
		} else {
			last_spec += 4;
		}
	}

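	/* Scalar tail: handle the remaining (up to three) packets one at a time */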
	while (n_left_from > 0) {
		uint32_t next_hop;
		uint16_t next0;

		mbuf0 = pkts[0];

		pkts += 1;
		n_left_from -= 1;

		/* Extract DIP of mbuf0 */
		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
						sizeof(struct rte_ether_hdr));
		/* Extract cksum, ttl as ipv4 hdr is in cache */
		node_mbuf_priv1(mbuf0, dyn)->cksum = ipv4_hdr->hdr_checksum;
		node_mbuf_priv1(mbuf0, dyn)->ttl = ipv4_hdr->time_to_live;

		rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr),
				    &next_hop);
		next_hop = (rc == 0) ? next_hop : drop_nh;

		NODE_INCREMENT_XSTAT_ID(node, 0, rc != 0, 1);
		node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop;
		next_hop = next_hop >> 16;
		next0 = (uint16_t)next_hop;

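		/* A non-zero XOR means this packet deviates from the speculated edge */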
		if (unlikely(next_index ^ next0)) {
			/* Copy things successfully speculated till now */
			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
			from += last_spec;
			to_next += last_spec;
			held += last_spec;
			last_spec = 0;

			rte_node_enqueue_x1(graph, node, next0, from[0]);
			from += 1;
		} else {
			last_spec += 1;
		}
	}

	/* !!! Home run !!! */
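	/* Every packet matched the speculated edge; move the whole stream at once */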
	if (likely(last_spec == nb_objs)) {
		rte_node_next_stream_move(graph, node, next_index);
		return nb_objs;
	}
	held += last_spec;
	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
	rte_node_next_stream_put(graph, node, next_index, held);

	return nb_objs;
}

#endif /* __INCLUDE_IP4_LOOKUP_NEON_H__ */