xref: /dpdk/lib/lpm/rte_lpm_sve.h (revision b29ccbea24ae3a6da4ff3769c4fe702b0702a4b5)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Arm Limited
 */
499a2dd95SBruce Richardson 
599a2dd95SBruce Richardson #ifndef _RTE_LPM_SVE_H_
699a2dd95SBruce Richardson #define _RTE_LPM_SVE_H_
799a2dd95SBruce Richardson 
81094dd94SDavid Marchand #include <rte_compat.h>
999a2dd95SBruce Richardson #include <rte_vect.h>
1099a2dd95SBruce Richardson 
1199a2dd95SBruce Richardson #ifdef __cplusplus
1299a2dd95SBruce Richardson extern "C" {
1399a2dd95SBruce Richardson #endif
1499a2dd95SBruce Richardson 
15*b29ccbeaSRuifeng Wang static inline int
__rte_lpm_lookup_vec(const struct rte_lpm * lpm,const uint32_t * ips,uint32_t * __rte_restrict next_hops,const uint32_t n)1699a2dd95SBruce Richardson __rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
1799a2dd95SBruce Richardson 		uint32_t *__rte_restrict next_hops, const uint32_t n)
1899a2dd95SBruce Richardson {
19*b29ccbeaSRuifeng Wang 	uint32_t i;
20*b29ccbeaSRuifeng Wang 	uint64_t vl = svcntw();
21*b29ccbeaSRuifeng Wang 	svuint32_t v_ip, v_idx, v_tbl24, v_tbl8;
22*b29ccbeaSRuifeng Wang 	svuint32_t v_mask_xv, v_mask_v;
23*b29ccbeaSRuifeng Wang 	svbool_t pg = svptrue_b32();
2499a2dd95SBruce Richardson 	svbool_t pv;
2599a2dd95SBruce Richardson 
26*b29ccbeaSRuifeng Wang 	for (i = 0; i < n; i++)
27*b29ccbeaSRuifeng Wang 		next_hops[i] = 0;
28*b29ccbeaSRuifeng Wang 
29*b29ccbeaSRuifeng Wang 	for (i = 0; i < n - vl; i += vl) {
3099a2dd95SBruce Richardson 		v_ip = svld1(pg, &ips[i]);
3199a2dd95SBruce Richardson 		/* Get indices for tbl24[] */
3299a2dd95SBruce Richardson 		v_idx = svlsr_x(pg, v_ip, 8);
3399a2dd95SBruce Richardson 		/* Extract values from tbl24[] */
3499a2dd95SBruce Richardson 		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
3599a2dd95SBruce Richardson 						v_idx);
3699a2dd95SBruce Richardson 
3799a2dd95SBruce Richardson 		/* Create mask with valid set */
3899a2dd95SBruce Richardson 		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
3999a2dd95SBruce Richardson 		/* Create mask with valid and valid_group set */
4099a2dd95SBruce Richardson 		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
4199a2dd95SBruce Richardson 		/* Create predicate for tbl24 entries: (valid && !valid_group) */
4299a2dd95SBruce Richardson 		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
43*b29ccbeaSRuifeng Wang 		svst1(pv, &next_hops[i], v_tbl24);
4499a2dd95SBruce Richardson 
4599a2dd95SBruce Richardson 		/* Update predicate for tbl24 entries: (valid && valid_group) */
4699a2dd95SBruce Richardson 		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
47*b29ccbeaSRuifeng Wang 		if (svptest_any(pg, pv)) {
4899a2dd95SBruce Richardson 			/* Compute tbl8 index */
4999a2dd95SBruce Richardson 			v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff));
5099a2dd95SBruce Richardson 			v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
5199a2dd95SBruce Richardson 			v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
5299a2dd95SBruce Richardson 					v_idx);
5399a2dd95SBruce Richardson 			/* Extract values from tbl8[] */
5499a2dd95SBruce Richardson 			v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
5599a2dd95SBruce Richardson 							v_idx);
5699a2dd95SBruce Richardson 			/* Update predicate for tbl8 entries: (valid) */
5799a2dd95SBruce Richardson 			pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
58*b29ccbeaSRuifeng Wang 			svst1(pv, &next_hops[i], v_tbl8);
59*b29ccbeaSRuifeng Wang 		}
60*b29ccbeaSRuifeng Wang 	}
6199a2dd95SBruce Richardson 
6299a2dd95SBruce Richardson 	pg = svwhilelt_b32(i, n);
63*b29ccbeaSRuifeng Wang 	if (svptest_any(svptrue_b32(), pg)) {
64*b29ccbeaSRuifeng Wang 		v_ip = svld1(pg, &ips[i]);
65*b29ccbeaSRuifeng Wang 		/* Get indices for tbl24[] */
66*b29ccbeaSRuifeng Wang 		v_idx = svlsr_x(pg, v_ip, 8);
67*b29ccbeaSRuifeng Wang 		/* Extract values from tbl24[] */
68*b29ccbeaSRuifeng Wang 		v_tbl24 = svld1_gather_index(pg, (const uint32_t *)lpm->tbl24,
69*b29ccbeaSRuifeng Wang 						v_idx);
70*b29ccbeaSRuifeng Wang 
71*b29ccbeaSRuifeng Wang 		/* Create mask with valid set */
72*b29ccbeaSRuifeng Wang 		v_mask_v = svdup_u32_z(pg, RTE_LPM_LOOKUP_SUCCESS);
73*b29ccbeaSRuifeng Wang 		/* Create mask with valid and valid_group set */
74*b29ccbeaSRuifeng Wang 		v_mask_xv = svdup_u32_z(pg, RTE_LPM_VALID_EXT_ENTRY_BITMASK);
75*b29ccbeaSRuifeng Wang 		/* Create predicate for tbl24 entries: (valid && !valid_group) */
76*b29ccbeaSRuifeng Wang 		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_v);
77*b29ccbeaSRuifeng Wang 		svst1(pv, &next_hops[i], v_tbl24);
78*b29ccbeaSRuifeng Wang 
79*b29ccbeaSRuifeng Wang 		/* Update predicate for tbl24 entries: (valid && valid_group) */
80*b29ccbeaSRuifeng Wang 		pv = svcmpeq(pg, svand_z(pg, v_tbl24, v_mask_xv), v_mask_xv);
81*b29ccbeaSRuifeng Wang 		if (svptest_any(pg, pv)) {
82*b29ccbeaSRuifeng Wang 			/* Compute tbl8 index */
83*b29ccbeaSRuifeng Wang 			v_idx = svand_x(pv, v_tbl24, svdup_u32_z(pv, 0xffffff));
84*b29ccbeaSRuifeng Wang 			v_idx = svmul_x(pv, v_idx, RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
85*b29ccbeaSRuifeng Wang 			v_idx = svadd_x(pv, svand_x(pv, v_ip, svdup_u32_z(pv, 0xff)),
86*b29ccbeaSRuifeng Wang 					v_idx);
87*b29ccbeaSRuifeng Wang 			/* Extract values from tbl8[] */
88*b29ccbeaSRuifeng Wang 			v_tbl8 = svld1_gather_index(pv, (const uint32_t *)lpm->tbl8,
89*b29ccbeaSRuifeng Wang 							v_idx);
90*b29ccbeaSRuifeng Wang 			/* Update predicate for tbl8 entries: (valid) */
91*b29ccbeaSRuifeng Wang 			pv = svcmpeq(pv, svand_z(pv, v_tbl8, v_mask_v), v_mask_v);
92*b29ccbeaSRuifeng Wang 			svst1(pv, &next_hops[i], v_tbl8);
93*b29ccbeaSRuifeng Wang 		}
9499a2dd95SBruce Richardson 	}
9599a2dd95SBruce Richardson 
96*b29ccbeaSRuifeng Wang 	return 0;
9799a2dd95SBruce Richardson }
9899a2dd95SBruce Richardson #ifdef __cplusplus
9999a2dd95SBruce Richardson }
10099a2dd95SBruce Richardson #endif
10199a2dd95SBruce Richardson 
10299a2dd95SBruce Richardson #endif /* _RTE_LPM_SVE_H_ */
103