xref: /dpdk/lib/hash/compare_signatures_arm.h (revision a40ac9bcd85c840aec776729f950663d3a61eaf5)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 * Copyright(c) 2018-2024 Arm Limited
 */
50668f68bSYoan Picchi 
60668f68bSYoan Picchi #ifndef COMPARE_SIGNATURES_ARM_H
70668f68bSYoan Picchi #define COMPARE_SIGNATURES_ARM_H
80668f68bSYoan Picchi 
90668f68bSYoan Picchi #include <inttypes.h>
100668f68bSYoan Picchi 
110668f68bSYoan Picchi #include <rte_common.h>
120668f68bSYoan Picchi #include <rte_vect.h>
130668f68bSYoan Picchi 
140668f68bSYoan Picchi #include "rte_cuckoo_hash.h"
150668f68bSYoan Picchi 
/*
 * Set to 1 because the Arm signature comparison fills a densely packed
 * hitmask buffer, i.e. every bit of the buffer is meaningful.
 */
#define DENSE_HASH_BULK_LOOKUP 1
18ef801b59SYoan Picchi 
190668f68bSYoan Picchi static inline void
compare_signatures_dense(uint16_t * hitmask_buffer,const uint16_t * prim_bucket_sigs,const uint16_t * sec_bucket_sigs,uint16_t sig,enum rte_hash_sig_compare_function sig_cmp_fn)20ef801b59SYoan Picchi compare_signatures_dense(uint16_t *hitmask_buffer,
21ef801b59SYoan Picchi 			const uint16_t *prim_bucket_sigs,
22ef801b59SYoan Picchi 			const uint16_t *sec_bucket_sigs,
230668f68bSYoan Picchi 			uint16_t sig,
240668f68bSYoan Picchi 			enum rte_hash_sig_compare_function sig_cmp_fn)
250668f68bSYoan Picchi {
26ef801b59SYoan Picchi 	static_assert(sizeof(*hitmask_buffer) >= 2 * (RTE_HASH_BUCKET_ENTRIES / 8),
27ef801b59SYoan Picchi 		"hitmask_buffer must be wide enough to fit a dense hitmask");
280668f68bSYoan Picchi 
29ef801b59SYoan Picchi 	/* For match mask every bits indicates the match */
300668f68bSYoan Picchi 	switch (sig_cmp_fn) {
310668f68bSYoan Picchi #if defined(__ARM_NEON) && RTE_HASH_BUCKET_ENTRIES <= 8
320668f68bSYoan Picchi 	case RTE_HASH_COMPARE_NEON: {
33cf566e26SYoan Picchi 		uint16x8_t vmat, hit1, hit2;
34cf566e26SYoan Picchi 		const uint16x8_t mask = {0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
35cf566e26SYoan Picchi 		const uint16x8_t vsig = vld1q_dup_u16((uint16_t const *)&sig);
360668f68bSYoan Picchi 
370668f68bSYoan Picchi 		/* Compare all signatures in the primary bucket */
38cf566e26SYoan Picchi 		vmat = vceqq_u16(vsig, vld1q_u16(prim_bucket_sigs));
39cf566e26SYoan Picchi 		hit1 = vandq_u16(vmat, mask);
40cf566e26SYoan Picchi 
410668f68bSYoan Picchi 		/* Compare all signatures in the secondary bucket */
42cf566e26SYoan Picchi 		vmat = vceqq_u16(vsig, vld1q_u16(sec_bucket_sigs));
43cf566e26SYoan Picchi 		hit2 = vandq_u16(vmat, mask);
44cf566e26SYoan Picchi 
45cf566e26SYoan Picchi 		hit2 = vshlq_n_u16(hit2, RTE_HASH_BUCKET_ENTRIES);
46cf566e26SYoan Picchi 		hit2 = vorrq_u16(hit1, hit2);
47cf566e26SYoan Picchi 		*hitmask_buffer = vaddvq_u16(hit2);
480668f68bSYoan Picchi 		break;
490668f68bSYoan Picchi 	}
500668f68bSYoan Picchi #endif
51*a40ac9bcSYoan Picchi #if defined(RTE_HAS_SVE_ACLE)
52*a40ac9bcSYoan Picchi 	case RTE_HASH_COMPARE_SVE: {
53*a40ac9bcSYoan Picchi 		svuint16_t vsign, shift, sv_matches;
54*a40ac9bcSYoan Picchi 		svbool_t pred, match, bucket_wide_pred;
55*a40ac9bcSYoan Picchi 		int i = 0;
56*a40ac9bcSYoan Picchi 		uint64_t vl = svcnth();
57*a40ac9bcSYoan Picchi 
58*a40ac9bcSYoan Picchi 		vsign = svdup_u16(sig);
59*a40ac9bcSYoan Picchi 		shift = svindex_u16(0, 1);
60*a40ac9bcSYoan Picchi 
61*a40ac9bcSYoan Picchi 		if (vl >= 2 * RTE_HASH_BUCKET_ENTRIES && RTE_HASH_BUCKET_ENTRIES <= 8) {
62*a40ac9bcSYoan Picchi 			svuint16_t primary_array_vect, secondary_array_vect;
63*a40ac9bcSYoan Picchi 			bucket_wide_pred = svwhilelt_b16(0, RTE_HASH_BUCKET_ENTRIES);
64*a40ac9bcSYoan Picchi 			primary_array_vect = svld1_u16(bucket_wide_pred, prim_bucket_sigs);
65*a40ac9bcSYoan Picchi 			secondary_array_vect = svld1_u16(bucket_wide_pred, sec_bucket_sigs);
66*a40ac9bcSYoan Picchi 
67*a40ac9bcSYoan Picchi 			/* We merged the two vectors so we can do both comparisons at once */
68*a40ac9bcSYoan Picchi 			primary_array_vect = svsplice_u16(bucket_wide_pred, primary_array_vect,
69*a40ac9bcSYoan Picchi 				secondary_array_vect);
70*a40ac9bcSYoan Picchi 			pred = svwhilelt_b16(0, 2*RTE_HASH_BUCKET_ENTRIES);
71*a40ac9bcSYoan Picchi 
72*a40ac9bcSYoan Picchi 			/* Compare all signatures in the buckets */
73*a40ac9bcSYoan Picchi 			match = svcmpeq_u16(pred, vsign, primary_array_vect);
74*a40ac9bcSYoan Picchi 			if (svptest_any(svptrue_b16(), match)) {
75*a40ac9bcSYoan Picchi 				sv_matches = svdup_u16(1);
76*a40ac9bcSYoan Picchi 				sv_matches = svlsl_u16_z(match, sv_matches, shift);
77*a40ac9bcSYoan Picchi 				*hitmask_buffer = svorv_u16(svptrue_b16(), sv_matches);
78*a40ac9bcSYoan Picchi 			}
79*a40ac9bcSYoan Picchi 		} else {
80*a40ac9bcSYoan Picchi 			do {
81*a40ac9bcSYoan Picchi 				pred = svwhilelt_b16(i, RTE_HASH_BUCKET_ENTRIES);
82*a40ac9bcSYoan Picchi 				uint16_t lower_half = 0;
83*a40ac9bcSYoan Picchi 				uint16_t upper_half = 0;
84*a40ac9bcSYoan Picchi 				/* Compare all signatures in the primary bucket */
85*a40ac9bcSYoan Picchi 				match = svcmpeq_u16(pred, vsign, svld1_u16(pred,
86*a40ac9bcSYoan Picchi 					&prim_bucket_sigs[i]));
87*a40ac9bcSYoan Picchi 				if (svptest_any(svptrue_b16(), match)) {
88*a40ac9bcSYoan Picchi 					sv_matches = svdup_u16(1);
89*a40ac9bcSYoan Picchi 					sv_matches = svlsl_u16_z(match, sv_matches, shift);
90*a40ac9bcSYoan Picchi 					lower_half = svorv_u16(svptrue_b16(), sv_matches);
91*a40ac9bcSYoan Picchi 				}
92*a40ac9bcSYoan Picchi 				/* Compare all signatures in the secondary bucket */
93*a40ac9bcSYoan Picchi 				match = svcmpeq_u16(pred, vsign, svld1_u16(pred,
94*a40ac9bcSYoan Picchi 					&sec_bucket_sigs[i]));
95*a40ac9bcSYoan Picchi 				if (svptest_any(svptrue_b16(), match)) {
96*a40ac9bcSYoan Picchi 					sv_matches = svdup_u16(1);
97*a40ac9bcSYoan Picchi 					sv_matches = svlsl_u16_z(match, sv_matches, shift);
98*a40ac9bcSYoan Picchi 					upper_half = svorv_u16(svptrue_b16(), sv_matches)
99*a40ac9bcSYoan Picchi 						<< RTE_HASH_BUCKET_ENTRIES;
100*a40ac9bcSYoan Picchi 				}
101*a40ac9bcSYoan Picchi 				hitmask_buffer[i / 8] = upper_half | lower_half;
102*a40ac9bcSYoan Picchi 				i += vl;
103*a40ac9bcSYoan Picchi 			} while (i < RTE_HASH_BUCKET_ENTRIES);
104*a40ac9bcSYoan Picchi 		}
105*a40ac9bcSYoan Picchi 		break;
106*a40ac9bcSYoan Picchi 	}
107*a40ac9bcSYoan Picchi #endif
1080668f68bSYoan Picchi 	default:
109ef801b59SYoan Picchi 		for (unsigned int i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
110ef801b59SYoan Picchi 			*hitmask_buffer |= (sig == prim_bucket_sigs[i]) << i;
111ef801b59SYoan Picchi 			*hitmask_buffer |=
112ef801b59SYoan Picchi 				((sig == sec_bucket_sigs[i]) << i) << RTE_HASH_BUCKET_ENTRIES;
1130668f68bSYoan Picchi 		}
1140668f68bSYoan Picchi 	}
1150668f68bSYoan Picchi }
1160668f68bSYoan Picchi #endif /* COMPARE_SIGNATURES_ARM_H */
117