/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 * Copyright(c) 2018-2024 Arm Limited
 */
50668f68bSYoan Picchi
60668f68bSYoan Picchi #ifndef COMPARE_SIGNATURES_ARM_H
70668f68bSYoan Picchi #define COMPARE_SIGNATURES_ARM_H
80668f68bSYoan Picchi
90668f68bSYoan Picchi #include <inttypes.h>
100668f68bSYoan Picchi
110668f68bSYoan Picchi #include <rte_common.h>
120668f68bSYoan Picchi #include <rte_vect.h>
130668f68bSYoan Picchi
140668f68bSYoan Picchi #include "rte_cuckoo_hash.h"
150668f68bSYoan Picchi
/* Arm's version uses a densely packed hitmask buffer: every bit is in use. */
#define DENSE_HASH_BULK_LOOKUP 1
18ef801b59SYoan Picchi
190668f68bSYoan Picchi static inline void
compare_signatures_dense(uint16_t * hitmask_buffer,const uint16_t * prim_bucket_sigs,const uint16_t * sec_bucket_sigs,uint16_t sig,enum rte_hash_sig_compare_function sig_cmp_fn)20ef801b59SYoan Picchi compare_signatures_dense(uint16_t *hitmask_buffer,
21ef801b59SYoan Picchi const uint16_t *prim_bucket_sigs,
22ef801b59SYoan Picchi const uint16_t *sec_bucket_sigs,
230668f68bSYoan Picchi uint16_t sig,
240668f68bSYoan Picchi enum rte_hash_sig_compare_function sig_cmp_fn)
250668f68bSYoan Picchi {
26ef801b59SYoan Picchi static_assert(sizeof(*hitmask_buffer) >= 2 * (RTE_HASH_BUCKET_ENTRIES / 8),
27ef801b59SYoan Picchi "hitmask_buffer must be wide enough to fit a dense hitmask");
280668f68bSYoan Picchi
29ef801b59SYoan Picchi /* For match mask every bits indicates the match */
300668f68bSYoan Picchi switch (sig_cmp_fn) {
310668f68bSYoan Picchi #if defined(__ARM_NEON) && RTE_HASH_BUCKET_ENTRIES <= 8
320668f68bSYoan Picchi case RTE_HASH_COMPARE_NEON: {
33cf566e26SYoan Picchi uint16x8_t vmat, hit1, hit2;
34cf566e26SYoan Picchi const uint16x8_t mask = {0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
35cf566e26SYoan Picchi const uint16x8_t vsig = vld1q_dup_u16((uint16_t const *)&sig);
360668f68bSYoan Picchi
370668f68bSYoan Picchi /* Compare all signatures in the primary bucket */
38cf566e26SYoan Picchi vmat = vceqq_u16(vsig, vld1q_u16(prim_bucket_sigs));
39cf566e26SYoan Picchi hit1 = vandq_u16(vmat, mask);
40cf566e26SYoan Picchi
410668f68bSYoan Picchi /* Compare all signatures in the secondary bucket */
42cf566e26SYoan Picchi vmat = vceqq_u16(vsig, vld1q_u16(sec_bucket_sigs));
43cf566e26SYoan Picchi hit2 = vandq_u16(vmat, mask);
44cf566e26SYoan Picchi
45cf566e26SYoan Picchi hit2 = vshlq_n_u16(hit2, RTE_HASH_BUCKET_ENTRIES);
46cf566e26SYoan Picchi hit2 = vorrq_u16(hit1, hit2);
47cf566e26SYoan Picchi *hitmask_buffer = vaddvq_u16(hit2);
480668f68bSYoan Picchi break;
490668f68bSYoan Picchi }
500668f68bSYoan Picchi #endif
51*a40ac9bcSYoan Picchi #if defined(RTE_HAS_SVE_ACLE)
52*a40ac9bcSYoan Picchi case RTE_HASH_COMPARE_SVE: {
53*a40ac9bcSYoan Picchi svuint16_t vsign, shift, sv_matches;
54*a40ac9bcSYoan Picchi svbool_t pred, match, bucket_wide_pred;
55*a40ac9bcSYoan Picchi int i = 0;
56*a40ac9bcSYoan Picchi uint64_t vl = svcnth();
57*a40ac9bcSYoan Picchi
58*a40ac9bcSYoan Picchi vsign = svdup_u16(sig);
59*a40ac9bcSYoan Picchi shift = svindex_u16(0, 1);
60*a40ac9bcSYoan Picchi
61*a40ac9bcSYoan Picchi if (vl >= 2 * RTE_HASH_BUCKET_ENTRIES && RTE_HASH_BUCKET_ENTRIES <= 8) {
62*a40ac9bcSYoan Picchi svuint16_t primary_array_vect, secondary_array_vect;
63*a40ac9bcSYoan Picchi bucket_wide_pred = svwhilelt_b16(0, RTE_HASH_BUCKET_ENTRIES);
64*a40ac9bcSYoan Picchi primary_array_vect = svld1_u16(bucket_wide_pred, prim_bucket_sigs);
65*a40ac9bcSYoan Picchi secondary_array_vect = svld1_u16(bucket_wide_pred, sec_bucket_sigs);
66*a40ac9bcSYoan Picchi
67*a40ac9bcSYoan Picchi /* We merged the two vectors so we can do both comparisons at once */
68*a40ac9bcSYoan Picchi primary_array_vect = svsplice_u16(bucket_wide_pred, primary_array_vect,
69*a40ac9bcSYoan Picchi secondary_array_vect);
70*a40ac9bcSYoan Picchi pred = svwhilelt_b16(0, 2*RTE_HASH_BUCKET_ENTRIES);
71*a40ac9bcSYoan Picchi
72*a40ac9bcSYoan Picchi /* Compare all signatures in the buckets */
73*a40ac9bcSYoan Picchi match = svcmpeq_u16(pred, vsign, primary_array_vect);
74*a40ac9bcSYoan Picchi if (svptest_any(svptrue_b16(), match)) {
75*a40ac9bcSYoan Picchi sv_matches = svdup_u16(1);
76*a40ac9bcSYoan Picchi sv_matches = svlsl_u16_z(match, sv_matches, shift);
77*a40ac9bcSYoan Picchi *hitmask_buffer = svorv_u16(svptrue_b16(), sv_matches);
78*a40ac9bcSYoan Picchi }
79*a40ac9bcSYoan Picchi } else {
80*a40ac9bcSYoan Picchi do {
81*a40ac9bcSYoan Picchi pred = svwhilelt_b16(i, RTE_HASH_BUCKET_ENTRIES);
82*a40ac9bcSYoan Picchi uint16_t lower_half = 0;
83*a40ac9bcSYoan Picchi uint16_t upper_half = 0;
84*a40ac9bcSYoan Picchi /* Compare all signatures in the primary bucket */
85*a40ac9bcSYoan Picchi match = svcmpeq_u16(pred, vsign, svld1_u16(pred,
86*a40ac9bcSYoan Picchi &prim_bucket_sigs[i]));
87*a40ac9bcSYoan Picchi if (svptest_any(svptrue_b16(), match)) {
88*a40ac9bcSYoan Picchi sv_matches = svdup_u16(1);
89*a40ac9bcSYoan Picchi sv_matches = svlsl_u16_z(match, sv_matches, shift);
90*a40ac9bcSYoan Picchi lower_half = svorv_u16(svptrue_b16(), sv_matches);
91*a40ac9bcSYoan Picchi }
92*a40ac9bcSYoan Picchi /* Compare all signatures in the secondary bucket */
93*a40ac9bcSYoan Picchi match = svcmpeq_u16(pred, vsign, svld1_u16(pred,
94*a40ac9bcSYoan Picchi &sec_bucket_sigs[i]));
95*a40ac9bcSYoan Picchi if (svptest_any(svptrue_b16(), match)) {
96*a40ac9bcSYoan Picchi sv_matches = svdup_u16(1);
97*a40ac9bcSYoan Picchi sv_matches = svlsl_u16_z(match, sv_matches, shift);
98*a40ac9bcSYoan Picchi upper_half = svorv_u16(svptrue_b16(), sv_matches)
99*a40ac9bcSYoan Picchi << RTE_HASH_BUCKET_ENTRIES;
100*a40ac9bcSYoan Picchi }
101*a40ac9bcSYoan Picchi hitmask_buffer[i / 8] = upper_half | lower_half;
102*a40ac9bcSYoan Picchi i += vl;
103*a40ac9bcSYoan Picchi } while (i < RTE_HASH_BUCKET_ENTRIES);
104*a40ac9bcSYoan Picchi }
105*a40ac9bcSYoan Picchi break;
106*a40ac9bcSYoan Picchi }
107*a40ac9bcSYoan Picchi #endif
1080668f68bSYoan Picchi default:
109ef801b59SYoan Picchi for (unsigned int i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
110ef801b59SYoan Picchi *hitmask_buffer |= (sig == prim_bucket_sigs[i]) << i;
111ef801b59SYoan Picchi *hitmask_buffer |=
112ef801b59SYoan Picchi ((sig == sec_bucket_sigs[i]) << i) << RTE_HASH_BUCKET_ENTRIES;
1130668f68bSYoan Picchi }
1140668f68bSYoan Picchi }
1150668f68bSYoan Picchi }
1160668f68bSYoan Picchi #endif /* COMPARE_SIGNATURES_ARM_H */
117