199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson * Copyright(c) 2017 Intel Corporation
399a2dd95SBruce Richardson */
499a2dd95SBruce Richardson
599a2dd95SBruce Richardson #include <math.h>
699a2dd95SBruce Richardson #include <string.h>
799a2dd95SBruce Richardson
899a2dd95SBruce Richardson #include <rte_malloc.h>
999a2dd95SBruce Richardson #include <rte_errno.h>
1099a2dd95SBruce Richardson #include <rte_log.h>
1199a2dd95SBruce Richardson
12*0e21c7c0SDavid Marchand #include "member.h"
1399a2dd95SBruce Richardson #include "rte_member.h"
1499a2dd95SBruce Richardson #include "rte_member_vbf.h"
1599a2dd95SBruce Richardson
1699a2dd95SBruce Richardson /*
1799a2dd95SBruce Richardson * vBF currently implemented as a big array.
1899a2dd95SBruce Richardson * The BFs have a vertical layout. Bits in same location of all bfs will stay
1999a2dd95SBruce Richardson * in the same cache line.
2099a2dd95SBruce Richardson * For example, if we have 32 bloom filters, we use a uint32_t array to
2199a2dd95SBruce Richardson * represent all of them. array[0] represent the first location of all the
2299a2dd95SBruce Richardson * bloom filters, array[1] represents the second location of all the
2399a2dd95SBruce Richardson * bloom filters, etc. The advantage of this layout is to minimize the average
2499a2dd95SBruce Richardson * number of memory accesses to test all bloom filters.
2599a2dd95SBruce Richardson *
2699a2dd95SBruce Richardson * Currently the implementation supports vBF containing 1,2,4,8,16,32 BFs.
2799a2dd95SBruce Richardson */
2899a2dd95SBruce Richardson int
rte_member_create_vbf(struct rte_member_setsum * ss,const struct rte_member_parameters * params)2999a2dd95SBruce Richardson rte_member_create_vbf(struct rte_member_setsum *ss,
3099a2dd95SBruce Richardson const struct rte_member_parameters *params)
3199a2dd95SBruce Richardson {
3299a2dd95SBruce Richardson
3399a2dd95SBruce Richardson if (params->num_set > RTE_MEMBER_MAX_BF ||
3499a2dd95SBruce Richardson !rte_is_power_of_2(params->num_set) ||
3599a2dd95SBruce Richardson params->num_keys == 0 ||
3699a2dd95SBruce Richardson params->false_positive_rate == 0 ||
3799a2dd95SBruce Richardson params->false_positive_rate > 1) {
3899a2dd95SBruce Richardson rte_errno = EINVAL;
39*0e21c7c0SDavid Marchand MEMBER_LOG(ERR, "Membership vBF create with invalid parameters");
4099a2dd95SBruce Richardson return -EINVAL;
4199a2dd95SBruce Richardson }
4299a2dd95SBruce Richardson
4399a2dd95SBruce Richardson /* We assume expected keys evenly distribute to all BFs */
4499a2dd95SBruce Richardson uint32_t num_keys_per_bf = 1 + (params->num_keys - 1) / ss->num_set;
4599a2dd95SBruce Richardson
4699a2dd95SBruce Richardson /*
4799a2dd95SBruce Richardson * Note that the false positive rate is for all BFs in the vBF
4899a2dd95SBruce Richardson * such that the single BF's false positive rate needs to be
4999a2dd95SBruce Richardson * calculated.
5099a2dd95SBruce Richardson * Assume each BF's False positive rate is fp_one_bf. The total false
5199a2dd95SBruce Richardson * positive rate is fp = 1-(1-fp_one_bf)^n.
5299a2dd95SBruce Richardson * => fp_one_bf = 1 - (1-fp)^(1/n)
5399a2dd95SBruce Richardson */
5499a2dd95SBruce Richardson
5599a2dd95SBruce Richardson float fp_one_bf = 1 - pow((1 - params->false_positive_rate),
5699a2dd95SBruce Richardson 1.0 / ss->num_set);
5799a2dd95SBruce Richardson
5899a2dd95SBruce Richardson if (fp_one_bf == 0) {
5999a2dd95SBruce Richardson rte_errno = EINVAL;
60*0e21c7c0SDavid Marchand MEMBER_LOG(ERR, "Membership BF false positive rate is too small");
6199a2dd95SBruce Richardson return -EINVAL;
6299a2dd95SBruce Richardson }
6399a2dd95SBruce Richardson
6499a2dd95SBruce Richardson uint32_t bits = ceil((num_keys_per_bf *
6599a2dd95SBruce Richardson log(fp_one_bf)) /
6699a2dd95SBruce Richardson log(1.0 / (pow(2.0, log(2.0)))));
6799a2dd95SBruce Richardson
6899a2dd95SBruce Richardson /* We round to power of 2 for performance during lookup */
6999a2dd95SBruce Richardson ss->bits = rte_align32pow2(bits);
7099a2dd95SBruce Richardson
7199a2dd95SBruce Richardson ss->num_hashes = (uint32_t)(log(2.0) * bits / num_keys_per_bf);
7299a2dd95SBruce Richardson ss->bit_mask = ss->bits - 1;
7399a2dd95SBruce Richardson
7499a2dd95SBruce Richardson /*
7599a2dd95SBruce Richardson * Since we round the bits to power of 2, the final false positive
7699a2dd95SBruce Richardson * rate will probably not be same as the user specified. We log the
7799a2dd95SBruce Richardson * new value as debug message.
7899a2dd95SBruce Richardson */
7999a2dd95SBruce Richardson float new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf *
8099a2dd95SBruce Richardson ss->num_hashes)), ss->num_hashes);
8199a2dd95SBruce Richardson new_fp = 1 - pow((1 - new_fp), ss->num_set);
8299a2dd95SBruce Richardson
8399a2dd95SBruce Richardson /*
8499a2dd95SBruce Richardson * Reduce hash function count, until we approach the user specified
8599a2dd95SBruce Richardson * false-positive rate. Otherwise it is too conservative
8699a2dd95SBruce Richardson */
8799a2dd95SBruce Richardson int tmp_num_hash = ss->num_hashes;
8899a2dd95SBruce Richardson
8999a2dd95SBruce Richardson while (tmp_num_hash > 1) {
9099a2dd95SBruce Richardson float tmp_fp = new_fp;
9199a2dd95SBruce Richardson
9299a2dd95SBruce Richardson tmp_num_hash--;
9399a2dd95SBruce Richardson new_fp = pow((1 - pow((1 - 1.0 / ss->bits), num_keys_per_bf *
9499a2dd95SBruce Richardson tmp_num_hash)), tmp_num_hash);
9599a2dd95SBruce Richardson new_fp = 1 - pow((1 - new_fp), ss->num_set);
9699a2dd95SBruce Richardson
9799a2dd95SBruce Richardson if (new_fp > params->false_positive_rate) {
9899a2dd95SBruce Richardson new_fp = tmp_fp;
9999a2dd95SBruce Richardson tmp_num_hash++;
10099a2dd95SBruce Richardson break;
10199a2dd95SBruce Richardson }
10299a2dd95SBruce Richardson }
10399a2dd95SBruce Richardson
10499a2dd95SBruce Richardson ss->num_hashes = tmp_num_hash;
10599a2dd95SBruce Richardson
10699a2dd95SBruce Richardson /*
10799a2dd95SBruce Richardson * To avoid multiplication and division:
10899a2dd95SBruce Richardson * mul_shift is used for multiplication shift during bit test
10999a2dd95SBruce Richardson * div_shift is used for division shift, to be divided by number of bits
11099a2dd95SBruce Richardson * represented by a uint32_t variable
11199a2dd95SBruce Richardson */
112de0ec3c2STyler Retzlaff ss->mul_shift = rte_ctz32(ss->num_set);
113de0ec3c2STyler Retzlaff ss->div_shift = rte_ctz32(32 >> ss->mul_shift);
11499a2dd95SBruce Richardson
115*0e21c7c0SDavid Marchand MEMBER_LOG(DEBUG, "vector bloom filter created, "
11699a2dd95SBruce Richardson "each bloom filter expects %u keys, needs %u bits, %u hashes, "
11799a2dd95SBruce Richardson "with false positive rate set as %.5f, "
118*0e21c7c0SDavid Marchand "The new calculated vBF false positive rate is %.5f",
11999a2dd95SBruce Richardson num_keys_per_bf, ss->bits, ss->num_hashes, fp_one_bf, new_fp);
12099a2dd95SBruce Richardson
12199a2dd95SBruce Richardson ss->table = rte_zmalloc_socket(NULL, ss->num_set * (ss->bits >> 3),
12299a2dd95SBruce Richardson RTE_CACHE_LINE_SIZE, ss->socket_id);
12399a2dd95SBruce Richardson if (ss->table == NULL)
12499a2dd95SBruce Richardson return -ENOMEM;
12599a2dd95SBruce Richardson
12699a2dd95SBruce Richardson return 0;
12799a2dd95SBruce Richardson }
12899a2dd95SBruce Richardson
12999a2dd95SBruce Richardson static inline uint32_t
test_bit(uint32_t bit_loc,const struct rte_member_setsum * ss)13099a2dd95SBruce Richardson test_bit(uint32_t bit_loc, const struct rte_member_setsum *ss)
13199a2dd95SBruce Richardson {
13299a2dd95SBruce Richardson uint32_t *vbf = ss->table;
13399a2dd95SBruce Richardson uint32_t n = ss->num_set;
13499a2dd95SBruce Richardson uint32_t div_shift = ss->div_shift;
13599a2dd95SBruce Richardson uint32_t mul_shift = ss->mul_shift;
13699a2dd95SBruce Richardson /*
13799a2dd95SBruce Richardson * a is how many bits in one BF are represented by one 32bit
13899a2dd95SBruce Richardson * variable.
13999a2dd95SBruce Richardson */
14099a2dd95SBruce Richardson uint32_t a = 32 >> mul_shift;
14199a2dd95SBruce Richardson /*
14299a2dd95SBruce Richardson * x>>b is the divide, x & (a-1) is the mod, & (1<<n-1) to mask out bits
14399a2dd95SBruce Richardson * we do not need
14499a2dd95SBruce Richardson */
14599a2dd95SBruce Richardson return (vbf[bit_loc >> div_shift] >>
14699a2dd95SBruce Richardson ((bit_loc & (a - 1)) << mul_shift)) & ((1ULL << n) - 1);
14799a2dd95SBruce Richardson }
14899a2dd95SBruce Richardson
14999a2dd95SBruce Richardson static inline void
set_bit(uint32_t bit_loc,const struct rte_member_setsum * ss,int32_t set)15099a2dd95SBruce Richardson set_bit(uint32_t bit_loc, const struct rte_member_setsum *ss, int32_t set)
15199a2dd95SBruce Richardson {
15299a2dd95SBruce Richardson uint32_t *vbf = ss->table;
15399a2dd95SBruce Richardson uint32_t div_shift = ss->div_shift;
15499a2dd95SBruce Richardson uint32_t mul_shift = ss->mul_shift;
15599a2dd95SBruce Richardson uint32_t a = 32 >> mul_shift;
15699a2dd95SBruce Richardson
15799a2dd95SBruce Richardson vbf[bit_loc >> div_shift] |=
15899a2dd95SBruce Richardson 1UL << (((bit_loc & (a - 1)) << mul_shift) + set - 1);
15999a2dd95SBruce Richardson }
16099a2dd95SBruce Richardson
16199a2dd95SBruce Richardson int
rte_member_lookup_vbf(const struct rte_member_setsum * ss,const void * key,member_set_t * set_id)16299a2dd95SBruce Richardson rte_member_lookup_vbf(const struct rte_member_setsum *ss, const void *key,
16399a2dd95SBruce Richardson member_set_t *set_id)
16499a2dd95SBruce Richardson {
16599a2dd95SBruce Richardson uint32_t j;
16699a2dd95SBruce Richardson uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
16799a2dd95SBruce Richardson uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t),
16899a2dd95SBruce Richardson ss->sec_hash_seed);
16999a2dd95SBruce Richardson uint32_t mask = ~0;
17099a2dd95SBruce Richardson uint32_t bit_loc;
17199a2dd95SBruce Richardson
17299a2dd95SBruce Richardson for (j = 0; j < ss->num_hashes; j++) {
17399a2dd95SBruce Richardson bit_loc = (h1 + j * h2) & ss->bit_mask;
17499a2dd95SBruce Richardson mask &= test_bit(bit_loc, ss);
17599a2dd95SBruce Richardson }
17699a2dd95SBruce Richardson
17799a2dd95SBruce Richardson if (mask) {
178de0ec3c2STyler Retzlaff *set_id = rte_ctz32(mask) + 1;
17999a2dd95SBruce Richardson return 1;
18099a2dd95SBruce Richardson }
18199a2dd95SBruce Richardson
18299a2dd95SBruce Richardson *set_id = RTE_MEMBER_NO_MATCH;
18399a2dd95SBruce Richardson return 0;
18499a2dd95SBruce Richardson }
18599a2dd95SBruce Richardson
18699a2dd95SBruce Richardson uint32_t
rte_member_lookup_bulk_vbf(const struct rte_member_setsum * ss,const void ** keys,uint32_t num_keys,member_set_t * set_ids)18799a2dd95SBruce Richardson rte_member_lookup_bulk_vbf(const struct rte_member_setsum *ss,
18899a2dd95SBruce Richardson const void **keys, uint32_t num_keys, member_set_t *set_ids)
18999a2dd95SBruce Richardson {
19099a2dd95SBruce Richardson uint32_t i, k;
19199a2dd95SBruce Richardson uint32_t num_matches = 0;
19299a2dd95SBruce Richardson uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
19399a2dd95SBruce Richardson uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
19499a2dd95SBruce Richardson uint32_t bit_loc;
19599a2dd95SBruce Richardson
19699a2dd95SBruce Richardson for (i = 0; i < num_keys; i++)
19799a2dd95SBruce Richardson h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len,
19899a2dd95SBruce Richardson ss->prim_hash_seed);
19999a2dd95SBruce Richardson for (i = 0; i < num_keys; i++)
20099a2dd95SBruce Richardson h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t),
20199a2dd95SBruce Richardson ss->sec_hash_seed);
20299a2dd95SBruce Richardson for (i = 0; i < num_keys; i++) {
20399a2dd95SBruce Richardson mask[i] = ~0;
20499a2dd95SBruce Richardson for (k = 0; k < ss->num_hashes; k++) {
20599a2dd95SBruce Richardson bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
20699a2dd95SBruce Richardson mask[i] &= test_bit(bit_loc, ss);
20799a2dd95SBruce Richardson }
20899a2dd95SBruce Richardson }
20999a2dd95SBruce Richardson for (i = 0; i < num_keys; i++) {
21099a2dd95SBruce Richardson if (mask[i]) {
211de0ec3c2STyler Retzlaff set_ids[i] = rte_ctz32(mask[i]) + 1;
21299a2dd95SBruce Richardson num_matches++;
21399a2dd95SBruce Richardson } else
21499a2dd95SBruce Richardson set_ids[i] = RTE_MEMBER_NO_MATCH;
21599a2dd95SBruce Richardson }
21699a2dd95SBruce Richardson return num_matches;
21799a2dd95SBruce Richardson }
21899a2dd95SBruce Richardson
21999a2dd95SBruce Richardson uint32_t
rte_member_lookup_multi_vbf(const struct rte_member_setsum * ss,const void * key,uint32_t match_per_key,member_set_t * set_id)22099a2dd95SBruce Richardson rte_member_lookup_multi_vbf(const struct rte_member_setsum *ss,
22199a2dd95SBruce Richardson const void *key, uint32_t match_per_key,
22299a2dd95SBruce Richardson member_set_t *set_id)
22399a2dd95SBruce Richardson {
22499a2dd95SBruce Richardson uint32_t num_matches = 0;
22599a2dd95SBruce Richardson uint32_t j;
22699a2dd95SBruce Richardson uint32_t h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
22799a2dd95SBruce Richardson uint32_t h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t),
22899a2dd95SBruce Richardson ss->sec_hash_seed);
22999a2dd95SBruce Richardson uint32_t mask = ~0;
23099a2dd95SBruce Richardson uint32_t bit_loc;
23199a2dd95SBruce Richardson
23299a2dd95SBruce Richardson for (j = 0; j < ss->num_hashes; j++) {
23399a2dd95SBruce Richardson bit_loc = (h1 + j * h2) & ss->bit_mask;
23499a2dd95SBruce Richardson mask &= test_bit(bit_loc, ss);
23599a2dd95SBruce Richardson }
23699a2dd95SBruce Richardson while (mask) {
237de0ec3c2STyler Retzlaff uint32_t loc = rte_ctz32(mask);
23899a2dd95SBruce Richardson set_id[num_matches] = loc + 1;
23999a2dd95SBruce Richardson num_matches++;
24099a2dd95SBruce Richardson if (num_matches >= match_per_key)
24199a2dd95SBruce Richardson return num_matches;
24299a2dd95SBruce Richardson mask &= ~(1UL << loc);
24399a2dd95SBruce Richardson }
24499a2dd95SBruce Richardson return num_matches;
24599a2dd95SBruce Richardson }
24699a2dd95SBruce Richardson
24799a2dd95SBruce Richardson uint32_t
rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum * ss,const void ** keys,uint32_t num_keys,uint32_t match_per_key,uint32_t * match_count,member_set_t * set_ids)24899a2dd95SBruce Richardson rte_member_lookup_multi_bulk_vbf(const struct rte_member_setsum *ss,
24999a2dd95SBruce Richardson const void **keys, uint32_t num_keys, uint32_t match_per_key,
25099a2dd95SBruce Richardson uint32_t *match_count,
25199a2dd95SBruce Richardson member_set_t *set_ids)
25299a2dd95SBruce Richardson {
25399a2dd95SBruce Richardson uint32_t i, k;
25499a2dd95SBruce Richardson uint32_t num_matches = 0;
25599a2dd95SBruce Richardson uint32_t match_cnt_t;
25699a2dd95SBruce Richardson uint32_t mask[RTE_MEMBER_LOOKUP_BULK_MAX];
25799a2dd95SBruce Richardson uint32_t h1[RTE_MEMBER_LOOKUP_BULK_MAX], h2[RTE_MEMBER_LOOKUP_BULK_MAX];
25899a2dd95SBruce Richardson uint32_t bit_loc;
25999a2dd95SBruce Richardson
26099a2dd95SBruce Richardson for (i = 0; i < num_keys; i++)
26199a2dd95SBruce Richardson h1[i] = MEMBER_HASH_FUNC(keys[i], ss->key_len,
26299a2dd95SBruce Richardson ss->prim_hash_seed);
26399a2dd95SBruce Richardson for (i = 0; i < num_keys; i++)
26499a2dd95SBruce Richardson h2[i] = MEMBER_HASH_FUNC(&h1[i], sizeof(uint32_t),
26599a2dd95SBruce Richardson ss->sec_hash_seed);
26699a2dd95SBruce Richardson for (i = 0; i < num_keys; i++) {
26799a2dd95SBruce Richardson mask[i] = ~0;
26899a2dd95SBruce Richardson for (k = 0; k < ss->num_hashes; k++) {
26999a2dd95SBruce Richardson bit_loc = (h1[i] + k * h2[i]) & ss->bit_mask;
27099a2dd95SBruce Richardson mask[i] &= test_bit(bit_loc, ss);
27199a2dd95SBruce Richardson }
27299a2dd95SBruce Richardson }
27399a2dd95SBruce Richardson for (i = 0; i < num_keys; i++) {
27499a2dd95SBruce Richardson match_cnt_t = 0;
27599a2dd95SBruce Richardson while (mask[i]) {
276de0ec3c2STyler Retzlaff uint32_t loc = rte_ctz32(mask[i]);
27799a2dd95SBruce Richardson set_ids[i * match_per_key + match_cnt_t] = loc + 1;
27899a2dd95SBruce Richardson match_cnt_t++;
27999a2dd95SBruce Richardson if (match_cnt_t >= match_per_key)
28099a2dd95SBruce Richardson break;
28199a2dd95SBruce Richardson mask[i] &= ~(1UL << loc);
28299a2dd95SBruce Richardson }
28399a2dd95SBruce Richardson match_count[i] = match_cnt_t;
28499a2dd95SBruce Richardson if (match_cnt_t != 0)
28599a2dd95SBruce Richardson num_matches++;
28699a2dd95SBruce Richardson }
28799a2dd95SBruce Richardson return num_matches;
28899a2dd95SBruce Richardson }
28999a2dd95SBruce Richardson
29099a2dd95SBruce Richardson int
rte_member_add_vbf(const struct rte_member_setsum * ss,const void * key,member_set_t set_id)29199a2dd95SBruce Richardson rte_member_add_vbf(const struct rte_member_setsum *ss,
29299a2dd95SBruce Richardson const void *key, member_set_t set_id)
29399a2dd95SBruce Richardson {
29499a2dd95SBruce Richardson uint32_t i, h1, h2;
29599a2dd95SBruce Richardson uint32_t bit_loc;
29699a2dd95SBruce Richardson
29799a2dd95SBruce Richardson if (set_id > ss->num_set || set_id == RTE_MEMBER_NO_MATCH)
29899a2dd95SBruce Richardson return -EINVAL;
29999a2dd95SBruce Richardson
30099a2dd95SBruce Richardson h1 = MEMBER_HASH_FUNC(key, ss->key_len, ss->prim_hash_seed);
30199a2dd95SBruce Richardson h2 = MEMBER_HASH_FUNC(&h1, sizeof(uint32_t), ss->sec_hash_seed);
30299a2dd95SBruce Richardson
30399a2dd95SBruce Richardson for (i = 0; i < ss->num_hashes; i++) {
30499a2dd95SBruce Richardson bit_loc = (h1 + i * h2) & ss->bit_mask;
30599a2dd95SBruce Richardson set_bit(bit_loc, ss, set_id);
30699a2dd95SBruce Richardson }
30799a2dd95SBruce Richardson return 0;
30899a2dd95SBruce Richardson }
30999a2dd95SBruce Richardson
31099a2dd95SBruce Richardson void
rte_member_free_vbf(struct rte_member_setsum * ss)31199a2dd95SBruce Richardson rte_member_free_vbf(struct rte_member_setsum *ss)
31299a2dd95SBruce Richardson {
31399a2dd95SBruce Richardson rte_free(ss->table);
31499a2dd95SBruce Richardson }
31599a2dd95SBruce Richardson
31699a2dd95SBruce Richardson void
rte_member_reset_vbf(const struct rte_member_setsum * ss)31799a2dd95SBruce Richardson rte_member_reset_vbf(const struct rte_member_setsum *ss)
31899a2dd95SBruce Richardson {
31999a2dd95SBruce Richardson uint32_t *vbf = ss->table;
32099a2dd95SBruce Richardson memset(vbf, 0, (ss->num_set * ss->bits) >> 3);
32199a2dd95SBruce Richardson }
322