/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_prefetch.h>
#include <rte_random.h>
#include <rte_log.h>
#include <rte_vect.h>

#include "member.h"
#include "rte_member.h"
#include "rte_member_ht.h"

#if defined(RTE_ARCH_X86)
#include "rte_member_x86.h"
#endif

/* Search bucket for entry with tmp_sig and update set_id */
static inline int
update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t set_id)
{
	uint32_t i;

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[bucket_id].sigs[i] == tmp_sig) {
			buckets[bucket_id].sets[i] = set_id;
			return 1;
		}
	}
	return 0;
}

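/*
 * Return the set id of the first entry in the bucket whose signature matches
 * tmp_sig and whose slot is in use (set != RTE_MEMBER_NO_MATCH).
 * Returns 1 on a match, 0 otherwise.
 */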
static inline int
search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			*set_id = buckets[bucket_id].sets[iter];
			return 1;
		}
	}
	return 0;
}

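/*
 * Collect the set ids of all in-use entries in the bucket whose signature
 * matches tmp_sig, stopping once matches_per_key results have been stored.
 */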
static inline void
search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		uint32_t *counter,
		uint32_t matches_per_key,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			set_id[*counter] = buckets[bucket_id].sets[iter];
			(*counter)++;
			if (*counter >= matches_per_key)
				return;
		}
	}
}

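/*
 * Create the hash-table based set summary: round num_keys up to a power of
 * two, allocate the bucket array on the requested socket, and select the
 * AVX2 or scalar signature-compare path.
 */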
int
rte_member_create_ht(struct rte_member_setsum *ss,
		const struct rte_member_parameters *params)
{
	uint32_t i, j;
	uint32_t size_bucket_t;
	uint32_t num_entries = rte_align32pow2(params->num_keys);

	if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
			!rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
			num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
		rte_errno = EINVAL;
		MEMBER_LOG(ERR,
			"Membership HT create with invalid parameters");
		return -EINVAL;
	}

	uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;

	size_bucket_t = sizeof(struct member_ht_bucket);

	struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
			num_buckets * size_bucket_t,
			RTE_CACHE_LINE_SIZE, ss->socket_id);

	if (buckets == NULL) {
		MEMBER_LOG(ERR, "memory allocation failed for HT "
				"setsummary");
		return -ENOMEM;
	}

	ss->table = buckets;
	ss->bucket_cnt = num_buckets;
	ss->bucket_mask = num_buckets - 1;
	ss->cache = params->is_cache;

	for (i = 0; i < num_buckets; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}
#if defined(RTE_ARCH_X86)
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
			RTE_MEMBER_BUCKET_ENTRIES == 16 &&
			rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
	else
#endif
		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;

	MEMBER_LOG(DEBUG, "Hash table based filter created, "
			"the table has %u entries, %u buckets",
			num_entries, num_buckets);
	return 0;
}

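/*
 * Minimal usage sketch (an illustration, not part of this file: applications
 * normally go through the generic rte_member_create(), rte_member_add() and
 * rte_member_lookup() wrappers declared in rte_member.h, which dispatch here
 * when params->type is RTE_MEMBER_TYPE_HT; the values below are
 * hypothetical):
 *
 *	struct rte_member_parameters params = {
 *		.name = "ht_example",
 *		.type = RTE_MEMBER_TYPE_HT,
 *		.is_cache = 0,
 *		.num_keys = 1 << 16,
 *		.key_len = 16,
 *		.prim_hash_seed = 1,
 *		.sec_hash_seed = 2,
 *		.socket_id = SOCKET_ID_ANY,
 *	};
 *	struct rte_member_setsum *setsum = rte_member_create(&params);
 *	member_set_t set;
 *
 *	rte_member_add(setsum, key, 1);
 *	if (rte_member_lookup(setsum, key, &set) == 1)
 *		printf("key belongs to set %u\n", set);
 */

/*
 * Compute the signature and the primary/secondary bucket indexes for a key.
 */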
static inline void
get_buckets_index(const struct rte_member_setsum *ss, const void *key,
		uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
{
	uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
						ss->prim_hash_seed);
	uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
						ss->sec_hash_seed);
	/*
	 * We use the first hash value for the signature, and the second hash
	 * value to derive the primary and secondary bucket locations.
	 *
	 * For non-cache mode, we use the lower bits of the second hash for
	 * the primary bucket location, then xor the primary bucket location
	 * with the signature to get the secondary bucket location. This is
	 * the "partial-key cuckoo hashing" proposed in B. Fan, et al's paper
	 * "Cuckoo Filter: Practically Better Than Bloom". The benefit of
	 * using xor is that the alternative bucket location can be derived
	 * from only the current bucket location and the signature, which the
	 * non-cache mode's eviction and deletion process requires, without
	 * storing the alternative hash value or the full key.
	 *
	 * For cache mode, we use the lower bits for the primary bucket
	 * location and the higher bits for the secondary bucket location. In
	 * cache mode, keys are simply overwritten if a bucket is full. We do
	 * not use xor since the lower/higher bits are more independent hash
	 * values and thus should provide a slightly better table load.
	 */
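	/*
	 * Illustrative example (hypothetical values): with bucket_mask =
	 * 0xFF, sig = 0x1234 and prim = 0x21, the secondary bucket is
	 * (0x21 ^ 0x1234) & 0xFF = 0x15; starting from the secondary bucket
	 * instead, (0x15 ^ 0x1234) & 0xFF = 0x21 recovers the primary.
	 * This symmetry is what make_space_bucket() relies on when
	 * displacing entries.
	 */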
	*sig = first_hash;
	if (ss->cache) {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (sec_hash >> 16) & ss->bucket_mask;
	} else {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask;
	}
}

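/*
 * Single-key lookup: search the primary and secondary buckets for the key's
 * signature and report the stored set id, or RTE_MEMBER_NO_MATCH.
 */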
int
rte_member_lookup_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t *set_id)
{
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	*set_id = RTE_MEMBER_NO_MATCH;
	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
				set_id) ||
				search_bucket_single_avx(sec_bucket, tmp_sig,
					buckets, set_id))
			return 1;
		break;
#endif
	default:
		if (search_bucket_single(prim_bucket, tmp_sig, buckets,
				set_id) ||
				search_bucket_single(sec_bucket, tmp_sig,
					buckets, set_id))
			return 1;
	}

	return 0;
}

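/*
 * Bulk lookup: compute all bucket indexes first so both candidate buckets of
 * every key can be prefetched before the signatures are compared. Returns
 * the number of keys that matched.
 */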
uint32_t
rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, member_set_t *set_id)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}

	for (i = 0; i < num_keys; i++) {
		switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
		case RTE_MEMBER_COMPARE_AVX2:
			if (search_bucket_single_avx(prim_buckets[i],
					tmp_sig[i], buckets, &set_id[i]) ||
				search_bucket_single_avx(sec_buckets[i],
					tmp_sig[i], buckets, &set_id[i]))
				num_matches++;
			else
				set_id[i] = RTE_MEMBER_NO_MATCH;
			break;
#endif
		default:
			if (search_bucket_single(prim_buckets[i], tmp_sig[i],
					buckets, &set_id[i]) ||
					search_bucket_single(sec_buckets[i],
					tmp_sig[i], buckets, &set_id[i]))
				num_matches++;
			else
				set_id[i] = RTE_MEMBER_NO_MATCH;
		}
	}
	return num_matches;
}

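/*
 * Multimatch lookup: the same signature may appear in several entries, so
 * collect up to match_per_key set ids for a single key and return how many
 * were found.
 */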
uint32_t
rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
		const void *key, uint32_t match_per_key,
		member_set_t *set_id)
{
	uint32_t num_matches = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
			&num_matches, match_per_key, set_id);
		if (num_matches < match_per_key)
			search_bucket_multi_avx(sec_bucket, tmp_sig,
				buckets, &num_matches, match_per_key, set_id);
		return num_matches;
#endif
	default:
		search_bucket_multi(prim_bucket, tmp_sig, buckets,
				&num_matches, match_per_key, set_id);
		if (num_matches < match_per_key)
			search_bucket_multi(sec_bucket, tmp_sig,
				buckets, &num_matches, match_per_key, set_id);
		return num_matches;
	}
}

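/*
 * Bulk multimatch lookup: set_ids is laid out as a two-dimensional array of
 * num_keys rows with match_per_key slots each, and match_count[i] receives
 * the number of matches for key i. Returns the number of keys with at least
 * one match.
 */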
uint32_t
rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, uint32_t match_per_key,
		uint32_t *match_count,
		member_set_t *set_ids)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t match_cnt_tmp;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}
	for (i = 0; i < num_keys; i++) {
		match_cnt_tmp = 0;

		switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
		case RTE_MEMBER_COMPARE_AVX2:
			search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
				buckets, &match_cnt_tmp, match_per_key,
				&set_ids[i * match_per_key]);
			if (match_cnt_tmp < match_per_key)
				search_bucket_multi_avx(sec_buckets[i],
					tmp_sig[i], buckets, &match_cnt_tmp,
					match_per_key,
					&set_ids[i * match_per_key]);
			match_count[i] = match_cnt_tmp;
			if (match_cnt_tmp != 0)
				num_matches++;
			break;
#endif
		default:
			search_bucket_multi(prim_buckets[i], tmp_sig[i],
				buckets, &match_cnt_tmp, match_per_key,
				&set_ids[i * match_per_key]);
			if (match_cnt_tmp < match_per_key)
				search_bucket_multi(sec_buckets[i], tmp_sig[i],
					buckets, &match_cnt_tmp, match_per_key,
					&set_ids[i * match_per_key]);
			match_count[i] = match_cnt_tmp;
			if (match_cnt_tmp != 0)
				num_matches++;
		}
	}
	return num_matches;
}

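/*
 * Insert the signature/set pair into the first free slot of the primary
 * bucket, falling back to the secondary bucket. Returns 0 on success or -1
 * if both buckets are full.
 */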
static inline int
try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id)
{
	int i;
	/* If the primary bucket is not full, insert into one of its slots */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[prim].sigs[i] = sig;
			buckets[prim].sets[i] = set_id;
			return 0;
		}
	}
	/* The primary bucket is full, try the secondary bucket */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[sec].sigs[i] = sig;
			buckets[sec].sets[i] = set_id;
			return 0;
		}
	}
	return -1;
}

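/*
 * Overwrite the set id of an existing entry with a matching signature in
 * either bucket (used in cache mode). Returns 0 if an entry was updated, -1
 * if no matching signature was found.
 */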
static inline int
try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id,
		enum rte_member_sig_compare_function cmp_fn)
{
	switch (cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		if (update_entry_search_avx(prim, sig, buckets, set_id) ||
				update_entry_search_avx(sec, sig, buckets,
					set_id))
			return 0;
		break;
#endif
	default:
		if (update_entry_search(prim, sig, buckets, set_id) ||
				update_entry_search(sec, sig, buckets,
					set_id))
			return 0;
	}
	return -1;
}

static inline int
evict_from_bucket(void)
{
	/* For now, we randomly pick one entry to evict */
	return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
}

/*
 * This function is similar to the cuckoo hash make_space function in the
 * hash library: it recursively displaces entries to their alternative
 * (xor-derived) buckets until a slot is freed in bucket bkt_idx, and returns
 * the index of the freed slot, or -ENOSPC if no slot could be freed.
 */
static inline int
make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
			unsigned int *nr_pushes)
{
	unsigned int i, j;
	int ret;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t next_bucket_idx;
	struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
	struct member_ht_bucket *bkt = &buckets[bkt_idx];
	/* MSB is set to indicate that an entry has already been pushed */
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	/*
	 * Push an existing entry to its alternative location: search the
	 * alternative buckets of all entries for one with a free slot.
	 */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		/* Search for space in the alternative locations */
		next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
		next_bkt[i] = &buckets[next_bucket_idx];
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
			if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
				break;
		}

		if (j != RTE_MEMBER_BUCKET_ENTRIES)
			break;
	}

	/* An alternative location has spare room (end of the recursion) */
	if (i != RTE_MEMBER_BUCKET_ENTRIES) {
		next_bkt[i]->sigs[j] = bkt->sigs[i];
		next_bkt[i]->sets[j] = bkt->sets[i];
		return i;
	}

	/* Pick an entry that has not been pushed yet */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
		if ((bkt->sets[i] & flag_mask) == 0)
			break;

	/* All entries have been pushed, so the entry cannot be added */
	if (i == RTE_MEMBER_BUCKET_ENTRIES ||
			++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
		return -ENOSPC;

	next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
	/* Set the flag to indicate that this entry is going to be pushed */
	bkt->sets[i] |= flag_mask;

	/* Need room in the alternative bucket to insert the pushed entry */
	ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
	/*
	 * After the recursive call: clear the flag, then insert the pushed
	 * entry in its alternative location if successful, or return the
	 * error.
	 */
	bkt->sets[i] &= ~flag_mask;
	if (ret >= 0) {
		next_bkt[i]->sigs[ret] = bkt->sigs[i];
		next_bkt[i]->sets[ret] = bkt->sets[i];
		return i;
	} else
		return ret;
}

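/*
 * Insert a key/set_id pair. Returns 0 if the key was inserted (or, in cache
 * mode, updated in place) without displacement, 1 if room had to be made by
 * eviction or cuckoo displacement, or a negative errno on failure.
 */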
int
rte_member_add_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t set_id)
{
	int ret;
	unsigned int nr_pushes = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
		return -EINVAL;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	/*
	 * For a cache based setsummary, we try overwriting (updating) an
	 * existing entry with the same signature first. In cache mode, false
	 * negatives are allowed and only the most recent keys are cached.
	 *
	 * For non-cache mode, we do not update an existing entry with the
	 * same signature, because if two keys with the same signature
	 * overwrote each other, false negatives could result, which is not
	 * the expected behavior for a non-cache setsummary.
	 */
	if (ss->cache) {
		ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
					set_id, ss->sig_cmp_fn);
		if (ret != -1)
			return ret;
	}
	/* If a bucket is not full, insert into one of its slots */
	ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
	if (ret != -1)
		return ret;

	/* Randomly pick the prim or sec bucket for recursive displacement */
	uint32_t select_bucket = (tmp_sig & 1U) ? prim_bucket : sec_bucket;
	if (ss->cache) {
		ret = evict_from_bucket();
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		return 1;
	}

	ret = make_space_bucket(ss, select_bucket, &nr_pushes);
	if (ret >= 0) {
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		ret = 1;
	}

	return ret;
}

void
rte_member_free_ht(struct rte_member_setsum *ss)
{
	rte_free(ss->table);
}

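/*
 * Delete the entry matching both the key's signature and the given set_id
 * from whichever bucket holds it. Returns 0 on success, -ENOENT if no such
 * entry exists.
 */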
int
rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
		member_set_t set_id)
{
	int i;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[prim_bucket].sigs[i] &&
				set_id == buckets[prim_bucket].sets[i]) {
			buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[sec_bucket].sigs[i] &&
				set_id == buckets[sec_bucket].sets[i]) {
			buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}
	return -ENOENT;
}

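/* Reset the set summary by marking every slot in every bucket as empty */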
void
rte_member_reset_ht(const struct rte_member_setsum *ss)
{
	uint32_t i, j;
	struct member_ht_bucket *buckets = ss->table;

	for (i = 0; i < ss->bucket_cnt; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}
}