/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <rte_errno.h>
#include <rte_malloc.h>
#include <rte_prefetch.h>
#include <rte_random.h>
#include <rte_log.h>
#include <rte_vect.h>

#include "member.h"
#include "rte_member.h"
#include "rte_member_ht.h"

#if defined(RTE_ARCH_X86)
#include "rte_member_x86.h"
#endif

/* Search bucket for entry with tmp_sig and update set_id */
static inline int
update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t set_id)
{
	uint32_t i;

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[bucket_id].sigs[i] == tmp_sig) {
			buckets[bucket_id].sets[i] = set_id;
			return 1;
		}
	}
	return 0;
}

static inline int
search_bucket_single(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			*set_id = buckets[bucket_id].sets[iter];
			return 1;
		}
	}
	return 0;
}

static inline void
search_bucket_multi(uint32_t bucket_id, member_sig_t tmp_sig,
		struct member_ht_bucket *buckets,
		uint32_t *counter,
		uint32_t matches_per_key,
		member_set_t *set_id)
{
	uint32_t iter;

	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
		if (tmp_sig == buckets[bucket_id].sigs[iter] &&
				buckets[bucket_id].sets[iter] !=
				RTE_MEMBER_NO_MATCH) {
			set_id[*counter] = buckets[bucket_id].sets[iter];
			(*counter)++;
			if (*counter >= matches_per_key)
				return;
		}
	}
}

int
rte_member_create_ht(struct rte_member_setsum *ss,
		const struct rte_member_parameters *params)
{
	uint32_t i, j;
	uint32_t size_bucket_t;
	uint32_t num_entries = rte_align32pow2(params->num_keys);

	if ((num_entries > RTE_MEMBER_ENTRIES_MAX) ||
			!rte_is_power_of_2(RTE_MEMBER_BUCKET_ENTRIES) ||
			num_entries < RTE_MEMBER_BUCKET_ENTRIES) {
		rte_errno = EINVAL;
		MEMBER_LOG(ERR,
			"Membership HT create with invalid parameters");
		return -EINVAL;
	}

	uint32_t num_buckets = num_entries / RTE_MEMBER_BUCKET_ENTRIES;

	size_bucket_t = sizeof(struct member_ht_bucket);

	struct member_ht_bucket *buckets = rte_zmalloc_socket(NULL,
			num_buckets * size_bucket_t,
			RTE_CACHE_LINE_SIZE, ss->socket_id);

	if (buckets == NULL) {
		MEMBER_LOG(ERR, "memory allocation failed for HT "
				"setsummary");
		return -ENOMEM;
	}

	ss->table = buckets;
	ss->bucket_cnt = num_buckets;
	ss->bucket_mask = num_buckets - 1;
	ss->cache = params->is_cache;

	for (i = 0; i < num_buckets; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}
#if defined(RTE_ARCH_X86)
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
			RTE_MEMBER_BUCKET_ENTRIES == 16 &&
			rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)
		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
	else
#endif
		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;

	MEMBER_LOG(DEBUG, "Hash table based filter created, "
			"the table has %u entries, %u buckets",
			num_entries, num_buckets);
	return 0;
}
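/*
 * Worked sizing example (illustrative numbers, assuming
 * RTE_MEMBER_BUCKET_ENTRIES == 16, the value the AVX2 check above
 * expects): with params->num_keys == 1000, rte_align32pow2() rounds
 * num_entries up to 1024, giving num_buckets == 1024 / 16 == 64 and
 * bucket_mask == 0x3F. A bucket index is then simply the low 6 bits
 * of a hash value:
 *
 *	prim_bkt = hash & 0x3F;
 */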
static inline void
get_buckets_index(const struct rte_member_setsum *ss, const void *key,
		uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t *sig)
{
	uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
					ss->prim_hash_seed);
	uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash, sizeof(uint32_t),
					ss->sec_hash_seed);
	/*
	 * We use the first hash value for the signature, and the second hash
	 * value to derive the primary and secondary bucket locations.
	 *
	 * For non-cache mode, we use the lower bits for the primary bucket
	 * location, then xor the primary bucket location with the signature
	 * to get the secondary bucket location. This is the "partial-key
	 * cuckoo hashing" proposed in B. Fan et al.'s paper
	 * "Cuckoo Filter: Practically Better Than Bloom". The benefit of
	 * using xor is that the alternative bucket location can be derived
	 * from just the current bucket location and the signature. This is
	 * required by non-cache mode's eviction and deletion process, which
	 * would otherwise need to store the alternative hash value or the
	 * full key.
	 *
	 * For cache mode, we use the lower bits for the primary bucket
	 * location and the higher bits for the secondary bucket location. In
	 * cache mode, keys are simply overwritten if a bucket is full. We do
	 * not use xor since the lower/higher bits are more independent hash
	 * values and thus should provide a slightly better table load.
	 */
	*sig = first_hash;
	if (ss->cache) {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (sec_hash >> 16) & ss->bucket_mask;
	} else {
		*prim_bkt = sec_hash & ss->bucket_mask;
		*sec_bkt = (*prim_bkt ^ *sig) & ss->bucket_mask;
	}
}
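/*
 * Illustrative sketch of the partial-key property above (example values
 * are made up): suppose bucket_mask == 0x3F and sig == 0x5A3C. Then:
 *
 *	prim = sec_hash & 0x3F;        // say 0x21
 *	sec  = (prim ^ sig) & 0x3F;    // 0x21 ^ 0x3C == 0x1D
 *
 * Because xor is an involution, the primary index is recoverable from
 * the secondary index and the signature alone:
 *
 *	(sec ^ sig) & 0x3F == (0x1D ^ 0x3C) & 0x3F == 0x21 == prim
 *
 * make_space_bucket() below relies on exactly this to compute an entry's
 * alternative bucket without storing a second hash or the full key.
 */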
int
rte_member_lookup_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t *set_id)
{
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	*set_id = RTE_MEMBER_NO_MATCH;
	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
				set_id) ||
				search_bucket_single_avx(sec_bucket, tmp_sig,
					buckets, set_id))
			return 1;
		break;
#endif
	default:
		if (search_bucket_single(prim_bucket, tmp_sig, buckets,
				set_id) ||
				search_bucket_single(sec_bucket, tmp_sig,
					buckets, set_id))
			return 1;
	}

	return 0;
}

uint32_t
rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, member_set_t *set_id)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}

	for (i = 0; i < num_keys; i++) {
		switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
		case RTE_MEMBER_COMPARE_AVX2:
			if (search_bucket_single_avx(prim_buckets[i],
					tmp_sig[i], buckets, &set_id[i]) ||
				search_bucket_single_avx(sec_buckets[i],
					tmp_sig[i], buckets, &set_id[i]))
				num_matches++;
			else
				set_id[i] = RTE_MEMBER_NO_MATCH;
			break;
#endif
		default:
			if (search_bucket_single(prim_buckets[i], tmp_sig[i],
					buckets, &set_id[i]) ||
					search_bucket_single(sec_buckets[i],
						tmp_sig[i], buckets,
						&set_id[i]))
				num_matches++;
			else
				set_id[i] = RTE_MEMBER_NO_MATCH;
		}
	}
	return num_matches;
}

uint32_t
rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
		const void *key, uint32_t match_per_key,
		member_set_t *set_id)
{
	uint32_t num_matches = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
				&num_matches, match_per_key, set_id);
		if (num_matches < match_per_key)
			search_bucket_multi_avx(sec_bucket, tmp_sig,
				buckets, &num_matches, match_per_key, set_id);
		return num_matches;
#endif
	default:
		search_bucket_multi(prim_bucket, tmp_sig, buckets,
				&num_matches, match_per_key, set_id);
		if (num_matches < match_per_key)
			search_bucket_multi(sec_bucket, tmp_sig,
				buckets, &num_matches, match_per_key, set_id);
		return num_matches;
	}
}

uint32_t
rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
		const void **keys, uint32_t num_keys, uint32_t match_per_key,
		uint32_t *match_count,
		member_set_t *set_ids)
{
	uint32_t i;
	uint32_t num_matches = 0;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t match_cnt_tmp;
	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];

	for (i = 0; i < num_keys; i++) {
		get_buckets_index(ss, keys[i], &prim_buckets[i],
				&sec_buckets[i], &tmp_sig[i]);
		rte_prefetch0(&buckets[prim_buckets[i]]);
		rte_prefetch0(&buckets[sec_buckets[i]]);
	}
	for (i = 0; i < num_keys; i++) {
		match_cnt_tmp = 0;

		switch (ss->sig_cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
		case RTE_MEMBER_COMPARE_AVX2:
			search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
				buckets, &match_cnt_tmp, match_per_key,
				&set_ids[i*match_per_key]);
			if (match_cnt_tmp < match_per_key)
				search_bucket_multi_avx(sec_buckets[i],
					tmp_sig[i], buckets, &match_cnt_tmp,
					match_per_key,
					&set_ids[i*match_per_key]);
			match_count[i] = match_cnt_tmp;
			if (match_cnt_tmp != 0)
				num_matches++;
			break;
#endif
		default:
			search_bucket_multi(prim_buckets[i], tmp_sig[i],
				buckets, &match_cnt_tmp, match_per_key,
				&set_ids[i*match_per_key]);
			if (match_cnt_tmp < match_per_key)
				search_bucket_multi(sec_buckets[i], tmp_sig[i],
					buckets, &match_cnt_tmp, match_per_key,
					&set_ids[i*match_per_key]);
			match_count[i] = match_cnt_tmp;
			if (match_cnt_tmp != 0)
				num_matches++;
		}
	}
	return num_matches;
}
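/*
 * Illustrative caller-side sketch for the bulk multi-match lookup above
 * (array sizes are hypothetical): for 8 keys with up to 4 matches per
 * key, the caller supplies a flat 8 * 4 result array; the matches for
 * key i start at set_ids[i * 4], and match_count[i] gives how many of
 * them are valid.
 *
 *	member_set_t set_ids[8 * 4];
 *	uint32_t match_count[8];
 *	uint32_t hit_keys = rte_member_lookup_multi_bulk_ht(ss, keys, 8, 4,
 *			match_count, set_ids);
 */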
static inline int
try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id)
{
	int i;
	/* If the primary bucket is not full, insert into a free slot */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[prim].sigs[i] = sig;
			buckets[prim].sets[i] = set_id;
			return 0;
		}
	}
	/* If the primary bucket is full, try the secondary bucket */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
			buckets[sec].sigs[i] = sig;
			buckets[sec].sets[i] = set_id;
			return 0;
		}
	}
	return -1;
}

static inline int
try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
		member_sig_t sig, member_set_t set_id,
		enum rte_member_sig_compare_function cmp_fn)
{
	switch (cmp_fn) {
#if defined(RTE_ARCH_X86) && defined(__AVX2__)
	case RTE_MEMBER_COMPARE_AVX2:
		if (update_entry_search_avx(prim, sig, buckets, set_id) ||
				update_entry_search_avx(sec, sig, buckets,
					set_id))
			return 0;
		break;
#endif
	default:
		if (update_entry_search(prim, sig, buckets, set_id) ||
				update_entry_search(sec, sig, buckets,
					set_id))
			return 0;
	}
	return -1;
}

static inline int
evict_from_bucket(void)
{
	/* For now, we randomly pick one entry to evict */
	return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1);
}

/*
 * This function is similar to the cuckoo hash make_space function in the
 * hash library.
 */
static inline int
make_space_bucket(const struct rte_member_setsum *ss, uint32_t bkt_idx,
			unsigned int *nr_pushes)
{
	unsigned int i, j;
	int ret;
	struct member_ht_bucket *buckets = ss->table;
	uint32_t next_bucket_idx;
	struct member_ht_bucket *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
	struct member_ht_bucket *bkt = &buckets[bkt_idx];
	/* The MSB is set to indicate that an entry has already been pushed */
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	/*
	 * Push an existing item (searching for a bucket with space among the
	 * alternative locations) to its alternative location.
	 */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		/* Search for space in the alternative locations */
		next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
		next_bkt[i] = &buckets[next_bucket_idx];
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
			if (next_bkt[i]->sets[j] == RTE_MEMBER_NO_MATCH)
				break;
		}

		if (j != RTE_MEMBER_BUCKET_ENTRIES)
			break;
	}

	/* An alternative location has spare room (end of recursion) */
	if (i != RTE_MEMBER_BUCKET_ENTRIES) {
		next_bkt[i]->sigs[j] = bkt->sigs[i];
		next_bkt[i]->sets[j] = bkt->sets[i];
		return i;
	}

	/* Pick an entry that has not been pushed yet */
	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
		if ((bkt->sets[i] & flag_mask) == 0)
			break;

	/* All entries have been pushed, so the entry cannot be added */
	if (i == RTE_MEMBER_BUCKET_ENTRIES ||
			++(*nr_pushes) > RTE_MEMBER_MAX_PUSHES)
		return -ENOSPC;

	next_bucket_idx = (bkt->sigs[i] ^ bkt_idx) & ss->bucket_mask;
	/* Set the flag to indicate that this entry is going to be pushed */
	bkt->sets[i] |= flag_mask;

	/* Need room in the alternative bucket to insert the pushed entry */
	ret = make_space_bucket(ss, next_bucket_idx, nr_pushes);
	/*
	 * After the recursive call: clear the flag, then insert the pushed
	 * entry in its alternative location if successful, or return the
	 * error.
	 */
	bkt->sets[i] &= ~flag_mask;
	if (ret >= 0) {
		next_bkt[i]->sigs[ret] = bkt->sigs[i];
		next_bkt[i]->sets[ret] = bkt->sets[i];
		return i;
	} else
		return ret;
}
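/*
 * Worked example of the flag_mask trick above (assuming the 16-bit
 * member_set_t this library defines): flag_mask == 1U << 15 == 0x8000.
 * While set ID 0x0007 is being pushed it is stored as 0x8007, so the
 * "pick an entry that has not been pushed yet" scan skips it; the flag
 * is cleared with &= ~flag_mask as the recursion unwinds. This is also
 * why rte_member_add_ht() below rejects any set_id with the MSB set.
 */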
int
rte_member_add_ht(const struct rte_member_setsum *ss,
		const void *key, member_set_t set_id)
{
	int ret;
	unsigned int nr_pushes = 0;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;
	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

	if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) != 0)
		return -EINVAL;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	/*
	 * If it is a cache-based setsummary, we try overwriting (updating)
	 * an existing entry with the same signature first. In cache mode we
	 * allow false negatives and only cache the most recent keys.
	 *
	 * For non-cache mode, we do not update an existing entry with the
	 * same signature, because if two keys with the same signature update
	 * each other, false negatives may occur, which is not the expected
	 * behavior for a non-cache setsummary.
	 */
	if (ss->cache) {
		ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
				set_id, ss->sig_cmp_fn);
		if (ret != -1)
			return ret;
	}
	/* If not full then insert into one slot */
	ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
	if (ret != -1)
		return ret;

	/* Pseudo-randomly pick prim or sec (signature's low bit) for
	 * recursive displacement
	 */
	uint32_t select_bucket = (tmp_sig & 1U) ? prim_bucket : sec_bucket;
	if (ss->cache) {
		ret = evict_from_bucket();
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		return 1;
	}

	ret = make_space_bucket(ss, select_bucket, &nr_pushes);
	if (ret >= 0) {
		buckets[select_bucket].sigs[ret] = tmp_sig;
		buckets[select_bucket].sets[ret] = set_id;
		ret = 1;
	}

	return ret;
}

void
rte_member_free_ht(struct rte_member_setsum *ss)
{
	rte_free(ss->table);
}

int
rte_member_delete_ht(const struct rte_member_setsum *ss, const void *key,
		member_set_t set_id)
{
	int i;
	uint32_t prim_bucket, sec_bucket;
	member_sig_t tmp_sig;
	struct member_ht_bucket *buckets = ss->table;

	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[prim_bucket].sigs[i] &&
				set_id == buckets[prim_bucket].sets[i]) {
			buckets[prim_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}

	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
		if (tmp_sig == buckets[sec_bucket].sigs[i] &&
				set_id == buckets[sec_bucket].sets[i]) {
			buckets[sec_bucket].sets[i] = RTE_MEMBER_NO_MATCH;
			return 0;
		}
	}
	return -ENOENT;
}

void
rte_member_reset_ht(const struct rte_member_setsum *ss)
{
	uint32_t i, j;
	struct member_ht_bucket *buckets = ss->table;

	for (i = 0; i < ss->bucket_cnt; i++) {
		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
	}
}
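/*
 * Illustrative end-to-end sketch of the internal flow (hypothetical key
 * and an ss already initialized via rte_member_create_ht()):
 *
 *	member_set_t set = RTE_MEMBER_NO_MATCH;
 *	rte_member_add_ht(ss, key, 5);        // returns 0 or 1 on success
 *	rte_member_lookup_ht(ss, key, &set);  // returns 1, set == 5
 *	rte_member_delete_ht(ss, key, 5);     // returns 0 on success
 *	rte_member_lookup_ht(ss, key, &set);  // set == RTE_MEMBER_NO_MATCH
 *
 * In cache mode an add may instead overwrite or evict an older entry,
 * so a previously added key can later miss; that is the intended
 * trade-off of a cache-based setsummary.
 */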