1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 /** 6 * @file 7 * 8 * RTE Membership Library 9 * 10 * The Membership Library is an extension and generalization of a traditional 11 * filter (for example Bloom Filter and cuckoo filter) structure that has 12 * multiple usages in a variety of workloads and applications. The library is 13 * used to test if a key belongs to certain sets. Two types of such 14 * "set-summary" structures are implemented: hash-table based (HT) and vector 15 * bloom filter (vBF). For HT setsummary, two subtypes or modes are available, 16 * cache and non-cache modes. The table below summarize some properties of 17 * the different implementations. 18 */ 19 20 /** 21 * <!-- 22 * +==========+=====================+================+=========================+ 23 * | type | vbf | HT-cache | HT-non-cache | 24 * +==========+=====================+==========================================+ 25 * |structure | bloom-filter array | hash-table like without storing key | 26 * +----------+---------------------+------------------------------------------+ 27 * |set id | limited by bf count | [1, 0x7fff] | 28 * | | up to 32. | | 29 * +----------+---------------------+------------------------------------------+ 30 * |usages & | small set range, | can delete, | cache most recent keys, | 31 * |properties| user-specified | big set range, | have both false-positive| 32 * | | false-positive rate,| small false | and false-negative | 33 * | | no deletion support.| positive depend| depend on table size, | 34 * | | | on table size, | automatic overwritten. | 35 * | | | new key does | | 36 * | | | not overwrite | | 37 * | | | existing key. | | 38 * +----------+---------------------+----------------+-------------------------+ 39 * +==========+=============================+ 40 * | type | sketch | 41 * +==========+=============================+ 42 * |structure | counting bloom filter array | 43 * +----------+-----------------------------+ 44 * |set id | 1: heavy set, 0: light set | 45 * | | | 46 * +----------+-----------------------------+ 47 * |usages & | count size of a flow, | 48 * |properties| used for heavy hitter | 49 * | | detection. | 50 * +----------+-----------------------------+ 51 * --> 52 */ 53 54 #ifndef _RTE_MEMBER_H_ 55 #define _RTE_MEMBER_H_ 56 57 #include <stdint.h> 58 #include <stdbool.h> 59 #include <inttypes.h> 60 61 #include <rte_common.h> 62 63 /** The set ID type that stored internally in hash table based set summary. */ 64 typedef uint16_t member_set_t; 65 /** Invalid set ID used to mean no match found. */ 66 #define RTE_MEMBER_NO_MATCH 0 67 /** Maximum size of hash table that can be created. */ 68 #define RTE_MEMBER_ENTRIES_MAX (1 << 30) 69 /** Maximum number of keys that can be searched as a bulk */ 70 #define RTE_MEMBER_LOOKUP_BULK_MAX 64 71 /** Entry count per bucket in hash table based mode. */ 72 #define RTE_MEMBER_BUCKET_ENTRIES 16 73 /** Maximum number of characters in setsum name. */ 74 #define RTE_MEMBER_NAMESIZE 32 75 /** Max value of the random number */ 76 #define RTE_RAND_MAX ~0LLU 77 /** 78 * As packets skipped in the sampling-based algorithm, the accounting 79 * results accuracy is not guaranteed in the start stage. There should 80 * be a "convergence time" to achieve the accuracy after receiving enough 81 * packets. 82 * For sketch, use the flag if prefer always bounded mode, which only 83 * starts sampling after receiving enough packets to keep the results 84 * accuracy always bounded. 85 */ 86 #define RTE_MEMBER_SKETCH_ALWAYS_BOUNDED 0x01 87 /** For sketch, use the flag if to count packet size instead of packet count */ 88 #define RTE_MEMBER_SKETCH_COUNT_BYTE 0x02 89 90 /** @internal Hash function used by membership library. */ 91 #if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) 92 #include <rte_hash_crc.h> 93 #define MEMBER_HASH_FUNC rte_hash_crc 94 #else 95 #include <rte_jhash.h> 96 #define MEMBER_HASH_FUNC rte_jhash 97 #endif 98 99 #ifdef __cplusplus 100 extern "C" { 101 #endif 102 103 /** @internal setsummary structure. */ 104 struct rte_member_setsum; 105 106 /** 107 * Parameter struct used to create set summary 108 */ 109 struct rte_member_parameters; 110 111 /** 112 * Define different set summary types 113 */ 114 enum rte_member_setsum_type { 115 RTE_MEMBER_TYPE_HT = 0, /**< Hash table based set summary. */ 116 RTE_MEMBER_TYPE_VBF, /**< Vector of bloom filters. */ 117 RTE_MEMBER_TYPE_SKETCH, 118 RTE_MEMBER_NUM_TYPE 119 }; 120 121 /** @internal compare function for different arch. */ 122 enum rte_member_sig_compare_function { 123 RTE_MEMBER_COMPARE_SCALAR = 0, 124 RTE_MEMBER_COMPARE_AVX2, 125 RTE_MEMBER_COMPARE_NUM 126 }; 127 128 /* sketch update function with different implementations. */ 129 typedef void (*sketch_update_fn_t)(const struct rte_member_setsum *ss, 130 const void *key, 131 uint32_t count); 132 133 /* sketch lookup function with different implementations. */ 134 typedef uint64_t (*sketch_lookup_fn_t)(const struct rte_member_setsum *ss, 135 const void *key); 136 137 /* sketch delete function with different implementations. */ 138 typedef void (*sketch_delete_fn_t)(const struct rte_member_setsum *ss, 139 const void *key); 140 141 /** @internal setsummary structure. */ 142 struct __rte_cache_aligned rte_member_setsum { 143 enum rte_member_setsum_type type; /* Type of the set summary. */ 144 uint32_t key_len; /* Length of key. */ 145 uint32_t prim_hash_seed; /* Primary hash function seed. */ 146 uint32_t sec_hash_seed; /* Secondary hash function seed. */ 147 148 /* Hash table based. */ 149 uint32_t bucket_cnt; /* Number of buckets. */ 150 uint32_t bucket_mask; /* Bit mask to get bucket index. */ 151 /* For runtime selecting AVX, scalar, etc for signature comparison. */ 152 enum rte_member_sig_compare_function sig_cmp_fn; 153 uint8_t cache; /* If it is cache mode for ht based. */ 154 155 /* Vector bloom filter. */ 156 uint32_t num_set; /* Number of set (bf) in vbf. */ 157 uint32_t bits; /* Number of bits in each bf. */ 158 uint32_t bit_mask; /* Bit mask to get bit location in bf. */ 159 uint32_t num_hashes; /* Number of hash values to index bf. */ 160 161 /* Parameters for sketch */ 162 float error_rate; 163 float sample_rate; 164 uint32_t num_col; 165 uint32_t num_row; 166 int always_bounded; 167 double converge_thresh; 168 uint32_t topk; 169 uint32_t count_byte; 170 uint64_t *hash_seeds; 171 sketch_update_fn_t sketch_update; /* Pointer to the sketch update function */ 172 sketch_lookup_fn_t sketch_lookup; /* Pointer to the sketch lookup function */ 173 sketch_delete_fn_t sketch_delete; /* Pointer to the sketch delete function */ 174 175 void *runtime_var; 176 uint32_t mul_shift; /* vbf internal variable used during bit test. */ 177 uint32_t div_shift; /* vbf internal variable used during bit test. */ 178 179 void *table; /* This is the handler of hash table or vBF array. */ 180 181 182 /* Second cache line should start here. */ 183 uint32_t socket_id; /* NUMA Socket ID for memory. */ 184 char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */ 185 #ifdef RTE_ARCH_X86 186 bool use_avx512; 187 #endif 188 }; 189 190 /** 191 * Parameters used when create the set summary table. Currently user can 192 * specify two types of setsummary: HT based and vBF. For HT based, user can 193 * specify cache or non-cache mode. Here is a table to describe some differences 194 */ 195 struct __rte_cache_aligned rte_member_parameters { 196 const char *name; /**< Name of the hash. */ 197 198 /** 199 * User to specify the type of the setsummary from one of 200 * rte_member_setsum_type. 201 * 202 * HT based setsummary is implemented like a hash table. User should use 203 * this type when there are many sets. 204 * 205 * vBF setsummary is a vector of bloom filters. It is used when number 206 * of sets is not big (less than 32 for current implementation). 207 */ 208 enum rte_member_setsum_type type; 209 210 /** 211 * is_cache is only used for HT based setsummary. 212 * 213 * If it is HT based setsummary, user to specify the subtype or mode 214 * of the setsummary. It could be cache, or non-cache mode. 215 * Set is_cache to be 1 if to use as cache mode. 216 * 217 * For cache mode, keys can be evicted out of the HT setsummary. Keys 218 * with the same signature and map to the same bucket 219 * will overwrite each other in the setsummary table. 220 * This mode is useful for the case that the set-summary only 221 * needs to keep record of the recently inserted keys. Both 222 * false-negative and false-positive could happen. 223 * 224 * For non-cache mode, keys cannot be evicted out of the cache. So for 225 * this mode the setsummary will become full eventually. Keys with the 226 * same signature but map to the same bucket will still occupy multiple 227 * entries. This mode does not give false-negative result. 228 */ 229 uint8_t is_cache; 230 231 /** 232 * For HT setsummary, num_keys equals to the number of entries of the 233 * table. When the number of keys inserted in the HT setsummary 234 * approaches this number, eviction could happen. For cache mode, 235 * keys could be evicted out of the table. For non-cache mode, keys will 236 * be evicted to other buckets like cuckoo hash. The table will also 237 * likely to become full before the number of inserted keys equal to the 238 * total number of entries. 239 * 240 * For vBF, num_keys equal to the expected number of keys that will 241 * be inserted into the vBF. The implementation assumes the keys are 242 * evenly distributed to each BF in vBF. This is used to calculate the 243 * number of bits we need for each BF. User does not specify the size of 244 * each BF directly because the optimal size depends on the num_keys 245 * and false positive rate. 246 */ 247 uint32_t num_keys; 248 249 /** 250 * The length of key is used for hash calculation. Since key is not 251 * stored in set-summary, large key does not require more memory space. 252 */ 253 uint32_t key_len; 254 255 /** 256 * num_set is only used for vBF, but not used for HT setsummary. 257 * 258 * num_set is equal to the number of BFs in vBF. For current 259 * implementation, it only supports 1,2,4,8,16,32 BFs in one vBF set 260 * summary. If other number of sets are needed, for example 5, the user 261 * should allocate the minimum available value that larger than 5, 262 * which is 8. 263 */ 264 uint32_t num_set; 265 266 /** 267 * false_positive_rate is only used for vBF, but not used for HT 268 * setsummary. 269 * 270 * For vBF, false_positive_rate is the user-defined false positive rate 271 * given expected number of inserted keys (num_keys). It is used to 272 * calculate the total number of bits for each BF, and the number of 273 * hash values used during lookup and insertion. For details please 274 * refer to vBF implementation and membership library documentation. 275 * 276 * For HT, This parameter is not directly set by users. 277 * HT setsummary's false positive rate is in the order of: 278 * false_pos = (1/bucket_count)*(1/2^16), since we use 16-bit signature. 279 * This is because two keys needs to map to same bucket and same 280 * signature to have a collision (false positive). bucket_count is equal 281 * to number of entries (num_keys) divided by entry count per bucket 282 * (RTE_MEMBER_BUCKET_ENTRIES). Thus, the false_positive_rate is not 283 * directly set by users for HT mode. 284 */ 285 float false_positive_rate; 286 287 /** 288 * We use two seeds to calculate two independent hashes for each key. 289 * 290 * For HT type, one hash is used as signature, and the other is used 291 * for bucket location. 292 * For vBF type, these two hashes and their combinations are used as 293 * hash locations to index the bit array. 294 * For Sketch type, these seeds are not used. 295 */ 296 uint32_t prim_hash_seed; 297 298 /** 299 * The secondary seed should be a different value from the primary seed. 300 */ 301 uint32_t sec_hash_seed; 302 303 /** 304 * For count(min) sketch data structure, error rate defines the accuracy 305 * required by the user. Higher accuracy leads to more memory usage, but 306 * the flow size is estimated more accurately. 307 */ 308 float error_rate; 309 310 /** 311 * Sampling rate means the internal sample rate of the rows of the count 312 * min sketches. Lower sampling rate can reduce CPU overhead, but the 313 * data structure will require more time to converge statistically. 314 */ 315 float sample_rate; 316 317 /** 318 * How many top heavy hitter to be reported. The library will internally 319 * keep the keys of heavy hitters for final report. 320 */ 321 uint32_t top_k; 322 323 /** 324 * Extra flags that may passed in by user 325 */ 326 uint32_t extra_flag; 327 328 int socket_id; /**< NUMA Socket ID for memory. */ 329 }; 330 331 /** 332 * Find an existing set-summary and return a pointer to it. 333 * 334 * @param name 335 * Name of the set-summary. 336 * @return 337 * Pointer to the set-summary or NULL if object not found 338 * with rte_errno set appropriately. Possible rte_errno values include: 339 * - ENOENT - value not available for return 340 */ 341 struct rte_member_setsum * 342 rte_member_find_existing(const char *name); 343 344 /** 345 * Create set-summary (SS). 346 * 347 * @param params 348 * Parameters to initialize the setsummary. 349 * @return 350 * Return the pointer to the setsummary. 351 * Return value is NULL if the creation failed. 352 */ 353 struct rte_member_setsum * 354 rte_member_create(const struct rte_member_parameters *params); 355 356 /** 357 * Lookup key in set-summary (SS). 358 * Single key lookup and return as soon as the first match found 359 * 360 * @param setsum 361 * Pointer of a setsummary. 362 * @param key 363 * Pointer of the key to be looked up. 364 * @param set_id 365 * Output the set id matches the key. 366 * @return 367 * Return 1 for found a match and 0 for not found a match. 368 */ 369 int 370 rte_member_lookup(const struct rte_member_setsum *setsum, const void *key, 371 member_set_t *set_id); 372 373 /** 374 * Lookup bulk of keys in set-summary (SS). 375 * Each key lookup returns as soon as the first match found 376 * 377 * @param setsum 378 * Pointer of a setsummary. 379 * @param keys 380 * Pointer of the bulk of keys to be looked up. 381 * @param num_keys 382 * Number of keys that will be lookup. 383 * @param set_ids 384 * Output set ids for all the keys to this array. 385 * User should preallocate array that can contain all results, which size is 386 * the num_keys. 387 * @return 388 * The number of keys that found a match. 389 */ 390 int 391 rte_member_lookup_bulk(const struct rte_member_setsum *setsum, 392 const void **keys, uint32_t num_keys, 393 member_set_t *set_ids); 394 395 /** 396 * Lookup a key in set-summary (SS) for multiple matches. 397 * The key lookup will find all matched entries (multiple match). 398 * Note that for cache mode of HT, each key can have at most one match. This is 399 * because keys with same signature that maps to same bucket will overwrite 400 * each other. So multi-match lookup should be used for vBF and non-cache HT. 401 * 402 * @param setsum 403 * Pointer of a set-summary. 404 * @param key 405 * Pointer of the key that to be looked up. 406 * @param max_match_per_key 407 * User specified maximum number of matches for each key. The function returns 408 * as soon as this number of matches found for the key. 409 * @param set_id 410 * Output set ids for all the matches of the key. User needs to preallocate 411 * the array that can contain max_match_per_key number of results. 412 * @return 413 * The number of matches that found for the key. 414 * For cache mode HT set-summary, the number should be at most 1. 415 */ 416 int 417 rte_member_lookup_multi(const struct rte_member_setsum *setsum, 418 const void *key, uint32_t max_match_per_key, 419 member_set_t *set_id); 420 421 /** 422 * Lookup a bulk of keys in set-summary (SS) for multiple matches each key. 423 * Each key lookup will find all matched entries (multiple match). 424 * Note that for cache mode HT, each key can have at most one match. So 425 * multi-match function is mainly used for vBF and non-cache mode HT. 426 * 427 * @param setsum 428 * Pointer of a setsummary. 429 * @param keys 430 * Pointer of the keys to be looked up. 431 * @param num_keys 432 * The number of keys that will be lookup. 433 * @param max_match_per_key 434 * The possible maximum number of matches for each key. 435 * @param match_count 436 * Output the number of matches for each key in an array. 437 * @param set_ids 438 * Return set ids for all the matches of all keys. Users pass in a 439 * preallocated 2D array with first dimension as key index and second 440 * dimension as match index. For example set_ids[bulk_size][max_match_per_key] 441 * @return 442 * The number of keys that found one or more matches in the set-summary. 443 */ 444 int 445 rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum, 446 const void **keys, uint32_t num_keys, 447 uint32_t max_match_per_key, 448 uint32_t *match_count, 449 member_set_t *set_ids); 450 451 /** 452 * Insert key into set-summary (SS). 453 * 454 * @param setsum 455 * Pointer of a set-summary. 456 * @param key 457 * Pointer of the key to be added. 458 * @param set_id 459 * The set id associated with the key that needs to be added. Different mode 460 * supports different set_id ranges. 0 cannot be used as set_id since 461 * RTE_MEMBER_NO_MATCH by default is set as 0. 462 * For HT mode, the set_id has range as [1, 0x7FFF], MSB is reserved. 463 * For vBF mode the set id is limited by the num_set parameter when create 464 * the set-summary. For sketch mode, this id is ignored. 465 * @return 466 * HT (cache mode) and vBF should never fail unless the set_id is not in the 467 * valid range. In such case -EINVAL is returned. 468 * For HT (non-cache mode) it could fail with -ENOSPC error code when table is 469 * full. 470 * For success it returns different values for different modes to provide 471 * extra information for users. 472 * Return 0 for HT (cache mode) if the add does not cause 473 * eviction, return 1 otherwise. Return 0 for non-cache mode if success, 474 * -ENOSPC for full, and 1 if cuckoo eviction happens. 475 * Always returns 0 for vBF mode and sketch. 476 */ 477 int 478 rte_member_add(const struct rte_member_setsum *setsum, const void *key, 479 member_set_t set_id); 480 481 /** 482 * Add the packet byte size into the sketch. 483 * 484 * @param setsum 485 * Pointer of a set-summary. 486 * @param key 487 * Pointer of the key to be added. 488 * @param byte_count 489 * Add the byte count of the packet into the sketch. 490 * @return 491 * Return -EINVAL for invalid parameters, otherwise return 0. 492 */ 493 int 494 rte_member_add_byte_count(const struct rte_member_setsum *setsum, 495 const void *key, uint32_t byte_count); 496 497 /** 498 * Query packet count for a certain flow-key. 499 * 500 * @param setsum 501 * Pointer of a set-summary. 502 * @param key 503 * Pointer of the key to be added. 504 * @param count 505 * The output packet count or byte count. 506 * @return 507 * Return -EINVAL for invalid parameters. 508 */ 509 int 510 rte_member_query_count(const struct rte_member_setsum *setsum, 511 const void *key, uint64_t *count); 512 513 514 /** 515 * Report heavyhitter flow-keys into set-summary (SS). 516 * 517 * @param setsum 518 * Pointer of a set-summary. 519 * @param keys 520 * Pointer of the output top-k key array. 521 * @param counts 522 * Pointer of the output packet count or byte count array of the top-k keys. 523 * @return 524 * Return -EINVAL for invalid parameters. Return a positive integer indicate 525 * how many heavy hitters are reported. 526 */ 527 int 528 rte_member_report_heavyhitter(const struct rte_member_setsum *setsum, 529 void **keys, uint64_t *counts); 530 531 532 /** 533 * De-allocate memory used by set-summary. 534 * 535 * @param setsum 536 * Pointer to the set summary. 537 * If setsum is NULL, no operation is performed. 538 */ 539 void 540 rte_member_free(struct rte_member_setsum *setsum); 541 542 /** 543 * Reset the set-summary tables. E.g. reset bits to be 0 in BF, 544 * reset set_id in each entry to be RTE_MEMBER_NO_MATCH in HT based SS. 545 * 546 * @param setsum 547 * Pointer to the set-summary. 548 */ 549 void 550 rte_member_reset(const struct rte_member_setsum *setsum); 551 552 /** 553 * Delete items from the set-summary. Note that vBF does not support deletion 554 * in current implementation. For vBF, error code of -EINVAL will be returned. 555 * 556 * @param setsum 557 * Pointer to the set-summary. 558 * @param key 559 * Pointer of the key to be deleted. 560 * @param set_id 561 * For HT mode, we need both key and its corresponding set_id to 562 * properly delete the key. Without set_id, we may delete other keys with the 563 * same signature. 564 * @return 565 * If no entry found to delete, an error code of -ENOENT could be returned. 566 */ 567 int 568 rte_member_delete(const struct rte_member_setsum *setsum, const void *key, 569 member_set_t set_id); 570 571 #ifdef __cplusplus 572 } 573 #endif 574 575 #endif /* _RTE_MEMBER_H_ */ 576