xref: /dpdk/lib/member/rte_member.h (revision 719834a6849e1daf4a70ff7742bbcc3ae7e25607)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4 
5 /**
6  * @file
7  *
8  * RTE Membership Library
9  *
10  * The Membership Library is an extension and generalization of a traditional
11  * filter (for example Bloom Filter and cuckoo filter) structure that has
12  * multiple usages in a variety of workloads and applications. The library is
13  * used to test if a key belongs to certain sets. Two types of such
14  * "set-summary" structures are implemented: hash-table based (HT) and vector
15  * bloom filter (vBF). For HT setsummary, two subtypes or modes are available,
16  * cache and non-cache modes. The table below summarize some properties of
17  * the different implementations.
18  */
19 
20 /**
21  * <!--
22  * +==========+=====================+================+=========================+
23  * |   type   |      vbf            |     HT-cache   |     HT-non-cache        |
24  * +==========+=====================+==========================================+
25  * |structure |  bloom-filter array |  hash-table like without storing key     |
26  * +----------+---------------------+------------------------------------------+
27  * |set id    | limited by bf count |           [1, 0x7fff]                    |
28  * |          | up to 32.           |                                          |
29  * +----------+---------------------+------------------------------------------+
30  * |usages &  | small set range,    | can delete,    | cache most recent keys, |
31  * |properties| user-specified      | big set range, | have both false-positive|
32  * |          | false-positive rate,| small false    | and false-negative      |
33  * |          | no deletion support.| positive depend| depend on table size,   |
34  * |          |                     | on table size, | automatic overwritten.  |
35  * |          |                     | new key does   |                         |
36  * |          |                     | not overwrite  |                         |
37  * |          |                     | existing key.  |                         |
38  * +----------+---------------------+----------------+-------------------------+
39  * +==========+=============================+
40  * |   type   |      sketch                 |
41  * +==========+=============================+
42  * |structure | counting bloom filter array |
43  * +----------+-----------------------------+
44  * |set id    | 1: heavy set, 0: light set  |
45  * |          |                             |
46  * +----------+-----------------------------+
47  * |usages &  | count size of a flow,       |
48  * |properties| used for heavy hitter       |
49  * |          | detection.                  |
50  * +----------+-----------------------------+
51  * -->
52  */
53 
54 #ifndef _RTE_MEMBER_H_
55 #define _RTE_MEMBER_H_
56 
57 #include <stdint.h>
58 #include <stdbool.h>
59 #include <inttypes.h>
60 
61 #include <rte_common.h>
62 
63 /** The set ID type that stored internally in hash table based set summary. */
64 typedef uint16_t member_set_t;
65 /** Invalid set ID used to mean no match found. */
66 #define RTE_MEMBER_NO_MATCH 0
67 /** Maximum size of hash table that can be created. */
68 #define RTE_MEMBER_ENTRIES_MAX (1 << 30)
69 /** Maximum number of keys that can be searched as a bulk */
70 #define RTE_MEMBER_LOOKUP_BULK_MAX 64
71 /** Entry count per bucket in hash table based mode. */
72 #define RTE_MEMBER_BUCKET_ENTRIES 16
73 /** Maximum number of characters in setsum name. */
74 #define RTE_MEMBER_NAMESIZE 32
75 /** Max value of the random number */
76 #define RTE_RAND_MAX      ~0LLU
77 /**
78  * As packets skipped in the sampling-based algorithm, the accounting
79  * results accuracy is not guaranteed in the start stage. There should
80  * be a "convergence time" to achieve the accuracy after receiving enough
81  * packets.
82  * For sketch, use the flag if prefer always bounded mode, which only
83  * starts sampling after receiving enough packets to keep the results
84  * accuracy always bounded.
85  */
86 #define RTE_MEMBER_SKETCH_ALWAYS_BOUNDED 0x01
87 /** For sketch, use the flag if to count packet size instead of packet count */
88 #define RTE_MEMBER_SKETCH_COUNT_BYTE 0x02
89 
90 /** @internal Hash function used by membership library. */
91 #if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32)
92 #include <rte_hash_crc.h>
93 #define MEMBER_HASH_FUNC       rte_hash_crc
94 #else
95 #include <rte_jhash.h>
96 #define MEMBER_HASH_FUNC       rte_jhash
97 #endif
98 
99 #ifdef __cplusplus
100 extern "C" {
101 #endif
102 
103 /** @internal setsummary structure. */
104 struct rte_member_setsum;
105 
106 /**
107  * Parameter struct used to create set summary
108  */
109 struct rte_member_parameters;
110 
111 /**
112  * Define different set summary types
113  */
114 enum rte_member_setsum_type {
115 	RTE_MEMBER_TYPE_HT = 0,  /**< Hash table based set summary. */
116 	RTE_MEMBER_TYPE_VBF,     /**< Vector of bloom filters. */
117 	RTE_MEMBER_TYPE_SKETCH,
118 	RTE_MEMBER_NUM_TYPE
119 };
120 
121 /** @internal compare function for different arch. */
122 enum rte_member_sig_compare_function {
123 	RTE_MEMBER_COMPARE_SCALAR = 0,
124 	RTE_MEMBER_COMPARE_AVX2,
125 	RTE_MEMBER_COMPARE_NUM
126 };
127 
128 /* sketch update function with different implementations. */
129 typedef void (*sketch_update_fn_t)(const struct rte_member_setsum *ss,
130 				   const void *key,
131 				   uint32_t count);
132 
133 /* sketch lookup function with different implementations. */
134 typedef uint64_t (*sketch_lookup_fn_t)(const struct rte_member_setsum *ss,
135 				       const void *key);
136 
137 /* sketch delete function with different implementations. */
138 typedef void (*sketch_delete_fn_t)(const struct rte_member_setsum *ss,
139 				   const void *key);
140 
141 /** @internal setsummary structure. */
142 struct __rte_cache_aligned rte_member_setsum {
143 	enum rte_member_setsum_type type; /* Type of the set summary. */
144 	uint32_t key_len;		/* Length of key. */
145 	uint32_t prim_hash_seed;	/* Primary hash function seed. */
146 	uint32_t sec_hash_seed;		/* Secondary hash function seed. */
147 
148 	/* Hash table based. */
149 	uint32_t bucket_cnt;		/* Number of buckets. */
150 	uint32_t bucket_mask;		/* Bit mask to get bucket index. */
151 	/* For runtime selecting AVX, scalar, etc for signature comparison. */
152 	enum rte_member_sig_compare_function sig_cmp_fn;
153 	uint8_t cache;			/* If it is cache mode for ht based. */
154 
155 	/* Vector bloom filter. */
156 	uint32_t num_set;		/* Number of set (bf) in vbf. */
157 	uint32_t bits;			/* Number of bits in each bf. */
158 	uint32_t bit_mask;	/* Bit mask to get bit location in bf. */
159 	uint32_t num_hashes;	/* Number of hash values to index bf. */
160 
161 	/* Parameters for sketch */
162 	float error_rate;
163 	float sample_rate;
164 	uint32_t num_col;
165 	uint32_t num_row;
166 	int always_bounded;
167 	double converge_thresh;
168 	uint32_t topk;
169 	uint32_t count_byte;
170 	uint64_t *hash_seeds;
171 	sketch_update_fn_t sketch_update; /* Pointer to the sketch update function */
172 	sketch_lookup_fn_t sketch_lookup; /* Pointer to the sketch lookup function */
173 	sketch_delete_fn_t sketch_delete; /* Pointer to the sketch delete function */
174 
175 	void *runtime_var;
176 	uint32_t mul_shift;  /* vbf internal variable used during bit test. */
177 	uint32_t div_shift;  /* vbf internal variable used during bit test. */
178 
179 	void *table;	/* This is the handler of hash table or vBF array. */
180 
181 
182 	/* Second cache line should start here. */
183 	uint32_t socket_id;          /* NUMA Socket ID for memory. */
184 	char name[RTE_MEMBER_NAMESIZE]; /* Name of this set summary. */
185 #ifdef RTE_ARCH_X86
186 	bool use_avx512;
187 #endif
188 };
189 
190 /**
191  * Parameters used when create the set summary table. Currently user can
192  * specify two types of setsummary: HT based and vBF. For HT based, user can
193  * specify cache or non-cache mode. Here is a table to describe some differences
194  */
195 struct __rte_cache_aligned rte_member_parameters {
196 	const char *name;			/**< Name of the hash. */
197 
198 	/**
199 	 * User to specify the type of the setsummary from one of
200 	 * rte_member_setsum_type.
201 	 *
202 	 * HT based setsummary is implemented like a hash table. User should use
203 	 * this type when there are many sets.
204 	 *
205 	 * vBF setsummary is a vector of bloom filters. It is used when number
206 	 * of sets is not big (less than 32 for current implementation).
207 	 */
208 	enum rte_member_setsum_type type;
209 
210 	/**
211 	 * is_cache is only used for HT based setsummary.
212 	 *
213 	 * If it is HT based setsummary, user to specify the subtype or mode
214 	 * of the setsummary. It could be cache, or non-cache mode.
215 	 * Set is_cache to be 1 if to use as cache mode.
216 	 *
217 	 * For cache mode, keys can be evicted out of the HT setsummary. Keys
218 	 * with the same signature and map to the same bucket
219 	 * will overwrite each other in the setsummary table.
220 	 * This mode is useful for the case that the set-summary only
221 	 * needs to keep record of the recently inserted keys. Both
222 	 * false-negative and false-positive could happen.
223 	 *
224 	 * For non-cache mode, keys cannot be evicted out of the cache. So for
225 	 * this mode the setsummary will become full eventually. Keys with the
226 	 * same signature but map to the same bucket will still occupy multiple
227 	 * entries. This mode does not give false-negative result.
228 	 */
229 	uint8_t is_cache;
230 
231 	/**
232 	 * For HT setsummary, num_keys equals to the number of entries of the
233 	 * table. When the number of keys inserted in the HT setsummary
234 	 * approaches this number, eviction could happen. For cache mode,
235 	 * keys could be evicted out of the table. For non-cache mode, keys will
236 	 * be evicted to other buckets like cuckoo hash. The table will also
237 	 * likely to become full before the number of inserted keys equal to the
238 	 * total number of entries.
239 	 *
240 	 * For vBF, num_keys equal to the expected number of keys that will
241 	 * be inserted into the vBF. The implementation assumes the keys are
242 	 * evenly distributed to each BF in vBF. This is used to calculate the
243 	 * number of bits we need for each BF. User does not specify the size of
244 	 * each BF directly because the optimal size depends on the num_keys
245 	 * and false positive rate.
246 	 */
247 	uint32_t num_keys;
248 
249 	/**
250 	 * The length of key is used for hash calculation. Since key is not
251 	 * stored in set-summary, large key does not require more memory space.
252 	 */
253 	uint32_t key_len;
254 
255 	/**
256 	 * num_set is only used for vBF, but not used for HT setsummary.
257 	 *
258 	 * num_set is equal to the number of BFs in vBF. For current
259 	 * implementation, it only supports 1,2,4,8,16,32 BFs in one vBF set
260 	 * summary. If other number of sets are needed, for example 5, the user
261 	 * should allocate the minimum available value that larger than 5,
262 	 * which is 8.
263 	 */
264 	uint32_t num_set;
265 
266 	/**
267 	 * false_positive_rate is only used for vBF, but not used for HT
268 	 * setsummary.
269 	 *
270 	 * For vBF, false_positive_rate is the user-defined false positive rate
271 	 * given expected number of inserted keys (num_keys). It is used to
272 	 * calculate the total number of bits for each BF, and the number of
273 	 * hash values used during lookup and insertion. For details please
274 	 * refer to vBF implementation and membership library documentation.
275 	 *
276 	 * For HT, This parameter is not directly set by users.
277 	 * HT setsummary's false positive rate is in the order of:
278 	 * false_pos = (1/bucket_count)*(1/2^16), since we use 16-bit signature.
279 	 * This is because two keys needs to map to same bucket and same
280 	 * signature to have a collision (false positive). bucket_count is equal
281 	 * to number of entries (num_keys) divided by entry count per bucket
282 	 * (RTE_MEMBER_BUCKET_ENTRIES). Thus, the false_positive_rate is not
283 	 * directly set by users for HT mode.
284 	 */
285 	float false_positive_rate;
286 
287 	/**
288 	 * We use two seeds to calculate two independent hashes for each key.
289 	 *
290 	 * For HT type, one hash is used as signature, and the other is used
291 	 * for bucket location.
292 	 * For vBF type, these two hashes and their combinations are used as
293 	 * hash locations to index the bit array.
294 	 * For Sketch type, these seeds are not used.
295 	 */
296 	uint32_t prim_hash_seed;
297 
298 	/**
299 	 * The secondary seed should be a different value from the primary seed.
300 	 */
301 	uint32_t sec_hash_seed;
302 
303 	/**
304 	 * For count(min) sketch data structure, error rate defines the accuracy
305 	 * required by the user. Higher accuracy leads to more memory usage, but
306 	 * the flow size is estimated more accurately.
307 	 */
308 	float error_rate;
309 
310 	/**
311 	 * Sampling rate means the internal sample rate of the rows of the count
312 	 * min sketches. Lower sampling rate can reduce CPU overhead, but the
313 	 * data structure will require more time to converge statistically.
314 	 */
315 	float sample_rate;
316 
317 	/**
318 	 * How many top heavy hitter to be reported. The library will internally
319 	 * keep the keys of heavy hitters for final report.
320 	 */
321 	uint32_t top_k;
322 
323 	/**
324 	 * Extra flags that may passed in by user
325 	 */
326 	uint32_t extra_flag;
327 
328 	int socket_id;			/**< NUMA Socket ID for memory. */
329 };
330 
331 /**
332  * Find an existing set-summary and return a pointer to it.
333  *
334  * @param name
335  *   Name of the set-summary.
336  * @return
337  *   Pointer to the set-summary or NULL if object not found
338  *   with rte_errno set appropriately. Possible rte_errno values include:
339  *    - ENOENT - value not available for return
340  */
341 struct rte_member_setsum *
342 rte_member_find_existing(const char *name);
343 
344 /**
345  * Create set-summary (SS).
346  *
347  * @param params
348  *   Parameters to initialize the setsummary.
349  * @return
350  *   Return the pointer to the setsummary.
351  *   Return value is NULL if the creation failed.
352  */
353 struct rte_member_setsum *
354 rte_member_create(const struct rte_member_parameters *params);
355 
356 /**
357  * Lookup key in set-summary (SS).
358  * Single key lookup and return as soon as the first match found
359  *
360  * @param setsum
361  *   Pointer of a setsummary.
362  * @param key
363  *   Pointer of the key to be looked up.
364  * @param set_id
365  *   Output the set id matches the key.
366  * @return
367  *   Return 1 for found a match and 0 for not found a match.
368  */
369 int
370 rte_member_lookup(const struct rte_member_setsum *setsum, const void *key,
371 			member_set_t *set_id);
372 
373 /**
374  * Lookup bulk of keys in set-summary (SS).
375  * Each key lookup returns as soon as the first match found
376  *
377  * @param setsum
378  *   Pointer of a setsummary.
379  * @param keys
380  *   Pointer of the bulk of keys to be looked up.
381  * @param num_keys
382  *   Number of keys that will be lookup.
383  * @param set_ids
384  *   Output set ids for all the keys to this array.
385  *   User should preallocate array that can contain all results, which size is
386  *   the num_keys.
387  * @return
388  *   The number of keys that found a match.
389  */
390 int
391 rte_member_lookup_bulk(const struct rte_member_setsum *setsum,
392 			const void **keys, uint32_t num_keys,
393 			member_set_t *set_ids);
394 
395 /**
396  * Lookup a key in set-summary (SS) for multiple matches.
397  * The key lookup will find all matched entries (multiple match).
398  * Note that for cache mode of HT, each key can have at most one match. This is
399  * because keys with same signature that maps to same bucket will overwrite
400  * each other. So multi-match lookup should be used for vBF and non-cache HT.
401  *
402  * @param setsum
403  *   Pointer of a set-summary.
404  * @param key
405  *   Pointer of the key that to be looked up.
406  * @param max_match_per_key
407  *   User specified maximum number of matches for each key. The function returns
408  *   as soon as this number of matches found for the key.
409  * @param set_id
410  *   Output set ids for all the matches of the key. User needs to preallocate
411  *   the array that can contain max_match_per_key number of results.
412  * @return
413  *   The number of matches that found for the key.
414  *   For cache mode HT set-summary, the number should be at most 1.
415  */
416 int
417 rte_member_lookup_multi(const struct rte_member_setsum *setsum,
418 		const void *key, uint32_t max_match_per_key,
419 		member_set_t *set_id);
420 
421 /**
422  * Lookup a bulk of keys in set-summary (SS) for multiple matches each key.
423  * Each key lookup will find all matched entries (multiple match).
424  * Note that for cache mode HT, each key can have at most one match. So
425  * multi-match function is mainly used for vBF and non-cache mode HT.
426  *
427  * @param setsum
428  *   Pointer of a setsummary.
429  * @param keys
430  *   Pointer of the keys to be looked up.
431  * @param num_keys
432  *   The number of keys that will be lookup.
433  * @param max_match_per_key
434  *   The possible maximum number of matches for each key.
435  * @param match_count
436  *   Output the number of matches for each key in an array.
437  * @param set_ids
438  *   Return set ids for all the matches of all keys. Users pass in a
439  *   preallocated 2D array with first dimension as key index and second
440  *   dimension as match index. For example set_ids[bulk_size][max_match_per_key]
441  * @return
442  *   The number of keys that found one or more matches in the set-summary.
443  */
444 int
445 rte_member_lookup_multi_bulk(const struct rte_member_setsum *setsum,
446 		const void **keys, uint32_t num_keys,
447 		uint32_t max_match_per_key,
448 		uint32_t *match_count,
449 		member_set_t *set_ids);
450 
451 /**
452  * Insert key into set-summary (SS).
453  *
454  * @param setsum
455  *   Pointer of a set-summary.
456  * @param key
457  *   Pointer of the key to be added.
458  * @param set_id
459  *   The set id associated with the key that needs to be added. Different mode
460  *   supports different set_id ranges. 0 cannot be used as set_id since
461  *   RTE_MEMBER_NO_MATCH by default is set as 0.
462  *   For HT mode, the set_id has range as [1, 0x7FFF], MSB is reserved.
463  *   For vBF mode the set id is limited by the num_set parameter when create
464  *   the set-summary. For sketch mode, this id is ignored.
465  * @return
466  *   HT (cache mode) and vBF should never fail unless the set_id is not in the
467  *   valid range. In such case -EINVAL is returned.
468  *   For HT (non-cache mode) it could fail with -ENOSPC error code when table is
469  *   full.
470  *   For success it returns different values for different modes to provide
471  *   extra information for users.
472  *   Return 0 for HT (cache mode) if the add does not cause
473  *   eviction, return 1 otherwise. Return 0 for non-cache mode if success,
474  *   -ENOSPC for full, and 1 if cuckoo eviction happens.
475  *   Always returns 0 for vBF mode and sketch.
476  */
477 int
478 rte_member_add(const struct rte_member_setsum *setsum, const void *key,
479 			member_set_t set_id);
480 
481 /**
482  * Add the packet byte size into the sketch.
483  *
484  * @param setsum
485  *   Pointer of a set-summary.
486  * @param key
487  *   Pointer of the key to be added.
488  * @param byte_count
489  *   Add the byte count of the packet into the sketch.
490  * @return
491  * Return -EINVAL for invalid parameters, otherwise return 0.
492  */
493 int
494 rte_member_add_byte_count(const struct rte_member_setsum *setsum,
495 			  const void *key, uint32_t byte_count);
496 
497 /**
498  * Query packet count for a certain flow-key.
499  *
500  * @param setsum
501  *   Pointer of a set-summary.
502  * @param key
503  *   Pointer of the key to be added.
504  * @param count
505  *   The output packet count or byte count.
506  * @return
507  *   Return -EINVAL for invalid parameters.
508  */
509 int
510 rte_member_query_count(const struct rte_member_setsum *setsum,
511 		       const void *key, uint64_t *count);
512 
513 
514 /**
515  * Report heavyhitter flow-keys into set-summary (SS).
516  *
517  * @param setsum
518  *   Pointer of a set-summary.
519  * @param keys
520  *   Pointer of the output top-k key array.
521  * @param counts
522  *   Pointer of the output packet count or byte count array of the top-k keys.
523  * @return
524  *   Return -EINVAL for invalid parameters. Return a positive integer indicate
525  *   how many heavy hitters are reported.
526  */
527 int
528 rte_member_report_heavyhitter(const struct rte_member_setsum *setsum,
529 			      void **keys, uint64_t *counts);
530 
531 
532 /**
533  * De-allocate memory used by set-summary.
534  *
535  * @param setsum
536  *   Pointer to the set summary.
537  *   If setsum is NULL, no operation is performed.
538  */
539 void
540 rte_member_free(struct rte_member_setsum *setsum);
541 
542 /**
543  * Reset the set-summary tables. E.g. reset bits to be 0 in BF,
544  * reset set_id in each entry to be RTE_MEMBER_NO_MATCH in HT based SS.
545  *
546  * @param setsum
547  *   Pointer to the set-summary.
548  */
549 void
550 rte_member_reset(const struct rte_member_setsum *setsum);
551 
552 /**
553  * Delete items from the set-summary. Note that vBF does not support deletion
554  * in current implementation. For vBF, error code of -EINVAL will be returned.
555  *
556  * @param setsum
557  *   Pointer to the set-summary.
558  * @param key
559  *   Pointer of the key to be deleted.
560  * @param set_id
561  *   For HT mode, we need both key and its corresponding set_id to
562  *   properly delete the key. Without set_id, we may delete other keys with the
563  *   same signature.
564  * @return
565  *   If no entry found to delete, an error code of -ENOENT could be returned.
566  */
567 int
568 rte_member_delete(const struct rte_member_setsum *setsum, const void *key,
569 			member_set_t set_id);
570 
571 #ifdef __cplusplus
572 }
573 #endif
574 
575 #endif /* _RTE_MEMBER_H_ */
576