/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <string.h>
#include <stdio.h>

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_log.h>

#include "rte_table_hash.h"
#include "rte_lru.h"

#define KEY_SIZE						8

#define KEYS_PER_BUCKET					4

#ifdef RTE_TABLE_STATS_COLLECT

#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \
	table->stats.n_pkts_in += val
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \
	table->stats.n_pkts_lookup_miss += val

#else

#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val)
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val)

#endif

#ifdef RTE_ARCH_64
struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;
	uint64_t lru_list;
	struct rte_bucket_4_8 *next;
	uint64_t next_valid;

	uint64_t key[4];

	/* Cache line 1 */
	uint8_t data[0];
};
#else
struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;
	uint64_t lru_list;
	struct rte_bucket_4_8 *next;
	uint32_t pad;
	uint64_t next_valid;

	uint64_t key[4];

	/* Cache line 1 */
	uint8_t data[0];
};
#endif
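
/*
 * In both bucket variants above, the header (signature, LRU list, next
 * pointer, next_valid flag) plus the four 8-byte keys fill exactly one
 * 64-byte cache line, with the per-key entry data following in the
 * flexible data[] member. The create functions below rely on this by
 * checking sizeof(struct rte_bucket_4_8) % 64 == 0.
 */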

struct rte_table_hash {
	struct rte_table_stats stats;

	/* Input parameters */
	uint32_t n_buckets;
	uint32_t key_size;
	uint32_t entry_size;
	uint32_t bucket_size;
	uint32_t key_offset;
	uint64_t key_mask;
	rte_table_hash_op_hash f_hash;
	uint64_t seed;

	/* Extendible buckets */
	uint32_t n_buckets_ext;
	uint32_t stack_pos;
	uint32_t *stack;

	/* Lookup table */
	uint8_t memory[0] __rte_cache_aligned;
};

static int
keycmp(void *a, void *b, void *b_mask)
{
	uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;

	return a64[0] != (b64[0] & b_mask64[0]);
}

static void
keycpy(void *dst, void *src, void *src_mask)
{
	uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;

	dst64[0] = src64[0] & src_mask64[0];
}
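
/*
 * Keys are stored pre-masked (see the keycpy() call sites), so keycmp()
 * only needs to apply the key mask to the lookup-side key (argument b)
 * before comparing it against the stored key (argument a).
 */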

static int
check_params_create(struct rte_table_hash_params *params)
{
	/* name */
	if (params->name == NULL) {
		RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
		return -EINVAL;
	}

	/* key_size */
	if (params->key_size != KEY_SIZE) {
		RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
		return -EINVAL;
	}

	/* n_keys */
	if (params->n_keys == 0) {
		RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
		return -EINVAL;
	}

	/* n_buckets */
	if ((params->n_buckets == 0) ||
		(!rte_is_power_of_2(params->n_buckets))) {
		RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
		return -EINVAL;
	}

	/* f_hash */
	if (params->f_hash == NULL) {
		RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
			__func__);
		return -EINVAL;
	}

	return 0;
}

static void *
rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, total_size;
	uint32_t n_buckets, i;

	/* Check input parameters */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of buckets (n_buckets) so that there is
	 * a chance to store n_keys keys in the table.
	 *
	 * Note: Since the buckets do not get extended, it is not possible to
	 * guarantee that n_keys keys can be stored in the table at any time.
	 * In the worst-case scenario, when all n_keys keys fall into the same
	 * bucket, at most KEYS_PER_BUCKET keys will be stored in the table.
	 * This case defeats the purpose of the hash table; it indicates an
	 * unsuitable f_hash or n_keys to n_buckets ratio.
	 *
	 * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
	 */
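	/*
	 * Illustrative example (values are hypothetical): for n_keys = 1000
	 * and KEYS_PER_BUCKET = 4, MIN(n_buckets) = (1000 + 3) / 4 = 250,
	 * which rte_align32pow2() below rounds up to 256 buckets.
	 */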
	n_buckets = rte_align32pow2(
		(p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
	n_buckets = RTE_MAX(n_buckets, p->n_buckets);

	/* Memory allocation */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;

	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	for (i = 0; i < n_buckets; i++) {
		struct rte_bucket_4_8 *bucket;

		bucket = (struct rte_bucket_4_8 *) &f->memory[i *
			f->bucket_size];
		bucket->lru_list = 0x0000000100020003LLU;
	}

	return f;
}

static int
rte_table_hash_free_key8_lru(void *table)
{
	struct rte_table_hash *f = table;

	/* Check input parameters */
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
		return -EINVAL;
	}

	rte_free(f);
	return 0;
}

static int
rte_table_hash_entry_add_key8_lru(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask, pos;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 1;
			*entry_ptr = (void *) bucket_data;
			return 0;
		}
	}

	/* Key is not present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;

		if ((bucket_signature & mask) == 0) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			bucket->signature |= mask;
			keycpy(&bucket->key[i], key, &f->key_mask);
			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 0;
			*entry_ptr = (void *) bucket_data;

			return 0;
		}
	}

	/* Bucket full: replace LRU entry */
	pos = lru_pos(bucket);
	keycpy(&bucket->key[pos], key, &f->key_mask);
	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
	lru_update(bucket, pos);
	*key_found = 0;
	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];

	return 0;
}

static int
rte_table_hash_entry_delete_key8_lru(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			bucket->signature &= ~mask;
			*key_found = 1;
			if (entry)
				memcpy(entry, bucket_data, f->entry_size);

			return 0;
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}

static void *
rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, stack_size, total_size;
	uint32_t n_buckets_ext, i;

	/* Check input parameters */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of bucket extensions (n_buckets_ext) so
	 * that it is guaranteed that n_keys keys can be stored in the table
	 * at any time.
	 *
	 * The worst-case scenario takes place when all n_keys keys fall into
	 * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the
	 * worst case occurs when (n_keys - KEYS_PER_BUCKET + 1) keys fall
	 * into the same bucket, while the remaining (KEYS_PER_BUCKET - 1)
	 * keys each fall into a different bucket. This case defeats the
	 * purpose of the hash table; it indicates an unsuitable f_hash or
	 * n_keys to n_buckets ratio.
	 *
	 * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
	 */
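	/*
	 * Illustrative example (values are hypothetical): for n_keys = 1000
	 * and KEYS_PER_BUCKET = 4, n_buckets_ext = 1000 / 4 + 3 = 253
	 * extension buckets are reserved, enough to absorb the worst-case
	 * chaining described above.
	 */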
	n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;

	/* Memory allocation */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
	total_size = sizeof(struct rte_table_hash) +
		(p->n_buckets + n_buckets_ext) * bucket_size + stack_size;

	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE,
			"%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}
	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = p->n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	f->n_buckets_ext = n_buckets_ext;
	f->stack_pos = n_buckets_ext;
	f->stack = (uint32_t *)
		&f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];

	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	for (i = 0; i < n_buckets_ext; i++)
		f->stack[i] = i;

	return f;
}

static int
rte_table_hash_free_key8_ext(void *table)
{
	struct rte_table_hash *f = table;

	/* Check input parameters */
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
		return -EINVAL;
	}

	rte_free(f);
	return 0;
}

static int
rte_table_hash_entry_add_key8_ext(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 1;
				*entry_ptr = (void *) bucket_data;
				return 0;
			}
		}
	}

	/* Key is not present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0;
		bucket != NULL; bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;

			if ((bucket_signature & mask) == 0) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature |= mask;
				keycpy(&bucket->key[i], key, &f->key_mask);
				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 0;
				*entry_ptr = (void *) bucket_data;

				return 0;
			}
		}
	}

	/* Bucket full: extend bucket */
	if (f->stack_pos > 0) {
		bucket_index = f->stack[--f->stack_pos];

		bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets +
			bucket_index) * f->bucket_size];
		bucket_prev->next = bucket;
		bucket_prev->next_valid = 1;

		bucket->signature = 1;
		keycpy(&bucket->key[0], key, &f->key_mask);
		memcpy(&bucket->data[0], entry, f->entry_size);
		*key_found = 0;
		*entry_ptr = (void *) &bucket->data[0];
		return 0;
	}

	return -ENOSPC;
}

static int
rte_table_hash_entry_delete_key8_ext(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
		bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature &= ~mask;
				*key_found = 1;
				if (entry)
					memcpy(entry, bucket_data,
						f->entry_size);

				if ((bucket->signature == 0) &&
				    (bucket_prev != NULL)) {
					bucket_prev->next = bucket->next;
					bucket_prev->next_valid =
						bucket->next_valid;

					memset(bucket, 0,
						sizeof(struct rte_bucket_4_8));
					bucket_index = (((uint8_t *)bucket -
						(uint8_t *)f->memory)/f->bucket_size) - f->n_buckets;
					f->stack[f->stack_pos++] = bucket_index;
				}

				return 0;
			}
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}
584 
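/*
 * Branchless 4-way key compare: ~bucket->signature has a bit set for every
 * empty slot, which forces the corresponding xor[] word to be non-zero, so
 * xor[i] == 0 only when slot i is both valid and an exact (masked) match.
 * pos ends up as the matching slot index, or 4 on a miss; callers turn pos
 * into a hit bit via (bucket->signature >> pos) & 1, which is 0 for
 * pos == 4 since only signature bits 0..3 are ever set.
 */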
#define lookup_key8_cmp(key_in, bucket, pos, f)			\
{								\
	uint64_t xor[4], signature, k;				\
								\
	signature = ~bucket->signature;				\
								\
	k = key_in[0] & f->key_mask;				\
	xor[0] = (k ^ bucket->key[0]) | (signature & 1);	\
	xor[1] = (k ^ bucket->key[1]) | (signature & 2);	\
	xor[2] = (k ^ bucket->key[2]) | (signature & 4);	\
	xor[3] = (k ^ bucket->key[3]) | (signature & 8);	\
								\
	pos = 4;						\
	if (xor[0] == 0)					\
		pos = 0;					\
	if (xor[1] == 0)					\
		pos = 1;					\
	if (xor[2] == 0)					\
		pos = 2;					\
	if (xor[3] == 0)					\
		pos = 3;					\
}
607 
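/*
 * Single-packet pipeline, stage 0: pop the next packet index from
 * pkts_mask and prefetch its key in the mbuf metadata.
 */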
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
{								\
	uint64_t pkt_mask;					\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt0_index = __builtin_ctzll(pkts_mask);		\
	pkt_mask = 1LLU << pkt0_index;				\
	pkts_mask &= ~pkt_mask;					\
								\
	mbuf0 = pkts[pkt0_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));	\
}
620 
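/*
 * Single-packet pipeline, stage 1: hash the key, locate the bucket and
 * prefetch it.
 */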
#define lookup1_stage1(mbuf1, bucket1, f)			\
{								\
	uint64_t *key;						\
	uint64_t signature;					\
	uint32_t bucket_index;					\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
	signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed);	\
	bucket_index = signature & (f->n_buckets - 1);		\
	bucket1 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket_index * f->bucket_size];	\
	rte_prefetch0(bucket1);					\
}
634 
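/*
 * Single-packet pipeline, stage 2 (LRU): match the key within the bucket,
 * record the hit bit and entry pointer, and refresh the slot's LRU state.
 */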
#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
	pkts_mask_out, entries, f)				\
{								\
	void *a;						\
	uint64_t pkt_mask;					\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
	lru_update(bucket2, pos);				\
}
654 
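/*
 * Single-packet pipeline, stage 2 (extendible buckets): same as the LRU
 * variant, but on a miss with a valid chained bucket it queues the packet
 * for another pass over the chain (via buckets_mask, buckets and keys).
 */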
#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
	entries, buckets_mask, buckets, keys, f)		\
{								\
	struct rte_bucket_4_8 *bucket_next;			\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
								\
	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket2->next;				\
	buckets[pkt2_index] = bucket_next;			\
	keys[pkt2_index] = key;					\
}
680 
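/*
 * Grinder: re-run the key compare against the next bucket in the chain for
 * a packet that missed in the previous bucket, prefetching the bucket after
 * that and queueing the packet again if the chain continues.
 */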
#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)					\
{								\
	struct rte_bucket_4_8 *bucket, *bucket_next;		\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	bucket = buckets[pkt_index];				\
	key = keys[pkt_index];					\
	lookup_key8_cmp(key, bucket, pos, f);			\
								\
	pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt_index] = a;					\
								\
	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket->next;				\
	rte_prefetch0(bucket_next);				\
	buckets[pkt_index] = bucket_next;			\
	keys[pkt_index] = key;					\
}
708 
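/*
 * Dual-packet pipeline, stage 0: pop two packet indexes from pkts_mask and
 * prefetch both keys. Assumes at least two packets are left in pkts_mask.
 */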
#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
	pkts, pkts_mask, f)					\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
729 
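/*
 * Dual-packet pipeline, stage 0 with odd-packet support: when only one
 * packet is left, the second lane duplicates it, so the pipeline keeps
 * running at the cost of one harmless redundant lookup.
 */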
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
	mbuf00, mbuf01, pkts, pkts_mask, f)			\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	if (pkts_mask == 0)					\
		pkt01_index = pkt00_index;			\
								\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}
753 
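/*
 * Dual-packet pipeline, stage 1: hash both keys, locate and prefetch both
 * buckets.
 */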
#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
{								\
	uint64_t *key10, *key11;				\
	uint64_t signature10, signature11;			\
	uint32_t bucket10_index, bucket11_index;		\
	rte_table_hash_op_hash f_hash = f->f_hash;		\
	uint64_t seed = f->seed;				\
	uint32_t key_offset = f->key_offset;			\
								\
	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
								\
	signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed);	\
	bucket10_index = signature10 & (f->n_buckets - 1);	\
	bucket10 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket10_index * f->bucket_size];	\
	rte_prefetch0(bucket10);				\
								\
	signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed);	\
	bucket11_index = signature11 & (f->n_buckets - 1);	\
	bucket11 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket11_index * f->bucket_size];	\
	rte_prefetch0(bucket11);				\
}
778 
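/*
 * Dual-packet pipeline, stage 2 (LRU): match both keys, record hit bits and
 * entry pointers, and refresh the LRU state of both buckets.
 */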
#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
	bucket20, bucket21, pkts_mask_out, entries, f)		\
{								\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask;			\
	uint64_t *key20, *key21;				\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);		\
	lookup_key8_cmp(key21, bucket21, pos21, f);		\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
	lru_update(bucket20, pos20);				\
	lru_update(bucket21, pos21);				\
}
806 
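/*
 * Dual-packet pipeline, stage 2 (extendible buckets): match both keys and,
 * for misses with valid chained buckets, queue the packets for the grinder.
 */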
#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
{								\
	struct rte_bucket_4_8 *bucket20_next, *bucket21_next;	\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
	uint64_t *key20, *key21;				\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);		\
	lookup_key8_cmp(key21, bucket21, pos21, f);		\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
								\
	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
	buckets_mask |= bucket20_mask | bucket21_mask;		\
	bucket20_next = bucket20->next;				\
	bucket21_next = bucket21->next;				\
	buckets[pkt20_index] = bucket20_next;			\
	buckets[pkt21_index] = bucket21_next;			\
	keys[pkt20_index] = key20;				\
	keys[pkt21_index] = key21;				\
}

static int
rte_table_hash_lookup_key8_lru(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
		return 0;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup LRU */

static int
rte_table_hash_lookup_key8_ext(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0, buckets_mask = 0;
	struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with less than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_ext(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, buckets_mask,
				buckets, keys, f);
		}

		goto grind_next_buckets;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries,
			buckets_mask, buckets, keys, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

grind_next_buckets:
	/* Grind next buckets */
	for ( ; buckets_mask; ) {
		uint64_t buckets_mask_next = 0;

		for ( ; buckets_mask; ) {
			uint64_t pkt_mask;
			uint32_t pkt_index;

			pkt_index = __builtin_ctzll(buckets_mask);
			pkt_mask = 1LLU << pkt_index;
			buckets_mask &= ~pkt_mask;

			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
				entries, buckets_mask_next, f);
		}

		buckets_mask = buckets_mask_next;
	}

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup EXT */

static int
rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
{
	struct rte_table_hash *t = table;

	if (stats != NULL)
		memcpy(stats, &t->stats, sizeof(t->stats));

	if (clear)
		memset(&t->stats, 0, sizeof(t->stats));

	return 0;
}
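/*
 * Illustrative use of the ops tables below (a sketch, not part of the
 * original file; my_hash and struct my_entry are hypothetical, and a
 * pipeline application would normally go through librte_pipeline rather
 * than calling the ops directly):
 *
 *	struct rte_table_hash_params params = {
 *		.name = "flow_table",
 *		.key_size = 8,
 *		.key_offset = 0,	// key location in mbuf metadata
 *		.key_mask = NULL,	// match all 8 key bytes
 *		.n_keys = 1 << 16,
 *		.n_buckets = 1 << 14,
 *		.f_hash = my_hash,	// any rte_table_hash_op_hash
 *		.seed = 0,
 *	};
 *	void *t = rte_table_hash_key8_lru_ops.f_create(&params,
 *		0, sizeof(struct my_entry));
 */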
struct rte_table_ops rte_table_hash_key8_lru_ops = {
	.f_create = rte_table_hash_create_key8_lru,
	.f_free = rte_table_hash_free_key8_lru,
	.f_add = rte_table_hash_entry_add_key8_lru,
	.f_delete = rte_table_hash_entry_delete_key8_lru,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_lru,
	.f_stats = rte_table_hash_key8_stats_read,
};

struct rte_table_ops rte_table_hash_key8_ext_ops = {
	.f_create = rte_table_hash_create_key8_ext,
	.f_free = rte_table_hash_free_key8_ext,
	.f_add = rte_table_hash_entry_add_key8_ext,
	.f_delete = rte_table_hash_entry_delete_key8_ext,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_ext,
	.f_stats = rte_table_hash_key8_stats_read,
};