/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <string.h>
#include <stdio.h>

#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>

#include "rte_table_hash.h"
#include "rte_lru.h"

#define KEY_SIZE						8

#define KEYS_PER_BUCKET					4

#ifdef RTE_TABLE_STATS_COLLECT

#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \
	table->stats.n_pkts_in += val
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \
	table->stats.n_pkts_lookup_miss += val

#else

#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val)
#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val)

#endif
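/*
 * With RTE_TABLE_STATS_COLLECT undefined, the macros above expand to nothing,
 * so statistics collection compiles away entirely.
 */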

#ifdef RTE_ARCH_64
struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;
	uint64_t lru_list;
	struct rte_bucket_4_8 *next;
	uint64_t next_valid;

	uint64_t key[4];

	/* Cache line 1 */
	uint8_t data[];
};
#else
struct rte_bucket_4_8 {
	/* Cache line 0 */
	uint64_t signature;
	uint64_t lru_list;
	struct rte_bucket_4_8 *next;
	uint32_t pad;
	uint64_t next_valid;

	uint64_t key[4];

	/* Cache line 1 */
	uint8_t data[];
};
#endif
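/*
 * Both layouts keep cache line 0 (signature through key[]) at exactly 64
 * bytes; on 32-bit targets the explicit pad makes up for the smaller next
 * pointer. The create functions below enforce this with a sizeof() % 64
 * check.
 */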

struct rte_table_hash {
	struct rte_table_stats stats;

	/* Input parameters */
	uint32_t n_buckets;
	uint32_t key_size;
	uint32_t entry_size;
	uint32_t bucket_size;
	uint32_t key_offset;
	uint64_t key_mask;
	rte_table_hash_op_hash f_hash;
	uint64_t seed;

	/* Extendible buckets */
	uint32_t n_buckets_ext;
	uint32_t stack_pos;
	uint32_t *stack;

	/* Lookup table */
	uint8_t memory[0] __rte_cache_aligned;
};
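/*
 * memory[] starts with the bucket array. For the extendible variant it is
 * followed by n_buckets_ext extension buckets and then by the free-list
 * stack, as laid out in rte_table_hash_create_key8_ext().
 */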

static int
keycmp(void *a, void *b, void *b_mask)
{
	uint64_t *a64 = a, *b64 = b, *b_mask64 = b_mask;

	return a64[0] != (b64[0] & b_mask64[0]);
}

static void
keycpy(void *dst, void *src, void *src_mask)
{
	uint64_t *dst64 = dst, *src64 = src, *src_mask64 = src_mask;

	dst64[0] = src64[0] & src_mask64[0];
}
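/*
 * With the key size fixed at 8 bytes, key compare and copy reduce to a single
 * masked 64-bit operation. The mask is applied to the candidate key only:
 * stored keys are already masked, since every store goes through keycpy().
 */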

static int
check_params_create(struct rte_table_hash_params *params)
{
	/* name */
	if (params->name == NULL) {
		RTE_LOG(ERR, TABLE, "%s: name invalid value\n", __func__);
		return -EINVAL;
	}

	/* key_size */
	if (params->key_size != KEY_SIZE) {
		RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
		return -EINVAL;
	}

	/* n_keys */
	if (params->n_keys == 0) {
		RTE_LOG(ERR, TABLE, "%s: n_keys is zero\n", __func__);
		return -EINVAL;
	}

	/* n_buckets */
	if ((params->n_buckets == 0) ||
		(!rte_is_power_of_2(params->n_buckets))) {
		RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
		return -EINVAL;
	}

	/* f_hash */
	if (params->f_hash == NULL) {
		RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n",
			__func__);
		return -EINVAL;
	}

	return 0;
}

static void *
rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, total_size;
	uint32_t n_buckets, i;

	/* Check input parameters */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of buckets (n_buckets) so that there is a
	 * chance to store n_keys keys in the table.
	 *
	 * Note: Since the buckets do not get extended, it is not possible to
	 * guarantee that n_keys keys can be stored in the table at any time. In the
	 * worst case scenario, when all the n_keys fall into the same bucket, only
	 * a maximum of KEYS_PER_BUCKET keys will be stored in the table. This case
	 * defeats the purpose of the hash table. It indicates an unsuitable f_hash
	 * or an inappropriate n_keys to n_buckets ratio.
	 *
	 * MIN(n_buckets) = (n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET
	 */
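	/*
	 * Illustrative sizing (not from the original source): for n_keys = 1000
	 * and KEYS_PER_BUCKET = 4, MIN(n_buckets) = ceil(1000 / 4) = 250, which
	 * rte_align32pow2() rounds up to 256 below.
	 */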
	n_buckets = rte_align32pow2(
		(p->n_keys + KEYS_PER_BUCKET - 1) / KEYS_PER_BUCKET);
	n_buckets = RTE_MAX(n_buckets, p->n_buckets);

	/* Memory allocation */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;

	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes"
			" for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	for (i = 0; i < n_buckets; i++) {
		struct rte_bucket_4_8 *bucket;

		bucket = (struct rte_bucket_4_8 *) &f->memory[i *
			f->bucket_size];
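		/*
		 * Per rte_lru.h, the LRU order is four 16-bit slot indices
		 * packed into one uint64_t. 0x0000000100020003 is the order
		 * 0, 1, 2, 3, so lru_pos() (the low 16 bits) evicts slot 3
		 * first.
		 */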
		bucket->lru_list = 0x0000000100020003LLU;
	}

	return f;
}

static int
rte_table_hash_free_key8_lru(void *table)
{
	struct rte_table_hash *f = table;

	/* Check input parameters */
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
		return -EINVAL;
	}

	rte_free(f);
	return 0;
}

static int
rte_table_hash_entry_add_key8_lru(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask, pos;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 1;
			*entry_ptr = (void *) bucket_data;
			return 0;
		}
	}

	/* Key is not present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;

		if ((bucket_signature & mask) == 0) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			bucket->signature |= mask;
			keycpy(&bucket->key[i], key, &f->key_mask);
			memcpy(bucket_data, entry, f->entry_size);
			lru_update(bucket, i);
			*key_found = 0;
			*entry_ptr = (void *) bucket_data;

			return 0;
		}
	}

	/* Bucket full: replace LRU entry */
	pos = lru_pos(bucket);
	keycpy(&bucket->key[pos], key, &f->key_mask);
	memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
	lru_update(bucket, pos);
	*key_found = 0;
	*entry_ptr = (void *) &bucket->data[pos * f->entry_size];

	return 0;
}

static int
rte_table_hash_entry_delete_key8_lru(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket;
	uint64_t signature, mask;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
		uint64_t bucket_signature = bucket->signature;
		uint64_t *bucket_key = &bucket->key[i];

		if ((bucket_signature & mask) &&
			(keycmp(bucket_key, key, &f->key_mask) == 0)) {
			uint8_t *bucket_data = &bucket->data[i * f->entry_size];

			bucket->signature &= ~mask;
			*key_found = 1;
			if (entry)
				memcpy(entry, bucket_data, f->entry_size);

			return 0;
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}

static void *
rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size)
{
	struct rte_table_hash_params *p = params;
	struct rte_table_hash *f;
	uint64_t bucket_size, stack_size, total_size;
	uint32_t n_buckets_ext, i;

	/* Check input parameters */
	if ((check_params_create(p) != 0) ||
		((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) ||
		((sizeof(struct rte_bucket_4_8) % 64) != 0))
		return NULL;

	/*
	 * Table dimensioning
	 *
	 * Objective: Pick the number of bucket extensions (n_buckets_ext) so that
	 * it is guaranteed that n_keys keys can be stored in the table at any time.
	 *
	 * The worst case scenario takes place when all the n_keys keys fall into
	 * the same bucket. Actually, due to the KEYS_PER_BUCKET scheme, the worst
	 * case takes place when (n_keys - KEYS_PER_BUCKET + 1) keys fall into the
	 * same bucket, while the remaining (KEYS_PER_BUCKET - 1) keys each fall
	 * into a different bucket. This case defeats the purpose of the hash table.
	 * It indicates an unsuitable f_hash or an inappropriate n_keys to n_buckets
	 * ratio.
	 *
	 * n_buckets_ext = n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1
	 */
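	/*
	 * Illustrative sizing (not from the original source): for n_keys = 1000,
	 * n_buckets_ext = 1000 / 4 + 3 = 253 extendible buckets, which covers the
	 * worst case described above.
	 */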
	n_buckets_ext = p->n_keys / KEYS_PER_BUCKET + KEYS_PER_BUCKET - 1;

	/* Memory allocation */
	bucket_size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_bucket_4_8) +
		KEYS_PER_BUCKET * entry_size);
	stack_size = RTE_CACHE_LINE_ROUNDUP(n_buckets_ext * sizeof(uint32_t));
	total_size = sizeof(struct rte_table_hash) +
		(p->n_buckets + n_buckets_ext) * bucket_size + stack_size;

	if (total_size > SIZE_MAX) {
		RTE_LOG(ERR, TABLE, "%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}

	f = rte_zmalloc_socket(p->name,
		(size_t)total_size,
		RTE_CACHE_LINE_SIZE,
		socket_id);
	if (f == NULL) {
		RTE_LOG(ERR, TABLE,
			"%s: Cannot allocate %" PRIu64 " bytes "
			"for hash table %s\n",
			__func__, total_size, p->name);
		return NULL;
	}
	RTE_LOG(INFO, TABLE, "%s: Hash table %s memory footprint "
		"is %" PRIu64 " bytes\n",
		__func__, p->name, total_size);

	/* Memory initialization */
	f->n_buckets = p->n_buckets;
	f->key_size = KEY_SIZE;
	f->entry_size = entry_size;
	f->bucket_size = bucket_size;
	f->key_offset = p->key_offset;
	f->f_hash = p->f_hash;
	f->seed = p->seed;

	f->n_buckets_ext = n_buckets_ext;
	f->stack_pos = n_buckets_ext;
	f->stack = (uint32_t *)
		&f->memory[(p->n_buckets + n_buckets_ext) * f->bucket_size];
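	/*
	 * The stack is a LIFO free list of extendible bucket indices and starts
	 * full (stack_pos counts the free entries); it is popped on bucket
	 * extension and pushed on bucket release.
	 */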

	if (p->key_mask != NULL)
		f->key_mask = ((uint64_t *)p->key_mask)[0];
	else
		f->key_mask = 0xFFFFFFFFFFFFFFFFLLU;

	for (i = 0; i < n_buckets_ext; i++)
		f->stack[i] = i;

	return f;
}

static int
rte_table_hash_free_key8_ext(void *table)
{
	struct rte_table_hash *f = table;

	/* Check input parameters */
	if (f == NULL) {
		RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
		return -EINVAL;
	}

	rte_free(f);
	return 0;
}

static int
rte_table_hash_entry_add_key8_ext(
	void *table,
	void *key,
	void *entry,
	int *key_found,
	void **entry_ptr)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (bucket = bucket0; bucket != NULL; bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 1;
				*entry_ptr = (void *) bucket_data;
				return 0;
			}
		}
	}

	/* Key is not present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0;
		bucket != NULL; bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;

			if ((bucket_signature & mask) == 0) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature |= mask;
				keycpy(&bucket->key[i], key, &f->key_mask);
				memcpy(bucket_data, entry, f->entry_size);
				*key_found = 0;
				*entry_ptr = (void *) bucket_data;

				return 0;
			}
		}
	}

	/* Bucket full: extend bucket */
	if (f->stack_pos > 0) {
		bucket_index = f->stack[--f->stack_pos];

		bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets +
			bucket_index) * f->bucket_size];
		bucket_prev->next = bucket;
		bucket_prev->next_valid = 1;

		bucket->signature = 1;
		keycpy(&bucket->key[0], key, &f->key_mask);
		memcpy(&bucket->data[0], entry, f->entry_size);
		*key_found = 0;
		*entry_ptr = (void *) &bucket->data[0];
		return 0;
	}

	return -ENOSPC;
}

static int
rte_table_hash_entry_delete_key8_ext(
	void *table,
	void *key,
	int *key_found,
	void *entry)
{
	struct rte_table_hash *f = table;
	struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
	uint64_t signature;
	uint32_t bucket_index, i;

	signature = f->f_hash(key, &f->key_mask, f->key_size, f->seed);
	bucket_index = signature & (f->n_buckets - 1);
	bucket0 = (struct rte_bucket_4_8 *)
		&f->memory[bucket_index * f->bucket_size];

	/* Key is present in the bucket */
	for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
		bucket_prev = bucket, bucket = bucket->next) {
		uint64_t mask;

		for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) {
			uint64_t bucket_signature = bucket->signature;
			uint64_t *bucket_key = &bucket->key[i];

			if ((bucket_signature & mask) &&
				(keycmp(bucket_key, key, &f->key_mask) == 0)) {
				uint8_t *bucket_data = &bucket->data[i *
					f->entry_size];

				bucket->signature &= ~mask;
				*key_found = 1;
				if (entry)
					memcpy(entry, bucket_data,
						f->entry_size);

				if ((bucket->signature == 0) &&
				    (bucket_prev != NULL)) {
					bucket_prev->next = bucket->next;
					bucket_prev->next_valid =
						bucket->next_valid;

					memset(bucket, 0,
						sizeof(struct rte_bucket_4_8));
					bucket_index = (((uint8_t *)bucket -
						(uint8_t *)f->memory) /
						f->bucket_size) - f->n_buckets;
					f->stack[f->stack_pos++] = bucket_index;
				}

				return 0;
			}
		}
	}

	/* Key is not present in the bucket */
	*key_found = 0;
	return 0;
}

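/*
 * Branchless 4-way key match: the bucket signature is inverted so that every
 * empty slot ORs a non-zero bit into its xor word and can never compare
 * equal. pos ends up as the matching slot index, or 4 when there is no match;
 * callers then read bit pos of the signature, which is always 0 for pos = 4,
 * so a miss never sets the hit mask.
 */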
#define lookup_key8_cmp(key_in, bucket, pos, f)			\
{								\
	uint64_t xor[4], signature, k;				\
								\
	signature = ~bucket->signature;				\
								\
	k = key_in[0] & f->key_mask;				\
	xor[0] = (k ^ bucket->key[0]) | (signature & 1);	\
	xor[1] = (k ^ bucket->key[1]) | (signature & 2);	\
	xor[2] = (k ^ bucket->key[2]) | (signature & 4);	\
	xor[3] = (k ^ bucket->key[3]) | (signature & 8);	\
								\
	pos = 4;						\
	if (xor[0] == 0)					\
		pos = 0;					\
	if (xor[1] == 0)					\
		pos = 1;					\
	if (xor[2] == 0)					\
		pos = 2;					\
	if (xor[3] == 0)					\
		pos = 3;					\
}

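/*
 * The lookup macros below form a 3-stage software pipeline: stage 0 selects
 * the next packet(s) from pkts_mask and prefetches the key, stage 1 hashes
 * the key and prefetches the bucket, and stage 2 runs the key match and
 * prefetches the resolved entry. The lookup1_* variants handle one packet per
 * step, the lookup2_* variants two.
 */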
#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f)	\
{								\
	uint64_t pkt_mask;					\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt0_index = __builtin_ctzll(pkts_mask);		\
	pkt_mask = 1LLU << pkt0_index;				\
	pkts_mask &= ~pkt_mask;					\
								\
	mbuf0 = pkts[pkt0_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));	\
}

#define lookup1_stage1(mbuf1, bucket1, f)			\
{								\
	uint64_t *key;						\
	uint64_t signature;					\
	uint32_t bucket_index;					\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\
	signature = f->f_hash(key, &f->key_mask, KEY_SIZE, f->seed);	\
	bucket_index = signature & (f->n_buckets - 1);		\
	bucket1 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket_index * f->bucket_size];	\
	rte_prefetch0(bucket1);					\
}

#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,		\
	pkts_mask_out, entries, f)				\
{								\
	void *a;						\
	uint64_t pkt_mask;					\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
	lru_update(bucket2, pos);				\
}

#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\
	entries, buckets_mask, buckets, keys, f)		\
{								\
	struct rte_bucket_4_8 *bucket_next;			\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\
	lookup_key8_cmp(key, bucket2, pos, f);			\
								\
	pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket2->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt2_index] = a;				\
								\
	bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket2->next;				\
	buckets[pkt2_index] = bucket_next;			\
	keys[pkt2_index] = key;					\
}

#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\
	buckets_mask, f)					\
{								\
	struct rte_bucket_4_8 *bucket, *bucket_next;		\
	void *a;						\
	uint64_t pkt_mask, bucket_mask;				\
	uint64_t *key;						\
	uint32_t pos;						\
								\
	bucket = buckets[pkt_index];				\
	key = keys[pkt_index];					\
	lookup_key8_cmp(key, bucket, pos, f);			\
								\
	pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\
	pkts_mask_out |= pkt_mask;				\
								\
	a = (void *) &bucket->data[pos * f->entry_size];	\
	rte_prefetch0(a);					\
	entries[pkt_index] = a;					\
								\
	bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\
	buckets_mask |= bucket_mask;				\
	bucket_next = bucket->next;				\
	rte_prefetch0(bucket_next);				\
	buckets[pkt_index] = bucket_next;			\
	keys[pkt_index] = key;					\
}

#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\
	pkts, pkts_mask, f)					\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}

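/*
 * Variant of lookup2_stage0 for an odd number of packets: once pkts_mask runs
 * out after the first packet, the same packet is loaded twice. The duplicate
 * lookup is harmless, since both copies resolve to the same entry and set the
 * same bit in the output masks.
 */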
#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\
	mbuf00, mbuf01, pkts, pkts_mask, f)			\
{								\
	uint64_t pkt00_mask, pkt01_mask;			\
	uint32_t key_offset = f->key_offset;			\
								\
	pkt00_index = __builtin_ctzll(pkts_mask);		\
	pkt00_mask = 1LLU << pkt00_index;			\
	pkts_mask &= ~pkt00_mask;				\
								\
	mbuf00 = pkts[pkt00_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\
								\
	pkt01_index = __builtin_ctzll(pkts_mask);		\
	if (pkts_mask == 0)					\
		pkt01_index = pkt00_index;			\
								\
	pkt01_mask = 1LLU << pkt01_index;			\
	pkts_mask &= ~pkt01_mask;				\
								\
	mbuf01 = pkts[pkt01_index];				\
	rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\
}

#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)	\
{								\
	uint64_t *key10, *key11;				\
	uint64_t signature10, signature11;			\
	uint32_t bucket10_index, bucket11_index;		\
	rte_table_hash_op_hash f_hash = f->f_hash;		\
	uint64_t seed = f->seed;				\
	uint32_t key_offset = f->key_offset;			\
								\
	key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\
	key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\
								\
	signature10 = f_hash(key10, &f->key_mask, KEY_SIZE, seed);	\
	bucket10_index = signature10 & (f->n_buckets - 1);	\
	bucket10 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket10_index * f->bucket_size];	\
	rte_prefetch0(bucket10);				\
								\
	signature11 = f_hash(key11, &f->key_mask, KEY_SIZE, seed);	\
	bucket11_index = signature11 & (f->n_buckets - 1);	\
	bucket11 = (struct rte_bucket_4_8 *)			\
		&f->memory[bucket11_index * f->bucket_size];	\
	rte_prefetch0(bucket11);				\
}

#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\
	bucket20, bucket21, pkts_mask_out, entries, f)		\
{								\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask;			\
	uint64_t *key20, *key21;				\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);		\
	lookup_key8_cmp(key21, bucket21, pos21, f);		\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
	lru_update(bucket20, pos20);				\
	lru_update(bucket21, pos21);				\
}

#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \
	bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\
{								\
	struct rte_bucket_4_8 *bucket20_next, *bucket21_next;	\
	void *a20, *a21;					\
	uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\
	uint64_t *key20, *key21;				\
	uint32_t pos20, pos21;					\
								\
	key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\
	key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\
								\
	lookup_key8_cmp(key20, bucket20, pos20, f);		\
	lookup_key8_cmp(key21, bucket21, pos21, f);		\
								\
	pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\
	pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\
	pkts_mask_out |= pkt20_mask | pkt21_mask;		\
								\
	a20 = (void *) &bucket20->data[pos20 * f->entry_size];	\
	a21 = (void *) &bucket21->data[pos21 * f->entry_size];	\
	rte_prefetch0(a20);					\
	rte_prefetch0(a21);					\
	entries[pkt20_index] = a20;				\
	entries[pkt21_index] = a21;				\
								\
	bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\
	bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\
	buckets_mask |= bucket20_mask | bucket21_mask;		\
	bucket20_next = bucket20->next;				\
	bucket21_next = bucket21->next;				\
	buckets[pkt20_index] = bucket20_next;			\
	buckets[pkt21_index] = bucket21_next;			\
	keys[pkt20_index] = key20;				\
	keys[pkt21_index] = key21;				\
}

static int
rte_table_hash_lookup_key8_lru(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0;

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with fewer than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_lru(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, f);
		}

		*lookup_hit_mask = pkts_mask_out;
		RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
			__builtin_popcountll(pkts_mask_out));
		return 0;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries, f);

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
		__builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup LRU */

static int
rte_table_hash_lookup_key8_ext(
	void *table,
	struct rte_mbuf **pkts,
	uint64_t pkts_mask,
	uint64_t *lookup_hit_mask,
	void **entries)
{
	struct rte_table_hash *f = (struct rte_table_hash *) table;
	struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
	struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
	uint32_t pkt00_index, pkt01_index, pkt10_index;
	uint32_t pkt11_index, pkt20_index, pkt21_index;
	uint64_t pkts_mask_out = 0, buckets_mask = 0;
	struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
	uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];

	__rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask);
	RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in);

	/* Cannot run the pipeline with fewer than 5 packets */
	if (__builtin_popcountll(pkts_mask) < 5) {
		for ( ; pkts_mask; ) {
			struct rte_bucket_4_8 *bucket;
			struct rte_mbuf *mbuf;
			uint32_t pkt_index;

			lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f);
			lookup1_stage1(mbuf, bucket, f);
			lookup1_stage2_ext(pkt_index, mbuf, bucket,
				pkts_mask_out, entries, buckets_mask,
				buckets, keys, f);
		}

		goto grind_next_buckets;
	}

	/*
	 * Pipeline fill
	 *
	 */
	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline feed */
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 0 */
	lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
		pkts_mask, f);

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/*
	 * Pipeline run
	 *
	 */
	for ( ; pkts_mask; ) {
		/* Pipeline feed */
		bucket20 = bucket10;
		bucket21 = bucket11;
		mbuf20 = mbuf10;
		mbuf21 = mbuf11;
		mbuf10 = mbuf00;
		mbuf11 = mbuf01;
		pkt20_index = pkt10_index;
		pkt21_index = pkt11_index;
		pkt10_index = pkt00_index;
		pkt11_index = pkt01_index;

		/* Pipeline stage 0 */
		lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
			mbuf00, mbuf01, pkts, pkts_mask, f);

		/* Pipeline stage 1 */
		lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

		/* Pipeline stage 2 */
		lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
			bucket20, bucket21, pkts_mask_out, entries,
			buckets_mask, buckets, keys, f);
	}

	/*
	 * Pipeline flush
	 *
	 */
	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	mbuf10 = mbuf00;
	mbuf11 = mbuf01;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;
	pkt10_index = pkt00_index;
	pkt11_index = pkt01_index;

	/* Pipeline stage 1 */
	lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

	/* Pipeline feed */
	bucket20 = bucket10;
	bucket21 = bucket11;
	mbuf20 = mbuf10;
	mbuf21 = mbuf11;
	pkt20_index = pkt10_index;
	pkt21_index = pkt11_index;

	/* Pipeline stage 2 */
	lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
		bucket20, bucket21, pkts_mask_out, entries,
		buckets_mask, buckets, keys, f);

grind_next_buckets:
	/* Grind next buckets */
	for ( ; buckets_mask; ) {
		uint64_t buckets_mask_next = 0;

		for ( ; buckets_mask; ) {
			uint64_t pkt_mask;
			uint32_t pkt_index;

			pkt_index = __builtin_ctzll(buckets_mask);
			pkt_mask = 1LLU << pkt_index;
			buckets_mask &= ~pkt_mask;

			lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
				entries, buckets_mask_next, f);
		}

		buckets_mask = buckets_mask_next;
	}

	*lookup_hit_mask = pkts_mask_out;
	RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in -
		__builtin_popcountll(pkts_mask_out));
	return 0;
} /* lookup EXT */

static int
rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear)
{
	struct rte_table_hash *t = table;

	if (stats != NULL)
		memcpy(stats, &t->stats, sizeof(t->stats));

	if (clear)
		memset(&t->stats, 0, sizeof(t->stats));

	return 0;
}

struct rte_table_ops rte_table_hash_key8_lru_ops = {
	.f_create = rte_table_hash_create_key8_lru,
	.f_free = rte_table_hash_free_key8_lru,
	.f_add = rte_table_hash_entry_add_key8_lru,
	.f_delete = rte_table_hash_entry_delete_key8_lru,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_lru,
	.f_stats = rte_table_hash_key8_stats_read,
};

struct rte_table_ops rte_table_hash_key8_ext_ops = {
	.f_create = rte_table_hash_create_key8_ext,
	.f_free = rte_table_hash_free_key8_ext,
	.f_add = rte_table_hash_entry_add_key8_ext,
	.f_delete = rte_table_hash_entry_delete_key8_ext,
	.f_add_bulk = NULL,
	.f_delete_bulk = NULL,
	.f_lookup = rte_table_hash_lookup_key8_ext,
	.f_stats = rte_table_hash_key8_stats_read,
};
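
/*
 * Usage sketch (illustrative, not part of the original file): creating an LRU
 * table through the ops interface above. example_hash and example_create are
 * hypothetical names; any hash matching rte_table_hash_op_hash works, and the
 * key offset and entry size below are placeholder choices.
 */
#ifdef RTE_TABLE_HASH_KEY8_EXAMPLE

#include <rte_hash_crc.h>
#include <rte_mbuf.h>

static uint64_t
example_hash(void *key, void *key_mask, __rte_unused uint32_t key_size,
	uint64_t seed)
{
	uint64_t k = ((uint64_t *)key)[0] & ((uint64_t *)key_mask)[0];

	return rte_hash_crc_8byte(k, (uint32_t)seed);
}

static void *
example_create(int socket_id)
{
	struct rte_table_hash_params params = {
		.name = "example",
		.key_size = 8,
		/* Hypothetical: 8-byte key stored right after the mbuf header */
		.key_offset = sizeof(struct rte_mbuf),
		.key_mask = NULL,	/* NULL mask matches all 8 key bytes */
		.n_keys = 1 << 10,
		.n_buckets = 1 << 8,	/* must be a power of 2 */
		.f_hash = example_hash,
		.seed = 0,
	};

	/* Third argument is entry_size: bytes of user data stored per key */
	return rte_table_hash_key8_lru_ops.f_create(&params, socket_id, 16);
}

#endif /* RTE_TABLE_HASH_KEY8_EXAMPLE */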