xref: /dpdk/lib/hash/rte_thash.c (revision e9fd1ebf981f361844aea9ec94e17f4bda5e1479)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 
5 #include <stdalign.h>
6 #include <sys/queue.h>
7 
8 #include <rte_thash.h>
9 #include <rte_tailq.h>
10 #include <rte_random.h>
11 #include <rte_memcpy.h>
12 #include <rte_errno.h>
13 #include <rte_eal_memconfig.h>
14 #include <rte_log.h>
15 #include <rte_malloc.h>
16 
/*
 * Log type used by this file. HASH_LOG() forwards its arguments to
 * RTE_LOG_LINE() under the HASH log type, which is mapped to thash_logtype.
 */
RTE_LOG_REGISTER_SUFFIX(thash_logtype, thash, INFO);
#define RTE_LOGTYPE_HASH thash_logtype
#define HASH_LOG(level, ...) RTE_LOG_LINE(level, HASH, "" __VA_ARGS__)
21 
/* Size in bytes of the name buffers embedded in contexts and helpers */
#define THASH_NAME_LEN		64
/* Width in bits of the key window handled by get_subvalue() */
#define TOEPLITZ_HASH_LEN	32

/*
 * Non-zero when reta_sz lies within
 * [RTE_THASH_RETA_SZ_MIN, RTE_THASH_RETA_SZ_MAX].
 * The argument is parenthesized so that any expression can be passed;
 * note that it is evaluated twice.
 */
#define RETA_SZ_IN_RANGE(reta_sz) \
	(((reta_sz) >= RTE_THASH_RETA_SZ_MIN) && \
	 ((reta_sz) <= RTE_THASH_RETA_SZ_MAX))
27 
28 TAILQ_HEAD(rte_thash_list, rte_tailq_entry);
29 static struct rte_tailq_elem rte_thash_tailq = {
30 	.name = "RTE_THASH",
31 };
32 EAL_REGISTER_TAILQ(rte_thash_tailq)
33 
/**
 * Table of some irreducible polynomials over GF(2).
 *
 * The row index is the polynomial degree; every row offers four candidates.
 * For the lfsr they are represented in BE bit order, and x^0 is masked out.
 * For example, poly x^5 + x^2 + 1 will be represented
 * as (101001b & 11111b) = 01001b = 0x9
 */
static const uint32_t irreducible_poly_table[][4] = {
	{0x0, 0x0, 0x0, 0x0},			/* degree 0 */
	{0x1, 0x1, 0x1, 0x1},			/* degree 1 */
	{0x3, 0x3, 0x3, 0x3},			/* degree 2 */
	{0x5, 0x3, 0x5, 0x3},			/* degree 3 */
	{0x9, 0x3, 0x9, 0x3},			/* degree 4 */
	{0x9, 0x1b, 0xf, 0x5},			/* degree 5 */
	{0x21, 0x33, 0x1b, 0x2d},		/* degree 6 */
	{0x41, 0x11, 0x71, 0x9},		/* degree 7 */
	{0x71, 0xa9, 0xf5, 0x8d},		/* degree 8 */
	{0x21, 0xd1, 0x69, 0x1d9},		/* degree 9 */
	{0x81, 0x2c1, 0x3b1, 0x185},		/* degree 10 */
	{0x201, 0x541, 0x341, 0x461},		/* degree 11 */
	{0x941, 0x609, 0xe19, 0x45d},		/* degree 12 */
	{0x1601, 0x1f51, 0x1171, 0x359},	/* degree 13 */
	{0x2141, 0x2111, 0x2db1, 0x2109},	/* degree 14 */
	{0x4001, 0x801, 0x101, 0x7301},		/* degree 15 */
	{0x7781, 0xa011, 0x4211, 0x86d9},	/* degree 16 */
};
60 
/**
 * State of one linear feedback shift register. The same structure drives
 * both generation directions: state/poly for the forward stream and
 * rev_state/rev_poly for the reverse one.
 */
struct thash_lfsr {
	/** number of users; the lfsr is released when it drops to zero */
	uint32_t	ref_cnt;
	/** polynomial associated with the lfsr */
	uint32_t	poly;
	/** polynomial to generate the sequence in reverse direction */
	uint32_t	rev_poly;
	/** current state of the lfsr */
	uint32_t	state;
	/** current state of the lfsr for reverse direction */
	uint32_t	rev_state;
	/** polynomial degree */
	uint32_t	deg;
	/** number of bits generated by lfsr */
	uint32_t	bits_cnt;
};
74 
75 struct rte_thash_subtuple_helper {
76 	char	name[THASH_NAME_LEN];	/** < Name of subtuple configuration */
77 	LIST_ENTRY(rte_thash_subtuple_helper)	next;
78 	struct thash_lfsr	*lfsr;
79 	uint32_t	offset;		/** < Offset of the m-sequence */
80 	uint32_t	len;		/** < Length of the m-sequence */
81 	uint32_t	tuple_offset;	/** < Offset in bits of the subtuple */
82 	uint32_t	tuple_len;	/** < Length in bits of the subtuple */
83 	uint32_t	lsb_msk;	/** < (1 << reta_sz_log) - 1 */
84 	__extension__ alignas(RTE_CACHE_LINE_SIZE) uint32_t	compl_table[0];
85 	/** < Complementary table */
86 };
87 
88 struct rte_thash_ctx {
89 	char		name[THASH_NAME_LEN];
90 	LIST_HEAD(, rte_thash_subtuple_helper) head;
91 	uint32_t	key_len;	/** < Length of the NIC RSS hash key */
92 	uint32_t	reta_sz_log;	/** < size of the RSS ReTa in bits */
93 	uint32_t	subtuples_nb;	/** < number of subtuples */
94 	uint32_t	flags;
95 	uint64_t	*matrices;
96 	/**< matrices used with rte_thash_gfni implementation */
97 	uint8_t		hash_key[0];
98 };
99 
/**
 * Report whether the GFNI based hash implementation can be used.
 *
 * @return
 *   1 when the build defines RTE_THASH_GFNI_DEFINED, the CPU reports the
 *   GFNI flag and the maximum SIMD bitwidth is at least 512; 0 otherwise.
 */
int
rte_thash_gfni_supported(void)
{
#ifdef RTE_THASH_GFNI_DEFINED
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_GFNI) &&
			(rte_vect_get_max_simd_bitwidth() >=
			RTE_VECT_SIMD_512))
		return 1;
#endif

	return 0;
}
112 
/**
 * Expand an RSS key into the byte matrix consumed by the GFNI code path.
 *
 * For every key byte eight output bytes are produced: the key read as a
 * bit stream starting at that byte and shifted left by 0..7 bits. The
 * byte following the last one is taken from the start of the key.
 *
 * @param matrixes
 *   Output buffer, written as size * 8 bytes.
 * @param rss_key
 *   Key to expand.
 * @param size
 *   Key length in bytes; nothing is written when it is not positive.
 */
void
rte_thash_complete_matrix(uint64_t *matrixes, const uint8_t *rss_key, int size)
{
	uint8_t *row = (uint8_t *)matrixes;
	int byte, shift;

	for (byte = 0; byte < size; byte++) {
		const int next = (byte + 1) % size;

		for (shift = 0; shift < 8; shift++) {
			/* bits of the current byte that survive the shift */
			const uint8_t hi = rss_key[byte] << shift;
			/* bits pulled in from the following byte */
			const uint8_t lo = (uint16_t)(rss_key[next]) >>
				(8 - shift);

			*row++ = hi | lo;
		}
	}
}
129 
130 static inline uint32_t
131 get_bit_lfsr(struct thash_lfsr *lfsr)
132 {
133 	uint32_t bit, ret;
134 
135 	/*
136 	 * masking the TAP bits defined by the polynomial and
137 	 * calculating parity
138 	 */
139 	bit = rte_popcount32(lfsr->state & lfsr->poly) & 0x1;
140 	ret = lfsr->state & 0x1;
141 	lfsr->state = ((lfsr->state >> 1) | (bit << (lfsr->deg - 1))) &
142 		((1 << lfsr->deg) - 1);
143 
144 	lfsr->bits_cnt++;
145 	return ret;
146 }
147 
148 static inline uint32_t
149 get_rev_bit_lfsr(struct thash_lfsr *lfsr)
150 {
151 	uint32_t bit, ret;
152 
153 	bit = rte_popcount32(lfsr->rev_state & lfsr->rev_poly) & 0x1;
154 	ret = lfsr->rev_state & (1 << (lfsr->deg - 1));
155 	lfsr->rev_state = ((lfsr->rev_state << 1) | bit) &
156 		((1 << lfsr->deg) - 1);
157 
158 	lfsr->bits_cnt++;
159 	return ret;
160 }
161 
162 static inline uint32_t
163 thash_get_rand_poly(uint32_t poly_degree)
164 {
165 	return irreducible_poly_table[poly_degree][rte_rand() %
166 		RTE_DIM(irreducible_poly_table[poly_degree])];
167 }
168 
169 static struct thash_lfsr *
170 alloc_lfsr(struct rte_thash_ctx *ctx)
171 {
172 	struct thash_lfsr *lfsr;
173 	uint32_t i;
174 
175 	if (ctx == NULL)
176 		return NULL;
177 
178 	lfsr = rte_zmalloc(NULL, sizeof(struct thash_lfsr), 0);
179 	if (lfsr == NULL)
180 		return NULL;
181 
182 	lfsr->deg = ctx->reta_sz_log;
183 	lfsr->poly = thash_get_rand_poly(lfsr->deg);
184 	do {
185 		lfsr->state = rte_rand() & ((1 << lfsr->deg) - 1);
186 	} while (lfsr->state == 0);
187 	/* init reverse order polynomial */
188 	lfsr->rev_poly = (lfsr->poly >> 1) | (1 << (lfsr->deg - 1));
189 	/* init proper rev_state*/
190 	lfsr->rev_state = lfsr->state;
191 	for (i = 0; i <= lfsr->deg; i++)
192 		get_rev_bit_lfsr(lfsr);
193 
194 	/* clear bits_cnt after rev_state was inited */
195 	lfsr->bits_cnt = 0;
196 	lfsr->ref_cnt = 1;
197 
198 	return lfsr;
199 }
200 
201 static void
202 attach_lfsr(struct rte_thash_subtuple_helper *h, struct thash_lfsr *lfsr)
203 {
204 	lfsr->ref_cnt++;
205 	h->lfsr = lfsr;
206 }
207 
208 static void
209 free_lfsr(struct thash_lfsr *lfsr)
210 {
211 	lfsr->ref_cnt--;
212 	if (lfsr->ref_cnt == 0)
213 		rte_free(lfsr);
214 }
215 
216 struct rte_thash_ctx *
217 rte_thash_init_ctx(const char *name, uint32_t key_len, uint32_t reta_sz,
218 	uint8_t *key, uint32_t flags)
219 {
220 	struct rte_thash_ctx *ctx;
221 	struct rte_tailq_entry *te;
222 	struct rte_thash_list *thash_list;
223 	uint32_t i;
224 
225 	if ((name == NULL) || (key_len == 0) || !RETA_SZ_IN_RANGE(reta_sz)) {
226 		rte_errno = EINVAL;
227 		return NULL;
228 	}
229 
230 	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
231 
232 	rte_mcfg_tailq_write_lock();
233 
234 	/* guarantee there's no existing */
235 	TAILQ_FOREACH(te, thash_list, next) {
236 		ctx = (struct rte_thash_ctx *)te->data;
237 		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
238 			break;
239 	}
240 	ctx = NULL;
241 	if (te != NULL) {
242 		rte_errno = EEXIST;
243 		goto exit;
244 	}
245 
246 	/* allocate tailq entry */
247 	te = rte_zmalloc("THASH_TAILQ_ENTRY", sizeof(*te), 0);
248 	if (te == NULL) {
249 		HASH_LOG(ERR,
250 			"Can not allocate tailq entry for thash context %s",
251 			name);
252 		rte_errno = ENOMEM;
253 		goto exit;
254 	}
255 
256 	ctx = rte_zmalloc(NULL, sizeof(struct rte_thash_ctx) + key_len, 0);
257 	if (ctx == NULL) {
258 		HASH_LOG(ERR, "thash ctx %s memory allocation failed",
259 			name);
260 		rte_errno = ENOMEM;
261 		goto free_te;
262 	}
263 
264 	rte_strlcpy(ctx->name, name, sizeof(ctx->name));
265 	ctx->key_len = key_len;
266 	ctx->reta_sz_log = reta_sz;
267 	LIST_INIT(&ctx->head);
268 	ctx->flags = flags;
269 
270 	if (key)
271 		rte_memcpy(ctx->hash_key, key, key_len);
272 	else {
273 		for (i = 0; i < key_len; i++)
274 			ctx->hash_key[i] = rte_rand();
275 	}
276 
277 	if (rte_thash_gfni_supported()) {
278 		ctx->matrices = rte_zmalloc(NULL, key_len * sizeof(uint64_t),
279 			RTE_CACHE_LINE_SIZE);
280 		if (ctx->matrices == NULL) {
281 			HASH_LOG(ERR, "Cannot allocate matrices");
282 			rte_errno = ENOMEM;
283 			goto free_ctx;
284 		}
285 
286 		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
287 			key_len);
288 	}
289 
290 	te->data = (void *)ctx;
291 	TAILQ_INSERT_TAIL(thash_list, te, next);
292 
293 	rte_mcfg_tailq_write_unlock();
294 
295 	return ctx;
296 
297 free_ctx:
298 	rte_free(ctx);
299 free_te:
300 	rte_free(te);
301 exit:
302 	rte_mcfg_tailq_write_unlock();
303 	return NULL;
304 }
305 
306 struct rte_thash_ctx *
307 rte_thash_find_existing(const char *name)
308 {
309 	struct rte_thash_ctx *ctx;
310 	struct rte_tailq_entry *te;
311 	struct rte_thash_list *thash_list;
312 
313 	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
314 
315 	rte_mcfg_tailq_read_lock();
316 	TAILQ_FOREACH(te, thash_list, next) {
317 		ctx = (struct rte_thash_ctx *)te->data;
318 		if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0)
319 			break;
320 	}
321 
322 	rte_mcfg_tailq_read_unlock();
323 
324 	if (te == NULL) {
325 		rte_errno = ENOENT;
326 		return NULL;
327 	}
328 
329 	return ctx;
330 }
331 
332 void
333 rte_thash_free_ctx(struct rte_thash_ctx *ctx)
334 {
335 	struct rte_tailq_entry *te;
336 	struct rte_thash_list *thash_list;
337 	struct rte_thash_subtuple_helper *ent, *tmp;
338 
339 	if (ctx == NULL)
340 		return;
341 
342 	thash_list = RTE_TAILQ_CAST(rte_thash_tailq.head, rte_thash_list);
343 	rte_mcfg_tailq_write_lock();
344 	TAILQ_FOREACH(te, thash_list, next) {
345 		if (te->data == (void *)ctx)
346 			break;
347 	}
348 
349 	if (te != NULL)
350 		TAILQ_REMOVE(thash_list, te, next);
351 
352 	rte_mcfg_tailq_write_unlock();
353 	ent = LIST_FIRST(&(ctx->head));
354 	while (ent) {
355 		free_lfsr(ent->lfsr);
356 		tmp = ent;
357 		ent = LIST_NEXT(ent, next);
358 		LIST_REMOVE(tmp, next);
359 		rte_free(tmp);
360 	}
361 
362 	rte_free(ctx);
363 	rte_free(te);
364 }
365 
/*
 * Store bit at bit position pos of the buffer ptr. Positions are counted
 * from the most significant bit of byte 0. The previous value at that
 * position is cleared first; bit is expected to be 0 or 1.
 */
static inline void
set_bit(uint8_t *ptr, uint32_t bit, uint32_t pos)
{
	/* index of the bit inside its byte, counted from the MSB */
	const uint32_t shift = (CHAR_BIT - 1) - (pos & (CHAR_BIT - 1));
	uint8_t *cell = &ptr[pos / CHAR_BIT];
	uint8_t updated = *cell;

	updated &= ~(1 << shift);
	updated |= bit << shift;
	*cell = updated;
}
379 
380 /**
381  * writes m-sequence to the hash_key for range [start, end]
382  * (i.e. including start and end positions)
383  */
384 static int
385 generate_subkey(struct rte_thash_ctx *ctx, struct thash_lfsr *lfsr,
386 	uint32_t start, uint32_t end)
387 {
388 	uint32_t i;
389 	uint32_t req_bits = (start < end) ? (end - start) : (start - end);
390 	req_bits++; /* due to including end */
391 
392 	/* check if lfsr overflow period of the m-sequence */
393 	if (((lfsr->bits_cnt + req_bits) > (1ULL << lfsr->deg) - 1) &&
394 			((ctx->flags & RTE_THASH_IGNORE_PERIOD_OVERFLOW) !=
395 			RTE_THASH_IGNORE_PERIOD_OVERFLOW)) {
396 		HASH_LOG(ERR,
397 			"Can't generate m-sequence due to period overflow");
398 		return -ENOSPC;
399 	}
400 
401 	if (start < end) {
402 		/* original direction (from left to right)*/
403 		for (i = start; i <= end; i++)
404 			set_bit(ctx->hash_key, get_bit_lfsr(lfsr), i);
405 
406 	} else {
407 		/* reverse direction (from right to left) */
408 		for (i = end; i >= start; i--)
409 			set_bit(ctx->hash_key, get_rev_bit_lfsr(lfsr), i);
410 	}
411 
412 	if (ctx->matrices != NULL)
413 		rte_thash_complete_matrix(ctx->matrices, ctx->hash_key,
414 			ctx->key_len);
415 
416 	return 0;
417 }
418 
419 static inline uint32_t
420 get_subvalue(struct rte_thash_ctx *ctx, uint32_t offset)
421 {
422 	uint32_t *tmp, val;
423 
424 	tmp = (uint32_t *)(&ctx->hash_key[offset >> 3]);
425 	val = rte_be_to_cpu_32(*tmp);
426 	val >>= (TOEPLITZ_HASH_LEN - ((offset & (CHAR_BIT - 1)) +
427 		ctx->reta_sz_log));
428 
429 	return val & ((1 << ctx->reta_sz_log) - 1);
430 }
431 
432 static inline void
433 generate_complement_table(struct rte_thash_ctx *ctx,
434 	struct rte_thash_subtuple_helper *h)
435 {
436 	int i, j, k;
437 	uint32_t val;
438 	uint32_t start;
439 
440 	start = h->offset + h->len - (2 * ctx->reta_sz_log - 1);
441 
442 	for (i = 1; i < (1 << ctx->reta_sz_log); i++) {
443 		val = 0;
444 		for (j = i; j; j &= (j - 1)) {
445 			k = rte_bsf32(j);
446 			val ^= get_subvalue(ctx, start - k +
447 				ctx->reta_sz_log - 1);
448 		}
449 		h->compl_table[val] = i;
450 	}
451 }
452 
453 static inline int
454 insert_before(struct rte_thash_ctx *ctx,
455 	struct rte_thash_subtuple_helper *ent,
456 	struct rte_thash_subtuple_helper *cur_ent,
457 	struct rte_thash_subtuple_helper *next_ent,
458 	uint32_t start, uint32_t end, uint32_t range_end)
459 {
460 	int ret;
461 
462 	if (end < cur_ent->offset) {
463 		ent->lfsr = alloc_lfsr(ctx);
464 		if (ent->lfsr == NULL) {
465 			rte_free(ent);
466 			return -ENOMEM;
467 		}
468 		/* generate nonoverlapping range [start, end) */
469 		ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
470 		if (ret != 0) {
471 			free_lfsr(ent->lfsr);
472 			rte_free(ent);
473 			return ret;
474 		}
475 	} else if ((next_ent != NULL) && (end > next_ent->offset)) {
476 		HASH_LOG(ERR,
477 			"Can't add helper %s due to conflict with existing"
478 			" helper %s", ent->name, next_ent->name);
479 		rte_free(ent);
480 		return -ENOSPC;
481 	}
482 	attach_lfsr(ent, cur_ent->lfsr);
483 
484 	/**
485 	 * generate partially overlapping range
486 	 * [start, cur_ent->start) in reverse order
487 	 */
488 	ret = generate_subkey(ctx, ent->lfsr, cur_ent->offset - 1, start);
489 	if (ret != 0) {
490 		free_lfsr(ent->lfsr);
491 		rte_free(ent);
492 		return ret;
493 	}
494 
495 	if (end > range_end) {
496 		/**
497 		 * generate partially overlapping range
498 		 * (range_end, end)
499 		 */
500 		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
501 		if (ret != 0) {
502 			free_lfsr(ent->lfsr);
503 			rte_free(ent);
504 			return ret;
505 		}
506 	}
507 
508 	LIST_INSERT_BEFORE(cur_ent, ent, next);
509 	generate_complement_table(ctx, ent);
510 	ctx->subtuples_nb++;
511 	return 0;
512 }
513 
514 static inline int
515 insert_after(struct rte_thash_ctx *ctx,
516 	struct rte_thash_subtuple_helper *ent,
517 	struct rte_thash_subtuple_helper *cur_ent,
518 	struct rte_thash_subtuple_helper *next_ent,
519 	struct rte_thash_subtuple_helper *prev_ent,
520 	uint32_t end, uint32_t range_end)
521 {
522 	int ret;
523 
524 	if ((next_ent != NULL) && (end > next_ent->offset)) {
525 		HASH_LOG(ERR,
526 			"Can't add helper %s due to conflict with existing"
527 			" helper %s", ent->name, next_ent->name);
528 		rte_free(ent);
529 		return -EEXIST;
530 	}
531 
532 	attach_lfsr(ent, cur_ent->lfsr);
533 	if (end > range_end) {
534 		/**
535 		 * generate partially overlapping range
536 		 * (range_end, end)
537 		 */
538 		ret = generate_subkey(ctx, ent->lfsr, range_end, end - 1);
539 		if (ret != 0) {
540 			free_lfsr(ent->lfsr);
541 			rte_free(ent);
542 			return ret;
543 		}
544 	}
545 
546 	LIST_INSERT_AFTER(prev_ent, ent, next);
547 	generate_complement_table(ctx, ent);
548 	ctx->subtuples_nb++;
549 
550 	return 0;
551 }
552 
553 int
554 rte_thash_add_helper(struct rte_thash_ctx *ctx, const char *name, uint32_t len,
555 	uint32_t offset)
556 {
557 	struct rte_thash_subtuple_helper *ent, *cur_ent, *prev_ent, *next_ent;
558 	uint32_t start, end;
559 	int ret;
560 
561 	if ((ctx == NULL) || (name == NULL) || (len < ctx->reta_sz_log) ||
562 			((offset + len + TOEPLITZ_HASH_LEN - 1) >
563 			ctx->key_len * CHAR_BIT))
564 		return -EINVAL;
565 
566 	/* Check for existing name*/
567 	LIST_FOREACH(cur_ent, &ctx->head, next) {
568 		if (strncmp(name, cur_ent->name, sizeof(cur_ent->name)) == 0)
569 			return -EEXIST;
570 	}
571 
572 	end = offset + len + TOEPLITZ_HASH_LEN - 1;
573 	start = ((ctx->flags & RTE_THASH_MINIMAL_SEQ) ==
574 		RTE_THASH_MINIMAL_SEQ) ? (end - (2 * ctx->reta_sz_log - 1)) :
575 		offset;
576 
577 	ent = rte_zmalloc(NULL, sizeof(struct rte_thash_subtuple_helper) +
578 		sizeof(uint32_t) * (1 << ctx->reta_sz_log),
579 		RTE_CACHE_LINE_SIZE);
580 	if (ent == NULL)
581 		return -ENOMEM;
582 
583 	rte_strlcpy(ent->name, name, sizeof(ent->name));
584 	ent->offset = start;
585 	ent->len = end - start;
586 	ent->tuple_offset = offset;
587 	ent->tuple_len = len;
588 	ent->lsb_msk = (1 << ctx->reta_sz_log) - 1;
589 
590 	cur_ent = LIST_FIRST(&ctx->head);
591 	while (cur_ent) {
592 		uint32_t range_end = cur_ent->offset + cur_ent->len;
593 		next_ent = LIST_NEXT(cur_ent, next);
594 		prev_ent = cur_ent;
595 		/* Iterate through overlapping ranges */
596 		while ((next_ent != NULL) && (next_ent->offset < range_end)) {
597 			range_end = RTE_MAX(next_ent->offset + next_ent->len,
598 				range_end);
599 			if (start > next_ent->offset)
600 				prev_ent = next_ent;
601 
602 			next_ent = LIST_NEXT(next_ent, next);
603 		}
604 
605 		if (start < cur_ent->offset)
606 			return insert_before(ctx, ent, cur_ent, next_ent,
607 				start, end, range_end);
608 		else if (start < range_end)
609 			return insert_after(ctx, ent, cur_ent, next_ent,
610 				prev_ent, end, range_end);
611 
612 		cur_ent = next_ent;
613 		continue;
614 	}
615 
616 	ent->lfsr = alloc_lfsr(ctx);
617 	if (ent->lfsr == NULL) {
618 		rte_free(ent);
619 		return -ENOMEM;
620 	}
621 
622 	/* generate nonoverlapping range [start, end) */
623 	ret = generate_subkey(ctx, ent->lfsr, start, end - 1);
624 	if (ret != 0) {
625 		free_lfsr(ent->lfsr);
626 		rte_free(ent);
627 		return ret;
628 	}
629 	if (LIST_EMPTY(&ctx->head)) {
630 		LIST_INSERT_HEAD(&ctx->head, ent, next);
631 	} else {
632 		LIST_FOREACH(next_ent, &ctx->head, next)
633 			prev_ent = next_ent;
634 
635 		LIST_INSERT_AFTER(prev_ent, ent, next);
636 	}
637 	generate_complement_table(ctx, ent);
638 	ctx->subtuples_nb++;
639 
640 	return 0;
641 }
642 
643 struct rte_thash_subtuple_helper *
644 rte_thash_get_helper(struct rte_thash_ctx *ctx, const char *name)
645 {
646 	struct rte_thash_subtuple_helper *ent;
647 
648 	if ((ctx == NULL) || (name == NULL))
649 		return NULL;
650 
651 	LIST_FOREACH(ent, &ctx->head, next) {
652 		if (strncmp(name, ent->name, sizeof(ent->name)) == 0)
653 			return ent;
654 	}
655 
656 	return NULL;
657 }
658 
659 uint32_t
660 rte_thash_get_complement(struct rte_thash_subtuple_helper *h,
661 	uint32_t hash, uint32_t desired_hash)
662 {
663 	return h->compl_table[(hash ^ desired_hash) & h->lsb_msk];
664 }
665 
666 const uint8_t *
667 rte_thash_get_key(struct rte_thash_ctx *ctx)
668 {
669 	return ctx->hash_key;
670 }
671 
672 const uint64_t *
673 rte_thash_get_gfni_matrices(struct rte_thash_ctx *ctx)
674 {
675 	return ctx->matrices;
676 }
677 
/*
 * Read the 8 bits that start at bit position offset of the buffer ptr.
 * Positions are counted from the MSB of byte 0. The following byte is
 * only accessed when offset is not a multiple of CHAR_BIT.
 */
static inline uint8_t
read_unaligned_byte(uint8_t *ptr, unsigned int offset)
{
	const unsigned int idx = offset / CHAR_BIT;
	const unsigned int skew = offset % CHAR_BIT;
	uint8_t value = ptr[idx];

	/* byte aligned: nothing to merge */
	if (skew == 0)
		return value;

	value <<= skew;
	value |= ptr[idx + 1] >> (CHAR_BIT - skew);

	return value;
}
692 
/*
 * Read len bits that start at bit position offset of the buffer ptr and
 * return them right aligned. len is clamped to the range [0, 32].
 * Whole bytes are read, so a partial last byte is read in full and the
 * surplus low bits are shifted out afterwards.
 */
static inline uint32_t
read_unaligned_bits(uint8_t *ptr, int len, int offset)
{
	const int max_bits = (int)(sizeof(uint32_t) * CHAR_BIT);
	uint32_t acc = 0;
	int remaining, surplus;

	remaining = RTE_MAX(len, 0);
	remaining = RTE_MIN(remaining, max_bits);

	for (; remaining > 0; remaining -= CHAR_BIT, offset += CHAR_BIT)
		acc = (acc << CHAR_BIT) | read_unaligned_byte(ptr, offset);

	/* remaining is zero or negative here; negative means over-read */
	if (remaining == 0)
		surplus = 0;
	else
		surplus = CHAR_BIT - ((remaining + CHAR_BIT) % CHAR_BIT);

	return acc >> surplus;
}
714 
/*
 * Returns the mask for len bits with the given offset inside a byte.
 * The offset is reduced modulo CHAR_BIT and counted from the MSB; bits
 * that would fall outside the byte are dropped from the mask.
 */
static inline uint8_t
get_bits_mask(unsigned int len, unsigned int offset)
{
	const unsigned int first = offset % CHAR_BIT;
	/* position just past the last bit, limited to the byte */
	unsigned int stop = first + len;
	unsigned int from_first, from_stop;

	if (stop > (unsigned int)CHAR_BIT)
		stop = (unsigned int)CHAR_BIT;

	/* all bits from 'first' to the LSB, minus those from 'stop' on */
	from_first = (1 << (CHAR_BIT - first)) - 1;
	from_stop = (1 << (CHAR_BIT - stop)) - 1;

	return from_first ^ from_stop;
}
728 
/*
 * Write the len least significant bits of val (len <= CHAR_BIT) at bit
 * position offset of the buffer ptr. Positions are counted from the MSB
 * of byte 0. Bits outside the written field are preserved.
 *
 * The following byte is only accessed when part of the field really
 * spills into it, so a field ending exactly on a byte boundary never
 * touches memory past its last byte.
 */
static inline void
write_unaligned_byte(uint8_t *ptr, unsigned int len,
	unsigned int offset, uint8_t val)
{
	/* number of field bits landing in the following byte */
	const unsigned int rest_len = (offset + len) % CHAR_BIT;
	uint8_t tmp;

	tmp = ptr[offset / CHAR_BIT];
	tmp &= ~get_bits_mask(len, offset);
	tmp |= ((val << (CHAR_BIT - len)) >> (offset % CHAR_BIT));
	ptr[offset / CHAR_BIT] = tmp;

	if ((((offset + len) / CHAR_BIT) != (offset / CHAR_BIT)) &&
			(rest_len != 0)) {
		tmp = ptr[(offset + len) / CHAR_BIT];
		tmp &= ~get_bits_mask(rest_len, 0);
		tmp |= val << (CHAR_BIT - rest_len);
		ptr[(offset + len) / CHAR_BIT] = tmp;
	}
}
747 
/*
 * Write the len least significant bits of val at bit position offset of
 * the buffer ptr. len is clamped to the range [0, 32]. The value is
 * written in chunks of at most CHAR_BIT bits, starting with its least
 * significant chunk, which is placed at the end of the field.
 */
static inline void
write_unaligned_bits(uint8_t *ptr, int len, int offset, uint32_t val)
{
	int remaining;

	remaining = RTE_MAX(len, 0);
	remaining = RTE_MIN(remaining, (int)(sizeof(uint32_t) * CHAR_BIT));

	for (; remaining > 0; remaining -= CHAR_BIT, val >>= CHAR_BIT) {
		const unsigned int chunk_len = RTE_MIN(CHAR_BIT, remaining);
		const uint8_t chunk = (uint8_t)val & ((1 << chunk_len) - 1);

		write_unaligned_byte(ptr, chunk_len,
			offset + remaining - chunk_len, chunk);
	}
}
766 
767 int
768 rte_thash_adjust_tuple(struct rte_thash_ctx *ctx,
769 	struct rte_thash_subtuple_helper *h,
770 	uint8_t *tuple, unsigned int tuple_len,
771 	uint32_t desired_value,	unsigned int attempts,
772 	rte_thash_check_tuple_t fn, void *userdata)
773 {
774 	uint32_t tmp_tuple[tuple_len / sizeof(uint32_t)];
775 	unsigned int i, j, ret = 0;
776 	uint32_t hash, adj_bits;
777 	const uint8_t *hash_key;
778 	uint32_t tmp;
779 	int offset;
780 	int tmp_len;
781 
782 	if ((ctx == NULL) || (h == NULL) || (tuple == NULL) ||
783 			(tuple_len % sizeof(uint32_t) != 0) || (attempts <= 0))
784 		return -EINVAL;
785 
786 	hash_key = rte_thash_get_key(ctx);
787 
788 	attempts = RTE_MIN(attempts, 1U << (h->tuple_len - ctx->reta_sz_log));
789 
790 	for (i = 0; i < attempts; i++) {
791 		if (ctx->matrices != NULL)
792 			hash = rte_thash_gfni(ctx->matrices, tuple, tuple_len);
793 		else {
794 			for (j = 0; j < (tuple_len / 4); j++)
795 				tmp_tuple[j] =
796 					rte_be_to_cpu_32(
797 						*(uint32_t *)&tuple[j * 4]);
798 
799 			hash = rte_softrss(tmp_tuple, tuple_len / 4, hash_key);
800 		}
801 
802 		adj_bits = rte_thash_get_complement(h, hash, desired_value);
803 
804 		/*
805 		 * Hint: LSB of adj_bits corresponds to
806 		 * offset + len bit of the subtuple
807 		 */
808 		offset =  h->tuple_offset + h->tuple_len - ctx->reta_sz_log;
809 		tmp = read_unaligned_bits(tuple, ctx->reta_sz_log, offset);
810 		tmp ^= adj_bits;
811 		write_unaligned_bits(tuple, ctx->reta_sz_log, offset, tmp);
812 
813 		if (fn != NULL) {
814 			ret = (fn(userdata, tuple)) ? 0 : -EEXIST;
815 			if (ret == 0)
816 				return 0;
817 			else if (i < (attempts - 1)) {
818 				/* increment subtuple part by 1 */
819 				tmp_len = RTE_MIN(sizeof(uint32_t) * CHAR_BIT,
820 					h->tuple_len - ctx->reta_sz_log);
821 				offset -= tmp_len;
822 				tmp = read_unaligned_bits(tuple, tmp_len,
823 					offset);
824 				tmp++;
825 				tmp &= (1 << tmp_len) - 1;
826 				write_unaligned_bits(tuple, tmp_len, offset,
827 					tmp);
828 			}
829 		} else
830 			return 0;
831 	}
832 
833 	return ret;
834 }
835