/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <rte_acl.h>
#include "acl.h"

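/*
 * Quad-range layout starts at 0x80: input bytes are ordered 0x80-0xff,
 * then 0x00-0x7f, because the run-time lookup compares them as signed
 * values (SSE cmpgt).
 */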
#define	QRANGE_MIN	((uint8_t)INT8_MIN)

#define	RTE_ACL_VERIFY(exp)	do {                                          \
	if (!(exp))                                                           \
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)

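/*
 * Per-node-type statistics gathered while classifying the build-time trie:
 * how many match, single, quad-range and DFA nodes were found, plus the
 * number of quad vectors and 64-entry DFA groups they will occupy.
 */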
struct acl_node_counters {
	int32_t match;
	int32_t match_used;
	int32_t single;
	int32_t quad;
	int32_t quad_vectors;
	int32_t dfa;
	int32_t dfa_gr64;
};

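/*
 * Running positions (in 64-bit transition entries) inside the runtime
 * node array, advanced as each node type is laid out during generation.
 */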
struct rte_acl_indices {
	int32_t dfa_index;
	int32_t quad_index;
	int32_t single_index;
	int32_t match_index;
	int32_t match_start;
};

static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts,
	const struct rte_acl_indices *indices,
	size_t max_size)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n"
		"max limit: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors,
		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
		counts->dfa, counts->dfa_gr64,
		indices->dfa_index * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz,
		max_size);
}

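/*
 * Build the 64-bit index stored in parent transitions for a DFA node:
 * the lower 32 bits carry the node's position in the transition array
 * plus its type, while the upper 32 bits pack one adjustment per
 * 64-entry input range so the runtime can map an input byte onto the
 * node's compressed set of groups.
 */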
static uint64_t
acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
{
	uint64_t idx;
	uint32_t i;

	idx = 0;
	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
		RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
		idx |= (i - node->dfa_gr64[i]) <<
			(6 + RTE_ACL_DFA_GR64_BIT * i);
	}

	return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
}

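/*
 * Copy each 64-entry range of the fully expanded 256-entry DFA into the
 * destination group it was mapped to, so duplicated ranges share storage.
 */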
static void
acl_dfa_fill_gr64(const struct rte_acl_node *node,
	const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
{
	uint32_t i;

	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
			src + i * RTE_ACL_DFA_GR64_SIZE,
			RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
	}
}

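/*
 * Find identical 64-entry ranges inside a 256-entry DFA: fill gr64[] with
 * the group number each range maps to and return the number of distinct
 * groups actually needed.
 */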
static uint32_t
acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
	uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
{
	uint32_t i, j, k;

	k = 0;
	for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
		gr64[i] = i;
		for (j = 0; j != i; j++) {
			if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
					array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
					RTE_ACL_DFA_GR64_SIZE *
					sizeof(array_ptr[0])) == 0)
				break;
		}
		gr64[i] = (j != i) ? gr64[j] : k++;
	}

	return k;
}

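/*
 * Expand a node into a full 256-entry transition array: every input byte
 * covered by a child pointer gets either that child's final node index
 * (resolved != 0) or the pointer's ordinal (resolved == 0); all other
 * bytes get the no_match transition. Returns the number of contiguous
 * input ranges covered by the node's pointers.
 */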
static uint32_t
acl_node_fill_dfa(const struct rte_acl_node *node,
	uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
{
	uint32_t n, x;
	uint32_t ranges, last_bit;
	struct rte_acl_node *child;
	struct rte_acl_bitset *bits;

	ranges = 0;
	last_bit = 0;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		dfa[n] = no_match;

	for (x = 0; x < node->num_ptrs; x++) {

		child = node->ptrs[x].ptr;
		if (child == NULL)
			continue;

		bits = &node->ptrs[x].values;
		for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {

			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {

				dfa[n] = resolved ? child->node_index : x;
				ranges += (last_bit == 0);
				last_bit = 1;
			} else {
				last_bit = 0;
			}
		}
	}

	return ranges;
}

/*
 * Counts the number of groups of sequential bits that are
 * either 0 or 1, as specified by the zero_one parameter. This is used to
 * calculate the number of ranges in a node to see if it fits in a quad range
 * node.
 */
static int
acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
{
	int n, ranges, last_bit;

	ranges = 0;
	last_bit = zero_one ^ 1;

	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1U << (n % (sizeof(bits_t) * 8)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}
	for (n = 0; n < QRANGE_MIN; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}

	return ranges;
}

/*
 * Count number of ranges spanned by the node's pointers
 */
static int
acl_count_fanout(struct rte_acl_node *node)
{
	uint32_t n;
	int ranges;

	if (node->fanout != 0)
		return node->fanout;

	ranges = acl_count_sequential_groups(&node->values, 0);

	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			ranges += acl_count_sequential_groups(
				&node->ptrs[n].values, 1);
	}

	node->fanout = ranges;
	return node->fanout;
}

/*
 * Determine the type of nodes and count each type
 */
static void
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, uint64_t no_match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;
	uint64_t dfa[RTE_ACL_DFA_SIZE];

	/* skip if this node has been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return;

	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		return;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force type to dfa */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
		if (force_dfa != 0) {
			/* always expand to a max number of nodes. */
			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
				node->dfa_gr64[n] = n;
			node->fanout = n;
		} else {
			acl_node_fill_dfa(node, dfa, no_match, 0);
			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
		}
		counts->dfa_gr64 += node->fanout;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_count_trie_types(counts, node->ptrs[n].ptr,
				no_match, 0);
	}
}

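/*
 * Fill the transition entries for one node. For a quad-range node this
 * compresses the 256-entry expansion into one 64-bit transition per range
 * plus the per-range boundary bytes kept in node->transitions; for a DFA
 * node (second, resolved pass) it copies the deduplicated 64-entry groups
 * into the runtime array.
 */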
static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
	int resolved)
{
	uint32_t x;
	int32_t m;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	acl_node_fill_dfa(node, dfa, no_match, resolved);

	/*
	 * Rather than going from 0 to 256, the range count and
	 * the layout are from 80-ff then 0-7f due to signed compare
	 * for SSE (cmpgt).
	 */
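	/*
	 * For example (hypothetical node with three ranges 0x80-0xbf,
	 * 0xc0-0xff and 0x00-0x7f): three transition values are written
	 * to node_array and the boundary bytes become 0xbf, 0xff and 0x7f,
	 * with any unused transitions[] slots padded to INT8_MAX below.
	 */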
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		for (x = 0; x < INT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);

	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		acl_dfa_fill_gr64(node, dfa, node_array);
	}
}

/*
 * Routine that allocates space for this node and recursively calls
 * to allocate space for each child. Once all the children are allocated,
 * then resolve all transitions for this node.
 */
static void
acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
{
	uint32_t n, sz, *qtrp;
	uint64_t *array_ptr;
	struct rte_acl_match_results *match;

	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
		return;

	array_ptr = NULL;

	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		array_ptr = &node_array[index->dfa_index];
		node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
		sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
		index->dfa_index += sz;
		for (n = 0; n < sz; n++)
			array_ptr[n] = no_match;
		break;
	case RTE_ACL_NODE_SINGLE:
		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
			node->node_type;
		array_ptr = &node_array[index->single_index];
		index->single_index += 1;
		array_ptr[0] = no_match;
		break;
	case RTE_ACL_NODE_QRANGE:
		array_ptr = &node_array[index->quad_index];
		acl_add_ptrs(node, array_ptr, no_match, 0);
		qtrp = (uint32_t *)node->transitions;
		node->node_index = qtrp[0];
		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
		node->node_index |= index->quad_index | node->node_type;
		index->quad_index += node->fanout;
		break;
	case RTE_ACL_NODE_MATCH:
		match = ((struct rte_acl_match_results *)
			(node_array + index->match_start));
		for (n = 0; n != RTE_DIM(match->results); n++)
			RTE_ACL_VERIFY(match->results[n] == 0);
		memcpy(match + index->match_index, node->mrt,
			sizeof(*node->mrt));
		node->node_index = index->match_index | node->node_type;
		index->match_index += 1;
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}

	/* recursively allocate space for all children */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_gen_node(node->ptrs[n].ptr,
				node_array,
				no_match,
				index,
				num_categories);
	}

	/* All children are resolved, resolve this node's pointers */
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_SINGLE:
		for (n = 0; n < node->num_ptrs; n++) {
			if (node->ptrs[n].ptr != NULL)
				array_ptr[0] = node->ptrs[n].ptr->node_index;
		}
		break;
	case RTE_ACL_NODE_QRANGE:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_MATCH:
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
}

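/*
 * Walk all tries to classify nodes, then derive the runtime array layout:
 * the first RTE_ACL_DFA_SIZE + 1 entries are reserved (a no_match DFA plus
 * the idle node), followed by DFA groups, quad vectors, single entries and
 * finally the XMM-aligned match results.
 */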
static void
acl_calc_counts_indices(struct acl_node_counters *counts,
	struct rte_acl_indices *indices,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint64_t no_match)
{
	uint32_t n;

	memset(indices, 0, sizeof(*indices));
	memset(counts, 0, sizeof(*counts));

	/* Get stats on nodes */
	for (n = 0; n < num_tries; n++) {
		acl_count_trie_types(counts, node_bld_trie[n].trie,
			no_match, 1);
	}

	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
	indices->quad_index = indices->dfa_index +
		counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
	indices->single_index = indices->quad_index + counts->quad_vectors;
	indices->match_start = indices->single_index + counts->single + 1;
	indices->match_start = RTE_ALIGN(indices->match_start,
		(XMM_SIZE / sizeof(uint64_t)));
	indices->match_index = 1;
}

/*
 * Generate the runtime structure using build structure
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	no_match = RTE_ACL_NODE_MATCH;

	/* Fill counts and indices arrays from the nodes. */
	acl_calc_counts_indices(&counts, &indices,
		node_bld_trie, num_tries, no_match);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_start * sizeof(uint64_t) +
		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	if (total_size > max_size) {
		RTE_LOG(DEBUG, ACL,
			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
			"bytes required: %zu, allowed: %zu\n",
			ctx->name, total_size, max_size);
		return -ERANGE;
	}

	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		RTE_LOG(ERR, ACL,
			"allocation of %zu bytes on socket %d for %s failed\n",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_start;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */

	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_IDLE_NODE;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	/* NOMATCH result at index 0 */
	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	ctx->mem = mem;
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts, &indices, max_size);
	return 0;
}