/dpdk/lib/acl/acl_gen.c (revision ae67895b507bb6af22263c79ba0d5c374b396485)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <rte_acl.h>
#include "acl.h"
#include "acl_log.h"

#define	QRANGE_MIN	((uint8_t)INT8_MIN)
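/*
 * QRANGE nodes order byte values as signed chars: 0x80..0xff, then
 * 0x00..0x7f (the SSE match path compares transition bytes with a signed
 * cmpgt), so range scans below start at QRANGE_MIN (0x80) rather than 0.
 */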

#define	RTE_ACL_VERIFY(exp)	do {                                          \
	if (!(exp))                                                           \
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)

struct acl_node_counters {
	int32_t match;
	int32_t match_used;
	int32_t single;
	int32_t quad;
	int32_t quad_vectors;
	int32_t dfa;
	int32_t dfa_gr64;
};

struct rte_acl_indices {
	int32_t dfa_index;
	int32_t quad_index;
	int32_t single_index;
	int32_t match_index;
	int32_t match_start;
};

static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts,
	const struct rte_acl_indices *indices,
	size_t max_size)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n"
		"max limit: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors,
		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
		counts->dfa, counts->dfa_gr64,
		indices->dfa_index * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz,
		max_size);
}

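/*
 * Encode the runtime index for a DFA node.
 * The low 32 bits hold the node's position in the transition array plus
 * its type bits; the upper 32 bits pack, for each of the four 64-entry
 * input groups, the displacement (i - dfa_gr64[i]) pre-scaled by the
 * group size (<< 6), so the runtime can locate deduplicated groups
 * directly from the index.
 */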
static uint64_t
acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
{
	uint64_t idx;
	uint32_t i;

	idx = 0;
	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
		RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
		idx |= (i - node->dfa_gr64[i]) <<
			(6 + RTE_ACL_DFA_GR64_BIT * i);
	}

	return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
}

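/*
 * Scatter the four 64-entry source groups into their deduplicated slots:
 * group i of src lands at slot dfa_gr64[i] of dst, so identical groups
 * share a single copy in the runtime array.
 */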
static void
acl_dfa_fill_gr64(const struct rte_acl_node *node,
	const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
{
	uint32_t i;

	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
			src + i * RTE_ACL_DFA_GR64_SIZE,
			RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
	}
}

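/*
 * Count the distinct 64-entry groups in a full 256-entry transition
 * array and record in gr64[] which deduplicated slot each group maps to.
 * Returns the number of unique groups (the node's DFA fanout).
 */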
static uint32_t
acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
	uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
{
	uint32_t i, j, k;

	k = 0;
	for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
		gr64[i] = i;
		for (j = 0; j != i; j++) {
			if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
					array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
					RTE_ACL_DFA_GR64_SIZE *
					sizeof(array_ptr[0])) == 0)
				break;
		}
		gr64[i] = (j != i) ? gr64[j] : k++;
	}

	return k;
}

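/*
 * Expand a node's pointer bitsets into a full 256-entry transition
 * table: dfa[b] is the transition taken on input byte b. When resolved
 * is non-zero the table holds final node indices, otherwise the ordinal
 * of the matching pointer. Returns the number of contiguous byte ranges
 * covered by the node's pointers.
 */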
static uint32_t
acl_node_fill_dfa(const struct rte_acl_node *node,
	uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
{
	uint32_t n, x;
	uint32_t ranges, last_bit;
	struct rte_acl_node *child;
	struct rte_acl_bitset *bits;

	ranges = 0;
	last_bit = 0;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		dfa[n] = no_match;

	for (x = 0; x < node->num_ptrs; x++) {

		child = node->ptrs[x].ptr;
		if (child == NULL)
			continue;

		bits = &node->ptrs[x].values;
		for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {

			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {

				dfa[n] = resolved ? child->node_index : x;
				ranges += (last_bit == 0);
				last_bit = 1;
			} else {
				last_bit = 0;
			}
		}
	}

	return ranges;
}

/*
 * Count the number of groups of sequential bits that are all set to the
 * value given by zero_one. Together with acl_count_fanout() this yields
 * the number of byte ranges a node spans, which decides whether it fits
 * in a quad-range node. The scan starts at QRANGE_MIN (0x80) to match
 * the signed-compare layout used by QRANGE nodes.
 */
static int
acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
{
	int n, ranges, last_bit;

	ranges = 0;
	last_bit = zero_one ^ 1;

	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
		if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}
	for (n = 0; n < QRANGE_MIN; n++) {
		if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}

	return ranges;
}

/*
 * Count the number of ranges spanned by the node's pointers.
 * The result is cached in node->fanout.
 */
static int
acl_count_fanout(struct rte_acl_node *node)
{
	uint32_t n;
	int ranges;

	if (node->fanout != 0)
		return node->fanout;

	ranges = acl_count_sequential_groups(&node->values, 0);

	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			ranges += acl_count_sequential_groups(
				&node->ptrs[n].values, 1);
	}

	node->fanout = ranges;
	return node->fanout;
}

/*
 * Determine the type of each node and count the nodes of each type.
 */
static void
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, uint64_t no_match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;
	uint64_t dfa[RTE_ACL_DFA_SIZE];

	/* skip if this node has already been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return;

	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		return;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force type to DFA */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
		if (force_dfa != 0) {
			/* always expand to the max number of nodes */
			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
				node->dfa_gr64[n] = n;
			node->fanout = n;
		} else {
			acl_node_fill_dfa(node, dfa, no_match, 0);
			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
		}
		counts->dfa_gr64 += node->fanout;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_count_trie_types(counts, node->ptrs[n].ptr,
				no_match, 0);
	}
}

static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
	int resolved)
{
	uint32_t x;
	int32_t m;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	acl_node_fill_dfa(node, dfa, no_match, resolved);

	/*
	 * Rather than going from 0 to 255 in natural order, the range
	 * count and the layout run from 0x80-0xff and then 0x00-0x7f,
	 * due to the signed compare (cmpgt) used by the SSE match path.
	 */
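	/*
	 * Illustration: a node that maps bytes 0x80-0x9f to child A and
	 * everything else to child B yields node_a[] = {A, B} and
	 * transitions[] = {0x9f, 0x7f, 0x7f, 0x7f}: each transitions[m]
	 * records the last byte of range m in the 0x80..0x7f ordering,
	 * and unused slots are padded with INT8_MAX below.
	 */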
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		for (x = 0; x < INT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);

	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		acl_dfa_fill_gr64(node, dfa, node_array);
	}
}


/*
 * Allocate space for this node, then recurse to allocate space for each
 * child. Once all the children are allocated, resolve all transitions
 * for this node.
 */
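/*
 * Note: each region of the runtime array (DFA, quad, single, match) was
 * sized in advance by acl_calc_counts_indices(), so every case below
 * simply carves the next free slot out of its own region via *index.
 */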
static void
acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
{
	uint32_t n, sz, *qtrp;
	uint64_t *array_ptr;
	struct rte_acl_match_results *match;

	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
		return;

	array_ptr = NULL;

	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		array_ptr = &node_array[index->dfa_index];
		node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
		sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
		index->dfa_index += sz;
		for (n = 0; n < sz; n++)
			array_ptr[n] = no_match;
		break;
	case RTE_ACL_NODE_SINGLE:
		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
			node->node_type;
		array_ptr = &node_array[index->single_index];
		index->single_index += 1;
		array_ptr[0] = no_match;
		break;
	case RTE_ACL_NODE_QRANGE:
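		/*
		 * A QRANGE's node_index packs the four transition bytes
		 * (as one 32-bit word) in its upper half and the array
		 * position plus node type in its lower half.
		 */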
		array_ptr = &node_array[index->quad_index];
		acl_add_ptrs(node, array_ptr, no_match, 0);
		qtrp = (uint32_t *)node->transitions;
		node->node_index = qtrp[0];
		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
		node->node_index |= index->quad_index | node->node_type;
		index->quad_index += node->fanout;
		break;
	case RTE_ACL_NODE_MATCH:
		match = ((struct rte_acl_match_results *)
			(node_array + index->match_start));
		for (n = 0; n != RTE_DIM(match->results); n++)
			RTE_ACL_VERIFY(match->results[n] == 0);
		memcpy(match + index->match_index, node->mrt,
			sizeof(*node->mrt));
		node->node_index = index->match_index | node->node_type;
		index->match_index += 1;
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}

	/* recursively allocate space for all children */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_gen_node(node->ptrs[n].ptr,
				node_array,
				no_match,
				index,
				num_categories);
	}

	/* All children are resolved, resolve this node's pointers */
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_SINGLE:
		for (n = 0; n < node->num_ptrs; n++) {
			if (node->ptrs[n].ptr != NULL)
				array_ptr[0] = node->ptrs[n].ptr->node_index;
		}
		break;
	case RTE_ACL_NODE_QRANGE:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_MATCH:
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
}

static void
acl_calc_counts_indices(struct acl_node_counters *counts,
	struct rte_acl_indices *indices,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint64_t no_match)
{
	uint32_t n;

	memset(indices, 0, sizeof(*indices));
	memset(counts, 0, sizeof(*counts));

	/* Get stats on nodes */
	for (n = 0; n < num_tries; n++) {
		acl_count_trie_types(counts, node_bld_trie[n].trie,
			no_match, 1);
	}

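	/*
	 * Layout of the runtime transition array:
	 * [0 .. RTE_ACL_DFA_SIZE-1]  a no_match page for stray transitions,
	 * [RTE_ACL_DFA_SIZE]         the idle node,
	 * then the DFA groups, the quad vectors, the single nodes, and
	 * finally the match results, aligned to an XMM boundary.
	 */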
	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
	indices->quad_index = indices->dfa_index +
		counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
	indices->single_index = indices->quad_index + counts->quad_vectors;
	indices->match_start = indices->single_index + counts->single + 1;
	indices->match_start = RTE_ALIGN(indices->match_start,
		(XMM_SIZE / sizeof(uint64_t)));
	indices->match_index = 1;
}

/*
 * Generate the runtime structure from the build structure.
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	no_match = RTE_ACL_NODE_MATCH;

	/* Fill counts and indices arrays from the nodes. */
	acl_calc_counts_indices(&counts, &indices,
		node_bld_trie, num_tries, no_match);

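	/*
	 * The runtime image is: the data indexes (cache-line aligned),
	 * then the transition array up to match_start, then the match
	 * results: entry 0 is the all-zero NOMATCH result, followed by
	 * one entry per match node, with XMM_SIZE of slack at the end.
	 */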
	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_start * sizeof(uint64_t) +
		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	if (total_size > max_size) {
		ACL_LOG(DEBUG,
			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
			"bytes required: %zu, allowed: %zu",
			ctx->name, total_size, max_size);
		return -ERANGE;
	}

	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		ACL_LOG(ERR,
			"allocation of %zu bytes on socket %d for %s failed",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_start;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Set up the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */

	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_IDLE_NODE;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	/* NOMATCH result at index 0 */
	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	ctx->mem = mem;
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts, &indices, max_size);
	return 0;
}
534