/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <rte_acl.h>
#include "acl.h"
#include "acl_log.h"

#define QRANGE_MIN ((uint8_t)INT8_MIN)

#define RTE_ACL_VERIFY(exp) do { \
	if (!(exp)) \
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)

struct acl_node_counters {
	int32_t match;
	int32_t match_used;
	int32_t single;
	int32_t quad;
	int32_t quad_vectors;
	int32_t dfa;
	int32_t dfa_gr64;
};

struct rte_acl_indices {
	int32_t dfa_index;
	int32_t quad_index;
	int32_t single_index;
	int32_t match_index;
	int32_t match_start;
};

static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts,
	const struct rte_acl_indices *indices,
	size_t max_size)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n"
		"max limit: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors,
		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
		counts->dfa, counts->dfa_gr64,
		indices->dfa_index * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz,
		max_size);
}

/*
 * Encode a 64-bit transition index for a DFA node: the low 32 bits hold
 * the node's position (and type) in the runtime array, the upper 32 bits
 * hold per-group64 placement adjustments used by the runtime lookup.
 */
static uint64_t
acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
{
	uint64_t idx;
	uint32_t i;

	idx = 0;
	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
		RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
		idx |= (i - node->dfa_gr64[i]) <<
			(6 + RTE_ACL_DFA_GR64_BIT * i);
	}

	return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
}

/*
 * Copy the node's 256 transitions into the runtime array,
 * storing each unique 64-entry group only once.
 */
static void
acl_dfa_fill_gr64(const struct rte_acl_node *node,
	const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
{
	uint32_t i;

	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
			src + i * RTE_ACL_DFA_GR64_SIZE,
			RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
	}
}

/*
 * Count the number of unique 64-entry groups in a full 256-entry
 * transition array and record, for each group, which unique group
 * it maps onto.
 */
static uint32_t
acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
	uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
{
	uint32_t i, j, k;

	k = 0;
	for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
		gr64[i] = i;
		for (j = 0; j != i; j++) {
			if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
					array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
					RTE_ACL_DFA_GR64_SIZE *
					sizeof(array_ptr[0])) == 0)
				break;
		}
		gr64[i] = (j != i) ? gr64[j] : k++;
	}

	return k;
}
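
#ifdef ACL_GEN_SELFTEST
/*
 * Illustrative sketch only: ACL_GEN_SELFTEST is a hypothetical
 * compile-time flag, not part of any DPDK build. Shows how
 * acl_dfa_count_gr64() collapses duplicate 64-entry groups: with the
 * groups laid out as A, B, A, C it reports 3 unique groups and maps
 * group 2 back onto group 0.
 */
static void
acl_gen_selftest_gr64(void)
{
	uint64_t arr[RTE_ACL_DFA_SIZE];
	uint8_t gr64[RTE_ACL_DFA_GR64_NUM];
	uint32_t i;

	for (i = 0; i != RTE_ACL_DFA_SIZE; i++)
		arr[i] = 0;				/* pattern A everywhere */
	for (i = 0; i != RTE_ACL_DFA_GR64_SIZE; i++)
		arr[RTE_ACL_DFA_GR64_SIZE + i] = 1;	/* group 1: pattern B */
	for (i = 0; i != RTE_ACL_DFA_GR64_SIZE; i++)
		arr[3 * RTE_ACL_DFA_GR64_SIZE + i] = 2;	/* group 3: pattern C */

	RTE_ACL_VERIFY(acl_dfa_count_gr64(arr, gr64) == 3);
	RTE_ACL_VERIFY(gr64[0] == 0 && gr64[1] == 1 &&
		gr64[2] == 0 && gr64[3] == 2);
}
#endif /* ACL_GEN_SELFTEST */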

static uint32_t
acl_node_fill_dfa(const struct rte_acl_node *node,
	uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
{
	uint32_t n, x;
	uint32_t ranges, last_bit;
	struct rte_acl_node *child;
	struct rte_acl_bitset *bits;

	ranges = 0;
	last_bit = 0;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		dfa[n] = no_match;

	for (x = 0; x < node->num_ptrs; x++) {

		child = node->ptrs[x].ptr;
		if (child == NULL)
			continue;

		bits = &node->ptrs[x].values;
		for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {

			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
					(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {

				dfa[n] = resolved ? child->node_index : x;
				ranges += (last_bit == 0);
				last_bit = 1;
			} else {
				last_bit = 0;
			}
		}
	}

	return ranges;
}

/*
 * Count the number of groups of sequential bits that are either 0 or 1,
 * as specified by the zero_one parameter.
 * This is used to calculate the number of ranges in a node
 * to see if it fits in a quad range node.
 */
static int
acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
{
	int n, ranges, last_bit;

	ranges = 0;
	last_bit = zero_one ^ 1;

	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1U << (n % (sizeof(bits_t) * 8)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}
	for (n = 0; n < QRANGE_MIN; n++) {
		if (bits->bits[n / (sizeof(bits_t) * 8)] &
				(1U << (n % (sizeof(bits_t) * 8)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}

	return ranges;
}

/*
 * Count number of ranges spanned by the node's pointers.
 */
static int
acl_count_fanout(struct rte_acl_node *node)
{
	uint32_t n;
	int ranges;

	if (node->fanout != 0)
		return node->fanout;

	ranges = acl_count_sequential_groups(&node->values, 0);

	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			ranges += acl_count_sequential_groups(
				&node->ptrs[n].values, 1);
	}

	node->fanout = ranges;
	return node->fanout;
}
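
#ifdef ACL_GEN_SELFTEST
/*
 * Illustrative sketch only (ACL_GEN_SELFTEST is a hypothetical flag):
 * a value set covering bytes 0x10-0x20 plus 0xf0-0xff forms two groups
 * of ones. Note the scan starts at QRANGE_MIN (0x80) and wraps around,
 * so the 0xf0-0xff group is encountered first.
 */
static void
acl_gen_selftest_groups(void)
{
	struct rte_acl_bitset bs;
	int n;

	memset(&bs, 0, sizeof(bs));
	for (n = 0x10; n <= 0x20; n++)
		bs.bits[n / (sizeof(bits_t) * 8)] |=
			(bits_t)1 << (n % (sizeof(bits_t) * 8));
	for (n = 0xf0; n <= 0xff; n++)
		bs.bits[n / (sizeof(bits_t) * 8)] |=
			(bits_t)1 << (n % (sizeof(bits_t) * 8));

	RTE_ACL_VERIFY(acl_count_sequential_groups(&bs, 1) == 2);
}
#endif /* ACL_GEN_SELFTEST */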

/*
 * Determine the type of nodes and count each type.
 */
static void
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, uint64_t no_match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;
	uint64_t dfa[RTE_ACL_DFA_SIZE];

	/* skip if this node has been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return;

	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		return;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force type to dfa */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
		if (force_dfa != 0) {
			/* always expand to a max number of nodes. */
			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
				node->dfa_gr64[n] = n;
			node->fanout = n;
		} else {
			acl_node_fill_dfa(node, dfa, no_match, 0);
			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
		}
		counts->dfa_gr64 += node->fanout;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_count_trie_types(counts, node->ptrs[n].ptr,
				no_match, 0);
	}
}

static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match,
	int resolved)
{
	uint32_t x;
	int32_t m;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	acl_node_fill_dfa(node, dfa, no_match, resolved);

	/*
	 * Rather than going from 0 to 256, the range count and
	 * the layout are from 80-ff then 0-7f due to signed compare
	 * for SSE (cmpgt).
	 */
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		for (x = 0; x < (uint32_t)QRANGE_MIN; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);

	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		acl_dfa_fill_gr64(node, dfa, node_array);
	}
}
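
/*
 * Worked example (illustrative): a QRANGE node with fanout 3 whose
 * dfa[] values, in the wrapped 0x80..0xff, 0x00..0x7f order, change
 * after input bytes 0x9f and 0x03 covers the ranges [0x80..0x9f],
 * [0xa0..0x03] and [0x04..0x7f]. The loops above record
 * transitions[] = {0x9f, 0x03, 0x7f, 0x7f} (unused slots padded with
 * INT8_MAX) and store the three target indices in node_array[].
 */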

/*
 * Routine that allocates space for this node and recursively calls
 * to allocate space for each child. Once all the children are allocated,
 * then resolve all transitions for this node.
 */
static void
acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
{
	uint32_t n, sz, *qtrp;
	uint64_t *array_ptr;
	struct rte_acl_match_results *match;

	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
		return;

	array_ptr = NULL;

	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		array_ptr = &node_array[index->dfa_index];
		node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
		sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
		index->dfa_index += sz;
		for (n = 0; n < sz; n++)
			array_ptr[n] = no_match;
		break;
	case RTE_ACL_NODE_SINGLE:
		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
			node->node_type;
		array_ptr = &node_array[index->single_index];
		index->single_index += 1;
		array_ptr[0] = no_match;
		break;
	case RTE_ACL_NODE_QRANGE:
		array_ptr = &node_array[index->quad_index];
		acl_add_ptrs(node, array_ptr, no_match, 0);
		qtrp = (uint32_t *)node->transitions;
		node->node_index = qtrp[0];
		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
		node->node_index |= index->quad_index | node->node_type;
		index->quad_index += node->fanout;
		break;
	case RTE_ACL_NODE_MATCH:
		match = ((struct rte_acl_match_results *)
			(node_array + index->match_start));
		for (n = 0; n != RTE_DIM(match->results); n++)
			RTE_ACL_VERIFY(match->results[n] == 0);
		memcpy(match + index->match_index, node->mrt,
			sizeof(*node->mrt));
		node->node_index = index->match_index | node->node_type;
		index->match_index += 1;
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}

	/* recursively allocate space for all children */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_gen_node(node->ptrs[n].ptr,
				node_array,
				no_match,
				index,
				num_categories);
	}

	/* All children are resolved, resolve this node's pointers */
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_SINGLE:
		for (n = 0; n < node->num_ptrs; n++) {
			if (node->ptrs[n].ptr != NULL)
				array_ptr[0] = node->ptrs[n].ptr->node_index;
		}
		break;
	case RTE_ACL_NODE_QRANGE:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_MATCH:
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
}

static void
acl_calc_counts_indices(struct acl_node_counters *counts,
	struct rte_acl_indices *indices,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint64_t no_match)
{
	uint32_t n;

	memset(indices, 0, sizeof(*indices));
	memset(counts, 0, sizeof(*counts));

	/* Get stats on nodes */
	for (n = 0; n < num_tries; n++) {
		acl_count_trie_types(counts, node_bld_trie[n].trie,
			no_match, 1);
	}

	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
	indices->quad_index = indices->dfa_index +
		counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
	indices->single_index = indices->quad_index + counts->quad_vectors;
	indices->match_start = indices->single_index + counts->single + 1;
	indices->match_start = RTE_ALIGN(indices->match_start,
		(XMM_SIZE / sizeof(uint64_t)));
	indices->match_index = 1;
}
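
/*
 * Worked example (illustrative): with counts of dfa_gr64 = 2,
 * quad_vectors = 5 and single = 1, the indices come out as
 * dfa_index = 257 (entries 0-255 hold the shared no-match DFA,
 * entry 256 the idle node), quad_index = 257 + 2 * 64 = 385,
 * single_index = 385 + 5 = 390, and match_start = 390 + 1 + 1 = 392,
 * already aligned to XMM_SIZE / sizeof(uint64_t) = 2 words.
 */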

/*
 * Generate the runtime structure using build structure.
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	no_match = RTE_ACL_NODE_MATCH;

	/* Fill counts and indices arrays from the nodes. */
	acl_calc_counts_indices(&counts, &indices,
		node_bld_trie, num_tries, no_match);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_start * sizeof(uint64_t) +
		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	if (total_size > max_size) {
		ACL_LOG(DEBUG,
			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
			"bytes required: %zu, allowed: %zu",
			ctx->name, total_size, max_size);
		return -ERANGE;
	}

	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		ACL_LOG(ERR,
			"allocation of %zu bytes on socket %d for %s failed",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_start;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */

	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_IDLE_NODE;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	/* NOMATCH result at index 0 */
	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	ctx->mem = mem;
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts, &indices, max_size);
	return 0;
}
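
/*
 * Layout of the generated runtime image (illustrative), in 64-bit
 * words starting at ctx->trans_table:
 *
 *   [0 .. 255]                      shared no-match DFA node
 *   [256]                           idle node (points to itself)
 *   [257 .. quad_index)             DFA nodes, fanout * 64 words each
 *   [quad_index .. single_index)    QRANGE transition vectors
 *   [single_index .. match_start)   SINGLE nodes, one word each
 *   [match_start .. ]               rte_acl_match_results array; entry 0
 *                                   is the all-zero NOMATCH result
 */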