/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <rte_acl.h>
#include "acl.h"

#define QRANGE_MIN	((uint8_t)INT8_MIN)

#define RTE_ACL_VERIFY(exp)	do {					      \
	if (!(exp))							      \
		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
} while (0)

struct acl_node_counters {
	int32_t match;
	int32_t match_used;
	int32_t single;
	int32_t quad;
	int32_t quad_vectors;
	int32_t dfa;
	int32_t dfa_gr64;
};

struct rte_acl_indices {
	int32_t dfa_index;
	int32_t quad_index;
	int32_t single_index;
	int32_t match_index;
	int32_t match_start;
};

static void
acl_gen_log_stats(const struct rte_acl_ctx *ctx,
	const struct acl_node_counters *counts,
	const struct rte_acl_indices *indices,
	size_t max_size)
{
	RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n"
		"runtime memory footprint on socket %d:\n"
		"single nodes/bytes used: %d/%zu\n"
		"quad nodes/vectors/bytes used: %d/%d/%zu\n"
		"DFA nodes/group64/bytes used: %d/%d/%zu\n"
		"match nodes/bytes used: %d/%zu\n"
		"total: %zu bytes\n"
		"max limit: %zu bytes\n",
		ctx->name, ctx->socket_id,
		counts->single, counts->single * sizeof(uint64_t),
		counts->quad, counts->quad_vectors,
		(indices->quad_index - indices->dfa_index) * sizeof(uint64_t),
		counts->dfa, counts->dfa_gr64,
		indices->dfa_index * sizeof(uint64_t),
		counts->match,
		counts->match * sizeof(struct rte_acl_match_results),
		ctx->mem_sz,
		max_size);
}

static uint64_t
acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index)
{
	uint64_t idx;
	uint32_t i;

	idx = 0;
	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM);
		RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout);
		idx |= (i - node->dfa_gr64[i]) <<
			(6 + RTE_ACL_DFA_GR64_BIT * i);
	}

	return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type;
}

static void
acl_dfa_fill_gr64(const struct rte_acl_node *node,
	const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE])
{
	uint32_t i;

	for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) {
		memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE,
			src + i * RTE_ACL_DFA_GR64_SIZE,
			RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0]));
	}
}

static uint32_t
acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE],
	uint8_t gr64[RTE_ACL_DFA_GR64_NUM])
{
	uint32_t i, j, k;

	k = 0;
	for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) {
		gr64[i] = i;
		for (j = 0; j != i; j++) {
			if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE,
					array_ptr + j * RTE_ACL_DFA_GR64_SIZE,
					RTE_ACL_DFA_GR64_SIZE *
					sizeof(array_ptr[0])) == 0)
				break;
		}
		gr64[i] = (j != i) ? gr64[j] : k++;
	}

	return k;
}
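
/*
 * Worked example (illustrative only, not used by the build): a DFA node
 * whose four 64-entry groups are all identical ends up with
 * gr64[] = {0, 0, 0, 0} and fanout = 1, so a single group of 64
 * transitions is stored instead of 256. acl_dfa_gen_idx() records the
 * per-group deltas (i - gr64[i]), each pre-multiplied by the group size
 * (<< 6) and packed into an RTE_ACL_DFA_GR64_BIT-wide lane of the upper
 * 32 bits of the node index, so the classify path can map an input
 * byte's group number to the group's compacted location with shifts and
 * masks alone.
 */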

static uint32_t
acl_node_fill_dfa(const struct rte_acl_node *node,
	uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved)
{
	uint32_t n, x;
	uint32_t ranges, last_bit;
	struct rte_acl_node *child;
	struct rte_acl_bitset *bits;

	ranges = 0;
	last_bit = 0;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		dfa[n] = no_match;

	for (x = 0; x < node->num_ptrs; x++) {

		child = node->ptrs[x].ptr;
		if (child == NULL)
			continue;

		bits = &node->ptrs[x].values;
		for (n = 0; n < RTE_ACL_DFA_SIZE; n++) {

			if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
					(1U << (n % (sizeof(bits_t) *
					CHAR_BIT)))) {

				dfa[n] = resolved ? child->node_index : x;
				ranges += (last_bit == 0);
				last_bit = 1;
			} else {
				last_bit = 0;
			}
		}
	}

	return ranges;
}

/*
 * Count the number of groups of sequential bits that are either all 0 or
 * all 1, as selected by the zero_one parameter. This is used to calculate
 * the number of ranges in a node, to decide whether it fits in a quad
 * range node.
 */
static int
acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one)
{
	int n, ranges, last_bit;

	ranges = 0;
	last_bit = zero_one ^ 1;

	for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) {
		if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}
	for (n = 0; n < QRANGE_MIN; n++) {
		if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] &
				(1U << (n % (sizeof(bits_t) * CHAR_BIT)))) {
			if (zero_one == 1 && last_bit != 1)
				ranges++;
			last_bit = 1;
		} else {
			if (zero_one == 0 && last_bit != 0)
				ranges++;
			last_bit = 0;
		}
	}

	return ranges;
}

/*
 * Count the number of ranges spanned by the node's pointers.
 */
static int
acl_count_fanout(struct rte_acl_node *node)
{
	uint32_t n;
	int ranges;

	if (node->fanout != 0)
		return node->fanout;

	ranges = acl_count_sequential_groups(&node->values, 0);

	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			ranges += acl_count_sequential_groups(
				&node->ptrs[n].values, 1);
	}

	node->fanout = ranges;
	return node->fanout;
}

/*
 * Determine the type of each node and count the nodes of each type.
 */
static void
acl_count_trie_types(struct acl_node_counters *counts,
	struct rte_acl_node *node, uint64_t no_match, int force_dfa)
{
	uint32_t n;
	int num_ptrs;
	uint64_t dfa[RTE_ACL_DFA_SIZE];

	/* skip if this node has already been counted */
	if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED)
		return;

	if (node->match_flag != 0 || node->num_ptrs == 0) {
		counts->match++;
		node->node_type = RTE_ACL_NODE_MATCH;
		return;
	}

	num_ptrs = acl_count_fanout(node);

	/* Force the node type to DFA */
	if (force_dfa)
		num_ptrs = RTE_ACL_DFA_SIZE;

	/* determine node type based on number of ranges */
	if (num_ptrs == 1) {
		counts->single++;
		node->node_type = RTE_ACL_NODE_SINGLE;
	} else if (num_ptrs <= RTE_ACL_QUAD_MAX) {
		counts->quad++;
		counts->quad_vectors += node->fanout;
		node->node_type = RTE_ACL_NODE_QRANGE;
	} else {
		counts->dfa++;
		node->node_type = RTE_ACL_NODE_DFA;
		if (force_dfa != 0) {
			/* always expand to the max number of nodes. */
			for (n = 0; n != RTE_DIM(node->dfa_gr64); n++)
				node->dfa_gr64[n] = n;
			node->fanout = n;
		} else {
			acl_node_fill_dfa(node, dfa, no_match, 0);
			node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64);
		}
		counts->dfa_gr64 += node->fanout;
	}

	/*
	 * recursively count the types of all children
	 */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_count_trie_types(counts, node->ptrs[n].ptr,
				no_match, 0);
	}
}
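
/*
 * Summary of the type selection above (informational): a node with a
 * match or no outgoing pointers becomes RTE_ACL_NODE_MATCH before fanout
 * is considered; otherwise
 *   fanout == 1                -> RTE_ACL_NODE_SINGLE (one 64-bit entry)
 *   fanout <= RTE_ACL_QUAD_MAX -> RTE_ACL_NODE_QRANGE (one entry per range)
 *   anything larger            -> RTE_ACL_NODE_DFA (compacted 64-entry groups)
 */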

static void
acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, int resolved)
{
	uint32_t x;
	int32_t m;
	uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE];

	acl_node_fill_dfa(node, dfa, no_match, resolved);

	/*
	 * Rather than going from 0 to 256, the range count and
	 * the layout are from 80-ff then 0-7f due to signed compare
	 * for SSE (cmpgt).
	 */
	if (node->node_type == RTE_ACL_NODE_QRANGE) {

		m = 0;
		node_a = node_array;
		index = dfa[QRANGE_MIN];
		*node_a++ = index;

		for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		for (x = 0; x < INT8_MAX + 1; x++) {
			if (dfa[x] != index) {
				index = dfa[x];
				*node_a++ = index;
				node->transitions[m++] = (uint8_t)(x - 1);
			}
		}

		/* fill unused locations with max value - nothing is greater */
		for (; m < RTE_ACL_QUAD_SIZE; m++)
			node->transitions[m] = INT8_MAX;

		RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE);

	} else if (node->node_type == RTE_ACL_NODE_DFA && resolved) {
		acl_dfa_fill_gr64(node, dfa, node_array);
	}
}
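
/*
 * QRANGE lookup, sketched (illustrative, mirrors the comment above): a
 * quad node keeps up to RTE_ACL_QUAD_SIZE range-boundary bytes in
 * node->transitions[]. At classify time, a signed compare-greater-than
 * of the input byte against those boundaries counts how many ranges the
 * byte lies beyond, and that count indexes the consecutive 64-bit
 * transitions written by acl_add_ptrs(). Treating bytes as signed is why
 * the scan starts at QRANGE_MIN (0x80) and wraps through 0x7f, and why
 * unused slots are padded with INT8_MAX.
 */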

/*
 * Allocate space for this node, then recurse to allocate space for each
 * child. Once all the children are allocated, resolve all transitions
 * for this node.
 */
static void
acl_gen_node(struct rte_acl_node *node, uint64_t *node_array,
	uint64_t no_match, struct rte_acl_indices *index, int num_categories)
{
	uint32_t n, sz, *qtrp;
	uint64_t *array_ptr;
	struct rte_acl_match_results *match;

	if (node->node_index != RTE_ACL_NODE_UNDEFINED)
		return;

	array_ptr = NULL;

	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		array_ptr = &node_array[index->dfa_index];
		node->node_index = acl_dfa_gen_idx(node, index->dfa_index);
		sz = node->fanout * RTE_ACL_DFA_GR64_SIZE;
		index->dfa_index += sz;
		for (n = 0; n < sz; n++)
			array_ptr[n] = no_match;
		break;
	case RTE_ACL_NODE_SINGLE:
		node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index |
			node->node_type;
		array_ptr = &node_array[index->single_index];
		index->single_index += 1;
		array_ptr[0] = no_match;
		break;
	case RTE_ACL_NODE_QRANGE:
		array_ptr = &node_array[index->quad_index];
		acl_add_ptrs(node, array_ptr, no_match, 0);
		qtrp = (uint32_t *)node->transitions;
		node->node_index = qtrp[0];
		node->node_index <<= sizeof(index->quad_index) * CHAR_BIT;
		node->node_index |= index->quad_index | node->node_type;
		index->quad_index += node->fanout;
		break;
	case RTE_ACL_NODE_MATCH:
		match = ((struct rte_acl_match_results *)
			(node_array + index->match_start));
		for (n = 0; n != RTE_DIM(match->results); n++)
			RTE_ACL_VERIFY(match->results[n] == 0);
		memcpy(match + index->match_index, node->mrt,
			sizeof(*node->mrt));
		node->node_index = index->match_index | node->node_type;
		index->match_index += 1;
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}

	/* recursively allocate space for all children */
	for (n = 0; n < node->num_ptrs; n++) {
		if (node->ptrs[n].ptr != NULL)
			acl_gen_node(node->ptrs[n].ptr,
				node_array,
				no_match,
				index,
				num_categories);
	}

	/* All children are resolved, resolve this node's pointers */
	switch (node->node_type) {
	case RTE_ACL_NODE_DFA:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_SINGLE:
		for (n = 0; n < node->num_ptrs; n++) {
			if (node->ptrs[n].ptr != NULL)
				array_ptr[0] = node->ptrs[n].ptr->node_index;
		}
		break;
	case RTE_ACL_NODE_QRANGE:
		acl_add_ptrs(node, array_ptr, no_match, 1);
		break;
	case RTE_ACL_NODE_MATCH:
		break;
	case RTE_ACL_NODE_UNDEFINED:
		RTE_ACL_VERIFY(node->node_type !=
			(uint32_t)RTE_ACL_NODE_UNDEFINED);
		break;
	}
}

static void
acl_calc_counts_indices(struct acl_node_counters *counts,
	struct rte_acl_indices *indices,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint64_t no_match)
{
	uint32_t n;

	memset(indices, 0, sizeof(*indices));
	memset(counts, 0, sizeof(*counts));

	/* Get stats on nodes */
	for (n = 0; n < num_tries; n++) {
		acl_count_trie_types(counts, node_bld_trie[n].trie,
			no_match, 1);
	}

	indices->dfa_index = RTE_ACL_DFA_SIZE + 1;
	indices->quad_index = indices->dfa_index +
		counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE;
	indices->single_index = indices->quad_index + counts->quad_vectors;
	indices->match_start = indices->single_index + counts->single + 1;
	indices->match_start = RTE_ALIGN(indices->match_start,
		(XMM_SIZE / sizeof(uint64_t)));
	indices->match_index = 1;
}
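
/*
 * Resulting layout of the runtime transition array (a sketch derived
 * from the index arithmetic above):
 *
 *   [0 .. RTE_ACL_DFA_SIZE-1]  root-level entries, filled with no_match
 *   [RTE_ACL_DFA_SIZE]         idle node that points to itself
 *   [dfa_index ...]            compacted DFA group-64 vectors
 *   [quad_index ...]           quad-range transition vectors
 *   [single_index ...]         single-transition entries
 *   [match_start ...]          rte_acl_match_results array, XMM-aligned;
 *                              entry 0 is the all-zero no-match result
 */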

/*
 * Generate the runtime structure using the build structure.
 */
int
rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
	uint32_t num_categories, uint32_t data_index_sz, size_t max_size)
{
	void *mem;
	size_t total_size;
	uint64_t *node_array, no_match;
	uint32_t n, match_index;
	struct rte_acl_match_results *match;
	struct acl_node_counters counts;
	struct rte_acl_indices indices;

	no_match = RTE_ACL_NODE_MATCH;

	/* Fill counts and indices arrays from the nodes. */
	acl_calc_counts_indices(&counts, &indices,
		node_bld_trie, num_tries, no_match);

	/* Allocate runtime memory (align to cache boundary) */
	total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) +
		indices.match_start * sizeof(uint64_t) +
		(counts.match + 1) * sizeof(struct rte_acl_match_results) +
		XMM_SIZE;

	if (total_size > max_size) {
		RTE_LOG(DEBUG, ACL,
			"Gen phase for ACL ctx \"%s\" exceeds max_size limit, "
			"bytes required: %zu, allowed: %zu\n",
			ctx->name, total_size, max_size);
		return -ERANGE;
	}

	mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE,
			ctx->socket_id);
	if (mem == NULL) {
		RTE_LOG(ERR, ACL,
			"allocation of %zu bytes on socket %d for %s failed\n",
			total_size, ctx->socket_id, ctx->name);
		return -ENOMEM;
	}

	/* Fill the runtime structure */
	match_index = indices.match_start;
	node_array = (uint64_t *)((uintptr_t)mem +
		RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE));

	/*
	 * Setup the NOMATCH node (a SINGLE at the
	 * highest index, that points to itself)
	 */

	node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_IDLE_NODE;

	for (n = 0; n < RTE_ACL_DFA_SIZE; n++)
		node_array[n] = no_match;

	/* NOMATCH result at index 0 */
	match = ((struct rte_acl_match_results *)(node_array + match_index));
	memset(match, 0, sizeof(*match));

	for (n = 0; n < num_tries; n++) {

		acl_gen_node(node_bld_trie[n].trie, node_array, no_match,
			&indices, num_categories);

		if (node_bld_trie[n].trie->node_index == no_match)
			trie[n].root_index = 0;
		else
			trie[n].root_index = node_bld_trie[n].trie->node_index;
	}

	ctx->mem = mem;
	ctx->mem_sz = total_size;
	ctx->data_indexes = mem;
	ctx->num_tries = num_tries;
	ctx->num_categories = num_categories;
	ctx->match_index = match_index;
	ctx->no_match = no_match;
	ctx->idle = node_array[RTE_ACL_DFA_SIZE];
	ctx->trans_table = node_array;
	memcpy(ctx->trie, trie, sizeof(ctx->trie));

	acl_gen_log_stats(ctx, &counts, &indices, max_size);
	return 0;
}
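
/*
 * Context (informational): callers do not normally invoke rte_acl_gen()
 * directly. It runs as the final step of the build phase, after the
 * public flow rte_acl_create() -> rte_acl_add_rules() -> rte_acl_build(),
 * and the trans_table it produces is what rte_acl_classify() walks.
 */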