/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
499a2dd95SBruce Richardson
599a2dd95SBruce Richardson #ifndef _ACL_RUN_H_
699a2dd95SBruce Richardson #define _ACL_RUN_H_
799a2dd95SBruce Richardson
899a2dd95SBruce Richardson #include <rte_acl.h>
999a2dd95SBruce Richardson #include "acl.h"
1099a2dd95SBruce Richardson
/*
 * Per-variant search limits; the macro suffix names the variant and the
 * value is its limit.
 * NOTE(review): presumably the number of searches each classify routine
 * runs in parallel - the consumers are not visible in this header, confirm
 * at the call sites.
 */
#define MAX_SEARCHES_AVX16	16
#define MAX_SEARCHES_SSE8	8
#define MAX_SEARCHES_ALTIVEC8	8
#define MAX_SEARCHES_SSE4	4
#define MAX_SEARCHES_ALTIVEC4	4
#define MAX_SEARCHES_SCALAR	2
1799a2dd95SBruce Richardson
/*
 * Fetch four input bytes for slot (idx) of the (prm) array as an int32_t.
 *
 * The byte offset into the slot's data is read through its data_index
 * cursor, and the cursor is then advanced by one entry, so every expansion
 * consumes one data_index element.
 * Both (prm) and (idx) are expanded twice: pass expressions without side
 * effects.
 */
#define GET_NEXT_4BYTES(prm, idx)	\
	(*((const int32_t *)((prm)[(idx)].data + *(prm)[(idx)].data_index++)))
2099a2dd95SBruce Richardson
2199a2dd95SBruce Richardson
/*
 * 32-bit mask with every RTE_ACL_NODE_TYPE bit cleared.
 * Because the mask is a uint32_t, AND-ing it into a 64-bit transition also
 * clears the upper 32 bits of that transition.
 */
#define RTE_ACL_NODE_INDEX	((uint32_t)~RTE_ACL_NODE_TYPE)
2399a2dd95SBruce Richardson
/*
 * Byte-replicated 32-bit constants:
 *  MULT - multiplying a byte value by it copies that byte into all 4 bytes;
 *  MASK - keeps the low 7 bits of each byte;
 *  MIN  - has only the top bit of each byte set.
 * NOTE(review): presumably used by the scalar quad-range node lookup - the
 * users are not visible in this header, confirm there.
 */
#define SCALAR_QRANGE_MULT	0x01010101
#define SCALAR_QRANGE_MASK	0x7f7f7f7f
#define SCALAR_QRANGE_MIN	0x80808080
2799a2dd95SBruce Richardson
/*
 * Structure to manage N parallel trie traversals.
 * The runtime trie traversal routines can process 8, 4, or 2 tries
 * in parallel. Each packet may require multiple trie traversals (up to 4).
 * This structure is used to fill the slots (0 to n-1) for parallel processing
 * with the trie traversals needed for each packet.
 */
struct acl_flow_data {
	/*
	 * number of packets processed; also the index into data[] of the
	 * packet whose tries are being started next
	 */
	uint32_t num_packets;
	/* number of trie traversals in progress */
	uint32_t started;
	/* current trie index (0 to N-1) */
	uint32_t trie;
	/* number of entries in cmplt_array */
	uint32_t cmplt_size;
	/* maximum number of packets to process */
	uint32_t total_packets;
	/* number of result categories per packet. */
	uint32_t categories;
	/* transition table; match results are located relative to it */
	const uint64_t *trans;
	/* input data pointers, one per packet */
	const uint8_t **data;
	/* output results, 'categories' entries per packet */
	uint32_t *results;
	/* completion allocated for the packet currently being started */
	struct completion *last_cmplt;
	/* array that completion structures are allocated from */
	struct completion *cmplt_array;
};
5399a2dd95SBruce Richardson
5499a2dd95SBruce Richardson /*
5599a2dd95SBruce Richardson * Structure to maintain running results for
5699a2dd95SBruce Richardson * a single packet (up to 4 tries).
5799a2dd95SBruce Richardson */
__rte_aligned(XMM_SIZE)58*c6552d9aSTyler Retzlaff struct __rte_aligned(XMM_SIZE) completion {
5999a2dd95SBruce Richardson uint32_t *results; /* running results. */
6099a2dd95SBruce Richardson int32_t priority[RTE_ACL_MAX_CATEGORIES]; /* running priorities. */
6199a2dd95SBruce Richardson uint32_t count; /* num of remaining tries */
6299a2dd95SBruce Richardson /* true for allocated struct */
63*c6552d9aSTyler Retzlaff };
6499a2dd95SBruce Richardson
/*
 * One parms structure for each slot in the search engine.
 */
struct parms {
	/* input data for this packet */
	const uint8_t *data;
	/*
	 * data indirection for this trie: cursor over the offsets into
	 * 'data', advanced each time input is fetched
	 */
	const uint32_t *data_index;
	/* completion data for this packet */
	struct completion *cmplt;
};
7699a2dd95SBruce Richardson
/*
 * Define a global idle node for unused engine slots.
 *
 * The array has no initializer, so as a static object it is zero-filled.
 * acl_start_next_trie() points both the input data and the data_index of
 * an idle slot at it.
 */
static const uint32_t idle[UINT8_MAX + 1];
8199a2dd95SBruce Richardson
8299a2dd95SBruce Richardson /*
8399a2dd95SBruce Richardson * Allocate a completion structure to manage the tries for a packet.
8499a2dd95SBruce Richardson */
8599a2dd95SBruce Richardson static inline struct completion *
alloc_completion(struct completion * p,uint32_t size,uint32_t tries,uint32_t * results)8699a2dd95SBruce Richardson alloc_completion(struct completion *p, uint32_t size, uint32_t tries,
8799a2dd95SBruce Richardson uint32_t *results)
8899a2dd95SBruce Richardson {
8999a2dd95SBruce Richardson uint32_t n;
9099a2dd95SBruce Richardson
9199a2dd95SBruce Richardson for (n = 0; n < size; n++) {
9299a2dd95SBruce Richardson
9399a2dd95SBruce Richardson if (p[n].count == 0) {
9499a2dd95SBruce Richardson
9599a2dd95SBruce Richardson /* mark as allocated and set number of tries. */
9699a2dd95SBruce Richardson p[n].count = tries;
9799a2dd95SBruce Richardson p[n].results = results;
9899a2dd95SBruce Richardson return &(p[n]);
9999a2dd95SBruce Richardson }
10099a2dd95SBruce Richardson }
10199a2dd95SBruce Richardson
10299a2dd95SBruce Richardson /* should never get here */
10399a2dd95SBruce Richardson return NULL;
10499a2dd95SBruce Richardson }
10599a2dd95SBruce Richardson
10699a2dd95SBruce Richardson /*
10799a2dd95SBruce Richardson * Resolve priority for a single result trie.
10899a2dd95SBruce Richardson */
10999a2dd95SBruce Richardson static inline void
resolve_single_priority(uint64_t transition,int n,const struct rte_acl_ctx * ctx,struct parms * parms,const struct rte_acl_match_results * p)11099a2dd95SBruce Richardson resolve_single_priority(uint64_t transition, int n,
11199a2dd95SBruce Richardson const struct rte_acl_ctx *ctx, struct parms *parms,
11299a2dd95SBruce Richardson const struct rte_acl_match_results *p)
11399a2dd95SBruce Richardson {
11499a2dd95SBruce Richardson if (parms[n].cmplt->count == ctx->num_tries ||
11599a2dd95SBruce Richardson parms[n].cmplt->priority[0] <=
11699a2dd95SBruce Richardson p[transition].priority[0]) {
11799a2dd95SBruce Richardson
11899a2dd95SBruce Richardson parms[n].cmplt->priority[0] = p[transition].priority[0];
11999a2dd95SBruce Richardson parms[n].cmplt->results[0] = p[transition].results[0];
12099a2dd95SBruce Richardson }
12199a2dd95SBruce Richardson }
12299a2dd95SBruce Richardson
12399a2dd95SBruce Richardson /*
12499a2dd95SBruce Richardson * Routine to fill a slot in the parallel trie traversal array (parms) from
12599a2dd95SBruce Richardson * the list of packets (flows).
12699a2dd95SBruce Richardson */
12799a2dd95SBruce Richardson static inline uint64_t
acl_start_next_trie(struct acl_flow_data * flows,struct parms * parms,int n,const struct rte_acl_ctx * ctx)12899a2dd95SBruce Richardson acl_start_next_trie(struct acl_flow_data *flows, struct parms *parms, int n,
12999a2dd95SBruce Richardson const struct rte_acl_ctx *ctx)
13099a2dd95SBruce Richardson {
13199a2dd95SBruce Richardson uint64_t transition;
13299a2dd95SBruce Richardson
13399a2dd95SBruce Richardson /* if there are any more packets to process */
13499a2dd95SBruce Richardson if (flows->num_packets < flows->total_packets) {
13599a2dd95SBruce Richardson parms[n].data = flows->data[flows->num_packets];
13699a2dd95SBruce Richardson parms[n].data_index = ctx->trie[flows->trie].data_index;
13799a2dd95SBruce Richardson
13899a2dd95SBruce Richardson /* if this is the first trie for this packet */
13999a2dd95SBruce Richardson if (flows->trie == 0) {
14099a2dd95SBruce Richardson flows->last_cmplt = alloc_completion(flows->cmplt_array,
14199a2dd95SBruce Richardson flows->cmplt_size, ctx->num_tries,
14299a2dd95SBruce Richardson flows->results +
14399a2dd95SBruce Richardson flows->num_packets * flows->categories);
14499a2dd95SBruce Richardson }
14599a2dd95SBruce Richardson
14699a2dd95SBruce Richardson /* set completion parameters and starting index for this slot */
14799a2dd95SBruce Richardson parms[n].cmplt = flows->last_cmplt;
14899a2dd95SBruce Richardson transition =
14999a2dd95SBruce Richardson flows->trans[parms[n].data[*parms[n].data_index++] +
15099a2dd95SBruce Richardson ctx->trie[flows->trie].root_index];
15199a2dd95SBruce Richardson
15299a2dd95SBruce Richardson /*
15399a2dd95SBruce Richardson * if this is the last trie for this packet,
15499a2dd95SBruce Richardson * then setup next packet.
15599a2dd95SBruce Richardson */
15699a2dd95SBruce Richardson flows->trie++;
15799a2dd95SBruce Richardson if (flows->trie >= ctx->num_tries) {
15899a2dd95SBruce Richardson flows->trie = 0;
15999a2dd95SBruce Richardson flows->num_packets++;
16099a2dd95SBruce Richardson }
16199a2dd95SBruce Richardson
16299a2dd95SBruce Richardson /* keep track of number of active trie traversals */
16399a2dd95SBruce Richardson flows->started++;
16499a2dd95SBruce Richardson
16599a2dd95SBruce Richardson /* no more tries to process, set slot to an idle position */
16699a2dd95SBruce Richardson } else {
16799a2dd95SBruce Richardson transition = ctx->idle;
16899a2dd95SBruce Richardson parms[n].data = (const uint8_t *)idle;
16999a2dd95SBruce Richardson parms[n].data_index = idle;
17099a2dd95SBruce Richardson }
17199a2dd95SBruce Richardson return transition;
17299a2dd95SBruce Richardson }
17399a2dd95SBruce Richardson
17499a2dd95SBruce Richardson static inline void
acl_set_flow(struct acl_flow_data * flows,struct completion * cmplt,uint32_t cmplt_size,const uint8_t ** data,uint32_t * results,uint32_t data_num,uint32_t categories,const uint64_t * trans)17599a2dd95SBruce Richardson acl_set_flow(struct acl_flow_data *flows, struct completion *cmplt,
17699a2dd95SBruce Richardson uint32_t cmplt_size, const uint8_t **data, uint32_t *results,
17799a2dd95SBruce Richardson uint32_t data_num, uint32_t categories, const uint64_t *trans)
17899a2dd95SBruce Richardson {
17999a2dd95SBruce Richardson flows->num_packets = 0;
18099a2dd95SBruce Richardson flows->started = 0;
18199a2dd95SBruce Richardson flows->trie = 0;
18299a2dd95SBruce Richardson flows->last_cmplt = NULL;
18399a2dd95SBruce Richardson flows->cmplt_array = cmplt;
18499a2dd95SBruce Richardson flows->total_packets = data_num;
18599a2dd95SBruce Richardson flows->categories = categories;
18699a2dd95SBruce Richardson flows->cmplt_size = cmplt_size;
18799a2dd95SBruce Richardson flows->data = data;
18899a2dd95SBruce Richardson flows->results = results;
18999a2dd95SBruce Richardson flows->trans = trans;
19099a2dd95SBruce Richardson }
19199a2dd95SBruce Richardson
/*
 * Callback that merges the match result selected by 'transition' into the
 * running results of slot 'n' for 'categories' categories.
 * acl_match_check() invokes it when more than one category is requested.
 */
typedef void (*resolve_priority_t)
(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
	struct parms *parms, const struct rte_acl_match_results *p,
	uint32_t categories);
19699a2dd95SBruce Richardson
19799a2dd95SBruce Richardson /*
19899a2dd95SBruce Richardson * Detect matches. If a match node transition is found, then this trie
19999a2dd95SBruce Richardson * traversal is complete and fill the slot with the next trie
20099a2dd95SBruce Richardson * to be processed.
20199a2dd95SBruce Richardson */
20299a2dd95SBruce Richardson static inline uint64_t
acl_match_check(uint64_t transition,int slot,const struct rte_acl_ctx * ctx,struct parms * parms,struct acl_flow_data * flows,resolve_priority_t resolve_priority)20399a2dd95SBruce Richardson acl_match_check(uint64_t transition, int slot,
20499a2dd95SBruce Richardson const struct rte_acl_ctx *ctx, struct parms *parms,
20599a2dd95SBruce Richardson struct acl_flow_data *flows, resolve_priority_t resolve_priority)
20699a2dd95SBruce Richardson {
20799a2dd95SBruce Richardson const struct rte_acl_match_results *p;
20899a2dd95SBruce Richardson
20999a2dd95SBruce Richardson p = (const struct rte_acl_match_results *)
21099a2dd95SBruce Richardson (flows->trans + ctx->match_index);
21199a2dd95SBruce Richardson
21299a2dd95SBruce Richardson if (transition & RTE_ACL_NODE_MATCH) {
21399a2dd95SBruce Richardson
21499a2dd95SBruce Richardson /* Remove flags from index and decrement active traversals */
21599a2dd95SBruce Richardson transition &= RTE_ACL_NODE_INDEX;
21699a2dd95SBruce Richardson flows->started--;
21799a2dd95SBruce Richardson
21899a2dd95SBruce Richardson /* Resolve priorities for this trie and running results */
21999a2dd95SBruce Richardson if (flows->categories == 1)
22099a2dd95SBruce Richardson resolve_single_priority(transition, slot, ctx,
22199a2dd95SBruce Richardson parms, p);
22299a2dd95SBruce Richardson else
22399a2dd95SBruce Richardson resolve_priority(transition, slot, ctx, parms,
22499a2dd95SBruce Richardson p, flows->categories);
22599a2dd95SBruce Richardson
22699a2dd95SBruce Richardson /* Count down completed tries for this search request */
22799a2dd95SBruce Richardson parms[slot].cmplt->count--;
22899a2dd95SBruce Richardson
22999a2dd95SBruce Richardson /* Fill the slot with the next trie or idle trie */
23099a2dd95SBruce Richardson transition = acl_start_next_trie(flows, parms, slot, ctx);
23199a2dd95SBruce Richardson }
23299a2dd95SBruce Richardson
23399a2dd95SBruce Richardson return transition;
23499a2dd95SBruce Richardson }
23599a2dd95SBruce Richardson
23699a2dd95SBruce Richardson #endif /* _ACL_RUN_H_ */
237