xref: /dpdk/lib/distributor/distributor_private.h (revision c6552d9a8deffa448de2d5e2e726f50508c1efd2)
199a2dd95SBruce Richardson /* SPDX-License-Identifier: BSD-3-Clause
299a2dd95SBruce Richardson  * Copyright(c) 2017 Intel Corporation
399a2dd95SBruce Richardson  */
499a2dd95SBruce Richardson 
599a2dd95SBruce Richardson #ifndef _DIST_PRIV_H_
699a2dd95SBruce Richardson #define _DIST_PRIV_H_
799a2dd95SBruce Richardson 
8e9fd1ebfSTyler Retzlaff #include <stdalign.h>
9e9fd1ebfSTyler Retzlaff 
1099a2dd95SBruce Richardson /**
1199a2dd95SBruce Richardson  * @file
1299a2dd95SBruce Richardson  * RTE distributor
1399a2dd95SBruce Richardson  *
1499a2dd95SBruce Richardson  * The distributor is a component which is designed to pass packets
1599a2dd95SBruce Richardson  * one-at-a-time to workers, with dynamic load balancing.
1699a2dd95SBruce Richardson  */
1799a2dd95SBruce Richardson 
1899a2dd95SBruce Richardson #define NO_FLAGS 0 /* zero, i.e. "no flags set" */
1999a2dd95SBruce Richardson #define RTE_DISTRIB_PREFIX "DT_" /* NOTE(review): presumably a name prefix for distributor objects - confirm against users */
2099a2dd95SBruce Richardson 
2199a2dd95SBruce Richardson /*
2299a2dd95SBruce Richardson  * We use the bottom four bits of the pointer for flags, shifting out
2399a2dd95SBruce Richardson  * the top four bits to make room (since a 64-bit pointer actually only uses
2499a2dd95SBruce Richardson  * 48 bits). An arithmetic right shift will then appropriately restore the
2599a2dd95SBruce Richardson  * original pointer value with proper sign extension into the top bits.
 *
 * RTE_DISTRIB_FLAGS_MASK (0x0F) covers exactly the RTE_DISTRIB_FLAG_BITS (4)
 * low bits, and every flag value defined below fits inside that mask.
2699a2dd95SBruce Richardson  */
2799a2dd95SBruce Richardson #define RTE_DISTRIB_FLAG_BITS 4
2899a2dd95SBruce Richardson #define RTE_DISTRIB_FLAGS_MASK (0x0F)
2999a2dd95SBruce Richardson #define RTE_DISTRIB_NO_BUF 0       /**< empty flags: no buffer requested */
3099a2dd95SBruce Richardson #define RTE_DISTRIB_GET_BUF (1)    /**< worker requests a buffer, returns old */
3199a2dd95SBruce Richardson #define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */
3299a2dd95SBruce Richardson #define RTE_DISTRIB_VALID_BUF (4)  /**< set if bufptr contains a pointer */
3399a2dd95SBruce Richardson 
/*
 * Both sizes are powers of two, so the matching *_MASK (size - 1) is an
 * all-ones bit mask. NOTE(review): presumably the masks are used to wrap
 * ring indices - confirm against the .c files.
 */
3499a2dd95SBruce Richardson #define RTE_DISTRIB_BACKLOG_SIZE 8
3599a2dd95SBruce Richardson #define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1)
3699a2dd95SBruce Richardson 
3799a2dd95SBruce Richardson #define RTE_DISTRIB_MAX_RETURNS 128 /* length of rte_distributor_returned_pkts.mbufs[] */
3899a2dd95SBruce Richardson #define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1)
3999a2dd95SBruce Richardson 
4099a2dd95SBruce Richardson /**
4199a2dd95SBruce Richardson  * Maximum number of workers allowed.
4299a2dd95SBruce Richardson  * Be careful when increasing this limit: it is bounded by how in-flight
4399a2dd95SBruce Richardson  * tags are tracked. See in_flight_bitmask (a uint64_t in
 * struct rte_distributor_single, which has to expand if this value exceeds
 * 64) and rte_distributor_process.
4499a2dd95SBruce Richardson  */
4599a2dd95SBruce Richardson #define RTE_DISTRIB_MAX_WORKERS 64
4699a2dd95SBruce Richardson 
4799a2dd95SBruce Richardson #define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */
4899a2dd95SBruce Richardson 
4999a2dd95SBruce Richardson /**
5099a2dd95SBruce Richardson  * Buffer structure used to pass the pointer data between cores. This is cache
5199a2dd95SBruce Richardson  * line aligned, but to improve performance and prevent adjacent cache-line
5299a2dd95SBruce Richardson  * prefetches of buffers for other workers, e.g. when worker 1's buffer is on
5399a2dd95SBruce Richardson  * the next cache line to worker 0, we pad this out to three cache lines.
5499a2dd95SBruce Richardson  * Only 64 bits of the memory are actually used, though.
5599a2dd95SBruce Richardson  */
56*c6552d9aSTyler Retzlaff union __rte_cache_aligned rte_distributor_buffer_single {
57c2a363a3STyler Retzlaff 	volatile RTE_ATOMIC(int64_t) bufptr64; /* the 64 bits that carry data */
5899a2dd95SBruce Richardson 	char pad[RTE_CACHE_LINE_SIZE*3]; /* sizes the union to three cache lines */
59*c6552d9aSTyler Retzlaff };
6099a2dd95SBruce Richardson 
6199a2dd95SBruce Richardson /*
6299a2dd95SBruce Richardson  * Transfer up to 8 mbufs at a time to/from workers; the flow matching
6399a2dd95SBruce Richardson  * algorithm is likewise optimized for 8 flow IDs at a time.
 * This constant dimensions the pkts[], bufptr64[] and retptr64[] arrays
 * and (doubled) each row of in_flight_tags[][] in this header.
6499a2dd95SBruce Richardson  */
6599a2dd95SBruce Richardson #define RTE_DIST_BURST_SIZE 8
6699a2dd95SBruce Richardson 
/*
 * Per-worker backlog record; struct rte_distributor and
 * struct rte_distributor_single each hold one per possible worker.
 * The struct and its pkts[] array are both cache-line aligned.
 */
67*c6552d9aSTyler Retzlaff struct __rte_cache_aligned rte_distributor_backlog {
6899a2dd95SBruce Richardson 	unsigned int start; /* NOTE(review): presumably index of the oldest queued entry - confirm */
6999a2dd95SBruce Richardson 	unsigned int count; /* NOTE(review): presumably number of queued entries - confirm */
70e9fd1ebfSTyler Retzlaff 	alignas(RTE_CACHE_LINE_SIZE) int64_t pkts[RTE_DIST_BURST_SIZE]; /* 64-bit slots, one burst's worth */
7199a2dd95SBruce Richardson 	uint16_t *tags; /* will point to second cacheline of inflights; NOTE(review): an in_flight_tags row is only 2 * RTE_DIST_BURST_SIZE uint16_t, so confirm what "cacheline" means here */
72*c6552d9aSTyler Retzlaff };
7399a2dd95SBruce Richardson 
7499a2dd95SBruce Richardson 
/*
 * Fixed-capacity store of mbuf pointers, embedded as the "returns" member of
 * both distributor structs in this header.
 * NOTE(review): start/count presumably describe a ring over mbufs[] wrapped
 * with RTE_DISTRIB_RETURNS_MASK - confirm against the .c files.
 */
7599a2dd95SBruce Richardson struct rte_distributor_returned_pkts {
7699a2dd95SBruce Richardson 	unsigned int start;
7799a2dd95SBruce Richardson 	unsigned int count;
7899a2dd95SBruce Richardson 	struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS]; /* capacity: RTE_DISTRIB_MAX_RETURNS pointers */
7999a2dd95SBruce Richardson };
8099a2dd95SBruce Richardson 
/*
 * State of a distributor instance that uses the one-packet-at-a-time buffer
 * (union rte_distributor_buffer_single). struct rte_distributor refers to one
 * of these through its d_single pointer.
 */
8199a2dd95SBruce Richardson struct rte_distributor_single {
8299a2dd95SBruce Richardson 	TAILQ_ENTRY(rte_distributor_single) next;    /**< Next in list. */
8399a2dd95SBruce Richardson 
8499a2dd95SBruce Richardson 	char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the distributor instance. */
8599a2dd95SBruce Richardson 	unsigned int num_workers;             /**< Number of workers polling */
8699a2dd95SBruce Richardson 
8799a2dd95SBruce Richardson 	uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS];
8899a2dd95SBruce Richardson 		/**< Tracks the tag being processed per core */
8999a2dd95SBruce Richardson 	uint64_t in_flight_bitmask;
9099a2dd95SBruce Richardson 		/**< on/off bits for in-flight tags.
9199a2dd95SBruce Richardson 		 * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then
9299a2dd95SBruce Richardson 		 * the bitmask has to expand.
9399a2dd95SBruce Richardson 		 */
9499a2dd95SBruce Richardson 
9599a2dd95SBruce Richardson 	struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]; /* one backlog per possible worker */
9699a2dd95SBruce Richardson 
9799a2dd95SBruce Richardson 	union rte_distributor_buffer_single bufs[RTE_DISTRIB_MAX_WORKERS]; /* one exchange buffer per possible worker */
9899a2dd95SBruce Richardson 
9999a2dd95SBruce Richardson 	struct rte_distributor_returned_pkts returns; /* store of returned mbuf pointers */
10099a2dd95SBruce Richardson };
10199a2dd95SBruce Richardson 
10299a2dd95SBruce Richardson /*
 * Identifiers for the different signature compare functions; see
 * find_match_scalar() and find_match_vec() declared in this header.
 * Stored in rte_distributor::dist_match_fn.
 */
10399a2dd95SBruce Richardson enum rte_distributor_match_function {
10499a2dd95SBruce Richardson 	RTE_DIST_MATCH_SCALAR = 0,
10599a2dd95SBruce Richardson 	RTE_DIST_MATCH_VECTOR,
10699a2dd95SBruce Richardson 	RTE_DIST_NUM_MATCH_FNS /* count of the entries above; keep last */
10799a2dd95SBruce Richardson };
10899a2dd95SBruce Richardson 
10999a2dd95SBruce Richardson /**
11099a2dd95SBruce Richardson  * Buffer structure used to pass the pointer data between cores. Every member
11199a2dd95SBruce Richardson  * is cache line aligned, and to improve performance and prevent adjacent
11299a2dd95SBruce Richardson  * cache-line prefetches of buffers for other workers, e.g. when worker 1's
11399a2dd95SBruce Richardson  * buffer is on the next cache line to worker 0, each pointer array is
 * followed by a cache-line-aligned pad member.
11499a2dd95SBruce Richardson  * We can pass up to 8 mbufs (RTE_DIST_BURST_SIZE) at a time in one array.
11599a2dd95SBruce Richardson  * There is a separate, cache-line-aligned array for returns in the burst API.
11699a2dd95SBruce Richardson  */
11799a2dd95SBruce Richardson struct rte_distributor_buffer {
118e9fd1ebfSTyler Retzlaff 	volatile alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int64_t) bufptr64[RTE_DIST_BURST_SIZE];
119e9fd1ebfSTyler Retzlaff 		/* <= outgoing to worker */
12099a2dd95SBruce Richardson 
121e9fd1ebfSTyler Retzlaff 	alignas(RTE_CACHE_LINE_SIZE) int64_t pad1;    /* <= one cache line  */
12299a2dd95SBruce Richardson 
123e9fd1ebfSTyler Retzlaff 	volatile alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int64_t) retptr64[RTE_DIST_BURST_SIZE];
124e9fd1ebfSTyler Retzlaff 		/* <= incoming from worker */
12599a2dd95SBruce Richardson 
126e9fd1ebfSTyler Retzlaff 	alignas(RTE_CACHE_LINE_SIZE) int64_t pad2;    /* <= one cache line  */
12799a2dd95SBruce Richardson 
128e9fd1ebfSTyler Retzlaff 	alignas(RTE_CACHE_LINE_SIZE) int count;       /* <= number of current mbufs */
12999a2dd95SBruce Richardson };
13099a2dd95SBruce Richardson 
/*
 * State of a distributor instance that uses the burst buffers
 * (struct rte_distributor_buffer), with per-worker arrays sized by
 * RTE_DISTRIB_MAX_WORKERS.
 */
13199a2dd95SBruce Richardson struct rte_distributor {
13299a2dd95SBruce Richardson 	TAILQ_ENTRY(rte_distributor) next;    /**< Next in list. */
13399a2dd95SBruce Richardson 
13499a2dd95SBruce Richardson 	char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of the distributor instance. */
13599a2dd95SBruce Richardson 	unsigned int num_workers;             /**< Number of workers polling */
13699a2dd95SBruce Richardson 	unsigned int alg_type;                /**< Algorithm type (presumably the one in use - confirm) */
13799a2dd95SBruce Richardson 
13899a2dd95SBruce Richardson 	/**
13999a2dd95SBruce Richardson 	 * First cache line in this array holds the tags in flight
14099a2dd95SBruce Richardson 	 * on the worker core. Second cache line holds the backlog
14199a2dd95SBruce Richardson 	 * that is going to go to the worker core.
	 * NOTE(review): each row is only 2 * RTE_DIST_BURST_SIZE uint16_t
	 * entries, so "cache line" here presumably means one
	 * RTE_DIST_BURST_SIZE-entry half of a worker's row - confirm.
14299a2dd95SBruce Richardson 	 */
143e9fd1ebfSTyler Retzlaff 	alignas(RTE_CACHE_LINE_SIZE) uint16_t
144e9fd1ebfSTyler Retzlaff 		in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2];
14599a2dd95SBruce Richardson 
146e9fd1ebfSTyler Retzlaff 	alignas(RTE_CACHE_LINE_SIZE) struct rte_distributor_backlog
147e9fd1ebfSTyler Retzlaff 		backlog[RTE_DISTRIB_MAX_WORKERS]; /* one backlog per possible worker */
14899a2dd95SBruce Richardson 
14999a2dd95SBruce Richardson 	struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; /* one burst buffer per possible worker */
15099a2dd95SBruce Richardson 
15199a2dd95SBruce Richardson 	struct rte_distributor_returned_pkts returns; /* store of returned mbuf pointers */
15299a2dd95SBruce Richardson 
15399a2dd95SBruce Richardson 	enum rte_distributor_match_function dist_match_fn; /* which signature compare function to use */
15499a2dd95SBruce Richardson 
15599a2dd95SBruce Richardson 	struct rte_distributor_single *d_single; /* NOTE(review): presumably set only for the single-packet mode - confirm */
15699a2dd95SBruce Richardson 
15799a2dd95SBruce Richardson 	uint8_t active[RTE_DISTRIB_MAX_WORKERS]; /* NOTE(review): presumably a per-worker "active" flag - confirm */
15899a2dd95SBruce Richardson 	uint8_t activesum; /* NOTE(review): presumably the number of active workers - confirm */
15999a2dd95SBruce Richardson };
16099a2dd95SBruce Richardson 
/*
 * Scalar signature compare function (cf. RTE_DIST_MATCH_SCALAR).
 * Only declared here; the definition lives outside this header.
 * NOTE(review): presumably data_ptr holds the tags to match and output_ptr
 * receives one result per tag - confirm against the definition.
 */
16199a2dd95SBruce Richardson void
16299a2dd95SBruce Richardson find_match_scalar(struct rte_distributor *d,
16399a2dd95SBruce Richardson 			uint16_t *data_ptr,
16499a2dd95SBruce Richardson 			uint16_t *output_ptr);
16599a2dd95SBruce Richardson 
/*
 * Vector signature compare function (cf. RTE_DIST_MATCH_VECTOR); same
 * signature as find_match_scalar().
 * Only declared here; the definition lives outside this header.
 * NOTE(review): presumably produces the same results as the scalar variant
 * using SIMD instructions - confirm against the definition.
 */
16699a2dd95SBruce Richardson void
16799a2dd95SBruce Richardson find_match_vec(struct rte_distributor *d,
16899a2dd95SBruce Richardson 			uint16_t *data_ptr,
16999a2dd95SBruce Richardson 			uint16_t *output_ptr);
17099a2dd95SBruce Richardson 
17199a2dd95SBruce Richardson #endif /* _DIST_PRIV_H_ */
172