/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Ericsson AB
 */

#ifndef _DSW_EVDEV_H_
#define _DSW_EVDEV_H_

#include <eventdev_pmd.h>

#include <rte_bitset.h>
#include <rte_event_ring.h>
#include <rte_eventdev.h>

#define DSW_PMD_NAME RTE_STR(event_dsw)

#define DSW_MAX_PORTS (64)
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
#define DSW_MAX_PORT_OUT_BUFFER (32)

#define DSW_MAX_QUEUES (16)

#define DSW_MAX_EVENTS (16384)

/* Multiple 24-bit flow ids will map to the same DSW-level flow. The
 * number of DSW flows should be high enough to make it unlikely that
 * flow ids of several large flows hash to the same DSW-level flow.
 * Such collisions will limit parallelism and thus the number of cores
 * that may be utilized. However, configuring a large number of DSW
 * flows might potentially, depending on traffic and the actual
 * application flow id value range, result in each such DSW-level flow
 * being very small. The effect of migrating such flows will be small,
 * in terms of the amount of processing load redistributed. This will
 * in turn reduce the load balancing speed, since the flow migration
 * rate has an upper limit. Code changes are required to allow > 32k
 * DSW-level flows.
 */
#define DSW_MAX_FLOWS_BITS (13)
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
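
/* An illustrative sketch only, not necessarily the hash actually used
 * by this PMD: one way to fold a 24-bit eventdev flow id down to a
 * DSW_MAX_FLOWS_BITS-wide DSW-level flow hash is to XOR the id's
 * DSW_MAX_FLOWS_BITS-sized chunks together.
 */
static inline uint16_t
dsw_example_flow_hash(uint32_t flow_id)
{
	uint16_t hash = 0;
	unsigned int offset;

	/* Fold each DSW_MAX_FLOWS_BITS-wide slice of the 24-bit id. */
	for (offset = 0; offset < 24; offset += DSW_MAX_FLOWS_BITS)
		hash ^= (flow_id >> offset) & DSW_MAX_FLOWS_MASK;

	return hash;
}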

/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns flow ids to
 * events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" away
 * from a particular port, the likelihood of events being scheduled to
 * this port is reduced, and thus a kind of statistical load balancing
 * is achieved.
 */
#define DSW_PARALLEL_FLOWS (1024)
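
/* An illustrative sketch only (hypothetical helper, not part of the
 * driver): a synthetic flow id for a parallel-queue event could be
 * produced by cycling a per-port counter over the DSW_PARALLEL_FLOWS
 * range, which is enough to let the atomic-queue migration machinery
 * be reused.
 */
static inline uint16_t
dsw_example_parallel_flow_id(uint16_t *next_parallel_flow_id)
{
	uint16_t flow_id = *next_parallel_flow_id;

	*next_parallel_flow_id = (flow_id + 1) % DSW_PARALLEL_FLOWS;

	return flow_id;
}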

/* 'Background tasks' poll the control rings for migration-related
 * messages and flush the output buffers (so that buffered events
 * don't linger too long). This value shouldn't be too low, since the
 * system would then not benefit from the 'batching' effects of the
 * output buffer, and shouldn't be too high, since that would make
 * buffered events linger too long in case the port goes idle.
 */
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)

/* Avoid making small 'loans' from the central in-flight event credit
 * pool, to improve efficiency.
 */
#define DSW_MIN_CREDIT_LOAN (64)
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
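
/* An illustrative sketch only (hypothetical helper, not the driver's
 * actual logic): a port that runs out of locally cached credits
 * borrows at least DSW_MIN_CREDIT_LOAN credits from the central pool
 * in a single loan, keeping accesses to the shared counter rare.
 */
static inline int32_t
dsw_example_credit_loan_size(int32_t missing_credits)
{
	return RTE_MAX(missing_credits, DSW_MIN_CREDIT_LOAN);
}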

/* The rings are dimensioned so that all in-flight events can reside
 * on any one of the port rings, to avoid the trouble of having to
 * care about the case where there's no room on the destination port's
 * input ring.
 */
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)

#define DSW_MAX_LOAD (INT16_MAX)
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
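
/* For example, DSW_LOAD_FROM_PERCENT(70) evaluates to
 * (70*32767)/100 = 22936, and feeding that value back through
 * DSW_LOAD_TO_PERCENT() yields 69, due to integer truncation.
 */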

/* The thought behind keeping the load update interval shorter than
 * the migration interval is that the load from newly migrated flows
 * should 'show up' on the load measurement before new migrations are
 * considered. This is to avoid having too many flows, from too many
 * source ports, being migrated too quickly to a lightly loaded port -
 * in particular since this might cause the system to oscillate.
 */
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
#define DSW_OLD_LOAD_WEIGHT (1)
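
/* An illustrative sketch only (hypothetical helper, not necessarily
 * how the driver computes it): DSW_OLD_LOAD_WEIGHT suggests the port
 * load estimate is smoothed by giving the previous estimate that
 * relative weight against the load measured in the latest interval.
 */
static inline int16_t
dsw_example_smoothed_load(int16_t old_load, int16_t measured_load)
{
	return (measured_load + old_load * DSW_OLD_LOAD_WEIGHT) /
		(DSW_OLD_LOAD_WEIGHT + 1);
}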

/* The minimum time (in us) between two flow migrations. What puts an
 * upper limit on the actual migration rate is primarily the pace at
 * which the ports send and receive control messages, which in turn is
 * largely a function of how many cycles are spent processing an event
 * burst.
 */
#define DSW_MIGRATION_INTERVAL (1000)
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
#define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3))
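
/* An illustrative sketch only (hypothetical helper, not the driver's
 * actual decision logic): the three thresholds above are meant to be
 * combined roughly like this when considering emigrating a flow from
 * a loaded source port to a less-loaded target port.
 */
static inline bool
dsw_example_migration_worthwhile(int16_t source_load, int16_t target_load)
{
	return source_load >= DSW_MIN_SOURCE_LOAD_FOR_MIGRATION &&
		target_load < DSW_MAX_TARGET_LOAD_FOR_MIGRATION &&
		(source_load - target_load) > DSW_REBALANCE_THRESHOLD;
}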

#define DSW_MAX_EVENTS_RECORDED (128)

#define DSW_MAX_FLOWS_PER_MIGRATION (8)

/* Only one outstanding migration per port is allowed. */
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION)

/* Enough room for pause request/confirm and unpause request/confirm
 * for all possible senders.
 */
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
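
/* With DSW_MAX_PORTS at 64, this amounts to (64-1)*4 = 252 control
 * ring entries per port: one slot each for a pause request, pause
 * confirmation, unpause request and unpause confirmation from every
 * possible peer.
 */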

/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
 * dequeue(), arrange events so that events with the same flow id on
 * the same queue form a back-to-back "burst", and also so that such
 * bursts of different flow ids, but on the same queue, come
 * consecutively. All this in an attempt to improve data and
 * instruction cache usage for the application, at the cost of a
 * scheduler overhead increase.
 */

/* #define DSW_SORT_DEQUEUED */
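
/* An illustrative sketch only (hypothetical comparison function, not
 * necessarily the ordering the driver uses): sorting dequeued events
 * by (queue_id, flow_id) produces the grouping described above.
 */
static inline int
dsw_example_event_cmp(const struct rte_event *a, const struct rte_event *b)
{
	if (a->queue_id != b->queue_id)
		return a->queue_id < b->queue_id ? -1 : 1;
	if (a->flow_id != b->flow_id)
		return a->flow_id < b->flow_id ? -1 : 1;
	return 0;
}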

struct dsw_queue_flow {
	uint8_t queue_id;
	uint16_t flow_hash;
};

enum dsw_migration_state {
	DSW_MIGRATION_STATE_IDLE,
	DSW_MIGRATION_STATE_FINISH_PENDING,
	DSW_MIGRATION_STATE_PAUSING,
	DSW_MIGRATION_STATE_UNPAUSING
};

struct __rte_cache_aligned dsw_port {
	uint16_t id;

	/* Keeping a pointer here to avoid container_of() calls, which
	 * are expensive since they are very frequent and will result
	 * in an integer multiplication (since the port id is an index
	 * into the dsw_evdev port array).
	 */
	struct dsw_evdev *dsw;

	uint16_t dequeue_depth;
	uint16_t enqueue_depth;

	int32_t inflight_credits;

	int32_t new_event_threshold;

	bool implicit_release;

	uint16_t pending_releases;

	uint16_t next_parallel_flow_id;

	uint16_t ops_since_bg_task;

	/* Most recent 'background' processing. */
	uint64_t last_bg;

	/* For port load measurement. */
	uint64_t next_load_update;
	uint64_t load_update_interval;
	uint64_t measurement_start;
	uint64_t busy_start;
	uint64_t busy_cycles;
	uint64_t total_busy_cycles;

	/* For the ctl interface and flow migration mechanism. */
	uint64_t next_emigration;
	uint64_t migration_interval;
	enum dsw_migration_state migration_state;

	uint64_t emigration_start;
	uint64_t emigrations;
	uint64_t emigration_latency;

	uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION];
	struct dsw_queue_flow
		emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION];
	uint8_t emigration_targets_len;
	uint8_t cfm_cnt;

	uint64_t immigrations;

	uint16_t paused_flows_len;
	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];

	/* In a very contrived worst case, all in-flight events may be
	 * lying around paused here.
	 */
	uint16_t paused_events_len;
	struct rte_event paused_events[DSW_MAX_EVENTS];

	uint16_t emigrating_events_len;
	/* Buffer for not-yet-processed events pertaining to a flow
	 * emigrating from this port. These events will be forwarded
	 * to the target port.
	 */
	struct rte_event emigrating_events[DSW_MAX_EVENTS];

	uint16_t seen_events_len;
	uint16_t seen_events_idx;
	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];

	uint64_t enqueue_calls;
	uint64_t new_enqueued;
	uint64_t forward_enqueued;
	uint64_t release_enqueued;
	uint64_t queue_enqueued[DSW_MAX_QUEUES];

	uint64_t dequeue_calls;
	uint64_t dequeued;
	uint64_t queue_dequeued[DSW_MAX_QUEUES];

	uint16_t out_buffer_len[DSW_MAX_PORTS];
	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];

	uint16_t in_buffer_len;
	uint16_t in_buffer_start;
	/* This buffer may contain events that were read from the
	 * in_ring during the flow migration process.
	 */
	struct rte_event in_buffer[DSW_MAX_EVENTS];

	alignas(RTE_CACHE_LINE_SIZE) struct rte_event_ring *in_ring;

	alignas(RTE_CACHE_LINE_SIZE) struct rte_ring *ctl_in_ring;

	/* Estimate of the current port load. */
	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int16_t) load;
	/* Estimate of the load from flows currently migrating to this
	 * port.
	 */
	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) immigration_load;
};

struct dsw_queue {
	uint8_t schedule_type;
	RTE_BITSET_DECLARE(serving_ports, DSW_MAX_PORTS);
	uint16_t num_serving_ports;

	alignas(RTE_CACHE_LINE_SIZE) uint8_t flow_to_port_map[DSW_MAX_FLOWS];
};
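
/* An illustrative sketch only (hypothetical helper, not part of the
 * driver): for atomic queues, scheduling an event essentially boils
 * down to looking up which port currently owns the event's DSW-level
 * flow.
 */
static inline uint8_t
dsw_example_schedule_port(const struct dsw_queue *queue, uint16_t flow_hash)
{
	return queue->flow_to_port_map[flow_hash & DSW_MAX_FLOWS_MASK];
}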

/* Limited by the size of the 'serving_ports' bitset. */
static_assert(DSW_MAX_PORTS <= 64, "Max compile-time port count exceeded");

struct dsw_evdev {
	struct rte_eventdev_data *data;

	struct dsw_port ports[DSW_MAX_PORTS];
	uint16_t num_ports;
	struct dsw_queue queues[DSW_MAX_QUEUES];
	uint8_t num_queues;
	int32_t max_inflight;

	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) credits_on_loan;
};

#define DSW_CTL_PAUSE_REQ (0)
#define DSW_CTL_UNPAUSE_REQ (1)
#define DSW_CTL_CFM (2)

struct __rte_aligned(4) dsw_ctl_msg {
	uint8_t type;
	uint8_t originating_port_id;
	uint8_t qfs_len;
	struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION];
};
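
/* An illustrative sketch only (hypothetical helper, not the driver's
 * actual code; it assumes the rte_ring 'elem' API is visible in this
 * include context and that ctl_in_ring holds whole dsw_ctl_msg
 * elements): passing a control message to a peer port could then
 * amount to enqueueing one such element on that port's ctl_in_ring.
 */
static inline int
dsw_example_ctl_send(struct dsw_port *dest_port, struct dsw_ctl_msg *msg)
{
	return rte_ring_enqueue_elem(dest_port->ctl_in_ring, msg,
				     sizeof(struct dsw_ctl_msg));
}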

uint16_t dsw_event_enqueue_burst(void *port,
				 const struct rte_event events[],
				 uint16_t events_len);
uint16_t dsw_event_enqueue_new_burst(void *port,
				     const struct rte_event events[],
				     uint16_t events_len);
uint16_t dsw_event_enqueue_forward_burst(void *port,
					 const struct rte_event events[],
					 uint16_t events_len);

uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
				 uint16_t num, uint64_t wait);
void dsw_event_maintain(void *port, int op);

int dsw_xstats_get_names(const struct rte_eventdev *dev,
			 enum rte_event_dev_xstats_mode mode,
			 uint8_t queue_port_id,
			 struct rte_event_dev_xstats_name *xstats_names,
			 uint64_t *ids, unsigned int size);
int dsw_xstats_get(const struct rte_eventdev *dev,
		   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
		   const uint64_t ids[], uint64_t values[], unsigned int n);
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
				const char *name, uint64_t *id);

static inline struct dsw_evdev *
dsw_pmd_priv(const struct rte_eventdev *eventdev)
{
	return eventdev->data->dev_private;
}

extern int event_dsw_logtype;
#define RTE_LOGTYPE_EVENT_DSW event_dsw_logtype
#define DSW_LOG_DP_LINE(level, fmt, ...)				\
	RTE_LOG_DP_LINE(level, EVENT_DSW, "%s() line %u: " fmt,		\
		   __func__, __LINE__, ## __VA_ARGS__)

#define DSW_LOG_DP_PORT_LINE(level, port_id, fmt, ...)			\
	DSW_LOG_DP_LINE(level, "<Port %d> " fmt, port_id, ## __VA_ARGS__)
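
/* Example usage (illustrative only; 'port' is assumed to be a
 * struct dsw_port pointer and 'num' an event count):
 *
 *   DSW_LOG_DP_PORT_LINE(DEBUG, port->id, "Enqueued %d events.", num);
 */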

#endif