/* xref: /dpdk/drivers/event/dsw/dsw_evdev.h (revision dc348f2e81a94dd3b8a32c2f882483227796905d) */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Ericsson AB
 */
4 
5 #ifndef _DSW_EVDEV_H_
6 #define _DSW_EVDEV_H_
7 
8 #include <eventdev_pmd.h>
9 
10 #include <rte_event_ring.h>
11 #include <rte_eventdev.h>
12 
/* Name of this PMD, built from the token event_dsw via RTE_STR(). */
#define DSW_PMD_NAME RTE_STR(event_dsw)

/* Per-port limits. DSW_MAX_PORTS also sizes the port array of
 * struct dsw_evdev and the per-port arrays inside struct dsw_port.
 */
#define DSW_MAX_PORTS (64)
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
#define DSW_MAX_PORT_OUT_BUFFER (32)

/* Sizes the queue array of struct dsw_evdev. */
#define DSW_MAX_QUEUES (16)

/* Sizes the per-port event buffers (paused, emigrating and input
 * events) as well as the port input rings.
 */
#define DSW_MAX_EVENTS (16384)
23 
/* Multiple 24-bit flow ids will map to the same DSW-level flow. The
 * number of DSW flows should be high enough to make it unlikely that
 * flow ids of several large flows hash to the same DSW-level flow.
 * Such collisions will limit parallelism and thus the number of cores
 * that may be utilized. However, configuring a large number of DSW
 * flows might potentially, depending on traffic and actual
 * application flow id value range, result in each such DSW-level flow
 * being very small. The effect of migrating such flows will be small,
 * in terms of the amount of processing load redistributed. This will
 * in turn reduce the load balancing speed, since flow migration rate
 * has an upper limit. Code changes are required to allow > 32k
 * DSW-level flows.
 */
#define DSW_MAX_FLOWS_BITS (13)
/* Number of DSW-level flows: two to the power of DSW_MAX_FLOWS_BITS. */
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
/* Bit mask with the DSW_MAX_FLOWS_BITS least significant bits set. */
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
40 
/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns flow ids to
 * events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" from a
 * particular port, the likelihood of events being scheduled to this
 * port is reduced, and thus a kind of statistical load balancing is
 * achieved.
 */
#define DSW_PARALLEL_FLOWS (1024)
51 
/* 'Background tasks' are polling the control rings for
 * migration-related messages, or flushing the output buffer (so
 * buffered events don't linger too long). Shouldn't be too low,
 * since the system won't benefit from the 'batching' effects from
 * the output buffer, and shouldn't be too high, since it will make
 * buffered events linger too long in case the port goes idle.
 */
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
60 
/* Avoid making small 'loans' from the central in-flight event credit
 * pool, to improve efficiency.
 */
#define DSW_MIN_CREDIT_LOAN (64)
/* Port credit bounds, both expressed in terms of the minimum loan. */
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
67 
/* The rings are dimensioned so that all in-flight events can reside
 * on any one of the port rings, to avoid the trouble of having to
 * care about the case where there's no room on the destination port's
 * input ring.
 */
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
74 
/* Port load is kept as a 16-bit signed fixed-point value, where
 * DSW_MAX_LOAD corresponds to 100%.
 */
#define DSW_MAX_LOAD (INT16_MAX)
/* Convert a percentage to the internal load scale. The result is
 * truncated towards zero and cast to int16_t.
 */
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
/* Convert an internal load value to a percentage. The argument is
 * parenthesized so that compound expressions (e.g. a sum of two
 * loads) are scaled as a whole.
 */
#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
78 
/* The thought behind keeping the load update interval shorter than
 * the migration interval is that the load from newly migrated flows
 * should 'show up' on the load measurement before new migrations are
 * considered. This is to avoid having too many flows, from too many
 * source ports, to be migrated too quickly to a lightly loaded port -
 * in particular since this might cause the system to oscillate.
 */
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
#define DSW_OLD_LOAD_WEIGHT (1)

/* The minimum time (in us) between two flow migrations. What puts an
 * upper limit on the actual migration rate is primarily the pace in
 * which the ports send and receive control messages, which in turn is
 * largely a function of how many cycles are spent on the processing
 * of an event burst.
 */
#define DSW_MIGRATION_INTERVAL (1000)
/* Load thresholds, given in percent and converted to the internal
 * load scale by DSW_LOAD_FROM_PERCENT().
 */
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
#define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3))
99 
/* Sizes the seen_events[] array of struct dsw_port. */
#define DSW_MAX_EVENTS_RECORDED (128)

/* Upper bound on the number of queue/flow pairs carried by one
 * migration (and by one control message).
 */
#define DSW_MAX_FLOWS_PER_MIGRATION (8)

/* Only one outstanding migration per port is allowed */
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION)

/* Enough room for pause request/confirm and unpause request/confirm
 * for all possible senders.
 */
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
111 
/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
 * dequeue(), arrange events so that events with the same flow id on
 * the same queue form a back-to-back "burst", and so that such
 * bursts of different flow ids, but on the same queue, come
 * consecutively. All this in an attempt to improve data and
 * instruction cache usage for the application, at the cost of a
 * scheduler overhead increase.
 */

/* #define DSW_SORT_DEQUEUED */
122 
/* Identifies one DSW-level flow as a (queue id, flow hash) pair. */
struct dsw_queue_flow {
	uint8_t queue_id;
	/* NOTE(review): presumably the application flow id reduced to
	 * DSW_MAX_FLOWS_BITS bits - confirm against the enqueue path.
	 */
	uint16_t flow_hash;
};
127 
/* Flow migration state of a port (see dsw_port.migration_state).
 *
 * NOTE(review): PAUSING/UNPAUSING presumably correspond to waiting for
 * the confirmations of the pause and unpause requests, respectively -
 * confirm against the migration code.
 */
enum dsw_migration_state {
	DSW_MIGRATION_STATE_IDLE,
	DSW_MIGRATION_STATE_PAUSING,
	DSW_MIGRATION_STATE_UNPAUSING
};
133 
134 struct dsw_port {
135 	uint16_t id;
136 
137 	/* Keeping a pointer here to avoid container_of() calls, which
138 	 * are expensive since they are very frequent and will result
139 	 * in an integer multiplication (since the port id is an index
140 	 * into the dsw_evdev port array).
141 	 */
142 	struct dsw_evdev *dsw;
143 
144 	uint16_t dequeue_depth;
145 	uint16_t enqueue_depth;
146 
147 	int32_t inflight_credits;
148 
149 	int32_t new_event_threshold;
150 
151 	uint16_t pending_releases;
152 
153 	uint16_t next_parallel_flow_id;
154 
155 	uint16_t ops_since_bg_task;
156 
157 	/* most recent 'background' processing */
158 	uint64_t last_bg;
159 
160 	/* For port load measurement. */
161 	uint64_t next_load_update;
162 	uint64_t load_update_interval;
163 	uint64_t measurement_start;
164 	uint64_t busy_start;
165 	uint64_t busy_cycles;
166 	uint64_t total_busy_cycles;
167 
168 	/* For the ctl interface and flow migration mechanism. */
169 	uint64_t next_emigration;
170 	uint64_t migration_interval;
171 	enum dsw_migration_state migration_state;
172 
173 	uint64_t emigration_start;
174 	uint64_t emigrations;
175 	uint64_t emigration_latency;
176 
177 	uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION];
178 	struct dsw_queue_flow
179 		emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION];
180 	uint8_t emigration_targets_len;
181 	uint8_t cfm_cnt;
182 
183 	uint64_t immigrations;
184 
185 	uint16_t paused_flows_len;
186 	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];
187 
188 	/* In a very contrived worst case all inflight events can be
189 	 * laying around paused here.
190 	 */
191 	uint16_t paused_events_len;
192 	struct rte_event paused_events[DSW_MAX_EVENTS];
193 
194 	uint16_t emigrating_events_len;
195 	/* Buffer for not-yet-processed events pertaining to a flow
196 	 * emigrating from this port. These events will be forwarded
197 	 * to the target port.
198 	 */
199 	struct rte_event emigrating_events[DSW_MAX_EVENTS];
200 
201 	uint16_t seen_events_len;
202 	uint16_t seen_events_idx;
203 	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];
204 
205 	uint64_t enqueue_calls;
206 	uint64_t new_enqueued;
207 	uint64_t forward_enqueued;
208 	uint64_t release_enqueued;
209 	uint64_t queue_enqueued[DSW_MAX_QUEUES];
210 
211 	uint64_t dequeue_calls;
212 	uint64_t dequeued;
213 	uint64_t queue_dequeued[DSW_MAX_QUEUES];
214 
215 	uint16_t out_buffer_len[DSW_MAX_PORTS];
216 	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
217 
218 	uint16_t in_buffer_len;
219 	uint16_t in_buffer_start;
220 	/* This buffer may contain events that were read up from the
221 	 * in_ring during the flow migration process.
222 	 */
223 	struct rte_event in_buffer[DSW_MAX_EVENTS];
224 
225 	struct rte_event_ring *in_ring __rte_cache_aligned;
226 
227 	struct rte_ring *ctl_in_ring __rte_cache_aligned;
228 
229 	/* Estimate of current port load. */
230 	int16_t load __rte_cache_aligned;
231 	/* Estimate of flows currently migrating to this port. */
232 	int32_t immigration_load __rte_cache_aligned;
233 } __rte_cache_aligned;
234 
235 struct dsw_queue {
236 	uint8_t schedule_type;
237 	uint8_t serving_ports[DSW_MAX_PORTS];
238 	uint16_t num_serving_ports;
239 
240 	uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
241 };
242 
243 struct dsw_evdev {
244 	struct rte_eventdev_data *data;
245 
246 	struct dsw_port ports[DSW_MAX_PORTS];
247 	uint16_t num_ports;
248 	struct dsw_queue queues[DSW_MAX_QUEUES];
249 	uint8_t num_queues;
250 	int32_t max_inflight;
251 
252 	int32_t credits_on_loan __rte_cache_aligned;
253 };
254 
/* Control message types (pause request, unpause request, confirm).
 * NOTE(review): presumably the values carried in dsw_ctl_msg.type -
 * confirm.
 */
#define DSW_CTL_PAUS_REQ (0)
#define DSW_CTL_UNPAUS_REQ (1)
#define DSW_CTL_CFM (2)
258 
259 struct dsw_ctl_msg {
260 	uint8_t type;
261 	uint8_t originating_port_id;
262 	uint8_t qfs_len;
263 	struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION];
264 } __rte_aligned(4);
265 
266 uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
267 uint16_t dsw_event_enqueue_burst(void *port,
268 				 const struct rte_event events[],
269 				 uint16_t events_len);
270 uint16_t dsw_event_enqueue_new_burst(void *port,
271 				     const struct rte_event events[],
272 				     uint16_t events_len);
273 uint16_t dsw_event_enqueue_forward_burst(void *port,
274 					 const struct rte_event events[],
275 					 uint16_t events_len);
276 
/* Dequeue and maintenance entry points, defined outside this header.
 * NOTE(review): 'port' is presumably a struct dsw_port * and the
 * dequeue return value the number of events written to the caller's
 * buffer - confirm against the definitions.
 */
uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
				 uint16_t num, uint64_t wait);
void dsw_event_maintain(void *port, int op);
281 
/* Extended statistics entry points, defined outside this header.
 * NOTE(review): the signatures look like they mirror the eventdev
 * xstats driver callbacks - confirm against eventdev_pmd.h.
 */
int dsw_xstats_get_names(const struct rte_eventdev *dev,
			 enum rte_event_dev_xstats_mode mode,
			 uint8_t queue_port_id,
			 struct rte_event_dev_xstats_name *xstats_names,
			 uint64_t *ids, unsigned int size);
int dsw_xstats_get(const struct rte_eventdev *dev,
		   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
		   const uint64_t ids[], uint64_t values[], unsigned int n);
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
				const char *name, uint64_t *id);
292 
293 static inline struct dsw_evdev *
294 dsw_pmd_priv(const struct rte_eventdev *eventdev)
295 {
296 	return eventdev->data->dev_private;
297 }
298 
/* Log helper built on RTE_LOG_DP() with the EVENTDEV log type. The
 * message is prefixed with the PMD name, the calling function and the
 * source line number.
 */
#define DSW_LOG_DP(level, fmt, args...)					\
	RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt,		\
		   DSW_PMD_NAME,					\
		   __func__, __LINE__, ## args)

/* Same as DSW_LOG_DP(), with the message additionally tagged with the
 * given port id.
 */
#define DSW_LOG_DP_PORT(level, port_id, fmt, args...)		\
	DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args)
306 
307 #endif
308