/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Ericsson AB
 */

#ifndef _DSW_EVDEV_H_
#define _DSW_EVDEV_H_

#include <eventdev_pmd.h>

#include <rte_bitset.h>
#include <rte_event_ring.h>
#include <rte_eventdev.h>

#define DSW_PMD_NAME RTE_STR(event_dsw)

#define DSW_MAX_PORTS (64)
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
#define DSW_MAX_PORT_OUT_BUFFER (32)

#define DSW_MAX_QUEUES (16)

#define DSW_MAX_EVENTS (16384)

/* Multiple 24-bit flow ids will map to the same DSW-level flow. The
 * number of DSW flows should be high enough to make it unlikely that
 * the flow ids of several large flows hash to the same DSW-level
 * flow. Such collisions will limit parallelism and thus the number
 * of cores that may be utilized. However, configuring a large number
 * of DSW flows might potentially, depending on traffic and the
 * actual application flow id value range, result in each such
 * DSW-level flow being very small. The effect of migrating such
 * flows will be small, in terms of the amount of processing load
 * redistributed. This will in turn reduce the load balancing speed,
 * since the flow migration rate has an upper limit. Code changes are
 * required to allow > 32k DSW-level flows.
 */
#define DSW_MAX_FLOWS_BITS (13)
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)

/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns flow ids
 * to events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of these parallel "flows"
 * away from a particular port, the likelihood of events being
 * scheduled to this port is reduced, and thus a kind of statistical
 * load balancing is achieved.
 */
#define DSW_PARALLEL_FLOWS (1024)

/* 'Background tasks' poll the control rings for migration-related
 * messages and flush the output buffers (so that buffered events
 * don't linger too long). This value shouldn't be too low, since the
 * system then won't benefit from the 'batching' effect of the output
 * buffer, and it shouldn't be too high, since that will make
 * buffered events linger too long in case the port goes idle.
 */
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)

/* Avoid making small 'loans' from the central in-flight event credit
 * pool, to improve efficiency.
 */
#define DSW_MIN_CREDIT_LOAN (64)
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)

/* The rings are dimensioned so that all in-flight events can reside
 * on any one of the port rings, to avoid the trouble of having to
 * care about the case where there's no room on the destination
 * port's input ring.
 */
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)

#define DSW_MAX_LOAD (INT16_MAX)
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
#define DSW_LOAD_TO_PERCENT(x) ((100*x)/DSW_MAX_LOAD)
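
/* Illustrative arithmetic only (not part of the driver): with
 * DSW_MAX_LOAD equal to INT16_MAX (32767), DSW_LOAD_FROM_PERCENT(70)
 * evaluates to (70 * 32767) / 100 = 22936, and
 * DSW_LOAD_TO_PERCENT(22936) maps back to (100 * 22936) / 32767 = 69
 * (integer division rounds down). Port load is thus tracked on a
 * 0..INT16_MAX scale rather than directly in percent.
 */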

/* The thought behind keeping the load update interval shorter than
 * the migration interval is that the load from newly migrated flows
 * should 'show up' on the load measurement before new migrations are
 * considered. This is to avoid having too many flows, from too many
 * source ports, being migrated too quickly to a lightly loaded port,
 * in particular since this might cause the system to oscillate.
 */
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
#define DSW_OLD_LOAD_WEIGHT (1)

/* The minimum time (in us) between two flow migrations. What puts an
 * upper limit on the actual migration rate is primarily the pace at
 * which the ports send and receive control messages, which in turn
 * is largely a function of how many cycles are spent processing an
 * event burst.
 */
#define DSW_MIGRATION_INTERVAL (1000)
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
#define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3))

#define DSW_MAX_EVENTS_RECORDED (128)

#define DSW_MAX_FLOWS_PER_MIGRATION (8)

/* Only one outstanding migration per port is allowed. */
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION)

/* Enough room for pause request/confirm and unpause request/confirm
 * for all possible senders.
 */
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)

/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point
 * of dequeue(), arrange events so that events with the same flow id
 * on the same queue form a back-to-back "burst", and also so that
 * such bursts of different flow ids, but on the same queue, come
 * consecutively. All this is done in an attempt to improve data and
 * instruction cache usage for the application, at the cost of a
 * scheduler overhead increase.
 */

/* #define DSW_SORT_DEQUEUED */

struct dsw_queue_flow {
	uint8_t queue_id;
	uint16_t flow_hash;
};

enum dsw_migration_state {
	DSW_MIGRATION_STATE_IDLE,
	DSW_MIGRATION_STATE_FINISH_PENDING,
	DSW_MIGRATION_STATE_PAUSING,
	DSW_MIGRATION_STATE_UNPAUSING
};
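
/* The sketch below is illustrative only and is not the driver's
 * actual flow hash (the real flow id to flow hash mapping is defined
 * elsewhere in the PMD, not in this header): one way to fold a
 * 24-bit eventdev flow id down to a DSW_MAX_FLOWS_BITS-wide flow
 * hash is to XOR the id together in DSW_MAX_FLOWS_BITS-sized chunks.
 * The function name is hypothetical.
 */
static inline uint16_t
dsw_example_flow_id_fold(uint32_t flow_id)
{
	uint16_t hash = 0;
	unsigned int offset;

	/* Fold the 24 valid flow id bits, DSW_MAX_FLOWS_BITS at a
	 * time, so that all bits contribute to the resulting hash.
	 */
	for (offset = 0; offset < 24; offset += DSW_MAX_FLOWS_BITS)
		hash ^= (flow_id >> offset) & DSW_MAX_FLOWS_MASK;

	return hash;
}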

struct __rte_cache_aligned dsw_port {
	uint16_t id;

	/* Keeping a pointer here to avoid container_of() calls, which
	 * are expensive since they are very frequent and will result
	 * in an integer multiplication (since the port id is an index
	 * into the dsw_evdev port array).
	 */
	struct dsw_evdev *dsw;

	uint16_t dequeue_depth;
	uint16_t enqueue_depth;

	int32_t inflight_credits;

	int32_t new_event_threshold;

	bool implicit_release;

	uint16_t pending_releases;

	uint16_t next_parallel_flow_id;

	uint16_t ops_since_bg_task;

	/* Most recent 'background' processing. */
	uint64_t last_bg;

	/* For port load measurement. */
	uint64_t next_load_update;
	uint64_t load_update_interval;
	uint64_t measurement_start;
	uint64_t busy_start;
	uint64_t busy_cycles;
	uint64_t total_busy_cycles;

	/* For the ctl interface and flow migration mechanism. */
	uint64_t next_emigration;
	uint64_t migration_interval;
	enum dsw_migration_state migration_state;

	uint64_t emigration_start;
	uint64_t emigrations;
	uint64_t emigration_latency;

	uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION];
	struct dsw_queue_flow
		emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION];
	uint8_t emigration_targets_len;
	uint8_t cfm_cnt;

	uint64_t immigrations;

	uint16_t paused_flows_len;
	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];

	/* In a very contrived worst case, all in-flight events may end
	 * up being paused here.
	 */
	uint16_t paused_events_len;
	struct rte_event paused_events[DSW_MAX_EVENTS];

	uint16_t emigrating_events_len;
	/* Buffer for not-yet-processed events pertaining to a flow
	 * emigrating from this port. These events will be forwarded
	 * to the target port.
	 */
	struct rte_event emigrating_events[DSW_MAX_EVENTS];

	uint16_t seen_events_len;
	uint16_t seen_events_idx;
	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];

	uint64_t enqueue_calls;
	uint64_t new_enqueued;
	uint64_t forward_enqueued;
	uint64_t release_enqueued;
	uint64_t queue_enqueued[DSW_MAX_QUEUES];

	uint64_t dequeue_calls;
	uint64_t dequeued;
	uint64_t queue_dequeued[DSW_MAX_QUEUES];

	uint16_t out_buffer_len[DSW_MAX_PORTS];
	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];

	uint16_t in_buffer_len;
	uint16_t in_buffer_start;
	/* This buffer may contain events that were read from the
	 * in_ring during the flow migration process.
	 */
	struct rte_event in_buffer[DSW_MAX_EVENTS];

	alignas(RTE_CACHE_LINE_SIZE) struct rte_event_ring *in_ring;

	alignas(RTE_CACHE_LINE_SIZE) struct rte_ring *ctl_in_ring;

	/* Estimate of the current port load. */
	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int16_t) load;
	/* Estimate of flows currently migrating to this port. */
	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) immigration_load;
};
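
/* A rough sketch (an assumption about the scheduler internals, not
 * necessarily the exact formula used) of how the load measurement
 * fields above could be combined: at the end of a measurement period
 * the busy fraction is scaled to the DSW_MAX_LOAD range and blended
 * with the previous estimate using DSW_OLD_LOAD_WEIGHT, e.g.:
 *
 *   period_load = (busy_cycles * DSW_MAX_LOAD) /
 *                 (now - measurement_start);
 *   new_load = (period_load + old_load * DSW_OLD_LOAD_WEIGHT) /
 *              (DSW_OLD_LOAD_WEIGHT + 1);
 *
 * The result would end up in the atomic 'load' field, which other
 * ports may consult when selecting emigration targets.
 */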

struct dsw_queue {
	uint8_t schedule_type;
	RTE_BITSET_DECLARE(serving_ports, DSW_MAX_PORTS);
	uint16_t num_serving_ports;

	alignas(RTE_CACHE_LINE_SIZE) uint8_t flow_to_port_map[DSW_MAX_FLOWS];
};

/* Limited by the size of the 'serving_ports' bitmask. */
static_assert(DSW_MAX_PORTS <= 64, "Max compile-time port count exceeded");

struct dsw_evdev {
	struct rte_eventdev_data *data;

	struct dsw_port ports[DSW_MAX_PORTS];
	uint16_t num_ports;
	struct dsw_queue queues[DSW_MAX_QUEUES];
	uint8_t num_queues;
	int32_t max_inflight;

	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) credits_on_loan;
};

#define DSW_CTL_PAUSE_REQ (0)
#define DSW_CTL_UNPAUSE_REQ (1)
#define DSW_CTL_CFM (2)

struct __rte_aligned(4) dsw_ctl_msg {
	uint8_t type;
	uint8_t originating_port_id;
	uint8_t qfs_len;
	struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION];
};

uint16_t dsw_event_enqueue_burst(void *port,
				 const struct rte_event events[],
				 uint16_t events_len);
uint16_t dsw_event_enqueue_new_burst(void *port,
				     const struct rte_event events[],
				     uint16_t events_len);
uint16_t dsw_event_enqueue_forward_burst(void *port,
					 const struct rte_event events[],
					 uint16_t events_len);

uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
				 uint16_t num, uint64_t wait);
void dsw_event_maintain(void *port, int op);

int dsw_xstats_get_names(const struct rte_eventdev *dev,
			 enum rte_event_dev_xstats_mode mode,
			 uint8_t queue_port_id,
			 struct rte_event_dev_xstats_name *xstats_names,
			 uint64_t *ids, unsigned int size);
int dsw_xstats_get(const struct rte_eventdev *dev,
		   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
		   const uint64_t ids[], uint64_t values[], unsigned int n);
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
				const char *name, uint64_t *id);

static inline struct dsw_evdev *
dsw_pmd_priv(const struct rte_eventdev *eventdev)
{
	return eventdev->data->dev_private;
}

extern int event_dsw_logtype;
#define RTE_LOGTYPE_EVENT_DSW event_dsw_logtype

#define DSW_LOG_DP_LINE(level, fmt, ...) \
	RTE_LOG_DP_LINE(level, EVENT_DSW, "%s() line %u: " fmt, \
			__func__, __LINE__, ## __VA_ARGS__)

#define DSW_LOG_DP_PORT_LINE(level, port_id, fmt, ...) \
	DSW_LOG_DP_LINE(level, "<Port %d> " fmt, port_id, ## __VA_ARGS__)

#endif