/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

/* See http://doc.dpdk.org/guides/tools/testeventdev.html for test details */

static inline int
perf_queue_nb_event_queues(struct evt_options *opt)
{
	/* nb_queues = number of producers * number of stages */
	uint8_t nb_prod = opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR ?
		rte_eth_dev_count_avail() : evt_nr_active_lcores(opt->plcores);
	return nb_prod * opt->nb_stages;
}

static __rte_always_inline void
fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
		const uint8_t nb_stages)
{
	ev->queue_id++;
	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
	ev->op = RTE_EVENT_OP_FORWARD;
	ev->event_type = RTE_EVENT_TYPE_CPU;
}

static __rte_always_inline void
fwd_event_vector(struct rte_event *const ev, uint8_t *const sched_type_list,
		const uint8_t nb_stages)
{
	ev->queue_id++;
	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
	ev->op = RTE_EVENT_OP_FORWARD;
	ev->event_type = RTE_EVENT_TYPE_CPU_VECTOR;
}

static int
perf_queue_worker(void *arg, const int enable_fwd_latency)
{
	uint16_t enq = 0, deq = 0;
	struct rte_event ev;
	PERF_WORKER_INIT;

	while (t->done == false) {
		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!deq) {
			rte_pause();
			continue;
		}

		if (prod_crypto_type && (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
			if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
				continue;
		} else {
			pe = ev.event_ptr;
		}

		stage = ev.queue_id % nb_stages;
		if (enable_fwd_latency && !prod_timer_type && stage == 0)
		/* first q in pipeline, mark timestamp to compute fwd latency */
			perf_mark_fwd_latency(pe);

		/* last stage in pipeline */
		if (unlikely(stage == laststage)) {
			if (enable_fwd_latency)
				cnt = perf_process_last_stage_latency(pool, prod_crypto_type,
					&ev, w, bufs, sz, cnt);
			else
				cnt = perf_process_last_stage(pool, prod_crypto_type,
					&ev, w, bufs, sz, cnt);
		} else {
			fwd_event(&ev, sched_type_list, nb_stages);
			do {
				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
			} while (!enq && !t->done);
		}
	}

	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);

	return 0;
}

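/* Burst variant of the worker: events are dequeued up to BURST_SIZE at a
 * time, marked RTE_EVENT_OP_FORWARD or RTE_EVENT_OP_RELEASE per stage, and
 * enqueued back as a single burst.
 */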
static int
perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
{
	/* +1 to avoid prefetch out of array check */
	struct rte_event ev[BURST_SIZE + 1];
	uint16_t enq = 0, nb_rx = 0;
	PERF_WORKER_INIT;
	uint16_t i;

	while (t->done == false) {
		nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0);

		if (!nb_rx) {
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if (prod_crypto_type && (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
				if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
					continue;
			}

			stage = ev[i].queue_id % nb_stages;
			if (enable_fwd_latency && !prod_timer_type && stage == 0) {
				rte_prefetch0(ev[i+1].event_ptr);
				/* first queue in pipeline.
				 * mark time stamp to compute fwd latency
				 */
				perf_mark_fwd_latency(ev[i].event_ptr);
			}
			/* last stage in pipeline */
			if (unlikely(stage == laststage)) {
				if (enable_fwd_latency)
					cnt = perf_process_last_stage_latency(pool,
						prod_crypto_type, &ev[i], w, bufs, sz, cnt);
				else
					cnt = perf_process_last_stage(pool, prod_crypto_type,
						&ev[i], w, bufs, sz, cnt);

				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				fwd_event(&ev[i], sched_type_list, nb_stages);
			}
		}

		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
		while (enq < nb_rx && !t->done) {
			enq += rte_event_enqueue_burst(dev, port,
							ev + enq, nb_rx - enq);
		}
	}

	perf_worker_cleanup(pool, dev, port, ev, enq, nb_rx);

	return 0;
}

static int
perf_queue_worker_vector(void *arg, const int enable_fwd_latency)
{
	uint16_t enq = 0, deq = 0;
	struct rte_event ev;
	PERF_WORKER_INIT;

	RTE_SET_USED(sz);
	RTE_SET_USED(cnt);
	RTE_SET_USED(prod_crypto_type);

	while (t->done == false) {
		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!deq)
			continue;

		if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
			if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
				continue;
		}

		stage = ev.queue_id % nb_stages;
		/* First q in pipeline, mark timestamp to compute fwd latency */
		if (enable_fwd_latency && !prod_timer_type && stage == 0)
			perf_mark_fwd_latency(pe);

		/* Last stage in pipeline */
		if (unlikely(stage == laststage)) {
			perf_process_vector_last_stage(pool, t->ca_op_pool, &ev, w,
					enable_fwd_latency);
		} else {
			fwd_event_vector(&ev, sched_type_list, nb_stages);
			do {
				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
			} while (!enq && !t->done);
		}
	}

	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);

	return 0;
}

static int
worker_wrapper(void *arg)
{
	struct worker_data *w = arg;
	struct evt_options *opt = w->t->opt;

	const bool burst = evt_has_burst_mode(w->dev_id);
	const int fwd_latency = opt->fwd_latency;

	/* allow compiler to optimize */
	if (opt->ena_vector && opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR)
		return perf_queue_worker_vector(arg, fwd_latency);
	else if (!burst && !fwd_latency)
		return perf_queue_worker(arg, 0);
	else if (!burst && fwd_latency)
		return perf_queue_worker(arg, 1);
	else if (burst && !fwd_latency)
		return perf_queue_worker_burst(arg, 0);
	else if (burst && fwd_latency)
		return perf_queue_worker_burst(arg, 1);

	rte_panic("invalid worker\n");
}

static int
perf_queue_launch_lcores(struct evt_test *test, struct evt_options *opt)
{
	return perf_launch_lcores(test, opt, worker_wrapper);
}

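/* Event device setup: one port per worker lcore (plus producer ports when
 * producers run on lcores) and nb_producers * nb_stages event queues, each
 * queue taking the schedule type of its stage and, optionally, a per-stage
 * priority.
 */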
static int
perf_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
{
	uint8_t queue;
	int nb_stages = opt->nb_stages;
	int ret;
	int nb_ports;
	int nb_queues;
	uint16_t prod;
	struct rte_event_dev_info dev_info;
	struct test_perf *t = evt_test_priv(test);

	nb_ports = evt_nr_active_lcores(opt->wlcores);
	nb_ports += opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR ||
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR ? 0 :
		evt_nr_active_lcores(opt->plcores);

	nb_queues = perf_queue_nb_event_queues(opt);

	ret = rte_event_dev_info_get(opt->dev_id, &dev_info);
	if (ret) {
		evt_err("failed to get eventdev info %d", opt->dev_id);
		return ret;
	}

	ret = evt_configure_eventdev(opt, nb_queues, nb_ports);
	if (ret) {
		evt_err("failed to configure eventdev %d", opt->dev_id);
		return ret;
	}

	struct rte_event_queue_conf q_conf = {
		.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
		.nb_atomic_flows = opt->nb_flows,
		.nb_atomic_order_sequences = opt->nb_flows,
	};
	/* queue configurations */
	for (queue = 0; queue < nb_queues; queue++) {
		q_conf.schedule_type =
			(opt->sched_type_list[queue % nb_stages]);

		if (opt->q_priority) {
			uint8_t stage_pos = queue % nb_stages;
			/* Configure event queues(stage 0 to stage n) with
			 * RTE_EVENT_DEV_PRIORITY_LOWEST to
			 * RTE_EVENT_DEV_PRIORITY_HIGHEST.
			 */
			uint8_t step = RTE_EVENT_DEV_PRIORITY_LOWEST /
					(nb_stages - 1);
			/* Higher prio for the queues closer to last stage */
			q_conf.priority = RTE_EVENT_DEV_PRIORITY_LOWEST -
					(step * stage_pos);
		}
		ret = rte_event_queue_setup(opt->dev_id, queue, &q_conf);
		if (ret) {
			evt_err("failed to setup queue=%d", queue);
			return ret;
		}
	}

	if (opt->wkr_deq_dep > dev_info.max_event_port_dequeue_depth)
		opt->wkr_deq_dep = dev_info.max_event_port_dequeue_depth;

	/* port configuration */
	const struct rte_event_port_conf p_conf = {
		.dequeue_depth = opt->wkr_deq_dep,
		.enqueue_depth = dev_info.max_event_port_dequeue_depth,
		.new_event_threshold = dev_info.max_num_events,
	};

	ret = perf_event_dev_port_setup(test, opt, nb_stages /* stride */,
					nb_queues, &p_conf);
	if (ret)
		return ret;

	if (!evt_has_distributed_sched(opt->dev_id)) {
		uint32_t service_id;

		rte_event_dev_service_id_get(opt->dev_id, &service_id);
		ret = evt_service_setup(service_id);
		if (ret) {
			evt_err("No service lcore found to run event dev.");
			return ret;
		}
	}

	ret = rte_event_dev_start(opt->dev_id);
	if (ret) {
		evt_err("failed to start eventdev %d", opt->dev_id);
		return ret;
	}

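	/* Start the producers only after the event device is up: ethdev ports
	 * and their Rx adapters, event timer adapters, or cryptodevs,
	 * depending on the configured producer type.
	 */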
	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		RTE_ETH_FOREACH_DEV(prod) {
			ret = rte_eth_dev_start(prod);
			if (ret) {
				evt_err("Ethernet dev [%d] failed to start."
						" Using synthetic producer",
						prod);
				return ret;
			}

			ret = rte_event_eth_rx_adapter_start(prod);
			if (ret) {
				evt_err("Rx adapter[%d] start failed", prod);
				return ret;
			}
			printf("%s: Port[%d] using Rx adapter[%d] started\n",
					__func__, prod, prod);
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
		for (prod = 0; prod < opt->nb_timer_adptrs; prod++) {
			ret = rte_event_timer_adapter_start(
					t->timer_adptr[prod]);
			if (ret) {
				evt_err("failed to start event timer adapter %d",
						prod);
				return ret;
			}
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		uint8_t cdev_id, cdev_count;

		cdev_count = rte_cryptodev_count();
		for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
			ret = rte_cryptodev_start(cdev_id);
			if (ret) {
				evt_err("Failed to start cryptodev %u",
						cdev_id);
				return ret;
			}
		}
	}

	return 0;
}

static void
perf_queue_opt_dump(struct evt_options *opt)
{
	evt_dump_fwd_latency(opt);
	perf_opt_dump(opt, perf_queue_nb_event_queues(opt));
}

static int
perf_queue_opt_check(struct evt_options *opt)
{
	return perf_opt_check(opt, perf_queue_nb_event_queues(opt));
}

static bool
perf_queue_capability_check(struct evt_options *opt)
{
	struct rte_event_dev_info dev_info;

	rte_event_dev_info_get(opt->dev_id, &dev_info);
	if (dev_info.max_event_queues < perf_queue_nb_event_queues(opt) ||
			dev_info.max_event_ports < perf_nb_event_ports(opt)) {
		evt_err("not enough eventdev queues=%d/%d or ports=%d/%d",
			perf_queue_nb_event_queues(opt),
			dev_info.max_event_queues,
			perf_nb_event_ports(opt), dev_info.max_event_ports);
		return false;
	}

	return true;
}

static const struct evt_test_ops perf_queue = {
	.cap_check = perf_queue_capability_check,
	.opt_check = perf_queue_opt_check,
	.opt_dump = perf_queue_opt_dump,
	.test_setup = perf_test_setup,
	.mempool_setup = perf_mempool_setup,
	.ethdev_setup = perf_ethdev_setup,
	.cryptodev_setup = perf_cryptodev_setup,
	.ethdev_rx_stop = perf_ethdev_rx_stop,
	.eventdev_setup = perf_queue_eventdev_setup,
	.launch_lcores = perf_queue_launch_lcores,
	.eventdev_destroy = perf_eventdev_destroy,
	.mempool_destroy = perf_mempool_destroy,
	.ethdev_destroy = perf_ethdev_destroy,
	.cryptodev_destroy = perf_cryptodev_destroy,
	.test_result = perf_test_result,
	.test_destroy = perf_test_destroy,
};

EVT_TEST_REGISTER(perf_queue);