/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

/* See http://doc.dpdk.org/guides/tools/testeventdev.html for test details */
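
/*
 * Representative invocation (core lists and option values below are
 * illustrative only; see the guide above for the complete option set):
 *
 *   dpdk-test-eventdev -l 0-4 -- --test=perf_queue --plcores=1 \
 *       --wlcores=2,3,4 --stages=2 --nb_flows=64 --nb_pkts=10000000 \
 *       --fwd_latency
 */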

static inline int
perf_queue_nb_event_queues(struct evt_options *opt)
{
	/* nb_queues = number of producers * number of stages */
	uint8_t nb_prod = opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR ?
		rte_eth_dev_count_avail() : evt_nr_active_lcores(opt->plcores);
	return nb_prod * opt->nb_stages;
}

static __rte_always_inline void
fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
		const uint8_t nb_stages)
{
	ev->queue_id++;
	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
	ev->op = RTE_EVENT_OP_FORWARD;
	ev->event_type = RTE_EVENT_TYPE_CPU;
}

static __rte_always_inline void
fwd_event_vector(struct rte_event *const ev, uint8_t *const sched_type_list,
		const uint8_t nb_stages)
{
	ev->queue_id++;
	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
	ev->op = RTE_EVENT_OP_FORWARD;
	ev->event_type = RTE_EVENT_TYPE_CPU_VECTOR;
}
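
/*
 * Worked example of the queue/stage mapping used by the workers below
 * (producer and stage counts are illustrative): with 2 producers and
 * --stages=3, perf_queue_nb_event_queues() reports 6 event queues. Ports
 * are set up with a stride of nb_stages, so the producers inject on
 * queues 0 and 3, and an event on queue_id q is at pipeline stage
 * q % nb_stages. fwd_event()/fwd_event_vector() push an event to the next
 * queue until it reaches the last stage, where it is counted and released
 * instead of being forwarded.
 */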

static int
perf_queue_worker(void *arg, const int enable_fwd_latency)
{
	uint16_t enq = 0, deq = 0;
	struct rte_event ev;
	PERF_WORKER_INIT;

	while (t->done == false) {
		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!deq) {
			rte_pause();
			continue;
		}

		if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
		    (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
			if (perf_handle_crypto_ev(&ev, &pe, enable_fwd_latency))
				continue;
		} else {
			pe = ev.event_ptr;
		}

		stage = ev.queue_id % nb_stages;
		if (enable_fwd_latency && !prod_timer_type && stage == 0)
			/* first q in pipeline, mark timestamp to compute fwd latency */
			perf_mark_fwd_latency(pe);

		/* last stage in pipeline */
		if (unlikely(stage == laststage)) {
			if (enable_fwd_latency)
				cnt = perf_process_last_stage_latency(pool, prod_type,
					&ev, w, bufs, sz, cnt);
			else
				cnt = perf_process_last_stage(pool, prod_type,
					&ev, w, bufs, sz, cnt);
		} else {
			fwd_event(&ev, sched_type_list, nb_stages);
			do {
				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
			} while (!enq && !t->done);
		}
	}

	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);

	return 0;
}

static int
perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
{
	/* +1 to avoid prefetch out of array check */
	struct rte_event ev[BURST_SIZE + 1];
	uint16_t enq = 0, nb_rx = 0;
	PERF_WORKER_INIT;
	uint16_t i;

	while (t->done == false) {
		nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0);

		if (!nb_rx) {
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
			    (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
				if (perf_handle_crypto_ev(&ev[i], &pe, enable_fwd_latency))
					continue;
			}

			stage = ev[i].queue_id % nb_stages;
			if (enable_fwd_latency && !prod_timer_type && stage == 0) {
				rte_prefetch0(ev[i+1].event_ptr);
				/* first queue in pipeline.
				 * mark time stamp to compute fwd latency
				 */
				perf_mark_fwd_latency(ev[i].event_ptr);
			}
			/* last stage in pipeline */
			if (unlikely(stage == laststage)) {
				if (enable_fwd_latency)
					cnt = perf_process_last_stage_latency(pool,
						prod_type, &ev[i], w, bufs, sz, cnt);
				else
					cnt = perf_process_last_stage(pool, prod_type,
						&ev[i], w, bufs, sz, cnt);

				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				fwd_event(&ev[i], sched_type_list, nb_stages);
			}
		}

		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
		while (enq < nb_rx && !t->done) {
			enq += rte_event_enqueue_burst(dev, port,
							ev + enq, nb_rx - enq);
		}
	}

	perf_worker_cleanup(pool, dev, port, ev, enq, nb_rx);

	return 0;
}

static int
perf_queue_worker_vector(void *arg, const int enable_fwd_latency)
{
	uint16_t enq = 0, deq = 0;
	struct rte_event ev;
	PERF_WORKER_INIT;

	RTE_SET_USED(sz);
	RTE_SET_USED(cnt);
	RTE_SET_USED(prod_type);

	while (t->done == false) {
		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!deq)
			continue;

		if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
			if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
				continue;
		}

		stage = ev.queue_id % nb_stages;
		/* First q in pipeline, mark timestamp to compute fwd latency */
		if (enable_fwd_latency && !prod_timer_type && stage == 0)
			perf_mark_fwd_latency(pe);

		/* Last stage in pipeline */
		if (unlikely(stage == laststage)) {
			perf_process_vector_last_stage(pool, t->ca_op_pool, &ev, w,
							enable_fwd_latency);
		} else {
			fwd_event_vector(&ev, sched_type_list, nb_stages);
			do {
				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
			} while (!enq && !t->done);
		}
	}

	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);

	return 0;
}

static int
worker_wrapper(void *arg)
{
	struct worker_data *w = arg;
	struct evt_options *opt = w->t->opt;

	const bool burst = evt_has_burst_mode(w->dev_id);
	const int fwd_latency = opt->fwd_latency;

	/* allow compiler to optimize */
	if (opt->ena_vector && opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR)
		return perf_queue_worker_vector(arg, fwd_latency);
	else if (!burst && !fwd_latency)
		return perf_queue_worker(arg, 0);
	else if (!burst && fwd_latency)
		return perf_queue_worker(arg, 1);
	else if (burst && !fwd_latency)
		return perf_queue_worker_burst(arg, 0);
	else if (burst && fwd_latency)
		return perf_queue_worker_burst(arg, 1);

	rte_panic("invalid worker\n");
}

static int
perf_queue_launch_lcores(struct evt_test *test, struct evt_options *opt)
{
	return perf_launch_lcores(test, opt, worker_wrapper);
}
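
/*
 * Queue priority stepping used in the setup below, worked through for an
 * illustrative --q_priority run with 3 stages: step is
 * RTE_EVENT_DEV_PRIORITY_LOWEST / (nb_stages - 1) = 255 / 2 = 127, giving
 * per-stage priorities 255, 128 and 1 for stages 0, 1 and 2. Lower values
 * mean higher priority, so queues closer to the last stage are preferred
 * by the scheduler.
 */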

static int
perf_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
{
	uint8_t queue;
	int nb_stages = opt->nb_stages;
	int ret;
	int nb_ports;
	int nb_queues;
	uint16_t prod;
	struct rte_event_dev_info dev_info;
	struct test_perf *t = evt_test_priv(test);

	nb_ports = evt_nr_active_lcores(opt->wlcores);
	nb_ports += opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR ||
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR ? 0 :
		evt_nr_active_lcores(opt->plcores);

	nb_queues = perf_queue_nb_event_queues(opt);

	ret = rte_event_dev_info_get(opt->dev_id, &dev_info);
	if (ret) {
		evt_err("failed to get eventdev info %d", opt->dev_id);
		return ret;
	}

	ret = evt_configure_eventdev(opt, nb_queues, nb_ports);
	if (ret) {
		evt_err("failed to configure eventdev %d", opt->dev_id);
		return ret;
	}

	struct rte_event_queue_conf q_conf = {
			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
			.nb_atomic_flows = opt->nb_flows,
			.nb_atomic_order_sequences = opt->nb_flows,
	};
	/* queue configurations */
	for (queue = 0; queue < nb_queues; queue++) {
		q_conf.schedule_type =
			(opt->sched_type_list[queue % nb_stages]);

		if (opt->q_priority) {
			uint8_t stage_pos = queue % nb_stages;
			/* Configure event queues(stage 0 to stage n) with
			 * RTE_EVENT_DEV_PRIORITY_LOWEST to
			 * RTE_EVENT_DEV_PRIORITY_HIGHEST.
			 */
			uint8_t step = RTE_EVENT_DEV_PRIORITY_LOWEST /
					(nb_stages - 1);
			/* Higher prio for the queues closer to last stage */
			q_conf.priority = RTE_EVENT_DEV_PRIORITY_LOWEST -
					(step * stage_pos);
		}
		ret = rte_event_queue_setup(opt->dev_id, queue, &q_conf);
		if (ret) {
			evt_err("failed to setup queue=%d", queue);
			return ret;
		}
	}

	if (opt->wkr_deq_dep > dev_info.max_event_port_dequeue_depth)
		opt->wkr_deq_dep = dev_info.max_event_port_dequeue_depth;

	/* port configuration */
	const struct rte_event_port_conf p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = dev_info.max_event_port_dequeue_depth,
			.new_event_threshold = dev_info.max_num_events,
	};

	ret = perf_event_dev_port_setup(test, opt, nb_stages /* stride */,
					nb_queues, &p_conf);
	if (ret)
		return ret;

	if (!evt_has_distributed_sched(opt->dev_id)) {
		uint32_t service_id;

		rte_event_dev_service_id_get(opt->dev_id, &service_id);
		ret = evt_service_setup(service_id);
		if (ret) {
			evt_err("No service lcore found to run event dev.");
			return ret;
		}
	}

	ret = rte_event_dev_start(opt->dev_id);
	if (ret) {
		evt_err("failed to start eventdev %d", opt->dev_id);
		return ret;
	}

	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		RTE_ETH_FOREACH_DEV(prod) {
			ret = rte_eth_dev_start(prod);
			if (ret) {
				evt_err("Ethernet dev [%d] failed to start."
						" Using synthetic producer", prod);
				return ret;
			}

			ret = rte_event_eth_rx_adapter_start(prod);
			if (ret) {
				evt_err("Rx adapter[%d] start failed", prod);
				return ret;
			}
			printf("%s: Port[%d] using Rx adapter[%d] started\n",
					__func__, prod, prod);
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
		for (prod = 0; prod < opt->nb_timer_adptrs; prod++) {
			ret = rte_event_timer_adapter_start(
					t->timer_adptr[prod]);
			if (ret) {
				evt_err("failed to Start event timer adapter %d",
						prod);
				return ret;
			}
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		uint8_t cdev_id, cdev_count;

		cdev_count = rte_cryptodev_count();
		for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
			ret = rte_cryptodev_start(cdev_id);
			if (ret) {
				evt_err("Failed to start cryptodev %u",
						cdev_id);
				return ret;
			}
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		uint8_t dma_dev_id = 0, dma_dev_count;

		dma_dev_count = rte_dma_count_avail();
		if (dma_dev_count == 0) {
			evt_err("No dma devices available\n");
			return -ENODEV;
		}

		ret = rte_dma_start(dma_dev_id);
		if (ret) {
			evt_err("Failed to start dmadev %u", dma_dev_id);
			return ret;
		}
	}

	return 0;
}

static void
perf_queue_opt_dump(struct evt_options *opt)
{
	evt_dump_fwd_latency(opt);
	perf_opt_dump(opt, perf_queue_nb_event_queues(opt));
}

static int
perf_queue_opt_check(struct evt_options *opt)
{
	return perf_opt_check(opt, perf_queue_nb_event_queues(opt));
}

static bool
perf_queue_capability_check(struct evt_options *opt)
{
	struct rte_event_dev_info dev_info;

	rte_event_dev_info_get(opt->dev_id, &dev_info);
	if (dev_info.max_event_queues < perf_queue_nb_event_queues(opt) ||
			dev_info.max_event_ports < perf_nb_event_ports(opt)) {
		evt_err("not enough eventdev queues=%d/%d or ports=%d/%d",
			perf_queue_nb_event_queues(opt),
			dev_info.max_event_queues,
			perf_nb_event_ports(opt), dev_info.max_event_ports);
		return false;
	}

	return true;
}

static const struct evt_test_ops perf_queue = {
	.cap_check = perf_queue_capability_check,
	.opt_check = perf_queue_opt_check,
	.opt_dump = perf_queue_opt_dump,
	.test_setup = perf_test_setup,
	.mempool_setup = perf_mempool_setup,
	.ethdev_setup = perf_ethdev_setup,
	.cryptodev_setup = perf_cryptodev_setup,
	.dmadev_setup = perf_dmadev_setup,
	.ethdev_rx_stop = perf_ethdev_rx_stop,
	.eventdev_setup = perf_queue_eventdev_setup,
	.launch_lcores = perf_queue_launch_lcores,
	.eventdev_destroy = perf_eventdev_destroy,
	.mempool_destroy = perf_mempool_destroy,
	.ethdev_destroy = perf_ethdev_destroy,
	.cryptodev_destroy = perf_cryptodev_destroy,
	.dmadev_destroy = perf_dmadev_destroy,
	.test_result = perf_test_result,
	.test_destroy = perf_test_destroy,
};

EVT_TEST_REGISTER(perf_queue);