/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

/* See http://doc.dpdk.org/guides/tools/testeventdev.html for test details */

static inline int
perf_queue_nb_event_queues(struct evt_options *opt)
{
	/* nb_queues = number of producers * number of stages */
	uint8_t nb_prod = opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR ?
		rte_eth_dev_count_avail() : evt_nr_active_lcores(opt->plcores);
	return nb_prod * opt->nb_stages;
}

/* Forward the event to the next stage: advance the queue id and apply the
 * schedule type configured for that stage.
 */
static __rte_always_inline void
fwd_event(struct rte_event *const ev, uint8_t *const sched_type_list,
	  const uint8_t nb_stages)
{
	ev->queue_id++;
	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
	ev->op = RTE_EVENT_OP_FORWARD;
	ev->event_type = RTE_EVENT_TYPE_CPU;
}

/* Same as fwd_event() but for event vectors. */
static __rte_always_inline void
fwd_event_vector(struct rte_event *const ev, uint8_t *const sched_type_list,
		 const uint8_t nb_stages)
{
	ev->queue_id++;
	ev->sched_type = sched_type_list[ev->queue_id % nb_stages];
	ev->op = RTE_EVENT_OP_FORWARD;
	ev->event_type = RTE_EVENT_TYPE_CPU_VECTOR;
}

static int
perf_queue_worker(void *arg, const int enable_fwd_latency)
{
	uint16_t enq = 0, deq = 0;
	struct rte_event ev;
	PERF_WORKER_INIT;

	RTE_SET_USED(pe);
	while (t->done == false) {
		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!deq) {
			rte_pause();
			continue;
		}

		if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
		    (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
			if (perf_handle_crypto_ev(&ev))
				continue;
		} else {
			pe = ev.event_ptr;
		}

		stage = ev.queue_id % nb_stages;
		if (enable_fwd_latency && !prod_timer_type && stage == 0)
			/* first q in pipeline, mark timestamp to compute fwd latency */
			perf_mark_fwd_latency(prod_type, &ev);

		/* last stage in pipeline */
		if (unlikely(stage == laststage)) {
			if (enable_fwd_latency)
				cnt = perf_process_last_stage_latency(pool, prod_type,
					&ev, w, bufs, sz, cnt);
			else
				cnt = perf_process_last_stage(pool, prod_type,
					&ev, w, bufs, sz, cnt);
		} else {
			fwd_event(&ev, sched_type_list, nb_stages);
			do {
				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
			} while (!enq && !t->done);
		}
	}

	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);

	return 0;
}

static int
perf_queue_worker_burst(void *arg, const int enable_fwd_latency)
{
	/* +1 to avoid prefetch out of array check */
	struct rte_event ev[BURST_SIZE + 1];
	uint16_t enq = 0, nb_rx = 0;
	PERF_WORKER_INIT;
	uint16_t i;

	RTE_SET_USED(pe);
	while (t->done == false) {
		nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0);

		if (!nb_rx) {
			rte_pause();
			continue;
		}

		for (i = 0; i < nb_rx; i++) {
			if ((prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) &&
			    (ev[i].event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
				if (perf_handle_crypto_ev(&ev[i]))
					continue;
			}

			stage = ev[i].queue_id % nb_stages;
			if (enable_fwd_latency && !prod_timer_type && stage == 0) {
				rte_prefetch0(ev[i+1].event_ptr);
				/* first queue in pipeline.
				 * mark time stamp to compute fwd latency
				 */
				perf_mark_fwd_latency(prod_type, &ev[i]);
			}
			/* last stage in pipeline */
			if (unlikely(stage == laststage)) {
				if (enable_fwd_latency)
					cnt = perf_process_last_stage_latency(pool,
						prod_type, &ev[i], w, bufs, sz, cnt);
				else
					cnt = perf_process_last_stage(pool, prod_type,
						&ev[i], w, bufs, sz, cnt);

				ev[i].op = RTE_EVENT_OP_RELEASE;
			} else {
				fwd_event(&ev[i], sched_type_list, nb_stages);
			}
		}

		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
		while (enq < nb_rx && !t->done) {
			enq += rte_event_enqueue_burst(dev, port,
							ev + enq, nb_rx - enq);
		}
	}

	perf_worker_cleanup(pool, dev, port, ev, enq, nb_rx);

	return 0;
}

static int
perf_queue_worker_vector(void *arg, const int enable_fwd_latency)
{
	uint16_t enq = 0, deq = 0;
	struct rte_event ev;
	PERF_WORKER_INIT;

	RTE_SET_USED(sz);
	RTE_SET_USED(cnt);
	RTE_SET_USED(prod_type);

	while (t->done == false) {
		deq = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!deq)
			continue;

		if (ev.event_type == RTE_EVENT_TYPE_CRYPTODEV_VECTOR) {
			if (perf_handle_crypto_vector_ev(&ev, &pe, enable_fwd_latency))
				continue;
		}

		stage = ev.queue_id % nb_stages;
		/* First q in pipeline, mark timestamp to compute fwd latency */
		if (enable_fwd_latency && !prod_timer_type && stage == 0)
			pe->timestamp = rte_get_timer_cycles();

		/* Last stage in pipeline */
		if (unlikely(stage == laststage)) {
			perf_process_vector_last_stage(pool, t->ca_op_pool, &ev, w,
					enable_fwd_latency);
		} else {
			fwd_event_vector(&ev, sched_type_list, nb_stages);
			do {
				enq = rte_event_enqueue_burst(dev, port, &ev, 1);
			} while (!enq && !t->done);
		}
	}

	perf_worker_cleanup(pool, dev, port, &ev, enq, deq);

	return 0;
}

static int
worker_wrapper(void *arg)
{
	struct worker_data *w = arg;
	struct evt_options *opt = w->t->opt;

	const bool burst = evt_has_burst_mode(w->dev_id);
	const int fwd_latency = opt->fwd_latency;

	/* allow compiler to optimize */
	if (opt->ena_vector && opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR)
		return perf_queue_worker_vector(arg, fwd_latency);
	else if (!burst && !fwd_latency)
		return perf_queue_worker(arg, 0);
	else if (!burst && fwd_latency)
		return perf_queue_worker(arg, 1);
	else if (burst && !fwd_latency)
		return perf_queue_worker_burst(arg, 0);
	else if (burst && fwd_latency)
		return perf_queue_worker_burst(arg, 1);

	rte_panic("invalid worker\n");
}

static int
perf_queue_launch_lcores(struct evt_test *test, struct evt_options *opt)
{
	return perf_launch_lcores(test, opt, worker_wrapper);
}

static int
perf_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
{
	uint8_t queue;
	int nb_stages = opt->nb_stages;
	int ret;
	int nb_ports;
	int nb_queues;
	uint16_t prod;
	struct rte_event_dev_info dev_info;
	struct test_perf *t = evt_test_priv(test);

	nb_ports = evt_nr_active_lcores(opt->wlcores);
	nb_ports += opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR ||
		opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR ?
		0 : evt_nr_active_lcores(opt->plcores);

	nb_queues = perf_queue_nb_event_queues(opt);

	ret = rte_event_dev_info_get(opt->dev_id, &dev_info);
	if (ret) {
		evt_err("failed to get eventdev info %d", opt->dev_id);
		return ret;
	}

	ret = evt_configure_eventdev(opt, nb_queues, nb_ports);
	if (ret) {
		evt_err("failed to configure eventdev %d", opt->dev_id);
		return ret;
	}

	struct rte_event_queue_conf q_conf = {
		.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
		.nb_atomic_flows = opt->nb_flows,
		.nb_atomic_order_sequences = opt->nb_flows,
	};
	/* queue configurations */
	for (queue = 0; queue < nb_queues; queue++) {
		q_conf.schedule_type =
			(opt->sched_type_list[queue % nb_stages]);

		if (opt->q_priority) {
			uint8_t stage_pos = queue % nb_stages;
			/* Configure event queues(stage 0 to stage n) with
			 * RTE_EVENT_DEV_PRIORITY_LOWEST to
			 * RTE_EVENT_DEV_PRIORITY_HIGHEST.
			 */
			uint8_t step = RTE_EVENT_DEV_PRIORITY_LOWEST /
					(nb_stages - 1);
			/* Higher prio for the queues closer to last stage */
			q_conf.priority = RTE_EVENT_DEV_PRIORITY_LOWEST -
					(step * stage_pos);
		}
		ret = rte_event_queue_setup(opt->dev_id, queue, &q_conf);
		if (ret) {
			evt_err("failed to setup queue=%d", queue);
			return ret;
		}
	}

	if (opt->wkr_deq_dep > dev_info.max_event_port_dequeue_depth)
		opt->wkr_deq_dep = dev_info.max_event_port_dequeue_depth;

	/* port configuration */
	const struct rte_event_port_conf p_conf = {
		.dequeue_depth = opt->wkr_deq_dep,
		.enqueue_depth = dev_info.max_event_port_dequeue_depth,
		.new_event_threshold = dev_info.max_num_events,
	};

	ret = perf_event_dev_port_setup(test, opt, nb_stages /* stride */,
					nb_queues, &p_conf);
	if (ret)
		return ret;

	if (!evt_has_distributed_sched(opt->dev_id)) {
		uint32_t service_id;

		rte_event_dev_service_id_get(opt->dev_id, &service_id);
		ret = evt_service_setup(service_id);
		if (ret) {
			evt_err("No service lcore found to run event dev.");
			return ret;
		}
	}

	ret = rte_event_dev_start(opt->dev_id);
	if (ret) {
		evt_err("failed to start eventdev %d", opt->dev_id);
		return ret;
	}

	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		RTE_ETH_FOREACH_DEV(prod) {
			ret = rte_eth_dev_start(prod);
			if (ret) {
				evt_err("Ethernet dev [%d] failed to start."
					" Using synthetic producer", prod);
				return ret;
			}

			ret = rte_event_eth_rx_adapter_start(prod);
			if (ret) {
				evt_err("Rx adapter[%d] start failed", prod);
				return ret;
			}
			printf("%s: Port[%d] using Rx adapter[%d] started\n",
				__func__, prod, prod);
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_TIMER_ADPTR) {
		for (prod = 0; prod < opt->nb_timer_adptrs; prod++) {
			ret = rte_event_timer_adapter_start(
					t->timer_adptr[prod]);
			if (ret) {
				evt_err("failed to start event timer adapter %d",
					prod);
				return ret;
			}
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_CRYPTO_ADPTR) {
		uint8_t cdev_id, cdev_count;

		cdev_count = rte_cryptodev_count();
		for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
			ret = rte_cryptodev_start(cdev_id);
			if (ret) {
				evt_err("Failed to start cryptodev %u", cdev_id);
				return ret;
			}
		}
	} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
		uint8_t dma_dev_id = 0, dma_dev_count;

		dma_dev_count = rte_dma_count_avail();
		if (dma_dev_count == 0) {
			evt_err("No dma devices available");
			return -ENODEV;
		}

		ret = rte_dma_start(dma_dev_id);
		if (ret) {
			evt_err("Failed to start dmadev %u", dma_dev_id);
			return ret;
		}
	}

	return 0;
}

static void
perf_queue_opt_dump(struct evt_options *opt)
{
	evt_dump_fwd_latency(opt);
	perf_opt_dump(opt, perf_queue_nb_event_queues(opt));
}

static int
perf_queue_opt_check(struct evt_options *opt)
{
	return perf_opt_check(opt, perf_queue_nb_event_queues(opt));
}

static bool
perf_queue_capability_check(struct evt_options *opt)
{
	struct rte_event_dev_info dev_info;

	rte_event_dev_info_get(opt->dev_id, &dev_info);
	if (dev_info.max_event_queues < perf_queue_nb_event_queues(opt) ||
			dev_info.max_event_ports < perf_nb_event_ports(opt)) {
		evt_err("not enough eventdev queues=%d/%d or ports=%d/%d",
			perf_queue_nb_event_queues(opt),
			dev_info.max_event_queues,
			perf_nb_event_ports(opt), dev_info.max_event_ports);
		return false;
	}

	return true;
}

static const struct evt_test_ops perf_queue = {
	.cap_check = perf_queue_capability_check,
	.opt_check = perf_queue_opt_check,
	.opt_dump = perf_queue_opt_dump,
	.test_setup = perf_test_setup,
	.mempool_setup = perf_mempool_setup,
	.ethdev_setup = perf_ethdev_setup,
	.cryptodev_setup = perf_cryptodev_setup,
	.dmadev_setup = perf_dmadev_setup,
	.ethdev_rx_stop = perf_ethdev_rx_stop,
	.eventdev_setup = perf_queue_eventdev_setup,
	.launch_lcores = perf_queue_launch_lcores,
	.eventdev_destroy = perf_eventdev_destroy,
	.mempool_destroy = perf_mempool_destroy,
	.ethdev_destroy = perf_ethdev_destroy,
	.cryptodev_destroy = perf_cryptodev_destroy,
	.dmadev_destroy = perf_dmadev_destroy,
	.test_result = perf_test_result,
	.test_destroy = perf_test_destroy,
};

EVT_TEST_REGISTER(perf_queue);