xref: /dpdk/app/test-eventdev/test_perf_common.c (revision c7aa67f5a9e4a59a816a6506aa87cfb133981315)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Cavium, Inc
3  */
4 
5 #include "test_perf_common.h"
6 
7 int
8 perf_test_result(struct evt_test *test, struct evt_options *opt)
9 {
10 	RTE_SET_USED(opt);
11 	struct test_perf *t = evt_test_priv(test);
12 
13 	return t->result;
14 }
15 
16 static inline int
17 perf_producer(void *arg)
18 {
19 	struct prod_data *p  = arg;
20 	struct test_perf *t = p->t;
21 	struct evt_options *opt = t->opt;
22 	const uint8_t dev_id = p->dev_id;
23 	const uint8_t port = p->port_id;
24 	struct rte_mempool *pool = t->pool;
25 	const uint64_t nb_pkts = t->nb_pkts;
26 	const uint32_t nb_flows = t->nb_flows;
27 	uint32_t flow_counter = 0;
28 	uint64_t count = 0;
29 	struct perf_elt *m;
30 	struct rte_event ev;
31 
32 	if (opt->verbose_level > 1)
33 		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
34 				rte_lcore_id(), dev_id, port, p->queue_id);
35 
36 	ev.event = 0;
37 	ev.op = RTE_EVENT_OP_NEW;
38 	ev.queue_id = p->queue_id;
39 	ev.sched_type = t->opt->sched_type_list[0];
40 	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
41 	ev.event_type =  RTE_EVENT_TYPE_CPU;
42 	ev.sub_event_type = 0; /* stage 0 */
43 
44 	while (count < nb_pkts && t->done == false) {
45 		if (rte_mempool_get(pool, (void **)&m) < 0)
46 			continue;
47 
48 		ev.flow_id = flow_counter++ % nb_flows;
49 		ev.event_ptr = m;
50 		m->timestamp = rte_get_timer_cycles();
51 		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
52 			if (t->done)
53 				break;
54 			rte_pause();
55 			m->timestamp = rte_get_timer_cycles();
56 		}
57 		count++;
58 	}
59 
60 	return 0;
61 }
62 
63 static int
64 perf_producer_wrapper(void *arg)
65 {
66 	struct prod_data *p  = arg;
67 	struct test_perf *t = p->t;
68 	/* Launch the producer function only in case of synthetic producer. */
69 	if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
70 		return perf_producer(arg);
71 	return 0;
72 }
73 
74 static inline uint64_t
75 processed_pkts(struct test_perf *t)
76 {
77 	uint8_t i;
78 	uint64_t total = 0;
79 
80 	rte_smp_rmb();
81 	for (i = 0; i < t->nb_workers; i++)
82 		total += t->worker[i].processed_pkts;
83 
84 	return total;
85 }
86 
87 static inline uint64_t
88 total_latency(struct test_perf *t)
89 {
90 	uint8_t i;
91 	uint64_t total = 0;
92 
93 	rte_smp_rmb();
94 	for (i = 0; i < t->nb_workers; i++)
95 		total += t->worker[i].latency;
96 
97 	return total;
98 }
99 
100 
101 int
102 perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
103 		int (*worker)(void *))
104 {
105 	int ret, lcore_id;
106 	struct test_perf *t = evt_test_priv(test);
107 
108 	int port_idx = 0;
109 	/* launch workers */
110 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
111 		if (!(opt->wlcores[lcore_id]))
112 			continue;
113 
114 		ret = rte_eal_remote_launch(worker,
115 				 &t->worker[port_idx], lcore_id);
116 		if (ret) {
117 			evt_err("failed to launch worker %d", lcore_id);
118 			return ret;
119 		}
120 		port_idx++;
121 	}
122 
123 	/* launch producers */
124 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
125 		if (!(opt->plcores[lcore_id]))
126 			continue;
127 
128 		ret = rte_eal_remote_launch(perf_producer_wrapper,
129 				&t->prod[port_idx], lcore_id);
130 		if (ret) {
131 			evt_err("failed to launch perf_producer %d", lcore_id);
132 			return ret;
133 		}
134 		port_idx++;
135 	}
136 
137 	const uint64_t total_pkts = opt->nb_pkts *
138 			evt_nr_active_lcores(opt->plcores);
139 
140 	uint64_t dead_lock_cycles = rte_get_timer_cycles();
141 	int64_t dead_lock_remaining  =  total_pkts;
142 	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;
143 
144 	uint64_t perf_cycles = rte_get_timer_cycles();
145 	int64_t perf_remaining  = total_pkts;
146 	const uint64_t perf_sample = rte_get_timer_hz();
147 
148 	static float total_mpps;
149 	static uint64_t samples;
150 
151 	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
152 	int64_t remaining = t->outstand_pkts - processed_pkts(t);
153 
154 	while (t->done == false) {
155 		const uint64_t new_cycles = rte_get_timer_cycles();
156 
157 		if ((new_cycles - perf_cycles) > perf_sample) {
158 			const uint64_t latency = total_latency(t);
159 			const uint64_t pkts = processed_pkts(t);
160 
161 			remaining = t->outstand_pkts - pkts;
162 			float mpps = (float)(perf_remaining-remaining)/1000000;
163 
164 			perf_remaining = remaining;
165 			perf_cycles = new_cycles;
166 			total_mpps += mpps;
167 			++samples;
168 			if (opt->fwd_latency && pkts > 0) {
169 				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
170 					mpps, total_mpps/samples,
171 					(float)(latency/pkts)/freq_mhz);
172 			} else {
173 				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
174 					mpps, total_mpps/samples);
175 			}
176 			fflush(stdout);
177 
178 			if (remaining <= 0) {
179 				t->result = EVT_TEST_SUCCESS;
180 				if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
181 					t->done = true;
182 					rte_smp_wmb();
183 					break;
184 				}
185 			}
186 		}
187 
188 		if (new_cycles - dead_lock_cycles > dead_lock_sample &&
189 				opt->prod_type == EVT_PROD_TYPE_SYNT) {
190 			remaining = t->outstand_pkts - processed_pkts(t);
191 			if (dead_lock_remaining == remaining) {
192 				rte_event_dev_dump(opt->dev_id, stdout);
193 				evt_err("No schedules for seconds, deadlock");
194 				t->done = true;
195 				rte_smp_wmb();
196 				break;
197 			}
198 			dead_lock_remaining = remaining;
199 			dead_lock_cycles = new_cycles;
200 		}
201 	}
202 	printf("\n");
203 	return 0;
204 }
205 
206 static int
207 perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
208 		struct rte_event_port_conf prod_conf)
209 {
210 	int ret = 0;
211 	uint16_t prod;
212 	struct rte_event_eth_rx_adapter_queue_conf queue_conf;
213 
214 	memset(&queue_conf, 0,
215 			sizeof(struct rte_event_eth_rx_adapter_queue_conf));
216 	queue_conf.ev.sched_type = opt->sched_type_list[0];
217 	for (prod = 0; prod < rte_eth_dev_count(); prod++) {
218 		uint32_t cap;
219 
220 		ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id,
221 				prod, &cap);
222 		if (ret) {
223 			evt_err("failed to get event rx adapter[%d]"
224 					" capabilities",
225 					opt->dev_id);
226 			return ret;
227 		}
228 		queue_conf.ev.queue_id = prod * stride;
229 		ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id,
230 				&prod_conf);
231 		if (ret) {
232 			evt_err("failed to create rx adapter[%d]", prod);
233 			return ret;
234 		}
235 		ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1,
236 				&queue_conf);
237 		if (ret) {
238 			evt_err("failed to add rx queues to adapter[%d]", prod);
239 			return ret;
240 		}
241 
242 		if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
243 			uint32_t service_id;
244 
245 			rte_event_eth_rx_adapter_service_id_get(prod,
246 					&service_id);
247 			ret = evt_service_setup(service_id);
248 			if (ret) {
249 				evt_err("Failed to setup service core"
250 						" for Rx adapter\n");
251 				return ret;
252 			}
253 		}
254 
255 		ret = rte_eth_dev_start(prod);
256 		if (ret) {
257 			evt_err("Ethernet dev [%d] failed to start."
258 					" Using synthetic producer", prod);
259 			return ret;
260 		}
261 
262 		ret = rte_event_eth_rx_adapter_start(prod);
263 		if (ret) {
264 			evt_err("Rx adapter[%d] start failed", prod);
265 			return ret;
266 		}
267 		printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
268 				prod, prod);
269 	}
270 
271 	return ret;
272 }
273 
274 int
275 perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
276 				uint8_t stride, uint8_t nb_queues)
277 {
278 	struct test_perf *t = evt_test_priv(test);
279 	uint16_t port, prod;
280 	int ret = -1;
281 	struct rte_event_port_conf port_conf;
282 
283 	memset(&port_conf, 0, sizeof(struct rte_event_port_conf));
284 	rte_event_port_default_conf_get(opt->dev_id, 0, &port_conf);
285 
286 	/* port configuration */
287 	const struct rte_event_port_conf wkr_p_conf = {
288 			.dequeue_depth = opt->wkr_deq_dep,
289 			.enqueue_depth = port_conf.enqueue_depth,
290 			.new_event_threshold = port_conf.new_event_threshold,
291 	};
292 
293 	/* setup one port per worker, linking to all queues */
294 	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
295 				port++) {
296 		struct worker_data *w = &t->worker[port];
297 
298 		w->dev_id = opt->dev_id;
299 		w->port_id = port;
300 		w->t = t;
301 		w->processed_pkts = 0;
302 		w->latency = 0;
303 
304 		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
305 		if (ret) {
306 			evt_err("failed to setup port %d", port);
307 			return ret;
308 		}
309 
310 		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
311 		if (ret != nb_queues) {
312 			evt_err("failed to link all queues to port %d", port);
313 			return -EINVAL;
314 		}
315 	}
316 
317 	/* port for producers, no links */
318 	struct rte_event_port_conf prod_conf = {
319 			.dequeue_depth = port_conf.dequeue_depth,
320 			.enqueue_depth = port_conf.enqueue_depth,
321 			.new_event_threshold = port_conf.new_event_threshold,
322 	};
323 	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
324 		for ( ; port < perf_nb_event_ports(opt); port++) {
325 			struct prod_data *p = &t->prod[port];
326 			p->t = t;
327 		}
328 
329 		ret = perf_event_rx_adapter_setup(opt, stride, prod_conf);
330 		if (ret)
331 			return ret;
332 	} else {
333 		prod = 0;
334 		for ( ; port < perf_nb_event_ports(opt); port++) {
335 			struct prod_data *p = &t->prod[port];
336 
337 			p->dev_id = opt->dev_id;
338 			p->port_id = port;
339 			p->queue_id = prod * stride;
340 			p->t = t;
341 
342 			ret = rte_event_port_setup(opt->dev_id, port,
343 					&prod_conf);
344 			if (ret) {
345 				evt_err("failed to setup port %d", port);
346 				return ret;
347 			}
348 			prod++;
349 		}
350 	}
351 
352 	return ret;
353 }
354 
355 int
356 perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
357 {
358 	unsigned int lcores;
359 
360 	/* N producer + N worker + 1 master when producer cores are used
361 	 * Else N worker + 1 master when Rx adapter is used
362 	 */
363 	lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;
364 
365 	if (rte_lcore_count() < lcores) {
366 		evt_err("test need minimum %d lcores", lcores);
367 		return -1;
368 	}
369 
370 	/* Validate worker lcores */
371 	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
372 		evt_err("worker lcores overlaps with master lcore");
373 		return -1;
374 	}
375 	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
376 		evt_err("worker lcores overlaps producer lcores");
377 		return -1;
378 	}
379 	if (evt_has_disabled_lcore(opt->wlcores)) {
380 		evt_err("one or more workers lcores are not enabled");
381 		return -1;
382 	}
383 	if (!evt_has_active_lcore(opt->wlcores)) {
384 		evt_err("minimum one worker is required");
385 		return -1;
386 	}
387 
388 	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
389 		/* Validate producer lcores */
390 		if (evt_lcores_has_overlap(opt->plcores,
391 					rte_get_master_lcore())) {
392 			evt_err("producer lcores overlaps with master lcore");
393 			return -1;
394 		}
395 		if (evt_has_disabled_lcore(opt->plcores)) {
396 			evt_err("one or more producer lcores are not enabled");
397 			return -1;
398 		}
399 		if (!evt_has_active_lcore(opt->plcores)) {
400 			evt_err("minimum one producer is required");
401 			return -1;
402 		}
403 	}
404 
405 	if (evt_has_invalid_stage(opt))
406 		return -1;
407 
408 	if (evt_has_invalid_sched_type(opt))
409 		return -1;
410 
411 	if (nb_queues > EVT_MAX_QUEUES) {
412 		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
413 		return -1;
414 	}
415 	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
416 		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
417 		return -1;
418 	}
419 
420 	/* Fixups */
421 	if (opt->nb_stages == 1 && opt->fwd_latency) {
422 		evt_info("fwd_latency is valid when nb_stages > 1, disabling");
423 		opt->fwd_latency = 0;
424 	}
425 	if (opt->fwd_latency && !opt->q_priority) {
426 		evt_info("enabled queue priority for latency measurement");
427 		opt->q_priority = 1;
428 	}
429 	if (opt->nb_pkts == 0)
430 		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);
431 
432 	return 0;
433 }
434 
435 void
436 perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
437 {
438 	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
439 	evt_dump_producer_lcores(opt);
440 	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
441 	evt_dump_worker_lcores(opt);
442 	evt_dump_nb_stages(opt);
443 	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
444 	evt_dump("nb_evdev_queues", "%d", nb_queues);
445 	evt_dump_queue_priority(opt);
446 	evt_dump_sched_type_list(opt);
447 	evt_dump_producer_type(opt);
448 }
449 
450 void
451 perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
452 {
453 	RTE_SET_USED(test);
454 
455 	rte_event_dev_stop(opt->dev_id);
456 	rte_event_dev_close(opt->dev_id);
457 }
458 
459 static inline void
460 perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
461 	    void *obj, unsigned i __rte_unused)
462 {
463 	memset(obj, 0, mp->elt_size);
464 }
465 
466 #define NB_RX_DESC			128
467 #define NB_TX_DESC			512
468 int
469 perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
470 {
471 	int i;
472 	struct test_perf *t = evt_test_priv(test);
473 	struct rte_eth_conf port_conf = {
474 		.rxmode = {
475 			.mq_mode = ETH_MQ_RX_RSS,
476 			.max_rx_pkt_len = ETHER_MAX_LEN,
477 			.split_hdr_size = 0,
478 			.header_split   = 0,
479 			.hw_ip_checksum = 0,
480 			.hw_vlan_filter = 0,
481 			.hw_vlan_strip  = 0,
482 			.hw_vlan_extend = 0,
483 			.jumbo_frame    = 0,
484 			.hw_strip_crc   = 1,
485 		},
486 		.rx_adv_conf = {
487 			.rss_conf = {
488 				.rss_key = NULL,
489 				.rss_hf = ETH_RSS_IP,
490 			},
491 		},
492 	};
493 
494 	if (opt->prod_type == EVT_PROD_TYPE_SYNT)
495 		return 0;
496 
497 	if (!rte_eth_dev_count()) {
498 		evt_err("No ethernet ports found.");
499 		return -ENODEV;
500 	}
501 
502 	for (i = 0; i < rte_eth_dev_count(); i++) {
503 
504 		if (rte_eth_dev_configure(i, 1, 1,
505 					&port_conf)
506 				< 0) {
507 			evt_err("Failed to configure eth port [%d]", i);
508 			return -EINVAL;
509 		}
510 
511 		if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
512 				rte_socket_id(), NULL, t->pool) < 0) {
513 			evt_err("Failed to setup eth port [%d] rx_queue: %d.",
514 					i, 0);
515 			return -EINVAL;
516 		}
517 
518 		if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
519 					rte_socket_id(), NULL) < 0) {
520 			evt_err("Failed to setup eth port [%d] tx_queue: %d.",
521 					i, 0);
522 			return -EINVAL;
523 		}
524 
525 		rte_eth_promiscuous_enable(i);
526 	}
527 
528 	return 0;
529 }
530 
531 void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
532 {
533 	int i;
534 	RTE_SET_USED(test);
535 
536 	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
537 		for (i = 0; i < rte_eth_dev_count(); i++) {
538 			rte_event_eth_rx_adapter_stop(i);
539 			rte_eth_dev_stop(i);
540 			rte_eth_dev_close(i);
541 		}
542 	}
543 }
544 
545 int
546 perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
547 {
548 	struct test_perf *t = evt_test_priv(test);
549 
550 	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
551 		t->pool = rte_mempool_create(test->name, /* mempool name */
552 				opt->pool_sz, /* number of elements*/
553 				sizeof(struct perf_elt), /* element size*/
554 				512, /* cache size*/
555 				0, NULL, NULL,
556 				perf_elt_init, /* obj constructor */
557 				NULL, opt->socket_id, 0); /* flags */
558 	} else {
559 		t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
560 				opt->pool_sz, /* number of elements*/
561 				512, /* cache size*/
562 				0,
563 				RTE_MBUF_DEFAULT_BUF_SIZE,
564 				opt->socket_id); /* flags */
565 
566 	}
567 
568 	if (t->pool == NULL) {
569 		evt_err("failed to create mempool");
570 		return -ENOMEM;
571 	}
572 
573 	return 0;
574 }
575 
576 void
577 perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
578 {
579 	RTE_SET_USED(opt);
580 	struct test_perf *t = evt_test_priv(test);
581 
582 	rte_mempool_free(t->pool);
583 }
584 
585 int
586 perf_test_setup(struct evt_test *test, struct evt_options *opt)
587 {
588 	void *test_perf;
589 
590 	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
591 				RTE_CACHE_LINE_SIZE, opt->socket_id);
592 	if (test_perf  == NULL) {
593 		evt_err("failed to allocate test_perf memory");
594 		goto nomem;
595 	}
596 	test->test_priv = test_perf;
597 
598 	struct test_perf *t = evt_test_priv(test);
599 
600 	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
601 	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
602 	t->done = false;
603 	t->nb_pkts = opt->nb_pkts;
604 	t->nb_flows = opt->nb_flows;
605 	t->result = EVT_TEST_FAILED;
606 	t->opt = opt;
607 	memcpy(t->sched_type_list, opt->sched_type_list,
608 			sizeof(opt->sched_type_list));
609 	return 0;
610 nomem:
611 	return -ENOMEM;
612 }
613 
614 void
615 perf_test_destroy(struct evt_test *test, struct evt_options *opt)
616 {
617 	RTE_SET_USED(opt);
618 
619 	rte_free(test->test_priv);
620 }
621