xref: /dpdk/app/test-eventdev/test_perf_common.c (revision 89f0711f9ddfb5822da9d34f384b92f72a61c4dc)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

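/*
 * Print the packet distribution across the worker cores and return the
 * test result recorded while the test was running.
 */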
int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	int i;
	uint64_t total = 0;
	struct test_perf *t = evt_test_priv(test);

	printf("Packet distribution across worker cores :\n");
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;
	for (i = 0; i < t->nb_workers; i++)
		printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:"
				CLGRN" %3.2f\n"CLNRM, i,
				t->worker[i].processed_pkts,
				(((double)t->worker[i].processed_pkts)/total)
				* 100);

	return t->result;
}

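/*
 * Synthetic producer loop: take a perf_elt from the mempool, timestamp it
 * and enqueue it as a NEW stage-0 event, spreading events round-robin over
 * nb_flows flow ids, until nb_pkts events are injected or the test is done.
 */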
static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}

static int
perf_producer_wrapper(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	/* Launch the producer function only in case of synthetic producer. */
	if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
		return perf_producer(arg);
	return 0;
}

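/* Sum of packets processed by all workers, read after a load barrier. */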
static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

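/* Sum of the latency (in timer cycles) accumulated by all workers. */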
static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}


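/*
 * Launch the given worker function on every selected worker lcore and the
 * producer on every selected producer lcore, then poll for progress from
 * the master lcore: once per second print the current and average
 * throughput (plus average forward latency when enabled); for the
 * synthetic producer, declare a deadlock if nothing was scheduled for
 * five seconds.
 */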
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				&t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer_wrapper,
				&t->prod[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

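	/* Bookkeeping for the monitoring loop below: the 1s throughput
	 * sample and the 5s deadlock watchdog both start from the total
	 * packet count the producers are expected to inject.
	 */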
	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining-remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->result = EVT_TEST_SUCCESS;
				if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
					t->done = true;
					rte_smp_wmb();
					break;
				}
			}
		}

		if (new_cycles - dead_lock_cycles > dead_lock_sample &&
				opt->prod_type == EVT_PROD_TYPE_SYNT) {
			remaining = t->outstand_pkts - processed_pkts(t);
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("No schedules for 5 seconds, deadlock suspected");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}

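/*
 * Create one event eth Rx adapter per ethernet port, add all of that port's
 * Rx queues to it (feeding event queue prod * stride), set up a service
 * core when the adapter has no internal port capability, then start the
 * ethernet port and the adapter.
 */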
static int
perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
		struct rte_event_port_conf prod_conf)
{
	int ret = 0;
	uint16_t prod;
	struct rte_event_eth_rx_adapter_queue_conf queue_conf;

	memset(&queue_conf, 0,
			sizeof(struct rte_event_eth_rx_adapter_queue_conf));
	queue_conf.ev.sched_type = opt->sched_type_list[0];
	for (prod = 0; prod < rte_eth_dev_count(); prod++) {
		uint32_t cap;

		ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id,
				prod, &cap);
		if (ret) {
			evt_err("failed to get event rx adapter capabilities"
					" for event dev[%d]",
					opt->dev_id);
			return ret;
		}
		queue_conf.ev.queue_id = prod * stride;
		ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id,
				&prod_conf);
		if (ret) {
			evt_err("failed to create rx adapter[%d]", prod);
			return ret;
		}
		ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1,
				&queue_conf);
		if (ret) {
			evt_err("failed to add rx queues to adapter[%d]", prod);
			return ret;
		}

		if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
			uint32_t service_id;

			rte_event_eth_rx_adapter_service_id_get(prod,
					&service_id);
			ret = evt_service_setup(service_id);
			if (ret) {
				evt_err("Failed to setup service core"
						" for Rx adapter\n");
				return ret;
			}
		}

		ret = rte_eth_dev_start(prod);
		if (ret) {
			evt_err("Ethernet dev [%d] failed to start", prod);
			return ret;
		}

		ret = rte_event_eth_rx_adapter_start(prod);
		if (ret) {
			evt_err("Rx adapter[%d] start failed", prod);
			return ret;
		}
		printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
				prod, prod);
	}

	return ret;
}

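/*
 * Set up one event port per worker lcore, linked to all event queues, then
 * the producer ports: Rx adapter ports when the ethernet Rx adapter is the
 * producer, or one plain event port per synthetic producer lcore otherwise.
 */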
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint16_t port, prod;
	int ret = -1;
	struct rte_event_port_conf port_conf;

	memset(&port_conf, 0, sizeof(struct rte_event_port_conf));
	rte_event_port_default_conf_get(opt->dev_id, 0, &port_conf);

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = port_conf.enqueue_depth,
			.new_event_threshold = port_conf.new_event_threshold,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
				port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	struct rte_event_port_conf prod_conf = {
			.dequeue_depth = port_conf.dequeue_depth,
			.enqueue_depth = port_conf.enqueue_depth,
			.new_event_threshold = port_conf.new_event_threshold,
	};
	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		for ( ; port < perf_nb_event_ports(opt); port++) {
			struct prod_data *p = &t->prod[port];
			p->t = t;
		}

		ret = perf_event_rx_adapter_setup(opt, stride, prod_conf);
		if (ret)
			return ret;
	} else {
		prod = 0;
		for ( ; port < perf_nb_event_ports(opt); port++) {
			struct prod_data *p = &t->prod[port];

			p->dev_id = opt->dev_id;
			p->port_id = port;
			p->queue_id = prod * stride;
			p->t = t;

			ret = rte_event_port_setup(opt->dev_id, port,
					&prod_conf);
			if (ret) {
				evt_err("failed to setup port %d", port);
				return ret;
			}
			prod++;
		}
	}

	return ret;
}

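/*
 * Validate the lcore assignment, stage and scheduling type options and the
 * requested port/queue counts, then apply fixups: forward latency needs
 * more than one stage, latency measurement enables queue priority, and
 * nb_pkts == 0 selects an effectively unbounded packet count.
 */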
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;

	/* N producers + N workers + 1 master lcore when synthetic producer
	 * cores are used; else N workers + 1 master lcore when the Rx
	 * adapter produces the events.
	 */
	lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

	if (rte_lcore_count() < lcores) {
		evt_err("test needs minimum %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlap with master lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlap with producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more worker lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("minimum one worker is required");
		return -1;
	}

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		/* Validate producer lcores */
		if (evt_lcores_has_overlap(opt->plcores,
					rte_get_master_lcore())) {
			evt_err("producer lcores overlap with master lcore");
			return -1;
		}
		if (evt_has_disabled_lcore(opt->plcores)) {
			evt_err("one or more producer lcores are not enabled");
			return -1;
		}
		if (!evt_has_active_lcore(opt->plcores)) {
			evt_err("minimum one producer is required");
			return -1;
		}
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
	evt_dump_producer_type(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

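/* Mempool object constructor: zero-fill each perf_elt at pool creation. */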
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
	    void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

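/*
 * Configure every ethernet port with a single RSS Rx queue backed by the
 * test mempool and a single Tx queue, then enable promiscuous mode.
 * Nothing to do when the synthetic producer is used.
 */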
#define NB_RX_DESC			128
#define NB_TX_DESC			512
int
perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
{
	int i;
	struct test_perf *t = evt_test_priv(test);
	struct rte_eth_conf port_conf = {
		.rxmode = {
			.mq_mode = ETH_MQ_RX_RSS,
			.max_rx_pkt_len = ETHER_MAX_LEN,
			.split_hdr_size = 0,
			.header_split   = 0,
			.hw_ip_checksum = 0,
			.hw_vlan_filter = 0,
			.hw_vlan_strip  = 0,
			.hw_vlan_extend = 0,
			.jumbo_frame    = 0,
			.hw_strip_crc   = 1,
		},
		.rx_adv_conf = {
			.rss_conf = {
				.rss_key = NULL,
				.rss_hf = ETH_RSS_IP,
			},
		},
	};

	if (opt->prod_type == EVT_PROD_TYPE_SYNT)
		return 0;

	if (!rte_eth_dev_count()) {
		evt_err("No ethernet ports found.");
		return -ENODEV;
	}

	for (i = 0; i < rte_eth_dev_count(); i++) {

		if (rte_eth_dev_configure(i, 1, 1, &port_conf) < 0) {
			evt_err("Failed to configure eth port [%d]", i);
			return -EINVAL;
		}

		if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
				rte_socket_id(), NULL, t->pool) < 0) {
			evt_err("Failed to setup eth port [%d] rx_queue: %d.",
					i, 0);
			return -EINVAL;
		}

		if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
					rte_socket_id(), NULL) < 0) {
			evt_err("Failed to setup eth port [%d] tx_queue: %d.",
					i, 0);
			return -EINVAL;
		}

		rte_eth_promiscuous_enable(i);
	}

	return 0;
}

void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	int i;
	RTE_SET_USED(test);

	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		for (i = 0; i < rte_eth_dev_count(); i++) {
			rte_event_eth_rx_adapter_stop(i);
			rte_eth_dev_stop(i);
			rte_eth_dev_close(i);
		}
	}
}

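/*
 * The synthetic producer works on raw perf_elt objects, so it uses a plain
 * mempool; the Rx adapter path needs mbufs, so it uses a pktmbuf pool.
 */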
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		t->pool = rte_mempool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				sizeof(struct perf_elt), /* element size */
				512, /* cache size */
				0, NULL, NULL,
				perf_elt_init, /* obj constructor */
				NULL, opt->socket_id, 0); /* flags */
	} else {
		t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				512, /* cache size */
				0, /* private data size */
				RTE_MBUF_DEFAULT_BUF_SIZE, /* data room size */
				opt->socket_id); /* socket id */
	}

	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

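/*
 * Allocate the per-test private data on the requested socket and seed it
 * from the command line options.
 */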
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));
	return 0;
nomem:
	return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}
633