/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

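/* Print the per-worker packet distribution (count and percentage of the
 * total) and return the verdict recorded in t->result.
 */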
int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	int i;
	uint64_t total = 0;
	struct test_perf *t = evt_test_priv(test);

	printf("Packet distribution across worker cores:\n");
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;
	for (i = 0; i < t->nb_workers; i++)
		printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:"
				CLGRN" %3.2f\n"CLNRM, i,
				t->worker[i].processed_pkts,
				(((double)t->worker[i].processed_pkts)/total)
				* 100);

	return t->result;
}

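/* Synthetic producer loop: allocate a perf_elt from the mempool, timestamp
 * it and enqueue it as a NEW stage-0 event, cycling the flow id over
 * nb_flows, until nb_pkts events are injected or the test is marked done.
 * The timestamp is refreshed while retrying a failed enqueue.
 */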
static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}

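/* Producer entry point for rte_eal_remote_launch(); only the synthetic
 * producer needs a producer lcore, the ethdev Rx adapter injects events on
 * its own.
 */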
static int
perf_producer_wrapper(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	/* Launch the producer function only in case of synthetic producer. */
	if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
		return perf_producer(arg);
	return 0;
}

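/* Sum of the packets processed by all workers; rte_smp_rmb() keeps the
 * monitoring core's view of the per-worker counters reasonably fresh.
 */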
static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

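/* Sum of the forward latency (in timer cycles) accumulated by all workers. */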
static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}

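/* Launch one worker per enabled worker lcore and one producer per enabled
 * producer lcore, then monitor progress from the master lcore until all
 * outstanding packets are processed or a deadlock is detected.
 */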
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				&t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer_wrapper,
				&t->prod[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

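	/* Once per perf_sample interval (one second), print the current and
	 * average Mpps, plus the average forward latency when enabled. For
	 * the synthetic producer, declare a deadlock if no progress is made
	 * for dead_lock_sample cycles (five seconds).
	 */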
	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining-remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->result = EVT_TEST_SUCCESS;
				if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
					t->done = true;
					rte_smp_wmb();
					break;
				}
			}
		}

		if (new_cycles - dead_lock_cycles > dead_lock_sample &&
				opt->prod_type == EVT_PROD_TYPE_SYNT) {
			remaining = t->outstand_pkts - processed_pkts(t);
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("No events scheduled for 5 seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}

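/* Create one Rx adapter per ethdev port, direct all of its Rx queues to
 * event queue (port * stride), set up a service core when the adapter lacks
 * an internal port, then start the ethdev port and the adapter.
 */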
static int
perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
		struct rte_event_port_conf prod_conf)
{
	int ret = 0;
	uint16_t prod;
	struct rte_event_eth_rx_adapter_queue_conf queue_conf;

	memset(&queue_conf, 0,
			sizeof(struct rte_event_eth_rx_adapter_queue_conf));
	queue_conf.ev.sched_type = opt->sched_type_list[0];
	for (prod = 0; prod < rte_eth_dev_count(); prod++) {
		uint32_t cap;

		ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id,
				prod, &cap);
		if (ret) {
			evt_err("failed to get event rx adapter[%d]"
					" capabilities",
					opt->dev_id);
			return ret;
		}
		queue_conf.ev.queue_id = prod * stride;
		ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id,
				&prod_conf);
		if (ret) {
			evt_err("failed to create rx adapter[%d]", prod);
			return ret;
		}
		ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1,
				&queue_conf);
		if (ret) {
			evt_err("failed to add rx queues to adapter[%d]", prod);
			return ret;
		}

		if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
			uint32_t service_id;

			rte_event_eth_rx_adapter_service_id_get(prod,
					&service_id);
			ret = evt_service_setup(service_id);
			if (ret) {
				evt_err("Failed to setup service core"
						" for Rx adapter\n");
				return ret;
			}
		}

		ret = rte_eth_dev_start(prod);
		if (ret) {
			evt_err("Ethernet dev [%d] failed to start."
					" Using synthetic producer", prod);
			return ret;
		}

		ret = rte_event_eth_rx_adapter_start(prod);
		if (ret) {
			evt_err("Rx adapter[%d] start failed", prod);
			return ret;
		}
		printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
				prod, prod);
	}

	return ret;
}

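/* Set up one event port per worker lcore, linked to all queues. Producer
 * ports follow: with the ethdev Rx adapter only the back pointer is filled
 * in and perf_event_rx_adapter_setup() does the rest; synthetic producers
 * get plain unlinked event ports that feed queue (producer index * stride).
 */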
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues,
				const struct rte_event_port_conf *port_conf)
{
	struct test_perf *t = evt_test_priv(test);
	uint16_t port, prod;
	int ret = -1;

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
				port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, port_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		for ( ; port < perf_nb_event_ports(opt); port++) {
			struct prod_data *p = &t->prod[port];
			p->t = t;
		}

		ret = perf_event_rx_adapter_setup(opt, stride, *port_conf);
		if (ret)
			return ret;
	} else {
		prod = 0;
		for ( ; port < perf_nb_event_ports(opt); port++) {
			struct prod_data *p = &t->prod[port];

			p->dev_id = opt->dev_id;
			p->port_id = port;
			p->queue_id = prod * stride;
			p->t = t;

			ret = rte_event_port_setup(opt->dev_id, port,
					port_conf);
			if (ret) {
				evt_err("failed to setup port %d", port);
				return ret;
			}
			prod++;
		}
	}

	return ret;
}

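/* Validate lcore assignments, stage and scheduling type lists and the
 * queue/port limits, then apply fixups: disable fwd_latency for single-stage
 * runs, enable queue priority when measuring latency and pick a default
 * packet count when none was given.
 */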
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;

	/* N producer + N worker + 1 master when producer cores are used
	 * Else N worker + 1 master when Rx adapter is used
	 */
	lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

	if (rte_lcore_count() < lcores) {
		evt_err("test needs minimum %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlap with master lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlap with producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more worker lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("minimum one worker is required");
		return -1;
	}

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		/* Validate producer lcores */
		if (evt_lcores_has_overlap(opt->plcores,
					rte_get_master_lcore())) {
			evt_err("producer lcores overlap with master lcore");
			return -1;
		}
		if (evt_has_disabled_lcore(opt->plcores)) {
			evt_err("one or more producer lcores are not enabled");
			return -1;
		}
		if (!evt_has_active_lcore(opt->plcores)) {
			evt_err("minimum one producer is required");
			return -1;
		}
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

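/* Dump the effective test configuration before the run starts. */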
void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
	evt_dump_producer_type(opt);
}

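/* Stop and close the event device. */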
void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

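/* Mempool object constructor: zero-initialize every perf_elt. */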
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
	    void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

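/* Configure every ethdev port for the Rx adapter producer: one Rx and one Tx
 * queue, RSS on IP, CRC stripping and promiscuous mode. Nothing to do for
 * the synthetic producer.
 */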
#define NB_RX_DESC			128
#define NB_TX_DESC			512
int
perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
{
	int i;
	struct test_perf *t = evt_test_priv(test);
	struct rte_eth_conf port_conf = {
		.rxmode = {
			.mq_mode = ETH_MQ_RX_RSS,
			.max_rx_pkt_len = ETHER_MAX_LEN,
			.split_hdr_size = 0,
			.header_split   = 0,
			.hw_ip_checksum = 0,
			.hw_vlan_filter = 0,
			.hw_vlan_strip  = 0,
			.hw_vlan_extend = 0,
			.jumbo_frame    = 0,
			.hw_strip_crc   = 1,
		},
		.rx_adv_conf = {
			.rss_conf = {
				.rss_key = NULL,
				.rss_hf = ETH_RSS_IP,
			},
		},
	};

	if (opt->prod_type == EVT_PROD_TYPE_SYNT)
		return 0;

	if (!rte_eth_dev_count()) {
		evt_err("No ethernet ports found.");
		return -ENODEV;
	}

	for (i = 0; i < rte_eth_dev_count(); i++) {

		if (rte_eth_dev_configure(i, 1, 1, &port_conf) < 0) {
			evt_err("Failed to configure eth port [%d]", i);
			return -EINVAL;
		}

		if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
				rte_socket_id(), NULL, t->pool) < 0) {
			evt_err("Failed to setup eth port [%d] rx_queue: %d.",
					i, 0);
			return -EINVAL;
		}

		if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
					rte_socket_id(), NULL) < 0) {
			evt_err("Failed to setup eth port [%d] tx_queue: %d.",
					i, 0);
			return -EINVAL;
		}

		rte_eth_promiscuous_enable(i);
	}

	return 0;
}

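/* For the Rx adapter producer, stop the adapters and stop and close every
 * ethdev port.
 */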
void
perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	int i;
	RTE_SET_USED(test);

	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		for (i = 0; i < rte_eth_dev_count(); i++) {
			rte_event_eth_rx_adapter_stop(i);
			rte_eth_dev_stop(i);
			rte_eth_dev_close(i);
		}
	}
}

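/* Create the producer mempool: raw perf_elt objects for the synthetic
 * producer, a pktmbuf pool when events come from the ethdev Rx adapter.
 */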
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		t->pool = rte_mempool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				sizeof(struct perf_elt), /* element size */
				512, /* cache size */
				0, NULL, NULL,
				perf_elt_init, /* obj constructor */
				NULL, opt->socket_id, 0); /* flags */
	} else {
		t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				512, /* cache size */
				0, /* private data size */
				RTE_MBUF_DEFAULT_BUF_SIZE,
				opt->socket_id); /* socket id */
	}

	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

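/* Free the mempool created in perf_mempool_setup(). */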
void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

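/* Allocate and initialize the test private data (struct test_perf):
 * outstanding packet budget, worker/flow/packet counts and the scheduling
 * type list copied from the options.
 */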
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));
	return 0;
nomem:
	return -ENOMEM;
}

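/* Release the test private data. */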
void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}