/* xref: /dpdk/app/test-eventdev/test_perf_common.c
 * (revision f8244c6399d9fae6afab6770ae367aef38742ea5)
 */
/*
 *   BSD LICENSE
 *
 *   Copyright (C) Cavium, Inc 2017.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Cavium, Inc nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "test_perf_common.h"

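/*
 * Common helpers for the eventdev "perf" tests (presumably shared by the
 * perf_queue and perf_atq variants): producer and scheduler service loops,
 * event port and mempool setup, option validation and result reporting.
 */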
int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	return t->result;
}

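/*
 * Producer lcore: take a perf_elt from the mempool, timestamp it and
 * enqueue it as a NEW event for stage 0 on this producer's queue,
 * cycling the flow id over nb_flows, until nb_pkts events are injected
 * or the test is marked done.
 */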
static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}

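/*
 * Dedicated scheduler lcore for event devices without distributed
 * scheduling; calls rte_event_schedule() until the test completes.
 */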
static inline int
scheduler(void *arg)
{
	struct test_perf *t = arg;
	const uint8_t dev_id = t->opt->dev_id;

	while (t->done == false)
		rte_event_schedule(dev_id);

	return 0;
}

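/* Sum of packets processed by all worker lcores. */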
static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

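/* Sum of the forward latency (in timer cycles) accumulated by all workers. */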
static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}

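/*
 * Launch the worker, producer and (when required) scheduler lcores, then
 * poll for progress: print the throughput (and average forward latency when
 * enabled) once per second, and flag a deadlock if the processed packet
 * count does not advance for 5 seconds.
 */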
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				 &t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
					 lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch scheduler */
	if (!evt_has_distributed_sched(opt->dev_id)) {
		ret = rte_eal_remote_launch(scheduler, t, opt->slcore);
		if (ret) {
			evt_err("failed to launch sched %d", opt->slcore);
			return ret;
		}
	}

	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining-remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->done = true;
				t->result = EVT_TEST_SUCCESS;
				rte_smp_wmb();
				break;
			}
		}

		if (new_cycles - dead_lock_cycles > dead_lock_sample) {
			remaining = t->outstand_pkts - processed_pkts(t);
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("No schedules for 5 seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}

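/*
 * Configure one event port per worker lcore, linked to all queues, followed
 * by one unlinked port per producer lcore; producer N injects into queue
 * N * stride.
 */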
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint8_t port, prod;
	int ret = -1;

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = 64,
			.new_event_threshold = 4096,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
				port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	const struct rte_event_port_conf prod_conf = {
			.dequeue_depth = 8,
			.enqueue_depth = 32,
			.new_event_threshold = 1200,
	};
	prod = 0;
	for ( ; port < perf_nb_event_ports(opt); port++) {
		struct prod_data *p = &t->prod[port];

		p->dev_id = opt->dev_id;
		p->port_id = port;
		p->queue_id = prod * stride;
		p->t = t;

		ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}
		prod++;
	}

	return ret;
}

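/* Validate the command line options against lcore/device limits and apply fixups. */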
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;
	bool need_slcore = !evt_has_distributed_sched(opt->dev_id);

	/* N producers + N workers + 1 scheduler (per device capability) + 1 master */
	lcores = need_slcore ? 4 : 3;

	if (rte_lcore_count() < lcores) {
		evt_err("test needs a minimum of %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlap with master lcore");
		return -1;
	}
	if (need_slcore && evt_lcores_has_overlap(opt->wlcores, opt->slcore)) {
		evt_err("worker lcores overlap with scheduler lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlap with producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more worker lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("at least one worker lcore is required");
		return -1;
	}

	/* Validate producer lcores */
	if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
		evt_err("producer lcores overlap with master lcore");
		return -1;
	}
	if (need_slcore && evt_lcores_has_overlap(opt->plcores, opt->slcore)) {
		evt_err("producer lcores overlap with scheduler lcore");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->plcores)) {
		evt_err("one or more producer lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->plcores)) {
		evt_err("at least one producer lcore is required");
		return -1;
	}

	/* Validate scheduler lcore */
	if (!evt_has_distributed_sched(opt->dev_id) &&
			opt->slcore == (int)rte_get_master_lcore()) {
		evt_err("scheduler lcore and master lcore should be different");
		return -1;
	}
	if (need_slcore && !rte_lcore_is_enabled(opt->slcore)) {
		evt_err("scheduler lcore is not enabled");
		return -1;
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid only when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabling queue priority for latency measurement");
		opt->q_priority = 1;
	}
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	if (!evt_has_distributed_sched(opt->dev_id))
		evt_dump_scheduler_lcore(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
}

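/* Stop and close the event device. */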
void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

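/* Mempool object constructor: zero each perf_elt. */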
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
	    void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

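/* Create the pool of perf_elt objects that carry the producer timestamps. */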
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	t->pool = rte_mempool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				sizeof(struct perf_elt), /* element size */
				512, /* cache size */
				0, NULL, NULL,
				perf_elt_init, /* obj constructor */
				NULL, opt->socket_id, 0); /* flags */
	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

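/* Allocate and initialize the per-test private data (struct test_perf). */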
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));
	return 0;
nomem:
	return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}
498