xref: /dpdk/app/test-eventdev/test_perf_common.c (revision 3e0ceb9f17fff027fc6c8f18de35e11719ffa61e)
1 /*
2  *   BSD LICENSE
3  *
4  *   Copyright (C) Cavium, Inc 2017.
5  *
6  *   Redistribution and use in source and binary forms, with or without
7  *   modification, are permitted provided that the following conditions
8  *   are met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  *       notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above copyright
13  *       notice, this list of conditions and the following disclaimer in
14  *       the documentation and/or other materials provided with the
15  *       distribution.
16  *     * Neither the name of Cavium, Inc nor the names of its
17  *       contributors may be used to endorse or promote products derived
18  *       from this software without specific prior written permission.
19  *
20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
#include "test_perf_common.h"

int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	return t->result;
}

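/*
 * Producer loop: grab a perf_elt from the mempool, timestamp it and enqueue
 * it as a NEW event on stage 0. Flow ids are assigned round-robin over
 * nb_flows. Runs until nb_pkts events have been injected or the test is
 * marked done.
 */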
static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
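		/*
		 * Retry on enqueue backpressure; refresh the timestamp on
		 * each retry so the stall is not counted as forward latency.
		 */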
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}

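/* Sum the packets processed so far across all worker lcores. */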
static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

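/* Sum the accumulated forward latency (in timer cycles) of all workers. */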
static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}

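/*
 * Launch worker lcores, then producer lcores, then poll from the master
 * lcore: print throughput (and average forward latency when enabled) once
 * per second and flag a deadlock when no progress is made for 5 seconds.
 */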
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				 &t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
					 lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

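	/* Total number of events the producers are expected to inject. */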
	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

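	/* Deadlock detection: sample outstanding events every 5 seconds. */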
	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

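	/* Throughput sampling: report mpps once per second. */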
	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining - remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->done = true;
				t->result = EVT_TEST_SUCCESS;
				rte_smp_wmb();
				break;
			}
		}

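		/*
		 * Deadlock detection: if the outstanding event count has not
		 * moved for 5 seconds, dump the device state and abort.
		 */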
		if (new_cycles - dead_lock_cycles > dead_lock_sample) {
			remaining = t->outstand_pkts - processed_pkts(t);
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("no schedule progress for 5 seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}

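/*
 * Set up one event port per worker lcore, linked to all queues, followed by
 * one port per producer lcore. Producer ports are left unlinked as they only
 * enqueue new events.
 */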
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint8_t port, prod;
	int ret = -1;

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = 64,
			.new_event_threshold = 4096,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
				port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	const struct rte_event_port_conf prod_conf = {
			.dequeue_depth = 8,
			.enqueue_depth = 32,
			.new_event_threshold = 1200,
	};
	prod = 0;
	for ( ; port < perf_nb_event_ports(opt); port++) {
		struct prod_data *p = &t->prod[port];

		p->dev_id = opt->dev_id;
		p->port_id = port;
		p->queue_id = prod * stride;
		p->t = t;

		ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}
		prod++;
	}

	return ret;
}

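/*
 * Validate the lcore, queue and port configuration and apply fixups
 * (fwd_latency, q_priority, default nb_pkts).
 */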
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;

	/* N producer + N worker + 1 master */
	lcores = 3;

	if (rte_lcore_count() < lcores) {
		evt_err("test needs a minimum of %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlap with master lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlap with producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more worker lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("at least one worker lcore is required");
		return -1;
	}

	/* Validate producer lcores */
	if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
		evt_err("producer lcores overlap with master lcore");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->plcores)) {
		evt_err("one or more producer lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->plcores)) {
		evt_err("at least one producer lcore is required");
		return -1;
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid only when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

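/* Dump the effective test configuration. */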
void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

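/* Mempool object constructor: zero-fill each perf_elt. */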
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
	    void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

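/* Create the mempool that backs the perf_elt objects carried by events. */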
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	t->pool = rte_mempool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				sizeof(struct perf_elt), /* element size */
				512, /* cache size */
				0, NULL, NULL,
				perf_elt_init, /* obj constructor */
				NULL, opt->socket_id, 0); /* flags */
	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

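/* Allocate and initialize the per-test private data (struct test_perf). */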
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));
	return 0;
nomem:
	return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}
455