/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#include "test.h"

#include <inttypes.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_errno.h>
#ifdef RTE_EXEC_ENV_WINDOWS
static int
test_graph_perf_func(void)
{
	printf("graph_perf not supported on Windows, skipping test\n");
	return TEST_SKIPPED;
}

#else

#include <rte_graph.h>
#include <rte_graph_worker.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>

#define TEST_GRAPH_PERF_MZ	     "graph_perf_data"
#define TEST_GRAPH_SRC_NAME	     "test_graph_perf_source"
#define TEST_GRAPH_SRC_BRST_ONE_NAME "test_graph_perf_source_one"
#define TEST_GRAPH_WRK_NAME	     "test_graph_perf_worker"
#define TEST_GRAPH_SNK_NAME	     "test_graph_perf_sink"

#define SOURCES(map)	     RTE_DIM(map)
#define STAGES(map)	     RTE_DIM(map)
#define NODES_PER_STAGE(map) RTE_DIM(map[0])
#define SINKS(map)	     RTE_DIM(map[0])

#define MAX_EDGES_PER_NODE 7

struct test_node_data {
	uint8_t node_id;
	uint8_t is_sink;
	uint8_t next_nodes[MAX_EDGES_PER_NODE];
	uint8_t next_percentage[MAX_EDGES_PER_NODE];
};

struct test_graph_perf {
	uint16_t nb_nodes;
	rte_graph_t graph_id;
	struct test_node_data *node_data;
};

struct graph_lcore_data {
	uint8_t done;
	rte_graph_t graph_id;
};

static struct test_node_data *
graph_get_node_data(struct test_graph_perf *graph_data, rte_node_t id)
{
	struct test_node_data *node_data = NULL;
	int i;

	for (i = 0; i < graph_data->nb_nodes; i++)
		if (graph_data->node_data[i].node_id == id) {
			node_data = &graph_data->node_data[i];
			break;
		}

	return node_data;
}

static int
test_node_ctx_init(const struct rte_graph *graph, struct rte_node *node)
{
	struct test_graph_perf *graph_data;
	struct test_node_data *node_data;
	const struct rte_memzone *mz;
	rte_node_t nid = node->id;
	rte_edge_t edge = 0;
	int i;

	RTE_SET_USED(graph);

	mz = rte_memzone_lookup(TEST_GRAPH_PERF_MZ);
	if (mz == NULL)
		return -ENOMEM;
	graph_data = mz->addr;
	node_data = graph_get_node_data(graph_data, nid);
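	/*
	 * Node context layout used by the process callbacks:
	 * ctx[0]     - number of next edges
	 * ctx[1..7]  - edge index towards each next node
	 * ctx[9..15] - enqueue percentage for each next node
	 */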
	node->ctx[0] = node->nb_edges;
	for (i = 0; i < node->nb_edges && !node_data->is_sink; i++, edge++) {
		node->ctx[i + 1] = edge;
		node->ctx[i + 9] = node_data->next_percentage[i];
	}

	return 0;
}

/* Source node function */
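/*
 * The source never dereferences the object pointers it produces; only the
 * object counts matter for the performance measurement.
 */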
static uint16_t
test_perf_node_worker_source(struct rte_graph *graph, struct rte_node *node,
			     void **objs, uint16_t nb_objs)
{
	uint16_t count;
	int i;

	RTE_SET_USED(objs);
	RTE_SET_USED(nb_objs);

	/* Create a proportional stream for every next */
	for (i = 0; i < node->ctx[0]; i++) {
		count = (node->ctx[i + 9] * RTE_GRAPH_BURST_SIZE) / 100;
		rte_node_next_stream_get(graph, node, node->ctx[i + 1], count);
		rte_node_next_stream_put(graph, node, node->ctx[i + 1], count);
	}

	return RTE_GRAPH_BURST_SIZE;
}

static struct rte_node_register test_graph_perf_source = {
	.name = TEST_GRAPH_SRC_NAME,
	.process = test_perf_node_worker_source,
	.flags = RTE_NODE_SOURCE_F,
	.init = test_node_ctx_init,
};

RTE_NODE_REGISTER(test_graph_perf_source);

static uint16_t
test_perf_node_worker_source_burst_one(struct rte_graph *graph,
				       struct rte_node *node, void **objs,
				       uint16_t nb_objs)
{
	uint16_t count;
	int i;

	RTE_SET_USED(objs);
	RTE_SET_USED(nb_objs);

	/* Create a proportional stream for every next */
	for (i = 0; i < node->ctx[0]; i++) {
		count = (node->ctx[i + 9]) / 100;
		rte_node_next_stream_get(graph, node, node->ctx[i + 1], count);
		rte_node_next_stream_put(graph, node, node->ctx[i + 1], count);
	}

	return 1;
}

static struct rte_node_register test_graph_perf_source_burst_one = {
	.name = TEST_GRAPH_SRC_BRST_ONE_NAME,
	.process = test_perf_node_worker_source_burst_one,
	.flags = RTE_NODE_SOURCE_F,
	.init = test_node_ctx_init,
};

RTE_NODE_REGISTER(test_graph_perf_source_burst_one);

/* Worker node function */
static uint16_t
test_perf_node_worker(struct rte_graph *graph, struct rte_node *node,
		      void **objs, uint16_t nb_objs)
{
	uint16_t next = 0;
	uint16_t enq = 0;
	uint16_t count;
	int i;

	/* Move stream for single next node */
	if (node->ctx[0] == 1) {
		rte_node_next_stream_move(graph, node, node->ctx[1]);
		return nb_objs;
	}

	/* Enqueue objects to next nodes proportionally */
	for (i = 0; i < node->ctx[0]; i++) {
		next = node->ctx[i + 1];
		count = (node->ctx[i + 9] * nb_objs) / 100;
		enq += count;
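		/*
		 * Drain 'count' objects to 'next': the first pass handles the
		 * remainder (count % 4) with x1/x2 enqueues, after which
		 * 'count' is a multiple of four and the x4 path is taken.
		 */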
		while (count) {
			switch (count & (4 - 1)) {
			case 0:
				rte_node_enqueue_x4(graph, node, next, objs[0],
						    objs[1], objs[2], objs[3]);
				objs += 4;
				count -= 4;
				break;
			case 1:
				rte_node_enqueue_x1(graph, node, next, objs[0]);
				objs += 1;
				count -= 1;
				break;
			case 2:
				rte_node_enqueue_x2(graph, node, next, objs[0],
						    objs[1]);
				objs += 2;
				count -= 2;
				break;
			case 3:
				rte_node_enqueue_x2(graph, node, next, objs[0],
						    objs[1]);
				rte_node_enqueue_x1(graph, node, next, objs[2]);
				objs += 3;
				count -= 3;
				break;
			}
		}
	}

	if (enq != nb_objs)
		rte_node_enqueue(graph, node, next, objs, nb_objs - enq);

	return nb_objs;
}

static struct rte_node_register test_graph_perf_worker = {
	.name = TEST_GRAPH_WRK_NAME,
	.process = test_perf_node_worker,
	.init = test_node_ctx_init,
};

RTE_NODE_REGISTER(test_graph_perf_worker);

/* Last node in graph, a.k.a. the sink node */
static uint16_t
test_perf_node_sink(struct rte_graph *graph, struct rte_node *node, void **objs,
		    uint16_t nb_objs)
{
	RTE_SET_USED(graph);
	RTE_SET_USED(node);
	RTE_SET_USED(objs);
	RTE_SET_USED(nb_objs);

	return nb_objs;
}

static struct rte_node_register test_graph_perf_sink = {
	.name = TEST_GRAPH_SNK_NAME,
	.process = test_perf_node_sink,
	.init = test_node_ctx_init,
};

RTE_NODE_REGISTER(test_graph_perf_sink);

static int
graph_perf_setup(void)
{
	if (rte_lcore_count() < 2) {
		printf("Test requires at least 2 lcores\n");
		return TEST_SKIPPED;
	}

	return 0;
}

static void
graph_perf_teardown(void)
{
}

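/*
 * Return the node id of the "<pname>-<nname>" clone. If such a clone already
 * exists, reuse it after shrinking its edge count to zero; otherwise clone
 * the parent node.
 */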
static inline rte_node_t
graph_node_get(const char *pname, char *nname)
{
	rte_node_t pnode_id = rte_node_from_name(pname);
	char lookup_name[RTE_NODE_NAMESIZE];
	rte_node_t node_id;

	snprintf(lookup_name, RTE_NODE_NAMESIZE, "%s-%s", pname, nname);
	node_id = rte_node_from_name(lookup_name);

	if (node_id != RTE_NODE_ID_INVALID) {
		if (rte_node_edge_count(node_id))
			rte_node_edge_shrink(node_id, 0);
		return node_id;
	}

	return rte_node_clone(pnode_id, nname);
}

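/*
 * Collect the connections of a worker node towards the next stage: record the
 * next node names in 'ename' and their ids/percentages in 'node_data'.
 * Returns the number of edges found, or RTE_EDGE_ID_INVALID when the edge
 * count or the percentage sum (which must be 100) is invalid.
 */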
static uint16_t
graph_node_count_edges(uint32_t stage, uint16_t node, uint16_t nodes_per_stage,
		       uint8_t edge_map[][nodes_per_stage][nodes_per_stage],
		       char *ename[], struct test_node_data *node_data,
		       rte_node_t **node_map)
{
	uint8_t total_percent = 0;
	uint16_t edges = 0;
	int i;

	for (i = 0; i < nodes_per_stage && edges < MAX_EDGES_PER_NODE; i++) {
		if (edge_map[stage + 1][i][node]) {
			ename[edges] = malloc(sizeof(char) * RTE_NODE_NAMESIZE);
			snprintf(ename[edges], RTE_NODE_NAMESIZE, "%s",
				 rte_node_id_to_name(node_map[stage + 1][i]));
			node_data->next_nodes[edges] = node_map[stage + 1][i];
			node_data->next_percentage[edges] =
				edge_map[stage + 1][i][node];
			edges++;
			total_percent += edge_map[stage + 1][i][node];
		}
	}

	if (edges >= MAX_EDGES_PER_NODE || (edges && total_percent != 100)) {
		for (i = 0; i < edges; i++)
			free(ename[i]);
		return RTE_EDGE_ID_INVALID;
	}

	return edges;
}

static int
graph_init(const char *gname, uint8_t nb_srcs, uint8_t nb_sinks,
	   uint32_t stages, uint16_t nodes_per_stage,
	   uint8_t src_map[][nodes_per_stage], uint8_t snk_map[][nb_sinks],
	   uint8_t edge_map[][nodes_per_stage][nodes_per_stage],
	   uint8_t burst_one)
{
	struct test_graph_perf *graph_data;
	char nname[RTE_NODE_NAMESIZE / 2];
	struct test_node_data *node_data;
	char *ename[nodes_per_stage];
	struct rte_graph_param gconf = {0};
	const struct rte_memzone *mz;
	uint8_t total_percent = 0;
	rte_node_t *src_nodes;
	rte_node_t *snk_nodes;
	rte_node_t **node_map;
	char **node_patterns;
	rte_graph_t graph_id;
	rte_edge_t edges;
	rte_edge_t count;
	uint32_t i, j, k;

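	/*
	 * The memzone is looked up by name from test_node_ctx_init(), letting
	 * each node's init callback fetch its edge/percentage configuration.
	 */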
	mz = rte_memzone_reserve(TEST_GRAPH_PERF_MZ,
				 sizeof(struct test_graph_perf), 0, 0);
	if (mz == NULL) {
		printf("Failed to allocate graph common memory\n");
		return -ENOMEM;
	}

	graph_data = mz->addr;
	graph_data->nb_nodes = 0;
	graph_data->node_data =
		malloc(sizeof(struct test_node_data) *
		       (nb_srcs + nb_sinks + stages * nodes_per_stage));
	if (graph_data->node_data == NULL) {
		printf("Failed to reserve memory for graph node data\n");
		goto memzone_free;
	}

	node_patterns = malloc(sizeof(char *) *
			       (nb_srcs + nb_sinks + stages * nodes_per_stage));
	if (node_patterns == NULL) {
		printf("Failed to reserve memory for node patterns\n");
		goto data_free;
	}

	src_nodes = malloc(sizeof(rte_node_t) * nb_srcs);
	if (src_nodes == NULL) {
		printf("Failed to reserve memory for src nodes\n");
		goto pattern_free;
	}

	snk_nodes = malloc(sizeof(rte_node_t) * nb_sinks);
	if (snk_nodes == NULL) {
		printf("Failed to reserve memory for snk nodes\n");
		goto src_free;
	}

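	/*
	 * Single allocation for node_map: an array of 'stages' row pointers
	 * followed by the per-stage rte_node_t slots the rows point into.
	 */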
	node_map = malloc(sizeof(rte_node_t *) * stages +
			  sizeof(rte_node_t) * nodes_per_stage * stages);
	if (node_map == NULL) {
		printf("Failed to reserve memory for node map\n");
		goto snk_free;
	}

	/* Setup the Graph */
	for (i = 0; i < stages; i++) {
		node_map[i] =
			(rte_node_t *)(node_map + stages) + nodes_per_stage * i;
		for (j = 0; j < nodes_per_stage; j++) {
			total_percent = 0;
			for (k = 0; k < nodes_per_stage; k++)
				total_percent += edge_map[i][j][k];
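			/* Skip nodes that receive no traffic in this stage */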
			if (!total_percent)
				continue;
			node_patterns[graph_data->nb_nodes] =
				malloc(RTE_NODE_NAMESIZE);
			if (node_patterns[graph_data->nb_nodes] == NULL) {
				printf("Failed to create memory for pattern\n");
				goto pattern_name_free;
			}

			/* Clone a worker node */
			snprintf(nname, sizeof(nname), "%d-%d", i, j);
			node_map[i][j] =
				graph_node_get(TEST_GRAPH_WRK_NAME, nname);
			if (node_map[i][j] == RTE_NODE_ID_INVALID) {
				printf("Failed to create node[%s]\n", nname);
				graph_data->nb_nodes++;
				goto pattern_name_free;
			}
			snprintf(node_patterns[graph_data->nb_nodes],
				 RTE_NODE_NAMESIZE, "%s",
				 rte_node_id_to_name(node_map[i][j]));
			node_data =
				&graph_data->node_data[graph_data->nb_nodes];
			node_data->node_id = node_map[i][j];
			node_data->is_sink = false;
			graph_data->nb_nodes++;
		}
	}

	for (i = 0; i < stages - 1; i++) {
		for (j = 0; j < nodes_per_stage; j++) {
			/* Count edges, i.e. connections to next-stage nodes */
			node_data =
				graph_get_node_data(graph_data, node_map[i][j]);
			edges = graph_node_count_edges(i, j, nodes_per_stage,
						       edge_map, ename,
						       node_data, node_map);
			if (edges == RTE_EDGE_ID_INVALID) {
				printf("Invalid edge configuration\n");
				goto pattern_name_free;
			}
			if (!edges)
				continue;

			/* Connect a node in stage 'i' to nodes
			 * in stage 'i + 1' with edges.
			 */
			count = rte_node_edge_update(
				node_map[i][j], 0,
				(const char **)(uintptr_t)ename, edges);
			for (k = 0; k < edges; k++)
				free(ename[k]);
			if (count != edges) {
				printf("Couldn't add edges %d %d\n", edges,
				       count);
				goto pattern_name_free;
			}
		}
	}

	/* Setup Source nodes */
	for (i = 0; i < nb_srcs; i++) {
		edges = 0;
		total_percent = 0;
		node_patterns[graph_data->nb_nodes] = malloc(RTE_NODE_NAMESIZE);
		if (node_patterns[graph_data->nb_nodes] == NULL) {
			printf("Failed to create memory for pattern\n");
			goto pattern_name_free;
		}
		/* Clone a source node */
		snprintf(nname, sizeof(nname), "%d", i);
		src_nodes[i] =
			graph_node_get(burst_one ? TEST_GRAPH_SRC_BRST_ONE_NAME
						 : TEST_GRAPH_SRC_NAME,
				       nname);
		if (src_nodes[i] == RTE_NODE_ID_INVALID) {
			printf("Failed to create node[%s]\n", nname);
			graph_data->nb_nodes++;
			goto pattern_name_free;
		}
		snprintf(node_patterns[graph_data->nb_nodes], RTE_NODE_NAMESIZE,
			 "%s", rte_node_id_to_name(src_nodes[i]));
		node_data = &graph_data->node_data[graph_data->nb_nodes];
		node_data->node_id = src_nodes[i];
		node_data->is_sink = false;
		graph_data->nb_nodes++;

		/* Prepare next node list to connect to */
		for (j = 0; j < nodes_per_stage; j++) {
			if (!src_map[i][j])
				continue;
			ename[edges] = malloc(sizeof(char) * RTE_NODE_NAMESIZE);
			snprintf(ename[edges], RTE_NODE_NAMESIZE, "%s",
				 rte_node_id_to_name(node_map[0][j]));
			node_data->next_nodes[edges] = node_map[0][j];
			node_data->next_percentage[edges] = src_map[i][j];
			edges++;
			total_percent += src_map[i][j];
		}

		if (!edges)
			continue;
		if (edges >= MAX_EDGES_PER_NODE || total_percent != 100) {
			printf("Invalid edge configuration\n");
			for (j = 0; j < edges; j++)
				free(ename[j]);
			goto pattern_name_free;
		}

		/* Connect to list of next nodes using edges */
		count = rte_node_edge_update(src_nodes[i], 0,
					     (const char **)(uintptr_t)ename,
					     edges);
		for (k = 0; k < edges; k++)
			free(ename[k]);
		if (count != edges) {
			printf("Couldn't add edges %d %d\n", edges, count);
			goto pattern_name_free;
		}
	}

	/* Setup Sink nodes */
	for (i = 0; i < nb_sinks; i++) {
		node_patterns[graph_data->nb_nodes] = malloc(RTE_NODE_NAMESIZE);
		if (node_patterns[graph_data->nb_nodes] == NULL) {
			printf("Failed to create memory for pattern\n");
			goto pattern_name_free;
		}

		/* Clone a sink node */
		snprintf(nname, sizeof(nname), "%d", i);
		snk_nodes[i] = graph_node_get(TEST_GRAPH_SNK_NAME, nname);
		if (snk_nodes[i] == RTE_NODE_ID_INVALID) {
			printf("Failed to create node[%s]\n", nname);
			graph_data->nb_nodes++;
			goto pattern_name_free;
		}
		snprintf(node_patterns[graph_data->nb_nodes], RTE_NODE_NAMESIZE,
			 "%s", rte_node_id_to_name(snk_nodes[i]));
		node_data = &graph_data->node_data[graph_data->nb_nodes];
		node_data->node_id = snk_nodes[i];
		node_data->is_sink = true;
		graph_data->nb_nodes++;
	}

	/* Connect last stage worker nodes to sink nodes */
	for (i = 0; i < nodes_per_stage; i++) {
		edges = 0;
		total_percent = 0;
		node_data = graph_get_node_data(graph_data,
						node_map[stages - 1][i]);
		/* Prepare list of sink nodes to connect to */
		for (j = 0; j < nb_sinks; j++) {
			if (!snk_map[i][j])
				continue;
			ename[edges] = malloc(sizeof(char) * RTE_NODE_NAMESIZE);
			snprintf(ename[edges], RTE_NODE_NAMESIZE, "%s",
				 rte_node_id_to_name(snk_nodes[j]));
			node_data->next_nodes[edges] = snk_nodes[j];
			node_data->next_percentage[edges] = snk_map[i][j];
			edges++;
			total_percent += snk_map[i][j];
		}
		if (!edges)
			continue;
		if (edges >= MAX_EDGES_PER_NODE || total_percent != 100) {
			printf("Invalid edge configuration\n");
			for (j = 0; j < edges; j++)
				free(ename[j]);
			goto pattern_name_free;
		}

		/* Connect a worker node to a list of sink nodes */
		count = rte_node_edge_update(node_map[stages - 1][i], 0,
					     (const char **)(uintptr_t)ename,
					     edges);
		for (k = 0; k < edges; k++)
			free(ename[k]);
		if (count != edges) {
			printf("Couldn't add edges %d %d\n", edges, count);
			goto pattern_name_free;
		}
	}

	/* Create a Graph */
	gconf.socket_id = SOCKET_ID_ANY;
	gconf.nb_node_patterns = graph_data->nb_nodes;
	gconf.node_patterns = (const char **)(uintptr_t)node_patterns;

	graph_id = rte_graph_create(gname, &gconf);
	if (graph_id == RTE_GRAPH_ID_INVALID) {
		printf("Graph creation failed with error = %d\n", rte_errno);
		goto pattern_name_free;
	}
	graph_data->graph_id = graph_id;

	free(node_map);
	for (i = 0; i < graph_data->nb_nodes; i++)
		free(node_patterns[i]);
	free(snk_nodes);
	free(src_nodes);
	free(node_patterns);
	return 0;

pattern_name_free:
	free(node_map);
	for (i = 0; i < graph_data->nb_nodes; i++)
		free(node_patterns[i]);
snk_free:
	free(snk_nodes);
src_free:
	free(src_nodes);
pattern_free:
	free(node_patterns);
data_free:
	free(graph_data->node_data);
memzone_free:
	rte_memzone_free(mz);
	return -ENOMEM;
}

/* Worker thread function */
static int
_graph_perf_wrapper(void *args)
{
	struct graph_lcore_data *data = args;
	struct rte_graph *graph;

	/* Lookup graph */
	graph = rte_graph_lookup(rte_graph_id_to_name(data->graph_id));

	/* Graph walk until done */
	while (!data->done)
		rte_graph_walk(graph);

	return 0;
}

static int
measure_perf_get(rte_graph_t graph_id)
{
	const char *pattern = rte_graph_id_to_name(graph_id);
	uint32_t lcore_id = rte_get_next_lcore(-1, 1, 0);
	struct rte_graph_cluster_stats_param param;
	struct rte_graph_cluster_stats *stats;
	struct graph_lcore_data *data;

	data = rte_zmalloc("Graph_perf", sizeof(struct graph_lcore_data),
			   RTE_CACHE_LINE_SIZE);
	if (data == NULL) {
		printf("Failed to allocate graph lcore data\n");
		return -ENOMEM;
	}
	data->graph_id = graph_id;
	data->done = 0;

	/* Run graph worker thread function */
	rte_eal_remote_launch(_graph_perf_wrapper, data, lcore_id);

	/* Collect stats for a few msecs */
	if (rte_graph_has_stats_feature()) {
		memset(&param, 0, sizeof(param));
		param.f = stdout;
		param.socket_id = SOCKET_ID_ANY;
		param.graph_patterns = &pattern;
		param.nb_graph_patterns = 1;

		stats = rte_graph_cluster_stats_create(&param);
		if (stats == NULL) {
			printf("Failed to create stats\n");
			return -ENOMEM;
		}

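		/*
		 * Query the stats once without printing after ~300 ms of
		 * warm-up, then print the stats gathered over the following
		 * second.
		 */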
		rte_delay_ms(3E2);
		rte_graph_cluster_stats_get(stats, true);
		rte_delay_ms(1E3);
		rte_graph_cluster_stats_get(stats, false);
		rte_graph_cluster_stats_destroy(stats);
	} else
		rte_delay_ms(1E3);

	data->done = 1;
	rte_eal_wait_lcore(lcore_id);

	return 0;
}

static inline void
graph_fini(void)
{
	const struct rte_memzone *mz = rte_memzone_lookup(TEST_GRAPH_PERF_MZ);
	struct test_graph_perf *graph_data;

	if (mz == NULL)
		return;
	graph_data = mz->addr;

	rte_graph_destroy(graph_data->graph_id);
	free(graph_data->node_data);
	rte_memzone_free(mz);
}

static int
measure_perf(void)
{
	const struct rte_memzone *mz;
	struct test_graph_perf *graph_data;

	mz = rte_memzone_lookup(TEST_GRAPH_PERF_MZ);
	if (mz == NULL)
		return -ENOMEM;
	graph_data = mz->addr;

	return measure_perf_get(graph_data->graph_id);
}

static inline int
graph_hr_4s_1n_1src_1snk(void)
{
	return measure_perf();
}

static inline int
graph_hr_4s_1n_1src_1snk_brst_one(void)
{
	return measure_perf();
}

static inline int
graph_hr_4s_1n_2src_1snk(void)
{
	return measure_perf();
}

static inline int
graph_hr_4s_1n_1src_2snk(void)
{
	return measure_perf();
}

static inline int
graph_tree_4s_4n_1src_4snk(void)
{
	return measure_perf();
}

static inline int
graph_reverse_tree_3s_4n_1src_1snk(void)
{
	return measure_perf();
}

static inline int
graph_parallel_tree_5s_4n_4src_4snk(void)
{
	return measure_perf();
}

/* Graph Topology
 * nodes per stage:	1
 * stages:		4
 * src:			1
 * sink:		1
 */
static inline int
graph_init_hr(void)
{
	uint8_t edge_map[][1][1] = {
		{ {100} },
		{ {100} },
		{ {100} },
		{ {100} },
	};
	uint8_t src_map[][1] = { {100} };
	uint8_t snk_map[][1] = { {100} };

	return graph_init("graph_hr", SOURCES(src_map), SINKS(snk_map),
			  STAGES(edge_map), NODES_PER_STAGE(edge_map), src_map,
			  snk_map, edge_map, 0);
}

/* Graph Topology
 * nodes per stage:	1
 * stages:		4
 * src:			1
 * sink:		1
 */
static inline int
graph_init_hr_brst_one(void)
{
	uint8_t edge_map[][1][1] = {
		{ {100} },
		{ {100} },
		{ {100} },
		{ {100} },
	};
	uint8_t src_map[][1] = { {100} };
	uint8_t snk_map[][1] = { {100} };

	return graph_init("graph_hr", SOURCES(src_map), SINKS(snk_map),
			  STAGES(edge_map), NODES_PER_STAGE(edge_map), src_map,
			  snk_map, edge_map, 1);
}

/* Graph Topology
 * nodes per stage:	1
 * stages:		4
 * src:			2
 * sink:		1
 */
static inline int
graph_init_hr_multi_src(void)
{
	uint8_t edge_map[][1][1] = {
		{ {100} },
		{ {100} },
		{ {100} },
		{ {100} },
	};
	uint8_t src_map[][1] = {
		{100}, {100}
	};
	uint8_t snk_map[][1] = { {100} };

	return graph_init("graph_hr", SOURCES(src_map), SINKS(snk_map),
			  STAGES(edge_map), NODES_PER_STAGE(edge_map), src_map,
			  snk_map, edge_map, 0);
}

/* Graph Topology
 * nodes per stage:	1
 * stages:		4
 * src:			1
 * sink:		2
 */
static inline int
graph_init_hr_multi_snk(void)
{
	uint8_t edge_map[][1][1] = {
		{ {100} },
		{ {100} },
		{ {100} },
		{ {100} },
	};
	uint8_t src_map[][1] = { {100} };
	uint8_t snk_map[][2] = { {50, 50} };

	return graph_init("graph_hr", SOURCES(src_map), SINKS(snk_map),
			  STAGES(edge_map), NODES_PER_STAGE(edge_map), src_map,
			  snk_map, edge_map, 0);
}

/* Graph Topology
 * nodes per stage:	4
 * stages:		4
 * src:			1
 * sink:		4
 */
static inline int
graph_init_tree(void)
{
	uint8_t edge_map[][4][4] = {
		{
			{100, 0, 0, 0},
			{0, 0, 0, 0},
			{0, 0, 0, 0},
			{0, 0, 0, 0}
		},
		{
			{50, 0, 0, 0},
			{50, 0, 0, 0},
			{0, 0, 0, 0},
			{0, 0, 0, 0}
		},
		{
			{33, 33, 0, 0},
			{34, 34, 0, 0},
			{33, 33, 0, 0},
			{0, 0, 0, 0}
		},
		{
			{25, 25, 25, 0},
			{25, 25, 25, 0},
			{25, 25, 25, 0},
			{25, 25, 25, 0}
		}
	};
	uint8_t src_map[][4] = { {100, 0, 0, 0} };
	uint8_t snk_map[][4] = {
		{100, 0, 0, 0},
		{0, 100, 0, 0},
		{0, 0, 100, 0},
		{0, 0, 0, 100}
	};

	return graph_init("graph_full_split", SOURCES(src_map), SINKS(snk_map),
			  STAGES(edge_map), NODES_PER_STAGE(edge_map), src_map,
			  snk_map, edge_map, 0);
}

/* Graph Topology
 * nodes per stage:	4
 * stages:		3
 * src:			1
 * sink:		1
 */
static inline int
graph_init_reverse_tree(void)
{
	uint8_t edge_map[][4][4] = {
		{
			{25, 25, 25, 25},
			{25, 25, 25, 25},
			{25, 25, 25, 25},
			{25, 25, 25, 25}
		},
		{
			{33, 33, 33, 33},
			{33, 33, 33, 33},
			{34, 34, 34, 34},
			{0, 0, 0, 0}
		},
		{
			{50, 50, 50, 0},
			{50, 50, 50, 0},
			{0, 0, 0, 0},
			{0, 0, 0, 0}
		},
	};
	uint8_t src_map[][4] = { {25, 25, 25, 25} };
	uint8_t snk_map[][1] = { {100}, {100}, {0}, {0} };

	return graph_init("graph_full_split", SOURCES(src_map), SINKS(snk_map),
			  STAGES(edge_map), NODES_PER_STAGE(edge_map), src_map,
			  snk_map, edge_map, 0);
}

/* Graph Topology
 * nodes per stage:	4
 * stages:		5
 * src:			4
 * sink:		4
 */
static inline int
graph_init_parallel_tree(void)
{
	uint8_t edge_map[][4][4] = {
		{
			{100, 0, 0, 0},
			{0, 100, 0, 0},
			{0, 0, 100, 0},
			{0, 0, 0, 100}
		},
		{
			{100, 0, 0, 0},
			{0, 100, 0, 0},
			{0, 0, 100, 0},
			{0, 0, 0, 100}
		},
		{
			{100, 0, 0, 0},
			{0, 100, 0, 0},
			{0, 0, 100, 0},
			{0, 0, 0, 100}
		},
		{
			{100, 0, 0, 0},
			{0, 100, 0, 0},
			{0, 0, 100, 0},
			{0, 0, 0, 100}
		},
		{
			{100, 0, 0, 0},
			{0, 100, 0, 0},
			{0, 0, 100, 0},
			{0, 0, 0, 100}
		},
	};
	uint8_t src_map[][4] = {
		{100, 0, 0, 0},
		{0, 100, 0, 0},
		{0, 0, 100, 0},
		{0, 0, 0, 100}
	};
	uint8_t snk_map[][4] = {
		{100, 0, 0, 0},
		{0, 100, 0, 0},
		{0, 0, 100, 0},
		{0, 0, 0, 100}
	};

	return graph_init("graph_parallel", SOURCES(src_map), SINKS(snk_map),
			  STAGES(edge_map), NODES_PER_STAGE(edge_map), src_map,
			  snk_map, edge_map, 0);
}

/** Graph Creation cheat sheet
 *  edge_map -> dictates graph flow from worker stage 0 to worker stage n-1.
 *  src_map  -> dictates source nodes enqueue percentage to worker stage 0.
 *  snk_map  -> dictates stage n-1 enqueue percentage to sink.
 *
 *  Layout:
 *  edge_map[<nb_stages>][<nodes_per_stg>][<nodes_in_nxt_stg = nodes_per_stg>]
 *  src_map[<nb_sources>][<nodes_in_stage0 = nodes_per_stage>]
 *  snk_map[<nodes_in_stage(n-1) = nodes_per_stage>][<nb_sinks>]
 *
 *  The last array dictates the percentage of received objs to enqueue to the
 *  next stage.
 *
 *  Note: edge_map[][0][] will always be unused as it will receive from source
 *
 *  Example:
 *	Graph:
 *	http://bit.ly/2PqbqOy
 *	Each stage (n) connects to all nodes in the next stage in decreasing
 *	order.
 *	Since we can't resize the edge_map dynamically, we work around it by
 *	creating dummy nodes and assigning 0 percentages.
 *	Max nodes across all stages = 4
 *	stages = 3
 *	nb_src = 1
 *	nb_snk = 1
 *			   // Stages
 *	edge_map[][4][4] = {
 *		// Nodes per stage
 *		{
 *		    {25, 25, 25, 25},
 *		    {25, 25, 25, 25},
 *		    {25, 25, 25, 25},
 *		    {25, 25, 25, 25}
 *		},	// This will be unused.
 *		{
 *		    // Nodes enabled in current stage + prev stage enq %
 *		    {33, 33, 33, 33},
 *		    {33, 33, 33, 33},
 *		    {34, 34, 34, 34},
 *		    {0, 0, 0, 0}
 *		},
 *		{
 *		    {50, 50, 50, 0},
 *		    {50, 50, 50, 0},
 *		    {0, 0, 0, 0},
 *		    {0, 0, 0, 0}
 *		},
 *	};
 *	Above, each stage tells how much it should receive from the previous
 *	stage, except for stage_0.
 *
 *	src_map[][4] = { {25, 25, 25, 25} };
 *	Here, we tell each source the % it has to send to stage_0 nodes. In
 *	case we want 2 source nodes we can declare it as
 *	src_map[][4] = { {25, 25, 25, 25}, {25, 25, 25, 25} };
 *
 *	snk_map[][1] = { {100}, {100}, {0}, {0} }
 *	Here, we tell the stage n-1 nodes how much to enqueue to sink_0.
 *	If we have 2 sinks we can do as follows
 *	snk_map[][2] = { {50, 50}, {50, 50}, {0, 0}, {0, 0} }
 */

static struct unit_test_suite graph_perf_testsuite = {
	.suite_name = "Graph library performance test suite",
	.setup = graph_perf_setup,
	.teardown = graph_perf_teardown,
	.unit_test_cases = {
		TEST_CASE_ST(graph_init_hr, graph_fini,
			     graph_hr_4s_1n_1src_1snk),
		TEST_CASE_ST(graph_init_hr_brst_one, graph_fini,
			     graph_hr_4s_1n_1src_1snk_brst_one),
		TEST_CASE_ST(graph_init_hr_multi_src, graph_fini,
			     graph_hr_4s_1n_2src_1snk),
		TEST_CASE_ST(graph_init_hr_multi_snk, graph_fini,
			     graph_hr_4s_1n_1src_2snk),
		TEST_CASE_ST(graph_init_tree, graph_fini,
			     graph_tree_4s_4n_1src_4snk),
		TEST_CASE_ST(graph_init_reverse_tree, graph_fini,
			     graph_reverse_tree_3s_4n_1src_1snk),
		TEST_CASE_ST(graph_init_parallel_tree, graph_fini,
			     graph_parallel_tree_5s_4n_4src_4snk),
		TEST_CASES_END(), /**< NULL terminate unit test array */
	},
};

static int
test_graph_perf_func(void)
{
	return unit_test_suite_runner(&graph_perf_testsuite);
}

#endif /* !RTE_EXEC_ENV_WINDOWS */

REGISTER_PERF_TEST(graph_perf_autotest, test_graph_perf_func);