xref: /dpdk/lib/graph/graph_stats.c (revision 070db97e017b7ed9a5320b2f624f05562a632bd3)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2020 Marvell International Ltd.
3  */
4 
5 #include <fnmatch.h>
6 #include <stdbool.h>
7 #include <stdlib.h>
8 
9 #include <rte_common.h>
10 #include <rte_errno.h>
11 #include <rte_malloc.h>
12 
13 #include "graph_private.h"
14 
15 /* Capture all graphs of cluster */
16 struct cluster {
17 	rte_graph_t nb_graphs;
18 	rte_graph_t size;
19 
20 	struct graph **graphs;
21 };
22 
23 /* Capture same node ID across cluster  */
24 struct cluster_node {
25 	struct rte_graph_cluster_node_stats stat;
26 	rte_node_t nb_nodes;
27 
28 	struct rte_node *nodes[];
29 };
30 
31 struct __rte_cache_aligned rte_graph_cluster_stats {
32 	/* Header */
33 	rte_graph_cluster_stats_cb_t fn;
34 	uint32_t cluster_node_size; /* Size of struct cluster_node */
35 	rte_node_t max_nodes;
36 	int socket_id;
37 	bool dispatch;
38 	void *cookie;
39 	size_t sz;
40 
41 	struct cluster_node clusters[];
42 };
43 
/*
 * Horizontal rule of the dispatch-model table (nine columns).
 * Expects a FILE pointer named 'f' in the expanding scope; one string
 * literal is used per table column.
 */
#define boarder_model_dispatch()                                \
	fprintf(f, "+-------------------------------"           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+-----------"                               \
		   "+\n")

/*
 * Horizontal rule of the default table (seven columns).
 * Expects a FILE pointer named 'f' in the expanding scope; one string
 * literal is used per table column.
 */
#define boarder()                                               \
	fprintf(f, "+-------------------------------"           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+---------------"                           \
		   "+-----------"                               \
		   "+\n")
54 
/* Write the column titles of the default table, framed by two rules. */
static inline void
print_banner_default(FILE *f)
{
	static const char * const title[] = {
		"|Node", "|calls", "|objs", "|realloc_count",
		"|objs/call", "|objs/sec(10E6)", "|cycles/call|",
	};

	boarder();
	fprintf(f, "%-32s%-16s%-16s%-16s%-16s%-16s%-16s\n",
		title[0], title[1], title[2], title[3],
		title[4], title[5], title[6]);
	boarder();
}
64 
/* Write the column titles of the dispatch-model table, framed by two rules. */
static inline void
print_banner_dispatch(FILE *f)
{
	static const char * const title[] = {
		"|Node", "|calls", "|objs",
		"|sched objs", "|sched fail", "|realloc_count",
		"|objs/call", "|objs/sec(10E6)", "|cycles/call|",
	};

	boarder_model_dispatch();
	fprintf(f, "%-32s%-16s%-16s%-16s%-16s%-16s%-16s%-16s%-16s\n",
		title[0], title[1], title[2],
		title[3], title[4], title[5],
		title[6], title[7], title[8]);
	boarder_model_dispatch();
}
76 
/* Write the table header that matches the selected layout. */
static inline void
print_banner(FILE *f, bool dispatch)
{
	if (!dispatch) {
		print_banner_default(f);
		return;
	}

	print_banner_dispatch(f);
}
85 
86 static inline void
87 print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat, bool dispatch)
88 {
89 	double objs_per_call, objs_per_sec, cycles_per_call, ts_per_hz;
90 	const uint64_t prev_calls = stat->prev_calls;
91 	const uint64_t prev_objs = stat->prev_objs;
92 	const uint64_t cycles = stat->cycles;
93 	const uint64_t calls = stat->calls;
94 	const uint64_t objs = stat->objs;
95 	uint64_t call_delta;
96 
97 	call_delta = calls - prev_calls;
98 	objs_per_call =
99 		call_delta ? (double)((objs - prev_objs) / call_delta) : 0;
100 	cycles_per_call =
101 		call_delta ? (double)((cycles - stat->prev_cycles) / call_delta)
102 			   : 0;
103 	ts_per_hz = (double)((stat->ts - stat->prev_ts) / stat->hz);
104 	objs_per_sec = ts_per_hz ? (objs - prev_objs) / ts_per_hz : 0;
105 	objs_per_sec /= 1000000;
106 
107 	if (dispatch) {
108 		fprintf(f,
109 			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
110 			"|%-15" PRIu64 "|%-15" PRIu64
111 			"|%-15.3f|%-15.6f|%-11.4f|\n",
112 			stat->name, calls, objs, stat->dispatch.sched_objs,
113 			stat->dispatch.sched_fail, stat->realloc_count, objs_per_call,
114 			objs_per_sec, cycles_per_call);
115 	} else {
116 		fprintf(f,
117 			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
118 			"|%-15.3f|%-15.6f|%-11.4f|\n",
119 			stat->name, calls, objs, stat->realloc_count, objs_per_call,
120 			objs_per_sec, cycles_per_call);
121 	}
122 }
123 
124 static inline void
125 print_xstat(FILE *f, const struct rte_graph_cluster_node_stats *stat, bool dispatch)
126 {
127 	int i;
128 
129 	if (dispatch) {
130 		for (i = 0; i < stat->xstat_cntrs; i++)
131 			fprintf(f,
132 				"|\t%-24s|%15s|%-15" PRIu64 "|%15s|%15s|%15s|%15s|%15s|%11.4s|\n",
133 				stat->xstat_desc[i], "", stat->xstat_count[i], "", "", "", "", "",
134 				"");
135 	} else {
136 		for (i = 0; i < stat->xstat_cntrs; i++)
137 			fprintf(f,
138 				"|\t%-24s|%15s|%-15" PRIu64 "|%15s|%15.3s|%15.6s|%11.4s|\n",
139 				stat->xstat_desc[i], "", stat->xstat_count[i], "", "", "", "");
140 	}
141 }
142 
143 static int
144 graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void *cookie,
145 		       const struct rte_graph_cluster_node_stats *stat)
146 {
147 	FILE *f = cookie;
148 
149 	if (unlikely(is_first))
150 		print_banner(f, dispatch);
151 	if (stat->objs) {
152 		print_node(f, stat, dispatch);
153 		if (stat->xstat_cntrs)
154 			print_xstat(f, stat, dispatch);
155 	}
156 	if (unlikely(is_last)) {
157 		if (dispatch)
158 			boarder_model_dispatch();
159 		else
160 			boarder();
161 	}
162 
163 	return 0;
164 };
165 
/* Default callback for the non-dispatch layout: forwards with dispatch off. */
static int
graph_cluster_stats_cb_rtc(bool is_first, bool is_last, void *cookie,
			   const struct rte_graph_cluster_node_stats *stat)
{
	const bool dispatch = false;

	return graph_cluster_stats_cb(dispatch, is_first, is_last, cookie, stat);
}
172 
/* Default callback for the dispatch layout: forwards with dispatch on. */
static int
graph_cluster_stats_cb_dispatch(bool is_first, bool is_last, void *cookie,
				const struct rte_graph_cluster_node_stats *stat)
{
	const bool dispatch = true;

	return graph_cluster_stats_cb(dispatch, is_first, is_last, cookie, stat);
}
179 
180 static struct rte_graph_cluster_stats *
181 stats_mem_init(struct cluster *cluster,
182 	       const struct rte_graph_cluster_stats_param *prm)
183 {
184 	size_t sz = sizeof(struct rte_graph_cluster_stats);
185 	struct rte_graph_cluster_stats *stats;
186 	rte_graph_cluster_stats_cb_t fn;
187 	int socket_id = prm->socket_id;
188 	uint32_t cluster_node_size;
189 
190 	/* Fix up callback */
191 	fn = prm->fn;
192 	if (fn == NULL) {
193 		const struct rte_graph *graph = cluster->graphs[0]->graph;
194 		if (graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
195 			fn = graph_cluster_stats_cb_dispatch;
196 		else
197 			fn = graph_cluster_stats_cb_rtc;
198 	}
199 
200 	cluster_node_size = sizeof(struct cluster_node);
201 	/* For a given cluster, max nodes will be the max number of graphs */
202 	cluster_node_size += cluster->nb_graphs * sizeof(struct rte_node *);
203 	cluster_node_size = RTE_ALIGN(cluster_node_size, RTE_CACHE_LINE_SIZE);
204 
205 	stats = realloc(NULL, sz);
206 	if (stats) {
207 		memset(stats, 0, sz);
208 		stats->fn = fn;
209 		stats->cluster_node_size = cluster_node_size;
210 		stats->max_nodes = 0;
211 		stats->socket_id = socket_id;
212 		stats->cookie = prm->cookie;
213 		stats->sz = sz;
214 	}
215 
216 	return stats;
217 }
218 
219 static int
220 stats_mem_populate(struct rte_graph_cluster_stats **stats_in,
221 		   struct rte_graph *graph, struct graph_node *graph_node)
222 {
223 	struct rte_graph_cluster_stats *stats = *stats_in;
224 	rte_node_t id = graph_node->node->id;
225 	struct cluster_node *cluster;
226 	struct rte_node *node;
227 	rte_node_t count;
228 	uint8_t i;
229 
230 	cluster = stats->clusters;
231 
232 	/* Iterate over cluster node array to find node ID match */
233 	for (count = 0; count < stats->max_nodes; count++) {
234 		/* Found an existing node in the reel */
235 		if (cluster->stat.id == id) {
236 			node = graph_node_id_to_ptr(graph, id);
237 			if (node == NULL)
238 				SET_ERR_JMP(
239 					ENOENT, err,
240 					"Failed to find node %s in graph %s",
241 					graph_node->node->name, graph->name);
242 
243 			cluster->nodes[cluster->nb_nodes++] = node;
244 			return 0;
245 		}
246 		cluster = RTE_PTR_ADD(cluster, stats->cluster_node_size);
247 	}
248 
249 	/* Hey, it is a new node, allocate space for it in the reel */
250 	stats = realloc(stats, stats->sz + stats->cluster_node_size);
251 	if (stats == NULL)
252 		SET_ERR_JMP(ENOMEM, err, "Realloc failed");
253 	*stats_in = NULL;
254 
255 	/* Clear the new struct cluster_node area */
256 	cluster = RTE_PTR_ADD(stats, stats->sz),
257 	memset(cluster, 0, stats->cluster_node_size);
258 	memcpy(cluster->stat.name, graph_node->node->name, RTE_NODE_NAMESIZE);
259 	cluster->stat.id = graph_node->node->id;
260 	cluster->stat.hz = rte_get_timer_hz();
261 	node = graph_node_id_to_ptr(graph, id);
262 	if (node == NULL)
263 		SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph %s",
264 			    graph_node->node->name, graph->name);
265 	cluster->nodes[cluster->nb_nodes++] = node;
266 	if (graph_node->node->xstats) {
267 		cluster->stat.xstat_cntrs = graph_node->node->xstats->nb_xstats;
268 		cluster->stat.xstat_count = rte_zmalloc_socket(NULL,
269 			sizeof(uint64_t) * graph_node->node->xstats->nb_xstats,
270 			RTE_CACHE_LINE_SIZE, stats->socket_id);
271 		if (cluster->stat.xstat_count == NULL)
272 			SET_ERR_JMP(ENOMEM, free, "Failed to allocate memory node %s graph %s",
273 				    graph_node->node->name, graph->name);
274 
275 		cluster->stat.xstat_desc = rte_zmalloc_socket(NULL,
276 			sizeof(RTE_NODE_XSTAT_DESC_SIZE) * graph_node->node->xstats->nb_xstats,
277 			RTE_CACHE_LINE_SIZE, stats->socket_id);
278 		if (cluster->stat.xstat_desc == NULL) {
279 			rte_free(cluster->stat.xstat_count);
280 			SET_ERR_JMP(ENOMEM, free, "Failed to allocate memory node %s graph %s",
281 				    graph_node->node->name, graph->name);
282 		}
283 
284 		for (i = 0; i < cluster->stat.xstat_cntrs; i++) {
285 			if (rte_strscpy(cluster->stat.xstat_desc[i],
286 					graph_node->node->xstats->xstat_desc[i],
287 					RTE_NODE_XSTAT_DESC_SIZE) < 0) {
288 				rte_free(cluster->stat.xstat_count);
289 				rte_free(cluster->stat.xstat_desc);
290 				SET_ERR_JMP(E2BIG, free,
291 					    "Error description overflow node %s graph %s",
292 					    graph_node->node->name, graph->name);
293 			}
294 		}
295 	}
296 
297 	stats->sz += stats->cluster_node_size;
298 	stats->max_nodes++;
299 	*stats_in = stats;
300 
301 	return 0;
302 free:
303 	free(stats);
304 err:
305 	return -rte_errno;
306 }
307 
/*
 * Release the libc-heap stats object built by stats_mem_init() and
 * stats_mem_populate(). Only the object itself is freed.
 */
static void
stats_mem_fini(struct rte_graph_cluster_stats *stats)
{
	void *heap_obj = stats;

	free(heap_obj);
}
313 
314 static void
315 cluster_init(struct cluster *cluster)
316 {
317 	memset(cluster, 0, sizeof(*cluster));
318 }
319 
320 static int
321 cluster_add(struct cluster *cluster, struct graph *graph)
322 {
323 	rte_graph_t count;
324 	size_t sz;
325 
326 	/* Skip the if graph is already added to cluster */
327 	for (count = 0; count < cluster->nb_graphs; count++)
328 		if (cluster->graphs[count] == graph)
329 			return 0;
330 
331 	/* Expand the cluster if required to store graph objects */
332 	if (cluster->nb_graphs + 1 > cluster->size) {
333 		cluster->size = RTE_MAX(1, cluster->size * 2);
334 		sz = sizeof(struct graph *) * cluster->size;
335 		cluster->graphs = realloc(cluster->graphs, sz);
336 		if (cluster->graphs == NULL)
337 			SET_ERR_JMP(ENOMEM, free, "Failed to realloc");
338 	}
339 
340 	/* Add graph to cluster */
341 	cluster->graphs[cluster->nb_graphs++] = graph;
342 	return 0;
343 
344 free:
345 	return -rte_errno;
346 }
347 
348 static void
349 cluster_fini(struct cluster *cluster)
350 {
351 	free(cluster->graphs);
352 }
353 
354 static int
355 expand_pattern_to_cluster(struct cluster *cluster, const char *pattern)
356 {
357 	struct graph_head *graph_head = graph_list_head_get();
358 	struct graph *graph;
359 	bool found = false;
360 
361 	/* Check for pattern match */
362 	STAILQ_FOREACH(graph, graph_head, next) {
363 		if (fnmatch(pattern, graph->name, 0) == 0) {
364 			if (cluster_add(cluster, graph))
365 				goto fail;
366 			found = true;
367 		}
368 	}
369 	if (found == false)
370 		SET_ERR_JMP(EFAULT, fail, "Pattern %s graph not found",
371 			    pattern);
372 
373 	return 0;
374 fail:
375 	return -rte_errno;
376 }
377 
378 struct rte_graph_cluster_stats *
379 rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param *prm)
380 {
381 	struct rte_graph_cluster_stats *stats, *rc = NULL;
382 	struct graph_node *graph_node;
383 	struct cluster cluster;
384 	struct graph *graph;
385 	const char *pattern;
386 	rte_graph_t i;
387 
388 	/* Sanity checks */
389 	if (!rte_graph_has_stats_feature())
390 		SET_ERR_JMP(EINVAL, fail, "Stats feature is not enabled");
391 
392 	if (prm == NULL)
393 		SET_ERR_JMP(EINVAL, fail, "Invalid param");
394 
395 	if (prm->graph_patterns == NULL || prm->nb_graph_patterns == 0)
396 		SET_ERR_JMP(EINVAL, fail, "Invalid graph param");
397 
398 	cluster_init(&cluster);
399 
400 	graph_spinlock_lock();
401 	/* Expand graph pattern and add the graph to the cluster */
402 	for (i = 0; i < prm->nb_graph_patterns; i++) {
403 		pattern = prm->graph_patterns[i];
404 		if (expand_pattern_to_cluster(&cluster, pattern))
405 			goto bad_pattern;
406 	}
407 
408 	/* Alloc the stats memory */
409 	stats = stats_mem_init(&cluster, prm);
410 	if (stats == NULL)
411 		SET_ERR_JMP(ENOMEM, bad_pattern, "Failed alloc stats memory");
412 
413 	/* Iterate over M(Graph) x N (Nodes in graph) */
414 	for (i = 0; i < cluster.nb_graphs; i++) {
415 		graph = cluster.graphs[i];
416 		STAILQ_FOREACH(graph_node, &graph->node_list, next) {
417 			struct rte_graph *graph_fp = graph->graph;
418 			if (stats_mem_populate(&stats, graph_fp, graph_node))
419 				goto realloc_fail;
420 		}
421 		if (graph->graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
422 			stats->dispatch = true;
423 	}
424 
425 	/* Finally copy to hugepage memory to avoid pressure on rte_realloc */
426 	rc = rte_malloc_socket(NULL, stats->sz, 0, stats->socket_id);
427 	if (rc)
428 		rte_memcpy(rc, stats, stats->sz);
429 	else
430 		SET_ERR_JMP(ENOMEM, realloc_fail, "rte_malloc failed");
431 
432 realloc_fail:
433 	stats_mem_fini(stats);
434 bad_pattern:
435 	graph_spinlock_unlock();
436 	cluster_fini(&cluster);
437 fail:
438 	return rc;
439 }
440 
441 void
442 rte_graph_cluster_stats_destroy(struct rte_graph_cluster_stats *stat)
443 {
444 	struct cluster_node *cluster;
445 	rte_node_t count;
446 
447 	cluster = stat->clusters;
448 	for (count = 0; count < stat->max_nodes; count++) {
449 		if (cluster->stat.xstat_cntrs) {
450 			rte_free(cluster->stat.xstat_count);
451 			rte_free(cluster->stat.xstat_desc);
452 		}
453 
454 		cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
455 	}
456 	return rte_free(stat);
457 }
458 
459 static inline void
460 cluster_node_arregate_stats(struct cluster_node *cluster, bool dispatch)
461 {
462 	uint64_t calls = 0, cycles = 0, objs = 0, realloc_count = 0;
463 	struct rte_graph_cluster_node_stats *stat = &cluster->stat;
464 	uint64_t sched_objs = 0, sched_fail = 0;
465 	struct rte_node *node;
466 	rte_node_t count;
467 	uint64_t *xstat;
468 	uint8_t i;
469 
470 	memset(stat->xstat_count, 0, sizeof(uint64_t) * stat->xstat_cntrs);
471 	for (count = 0; count < cluster->nb_nodes; count++) {
472 		node = cluster->nodes[count];
473 
474 		if (dispatch) {
475 			sched_objs += node->dispatch.total_sched_objs;
476 			sched_fail += node->dispatch.total_sched_fail;
477 		}
478 
479 		calls += node->total_calls;
480 		objs += node->total_objs;
481 		cycles += node->total_cycles;
482 		realloc_count += node->realloc_count;
483 
484 		if (node->xstat_off == 0)
485 			continue;
486 		xstat = RTE_PTR_ADD(node, node->xstat_off);
487 		for (i = 0; i < stat->xstat_cntrs; i++)
488 			stat->xstat_count[i] += xstat[i];
489 	}
490 
491 	stat->calls = calls;
492 	stat->objs = objs;
493 	stat->cycles = cycles;
494 
495 	if (dispatch) {
496 		stat->dispatch.sched_objs = sched_objs;
497 		stat->dispatch.sched_fail = sched_fail;
498 	}
499 
500 	stat->ts = rte_get_timer_cycles();
501 	stat->realloc_count = realloc_count;
502 }
503 
504 static inline void
505 cluster_node_store_prev_stats(struct cluster_node *cluster)
506 {
507 	struct rte_graph_cluster_node_stats *stat = &cluster->stat;
508 
509 	stat->prev_ts = stat->ts;
510 	stat->prev_calls = stat->calls;
511 	stat->prev_objs = stat->objs;
512 	stat->prev_cycles = stat->cycles;
513 }
514 
515 void
516 rte_graph_cluster_stats_get(struct rte_graph_cluster_stats *stat, bool skip_cb)
517 {
518 	struct cluster_node *cluster;
519 	rte_node_t count;
520 	int rc = 0;
521 
522 	cluster = stat->clusters;
523 
524 	for (count = 0; count < stat->max_nodes; count++) {
525 		cluster_node_arregate_stats(cluster, stat->dispatch);
526 		if (!skip_cb)
527 			rc = stat->fn(!count, (count == stat->max_nodes - 1),
528 				      stat->cookie, &cluster->stat);
529 		cluster_node_store_prev_stats(cluster);
530 		if (rc)
531 			break;
532 		cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
533 	}
534 }
535 
536 void
537 rte_graph_cluster_stats_reset(struct rte_graph_cluster_stats *stat)
538 {
539 	struct cluster_node *cluster;
540 	rte_node_t count;
541 	uint8_t i;
542 
543 	cluster = stat->clusters;
544 
545 	for (count = 0; count < stat->max_nodes; count++) {
546 		struct rte_graph_cluster_node_stats *node = &cluster->stat;
547 
548 		node->ts = 0;
549 		node->calls = 0;
550 		node->objs = 0;
551 		node->cycles = 0;
552 		node->prev_ts = 0;
553 		node->prev_calls = 0;
554 		node->prev_objs = 0;
555 		node->prev_cycles = 0;
556 		node->realloc_count = 0;
557 		for (i = 0; i < node->xstat_cntrs; i++)
558 			node->xstat_count[i] = 0;
559 		cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
560 	}
561 }
562