xref: /dpdk/lib/graph/graph_stats.c (revision d66269c088e96ab03e5049b73643f00ef7cd8760)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2020 Marvell International Ltd.
3  */
4 
5 #include <fnmatch.h>
6 #include <stdbool.h>
7 #include <stdlib.h>
8 
9 #include <rte_common.h>
10 #include <rte_errno.h>
11 #include <rte_malloc.h>
12 
13 #include "graph_private.h"
14 
15 /* Capture all graphs of cluster */
16 struct cluster {
17 	rte_graph_t nb_graphs;
18 	rte_graph_t size;
19 
20 	struct graph **graphs;
21 };
22 
23 /* Capture same node ID across cluster  */
24 struct cluster_node {
25 	struct rte_graph_cluster_node_stats stat;
26 	rte_node_t nb_nodes;
27 
28 	struct rte_node *nodes[];
29 };
30 
31 struct __rte_cache_aligned rte_graph_cluster_stats {
32 	/* Header */
33 	rte_graph_cluster_stats_cb_t fn;
34 	uint32_t cluster_node_size; /* Size of struct cluster_node */
35 	rte_node_t max_nodes;
36 	int socket_id;
37 	bool dispatch;
38 	void *cookie;
39 	size_t sz;
40 
41 	struct cluster_node clusters[];
42 };
43 
/*
 * Write the horizontal rule of the nine-column (dispatch model) table.
 * Relies on a 'FILE *f' being visible in the scope where it is expanded.
 */
#define boarder_model_dispatch()                                               \
	fprintf(f, "+-------------------------------+---------------+--------" \
		   "-------+---------------+---------------+---------------+" \
		   "---------------+---------------+-"                         \
		   "----------+\n")
49 
/*
 * Write the horizontal rule of the seven-column (default) table.
 * Relies on a 'FILE *f' being visible in the scope where it is expanded.
 */
#define boarder()                                                              \
	fprintf(f, "+-------------------------------+---------------+--------" \
		   "-------+---------------+---------------+---------------+-" \
		   "----------+\n")
54 
/*
 * Print the table header (rule, column titles, rule) for the
 * seven-column layout to @f.
 */
static inline void
print_banner_default(FILE *f)
{
	boarder();
	fprintf(f, "%-32s%-16s%-16s%-16s%-16s%-16s%-16s\n",
		"|Node",
		"|calls",
		"|objs",
		"|realloc_count",
		"|objs/call",
		"|objs/sec(10E6)",
		"|cycles/call|");
	boarder();
}
64 
/*
 * Print the table header (rule, column titles, rule) for the
 * nine-column layout, which adds the "sched objs" and "sched fail"
 * columns, to @f.
 */
static inline void
print_banner_dispatch(FILE *f)
{
	boarder_model_dispatch();
	fprintf(f, "%-32s%-16s%-16s%-16s%-16s%-16s%-16s%-16s%-16s\n",
		"|Node",
		"|calls",
		"|objs",
		"|sched objs",
		"|sched fail",
		"|realloc_count",
		"|objs/call",
		"|objs/sec(10E6)",
		"|cycles/call|");
	boarder_model_dispatch();
}
76 
/* Print the table header matching the layout selected by @dispatch. */
static inline void
print_banner(FILE *f, bool dispatch)
{
	if (!dispatch) {
		print_banner_default(f);
		return;
	}

	print_banner_dispatch(f);
}
85 
86 static inline void
87 print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat, bool dispatch)
88 {
89 	double objs_per_call, objs_per_sec, cycles_per_call, ts_per_hz;
90 	const uint64_t prev_calls = stat->prev_calls;
91 	const uint64_t prev_objs = stat->prev_objs;
92 	const uint64_t cycles = stat->cycles;
93 	const uint64_t calls = stat->calls;
94 	const uint64_t objs = stat->objs;
95 	uint64_t call_delta;
96 
97 	call_delta = calls - prev_calls;
98 	objs_per_call =
99 		call_delta ? (double)((objs - prev_objs) / call_delta) : 0;
100 	cycles_per_call =
101 		call_delta ? (double)((cycles - stat->prev_cycles) / call_delta)
102 			   : 0;
103 	ts_per_hz = (double)((stat->ts - stat->prev_ts) / stat->hz);
104 	objs_per_sec = ts_per_hz ? (objs - prev_objs) / ts_per_hz : 0;
105 	objs_per_sec /= 1000000;
106 
107 	if (dispatch) {
108 		fprintf(f,
109 			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
110 			"|%-15" PRIu64 "|%-15" PRIu64
111 			"|%-15.3f|%-15.6f|%-11.4f|\n",
112 			stat->name, calls, objs, stat->dispatch.sched_objs,
113 			stat->dispatch.sched_fail, stat->realloc_count, objs_per_call,
114 			objs_per_sec, cycles_per_call);
115 	} else {
116 		fprintf(f,
117 			"|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
118 			"|%-15.3f|%-15.6f|%-11.4f|\n",
119 			stat->name, calls, objs, stat->realloc_count, objs_per_call,
120 			objs_per_sec, cycles_per_call);
121 	}
122 }
123 
124 static int
125 graph_cluster_stats_cb(bool dispatch, bool is_first, bool is_last, void *cookie,
126 		       const struct rte_graph_cluster_node_stats *stat)
127 {
128 	FILE *f = cookie;
129 
130 	if (unlikely(is_first))
131 		print_banner(f, dispatch);
132 	if (stat->objs)
133 		print_node(f, stat, dispatch);
134 	if (unlikely(is_last)) {
135 		if (dispatch)
136 			boarder_model_dispatch();
137 		else
138 			boarder();
139 	}
140 
141 	return 0;
142 };
143 
/* Callback adapter that prints with the default (non-dispatch) layout. */
static int
graph_cluster_stats_cb_rtc(bool is_first, bool is_last, void *cookie,
			   const struct rte_graph_cluster_node_stats *stat)
{
	const bool dispatch = false;

	return graph_cluster_stats_cb(dispatch, is_first, is_last, cookie, stat);
}
150 
/* Callback adapter that prints with the dispatch-model layout. */
static int
graph_cluster_stats_cb_dispatch(bool is_first, bool is_last, void *cookie,
				const struct rte_graph_cluster_node_stats *stat)
{
	const bool dispatch = true;

	return graph_cluster_stats_cb(dispatch, is_first, is_last, cookie, stat);
}
157 
158 static struct rte_graph_cluster_stats *
159 stats_mem_init(struct cluster *cluster,
160 	       const struct rte_graph_cluster_stats_param *prm)
161 {
162 	size_t sz = sizeof(struct rte_graph_cluster_stats);
163 	struct rte_graph_cluster_stats *stats;
164 	rte_graph_cluster_stats_cb_t fn;
165 	int socket_id = prm->socket_id;
166 	uint32_t cluster_node_size;
167 
168 	/* Fix up callback */
169 	fn = prm->fn;
170 	if (fn == NULL) {
171 		const struct rte_graph *graph = cluster->graphs[0]->graph;
172 		if (graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
173 			fn = graph_cluster_stats_cb_dispatch;
174 		else
175 			fn = graph_cluster_stats_cb_rtc;
176 	}
177 
178 	cluster_node_size = sizeof(struct cluster_node);
179 	/* For a given cluster, max nodes will be the max number of graphs */
180 	cluster_node_size += cluster->nb_graphs * sizeof(struct rte_node *);
181 	cluster_node_size = RTE_ALIGN(cluster_node_size, RTE_CACHE_LINE_SIZE);
182 
183 	stats = realloc(NULL, sz);
184 	if (stats) {
185 		memset(stats, 0, sz);
186 		stats->fn = fn;
187 		stats->cluster_node_size = cluster_node_size;
188 		stats->max_nodes = 0;
189 		stats->socket_id = socket_id;
190 		stats->cookie = prm->cookie;
191 		stats->sz = sz;
192 	}
193 
194 	return stats;
195 }
196 
197 static int
198 stats_mem_populate(struct rte_graph_cluster_stats **stats_in,
199 		   struct rte_graph *graph, struct graph_node *graph_node)
200 {
201 	struct rte_graph_cluster_stats *stats = *stats_in;
202 	rte_node_t id = graph_node->node->id;
203 	struct cluster_node *cluster;
204 	struct rte_node *node;
205 	rte_node_t count;
206 
207 	cluster = stats->clusters;
208 
209 	/* Iterate over cluster node array to find node ID match */
210 	for (count = 0; count < stats->max_nodes; count++) {
211 		/* Found an existing node in the reel */
212 		if (cluster->stat.id == id) {
213 			node = graph_node_id_to_ptr(graph, id);
214 			if (node == NULL)
215 				SET_ERR_JMP(
216 					ENOENT, err,
217 					"Failed to find node %s in graph %s",
218 					graph_node->node->name, graph->name);
219 
220 			cluster->nodes[cluster->nb_nodes++] = node;
221 			return 0;
222 		}
223 		cluster = RTE_PTR_ADD(cluster, stats->cluster_node_size);
224 	}
225 
226 	/* Hey, it is a new node, allocate space for it in the reel */
227 	stats = realloc(stats, stats->sz + stats->cluster_node_size);
228 	if (stats == NULL)
229 		SET_ERR_JMP(ENOMEM, err, "Realloc failed");
230 	*stats_in = NULL;
231 
232 	/* Clear the new struct cluster_node area */
233 	cluster = RTE_PTR_ADD(stats, stats->sz),
234 	memset(cluster, 0, stats->cluster_node_size);
235 	memcpy(cluster->stat.name, graph_node->node->name, RTE_NODE_NAMESIZE);
236 	cluster->stat.id = graph_node->node->id;
237 	cluster->stat.hz = rte_get_timer_hz();
238 	node = graph_node_id_to_ptr(graph, id);
239 	if (node == NULL)
240 		SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph %s",
241 			    graph_node->node->name, graph->name);
242 	cluster->nodes[cluster->nb_nodes++] = node;
243 
244 	stats->sz += stats->cluster_node_size;
245 	stats->max_nodes++;
246 	*stats_in = stats;
247 
248 	return 0;
249 free:
250 	free(stats);
251 err:
252 	return -rte_errno;
253 }
254 
/*
 * Release a heap-allocated stats object (as produced by stats_mem_init()
 * and grown by stats_mem_populate()). Passing NULL is harmless.
 */
static void
stats_mem_fini(struct rte_graph_cluster_stats *stats)
{
	free(stats);
}
260 
261 static void
262 cluster_init(struct cluster *cluster)
263 {
264 	memset(cluster, 0, sizeof(*cluster));
265 }
266 
267 static int
268 cluster_add(struct cluster *cluster, struct graph *graph)
269 {
270 	rte_graph_t count;
271 	size_t sz;
272 
273 	/* Skip the if graph is already added to cluster */
274 	for (count = 0; count < cluster->nb_graphs; count++)
275 		if (cluster->graphs[count] == graph)
276 			return 0;
277 
278 	/* Expand the cluster if required to store graph objects */
279 	if (cluster->nb_graphs + 1 > cluster->size) {
280 		cluster->size = RTE_MAX(1, cluster->size * 2);
281 		sz = sizeof(struct graph *) * cluster->size;
282 		cluster->graphs = realloc(cluster->graphs, sz);
283 		if (cluster->graphs == NULL)
284 			SET_ERR_JMP(ENOMEM, free, "Failed to realloc");
285 	}
286 
287 	/* Add graph to cluster */
288 	cluster->graphs[cluster->nb_graphs++] = graph;
289 	return 0;
290 
291 free:
292 	return -rte_errno;
293 }
294 
295 static void
296 cluster_fini(struct cluster *cluster)
297 {
298 	free(cluster->graphs);
299 }
300 
301 static int
302 expand_pattern_to_cluster(struct cluster *cluster, const char *pattern)
303 {
304 	struct graph_head *graph_head = graph_list_head_get();
305 	struct graph *graph;
306 	bool found = false;
307 
308 	/* Check for pattern match */
309 	STAILQ_FOREACH(graph, graph_head, next) {
310 		if (fnmatch(pattern, graph->name, 0) == 0) {
311 			if (cluster_add(cluster, graph))
312 				goto fail;
313 			found = true;
314 		}
315 	}
316 	if (found == false)
317 		SET_ERR_JMP(EFAULT, fail, "Pattern %s graph not found",
318 			    pattern);
319 
320 	return 0;
321 fail:
322 	return -rte_errno;
323 }
324 
325 struct rte_graph_cluster_stats *
326 rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param *prm)
327 {
328 	struct rte_graph_cluster_stats *stats, *rc = NULL;
329 	struct graph_node *graph_node;
330 	struct cluster cluster;
331 	struct graph *graph;
332 	const char *pattern;
333 	rte_graph_t i;
334 
335 	/* Sanity checks */
336 	if (!rte_graph_has_stats_feature())
337 		SET_ERR_JMP(EINVAL, fail, "Stats feature is not enabled");
338 
339 	if (prm == NULL)
340 		SET_ERR_JMP(EINVAL, fail, "Invalid param");
341 
342 	if (prm->graph_patterns == NULL || prm->nb_graph_patterns == 0)
343 		SET_ERR_JMP(EINVAL, fail, "Invalid graph param");
344 
345 	cluster_init(&cluster);
346 
347 	graph_spinlock_lock();
348 	/* Expand graph pattern and add the graph to the cluster */
349 	for (i = 0; i < prm->nb_graph_patterns; i++) {
350 		pattern = prm->graph_patterns[i];
351 		if (expand_pattern_to_cluster(&cluster, pattern))
352 			goto bad_pattern;
353 	}
354 
355 	/* Alloc the stats memory */
356 	stats = stats_mem_init(&cluster, prm);
357 	if (stats == NULL)
358 		SET_ERR_JMP(ENOMEM, bad_pattern, "Failed alloc stats memory");
359 
360 	/* Iterate over M(Graph) x N (Nodes in graph) */
361 	for (i = 0; i < cluster.nb_graphs; i++) {
362 		graph = cluster.graphs[i];
363 		STAILQ_FOREACH(graph_node, &graph->node_list, next) {
364 			struct rte_graph *graph_fp = graph->graph;
365 			if (stats_mem_populate(&stats, graph_fp, graph_node))
366 				goto realloc_fail;
367 		}
368 		if (graph->graph->model == RTE_GRAPH_MODEL_MCORE_DISPATCH)
369 			stats->dispatch = true;
370 	}
371 
372 	/* Finally copy to hugepage memory to avoid pressure on rte_realloc */
373 	rc = rte_malloc_socket(NULL, stats->sz, 0, stats->socket_id);
374 	if (rc)
375 		rte_memcpy(rc, stats, stats->sz);
376 	else
377 		SET_ERR_JMP(ENOMEM, realloc_fail, "rte_malloc failed");
378 
379 realloc_fail:
380 	stats_mem_fini(stats);
381 bad_pattern:
382 	graph_spinlock_unlock();
383 	cluster_fini(&cluster);
384 fail:
385 	return rc;
386 }
387 
/*
 * Release a stats object returned by rte_graph_cluster_stats_create().
 * The object is handed to rte_free().
 */
void
rte_graph_cluster_stats_destroy(struct rte_graph_cluster_stats *stat)
{
	/* Plain call: a void function must not 'return' an expression */
	rte_free(stat);
}
393 
394 static inline void
395 cluster_node_arregate_stats(struct cluster_node *cluster, bool dispatch)
396 {
397 	uint64_t calls = 0, cycles = 0, objs = 0, realloc_count = 0;
398 	struct rte_graph_cluster_node_stats *stat = &cluster->stat;
399 	uint64_t sched_objs = 0, sched_fail = 0;
400 	struct rte_node *node;
401 	rte_node_t count;
402 
403 	for (count = 0; count < cluster->nb_nodes; count++) {
404 		node = cluster->nodes[count];
405 
406 		if (dispatch) {
407 			sched_objs += node->dispatch.total_sched_objs;
408 			sched_fail += node->dispatch.total_sched_fail;
409 		}
410 
411 		calls += node->total_calls;
412 		objs += node->total_objs;
413 		cycles += node->total_cycles;
414 		realloc_count += node->realloc_count;
415 	}
416 
417 	stat->calls = calls;
418 	stat->objs = objs;
419 	stat->cycles = cycles;
420 
421 	if (dispatch) {
422 		stat->dispatch.sched_objs = sched_objs;
423 		stat->dispatch.sched_fail = sched_fail;
424 	}
425 
426 	stat->ts = rte_get_timer_cycles();
427 	stat->realloc_count = realloc_count;
428 }
429 
430 static inline void
431 cluster_node_store_prev_stats(struct cluster_node *cluster)
432 {
433 	struct rte_graph_cluster_node_stats *stat = &cluster->stat;
434 
435 	stat->prev_ts = stat->ts;
436 	stat->prev_calls = stat->calls;
437 	stat->prev_objs = stat->objs;
438 	stat->prev_cycles = stat->cycles;
439 }
440 
441 void
442 rte_graph_cluster_stats_get(struct rte_graph_cluster_stats *stat, bool skip_cb)
443 {
444 	struct cluster_node *cluster;
445 	rte_node_t count;
446 	int rc = 0;
447 
448 	cluster = stat->clusters;
449 
450 	for (count = 0; count < stat->max_nodes; count++) {
451 		cluster_node_arregate_stats(cluster, stat->dispatch);
452 		if (!skip_cb)
453 			rc = stat->fn(!count, (count == stat->max_nodes - 1),
454 				      stat->cookie, &cluster->stat);
455 		cluster_node_store_prev_stats(cluster);
456 		if (rc)
457 			break;
458 		cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
459 	}
460 }
461 
462 void
463 rte_graph_cluster_stats_reset(struct rte_graph_cluster_stats *stat)
464 {
465 	struct cluster_node *cluster;
466 	rte_node_t count;
467 
468 	cluster = stat->clusters;
469 
470 	for (count = 0; count < stat->max_nodes; count++) {
471 		struct rte_graph_cluster_node_stats *node = &cluster->stat;
472 
473 		node->ts = 0;
474 		node->calls = 0;
475 		node->objs = 0;
476 		node->cycles = 0;
477 		node->prev_ts = 0;
478 		node->prev_calls = 0;
479 		node->prev_objs = 0;
480 		node->prev_cycles = 0;
481 		node->realloc_count = 0;
482 		cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
483 	}
484 }
485