/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * This API allows a worker thread to walk over a graph and its nodes to
 * create, process, enqueue and move streams of objects between nodes.
 */

#include <assert.h>
#include <stdalign.h>
#include <stddef.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct __rte_cache_aligned rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			alignas(RTE_CACHE_LINE_SIZE) struct rte_graph_rq_head *rq;
				/* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list */

			unsigned int lcore_id;  /**< The graph running lcore. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next;   /* The next for rte_graph list */
	/* End of Fast path area.*/
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;	        /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ];  /**< Pcap filename. */
	uint64_t fence;			/**< Fence. */
};
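
/*
 * For reference, a simplified sketch (not the canonical implementation, see
 * rte_graph_walk_rtc() in rte_graph_worker.h) of how a run-to-completion
 * worker consumes the circular buffer above. Source nodes sit at negative
 * offsets from cir_start, which is why the head index is interpreted as
 * signed:
 *
 *	uint32_t head = graph->head;
 *	struct rte_node *node;
 *
 *	while (likely(head != graph->tail)) {
 *		node = (struct rte_node *)RTE_PTR_ADD(graph,
 *				graph->cir_start[(int32_t)head++]);
 *		__rte_node_process(graph, node);
 *		head = likely((int32_t)head > 0) ? head & graph->cir_mask : head;
 *	}
 *	graph->tail = 0;
 */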

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct __rte_cache_aligned rte_node {
	/* Slow path area */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times realloced. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	/** Fast schedule area for mcore dispatch model. */
	union {
		alignas(RTE_CACHE_LINE_MIN_SIZE) struct {
			unsigned int lcore_id;  /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};

	/** Fast path area cache line 1. */
	alignas(RTE_CACHE_LINE_MIN_SIZE)
	rte_graph_off_t xstat_off; /**< Offset to xstat counters. */

	/** Fast path area cache line 2. */
	__extension__ struct __rte_cache_aligned {
#define RTE_NODE_CTX_SZ 16
		union {
			uint8_t ctx[RTE_NODE_CTX_SZ];
			__extension__ struct {
				void *ctx_ptr;
				void *ctx_ptr2;
			};
		}; /**< Node context. */
		uint16_t size;		/**< Total number of objects available. */
		uint16_t idx;		/**< Number of objects used. */
		rte_graph_off_t off;	/**< Offset of node in the graph reel. */
		uint64_t total_cycles;	/**< Cycles spent in this node. */
		uint64_t total_calls;	/**< Calls done to this node. */
		uint64_t total_objs;	/**< Objects processed by this node. */
		union {
			void **objs;	   /**< Array of object pointers. */
			uint64_t objs_u64;
		};
		union {
			rte_node_process_t process; /**< Process function. */
			uint64_t process_u64;
		};
		alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
	};
};

static_assert(offsetof(struct rte_node, nodes) - offsetof(struct rte_node, ctx)
	== RTE_CACHE_LINE_MIN_SIZE, "rte_node fast path area must fit in 64 bytes");
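
/*
 * The 16-byte ctx area above is the per-node scratch space that a node
 * implementation typically overlays with its own state. A hedged sketch of
 * the usual pattern follows; struct example_node_ctx and its fields are
 * illustrative assumptions, not part of this API:
 *
 *	struct example_node_ctx {
 *		uint16_t port_id;
 *		uint16_t queue_id;
 *	};
 *
 *	static_assert(sizeof(struct example_node_ctx) <= RTE_NODE_CTX_SZ,
 *		      "example node context does not fit in node->ctx");
 *
 *	// Inside the node's init or process callback:
 *	struct example_node_ctx *ctx = (struct example_node_ctx *)node->ctx;
 *	ctx->port_id = 0;
 */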

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Process the given node by invoking its process callback on the pending
 * stream of objects, updating the node statistics if the stats feature is
 * enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Adds the node to the tail of the graph reel and resizes the stream, if
 * needed, so that it can hold the objects about to be enqueued.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{
	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from current node edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}
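
/*
 * A hedged usage sketch: a node process callback that forwards its whole
 * stream to the node attached at edge 0. The function name
 * example_fwd_node_process and the edge layout are illustrative assumptions,
 * not something defined by this header.
 *
 *	static uint16_t
 *	example_fwd_node_process(struct rte_graph *graph, struct rte_node *node,
 *				 void **objs, uint16_t nb_objs)
 *	{
 *		rte_node_enqueue(graph, node, 0, objs, nb_objs);
 *		return nb_objs;
 *	}
 */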

/**
 * Enqueue only one obj to next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}
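
/*
 * A common unrolling pattern (illustrative sketch, not mandated by this
 * header): enqueue a burst in groups of four objects and mop up the
 * remainder one at a time. The variables next, objs and nb_objs are assumed
 * to come from the surrounding process callback.
 *
 *	uint16_t i = 0;
 *
 *	for (; i + 4 <= nb_objs; i += 4)
 *		rte_node_enqueue_x4(graph, node, next, objs[i], objs[i + 1],
 *				    objs[i + 2], objs[i + 3]);
 *	for (; i < nb_objs; i++)
 *		rte_node_enqueue_x1(graph, node, next, objs[i]);
 */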

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
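
/*
 * A hedged sketch of per-object edge selection, e.g. in a classifier-style
 * node. The names EXAMPLE_MAX_BURST, example_classify(), EDGE_PASS and
 * EDGE_DROP are illustrative assumptions only.
 *
 *	rte_edge_t nexts[EXAMPLE_MAX_BURST];
 *	uint16_t i;
 *
 *	for (i = 0; i < nb_objs; i++)
 *		nexts[i] = example_classify(objs[i]) ? EDGE_PASS : EDGE_DROP;
 *	rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 */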

/**
 * Get the stream of the next node to enqueue the objs.
 * Once done with updating the objs, rte_node_next_stream_put() must be
 * called to put the next node into pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}
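
/*
 * The get/put pair above is typically used by source-style nodes that write
 * objects straight into the next node's stream. A hedged sketch follows; the
 * names example_rx_node_process, EXAMPLE_MAX_BURST and example_collect_objs()
 * are illustrative assumptions, and edge 0 is an assumed layout.
 *
 *	static uint16_t
 *	example_rx_node_process(struct rte_graph *graph, struct rte_node *node,
 *				void **objs, uint16_t nb_objs)
 *	{
 *		void **to_next;
 *		uint16_t n;
 *
 *		(void)objs;
 *		(void)nb_objs;
 *		to_next = rte_node_next_stream_get(graph, node, 0,
 *						   EXAMPLE_MAX_BURST);
 *		n = example_collect_objs(to_next, EXAMPLE_MAX_BURST);
 *		rte_node_next_stream_put(graph, node, 0, n);
 *
 *		return n;
 *	}
 */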

/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes. Performs
 * well when the next node is not already in pending state. If the next node
 * is already in pending state then the normal enqueue will be used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the stream pointers if dst doesn't hold valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}
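
/*
 * A hedged sketch of the home run path: when every object a node received
 * goes out on the same edge, the whole stream can be handed over without a
 * copy. The edge index 0 below is an illustrative assumption.
 *
 *	// At the end of a process callback where all objs go to edge 0:
 *	rte_node_next_stream_move(graph, node, 0);
 *	return nb_objs;
 */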

/**
 * Test the validity of the model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking and is only safe to call
 *    before the graphs start running. It sets the same model for all graphs.
 *
 * @param model
 *   Graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int rte_graph_worker_model_set(uint8_t model);
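
/*
 * A hedged sketch of selecting the dispatch model at application start-up,
 * before any worker lcore begins walking its graph. The error handler
 * handle_setup_error() is a hypothetical placeholder.
 *
 *	int rc = rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH);
 *
 *	if (rc != 0)
 *		handle_setup_error();	// hypothetical error handling
 */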

/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model and this function gets the model from
 *    the first one. Used for the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without validity check.
 *
 * @note All graphs use the same model and this function gets the model from
 *    the first one. Used for the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}
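
/*
 * A hedged sketch of how a worker loop can branch on the model in its fast
 * path, in the spirit of rte_graph_walk() in rte_graph_worker.h. The
 * walk_dispatch()/walk_rtc() helpers below are hypothetical stand-ins for the
 * model-specific walk functions.
 *
 *	if (rte_graph_worker_model_no_check_get(graph) ==
 *	    RTE_GRAPH_MODEL_MCORE_DISPATCH)
 *		walk_dispatch(graph);	// hypothetical dispatch-model walk
 *	else
 *		walk_rtc(graph);	// hypothetical run-to-completion walk
 */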

/**
 * Increment Node xstat count.
 *
 * Increment the count of an xstat for a given node.
 *
 * @param node
 *   Pointer to the node.
 * @param xstat_id
 *   xstat ID.
 * @param value
 *   Value to increment.
 */
__rte_experimental
static inline void
rte_node_xstat_increment(struct rte_node *node, uint16_t xstat_id, uint64_t value)
{
	if (rte_graph_has_stats_feature()) {
		uint64_t *xstat = (uint64_t *)RTE_PTR_ADD(node, node->xstat_off);
		xstat[xstat_id] += value;
	}
}
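
/*
 * A hedged usage sketch: bumping a per-node extended statistic from inside a
 * process callback. The xstat index EXAMPLE_XSTAT_DROPS and the variable
 * nb_dropped are illustrative assumptions; the xstat ID is expected to
 * correspond to the counter's position in the node's registered xstats.
 *
 *	rte_node_xstat_increment(node, EXAMPLE_XSTAT_DROPS, nb_dropped);
 */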

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */