xref: /dpdk/lib/graph/rte_graph_worker_common.h (revision 171c655b8c2c99a1606f639127e6a312eca8d9e8)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * This API allows a worker thread to walk over a graph and its nodes to
 * create, process, enqueue and move streams of objects to the next nodes.
 */

#include <rte_compat.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	RTE_STD_C11
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			struct rte_graph_rq_head *rq __rte_cache_aligned; /* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list */

			unsigned int lcore_id;  /**< Lcore the graph is running on. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next;   /* Next graph in the rte_graph list */
	/* End of Fast path area. */
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;	        /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ];  /**< Pcap filename. */
	uint64_t fence;			/**< Fence. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct rte_node {
	/* Slow path area  */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent Node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times reallocated. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	RTE_STD_C11
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			unsigned int lcore_id;  /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};
	/* Fast path area  */
#define RTE_NODE_CTX_SZ 16
	uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned; /**< Node Context. */
	uint16_t size;		/**< Total number of objects available. */
	uint16_t idx;		/**< Number of objects used. */
	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
	uint64_t total_cycles;	/**< Cycles spent in this node. */
	uint64_t total_calls;	/**< Calls done to this node. */
	uint64_t total_objs;	/**< Objects processed by this node. */
	RTE_STD_C11
		union {
			void **objs;	   /**< Array of object pointers. */
			uint64_t objs_u64;
		};
	RTE_STD_C11
		union {
			rte_node_process_t process; /**< Process function. */
			uint64_t process_u64;
		};
	struct rte_node *nodes[] __rte_cache_min_aligned; /**< Next nodes. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
__rte_experimental
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
__rte_experimental
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Call the process function of the given node and update the node statistics
 * if stats collection is enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Adds the node to the tail of the graph reel if it is not already pending
 * and resizes the stream as needed to fit the objects being enqueued.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{

	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from current node edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}
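
/*
 * A minimal usage sketch (not part of this API): a hypothetical node process
 * callback that forwards its whole burst to edge 0 with rte_node_enqueue().
 * The callback name and the edge layout are assumptions for illustration.
 *
 *	static uint16_t
 *	example_node_process(struct rte_graph *graph, struct rte_node *node,
 *			     void **objs, uint16_t nb_objs)
 *	{
 *		rte_node_enqueue(graph, node, 0, objs, nb_objs);
 *		return nb_objs;
 *	}
 */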

/**
 * Enqueue only one obj to next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
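
/*
 * A minimal usage sketch (not part of this API): scattering a burst across
 * two edges with rte_node_enqueue_next(). The per-object classification
 * helper "example_classify" and the edge numbers are assumptions for
 * illustration.
 *
 *	rte_edge_t nexts[nb_objs];
 *	uint16_t i;
 *
 *	for (i = 0; i < nb_objs; i++)
 *		nexts[i] = example_classify(objs[i]) ? 1 : 0;
 *	rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 */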

/**
 * Get the stream of the next node to enqueue the objs.
 * Once done updating the objs, rte_node_next_stream_put() must be called
 * to move the next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
__rte_experimental
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
__rte_experimental
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}
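
/*
 * A minimal usage sketch (not part of this API) of the get/put pair inside a
 * source node: reserve room on edge 0, fill it directly, then mark the next
 * node pending. The "example_poll_objs" helper and the burst size of 32 are
 * assumptions for illustration.
 *
 *	void **to_next;
 *	uint16_t n;
 *
 *	to_next = rte_node_next_stream_get(graph, node, 0, 32);
 *	n = example_poll_objs(to_next, 32);
 *	rte_node_next_stream_put(graph, node, 0, n);
 */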

/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state then a normal enqueue
 * will be used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
__rte_experimental
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the pointers if dst doesn't have valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}
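
/*
 * A minimal usage sketch (not part of this API): a hypothetical pass-through
 * node that hands its whole burst to edge 0 with rte_node_next_stream_move(),
 * avoiding the per-object copy of rte_node_enqueue(). The callback name and
 * edge number are assumptions for illustration.
 *
 *	static uint16_t
 *	example_passthrough_process(struct rte_graph *graph,
 *				    struct rte_node *node,
 *				    void **objs, uint16_t nb_objs)
 *	{
 *		RTE_SET_USED(objs);
 *		rte_node_next_stream_move(graph, node, 0);
 *		return nb_objs;
 *	}
 */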

/**
 * Test the validity of model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if graph model is valid, false otherwise.
 */
__rte_experimental
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking, and is only safe to call
 *    before graph running. It sets the same model for all graphs.
 *
 * @param model
 *   Graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
__rte_experimental
int rte_graph_worker_model_set(uint8_t model);
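
/*
 * A minimal usage sketch (not part of this API): selecting the dispatch model
 * before any worker starts walking the graphs. The error handling shown is an
 * assumption for illustration.
 *
 *	if (rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH) < 0)
 *		rte_exit(EXIT_FAILURE, "Failed to set graph worker model\n");
 */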

/**
 * Get the graph worker model.
 *
 * @note All graphs will use the same model and this function will get the
 *    model from the first one. Used for slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without check.
 *
 * @note All graphs will use the same model and this function will get the
 *    model from the first one. Used for fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */