/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * This API allows a worker thread to walk over a graph and its nodes to
 * create, process, enqueue and move streams of objects to the next nodes.
 */

#include <rte_compat.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			struct rte_graph_rq_head *rq __rte_cache_aligned; /* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head of the run-queue list */

			unsigned int lcore_id;  /**< The graph running lcore. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next;   /* The next graph in the rte_graph list */
	/* End of Fast path area. */
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;	        /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ];  /**< Pcap filename. */
	uint64_t fence;			/**< Fence. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct rte_node {
	/* Slow path area  */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent Node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times reallocated. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			unsigned int lcore_id;  /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};
	/* Fast path area  */
#define RTE_NODE_CTX_SZ 16
	uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned; /**< Node Context. */
	uint16_t size;		/**< Total number of objects available. */
	uint16_t idx;		/**< Number of objects used. */
	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
	uint64_t total_cycles;	/**< Cycles spent in this node. */
	uint64_t total_calls;	/**< Calls done to this node. */
	uint64_t total_objs;	/**< Objects processed by this node. */
	union {
		void **objs;	   /**< Array of object pointers. */
		uint64_t objs_u64;
	};
	union {
		rte_node_process_t process; /**< Process function. */
		uint64_t process_u64;
	};
	struct rte_node *nodes[] __rte_cache_min_aligned; /**< Next nodes. */
} __rte_cache_aligned;

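/*
 * For illustration, a minimal sketch of a process callback that a worker
 * invokes on this structure. This example is not part of the API; the node
 * name and the counter kept in ctx[] are hypothetical.
 *
 *	static uint16_t
 *	my_node_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 *	{
 *		uint64_t *count = (uint64_t *)node->ctx; // 16 B scratch area
 *
 *		*count += nb_objs; // e.g. count objects seen by this node
 *		// A real node would enqueue objs to a next node here
 *		return nb_objs;    // Number of objects processed
 *	}
 */
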
/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists, it is re-allocated to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
__rte_experimental
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists, it is re-allocated to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
__rte_experimental
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Call the process function of the given node and update its statistics.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Adds the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{
	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from the current node's edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}

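/*
 * Usage sketch (illustrative, not part of the API): forward a full burst
 * from within a node process callback; the edge id NEXT_TX is hypothetical.
 *
 *	static uint16_t
 *	my_node_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 *	{
 *		// Hand every received object to the next node in one call
 *		rte_node_enqueue(graph, node, NEXT_TX, objs, nb_objs);
 *		return nb_objs;
 *	}
 */
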
/**
 * Enqueue only one obj to next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}

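/*
 * Usage sketch (illustrative, not part of the API): scatter objects to
 * different next nodes based on a per-object classification; classify()
 * and the edge ids NEXT_IP4/NEXT_DROP are hypothetical.
 *
 *	rte_edge_t nexts[nb_objs];
 *	uint16_t i;
 *
 *	for (i = 0; i < nb_objs; i++)
 *		nexts[i] = classify(objs[i]) ? NEXT_IP4 : NEXT_DROP;
 *	rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 */
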
/**
 * Get the stream of the next node to enqueue the objs.
 * Once done updating the objs, rte_node_next_stream_put() must be called
 * to move the next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
__rte_experimental
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
__rte_experimental
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}

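/*
 * Usage sketch (illustrative, not part of the API): write objects directly
 * into the next node's stream and publish only what was written; NEXT_EDGE,
 * burst and produce_obj() are hypothetical.
 *
 *	void **to_next;
 *	void *obj;
 *	uint16_t n = 0;
 *
 *	// Reserve room for up to 'burst' objects in the next stream
 *	to_next = rte_node_next_stream_get(graph, node, NEXT_EDGE, burst);
 *	while (n < burst && (obj = produce_obj()) != NULL)
 *		to_next[n++] = obj;
 *	// Move the next node to pending state for the 'n' objects written
 *	rte_node_next_stream_put(graph, node, NEXT_EDGE, n);
 */
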
/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state then the normal enqueue
 * will be used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
__rte_experimental
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the pointers if dst doesn't have valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from the src node to the dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}

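/*
 * Usage sketch (illustrative, not part of the API): a pass-through node can
 * hand its whole stream to the next node from its process callback; the
 * edge id NEXT_EDGE is hypothetical.
 *
 *	static uint16_t
 *	my_node_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 *	{
 *		// Swap streams with the next node when it is idle, else copy
 *		rte_node_next_stream_move(graph, node, NEXT_EDGE);
 *		return nb_objs;
 *	}
 */
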
/**
 * Test the validity of a model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
__rte_experimental
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking, and is only safe to call
 *    before graph running. It sets the same model for all graphs.
 *
 * @param model
 *   Graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
__rte_experimental
int rte_graph_worker_model_set(uint8_t model);

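/*
 * Usage sketch (illustrative, not part of the API): select the dispatch
 * model once, before any worker thread starts walking a graph.
 *
 *	if (rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH) != 0)
 *		rte_exit(EXIT_FAILURE, "Failed to set graph worker model\n");
 */
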
/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model; this function gets the model from the
 *    first one. Used for slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without any check.
 *
 * @note All graphs use the same model; this function gets the model from the
 *    first one. Used for fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */