/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * This API allows a worker thread to walk over a graph and nodes to create,
 * process, enqueue and move streams of objects to the next nodes.
 */

#include <assert.h>
#include <stdalign.h>
#include <stddef.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct __rte_cache_aligned rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			alignas(RTE_CACHE_LINE_SIZE) struct rte_graph_rq_head *rq;
				/* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list */

			unsigned int lcore_id;  /**< The lcore running the graph. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next;   /* The next for rte_graph list */
	/* End of Fast path area.*/
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;	        /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ];  /**< Pcap filename. */
	uint64_t fence;			/**< Fence. */
};

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct __rte_cache_aligned rte_node {
	/* Slow path area  */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent Node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times realloced. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			unsigned int lcore_id;  /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};
	/* Fast path area  */
	__extension__ struct __rte_cache_aligned {
#define RTE_NODE_CTX_SZ 16
		union {
			uint8_t ctx[RTE_NODE_CTX_SZ];
			__extension__ struct {
				void *ctx_ptr;
				void *ctx_ptr2;
			};
		}; /**< Node Context. */
		uint16_t size;		/**< Total number of objects available. */
		uint16_t idx;		/**< Number of objects used. */
		rte_graph_off_t off;	/**< Offset of node in the graph reel. */
		uint64_t total_cycles;	/**< Cycles spent in this node. */
		uint64_t total_calls;	/**< Calls done to this node. */
		uint64_t total_objs;	/**< Objects processed by this node. */
		union {
			void **objs;	   /**< Array of object pointers. */
			uint64_t objs_u64;
		};
		union {
			rte_node_process_t process; /**< Process function. */
			uint64_t process_u64;
		};
		alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
	};
};

static_assert(offsetof(struct rte_node, nodes) - offsetof(struct rte_node, ctx)
	== RTE_CACHE_LINE_MIN_SIZE, "rte_node fast path area must fit in 64 bytes");

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with requested number of objects.
 *
 * If stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Call the process function of the given node and update the node statistics
 * if the stats feature is enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Adds the node to the tail of the graph reel when it becomes pending and
 * resizes the number of objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{

	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from current node edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}
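
/*
 * Illustrative sketch, not part of the API: a minimal node process callback
 * that forwards its whole burst to the node connected on edge 0 using
 * rte_node_enqueue(). The function name and the edge value are assumptions
 * made for this example only.
 */
static __rte_always_inline uint16_t
example_forward_node_process(struct rte_graph *graph, struct rte_node *node,
			     void **objs, uint16_t nb_objs)
{
	/* Hand the full stream of objects to the next node on edge 0. */
	rte_node_enqueue(graph, node, 0, objs, nb_objs);

	/* Report how many objects this node consumed. */
	return nb_objs;
}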

/**
 * Enqueue only one obj to next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
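
/*
 * Illustrative sketch, not part of the API: classify each object to its own
 * next edge and enqueue them in a single call with rte_node_enqueue_next().
 * The function name, the odd/even classification and the assumption that
 * nb_objs does not exceed RTE_GRAPH_BURST_SIZE are for this example only.
 */
static __rte_always_inline uint16_t
example_classify_node_process(struct rte_graph *graph, struct rte_node *node,
			      void **objs, uint16_t nb_objs)
{
	rte_edge_t nexts[RTE_GRAPH_BURST_SIZE];
	uint16_t i;

	/* Pick an edge per object; a real node would classify on contents. */
	for (i = 0; i < nb_objs; i++)
		nexts[i] = (i & 0x1) ? 1 : 0;

	/* objs[i] is enqueued to the node connected on nexts[i]. */
	rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);

	return nb_objs;
}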

/**
 * Get the stream of the next node to enqueue the objs.
 * Once done with updating the objs, rte_node_next_stream_put() must be
 * called to put the next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}
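
/*
 * Illustrative sketch, not part of the API: write objects straight into the
 * next node's stream with rte_node_next_stream_get() and commit them with
 * rte_node_next_stream_put(). The function name and the use of edge 0 are
 * assumptions for this example only; a real node would typically produce the
 * objects here (e.g. poll a device) rather than copy them.
 */
static __rte_always_inline uint16_t
example_stream_node_process(struct rte_graph *graph, struct rte_node *node,
			    void **objs, uint16_t nb_objs)
{
	void **to_next;
	uint16_t i;

	/* Reserve room for nb_objs objects in the stream of edge 0. */
	to_next = rte_node_next_stream_get(graph, node, 0, nb_objs);

	/* Fill the reserved slots directly. */
	for (i = 0; i < nb_objs; i++)
		to_next[i] = objs[i];

	/* Move the next node to pending state with the objects written. */
	rte_node_next_stream_put(graph, node, 0, nb_objs);

	return nb_objs;
}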

/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state then the normal enqueue
 * will be used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the stream pointers if dst does not hold valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}
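
/*
 * Illustrative sketch, not part of the API: a pass-through node that hands
 * its entire stream to the node on edge 0. When the destination stream is
 * empty the two streams are simply swapped (the "home run" case), otherwise
 * a regular enqueue is performed. The function name is an assumption for
 * this example only.
 */
static __rte_always_inline uint16_t
example_passthrough_node_process(struct rte_graph *graph, struct rte_node *node,
				 void **objs __rte_unused, uint16_t nb_objs)
{
	/* Move every object held by this node to the node on edge 0. */
	rte_node_next_stream_move(graph, node, 0);

	return nb_objs;
}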

/**
 * Test the validity of the model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if graph model is valid, false otherwise.
 */
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * @note This function does not perform any locking, and is only safe to call
 *    before the graphs start running. It sets the same model for all graphs.
 *
 * @param model
 *   The graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int rte_graph_worker_model_set(uint8_t model);
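
/*
 * Illustrative sketch, not part of the API: select the mcore dispatch model
 * once, before any worker lcore starts walking a graph. The wrapper name is
 * an assumption for this example only.
 */
static inline int
example_select_dispatch_model(void)
{
	/* Must be done before the graphs start running on worker lcores. */
	return rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH);
}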

/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model; this function gets the model from the first one.
 *    Used for slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without validation.
 *
 * @note All graphs use the same model; this function gets the model from the first one.
 *    Used for fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}
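
/*
 * Illustrative sketch, not part of the API: a fast path check of the worker
 * model, as a graph walk loop would do per iteration. The helper name is an
 * assumption for this example only.
 */
static __rte_always_inline bool
example_graph_uses_dispatch(struct rte_graph *graph)
{
	/* No validation is performed: intended for the fast path only. */
	return rte_graph_worker_model_no_check_get(graph) ==
	       RTE_GRAPH_MODEL_MCORE_DISPATCH;
}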

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */