/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * This API allows a worker thread to walk over a graph and its nodes to
 * create, process, enqueue and move streams of objects to the next nodes.
 */

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model. */
		struct {
			struct rte_graph_rq_head *rq __rte_cache_aligned; /* The run-queue. */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list. */

			unsigned int lcore_id;  /**< The graph running lcore. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next;   /* The next for rte_graph list. */
	/* End of fast path area. */
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;	        /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ];  /**< Pcap filename. */
	uint64_t fence;			/**< Fence. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct rte_node {
	/* Slow path area. */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times reallocated. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	union {
		/* Fast schedule area for mcore dispatch model. */
		struct {
			unsigned int lcore_id;  /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};
	/* Fast path area. */
#define RTE_NODE_CTX_SZ 16
	uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned; /**< Node context. */
	uint16_t size;		/**< Total number of objects available. */
	uint16_t idx;		/**< Number of objects used. */
	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
	uint64_t total_cycles;	/**< Cycles spent in this node. */
	uint64_t total_calls;	/**< Calls done to this node. */
	uint64_t total_objs;	/**< Objects processed by this node. */
	union {
		void **objs;	   /**< Array of object pointers. */
		uint64_t objs_u64;
	};
	union {
		rte_node_process_t process; /**< Process function. */
		uint64_t process_u64;
	};
	struct rte_node *nodes[] __rte_cache_min_aligned; /**< Next nodes. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists, then it is re-allocated to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists, then it is re-allocated to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Process the given node by invoking its process callback, and update the
 * node statistics if the stats feature is enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Appends the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{
	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from the current node's edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to the next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}

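/*
 * Usage sketch, assuming a node whose edge 0 is wired to the intended next
 * node: a minimal process callback that forwards its whole burst with
 * rte_node_enqueue(). The name my_fwd_node_process is hypothetical, for
 * illustration only.
 *
 *	static uint16_t
 *	my_fwd_node_process(struct rte_graph *graph, struct rte_node *node,
 *			    void **objs, uint16_t nb_objs)
 *	{
 *		rte_node_enqueue(graph, node, 0, objs, nb_objs);
 *		return nb_objs;
 *	}
 */
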
/**
 * Enqueue only one obj to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue the obj.
 * @param obj
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}

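/*
 * Usage sketch, assuming each object can be classified to a per-object edge.
 * It fills a nexts[] array in parallel with objs[] and hands both to
 * rte_node_enqueue_next(). The names my_classify_process and my_classify()
 * are hypothetical, and RTE_GRAPH_BURST_SIZE from rte_graph.h is assumed to
 * bound the burst size here.
 *
 *	static uint16_t
 *	my_classify_process(struct rte_graph *graph, struct rte_node *node,
 *			    void **objs, uint16_t nb_objs)
 *	{
 *		rte_edge_t nexts[RTE_GRAPH_BURST_SIZE];
 *		uint16_t i;
 *
 *		for (i = 0; i < nb_objs; i++)
 *			nexts[i] = my_classify(objs[i]);
 *		rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 *		return nb_objs;
 *	}
 */
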
/**
 * Get the stream of the next node to enqueue the objs.
 * Once done updating the objs, rte_node_next_stream_put() must be called
 * to move the next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get the stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}

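/*
 * Usage sketch of the get/put pair, assuming a source-style node that
 * produces up to 32 objects per invocation. my_rx_burst() is a hypothetical
 * producer that fills the borrowed stream; only the count actually written
 * is committed via rte_node_next_stream_put().
 *
 *	static uint16_t
 *	my_source_process(struct rte_graph *graph, struct rte_node *node,
 *			  void **objs, uint16_t nb_objs)
 *	{
 *		void **to_next;
 *		uint16_t n;
 *
 *		RTE_SET_USED(objs);
 *		RTE_SET_USED(nb_objs);
 *		to_next = rte_node_next_stream_get(graph, node, 0, 32);
 *		n = my_rx_burst(to_next, 32);
 *		rte_node_next_stream_put(graph, node, 0, n);
 *		return n;
 *	}
 */
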
/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state then the normal enqueue
 * will be used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the pointers if dst doesn't have valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}

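/*
 * Usage sketch, assuming a pass-through node that hands its entire stream
 * to edge 0. rte_node_next_stream_move() swaps the stream buffers when the
 * destination is idle, avoiding the per-object copy of rte_node_enqueue().
 * The name my_passthru_process is hypothetical.
 *
 *	static uint16_t
 *	my_passthru_process(struct rte_graph *graph, struct rte_node *node,
 *			    void **objs, uint16_t nb_objs)
 *	{
 *		RTE_SET_USED(objs);
 *		rte_node_next_stream_move(graph, node, 0);
 *		return nb_objs;
 *	}
 */
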
/**
 * Test the validity of a model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking, and is only safe to call
 *    before the graphs start running. It sets the same model for all graphs.
 *
 * @param model
 *   Graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int rte_graph_worker_model_set(uint8_t model);

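/*
 * Usage sketch, assuming model selection happens on the main lcore during
 * initialization, before any worker lcore calls rte_graph_walk():
 *
 *	if (rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH) != 0)
 *		rte_exit(EXIT_FAILURE, "Failed to set graph worker model\n");
 */
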
/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model, and this function gets the model from
 *    the first one. Used for the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without validity check.
 *
 * @note All graphs use the same model, and this function gets the model from
 *    the first one. Used for the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */