/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * This API allows a worker thread to walk over a graph's nodes and to
 * create, process, enqueue and move streams of objects between nodes.
 */

#include <stdalign.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct __rte_cache_aligned rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			alignas(RTE_CACHE_LINE_SIZE) struct rte_graph_rq_head *rq;
				/* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list */

			unsigned int lcore_id;  /**< The lcore the graph runs on. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next;   /* Next entry in the rte_graph list */
	/* End of fast path area. */
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;	        /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ];  /**< Pcap filename. */
	uint64_t fence;			/**< Fence. */
};

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct __rte_cache_aligned rte_node {
	/* Slow path area  */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times reallocated. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			unsigned int lcore_id;  /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};
	/* Fast path area  */
#define RTE_NODE_CTX_SZ 16
	alignas(RTE_CACHE_LINE_SIZE) uint8_t ctx[RTE_NODE_CTX_SZ]; /**< Node context. */
	uint16_t size;		/**< Total number of objects available. */
	uint16_t idx;		/**< Number of objects used. */
	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
	uint64_t total_cycles;	/**< Cycles spent in this node. */
	uint64_t total_calls;	/**< Calls done to this node. */
	uint64_t total_objs;	/**< Objects processed by this node. */
	union {
		void **objs;	   /**< Array of object pointers. */
		uint64_t objs_u64;
	};
	union {
		rte_node_process_t process; /**< Process function. */
		uint64_t process_u64;
	};
	alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
};
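
/*
 * Usage sketch (names are hypothetical, not part of this API): a node
 * implementation can keep small per-node state in the ctx[] scratch area,
 * provided it fits within RTE_NODE_CTX_SZ bytes:
 *
 *	struct my_node_ctx {
 *		uint64_t total_seen; // running count of processed objects
 *	};
 *
 *	// inside the node's process callback:
 *	struct my_node_ctx *ctx = (struct my_node_ctx *)node->ctx;
 *	ctx->total_seen += nb_objs;
 */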

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists, it is re-allocated to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists, it is re-allocated to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Process the given node: call its process function on the accumulated
 * stream of objects and update the node statistics when the stats feature
 * is enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}
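
/*
 * Illustrative sketch (a simplification, not the actual rte_graph_walk()
 * implementation, which also handles source nodes at negative head
 * indices): a run-to-completion worker conceptually drains the circular
 * buffer of pending nodes like this.
 *
 *	static inline void
 *	example_graph_drain(struct rte_graph *graph)
 *	{
 *		const rte_graph_off_t *cir_start = graph->cir_start;
 *		const uint32_t mask = graph->cir_mask;
 *		uint32_t head = graph->head;
 *		struct rte_node *node;
 *
 *		while (head != graph->tail) {
 *			// node offsets are stored relative to the graph object
 *			node = (struct rte_node *)RTE_PTR_ADD(graph, cir_start[head]);
 *			__rte_node_process(graph, node);
 *			head = (head + 1) & mask;
 *		}
 *		graph->head = head;
 *	}
 */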

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}
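
/*
 * Note: the circular buffer size must be a power of two for the masking
 * above to implement the wrap-around, since cir_mask == size - 1. For
 * example, with an 8-entry buffer (cir_mask == 7), an incremented tail
 * of 8 wraps to 0 because 8 & 7 == 0.
 */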

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Adds the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{
	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from the current node's edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}
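
/*
 * Usage sketch (the callback name and edge id 0 are hypothetical): a
 * process callback forwarding its whole burst to the node connected at
 * edge 0.
 *
 *	static uint16_t
 *	my_fwd_process(struct rte_graph *graph, struct rte_node *node,
 *		       void **objs, uint16_t nb_objs)
 *	{
 *		rte_node_enqueue(graph, node, 0, objs, nb_objs);
 *		return nb_objs;
 *	}
 *
 * When the entire stream goes to a single edge, rte_node_next_stream_move()
 * below is usually cheaper, as it swaps streams instead of copying.
 */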

/**
 * Enqueue only one obj to next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
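
/*
 * Usage sketch (classify() and the MY_EDGE_* edge ids are hypothetical):
 * build a per-object edge array and let rte_node_enqueue_next() scatter
 * the burst.
 *
 *	rte_edge_t nexts[nb_objs];
 *	uint16_t i;
 *
 *	for (i = 0; i < nb_objs; i++)
 *		nexts[i] = classify(objs[i]) ? MY_EDGE_IP4 : MY_EDGE_DROP;
 *	rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 */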

/**
 * Get the stream of the next node to enqueue the objs.
 * Once done updating the objs, rte_node_next_stream_put() must be called
 * to move the next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}
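
/*
 * Usage sketch (the MY_EDGE edge id and my_rx_burst() producer are
 * hypothetical): reserve space in the next node's stream, fill it in
 * place, then commit with rte_node_next_stream_put().
 *
 *	void **to_next = rte_node_next_stream_get(graph, node, MY_EDGE, 32);
 *	uint16_t n = my_rx_burst(to_next, 32); // writes up to 32 pointers
 *
 *	rte_node_next_stream_put(graph, node, MY_EDGE, n);
 */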

/**
 * Home run scenario: enqueue all the objs of the current node to the
 * next node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state then the normal enqueue
 * path is used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the pointers if dst doesn't hold valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}
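
/*
 * Usage sketch (the callback name and MY_EDGE_OUT edge id are
 * hypothetical): when every object of the current node goes to one edge,
 * the stream swap avoids copying the object array entirely.
 *
 *	static uint16_t
 *	my_passthrough_process(struct rte_graph *graph, struct rte_node *node,
 *			       void **objs, uint16_t nb_objs)
 *	{
 *		RTE_SET_USED(objs);
 *		rte_node_next_stream_move(graph, node, MY_EDGE_OUT);
 *		return nb_objs;
 *	}
 */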

/**
 * Test the validity of a graph worker model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking, and is only safe to call
 *    before graph running. It sets the same model for all graphs.
 *
 * @param model
 *   Graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int rte_graph_worker_model_set(uint8_t model);
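
/*
 * Usage sketch: select the dispatch model once, before any worker lcore
 * starts walking graphs (the call performs no locking). rte_panic() is
 * from rte_debug.h; the error handling shown here is illustrative.
 *
 *	if (rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH) != 0)
 *		rte_panic("failed to set graph worker model\n");
 */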

/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model; this function reads the model from
 *    the first one. Used for the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);
/**
 * Get the graph worker model without validity check.
 *
 * @note All graphs use the same model; this function reads the model from
 *    the given graph directly. Used for the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */