/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * This API allows a worker thread to walk over a graph and its nodes to
 * create, process, enqueue and move streams of objects to the next nodes.
 */

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			struct rte_graph_rq_head *rq __rte_cache_aligned; /**< The run-queue. */
			struct rte_graph_rq_head rq_head; /**< The head for run-queue list. */

			unsigned int lcore_id;	/**< The graph running lcore. */
			struct rte_ring *wq;	/**< The work-queue for pending streams. */
			struct rte_mempool *mp;	/**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next; /**< The next for rte_graph list. */
	/* End of Fast path area. */
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;		/**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ]; /**< Pcap filename. */
	uint64_t fence;			/**< Fence. */
} __rte_cache_aligned;
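/*
 * The fast path fields above (head, tail, cir_start and cir_mask) form the
 * circular buffer ("graph reel") of pending nodes. Below is a simplified
 * sketch of how a run-to-completion worker consumes it. This is only an
 * illustration of the data structure, not the actual walk loop: the real
 * rte_graph_walk() lives in rte_graph_worker.h and additionally keeps source
 * nodes at negative head indices so they are revisited on every iteration.
 * example_graph_walk() is a hypothetical helper; __rte_node_process() is
 * defined later in this file.
 *
 * @code{.c}
 * static inline void
 * example_graph_walk(struct rte_graph *graph) // hypothetical helper
 * {
 *	const rte_graph_off_t *cir_start = graph->cir_start;
 *	const rte_node_t mask = graph->cir_mask;
 *	uint32_t head = graph->head;
 *	struct rte_node *node;
 *
 *	// Nodes that enqueue objects advance graph->tail, so the loop
 *	// keeps running until the reel drains.
 *	while (likely(head != graph->tail)) {
 *		node = (struct rte_node *)RTE_PTR_ADD(graph,
 *						      cir_start[(int32_t)head++]);
 *		__rte_node_process(graph, node);
 *		head = likely((int32_t)head > 0) ? head & mask : head;
 *	}
 *	graph->head = 0;
 *	graph->tail = 0;
 * }
 * @endcode
 */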
/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct rte_node {
	/* Slow path area */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times reallocated. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			unsigned int lcore_id;	   /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of schedule failures. */
		} dispatch;
	};
	/* Fast path area */
#define RTE_NODE_CTX_SZ 16
	uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned; /**< Node context. */
	uint16_t size;		/**< Total number of objects available. */
	uint16_t idx;		/**< Number of objects used. */
	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
	uint64_t total_cycles;	/**< Cycles spent in this node. */
	uint64_t total_calls;	/**< Calls done to this node. */
	uint64_t total_objs;	/**< Objects processed by this node. */
	union {
		void **objs;	/**< Array of object pointers. */
		uint64_t objs_u64;
	};
	union {
		rte_node_process_t process; /**< Process function. */
		uint64_t process_u64;
	};
	struct rte_node *nodes[] __rte_cache_min_aligned; /**< Next nodes. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists, re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists, re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Call the process function of the given node and update its statistics
 * if the stats feature is enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}
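/*
 * For reference, the process callback invoked above (rte_node_process_t)
 * receives the stream of objects accumulated in node->objs and returns the
 * number of objects it handled. A minimal sketch is given below; the function
 * name and edge id are hypothetical, and rte_node_enqueue() is declared
 * later in this file.
 *
 * @code{.c}
 * #define EXAMPLE_NEXT_PASS 0 // hypothetical edge id
 *
 * static uint16_t
 * example_node_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 * {
 *	// Forward the whole stream unmodified to the node on edge 0.
 *	rte_node_enqueue(graph, node, EXAMPLE_NEXT_PASS, objs, nb_objs);
 *	return nb_objs;
 * }
 * @endcode
 */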
/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Appends the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{
	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from the current node's edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to the next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}

/**
 * Enqueue only one obj to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue the obj.
 * @param obj
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}
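/*
 * A typical use of rte_node_enqueue_x1() is per-object classification, where
 * each object may take a different edge. A sketch under assumed names is
 * given below: the edge ids and the example_is_ipv4() predicate are
 * hypothetical.
 *
 * @code{.c}
 * enum { EXAMPLE_EDGE_V4 = 0, EXAMPLE_EDGE_DROP = 1 }; // hypothetical edges
 *
 * static uint16_t
 * example_classify_process(struct rte_graph *graph, struct rte_node *node,
 *			    void **objs, uint16_t nb_objs)
 * {
 *	uint16_t i;
 *
 *	for (i = 0; i < nb_objs; i++) {
 *		// Pick the edge per object; example_is_ipv4() is a
 *		// hypothetical predicate on the object.
 *		rte_edge_t next = example_is_ipv4(objs[i]) ?
 *				  EXAMPLE_EDGE_V4 : EXAMPLE_EDGE_DROP;
 *		rte_node_enqueue_x1(graph, node, next, objs[i]);
 *	}
 *	return nb_objs;
 * }
 * @endcode
 */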
/**
 * Enqueue only two objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}

/**
 * Get the stream of the next node to enqueue the objs.
 * Once done updating the objs, call rte_node_next_stream_put() to put the
 * next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}
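/*
 * The get/put pair above lets a node write objects directly into the next
 * node's stream instead of staging them in a local array and copying. A
 * minimal sketch of the pattern is given below; it is assumed to run inside
 * a node's process callback, and the edge id 0 and example_obj_source()
 * producer are hypothetical.
 *
 * @code{.c}
 * void **to_next;
 * uint16_t count;
 *
 * // Reserve room for up to 32 objects on edge 0 of the current node.
 * to_next = rte_node_next_stream_get(graph, node, 0, 32);
 * for (count = 0; count < 32; count++) {
 *	void *obj = example_obj_source(); // hypothetical producer
 *	if (obj == NULL)
 *		break;
 *	to_next[count] = obj;
 * }
 * // Mark only the objects actually written as pending.
 * rte_node_next_stream_put(graph, node, 0, count);
 * @endcode
 */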
/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state, a normal enqueue
 * is used instead.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the stream pointers if dst doesn't hold valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}

/**
 * Test the validity of a model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking and is only safe to call
 * before the graph starts running. It sets the same model for all graphs.
 *
 * @param model
 *   Name of the graph worker model.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int rte_graph_worker_model_set(uint8_t model);

/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model, so this function reads the model from
 * the given graph. Used in the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without validity check.
 *
 * @note All graphs use the same model, so this function reads the model from
 * the given graph. Used in the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */