/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * This API allows a worker thread to walk over a graph and nodes to create,
 * process, enqueue and move streams of objects to the next nodes.
 */

#include <rte_compat.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */
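
/*
 * Usage sketch (illustrative only, not part of this header's API): an
 * application that wants cross-core dispatching selects the mcore dispatch
 * model once, before any worker lcore starts walking a graph, by calling
 * rte_graph_worker_model_set() declared later in this file. Graph creation
 * and error handling are omitted; the fallback below is just one possible
 * policy.
 *
 *      if (rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH) < 0) {
 *              // Fall back to the default run-to-completion model
 *              rte_graph_worker_model_set(RTE_GRAPH_MODEL_DEFAULT);
 *      }
 */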

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct rte_graph {
        /* Fast path area. */
        uint32_t tail;               /**< Tail of circular buffer. */
        uint32_t head;               /**< Head of circular buffer. */
        uint32_t cir_mask;           /**< Circular buffer wrap around mask. */
        rte_node_t nb_nodes;         /**< Number of nodes in the graph. */
        rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
        rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
        uint8_t model;               /**< Graph model. */
        uint8_t reserved1;           /**< Reserved for future use. */
        uint16_t reserved2;          /**< Reserved for future use. */
        RTE_STD_C11
        union {
                /* Fast schedule area for mcore dispatch model */
                struct {
                        struct rte_graph_rq_head *rq __rte_cache_aligned; /* The run-queue */
                        struct rte_graph_rq_head rq_head; /* The head for run-queue list */

                        unsigned int lcore_id;  /**< The graph running lcore. */
                        struct rte_ring *wq;    /**< The work-queue for pending streams. */
                        struct rte_mempool *mp; /**< The mempool for scheduling streams. */
                } dispatch; /**< Only used by the dispatch model. */
        };
        SLIST_ENTRY(rte_graph) next; /* The next for rte_graph list */
        /* End of fast path area. */
        rte_graph_t id;              /**< Graph identifier. */
        int socket;                  /**< Socket ID where memory is allocated. */
        char name[RTE_GRAPH_NAMESIZE]; /**< Name of the graph. */
        bool pcap_enable;            /**< Pcap trace enabled. */
        /** Number of packets captured per core. */
        uint64_t nb_pkt_captured;
        /** Number of packets to capture per core. */
        uint64_t nb_pkt_to_capture;
        char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ]; /**< Pcap filename. */
        uint64_t fence;              /**< Fence. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct rte_node {
        /* Slow path area */
        uint64_t fence;              /**< Fence. */
        rte_graph_off_t next;        /**< Index to next node. */
        rte_node_t id;               /**< Node identifier. */
        rte_node_t parent_id;        /**< Parent node identifier. */
        rte_edge_t nb_edges;         /**< Number of edges from this node. */
        uint32_t realloc_count;      /**< Number of times realloced. */

        char parent[RTE_NODE_NAMESIZE]; /**< Parent node name. */
        char name[RTE_NODE_NAMESIZE];   /**< Name of the node. */

        /** Original process function when pcap is enabled. */
        rte_node_process_t original_process;

        RTE_STD_C11
        union {
                /* Fast schedule area for mcore dispatch model */
                struct {
                        unsigned int lcore_id;     /**< Node running lcore. */
                        uint64_t total_sched_objs; /**< Number of objects scheduled. */
                        uint64_t total_sched_fail; /**< Number of scheduling failures. */
                } dispatch;
        };
        /* Fast path area */
#define RTE_NODE_CTX_SZ 16
        uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned; /**< Node context. */
        uint16_t size;               /**< Total number of objects available. */
        uint16_t idx;                /**< Number of objects used. */
        rte_graph_off_t off;         /**< Offset of node in the graph reel. */
        uint64_t total_cycles;       /**< Cycles spent in this node. */
        uint64_t total_calls;        /**< Calls done to this node. */
        uint64_t total_objs;         /**< Objects processed by this node. */
        RTE_STD_C11
        union {
                void **objs;         /**< Array of object pointers. */
                uint64_t objs_u64;
        };
        RTE_STD_C11
        union {
                rte_node_process_t process; /**< Process function. */
                uint64_t process_u64;
        };
        struct rte_node *nodes[] __rte_cache_min_aligned; /**< Next nodes. */
} __rte_cache_aligned;
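
/*
 * Usage sketch (illustrative only): the 16-byte ctx[] array above is the
 * node's private scratch area. A common pattern is to overlay a small,
 * node-specific state structure on it from the node's process callback.
 * The my_node_ctx structure and my_node_process() function below are
 * hypothetical; the only hard requirement is that the state fits within
 * RTE_NODE_CTX_SZ bytes. rte_node_process_t is the process callback type
 * from rte_graph.h.
 *
 *      struct my_node_ctx {
 *              uint64_t pkts_seen;  // hypothetical per-node counter
 *              uint32_t drop_mask;  // hypothetical per-node configuration
 *      };
 *
 *      static uint16_t
 *      my_node_process(struct rte_graph *graph, struct rte_node *node,
 *                      void **objs, uint16_t nb_objs)
 *      {
 *              struct my_node_ctx *ctx = (struct my_node_ctx *)node->ctx;
 *
 *              RTE_BUILD_BUG_ON(sizeof(struct my_node_ctx) > RTE_NODE_CTX_SZ);
 *              ctx->pkts_seen += nb_objs;
 *
 *              // A real node would forward objs[] to a next edge here using
 *              // the enqueue helpers declared below in this file.
 *              RTE_SET_USED(graph);
 *              RTE_SET_USED(objs);
 *              return nb_objs;
 *      }
 */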

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
__rte_experimental
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
__rte_experimental
void __rte_node_stream_alloc_size(struct rte_graph *graph,
                                  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Call the process function of the given node, update its statistics if
 * stats collection is enabled and reset its stream index.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
        uint64_t start;
        uint16_t rc;
        void **objs;

        RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
        objs = node->objs;
        rte_prefetch0(objs);

        if (rte_graph_has_stats_feature()) {
                start = rte_rdtsc();
                rc = node->process(graph, node, objs, node->idx);
                node->total_cycles += rte_rdtsc() - start;
                node->total_calls++;
                node->total_objs += rc;
        } else {
                node->process(graph, node, objs, node->idx);
        }
        node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
        uint32_t tail;

        tail = graph->tail;
        graph->cir_start[tail++] = node->off;
        graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Appends the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
                            const uint16_t idx, const uint16_t space)
{

        /* Add to the pending stream list if the node is new */
        if (idx == 0)
                __rte_node_enqueue_tail_update(graph, node);

        if (unlikely(node->size < (idx + space)))
                __rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from the current node's edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
        RTE_ASSERT(next < node->nb_edges);
        RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
        node = node->nodes[next];
        RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

        return node;
}

/**
 * Enqueue the objs to the next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
                 rte_edge_t next, void **objs, uint16_t nb_objs)
{
        node = __rte_node_next_node_get(node, next);
        const uint16_t idx = node->idx;

        __rte_node_enqueue_prologue(graph, node, idx, nb_objs);

        rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
        node->idx = idx + nb_objs;
}

/**
 * Enqueue only one obj to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue the obj.
 * @param obj
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
                    rte_edge_t next, void *obj)
{
        node = __rte_node_next_node_get(node, next);
        uint16_t idx = node->idx;

        __rte_node_enqueue_prologue(graph, node, idx, 1);

        node->objs[idx++] = obj;
        node->idx = idx;
}

/**
 * Enqueue only two objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
                    rte_edge_t next, void *obj0, void *obj1)
{
        node = __rte_node_next_node_get(node, next);
        uint16_t idx = node->idx;

        __rte_node_enqueue_prologue(graph, node, idx, 2);

        node->objs[idx++] = obj0;
        node->objs[idx++] = obj1;
        node->idx = idx;
}

/**
 * Enqueue only four objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
                    rte_edge_t next, void *obj0, void *obj1, void *obj2,
                    void *obj3)
{
        node = __rte_node_next_node_get(node, next);
        uint16_t idx = node->idx;

        __rte_node_enqueue_prologue(graph, node, idx, 4);

        node->objs[idx++] = obj0;
        node->objs[idx++] = obj1;
        node->objs[idx++] = obj2;
        node->objs[idx++] = obj3;
        node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
                      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
        uint16_t i;

        for (i = 0; i < nb_objs; i++)
                rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
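
/*
 * Usage sketch (illustrative only): a node's process callback typically walks
 * the objs[] it was handed and forwards each one to a next edge with the
 * enqueue helpers above. The node, the edge identifiers and the
 * my_obj_is_ok() test below are hypothetical application code, not part of
 * this API.
 *
 *      enum { MY_NODE_NEXT_PASS, MY_NODE_NEXT_DROP };
 *
 *      static uint16_t
 *      my_classify_process(struct rte_graph *graph, struct rte_node *node,
 *                          void **objs, uint16_t nb_objs)
 *      {
 *              uint16_t i;
 *
 *              for (i = 0; i < nb_objs; i++) {
 *                      // my_obj_is_ok() stands in for application logic
 *                      if (my_obj_is_ok(objs[i]))
 *                              rte_node_enqueue_x1(graph, node,
 *                                                  MY_NODE_NEXT_PASS, objs[i]);
 *                      else
 *                              rte_node_enqueue_x1(graph, node,
 *                                                  MY_NODE_NEXT_DROP, objs[i]);
 *              }
 *              // The return value feeds the per-node statistics
 *              return nb_objs;
 *      }
 */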

/**
 * Get the stream of the next node to enqueue the objs.
 * Once done with updating the objs, rte_node_next_stream_put() must be
 * called to put the next node into pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get the stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
__rte_experimental
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
                         rte_edge_t next, uint16_t nb_objs)
{
        node = __rte_node_next_node_get(node, next);
        const uint16_t idx = node->idx;
        uint16_t free_space = node->size - idx;

        if (unlikely(free_space < nb_objs))
                __rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

        return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
__rte_experimental
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
                         rte_edge_t next, uint16_t idx)
{
        if (unlikely(!idx))
                return;

        node = __rte_node_next_node_get(node, next);
        if (node->idx == 0)
                __rte_node_enqueue_tail_update(graph, node);

        node->idx += idx;
}
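
/*
 * Usage sketch (illustrative only): rte_node_next_stream_get() and
 * rte_node_next_stream_put() are typically paired in source-style nodes that
 * write object pointers straight into the next node's stream instead of
 * enqueuing an already-filled objs[] array. The node, the MY_NODE_NEXT_WORKER
 * edge identifier and the my_poll_objs() helper below are hypothetical
 * application code.
 *
 *      static uint16_t
 *      my_source_process(struct rte_graph *graph, struct rte_node *node,
 *                        void **objs, uint16_t nb_objs)
 *      {
 *              void **to_next;
 *              uint16_t count;
 *
 *              RTE_SET_USED(objs);
 *              RTE_SET_USED(nb_objs);
 *
 *              // Reserve room for up to 32 objects in the next node's stream
 *              to_next = rte_node_next_stream_get(graph, node,
 *                                                 MY_NODE_NEXT_WORKER, 32);
 *              // Fill in up to 32 object pointers from the application source
 *              count = my_poll_objs(to_next, 32);
 *              // Mark only what was actually written as pending
 *              rte_node_next_stream_put(graph, node, MY_NODE_NEXT_WORKER, count);
 *
 *              return count;
 *      }
 */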

/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state then the normal enqueue
 * is used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
__rte_experimental
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
                          rte_edge_t next)
{
        struct rte_node *dst = __rte_node_next_node_get(src, next);

        /* Swap the pointers if dst doesn't have valid objs */
        if (likely(dst->idx == 0)) {
                void **dobjs = dst->objs;
                uint16_t dsz = dst->size;
                dst->objs = src->objs;
                dst->size = src->size;
                src->objs = dobjs;
                src->size = dsz;
                dst->idx = src->idx;
                __rte_node_enqueue_tail_update(graph, dst);
        } else { /* Move the objects from src node to dst node */
                rte_node_enqueue(graph, src, next, src->objs, src->idx);
        }
}

/**
 * Test the validity of the model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
__rte_experimental
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking, and is only safe to call
 *    before graph running. It sets the same model for all graphs.
 *
 * @param model
 *   Graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
__rte_experimental
int rte_graph_worker_model_set(uint8_t model);

/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model; this function gets the model from the
 *    first one. Used for the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without check.
 *
 * @note All graphs use the same model; this function gets the model from the
 *    first one. Used for the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
        return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */