/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * @warning
 * @b EXPERIMENTAL:
 * All functions in this file may be changed or removed without prior notice.
 *
 * This API allows a worker thread to walk over a graph and nodes to create,
 * process, enqueue and move streams of objects to the next nodes.
 */

#include <rte_compat.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct rte_graph {
	/* Fast path area. */
	uint32_t tail;		/**< Tail of circular buffer. */
	uint32_t head;		/**< Head of circular buffer. */
	uint32_t cir_mask;	/**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	/**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;	/**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start;	/**< Offset at which node memory starts. */
	uint8_t model;		/**< Graph model. */
	uint8_t reserved1;	/**< Reserved for future use. */
	uint16_t reserved2;	/**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			struct rte_graph_rq_head *rq __rte_cache_aligned; /* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list */

			unsigned int lcore_id;	/**< The graph running lcore. */
			struct rte_ring *wq;	/**< The work-queue for pending streams. */
			struct rte_mempool *mp;	/**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next;	/* The next for rte_graph list */
	/* End of fast path area. */
	rte_graph_t id;	/**< Graph identifier. */
	int socket;	/**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE];	/**< Name of the graph. */
	bool pcap_enable;	/**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ];	/**< Pcap filename. */
	uint64_t fence;	/**< Fence. */
} __rte_cache_aligned;

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct rte_node {
	/* Slow path area */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times realloced. */

	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];	/**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			unsigned int lcore_id;	/**< Node running lcore. */
			uint64_t total_sched_objs;	/**< Number of objects scheduled. */
			uint64_t total_sched_fail;	/**< Number of scheduling failures. */
		} dispatch;
	};
	/* Fast path area */
#define RTE_NODE_CTX_SZ 16
	uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned;	/**< Node context. */
	uint16_t size;		/**< Total number of objects available. */
	uint16_t idx;		/**< Number of objects used. */
	rte_graph_off_t off;	/**< Offset of node in the graph reel. */
	uint64_t total_cycles;	/**< Cycles spent in this node. */
	uint64_t total_calls;	/**< Calls done to this node. */
	uint64_t total_objs;	/**< Objects processed by this node. */
	union {
		void **objs;	/**< Array of object pointers. */
		uint64_t objs_u64;
	};
	union {
		rte_node_process_t process;	/**< Process function. */
		uint64_t process_u64;
	};
	struct rte_node *nodes[] __rte_cache_min_aligned;	/**< Next nodes. */
} __rte_cache_aligned;
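
/*
 * Example: per-node state lives in the RTE_NODE_CTX_SZ-byte 'ctx' scratch
 * area of struct rte_node. A minimal sketch of the usual pattern; the
 * 'my_node_ctx' type and the process function below are illustrative
 * assumptions, not part of this API:
 *
 *	struct my_node_ctx {
 *		uint64_t seen;
 *		uint32_t flags;
 *	};
 *
 *	static uint16_t
 *	my_node_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 *	{
 *		struct my_node_ctx *ctx = (struct my_node_ctx *)node->ctx;
 *
 *		RTE_BUILD_BUG_ON(sizeof(*ctx) > RTE_NODE_CTX_SZ);
 *		ctx->seen += nb_objs;
 *		return nb_objs;
 *	}
 */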

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
__rte_experimental
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with requested number of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
__rte_experimental
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Process the given node by invoking its process() callback on the pending
 * stream of objects, updating the node statistics if the stats feature is
 * enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Appends the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts from.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{
	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from current node edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}
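
/*
 * Usage sketch: a node's process() callback typically forwards its received
 * stream with the enqueue helpers. A minimal pass-through node that sends
 * everything to edge 0 (the node wiring is an illustrative assumption, not
 * part of this API):
 *
 *	static uint16_t
 *	forward_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 *	{
 *		rte_node_enqueue(graph, node, 0, objs, nb_objs);
 *		return nb_objs;
 *	}
 */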

/**
 * Enqueue only one obj to next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueue four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
__rte_experimental
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
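
/*
 * Usage sketch: when each object may take a different edge, build a parallel
 * nexts[] array and hand both arrays to rte_node_enqueue_next(). The
 * is_ipv4() classifier and the edge numbering below are illustrative
 * assumptions, not part of this API:
 *
 *	rte_edge_t nexts[RTE_GRAPH_BURST_SIZE];
 *	uint16_t i;
 *
 *	for (i = 0; i < nb_objs; i++)
 *		nexts[i] = is_ipv4(objs[i]) ? 0 : 1;
 *	rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 */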

/**
 * Get the stream of next node to enqueue the objs.
 * Once done updating the objs, rte_node_next_stream_put() must be called
 * to move the next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
__rte_experimental
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
__rte_experimental
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}
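
/*
 * Usage sketch for the get/put pair: a source node (e.g. Rx) can write newly
 * created objects directly into the next node's stream instead of staging
 * them locally. The receive_pkts() helper, the burst size of 32 and edge 0
 * are illustrative assumptions, not part of this API:
 *
 *	static uint16_t
 *	rx_process(struct rte_graph *graph, struct rte_node *node,
 *		   void **objs, uint16_t nb_objs)
 *	{
 *		void **to_next;
 *		uint16_t n;
 *
 *		RTE_SET_USED(objs);
 *		RTE_SET_USED(nb_objs);
 *		to_next = rte_node_next_stream_get(graph, node, 0, 32);
 *		n = receive_pkts(to_next, 32);
 *		rte_node_next_stream_put(graph, node, 0, n);
 *		return n;
 *	}
 */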

/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs well when the next node is not already in pending state.
 * If the next node is already in pending state then the normal enqueue
 * is used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
__rte_experimental
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the stream pointers if dst doesn't hold valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}

/**
 * Test the validity of the model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
__rte_experimental
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking and is only safe to call
 * before the graphs start running. It sets the same model for all graphs.
 *
 * @param model
 *   The graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
__rte_experimental
int rte_graph_worker_model_set(uint8_t model);

/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model, and this function gets the model from
 * the first one. Used in the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without any check.
 *
 * @note All graphs use the same model, and this function gets the model from
 * the first one. Used in the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
__rte_experimental
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */