/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * This API allows a worker thread to walk over a graph and nodes to create,
 * process, enqueue and move streams of objects to the next nodes.
 */

#include <assert.h>
#include <stdalign.h>
#include <stddef.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */
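
/*
 * Illustrative sketch (not part of the exported API): selecting a worker model.
 * The model must be set with rte_graph_worker_model_set() (declared below in
 * this file) before any worker lcore starts walking a graph; rte_graph_walk()
 * and rte_graph_lookup() come from rte_graph_worker.h and rte_graph.h. The
 * graph name "worker_graph", the force_quit flag, worker_lcore_id and the
 * worker loop shown here are assumptions made for the example only.
 *
 *	static int
 *	graph_worker_main(void *arg)
 *	{
 *		struct rte_graph *graph = rte_graph_lookup((const char *)arg);
 *
 *		while (!force_quit)	// hypothetical termination flag
 *			rte_graph_walk(graph);
 *		return 0;
 *	}
 *
 *	// On the main lcore, before launching the workers:
 *	if (rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH) != 0)
 *		rte_panic("failed to set graph worker model\n");
 *	rte_eal_remote_launch(graph_worker_main, "worker_graph", worker_lcore_id);
 */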

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct __rte_cache_aligned rte_graph {
	/* Fast path area. */
	uint32_t tail;		     /**< Tail of circular buffer. */
	uint32_t head;		     /**< Head of circular buffer. */
	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;		     /**< Graph model. */
	uint8_t reserved1;	     /**< Reserved for future use. */
	uint16_t reserved2;	     /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			alignas(RTE_CACHE_LINE_SIZE) struct rte_graph_rq_head *rq;
			/* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list */

			unsigned int lcore_id;  /**< The lcore running the graph. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /** Only used by dispatch model */
	};
	SLIST_ENTRY(rte_graph) next; /* The next for rte_graph list */
	/* End of Fast path area. */
	rte_graph_t id; /**< Graph identifier. */
	int socket;     /**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE]; /**< Name of the graph. */
	bool pcap_enable;              /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ]; /**< Pcap filename. */
	uint64_t fence; /**< Fence. */
};

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct __rte_cache_aligned rte_node {
	/* Slow path area */
	uint64_t fence;		/**< Fence. */
	rte_graph_off_t next;	/**< Index to next node. */
	rte_node_t id;		/**< Node identifier. */
	rte_node_t parent_id;	/**< Parent node identifier. */
	rte_edge_t nb_edges;	/**< Number of edges from this node. */
	uint32_t realloc_count;	/**< Number of times reallocated. */

	char parent[RTE_NODE_NAMESIZE]; /**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];   /**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	/** Fast schedule area for mcore dispatch model. */
	union {
		alignas(RTE_CACHE_LINE_MIN_SIZE) struct {
			unsigned int lcore_id;     /**< Node running lcore. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};

	/** Fast path area cache line 1. */
	alignas(RTE_CACHE_LINE_MIN_SIZE)
	rte_graph_off_t xstat_off; /**< Offset to xstat counters. */

	/** Fast path area cache line 2. */
	__extension__ struct __rte_cache_aligned {
#define RTE_NODE_CTX_SZ 16
		union {
			uint8_t ctx[RTE_NODE_CTX_SZ];
			__extension__ struct {
				void *ctx_ptr;
				void *ctx_ptr2;
			};
		}; /**< Node Context. */
		uint16_t size;		/**< Total number of objects available. */
		uint16_t idx;		/**< Number of objects used. */
		rte_graph_off_t off;	/**< Offset of node in the graph reel. */
		uint64_t total_cycles;	/**< Cycles spent in this node. */
		uint64_t total_calls;	/**< Calls done to this node. */
		uint64_t total_objs;	/**< Objects processed by this node. */
		union {
			void **objs;	/**< Array of object pointers. */
			uint64_t objs_u64;
		};
		union {
			rte_node_process_t process; /**< Process function. */
			uint64_t process_u64;
		};
		alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
	};
};

static_assert(offsetof(struct rte_node, nodes) - offsetof(struct rte_node, ctx)
	== RTE_CACHE_LINE_MIN_SIZE, "rte_node fast path area must fit in 64 bytes");
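
/*
 * Illustrative sketch (not part of the exported API): using the per-node
 * context area. A node implementation typically keeps its private state in
 * the RTE_NODE_CTX_SZ (16 byte) ctx area, filling it from the init callback
 * and reading it back from the process callback; larger state would instead
 * be referenced through the ctx_ptr alias. The rte_node_init_t and
 * rte_node_process_t types and the node registration come from rte_graph.h;
 * struct demo_node_ctx, demo_node_init and demo_node_process are hypothetical
 * names for this example only.
 *
 *	struct demo_node_ctx {
 *		uint16_t port_id;	// hypothetical per-node state
 *		uint16_t queue_id;	// must fit within RTE_NODE_CTX_SZ bytes
 *	};
 *
 *	static int
 *	demo_node_init(const struct rte_graph *graph, struct rte_node *node)
 *	{
 *		struct demo_node_ctx *ctx = (struct demo_node_ctx *)node->ctx;
 *
 *		RTE_SET_USED(graph);
 *		ctx->port_id = 0;	// values would normally come from app config
 *		ctx->queue_id = 0;
 *		return 0;
 *	}
 *
 *	static uint16_t
 *	demo_node_process(struct rte_graph *graph, struct rte_node *node,
 *			  void **objs, uint16_t nb_objs)
 *	{
 *		struct demo_node_ctx *ctx = (struct demo_node_ctx *)node->ctx;
 *
 *		// ctx->port_id / ctx->queue_id drive the per-node behaviour here.
 *		return nb_objs;
 *	}
 *
 * Both callbacks would be hooked up through the node registration API
 * declared in rte_graph.h.
 */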

/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Call the process function of the given node on its pending stream of objects.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be processed.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Appends the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index at which the object enqueue starts.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{

	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from the current node edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to the next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}

/**
 * Enqueue only one obj to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue the obj.
 * @param obj
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}
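
/*
 * Illustrative sketch (not part of the exported API): a node process callback
 * that steers each object to a per-object edge with rte_node_enqueue_x1().
 * The rte_node_process_t signature comes from rte_graph.h; struct rte_mbuf
 * and RTE_PTYPE_L3_IPV4 come from rte_mbuf.h / rte_mbuf_ptype.h. The node,
 * its DEMO_NEXT_* edges and the simplistic packet-type test are assumptions
 * made for this example only.
 *
 *	enum { DEMO_NEXT_IP4 = 0, DEMO_NEXT_DROP, DEMO_NEXT_MAX };
 *
 *	static uint16_t
 *	demo_classify_process(struct rte_graph *graph, struct rte_node *node,
 *			      void **objs, uint16_t nb_objs)
 *	{
 *		uint16_t i;
 *
 *		for (i = 0; i < nb_objs; i++) {
 *			struct rte_mbuf *mbuf = objs[i];
 *			// Simplistic classification for the sake of the example.
 *			rte_edge_t next = (mbuf->packet_type & RTE_PTYPE_L3_IPV4) ?
 *					  DEMO_NEXT_IP4 : DEMO_NEXT_DROP;
 *
 *			rte_node_enqueue_x1(graph, node, next, objs[i]);
 *		}
 *		return nb_objs;
 *	}
 *
 * When every object of the current stream goes to a single edge,
 * rte_node_next_stream_move() (further below) avoids the copy altogether.
 */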

/**
 * Enqueue only two objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
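
/*
 * Illustrative sketch (not part of the exported API): the bulk variant.
 * The edge for every object is computed first and the whole burst is then
 * handed over in one rte_node_enqueue_next() call. The demo_lookup_edge()
 * helper, the node name and the use of a VLA are assumptions made for this
 * example only.
 *
 *	static uint16_t
 *	demo_route_process(struct rte_graph *graph, struct rte_node *node,
 *			   void **objs, uint16_t nb_objs)
 *	{
 *		rte_edge_t nexts[nb_objs];	// VLA, acceptable for a sketch
 *		uint16_t i;
 *
 *		for (i = 0; i < nb_objs; i++)
 *			nexts[i] = demo_lookup_edge(objs[i]);	// hypothetical lookup
 *
 *		rte_node_enqueue_next(graph, node, nexts, objs, nb_objs);
 *		return nb_objs;
 *	}
 */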

/**
 * Get the stream of the next node to enqueue the objs.
 * Once done with updating the objs, rte_node_next_stream_put() must be
 * called to move the next node to pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get the stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}

/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes.
 * Performs best when the next node is not already in pending state.
 * If the next node is already in pending state then the normal enqueue
 * will be used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the pointers if dst doesn't have valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}
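
/*
 * Illustrative sketch (not part of the exported API): the get/put pattern as
 * used by a source-style node that produces objects directly into the next
 * node's stream. The demo_rx_burst() producer, the DEMO_BURST size and the
 * use of edge 0 are assumptions made for this example only; RTE_SET_USED()
 * comes from rte_common.h.
 *
 *	#define DEMO_BURST 32	// hypothetical burst size
 *
 *	static uint16_t
 *	demo_source_process(struct rte_graph *graph, struct rte_node *node,
 *			    void **objs, uint16_t nb_objs)
 *	{
 *		void **to_next;
 *		uint16_t n;
 *
 *		RTE_SET_USED(objs);
 *		RTE_SET_USED(nb_objs);
 *
 *		// Reserve room on the stream of edge 0 of this node.
 *		to_next = rte_node_next_stream_get(graph, node, 0, DEMO_BURST);
 *		// Hypothetical producer writing up to DEMO_BURST object pointers.
 *		n = demo_rx_burst(to_next, DEMO_BURST);
 *		// Account for what was actually written and mark edge 0 pending.
 *		rte_node_next_stream_put(graph, node, 0, n);
 *
 *		return n;
 *	}
 *
 * A node that forwards its entire stream unchanged to a single edge can
 * instead call rte_node_next_stream_move(), which swaps the stream buffers
 * rather than copying the object pointers.
 */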

/**
 * Test the validity of a model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking, and is only safe to call
 *   before the graphs start running. It sets the same model for all graphs.
 *
 * @param model
 *   The graph worker model to set (RTE_GRAPH_MODEL_*).
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int rte_graph_worker_model_set(uint8_t model);

/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model; this function gets the model from the
 *   first one. Used for the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without checking.
 *
 * @note All graphs use the same model; this function gets the model from the
 *   first one. Used for the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

/**
 * Increment node xstat count.
 *
 * Increment the count of an xstat for a given node.
 *
 * @param node
 *   Pointer to the node.
 * @param xstat_id
 *   xstat ID.
 * @param value
 *   Value to add to the xstat count.
 */
__rte_experimental
static inline void
rte_node_xstat_increment(struct rte_node *node, uint16_t xstat_id, uint64_t value)
{
	if (rte_graph_has_stats_feature()) {
		uint64_t *xstat = (uint64_t *)RTE_PTR_ADD(node, node->xstat_off);

		xstat[xstat_id] += value;
	}
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */