1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(C) 2020 Marvell International Ltd. 3 */ 4 5 #ifndef _RTE_GRAPH_WORKER_COMMON_H_ 6 #define _RTE_GRAPH_WORKER_COMMON_H_ 7 8 /** 9 * @file rte_graph_worker_common.h 10 * 11 * This API allows a worker thread to walk over a graph and nodes to create, 12 * process, enqueue and move streams of objects to the next nodes. 13 */ 14 15 #include <assert.h> 16 #include <stdalign.h> 17 #include <stddef.h> 18 19 #include <rte_common.h> 20 #include <rte_cycles.h> 21 #include <rte_prefetch.h> 22 #include <rte_memcpy.h> 23 #include <rte_memory.h> 24 25 #include "rte_graph.h" 26 27 #ifdef __cplusplus 28 extern "C" { 29 #endif 30 31 /** Graph worker models */ 32 /* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */ 33 #define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */ 34 #define RTE_GRAPH_MODEL_MCORE_DISPATCH 1 35 /**< Dispatch model to support cross-core dispatching within core affinity. */ 36 #define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */ 37 38 /** 39 * @internal 40 * 41 * Singly-linked list head for graph schedule run-queue. 42 */ 43 SLIST_HEAD(rte_graph_rq_head, rte_graph); 44 45 /** 46 * @internal 47 * 48 * Data structure to hold graph data. 49 */ 50 struct __rte_cache_aligned rte_graph { 51 /* Fast path area. */ 52 uint32_t tail; /**< Tail of circular buffer. */ 53 uint32_t head; /**< Head of circular buffer. */ 54 uint32_t cir_mask; /**< Circular buffer wrap around mask. */ 55 rte_node_t nb_nodes; /**< Number of nodes in the graph. */ 56 rte_graph_off_t *cir_start; /**< Pointer to circular buffer. */ 57 rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */ 58 uint8_t model; /**< graph model */ 59 uint8_t reserved1; /**< Reserved for future use. */ 60 uint16_t reserved2; /**< Reserved for future use. */ 61 union { 62 /* Fast schedule area for mcore dispatch model */ 63 struct { 64 alignas(RTE_CACHE_LINE_SIZE) struct rte_graph_rq_head *rq; 65 /* The run-queue */ 66 struct rte_graph_rq_head rq_head; /* The head for run-queue list */ 67 68 unsigned int lcore_id; /**< The graph running Lcore. */ 69 struct rte_ring *wq; /**< The work-queue for pending streams. */ 70 struct rte_mempool *mp; /**< The mempool for scheduling streams. */ 71 } dispatch; /** Only used by dispatch model */ 72 }; 73 SLIST_ENTRY(rte_graph) next; /* The next for rte_graph list */ 74 /* End of Fast path area.*/ 75 rte_graph_t id; /**< Graph identifier. */ 76 int socket; /**< Socket ID where memory is allocated. */ 77 char name[RTE_GRAPH_NAMESIZE]; /**< Name of the graph. */ 78 bool pcap_enable; /**< Pcap trace enabled. */ 79 /** Number of packets captured per core. */ 80 uint64_t nb_pkt_captured; 81 /** Number of packets to capture per core. */ 82 uint64_t nb_pkt_to_capture; 83 char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ]; /**< Pcap filename. */ 84 uint64_t fence; /**< Fence. */ 85 }; 86 87 /** 88 * @internal 89 * 90 * Data structure to hold node data. 91 */ 92 struct __rte_cache_aligned rte_node { 93 /* Slow path area */ 94 uint64_t fence; /**< Fence. */ 95 rte_graph_off_t next; /**< Index to next node. */ 96 rte_node_t id; /**< Node identifier. */ 97 rte_node_t parent_id; /**< Parent Node identifier. */ 98 rte_edge_t nb_edges; /**< Number of edges from this node. */ 99 uint32_t realloc_count; /**< Number of times realloced. */ 100 101 char parent[RTE_NODE_NAMESIZE]; /**< Parent node name. */ 102 char name[RTE_NODE_NAMESIZE]; /**< Name of the node. */ 103 104 /** Original process function when pcap is enabled. */ 105 rte_node_process_t original_process; 106 107 union { 108 /* Fast schedule area for mcore dispatch model */ 109 struct { 110 unsigned int lcore_id; /**< Node running lcore. */ 111 uint64_t total_sched_objs; /**< Number of objects scheduled. */ 112 uint64_t total_sched_fail; /**< Number of scheduled failure. */ 113 } dispatch; 114 }; 115 /* Fast path area */ 116 __extension__ struct __rte_cache_aligned { 117 #define RTE_NODE_CTX_SZ 16 118 union { 119 uint8_t ctx[RTE_NODE_CTX_SZ]; 120 __extension__ struct { 121 void *ctx_ptr; 122 void *ctx_ptr2; 123 }; 124 }; /**< Node Context. */ 125 uint16_t size; /**< Total number of objects available. */ 126 uint16_t idx; /**< Number of objects used. */ 127 rte_graph_off_t off; /**< Offset of node in the graph reel. */ 128 uint64_t total_cycles; /**< Cycles spent in this node. */ 129 uint64_t total_calls; /**< Calls done to this node. */ 130 uint64_t total_objs; /**< Objects processed by this node. */ 131 union { 132 void **objs; /**< Array of object pointers. */ 133 uint64_t objs_u64; 134 }; 135 union { 136 rte_node_process_t process; /**< Process function. */ 137 uint64_t process_u64; 138 }; 139 alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */ 140 }; 141 }; 142 143 static_assert(offsetof(struct rte_node, nodes) - offsetof(struct rte_node, ctx) 144 == RTE_CACHE_LINE_MIN_SIZE, "rte_node fast path area must fit in 64 bytes"); 145 146 /** 147 * @internal 148 * 149 * Allocate a stream of objects. 150 * 151 * If stream already exists then re-allocate it to a larger size. 152 * 153 * @param graph 154 * Pointer to the graph object. 155 * @param node 156 * Pointer to the node object. 157 */ 158 void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node); 159 160 /** 161 * @internal 162 * 163 * Allocate a stream with requested number of objects. 164 * 165 * If stream already exists then re-allocate it to a larger size. 166 * 167 * @param graph 168 * Pointer to the graph object. 169 * @param node 170 * Pointer to the node object. 171 * @param req_size 172 * Number of objects to be allocated. 173 */ 174 void __rte_node_stream_alloc_size(struct rte_graph *graph, 175 struct rte_node *node, uint16_t req_size); 176 177 /* Fast path helper functions */ 178 179 /** 180 * @internal 181 * 182 * Enqueue a given node to the tail of the graph reel. 183 * 184 * @param graph 185 * Pointer Graph object. 186 * @param node 187 * Pointer to node object to be enqueued. 188 */ 189 static __rte_always_inline void 190 __rte_node_process(struct rte_graph *graph, struct rte_node *node) 191 { 192 uint64_t start; 193 uint16_t rc; 194 void **objs; 195 196 RTE_ASSERT(node->fence == RTE_GRAPH_FENCE); 197 objs = node->objs; 198 rte_prefetch0(objs); 199 200 if (rte_graph_has_stats_feature()) { 201 start = rte_rdtsc(); 202 rc = node->process(graph, node, objs, node->idx); 203 node->total_cycles += rte_rdtsc() - start; 204 node->total_calls++; 205 node->total_objs += rc; 206 } else { 207 node->process(graph, node, objs, node->idx); 208 } 209 node->idx = 0; 210 } 211 212 /** 213 * @internal 214 * 215 * Enqueue a given node to the tail of the graph reel. 216 * 217 * @param graph 218 * Pointer Graph object. 219 * @param node 220 * Pointer to node object to be enqueued. 221 */ 222 static __rte_always_inline void 223 __rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node) 224 { 225 uint32_t tail; 226 227 tail = graph->tail; 228 graph->cir_start[tail++] = node->off; 229 graph->tail = tail & graph->cir_mask; 230 } 231 232 /** 233 * @internal 234 * 235 * Enqueue sequence prologue function. 236 * 237 * Updates the node to tail of graph reel and resizes the number of objects 238 * available in the stream as needed. 239 * 240 * @param graph 241 * Pointer to the graph object. 242 * @param node 243 * Pointer to the node object. 244 * @param idx 245 * Index at which the object enqueue starts from. 246 * @param space 247 * Space required for the object enqueue. 248 */ 249 static __rte_always_inline void 250 __rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node, 251 const uint16_t idx, const uint16_t space) 252 { 253 254 /* Add to the pending stream list if the node is new */ 255 if (idx == 0) 256 __rte_node_enqueue_tail_update(graph, node); 257 258 if (unlikely(node->size < (idx + space))) 259 __rte_node_stream_alloc_size(graph, node, node->size + space); 260 } 261 262 /** 263 * @internal 264 * 265 * Get the node pointer from current node edge id. 266 * 267 * @param node 268 * Current node pointer. 269 * @param next 270 * Edge id of the required node. 271 * 272 * @return 273 * Pointer to the node denoted by the edge id. 274 */ 275 static __rte_always_inline struct rte_node * 276 __rte_node_next_node_get(struct rte_node *node, rte_edge_t next) 277 { 278 RTE_ASSERT(next < node->nb_edges); 279 RTE_ASSERT(node->fence == RTE_GRAPH_FENCE); 280 node = node->nodes[next]; 281 RTE_ASSERT(node->fence == RTE_GRAPH_FENCE); 282 283 return node; 284 } 285 286 /** 287 * Enqueue the objs to next node for further processing and set 288 * the next node to pending state in the circular buffer. 289 * 290 * @param graph 291 * Graph pointer returned from rte_graph_lookup(). 292 * @param node 293 * Current node pointer. 294 * @param next 295 * Relative next node index to enqueue objs. 296 * @param objs 297 * Objs to enqueue. 298 * @param nb_objs 299 * Number of objs to enqueue. 300 */ 301 static inline void 302 rte_node_enqueue(struct rte_graph *graph, struct rte_node *node, 303 rte_edge_t next, void **objs, uint16_t nb_objs) 304 { 305 node = __rte_node_next_node_get(node, next); 306 const uint16_t idx = node->idx; 307 308 __rte_node_enqueue_prologue(graph, node, idx, nb_objs); 309 310 rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *)); 311 node->idx = idx + nb_objs; 312 } 313 314 /** 315 * Enqueue only one obj to next node for further processing and 316 * set the next node to pending state in the circular buffer. 317 * 318 * @param graph 319 * Graph pointer returned from rte_graph_lookup(). 320 * @param node 321 * Current node pointer. 322 * @param next 323 * Relative next node index to enqueue objs. 324 * @param obj 325 * Obj to enqueue. 326 */ 327 static inline void 328 rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node, 329 rte_edge_t next, void *obj) 330 { 331 node = __rte_node_next_node_get(node, next); 332 uint16_t idx = node->idx; 333 334 __rte_node_enqueue_prologue(graph, node, idx, 1); 335 336 node->objs[idx++] = obj; 337 node->idx = idx; 338 } 339 340 /** 341 * Enqueue only two objs to next node for further processing and 342 * set the next node to pending state in the circular buffer. 343 * Same as rte_node_enqueue_x1 but enqueue two objs. 344 * 345 * @param graph 346 * Graph pointer returned from rte_graph_lookup(). 347 * @param node 348 * Current node pointer. 349 * @param next 350 * Relative next node index to enqueue objs. 351 * @param obj0 352 * Obj to enqueue. 353 * @param obj1 354 * Obj to enqueue. 355 */ 356 static inline void 357 rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node, 358 rte_edge_t next, void *obj0, void *obj1) 359 { 360 node = __rte_node_next_node_get(node, next); 361 uint16_t idx = node->idx; 362 363 __rte_node_enqueue_prologue(graph, node, idx, 2); 364 365 node->objs[idx++] = obj0; 366 node->objs[idx++] = obj1; 367 node->idx = idx; 368 } 369 370 /** 371 * Enqueue only four objs to next node for further processing and 372 * set the next node to pending state in the circular buffer. 373 * Same as rte_node_enqueue_x1 but enqueue four objs. 374 * 375 * @param graph 376 * Graph pointer returned from rte_graph_lookup(). 377 * @param node 378 * Current node pointer. 379 * @param next 380 * Relative next node index to enqueue objs. 381 * @param obj0 382 * 1st obj to enqueue. 383 * @param obj1 384 * 2nd obj to enqueue. 385 * @param obj2 386 * 3rd obj to enqueue. 387 * @param obj3 388 * 4th obj to enqueue. 389 */ 390 static inline void 391 rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node, 392 rte_edge_t next, void *obj0, void *obj1, void *obj2, 393 void *obj3) 394 { 395 node = __rte_node_next_node_get(node, next); 396 uint16_t idx = node->idx; 397 398 __rte_node_enqueue_prologue(graph, node, idx, 4); 399 400 node->objs[idx++] = obj0; 401 node->objs[idx++] = obj1; 402 node->objs[idx++] = obj2; 403 node->objs[idx++] = obj3; 404 node->idx = idx; 405 } 406 407 /** 408 * Enqueue objs to multiple next nodes for further processing and 409 * set the next nodes to pending state in the circular buffer. 410 * objs[i] will be enqueued to nexts[i]. 411 * 412 * @param graph 413 * Graph pointer returned from rte_graph_lookup(). 414 * @param node 415 * Current node pointer. 416 * @param nexts 417 * List of relative next node indices to enqueue objs. 418 * @param objs 419 * List of objs to enqueue. 420 * @param nb_objs 421 * Number of objs to enqueue. 422 */ 423 static inline void 424 rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node, 425 rte_edge_t *nexts, void **objs, uint16_t nb_objs) 426 { 427 uint16_t i; 428 429 for (i = 0; i < nb_objs; i++) 430 rte_node_enqueue_x1(graph, node, nexts[i], objs[i]); 431 } 432 433 /** 434 * Get the stream of next node to enqueue the objs. 435 * Once done with the updating the objs, needs to call 436 * rte_node_next_stream_put to put the next node to pending state. 437 * 438 * @param graph 439 * Graph pointer returned from rte_graph_lookup(). 440 * @param node 441 * Current node pointer. 442 * @param next 443 * Relative next node index to get stream. 444 * @param nb_objs 445 * Requested free size of the next stream. 446 * 447 * @return 448 * Valid next stream on success. 449 * 450 * @see rte_node_next_stream_put(). 451 */ 452 static inline void ** 453 rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node, 454 rte_edge_t next, uint16_t nb_objs) 455 { 456 node = __rte_node_next_node_get(node, next); 457 const uint16_t idx = node->idx; 458 uint16_t free_space = node->size - idx; 459 460 if (unlikely(free_space < nb_objs)) 461 __rte_node_stream_alloc_size(graph, node, node->size + nb_objs); 462 463 return &node->objs[idx]; 464 } 465 466 /** 467 * Put the next stream to pending state in the circular buffer 468 * for further processing. Should be invoked after rte_node_next_stream_get(). 469 * 470 * @param graph 471 * Graph pointer returned from rte_graph_lookup(). 472 * @param node 473 * Current node pointer. 474 * @param next 475 * Relative next node index.. 476 * @param idx 477 * Number of objs updated in the stream after getting the stream using 478 * rte_node_next_stream_get. 479 * 480 * @see rte_node_next_stream_get(). 481 */ 482 static inline void 483 rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node, 484 rte_edge_t next, uint16_t idx) 485 { 486 if (unlikely(!idx)) 487 return; 488 489 node = __rte_node_next_node_get(node, next); 490 if (node->idx == 0) 491 __rte_node_enqueue_tail_update(graph, node); 492 493 node->idx += idx; 494 } 495 496 /** 497 * Home run scenario, Enqueue all the objs of current node to next 498 * node in optimized way by swapping the streams of both nodes. 499 * Performs good when next node is already not in pending state. 500 * If next node is already in pending state then normal enqueue 501 * will be used. 502 * 503 * @param graph 504 * Graph pointer returned from rte_graph_lookup(). 505 * @param src 506 * Current node pointer. 507 * @param next 508 * Relative next node index. 509 */ 510 static inline void 511 rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src, 512 rte_edge_t next) 513 { 514 struct rte_node *dst = __rte_node_next_node_get(src, next); 515 516 /* Let swap the pointers if dst don't have valid objs */ 517 if (likely(dst->idx == 0)) { 518 void **dobjs = dst->objs; 519 uint16_t dsz = dst->size; 520 dst->objs = src->objs; 521 dst->size = src->size; 522 src->objs = dobjs; 523 src->size = dsz; 524 dst->idx = src->idx; 525 __rte_node_enqueue_tail_update(graph, dst); 526 } else { /* Move the objects from src node to dst node */ 527 rte_node_enqueue(graph, src, next, src->objs, src->idx); 528 } 529 } 530 531 /** 532 * Test the validity of model. 533 * 534 * @param model 535 * Model to check. 536 * 537 * @return 538 * True if graph model is valid, false otherwise. 539 */ 540 bool 541 rte_graph_model_is_valid(uint8_t model); 542 543 /** 544 * @note This function does not perform any locking, and is only safe to call 545 * before graph running. It will set all graphs the same model. 546 * 547 * @param model 548 * Name of the graph worker model. 549 * 550 * @return 551 * 0 on success, -1 otherwise. 552 */ 553 int rte_graph_worker_model_set(uint8_t model); 554 555 /** 556 * Get the graph worker model 557 * 558 * @note All graph will use the same model and this function will get model from the first one. 559 * Used for slow path. 560 * 561 * @param graph 562 * Graph pointer. 563 * 564 * @return 565 * Graph worker model on success. 566 */ 567 uint8_t rte_graph_worker_model_get(struct rte_graph *graph); 568 569 /** 570 * Get the graph worker model without check 571 * 572 * @note All graph will use the same model and this function will get model from the first one. 573 * Used for fast path. 574 * 575 * @param graph 576 * Graph pointer. 577 * 578 * @return 579 * Graph worker model on success. 580 */ 581 static __rte_always_inline 582 uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph) 583 { 584 return graph->model; 585 } 586 587 #ifdef __cplusplus 588 } 589 #endif 590 591 #endif /* _RTE_GRAPH_WORKER_COIMMON_H_ */ 592