/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2020 Marvell International Ltd.
 */

#ifndef _RTE_GRAPH_WORKER_COMMON_H_
#define _RTE_GRAPH_WORKER_COMMON_H_

/**
 * @file rte_graph_worker_common.h
 *
 * This API allows a worker thread to walk over a graph and nodes to create,
 * process, enqueue and move streams of objects to the next nodes.
 */

#include <stdalign.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_memcpy.h>
#include <rte_memory.h>

#include "rte_graph.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Graph worker models */
/* When adding a new graph model entry, update rte_graph_model_is_valid() implementation. */
#define RTE_GRAPH_MODEL_RTC 0 /**< Run-To-Completion model. It is the default model. */
#define RTE_GRAPH_MODEL_MCORE_DISPATCH 1
/**< Dispatch model to support cross-core dispatching within core affinity. */
#define RTE_GRAPH_MODEL_DEFAULT RTE_GRAPH_MODEL_RTC /**< Default graph model. */

/**
 * @internal
 *
 * Singly-linked list head for graph schedule run-queue.
 */
SLIST_HEAD(rte_graph_rq_head, rte_graph);

/**
 * @internal
 *
 * Data structure to hold graph data.
 */
struct __rte_cache_aligned rte_graph {
	/* Fast path area. */
	uint32_t tail;               /**< Tail of circular buffer. */
	uint32_t head;               /**< Head of circular buffer. */
	uint32_t cir_mask;           /**< Circular buffer wrap around mask. */
	rte_node_t nb_nodes;         /**< Number of nodes in the graph. */
	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
	uint8_t model;               /**< Graph model. */
	uint8_t reserved1;           /**< Reserved for future use. */
	uint16_t reserved2;          /**< Reserved for future use. */
	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			alignas(RTE_CACHE_LINE_SIZE) struct rte_graph_rq_head *rq; /* The run-queue */
			struct rte_graph_rq_head rq_head; /* The head for run-queue list */

			unsigned int lcore_id;  /**< The lcore the graph runs on. */
			struct rte_ring *wq;    /**< The work-queue for pending streams. */
			struct rte_mempool *mp; /**< The mempool for scheduling streams. */
		} dispatch; /**< Only used by the dispatch model. */
	};
	SLIST_ENTRY(rte_graph) next; /* The next for rte_graph list */
	/* End of Fast path area. */
	rte_graph_t id;  /**< Graph identifier. */
	int socket;      /**< Socket ID where memory is allocated. */
	char name[RTE_GRAPH_NAMESIZE]; /**< Name of the graph. */
	bool pcap_enable;              /**< Pcap trace enabled. */
	/** Number of packets captured per core. */
	uint64_t nb_pkt_captured;
	/** Number of packets to capture per core. */
	uint64_t nb_pkt_to_capture;
	char pcap_filename[RTE_GRAPH_PCAP_FILE_SZ]; /**< Pcap filename. */
	uint64_t fence; /**< Fence. */
};

/**
 * @internal
 *
 * Data structure to hold node data.
 */
struct __rte_cache_aligned rte_node {
	/* Slow path area. */
	uint64_t fence;         /**< Fence. */
	rte_graph_off_t next;   /**< Index to next node. */
	rte_node_t id;          /**< Node identifier. */
	rte_node_t parent_id;   /**< Parent node identifier. */
	rte_edge_t nb_edges;    /**< Number of edges from this node. */
	uint32_t realloc_count; /**< Number of times realloced. */

	char parent[RTE_NODE_NAMESIZE]; /**< Parent node name. */
	char name[RTE_NODE_NAMESIZE];   /**< Name of the node. */

	/** Original process function when pcap is enabled. */
	rte_node_process_t original_process;

	union {
		/* Fast schedule area for mcore dispatch model */
		struct {
			unsigned int lcore_id;     /**< Lcore the node runs on. */
			uint64_t total_sched_objs; /**< Number of objects scheduled. */
			uint64_t total_sched_fail; /**< Number of scheduling failures. */
		} dispatch;
	};
	/* Fast path area. */
#define RTE_NODE_CTX_SZ 16
	alignas(RTE_CACHE_LINE_SIZE) uint8_t ctx[RTE_NODE_CTX_SZ]; /**< Node Context. */
	uint16_t size;         /**< Total number of objects available. */
	uint16_t idx;          /**< Number of objects used. */
	rte_graph_off_t off;   /**< Offset of node in the graph reel. */
	uint64_t total_cycles; /**< Cycles spent in this node. */
	uint64_t total_calls;  /**< Calls done to this node. */
	uint64_t total_objs;   /**< Objects processed by this node. */
	union {
		void **objs;       /**< Array of object pointers. */
		uint64_t objs_u64;
	};
	union {
		rte_node_process_t process; /**< Process function. */
		uint64_t process_u64;
	};
	alignas(RTE_CACHE_LINE_MIN_SIZE) struct rte_node *nodes[]; /**< Next nodes. */
};
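/*
 * Illustrative sketch (not part of this header): node implementations
 * commonly overlay a private, node-specific structure on the ctx[] area of
 * struct rte_node. The my_node_ctx type and my_node_ctx_get() helper below
 * are hypothetical; any such structure must fit within RTE_NODE_CTX_SZ bytes.
 *
 *	struct my_node_ctx {
 *		uint16_t port_id;
 *		uint16_t queue_id;
 *	};
 *
 *	static inline struct my_node_ctx *
 *	my_node_ctx_get(struct rte_node *node)
 *	{
 *		RTE_BUILD_BUG_ON(sizeof(struct my_node_ctx) > RTE_NODE_CTX_SZ);
 *		return (struct my_node_ctx *)node->ctx;
 *	}
 */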
/**
 * @internal
 *
 * Allocate a stream of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
void __rte_node_stream_alloc(struct rte_graph *graph, struct rte_node *node);

/**
 * @internal
 *
 * Allocate a stream with the requested number of objects.
 *
 * If the stream already exists then re-allocate it to a larger size.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param req_size
 *   Number of objects to be allocated.
 */
void __rte_node_stream_alloc_size(struct rte_graph *graph,
				  struct rte_node *node, uint16_t req_size);

/* Fast path helper functions */

/**
 * @internal
 *
 * Call the process function of the given node and update its statistics if
 * the stats collection feature is enabled.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 */
static __rte_always_inline void
__rte_node_process(struct rte_graph *graph, struct rte_node *node)
{
	uint64_t start;
	uint16_t rc;
	void **objs;

	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	objs = node->objs;
	rte_prefetch0(objs);

	if (rte_graph_has_stats_feature()) {
		start = rte_rdtsc();
		rc = node->process(graph, node, objs, node->idx);
		node->total_cycles += rte_rdtsc() - start;
		node->total_calls++;
		node->total_objs += rc;
	} else {
		node->process(graph, node, objs, node->idx);
	}
	node->idx = 0;
}

/**
 * @internal
 *
 * Enqueue a given node to the tail of the graph reel.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object to be enqueued.
 */
static __rte_always_inline void
__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
{
	uint32_t tail;

	tail = graph->tail;
	graph->cir_start[tail++] = node->off;
	graph->tail = tail & graph->cir_mask;
}

/**
 * @internal
 *
 * Enqueue sequence prologue function.
 *
 * Adds the node to the tail of the graph reel and resizes the number of
 * objects available in the stream as needed.
 *
 * @param graph
 *   Pointer to the graph object.
 * @param node
 *   Pointer to the node object.
 * @param idx
 *   Index from which the object enqueue starts.
 * @param space
 *   Space required for the object enqueue.
 */
static __rte_always_inline void
__rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
			    const uint16_t idx, const uint16_t space)
{
	/* Add to the pending stream list if the node is new */
	if (idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	if (unlikely(node->size < (idx + space)))
		__rte_node_stream_alloc_size(graph, node, node->size + space);
}

/**
 * @internal
 *
 * Get the node pointer from the current node edge id.
 *
 * @param node
 *   Current node pointer.
 * @param next
 *   Edge id of the required node.
 *
 * @return
 *   Pointer to the node denoted by the edge id.
 */
static __rte_always_inline struct rte_node *
__rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
{
	RTE_ASSERT(next < node->nb_edges);
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);
	node = node->nodes[next];
	RTE_ASSERT(node->fence == RTE_GRAPH_FENCE);

	return node;
}

/**
 * Enqueue the objs to the next node for further processing and set
 * the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param objs
 *   Objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
		 rte_edge_t next, void **objs, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, nb_objs);

	rte_memcpy(&node->objs[idx], objs, nb_objs * sizeof(void *));
	node->idx = idx + nb_objs;
}

/**
 * Enqueue only one obj to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue the obj.
 * @param obj
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 1);

	node->objs[idx++] = obj;
	node->idx = idx;
}

/**
 * Enqueue only two objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues two objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   Obj to enqueue.
 * @param obj1
 *   Obj to enqueue.
 */
static inline void
rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 2);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->idx = idx;
}

/**
 * Enqueue only four objs to the next node for further processing and
 * set the next node to pending state in the circular buffer.
 * Same as rte_node_enqueue_x1 but enqueues four objs.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to enqueue objs.
 * @param obj0
 *   1st obj to enqueue.
 * @param obj1
 *   2nd obj to enqueue.
 * @param obj2
 *   3rd obj to enqueue.
 * @param obj3
 *   4th obj to enqueue.
 */
static inline void
rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
		    rte_edge_t next, void *obj0, void *obj1, void *obj2,
		    void *obj3)
{
	node = __rte_node_next_node_get(node, next);
	uint16_t idx = node->idx;

	__rte_node_enqueue_prologue(graph, node, idx, 4);

	node->objs[idx++] = obj0;
	node->objs[idx++] = obj1;
	node->objs[idx++] = obj2;
	node->objs[idx++] = obj3;
	node->idx = idx;
}

/**
 * Enqueue objs to multiple next nodes for further processing and
 * set the next nodes to pending state in the circular buffer.
 * objs[i] will be enqueued to nexts[i].
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param nexts
 *   List of relative next node indices to enqueue objs.
 * @param objs
 *   List of objs to enqueue.
 * @param nb_objs
 *   Number of objs to enqueue.
 */
static inline void
rte_node_enqueue_next(struct rte_graph *graph, struct rte_node *node,
		      rte_edge_t *nexts, void **objs, uint16_t nb_objs)
{
	uint16_t i;

	for (i = 0; i < nb_objs; i++)
		rte_node_enqueue_x1(graph, node, nexts[i], objs[i]);
}
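/*
 * Illustrative sketch (not part of this header): a typical node process
 * callback classifies each received object and enqueues it to one of the
 * node's edges with rte_node_enqueue_x1(). The edge ids and the
 * my_classify() helper below are hypothetical.
 *
 *	enum my_node_next {
 *		MY_NODE_NEXT_PASS,
 *		MY_NODE_NEXT_DROP,
 *		MY_NODE_NEXT_MAX,
 *	};
 *
 *	static uint16_t
 *	my_node_process(struct rte_graph *graph, struct rte_node *node,
 *			void **objs, uint16_t nb_objs)
 *	{
 *		uint16_t i;
 *
 *		for (i = 0; i < nb_objs; i++) {
 *			rte_edge_t next = my_classify(objs[i]) ?
 *				MY_NODE_NEXT_PASS : MY_NODE_NEXT_DROP;
 *
 *			rte_node_enqueue_x1(graph, node, next, objs[i]);
 *		}
 *
 *		return nb_objs;
 *	}
 */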
/**
 * Get the stream of the next node to enqueue the objs.
 * Once done updating the objs, rte_node_next_stream_put() needs to be called
 * to put the next node into pending state.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index to get stream.
 * @param nb_objs
 *   Requested free size of the next stream.
 *
 * @return
 *   Valid next stream on success.
 *
 * @see rte_node_next_stream_put().
 */
static inline void **
rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t nb_objs)
{
	node = __rte_node_next_node_get(node, next);
	const uint16_t idx = node->idx;
	uint16_t free_space = node->size - idx;

	if (unlikely(free_space < nb_objs))
		__rte_node_stream_alloc_size(graph, node, node->size + nb_objs);

	return &node->objs[idx];
}

/**
 * Put the next stream to pending state in the circular buffer
 * for further processing. Should be invoked after rte_node_next_stream_get().
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param node
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 * @param idx
 *   Number of objs updated in the stream after getting the stream using
 *   rte_node_next_stream_get().
 *
 * @see rte_node_next_stream_get().
 */
static inline void
rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
			 rte_edge_t next, uint16_t idx)
{
	if (unlikely(!idx))
		return;

	node = __rte_node_next_node_get(node, next);
	if (node->idx == 0)
		__rte_node_enqueue_tail_update(graph, node);

	node->idx += idx;
}
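/*
 * Illustrative sketch (not part of this header): a source-style node can
 * write objects directly into the next node's stream obtained with
 * rte_node_next_stream_get() and then mark that node pending with
 * rte_node_next_stream_put(). The edge id, burst size and my_poll_burst()
 * producer below are hypothetical.
 *
 *	static uint16_t
 *	my_source_node_process(struct rte_graph *graph, struct rte_node *node,
 *			       void **objs, uint16_t nb_objs)
 *	{
 *		void **to_next;
 *		uint16_t n;
 *
 *		RTE_SET_USED(objs);
 *		RTE_SET_USED(nb_objs);
 *
 *		to_next = rte_node_next_stream_get(graph, node,
 *						   MY_SRC_NEXT_EDGE, MY_BURST);
 *		n = my_poll_burst(to_next, MY_BURST);
 *		rte_node_next_stream_put(graph, node, MY_SRC_NEXT_EDGE, n);
 *
 *		return n;
 *	}
 */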
/**
 * Home run scenario: enqueue all the objs of the current node to the next
 * node in an optimized way by swapping the streams of both nodes. Performs
 * well when the next node is not already in pending state. If the next node
 * is already in pending state then the normal enqueue will be used.
 *
 * @param graph
 *   Graph pointer returned from rte_graph_lookup().
 * @param src
 *   Current node pointer.
 * @param next
 *   Relative next node index.
 */
static inline void
rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
			  rte_edge_t next)
{
	struct rte_node *dst = __rte_node_next_node_get(src, next);

	/* Swap the pointers if dst doesn't have valid objs */
	if (likely(dst->idx == 0)) {
		void **dobjs = dst->objs;
		uint16_t dsz = dst->size;
		dst->objs = src->objs;
		dst->size = src->size;
		src->objs = dobjs;
		src->size = dsz;
		dst->idx = src->idx;
		__rte_node_enqueue_tail_update(graph, dst);
	} else { /* Move the objects from src node to dst node */
		rte_node_enqueue(graph, src, next, src->objs, src->idx);
	}
}

/**
 * Test the validity of the model.
 *
 * @param model
 *   Model to check.
 *
 * @return
 *   True if the graph model is valid, false otherwise.
 */
bool
rte_graph_model_is_valid(uint8_t model);

/**
 * Set the graph worker model.
 *
 * @note This function does not perform any locking, and is only safe to call
 *   before the graphs start running. It sets the same model for all graphs.
 *
 * @param model
 *   Graph worker model to set.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int rte_graph_worker_model_set(uint8_t model);
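/*
 * Illustrative sketch (not part of this header): an application that wants
 * the mcore dispatch model selects it once, before any worker lcore starts
 * walking its graph; the error handling shown is only an example.
 *
 *	if (rte_graph_worker_model_set(RTE_GRAPH_MODEL_MCORE_DISPATCH) != 0)
 *		rte_panic("Failed to set the graph worker model\n");
 */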
/**
 * Get the graph worker model.
 *
 * @note All graphs use the same model and this function gets the model from
 *   the first one. Used for the slow path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
uint8_t rte_graph_worker_model_get(struct rte_graph *graph);

/**
 * Get the graph worker model without any check.
 *
 * @note All graphs use the same model and this function gets the model from
 *   the first one. Used for the fast path.
 *
 * @param graph
 *   Graph pointer.
 *
 * @return
 *   Graph worker model on success.
 */
static __rte_always_inline
uint8_t rte_graph_worker_model_no_check_get(struct rte_graph *graph)
{
	return graph->model;
}

#ifdef __cplusplus
}
#endif

#endif /* _RTE_GRAPH_WORKER_COMMON_H_ */