/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef __INCLUDE_RTE_SCHED_H__
#define __INCLUDE_RTE_SCHED_H__

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @file
 * RTE Hierarchical Scheduler
 *
 * The hierarchical scheduler prioritizes the transmission of packets
 * from different users and traffic classes according to the Service
 * Level Agreements (SLAs) defined for the current network node.
 *
 * The scheduler supports thousands of packet queues grouped under a
 * 5-level hierarchy:
 *     1. Port:
 *           - Typical usage: output Ethernet port;
 *           - Multiple ports are scheduled in round robin order with
 *             equal priority;
 *     2. Subport:
 *           - Typical usage: group of users;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per subport);
 *           - Upper limit enforced per traffic class at subport level;
 *           - Lower priority traffic classes able to reuse subport
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same subport;
 *           - When any subport traffic class is oversubscribed
 *             (configuration time event), the usage of subport member
 *             pipes with high demand for that traffic class
 *             is truncated to a dynamically adjusted value with no
 *             impact to low demand pipes;
 *     3. Pipe:
 *           - Typical usage: individual user/subscriber;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per pipe);
 *     4. Traffic class:
 *           - Traffic classes of the same pipe handled in strict
 *             priority order;
 *           - Upper limit enforced per traffic class at the pipe level;
 *           - Lower priority traffic classes able to reuse pipe
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same pipe;
 *     5. Queue:
 *           - Typical usage: queue hosting packets from one or
 *             multiple connections of same traffic class belonging to
 *             the same user;
 *           - Weighted Round Robin (WRR) is used to service the
 *             queues within same pipe lowest priority traffic class (best-effort).
 */

#include <rte_compat.h>
#include <rte_mbuf.h>
#include <rte_meter.h>

/** Congestion Management */
#include "rte_red.h"
#include "rte_pie.h"

/** Maximum number of queues per pipe.
 * Note that the multiple queues (power of 2) can only be assigned to
 * lowest priority (best-effort) traffic class. Other higher priority traffic
 * classes can only have one queue.
 * Can not change.
 *
 * @see struct rte_sched_port_params
 */
#define RTE_SCHED_QUEUES_PER_PIPE    16

/** Number of WRR queues for best-effort traffic class per pipe.
 *
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_BE_QUEUES_PER_PIPE    4

/** Number of traffic classes per pipe (as well as subport).
 * @see struct rte_sched_subport_params
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)

/** Best-effort traffic class ID
 * Can not change.
 */
#define RTE_SCHED_TRAFFIC_CLASS_BE    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)

/**
 * Ethernet framing overhead. Overhead fields per Ethernet frame:
 * 1. Preamble: 7 bytes;
 * 2. Start of Frame Delimiter (SFD): 1 byte;
 * 3. Frame Check Sequence (FCS): 4 bytes;
 * 4. Inter Frame Gap (IFG): 12 bytes.
 *
 * The FCS is considered overhead only if not included in the packet
 * length (field pkt_len of struct rte_mbuf).
 *
 * @see struct rte_sched_port_params
 */
#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT    24
#endif

/**
 * Congestion Management (CMAN) mode
 *
 * This is used for controlling the admission of packets into a packet queue or
 * group of packet queues on congestion.
 *
 * The *Random Early Detection (RED)* algorithm works by proactively dropping
 * more and more input packets as the queue occupancy builds up. When the queue
 * is full or almost full, RED effectively works as *tail drop*. The *Weighted
 * RED* algorithm uses a separate set of RED thresholds for each packet color.
 *
 * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
 * drops a packet at the onset of the congestion and tries to control the
 * latency around the target value. The congestion detection, however, is based
 * on the queueing latency instead of the queue length like RED. For more
 * information, refer RFC8033.
 */
enum rte_sched_cman_mode {
	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
	RTE_SCHED_CMAN_PIE, /**< Proportional Integral Controller Enhanced (PIE) */
};

/**
 * Pipe configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with pipe or pipe traffic class
 * rate (measured as percentage of output port rate) determined as
 * credits_per_period divided by period. One credit represents one
 * byte.
 */
struct rte_sched_pipe_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period (measured in milliseconds) */
	uint64_t tc_period;

	/** Best-effort traffic class oversubscription weight */
	uint8_t tc_ov_weight;

	/** WRR weights of best-effort traffic class queues */
	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

/**
 * Congestion Management configuration parameters.
 */
struct rte_sched_cman_params {
	/** Congestion Management mode */
	enum rte_sched_cman_mode cman_mode;

	/* Parameters for the selected mode; which union member is valid
	 * is determined by cman_mode.
	 */
	union {
		/** RED parameters */
		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];

		/** PIE parameters */
		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	};
};

/**
 * Subport configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with pipe or pipe traffic class
 * rate (measured as percentage of output port rate) determined as
 * credits_per_period divided by period. One credit represents one
 * byte.
 */
struct rte_sched_subport_params {
	/** Number of subport pipes.
	 * The subport can enable/allocate fewer pipes than the maximum
	 * number set through struct rte_sched_port_params::n_pipes_per_subport,
	 * as needed, to avoid memory allocation for the queues of the
	 * pipes that are not really needed.
	 */
	uint32_t n_pipes_per_subport_enabled;

	/** Packet queue size for each traffic class.
	 * All the pipes within the same subport share the same
	 * configuration for the queues.
	 */
	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Pipe profile table.
	 * Every pipe is configured using one of the profiles from this table.
	 */
	struct rte_sched_pipe_params *pipe_profiles;

	/** Profiles in the pipe profile table */
	uint32_t n_pipe_profiles;

	/** Max allowed profiles in the pipe profile table */
	uint32_t n_max_pipe_profiles;

	/** Congestion Management parameters
	 * If NULL the congestion management is disabled for the subport,
	 * otherwise proper parameters need to be provided.
	 */
	struct rte_sched_cman_params *cman_params;
};

/** Subport bandwidth (token bucket) profile parameters */
struct rte_sched_subport_profile_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period for rates (measured in milliseconds) */
	uint64_t tc_period;
};

/** Subport statistics */
struct rte_sched_subport_stats {
	/** Number of packets successfully written */
	uint64_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of packets dropped */
	uint64_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes successfully written for each traffic class */
	uint64_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes dropped for each traffic class */
	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of packets dropped by congestion management scheme */
	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
};

/** Queue statistics */
struct rte_sched_queue_stats {
	/** Packets successfully written */
	uint64_t n_pkts;
	/** Packets dropped */
	uint64_t n_pkts_dropped;

	/** Packets dropped by congestion management scheme */
	uint64_t n_pkts_cman_dropped;

	/** Bytes successfully written */
	uint64_t n_bytes;

	/** Bytes dropped */
	uint64_t n_bytes_dropped;
};

/** Port configuration parameters. */
struct rte_sched_port_params {
	/** Name of the port to be associated */
	const char *name;

	/** CPU socket ID */
	int socket;

	/** Output port rate (measured in bytes per second) */
	uint64_t rate;

	/** Maximum Ethernet frame size (measured in bytes).
	 * Should not include the framing overhead.
	 */
	uint32_t mtu;

	/** Framing overhead per packet (measured in bytes) */
	uint32_t frame_overhead;

	/** Number of subports */
	uint32_t n_subports_per_port;

	/** Subport profile table.
	 * Every subport is configured using one of the profiles from this table.
	 */
	struct rte_sched_subport_profile_params *subport_profiles;

	/** Profiles in the subport profile table */
	uint32_t n_subport_profiles;

	/** Max allowed profiles in the subport profile table */
	uint32_t n_max_subport_profiles;

	/** Maximum number of subport pipes.
	 * This parameter is used to reserve a fixed number of bits
	 * in struct rte_mbuf::sched.queue_id for the pipe_id for all
	 * the subports of the same port.
	 */
	uint32_t n_pipes_per_subport;
};

/*
 * Configuration
 */

/**
 * Hierarchical scheduler port configuration
 *
 * @param params
 *   Port scheduler configuration parameter structure
 * @return
 *   Handle to port scheduler instance upon success or NULL otherwise.
 */
struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params);

/**
 * Hierarchical scheduler port free
 *
 * @param port
 *   Handle to port scheduler instance.
 *   If port is NULL, no operation is performed.
 */
void
rte_sched_port_free(struct rte_sched_port *port);

/**
 * Hierarchical scheduler pipe profile add
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Pipe profile parameters
 * @param pipe_profile_id
 *   Set to valid profile id when profile is added successfully.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_pipe_params *params,
	uint32_t *pipe_profile_id);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Hierarchical scheduler subport bandwidth profile add
 * Note that this function is safe to use in runtime for adding new
 * subport bandwidth profile as it doesn't have any impact on hierarchical
 * structure of the scheduler.
 * @param port
 *   Handle to port scheduler instance
 * @param profile
 *   Subport bandwidth profile
 * @param subport_profile_id
 *   Subport profile id
 * @return
 *   0 upon success, error code otherwise
 */
__rte_experimental
int
rte_sched_port_subport_profile_add(struct rte_sched_port *port,
	struct rte_sched_subport_profile_params *profile,
	uint32_t *subport_profile_id);

/**
 * Hierarchical scheduler subport configuration
 * Note that this function is safe to use at runtime
 * to configure subport bandwidth profile.
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Subport configuration parameters. Must be non-NULL
 *   for first invocation (i.e initialization) for a given
 *   subport. Ignored (recommended value is NULL) for all
 *   subsequent invocation on the same subport.
 * @param subport_profile_id
 *   ID of subport bandwidth profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_config(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_params *params,
	uint32_t subport_profile_id);

/**
 * Hierarchical scheduler pipe configuration
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param pipe_id
 *   Pipe ID within subport
 * @param pipe_profile
 *   ID of subport-level pre-configured pipe profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_pipe_config(struct rte_sched_port *port,
	uint32_t subport_id,
	uint32_t pipe_id,
	int32_t pipe_profile);

/**
 * Hierarchical scheduler memory footprint size per port
 *
 * @param port_params
 *   Port scheduler configuration parameter structure
 * @param subport_params
 *   Array of subport parameter structures
 * @return
 *   Memory footprint size in bytes upon success, 0 otherwise
 */
uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
	struct rte_sched_subport_params **subport_params);

/*
 * Statistics
 */

/**
 * Hierarchical scheduler subport statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the statistics
 *   counters should be stored
 * @param tc_ov
 *   Pointer to pre-allocated RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE-entry array
 *   where the oversubscription status for each of the subport traffic classes
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_read_stats(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_stats *stats,
	uint32_t *tc_ov);

/**
 * Hierarchical scheduler queue statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param queue_id
 *   Queue ID within port scheduler
 * @param stats
 *   Pointer to pre-allocated queue statistics structure where the statistics
 *   counters should be stored
 * @param qlen
 *   Pointer to pre-allocated variable where the current queue length
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_queue_read_stats(struct rte_sched_port *port,
	uint32_t queue_id,
	struct rte_sched_queue_stats *stats,
	uint16_t *qlen);

/**
 * Scheduler hierarchy path write to packet descriptor. Typically
 * called by the packet classification stage.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 * @param color
 *   Packet color set
 */
void
rte_sched_port_pkt_write(struct rte_sched_port *port,
	struct rte_mbuf *pkt,
	uint32_t subport, uint32_t pipe, uint32_t traffic_class,
	uint32_t queue, enum rte_color color);

/**
 * Scheduler hierarchy path read from packet descriptor (struct
 * rte_mbuf). Typically called as part of the hierarchical scheduler
 * enqueue operation. The subport, pipe, traffic class and queue
 * parameters need to be pre-allocated by the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 */
void
rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
	const struct rte_mbuf *pkt,
	uint32_t *subport, uint32_t *pipe,
	uint32_t *traffic_class, uint32_t *queue);

/**
 * Scheduler hierarchy path color read from packet descriptor
 * (struct rte_mbuf).
 *
 * @param pkt
 *   Packet descriptor handle
 * @return
 *   Packet color
 */
enum rte_color
rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);

/**
 * Hierarchical scheduler port enqueue. Writes up to n_pkts to port
 * scheduler and returns the number of packets actually written. For
 * each packet, the port scheduler queue to write the packet to is
 * identified by reading the hierarchy path from the packet
 * descriptor; if the queue is full or congested and the packet is not
 * written to the queue, then the packet is automatically dropped
 * without any action required from the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Array storing the packet descriptor handles
 * @param n_pkts
 *   Number of packets to enqueue from the pkts array into the port scheduler
 * @return
 *   Number of packets successfully enqueued
 */
int
rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

/**
 * Hierarchical scheduler port dequeue. Reads up to n_pkts from the
 * port scheduler and stores them in the pkts array and returns the
 * number of packets actually read. The pkts array needs to be
 * pre-allocated by the caller with at least n_pkts entries.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Pre-allocated packet descriptor array where the packets dequeued
 *   from the port scheduler should be stored
 * @param n_pkts
 *   Number of packets to dequeue from the port scheduler
 * @return
 *   Number of packets successfully dequeued and placed in the pkts array
 */
int
rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Hierarchical scheduler subport traffic class
 * oversubscription enable/disable.
 * This function should be called at the time of subport initialization.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param tc_ov_enable
 *   Boolean flag to enable/disable TC OV
 * @return
 *   0 upon success, error code otherwise
 */
__rte_experimental
int
rte_sched_subport_tc_ov_config(struct rte_sched_port *port, uint32_t subport_id, bool tc_ov_enable);

#ifdef __cplusplus
}
#endif

#endif /* __INCLUDE_RTE_SCHED_H__ */