/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef __INCLUDE_RTE_SCHED_H__
#define __INCLUDE_RTE_SCHED_H__

/**
 * @file
 * RTE Hierarchical Scheduler
 *
 * The hierarchical scheduler prioritizes the transmission of packets
 * from different users and traffic classes according to the Service
 * Level Agreements (SLAs) defined for the current network node.
 *
 * The scheduler supports thousands of packet queues grouped under a
 * 5-level hierarchy:
 *     1. Port:
 *           - Typical usage: output Ethernet port;
 *           - Multiple ports are scheduled in round robin order with
 *             equal priority;
 *     2. Subport:
 *           - Typical usage: group of users;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per subport);
 *           - Upper limit enforced per traffic class at subport level;
 *           - Lower priority traffic classes able to reuse subport
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same subport;
 *           - When any subport traffic class is oversubscribed
 *             (configuration time event), the usage of subport member
 *             pipes with high demand for that traffic class is
 *             capped to a dynamically adjusted value with no
 *             impact on low demand pipes;
 *     3. Pipe:
 *           - Typical usage: individual user/subscriber;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per pipe);
 *     4. Traffic class:
 *           - Traffic classes of the same pipe are handled in strict
 *             priority order;
 *           - Upper limit enforced per traffic class at the pipe level;
 *           - Lower priority traffic classes able to reuse pipe
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same pipe;
 *     5. Queue:
 *           - Typical usage: queue hosting packets from one or
 *             multiple connections of the same traffic class belonging
 *             to the same user;
 *           - Weighted Round Robin (WRR) is used to service the
 *             queues within the lowest priority (best-effort) traffic
 *             class of the same pipe.
 */

#include <stdbool.h>
#include <stdint.h>

#include <rte_mbuf.h>
#include <rte_meter.h>

/** Congestion Management */
#include "rte_red.h"
#include "rte_pie.h"

#ifdef __cplusplus
extern "C" {
#endif

/** Maximum number of queues per pipe.
 * Note that multiple queues (a power of 2) can only be assigned to the
 * lowest priority (best-effort) traffic class; every other, higher
 * priority traffic class has exactly one queue.
 * Cannot be changed.
 *
 * @see struct rte_sched_port_params
 */
#define RTE_SCHED_QUEUES_PER_PIPE 16

/** Number of WRR queues for the best-effort traffic class per pipe.
 *
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_BE_QUEUES_PER_PIPE 4

/** Number of traffic classes per pipe (as well as subport).
 * @see struct rte_sched_subport_params
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE \
	(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)

/** Best-effort traffic class ID.
 * Cannot be changed.
 */
#define RTE_SCHED_TRAFFIC_CLASS_BE (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)

/*
 * Ethernet framing overhead. Overhead fields per Ethernet frame:
 * 1. Preamble: 7 bytes;
 * 2. Start of Frame Delimiter (SFD): 1 byte;
 * 3. Frame Check Sequence (FCS): 4 bytes;
 * 4. Inter Frame Gap (IFG): 12 bytes.
 *
 * The FCS is considered overhead only if not included in the packet
 * length (field pkt_len of struct rte_mbuf).
 *
 * @see struct rte_sched_port_params
 */
#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT 24
#endif
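/**
 * Example: how the constants above combine. Illustrative sketch only;
 * it merely restates the arithmetic of the macros and assumes
 * RTE_SCHED_FRAME_OVERHEAD_DEFAULT has not been overridden.
 *
 * @code{.c}
 * // 16 queues per pipe, 4 of them best-effort: 16 - 4 + 1 = 13
 * // traffic classes per pipe, the last one (ID 12) being best-effort.
 * uint32_t n_tc = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE;	// 13
 * uint32_t tc_be = RTE_SCHED_TRAFFIC_CLASS_BE;		// 12
 *
 * // Default framing overhead: preamble (7) + SFD (1) + FCS (4) +
 * // IFG (12) = 24 bytes per frame.
 * uint32_t overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT;	// 24
 * @endcode
 */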
/**
 * Congestion Management (CMAN) mode
 *
 * This is used for controlling the admission of packets into a packet queue
 * or group of packet queues on congestion.
 *
 * The *Random Early Detection (RED)* algorithm works by proactively dropping
 * more and more input packets as the queue occupancy builds up. When the queue
 * is full or almost full, RED effectively works as *tail drop*. The *Weighted
 * RED* algorithm uses a separate set of RED thresholds for each packet color.
 *
 * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
 * drops a packet at the onset of congestion and tries to control the
 * latency around a target value. The congestion detection, however, is based
 * on the queueing latency instead of the queue length as in RED. For more
 * information, refer to RFC 8033.
 */
enum rte_sched_cman_mode {
	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
	RTE_SCHED_CMAN_PIE, /**< Proportional Integral Controller Enhanced (PIE) */
};

/*
 * Pipe configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port. The pipe or pipe traffic class
 * rate (measured as a percentage of the output port rate) is determined
 * as credits_per_period divided by period. One credit represents one
 * byte.
 */
struct rte_sched_pipe_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period (measured in milliseconds) */
	uint64_t tc_period;

	/** Best-effort traffic class oversubscription weight */
	uint8_t tc_ov_weight;

	/** WRR weights of best-effort traffic class queues */
	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

/*
 * Congestion Management configuration parameters.
 */
struct rte_sched_cman_params {
	/** Congestion Management mode */
	enum rte_sched_cman_mode cman_mode;

	union {
		/** RED parameters */
		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];

		/** PIE parameters */
		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	};
};
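/**
 * Example: filling in a pipe profile. Illustrative sketch only; the
 * rates, period and weights below are made-up values, not
 * recommendations.
 *
 * @code{.c}
 * struct rte_sched_pipe_params pipe_profile = {
 *	.tb_rate = 1250000,		// ~10 Mbit/s, in bytes per second
 *	.tb_size = 1000000,		// bucket depth, in credits (bytes)
 *	.tc_period = 40,		// enforcement period, in milliseconds
 *	.tc_ov_weight = 1,
 *	.wrr_weights = {1, 1, 1, 1},	// equal weights for the 4 BE queues
 * };
 *
 * // Allow every traffic class to use the full pipe rate in this sketch.
 * for (uint32_t i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
 *	pipe_profile.tc_rate[i] = 1250000;
 * @endcode
 */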
/*
 * Subport configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port. The subport or subport traffic
 * class rate (measured as a percentage of the output port rate) is
 * determined as credits_per_period divided by period. One credit
 * represents one byte.
 */
struct rte_sched_subport_params {
	/** Number of subport pipes.
	 * The subport can enable/allocate fewer pipes than the maximum
	 * number set through struct rte_sched_port_params::n_pipes_per_subport,
	 * as needed, to avoid memory allocation for the queues of pipes
	 * that are not really needed.
	 */
	uint32_t n_pipes_per_subport_enabled;

	/** Packet queue size for each traffic class.
	 * All pipes within the same subport share the same configuration
	 * for these queues.
	 */
	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Pipe profile table.
	 * Every pipe is configured using one of the profiles from this table.
	 */
	struct rte_sched_pipe_params *pipe_profiles;

	/** Number of profiles in the pipe profile table */
	uint32_t n_pipe_profiles;

	/** Maximum number of profiles allowed in the pipe profile table */
	uint32_t n_max_pipe_profiles;

	/** Congestion Management parameters.
	 * If NULL, congestion management is disabled for the subport;
	 * otherwise proper parameters need to be provided.
	 */
	struct rte_sched_cman_params *cman_params;
};

/** Subport bandwidth profile parameters. */
struct rte_sched_subport_profile_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period for rates (measured in milliseconds) */
	uint64_t tc_period;
};

/** Subport statistics */
struct rte_sched_subport_stats {
	/** Number of packets successfully written for each traffic class */
	uint64_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of packets dropped for each traffic class */
	uint64_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes successfully written for each traffic class */
	uint64_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes dropped for each traffic class */
	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of packets dropped by the congestion management scheme,
	 * for each traffic class
	 */
	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
};

/** Queue statistics */
struct rte_sched_queue_stats {
	/** Packets successfully written */
	uint64_t n_pkts;

	/** Packets dropped */
	uint64_t n_pkts_dropped;

	/** Packets dropped by the congestion management scheme */
	uint64_t n_pkts_cman_dropped;

	/** Bytes successfully written */
	uint64_t n_bytes;

	/** Bytes dropped */
	uint64_t n_bytes_dropped;
};

/** Port configuration parameters. */
struct rte_sched_port_params {
	/** Name of the port to be associated */
	const char *name;

	/** CPU socket ID */
	int socket;

	/** Output port rate (measured in bytes per second) */
	uint64_t rate;

	/** Maximum Ethernet frame size (measured in bytes).
	 * Should not include the framing overhead.
	 */
	uint32_t mtu;

	/** Framing overhead per packet (measured in bytes) */
	uint32_t frame_overhead;

	/** Number of subports */
	uint32_t n_subports_per_port;

	/** Subport profile table.
	 * Every subport is configured using one of the profiles from this table.
	 */
	struct rte_sched_subport_profile_params *subport_profiles;

	/** Number of profiles in the subport profile table */
	uint32_t n_subport_profiles;

	/** Maximum number of profiles allowed in the subport profile table */
	uint32_t n_max_subport_profiles;

	/** Maximum number of subport pipes.
	 * This parameter is used to reserve a fixed number of bits
	 * in struct rte_mbuf::sched.queue_id for the pipe_id for all
	 * the subports of the same port.
	 */
	uint32_t n_pipes_per_subport;
};
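/**
 * Example: minimal port-level and subport-level parameters. Illustrative
 * sketch only; the name, socket, sizing values and the subport_profiles /
 * pipe_profiles arrays are hypothetical.
 *
 * @code{.c}
 * struct rte_sched_port_params port_params = {
 *	.name = "sched_port_0",			// hypothetical name
 *	.socket = 0,
 *	.rate = 1250000000,			// 10 Gbit/s, in bytes per second
 *	.mtu = 1522,
 *	.frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
 *	.n_subports_per_port = 1,
 *	.subport_profiles = subport_profiles,	// hypothetical profile array
 *	.n_subport_profiles = 1,
 *	.n_max_subport_profiles = 8,
 *	.n_pipes_per_subport = 4096,
 * };
 *
 * struct rte_sched_subport_params subport_params = {
 *	.n_pipes_per_subport_enabled = 1024,	// <= n_pipes_per_subport
 *	.qsize = {64, 64, 64, 64, 64, 64, 64,
 *		64, 64, 64, 64, 64, 64},	// one entry per traffic class
 *	.pipe_profiles = pipe_profiles,		// hypothetical profile array
 *	.n_pipe_profiles = 1,
 *	.n_max_pipe_profiles = 8,
 *	.cman_params = NULL,			// congestion management disabled
 * };
 * @endcode
 */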
/*
 * Configuration
 */

/**
 * Hierarchical scheduler port configuration
 *
 * @param params
 *   Port scheduler configuration parameter structure
 * @return
 *   Handle to port scheduler instance upon success or NULL otherwise.
 */
struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params);

/**
 * Hierarchical scheduler port free
 *
 * @param port
 *   Handle to port scheduler instance.
 *   If port is NULL, no operation is performed.
 */
void
rte_sched_port_free(struct rte_sched_port *port);

/**
 * Hierarchical scheduler pipe profile add
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Pipe profile parameters
 * @param pipe_profile_id
 *   Set to a valid profile ID when the profile is added successfully.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_pipe_params *params,
	uint32_t *pipe_profile_id);

/**
 * Hierarchical scheduler subport bandwidth profile add
 *
 * Note that this function is safe to call at runtime to add a new
 * subport bandwidth profile, as doing so has no impact on the
 * hierarchical structure of the scheduler.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param profile
 *   Subport bandwidth profile
 * @param subport_profile_id
 *   Set to a valid profile ID when the profile is added successfully.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_port_subport_profile_add(struct rte_sched_port *port,
	struct rte_sched_subport_profile_params *profile,
	uint32_t *subport_profile_id);
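/**
 * Example: creating the port and, later at runtime, adding one more
 * subport bandwidth profile. Illustrative sketch only; port_params and
 * extra_profile are assumed to be filled in as sketched above, and the
 * snippet is assumed to run inside an init function returning int.
 *
 * @code{.c}
 * struct rte_sched_port *port = rte_sched_port_config(&port_params);
 * if (port == NULL)
 *	return -1;
 *
 * // Safe at runtime: adding a profile does not disturb the hierarchy.
 * uint32_t subport_profile_id;
 * if (rte_sched_port_subport_profile_add(port, &extra_profile,
 *		&subport_profile_id) != 0)
 *	return -1;
 * @endcode
 */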
/**
 * Hierarchical scheduler subport configuration
 *
 * Note that this function is safe to call at runtime
 * to configure the subport bandwidth profile.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Subport configuration parameters. Must be non-NULL
 *   for the first invocation (i.e. initialization) for a given
 *   subport. Ignored (recommended value is NULL) for all
 *   subsequent invocations on the same subport.
 * @param subport_profile_id
 *   ID of subport bandwidth profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_config(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_params *params,
	uint32_t subport_profile_id);

/**
 * Hierarchical scheduler pipe configuration
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param pipe_id
 *   Pipe ID within subport
 * @param pipe_profile
 *   ID of subport-level pre-configured pipe profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_pipe_config(struct rte_sched_port *port,
	uint32_t subport_id,
	uint32_t pipe_id,
	int32_t pipe_profile);

/**
 * Hierarchical scheduler memory footprint size per port
 *
 * @param port_params
 *   Port scheduler configuration parameter structure
 * @param subport_params
 *   Array of subport parameter structures
 * @return
 *   Memory footprint size in bytes upon success, 0 otherwise
 */
uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
	struct rte_sched_subport_params **subport_params);

/*
 * Statistics
 */

/**
 * Hierarchical scheduler subport statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the
 *   statistics counters should be stored
 * @param tc_ov
 *   Pointer to pre-allocated RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE-entry array
 *   where the oversubscription status of each subport traffic class
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_read_stats(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_stats *stats,
	uint32_t *tc_ov);

/**
 * Hierarchical scheduler queue statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param queue_id
 *   Queue ID within port scheduler
 * @param stats
 *   Pointer to pre-allocated queue statistics structure where the
 *   statistics counters should be stored
 * @param qlen
 *   Pointer to pre-allocated variable where the current queue length
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_queue_read_stats(struct rte_sched_port *port,
	uint32_t queue_id,
	struct rte_sched_queue_stats *stats,
	uint16_t *qlen);
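/**
 * Example: configuring subport 0 and its pipe 0, then reading the
 * statistics back. Illustrative sketch only; subport_params is assumed
 * to be filled in as sketched above, and profile ID 0 is assumed to
 * refer to the first entry of the respective profile tables.
 *
 * @code{.c}
 * if (rte_sched_subport_config(port, 0, &subport_params, 0) != 0 ||
 *     rte_sched_pipe_config(port, 0, 0, 0) != 0)
 *	return -1;
 *
 * // Later, e.g. from a management thread:
 * struct rte_sched_subport_stats sstats;
 * struct rte_sched_queue_stats qstats;
 * uint32_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 * uint16_t qlen;
 *
 * rte_sched_subport_read_stats(port, 0, &sstats, tc_ov);
 * rte_sched_queue_read_stats(port, 0, &qstats, &qlen);
 * @endcode
 */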
/**
 * Scheduler hierarchy path write to packet descriptor. Typically
 * called by the packet classification stage.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class: 0 for the high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for the best-effort TC
 * @param color
 *   Packet color set
 */
void
rte_sched_port_pkt_write(struct rte_sched_port *port,
	struct rte_mbuf *pkt,
	uint32_t subport, uint32_t pipe, uint32_t traffic_class,
	uint32_t queue, enum rte_color color);

/**
 * Scheduler hierarchy path read from packet descriptor (struct
 * rte_mbuf). Typically called as part of the hierarchical scheduler
 * enqueue operation. The subport, pipe, traffic class and queue
 * parameters need to be pre-allocated by the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class: 0 for the high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for the best-effort TC
 */
void
rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
	const struct rte_mbuf *pkt,
	uint32_t *subport, uint32_t *pipe,
	uint32_t *traffic_class, uint32_t *queue);

/**
 * Packet color read from packet descriptor.
 *
 * @param pkt
 *   Packet descriptor handle
 * @return
 *   Packet color
 */
enum rte_color
rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);

/**
 * Hierarchical scheduler port enqueue. Writes up to n_pkts packets to the
 * port scheduler and returns the number of packets actually written. For
 * each packet, the port scheduler queue to write the packet to is
 * identified by reading the hierarchy path from the packet
 * descriptor; if the queue is full or congested and the packet is not
 * written to the queue, then the packet is automatically dropped
 * without any action required from the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Array storing the packet descriptor handles
 * @param n_pkts
 *   Number of packets to enqueue from the pkts array into the port scheduler
 * @return
 *   Number of packets successfully enqueued
 */
int
rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

/**
 * Hierarchical scheduler port dequeue. Reads up to n_pkts packets from the
 * port scheduler, stores them in the pkts array and returns the
 * number of packets actually read. The pkts array needs to be
 * pre-allocated by the caller with at least n_pkts entries.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Pre-allocated packet descriptor array where the packets dequeued
 *   from the port scheduler should be stored
 * @param n_pkts
 *   Number of packets to dequeue from the port scheduler
 * @return
 *   Number of packets successfully dequeued and placed in the pkts array
 */
int
rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
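/**
 * Example: run-to-completion datapath loop around enqueue/dequeue.
 * Illustrative sketch only; rx_burst() and tx_burst() are hypothetical
 * application helpers standing in for the actual I/O calls (e.g.
 * rte_eth_rx_burst()/rte_eth_tx_burst()), and the fixed hierarchy path
 * is a placeholder for a real classification stage.
 *
 * @code{.c}
 * struct rte_mbuf *pkts[64];
 * uint32_t i, n;
 *
 * for ( ; ; ) {
 *	n = rx_burst(pkts, 64);
 *
 *	// Classification stage: stamp the hierarchy path on each packet.
 *	for (i = 0; i < n; i++)
 *		rte_sched_port_pkt_write(port, pkts[i],
 *			0, 0, RTE_SCHED_TRAFFIC_CLASS_BE, 0,
 *			RTE_COLOR_GREEN);
 *
 *	// Packets that do not fit are dropped internally by the scheduler.
 *	rte_sched_port_enqueue(port, pkts, n);
 *
 *	n = rte_sched_port_dequeue(port, pkts, 64);
 *	tx_burst(pkts, n);
 * }
 * @endcode
 */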
/**
 * Hierarchical scheduler subport traffic class
 * oversubscription enable/disable.
 * This function should be called at the time of subport initialization.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param tc_ov_enable
 *   Boolean flag to enable/disable traffic class oversubscription
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_tc_ov_config(struct rte_sched_port *port, uint32_t subport_id, bool tc_ov_enable);

#ifdef __cplusplus
}
#endif

#endif /* __INCLUDE_RTE_SCHED_H__ */