/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef __INCLUDE_RTE_SCHED_H__
#define __INCLUDE_RTE_SCHED_H__

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @file
 * RTE Hierarchical Scheduler
 *
 * The hierarchical scheduler prioritizes the transmission of packets
 * from different users and traffic classes according to the Service
 * Level Agreements (SLAs) defined for the current network node.
 *
 * The scheduler supports thousands of packet queues grouped under a
 * 5-level hierarchy:
 *     1. Port:
 *           - Typical usage: output Ethernet port;
 *           - Multiple ports are scheduled in round robin order with
 *             equal priority;
 *     2. Subport:
 *           - Typical usage: group of users;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per subport);
 *           - Upper limit enforced per traffic class at subport level;
 *           - Lower priority traffic classes able to reuse subport
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same subport;
 *           - When any subport traffic class is oversubscribed
 *             (configuration time event), the usage of subport member
 *             pipes with high demand for that traffic class pipes is
 *             truncated to a dynamically adjusted value with no
 *             impact to low demand pipes;
 *     3. Pipe:
 *           - Typical usage: individual user/subscriber;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per pipe);
 *     4. Traffic class:
 *           - Traffic classes of the same pipe handled in strict
 *             priority order;
 *           - Upper limit enforced per traffic class at the pipe level;
 *           - Lower priority traffic classes able to reuse pipe
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same pipe;
 *     5. Queue:
 *           - Typical usage: queue hosting packets from one or
 *             multiple connections of same traffic class belonging to
 *             the same user;
 *           - Weighted Round Robin (WRR) is used to service the
 *             queues within same pipe lowest priority traffic class
 *             (best-effort).
 */

#include <rte_compat.h>
#include <rte_mbuf.h>
#include <rte_meter.h>

/** Congestion Management */
#include "rte_red.h"
#include "rte_pie.h"

/** Maximum number of queues per pipe.
 * Note that the multiple queues (power of 2) can only be assigned to
 * lowest priority (best-effort) traffic class. Other higher priority traffic
 * classes can only have one queue.
 * Can not change.
 *
 * @see struct rte_sched_port_params
 */
#define RTE_SCHED_QUEUES_PER_PIPE    16

/** Number of WRR queues for best-effort traffic class per pipe.
 *
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_BE_QUEUES_PER_PIPE    4

/** Number of traffic classes per pipe (as well as subport).
 * All non-best-effort traffic classes have exactly one queue each, so the
 * count is the total queues minus the best-effort queues, plus one for the
 * best-effort class itself.
 * @see struct rte_sched_subport_params
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)

/** Best-effort traffic class ID
 * The best-effort class is always the last (lowest priority) traffic class.
 * Can not change.
 */
#define RTE_SCHED_TRAFFIC_CLASS_BE    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)

/*
 * Ethernet framing overhead. Overhead fields per Ethernet frame:
 * 1. Preamble: 7 bytes;
 * 2. Start of Frame Delimiter (SFD): 1 byte;
 * 3. Frame Check Sequence (FCS): 4 bytes;
 * 4. Inter Frame Gap (IFG): 12 bytes.
 *
 * The FCS is considered overhead only if not included in the packet
 * length (field pkt_len of struct rte_mbuf).
 *
 * @see struct rte_sched_port_params
 */
#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT    24
#endif

/**
 * Congestion Management (CMAN) mode
 *
 * This is used for controlling the admission of packets into a packet queue or
 * group of packet queues on congestion.
 *
 * The *Random Early Detection (RED)* algorithm works by proactively dropping
 * more and more input packets as the queue occupancy builds up. When the queue
 * is full or almost full, RED effectively works as *tail drop*. The *Weighted
 * RED* algorithm uses a separate set of RED thresholds for each packet color.
 *
 * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
 * drops a packet at the onset of the congestion and tries to control the
 * latency around the target value. The congestion detection, however, is based
 * on the queueing latency instead of the queue length like RED. For more
 * information, refer RFC8033.
 */
enum rte_sched_cman_mode {
	RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
	RTE_SCHED_CMAN_PIE, /**< Proportional Integral Controller Enhanced (PIE) */
};

/*
 * Pipe configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with pipe or pipe traffic class
 * rate (measured as percentage of output port rate) determined as
 * credits_per_period divided by period. One credit represents one
 * byte.
 */
struct rte_sched_pipe_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period (measured in milliseconds) */
	uint64_t tc_period;

	/** Best-effort traffic class oversubscription weight */
	uint8_t tc_ov_weight;

	/** WRR weights of best-effort traffic class queues */
	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

/*
 * Congestion Management configuration parameters.
 */
struct rte_sched_cman_params {
	/** Congestion Management mode */
	enum rte_sched_cman_mode cman_mode;

	/* Only the member matching cman_mode is meaningful. */
	union {
		/** RED parameters */
		struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];

		/** PIE parameters */
		struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	};
};

/*
 * Subport configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with pipe or pipe traffic class
 * rate (measured as percentage of output port rate) determined as
 * credits_per_period divided by period. One credit represents one
 * byte.
 */
struct rte_sched_subport_params {
	/** Number of subport pipes.
	 * The subport can enable/allocate fewer pipes than the maximum
	 * number set through struct rte_sched_port_params::n_pipes_per_subport,
	 * as needed, to avoid memory allocation for the queues of the
	 * pipes that are not really needed.
	 */
	uint32_t n_pipes_per_subport_enabled;

	/** Packet queue size for each traffic class.
	 * All the pipes within the same subport share the similar
	 * configuration for the queues.
	 */
	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Pipe profile table.
	 * Every pipe is configured using one of the profiles from this table.
	 */
	struct rte_sched_pipe_params *pipe_profiles;

	/** Profiles in the pipe profile table */
	uint32_t n_pipe_profiles;

	/** Max allowed profiles in the pipe profile table */
	uint32_t n_max_pipe_profiles;

	/** Congestion Management parameters
	 * If NULL the congestion management is disabled for the subport,
	 * otherwise proper parameters need to be provided.
	 */
	struct rte_sched_cman_params *cman_params;
};

/** Subport bandwidth profile parameters (token bucket and per-TC rates). */
struct rte_sched_subport_profile_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period for rates (measured in milliseconds) */
	uint64_t tc_period;
};

/** Subport statistics */
struct rte_sched_subport_stats {
	/** Number of packets successfully written */
	uint64_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of packets dropped */
	uint64_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes successfully written for each traffic class */
	uint64_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes dropped for each traffic class */
	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of packets dropped by congestion management scheme */
	uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
};

/** Queue statistics */
struct rte_sched_queue_stats {
	/** Packets successfully written */
	uint64_t n_pkts;

	/** Packets dropped */
	uint64_t n_pkts_dropped;

	/** Packets dropped by congestion management scheme */
	uint64_t n_pkts_cman_dropped;

	/** Bytes successfully written */
	uint64_t n_bytes;

	/** Bytes dropped */
	uint64_t n_bytes_dropped;
};

/** Port configuration parameters. */
struct rte_sched_port_params {
	/** Name of the port to be associated */
	const char *name;

	/** CPU socket ID */
	int socket;

	/** Output port rate (measured in bytes per second) */
	uint64_t rate;

	/** Maximum Ethernet frame size (measured in bytes).
	 * Should not include the framing overhead.
	 */
	uint32_t mtu;

	/** Framing overhead per packet (measured in bytes) */
	uint32_t frame_overhead;

	/** Number of subports */
	uint32_t n_subports_per_port;

	/** Subport profile table.
	 * Every subport is configured using one of the profiles from this table.
	 */
	struct rte_sched_subport_profile_params *subport_profiles;

	/** Profiles in the subport profile table */
	uint32_t n_subport_profiles;

	/** Max allowed profiles in the subport profile table */
	uint32_t n_max_subport_profiles;

	/** Maximum number of subport pipes.
	 * This parameter is used to reserve a fixed number of bits
	 * in struct rte_mbuf::sched.queue_id for the pipe_id for all
	 * the subports of the same port.
	 */
	uint32_t n_pipes_per_subport;
};

/*
 * Configuration
 *
 ***/
/**
 * Hierarchical scheduler port configuration
 *
 * @param params
 *   Port scheduler configuration parameter structure
 * @return
 *   Handle to port scheduler instance upon success or NULL otherwise.
 */
struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params);

/**
 * Hierarchical scheduler port free
 *
 * @param port
 *   Handle to port scheduler instance
 */
void
rte_sched_port_free(struct rte_sched_port *port);

/**
 * Hierarchical scheduler pipe profile add
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Pipe profile parameters
 * @param pipe_profile_id
 *   Set to valid profile id when profile is added successfully.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_pipe_params *params,
	uint32_t *pipe_profile_id);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Hierarchical scheduler subport bandwidth profile add
 * Note that this function is safe to use in runtime for adding new
 * subport bandwidth profile as it doesn't have any impact on hierarchical
 * structure of the scheduler.
 * @param port
 *   Handle to port scheduler instance
 * @param profile
 *   Subport bandwidth profile
 * @param subport_profile_id
 *   Subport profile id
 * @return
 *   0 upon success, error code otherwise
 */
__rte_experimental
int
rte_sched_port_subport_profile_add(struct rte_sched_port *port,
	struct rte_sched_subport_profile_params *profile,
	uint32_t *subport_profile_id);

/**
 * Hierarchical scheduler subport configuration
 * Note that this function is safe to use at runtime
 * to configure subport bandwidth profile.
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Subport configuration parameters. Must be non-NULL
 *   for first invocation (i.e initialization) for a given
 *   subport. Ignored (recommended value is NULL) for all
 *   subsequent invocation on the same subport.
 * @param subport_profile_id
 *   ID of subport bandwidth profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_config(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_params *params,
	uint32_t subport_profile_id);

/**
 * Hierarchical scheduler pipe configuration
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param pipe_id
 *   Pipe ID within subport
 * @param pipe_profile
 *   ID of subport-level pre-configured pipe profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_pipe_config(struct rte_sched_port *port,
	uint32_t subport_id,
	uint32_t pipe_id,
	int32_t pipe_profile);

/**
 * Hierarchical scheduler memory footprint size per port
 *
 * @param port_params
 *   Port scheduler configuration parameter structure
 * @param subport_params
 *   Array of subport parameter structures
 * @return
 *   Memory footprint size in bytes upon success, 0 otherwise
 */
uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
	struct rte_sched_subport_params **subport_params);

/*
 * Statistics
 *
 ***/

/**
 * Hierarchical scheduler subport statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the statistics
 *   counters should be stored
 * @param tc_ov
 *   Pointer to pre-allocated RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE-entry array
 *   where the oversubscription status for each of the subport traffic classes
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_read_stats(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_stats *stats,
	uint32_t *tc_ov);

/**
 * Hierarchical scheduler queue statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param queue_id
 *   Queue ID within port scheduler
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the statistics
 *   counters should be stored
 * @param qlen
 *   Pointer to pre-allocated variable where the current queue length
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_queue_read_stats(struct rte_sched_port *port,
	uint32_t queue_id,
	struct rte_sched_queue_stats *stats,
	uint16_t *qlen);

/**
 * Scheduler hierarchy path write to packet descriptor. Typically
 * called by the packet classification stage.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 * @param color
 *   Packet color set
 */
void
rte_sched_port_pkt_write(struct rte_sched_port *port,
	struct rte_mbuf *pkt,
	uint32_t subport, uint32_t pipe, uint32_t traffic_class,
	uint32_t queue, enum rte_color color);

/**
 * Scheduler hierarchy path read from packet descriptor (struct
 * rte_mbuf). Typically called as part of the hierarchical scheduler
 * enqueue operation. The subport, pipe, traffic class and queue
 * parameters need to be pre-allocated by the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 */
void
rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
	const struct rte_mbuf *pkt,
	uint32_t *subport, uint32_t *pipe,
	uint32_t *traffic_class, uint32_t *queue);

/**
 * Read the packet color from the packet descriptor.
 *
 * @param pkt
 *   Packet descriptor handle
 * @return
 *   Packet color previously set (e.g. by rte_sched_port_pkt_write())
 */
enum rte_color
rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);

/**
 * Hierarchical scheduler port enqueue. Writes up to n_pkts to port
 * scheduler and returns the number of packets actually written. For
 * each packet, the port scheduler queue to write the packet to is
 * identified by reading the hierarchy path from the packet
 * descriptor; if the queue is full or congested and the packet is not
 * written to the queue, then the packet is automatically dropped
 * without any action required from the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Array storing the packet descriptor handles
 * @param n_pkts
 *   Number of packets to enqueue from the pkts array into the port scheduler
 * @return
 *   Number of packets successfully enqueued
 */
int
rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

/**
 * Hierarchical scheduler port dequeue. Reads up to n_pkts from the
 * port scheduler and stores them in the pkts array and returns the
 * number of packets actually read. The pkts array needs to be
 * pre-allocated by the caller with at least n_pkts entries.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Pre-allocated packet descriptor array where the packets dequeued
 *   from the port scheduler should be stored
 * @param n_pkts
 *   Number of packets to dequeue from the port scheduler
 * @return
 *   Number of packets successfully dequeued and placed in the pkts array
 */
int
rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

#ifdef __cplusplus
}
#endif

#endif /* __INCLUDE_RTE_SCHED_H__ */