/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef __INCLUDE_RTE_SCHED_H__
#define __INCLUDE_RTE_SCHED_H__

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @file
 * RTE Hierarchical Scheduler
 *
 * The hierarchical scheduler prioritizes the transmission of packets
 * from different users and traffic classes according to the Service
 * Level Agreements (SLAs) defined for the current network node.
 *
 * The scheduler supports thousands of packet queues grouped under a
 * 5-level hierarchy:
 *     1. Port:
 *           - Typical usage: output Ethernet port;
 *           - Multiple ports are scheduled in round robin order with
 *             equal priority;
 *     2. Subport:
 *           - Typical usage: group of users;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per subport);
 *           - Upper limit enforced per traffic class at subport level;
 *           - Lower priority traffic classes able to reuse subport
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same subport;
 *           - When any subport traffic class is oversubscribed
 *             (configuration time event), the usage of subport member
 *             pipes with high demand for that traffic class is
 *             truncated to a dynamically adjusted value with no
 *             impact to low demand pipes;
 *     3. Pipe:
 *           - Typical usage: individual user/subscriber;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per pipe);
 *     4. Traffic class:
 *           - Traffic classes of the same pipe handled in strict
 *             priority order;
 *           - Upper limit enforced per traffic class at the pipe level;
 *           - Lower priority traffic classes able to reuse pipe
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same pipe;
 *     5. Queue:
 *           - Typical usage: queue hosting packets from one or
 *             multiple connections of same traffic class belonging to
 *             the same user;
 *           - Weighted Round Robin (WRR) is used to service the
 *             queues within same pipe lowest priority traffic class
 *             (best-effort).
 *
 */

#include <sys/types.h>
#include <rte_compat.h>
#include <rte_mbuf.h>
#include <rte_meter.h>

/** Random Early Detection (RED) */
#ifdef RTE_SCHED_RED
#include "rte_red.h"
#endif

/** Maximum number of queues per pipe.
 * Note that the multiple queues (power of 2) can only be assigned to
 * lowest priority (best-effort) traffic class. Other higher priority traffic
 * classes can only have one queue.
 * Can not change.
 *
 * @see struct rte_sched_port_params
 */
#define RTE_SCHED_QUEUES_PER_PIPE    16

/** Number of WRR queues for best-effort traffic class per pipe.
 *
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_BE_QUEUES_PER_PIPE    4

/** Number of traffic classes per pipe (as well as subport).
 * @see struct rte_sched_subport_params
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE \
(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)

/** Best-effort traffic class ID
 * Can not change.
 */
#define RTE_SCHED_TRAFFIC_CLASS_BE    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)

/*
 * Ethernet framing overhead. Overhead fields per Ethernet frame:
 * 1. Preamble:                             7 bytes;
 * 2. Start of Frame Delimiter (SFD):       1 byte;
 * 3. Frame Check Sequence (FCS):           4 bytes;
 * 4. Inter Frame Gap (IFG):               12 bytes.
 *
 * The FCS is considered overhead only if not included in the packet
 * length (field pkt_len of struct rte_mbuf).
 *
 * @see struct rte_sched_port_params
 */
#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
#endif

/*
 * Pipe configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with pipe or pipe traffic class
 * rate (measured as percentage of output port rate) determined as
 * credits_per_period divided by period. One credit represents one
 * byte.
 */
struct rte_sched_pipe_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period (measured in milliseconds) */
	uint64_t tc_period;

	/** Best-effort traffic class oversubscription weight */
	uint8_t tc_ov_weight;

	/** WRR weights of best-effort traffic class queues */
	uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
};

/*
 * Subport configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with pipe or pipe traffic class
 * rate (measured as percentage of output port rate) determined as
 * credits_per_period divided by period. One credit represents one
 * byte.
 */
struct rte_sched_subport_params {
	/** Number of subport pipes.
	 * The subport can enable/allocate fewer pipes than the maximum
	 * number set through struct port_params::n_max_pipes_per_subport,
	 * as needed, to avoid memory allocation for the queues of the
	 * pipes that are not really needed.
	 */
	uint32_t n_pipes_per_subport_enabled;

	/** Packet queue size for each traffic class.
	 * All the pipes within the same subport share the same
	 * configuration for the queues.
	 */
	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Pipe profile table.
	 * Every pipe is configured using one of the profiles from this table.
	 */
	struct rte_sched_pipe_params *pipe_profiles;

	/** Profiles in the pipe profile table */
	uint32_t n_pipe_profiles;

	/** Max allowed profiles in the pipe profile table */
	uint32_t n_max_pipe_profiles;

#ifdef RTE_SCHED_RED
	/** RED parameters */
	struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
#endif
};

/** Subport bandwidth profile.
 * Defines the token bucket and per-traffic-class rate parameters that a
 * subport can be associated with via rte_sched_subport_config().
 */
struct rte_sched_subport_profile_params {
	/** Token bucket rate (measured in bytes per second) */
	uint64_t tb_rate;

	/** Token bucket size (measured in credits) */
	uint64_t tb_size;

	/** Traffic class rates (measured in bytes per second) */
	uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Enforcement period for rates (measured in milliseconds) */
	uint64_t tc_period;
};

/** Subport statistics */
struct rte_sched_subport_stats {
	/** Number of packets successfully written */
	uint64_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of packets dropped */
	uint64_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes successfully written for each traffic class */
	uint64_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

	/** Number of bytes dropped for each traffic class */
	uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

#ifdef RTE_SCHED_RED
	/** Number of packets dropped by red */
	uint64_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
#endif
};

/** Queue statistics */
struct rte_sched_queue_stats {
	/** Packets successfully written */
	uint64_t n_pkts;

	/** Packets dropped */
	uint64_t n_pkts_dropped;

#ifdef RTE_SCHED_RED
	/** Packets dropped by RED */
	uint64_t n_pkts_red_dropped;
#endif

	/** Bytes successfully written */
	uint64_t n_bytes;

	/** Bytes dropped */
	uint64_t n_bytes_dropped;
};

/** Port configuration parameters. */
struct rte_sched_port_params {
	/** Name of the port to be associated */
	const char *name;

	/** CPU socket ID */
	int socket;

	/** Output port rate (measured in bytes per second) */
	uint64_t rate;

	/** Maximum Ethernet frame size (measured in bytes).
	 * Should not include the framing overhead.
	 */
	uint32_t mtu;

	/** Framing overhead per packet (measured in bytes) */
	uint32_t frame_overhead;

	/** Number of subports */
	uint32_t n_subports_per_port;

	/** subport profile table.
	 * Every pipe is configured using one of the profiles from this table.
	 */
	struct rte_sched_subport_profile_params *subport_profiles;

	/** Profiles in the pipe profile table */
	uint32_t n_subport_profiles;

	/** Max allowed profiles in the pipe profile table */
	uint32_t n_max_subport_profiles;

	/** Maximum number of subport pipes.
	 * This parameter is used to reserve a fixed number of bits
	 * in struct rte_mbuf::sched.queue_id for the pipe_id for all
	 * the subports of the same port.
	 */
	uint32_t n_pipes_per_subport;
};

/*
 * Configuration
 *
 ***/

/**
 * Hierarchical scheduler port configuration
 *
 * @param params
 *   Port scheduler configuration parameter structure
 * @return
 *   Handle to port scheduler instance upon success or NULL otherwise.
 */
struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params);

/**
 * Hierarchical scheduler port free
 *
 * @param port
 *   Handle to port scheduler instance
 */
void
rte_sched_port_free(struct rte_sched_port *port);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Hierarchical scheduler pipe profile add
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Pipe profile parameters
 * @param pipe_profile_id
 *   Set to valid profile id when profile is added successfully.
 * @return
 *   0 upon success, error code otherwise
 */
__rte_experimental
int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_pipe_params *params,
	uint32_t *pipe_profile_id);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Hierarchical scheduler subport bandwidth profile add
 * Note that this function is safe to use in runtime for adding new
 * subport bandwidth profile as it doesn't have any impact on hierarchical
 * structure of the scheduler.
 * @param port
 *   Handle to port scheduler instance
 * @param profile
 *   Subport bandwidth profile
 * @param subport_profile_id
 *   Subport profile id
 * @return
 *   0 upon success, error code otherwise
 */
__rte_experimental
int
rte_sched_port_subport_profile_add(struct rte_sched_port *port,
	struct rte_sched_subport_profile_params *profile,
	uint32_t *subport_profile_id);

/**
 * Hierarchical scheduler subport configuration
 * Note that this function is safe to use at runtime
 * to configure subport bandwidth profile.
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Subport configuration parameters. Must be non-NULL
 *   for first invocation (i.e initialization) for a given
 *   subport. Ignored (recommended value is NULL) for all
 *   subsequent invocation on the same subport.
 * @param subport_profile_id
 *   ID of subport bandwidth profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_config(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_params *params,
	uint32_t subport_profile_id);

/**
 * Hierarchical scheduler pipe configuration
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param pipe_id
 *   Pipe ID within subport
 * @param pipe_profile
 *   ID of subport-level pre-configured pipe profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_pipe_config(struct rte_sched_port *port,
	uint32_t subport_id,
	uint32_t pipe_id,
	int32_t pipe_profile);

/**
 * Hierarchical scheduler memory footprint size per port
 *
 * @param port_params
 *   Port scheduler configuration parameter structure
 * @param subport_params
 *   Array of subport parameter structures
 * @return
 *   Memory footprint size in bytes upon success, 0 otherwise
 */
uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
	struct rte_sched_subport_params **subport_params);

/*
 * Statistics
 *
 ***/

/**
 * Hierarchical scheduler subport statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the statistics
 *   counters should be stored
 * @param tc_ov
 *   Pointer to pre-allocated RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE-entry array
 *   where the oversubscription status for each of the subport traffic classes
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_read_stats(struct rte_sched_port *port,
	uint32_t subport_id,
	struct rte_sched_subport_stats *stats,
	uint32_t *tc_ov);

/**
 * Hierarchical scheduler queue statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param queue_id
 *   Queue ID within port scheduler
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the statistics
 *   counters should be stored
 * @param qlen
 *   Pointer to pre-allocated variable where the current queue length
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_queue_read_stats(struct rte_sched_port *port,
	uint32_t queue_id,
	struct rte_sched_queue_stats *stats,
	uint16_t *qlen);

/**
 * Scheduler hierarchy path write to packet descriptor. Typically
 * called by the packet classification stage.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 * @param color
 *   Packet color set
 */
void
rte_sched_port_pkt_write(struct rte_sched_port *port,
	struct rte_mbuf *pkt,
	uint32_t subport, uint32_t pipe, uint32_t traffic_class,
	uint32_t queue, enum rte_color color);

/**
 * Scheduler hierarchy path read from packet descriptor (struct
 * rte_mbuf). Typically called as part of the hierarchical scheduler
 * enqueue operation. The subport, pipe, traffic class and queue
 * parameters need to be pre-allocated by the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 */
void
rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
	const struct rte_mbuf *pkt,
	uint32_t *subport, uint32_t *pipe,
	uint32_t *traffic_class, uint32_t *queue);

/**
 * Scheduler packet color read from packet descriptor (struct
 * rte_mbuf). Counterpart of the color written by
 * rte_sched_port_pkt_write().
 *
 * @param pkt
 *   Packet descriptor handle
 * @return
 *   Packet color
 */
enum rte_color
rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);

/**
 * Hierarchical scheduler port enqueue. Writes up to n_pkts to port
 * scheduler and returns the number of packets actually written. For
 * each packet, the port scheduler queue to write the packet to is
 * identified by reading the hierarchy path from the packet
 * descriptor; if the queue is full or congested and the packet is not
 * written to the queue, then the packet is automatically dropped
 * without any action required from the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Array storing the packet descriptor handles
 * @param n_pkts
 *   Number of packets to enqueue from the pkts array into the port scheduler
 * @return
 *   Number of packets successfully enqueued
 */
int
rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

/**
 * Hierarchical scheduler port dequeue. Reads up to n_pkts from the
 * port scheduler and stores them in the pkts array and returns the
 * number of packets actually read. The pkts array needs to be
 * pre-allocated by the caller with at least n_pkts entries.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Pre-allocated packet descriptor array where the packets dequeued
 *   from the port scheduler should be stored
 * @param n_pkts
 *   Number of packets to dequeue from the port scheduler
 * @return
 *   Number of packets successfully dequeued and placed in the pkts array
 */
int
rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

#ifdef __cplusplus
}
#endif

#endif /* __INCLUDE_RTE_SCHED_H__ */