1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <string.h> 7 8 #include <rte_common.h> 9 #include <rte_log.h> 10 #include <rte_memory.h> 11 #include <rte_malloc.h> 12 #include <rte_cycles.h> 13 #include <rte_prefetch.h> 14 #include <rte_branch_prediction.h> 15 #include <rte_mbuf.h> 16 #include <rte_bitmap.h> 17 #include <rte_reciprocal.h> 18 19 #include "rte_sched.h" 20 #include "rte_sched_common.h" 21 #include "rte_approx.h" 22 23 #ifdef __INTEL_COMPILER 24 #pragma warning(disable:2259) /* conversion may lose significant bits */ 25 #endif 26 27 #ifdef RTE_SCHED_VECTOR 28 #include <rte_vect.h> 29 30 #ifdef RTE_ARCH_X86 31 #define SCHED_VECTOR_SSE4 32 #elif defined(__ARM_NEON) 33 #define SCHED_VECTOR_NEON 34 #endif 35 36 #endif 37 38 #define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7) 39 #define RTE_SCHED_WRR_SHIFT 3 40 #define RTE_SCHED_MAX_QUEUES_PER_TC RTE_SCHED_BE_QUEUES_PER_PIPE 41 #define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE) 42 #define RTE_SCHED_PIPE_INVALID UINT32_MAX 43 #define RTE_SCHED_BMP_POS_INVALID UINT32_MAX 44 45 /* Scaling for cycles_per_byte calculation 46 * Chosen so that minimum rate is 480 bit/sec 47 */ 48 #define RTE_SCHED_TIME_SHIFT 8 49 50 struct rte_sched_pipe_profile { 51 /* Token bucket (TB) */ 52 uint64_t tb_period; 53 uint64_t tb_credits_per_period; 54 uint64_t tb_size; 55 56 /* Pipe traffic classes */ 57 uint64_t tc_period; 58 uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 59 uint8_t tc_ov_weight; 60 61 /* Pipe best-effort traffic class queues */ 62 uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; 63 }; 64 65 struct rte_sched_pipe { 66 /* Token bucket (TB) */ 67 uint64_t tb_time; /* time of last update */ 68 uint64_t tb_credits; 69 70 /* Pipe profile and flags */ 71 uint32_t profile; 72 73 /* Traffic classes (TCs) */ 74 uint64_t tc_time; /* time of next update */ 75 uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 76 77 /* Weighted Round Robin (WRR) */ 78 uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; 79 80 /* TC oversubscription */ 81 uint64_t tc_ov_credits; 82 uint8_t tc_ov_period_id; 83 } __rte_cache_aligned; 84 85 struct rte_sched_queue { 86 uint16_t qw; 87 uint16_t qr; 88 }; 89 90 struct rte_sched_queue_extra { 91 struct rte_sched_queue_stats stats; 92 #ifdef RTE_SCHED_RED 93 struct rte_red red; 94 #endif 95 }; 96 97 enum grinder_state { 98 e_GRINDER_PREFETCH_PIPE = 0, 99 e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS, 100 e_GRINDER_PREFETCH_MBUF, 101 e_GRINDER_READ_MBUF 102 }; 103 104 struct rte_sched_subport_profile { 105 /* Token bucket (TB) */ 106 uint64_t tb_period; 107 uint64_t tb_credits_per_period; 108 uint64_t tb_size; 109 110 uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 111 uint64_t tc_period; 112 }; 113 114 struct rte_sched_grinder { 115 /* Pipe cache */ 116 uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE]; 117 uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE]; 118 uint32_t pcache_w; 119 uint32_t pcache_r; 120 121 /* Current pipe */ 122 enum grinder_state state; 123 uint32_t productive; 124 uint32_t pindex; 125 struct rte_sched_subport *subport; 126 struct rte_sched_subport_profile *subport_params; 127 struct rte_sched_pipe *pipe; 128 struct rte_sched_pipe_profile *pipe_params; 129 130 /* TC cache */ 131 uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 132 uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 133 uint32_t tccache_w; 134 uint32_t tccache_r; 135 136 /* Current TC */ 137 
uint32_t tc_index; 138 struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC]; 139 struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC]; 140 uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC]; 141 uint16_t qsize; 142 uint32_t qmask; 143 uint32_t qpos; 144 struct rte_mbuf *pkt; 145 146 /* WRR */ 147 uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE]; 148 uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE]; 149 uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; 150 }; 151 152 struct rte_sched_subport { 153 /* Token bucket (TB) */ 154 uint64_t tb_time; /* time of last update */ 155 uint64_t tb_credits; 156 157 /* Traffic classes (TCs) */ 158 uint64_t tc_time; /* time of next update */ 159 uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 160 161 /* TC oversubscription */ 162 uint64_t tc_ov_wm; 163 uint64_t tc_ov_wm_min; 164 uint64_t tc_ov_wm_max; 165 uint8_t tc_ov_period_id; 166 uint8_t tc_ov; 167 uint32_t tc_ov_n; 168 double tc_ov_rate; 169 170 /* Statistics */ 171 struct rte_sched_subport_stats stats __rte_cache_aligned; 172 173 /* subport profile */ 174 uint32_t profile; 175 /* Subport pipes */ 176 uint32_t n_pipes_per_subport_enabled; 177 uint32_t n_pipe_profiles; 178 uint32_t n_max_pipe_profiles; 179 180 /* Pipe best-effort TC rate */ 181 uint64_t pipe_tc_be_rate_max; 182 183 /* Pipe queues size */ 184 uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 185 186 #ifdef RTE_SCHED_RED 187 struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; 188 #endif 189 190 /* Scheduling loop detection */ 191 uint32_t pipe_loop; 192 uint32_t pipe_exhaustion; 193 194 /* Bitmap */ 195 struct rte_bitmap *bmp; 196 uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16; 197 198 /* Grinders */ 199 struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS]; 200 uint32_t busy_grinders; 201 202 /* Queue base calculation */ 203 uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE]; 204 uint32_t qsize_sum; 205 206 struct rte_sched_pipe *pipe; 207 struct rte_sched_queue *queue; 208 struct rte_sched_queue_extra *queue_extra; 209 struct rte_sched_pipe_profile *pipe_profiles; 210 uint8_t *bmp_array; 211 struct rte_mbuf **queue_array; 212 uint8_t memory[0] __rte_cache_aligned; 213 } __rte_cache_aligned; 214 215 struct rte_sched_port { 216 /* User parameters */ 217 uint32_t n_subports_per_port; 218 uint32_t n_pipes_per_subport; 219 uint32_t n_pipes_per_subport_log2; 220 uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; 221 uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE]; 222 uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE]; 223 uint32_t n_subport_profiles; 224 uint32_t n_max_subport_profiles; 225 uint64_t rate; 226 uint32_t mtu; 227 uint32_t frame_overhead; 228 int socket; 229 230 /* Timing */ 231 uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */ 232 uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */ 233 uint64_t time; /* Current NIC TX time measured in bytes */ 234 struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */ 235 uint64_t cycles_per_byte; 236 237 /* Grinders */ 238 struct rte_mbuf **pkts_out; 239 uint32_t n_pkts_out; 240 uint32_t subport_id; 241 242 /* Large data structures */ 243 struct rte_sched_subport_profile *subport_profiles; 244 struct rte_sched_subport *subports[0] __rte_cache_aligned; 245 } __rte_cache_aligned; 246 247 enum rte_sched_subport_array { 248 e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0, 249 e_RTE_SCHED_SUBPORT_ARRAY_QUEUE, 250 e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA, 251 e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES, 252 
e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY, 253 e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY, 254 e_RTE_SCHED_SUBPORT_ARRAY_TOTAL, 255 }; 256 257 static inline uint32_t 258 rte_sched_subport_pipe_queues(struct rte_sched_subport *subport) 259 { 260 return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled; 261 } 262 263 static inline struct rte_mbuf ** 264 rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex) 265 { 266 uint32_t pindex = qindex >> 4; 267 uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1); 268 269 return (subport->queue_array + pindex * 270 subport->qsize_sum + subport->qsize_add[qpos]); 271 } 272 273 static inline uint16_t 274 rte_sched_subport_pipe_qsize(struct rte_sched_port *port, 275 struct rte_sched_subport *subport, uint32_t qindex) 276 { 277 uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)]; 278 279 return subport->qsize[tc]; 280 } 281 282 static inline uint32_t 283 rte_sched_port_queues_per_port(struct rte_sched_port *port) 284 { 285 uint32_t n_queues = 0, i; 286 287 for (i = 0; i < port->n_subports_per_port; i++) 288 n_queues += rte_sched_subport_pipe_queues(port->subports[i]); 289 290 return n_queues; 291 } 292 293 static inline uint16_t 294 rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class) 295 { 296 uint16_t pipe_queue = port->pipe_queue[traffic_class]; 297 298 return pipe_queue; 299 } 300 301 static inline uint8_t 302 rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex) 303 { 304 uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)]; 305 306 return pipe_tc; 307 } 308 309 static inline uint8_t 310 rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex) 311 { 312 uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)]; 313 314 return tc_queue; 315 } 316 317 static int 318 pipe_profile_check(struct rte_sched_pipe_params *params, 319 uint64_t rate, uint16_t *qsize) 320 { 321 uint32_t i; 322 323 /* Pipe parameters */ 324 if (params == NULL) { 325 RTE_LOG(ERR, SCHED, 326 "%s: Incorrect value for parameter params\n", __func__); 327 return -EINVAL; 328 } 329 330 /* TB rate: non-zero, not greater than port rate */ 331 if (params->tb_rate == 0 || 332 params->tb_rate > rate) { 333 RTE_LOG(ERR, SCHED, 334 "%s: Incorrect value for tb rate\n", __func__); 335 return -EINVAL; 336 } 337 338 /* TB size: non-zero */ 339 if (params->tb_size == 0) { 340 RTE_LOG(ERR, SCHED, 341 "%s: Incorrect value for tb size\n", __func__); 342 return -EINVAL; 343 } 344 345 /* TC rate: non-zero if qsize non-zero, less than pipe rate */ 346 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { 347 if ((qsize[i] == 0 && params->tc_rate[i] != 0) || 348 (qsize[i] != 0 && (params->tc_rate[i] == 0 || 349 params->tc_rate[i] > params->tb_rate))) { 350 RTE_LOG(ERR, SCHED, 351 "%s: Incorrect value for qsize or tc_rate\n", __func__); 352 return -EINVAL; 353 } 354 } 355 356 if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 || 357 qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) { 358 RTE_LOG(ERR, SCHED, 359 "%s: Incorrect value for be traffic class rate\n", __func__); 360 return -EINVAL; 361 } 362 363 /* TC period: non-zero */ 364 if (params->tc_period == 0) { 365 RTE_LOG(ERR, SCHED, 366 "%s: Incorrect value for tc period\n", __func__); 367 return -EINVAL; 368 } 369 370 /* Best effort tc oversubscription weight: non-zero */ 371 if (params->tc_ov_weight == 0) { 372 RTE_LOG(ERR, SCHED, 373 "%s: Incorrect value for tc ov weight\n", __func__); 374 return -EINVAL; 375 
} 376 377 /* Queue WRR weights: non-zero */ 378 for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) { 379 if (params->wrr_weights[i] == 0) { 380 RTE_LOG(ERR, SCHED, 381 "%s: Incorrect value for wrr weight\n", __func__); 382 return -EINVAL; 383 } 384 } 385 386 return 0; 387 } 388 389 static int 390 subport_profile_check(struct rte_sched_subport_profile_params *params, 391 uint64_t rate) 392 { 393 uint32_t i; 394 395 /* Check user parameters */ 396 if (params == NULL) { 397 RTE_LOG(ERR, SCHED, "%s: " 398 "Incorrect value for parameter params\n", __func__); 399 return -EINVAL; 400 } 401 402 if (params->tb_rate == 0 || params->tb_rate > rate) { 403 RTE_LOG(ERR, SCHED, "%s: " 404 "Incorrect value for tb rate\n", __func__); 405 return -EINVAL; 406 } 407 408 if (params->tb_size == 0) { 409 RTE_LOG(ERR, SCHED, "%s: " 410 "Incorrect value for tb size\n", __func__); 411 return -EINVAL; 412 } 413 414 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { 415 uint64_t tc_rate = params->tc_rate[i]; 416 417 if (tc_rate == 0 || (tc_rate > params->tb_rate)) { 418 RTE_LOG(ERR, SCHED, "%s: " 419 "Incorrect value for tc rate\n", __func__); 420 return -EINVAL; 421 } 422 } 423 424 if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) { 425 RTE_LOG(ERR, SCHED, "%s: " 426 "Incorrect tc rate(best effort)\n", __func__); 427 return -EINVAL; 428 } 429 430 if (params->tc_period == 0) { 431 RTE_LOG(ERR, SCHED, "%s: " 432 "Incorrect value for tc period\n", __func__); 433 return -EINVAL; 434 } 435 436 return 0; 437 } 438 439 static int 440 rte_sched_port_check_params(struct rte_sched_port_params *params) 441 { 442 uint32_t i; 443 444 if (params == NULL) { 445 RTE_LOG(ERR, SCHED, 446 "%s: Incorrect value for parameter params\n", __func__); 447 return -EINVAL; 448 } 449 450 /* socket */ 451 if (params->socket < 0) { 452 RTE_LOG(ERR, SCHED, 453 "%s: Incorrect value for socket id\n", __func__); 454 return -EINVAL; 455 } 456 457 /* rate */ 458 if (params->rate == 0) { 459 RTE_LOG(ERR, SCHED, 460 "%s: Incorrect value for rate\n", __func__); 461 return -EINVAL; 462 } 463 464 /* mtu */ 465 if (params->mtu == 0) { 466 RTE_LOG(ERR, SCHED, 467 "%s: Incorrect value for mtu\n", __func__); 468 return -EINVAL; 469 } 470 471 /* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */ 472 if (params->n_subports_per_port == 0 || 473 params->n_subports_per_port > 1u << 16 || 474 !rte_is_power_of_2(params->n_subports_per_port)) { 475 RTE_LOG(ERR, SCHED, 476 "%s: Incorrect value for number of subports\n", __func__); 477 return -EINVAL; 478 } 479 480 if (params->subport_profiles == NULL || 481 params->n_subport_profiles == 0 || 482 params->n_max_subport_profiles == 0 || 483 params->n_subport_profiles > params->n_max_subport_profiles) { 484 RTE_LOG(ERR, SCHED, 485 "%s: Incorrect value for subport profiles\n", __func__); 486 return -EINVAL; 487 } 488 489 for (i = 0; i < params->n_subport_profiles; i++) { 490 struct rte_sched_subport_profile_params *p = 491 params->subport_profiles + i; 492 int status; 493 494 status = subport_profile_check(p, params->rate); 495 if (status != 0) { 496 RTE_LOG(ERR, SCHED, 497 "%s: subport profile check failed(%d)\n", 498 __func__, status); 499 return -EINVAL; 500 } 501 } 502 503 /* n_pipes_per_subport: non-zero, power of 2 */ 504 if (params->n_pipes_per_subport == 0 || 505 !rte_is_power_of_2(params->n_pipes_per_subport)) { 506 RTE_LOG(ERR, SCHED, 507 "%s: Incorrect value for maximum pipes number\n", __func__); 508 return -EINVAL; 509 } 510 511 return 0; 512 } 513 514 static uint32_t 515 
rte_sched_subport_get_array_base(struct rte_sched_subport_params *params, 516 enum rte_sched_subport_array array) 517 { 518 uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled; 519 uint32_t n_subport_pipe_queues = 520 RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport; 521 522 uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe); 523 uint32_t size_queue = 524 n_subport_pipe_queues * sizeof(struct rte_sched_queue); 525 uint32_t size_queue_extra 526 = n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra); 527 uint32_t size_pipe_profiles = params->n_max_pipe_profiles * 528 sizeof(struct rte_sched_pipe_profile); 529 uint32_t size_bmp_array = 530 rte_bitmap_get_memory_footprint(n_subport_pipe_queues); 531 uint32_t size_per_pipe_queue_array, size_queue_array; 532 533 uint32_t base, i; 534 535 size_per_pipe_queue_array = 0; 536 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { 537 if (i < RTE_SCHED_TRAFFIC_CLASS_BE) 538 size_per_pipe_queue_array += 539 params->qsize[i] * sizeof(struct rte_mbuf *); 540 else 541 size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC * 542 params->qsize[i] * sizeof(struct rte_mbuf *); 543 } 544 size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array; 545 546 base = 0; 547 548 if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE) 549 return base; 550 base += RTE_CACHE_LINE_ROUNDUP(size_pipe); 551 552 if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE) 553 return base; 554 base += RTE_CACHE_LINE_ROUNDUP(size_queue); 555 556 if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA) 557 return base; 558 base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra); 559 560 if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES) 561 return base; 562 base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles); 563 564 if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY) 565 return base; 566 base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array); 567 568 if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY) 569 return base; 570 base += RTE_CACHE_LINE_ROUNDUP(size_queue_array); 571 572 return base; 573 } 574 575 static void 576 rte_sched_subport_config_qsize(struct rte_sched_subport *subport) 577 { 578 uint32_t i; 579 580 subport->qsize_add[0] = 0; 581 582 /* Strict prority traffic class */ 583 for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 584 subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1]; 585 586 /* Best-effort traffic class */ 587 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] = 588 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] + 589 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE]; 590 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] = 591 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] + 592 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE]; 593 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] = 594 subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] + 595 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE]; 596 597 subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] + 598 subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE]; 599 } 600 601 static void 602 rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i) 603 { 604 struct rte_sched_pipe_profile *p = subport->pipe_profiles + i; 605 606 RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n" 607 " Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n" 608 " Traffic classes: period = %"PRIu64",\n" 609 " credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 610 ", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", 
%"PRIu64", %"PRIu64 611 ", %"PRIu64", %"PRIu64", %"PRIu64"]\n" 612 " Best-effort traffic class oversubscription: weight = %hhu\n" 613 " WRR cost: [%hhu, %hhu, %hhu, %hhu]\n", 614 i, 615 616 /* Token bucket */ 617 p->tb_period, 618 p->tb_credits_per_period, 619 p->tb_size, 620 621 /* Traffic classes */ 622 p->tc_period, 623 p->tc_credits_per_period[0], 624 p->tc_credits_per_period[1], 625 p->tc_credits_per_period[2], 626 p->tc_credits_per_period[3], 627 p->tc_credits_per_period[4], 628 p->tc_credits_per_period[5], 629 p->tc_credits_per_period[6], 630 p->tc_credits_per_period[7], 631 p->tc_credits_per_period[8], 632 p->tc_credits_per_period[9], 633 p->tc_credits_per_period[10], 634 p->tc_credits_per_period[11], 635 p->tc_credits_per_period[12], 636 637 /* Best-effort traffic class oversubscription */ 638 p->tc_ov_weight, 639 640 /* WRR */ 641 p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]); 642 } 643 644 static void 645 rte_sched_port_log_subport_profile(struct rte_sched_port *port, uint32_t i) 646 { 647 struct rte_sched_subport_profile *p = port->subport_profiles + i; 648 649 RTE_LOG(DEBUG, SCHED, "Low level config for subport profile %u:\n" 650 "Token bucket: period = %"PRIu64", credits per period = %"PRIu64"," 651 "size = %"PRIu64"\n" 652 "Traffic classes: period = %"PRIu64",\n" 653 "credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 654 " %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64 655 " %"PRIu64", %"PRIu64", %"PRIu64"]\n", 656 i, 657 658 /* Token bucket */ 659 p->tb_period, 660 p->tb_credits_per_period, 661 p->tb_size, 662 663 /* Traffic classes */ 664 p->tc_period, 665 p->tc_credits_per_period[0], 666 p->tc_credits_per_period[1], 667 p->tc_credits_per_period[2], 668 p->tc_credits_per_period[3], 669 p->tc_credits_per_period[4], 670 p->tc_credits_per_period[5], 671 p->tc_credits_per_period[6], 672 p->tc_credits_per_period[7], 673 p->tc_credits_per_period[8], 674 p->tc_credits_per_period[9], 675 p->tc_credits_per_period[10], 676 p->tc_credits_per_period[11], 677 p->tc_credits_per_period[12]); 678 } 679 680 static inline uint64_t 681 rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate) 682 { 683 uint64_t time = time_ms; 684 685 time = (time * rate) / 1000; 686 687 return time; 688 } 689 690 static void 691 rte_sched_pipe_profile_convert(struct rte_sched_subport *subport, 692 struct rte_sched_pipe_params *src, 693 struct rte_sched_pipe_profile *dst, 694 uint64_t rate) 695 { 696 uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE]; 697 uint32_t lcd1, lcd2, lcd; 698 uint32_t i; 699 700 /* Token Bucket */ 701 if (src->tb_rate == rate) { 702 dst->tb_credits_per_period = 1; 703 dst->tb_period = 1; 704 } else { 705 double tb_rate = (double) src->tb_rate 706 / (double) rate; 707 double d = RTE_SCHED_TB_RATE_CONFIG_ERR; 708 709 rte_approx_64(tb_rate, d, &dst->tb_credits_per_period, 710 &dst->tb_period); 711 } 712 713 dst->tb_size = src->tb_size; 714 715 /* Traffic Classes */ 716 dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, 717 rate); 718 719 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 720 if (subport->qsize[i]) 721 dst->tc_credits_per_period[i] 722 = rte_sched_time_ms_to_bytes(src->tc_period, 723 src->tc_rate[i]); 724 725 dst->tc_ov_weight = src->tc_ov_weight; 726 727 /* WRR queues */ 728 wrr_cost[0] = src->wrr_weights[0]; 729 wrr_cost[1] = src->wrr_weights[1]; 730 wrr_cost[2] = src->wrr_weights[2]; 731 wrr_cost[3] = src->wrr_weights[3]; 732 733 lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); 734 lcd2 = 
rte_get_lcd(wrr_cost[2], wrr_cost[3]); 735 lcd = rte_get_lcd(lcd1, lcd2); 736 737 wrr_cost[0] = lcd / wrr_cost[0]; 738 wrr_cost[1] = lcd / wrr_cost[1]; 739 wrr_cost[2] = lcd / wrr_cost[2]; 740 wrr_cost[3] = lcd / wrr_cost[3]; 741 742 dst->wrr_cost[0] = (uint8_t) wrr_cost[0]; 743 dst->wrr_cost[1] = (uint8_t) wrr_cost[1]; 744 dst->wrr_cost[2] = (uint8_t) wrr_cost[2]; 745 dst->wrr_cost[3] = (uint8_t) wrr_cost[3]; 746 } 747 748 static void 749 rte_sched_subport_profile_convert(struct rte_sched_subport_profile_params *src, 750 struct rte_sched_subport_profile *dst, 751 uint64_t rate) 752 { 753 uint32_t i; 754 755 /* Token Bucket */ 756 if (src->tb_rate == rate) { 757 dst->tb_credits_per_period = 1; 758 dst->tb_period = 1; 759 } else { 760 double tb_rate = (double) src->tb_rate 761 / (double) rate; 762 double d = RTE_SCHED_TB_RATE_CONFIG_ERR; 763 764 rte_approx_64(tb_rate, d, &dst->tb_credits_per_period, 765 &dst->tb_period); 766 } 767 768 dst->tb_size = src->tb_size; 769 770 /* Traffic Classes */ 771 dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, rate); 772 773 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 774 dst->tc_credits_per_period[i] 775 = rte_sched_time_ms_to_bytes(src->tc_period, 776 src->tc_rate[i]); 777 } 778 779 static void 780 rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport, 781 struct rte_sched_subport_params *params, uint64_t rate) 782 { 783 uint32_t i; 784 785 for (i = 0; i < subport->n_pipe_profiles; i++) { 786 struct rte_sched_pipe_params *src = params->pipe_profiles + i; 787 struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i; 788 789 rte_sched_pipe_profile_convert(subport, src, dst, rate); 790 rte_sched_port_log_pipe_profile(subport, i); 791 } 792 793 subport->pipe_tc_be_rate_max = 0; 794 for (i = 0; i < subport->n_pipe_profiles; i++) { 795 struct rte_sched_pipe_params *src = params->pipe_profiles + i; 796 uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE]; 797 798 if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate) 799 subport->pipe_tc_be_rate_max = pipe_tc_be_rate; 800 } 801 } 802 803 static void 804 rte_sched_port_config_subport_profile_table(struct rte_sched_port *port, 805 struct rte_sched_port_params *params, 806 uint64_t rate) 807 { 808 uint32_t i; 809 810 for (i = 0; i < port->n_subport_profiles; i++) { 811 struct rte_sched_subport_profile_params *src 812 = params->subport_profiles + i; 813 struct rte_sched_subport_profile *dst 814 = port->subport_profiles + i; 815 816 rte_sched_subport_profile_convert(src, dst, rate); 817 rte_sched_port_log_subport_profile(port, i); 818 } 819 } 820 821 static int 822 rte_sched_subport_check_params(struct rte_sched_subport_params *params, 823 uint32_t n_max_pipes_per_subport, 824 uint64_t rate) 825 { 826 uint32_t i; 827 828 /* Check user parameters */ 829 if (params == NULL) { 830 RTE_LOG(ERR, SCHED, 831 "%s: Incorrect value for parameter params\n", __func__); 832 return -EINVAL; 833 } 834 835 /* qsize: if non-zero, power of 2, 836 * no bigger than 32K (due to 16-bit read/write pointers) 837 */ 838 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { 839 uint16_t qsize = params->qsize[i]; 840 841 if (qsize != 0 && !rte_is_power_of_2(qsize)) { 842 RTE_LOG(ERR, SCHED, 843 "%s: Incorrect value for qsize\n", __func__); 844 return -EINVAL; 845 } 846 } 847 848 if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) { 849 RTE_LOG(ERR, SCHED, "%s: Incorrect qsize\n", __func__); 850 return -EINVAL; 851 } 852 853 /* n_pipes_per_subport: non-zero, power of 2 */ 
854 if (params->n_pipes_per_subport_enabled == 0 || 855 params->n_pipes_per_subport_enabled > n_max_pipes_per_subport || 856 !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) { 857 RTE_LOG(ERR, SCHED, 858 "%s: Incorrect value for pipes number\n", __func__); 859 return -EINVAL; 860 } 861 862 /* pipe_profiles and n_pipe_profiles */ 863 if (params->pipe_profiles == NULL || 864 params->n_pipe_profiles == 0 || 865 params->n_max_pipe_profiles == 0 || 866 params->n_pipe_profiles > params->n_max_pipe_profiles) { 867 RTE_LOG(ERR, SCHED, 868 "%s: Incorrect value for pipe profiles\n", __func__); 869 return -EINVAL; 870 } 871 872 for (i = 0; i < params->n_pipe_profiles; i++) { 873 struct rte_sched_pipe_params *p = params->pipe_profiles + i; 874 int status; 875 876 status = pipe_profile_check(p, rate, ¶ms->qsize[0]); 877 if (status != 0) { 878 RTE_LOG(ERR, SCHED, 879 "%s: Pipe profile check failed(%d)\n", __func__, status); 880 return -EINVAL; 881 } 882 } 883 884 return 0; 885 } 886 887 uint32_t 888 rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params, 889 struct rte_sched_subport_params **subport_params) 890 { 891 uint32_t size0 = 0, size1 = 0, i; 892 int status; 893 894 status = rte_sched_port_check_params(port_params); 895 if (status != 0) { 896 RTE_LOG(ERR, SCHED, 897 "%s: Port scheduler port params check failed (%d)\n", 898 __func__, status); 899 900 return 0; 901 } 902 903 for (i = 0; i < port_params->n_subports_per_port; i++) { 904 struct rte_sched_subport_params *sp = subport_params[i]; 905 906 status = rte_sched_subport_check_params(sp, 907 port_params->n_pipes_per_subport, 908 port_params->rate); 909 if (status != 0) { 910 RTE_LOG(ERR, SCHED, 911 "%s: Port scheduler subport params check failed (%d)\n", 912 __func__, status); 913 914 return 0; 915 } 916 } 917 918 size0 = sizeof(struct rte_sched_port); 919 920 for (i = 0; i < port_params->n_subports_per_port; i++) { 921 struct rte_sched_subport_params *sp = subport_params[i]; 922 923 size1 += rte_sched_subport_get_array_base(sp, 924 e_RTE_SCHED_SUBPORT_ARRAY_TOTAL); 925 } 926 927 return size0 + size1; 928 } 929 930 struct rte_sched_port * 931 rte_sched_port_config(struct rte_sched_port_params *params) 932 { 933 struct rte_sched_port *port = NULL; 934 uint32_t size0, size1, size2; 935 uint32_t cycles_per_byte; 936 uint32_t i, j; 937 int status; 938 939 status = rte_sched_port_check_params(params); 940 if (status != 0) { 941 RTE_LOG(ERR, SCHED, 942 "%s: Port scheduler params check failed (%d)\n", 943 __func__, status); 944 return NULL; 945 } 946 947 size0 = sizeof(struct rte_sched_port); 948 size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *); 949 size2 = params->n_max_subport_profiles * 950 sizeof(struct rte_sched_subport_profile); 951 952 /* Allocate memory to store the data structures */ 953 port = rte_zmalloc_socket("qos_params", size0 + size1, 954 RTE_CACHE_LINE_SIZE, params->socket); 955 if (port == NULL) { 956 RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__); 957 958 return NULL; 959 } 960 961 /* Allocate memory to store the subport profile */ 962 port->subport_profiles = rte_zmalloc_socket("subport_profile", size2, 963 RTE_CACHE_LINE_SIZE, params->socket); 964 if (port == NULL) { 965 RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__); 966 967 return NULL; 968 } 969 970 /* User parameters */ 971 port->n_subports_per_port = params->n_subports_per_port; 972 port->n_subport_profiles = params->n_subport_profiles; 973 port->n_max_subport_profiles = 
params->n_max_subport_profiles; 974 port->n_pipes_per_subport = params->n_pipes_per_subport; 975 port->n_pipes_per_subport_log2 = 976 __builtin_ctz(params->n_pipes_per_subport); 977 port->socket = params->socket; 978 979 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 980 port->pipe_queue[i] = i; 981 982 for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) { 983 port->pipe_tc[i] = j; 984 985 if (j < RTE_SCHED_TRAFFIC_CLASS_BE) 986 j++; 987 } 988 989 for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) { 990 port->tc_queue[i] = j; 991 992 if (i >= RTE_SCHED_TRAFFIC_CLASS_BE) 993 j++; 994 } 995 port->rate = params->rate; 996 port->mtu = params->mtu + params->frame_overhead; 997 port->frame_overhead = params->frame_overhead; 998 999 /* Timing */ 1000 port->time_cpu_cycles = rte_get_tsc_cycles(); 1001 port->time_cpu_bytes = 0; 1002 port->time = 0; 1003 1004 /* Subport profile table */ 1005 rte_sched_port_config_subport_profile_table(port, params, port->rate); 1006 1007 cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT) 1008 / params->rate; 1009 port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte); 1010 port->cycles_per_byte = cycles_per_byte; 1011 1012 /* Grinders */ 1013 port->pkts_out = NULL; 1014 port->n_pkts_out = 0; 1015 port->subport_id = 0; 1016 1017 return port; 1018 } 1019 1020 static inline void 1021 rte_sched_subport_free(struct rte_sched_port *port, 1022 struct rte_sched_subport *subport) 1023 { 1024 uint32_t n_subport_pipe_queues; 1025 uint32_t qindex; 1026 1027 if (subport == NULL) 1028 return; 1029 1030 n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport); 1031 1032 /* Free enqueued mbufs */ 1033 for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) { 1034 struct rte_mbuf **mbufs = 1035 rte_sched_subport_pipe_qbase(subport, qindex); 1036 uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex); 1037 if (qsize != 0) { 1038 struct rte_sched_queue *queue = subport->queue + qindex; 1039 uint16_t qr = queue->qr & (qsize - 1); 1040 uint16_t qw = queue->qw & (qsize - 1); 1041 1042 for (; qr != qw; qr = (qr + 1) & (qsize - 1)) 1043 rte_pktmbuf_free(mbufs[qr]); 1044 } 1045 } 1046 1047 rte_free(subport); 1048 } 1049 1050 void 1051 rte_sched_port_free(struct rte_sched_port *port) 1052 { 1053 uint32_t i; 1054 1055 /* Check user parameters */ 1056 if (port == NULL) 1057 return; 1058 1059 for (i = 0; i < port->n_subports_per_port; i++) 1060 rte_sched_subport_free(port, port->subports[i]); 1061 1062 rte_free(port->subport_profiles); 1063 rte_free(port); 1064 } 1065 1066 static void 1067 rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports) 1068 { 1069 uint32_t i; 1070 1071 for (i = 0; i < n_subports; i++) { 1072 struct rte_sched_subport *subport = port->subports[i]; 1073 1074 rte_sched_subport_free(port, subport); 1075 } 1076 1077 rte_free(port->subport_profiles); 1078 rte_free(port); 1079 } 1080 1081 int 1082 rte_sched_subport_config(struct rte_sched_port *port, 1083 uint32_t subport_id, 1084 struct rte_sched_subport_params *params, 1085 uint32_t subport_profile_id) 1086 { 1087 struct rte_sched_subport *s = NULL; 1088 uint32_t n_subports = subport_id; 1089 struct rte_sched_subport_profile *profile; 1090 uint32_t n_subport_pipe_queues, i; 1091 uint32_t size0, size1, bmp_mem_size; 1092 int status; 1093 1094 /* Check user parameters */ 1095 if (port == NULL) { 1096 RTE_LOG(ERR, SCHED, 1097 "%s: Incorrect value for parameter port\n", __func__); 1098 return 0; 1099 } 1100 1101 if (subport_id >= 
port->n_subports_per_port) { 1102 RTE_LOG(ERR, SCHED, 1103 "%s: Incorrect value for subport id\n", __func__); 1104 1105 rte_sched_free_memory(port, n_subports); 1106 return -EINVAL; 1107 } 1108 1109 if (subport_profile_id >= port->n_max_subport_profiles) { 1110 RTE_LOG(ERR, SCHED, "%s: " 1111 "Number of subport profile exceeds the max limit\n", 1112 __func__); 1113 rte_sched_free_memory(port, n_subports); 1114 return -EINVAL; 1115 } 1116 1117 /** Memory is allocated only on first invocation of the api for a 1118 * given subport. Subsequent invocation on same subport will just 1119 * update subport bandwidth parameter. 1120 **/ 1121 if (port->subports[subport_id] == NULL) { 1122 1123 status = rte_sched_subport_check_params(params, 1124 port->n_pipes_per_subport, 1125 port->rate); 1126 if (status != 0) { 1127 RTE_LOG(NOTICE, SCHED, 1128 "%s: Port scheduler params check failed (%d)\n", 1129 __func__, status); 1130 1131 rte_sched_free_memory(port, n_subports); 1132 return -EINVAL; 1133 } 1134 1135 /* Determine the amount of memory to allocate */ 1136 size0 = sizeof(struct rte_sched_subport); 1137 size1 = rte_sched_subport_get_array_base(params, 1138 e_RTE_SCHED_SUBPORT_ARRAY_TOTAL); 1139 1140 /* Allocate memory to store the data structures */ 1141 s = rte_zmalloc_socket("subport_params", size0 + size1, 1142 RTE_CACHE_LINE_SIZE, port->socket); 1143 if (s == NULL) { 1144 RTE_LOG(ERR, SCHED, 1145 "%s: Memory allocation fails\n", __func__); 1146 1147 rte_sched_free_memory(port, n_subports); 1148 return -ENOMEM; 1149 } 1150 1151 n_subports++; 1152 1153 subport_profile_id = 0; 1154 1155 /* Port */ 1156 port->subports[subport_id] = s; 1157 1158 s->tb_time = port->time; 1159 1160 /* compile time checks */ 1161 RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0); 1162 RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS & 1163 (RTE_SCHED_PORT_N_GRINDERS - 1)); 1164 1165 /* User parameters */ 1166 s->n_pipes_per_subport_enabled = 1167 params->n_pipes_per_subport_enabled; 1168 memcpy(s->qsize, params->qsize, sizeof(params->qsize)); 1169 s->n_pipe_profiles = params->n_pipe_profiles; 1170 s->n_max_pipe_profiles = params->n_max_pipe_profiles; 1171 1172 #ifdef RTE_SCHED_RED 1173 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { 1174 uint32_t j; 1175 1176 for (j = 0; j < RTE_COLORS; j++) { 1177 /* if min/max are both zero, then RED is disabled */ 1178 if ((params->red_params[i][j].min_th | 1179 params->red_params[i][j].max_th) == 0) { 1180 continue; 1181 } 1182 1183 if (rte_red_config_init(&s->red_config[i][j], 1184 params->red_params[i][j].wq_log2, 1185 params->red_params[i][j].min_th, 1186 params->red_params[i][j].max_th, 1187 params->red_params[i][j].maxp_inv) != 0) { 1188 rte_sched_free_memory(port, n_subports); 1189 1190 RTE_LOG(NOTICE, SCHED, 1191 "%s: RED configuration init fails\n", 1192 __func__); 1193 return -EINVAL; 1194 } 1195 } 1196 } 1197 #endif 1198 1199 /* Scheduling loop detection */ 1200 s->pipe_loop = RTE_SCHED_PIPE_INVALID; 1201 s->pipe_exhaustion = 0; 1202 1203 /* Grinders */ 1204 s->busy_grinders = 0; 1205 1206 /* Queue base calculation */ 1207 rte_sched_subport_config_qsize(s); 1208 1209 /* Large data structures */ 1210 s->pipe = (struct rte_sched_pipe *) 1211 (s->memory + rte_sched_subport_get_array_base(params, 1212 e_RTE_SCHED_SUBPORT_ARRAY_PIPE)); 1213 s->queue = (struct rte_sched_queue *) 1214 (s->memory + rte_sched_subport_get_array_base(params, 1215 e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)); 1216 s->queue_extra = (struct rte_sched_queue_extra *) 1217 (s->memory + 
rte_sched_subport_get_array_base(params, 1218 e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)); 1219 s->pipe_profiles = (struct rte_sched_pipe_profile *) 1220 (s->memory + rte_sched_subport_get_array_base(params, 1221 e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)); 1222 s->bmp_array = s->memory + rte_sched_subport_get_array_base( 1223 params, e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY); 1224 s->queue_array = (struct rte_mbuf **) 1225 (s->memory + rte_sched_subport_get_array_base(params, 1226 e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)); 1227 1228 /* Pipe profile table */ 1229 rte_sched_subport_config_pipe_profile_table(s, params, 1230 port->rate); 1231 1232 /* Bitmap */ 1233 n_subport_pipe_queues = rte_sched_subport_pipe_queues(s); 1234 bmp_mem_size = rte_bitmap_get_memory_footprint( 1235 n_subport_pipe_queues); 1236 s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array, 1237 bmp_mem_size); 1238 if (s->bmp == NULL) { 1239 RTE_LOG(ERR, SCHED, 1240 "%s: Subport bitmap init error\n", __func__); 1241 1242 rte_sched_free_memory(port, n_subports); 1243 return -EINVAL; 1244 } 1245 1246 for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) 1247 s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID; 1248 1249 #ifdef RTE_SCHED_SUBPORT_TC_OV 1250 /* TC oversubscription */ 1251 s->tc_ov_wm_min = port->mtu; 1252 s->tc_ov_period_id = 0; 1253 s->tc_ov = 0; 1254 s->tc_ov_n = 0; 1255 s->tc_ov_rate = 0; 1256 #endif 1257 } 1258 1259 { 1260 /* update subport parameters from subport profile table*/ 1261 profile = port->subport_profiles + subport_profile_id; 1262 1263 s = port->subports[subport_id]; 1264 1265 s->tb_credits = profile->tb_size / 2; 1266 1267 s->tc_time = port->time + profile->tc_period; 1268 1269 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 1270 if (s->qsize[i]) 1271 s->tc_credits[i] = 1272 profile->tc_credits_per_period[i]; 1273 else 1274 profile->tc_credits_per_period[i] = 0; 1275 1276 #ifdef RTE_SCHED_SUBPORT_TC_OV 1277 s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period, 1278 s->pipe_tc_be_rate_max); 1279 s->tc_ov_wm = s->tc_ov_wm_max; 1280 #endif 1281 s->profile = subport_profile_id; 1282 1283 } 1284 1285 rte_sched_port_log_subport_profile(port, subport_profile_id); 1286 1287 return 0; 1288 } 1289 1290 int 1291 rte_sched_pipe_config(struct rte_sched_port *port, 1292 uint32_t subport_id, 1293 uint32_t pipe_id, 1294 int32_t pipe_profile) 1295 { 1296 struct rte_sched_subport *s; 1297 struct rte_sched_subport_profile *sp; 1298 struct rte_sched_pipe *p; 1299 struct rte_sched_pipe_profile *params; 1300 uint32_t n_subports = subport_id + 1; 1301 uint32_t deactivate, profile, i; 1302 1303 /* Check user parameters */ 1304 profile = (uint32_t) pipe_profile; 1305 deactivate = (pipe_profile < 0); 1306 1307 if (port == NULL) { 1308 RTE_LOG(ERR, SCHED, 1309 "%s: Incorrect value for parameter port\n", __func__); 1310 return -EINVAL; 1311 } 1312 1313 if (subport_id >= port->n_subports_per_port) { 1314 RTE_LOG(ERR, SCHED, 1315 "%s: Incorrect value for parameter subport id\n", __func__); 1316 1317 rte_sched_free_memory(port, n_subports); 1318 return -EINVAL; 1319 } 1320 1321 s = port->subports[subport_id]; 1322 if (pipe_id >= s->n_pipes_per_subport_enabled) { 1323 RTE_LOG(ERR, SCHED, 1324 "%s: Incorrect value for parameter pipe id\n", __func__); 1325 1326 rte_sched_free_memory(port, n_subports); 1327 return -EINVAL; 1328 } 1329 1330 if (!deactivate && profile >= s->n_pipe_profiles) { 1331 RTE_LOG(ERR, SCHED, 1332 "%s: Incorrect value for parameter pipe profile\n", __func__); 1333 1334 rte_sched_free_memory(port, 
n_subports); 1335 return -EINVAL; 1336 } 1337 1338 sp = port->subport_profiles + s->profile; 1339 /* Handle the case when pipe already has a valid configuration */ 1340 p = s->pipe + pipe_id; 1341 if (p->tb_time) { 1342 params = s->pipe_profiles + p->profile; 1343 1344 double subport_tc_be_rate = 1345 (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] 1346 / (double) sp->tc_period; 1347 double pipe_tc_be_rate = 1348 (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] 1349 / (double) params->tc_period; 1350 uint32_t tc_be_ov = s->tc_ov; 1351 1352 /* Unplug pipe from its subport */ 1353 s->tc_ov_n -= params->tc_ov_weight; 1354 s->tc_ov_rate -= pipe_tc_be_rate; 1355 s->tc_ov = s->tc_ov_rate > subport_tc_be_rate; 1356 1357 if (s->tc_ov != tc_be_ov) { 1358 RTE_LOG(DEBUG, SCHED, 1359 "Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n", 1360 subport_id, subport_tc_be_rate, s->tc_ov_rate); 1361 } 1362 1363 /* Reset the pipe */ 1364 memset(p, 0, sizeof(struct rte_sched_pipe)); 1365 } 1366 1367 if (deactivate) 1368 return 0; 1369 1370 /* Apply the new pipe configuration */ 1371 p->profile = profile; 1372 params = s->pipe_profiles + p->profile; 1373 1374 /* Token Bucket (TB) */ 1375 p->tb_time = port->time; 1376 p->tb_credits = params->tb_size / 2; 1377 1378 /* Traffic Classes (TCs) */ 1379 p->tc_time = port->time + params->tc_period; 1380 1381 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 1382 if (s->qsize[i]) 1383 p->tc_credits[i] = params->tc_credits_per_period[i]; 1384 1385 { 1386 /* Subport best effort tc oversubscription */ 1387 double subport_tc_be_rate = 1388 (double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] 1389 / (double) sp->tc_period; 1390 double pipe_tc_be_rate = 1391 (double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] 1392 / (double) params->tc_period; 1393 uint32_t tc_be_ov = s->tc_ov; 1394 1395 s->tc_ov_n += params->tc_ov_weight; 1396 s->tc_ov_rate += pipe_tc_be_rate; 1397 s->tc_ov = s->tc_ov_rate > subport_tc_be_rate; 1398 1399 if (s->tc_ov != tc_be_ov) { 1400 RTE_LOG(DEBUG, SCHED, 1401 "Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n", 1402 subport_id, subport_tc_be_rate, s->tc_ov_rate); 1403 } 1404 p->tc_ov_period_id = s->tc_ov_period_id; 1405 p->tc_ov_credits = s->tc_ov_wm; 1406 } 1407 1408 return 0; 1409 } 1410 1411 int 1412 rte_sched_subport_pipe_profile_add(struct rte_sched_port *port, 1413 uint32_t subport_id, 1414 struct rte_sched_pipe_params *params, 1415 uint32_t *pipe_profile_id) 1416 { 1417 struct rte_sched_subport *s; 1418 struct rte_sched_pipe_profile *pp; 1419 uint32_t i; 1420 int status; 1421 1422 /* Port */ 1423 if (port == NULL) { 1424 RTE_LOG(ERR, SCHED, 1425 "%s: Incorrect value for parameter port\n", __func__); 1426 return -EINVAL; 1427 } 1428 1429 /* Subport id not exceeds the max limit */ 1430 if (subport_id > port->n_subports_per_port) { 1431 RTE_LOG(ERR, SCHED, 1432 "%s: Incorrect value for subport id\n", __func__); 1433 return -EINVAL; 1434 } 1435 1436 s = port->subports[subport_id]; 1437 1438 /* Pipe profiles exceeds the max limit */ 1439 if (s->n_pipe_profiles >= s->n_max_pipe_profiles) { 1440 RTE_LOG(ERR, SCHED, 1441 "%s: Number of pipe profiles exceeds the max limit\n", __func__); 1442 return -EINVAL; 1443 } 1444 1445 /* Pipe params */ 1446 status = pipe_profile_check(params, port->rate, &s->qsize[0]); 1447 if (status != 0) { 1448 RTE_LOG(ERR, SCHED, 1449 "%s: Pipe profile check failed(%d)\n", __func__, status); 1450 return -EINVAL; 1451 } 1452 1453 pp = 
&s->pipe_profiles[s->n_pipe_profiles]; 1454 rte_sched_pipe_profile_convert(s, params, pp, port->rate); 1455 1456 /* Pipe profile should not exists */ 1457 for (i = 0; i < s->n_pipe_profiles; i++) 1458 if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) { 1459 RTE_LOG(ERR, SCHED, 1460 "%s: Pipe profile exists\n", __func__); 1461 return -EINVAL; 1462 } 1463 1464 /* Pipe profile commit */ 1465 *pipe_profile_id = s->n_pipe_profiles; 1466 s->n_pipe_profiles++; 1467 1468 if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE]) 1469 s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE]; 1470 1471 rte_sched_port_log_pipe_profile(s, *pipe_profile_id); 1472 1473 return 0; 1474 } 1475 1476 int 1477 rte_sched_port_subport_profile_add(struct rte_sched_port *port, 1478 struct rte_sched_subport_profile_params *params, 1479 uint32_t *subport_profile_id) 1480 { 1481 int status; 1482 uint32_t i; 1483 struct rte_sched_subport_profile *dst; 1484 1485 /* Port */ 1486 if (port == NULL) { 1487 RTE_LOG(ERR, SCHED, "%s: " 1488 "Incorrect value for parameter port\n", __func__); 1489 return -EINVAL; 1490 } 1491 1492 if (params == NULL) { 1493 RTE_LOG(ERR, SCHED, "%s: " 1494 "Incorrect value for parameter profile\n", __func__); 1495 return -EINVAL; 1496 } 1497 1498 if (subport_profile_id == NULL) { 1499 RTE_LOG(ERR, SCHED, "%s: " 1500 "Incorrect value for parameter subport_profile_id\n", 1501 __func__); 1502 return -EINVAL; 1503 } 1504 1505 dst = port->subport_profiles + port->n_subport_profiles; 1506 1507 /* Subport profiles exceeds the max limit */ 1508 if (port->n_subport_profiles >= port->n_max_subport_profiles) { 1509 RTE_LOG(ERR, SCHED, "%s: " 1510 "Number of subport profiles exceeds the max limit\n", 1511 __func__); 1512 return -EINVAL; 1513 } 1514 1515 status = subport_profile_check(params, port->rate); 1516 if (status != 0) { 1517 RTE_LOG(ERR, SCHED, 1518 "%s: subport profile check failed(%d)\n", __func__, status); 1519 return -EINVAL; 1520 } 1521 1522 rte_sched_subport_profile_convert(params, dst, port->rate); 1523 1524 /* Subport profile should not exists */ 1525 for (i = 0; i < port->n_subport_profiles; i++) 1526 if (memcmp(port->subport_profiles + i, 1527 dst, sizeof(*dst)) == 0) { 1528 RTE_LOG(ERR, SCHED, 1529 "%s: subport profile exists\n", __func__); 1530 return -EINVAL; 1531 } 1532 1533 /* Subport profile commit */ 1534 *subport_profile_id = port->n_subport_profiles; 1535 port->n_subport_profiles++; 1536 1537 rte_sched_port_log_subport_profile(port, *subport_profile_id); 1538 1539 return 0; 1540 } 1541 1542 static inline uint32_t 1543 rte_sched_port_qindex(struct rte_sched_port *port, 1544 uint32_t subport, 1545 uint32_t pipe, 1546 uint32_t traffic_class, 1547 uint32_t queue) 1548 { 1549 return ((subport & (port->n_subports_per_port - 1)) << 1550 (port->n_pipes_per_subport_log2 + 4)) | 1551 ((pipe & 1552 (port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) | 1553 ((rte_sched_port_pipe_queue(port, traffic_class) + queue) & 1554 (RTE_SCHED_QUEUES_PER_PIPE - 1)); 1555 } 1556 1557 void 1558 rte_sched_port_pkt_write(struct rte_sched_port *port, 1559 struct rte_mbuf *pkt, 1560 uint32_t subport, uint32_t pipe, 1561 uint32_t traffic_class, 1562 uint32_t queue, enum rte_color color) 1563 { 1564 uint32_t queue_id = 1565 rte_sched_port_qindex(port, subport, pipe, traffic_class, queue); 1566 1567 rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color); 1568 } 1569 1570 void 1571 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port, 1572 const 
struct rte_mbuf *pkt, 1573 uint32_t *subport, uint32_t *pipe, 1574 uint32_t *traffic_class, uint32_t *queue) 1575 { 1576 uint32_t queue_id = rte_mbuf_sched_queue_get(pkt); 1577 1578 *subport = queue_id >> (port->n_pipes_per_subport_log2 + 4); 1579 *pipe = (queue_id >> 4) & 1580 (port->subports[*subport]->n_pipes_per_subport_enabled - 1); 1581 *traffic_class = rte_sched_port_pipe_tc(port, queue_id); 1582 *queue = rte_sched_port_tc_queue(port, queue_id); 1583 } 1584 1585 enum rte_color 1586 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt) 1587 { 1588 return (enum rte_color)rte_mbuf_sched_color_get(pkt); 1589 } 1590 1591 int 1592 rte_sched_subport_read_stats(struct rte_sched_port *port, 1593 uint32_t subport_id, 1594 struct rte_sched_subport_stats *stats, 1595 uint32_t *tc_ov) 1596 { 1597 struct rte_sched_subport *s; 1598 1599 /* Check user parameters */ 1600 if (port == NULL) { 1601 RTE_LOG(ERR, SCHED, 1602 "%s: Incorrect value for parameter port\n", __func__); 1603 return -EINVAL; 1604 } 1605 1606 if (subport_id >= port->n_subports_per_port) { 1607 RTE_LOG(ERR, SCHED, 1608 "%s: Incorrect value for subport id\n", __func__); 1609 return -EINVAL; 1610 } 1611 1612 if (stats == NULL) { 1613 RTE_LOG(ERR, SCHED, 1614 "%s: Incorrect value for parameter stats\n", __func__); 1615 return -EINVAL; 1616 } 1617 1618 if (tc_ov == NULL) { 1619 RTE_LOG(ERR, SCHED, 1620 "%s: Incorrect value for tc_ov\n", __func__); 1621 return -EINVAL; 1622 } 1623 1624 s = port->subports[subport_id]; 1625 1626 /* Copy subport stats and clear */ 1627 memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats)); 1628 memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats)); 1629 1630 /* Subport TC oversubscription status */ 1631 *tc_ov = s->tc_ov; 1632 1633 return 0; 1634 } 1635 1636 int 1637 rte_sched_queue_read_stats(struct rte_sched_port *port, 1638 uint32_t queue_id, 1639 struct rte_sched_queue_stats *stats, 1640 uint16_t *qlen) 1641 { 1642 struct rte_sched_subport *s; 1643 struct rte_sched_queue *q; 1644 struct rte_sched_queue_extra *qe; 1645 uint32_t subport_id, subport_qmask, subport_qindex; 1646 1647 /* Check user parameters */ 1648 if (port == NULL) { 1649 RTE_LOG(ERR, SCHED, 1650 "%s: Incorrect value for parameter port\n", __func__); 1651 return -EINVAL; 1652 } 1653 1654 if (queue_id >= rte_sched_port_queues_per_port(port)) { 1655 RTE_LOG(ERR, SCHED, 1656 "%s: Incorrect value for queue id\n", __func__); 1657 return -EINVAL; 1658 } 1659 1660 if (stats == NULL) { 1661 RTE_LOG(ERR, SCHED, 1662 "%s: Incorrect value for parameter stats\n", __func__); 1663 return -EINVAL; 1664 } 1665 1666 if (qlen == NULL) { 1667 RTE_LOG(ERR, SCHED, 1668 "%s: Incorrect value for parameter qlen\n", __func__); 1669 return -EINVAL; 1670 } 1671 subport_qmask = port->n_pipes_per_subport_log2 + 4; 1672 subport_id = (queue_id >> subport_qmask) & (port->n_subports_per_port - 1); 1673 1674 s = port->subports[subport_id]; 1675 subport_qindex = ((1 << subport_qmask) - 1) & queue_id; 1676 q = s->queue + subport_qindex; 1677 qe = s->queue_extra + subport_qindex; 1678 1679 /* Copy queue stats and clear */ 1680 memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats)); 1681 memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats)); 1682 1683 /* Queue length */ 1684 *qlen = q->qw - q->qr; 1685 1686 return 0; 1687 } 1688 1689 #ifdef RTE_SCHED_DEBUG 1690 1691 static inline int 1692 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport, 1693 uint32_t qindex) 1694 { 1695 struct rte_sched_queue *queue = subport->queue + 
qindex; 1696 1697 return queue->qr == queue->qw; 1698 } 1699 1700 #endif /* RTE_SCHED_DEBUG */ 1701 1702 #ifdef RTE_SCHED_COLLECT_STATS 1703 1704 static inline void 1705 rte_sched_port_update_subport_stats(struct rte_sched_port *port, 1706 struct rte_sched_subport *subport, 1707 uint32_t qindex, 1708 struct rte_mbuf *pkt) 1709 { 1710 uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex); 1711 uint32_t pkt_len = pkt->pkt_len; 1712 1713 subport->stats.n_pkts_tc[tc_index] += 1; 1714 subport->stats.n_bytes_tc[tc_index] += pkt_len; 1715 } 1716 1717 #ifdef RTE_SCHED_RED 1718 static inline void 1719 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, 1720 struct rte_sched_subport *subport, 1721 uint32_t qindex, 1722 struct rte_mbuf *pkt, 1723 uint32_t red) 1724 #else 1725 static inline void 1726 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, 1727 struct rte_sched_subport *subport, 1728 uint32_t qindex, 1729 struct rte_mbuf *pkt, 1730 __rte_unused uint32_t red) 1731 #endif 1732 { 1733 uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex); 1734 uint32_t pkt_len = pkt->pkt_len; 1735 1736 subport->stats.n_pkts_tc_dropped[tc_index] += 1; 1737 subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len; 1738 #ifdef RTE_SCHED_RED 1739 subport->stats.n_pkts_red_dropped[tc_index] += red; 1740 #endif 1741 } 1742 1743 static inline void 1744 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport, 1745 uint32_t qindex, 1746 struct rte_mbuf *pkt) 1747 { 1748 struct rte_sched_queue_extra *qe = subport->queue_extra + qindex; 1749 uint32_t pkt_len = pkt->pkt_len; 1750 1751 qe->stats.n_pkts += 1; 1752 qe->stats.n_bytes += pkt_len; 1753 } 1754 1755 #ifdef RTE_SCHED_RED 1756 static inline void 1757 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport, 1758 uint32_t qindex, 1759 struct rte_mbuf *pkt, 1760 uint32_t red) 1761 #else 1762 static inline void 1763 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport, 1764 uint32_t qindex, 1765 struct rte_mbuf *pkt, 1766 __rte_unused uint32_t red) 1767 #endif 1768 { 1769 struct rte_sched_queue_extra *qe = subport->queue_extra + qindex; 1770 uint32_t pkt_len = pkt->pkt_len; 1771 1772 qe->stats.n_pkts_dropped += 1; 1773 qe->stats.n_bytes_dropped += pkt_len; 1774 #ifdef RTE_SCHED_RED 1775 qe->stats.n_pkts_red_dropped += red; 1776 #endif 1777 } 1778 1779 #endif /* RTE_SCHED_COLLECT_STATS */ 1780 1781 #ifdef RTE_SCHED_RED 1782 1783 static inline int 1784 rte_sched_port_red_drop(struct rte_sched_port *port, 1785 struct rte_sched_subport *subport, 1786 struct rte_mbuf *pkt, 1787 uint32_t qindex, 1788 uint16_t qlen) 1789 { 1790 struct rte_sched_queue_extra *qe; 1791 struct rte_red_config *red_cfg; 1792 struct rte_red *red; 1793 uint32_t tc_index; 1794 enum rte_color color; 1795 1796 tc_index = rte_sched_port_pipe_tc(port, qindex); 1797 color = rte_sched_port_pkt_read_color(pkt); 1798 red_cfg = &subport->red_config[tc_index][color]; 1799 1800 if ((red_cfg->min_th | red_cfg->max_th) == 0) 1801 return 0; 1802 1803 qe = subport->queue_extra + qindex; 1804 red = &qe->red; 1805 1806 return rte_red_enqueue(red_cfg, red, qlen, port->time); 1807 } 1808 1809 static inline void 1810 rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, 1811 struct rte_sched_subport *subport, uint32_t qindex) 1812 { 1813 struct rte_sched_queue_extra *qe = subport->queue_extra + qindex; 1814 struct rte_red *red = &qe->red; 1815 1816 rte_red_mark_queue_empty(red, port->time); 1817 } 1818 
1819 #else 1820 1821 static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused, 1822 struct rte_sched_subport *subport __rte_unused, 1823 struct rte_mbuf *pkt __rte_unused, 1824 uint32_t qindex __rte_unused, 1825 uint16_t qlen __rte_unused) 1826 { 1827 return 0; 1828 } 1829 1830 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex) 1831 1832 #endif /* RTE_SCHED_RED */ 1833 1834 #ifdef RTE_SCHED_DEBUG 1835 1836 static inline void 1837 debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos, 1838 uint64_t bmp_slab) 1839 { 1840 uint64_t mask; 1841 uint32_t i, panic; 1842 1843 if (bmp_slab == 0) 1844 rte_panic("Empty slab at position %u\n", bmp_pos); 1845 1846 panic = 0; 1847 for (i = 0, mask = 1; i < 64; i++, mask <<= 1) { 1848 if (mask & bmp_slab) { 1849 if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) { 1850 printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i); 1851 panic = 1; 1852 } 1853 } 1854 } 1855 1856 if (panic) 1857 rte_panic("Empty queues in slab 0x%" PRIx64 "starting at position %u\n", 1858 bmp_slab, bmp_pos); 1859 } 1860 1861 #endif /* RTE_SCHED_DEBUG */ 1862 1863 static inline struct rte_sched_subport * 1864 rte_sched_port_subport(struct rte_sched_port *port, 1865 struct rte_mbuf *pkt) 1866 { 1867 uint32_t queue_id = rte_mbuf_sched_queue_get(pkt); 1868 uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4); 1869 1870 return port->subports[subport_id]; 1871 } 1872 1873 static inline uint32_t 1874 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport, 1875 struct rte_mbuf *pkt, uint32_t subport_qmask) 1876 { 1877 struct rte_sched_queue *q; 1878 #ifdef RTE_SCHED_COLLECT_STATS 1879 struct rte_sched_queue_extra *qe; 1880 #endif 1881 uint32_t qindex = rte_mbuf_sched_queue_get(pkt); 1882 uint32_t subport_queue_id = subport_qmask & qindex; 1883 1884 q = subport->queue + subport_queue_id; 1885 rte_prefetch0(q); 1886 #ifdef RTE_SCHED_COLLECT_STATS 1887 qe = subport->queue_extra + subport_queue_id; 1888 rte_prefetch0(qe); 1889 #endif 1890 1891 return subport_queue_id; 1892 } 1893 1894 static inline void 1895 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port, 1896 struct rte_sched_subport *subport, 1897 uint32_t qindex, 1898 struct rte_mbuf **qbase) 1899 { 1900 struct rte_sched_queue *q; 1901 struct rte_mbuf **q_qw; 1902 uint16_t qsize; 1903 1904 q = subport->queue + qindex; 1905 qsize = rte_sched_subport_pipe_qsize(port, subport, qindex); 1906 q_qw = qbase + (q->qw & (qsize - 1)); 1907 1908 rte_prefetch0(q_qw); 1909 rte_bitmap_prefetch0(subport->bmp, qindex); 1910 } 1911 1912 static inline int 1913 rte_sched_port_enqueue_qwa(struct rte_sched_port *port, 1914 struct rte_sched_subport *subport, 1915 uint32_t qindex, 1916 struct rte_mbuf **qbase, 1917 struct rte_mbuf *pkt) 1918 { 1919 struct rte_sched_queue *q; 1920 uint16_t qsize; 1921 uint16_t qlen; 1922 1923 q = subport->queue + qindex; 1924 qsize = rte_sched_subport_pipe_qsize(port, subport, qindex); 1925 qlen = q->qw - q->qr; 1926 1927 /* Drop the packet (and update drop stats) when queue is full */ 1928 if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) || 1929 (qlen >= qsize))) { 1930 rte_pktmbuf_free(pkt); 1931 #ifdef RTE_SCHED_COLLECT_STATS 1932 rte_sched_port_update_subport_stats_on_drop(port, subport, 1933 qindex, pkt, qlen < qsize); 1934 rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt, 1935 qlen < qsize); 1936 #endif 1937 return 0; 1938 } 1939 1940 /* Enqueue packet */ 
1941 qbase[q->qw & (qsize - 1)] = pkt; 1942 q->qw++; 1943 1944 /* Activate queue in the subport bitmap */ 1945 rte_bitmap_set(subport->bmp, qindex); 1946 1947 /* Statistics */ 1948 #ifdef RTE_SCHED_COLLECT_STATS 1949 rte_sched_port_update_subport_stats(port, subport, qindex, pkt); 1950 rte_sched_port_update_queue_stats(subport, qindex, pkt); 1951 #endif 1952 1953 return 1; 1954 } 1955 1956 1957 /* 1958 * The enqueue function implements a 4-level pipeline with each stage 1959 * processing two different packets. The purpose of using a pipeline 1960 * is to hide the latency of prefetching the data structures. The 1961 * naming convention is presented in the diagram below: 1962 * 1963 * p00 _______ p10 _______ p20 _______ p30 _______ 1964 * ----->| |----->| |----->| |----->| |-----> 1965 * | 0 | | 1 | | 2 | | 3 | 1966 * ----->|_______|----->|_______|----->|_______|----->|_______|-----> 1967 * p01 p11 p21 p31 1968 * 1969 */ 1970 int 1971 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, 1972 uint32_t n_pkts) 1973 { 1974 struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21, 1975 *pkt30, *pkt31, *pkt_last; 1976 struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base, 1977 **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base; 1978 struct rte_sched_subport *subport00, *subport01, *subport10, *subport11, 1979 *subport20, *subport21, *subport30, *subport31, *subport_last; 1980 uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last; 1981 uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last; 1982 uint32_t subport_qmask; 1983 uint32_t result, i; 1984 1985 result = 0; 1986 subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1; 1987 1988 /* 1989 * Less then 6 input packets available, which is not enough to 1990 * feed the pipeline 1991 */ 1992 if (unlikely(n_pkts < 6)) { 1993 struct rte_sched_subport *subports[5]; 1994 struct rte_mbuf **q_base[5]; 1995 uint32_t q[5]; 1996 1997 /* Prefetch the mbuf structure of each packet */ 1998 for (i = 0; i < n_pkts; i++) 1999 rte_prefetch0(pkts[i]); 2000 2001 /* Prefetch the subport structure for each packet */ 2002 for (i = 0; i < n_pkts; i++) 2003 subports[i] = rte_sched_port_subport(port, pkts[i]); 2004 2005 /* Prefetch the queue structure for each queue */ 2006 for (i = 0; i < n_pkts; i++) 2007 q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i], 2008 pkts[i], subport_qmask); 2009 2010 /* Prefetch the write pointer location of each queue */ 2011 for (i = 0; i < n_pkts; i++) { 2012 q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]); 2013 rte_sched_port_enqueue_qwa_prefetch0(port, subports[i], 2014 q[i], q_base[i]); 2015 } 2016 2017 /* Write each packet to its queue */ 2018 for (i = 0; i < n_pkts; i++) 2019 result += rte_sched_port_enqueue_qwa(port, subports[i], 2020 q[i], q_base[i], pkts[i]); 2021 2022 return result; 2023 } 2024 2025 /* Feed the first 3 stages of the pipeline (6 packets needed) */ 2026 pkt20 = pkts[0]; 2027 pkt21 = pkts[1]; 2028 rte_prefetch0(pkt20); 2029 rte_prefetch0(pkt21); 2030 2031 pkt10 = pkts[2]; 2032 pkt11 = pkts[3]; 2033 rte_prefetch0(pkt10); 2034 rte_prefetch0(pkt11); 2035 2036 subport20 = rte_sched_port_subport(port, pkt20); 2037 subport21 = rte_sched_port_subport(port, pkt21); 2038 q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20, 2039 pkt20, subport_qmask); 2040 q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21, 2041 pkt21, subport_qmask); 2042 2043 pkt00 = pkts[4]; 2044 pkt01 = pkts[5]; 2045 rte_prefetch0(pkt00); 2046 rte_prefetch0(pkt01); 2047 
2048 subport10 = rte_sched_port_subport(port, pkt10); 2049 subport11 = rte_sched_port_subport(port, pkt11); 2050 q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10, 2051 pkt10, subport_qmask); 2052 q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11, 2053 pkt11, subport_qmask); 2054 2055 q20_base = rte_sched_subport_pipe_qbase(subport20, q20); 2056 q21_base = rte_sched_subport_pipe_qbase(subport21, q21); 2057 rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base); 2058 rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base); 2059 2060 /* Run the pipeline */ 2061 for (i = 6; i < (n_pkts & (~1)); i += 2) { 2062 /* Propagate stage inputs */ 2063 pkt30 = pkt20; 2064 pkt31 = pkt21; 2065 pkt20 = pkt10; 2066 pkt21 = pkt11; 2067 pkt10 = pkt00; 2068 pkt11 = pkt01; 2069 q30 = q20; 2070 q31 = q21; 2071 q20 = q10; 2072 q21 = q11; 2073 subport30 = subport20; 2074 subport31 = subport21; 2075 subport20 = subport10; 2076 subport21 = subport11; 2077 q30_base = q20_base; 2078 q31_base = q21_base; 2079 2080 /* Stage 0: Get packets in */ 2081 pkt00 = pkts[i]; 2082 pkt01 = pkts[i + 1]; 2083 rte_prefetch0(pkt00); 2084 rte_prefetch0(pkt01); 2085 2086 /* Stage 1: Prefetch subport and queue structure storing queue pointers */ 2087 subport10 = rte_sched_port_subport(port, pkt10); 2088 subport11 = rte_sched_port_subport(port, pkt11); 2089 q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10, 2090 pkt10, subport_qmask); 2091 q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11, 2092 pkt11, subport_qmask); 2093 2094 /* Stage 2: Prefetch queue write location */ 2095 q20_base = rte_sched_subport_pipe_qbase(subport20, q20); 2096 q21_base = rte_sched_subport_pipe_qbase(subport21, q21); 2097 rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base); 2098 rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base); 2099 2100 /* Stage 3: Write packet to queue and activate queue */ 2101 r30 = rte_sched_port_enqueue_qwa(port, subport30, 2102 q30, q30_base, pkt30); 2103 r31 = rte_sched_port_enqueue_qwa(port, subport31, 2104 q31, q31_base, pkt31); 2105 result += r30 + r31; 2106 } 2107 2108 /* 2109 * Drain the pipeline (exactly 6 packets). 2110 * Handle the last packet in the case 2111 * of an odd number of input packets. 
2112 */ 2113 pkt_last = pkts[n_pkts - 1]; 2114 rte_prefetch0(pkt_last); 2115 2116 subport00 = rte_sched_port_subport(port, pkt00); 2117 subport01 = rte_sched_port_subport(port, pkt01); 2118 q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00, 2119 pkt00, subport_qmask); 2120 q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01, 2121 pkt01, subport_qmask); 2122 2123 q10_base = rte_sched_subport_pipe_qbase(subport10, q10); 2124 q11_base = rte_sched_subport_pipe_qbase(subport11, q11); 2125 rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base); 2126 rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base); 2127 2128 r20 = rte_sched_port_enqueue_qwa(port, subport20, 2129 q20, q20_base, pkt20); 2130 r21 = rte_sched_port_enqueue_qwa(port, subport21, 2131 q21, q21_base, pkt21); 2132 result += r20 + r21; 2133 2134 subport_last = rte_sched_port_subport(port, pkt_last); 2135 q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last, 2136 pkt_last, subport_qmask); 2137 2138 q00_base = rte_sched_subport_pipe_qbase(subport00, q00); 2139 q01_base = rte_sched_subport_pipe_qbase(subport01, q01); 2140 rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base); 2141 rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base); 2142 2143 r10 = rte_sched_port_enqueue_qwa(port, subport10, q10, 2144 q10_base, pkt10); 2145 r11 = rte_sched_port_enqueue_qwa(port, subport11, q11, 2146 q11_base, pkt11); 2147 result += r10 + r11; 2148 2149 q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last); 2150 rte_sched_port_enqueue_qwa_prefetch0(port, subport_last, 2151 q_last, q_last_base); 2152 2153 r00 = rte_sched_port_enqueue_qwa(port, subport00, q00, 2154 q00_base, pkt00); 2155 r01 = rte_sched_port_enqueue_qwa(port, subport01, q01, 2156 q01_base, pkt01); 2157 result += r00 + r01; 2158 2159 if (n_pkts & 1) { 2160 r_last = rte_sched_port_enqueue_qwa(port, subport_last, 2161 q_last, q_last_base, pkt_last); 2162 result += r_last; 2163 } 2164 2165 return result; 2166 } 2167 2168 #ifndef RTE_SCHED_SUBPORT_TC_OV 2169 2170 static inline void 2171 grinder_credits_update(struct rte_sched_port *port, 2172 struct rte_sched_subport *subport, uint32_t pos) 2173 { 2174 struct rte_sched_grinder *grinder = subport->grinder + pos; 2175 struct rte_sched_pipe *pipe = grinder->pipe; 2176 struct rte_sched_pipe_profile *params = grinder->pipe_params; 2177 struct rte_sched_subport_profile *sp = grinder->subport_params; 2178 uint64_t n_periods; 2179 uint32_t i; 2180 2181 /* Subport TB */ 2182 n_periods = (port->time - subport->tb_time) / sp->tb_period; 2183 subport->tb_credits += n_periods * sp->tb_credits_per_period; 2184 subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size); 2185 subport->tb_time += n_periods * sp->tb_period; 2186 2187 /* Pipe TB */ 2188 n_periods = (port->time - pipe->tb_time) / params->tb_period; 2189 pipe->tb_credits += n_periods * params->tb_credits_per_period; 2190 pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size); 2191 pipe->tb_time += n_periods * params->tb_period; 2192 2193 /* Subport TCs */ 2194 if (unlikely(port->time >= subport->tc_time)) { 2195 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 2196 subport->tc_credits[i] = sp->tc_credits_per_period[i]; 2197 2198 subport->tc_time = port->time + sp->tc_period; 2199 } 2200 2201 /* Pipe TCs */ 2202 if (unlikely(port->time >= pipe->tc_time)) { 2203 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) 2204 pipe->tc_credits[i] = params->tc_credits_per_period[i]; 2205 2206 
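		/*
		 * Unlike the token buckets above, TC credits do not
		 * accumulate across periods: on expiry every TC is reset to
		 * its full per-period allowance and the next deadline is set
		 * one tc_period after the current port time.
		 */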
		pipe->tc_time = port->time + params->tc_period;
	}
}

#else

static inline uint64_t
grinder_tc_ov_credits_update(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_subport_profile *sp = grinder->subport_params;
	uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint64_t tc_consumption = 0, tc_ov_consumption_max;
	uint64_t tc_ov_wm = subport->tc_ov_wm;
	uint32_t i;

	if (subport->tc_ov == 0)
		return subport->tc_ov_wm_max;

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
		tc_ov_consumption[i] = sp->tc_credits_per_period[i]
					- subport->tc_credits[i];
		tc_consumption += tc_ov_consumption[i];
	}

	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
		sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];

	tc_ov_consumption_max =
		sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
			tc_consumption;

	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
		(tc_ov_consumption_max - port->mtu)) {
		tc_ov_wm -= tc_ov_wm >> 7;
		if (tc_ov_wm < subport->tc_ov_wm_min)
			tc_ov_wm = subport->tc_ov_wm_min;

		return tc_ov_wm;
	}

	tc_ov_wm += (tc_ov_wm >> 7) + 1;
	if (tc_ov_wm > subport->tc_ov_wm_max)
		tc_ov_wm = subport->tc_ov_wm_max;

	return tc_ov_wm;
}

static inline void
grinder_credits_update(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_pipe *pipe = grinder->pipe;
	struct rte_sched_pipe_profile *params = grinder->pipe_params;
	struct rte_sched_subport_profile *sp = grinder->subport_params;
	uint64_t n_periods;
	uint32_t i;

	/* Subport TB */
	n_periods = (port->time - subport->tb_time) / sp->tb_period;
	subport->tb_credits += n_periods * sp->tb_credits_per_period;
	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
	subport->tb_time += n_periods * sp->tb_period;

	/* Pipe TB */
	n_periods = (port->time - pipe->tb_time) / params->tb_period;
	pipe->tb_credits += n_periods * params->tb_credits_per_period;
	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
	pipe->tb_time += n_periods * params->tb_period;

	/* Subport TCs */
	if (unlikely(port->time >= subport->tc_time)) {
		subport->tc_ov_wm =
			grinder_tc_ov_credits_update(port, subport, pos);

		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
			subport->tc_credits[i] = sp->tc_credits_per_period[i];

		subport->tc_time = port->time + sp->tc_period;
		subport->tc_ov_period_id++;
	}

	/* Pipe TCs */
	if (unlikely(port->time >= pipe->tc_time)) {
		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
			pipe->tc_credits[i] = params->tc_credits_per_period[i];
		pipe->tc_time = port->time + params->tc_period;
	}

	/* Pipe TCs - Oversubscription */
	if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
		pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;

		pipe->tc_ov_period_id = subport->tc_ov_period_id;
	}
}

#endif /* RTE_SCHED_SUBPORT_TC_OV */


#ifndef RTE_SCHED_SUBPORT_TC_OV

static inline int
grinder_credits_check(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_pipe *pipe = grinder->pipe;
	struct rte_mbuf *pkt = grinder->pkt;
	uint32_t tc_index = grinder->tc_index;
	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
	uint64_t subport_tb_credits = subport->tb_credits;
	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
	uint64_t pipe_tb_credits = pipe->tb_credits;
	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
	int enough_credits;

	/* Check pipe and subport credits */
	enough_credits = (pkt_len <= subport_tb_credits) &&
		(pkt_len <= subport_tc_credits) &&
		(pkt_len <= pipe_tb_credits) &&
		(pkt_len <= pipe_tc_credits);

	if (!enough_credits)
		return 0;

	/* Update pipe and subport credits */
	subport->tb_credits -= pkt_len;
	subport->tc_credits[tc_index] -= pkt_len;
	pipe->tb_credits -= pkt_len;
	pipe->tc_credits[tc_index] -= pkt_len;

	return 1;
}

#else

static inline int
grinder_credits_check(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_pipe *pipe = grinder->pipe;
	struct rte_mbuf *pkt = grinder->pkt;
	uint32_t tc_index = grinder->tc_index;
	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
	uint64_t subport_tb_credits = subport->tb_credits;
	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
	uint64_t pipe_tb_credits = pipe->tb_credits;
	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
	uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
	uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
	uint64_t pipe_tc_ov_credits;
	uint32_t i;
	int enough_credits;

	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
		pipe_tc_ov_mask1[i] = ~0LLU;

	pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
	pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU;
	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];

	/* Check pipe and subport credits */
	enough_credits = (pkt_len <= subport_tb_credits) &&
		(pkt_len <= subport_tc_credits) &&
		(pkt_len <= pipe_tb_credits) &&
		(pkt_len <= pipe_tc_credits) &&
		(pkt_len <= pipe_tc_ov_credits);

	if (!enough_credits)
		return 0;

	/* Update pipe and subport credits */
	subport->tb_credits -= pkt_len;
	subport->tc_credits[tc_index] -= pkt_len;
	pipe->tb_credits -= pkt_len;
	pipe->tc_credits[tc_index] -= pkt_len;
	pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;

	return 1;
}

#endif /* RTE_SCHED_SUBPORT_TC_OV */


static inline int
grinder_schedule(struct rte_sched_port *port,
	struct rte_sched_subport *subport, uint32_t pos)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
	struct rte_mbuf *pkt = grinder->pkt;
	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
	uint32_t be_tc_active;

	if (!grinder_credits_check(port, subport, pos))
		return 0;

	/* Advance port time */
	port->time += pkt_len;

	/* Send packet */
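	/*
	 * "Sending" only means appending the mbuf to the caller-supplied
	 * output array of the current dequeue burst; the queue consumer
	 * index is advanced and, for the best-effort TC, the WRR token
	 * count of the selected queue is increased by the packet cost.
	 */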
	port->pkts_out[port->n_pkts_out++] = pkt;
	queue->qr++;

	be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
	grinder->wrr_tokens[grinder->qpos] +=
		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;

	if (queue->qr == queue->qw) {
		uint32_t qindex = grinder->qindex[grinder->qpos];

		rte_bitmap_clear(subport->bmp, qindex);
		grinder->qmask &= ~(1 << grinder->qpos);
		if (be_tc_active)
			grinder->wrr_mask[grinder->qpos] = 0;
		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
	}

	/* Reset pipe loop detection */
	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
	grinder->productive = 1;

	return 1;
}

#ifdef SCHED_VECTOR_SSE4

static inline int
grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
{
	__m128i index = _mm_set1_epi32(base_pipe);
	__m128i pipes = _mm_load_si128((__m128i *)subport->grinder_base_bmp_pos);
	__m128i res = _mm_cmpeq_epi32(pipes, index);

	pipes = _mm_load_si128((__m128i *)(subport->grinder_base_bmp_pos + 4));
	pipes = _mm_cmpeq_epi32(pipes, index);
	res = _mm_or_si128(res, pipes);

	if (_mm_testz_si128(res, res))
		return 0;

	return 1;
}

#elif defined(SCHED_VECTOR_NEON)

static inline int
grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
{
	uint32x4_t index, pipes;
	uint32_t *pos = (uint32_t *)subport->grinder_base_bmp_pos;

	index = vmovq_n_u32(base_pipe);
	pipes = vld1q_u32(pos);
	if (!vminvq_u32(veorq_u32(pipes, index)))
		return 1;

	pipes = vld1q_u32(pos + 4);
	if (!vminvq_u32(veorq_u32(pipes, index)))
		return 1;

	return 0;
}

#else

static inline int
grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
{
	uint32_t i;

	for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
		if (subport->grinder_base_bmp_pos[i] == base_pipe)
			return 1;
	}

	return 0;
}

#endif /* SCHED_VECTOR_SSE4, SCHED_VECTOR_NEON */

static inline void
grinder_pcache_populate(struct rte_sched_subport *subport,
	uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	uint16_t w[4];

	grinder->pcache_w = 0;
	grinder->pcache_r = 0;

	w[0] = (uint16_t) bmp_slab;
	w[1] = (uint16_t) (bmp_slab >> 16);
	w[2] = (uint16_t) (bmp_slab >> 32);
	w[3] = (uint16_t) (bmp_slab >> 48);

	grinder->pcache_qmask[grinder->pcache_w] = w[0];
	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
	grinder->pcache_w += (w[0] != 0);

	grinder->pcache_qmask[grinder->pcache_w] = w[1];
	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
	grinder->pcache_w += (w[1] != 0);

	grinder->pcache_qmask[grinder->pcache_w] = w[2];
	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
	grinder->pcache_w += (w[2] != 0);

	grinder->pcache_qmask[grinder->pcache_w] = w[3];
	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
	grinder->pcache_w += (w[3] != 0);
}

static inline void
grinder_tccache_populate(struct rte_sched_subport *subport,
	uint32_t pos, uint32_t qindex, uint16_t qmask)
{
	struct rte_sched_grinder *grinder = subport->grinder + pos;
	uint8_t b, i;

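	/*
	 * Build the list of active traffic classes for this pipe: each
	 * high-priority TC owns a single queue and contributes one entry
	 * when its qmask bit is set, while the best-effort TC contributes
	 * one entry whose mask covers its four WRR queues.
	 */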
grinder->tccache_w = 0; 2532 grinder->tccache_r = 0; 2533 2534 for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) { 2535 b = (uint8_t) ((qmask >> i) & 0x1); 2536 grinder->tccache_qmask[grinder->tccache_w] = b; 2537 grinder->tccache_qindex[grinder->tccache_w] = qindex + i; 2538 grinder->tccache_w += (b != 0); 2539 } 2540 2541 b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE)); 2542 grinder->tccache_qmask[grinder->tccache_w] = b; 2543 grinder->tccache_qindex[grinder->tccache_w] = qindex + 2544 RTE_SCHED_TRAFFIC_CLASS_BE; 2545 grinder->tccache_w += (b != 0); 2546 } 2547 2548 static inline int 2549 grinder_next_tc(struct rte_sched_port *port, 2550 struct rte_sched_subport *subport, uint32_t pos) 2551 { 2552 struct rte_sched_grinder *grinder = subport->grinder + pos; 2553 struct rte_mbuf **qbase; 2554 uint32_t qindex; 2555 uint16_t qsize; 2556 2557 if (grinder->tccache_r == grinder->tccache_w) 2558 return 0; 2559 2560 qindex = grinder->tccache_qindex[grinder->tccache_r]; 2561 qbase = rte_sched_subport_pipe_qbase(subport, qindex); 2562 qsize = rte_sched_subport_pipe_qsize(port, subport, qindex); 2563 2564 grinder->tc_index = rte_sched_port_pipe_tc(port, qindex); 2565 grinder->qmask = grinder->tccache_qmask[grinder->tccache_r]; 2566 grinder->qsize = qsize; 2567 2568 if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) { 2569 grinder->queue[0] = subport->queue + qindex; 2570 grinder->qbase[0] = qbase; 2571 grinder->qindex[0] = qindex; 2572 grinder->tccache_r++; 2573 2574 return 1; 2575 } 2576 2577 grinder->queue[0] = subport->queue + qindex; 2578 grinder->queue[1] = subport->queue + qindex + 1; 2579 grinder->queue[2] = subport->queue + qindex + 2; 2580 grinder->queue[3] = subport->queue + qindex + 3; 2581 2582 grinder->qbase[0] = qbase; 2583 grinder->qbase[1] = qbase + qsize; 2584 grinder->qbase[2] = qbase + 2 * qsize; 2585 grinder->qbase[3] = qbase + 3 * qsize; 2586 2587 grinder->qindex[0] = qindex; 2588 grinder->qindex[1] = qindex + 1; 2589 grinder->qindex[2] = qindex + 2; 2590 grinder->qindex[3] = qindex + 3; 2591 2592 grinder->tccache_r++; 2593 return 1; 2594 } 2595 2596 static inline int 2597 grinder_next_pipe(struct rte_sched_port *port, 2598 struct rte_sched_subport *subport, uint32_t pos) 2599 { 2600 struct rte_sched_grinder *grinder = subport->grinder + pos; 2601 uint32_t pipe_qindex; 2602 uint16_t pipe_qmask; 2603 2604 if (grinder->pcache_r < grinder->pcache_w) { 2605 pipe_qmask = grinder->pcache_qmask[grinder->pcache_r]; 2606 pipe_qindex = grinder->pcache_qindex[grinder->pcache_r]; 2607 grinder->pcache_r++; 2608 } else { 2609 uint64_t bmp_slab = 0; 2610 uint32_t bmp_pos = 0; 2611 2612 /* Get another non-empty pipe group */ 2613 if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0)) 2614 return 0; 2615 2616 #ifdef RTE_SCHED_DEBUG 2617 debug_check_queue_slab(subport, bmp_pos, bmp_slab); 2618 #endif 2619 2620 /* Return if pipe group already in one of the other grinders */ 2621 subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID; 2622 if (unlikely(grinder_pipe_exists(subport, bmp_pos))) 2623 return 0; 2624 2625 subport->grinder_base_bmp_pos[pos] = bmp_pos; 2626 2627 /* Install new pipe group into grinder's pipe cache */ 2628 grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab); 2629 2630 pipe_qmask = grinder->pcache_qmask[0]; 2631 pipe_qindex = grinder->pcache_qindex[0]; 2632 grinder->pcache_r = 1; 2633 } 2634 2635 /* Install new pipe in the grinder */ 2636 grinder->pindex = pipe_qindex >> 4; 2637 grinder->subport = subport; 2638 grinder->pipe = subport->pipe 
+ grinder->pindex; 2639 grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */ 2640 grinder->productive = 0; 2641 2642 grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask); 2643 grinder_next_tc(port, subport, pos); 2644 2645 /* Check for pipe exhaustion */ 2646 if (grinder->pindex == subport->pipe_loop) { 2647 subport->pipe_exhaustion = 1; 2648 subport->pipe_loop = RTE_SCHED_PIPE_INVALID; 2649 } 2650 2651 return 1; 2652 } 2653 2654 2655 static inline void 2656 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos) 2657 { 2658 struct rte_sched_grinder *grinder = subport->grinder + pos; 2659 struct rte_sched_pipe *pipe = grinder->pipe; 2660 struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params; 2661 uint32_t qmask = grinder->qmask; 2662 2663 grinder->wrr_tokens[0] = 2664 ((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT; 2665 grinder->wrr_tokens[1] = 2666 ((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT; 2667 grinder->wrr_tokens[2] = 2668 ((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT; 2669 grinder->wrr_tokens[3] = 2670 ((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT; 2671 2672 grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF; 2673 grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF; 2674 grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF; 2675 grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF; 2676 2677 grinder->wrr_cost[0] = pipe_params->wrr_cost[0]; 2678 grinder->wrr_cost[1] = pipe_params->wrr_cost[1]; 2679 grinder->wrr_cost[2] = pipe_params->wrr_cost[2]; 2680 grinder->wrr_cost[3] = pipe_params->wrr_cost[3]; 2681 } 2682 2683 static inline void 2684 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos) 2685 { 2686 struct rte_sched_grinder *grinder = subport->grinder + pos; 2687 struct rte_sched_pipe *pipe = grinder->pipe; 2688 2689 pipe->wrr_tokens[0] = 2690 (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >> 2691 RTE_SCHED_WRR_SHIFT; 2692 pipe->wrr_tokens[1] = 2693 (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >> 2694 RTE_SCHED_WRR_SHIFT; 2695 pipe->wrr_tokens[2] = 2696 (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >> 2697 RTE_SCHED_WRR_SHIFT; 2698 pipe->wrr_tokens[3] = 2699 (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >> 2700 RTE_SCHED_WRR_SHIFT; 2701 } 2702 2703 static inline void 2704 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos) 2705 { 2706 struct rte_sched_grinder *grinder = subport->grinder + pos; 2707 uint16_t wrr_tokens_min; 2708 2709 grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; 2710 grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1]; 2711 grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2]; 2712 grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3]; 2713 2714 grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens); 2715 wrr_tokens_min = grinder->wrr_tokens[grinder->qpos]; 2716 2717 grinder->wrr_tokens[0] -= wrr_tokens_min; 2718 grinder->wrr_tokens[1] -= wrr_tokens_min; 2719 grinder->wrr_tokens[2] -= wrr_tokens_min; 2720 grinder->wrr_tokens[3] -= wrr_tokens_min; 2721 } 2722 2723 2724 #define grinder_evict(subport, pos) 2725 2726 static inline void 2727 grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos) 2728 { 2729 struct rte_sched_grinder *grinder = subport->grinder + pos; 2730 2731 rte_prefetch0(grinder->pipe); 2732 rte_prefetch0(grinder->queue[0]); 2733 } 2734 2735 static inline void 2736 grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos) 2737 { 2738 struct rte_sched_grinder *grinder = subport->grinder + pos; 
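	/*
	 * For the best-effort TC the four queue heads are prefetched in two
	 * pairs, with the WRR state load and queue selection performed in
	 * between so that this computation overlaps the outstanding
	 * prefetches.
	 */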
2739 uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC]; 2740 2741 qsize = grinder->qsize; 2742 grinder->qpos = 0; 2743 2744 if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) { 2745 qr[0] = grinder->queue[0]->qr & (qsize - 1); 2746 2747 rte_prefetch0(grinder->qbase[0] + qr[0]); 2748 return; 2749 } 2750 2751 qr[0] = grinder->queue[0]->qr & (qsize - 1); 2752 qr[1] = grinder->queue[1]->qr & (qsize - 1); 2753 qr[2] = grinder->queue[2]->qr & (qsize - 1); 2754 qr[3] = grinder->queue[3]->qr & (qsize - 1); 2755 2756 rte_prefetch0(grinder->qbase[0] + qr[0]); 2757 rte_prefetch0(grinder->qbase[1] + qr[1]); 2758 2759 grinder_wrr_load(subport, pos); 2760 grinder_wrr(subport, pos); 2761 2762 rte_prefetch0(grinder->qbase[2] + qr[2]); 2763 rte_prefetch0(grinder->qbase[3] + qr[3]); 2764 } 2765 2766 static inline void 2767 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos) 2768 { 2769 struct rte_sched_grinder *grinder = subport->grinder + pos; 2770 uint32_t qpos = grinder->qpos; 2771 struct rte_mbuf **qbase = grinder->qbase[qpos]; 2772 uint16_t qsize = grinder->qsize; 2773 uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1); 2774 2775 grinder->pkt = qbase[qr]; 2776 rte_prefetch0(grinder->pkt); 2777 2778 if (unlikely((qr & 0x7) == 7)) { 2779 uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1); 2780 2781 rte_prefetch0(qbase + qr_next); 2782 } 2783 } 2784 2785 static inline uint32_t 2786 grinder_handle(struct rte_sched_port *port, 2787 struct rte_sched_subport *subport, uint32_t pos) 2788 { 2789 struct rte_sched_grinder *grinder = subport->grinder + pos; 2790 2791 switch (grinder->state) { 2792 case e_GRINDER_PREFETCH_PIPE: 2793 { 2794 if (grinder_next_pipe(port, subport, pos)) { 2795 grinder_prefetch_pipe(subport, pos); 2796 subport->busy_grinders++; 2797 2798 grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS; 2799 return 0; 2800 } 2801 2802 return 0; 2803 } 2804 2805 case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS: 2806 { 2807 struct rte_sched_pipe *pipe = grinder->pipe; 2808 2809 grinder->pipe_params = subport->pipe_profiles + pipe->profile; 2810 grinder->subport_params = port->subport_profiles + 2811 subport->profile; 2812 2813 grinder_prefetch_tc_queue_arrays(subport, pos); 2814 grinder_credits_update(port, subport, pos); 2815 2816 grinder->state = e_GRINDER_PREFETCH_MBUF; 2817 return 0; 2818 } 2819 2820 case e_GRINDER_PREFETCH_MBUF: 2821 { 2822 grinder_prefetch_mbuf(subport, pos); 2823 2824 grinder->state = e_GRINDER_READ_MBUF; 2825 return 0; 2826 } 2827 2828 case e_GRINDER_READ_MBUF: 2829 { 2830 uint32_t wrr_active, result = 0; 2831 2832 result = grinder_schedule(port, subport, pos); 2833 2834 wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE); 2835 2836 /* Look for next packet within the same TC */ 2837 if (result && grinder->qmask) { 2838 if (wrr_active) 2839 grinder_wrr(subport, pos); 2840 2841 grinder_prefetch_mbuf(subport, pos); 2842 2843 return 1; 2844 } 2845 2846 if (wrr_active) 2847 grinder_wrr_store(subport, pos); 2848 2849 /* Look for another active TC within same pipe */ 2850 if (grinder_next_tc(port, subport, pos)) { 2851 grinder_prefetch_tc_queue_arrays(subport, pos); 2852 2853 grinder->state = e_GRINDER_PREFETCH_MBUF; 2854 return result; 2855 } 2856 2857 if (grinder->productive == 0 && 2858 subport->pipe_loop == RTE_SCHED_PIPE_INVALID) 2859 subport->pipe_loop = grinder->pindex; 2860 2861 grinder_evict(subport, pos); 2862 2863 /* Look for another active pipe */ 2864 if (grinder_next_pipe(port, subport, pos)) { 2865 grinder_prefetch_pipe(subport, pos); 
2866 2867 grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS; 2868 return result; 2869 } 2870 2871 /* No active pipe found */ 2872 subport->busy_grinders--; 2873 2874 grinder->state = e_GRINDER_PREFETCH_PIPE; 2875 return result; 2876 } 2877 2878 default: 2879 rte_panic("Algorithmic error (invalid state)\n"); 2880 return 0; 2881 } 2882 } 2883 2884 static inline void 2885 rte_sched_port_time_resync(struct rte_sched_port *port) 2886 { 2887 uint64_t cycles = rte_get_tsc_cycles(); 2888 uint64_t cycles_diff; 2889 uint64_t bytes_diff; 2890 uint32_t i; 2891 2892 if (cycles < port->time_cpu_cycles) 2893 port->time_cpu_cycles = 0; 2894 2895 cycles_diff = cycles - port->time_cpu_cycles; 2896 /* Compute elapsed time in bytes */ 2897 bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT, 2898 port->inv_cycles_per_byte); 2899 2900 /* Advance port time */ 2901 port->time_cpu_cycles += 2902 (bytes_diff * port->cycles_per_byte) >> RTE_SCHED_TIME_SHIFT; 2903 port->time_cpu_bytes += bytes_diff; 2904 if (port->time < port->time_cpu_bytes) 2905 port->time = port->time_cpu_bytes; 2906 2907 /* Reset pipe loop detection */ 2908 for (i = 0; i < port->n_subports_per_port; i++) 2909 port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID; 2910 } 2911 2912 static inline int 2913 rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass) 2914 { 2915 int exceptions; 2916 2917 /* Check if any exception flag is set */ 2918 exceptions = (second_pass && subport->busy_grinders == 0) || 2919 (subport->pipe_exhaustion == 1); 2920 2921 /* Clear exception flags */ 2922 subport->pipe_exhaustion = 0; 2923 2924 return exceptions; 2925 } 2926 2927 int 2928 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts) 2929 { 2930 struct rte_sched_subport *subport; 2931 uint32_t subport_id = port->subport_id; 2932 uint32_t i, n_subports = 0, count; 2933 2934 port->pkts_out = pkts; 2935 port->n_pkts_out = 0; 2936 2937 rte_sched_port_time_resync(port); 2938 2939 /* Take each queue in the grinder one step further */ 2940 for (i = 0, count = 0; ; i++) { 2941 subport = port->subports[subport_id]; 2942 2943 count += grinder_handle(port, subport, 2944 i & (RTE_SCHED_PORT_N_GRINDERS - 1)); 2945 2946 if (count == n_pkts) { 2947 subport_id++; 2948 2949 if (subport_id == port->n_subports_per_port) 2950 subport_id = 0; 2951 2952 port->subport_id = subport_id; 2953 break; 2954 } 2955 2956 if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) { 2957 i = 0; 2958 subport_id++; 2959 n_subports++; 2960 } 2961 2962 if (subport_id == port->n_subports_per_port) 2963 subport_id = 0; 2964 2965 if (n_subports == port->n_subports_per_port) { 2966 port->subport_id = subport_id; 2967 break; 2968 } 2969 } 2970 2971 return count; 2972 } 2973
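
/*
 * Usage sketch (illustration only, not part of the library): a typical
 * run-to-completion worker pairs the two burst calls implemented above.
 * The port/queue identifiers and the burst size of 64 below are
 * placeholders, and each mbuf is assumed to have been classified into
 * subport/pipe/traffic class/queue (e.g. with rte_sched_port_pkt_write())
 * before enqueue:
 *
 *	struct rte_mbuf *pkts[64];
 *	uint32_t n_rx, n_deq;
 *
 *	n_rx = rte_eth_rx_burst(port_id, rx_queue_id, pkts, 64);
 *	(void) rte_sched_port_enqueue(port, pkts, n_rx);
 *	n_deq = rte_sched_port_dequeue(port, pkts, 64);
 *	if (n_deq)
 *		rte_eth_tx_burst(port_id, tx_queue_id, pkts, n_deq);
 *
 * Packets that cannot be enqueued (queue full or dropped by congestion
 * management) are freed internally, so the enqueue return value only
 * needs to be inspected for statistics.
 */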