xref: /dpdk/lib/sched/rte_sched.c (revision 592ab76f9f0f41993bebb44da85c37750a93ece9)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <string.h>
7 
8 #include <rte_common.h>
9 #include <rte_log.h>
10 #include <rte_malloc.h>
11 #include <rte_cycles.h>
12 #include <rte_prefetch.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mbuf.h>
15 #include <rte_bitmap.h>
16 #include <rte_reciprocal.h>
17 
18 #include "rte_sched.h"
19 #include "rte_sched_common.h"
20 #include "rte_approx.h"
21 
22 #ifdef __INTEL_COMPILER
23 #pragma warning(disable:2259) /* conversion may lose significant bits */
24 #endif
25 
26 #ifndef RTE_SCHED_PORT_N_GRINDERS
27 #define RTE_SCHED_PORT_N_GRINDERS 8
28 #endif
29 
30 #define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
31 #define RTE_SCHED_WRR_SHIFT                   3
32 #define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
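/* A grinder's pipe cache most likely holds one entry per pipe covered by a
 * single 64-bit bitmap slab: with RTE_SCHED_QUEUES_PER_PIPE = 16 this works
 * out to 64 / 16 = 4 cached pipes.
 */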
33 #define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
34 #define RTE_SCHED_PIPE_INVALID                UINT32_MAX
35 #define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX
36 
37 /* Scaling for cycles_per_byte calculation
38  * Chosen so that minimum rate is 480 bit/sec
39  */
40 #define RTE_SCHED_TIME_SHIFT		      8
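/* Sketch of the arithmetic behind the 480 bit/sec figure: cycles_per_byte is
 * computed as (tsc_hz << RTE_SCHED_TIME_SHIFT) / rate and kept in a uint32_t.
 * Assuming, for example, a 1 GHz TSC, the smallest byte rate for which
 * (1e9 * 256) / rate still fits in 32 bits is roughly 60 bytes/sec, i.e.
 * about 480 bit/sec.
 */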
41 
42 struct rte_sched_pipe_profile {
43 	/* Token bucket (TB) */
44 	uint64_t tb_period;
45 	uint64_t tb_credits_per_period;
46 	uint64_t tb_size;
47 
48 	/* Pipe traffic classes */
49 	uint64_t tc_period;
50 	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
51 	uint8_t tc_ov_weight;
52 
53 	/* Pipe best-effort traffic class queues */
54 	uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
55 };
56 
57 struct rte_sched_pipe {
58 	/* Token bucket (TB) */
59 	uint64_t tb_time; /* time of last update */
60 	uint64_t tb_credits;
61 
62 	/* Pipe profile and flags */
63 	uint32_t profile;
64 
65 	/* Traffic classes (TCs) */
66 	uint64_t tc_time; /* time of next update */
67 	uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
68 
69 	/* Weighted Round Robin (WRR) */
70 	uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
71 
72 	/* TC oversubscription */
73 	uint64_t tc_ov_credits;
74 	uint8_t tc_ov_period_id;
75 } __rte_cache_aligned;
76 
77 struct rte_sched_queue {
78 	uint16_t qw;
79 	uint16_t qr;
80 };
81 
82 struct rte_sched_queue_extra {
83 	struct rte_sched_queue_stats stats;
84 #ifdef RTE_SCHED_CMAN
85 	RTE_STD_C11
86 	union {
87 		struct rte_red red;
88 		struct rte_pie pie;
89 	};
90 #endif
91 };
92 
93 enum grinder_state {
94 	e_GRINDER_PREFETCH_PIPE = 0,
95 	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
96 	e_GRINDER_PREFETCH_MBUF,
97 	e_GRINDER_READ_MBUF
98 };
99 
100 struct rte_sched_subport_profile {
101 	/* Token bucket (TB) */
102 	uint64_t tb_period;
103 	uint64_t tb_credits_per_period;
104 	uint64_t tb_size;
105 
106 	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
107 	uint64_t tc_period;
108 };
109 
110 struct rte_sched_grinder {
111 	/* Pipe cache */
112 	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
113 	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
114 	uint32_t pcache_w;
115 	uint32_t pcache_r;
116 
117 	/* Current pipe */
118 	enum grinder_state state;
119 	uint32_t productive;
120 	uint32_t pindex;
121 	struct rte_sched_subport *subport;
122 	struct rte_sched_subport_profile *subport_params;
123 	struct rte_sched_pipe *pipe;
124 	struct rte_sched_pipe_profile *pipe_params;
125 
126 	/* TC cache */
127 	uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
128 	uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
129 	uint32_t tccache_w;
130 	uint32_t tccache_r;
131 
132 	/* Current TC */
133 	uint32_t tc_index;
134 	struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
135 	struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
136 	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
137 	uint16_t qsize;
138 	uint32_t qmask;
139 	uint32_t qpos;
140 	struct rte_mbuf *pkt;
141 
142 	/* WRR */
143 	uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
144 	uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
145 	uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
146 };
147 
148 struct rte_sched_subport {
149 	/* Token bucket (TB) */
150 	uint64_t tb_time; /* time of last update */
151 	uint64_t tb_credits;
152 
153 	/* Traffic classes (TCs) */
154 	uint64_t tc_time; /* time of next update */
155 	uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
156 
157 	/* TC oversubscription */
158 	uint64_t tc_ov_wm;
159 	uint64_t tc_ov_wm_min;
160 	uint64_t tc_ov_wm_max;
161 	uint8_t tc_ov_period_id;
162 	uint8_t tc_ov;
163 	uint32_t tc_ov_n;
164 	double tc_ov_rate;
165 
166 	/* Statistics */
167 	struct rte_sched_subport_stats stats __rte_cache_aligned;
168 
169 	/* subport profile */
170 	uint32_t profile;
171 	/* Subport pipes */
172 	uint32_t n_pipes_per_subport_enabled;
173 	uint32_t n_pipe_profiles;
174 	uint32_t n_max_pipe_profiles;
175 
176 	/* Pipe best-effort TC rate */
177 	uint64_t pipe_tc_be_rate_max;
178 
179 	/* Pipe queues size */
180 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
181 
182 #ifdef RTE_SCHED_CMAN
183 	bool cman_enabled;
184 	enum rte_sched_cman_mode cman;
185 
186 	RTE_STD_C11
187 	union {
188 		struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
189 		struct rte_pie_config pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
190 	};
191 #endif
192 
193 	/* Scheduling loop detection */
194 	uint32_t pipe_loop;
195 	uint32_t pipe_exhaustion;
196 
197 	/* Bitmap */
198 	struct rte_bitmap *bmp;
199 	uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
200 
201 	/* Grinders */
202 	struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
203 	uint32_t busy_grinders;
204 
205 	/* Queue base calculation */
206 	uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
207 	uint32_t qsize_sum;
208 
209 	struct rte_sched_pipe *pipe;
210 	struct rte_sched_queue *queue;
211 	struct rte_sched_queue_extra *queue_extra;
212 	struct rte_sched_pipe_profile *pipe_profiles;
213 	uint8_t *bmp_array;
214 	struct rte_mbuf **queue_array;
215 	uint8_t memory[0] __rte_cache_aligned;
216 
217 	/* TC oversubscription activation */
218 	int tc_ov_enabled;
219 } __rte_cache_aligned;
220 
221 struct rte_sched_port {
222 	/* User parameters */
223 	uint32_t n_subports_per_port;
224 	uint32_t n_pipes_per_subport;
225 	uint32_t n_pipes_per_subport_log2;
226 	uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
227 	uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
228 	uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
229 	uint32_t n_subport_profiles;
230 	uint32_t n_max_subport_profiles;
231 	uint64_t rate;
232 	uint32_t mtu;
233 	uint32_t frame_overhead;
234 	int socket;
235 
236 	/* Timing */
237 	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
238 	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
239 	uint64_t time;                /* Current NIC TX time measured in bytes */
240 	struct rte_reciprocal inv_cycles_per_byte; /* Reciprocal of cycles_per_byte, for fast division */
241 	uint64_t cycles_per_byte;
242 
243 	/* Grinders */
244 	struct rte_mbuf **pkts_out;
245 	uint32_t n_pkts_out;
246 	uint32_t subport_id;
247 
248 	/* Large data structures */
249 	struct rte_sched_subport_profile *subport_profiles;
250 	struct rte_sched_subport *subports[0] __rte_cache_aligned;
251 } __rte_cache_aligned;
252 
253 enum rte_sched_subport_array {
254 	e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
255 	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
256 	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
257 	e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
258 	e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
259 	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
260 	e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
261 };
262 
263 static inline uint32_t
264 rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
265 {
266 	return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
267 }
268 
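/* Map a subport-level queue index to the base of its mbuf ring: the upper bits
 * select the pipe (the shift by 4 is log2 of RTE_SCHED_QUEUES_PER_PIPE = 16),
 * while qsize_add[] gives the offset, in mbuf pointers, of each of the 16
 * queues inside that pipe's slice of queue_array.
 */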
269 static inline struct rte_mbuf **
270 rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
271 {
272 	uint32_t pindex = qindex >> 4;
273 	uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);
274 
275 	return (subport->queue_array + pindex *
276 		subport->qsize_sum + subport->qsize_add[qpos]);
277 }
278 
279 static inline uint16_t
280 rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
281 struct rte_sched_subport *subport, uint32_t qindex)
282 {
283 	uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
284 
285 	return subport->qsize[tc];
286 }
287 
288 static inline uint32_t
289 rte_sched_port_queues_per_port(struct rte_sched_port *port)
290 {
291 	uint32_t n_queues = 0, i;
292 
293 	for (i = 0; i < port->n_subports_per_port; i++)
294 		n_queues += rte_sched_subport_pipe_queues(port->subports[i]);
295 
296 	return n_queues;
297 }
298 
299 static inline uint16_t
300 rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
301 {
302 	uint16_t pipe_queue = port->pipe_queue[traffic_class];
303 
304 	return pipe_queue;
305 }
306 
307 static inline uint8_t
308 rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
309 {
310 	uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
311 
312 	return pipe_tc;
313 }
314 
315 static inline uint8_t
316 rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
317 {
318 	uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
319 
320 	return tc_queue;
321 }
322 
323 static int
324 pipe_profile_check(struct rte_sched_pipe_params *params,
325 	uint64_t rate, uint16_t *qsize)
326 {
327 	uint32_t i;
328 
329 	/* Pipe parameters */
330 	if (params == NULL) {
331 		RTE_LOG(ERR, SCHED,
332 			"%s: Incorrect value for parameter params\n", __func__);
333 		return -EINVAL;
334 	}
335 
336 	/* TB rate: non-zero, not greater than port rate */
337 	if (params->tb_rate == 0 ||
338 		params->tb_rate > rate) {
339 		RTE_LOG(ERR, SCHED,
340 			"%s: Incorrect value for tb rate\n", __func__);
341 		return -EINVAL;
342 	}
343 
344 	/* TB size: non-zero */
345 	if (params->tb_size == 0) {
346 		RTE_LOG(ERR, SCHED,
347 			"%s: Incorrect value for tb size\n", __func__);
348 		return -EINVAL;
349 	}
350 
351 	/* TC rate: non-zero if qsize non-zero, less than pipe rate */
352 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
353 		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
354 			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
355 			params->tc_rate[i] > params->tb_rate))) {
356 			RTE_LOG(ERR, SCHED,
357 				"%s: Incorrect value for qsize or tc_rate\n", __func__);
358 			return -EINVAL;
359 		}
360 	}
361 
362 	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
363 		qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
364 		RTE_LOG(ERR, SCHED,
365 			"%s: Incorrect value for be traffic class rate\n", __func__);
366 		return -EINVAL;
367 	}
368 
369 	/* TC period: non-zero */
370 	if (params->tc_period == 0) {
371 		RTE_LOG(ERR, SCHED,
372 			"%s: Incorrect value for tc period\n", __func__);
373 		return -EINVAL;
374 	}
375 
376 	/* Best-effort TC oversubscription weight: non-zero */
377 	if (params->tc_ov_weight == 0) {
378 		RTE_LOG(ERR, SCHED,
379 			"%s: Incorrect value for tc ov weight\n", __func__);
380 		return -EINVAL;
381 	}
382 
383 	/* Queue WRR weights: non-zero */
384 	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
385 		if (params->wrr_weights[i] == 0) {
386 			RTE_LOG(ERR, SCHED,
387 				"%s: Incorrect value for wrr weight\n", __func__);
388 			return -EINVAL;
389 		}
390 	}
391 
392 	return 0;
393 }
394 
395 static int
396 subport_profile_check(struct rte_sched_subport_profile_params *params,
397 	uint64_t rate)
398 {
399 	uint32_t i;
400 
401 	/* Check user parameters */
402 	if (params == NULL) {
403 		RTE_LOG(ERR, SCHED, "%s: "
404 		"Incorrect value for parameter params\n", __func__);
405 		return -EINVAL;
406 	}
407 
408 	if (params->tb_rate == 0 || params->tb_rate > rate) {
409 		RTE_LOG(ERR, SCHED, "%s: "
410 		"Incorrect value for tb rate\n", __func__);
411 		return -EINVAL;
412 	}
413 
414 	if (params->tb_size == 0) {
415 		RTE_LOG(ERR, SCHED, "%s: "
416 		"Incorrect value for tb size\n", __func__);
417 		return -EINVAL;
418 	}
419 
420 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
421 		uint64_t tc_rate = params->tc_rate[i];
422 
423 		if (tc_rate == 0 || (tc_rate > params->tb_rate)) {
424 			RTE_LOG(ERR, SCHED, "%s: "
425 			"Incorrect value for tc rate\n", __func__);
426 			return -EINVAL;
427 		}
428 	}
429 
430 	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
431 		RTE_LOG(ERR, SCHED, "%s: "
432 		"Incorrect tc rate(best effort)\n", __func__);
433 		return -EINVAL;
434 	}
435 
436 	if (params->tc_period == 0) {
437 		RTE_LOG(ERR, SCHED, "%s: "
438 		"Incorrect value for tc period\n", __func__);
439 		return -EINVAL;
440 	}
441 
442 	return 0;
443 }
444 
445 static int
446 rte_sched_port_check_params(struct rte_sched_port_params *params)
447 {
448 	uint32_t i;
449 
450 	if (params == NULL) {
451 		RTE_LOG(ERR, SCHED,
452 			"%s: Incorrect value for parameter params\n", __func__);
453 		return -EINVAL;
454 	}
455 
456 	/* socket */
457 	if (params->socket < 0) {
458 		RTE_LOG(ERR, SCHED,
459 			"%s: Incorrect value for socket id\n", __func__);
460 		return -EINVAL;
461 	}
462 
463 	/* rate */
464 	if (params->rate == 0) {
465 		RTE_LOG(ERR, SCHED,
466 			"%s: Incorrect value for rate\n", __func__);
467 		return -EINVAL;
468 	}
469 
470 	/* mtu */
471 	if (params->mtu == 0) {
472 		RTE_LOG(ERR, SCHED,
473 			"%s: Incorrect value for mtu\n", __func__);
474 		return -EINVAL;
475 	}
476 
477 	/* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
478 	if (params->n_subports_per_port == 0 ||
479 	    params->n_subports_per_port > 1u << 16 ||
480 	    !rte_is_power_of_2(params->n_subports_per_port)) {
481 		RTE_LOG(ERR, SCHED,
482 			"%s: Incorrect value for number of subports\n", __func__);
483 		return -EINVAL;
484 	}
485 
486 	if (params->subport_profiles == NULL ||
487 		params->n_subport_profiles == 0 ||
488 		params->n_max_subport_profiles == 0 ||
489 		params->n_subport_profiles > params->n_max_subport_profiles) {
490 		RTE_LOG(ERR, SCHED,
491 		"%s: Incorrect value for subport profiles\n", __func__);
492 		return -EINVAL;
493 	}
494 
495 	for (i = 0; i < params->n_subport_profiles; i++) {
496 		struct rte_sched_subport_profile_params *p =
497 						params->subport_profiles + i;
498 		int status;
499 
500 		status = subport_profile_check(p, params->rate);
501 		if (status != 0) {
502 			RTE_LOG(ERR, SCHED,
503 			"%s: subport profile check failed(%d)\n",
504 			__func__, status);
505 			return -EINVAL;
506 		}
507 	}
508 
509 	/* n_pipes_per_subport: non-zero, power of 2 */
510 	if (params->n_pipes_per_subport == 0 ||
511 	    !rte_is_power_of_2(params->n_pipes_per_subport)) {
512 		RTE_LOG(ERR, SCHED,
513 			"%s: Incorrect value for maximum pipes number\n", __func__);
514 		return -EINVAL;
515 	}
516 
517 	return 0;
518 }
519 
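/* Per-subport memory layout, in the order walked below: pipe table, queue
 * read/write pointers, queue extras (stats + CMAN state), pipe profiles,
 * bitmap array, and finally the mbuf pointer arrays backing the queues.
 * Each region is rounded up to a cache line, so asking for
 * e_RTE_SCHED_SUBPORT_ARRAY_TOTAL returns the total footprint.
 */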
520 static uint32_t
521 rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
522 	enum rte_sched_subport_array array)
523 {
524 	uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
525 	uint32_t n_subport_pipe_queues =
526 		RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;
527 
528 	uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
529 	uint32_t size_queue =
530 		n_subport_pipe_queues * sizeof(struct rte_sched_queue);
531 	uint32_t size_queue_extra
532 		= n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
533 	uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
534 		sizeof(struct rte_sched_pipe_profile);
535 	uint32_t size_bmp_array =
536 		rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
537 	uint32_t size_per_pipe_queue_array, size_queue_array;
538 
539 	uint32_t base, i;
540 
541 	size_per_pipe_queue_array = 0;
542 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
543 		if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
544 			size_per_pipe_queue_array +=
545 				params->qsize[i] * sizeof(struct rte_mbuf *);
546 		else
547 			size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
548 				params->qsize[i] * sizeof(struct rte_mbuf *);
549 	}
550 	size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;
551 
552 	base = 0;
553 
554 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
555 		return base;
556 	base += RTE_CACHE_LINE_ROUNDUP(size_pipe);
557 
558 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
559 		return base;
560 	base += RTE_CACHE_LINE_ROUNDUP(size_queue);
561 
562 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
563 		return base;
564 	base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);
565 
566 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
567 		return base;
568 	base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);
569 
570 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
571 		return base;
572 	base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);
573 
574 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
575 		return base;
576 	base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);
577 
578 	return base;
579 }
580 
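/* Precompute, for one pipe, the starting offset of every queue inside the
 * pipe's slice of the mbuf pointer array: one queue per strict-priority
 * traffic class, followed by four best-effort queues that share the
 * best-effort qsize. qsize_sum ends up as the total number of mbuf pointer
 * slots needed per pipe.
 */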
581 static void
582 rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
583 {
584 	uint32_t i;
585 
586 	subport->qsize_add[0] = 0;
587 
588 	/* Strict priority traffic class */
589 	for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
590 		subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];
591 
592 	/* Best-effort traffic class */
593 	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
594 		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
595 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
596 	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
597 		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
598 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
599 	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
600 		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
601 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
602 
603 	subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
604 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
605 }
606 
607 static void
608 rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
609 {
610 	struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;
611 
612 	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
613 		"	Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n"
614 		"	Traffic classes: period = %"PRIu64",\n"
615 		"	credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
616 		", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
617 		", %"PRIu64", %"PRIu64", %"PRIu64"]\n"
618 		"	Best-effort traffic class oversubscription: weight = %hhu\n"
619 		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
620 		i,
621 
622 		/* Token bucket */
623 		p->tb_period,
624 		p->tb_credits_per_period,
625 		p->tb_size,
626 
627 		/* Traffic classes */
628 		p->tc_period,
629 		p->tc_credits_per_period[0],
630 		p->tc_credits_per_period[1],
631 		p->tc_credits_per_period[2],
632 		p->tc_credits_per_period[3],
633 		p->tc_credits_per_period[4],
634 		p->tc_credits_per_period[5],
635 		p->tc_credits_per_period[6],
636 		p->tc_credits_per_period[7],
637 		p->tc_credits_per_period[8],
638 		p->tc_credits_per_period[9],
639 		p->tc_credits_per_period[10],
640 		p->tc_credits_per_period[11],
641 		p->tc_credits_per_period[12],
642 
643 		/* Best-effort traffic class oversubscription */
644 		p->tc_ov_weight,
645 
646 		/* WRR */
647 		p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
648 }
649 
650 static void
651 rte_sched_port_log_subport_profile(struct rte_sched_port *port, uint32_t i)
652 {
653 	struct rte_sched_subport_profile *p = port->subport_profiles + i;
654 
655 	RTE_LOG(DEBUG, SCHED, "Low level config for subport profile %u:\n"
656 	"Token bucket: period = %"PRIu64", credits per period = %"PRIu64","
657 	"size = %"PRIu64"\n"
658 	"Traffic classes: period = %"PRIu64",\n"
659 	"credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
660 	" %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
661 	" %"PRIu64", %"PRIu64", %"PRIu64"]\n",
662 	i,
663 
664 	/* Token bucket */
665 	p->tb_period,
666 	p->tb_credits_per_period,
667 	p->tb_size,
668 
669 	/* Traffic classes */
670 	p->tc_period,
671 	p->tc_credits_per_period[0],
672 	p->tc_credits_per_period[1],
673 	p->tc_credits_per_period[2],
674 	p->tc_credits_per_period[3],
675 	p->tc_credits_per_period[4],
676 	p->tc_credits_per_period[5],
677 	p->tc_credits_per_period[6],
678 	p->tc_credits_per_period[7],
679 	p->tc_credits_per_period[8],
680 	p->tc_credits_per_period[9],
681 	p->tc_credits_per_period[10],
682 	p->tc_credits_per_period[11],
683 	p->tc_credits_per_period[12]);
684 }
685 
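/* Convert a time interval in milliseconds into a byte count at the given rate
 * (bytes/sec). For example, a 10 ms tc_period at 1,250,000,000 bytes/sec
 * (10 Gbit/sec) yields 12,500,000 bytes of credit per period.
 */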
686 static inline uint64_t
687 rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate)
688 {
689 	uint64_t time = time_ms;
690 
691 	time = (time * rate) / 1000;
692 
693 	return time;
694 }
695 
696 static void
697 rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
698 	struct rte_sched_pipe_params *src,
699 	struct rte_sched_pipe_profile *dst,
700 	uint64_t rate)
701 {
702 	uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
703 	uint32_t lcd1, lcd2, lcd;
704 	uint32_t i;
705 
706 	/* Token Bucket */
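	/* The TB rate is stored as the ratio tb_credits_per_period / tb_period,
	 * approximated to within RTE_SCHED_TB_RATE_CONFIG_ERR. For example, a
	 * pipe rate of one third of the port rate would come out as roughly
	 * 1 credit per period of 3 bytes of port time.
	 */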
707 	if (src->tb_rate == rate) {
708 		dst->tb_credits_per_period = 1;
709 		dst->tb_period = 1;
710 	} else {
711 		double tb_rate = (double) src->tb_rate
712 				/ (double) rate;
713 		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
714 
715 		rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
716 			&dst->tb_period);
717 	}
718 
719 	dst->tb_size = src->tb_size;
720 
721 	/* Traffic Classes */
722 	dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
723 						rate);
724 
725 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
726 		if (subport->qsize[i])
727 			dst->tc_credits_per_period[i]
728 				= rte_sched_time_ms_to_bytes(src->tc_period,
729 					src->tc_rate[i]);
730 
731 	dst->tc_ov_weight = src->tc_ov_weight;
732 
733 	/* WRR queues */
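	/* Queue weights are converted into per-byte costs: each cost is the LCM
	 * of the four weights divided by that queue's weight, so a heavier queue
	 * gets a smaller cost and is charged fewer tokens per byte. For example,
	 * weights {1, 2, 4, 8} become costs {8, 4, 2, 1}.
	 */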
734 	wrr_cost[0] = src->wrr_weights[0];
735 	wrr_cost[1] = src->wrr_weights[1];
736 	wrr_cost[2] = src->wrr_weights[2];
737 	wrr_cost[3] = src->wrr_weights[3];
738 
739 	lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
740 	lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
741 	lcd = rte_get_lcd(lcd1, lcd2);
742 
743 	wrr_cost[0] = lcd / wrr_cost[0];
744 	wrr_cost[1] = lcd / wrr_cost[1];
745 	wrr_cost[2] = lcd / wrr_cost[2];
746 	wrr_cost[3] = lcd / wrr_cost[3];
747 
748 	dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
749 	dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
750 	dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
751 	dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
752 }
753 
754 static void
755 rte_sched_subport_profile_convert(struct rte_sched_subport_profile_params *src,
756 	struct rte_sched_subport_profile *dst,
757 	uint64_t rate)
758 {
759 	uint32_t i;
760 
761 	/* Token Bucket */
762 	if (src->tb_rate == rate) {
763 		dst->tb_credits_per_period = 1;
764 		dst->tb_period = 1;
765 	} else {
766 		double tb_rate = (double) src->tb_rate
767 				/ (double) rate;
768 		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
769 
770 		rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
771 			&dst->tb_period);
772 	}
773 
774 	dst->tb_size = src->tb_size;
775 
776 	/* Traffic Classes */
777 	dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, rate);
778 
779 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
780 		dst->tc_credits_per_period[i]
781 			= rte_sched_time_ms_to_bytes(src->tc_period,
782 				src->tc_rate[i]);
783 }
784 
785 static void
786 rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
787 	struct rte_sched_subport_params *params, uint64_t rate)
788 {
789 	uint32_t i;
790 
791 	for (i = 0; i < subport->n_pipe_profiles; i++) {
792 		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
793 		struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;
794 
795 		rte_sched_pipe_profile_convert(subport, src, dst, rate);
796 		rte_sched_port_log_pipe_profile(subport, i);
797 	}
798 
799 	subport->pipe_tc_be_rate_max = 0;
800 	for (i = 0; i < subport->n_pipe_profiles; i++) {
801 		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
802 		uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
803 
804 		if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
805 			subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
806 	}
807 }
808 
809 static void
810 rte_sched_port_config_subport_profile_table(struct rte_sched_port *port,
811 	struct rte_sched_port_params *params,
812 	uint64_t rate)
813 {
814 	uint32_t i;
815 
816 	for (i = 0; i < port->n_subport_profiles; i++) {
817 		struct rte_sched_subport_profile_params *src
818 				= params->subport_profiles + i;
819 		struct rte_sched_subport_profile *dst
820 				= port->subport_profiles + i;
821 
822 		rte_sched_subport_profile_convert(src, dst, rate);
823 		rte_sched_port_log_subport_profile(port, i);
824 	}
825 }
826 
827 static int
828 rte_sched_subport_check_params(struct rte_sched_subport_params *params,
829 	uint32_t n_max_pipes_per_subport,
830 	uint64_t rate)
831 {
832 	uint32_t i;
833 
834 	/* Check user parameters */
835 	if (params == NULL) {
836 		RTE_LOG(ERR, SCHED,
837 			"%s: Incorrect value for parameter params\n", __func__);
838 		return -EINVAL;
839 	}
840 
841 	/* qsize: if non-zero, power of 2,
842 	 * no bigger than 32K (due to 16-bit read/write pointers)
843 	 */
844 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
845 		uint16_t qsize = params->qsize[i];
846 
847 		if (qsize != 0 && !rte_is_power_of_2(qsize)) {
848 			RTE_LOG(ERR, SCHED,
849 				"%s: Incorrect value for qsize\n", __func__);
850 			return -EINVAL;
851 		}
852 	}
853 
854 	if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
855 		RTE_LOG(ERR, SCHED, "%s: Incorrect qsize\n", __func__);
856 		return -EINVAL;
857 	}
858 
859 	/* n_pipes_per_subport: non-zero, power of 2 */
860 	if (params->n_pipes_per_subport_enabled == 0 ||
861 		params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
862 	    !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
863 		RTE_LOG(ERR, SCHED,
864 			"%s: Incorrect value for pipes number\n", __func__);
865 		return -EINVAL;
866 	}
867 
868 	/* pipe_profiles and n_pipe_profiles */
869 	if (params->pipe_profiles == NULL ||
870 	    params->n_pipe_profiles == 0 ||
871 		params->n_max_pipe_profiles == 0 ||
872 		params->n_pipe_profiles > params->n_max_pipe_profiles) {
873 		RTE_LOG(ERR, SCHED,
874 			"%s: Incorrect value for pipe profiles\n", __func__);
875 		return -EINVAL;
876 	}
877 
878 	for (i = 0; i < params->n_pipe_profiles; i++) {
879 		struct rte_sched_pipe_params *p = params->pipe_profiles + i;
880 		int status;
881 
882 		status = pipe_profile_check(p, rate, &params->qsize[0]);
883 		if (status != 0) {
884 			RTE_LOG(ERR, SCHED,
885 				"%s: Pipe profile check failed(%d)\n", __func__, status);
886 			return -EINVAL;
887 		}
888 	}
889 
890 	return 0;
891 }
892 
893 uint32_t
894 rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
895 	struct rte_sched_subport_params **subport_params)
896 {
897 	uint32_t size0 = 0, size1 = 0, i;
898 	int status;
899 
900 	status = rte_sched_port_check_params(port_params);
901 	if (status != 0) {
902 		RTE_LOG(ERR, SCHED,
903 			"%s: Port scheduler port params check failed (%d)\n",
904 			__func__, status);
905 
906 		return 0;
907 	}
908 
909 	for (i = 0; i < port_params->n_subports_per_port; i++) {
910 		struct rte_sched_subport_params *sp = subport_params[i];
911 
912 		status = rte_sched_subport_check_params(sp,
913 				port_params->n_pipes_per_subport,
914 				port_params->rate);
915 		if (status != 0) {
916 			RTE_LOG(ERR, SCHED,
917 				"%s: Port scheduler subport params check failed (%d)\n",
918 				__func__, status);
919 
920 			return 0;
921 		}
922 	}
923 
924 	size0 = sizeof(struct rte_sched_port);
925 
926 	for (i = 0; i < port_params->n_subports_per_port; i++) {
927 		struct rte_sched_subport_params *sp = subport_params[i];
928 
929 		size1 += rte_sched_subport_get_array_base(sp,
930 					e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
931 	}
932 
933 	return size0 + size1;
934 }
935 
936 struct rte_sched_port *
937 rte_sched_port_config(struct rte_sched_port_params *params)
938 {
939 	struct rte_sched_port *port = NULL;
940 	uint32_t size0, size1, size2;
941 	uint32_t cycles_per_byte;
942 	uint32_t i, j;
943 	int status;
944 
945 	status = rte_sched_port_check_params(params);
946 	if (status != 0) {
947 		RTE_LOG(ERR, SCHED,
948 			"%s: Port scheduler params check failed (%d)\n",
949 			__func__, status);
950 		return NULL;
951 	}
952 
953 	size0 = sizeof(struct rte_sched_port);
954 	size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);
955 	size2 = params->n_max_subport_profiles *
956 		sizeof(struct rte_sched_subport_profile);
957 
958 	/* Allocate memory to store the data structures */
959 	port = rte_zmalloc_socket("qos_params", size0 + size1,
960 				 RTE_CACHE_LINE_SIZE, params->socket);
961 	if (port == NULL) {
962 		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
963 
964 		return NULL;
965 	}
966 
967 	/* Allocate memory to store the subport profile */
968 	port->subport_profiles  = rte_zmalloc_socket("subport_profile", size2,
969 					RTE_CACHE_LINE_SIZE, params->socket);
970 	if (port->subport_profiles == NULL) {
971 		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
972 		rte_free(port);
973 		return NULL;
974 	}
975 
976 	/* User parameters */
977 	port->n_subports_per_port = params->n_subports_per_port;
978 	port->n_subport_profiles = params->n_subport_profiles;
979 	port->n_max_subport_profiles = params->n_max_subport_profiles;
980 	port->n_pipes_per_subport = params->n_pipes_per_subport;
981 	port->n_pipes_per_subport_log2 =
982 			__builtin_ctz(params->n_pipes_per_subport);
983 	port->socket = params->socket;
984 
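	/* The loops below build the per-port translation tables. With 13 traffic
	 * classes and 16 queues per pipe they work out to, e.g.:
	 * pipe_queue[tc] = tc (first queue of each TC), pipe_tc[q] = q for
	 * queues 0..11 and 12 for the four best-effort queues, and
	 * tc_queue[q] = 0 for queues 0..12 and 1..3 for queues 13..15.
	 */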
985 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
986 		port->pipe_queue[i] = i;
987 
988 	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
989 		port->pipe_tc[i] = j;
990 
991 		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
992 			j++;
993 	}
994 
995 	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
996 		port->tc_queue[i] = j;
997 
998 		if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
999 			j++;
1000 	}
1001 	port->rate = params->rate;
1002 	port->mtu = params->mtu + params->frame_overhead;
1003 	port->frame_overhead = params->frame_overhead;
1004 
1005 	/* Timing */
1006 	port->time_cpu_cycles = rte_get_tsc_cycles();
1007 	port->time_cpu_bytes = 0;
1008 	port->time = 0;
1009 
1010 	/* Subport profile table */
1011 	rte_sched_port_config_subport_profile_table(port, params, port->rate);
1012 
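	/* Fixed-point conversion factor between TSC cycles and bytes of port
	 * time: the dequeue path presumably advances port->time by
	 * (elapsed_cycles << RTE_SCHED_TIME_SHIFT) / cycles_per_byte, using the
	 * precomputed reciprocal to avoid a hardware division per call.
	 */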
1013 	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
1014 		/ params->rate;
1015 	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
1016 	port->cycles_per_byte = cycles_per_byte;
1017 
1018 	/* Grinders */
1019 	port->pkts_out = NULL;
1020 	port->n_pkts_out = 0;
1021 	port->subport_id = 0;
1022 
1023 	return port;
1024 }
1025 
1026 static inline void
1027 rte_sched_subport_free(struct rte_sched_port *port,
1028 	struct rte_sched_subport *subport)
1029 {
1030 	uint32_t n_subport_pipe_queues;
1031 	uint32_t qindex;
1032 
1033 	if (subport == NULL)
1034 		return;
1035 
1036 	n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);
1037 
1038 	/* Free enqueued mbufs */
1039 	for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
1040 		struct rte_mbuf **mbufs =
1041 			rte_sched_subport_pipe_qbase(subport, qindex);
1042 		uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1043 		if (qsize != 0) {
1044 			struct rte_sched_queue *queue = subport->queue + qindex;
1045 			uint16_t qr = queue->qr & (qsize - 1);
1046 			uint16_t qw = queue->qw & (qsize - 1);
1047 
1048 			for (; qr != qw; qr = (qr + 1) & (qsize - 1))
1049 				rte_pktmbuf_free(mbufs[qr]);
1050 		}
1051 	}
1052 
1053 	rte_free(subport);
1054 }
1055 
1056 void
1057 rte_sched_port_free(struct rte_sched_port *port)
1058 {
1059 	uint32_t i;
1060 
1061 	/* Check user parameters */
1062 	if (port == NULL)
1063 		return;
1064 
1065 	for (i = 0; i < port->n_subports_per_port; i++)
1066 		rte_sched_subport_free(port, port->subports[i]);
1067 
1068 	rte_free(port->subport_profiles);
1069 	rte_free(port);
1070 }
1071 
1072 static void
1073 rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
1074 {
1075 	uint32_t i;
1076 
1077 	for (i = 0; i < n_subports; i++) {
1078 		struct rte_sched_subport *subport = port->subports[i];
1079 
1080 		rte_sched_subport_free(port, subport);
1081 	}
1082 
1083 	rte_free(port->subport_profiles);
1084 	rte_free(port);
1085 }
1086 
1087 #ifdef RTE_SCHED_CMAN
1088 static int
1089 rte_sched_red_config(struct rte_sched_port *port,
1090 	struct rte_sched_subport *s,
1091 	struct rte_sched_subport_params *params,
1092 	uint32_t n_subports)
1093 {
1094 	uint32_t i;
1095 
1096 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
1097 
1098 		uint32_t j;
1099 
1100 		for (j = 0; j < RTE_COLORS; j++) {
1101 			/* if min/max are both zero, then RED is disabled */
1102 			if ((params->cman_params->red_params[i][j].min_th |
1103 				 params->cman_params->red_params[i][j].max_th) == 0) {
1104 				continue;
1105 			}
1106 
1107 			if (rte_red_config_init(&s->red_config[i][j],
1108 				params->cman_params->red_params[i][j].wq_log2,
1109 				params->cman_params->red_params[i][j].min_th,
1110 				params->cman_params->red_params[i][j].max_th,
1111 				params->cman_params->red_params[i][j].maxp_inv) != 0) {
1112 				rte_sched_free_memory(port, n_subports);
1113 
1114 				RTE_LOG(NOTICE, SCHED,
1115 				"%s: RED configuration init fails\n", __func__);
1116 				return -EINVAL;
1117 			}
1118 		}
1119 	}
1120 	s->cman = RTE_SCHED_CMAN_RED;
1121 	return 0;
1122 }
1123 
1124 static int
1125 rte_sched_pie_config(struct rte_sched_port *port,
1126 	struct rte_sched_subport *s,
1127 	struct rte_sched_subport_params *params,
1128 	uint32_t n_subports)
1129 {
1130 	uint32_t i;
1131 
1132 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
1133 		if (params->cman_params->pie_params[i].tailq_th > params->qsize[i]) {
1134 			RTE_LOG(NOTICE, SCHED,
1135 			"%s: PIE tailq threshold incorrect\n", __func__);
1136 			return -EINVAL;
1137 		}
1138 
1139 		if (rte_pie_config_init(&s->pie_config[i],
1140 			params->cman_params->pie_params[i].qdelay_ref,
1141 			params->cman_params->pie_params[i].dp_update_interval,
1142 			params->cman_params->pie_params[i].max_burst,
1143 			params->cman_params->pie_params[i].tailq_th) != 0) {
1144 			rte_sched_free_memory(port, n_subports);
1145 
1146 			RTE_LOG(NOTICE, SCHED,
1147 			"%s: PIE configuration init fails\n", __func__);
1148 			return -EINVAL;
1149 		}
1150 	}
1151 	s->cman = RTE_SCHED_CMAN_PIE;
1152 	return 0;
1153 }
1154 
1155 static int
1156 rte_sched_cman_config(struct rte_sched_port *port,
1157 	struct rte_sched_subport *s,
1158 	struct rte_sched_subport_params *params,
1159 	uint32_t n_subports)
1160 {
1161 	if (params->cman_params->cman_mode == RTE_SCHED_CMAN_RED)
1162 		return rte_sched_red_config(port, s, params, n_subports);
1163 
1164 	else if (params->cman_params->cman_mode == RTE_SCHED_CMAN_PIE)
1165 		return rte_sched_pie_config(port, s, params, n_subports);
1166 
1167 	return -EINVAL;
1168 }
1169 #endif
1170 
1171 int
1172 rte_sched_subport_tc_ov_config(struct rte_sched_port *port,
1173 	uint32_t subport_id,
1174 	bool tc_ov_enable)
1175 {
1176 	struct rte_sched_subport *s;
1177 
1178 	if (port == NULL) {
1179 		RTE_LOG(ERR, SCHED,
1180 			"%s: Incorrect value for parameter port\n", __func__);
1181 		return -EINVAL;
1182 	}
1183 
1184 	if (subport_id >= port->n_subports_per_port) {
1185 		RTE_LOG(ERR, SCHED,
1186 			"%s: Incorrect value for parameter subport id\n", __func__);
1187 		return  -EINVAL;
1188 	}
1189 
1190 	s = port->subports[subport_id];
1191 	s->tc_ov_enabled = tc_ov_enable ? 1 : 0;
1192 
1193 	return 0;
1194 }
1195 
1196 int
1197 rte_sched_subport_config(struct rte_sched_port *port,
1198 	uint32_t subport_id,
1199 	struct rte_sched_subport_params *params,
1200 	uint32_t subport_profile_id)
1201 {
1202 	struct rte_sched_subport *s = NULL;
1203 	uint32_t n_subports = subport_id;
1204 	struct rte_sched_subport_profile *profile;
1205 	uint32_t n_subport_pipe_queues, i;
1206 	uint32_t size0, size1, bmp_mem_size;
1207 	int status;
1208 	int ret;
1209 
1210 	/* Check user parameters */
1211 	if (port == NULL) {
1212 		RTE_LOG(ERR, SCHED,
1213 			"%s: Incorrect value for parameter port\n", __func__);
1214 		return 0;
1215 	}
1216 
1217 	if (subport_id >= port->n_subports_per_port) {
1218 		RTE_LOG(ERR, SCHED,
1219 			"%s: Incorrect value for subport id\n", __func__);
1220 		ret = -EINVAL;
1221 		goto out;
1222 	}
1223 
1224 	if (subport_profile_id >= port->n_max_subport_profiles) {
1225 		RTE_LOG(ERR, SCHED, "%s: "
1226 			"Subport profile id exceeds the max limit\n",
1227 			__func__);
1228 		ret = -EINVAL;
1229 		goto out;
1230 	}
1231 
1232 	/* Memory is allocated only on the first invocation of the API for a
1233 	 * given subport. Subsequent invocations on the same subport only
1234 	 * update the subport bandwidth parameters.
1235 	 */
1236 	if (port->subports[subport_id] == NULL) {
1237 
1238 		status = rte_sched_subport_check_params(params,
1239 			port->n_pipes_per_subport,
1240 			port->rate);
1241 		if (status != 0) {
1242 			RTE_LOG(NOTICE, SCHED,
1243 				"%s: Port scheduler subport params check failed (%d)\n",
1244 				__func__, status);
1245 			ret = -EINVAL;
1246 			goto out;
1247 		}
1248 
1249 		/* Determine the amount of memory to allocate */
1250 		size0 = sizeof(struct rte_sched_subport);
1251 		size1 = rte_sched_subport_get_array_base(params,
1252 					e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
1253 
1254 		/* Allocate memory to store the data structures */
1255 		s = rte_zmalloc_socket("subport_params", size0 + size1,
1256 			RTE_CACHE_LINE_SIZE, port->socket);
1257 		if (s == NULL) {
1258 			RTE_LOG(ERR, SCHED,
1259 				"%s: Memory allocation fails\n", __func__);
1260 			ret = -ENOMEM;
1261 			goto out;
1262 		}
1263 
1264 		n_subports++;
1265 
1266 		subport_profile_id = 0;
1267 
1268 		/* Port */
1269 		port->subports[subport_id] = s;
1270 
1271 		s->tb_time = port->time;
1272 
1273 		/* compile time checks */
1274 		RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
1275 		RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
1276 			(RTE_SCHED_PORT_N_GRINDERS - 1));
1277 
1278 		/* User parameters */
1279 		s->n_pipes_per_subport_enabled =
1280 				params->n_pipes_per_subport_enabled;
1281 		memcpy(s->qsize, params->qsize, sizeof(params->qsize));
1282 		s->n_pipe_profiles = params->n_pipe_profiles;
1283 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
1284 
1285 		/* TC oversubscription is enabled by default */
1286 		s->tc_ov_enabled = 1;
1287 
1288 #ifdef RTE_SCHED_CMAN
1289 		if (params->cman_params != NULL) {
1290 			s->cman_enabled = true;
1291 			status = rte_sched_cman_config(port, s, params, n_subports);
1292 			if (status) {
1293 				RTE_LOG(NOTICE, SCHED,
1294 					"%s: CMAN configuration fails\n", __func__);
1295 				return status;
1296 			}
1297 		} else {
1298 			s->cman_enabled = false;
1299 		}
1300 #endif
1301 
1302 		/* Scheduling loop detection */
1303 		s->pipe_loop = RTE_SCHED_PIPE_INVALID;
1304 		s->pipe_exhaustion = 0;
1305 
1306 		/* Grinders */
1307 		s->busy_grinders = 0;
1308 
1309 		/* Queue base calculation */
1310 		rte_sched_subport_config_qsize(s);
1311 
1312 		/* Large data structures */
1313 		s->pipe = (struct rte_sched_pipe *)
1314 			(s->memory + rte_sched_subport_get_array_base(params,
1315 			e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
1316 		s->queue = (struct rte_sched_queue *)
1317 			(s->memory + rte_sched_subport_get_array_base(params,
1318 			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
1319 		s->queue_extra = (struct rte_sched_queue_extra *)
1320 			(s->memory + rte_sched_subport_get_array_base(params,
1321 			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
1322 		s->pipe_profiles = (struct rte_sched_pipe_profile *)
1323 			(s->memory + rte_sched_subport_get_array_base(params,
1324 			e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
1325 		s->bmp_array =  s->memory + rte_sched_subport_get_array_base(
1326 				params, e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
1327 		s->queue_array = (struct rte_mbuf **)
1328 			(s->memory + rte_sched_subport_get_array_base(params,
1329 			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));
1330 
1331 		/* Pipe profile table */
1332 		rte_sched_subport_config_pipe_profile_table(s, params,
1333 							    port->rate);
1334 
1335 		/* Bitmap */
1336 		n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
1337 		bmp_mem_size = rte_bitmap_get_memory_footprint(
1338 						n_subport_pipe_queues);
1339 		s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
1340 					bmp_mem_size);
1341 		if (s->bmp == NULL) {
1342 			RTE_LOG(ERR, SCHED,
1343 				"%s: Subport bitmap init error\n", __func__);
1344 			ret = -EINVAL;
1345 			goto out;
1346 		}
1347 
1348 		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
1349 			s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
1350 
1351 		/* TC oversubscription */
1352 		s->tc_ov_wm_min = port->mtu;
1353 		s->tc_ov_period_id = 0;
1354 		s->tc_ov = 0;
1355 		s->tc_ov_n = 0;
1356 		s->tc_ov_rate = 0;
1357 	}
1358 
1359 	{
1360 	/* Update subport parameters from the subport profile table */
1361 		profile = port->subport_profiles + subport_profile_id;
1362 
1363 		s = port->subports[subport_id];
1364 
1365 		s->tb_credits = profile->tb_size / 2;
1366 
1367 		s->tc_time = port->time + profile->tc_period;
1368 
1369 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1370 			if (s->qsize[i])
1371 				s->tc_credits[i] =
1372 					profile->tc_credits_per_period[i];
1373 			else
1374 				profile->tc_credits_per_period[i] = 0;
1375 
1376 		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
1377 							s->pipe_tc_be_rate_max);
1378 		s->tc_ov_wm = s->tc_ov_wm_max;
1379 		s->profile = subport_profile_id;
1380 
1381 	}
1382 
1383 	rte_sched_port_log_subport_profile(port, subport_profile_id);
1384 
1385 	return 0;
1386 
1387 out:
1388 	rte_sched_free_memory(port, n_subports);
1389 
1390 	return ret;
1391 }
1392 
1393 int
1394 rte_sched_pipe_config(struct rte_sched_port *port,
1395 	uint32_t subport_id,
1396 	uint32_t pipe_id,
1397 	int32_t pipe_profile)
1398 {
1399 	struct rte_sched_subport *s;
1400 	struct rte_sched_subport_profile *sp;
1401 	struct rte_sched_pipe *p;
1402 	struct rte_sched_pipe_profile *params;
1403 	uint32_t n_subports = subport_id + 1;
1404 	uint32_t deactivate, profile, i;
1405 	int ret;
1406 
1407 	/* Check user parameters */
1408 	profile = (uint32_t) pipe_profile;
1409 	deactivate = (pipe_profile < 0);
1410 
1411 	if (port == NULL) {
1412 		RTE_LOG(ERR, SCHED,
1413 			"%s: Incorrect value for parameter port\n", __func__);
1414 		return -EINVAL;
1415 	}
1416 
1417 	if (subport_id >= port->n_subports_per_port) {
1418 		RTE_LOG(ERR, SCHED,
1419 			"%s: Incorrect value for parameter subport id\n", __func__);
1420 		ret = -EINVAL;
1421 		goto out;
1422 	}
1423 
1424 	s = port->subports[subport_id];
1425 	if (pipe_id >= s->n_pipes_per_subport_enabled) {
1426 		RTE_LOG(ERR, SCHED,
1427 			"%s: Incorrect value for parameter pipe id\n", __func__);
1428 		ret = -EINVAL;
1429 		goto out;
1430 	}
1431 
1432 	if (!deactivate && profile >= s->n_pipe_profiles) {
1433 		RTE_LOG(ERR, SCHED,
1434 			"%s: Incorrect value for parameter pipe profile\n", __func__);
1435 		ret = -EINVAL;
1436 		goto out;
1437 	}
1438 
1439 	sp = port->subport_profiles + s->profile;
1440 	/* Handle the case when pipe already has a valid configuration */
1441 	p = s->pipe + pipe_id;
1442 	if (p->tb_time) {
1443 		params = s->pipe_profiles + p->profile;
1444 
1445 		double subport_tc_be_rate =
1446 		(double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1447 			/ (double) sp->tc_period;
1448 		double pipe_tc_be_rate =
1449 			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1450 			/ (double) params->tc_period;
1451 		uint32_t tc_be_ov = s->tc_ov;
1452 
1453 		/* Unplug pipe from its subport */
1454 		s->tc_ov_n -= params->tc_ov_weight;
1455 		s->tc_ov_rate -= pipe_tc_be_rate;
1456 		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1457 
1458 		if (s->tc_ov != tc_be_ov) {
1459 			RTE_LOG(DEBUG, SCHED,
1460 				"Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
1461 				subport_id, subport_tc_be_rate, s->tc_ov_rate);
1462 		}
1463 
1464 		/* Reset the pipe */
1465 		memset(p, 0, sizeof(struct rte_sched_pipe));
1466 	}
1467 
1468 	if (deactivate)
1469 		return 0;
1470 
1471 	/* Apply the new pipe configuration */
1472 	p->profile = profile;
1473 	params = s->pipe_profiles + p->profile;
1474 
1475 	/* Token Bucket (TB) */
1476 	p->tb_time = port->time;
1477 	p->tb_credits = params->tb_size / 2;
1478 
1479 	/* Traffic Classes (TCs) */
1480 	p->tc_time = port->time + params->tc_period;
1481 
1482 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1483 		if (s->qsize[i])
1484 			p->tc_credits[i] = params->tc_credits_per_period[i];
1485 
1486 	{
1487 		/* Subport best effort tc oversubscription */
1488 		double subport_tc_be_rate =
1489 		(double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1490 			/ (double) sp->tc_period;
1491 		double pipe_tc_be_rate =
1492 			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1493 			/ (double) params->tc_period;
1494 		uint32_t tc_be_ov = s->tc_ov;
1495 
1496 		s->tc_ov_n += params->tc_ov_weight;
1497 		s->tc_ov_rate += pipe_tc_be_rate;
1498 		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1499 
1500 		if (s->tc_ov != tc_be_ov) {
1501 			RTE_LOG(DEBUG, SCHED,
1502 				"Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
1503 				subport_id, subport_tc_be_rate, s->tc_ov_rate);
1504 		}
1505 		p->tc_ov_period_id = s->tc_ov_period_id;
1506 		p->tc_ov_credits = s->tc_ov_wm;
1507 	}
1508 
1509 	return 0;
1510 
1511 out:
1512 	rte_sched_free_memory(port, n_subports);
1513 
1514 	return ret;
1515 }
1516 
1517 int
1518 rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
1519 	uint32_t subport_id,
1520 	struct rte_sched_pipe_params *params,
1521 	uint32_t *pipe_profile_id)
1522 {
1523 	struct rte_sched_subport *s;
1524 	struct rte_sched_pipe_profile *pp;
1525 	uint32_t i;
1526 	int status;
1527 
1528 	/* Port */
1529 	if (port == NULL) {
1530 		RTE_LOG(ERR, SCHED,
1531 			"%s: Incorrect value for parameter port\n", __func__);
1532 		return -EINVAL;
1533 	}
1534 
1535 	/* Subport id must not exceed the max limit */
1536 	if (subport_id >= port->n_subports_per_port) {
1537 		RTE_LOG(ERR, SCHED,
1538 			"%s: Incorrect value for subport id\n", __func__);
1539 		return -EINVAL;
1540 	}
1541 
1542 	s = port->subports[subport_id];
1543 
1544 	/* Number of pipe profiles must not exceed the max limit */
1545 	if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
1546 		RTE_LOG(ERR, SCHED,
1547 			"%s: Number of pipe profiles exceeds the max limit\n", __func__);
1548 		return -EINVAL;
1549 	}
1550 
1551 	/* Pipe params */
1552 	status = pipe_profile_check(params, port->rate, &s->qsize[0]);
1553 	if (status != 0) {
1554 		RTE_LOG(ERR, SCHED,
1555 			"%s: Pipe profile check failed(%d)\n", __func__, status);
1556 		return -EINVAL;
1557 	}
1558 
1559 	pp = &s->pipe_profiles[s->n_pipe_profiles];
1560 	rte_sched_pipe_profile_convert(s, params, pp, port->rate);
1561 
1562 	/* Pipe profile must not already exist */
1563 	for (i = 0; i < s->n_pipe_profiles; i++)
1564 		if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
1565 			RTE_LOG(ERR, SCHED,
1566 				"%s: Pipe profile exists\n", __func__);
1567 			return -EINVAL;
1568 		}
1569 
1570 	/* Pipe profile commit */
1571 	*pipe_profile_id = s->n_pipe_profiles;
1572 	s->n_pipe_profiles++;
1573 
1574 	if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
1575 		s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
1576 
1577 	rte_sched_port_log_pipe_profile(s, *pipe_profile_id);
1578 
1579 	return 0;
1580 }
1581 
1582 int
1583 rte_sched_port_subport_profile_add(struct rte_sched_port *port,
1584 	struct rte_sched_subport_profile_params *params,
1585 	uint32_t *subport_profile_id)
1586 {
1587 	int status;
1588 	uint32_t i;
1589 	struct rte_sched_subport_profile *dst;
1590 
1591 	/* Port */
1592 	if (port == NULL) {
1593 		RTE_LOG(ERR, SCHED, "%s: "
1594 		"Incorrect value for parameter port\n", __func__);
1595 		return -EINVAL;
1596 	}
1597 
1598 	if (params == NULL) {
1599 		RTE_LOG(ERR, SCHED, "%s: "
1600 		"Incorrect value for parameter profile\n", __func__);
1601 		return -EINVAL;
1602 	}
1603 
1604 	if (subport_profile_id == NULL) {
1605 		RTE_LOG(ERR, SCHED, "%s: "
1606 		"Incorrect value for parameter subport_profile_id\n",
1607 		__func__);
1608 		return -EINVAL;
1609 	}
1610 
1611 	dst = port->subport_profiles + port->n_subport_profiles;
1612 
1613 	/* Number of subport profiles must not exceed the max limit */
1614 	if (port->n_subport_profiles >= port->n_max_subport_profiles) {
1615 		RTE_LOG(ERR, SCHED, "%s: "
1616 		"Number of subport profiles exceeds the max limit\n",
1617 		 __func__);
1618 		return -EINVAL;
1619 	}
1620 
1621 	status = subport_profile_check(params, port->rate);
1622 	if (status != 0) {
1623 		RTE_LOG(ERR, SCHED,
1624 		"%s: subport profile check failed(%d)\n", __func__, status);
1625 		return -EINVAL;
1626 	}
1627 
1628 	rte_sched_subport_profile_convert(params, dst, port->rate);
1629 
1630 	/* Subport profile must not already exist */
1631 	for (i = 0; i < port->n_subport_profiles; i++)
1632 		if (memcmp(port->subport_profiles + i,
1633 		    dst, sizeof(*dst)) == 0) {
1634 			RTE_LOG(ERR, SCHED,
1635 			"%s: subport profile exists\n", __func__);
1636 			return -EINVAL;
1637 		}
1638 
1639 	/* Subport profile commit */
1640 	*subport_profile_id = port->n_subport_profiles;
1641 	port->n_subport_profiles++;
1642 
1643 	rte_sched_port_log_subport_profile(port, *subport_profile_id);
1644 
1645 	return 0;
1646 }
1647 
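/* Pack the (subport, pipe, traffic class, queue) tuple into a flat queue index:
 * the low 4 bits select one of the 16 queues within the pipe, the next
 * n_pipes_per_subport_log2 bits select the pipe, and the remaining high bits
 * select the subport. rte_sched_port_pkt_read_tree_path() below reverses this.
 */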
1648 static inline uint32_t
1649 rte_sched_port_qindex(struct rte_sched_port *port,
1650 	uint32_t subport,
1651 	uint32_t pipe,
1652 	uint32_t traffic_class,
1653 	uint32_t queue)
1654 {
1655 	return ((subport & (port->n_subports_per_port - 1)) <<
1656 		(port->n_pipes_per_subport_log2 + 4)) |
1657 		((pipe &
1658 		(port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
1659 		((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
1660 		(RTE_SCHED_QUEUES_PER_PIPE - 1));
1661 }
1662 
1663 void
1664 rte_sched_port_pkt_write(struct rte_sched_port *port,
1665 			 struct rte_mbuf *pkt,
1666 			 uint32_t subport, uint32_t pipe,
1667 			 uint32_t traffic_class,
1668 			 uint32_t queue, enum rte_color color)
1669 {
1670 	uint32_t queue_id =
1671 		rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
1672 
1673 	rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
1674 }
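/* Typical usage sketch (the subport/pipe/tc/queue values below are made up by
 * the application's classification stage):
 *
 *	rte_sched_port_pkt_write(port, pkt, subport_id, pipe_id,
 *		RTE_SCHED_TRAFFIC_CLASS_BE, 2, RTE_COLOR_GREEN);
 *	...
 *	rte_sched_port_enqueue(port, &pkt, 1);
 */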
1675 
1676 void
1677 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
1678 				  const struct rte_mbuf *pkt,
1679 				  uint32_t *subport, uint32_t *pipe,
1680 				  uint32_t *traffic_class, uint32_t *queue)
1681 {
1682 	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1683 
1684 	*subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1685 	*pipe = (queue_id >> 4) &
1686 		(port->subports[*subport]->n_pipes_per_subport_enabled - 1);
1687 	*traffic_class = rte_sched_port_pipe_tc(port, queue_id);
1688 	*queue = rte_sched_port_tc_queue(port, queue_id);
1689 }
1690 
1691 enum rte_color
1692 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
1693 {
1694 	return (enum rte_color)rte_mbuf_sched_color_get(pkt);
1695 }
1696 
1697 int
1698 rte_sched_subport_read_stats(struct rte_sched_port *port,
1699 			     uint32_t subport_id,
1700 			     struct rte_sched_subport_stats *stats,
1701 			     uint32_t *tc_ov)
1702 {
1703 	struct rte_sched_subport *s;
1704 
1705 	/* Check user parameters */
1706 	if (port == NULL) {
1707 		RTE_LOG(ERR, SCHED,
1708 			"%s: Incorrect value for parameter port\n", __func__);
1709 		return -EINVAL;
1710 	}
1711 
1712 	if (subport_id >= port->n_subports_per_port) {
1713 		RTE_LOG(ERR, SCHED,
1714 			"%s: Incorrect value for subport id\n", __func__);
1715 		return -EINVAL;
1716 	}
1717 
1718 	if (stats == NULL) {
1719 		RTE_LOG(ERR, SCHED,
1720 			"%s: Incorrect value for parameter stats\n", __func__);
1721 		return -EINVAL;
1722 	}
1723 
1724 	if (tc_ov == NULL) {
1725 		RTE_LOG(ERR, SCHED,
1726 			"%s: Incorrect value for tc_ov\n", __func__);
1727 		return -EINVAL;
1728 	}
1729 
1730 	s = port->subports[subport_id];
1731 
1732 	/* Copy subport stats and clear */
1733 	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
1734 	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
1735 
1736 	/* Subport TC oversubscription status */
1737 	*tc_ov = s->tc_ov;
1738 
1739 	return 0;
1740 }
1741 
1742 int
1743 rte_sched_queue_read_stats(struct rte_sched_port *port,
1744 	uint32_t queue_id,
1745 	struct rte_sched_queue_stats *stats,
1746 	uint16_t *qlen)
1747 {
1748 	struct rte_sched_subport *s;
1749 	struct rte_sched_queue *q;
1750 	struct rte_sched_queue_extra *qe;
1751 	uint32_t subport_id, subport_qmask, subport_qindex;
1752 
1753 	/* Check user parameters */
1754 	if (port == NULL) {
1755 		RTE_LOG(ERR, SCHED,
1756 			"%s: Incorrect value for parameter port\n", __func__);
1757 		return -EINVAL;
1758 	}
1759 
1760 	if (queue_id >= rte_sched_port_queues_per_port(port)) {
1761 		RTE_LOG(ERR, SCHED,
1762 			"%s: Incorrect value for queue id\n", __func__);
1763 		return -EINVAL;
1764 	}
1765 
1766 	if (stats == NULL) {
1767 		RTE_LOG(ERR, SCHED,
1768 			"%s: Incorrect value for parameter stats\n", __func__);
1769 		return -EINVAL;
1770 	}
1771 
1772 	if (qlen == NULL) {
1773 		RTE_LOG(ERR, SCHED,
1774 			"%s: Incorrect value for parameter qlen\n", __func__);
1775 		return -EINVAL;
1776 	}
1777 	subport_qmask = port->n_pipes_per_subport_log2 + 4;
1778 	subport_id = (queue_id >> subport_qmask) & (port->n_subports_per_port - 1);
1779 
1780 	s = port->subports[subport_id];
1781 	subport_qindex = ((1 << subport_qmask) - 1) & queue_id;
1782 	q = s->queue + subport_qindex;
1783 	qe = s->queue_extra + subport_qindex;
1784 
1785 	/* Copy queue stats and clear */
1786 	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
1787 	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
1788 
1789 	/* Queue length */
1790 	*qlen = q->qw - q->qr;
1791 
1792 	return 0;
1793 }
1794 
1795 #ifdef RTE_SCHED_DEBUG
1796 
1797 static inline int
1798 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport,
1799 	uint32_t qindex)
1800 {
1801 	struct rte_sched_queue *queue = subport->queue + qindex;
1802 
1803 	return queue->qr == queue->qw;
1804 }
1805 
1806 #endif /* RTE_SCHED_DEBUG */
1807 
1808 static inline void
1809 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1810 	struct rte_sched_subport *subport,
1811 	uint32_t qindex,
1812 	struct rte_mbuf *pkt)
1813 {
1814 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1815 	uint32_t pkt_len = pkt->pkt_len;
1816 
1817 	subport->stats.n_pkts_tc[tc_index] += 1;
1818 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
1819 }
1820 
1821 static inline void
1822 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1823 	struct rte_sched_subport *subport,
1824 	uint32_t qindex,
1825 	struct rte_mbuf *pkt,
1826 	__rte_unused uint32_t n_pkts_cman_dropped)
1827 {
1828 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1829 	uint32_t pkt_len = pkt->pkt_len;
1830 
1831 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1832 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1833 	subport->stats.n_pkts_cman_dropped[tc_index] += n_pkts_cman_dropped;
1834 }
1835 
1836 static inline void
1837 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1838 	uint32_t qindex,
1839 	struct rte_mbuf *pkt)
1840 {
1841 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1842 	uint32_t pkt_len = pkt->pkt_len;
1843 
1844 	qe->stats.n_pkts += 1;
1845 	qe->stats.n_bytes += pkt_len;
1846 }
1847 
1848 static inline void
1849 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1850 	uint32_t qindex,
1851 	struct rte_mbuf *pkt,
1852 	__rte_unused uint32_t n_pkts_cman_dropped)
1853 {
1854 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1855 	uint32_t pkt_len = pkt->pkt_len;
1856 
1857 	qe->stats.n_pkts_dropped += 1;
1858 	qe->stats.n_bytes_dropped += pkt_len;
1859 #ifdef RTE_SCHED_CMAN
1860 	if (subport->cman_enabled)
1861 		qe->stats.n_pkts_cman_dropped += n_pkts_cman_dropped;
1862 #endif
1863 }
1864 
1865 #ifdef RTE_SCHED_CMAN
1866 
1867 static inline int
1868 rte_sched_port_cman_drop(struct rte_sched_port *port,
1869 	struct rte_sched_subport *subport,
1870 	struct rte_mbuf *pkt,
1871 	uint32_t qindex,
1872 	uint16_t qlen)
1873 {
1874 	if (!subport->cman_enabled)
1875 		return 0;
1876 
1877 	struct rte_sched_queue_extra *qe;
1878 	uint32_t tc_index;
1879 
1880 	tc_index = rte_sched_port_pipe_tc(port, qindex);
1881 	qe = subport->queue_extra + qindex;
1882 
1883 	/* RED */
1884 	if (subport->cman == RTE_SCHED_CMAN_RED) {
1885 		struct rte_red_config *red_cfg;
1886 		struct rte_red *red;
1887 		enum rte_color color;
1888 
1889 		color = rte_sched_port_pkt_read_color(pkt);
1890 		red_cfg = &subport->red_config[tc_index][color];
1891 
1892 		if ((red_cfg->min_th | red_cfg->max_th) == 0)
1893 			return 0;
1894 
1895 		red = &qe->red;
1896 
1897 		return rte_red_enqueue(red_cfg, red, qlen, port->time);
1898 	}
1899 
1900 	/* PIE */
1901 	struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index];
1902 	struct rte_pie *pie = &qe->pie;
1903 
1904 	return rte_pie_enqueue(pie_cfg, pie, qlen, pkt->pkt_len, port->time_cpu_cycles);
1905 }
1906 
1907 static inline void
1908 rte_sched_port_red_set_queue_empty_timestamp(struct rte_sched_port *port,
1909 	struct rte_sched_subport *subport, uint32_t qindex)
1910 {
1911 	if (subport->cman_enabled) {
1912 		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1913 		if (subport->cman == RTE_SCHED_CMAN_RED) {
1914 			struct rte_red *red = &qe->red;
1915 
1916 			rte_red_mark_queue_empty(red, port->time);
1917 		}
1918 	}
1919 }
1920 
1921 static inline void
1922 rte_sched_port_pie_dequeue(struct rte_sched_subport *subport,
1923 	uint32_t qindex, uint32_t pkt_len, uint64_t time)
{
1924 	if (subport->cman_enabled && subport->cman == RTE_SCHED_CMAN_PIE) {
1925 		struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1926 		struct rte_pie *pie = &qe->pie;
1927 
1928 		/* Update queue length */
1929 		pie->qlen -= 1;
1930 		pie->qlen_bytes -= pkt_len;
1931 
1932 		rte_pie_dequeue(pie, pkt_len, time);
1933 	}
1934 }
1935 
1936 #else
1937 
1938 static inline int rte_sched_port_cman_drop(struct rte_sched_port *port __rte_unused,
1939 	struct rte_sched_subport *subport __rte_unused,
1940 	struct rte_mbuf *pkt __rte_unused,
1941 	uint32_t qindex __rte_unused,
1942 	uint16_t qlen __rte_unused)
1943 {
1944 	return 0;
1945 }
1946 
1947 #define rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex) /* no-op when RTE_SCHED_CMAN is not defined */
1948 
1949 static inline void
1950 rte_sched_port_pie_dequeue(struct rte_sched_subport *subport __rte_unused,
1951 	uint32_t qindex __rte_unused,
1952 	uint32_t pkt_len __rte_unused,
1953 	uint64_t time __rte_unused)
{
1954 	/* Do nothing when RTE_SCHED_CMAN is not defined */
1955 }
1956 
1957 #endif /* RTE_SCHED_CMAN */
1958 
1959 #ifdef RTE_SCHED_DEBUG
1960 
1961 static inline void
1962 debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos,
1963 		       uint64_t bmp_slab)
1964 {
1965 	uint64_t mask;
1966 	uint32_t i, panic;
1967 
1968 	if (bmp_slab == 0)
1969 		rte_panic("Empty slab at position %u\n", bmp_pos);
1970 
1971 	panic = 0;
1972 	for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
1973 		if (mask & bmp_slab) {
1974 			if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) {
1975 				printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
1976 				panic = 1;
1977 			}
1978 		}
1979 	}
1980 
1981 	if (panic)
1982 		rte_panic("Empty queues in slab 0x%" PRIx64 " starting at position %u\n",
1983 			bmp_slab, bmp_pos);
1984 }
1985 
1986 #endif /* RTE_SCHED_DEBUG */
1987 
1988 static inline struct rte_sched_subport *
1989 rte_sched_port_subport(struct rte_sched_port *port,
1990 	struct rte_mbuf *pkt)
1991 {
1992 	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1993 	uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1994 
1995 	return port->subports[subport_id];
1996 }
1997 
1998 static inline uint32_t
1999 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
2000 	struct rte_mbuf *pkt, uint32_t subport_qmask)
2001 {
2002 	struct rte_sched_queue *q;
2003 	struct rte_sched_queue_extra *qe;
2004 	uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
2005 	uint32_t subport_queue_id = subport_qmask & qindex;
2006 
2007 	q = subport->queue + subport_queue_id;
2008 	rte_prefetch0(q);
2009 	qe = subport->queue_extra + subport_queue_id;
2010 	rte_prefetch0(qe);
2011 
2012 	return subport_queue_id;
2013 }
2014 
2015 static inline void
2016 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
2017 	struct rte_sched_subport *subport,
2018 	uint32_t qindex,
2019 	struct rte_mbuf **qbase)
2020 {
2021 	struct rte_sched_queue *q;
2022 	struct rte_mbuf **q_qw;
2023 	uint16_t qsize;
2024 
2025 	q = subport->queue + qindex;
2026 	qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2027 	q_qw = qbase + (q->qw & (qsize - 1));
2028 
2029 	rte_prefetch0(q_qw);
2030 	rte_bitmap_prefetch0(subport->bmp, qindex);
2031 }
2032 
2033 static inline int
2034 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
2035 	struct rte_sched_subport *subport,
2036 	uint32_t qindex,
2037 	struct rte_mbuf **qbase,
2038 	struct rte_mbuf *pkt)
2039 {
2040 	struct rte_sched_queue *q;
2041 	uint16_t qsize;
2042 	uint16_t qlen;
2043 
2044 	q = subport->queue + qindex;
2045 	qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2046 	qlen = q->qw - q->qr;
2047 
2048 	/* Drop the packet (and update drop stats) when CMAN requests a drop or the queue is full */
2049 	if (unlikely(rte_sched_port_cman_drop(port, subport, pkt, qindex, qlen) ||
2050 		     (qlen >= qsize))) {
2051 		rte_sched_port_update_subport_stats_on_drop(port, subport,
2052 			qindex, pkt, qlen < qsize);
2053 		rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
2054 			qlen < qsize);
2055 		rte_pktmbuf_free(pkt);
2056 		return 0;
2057 	}
2058 
2059 	/* Enqueue packet */
2060 	qbase[q->qw & (qsize - 1)] = pkt;
2061 	q->qw++;
2062 
2063 	/* Activate queue in the subport bitmap */
2064 	rte_bitmap_set(subport->bmp, qindex);
2065 
2066 	/* Statistics */
2067 	rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
2068 	rte_sched_port_update_queue_stats(subport, qindex, pkt);
2069 
2070 	return 1;
2071 }
2072 
2073 
2074 /*
2075  * The enqueue function implements a 4-level pipeline with each stage
2076  * processing two different packets. The purpose of using a pipeline
2077  * is to hide the latency of prefetching the data structures. The
2078  * naming convention is presented in the diagram below:
2079  *
2080  *   p00  _______   p10  _______   p20  _______   p30  _______
2081  * ----->|       |----->|       |----->|       |----->|       |----->
2082  *       |   0   |      |   1   |      |   2   |      |   3   |
2083  * ----->|_______|----->|_______|----->|_______|----->|_______|----->
2084  *   p01            p11            p21            p31
2085  *
 * where pXY denotes one of the two packets (Y = 0 or 1) currently being
 * processed by pipeline stage X.
2086  */
2087 int
2088 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
2089 		       uint32_t n_pkts)
2090 {
2091 	struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
2092 		*pkt30, *pkt31, *pkt_last;
2093 	struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
2094 		**q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
2095 	struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
2096 		*subport20, *subport21, *subport30, *subport31, *subport_last;
2097 	uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
2098 	uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
2099 	uint32_t subport_qmask;
2100 	uint32_t result, i;
2101 
2102 	result = 0;
2103 	subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;
2104 
2105 	/*
2106 	 * Less than 6 input packets available, which is not enough to
2107 	 * feed the pipeline
2108 	 */
2109 	if (unlikely(n_pkts < 6)) {
2110 		struct rte_sched_subport *subports[5];
2111 		struct rte_mbuf **q_base[5];
2112 		uint32_t q[5];
2113 
2114 		/* Prefetch the mbuf structure of each packet */
2115 		for (i = 0; i < n_pkts; i++)
2116 			rte_prefetch0(pkts[i]);
2117 
2118 		/* Prefetch the subport structure for each packet */
2119 		for (i = 0; i < n_pkts; i++)
2120 			subports[i] = rte_sched_port_subport(port, pkts[i]);
2121 
2122 		/* Prefetch the queue structure for each queue */
2123 		for (i = 0; i < n_pkts; i++)
2124 			q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
2125 					pkts[i], subport_qmask);
2126 
2127 		/* Prefetch the write pointer location of each queue */
2128 		for (i = 0; i < n_pkts; i++) {
2129 			q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
2130 			rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
2131 				q[i], q_base[i]);
2132 		}
2133 
2134 		/* Write each packet to its queue */
2135 		for (i = 0; i < n_pkts; i++)
2136 			result += rte_sched_port_enqueue_qwa(port, subports[i],
2137 						q[i], q_base[i], pkts[i]);
2138 
2139 		return result;
2140 	}
2141 
2142 	/* Feed the first 3 stages of the pipeline (6 packets needed) */
2143 	pkt20 = pkts[0];
2144 	pkt21 = pkts[1];
2145 	rte_prefetch0(pkt20);
2146 	rte_prefetch0(pkt21);
2147 
2148 	pkt10 = pkts[2];
2149 	pkt11 = pkts[3];
2150 	rte_prefetch0(pkt10);
2151 	rte_prefetch0(pkt11);
2152 
2153 	subport20 = rte_sched_port_subport(port, pkt20);
2154 	subport21 = rte_sched_port_subport(port, pkt21);
2155 	q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
2156 			pkt20, subport_qmask);
2157 	q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
2158 			pkt21, subport_qmask);
2159 
2160 	pkt00 = pkts[4];
2161 	pkt01 = pkts[5];
2162 	rte_prefetch0(pkt00);
2163 	rte_prefetch0(pkt01);
2164 
2165 	subport10 = rte_sched_port_subport(port, pkt10);
2166 	subport11 = rte_sched_port_subport(port, pkt11);
2167 	q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2168 			pkt10, subport_qmask);
2169 	q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2170 			pkt11, subport_qmask);
2171 
2172 	q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2173 	q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2174 	rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2175 	rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2176 
2177 	/* Run the pipeline */
2178 	for (i = 6; i < (n_pkts & (~1)); i += 2) {
2179 		/* Propagate stage inputs */
2180 		pkt30 = pkt20;
2181 		pkt31 = pkt21;
2182 		pkt20 = pkt10;
2183 		pkt21 = pkt11;
2184 		pkt10 = pkt00;
2185 		pkt11 = pkt01;
2186 		q30 = q20;
2187 		q31 = q21;
2188 		q20 = q10;
2189 		q21 = q11;
2190 		subport30 = subport20;
2191 		subport31 = subport21;
2192 		subport20 = subport10;
2193 		subport21 = subport11;
2194 		q30_base = q20_base;
2195 		q31_base = q21_base;
2196 
2197 		/* Stage 0: Get packets in */
2198 		pkt00 = pkts[i];
2199 		pkt01 = pkts[i + 1];
2200 		rte_prefetch0(pkt00);
2201 		rte_prefetch0(pkt01);
2202 
2203 		/* Stage 1: Prefetch subport and queue structure storing queue pointers */
2204 		subport10 = rte_sched_port_subport(port, pkt10);
2205 		subport11 = rte_sched_port_subport(port, pkt11);
2206 		q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2207 				pkt10, subport_qmask);
2208 		q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2209 				pkt11, subport_qmask);
2210 
2211 		/* Stage 2: Prefetch queue write location */
2212 		q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2213 		q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2214 		rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2215 		rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2216 
2217 		/* Stage 3: Write packet to queue and activate queue */
2218 		r30 = rte_sched_port_enqueue_qwa(port, subport30,
2219 				q30, q30_base, pkt30);
2220 		r31 = rte_sched_port_enqueue_qwa(port, subport31,
2221 				q31, q31_base, pkt31);
2222 		result += r30 + r31;
2223 	}
2224 
2225 	/*
2226 	 * Drain the pipeline (exactly 6 packets).
2227 	 * Handle the last packet in the case
2228 	 * of an odd number of input packets.
2229 	 */
2230 	pkt_last = pkts[n_pkts - 1];
2231 	rte_prefetch0(pkt_last);
2232 
2233 	subport00 = rte_sched_port_subport(port, pkt00);
2234 	subport01 = rte_sched_port_subport(port, pkt01);
2235 	q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
2236 			pkt00, subport_qmask);
2237 	q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
2238 			pkt01, subport_qmask);
2239 
2240 	q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
2241 	q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
2242 	rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
2243 	rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);
2244 
2245 	r20 = rte_sched_port_enqueue_qwa(port, subport20,
2246 			q20, q20_base, pkt20);
2247 	r21 = rte_sched_port_enqueue_qwa(port, subport21,
2248 			q21, q21_base, pkt21);
2249 	result += r20 + r21;
2250 
2251 	subport_last = rte_sched_port_subport(port, pkt_last);
2252 	q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
2253 				pkt_last, subport_qmask);
2254 
2255 	q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
2256 	q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
2257 	rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
2258 	rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);
2259 
2260 	r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
2261 			q10_base, pkt10);
2262 	r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
2263 			q11_base, pkt11);
2264 	result += r10 + r11;
2265 
2266 	q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
2267 	rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
2268 		q_last, q_last_base);
2269 
2270 	r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
2271 			q00_base, pkt00);
2272 	r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
2273 			q01_base, pkt01);
2274 	result += r00 + r01;
2275 
2276 	if (n_pkts & 1) {
2277 		r_last = rte_sched_port_enqueue_qwa(port, subport_last,
2278 					q_last,	q_last_base, pkt_last);
2279 		result += r_last;
2280 	}
2281 
2282 	return result;
2283 }
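
/*
 * Illustrative usage sketch (editorial, not part of the library): enqueue a
 * burst of mbufs whose scheduler queue has already been set (typically via
 * rte_sched_port_pkt_write()). The return value is the number of packets
 * accepted; rejected packets are freed internally, so the caller must not
 * free them again. rx_burst, n_rx and drop_counter are placeholders for the
 * application's receive path.
 *
 *	uint32_t n_enq = rte_sched_port_enqueue(port, rx_burst, n_rx);
 *	if (n_enq < n_rx)
 *		drop_counter += n_rx - n_enq;
 */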
2284 
2285 static inline uint64_t
2286 grinder_tc_ov_credits_update(struct rte_sched_port *port,
2287 	struct rte_sched_subport *subport, uint32_t pos)
2288 {
2289 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2290 	struct rte_sched_subport_profile *sp = grinder->subport_params;
2291 	uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2292 	uint64_t tc_consumption = 0, tc_ov_consumption_max;
2293 	uint64_t tc_ov_wm = subport->tc_ov_wm;
2294 	uint32_t i;
2295 
2296 	if (subport->tc_ov == 0)
2297 		return subport->tc_ov_wm_max;
2298 
2299 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2300 		tc_ov_consumption[i] = sp->tc_credits_per_period[i]
2301 					-  subport->tc_credits[i];
2302 		tc_consumption += tc_ov_consumption[i];
2303 	}
2304 
2305 	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
2306 		sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2307 		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
2308 
2309 	tc_ov_consumption_max =
2310 		sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2311 		tc_consumption;
2312 
2313 	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
2314 		(tc_ov_consumption_max - port->mtu)) {
2315 		tc_ov_wm  -= tc_ov_wm >> 7;
2316 		if (tc_ov_wm < subport->tc_ov_wm_min)
2317 			tc_ov_wm = subport->tc_ov_wm_min;
2318 
2319 		return tc_ov_wm;
2320 	}
2321 
2322 	tc_ov_wm += (tc_ov_wm >> 7) + 1;
2323 	if (tc_ov_wm > subport->tc_ov_wm_max)
2324 		tc_ov_wm = subport->tc_ov_wm_max;
2325 
2326 	return tc_ov_wm;
2327 }
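
/*
 * Editorial note on the adaptation above: if the best-effort consumption
 * over the last enforcement period came within one MTU of
 * tc_ov_consumption_max, the oversubscription watermark is reduced by
 * roughly 1/128 of its value; otherwise it is increased by roughly 1/128
 * (at least by 1). The result is clamped to [tc_ov_wm_min, tc_ov_wm_max].
 * For example, an assumed watermark of 12800 bytes would move down to
 * 12700 or up to 12901 bytes.
 */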
2328 
2329 static inline void
2330 grinder_credits_update(struct rte_sched_port *port,
2331 	struct rte_sched_subport *subport, uint32_t pos)
2332 {
2333 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2334 	struct rte_sched_pipe *pipe = grinder->pipe;
2335 	struct rte_sched_pipe_profile *params = grinder->pipe_params;
2336 	struct rte_sched_subport_profile *sp = grinder->subport_params;
2337 	uint64_t n_periods;
2338 	uint32_t i;
2339 
2340 	/* Subport TB */
2341 	n_periods = (port->time - subport->tb_time) / sp->tb_period;
2342 	subport->tb_credits += n_periods * sp->tb_credits_per_period;
2343 	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2344 	subport->tb_time += n_periods * sp->tb_period;
2345 
2346 	/* Pipe TB */
2347 	n_periods = (port->time - pipe->tb_time) / params->tb_period;
2348 	pipe->tb_credits += n_periods * params->tb_credits_per_period;
2349 	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2350 	pipe->tb_time += n_periods * params->tb_period;
2351 
2352 	/* Subport TCs */
2353 	if (unlikely(port->time >= subport->tc_time)) {
2354 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2355 			subport->tc_credits[i] = sp->tc_credits_per_period[i];
2356 
2357 		subport->tc_time = port->time + sp->tc_period;
2358 	}
2359 
2360 	/* Pipe TCs */
2361 	if (unlikely(port->time >= pipe->tc_time)) {
2362 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2363 			pipe->tc_credits[i] = params->tc_credits_per_period[i];
2364 		pipe->tc_time = port->time + params->tc_period;
2365 	}
2366 }
2367 
2368 static inline void
2369 grinder_credits_update_with_tc_ov(struct rte_sched_port *port,
2370 	struct rte_sched_subport *subport, uint32_t pos)
2371 {
2372 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2373 	struct rte_sched_pipe *pipe = grinder->pipe;
2374 	struct rte_sched_pipe_profile *params = grinder->pipe_params;
2375 	struct rte_sched_subport_profile *sp = grinder->subport_params;
2376 	uint64_t n_periods;
2377 	uint32_t i;
2378 
2379 	/* Subport TB */
2380 	n_periods = (port->time - subport->tb_time) / sp->tb_period;
2381 	subport->tb_credits += n_periods * sp->tb_credits_per_period;
2382 	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2383 	subport->tb_time += n_periods * sp->tb_period;
2384 
2385 	/* Pipe TB */
2386 	n_periods = (port->time - pipe->tb_time) / params->tb_period;
2387 	pipe->tb_credits += n_periods * params->tb_credits_per_period;
2388 	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2389 	pipe->tb_time += n_periods * params->tb_period;
2390 
2391 	/* Subport TCs */
2392 	if (unlikely(port->time >= subport->tc_time)) {
2393 		subport->tc_ov_wm =
2394 			grinder_tc_ov_credits_update(port, subport, pos);
2395 
2396 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2397 			subport->tc_credits[i] = sp->tc_credits_per_period[i];
2398 
2399 		subport->tc_time = port->time + sp->tc_period;
2400 		subport->tc_ov_period_id++;
2401 	}
2402 
2403 	/* Pipe TCs */
2404 	if (unlikely(port->time >= pipe->tc_time)) {
2405 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2406 			pipe->tc_credits[i] = params->tc_credits_per_period[i];
2407 		pipe->tc_time = port->time + params->tc_period;
2408 	}
2409 
2410 	/* Pipe TCs - Oversubscription */
2411 	if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
2412 		pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
2413 
2414 		pipe->tc_ov_period_id = subport->tc_ov_period_id;
2415 	}
2416 }
2417 
2418 static inline int
2419 grinder_credits_check(struct rte_sched_port *port,
2420 	struct rte_sched_subport *subport, uint32_t pos)
2421 {
2422 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2423 	struct rte_sched_pipe *pipe = grinder->pipe;
2424 	struct rte_mbuf *pkt = grinder->pkt;
2425 	uint32_t tc_index = grinder->tc_index;
2426 	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2427 	uint64_t subport_tb_credits = subport->tb_credits;
2428 	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2429 	uint64_t pipe_tb_credits = pipe->tb_credits;
2430 	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2431 	int enough_credits;
2432 
2433 	/* Check pipe and subport credits */
2434 	enough_credits = (pkt_len <= subport_tb_credits) &&
2435 		(pkt_len <= subport_tc_credits) &&
2436 		(pkt_len <= pipe_tb_credits) &&
2437 		(pkt_len <= pipe_tc_credits);
2438 
2439 	if (!enough_credits)
2440 		return 0;
2441 
2442 	/* Update pipe and subport credits */
2443 	subport->tb_credits -= pkt_len;
2444 	subport->tc_credits[tc_index] -= pkt_len;
2445 	pipe->tb_credits -= pkt_len;
2446 	pipe->tc_credits[tc_index] -= pkt_len;
2447 
2448 	return 1;
2449 }
2450 
2451 static inline int
2452 grinder_credits_check_with_tc_ov(struct rte_sched_port *port,
2453 	struct rte_sched_subport *subport, uint32_t pos)
2454 {
2455 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2456 	struct rte_sched_pipe *pipe = grinder->pipe;
2457 	struct rte_mbuf *pkt = grinder->pkt;
2458 	uint32_t tc_index = grinder->tc_index;
2459 	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2460 	uint64_t subport_tb_credits = subport->tb_credits;
2461 	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2462 	uint64_t pipe_tb_credits = pipe->tb_credits;
2463 	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2464 	uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2465 	uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2466 	uint64_t pipe_tc_ov_credits;
2467 	uint32_t i;
2468 	int enough_credits;
2469 
	/* Branchless handling of the oversubscription credits: for the
	 * best-effort TC the credit check below uses the real tc_ov_credits
	 * value and the update subtracts pkt_len; for every other TC the
	 * check is against ~0 (always passes) and the update subtracts 0.
	 */
2470 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2471 		pipe_tc_ov_mask1[i] = ~0LLU;
2472 
2473 	pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2474 	pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU;
2475 	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2476 
2477 	/* Check pipe and subport credits */
2478 	enough_credits = (pkt_len <= subport_tb_credits) &&
2479 		(pkt_len <= subport_tc_credits) &&
2480 		(pkt_len <= pipe_tb_credits) &&
2481 		(pkt_len <= pipe_tc_credits) &&
2482 		(pkt_len <= pipe_tc_ov_credits);
2483 
2484 	if (!enough_credits)
2485 		return 0;
2486 
2487 	/* Update pipe and subport credits */
2488 	subport->tb_credits -= pkt_len;
2489 	subport->tc_credits[tc_index] -= pkt_len;
2490 	pipe->tb_credits -= pkt_len;
2491 	pipe->tc_credits[tc_index] -= pkt_len;
2492 	pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2493 
2494 	return 1;
2495 }
2496 
2497 
2498 static inline int
2499 grinder_schedule(struct rte_sched_port *port,
2500 	struct rte_sched_subport *subport, uint32_t pos)
2501 {
2502 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2503 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
2504 	uint32_t qindex = grinder->qindex[grinder->qpos];
2505 	struct rte_mbuf *pkt = grinder->pkt;
2506 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2507 	uint32_t be_tc_active;
2508 
2509 	if (subport->tc_ov_enabled) {
2510 		if (!grinder_credits_check_with_tc_ov(port, subport, pos))
2511 			return 0;
2512 	} else {
2513 		if (!grinder_credits_check(port, subport, pos))
2514 			return 0;
2515 	}
2516 
2517 	/* Advance port time */
2518 	port->time += pkt_len;
2519 
2520 	/* Send packet */
2521 	port->pkts_out[port->n_pkts_out++] = pkt;
2522 	queue->qr++;
2523 
2524 	be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
2525 	grinder->wrr_tokens[grinder->qpos] +=
2526 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
2527 
2528 	if (queue->qr == queue->qw) {
2529 		rte_bitmap_clear(subport->bmp, qindex);
2530 		grinder->qmask &= ~(1 << grinder->qpos);
2531 		if (be_tc_active)
2532 			grinder->wrr_mask[grinder->qpos] = 0;
2533 
2534 		rte_sched_port_red_set_queue_empty_timestamp(port, subport, qindex);
2535 	}
2536 
2537 	rte_sched_port_pie_dequeue(subport, qindex, pkt_len, port->time_cpu_cycles);
2538 
2539 	/* Reset pipe loop detection */
2540 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2541 	grinder->productive = 1;
2542 
2543 	return 1;
2544 }
2545 
2546 static inline int
2547 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2548 {
2549 	uint32_t i;
2550 
2551 	for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2552 		if (subport->grinder_base_bmp_pos[i] == base_pipe)
2553 			return 1;
2554 	}
2555 
2556 	return 0;
2557 }
2558 
2559 static inline void
2560 grinder_pcache_populate(struct rte_sched_subport *subport,
2561 	uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2562 {
2563 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2564 	uint16_t w[4];
2565 
2566 	grinder->pcache_w = 0;
2567 	grinder->pcache_r = 0;
2568 
2569 	w[0] = (uint16_t) bmp_slab;
2570 	w[1] = (uint16_t) (bmp_slab >> 16);
2571 	w[2] = (uint16_t) (bmp_slab >> 32);
2572 	w[3] = (uint16_t) (bmp_slab >> 48);
2573 
2574 	grinder->pcache_qmask[grinder->pcache_w] = w[0];
2575 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2576 	grinder->pcache_w += (w[0] != 0);
2577 
2578 	grinder->pcache_qmask[grinder->pcache_w] = w[1];
2579 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2580 	grinder->pcache_w += (w[1] != 0);
2581 
2582 	grinder->pcache_qmask[grinder->pcache_w] = w[2];
2583 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2584 	grinder->pcache_w += (w[2] != 0);
2585 
2586 	grinder->pcache_qmask[grinder->pcache_w] = w[3];
2587 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2588 	grinder->pcache_w += (w[3] != 0);
2589 }
2590 
2591 static inline void
2592 grinder_tccache_populate(struct rte_sched_subport *subport,
2593 	uint32_t pos, uint32_t qindex, uint16_t qmask)
2594 {
2595 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2596 	uint8_t b, i;
2597 
2598 	grinder->tccache_w = 0;
2599 	grinder->tccache_r = 0;
2600 
2601 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2602 		b = (uint8_t) ((qmask >> i) & 0x1);
2603 		grinder->tccache_qmask[grinder->tccache_w] = b;
2604 		grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2605 		grinder->tccache_w += (b != 0);
2606 	}
2607 
2608 	b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2609 	grinder->tccache_qmask[grinder->tccache_w] = b;
2610 	grinder->tccache_qindex[grinder->tccache_w] = qindex +
2611 		RTE_SCHED_TRAFFIC_CLASS_BE;
2612 	grinder->tccache_w += (b != 0);
2613 }
2614 
2615 static inline int
2616 grinder_next_tc(struct rte_sched_port *port,
2617 	struct rte_sched_subport *subport, uint32_t pos)
2618 {
2619 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2620 	struct rte_mbuf **qbase;
2621 	uint32_t qindex;
2622 	uint16_t qsize;
2623 
2624 	if (grinder->tccache_r == grinder->tccache_w)
2625 		return 0;
2626 
2627 	qindex = grinder->tccache_qindex[grinder->tccache_r];
2628 	qbase = rte_sched_subport_pipe_qbase(subport, qindex);
2629 	qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2630 
2631 	grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2632 	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2633 	grinder->qsize = qsize;
2634 
2635 	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2636 		grinder->queue[0] = subport->queue + qindex;
2637 		grinder->qbase[0] = qbase;
2638 		grinder->qindex[0] = qindex;
2639 		grinder->tccache_r++;
2640 
2641 		return 1;
2642 	}
2643 
2644 	grinder->queue[0] = subport->queue + qindex;
2645 	grinder->queue[1] = subport->queue + qindex + 1;
2646 	grinder->queue[2] = subport->queue + qindex + 2;
2647 	grinder->queue[3] = subport->queue + qindex + 3;
2648 
2649 	grinder->qbase[0] = qbase;
2650 	grinder->qbase[1] = qbase + qsize;
2651 	grinder->qbase[2] = qbase + 2 * qsize;
2652 	grinder->qbase[3] = qbase + 3 * qsize;
2653 
2654 	grinder->qindex[0] = qindex;
2655 	grinder->qindex[1] = qindex + 1;
2656 	grinder->qindex[2] = qindex + 2;
2657 	grinder->qindex[3] = qindex + 3;
2658 
2659 	grinder->tccache_r++;
2660 	return 1;
2661 }
2662 
2663 static inline int
2664 grinder_next_pipe(struct rte_sched_port *port,
2665 	struct rte_sched_subport *subport, uint32_t pos)
2666 {
2667 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2668 	uint32_t pipe_qindex;
2669 	uint16_t pipe_qmask;
2670 
2671 	if (grinder->pcache_r < grinder->pcache_w) {
2672 		pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
2673 		pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
2674 		grinder->pcache_r++;
2675 	} else {
2676 		uint64_t bmp_slab = 0;
2677 		uint32_t bmp_pos = 0;
2678 
2679 		/* Get another non-empty pipe group */
2680 		if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0))
2681 			return 0;
2682 
2683 #ifdef RTE_SCHED_DEBUG
2684 		debug_check_queue_slab(subport, bmp_pos, bmp_slab);
2685 #endif
2686 
2687 		/* Return if pipe group already in one of the other grinders */
2688 		subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
2689 		if (unlikely(grinder_pipe_exists(subport, bmp_pos)))
2690 			return 0;
2691 
2692 		subport->grinder_base_bmp_pos[pos] = bmp_pos;
2693 
2694 		/* Install new pipe group into grinder's pipe cache */
2695 		grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab);
2696 
2697 		pipe_qmask = grinder->pcache_qmask[0];
2698 		pipe_qindex = grinder->pcache_qindex[0];
2699 		grinder->pcache_r = 1;
2700 	}
2701 
2702 	/* Install new pipe in the grinder */
2703 	grinder->pindex = pipe_qindex >> 4;
2704 	grinder->subport = subport;
2705 	grinder->pipe = subport->pipe + grinder->pindex;
2706 	grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
2707 	grinder->productive = 0;
2708 
2709 	grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask);
2710 	grinder_next_tc(port, subport, pos);
2711 
2712 	/* Check for pipe exhaustion */
2713 	if (grinder->pindex == subport->pipe_loop) {
2714 		subport->pipe_exhaustion = 1;
2715 		subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2716 	}
2717 
2718 	return 1;
2719 }
2720 
2721 
2722 static inline void
2723 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
2724 {
2725 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2726 	struct rte_sched_pipe *pipe = grinder->pipe;
2727 	struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2728 	uint32_t qmask = grinder->qmask;
2729 
2730 	grinder->wrr_tokens[0] =
2731 		((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2732 	grinder->wrr_tokens[1] =
2733 		((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2734 	grinder->wrr_tokens[2] =
2735 		((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2736 	grinder->wrr_tokens[3] =
2737 		((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2738 
2739 	grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2740 	grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2741 	grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2742 	grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2743 
2744 	grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2745 	grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2746 	grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2747 	grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2748 }
2749 
2750 static inline void
2751 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
2752 {
2753 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2754 	struct rte_sched_pipe *pipe = grinder->pipe;
2755 
2756 	pipe->wrr_tokens[0] =
2757 			(grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2758 				RTE_SCHED_WRR_SHIFT;
2759 	pipe->wrr_tokens[1] =
2760 			(grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2761 				RTE_SCHED_WRR_SHIFT;
2762 	pipe->wrr_tokens[2] =
2763 			(grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2764 				RTE_SCHED_WRR_SHIFT;
2765 	pipe->wrr_tokens[3] =
2766 			(grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2767 				RTE_SCHED_WRR_SHIFT;
2768 }
2769 
2770 static inline void
2771 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
2772 {
2773 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2774 	uint16_t wrr_tokens_min;
2775 
2776 	grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2777 	grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2778 	grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2779 	grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2780 
2781 	grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2782 	wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2783 
2784 	grinder->wrr_tokens[0] -= wrr_tokens_min;
2785 	grinder->wrr_tokens[1] -= wrr_tokens_min;
2786 	grinder->wrr_tokens[2] -= wrr_tokens_min;
2787 	grinder->wrr_tokens[3] -= wrr_tokens_min;
2788 }
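
/*
 * Illustrative example of a single grinder_wrr() step (editorial, with
 * assumed token values): if the masked tokens are {48, 16, 32, 0xFFFF}
 * (queue 3 inactive, so forced to 0xFFFF by its mask), the minimum is at
 * position 1, so qpos = 1 and 16 is subtracted from every lane, giving
 * {32, 0, 16, 0xFFEF}. The active queue with the least accumulated
 * cost-weighted work is therefore served next.
 */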
2789 
2790 
2791 #define grinder_evict(subport, pos) /* no-op */
2792 
2793 static inline void
2794 grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos)
2795 {
2796 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2797 
2798 	rte_prefetch0(grinder->pipe);
2799 	rte_prefetch0(grinder->queue[0]);
2800 }
2801 
2802 static inline void
2803 grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos)
2804 {
2805 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2806 	uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
2807 
2808 	qsize = grinder->qsize;
2809 	grinder->qpos = 0;
2810 
2811 	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2812 		qr[0] = grinder->queue[0]->qr & (qsize - 1);
2813 
2814 		rte_prefetch0(grinder->qbase[0] + qr[0]);
2815 		return;
2816 	}
2817 
2818 	qr[0] = grinder->queue[0]->qr & (qsize - 1);
2819 	qr[1] = grinder->queue[1]->qr & (qsize - 1);
2820 	qr[2] = grinder->queue[2]->qr & (qsize - 1);
2821 	qr[3] = grinder->queue[3]->qr & (qsize - 1);
2822 
2823 	rte_prefetch0(grinder->qbase[0] + qr[0]);
2824 	rte_prefetch0(grinder->qbase[1] + qr[1]);
2825 
2826 	grinder_wrr_load(subport, pos);
2827 	grinder_wrr(subport, pos);
2828 
2829 	rte_prefetch0(grinder->qbase[2] + qr[2]);
2830 	rte_prefetch0(grinder->qbase[3] + qr[3]);
2831 }
2832 
2833 static inline void
2834 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos)
2835 {
2836 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2837 	uint32_t qpos = grinder->qpos;
2838 	struct rte_mbuf **qbase = grinder->qbase[qpos];
2839 	uint16_t qsize = grinder->qsize;
2840 	uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2841 
2842 	grinder->pkt = qbase[qr];
2843 	rte_prefetch0(grinder->pkt);
2844 
2845 	if (unlikely((qr & 0x7) == 7)) {
2846 		uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2847 
2848 		rte_prefetch0(qbase + qr_next);
2849 	}
2850 }
2851 
2852 static inline uint32_t
2853 grinder_handle(struct rte_sched_port *port,
2854 	struct rte_sched_subport *subport, uint32_t pos)
2855 {
2856 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2857 
2858 	switch (grinder->state) {
2859 	case e_GRINDER_PREFETCH_PIPE:
2860 	{
2861 		if (grinder_next_pipe(port, subport, pos)) {
2862 			grinder_prefetch_pipe(subport, pos);
2863 			subport->busy_grinders++;
2864 
2865 			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2866 			return 0;
2867 		}
2868 
2869 		return 0;
2870 	}
2871 
2872 	case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
2873 	{
2874 		struct rte_sched_pipe *pipe = grinder->pipe;
2875 
2876 		grinder->pipe_params = subport->pipe_profiles + pipe->profile;
2877 		grinder->subport_params = port->subport_profiles +
2878 						subport->profile;
2879 
2880 		grinder_prefetch_tc_queue_arrays(subport, pos);
2881 
2882 		if (subport->tc_ov_enabled)
2883 			grinder_credits_update_with_tc_ov(port, subport, pos);
2884 		else
2885 			grinder_credits_update(port, subport, pos);
2886 
2887 		grinder->state = e_GRINDER_PREFETCH_MBUF;
2888 		return 0;
2889 	}
2890 
2891 	case e_GRINDER_PREFETCH_MBUF:
2892 	{
2893 		grinder_prefetch_mbuf(subport, pos);
2894 
2895 		grinder->state = e_GRINDER_READ_MBUF;
2896 		return 0;
2897 	}
2898 
2899 	case e_GRINDER_READ_MBUF:
2900 	{
2901 		uint32_t wrr_active, result = 0;
2902 
2903 		result = grinder_schedule(port, subport, pos);
2904 
2905 		wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
2906 
2907 		/* Look for next packet within the same TC */
2908 		if (result && grinder->qmask) {
2909 			if (wrr_active)
2910 				grinder_wrr(subport, pos);
2911 
2912 			grinder_prefetch_mbuf(subport, pos);
2913 
2914 			return 1;
2915 		}
2916 
2917 		if (wrr_active)
2918 			grinder_wrr_store(subport, pos);
2919 
2920 		/* Look for another active TC within same pipe */
2921 		if (grinder_next_tc(port, subport, pos)) {
2922 			grinder_prefetch_tc_queue_arrays(subport, pos);
2923 
2924 			grinder->state = e_GRINDER_PREFETCH_MBUF;
2925 			return result;
2926 		}
2927 
2928 		if (grinder->productive == 0 &&
2929 		    subport->pipe_loop == RTE_SCHED_PIPE_INVALID)
2930 			subport->pipe_loop = grinder->pindex;
2931 
2932 		grinder_evict(subport, pos);
2933 
2934 		/* Look for another active pipe */
2935 		if (grinder_next_pipe(port, subport, pos)) {
2936 			grinder_prefetch_pipe(subport, pos);
2937 
2938 			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2939 			return result;
2940 		}
2941 
2942 		/* No active pipe found */
2943 		subport->busy_grinders--;
2944 
2945 		grinder->state = e_GRINDER_PREFETCH_PIPE;
2946 		return result;
2947 	}
2948 
2949 	default:
2950 		rte_panic("Algorithmic error (invalid state)\n");
2951 		return 0;
2952 	}
2953 }
2954 
2955 static inline void
2956 rte_sched_port_time_resync(struct rte_sched_port *port)
2957 {
2958 	uint64_t cycles = rte_get_tsc_cycles();
2959 	uint64_t cycles_diff;
2960 	uint64_t bytes_diff;
2961 	uint32_t i;
2962 
2963 	if (cycles < port->time_cpu_cycles)
2964 		port->time_cpu_cycles = 0;
2965 
2966 	cycles_diff = cycles - port->time_cpu_cycles;
2967 	/* Compute elapsed time in bytes */
2968 	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
2969 					   port->inv_cycles_per_byte);
2970 
2971 	/* Advance port time */
2972 	port->time_cpu_cycles +=
2973 		(bytes_diff * port->cycles_per_byte) >> RTE_SCHED_TIME_SHIFT;
2974 	port->time_cpu_bytes += bytes_diff;
2975 	if (port->time < port->time_cpu_bytes)
2976 		port->time = port->time_cpu_bytes;
2977 
2978 	/* Reset pipe loop detection */
2979 	for (i = 0; i < port->n_subports_per_port; i++)
2980 		port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID;
2981 }
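
/*
 * Editorial note on the conversion above: the elapsed TSC cycles are
 * translated into byte times at the configured port rate, i.e.
 * bytes_diff ~ cycles_diff * rate / tsc_hz. As an assumed example, with a
 * 10 Gbps rate (1.25e9 bytes/s) and a 2 GHz TSC, 1000 elapsed cycles are
 * 0.5 us, i.e. about 625 byte times. The reciprocal divide together with
 * the RTE_SCHED_TIME_SHIFT scaling keeps a costly 64-bit division off the
 * fast path.
 */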
2982 
2983 static inline int
2984 rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass)
2985 {
2986 	int exceptions;
2987 
2988 	/* Check if any exception flag is set */
2989 	exceptions = (second_pass && subport->busy_grinders == 0) ||
2990 		(subport->pipe_exhaustion == 1);
2991 
2992 	/* Clear exception flags */
2993 	subport->pipe_exhaustion = 0;
2994 
2995 	return exceptions;
2996 }
2997 
2998 int
2999 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
3000 {
3001 	struct rte_sched_subport *subport;
3002 	uint32_t subport_id = port->subport_id;
3003 	uint32_t i, n_subports = 0, count;
3004 
3005 	port->pkts_out = pkts;
3006 	port->n_pkts_out = 0;
3007 
3008 	rte_sched_port_time_resync(port);
3009 
3010 	/* Advance each grinder by one step, visiting the grinders in round-robin order */
3011 	for (i = 0, count = 0; ; i++)  {
3012 		subport = port->subports[subport_id];
3013 
3014 		count += grinder_handle(port, subport,
3015 				i & (RTE_SCHED_PORT_N_GRINDERS - 1));
3016 
3017 		if (count == n_pkts) {
3018 			subport_id++;
3019 
3020 			if (subport_id == port->n_subports_per_port)
3021 				subport_id = 0;
3022 
3023 			port->subport_id = subport_id;
3024 			break;
3025 		}
3026 
3027 		if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) {
3028 			i = 0;
3029 			subport_id++;
3030 			n_subports++;
3031 		}
3032 
3033 		if (subport_id == port->n_subports_per_port)
3034 			subport_id = 0;
3035 
3036 		if (n_subports == port->n_subports_per_port) {
3037 			port->subport_id = subport_id;
3038 			break;
3039 		}
3040 	}
3041 
3042 	return count;
3043 }
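
/*
 * Illustrative forwarding loop sketch (editorial, not part of the library):
 * classified packets are pushed into the hierarchy and the scheduler is
 * then asked for a burst to transmit. tx_port, tx_queue, rx_burst and n_rx
 * are placeholders for the application's I/O path.
 *
 *	struct rte_mbuf *tx_burst[64];
 *	int n_tx;
 *
 *	rte_sched_port_enqueue(port, rx_burst, n_rx);
 *	n_tx = rte_sched_port_dequeue(port, tx_burst, 64);
 *	if (n_tx > 0)
 *		rte_eth_tx_burst(tx_port, tx_queue, tx_burst, (uint16_t)n_tx);
 */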
3044