xref: /dpdk/lib/sched/rte_sched.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <string.h>
7 
8 #include <rte_common.h>
9 #include <rte_log.h>
10 #include <rte_memory.h>
11 #include <rte_malloc.h>
12 #include <rte_cycles.h>
13 #include <rte_prefetch.h>
14 #include <rte_branch_prediction.h>
15 #include <rte_mbuf.h>
16 #include <rte_bitmap.h>
17 #include <rte_reciprocal.h>
18 
19 #include "rte_sched.h"
20 #include "rte_sched_common.h"
21 #include "rte_approx.h"
22 
23 #ifdef __INTEL_COMPILER
24 #pragma warning(disable:2259) /* conversion may lose significant bits */
25 #endif
26 
27 #ifdef RTE_SCHED_VECTOR
28 #include <rte_vect.h>
29 
30 #ifdef RTE_ARCH_X86
31 #define SCHED_VECTOR_SSE4
32 #elif defined(__ARM_NEON)
33 #define SCHED_VECTOR_NEON
34 #endif
35 
36 #endif
37 
38 #define RTE_SCHED_TB_RATE_CONFIG_ERR          (1e-7)
39 #define RTE_SCHED_WRR_SHIFT                   3
40 #define RTE_SCHED_MAX_QUEUES_PER_TC           RTE_SCHED_BE_QUEUES_PER_PIPE
41 #define RTE_SCHED_GRINDER_PCACHE_SIZE         (64 / RTE_SCHED_QUEUES_PER_PIPE)
42 #define RTE_SCHED_PIPE_INVALID                UINT32_MAX
43 #define RTE_SCHED_BMP_POS_INVALID             UINT32_MAX
44 
45 /* Scaling for cycles_per_byte calculation
46  * Chosen so that minimum rate is 480 bit/sec
47  */
48 #define RTE_SCHED_TIME_SHIFT		      8
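/*
 * Worked example (a sketch, assuming a ~1 GHz TSC): the port keeps time in
 * bytes, and rte_sched_port_config() derives
 *     cycles_per_byte = (tsc_hz << RTE_SCHED_TIME_SHIFT) / rate
 * as a fixed-point value (scaled by 2^8) that must fit in 32 bits for the
 * reciprocal divide. With tsc_hz = 1e9 this requires
 *     rate >= 1e9 * 256 / 2^32 ~= 60 bytes/sec ~= 480 bit/sec,
 * which is where the minimum rate quoted above comes from.
 */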
49 
50 struct rte_sched_pipe_profile {
51 	/* Token bucket (TB) */
52 	uint64_t tb_period;
53 	uint64_t tb_credits_per_period;
54 	uint64_t tb_size;
55 
56 	/* Pipe traffic classes */
57 	uint64_t tc_period;
58 	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
59 	uint8_t tc_ov_weight;
60 
61 	/* Pipe best-effort traffic class queues */
62 	uint8_t  wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
63 };
64 
65 struct rte_sched_pipe {
66 	/* Token bucket (TB) */
67 	uint64_t tb_time; /* time of last update */
68 	uint64_t tb_credits;
69 
70 	/* Pipe profile and flags */
71 	uint32_t profile;
72 
73 	/* Traffic classes (TCs) */
74 	uint64_t tc_time; /* time of next update */
75 	uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
76 
77 	/* Weighted Round Robin (WRR) */
78 	uint8_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
79 
80 	/* TC oversubscription */
81 	uint64_t tc_ov_credits;
82 	uint8_t tc_ov_period_id;
83 } __rte_cache_aligned;
84 
85 struct rte_sched_queue {
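	/* Free-running enqueue (qw) and dequeue (qr) positions: the queue
	 * occupancy is qw - qr and array indices are masked with (qsize - 1),
	 * where qsize is a power of 2.
	 */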
86 	uint16_t qw;
87 	uint16_t qr;
88 };
89 
90 struct rte_sched_queue_extra {
91 	struct rte_sched_queue_stats stats;
92 #ifdef RTE_SCHED_RED
93 	struct rte_red red;
94 #endif
95 };
96 
97 enum grinder_state {
98 	e_GRINDER_PREFETCH_PIPE = 0,
99 	e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS,
100 	e_GRINDER_PREFETCH_MBUF,
101 	e_GRINDER_READ_MBUF
102 };
103 
104 struct rte_sched_subport_profile {
105 	/* Token bucket (TB) */
106 	uint64_t tb_period;
107 	uint64_t tb_credits_per_period;
108 	uint64_t tb_size;
109 
110 	uint64_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
111 	uint64_t tc_period;
112 };
113 
114 struct rte_sched_grinder {
115 	/* Pipe cache */
116 	uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE];
117 	uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE];
118 	uint32_t pcache_w;
119 	uint32_t pcache_r;
120 
121 	/* Current pipe */
122 	enum grinder_state state;
123 	uint32_t productive;
124 	uint32_t pindex;
125 	struct rte_sched_subport *subport;
126 	struct rte_sched_subport_profile *subport_params;
127 	struct rte_sched_pipe *pipe;
128 	struct rte_sched_pipe_profile *pipe_params;
129 
130 	/* TC cache */
131 	uint8_t tccache_qmask[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
132 	uint32_t tccache_qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
133 	uint32_t tccache_w;
134 	uint32_t tccache_r;
135 
136 	/* Current TC */
137 	uint32_t tc_index;
138 	struct rte_sched_queue *queue[RTE_SCHED_MAX_QUEUES_PER_TC];
139 	struct rte_mbuf **qbase[RTE_SCHED_MAX_QUEUES_PER_TC];
140 	uint32_t qindex[RTE_SCHED_MAX_QUEUES_PER_TC];
141 	uint16_t qsize;
142 	uint32_t qmask;
143 	uint32_t qpos;
144 	struct rte_mbuf *pkt;
145 
146 	/* WRR */
147 	uint16_t wrr_tokens[RTE_SCHED_BE_QUEUES_PER_PIPE];
148 	uint16_t wrr_mask[RTE_SCHED_BE_QUEUES_PER_PIPE];
149 	uint8_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
150 };
151 
152 struct rte_sched_subport {
153 	/* Token bucket (TB) */
154 	uint64_t tb_time; /* time of last update */
155 	uint64_t tb_credits;
156 
157 	/* Traffic classes (TCs) */
158 	uint64_t tc_time; /* time of next update */
159 	uint64_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
160 
161 	/* TC oversubscription */
162 	uint64_t tc_ov_wm;
163 	uint64_t tc_ov_wm_min;
164 	uint64_t tc_ov_wm_max;
165 	uint8_t tc_ov_period_id;
166 	uint8_t tc_ov;
167 	uint32_t tc_ov_n;
168 	double tc_ov_rate;
169 
170 	/* Statistics */
171 	struct rte_sched_subport_stats stats __rte_cache_aligned;
172 
173 	/* subport profile */
174 	uint32_t profile;
175 	/* Subport pipes */
176 	uint32_t n_pipes_per_subport_enabled;
177 	uint32_t n_pipe_profiles;
178 	uint32_t n_max_pipe_profiles;
179 
180 	/* Pipe best-effort TC rate */
181 	uint64_t pipe_tc_be_rate_max;
182 
183 	/* Pipe queues size */
184 	uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
185 
186 #ifdef RTE_SCHED_RED
187 	struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];
188 #endif
189 
190 	/* Scheduling loop detection */
191 	uint32_t pipe_loop;
192 	uint32_t pipe_exhaustion;
193 
194 	/* Bitmap */
195 	struct rte_bitmap *bmp;
196 	uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16;
197 
198 	/* Grinders */
199 	struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS];
200 	uint32_t busy_grinders;
201 
202 	/* Queue base calculation */
203 	uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE];
204 	uint32_t qsize_sum;
205 
206 	struct rte_sched_pipe *pipe;
207 	struct rte_sched_queue *queue;
208 	struct rte_sched_queue_extra *queue_extra;
209 	struct rte_sched_pipe_profile *pipe_profiles;
210 	uint8_t *bmp_array;
211 	struct rte_mbuf **queue_array;
212 	uint8_t memory[0] __rte_cache_aligned;
213 } __rte_cache_aligned;
214 
215 struct rte_sched_port {
216 	/* User parameters */
217 	uint32_t n_subports_per_port;
218 	uint32_t n_pipes_per_subport;
219 	uint32_t n_pipes_per_subport_log2;
220 	uint16_t pipe_queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
221 	uint8_t pipe_tc[RTE_SCHED_QUEUES_PER_PIPE];
222 	uint8_t tc_queue[RTE_SCHED_QUEUES_PER_PIPE];
223 	uint32_t n_subport_profiles;
224 	uint32_t n_max_subport_profiles;
225 	uint64_t rate;
226 	uint32_t mtu;
227 	uint32_t frame_overhead;
228 	int socket;
229 
230 	/* Timing */
231 	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU cycles */
232 	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes */
233 	uint64_t time;                /* Current NIC TX time measured in bytes */
234 	struct rte_reciprocal inv_cycles_per_byte; /* Inverse of CPU cycles per byte */
235 	uint64_t cycles_per_byte;
236 
237 	/* Grinders */
238 	struct rte_mbuf **pkts_out;
239 	uint32_t n_pkts_out;
240 	uint32_t subport_id;
241 
242 	/* Large data structures */
243 	struct rte_sched_subport_profile *subport_profiles;
244 	struct rte_sched_subport *subports[0] __rte_cache_aligned;
245 } __rte_cache_aligned;
246 
247 enum rte_sched_subport_array {
248 	e_RTE_SCHED_SUBPORT_ARRAY_PIPE = 0,
249 	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE,
250 	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA,
251 	e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES,
252 	e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY,
253 	e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY,
254 	e_RTE_SCHED_SUBPORT_ARRAY_TOTAL,
255 };
256 
257 static inline uint32_t
258 rte_sched_subport_pipe_queues(struct rte_sched_subport *subport)
259 {
260 	return RTE_SCHED_QUEUES_PER_PIPE * subport->n_pipes_per_subport_enabled;
261 }
262 
263 static inline struct rte_mbuf **
264 rte_sched_subport_pipe_qbase(struct rte_sched_subport *subport, uint32_t qindex)
265 {
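	/* RTE_SCHED_QUEUES_PER_PIPE is 16, so the upper bits of qindex select
	 * the pipe and the low 4 bits select the queue within that pipe. */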
266 	uint32_t pindex = qindex >> 4;
267 	uint32_t qpos = qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1);
268 
269 	return (subport->queue_array + pindex *
270 		subport->qsize_sum + subport->qsize_add[qpos]);
271 }
272 
273 static inline uint16_t
274 rte_sched_subport_pipe_qsize(struct rte_sched_port *port,
275 struct rte_sched_subport *subport, uint32_t qindex)
276 {
277 	uint32_t tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
278 
279 	return subport->qsize[tc];
280 }
281 
282 static inline uint32_t
283 rte_sched_port_queues_per_port(struct rte_sched_port *port)
284 {
285 	uint32_t n_queues = 0, i;
286 
287 	for (i = 0; i < port->n_subports_per_port; i++)
288 		n_queues += rte_sched_subport_pipe_queues(port->subports[i]);
289 
290 	return n_queues;
291 }
292 
293 static inline uint16_t
294 rte_sched_port_pipe_queue(struct rte_sched_port *port, uint32_t traffic_class)
295 {
296 	uint16_t pipe_queue = port->pipe_queue[traffic_class];
297 
298 	return pipe_queue;
299 }
300 
301 static inline uint8_t
302 rte_sched_port_pipe_tc(struct rte_sched_port *port, uint32_t qindex)
303 {
304 	uint8_t pipe_tc = port->pipe_tc[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
305 
306 	return pipe_tc;
307 }
308 
309 static inline uint8_t
310 rte_sched_port_tc_queue(struct rte_sched_port *port, uint32_t qindex)
311 {
312 	uint8_t tc_queue = port->tc_queue[qindex & (RTE_SCHED_QUEUES_PER_PIPE - 1)];
313 
314 	return tc_queue;
315 }
316 
317 static int
318 pipe_profile_check(struct rte_sched_pipe_params *params,
319 	uint64_t rate, uint16_t *qsize)
320 {
321 	uint32_t i;
322 
323 	/* Pipe parameters */
324 	if (params == NULL) {
325 		RTE_LOG(ERR, SCHED,
326 			"%s: Incorrect value for parameter params\n", __func__);
327 		return -EINVAL;
328 	}
329 
330 	/* TB rate: non-zero, not greater than port rate */
331 	if (params->tb_rate == 0 ||
332 		params->tb_rate > rate) {
333 		RTE_LOG(ERR, SCHED,
334 			"%s: Incorrect value for tb rate\n", __func__);
335 		return -EINVAL;
336 	}
337 
338 	/* TB size: non-zero */
339 	if (params->tb_size == 0) {
340 		RTE_LOG(ERR, SCHED,
341 			"%s: Incorrect value for tb size\n", __func__);
342 		return -EINVAL;
343 	}
344 
345 	/* TC rate: non-zero if qsize non-zero, less than pipe rate */
346 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
347 		if ((qsize[i] == 0 && params->tc_rate[i] != 0) ||
348 			(qsize[i] != 0 && (params->tc_rate[i] == 0 ||
349 			params->tc_rate[i] > params->tb_rate))) {
350 			RTE_LOG(ERR, SCHED,
351 				"%s: Incorrect value for qsize or tc_rate\n", __func__);
352 			return -EINVAL;
353 		}
354 	}
355 
356 	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0 ||
357 		qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
358 		RTE_LOG(ERR, SCHED,
359 			"%s: Incorrect value for be traffic class rate\n", __func__);
360 		return -EINVAL;
361 	}
362 
363 	/* TC period: non-zero */
364 	if (params->tc_period == 0) {
365 		RTE_LOG(ERR, SCHED,
366 			"%s: Incorrect value for tc period\n", __func__);
367 		return -EINVAL;
368 	}
369 
370 	/* Best effort tc oversubscription weight: non-zero */
371 	if (params->tc_ov_weight == 0) {
372 		RTE_LOG(ERR, SCHED,
373 			"%s: Incorrect value for tc ov weight\n", __func__);
374 		return -EINVAL;
375 	}
376 
377 	/* Queue WRR weights: non-zero */
378 	for (i = 0; i < RTE_SCHED_BE_QUEUES_PER_PIPE; i++) {
379 		if (params->wrr_weights[i] == 0) {
380 			RTE_LOG(ERR, SCHED,
381 				"%s: Incorrect value for wrr weight\n", __func__);
382 			return -EINVAL;
383 		}
384 	}
385 
386 	return 0;
387 }
388 
389 static int
390 subport_profile_check(struct rte_sched_subport_profile_params *params,
391 	uint64_t rate)
392 {
393 	uint32_t i;
394 
395 	/* Check user parameters */
396 	if (params == NULL) {
397 		RTE_LOG(ERR, SCHED, "%s: "
398 		"Incorrect value for parameter params\n", __func__);
399 		return -EINVAL;
400 	}
401 
402 	if (params->tb_rate == 0 || params->tb_rate > rate) {
403 		RTE_LOG(ERR, SCHED, "%s: "
404 		"Incorrect value for tb rate\n", __func__);
405 		return -EINVAL;
406 	}
407 
408 	if (params->tb_size == 0) {
409 		RTE_LOG(ERR, SCHED, "%s: "
410 		"Incorrect value for tb size\n", __func__);
411 		return -EINVAL;
412 	}
413 
414 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
415 		uint64_t tc_rate = params->tc_rate[i];
416 
417 		if (tc_rate == 0 || (tc_rate > params->tb_rate)) {
418 			RTE_LOG(ERR, SCHED, "%s: "
419 			"Incorrect value for tc rate\n", __func__);
420 			return -EINVAL;
421 		}
422 	}
423 
424 	if (params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
425 		RTE_LOG(ERR, SCHED, "%s: "
426 		"Incorrect tc rate(best effort)\n", __func__);
427 		return -EINVAL;
428 	}
429 
430 	if (params->tc_period == 0) {
431 		RTE_LOG(ERR, SCHED, "%s: "
432 		"Incorrect value for tc period\n", __func__);
433 		return -EINVAL;
434 	}
435 
436 	return 0;
437 }
438 
439 static int
440 rte_sched_port_check_params(struct rte_sched_port_params *params)
441 {
442 	uint32_t i;
443 
444 	if (params == NULL) {
445 		RTE_LOG(ERR, SCHED,
446 			"%s: Incorrect value for parameter params\n", __func__);
447 		return -EINVAL;
448 	}
449 
450 	/* socket */
451 	if (params->socket < 0) {
452 		RTE_LOG(ERR, SCHED,
453 			"%s: Incorrect value for socket id\n", __func__);
454 		return -EINVAL;
455 	}
456 
457 	/* rate */
458 	if (params->rate == 0) {
459 		RTE_LOG(ERR, SCHED,
460 			"%s: Incorrect value for rate\n", __func__);
461 		return -EINVAL;
462 	}
463 
464 	/* mtu */
465 	if (params->mtu == 0) {
466 		RTE_LOG(ERR, SCHED,
467 			"%s: Incorrect value for mtu\n", __func__);
468 		return -EINVAL;
469 	}
470 
471 	/* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */
472 	if (params->n_subports_per_port == 0 ||
473 	    params->n_subports_per_port > 1u << 16 ||
474 	    !rte_is_power_of_2(params->n_subports_per_port)) {
475 		RTE_LOG(ERR, SCHED,
476 			"%s: Incorrect value for number of subports\n", __func__);
477 		return -EINVAL;
478 	}
479 
480 	if (params->subport_profiles == NULL ||
481 		params->n_subport_profiles == 0 ||
482 		params->n_max_subport_profiles == 0 ||
483 		params->n_subport_profiles > params->n_max_subport_profiles) {
484 		RTE_LOG(ERR, SCHED,
485 		"%s: Incorrect value for subport profiles\n", __func__);
486 		return -EINVAL;
487 	}
488 
489 	for (i = 0; i < params->n_subport_profiles; i++) {
490 		struct rte_sched_subport_profile_params *p =
491 						params->subport_profiles + i;
492 		int status;
493 
494 		status = subport_profile_check(p, params->rate);
495 		if (status != 0) {
496 			RTE_LOG(ERR, SCHED,
497 			"%s: subport profile check failed(%d)\n",
498 			__func__, status);
499 			return -EINVAL;
500 		}
501 	}
502 
503 	/* n_pipes_per_subport: non-zero, power of 2 */
504 	if (params->n_pipes_per_subport == 0 ||
505 	    !rte_is_power_of_2(params->n_pipes_per_subport)) {
506 		RTE_LOG(ERR, SCHED,
507 			"%s: Incorrect value for maximum pipes number\n", __func__);
508 		return -EINVAL;
509 	}
510 
511 	return 0;
512 }
513 
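/*
 * Return the byte offset of the requested per-subport array within the single
 * contiguous subport memory block. The arrays are laid out back to back in
 * the order of enum rte_sched_subport_array, each offset rounded up to a
 * cache line; e_RTE_SCHED_SUBPORT_ARRAY_TOTAL yields the total size needed.
 */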
514 static uint32_t
515 rte_sched_subport_get_array_base(struct rte_sched_subport_params *params,
516 	enum rte_sched_subport_array array)
517 {
518 	uint32_t n_pipes_per_subport = params->n_pipes_per_subport_enabled;
519 	uint32_t n_subport_pipe_queues =
520 		RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport;
521 
522 	uint32_t size_pipe = n_pipes_per_subport * sizeof(struct rte_sched_pipe);
523 	uint32_t size_queue =
524 		n_subport_pipe_queues * sizeof(struct rte_sched_queue);
525 	uint32_t size_queue_extra
526 		= n_subport_pipe_queues * sizeof(struct rte_sched_queue_extra);
527 	uint32_t size_pipe_profiles = params->n_max_pipe_profiles *
528 		sizeof(struct rte_sched_pipe_profile);
529 	uint32_t size_bmp_array =
530 		rte_bitmap_get_memory_footprint(n_subport_pipe_queues);
531 	uint32_t size_per_pipe_queue_array, size_queue_array;
532 
533 	uint32_t base, i;
534 
535 	size_per_pipe_queue_array = 0;
536 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
537 		if (i < RTE_SCHED_TRAFFIC_CLASS_BE)
538 			size_per_pipe_queue_array +=
539 				params->qsize[i] * sizeof(struct rte_mbuf *);
540 		else
541 			size_per_pipe_queue_array += RTE_SCHED_MAX_QUEUES_PER_TC *
542 				params->qsize[i] * sizeof(struct rte_mbuf *);
543 	}
544 	size_queue_array = n_pipes_per_subport * size_per_pipe_queue_array;
545 
546 	base = 0;
547 
548 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE)
549 		return base;
550 	base += RTE_CACHE_LINE_ROUNDUP(size_pipe);
551 
552 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE)
553 		return base;
554 	base += RTE_CACHE_LINE_ROUNDUP(size_queue);
555 
556 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA)
557 		return base;
558 	base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra);
559 
560 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES)
561 		return base;
562 	base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles);
563 
564 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY)
565 		return base;
566 	base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array);
567 
568 	if (array == e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY)
569 		return base;
570 	base += RTE_CACHE_LINE_ROUNDUP(size_queue_array);
571 
572 	return base;
573 }
574 
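/*
 * Precompute qsize_add[] as prefix sums of the per-traffic-class queue sizes:
 * entry i is the offset (in mbuf pointers) of queue i within one pipe's slice
 * of the queue array. The best-effort traffic class contributes four equally
 * sized queues; qsize_sum is the total number of mbuf pointers per pipe.
 */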
575 static void
576 rte_sched_subport_config_qsize(struct rte_sched_subport *subport)
577 {
578 	uint32_t i;
579 
580 	subport->qsize_add[0] = 0;
581 
582 	/* Strict priority traffic classes */
583 	for (i = 1; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
584 		subport->qsize_add[i] = subport->qsize_add[i-1] + subport->qsize[i-1];
585 
586 	/* Best-effort traffic class */
587 	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] =
588 		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE] +
589 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
590 	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] =
591 		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 1] +
592 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
593 	subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] =
594 		subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 2] +
595 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
596 
597 	subport->qsize_sum = subport->qsize_add[RTE_SCHED_TRAFFIC_CLASS_BE + 3] +
598 		subport->qsize[RTE_SCHED_TRAFFIC_CLASS_BE];
599 }
600 
601 static void
602 rte_sched_port_log_pipe_profile(struct rte_sched_subport *subport, uint32_t i)
603 {
604 	struct rte_sched_pipe_profile *p = subport->pipe_profiles + i;
605 
606 	RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n"
607 		"	Token bucket: period = %"PRIu64", credits per period = %"PRIu64", size = %"PRIu64"\n"
608 		"	Traffic classes: period = %"PRIu64",\n"
609 		"	credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
610 		", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
611 		", %"PRIu64", %"PRIu64", %"PRIu64"]\n"
612 		"	Best-effort traffic class oversubscription: weight = %hhu\n"
613 		"	WRR cost: [%hhu, %hhu, %hhu, %hhu]\n",
614 		i,
615 
616 		/* Token bucket */
617 		p->tb_period,
618 		p->tb_credits_per_period,
619 		p->tb_size,
620 
621 		/* Traffic classes */
622 		p->tc_period,
623 		p->tc_credits_per_period[0],
624 		p->tc_credits_per_period[1],
625 		p->tc_credits_per_period[2],
626 		p->tc_credits_per_period[3],
627 		p->tc_credits_per_period[4],
628 		p->tc_credits_per_period[5],
629 		p->tc_credits_per_period[6],
630 		p->tc_credits_per_period[7],
631 		p->tc_credits_per_period[8],
632 		p->tc_credits_per_period[9],
633 		p->tc_credits_per_period[10],
634 		p->tc_credits_per_period[11],
635 		p->tc_credits_per_period[12],
636 
637 		/* Best-effort traffic class oversubscription */
638 		p->tc_ov_weight,
639 
640 		/* WRR */
641 		p->wrr_cost[0], p->wrr_cost[1], p->wrr_cost[2], p->wrr_cost[3]);
642 }
643 
644 static void
645 rte_sched_port_log_subport_profile(struct rte_sched_port *port, uint32_t i)
646 {
647 	struct rte_sched_subport_profile *p = port->subport_profiles + i;
648 
649 	RTE_LOG(DEBUG, SCHED, "Low level config for subport profile %u:\n"
650 	"Token bucket: period = %"PRIu64", credits per period = %"PRIu64","
651 	"size = %"PRIu64"\n"
652 	"Traffic classes: period = %"PRIu64",\n"
653 	"credits per period = [%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
654 	" %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64
655 	" %"PRIu64", %"PRIu64", %"PRIu64"]\n",
656 	i,
657 
658 	/* Token bucket */
659 	p->tb_period,
660 	p->tb_credits_per_period,
661 	p->tb_size,
662 
663 	/* Traffic classes */
664 	p->tc_period,
665 	p->tc_credits_per_period[0],
666 	p->tc_credits_per_period[1],
667 	p->tc_credits_per_period[2],
668 	p->tc_credits_per_period[3],
669 	p->tc_credits_per_period[4],
670 	p->tc_credits_per_period[5],
671 	p->tc_credits_per_period[6],
672 	p->tc_credits_per_period[7],
673 	p->tc_credits_per_period[8],
674 	p->tc_credits_per_period[9],
675 	p->tc_credits_per_period[10],
676 	p->tc_credits_per_period[11],
677 	p->tc_credits_per_period[12]);
678 }
679 
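/*
 * Convert a period expressed in milliseconds at a given byte rate into a byte
 * count. For example (a sketch): tc_period = 10 ms at rate = 1,250,000,000
 * bytes/sec (10 Gbit/sec) gives 12,500,000 bytes of port time per period.
 */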
680 static inline uint64_t
681 rte_sched_time_ms_to_bytes(uint64_t time_ms, uint64_t rate)
682 {
683 	uint64_t time = time_ms;
684 
685 	time = (time * rate) / 1000;
686 
687 	return time;
688 }
689 
690 static void
691 rte_sched_pipe_profile_convert(struct rte_sched_subport *subport,
692 	struct rte_sched_pipe_params *src,
693 	struct rte_sched_pipe_profile *dst,
694 	uint64_t rate)
695 {
696 	uint32_t wrr_cost[RTE_SCHED_BE_QUEUES_PER_PIPE];
697 	uint32_t lcd1, lcd2, lcd;
698 	uint32_t i;
699 
700 	/* Token Bucket */
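	/* Express the pipe TB rate as a fraction of the port rate: find an
	 * integer ratio tb_credits_per_period/tb_period that approximates
	 * tb_rate/rate with relative error below RTE_SCHED_TB_RATE_CONFIG_ERR.
	 * Credits and periods are both measured in bytes of port time. */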
701 	if (src->tb_rate == rate) {
702 		dst->tb_credits_per_period = 1;
703 		dst->tb_period = 1;
704 	} else {
705 		double tb_rate = (double) src->tb_rate
706 				/ (double) rate;
707 		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
708 
709 		rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
710 			&dst->tb_period);
711 	}
712 
713 	dst->tb_size = src->tb_size;
714 
715 	/* Traffic Classes */
716 	dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period,
717 						rate);
718 
719 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
720 		if (subport->qsize[i])
721 			dst->tc_credits_per_period[i]
722 				= rte_sched_time_ms_to_bytes(src->tc_period,
723 					src->tc_rate[i]);
724 
725 	dst->tc_ov_weight = src->tc_ov_weight;
726 
727 	/* WRR queues */
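	/* Each best-effort queue cost is lcm(weights) / weight, so a larger
	 * weight means a lower cost charged per byte sent and a proportionally
	 * larger share. E.g. (illustration only) weights {1, 2, 4, 8} give
	 * costs {8, 4, 2, 1}. */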
728 	wrr_cost[0] = src->wrr_weights[0];
729 	wrr_cost[1] = src->wrr_weights[1];
730 	wrr_cost[2] = src->wrr_weights[2];
731 	wrr_cost[3] = src->wrr_weights[3];
732 
733 	lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]);
734 	lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]);
735 	lcd = rte_get_lcd(lcd1, lcd2);
736 
737 	wrr_cost[0] = lcd / wrr_cost[0];
738 	wrr_cost[1] = lcd / wrr_cost[1];
739 	wrr_cost[2] = lcd / wrr_cost[2];
740 	wrr_cost[3] = lcd / wrr_cost[3];
741 
742 	dst->wrr_cost[0] = (uint8_t) wrr_cost[0];
743 	dst->wrr_cost[1] = (uint8_t) wrr_cost[1];
744 	dst->wrr_cost[2] = (uint8_t) wrr_cost[2];
745 	dst->wrr_cost[3] = (uint8_t) wrr_cost[3];
746 }
747 
748 static void
749 rte_sched_subport_profile_convert(struct rte_sched_subport_profile_params *src,
750 	struct rte_sched_subport_profile *dst,
751 	uint64_t rate)
752 {
753 	uint32_t i;
754 
755 	/* Token Bucket */
756 	if (src->tb_rate == rate) {
757 		dst->tb_credits_per_period = 1;
758 		dst->tb_period = 1;
759 	} else {
760 		double tb_rate = (double) src->tb_rate
761 				/ (double) rate;
762 		double d = RTE_SCHED_TB_RATE_CONFIG_ERR;
763 
764 		rte_approx_64(tb_rate, d, &dst->tb_credits_per_period,
765 			&dst->tb_period);
766 	}
767 
768 	dst->tb_size = src->tb_size;
769 
770 	/* Traffic Classes */
771 	dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, rate);
772 
773 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
774 		dst->tc_credits_per_period[i]
775 			= rte_sched_time_ms_to_bytes(src->tc_period,
776 				src->tc_rate[i]);
777 }
778 
779 static void
780 rte_sched_subport_config_pipe_profile_table(struct rte_sched_subport *subport,
781 	struct rte_sched_subport_params *params, uint64_t rate)
782 {
783 	uint32_t i;
784 
785 	for (i = 0; i < subport->n_pipe_profiles; i++) {
786 		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
787 		struct rte_sched_pipe_profile *dst = subport->pipe_profiles + i;
788 
789 		rte_sched_pipe_profile_convert(subport, src, dst, rate);
790 		rte_sched_port_log_pipe_profile(subport, i);
791 	}
792 
793 	subport->pipe_tc_be_rate_max = 0;
794 	for (i = 0; i < subport->n_pipe_profiles; i++) {
795 		struct rte_sched_pipe_params *src = params->pipe_profiles + i;
796 		uint64_t pipe_tc_be_rate = src->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
797 
798 		if (subport->pipe_tc_be_rate_max < pipe_tc_be_rate)
799 			subport->pipe_tc_be_rate_max = pipe_tc_be_rate;
800 	}
801 }
802 
803 static void
804 rte_sched_port_config_subport_profile_table(struct rte_sched_port *port,
805 	struct rte_sched_port_params *params,
806 	uint64_t rate)
807 {
808 	uint32_t i;
809 
810 	for (i = 0; i < port->n_subport_profiles; i++) {
811 		struct rte_sched_subport_profile_params *src
812 				= params->subport_profiles + i;
813 		struct rte_sched_subport_profile *dst
814 				= port->subport_profiles + i;
815 
816 		rte_sched_subport_profile_convert(src, dst, rate);
817 		rte_sched_port_log_subport_profile(port, i);
818 	}
819 }
820 
821 static int
822 rte_sched_subport_check_params(struct rte_sched_subport_params *params,
823 	uint32_t n_max_pipes_per_subport,
824 	uint64_t rate)
825 {
826 	uint32_t i;
827 
828 	/* Check user parameters */
829 	if (params == NULL) {
830 		RTE_LOG(ERR, SCHED,
831 			"%s: Incorrect value for parameter params\n", __func__);
832 		return -EINVAL;
833 	}
834 
835 	/* qsize: if non-zero, power of 2,
836 	 * no bigger than 32K (due to 16-bit read/write pointers)
837 	 */
838 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
839 		uint16_t qsize = params->qsize[i];
840 
841 		if (qsize != 0 && !rte_is_power_of_2(qsize)) {
842 			RTE_LOG(ERR, SCHED,
843 				"%s: Incorrect value for qsize\n", __func__);
844 			return -EINVAL;
845 		}
846 	}
847 
848 	if (params->qsize[RTE_SCHED_TRAFFIC_CLASS_BE] == 0) {
849 		RTE_LOG(ERR, SCHED, "%s: Incorrect qsize\n", __func__);
850 		return -EINVAL;
851 	}
852 
853 	/* n_pipes_per_subport: non-zero, power of 2, no greater than the port maximum */
854 	if (params->n_pipes_per_subport_enabled == 0 ||
855 		params->n_pipes_per_subport_enabled > n_max_pipes_per_subport ||
856 	    !rte_is_power_of_2(params->n_pipes_per_subport_enabled)) {
857 		RTE_LOG(ERR, SCHED,
858 			"%s: Incorrect value for pipes number\n", __func__);
859 		return -EINVAL;
860 	}
861 
862 	/* pipe_profiles and n_pipe_profiles */
863 	if (params->pipe_profiles == NULL ||
864 	    params->n_pipe_profiles == 0 ||
865 		params->n_max_pipe_profiles == 0 ||
866 		params->n_pipe_profiles > params->n_max_pipe_profiles) {
867 		RTE_LOG(ERR, SCHED,
868 			"%s: Incorrect value for pipe profiles\n", __func__);
869 		return -EINVAL;
870 	}
871 
872 	for (i = 0; i < params->n_pipe_profiles; i++) {
873 		struct rte_sched_pipe_params *p = params->pipe_profiles + i;
874 		int status;
875 
876 		status = pipe_profile_check(p, rate, &params->qsize[0]);
877 		if (status != 0) {
878 			RTE_LOG(ERR, SCHED,
879 				"%s: Pipe profile check failed(%d)\n", __func__, status);
880 			return -EINVAL;
881 		}
882 	}
883 
884 	return 0;
885 }
886 
887 uint32_t
888 rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
889 	struct rte_sched_subport_params **subport_params)
890 {
891 	uint32_t size0 = 0, size1 = 0, i;
892 	int status;
893 
894 	status = rte_sched_port_check_params(port_params);
895 	if (status != 0) {
896 		RTE_LOG(ERR, SCHED,
897 			"%s: Port scheduler port params check failed (%d)\n",
898 			__func__, status);
899 
900 		return 0;
901 	}
902 
903 	for (i = 0; i < port_params->n_subports_per_port; i++) {
904 		struct rte_sched_subport_params *sp = subport_params[i];
905 
906 		status = rte_sched_subport_check_params(sp,
907 				port_params->n_pipes_per_subport,
908 				port_params->rate);
909 		if (status != 0) {
910 			RTE_LOG(ERR, SCHED,
911 				"%s: Port scheduler subport params check failed (%d)\n",
912 				__func__, status);
913 
914 			return 0;
915 		}
916 	}
917 
918 	size0 = sizeof(struct rte_sched_port);
919 
920 	for (i = 0; i < port_params->n_subports_per_port; i++) {
921 		struct rte_sched_subport_params *sp = subport_params[i];
922 
923 		size1 += rte_sched_subport_get_array_base(sp,
924 					e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
925 	}
926 
927 	return size0 + size1;
928 }
929 
930 struct rte_sched_port *
931 rte_sched_port_config(struct rte_sched_port_params *params)
932 {
933 	struct rte_sched_port *port = NULL;
934 	uint32_t size0, size1, size2;
935 	uint32_t cycles_per_byte;
936 	uint32_t i, j;
937 	int status;
938 
939 	status = rte_sched_port_check_params(params);
940 	if (status != 0) {
941 		RTE_LOG(ERR, SCHED,
942 			"%s: Port scheduler params check failed (%d)\n",
943 			__func__, status);
944 		return NULL;
945 	}
946 
947 	size0 = sizeof(struct rte_sched_port);
948 	size1 = params->n_subports_per_port * sizeof(struct rte_sched_subport *);
949 	size2 = params->n_max_subport_profiles *
950 		sizeof(struct rte_sched_subport_profile);
951 
952 	/* Allocate memory to store the data structures */
953 	port = rte_zmalloc_socket("qos_params", size0 + size1,
954 				 RTE_CACHE_LINE_SIZE, params->socket);
955 	if (port == NULL) {
956 		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
957 
958 		return NULL;
959 	}
960 
961 	/* Allocate memory to store the subport profile */
962 	port->subport_profiles  = rte_zmalloc_socket("subport_profile", size2,
963 					RTE_CACHE_LINE_SIZE, params->socket);
964 	if (port->subport_profiles == NULL) {
965 		RTE_LOG(ERR, SCHED, "%s: Memory allocation fails\n", __func__);
966 		rte_free(port);
967 		return NULL;
968 	}
969 
970 	/* User parameters */
971 	port->n_subports_per_port = params->n_subports_per_port;
972 	port->n_subport_profiles = params->n_subport_profiles;
973 	port->n_max_subport_profiles = params->n_max_subport_profiles;
974 	port->n_pipes_per_subport = params->n_pipes_per_subport;
975 	port->n_pipes_per_subport_log2 =
976 			__builtin_ctz(params->n_pipes_per_subport);
977 	port->socket = params->socket;
978 
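	/* Queue/TC mapping tables: queues 0..11 each carry one strict-priority
	 * traffic class (0..11), while queues 12..15 all belong to the
	 * best-effort traffic class 12 and are its four WRR queues 0..3.
	 * pipe_queue[tc] is the first queue of a traffic class. */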
979 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
980 		port->pipe_queue[i] = i;
981 
982 	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
983 		port->pipe_tc[i] = j;
984 
985 		if (j < RTE_SCHED_TRAFFIC_CLASS_BE)
986 			j++;
987 	}
988 
989 	for (i = 0, j = 0; i < RTE_SCHED_QUEUES_PER_PIPE; i++) {
990 		port->tc_queue[i] = j;
991 
992 		if (i >= RTE_SCHED_TRAFFIC_CLASS_BE)
993 			j++;
994 	}
995 	port->rate = params->rate;
996 	port->mtu = params->mtu + params->frame_overhead;
997 	port->frame_overhead = params->frame_overhead;
998 
999 	/* Timing */
1000 	port->time_cpu_cycles = rte_get_tsc_cycles();
1001 	port->time_cpu_bytes = 0;
1002 	port->time = 0;
1003 
1004 	/* Subport profile table */
1005 	rte_sched_port_config_subport_profile_table(port, params, port->rate);
1006 
1007 	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
1008 		/ params->rate;
1009 	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
1010 	port->cycles_per_byte = cycles_per_byte;
1011 
1012 	/* Grinders */
1013 	port->pkts_out = NULL;
1014 	port->n_pkts_out = 0;
1015 	port->subport_id = 0;
1016 
1017 	return port;
1018 }
1019 
1020 static inline void
1021 rte_sched_subport_free(struct rte_sched_port *port,
1022 	struct rte_sched_subport *subport)
1023 {
1024 	uint32_t n_subport_pipe_queues;
1025 	uint32_t qindex;
1026 
1027 	if (subport == NULL)
1028 		return;
1029 
1030 	n_subport_pipe_queues = rte_sched_subport_pipe_queues(subport);
1031 
1032 	/* Free enqueued mbufs */
1033 	for (qindex = 0; qindex < n_subport_pipe_queues; qindex++) {
1034 		struct rte_mbuf **mbufs =
1035 			rte_sched_subport_pipe_qbase(subport, qindex);
1036 		uint16_t qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1037 		if (qsize != 0) {
1038 			struct rte_sched_queue *queue = subport->queue + qindex;
1039 			uint16_t qr = queue->qr & (qsize - 1);
1040 			uint16_t qw = queue->qw & (qsize - 1);
1041 
1042 			for (; qr != qw; qr = (qr + 1) & (qsize - 1))
1043 				rte_pktmbuf_free(mbufs[qr]);
1044 		}
1045 	}
1046 
1047 	rte_free(subport);
1048 }
1049 
1050 void
1051 rte_sched_port_free(struct rte_sched_port *port)
1052 {
1053 	uint32_t i;
1054 
1055 	/* Check user parameters */
1056 	if (port == NULL)
1057 		return;
1058 
1059 	for (i = 0; i < port->n_subports_per_port; i++)
1060 		rte_sched_subport_free(port, port->subports[i]);
1061 
1062 	rte_free(port->subport_profiles);
1063 	rte_free(port);
1064 }
1065 
1066 static void
1067 rte_sched_free_memory(struct rte_sched_port *port, uint32_t n_subports)
1068 {
1069 	uint32_t i;
1070 
1071 	for (i = 0; i < n_subports; i++) {
1072 		struct rte_sched_subport *subport = port->subports[i];
1073 
1074 		rte_sched_subport_free(port, subport);
1075 	}
1076 
1077 	rte_free(port->subport_profiles);
1078 	rte_free(port);
1079 }
1080 
1081 int
1082 rte_sched_subport_config(struct rte_sched_port *port,
1083 	uint32_t subport_id,
1084 	struct rte_sched_subport_params *params,
1085 	uint32_t subport_profile_id)
1086 {
1087 	struct rte_sched_subport *s = NULL;
1088 	uint32_t n_subports = subport_id;
1089 	struct rte_sched_subport_profile *profile;
1090 	uint32_t n_subport_pipe_queues, i;
1091 	uint32_t size0, size1, bmp_mem_size;
1092 	int status;
1093 	int ret;
1094 
1095 	/* Check user parameters */
1096 	if (port == NULL) {
1097 		RTE_LOG(ERR, SCHED,
1098 			"%s: Incorrect value for parameter port\n", __func__);
1099 		return -EINVAL;
1100 	}
1101 
1102 	if (subport_id >= port->n_subports_per_port) {
1103 		RTE_LOG(ERR, SCHED,
1104 			"%s: Incorrect value for subport id\n", __func__);
1105 		ret = -EINVAL;
1106 		goto out;
1107 	}
1108 
1109 	if (subport_profile_id >= port->n_max_subport_profiles) {
1110 		RTE_LOG(ERR, SCHED, "%s: "
1111 			"Number of subport profile exceeds the max limit\n",
1112 			__func__);
1113 		ret = -EINVAL;
1114 		goto out;
1115 	}
1116 
1117 	/* Memory is allocated only on the first invocation of the API for a
1118 	 * given subport. Subsequent invocations on the same subport only
1119 	 * update the subport bandwidth parameters.
1120 	 */
1121 	if (port->subports[subport_id] == NULL) {
1122 
1123 		status = rte_sched_subport_check_params(params,
1124 			port->n_pipes_per_subport,
1125 			port->rate);
1126 		if (status != 0) {
1127 			RTE_LOG(NOTICE, SCHED,
1128 				"%s: Port scheduler params check failed (%d)\n",
1129 				__func__, status);
1130 			ret = -EINVAL;
1131 			goto out;
1132 		}
1133 
1134 		/* Determine the amount of memory to allocate */
1135 		size0 = sizeof(struct rte_sched_subport);
1136 		size1 = rte_sched_subport_get_array_base(params,
1137 					e_RTE_SCHED_SUBPORT_ARRAY_TOTAL);
1138 
1139 		/* Allocate memory to store the data structures */
1140 		s = rte_zmalloc_socket("subport_params", size0 + size1,
1141 			RTE_CACHE_LINE_SIZE, port->socket);
1142 		if (s == NULL) {
1143 			RTE_LOG(ERR, SCHED,
1144 				"%s: Memory allocation fails\n", __func__);
1145 			ret = -ENOMEM;
1146 			goto out;
1147 		}
1148 
1149 		n_subports++;
1150 
1151 		subport_profile_id = 0;
1152 
1153 		/* Port */
1154 		port->subports[subport_id] = s;
1155 
1156 		s->tb_time = port->time;
1157 
1158 		/* compile time checks */
1159 		RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0);
1160 		RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS &
1161 			(RTE_SCHED_PORT_N_GRINDERS - 1));
1162 
1163 		/* User parameters */
1164 		s->n_pipes_per_subport_enabled =
1165 				params->n_pipes_per_subport_enabled;
1166 		memcpy(s->qsize, params->qsize, sizeof(params->qsize));
1167 		s->n_pipe_profiles = params->n_pipe_profiles;
1168 		s->n_max_pipe_profiles = params->n_max_pipe_profiles;
1169 
1170 #ifdef RTE_SCHED_RED
1171 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
1172 			uint32_t j;
1173 
1174 			for (j = 0; j < RTE_COLORS; j++) {
1175 			/* if min/max are both zero, then RED is disabled */
1176 				if ((params->red_params[i][j].min_th |
1177 				     params->red_params[i][j].max_th) == 0) {
1178 					continue;
1179 				}
1180 
1181 				if (rte_red_config_init(&s->red_config[i][j],
1182 				    params->red_params[i][j].wq_log2,
1183 				    params->red_params[i][j].min_th,
1184 				    params->red_params[i][j].max_th,
1185 				    params->red_params[i][j].maxp_inv) != 0) {
1186 					RTE_LOG(NOTICE, SCHED,
1187 					"%s: RED configuration init fails\n",
1188 					__func__);
1189 					ret = -EINVAL;
1190 					goto out;
1191 				}
1192 			}
1193 		}
1194 #endif
1195 
1196 		/* Scheduling loop detection */
1197 		s->pipe_loop = RTE_SCHED_PIPE_INVALID;
1198 		s->pipe_exhaustion = 0;
1199 
1200 		/* Grinders */
1201 		s->busy_grinders = 0;
1202 
1203 		/* Queue base calculation */
1204 		rte_sched_subport_config_qsize(s);
1205 
1206 		/* Large data structures */
1207 		s->pipe = (struct rte_sched_pipe *)
1208 			(s->memory + rte_sched_subport_get_array_base(params,
1209 			e_RTE_SCHED_SUBPORT_ARRAY_PIPE));
1210 		s->queue = (struct rte_sched_queue *)
1211 			(s->memory + rte_sched_subport_get_array_base(params,
1212 			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE));
1213 		s->queue_extra = (struct rte_sched_queue_extra *)
1214 			(s->memory + rte_sched_subport_get_array_base(params,
1215 			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_EXTRA));
1216 		s->pipe_profiles = (struct rte_sched_pipe_profile *)
1217 			(s->memory + rte_sched_subport_get_array_base(params,
1218 			e_RTE_SCHED_SUBPORT_ARRAY_PIPE_PROFILES));
1219 		s->bmp_array =  s->memory + rte_sched_subport_get_array_base(
1220 				params, e_RTE_SCHED_SUBPORT_ARRAY_BMP_ARRAY);
1221 		s->queue_array = (struct rte_mbuf **)
1222 			(s->memory + rte_sched_subport_get_array_base(params,
1223 			e_RTE_SCHED_SUBPORT_ARRAY_QUEUE_ARRAY));
1224 
1225 		/* Pipe profile table */
1226 		rte_sched_subport_config_pipe_profile_table(s, params,
1227 							    port->rate);
1228 
1229 		/* Bitmap */
1230 		n_subport_pipe_queues = rte_sched_subport_pipe_queues(s);
1231 		bmp_mem_size = rte_bitmap_get_memory_footprint(
1232 						n_subport_pipe_queues);
1233 		s->bmp = rte_bitmap_init(n_subport_pipe_queues, s->bmp_array,
1234 					bmp_mem_size);
1235 		if (s->bmp == NULL) {
1236 			RTE_LOG(ERR, SCHED,
1237 				"%s: Subport bitmap init error\n", __func__);
1238 			ret = -EINVAL;
1239 			goto out;
1240 		}
1241 
1242 		for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++)
1243 			s->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID;
1244 
1245 #ifdef RTE_SCHED_SUBPORT_TC_OV
1246 		/* TC oversubscription */
1247 		s->tc_ov_wm_min = port->mtu;
1248 		s->tc_ov_period_id = 0;
1249 		s->tc_ov = 0;
1250 		s->tc_ov_n = 0;
1251 		s->tc_ov_rate = 0;
1252 #endif
1253 	}
1254 
1255 	{
1256 		/* Update subport parameters from the subport profile table */
1257 		profile = port->subport_profiles + subport_profile_id;
1258 
1259 		s = port->subports[subport_id];
1260 
1261 		s->tb_credits = profile->tb_size / 2;
1262 
1263 		s->tc_time = port->time + profile->tc_period;
1264 
1265 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1266 			if (s->qsize[i])
1267 				s->tc_credits[i] =
1268 					profile->tc_credits_per_period[i];
1269 			else
1270 				profile->tc_credits_per_period[i] = 0;
1271 
1272 #ifdef RTE_SCHED_SUBPORT_TC_OV
1273 		s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(profile->tc_period,
1274 							s->pipe_tc_be_rate_max);
1275 		s->tc_ov_wm = s->tc_ov_wm_max;
1276 #endif
1277 		s->profile = subport_profile_id;
1278 
1279 	}
1280 
1281 	rte_sched_port_log_subport_profile(port, subport_profile_id);
1282 
1283 	return 0;
1284 
1285 out:
1286 	rte_sched_free_memory(port, n_subports);
1287 
1288 	return ret;
1289 }
1290 
1291 int
1292 rte_sched_pipe_config(struct rte_sched_port *port,
1293 	uint32_t subport_id,
1294 	uint32_t pipe_id,
1295 	int32_t pipe_profile)
1296 {
1297 	struct rte_sched_subport *s;
1298 	struct rte_sched_subport_profile *sp;
1299 	struct rte_sched_pipe *p;
1300 	struct rte_sched_pipe_profile *params;
1301 	uint32_t n_subports = subport_id + 1;
1302 	uint32_t deactivate, profile, i;
1303 	int ret;
1304 
1305 	/* Check user parameters */
1306 	profile = (uint32_t) pipe_profile;
1307 	deactivate = (pipe_profile < 0);
1308 
1309 	if (port == NULL) {
1310 		RTE_LOG(ERR, SCHED,
1311 			"%s: Incorrect value for parameter port\n", __func__);
1312 		return -EINVAL;
1313 	}
1314 
1315 	if (subport_id >= port->n_subports_per_port) {
1316 		RTE_LOG(ERR, SCHED,
1317 			"%s: Incorrect value for parameter subport id\n", __func__);
1318 		ret = -EINVAL;
1319 		goto out;
1320 	}
1321 
1322 	s = port->subports[subport_id];
1323 	if (pipe_id >= s->n_pipes_per_subport_enabled) {
1324 		RTE_LOG(ERR, SCHED,
1325 			"%s: Incorrect value for parameter pipe id\n", __func__);
1326 		ret = -EINVAL;
1327 		goto out;
1328 	}
1329 
1330 	if (!deactivate && profile >= s->n_pipe_profiles) {
1331 		RTE_LOG(ERR, SCHED,
1332 			"%s: Incorrect value for parameter pipe profile\n", __func__);
1333 		ret = -EINVAL;
1334 		goto out;
1335 	}
1336 
1337 	sp = port->subport_profiles + s->profile;
1338 	/* Handle the case when pipe already has a valid configuration */
1339 	p = s->pipe + pipe_id;
1340 	if (p->tb_time) {
1341 		params = s->pipe_profiles + p->profile;
1342 
1343 		double subport_tc_be_rate =
1344 		(double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1345 			/ (double) sp->tc_period;
1346 		double pipe_tc_be_rate =
1347 			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1348 			/ (double) params->tc_period;
1349 		uint32_t tc_be_ov = s->tc_ov;
1350 
1351 		/* Unplug pipe from its subport */
1352 		s->tc_ov_n -= params->tc_ov_weight;
1353 		s->tc_ov_rate -= pipe_tc_be_rate;
1354 		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1355 
1356 		if (s->tc_ov != tc_be_ov) {
1357 			RTE_LOG(DEBUG, SCHED,
1358 				"Subport %u Best-effort TC oversubscription is OFF (%.4lf >= %.4lf)\n",
1359 				subport_id, subport_tc_be_rate, s->tc_ov_rate);
1360 		}
1361 
1362 		/* Reset the pipe */
1363 		memset(p, 0, sizeof(struct rte_sched_pipe));
1364 	}
1365 
1366 	if (deactivate)
1367 		return 0;
1368 
1369 	/* Apply the new pipe configuration */
1370 	p->profile = profile;
1371 	params = s->pipe_profiles + p->profile;
1372 
1373 	/* Token Bucket (TB) */
1374 	p->tb_time = port->time;
1375 	p->tb_credits = params->tb_size / 2;
1376 
1377 	/* Traffic Classes (TCs) */
1378 	p->tc_time = port->time + params->tc_period;
1379 
1380 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
1381 		if (s->qsize[i])
1382 			p->tc_credits[i] = params->tc_credits_per_period[i];
1383 
1384 	{
1385 		/* Subport best effort tc oversubscription */
1386 		double subport_tc_be_rate =
1387 		(double)sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1388 			/ (double) sp->tc_period;
1389 		double pipe_tc_be_rate =
1390 			(double) params->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE]
1391 			/ (double) params->tc_period;
1392 		uint32_t tc_be_ov = s->tc_ov;
1393 
1394 		s->tc_ov_n += params->tc_ov_weight;
1395 		s->tc_ov_rate += pipe_tc_be_rate;
1396 		s->tc_ov = s->tc_ov_rate > subport_tc_be_rate;
1397 
1398 		if (s->tc_ov != tc_be_ov) {
1399 			RTE_LOG(DEBUG, SCHED,
1400 				"Subport %u Best effort TC oversubscription is ON (%.4lf < %.4lf)\n",
1401 				subport_id, subport_tc_be_rate, s->tc_ov_rate);
1402 		}
1403 		p->tc_ov_period_id = s->tc_ov_period_id;
1404 		p->tc_ov_credits = s->tc_ov_wm;
1405 	}
1406 
1407 	return 0;
1408 
1409 out:
1410 	rte_sched_free_memory(port, n_subports);
1411 
1412 	return ret;
1413 }
1414 
1415 int
1416 rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
1417 	uint32_t subport_id,
1418 	struct rte_sched_pipe_params *params,
1419 	uint32_t *pipe_profile_id)
1420 {
1421 	struct rte_sched_subport *s;
1422 	struct rte_sched_pipe_profile *pp;
1423 	uint32_t i;
1424 	int status;
1425 
1426 	/* Port */
1427 	if (port == NULL) {
1428 		RTE_LOG(ERR, SCHED,
1429 			"%s: Incorrect value for parameter port\n", __func__);
1430 		return -EINVAL;
1431 	}
1432 
1433 	/* Subport id must not exceed the max limit */
1434 	if (subport_id >= port->n_subports_per_port) {
1435 		RTE_LOG(ERR, SCHED,
1436 			"%s: Incorrect value for subport id\n", __func__);
1437 		return -EINVAL;
1438 	}
1439 
1440 	s = port->subports[subport_id];
1441 
1442 	/* Pipe profiles must not exceed the max limit */
1443 	if (s->n_pipe_profiles >= s->n_max_pipe_profiles) {
1444 		RTE_LOG(ERR, SCHED,
1445 			"%s: Number of pipe profiles exceeds the max limit\n", __func__);
1446 		return -EINVAL;
1447 	}
1448 
1449 	/* Pipe params */
1450 	status = pipe_profile_check(params, port->rate, &s->qsize[0]);
1451 	if (status != 0) {
1452 		RTE_LOG(ERR, SCHED,
1453 			"%s: Pipe profile check failed(%d)\n", __func__, status);
1454 		return -EINVAL;
1455 	}
1456 
1457 	pp = &s->pipe_profiles[s->n_pipe_profiles];
1458 	rte_sched_pipe_profile_convert(s, params, pp, port->rate);
1459 
1460 	/* Pipe profile must not already exist */
1461 	for (i = 0; i < s->n_pipe_profiles; i++)
1462 		if (memcmp(s->pipe_profiles + i, pp, sizeof(*pp)) == 0) {
1463 			RTE_LOG(ERR, SCHED,
1464 				"%s: Pipe profile exists\n", __func__);
1465 			return -EINVAL;
1466 		}
1467 
1468 	/* Pipe profile commit */
1469 	*pipe_profile_id = s->n_pipe_profiles;
1470 	s->n_pipe_profiles++;
1471 
1472 	if (s->pipe_tc_be_rate_max < params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE])
1473 		s->pipe_tc_be_rate_max = params->tc_rate[RTE_SCHED_TRAFFIC_CLASS_BE];
1474 
1475 	rte_sched_port_log_pipe_profile(s, *pipe_profile_id);
1476 
1477 	return 0;
1478 }
1479 
1480 int
1481 rte_sched_port_subport_profile_add(struct rte_sched_port *port,
1482 	struct rte_sched_subport_profile_params *params,
1483 	uint32_t *subport_profile_id)
1484 {
1485 	int status;
1486 	uint32_t i;
1487 	struct rte_sched_subport_profile *dst;
1488 
1489 	/* Port */
1490 	if (port == NULL) {
1491 		RTE_LOG(ERR, SCHED, "%s: "
1492 		"Incorrect value for parameter port\n", __func__);
1493 		return -EINVAL;
1494 	}
1495 
1496 	if (params == NULL) {
1497 		RTE_LOG(ERR, SCHED, "%s: "
1498 		"Incorrect value for parameter profile\n", __func__);
1499 		return -EINVAL;
1500 	}
1501 
1502 	if (subport_profile_id == NULL) {
1503 		RTE_LOG(ERR, SCHED, "%s: "
1504 		"Incorrect value for parameter subport_profile_id\n",
1505 		__func__);
1506 		return -EINVAL;
1507 	}
1508 
1509 	dst = port->subport_profiles + port->n_subport_profiles;
1510 
1511 	/* Subport profiles must not exceed the max limit */
1512 	if (port->n_subport_profiles >= port->n_max_subport_profiles) {
1513 		RTE_LOG(ERR, SCHED, "%s: "
1514 		"Number of subport profiles exceeds the max limit\n",
1515 		 __func__);
1516 		return -EINVAL;
1517 	}
1518 
1519 	status = subport_profile_check(params, port->rate);
1520 	if (status != 0) {
1521 		RTE_LOG(ERR, SCHED,
1522 		"%s: subport profile check failed(%d)\n", __func__, status);
1523 		return -EINVAL;
1524 	}
1525 
1526 	rte_sched_subport_profile_convert(params, dst, port->rate);
1527 
1528 	/* Subport profile must not already exist */
1529 	for (i = 0; i < port->n_subport_profiles; i++)
1530 		if (memcmp(port->subport_profiles + i,
1531 		    dst, sizeof(*dst)) == 0) {
1532 			RTE_LOG(ERR, SCHED,
1533 			"%s: subport profile exists\n", __func__);
1534 			return -EINVAL;
1535 		}
1536 
1537 	/* Subport profile commit */
1538 	*subport_profile_id = port->n_subport_profiles;
1539 	port->n_subport_profiles++;
1540 
1541 	rte_sched_port_log_subport_profile(port, *subport_profile_id);
1542 
1543 	return 0;
1544 }
1545 
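/*
 * A port-level queue index packs the scheduling hierarchy into one word:
 * [ subport id | pipe id within subport | queue within pipe (4 bits) ],
 * with the pipe field using n_pipes_per_subport_log2 bits.
 */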
1546 static inline uint32_t
1547 rte_sched_port_qindex(struct rte_sched_port *port,
1548 	uint32_t subport,
1549 	uint32_t pipe,
1550 	uint32_t traffic_class,
1551 	uint32_t queue)
1552 {
1553 	return ((subport & (port->n_subports_per_port - 1)) <<
1554 		(port->n_pipes_per_subport_log2 + 4)) |
1555 		((pipe &
1556 		(port->subports[subport]->n_pipes_per_subport_enabled - 1)) << 4) |
1557 		((rte_sched_port_pipe_queue(port, traffic_class) + queue) &
1558 		(RTE_SCHED_QUEUES_PER_PIPE - 1));
1559 }
1560 
1561 void
1562 rte_sched_port_pkt_write(struct rte_sched_port *port,
1563 			 struct rte_mbuf *pkt,
1564 			 uint32_t subport, uint32_t pipe,
1565 			 uint32_t traffic_class,
1566 			 uint32_t queue, enum rte_color color)
1567 {
1568 	uint32_t queue_id =
1569 		rte_sched_port_qindex(port, subport, pipe, traffic_class, queue);
1570 
1571 	rte_mbuf_sched_set(pkt, queue_id, traffic_class, (uint8_t)color);
1572 }
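/*
 * Usage sketch (classify() is a hypothetical, application-defined stage):
 *
 *	uint32_t subport, pipe, tc, queue;
 *	enum rte_color color;
 *
 *	classify(pkt, &subport, &pipe, &tc, &queue, &color);
 *	rte_sched_port_pkt_write(port, pkt, subport, pipe, tc, queue, color);
 *
 * is typically done before handing the packet burst to
 * rte_sched_port_enqueue().
 */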
1573 
1574 void
1575 rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
1576 				  const struct rte_mbuf *pkt,
1577 				  uint32_t *subport, uint32_t *pipe,
1578 				  uint32_t *traffic_class, uint32_t *queue)
1579 {
1580 	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1581 
1582 	*subport = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1583 	*pipe = (queue_id >> 4) &
1584 		(port->subports[*subport]->n_pipes_per_subport_enabled - 1);
1585 	*traffic_class = rte_sched_port_pipe_tc(port, queue_id);
1586 	*queue = rte_sched_port_tc_queue(port, queue_id);
1587 }
1588 
1589 enum rte_color
1590 rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt)
1591 {
1592 	return (enum rte_color)rte_mbuf_sched_color_get(pkt);
1593 }
1594 
1595 int
1596 rte_sched_subport_read_stats(struct rte_sched_port *port,
1597 			     uint32_t subport_id,
1598 			     struct rte_sched_subport_stats *stats,
1599 			     uint32_t *tc_ov)
1600 {
1601 	struct rte_sched_subport *s;
1602 
1603 	/* Check user parameters */
1604 	if (port == NULL) {
1605 		RTE_LOG(ERR, SCHED,
1606 			"%s: Incorrect value for parameter port\n", __func__);
1607 		return -EINVAL;
1608 	}
1609 
1610 	if (subport_id >= port->n_subports_per_port) {
1611 		RTE_LOG(ERR, SCHED,
1612 			"%s: Incorrect value for subport id\n", __func__);
1613 		return -EINVAL;
1614 	}
1615 
1616 	if (stats == NULL) {
1617 		RTE_LOG(ERR, SCHED,
1618 			"%s: Incorrect value for parameter stats\n", __func__);
1619 		return -EINVAL;
1620 	}
1621 
1622 	if (tc_ov == NULL) {
1623 		RTE_LOG(ERR, SCHED,
1624 			"%s: Incorrect value for tc_ov\n", __func__);
1625 		return -EINVAL;
1626 	}
1627 
1628 	s = port->subports[subport_id];
1629 
1630 	/* Copy subport stats and clear */
1631 	memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
1632 	memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
1633 
1634 	/* Subport TC oversubscription status */
1635 	*tc_ov = s->tc_ov;
1636 
1637 	return 0;
1638 }
1639 
1640 int
1641 rte_sched_queue_read_stats(struct rte_sched_port *port,
1642 	uint32_t queue_id,
1643 	struct rte_sched_queue_stats *stats,
1644 	uint16_t *qlen)
1645 {
1646 	struct rte_sched_subport *s;
1647 	struct rte_sched_queue *q;
1648 	struct rte_sched_queue_extra *qe;
1649 	uint32_t subport_id, subport_qmask, subport_qindex;
1650 
1651 	/* Check user parameters */
1652 	if (port == NULL) {
1653 		RTE_LOG(ERR, SCHED,
1654 			"%s: Incorrect value for parameter port\n", __func__);
1655 		return -EINVAL;
1656 	}
1657 
1658 	if (queue_id >= rte_sched_port_queues_per_port(port)) {
1659 		RTE_LOG(ERR, SCHED,
1660 			"%s: Incorrect value for queue id\n", __func__);
1661 		return -EINVAL;
1662 	}
1663 
1664 	if (stats == NULL) {
1665 		RTE_LOG(ERR, SCHED,
1666 			"%s: Incorrect value for parameter stats\n", __func__);
1667 		return -EINVAL;
1668 	}
1669 
1670 	if (qlen == NULL) {
1671 		RTE_LOG(ERR, SCHED,
1672 			"%s: Incorrect value for parameter qlen\n", __func__);
1673 		return -EINVAL;
1674 	}
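	/* Despite its name, subport_qmask is a shift amount: the number of
	 * bits occupied by the per-subport queue index (log2 of pipes per
	 * subport plus 4 queue bits). The subport id sits in the bits above. */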
1675 	subport_qmask = port->n_pipes_per_subport_log2 + 4;
1676 	subport_id = (queue_id >> subport_qmask) & (port->n_subports_per_port - 1);
1677 
1678 	s = port->subports[subport_id];
1679 	subport_qindex = ((1 << subport_qmask) - 1) & queue_id;
1680 	q = s->queue + subport_qindex;
1681 	qe = s->queue_extra + subport_qindex;
1682 
1683 	/* Copy queue stats and clear */
1684 	memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats));
1685 	memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats));
1686 
1687 	/* Queue length */
1688 	*qlen = q->qw - q->qr;
1689 
1690 	return 0;
1691 }
1692 
1693 #ifdef RTE_SCHED_DEBUG
1694 
1695 static inline int
1696 rte_sched_port_queue_is_empty(struct rte_sched_subport *subport,
1697 	uint32_t qindex)
1698 {
1699 	struct rte_sched_queue *queue = subport->queue + qindex;
1700 
1701 	return queue->qr == queue->qw;
1702 }
1703 
1704 #endif /* RTE_SCHED_DEBUG */
1705 
1706 #ifdef RTE_SCHED_COLLECT_STATS
1707 
1708 static inline void
1709 rte_sched_port_update_subport_stats(struct rte_sched_port *port,
1710 	struct rte_sched_subport *subport,
1711 	uint32_t qindex,
1712 	struct rte_mbuf *pkt)
1713 {
1714 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1715 	uint32_t pkt_len = pkt->pkt_len;
1716 
1717 	subport->stats.n_pkts_tc[tc_index] += 1;
1718 	subport->stats.n_bytes_tc[tc_index] += pkt_len;
1719 }
1720 
1721 #ifdef RTE_SCHED_RED
1722 static inline void
1723 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1724 	struct rte_sched_subport *subport,
1725 	uint32_t qindex,
1726 	struct rte_mbuf *pkt,
1727 	uint32_t red)
1728 #else
1729 static inline void
1730 rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port,
1731 	struct rte_sched_subport *subport,
1732 	uint32_t qindex,
1733 	struct rte_mbuf *pkt,
1734 	__rte_unused uint32_t red)
1735 #endif
1736 {
1737 	uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex);
1738 	uint32_t pkt_len = pkt->pkt_len;
1739 
1740 	subport->stats.n_pkts_tc_dropped[tc_index] += 1;
1741 	subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len;
1742 #ifdef RTE_SCHED_RED
1743 	subport->stats.n_pkts_red_dropped[tc_index] += red;
1744 #endif
1745 }
1746 
1747 static inline void
1748 rte_sched_port_update_queue_stats(struct rte_sched_subport *subport,
1749 	uint32_t qindex,
1750 	struct rte_mbuf *pkt)
1751 {
1752 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1753 	uint32_t pkt_len = pkt->pkt_len;
1754 
1755 	qe->stats.n_pkts += 1;
1756 	qe->stats.n_bytes += pkt_len;
1757 }
1758 
1759 #ifdef RTE_SCHED_RED
1760 static inline void
1761 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1762 	uint32_t qindex,
1763 	struct rte_mbuf *pkt,
1764 	uint32_t red)
1765 #else
1766 static inline void
1767 rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport *subport,
1768 	uint32_t qindex,
1769 	struct rte_mbuf *pkt,
1770 	__rte_unused uint32_t red)
1771 #endif
1772 {
1773 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1774 	uint32_t pkt_len = pkt->pkt_len;
1775 
1776 	qe->stats.n_pkts_dropped += 1;
1777 	qe->stats.n_bytes_dropped += pkt_len;
1778 #ifdef RTE_SCHED_RED
1779 	qe->stats.n_pkts_red_dropped += red;
1780 #endif
1781 }
1782 
1783 #endif /* RTE_SCHED_COLLECT_STATS */
1784 
1785 #ifdef RTE_SCHED_RED
1786 
1787 static inline int
1788 rte_sched_port_red_drop(struct rte_sched_port *port,
1789 	struct rte_sched_subport *subport,
1790 	struct rte_mbuf *pkt,
1791 	uint32_t qindex,
1792 	uint16_t qlen)
1793 {
1794 	struct rte_sched_queue_extra *qe;
1795 	struct rte_red_config *red_cfg;
1796 	struct rte_red *red;
1797 	uint32_t tc_index;
1798 	enum rte_color color;
1799 
1800 	tc_index = rte_sched_port_pipe_tc(port, qindex);
1801 	color = rte_sched_port_pkt_read_color(pkt);
1802 	red_cfg = &subport->red_config[tc_index][color];
1803 
1804 	if ((red_cfg->min_th | red_cfg->max_th) == 0)
1805 		return 0;
1806 
1807 	qe = subport->queue_extra + qindex;
1808 	red = &qe->red;
1809 
1810 	return rte_red_enqueue(red_cfg, red, qlen, port->time);
1811 }
1812 
1813 static inline void
1814 rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port,
1815 	struct rte_sched_subport *subport, uint32_t qindex)
1816 {
1817 	struct rte_sched_queue_extra *qe = subport->queue_extra + qindex;
1818 	struct rte_red *red = &qe->red;
1819 
1820 	rte_red_mark_queue_empty(red, port->time);
1821 }
1822 
1823 #else
1824 
1825 static inline int rte_sched_port_red_drop(struct rte_sched_port *port __rte_unused,
1826 	struct rte_sched_subport *subport __rte_unused,
1827 	struct rte_mbuf *pkt __rte_unused,
1828 	uint32_t qindex __rte_unused,
1829 	uint16_t qlen __rte_unused)
1830 {
1831 	return 0;
1832 }
1833 
1834 #define rte_sched_port_set_queue_empty_timestamp(port, subport, qindex)
1835 
1836 #endif /* RTE_SCHED_RED */
1837 
1838 #ifdef RTE_SCHED_DEBUG
1839 
1840 static inline void
1841 debug_check_queue_slab(struct rte_sched_subport *subport, uint32_t bmp_pos,
1842 		       uint64_t bmp_slab)
1843 {
1844 	uint64_t mask;
1845 	uint32_t i, panic;
1846 
1847 	if (bmp_slab == 0)
1848 		rte_panic("Empty slab at position %u\n", bmp_pos);
1849 
1850 	panic = 0;
1851 	for (i = 0, mask = 1; i < 64; i++, mask <<= 1) {
1852 		if (mask & bmp_slab) {
1853 			if (rte_sched_port_queue_is_empty(subport, bmp_pos + i)) {
1854 				printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i);
1855 				panic = 1;
1856 			}
1857 		}
1858 	}
1859 
1860 	if (panic)
1861 		rte_panic("Empty queues in slab 0x%" PRIx64 "starting at position %u\n",
1862 			bmp_slab, bmp_pos);
1863 }
1864 
1865 #endif /* RTE_SCHED_DEBUG */
1866 
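/*
 * The scheduler queue ID carried in the mbuf encodes, from most to least
 * significant bits: subport, pipe, queue within the pipe. With 16 queues
 * per pipe, shifting right by (n_pipes_per_subport_log2 + 4) leaves just
 * the subport index.
 */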
1867 static inline struct rte_sched_subport *
1868 rte_sched_port_subport(struct rte_sched_port *port,
1869 	struct rte_mbuf *pkt)
1870 {
1871 	uint32_t queue_id = rte_mbuf_sched_queue_get(pkt);
1872 	uint32_t subport_id = queue_id >> (port->n_pipes_per_subport_log2 + 4);
1873 
1874 	return port->subports[subport_id];
1875 }
1876 
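/*
 * Resolve the queue index within the subport (subport_qmask strips the
 * subport bits from the global queue ID) and prefetch the queue read/write
 * pointers and, when statistics are enabled, the per-queue stats, so both
 * are warm by the time the packet is actually enqueued.
 */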
1877 static inline uint32_t
1878 rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_subport *subport,
1879 	struct rte_mbuf *pkt, uint32_t subport_qmask)
1880 {
1881 	struct rte_sched_queue *q;
1882 #ifdef RTE_SCHED_COLLECT_STATS
1883 	struct rte_sched_queue_extra *qe;
1884 #endif
1885 	uint32_t qindex = rte_mbuf_sched_queue_get(pkt);
1886 	uint32_t subport_queue_id = subport_qmask & qindex;
1887 
1888 	q = subport->queue + subport_queue_id;
1889 	rte_prefetch0(q);
1890 #ifdef RTE_SCHED_COLLECT_STATS
1891 	qe = subport->queue_extra + subport_queue_id;
1892 	rte_prefetch0(qe);
1893 #endif
1894 
1895 	return subport_queue_id;
1896 }
1897 
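/*
 * Queue sizes are powers of two, so masking the write pointer with
 * (qsize - 1) yields the wrap-around slot index. Prefetch the slot the
 * next write will land in, together with the subport bitmap cache line
 * covering this queue.
 */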
1898 static inline void
1899 rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port,
1900 	struct rte_sched_subport *subport,
1901 	uint32_t qindex,
1902 	struct rte_mbuf **qbase)
1903 {
1904 	struct rte_sched_queue *q;
1905 	struct rte_mbuf **q_qw;
1906 	uint16_t qsize;
1907 
1908 	q = subport->queue + qindex;
1909 	qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1910 	q_qw = qbase + (q->qw & (qsize - 1));
1911 
1912 	rte_prefetch0(q_qw);
1913 	rte_bitmap_prefetch0(subport->bmp, qindex);
1914 }
1915 
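/*
 * Write-side admission: the occupancy qw - qr relies on unsigned 16-bit
 * wrap-around of the queue counters. On drop, the (qlen < qsize) argument
 * tells the stats helpers whether the drop came from RED (queue not full)
 * or from tail-drop (queue full).
 */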
1916 static inline int
1917 rte_sched_port_enqueue_qwa(struct rte_sched_port *port,
1918 	struct rte_sched_subport *subport,
1919 	uint32_t qindex,
1920 	struct rte_mbuf **qbase,
1921 	struct rte_mbuf *pkt)
1922 {
1923 	struct rte_sched_queue *q;
1924 	uint16_t qsize;
1925 	uint16_t qlen;
1926 
1927 	q = subport->queue + qindex;
1928 	qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
1929 	qlen = q->qw - q->qr;
1930 
1931 	/* Drop the packet (and update drop stats) when the queue is full or RED drops it */
1932 	if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, qlen) ||
1933 		     (qlen >= qsize))) {
1934 		rte_pktmbuf_free(pkt);
1935 #ifdef RTE_SCHED_COLLECT_STATS
1936 		rte_sched_port_update_subport_stats_on_drop(port, subport,
1937 			qindex, pkt, qlen < qsize);
1938 		rte_sched_port_update_queue_stats_on_drop(subport, qindex, pkt,
1939 			qlen < qsize);
1940 #endif
1941 		return 0;
1942 	}
1943 
1944 	/* Enqueue packet */
1945 	qbase[q->qw & (qsize - 1)] = pkt;
1946 	q->qw++;
1947 
1948 	/* Activate queue in the subport bitmap */
1949 	rte_bitmap_set(subport->bmp, qindex);
1950 
1951 	/* Statistics */
1952 #ifdef RTE_SCHED_COLLECT_STATS
1953 	rte_sched_port_update_subport_stats(port, subport, qindex, pkt);
1954 	rte_sched_port_update_queue_stats(subport, qindex, pkt);
1955 #endif
1956 
1957 	return 1;
1958 }
1959 
1960 
1961 /*
1962  * The enqueue function implements a 4-level pipeline with each stage
1963  * processing two different packets. The purpose of using a pipeline
1964  * is to hide the latency of prefetching the data structures. The
1965  * naming convention is presented in the diagram below:
1966  *
1967  *   p00  _______   p10  _______   p20  _______   p30  _______
1968  * ----->|       |----->|       |----->|       |----->|       |----->
1969  *       |   0   |      |   1   |      |   2   |      |   3   |
1970  * ----->|_______|----->|_______|----->|_______|----->|_______|----->
1971  *   p01            p11            p21            p31
1972  *
1973  */
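/*
 * Stage mapping used below: stage 0 prefetches the mbuf headers, stage 1
 * resolves the subport and prefetches the queue read/write pointers,
 * stage 2 prefetches the queue write location, and stage 3 writes the
 * packet into its queue and marks the queue active in the bitmap.
 */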
1974 int
1975 rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts,
1976 		       uint32_t n_pkts)
1977 {
1978 	struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21,
1979 		*pkt30, *pkt31, *pkt_last;
1980 	struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base,
1981 		**q20_base, **q21_base, **q30_base, **q31_base, **q_last_base;
1982 	struct rte_sched_subport *subport00, *subport01, *subport10, *subport11,
1983 		*subport20, *subport21, *subport30, *subport31, *subport_last;
1984 	uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last;
1985 	uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last;
1986 	uint32_t subport_qmask;
1987 	uint32_t result, i;
1988 
1989 	result = 0;
1990 	subport_qmask = (1 << (port->n_pipes_per_subport_log2 + 4)) - 1;
1991 
1992 	/*
1993 	 * Less than 6 input packets available, which is not enough to
1994 	 * feed the pipeline
1995 	 */
1996 	if (unlikely(n_pkts < 6)) {
1997 		struct rte_sched_subport *subports[5];
1998 		struct rte_mbuf **q_base[5];
1999 		uint32_t q[5];
2000 
2001 		/* Prefetch the mbuf structure of each packet */
2002 		for (i = 0; i < n_pkts; i++)
2003 			rte_prefetch0(pkts[i]);
2004 
2005 		/* Prefetch the subport structure for each packet */
2006 		for (i = 0; i < n_pkts; i++)
2007 			subports[i] = rte_sched_port_subport(port, pkts[i]);
2008 
2009 		/* Prefetch the queue structure for each queue */
2010 		for (i = 0; i < n_pkts; i++)
2011 			q[i] = rte_sched_port_enqueue_qptrs_prefetch0(subports[i],
2012 					pkts[i], subport_qmask);
2013 
2014 		/* Prefetch the write pointer location of each queue */
2015 		for (i = 0; i < n_pkts; i++) {
2016 			q_base[i] = rte_sched_subport_pipe_qbase(subports[i], q[i]);
2017 			rte_sched_port_enqueue_qwa_prefetch0(port, subports[i],
2018 				q[i], q_base[i]);
2019 		}
2020 
2021 		/* Write each packet to its queue */
2022 		for (i = 0; i < n_pkts; i++)
2023 			result += rte_sched_port_enqueue_qwa(port, subports[i],
2024 						q[i], q_base[i], pkts[i]);
2025 
2026 		return result;
2027 	}
2028 
2029 	/* Feed the first 3 stages of the pipeline (6 packets needed) */
2030 	pkt20 = pkts[0];
2031 	pkt21 = pkts[1];
2032 	rte_prefetch0(pkt20);
2033 	rte_prefetch0(pkt21);
2034 
2035 	pkt10 = pkts[2];
2036 	pkt11 = pkts[3];
2037 	rte_prefetch0(pkt10);
2038 	rte_prefetch0(pkt11);
2039 
2040 	subport20 = rte_sched_port_subport(port, pkt20);
2041 	subport21 = rte_sched_port_subport(port, pkt21);
2042 	q20 = rte_sched_port_enqueue_qptrs_prefetch0(subport20,
2043 			pkt20, subport_qmask);
2044 	q21 = rte_sched_port_enqueue_qptrs_prefetch0(subport21,
2045 			pkt21, subport_qmask);
2046 
2047 	pkt00 = pkts[4];
2048 	pkt01 = pkts[5];
2049 	rte_prefetch0(pkt00);
2050 	rte_prefetch0(pkt01);
2051 
2052 	subport10 = rte_sched_port_subport(port, pkt10);
2053 	subport11 = rte_sched_port_subport(port, pkt11);
2054 	q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2055 			pkt10, subport_qmask);
2056 	q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2057 			pkt11, subport_qmask);
2058 
2059 	q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2060 	q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2061 	rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2062 	rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2063 
2064 	/* Run the pipeline */
2065 	for (i = 6; i < (n_pkts & (~1)); i += 2) {
2066 		/* Propagate stage inputs */
2067 		pkt30 = pkt20;
2068 		pkt31 = pkt21;
2069 		pkt20 = pkt10;
2070 		pkt21 = pkt11;
2071 		pkt10 = pkt00;
2072 		pkt11 = pkt01;
2073 		q30 = q20;
2074 		q31 = q21;
2075 		q20 = q10;
2076 		q21 = q11;
2077 		subport30 = subport20;
2078 		subport31 = subport21;
2079 		subport20 = subport10;
2080 		subport21 = subport11;
2081 		q30_base = q20_base;
2082 		q31_base = q21_base;
2083 
2084 		/* Stage 0: Get packets in */
2085 		pkt00 = pkts[i];
2086 		pkt01 = pkts[i + 1];
2087 		rte_prefetch0(pkt00);
2088 		rte_prefetch0(pkt01);
2089 
2090 		/* Stage 1: Prefetch subport and queue structure storing queue pointers */
2091 		subport10 = rte_sched_port_subport(port, pkt10);
2092 		subport11 = rte_sched_port_subport(port, pkt11);
2093 		q10 = rte_sched_port_enqueue_qptrs_prefetch0(subport10,
2094 				pkt10, subport_qmask);
2095 		q11 = rte_sched_port_enqueue_qptrs_prefetch0(subport11,
2096 				pkt11, subport_qmask);
2097 
2098 		/* Stage 2: Prefetch queue write location */
2099 		q20_base = rte_sched_subport_pipe_qbase(subport20, q20);
2100 		q21_base = rte_sched_subport_pipe_qbase(subport21, q21);
2101 		rte_sched_port_enqueue_qwa_prefetch0(port, subport20, q20, q20_base);
2102 		rte_sched_port_enqueue_qwa_prefetch0(port, subport21, q21, q21_base);
2103 
2104 		/* Stage 3: Write packet to queue and activate queue */
2105 		r30 = rte_sched_port_enqueue_qwa(port, subport30,
2106 				q30, q30_base, pkt30);
2107 		r31 = rte_sched_port_enqueue_qwa(port, subport31,
2108 				q31, q31_base, pkt31);
2109 		result += r30 + r31;
2110 	}
2111 
2112 	/*
2113 	 * Drain the pipeline (exactly 6 packets).
2114 	 * Handle the last packet in the case
2115 	 * of an odd number of input packets.
2116 	 */
2117 	pkt_last = pkts[n_pkts - 1];
2118 	rte_prefetch0(pkt_last);
2119 
2120 	subport00 = rte_sched_port_subport(port, pkt00);
2121 	subport01 = rte_sched_port_subport(port, pkt01);
2122 	q00 = rte_sched_port_enqueue_qptrs_prefetch0(subport00,
2123 			pkt00, subport_qmask);
2124 	q01 = rte_sched_port_enqueue_qptrs_prefetch0(subport01,
2125 			pkt01, subport_qmask);
2126 
2127 	q10_base = rte_sched_subport_pipe_qbase(subport10, q10);
2128 	q11_base = rte_sched_subport_pipe_qbase(subport11, q11);
2129 	rte_sched_port_enqueue_qwa_prefetch0(port, subport10, q10, q10_base);
2130 	rte_sched_port_enqueue_qwa_prefetch0(port, subport11, q11, q11_base);
2131 
2132 	r20 = rte_sched_port_enqueue_qwa(port, subport20,
2133 			q20, q20_base, pkt20);
2134 	r21 = rte_sched_port_enqueue_qwa(port, subport21,
2135 			q21, q21_base, pkt21);
2136 	result += r20 + r21;
2137 
2138 	subport_last = rte_sched_port_subport(port, pkt_last);
2139 	q_last = rte_sched_port_enqueue_qptrs_prefetch0(subport_last,
2140 				pkt_last, subport_qmask);
2141 
2142 	q00_base = rte_sched_subport_pipe_qbase(subport00, q00);
2143 	q01_base = rte_sched_subport_pipe_qbase(subport01, q01);
2144 	rte_sched_port_enqueue_qwa_prefetch0(port, subport00, q00, q00_base);
2145 	rte_sched_port_enqueue_qwa_prefetch0(port, subport01, q01, q01_base);
2146 
2147 	r10 = rte_sched_port_enqueue_qwa(port, subport10, q10,
2148 			q10_base, pkt10);
2149 	r11 = rte_sched_port_enqueue_qwa(port, subport11, q11,
2150 			q11_base, pkt11);
2151 	result += r10 + r11;
2152 
2153 	q_last_base = rte_sched_subport_pipe_qbase(subport_last, q_last);
2154 	rte_sched_port_enqueue_qwa_prefetch0(port, subport_last,
2155 		q_last, q_last_base);
2156 
2157 	r00 = rte_sched_port_enqueue_qwa(port, subport00, q00,
2158 			q00_base, pkt00);
2159 	r01 = rte_sched_port_enqueue_qwa(port, subport01, q01,
2160 			q01_base, pkt01);
2161 	result += r00 + r01;
2162 
2163 	if (n_pkts & 1) {
2164 		r_last = rte_sched_port_enqueue_qwa(port, subport_last,
2165 					q_last,	q_last_base, pkt_last);
2166 		result += r_last;
2167 	}
2168 
2169 	return result;
2170 }
2171 
2172 #ifndef RTE_SCHED_SUBPORT_TC_OV
2173 
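/*
 * Token bucket refill: for every full tb_period elapsed since the last
 * update, tb_credits_per_period credits are added, capped at tb_size;
 * tb_time only advances by whole periods, so the remainder carries over
 * to the next update. Illustrative numbers: with tb_period = 10 and
 * tb_credits_per_period = 100, 35 elapsed time units add 300 credits and
 * leave 5 time units pending.
 */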
2174 static inline void
2175 grinder_credits_update(struct rte_sched_port *port,
2176 	struct rte_sched_subport *subport, uint32_t pos)
2177 {
2178 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2179 	struct rte_sched_pipe *pipe = grinder->pipe;
2180 	struct rte_sched_pipe_profile *params = grinder->pipe_params;
2181 	struct rte_sched_subport_profile *sp = grinder->subport_params;
2182 	uint64_t n_periods;
2183 	uint32_t i;
2184 
2185 	/* Subport TB */
2186 	n_periods = (port->time - subport->tb_time) / sp->tb_period;
2187 	subport->tb_credits += n_periods * sp->tb_credits_per_period;
2188 	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2189 	subport->tb_time += n_periods * sp->tb_period;
2190 
2191 	/* Pipe TB */
2192 	n_periods = (port->time - pipe->tb_time) / params->tb_period;
2193 	pipe->tb_credits += n_periods * params->tb_credits_per_period;
2194 	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2195 	pipe->tb_time += n_periods * params->tb_period;
2196 
2197 	/* Subport TCs */
2198 	if (unlikely(port->time >= subport->tc_time)) {
2199 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2200 			subport->tc_credits[i] = sp->tc_credits_per_period[i];
2201 
2202 		subport->tc_time = port->time + sp->tc_period;
2203 	}
2204 
2205 	/* Pipe TCs */
2206 	if (unlikely(port->time >= pipe->tc_time)) {
2207 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2208 			pipe->tc_credits[i] = params->tc_credits_per_period[i];
2209 
2210 		pipe->tc_time = port->time + params->tc_period;
2211 	}
2212 }
2213 
2214 #else
2215 
2216 static inline uint64_t
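/*
 * Best-effort oversubscription watermark: when the best-effort class has
 * consumed nearly all the bandwidth left over by the strict-priority
 * classes (subport congestion), the watermark decays by roughly 1/128
 * per tc_period, down to tc_ov_wm_min; otherwise it grows by roughly
 * 1/128 per tc_period, up to tc_ov_wm_max.
 */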
2217 grinder_tc_ov_credits_update(struct rte_sched_port *port,
2218 	struct rte_sched_subport *subport, uint32_t pos)
2219 {
2220 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2221 	struct rte_sched_subport_profile *sp = grinder->subport_params;
2222 	uint64_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2223 	uint64_t tc_consumption = 0, tc_ov_consumption_max;
2224 	uint64_t tc_ov_wm = subport->tc_ov_wm;
2225 	uint32_t i;
2226 
2227 	if (subport->tc_ov == 0)
2228 		return subport->tc_ov_wm_max;
2229 
2230 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2231 		tc_ov_consumption[i] = sp->tc_credits_per_period[i]
2232 					-  subport->tc_credits[i];
2233 		tc_consumption += tc_ov_consumption[i];
2234 	}
2235 
2236 	tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] =
2237 		sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2238 		subport->tc_credits[RTE_SCHED_TRAFFIC_CLASS_BE];
2239 
2240 	tc_ov_consumption_max =
2241 		sp->tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASS_BE] -
2242 		tc_consumption;
2243 
2244 	if (tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASS_BE] >
2245 		(tc_ov_consumption_max - port->mtu)) {
2246 		tc_ov_wm  -= tc_ov_wm >> 7;
2247 		if (tc_ov_wm < subport->tc_ov_wm_min)
2248 			tc_ov_wm = subport->tc_ov_wm_min;
2249 
2250 		return tc_ov_wm;
2251 	}
2252 
2253 	tc_ov_wm += (tc_ov_wm >> 7) + 1;
2254 	if (tc_ov_wm > subport->tc_ov_wm_max)
2255 		tc_ov_wm = subport->tc_ov_wm_max;
2256 
2257 	return tc_ov_wm;
2258 }
2259 
2260 static inline void
2261 grinder_credits_update(struct rte_sched_port *port,
2262 	struct rte_sched_subport *subport, uint32_t pos)
2263 {
2264 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2265 	struct rte_sched_pipe *pipe = grinder->pipe;
2266 	struct rte_sched_pipe_profile *params = grinder->pipe_params;
2267 	struct rte_sched_subport_profile *sp = grinder->subport_params;
2268 	uint64_t n_periods;
2269 	uint32_t i;
2270 
2271 	/* Subport TB */
2272 	n_periods = (port->time - subport->tb_time) / sp->tb_period;
2273 	subport->tb_credits += n_periods * sp->tb_credits_per_period;
2274 	subport->tb_credits = RTE_MIN(subport->tb_credits, sp->tb_size);
2275 	subport->tb_time += n_periods * sp->tb_period;
2276 
2277 	/* Pipe TB */
2278 	n_periods = (port->time - pipe->tb_time) / params->tb_period;
2279 	pipe->tb_credits += n_periods * params->tb_credits_per_period;
2280 	pipe->tb_credits = RTE_MIN(pipe->tb_credits, params->tb_size);
2281 	pipe->tb_time += n_periods * params->tb_period;
2282 
2283 	/* Subport TCs */
2284 	if (unlikely(port->time >= subport->tc_time)) {
2285 		subport->tc_ov_wm =
2286 			grinder_tc_ov_credits_update(port, subport, pos);
2287 
2288 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2289 			subport->tc_credits[i] = sp->tc_credits_per_period[i];
2290 
2291 		subport->tc_time = port->time + sp->tc_period;
2292 		subport->tc_ov_period_id++;
2293 	}
2294 
2295 	/* Pipe TCs */
2296 	if (unlikely(port->time >= pipe->tc_time)) {
2297 		for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2298 			pipe->tc_credits[i] = params->tc_credits_per_period[i];
2299 		pipe->tc_time = port->time + params->tc_period;
2300 	}
2301 
2302 	/* Pipe TCs - Oversubscription */
2303 	if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) {
2304 		pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight;
2305 
2306 		pipe->tc_ov_period_id = subport->tc_ov_period_id;
2307 	}
2308 }
2309 
2310 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2311 
2312 
2313 #ifndef RTE_SCHED_SUBPORT_TC_OV
2314 
2315 static inline int
2316 grinder_credits_check(struct rte_sched_port *port,
2317 	struct rte_sched_subport *subport, uint32_t pos)
2318 {
2319 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2320 	struct rte_sched_pipe *pipe = grinder->pipe;
2321 	struct rte_mbuf *pkt = grinder->pkt;
2322 	uint32_t tc_index = grinder->tc_index;
2323 	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2324 	uint64_t subport_tb_credits = subport->tb_credits;
2325 	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2326 	uint64_t pipe_tb_credits = pipe->tb_credits;
2327 	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2328 	int enough_credits;
2329 
2330 	/* Check queue credits */
2331 	enough_credits = (pkt_len <= subport_tb_credits) &&
2332 		(pkt_len <= subport_tc_credits) &&
2333 		(pkt_len <= pipe_tb_credits) &&
2334 		(pkt_len <= pipe_tc_credits);
2335 
2336 	if (!enough_credits)
2337 		return 0;
2338 
2339 	/* Update port credits */
2340 	subport->tb_credits -= pkt_len;
2341 	subport->tc_credits[tc_index] -= pkt_len;
2342 	pipe->tb_credits -= pkt_len;
2343 	pipe->tc_credits[tc_index] -= pkt_len;
2344 
2345 	return 1;
2346 }
2347 
2348 #else
2349 
2350 static inline int
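/*
 * Same check as above plus the best-effort oversubscription credits.
 * pipe_tc_ov_mask1[] is ~0 for every strict-priority class (so the extra
 * check can never fail for them) and holds pipe->tc_ov_credits for the
 * best-effort class; pipe_tc_ov_mask2[] makes the matching credit
 * decrement a no-op for everything but best-effort.
 */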
2351 grinder_credits_check(struct rte_sched_port *port,
2352 	struct rte_sched_subport *subport, uint32_t pos)
2353 {
2354 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2355 	struct rte_sched_pipe *pipe = grinder->pipe;
2356 	struct rte_mbuf *pkt = grinder->pkt;
2357 	uint32_t tc_index = grinder->tc_index;
2358 	uint64_t pkt_len = pkt->pkt_len + port->frame_overhead;
2359 	uint64_t subport_tb_credits = subport->tb_credits;
2360 	uint64_t subport_tc_credits = subport->tc_credits[tc_index];
2361 	uint64_t pipe_tb_credits = pipe->tb_credits;
2362 	uint64_t pipe_tc_credits = pipe->tc_credits[tc_index];
2363 	uint64_t pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
2364 	uint64_t pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] = {0};
2365 	uint64_t pipe_tc_ov_credits;
2366 	uint32_t i;
2367 	int enough_credits;
2368 
2369 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
2370 		pipe_tc_ov_mask1[i] = ~0LLU;
2371 
2372 	pipe_tc_ov_mask1[RTE_SCHED_TRAFFIC_CLASS_BE] = pipe->tc_ov_credits;
2373 	pipe_tc_ov_mask2[RTE_SCHED_TRAFFIC_CLASS_BE] = ~0LLU;
2374 	pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index];
2375 
2376 	/* Check pipe and subport credits */
2377 	enough_credits = (pkt_len <= subport_tb_credits) &&
2378 		(pkt_len <= subport_tc_credits) &&
2379 		(pkt_len <= pipe_tb_credits) &&
2380 		(pkt_len <= pipe_tc_credits) &&
2381 		(pkt_len <= pipe_tc_ov_credits);
2382 
2383 	if (!enough_credits)
2384 		return 0;
2385 
2386 	/* Update pipe and subport credits */
2387 	subport->tb_credits -= pkt_len;
2388 	subport->tc_credits[tc_index] -= pkt_len;
2389 	pipe->tb_credits -= pkt_len;
2390 	pipe->tc_credits[tc_index] -= pkt_len;
2391 	pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len;
2392 
2393 	return 1;
2394 }
2395 
2396 #endif /* RTE_SCHED_SUBPORT_TC_OV */
2397 
2398 
2399 static inline int
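/*
 * Try to schedule the grinder's current packet: if all credit checks
 * pass, the packet is moved to the output array, port time advances by
 * its framed length, and the be_tc_active mask applies the WRR token
 * update only when the best-effort traffic class is being served
 * (branchless alternative to an if).
 */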
2400 grinder_schedule(struct rte_sched_port *port,
2401 	struct rte_sched_subport *subport, uint32_t pos)
2402 {
2403 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2404 	struct rte_sched_queue *queue = grinder->queue[grinder->qpos];
2405 	struct rte_mbuf *pkt = grinder->pkt;
2406 	uint32_t pkt_len = pkt->pkt_len + port->frame_overhead;
2407 	uint32_t be_tc_active;
2408 
2409 	if (!grinder_credits_check(port, subport, pos))
2410 		return 0;
2411 
2412 	/* Advance port time */
2413 	port->time += pkt_len;
2414 
2415 	/* Send packet */
2416 	port->pkts_out[port->n_pkts_out++] = pkt;
2417 	queue->qr++;
2418 
2419 	be_tc_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE) ? ~0x0 : 0x0;
2420 	grinder->wrr_tokens[grinder->qpos] +=
2421 		(pkt_len * grinder->wrr_cost[grinder->qpos]) & be_tc_active;
2422 
2423 	if (queue->qr == queue->qw) {
2424 		uint32_t qindex = grinder->qindex[grinder->qpos];
2425 
2426 		rte_bitmap_clear(subport->bmp, qindex);
2427 		grinder->qmask &= ~(1 << grinder->qpos);
2428 		if (be_tc_active)
2429 			grinder->wrr_mask[grinder->qpos] = 0;
2430 		rte_sched_port_set_queue_empty_timestamp(port, subport, qindex);
2431 	}
2432 
2433 	/* Reset pipe loop detection */
2434 	subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2435 	grinder->productive = 1;
2436 
2437 	return 1;
2438 }
2439 
2440 #ifdef SCHED_VECTOR_SSE4
2441 
2442 static inline int
2443 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2444 {
2445 	__m128i index = _mm_set1_epi32(base_pipe);
2446 	__m128i pipes = _mm_load_si128((__m128i *)subport->grinder_base_bmp_pos);
2447 	__m128i res = _mm_cmpeq_epi32(pipes, index);
2448 
2449 	pipes = _mm_load_si128((__m128i *)(subport->grinder_base_bmp_pos + 4));
2450 	pipes = _mm_cmpeq_epi32(pipes, index);
2451 	res = _mm_or_si128(res, pipes);
2452 
2453 	if (_mm_testz_si128(res, res))
2454 		return 0;
2455 
2456 	return 1;
2457 }
2458 
2459 #elif defined(SCHED_VECTOR_NEON)
2460 
2461 static inline int
2462 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2463 {
2464 	uint32x4_t index, pipes;
2465 	uint32_t *pos = (uint32_t *)subport->grinder_base_bmp_pos;
2466 
2467 	index = vmovq_n_u32(base_pipe);
2468 	pipes = vld1q_u32(pos);
2469 	if (!vminvq_u32(veorq_u32(pipes, index)))
2470 		return 1;
2471 
2472 	pipes = vld1q_u32(pos + 4);
2473 	if (!vminvq_u32(veorq_u32(pipes, index)))
2474 		return 1;
2475 
2476 	return 0;
2477 }
2478 
2479 #else
2480 
2481 static inline int
2482 grinder_pipe_exists(struct rte_sched_subport *subport, uint32_t base_pipe)
2483 {
2484 	uint32_t i;
2485 
2486 	for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) {
2487 		if (subport->grinder_base_bmp_pos[i] == base_pipe)
2488 			return 1;
2489 	}
2490 
2491 	return 0;
2492 }
2493 
2494 #endif /* SCHED_VECTOR_SSE4, SCHED_VECTOR_NEON */
2495 
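/*
 * A 64-bit bitmap slab covers four consecutive pipes (16 queues each).
 * Split it into four 16-bit per-pipe queue masks and store only the
 * non-empty ones in the grinder's pipe cache.
 */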
2496 static inline void
2497 grinder_pcache_populate(struct rte_sched_subport *subport,
2498 	uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab)
2499 {
2500 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2501 	uint16_t w[4];
2502 
2503 	grinder->pcache_w = 0;
2504 	grinder->pcache_r = 0;
2505 
2506 	w[0] = (uint16_t) bmp_slab;
2507 	w[1] = (uint16_t) (bmp_slab >> 16);
2508 	w[2] = (uint16_t) (bmp_slab >> 32);
2509 	w[3] = (uint16_t) (bmp_slab >> 48);
2510 
2511 	grinder->pcache_qmask[grinder->pcache_w] = w[0];
2512 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos;
2513 	grinder->pcache_w += (w[0] != 0);
2514 
2515 	grinder->pcache_qmask[grinder->pcache_w] = w[1];
2516 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16;
2517 	grinder->pcache_w += (w[1] != 0);
2518 
2519 	grinder->pcache_qmask[grinder->pcache_w] = w[2];
2520 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32;
2521 	grinder->pcache_w += (w[2] != 0);
2522 
2523 	grinder->pcache_qmask[grinder->pcache_w] = w[3];
2524 	grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48;
2525 	grinder->pcache_w += (w[3] != 0);
2526 }
2527 
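/*
 * Build the grinder's traffic-class cache for the selected pipe: one
 * entry per backlogged strict-priority class (a single queue each), plus
 * one entry holding the 4-bit mask of backlogged best-effort queues.
 */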
2528 static inline void
2529 grinder_tccache_populate(struct rte_sched_subport *subport,
2530 	uint32_t pos, uint32_t qindex, uint16_t qmask)
2531 {
2532 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2533 	uint8_t b, i;
2534 
2535 	grinder->tccache_w = 0;
2536 	grinder->tccache_r = 0;
2537 
2538 	for (i = 0; i < RTE_SCHED_TRAFFIC_CLASS_BE; i++) {
2539 		b = (uint8_t) ((qmask >> i) & 0x1);
2540 		grinder->tccache_qmask[grinder->tccache_w] = b;
2541 		grinder->tccache_qindex[grinder->tccache_w] = qindex + i;
2542 		grinder->tccache_w += (b != 0);
2543 	}
2544 
2545 	b = (uint8_t) (qmask >> (RTE_SCHED_TRAFFIC_CLASS_BE));
2546 	grinder->tccache_qmask[grinder->tccache_w] = b;
2547 	grinder->tccache_qindex[grinder->tccache_w] = qindex +
2548 		RTE_SCHED_TRAFFIC_CLASS_BE;
2549 	grinder->tccache_w += (b != 0);
2550 }
2551 
2552 static inline int
2553 grinder_next_tc(struct rte_sched_port *port,
2554 	struct rte_sched_subport *subport, uint32_t pos)
2555 {
2556 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2557 	struct rte_mbuf **qbase;
2558 	uint32_t qindex;
2559 	uint16_t qsize;
2560 
2561 	if (grinder->tccache_r == grinder->tccache_w)
2562 		return 0;
2563 
2564 	qindex = grinder->tccache_qindex[grinder->tccache_r];
2565 	qbase = rte_sched_subport_pipe_qbase(subport, qindex);
2566 	qsize = rte_sched_subport_pipe_qsize(port, subport, qindex);
2567 
2568 	grinder->tc_index = rte_sched_port_pipe_tc(port, qindex);
2569 	grinder->qmask = grinder->tccache_qmask[grinder->tccache_r];
2570 	grinder->qsize = qsize;
2571 
2572 	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2573 		grinder->queue[0] = subport->queue + qindex;
2574 		grinder->qbase[0] = qbase;
2575 		grinder->qindex[0] = qindex;
2576 		grinder->tccache_r++;
2577 
2578 		return 1;
2579 	}
2580 
2581 	grinder->queue[0] = subport->queue + qindex;
2582 	grinder->queue[1] = subport->queue + qindex + 1;
2583 	grinder->queue[2] = subport->queue + qindex + 2;
2584 	grinder->queue[3] = subport->queue + qindex + 3;
2585 
2586 	grinder->qbase[0] = qbase;
2587 	grinder->qbase[1] = qbase + qsize;
2588 	grinder->qbase[2] = qbase + 2 * qsize;
2589 	grinder->qbase[3] = qbase + 3 * qsize;
2590 
2591 	grinder->qindex[0] = qindex;
2592 	grinder->qindex[1] = qindex + 1;
2593 	grinder->qindex[2] = qindex + 2;
2594 	grinder->qindex[3] = qindex + 3;
2595 
2596 	grinder->tccache_r++;
2597 	return 1;
2598 }
2599 
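/*
 * Pick the next pipe for this grinder: first drain the pipe cache, then
 * scan the subport bitmap for another non-empty pipe group, skipping
 * groups already owned by other grinders. The pipe index is the queue
 * index divided by 16 (queues per pipe).
 */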
2600 static inline int
2601 grinder_next_pipe(struct rte_sched_port *port,
2602 	struct rte_sched_subport *subport, uint32_t pos)
2603 {
2604 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2605 	uint32_t pipe_qindex;
2606 	uint16_t pipe_qmask;
2607 
2608 	if (grinder->pcache_r < grinder->pcache_w) {
2609 		pipe_qmask = grinder->pcache_qmask[grinder->pcache_r];
2610 		pipe_qindex = grinder->pcache_qindex[grinder->pcache_r];
2611 		grinder->pcache_r++;
2612 	} else {
2613 		uint64_t bmp_slab = 0;
2614 		uint32_t bmp_pos = 0;
2615 
2616 		/* Get another non-empty pipe group */
2617 		if (unlikely(rte_bitmap_scan(subport->bmp, &bmp_pos, &bmp_slab) <= 0))
2618 			return 0;
2619 
2620 #ifdef RTE_SCHED_DEBUG
2621 		debug_check_queue_slab(subport, bmp_pos, bmp_slab);
2622 #endif
2623 
2624 		/* Return if pipe group already in one of the other grinders */
2625 		subport->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID;
2626 		if (unlikely(grinder_pipe_exists(subport, bmp_pos)))
2627 			return 0;
2628 
2629 		subport->grinder_base_bmp_pos[pos] = bmp_pos;
2630 
2631 		/* Install new pipe group into grinder's pipe cache */
2632 		grinder_pcache_populate(subport, pos, bmp_pos, bmp_slab);
2633 
2634 		pipe_qmask = grinder->pcache_qmask[0];
2635 		pipe_qindex = grinder->pcache_qindex[0];
2636 		grinder->pcache_r = 1;
2637 	}
2638 
2639 	/* Install new pipe in the grinder */
2640 	grinder->pindex = pipe_qindex >> 4;
2641 	grinder->subport = subport;
2642 	grinder->pipe = subport->pipe + grinder->pindex;
2643 	grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */
2644 	grinder->productive = 0;
2645 
2646 	grinder_tccache_populate(subport, pos, pipe_qindex, pipe_qmask);
2647 	grinder_next_tc(port, subport, pos);
2648 
2649 	/* Check for pipe exhaustion */
2650 	if (grinder->pindex == subport->pipe_loop) {
2651 		subport->pipe_exhaustion = 1;
2652 		subport->pipe_loop = RTE_SCHED_PIPE_INVALID;
2653 	}
2654 
2655 	return 1;
2656 }
2657 
2658 
2659 static inline void
2660 grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
2661 {
2662 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2663 	struct rte_sched_pipe *pipe = grinder->pipe;
2664 	struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
2665 	uint32_t qmask = grinder->qmask;
2666 
2667 	grinder->wrr_tokens[0] =
2668 		((uint16_t) pipe->wrr_tokens[0]) << RTE_SCHED_WRR_SHIFT;
2669 	grinder->wrr_tokens[1] =
2670 		((uint16_t) pipe->wrr_tokens[1]) << RTE_SCHED_WRR_SHIFT;
2671 	grinder->wrr_tokens[2] =
2672 		((uint16_t) pipe->wrr_tokens[2]) << RTE_SCHED_WRR_SHIFT;
2673 	grinder->wrr_tokens[3] =
2674 		((uint16_t) pipe->wrr_tokens[3]) << RTE_SCHED_WRR_SHIFT;
2675 
2676 	grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
2677 	grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
2678 	grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
2679 	grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
2680 
2681 	grinder->wrr_cost[0] = pipe_params->wrr_cost[0];
2682 	grinder->wrr_cost[1] = pipe_params->wrr_cost[1];
2683 	grinder->wrr_cost[2] = pipe_params->wrr_cost[2];
2684 	grinder->wrr_cost[3] = pipe_params->wrr_cost[3];
2685 }
2686 
2687 static inline void
2688 grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
2689 {
2690 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2691 	struct rte_sched_pipe *pipe = grinder->pipe;
2692 
2693 	pipe->wrr_tokens[0] =
2694 			(grinder->wrr_tokens[0] & grinder->wrr_mask[0]) >>
2695 				RTE_SCHED_WRR_SHIFT;
2696 	pipe->wrr_tokens[1] =
2697 			(grinder->wrr_tokens[1] & grinder->wrr_mask[1]) >>
2698 				RTE_SCHED_WRR_SHIFT;
2699 	pipe->wrr_tokens[2] =
2700 			(grinder->wrr_tokens[2] & grinder->wrr_mask[2]) >>
2701 				RTE_SCHED_WRR_SHIFT;
2702 	pipe->wrr_tokens[3] =
2703 			(grinder->wrr_tokens[3] & grinder->wrr_mask[3]) >>
2704 				RTE_SCHED_WRR_SHIFT;
2705 }
2706 
2707 static inline void
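/*
 * Select the next best-effort queue: ORing with ~wrr_mask forces the
 * token count of inactive queues to 0xFFFF so they cannot be picked, the
 * queue with the lowest accumulated cost wins, and subtracting the
 * minimum from all four keeps the token counters from overflowing.
 */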
2708 grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
2709 {
2710 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2711 	uint16_t wrr_tokens_min;
2712 
2713 	grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
2714 	grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
2715 	grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
2716 	grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
2717 
2718 	grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
2719 	wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
2720 
2721 	grinder->wrr_tokens[0] -= wrr_tokens_min;
2722 	grinder->wrr_tokens[1] -= wrr_tokens_min;
2723 	grinder->wrr_tokens[2] -= wrr_tokens_min;
2724 	grinder->wrr_tokens[3] -= wrr_tokens_min;
2725 }
2726 
2727 
2728 #define grinder_evict(subport, pos)
2729 
2730 static inline void
2731 grinder_prefetch_pipe(struct rte_sched_subport *subport, uint32_t pos)
2732 {
2733 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2734 
2735 	rte_prefetch0(grinder->pipe);
2736 	rte_prefetch0(grinder->queue[0]);
2737 }
2738 
2739 static inline void
2740 grinder_prefetch_tc_queue_arrays(struct rte_sched_subport *subport, uint32_t pos)
2741 {
2742 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2743 	uint16_t qsize, qr[RTE_SCHED_MAX_QUEUES_PER_TC];
2744 
2745 	qsize = grinder->qsize;
2746 	grinder->qpos = 0;
2747 
2748 	if (grinder->tc_index < RTE_SCHED_TRAFFIC_CLASS_BE) {
2749 		qr[0] = grinder->queue[0]->qr & (qsize - 1);
2750 
2751 		rte_prefetch0(grinder->qbase[0] + qr[0]);
2752 		return;
2753 	}
2754 
2755 	qr[0] = grinder->queue[0]->qr & (qsize - 1);
2756 	qr[1] = grinder->queue[1]->qr & (qsize - 1);
2757 	qr[2] = grinder->queue[2]->qr & (qsize - 1);
2758 	qr[3] = grinder->queue[3]->qr & (qsize - 1);
2759 
2760 	rte_prefetch0(grinder->qbase[0] + qr[0]);
2761 	rte_prefetch0(grinder->qbase[1] + qr[1]);
2762 
2763 	grinder_wrr_load(subport, pos);
2764 	grinder_wrr(subport, pos);
2765 
2766 	rte_prefetch0(grinder->qbase[2] + qr[2]);
2767 	rte_prefetch0(grinder->qbase[3] + qr[3]);
2768 }
2769 
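/*
 * Prefetch the mbuf at the queue's read position. On 64-bit targets a
 * 64-byte cache line holds 8 mbuf pointers, so when the last slot of a
 * line is consumed ((qr & 0x7) == 7) the next line of the queue array is
 * prefetched as well.
 */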
2770 static inline void
2771 grinder_prefetch_mbuf(struct rte_sched_subport *subport, uint32_t pos)
2772 {
2773 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2774 	uint32_t qpos = grinder->qpos;
2775 	struct rte_mbuf **qbase = grinder->qbase[qpos];
2776 	uint16_t qsize = grinder->qsize;
2777 	uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1);
2778 
2779 	grinder->pkt = qbase[qr];
2780 	rte_prefetch0(grinder->pkt);
2781 
2782 	if (unlikely((qr & 0x7) == 7)) {
2783 		uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1);
2784 
2785 		rte_prefetch0(qbase + qr_next);
2786 	}
2787 }
2788 
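/*
 * Per-grinder state machine: PREFETCH_PIPE picks a pipe and prefetches
 * it, PREFETCH_TC_QUEUE_ARRAYS resolves the profiles, updates credits
 * and prefetches the queue arrays, PREFETCH_MBUF prefetches the head
 * packet, and READ_MBUF schedules it. The return value is the number of
 * packets (0 or 1) sent to the output array in this step.
 */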
2789 static inline uint32_t
2790 grinder_handle(struct rte_sched_port *port,
2791 	struct rte_sched_subport *subport, uint32_t pos)
2792 {
2793 	struct rte_sched_grinder *grinder = subport->grinder + pos;
2794 
2795 	switch (grinder->state) {
2796 	case e_GRINDER_PREFETCH_PIPE:
2797 	{
2798 		if (grinder_next_pipe(port, subport, pos)) {
2799 			grinder_prefetch_pipe(subport, pos);
2800 			subport->busy_grinders++;
2801 
2802 			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2803 			return 0;
2804 		}
2805 
2806 		return 0;
2807 	}
2808 
2809 	case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS:
2810 	{
2811 		struct rte_sched_pipe *pipe = grinder->pipe;
2812 
2813 		grinder->pipe_params = subport->pipe_profiles + pipe->profile;
2814 		grinder->subport_params = port->subport_profiles +
2815 						subport->profile;
2816 
2817 		grinder_prefetch_tc_queue_arrays(subport, pos);
2818 		grinder_credits_update(port, subport, pos);
2819 
2820 		grinder->state = e_GRINDER_PREFETCH_MBUF;
2821 		return 0;
2822 	}
2823 
2824 	case e_GRINDER_PREFETCH_MBUF:
2825 	{
2826 		grinder_prefetch_mbuf(subport, pos);
2827 
2828 		grinder->state = e_GRINDER_READ_MBUF;
2829 		return 0;
2830 	}
2831 
2832 	case e_GRINDER_READ_MBUF:
2833 	{
2834 		uint32_t wrr_active, result = 0;
2835 
2836 		result = grinder_schedule(port, subport, pos);
2837 
2838 		wrr_active = (grinder->tc_index == RTE_SCHED_TRAFFIC_CLASS_BE);
2839 
2840 		/* Look for next packet within the same TC */
2841 		if (result && grinder->qmask) {
2842 			if (wrr_active)
2843 				grinder_wrr(subport, pos);
2844 
2845 			grinder_prefetch_mbuf(subport, pos);
2846 
2847 			return 1;
2848 		}
2849 
2850 		if (wrr_active)
2851 			grinder_wrr_store(subport, pos);
2852 
2853 		/* Look for another active TC within same pipe */
2854 		if (grinder_next_tc(port, subport, pos)) {
2855 			grinder_prefetch_tc_queue_arrays(subport, pos);
2856 
2857 			grinder->state = e_GRINDER_PREFETCH_MBUF;
2858 			return result;
2859 		}
2860 
2861 		if (grinder->productive == 0 &&
2862 		    subport->pipe_loop == RTE_SCHED_PIPE_INVALID)
2863 			subport->pipe_loop = grinder->pindex;
2864 
2865 		grinder_evict(subport, pos);
2866 
2867 		/* Look for another active pipe */
2868 		if (grinder_next_pipe(port, subport, pos)) {
2869 			grinder_prefetch_pipe(subport, pos);
2870 
2871 			grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS;
2872 			return result;
2873 		}
2874 
2875 		/* No active pipe found */
2876 		subport->busy_grinders--;
2877 
2878 		grinder->state = e_GRINDER_PREFETCH_PIPE;
2879 		return result;
2880 	}
2881 
2882 	default:
2883 		rte_panic("Algorithmic error (invalid state)\n");
2884 		return 0;
2885 	}
2886 }
2887 
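/*
 * Convert the TSC cycles elapsed since the last dequeue into a byte
 * budget using RTE_SCHED_TIME_SHIFT fixed-point arithmetic and a
 * precomputed reciprocal (no division on the fast path). Only whole
 * bytes are accounted, so the cycle remainder carries over, and a TSC
 * value that appears to go backwards resets the reference point.
 */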
2888 static inline void
2889 rte_sched_port_time_resync(struct rte_sched_port *port)
2890 {
2891 	uint64_t cycles = rte_get_tsc_cycles();
2892 	uint64_t cycles_diff;
2893 	uint64_t bytes_diff;
2894 	uint32_t i;
2895 
2896 	if (cycles < port->time_cpu_cycles)
2897 		port->time_cpu_cycles = 0;
2898 
2899 	cycles_diff = cycles - port->time_cpu_cycles;
2900 	/* Compute elapsed time in bytes */
2901 	bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT,
2902 					   port->inv_cycles_per_byte);
2903 
2904 	/* Advance port time */
2905 	port->time_cpu_cycles +=
2906 		(bytes_diff * port->cycles_per_byte) >> RTE_SCHED_TIME_SHIFT;
2907 	port->time_cpu_bytes += bytes_diff;
2908 	if (port->time < port->time_cpu_bytes)
2909 		port->time = port->time_cpu_bytes;
2910 
2911 	/* Reset pipe loop detection */
2912 	for (i = 0; i < port->n_subports_per_port; i++)
2913 		port->subports[i]->pipe_loop = RTE_SCHED_PIPE_INVALID;
2914 }
2915 
2916 static inline int
2917 rte_sched_port_exceptions(struct rte_sched_subport *subport, int second_pass)
2918 {
2919 	int exceptions;
2920 
2921 	/* Check if any exception flag is set */
2922 	exceptions = (second_pass && subport->busy_grinders == 0) ||
2923 		(subport->pipe_exhaustion == 1);
2924 
2925 	/* Clear exception flags */
2926 	subport->pipe_exhaustion = 0;
2927 
2928 	return exceptions;
2929 }
2930 
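/*
 * Dequeue up to n_pkts packets, cycling the grinders round-robin and
 * moving on to the next subport when the current one runs out of work.
 * Illustrative usage sketch (not part of this file; assumes packets were
 * classified beforehand, e.g. with rte_sched_port_pkt_write()):
 *
 *	struct rte_mbuf *burst[64];
 *	uint16_t nb_rx = rte_eth_rx_burst(port_id, rx_queue_id, burst, 64);
 *	rte_sched_port_enqueue(sched_port, burst, nb_rx);
 *	int nb_sched = rte_sched_port_dequeue(sched_port, burst, 64);
 *	rte_eth_tx_burst(port_id, tx_queue_id, burst, nb_sched);
 */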
2931 int
2932 rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts)
2933 {
2934 	struct rte_sched_subport *subport;
2935 	uint32_t subport_id = port->subport_id;
2936 	uint32_t i, n_subports = 0, count;
2937 
2938 	port->pkts_out = pkts;
2939 	port->n_pkts_out = 0;
2940 
2941 	rte_sched_port_time_resync(port);
2942 
2943 	/* Take each grinder one step further */
2944 	for (i = 0, count = 0; ; i++)  {
2945 		subport = port->subports[subport_id];
2946 
2947 		count += grinder_handle(port, subport,
2948 				i & (RTE_SCHED_PORT_N_GRINDERS - 1));
2949 
2950 		if (count == n_pkts) {
2951 			subport_id++;
2952 
2953 			if (subport_id == port->n_subports_per_port)
2954 				subport_id = 0;
2955 
2956 			port->subport_id = subport_id;
2957 			break;
2958 		}
2959 
2960 		if (rte_sched_port_exceptions(subport, i >= RTE_SCHED_PORT_N_GRINDERS)) {
2961 			i = 0;
2962 			subport_id++;
2963 			n_subports++;
2964 		}
2965 
2966 		if (subport_id == port->n_subports_per_port)
2967 			subport_id = 0;
2968 
2969 		if (n_subports == port->n_subports_per_port) {
2970 			port->subport_id = subport_id;
2971 			break;
2972 		}
2973 	}
2974 
2975 	return count;
2976 }
2977