xref: /dpdk/lib/sched/rte_red.h (revision 719834a6849e1daf4a70ff7742bbcc3ae7e25607)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #ifndef __RTE_RED_H_INCLUDED__
6 #define __RTE_RED_H_INCLUDED__
7 
8 /**
9  * @file
10  * RTE Random Early Detection (RED)
11  */
12 
13 #include <stdint.h>
14 #include <limits.h>
15 #include <rte_debug.h>
16 #include <rte_cycles.h>
17 #include <rte_branch_prediction.h>
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 #define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
24 #define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
25 #define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
26 #define RTE_RED_WQ_LOG2_MIN                 1          /**< Min inverse filter weight value */
27 #define RTE_RED_WQ_LOG2_MAX                 12         /**< Max inverse filter weight value */
28 #define RTE_RED_MAXP_INV_MIN                1          /**< Min inverse mark probability value */
29 #define RTE_RED_MAXP_INV_MAX                255        /**< Max inverse mark probability value */
30 #define RTE_RED_2POW16                      (1<<16)    /**< 2 power 16 */
31 #define RTE_RED_INT16_NBITS                 (sizeof(uint16_t) * CHAR_BIT)
32 #define RTE_RED_WQ_LOG2_NUM                 (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
33 
34 /**
35  * Externs
36  */
37 extern uint32_t rte_red_rand_val;
38 extern uint32_t rte_red_rand_seed;
39 extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
40 extern uint16_t rte_red_pow2_frac_inv[16];
41 
42 /**
43  * RED configuration parameters passed by user
44  */
45 struct rte_red_params {
46 	uint16_t min_th;   /**< Minimum threshold for queue (max_th) */
47 	uint16_t max_th;   /**< Maximum threshold for queue (max_th) */
48 	uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
49 	uint16_t wq_log2;  /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
50 };
51 
52 /**
53  * RED configuration parameters
54  */
55 struct rte_red_config {
56 	uint32_t min_th;   /**< min_th scaled in fixed-point format */
57 	uint32_t max_th;   /**< max_th scaled in fixed-point format */
58 	uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
59 	uint8_t maxp_inv;  /**< maxp_inv */
60 	uint8_t wq_log2;   /**< wq_log2 */
61 };
62 
63 /**
64  * RED run-time data
65  */
66 struct rte_red {
67 	uint32_t avg;      /**< Average queue size (avg), scaled in fixed-point format */
68 	uint32_t count;    /**< Number of packets since last marked packet (count) */
69 	uint64_t q_time;   /**< Start of the queue idle time (q_time) */
70 };
71 
72 /**
73  * @brief Initialises run-time data
74  *
75  * @param red [in,out] data pointer to RED runtime data
76  *
77  * @return Operation status
78  * @retval 0 success
79  * @retval !0 error
80  */
81 int
82 rte_red_rt_data_init(struct rte_red *red);
83 
84 /**
85  * @brief Configures a single RED configuration parameter structure.
86  *
87  * @param red_cfg [in,out] config pointer to a RED configuration parameter structure
88  * @param wq_log2 [in]  log2 of the filter weight, valid range is:
89  *             RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
90  * @param min_th [in] queue minimum threshold in number of packets
91  * @param max_th [in] queue maximum threshold in number of packets
92  * @param maxp_inv [in] inverse maximum mark probability
93  *
94  * @return Operation status
95  * @retval 0 success
96  * @retval !0 error
97  */
98 int
99 rte_red_config_init(struct rte_red_config *red_cfg,
100 	const uint16_t wq_log2,
101 	const uint16_t min_th,
102 	const uint16_t max_th,
103 	const uint16_t maxp_inv);
104 
105 /**
106  * @brief Generate random number for RED
107  *
108  * Implementation based on:
109  * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
110  *
111  * 10 bit shift has been found through empirical tests (was 16).
112  *
113  * @return Random number between 0 and (2^22 - 1)
114  */
115 static inline uint32_t
116 rte_fast_rand(void)
117 {
118 	rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011;
119 	return rte_red_rand_seed >> 10;
120 }
121 
122 /**
123  * @brief calculate factor to scale average queue size when queue
124  *        becomes empty
125  *
126  * @param wq_log2 [in] where EWMA filter weight wq = 1/(2 ^ wq_log2)
127  * @param m [in] exponent in the computed value (1 - wq) ^ m
128  *
129  * @return computed value
130  * @retval ((1 - wq) ^ m) scaled in fixed-point format
131  */
132 static inline uint16_t
133 __rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
134 {
135 	uint32_t n = 0;
136 	uint32_t f = 0;
137 
138 	/**
139 	 * Basic math tells us that:
140 	 *   a^b = 2^(b * log2(a) )
141 	 *
142 	 * in our case:
143 	 *   a = (1-Wq)
144 	 *   b = m
145 	 *  Wq = 1/ (2^log2n)
146 	 *
147 	 * So we are computing this equation:
148 	 *   factor = 2 ^ ( m * log2(1-Wq))
149 	 *
150 	 * First we are computing:
151 	 *    n = m * log2(1-Wq)
152 	 *
153 	 * To avoid dealing with signed numbers log2 values are positive
154 	 * but they should be negative because (1-Wq) is always < 1.
155 	 * Contents of log2 table values are also scaled for precision.
156 	 */
157 
158 	n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
159 
160 	/**
161 	 * The tricky part is computing 2^n, for this I split n into
162 	 * integer part and fraction part.
163 	 *   f - is fraction part of n
164 	 *   n - is integer part of original n
165 	 *
166 	 * Now using basic math we compute 2^n:
167 	 *   2^(f+n) = 2^f * 2^n
168 	 *   2^f - we use lookup table
169 	 *   2^n - can be replaced with bit shift right operations
170 	 */
171 
172 	f = (n >> 6) & 0xf;
173 	n >>= 10;
174 
175 	if (n < RTE_RED_SCALING)
176 		return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n);
177 
178 	return 0;
179 }
180 
181 /**
182  * @brief Updates queue average in condition when queue is empty
183  *
184  * Note: packet is never dropped in this particular case.
185  *
186  * @param red_cfg [in] config pointer to a RED configuration parameter structure
187  * @param red [in,out] data pointer to RED runtime data
188  * @param time [in] current time stamp
189  *
190  * @return Operation status
191  * @retval 0 enqueue the packet
192  * @retval 1 drop the packet based on max threshold criterion
193  * @retval 2 drop the packet based on mark probability criterion
194  */
195 static inline int
196 rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
197 	struct rte_red *red,
198 	const uint64_t time)
199 {
200 	uint64_t time_diff = 0, m = 0;
201 
202 	RTE_ASSERT(red_cfg != NULL);
203 	RTE_ASSERT(red != NULL);
204 
205 	red->count ++;
206 
207 	/**
208 	 * We compute avg but we don't compare avg against
209 	 *  min_th or max_th, nor calculate drop probability
210 	 */
211 	time_diff = time - red->q_time;
212 
213 	/**
214 	 * m is the number of packets that might have arrived while the queue was empty.
215 	 * In this case we have time stamps provided by scheduler in byte units (bytes
216 	 * transmitted on network port). Such time stamp translates into time units as
217 	 * port speed is fixed but such approach simplifies the code.
218 	 */
219 	m = time_diff / RTE_RED_S;
220 
221 	/**
222 	 * Check that m will fit into 16-bit unsigned integer
223 	 */
224 	if (m >= RTE_RED_2POW16) {
225 		red->avg = 0;
226 	} else {
227 		red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m);
228 	}
229 
230 	return 0;
231 }
232 
233 /**
234  *  Drop probability (Sally Floyd and Van Jacobson):
235  *
236  *     pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
237  *     pa = pb / (2 - count * pb)
238  *
239  *
240  *                 (1 / maxp_inv) * (avg - min_th)
241  *                ---------------------------------
242  *                         max_th - min_th
243  *     pa = -----------------------------------------------
244  *                count * (1 / maxp_inv) * (avg - min_th)
245  *           2 - -----------------------------------------
246  *                          max_th - min_th
247  *
248  *
249  *                                  avg - min_th
250  *     pa = -----------------------------------------------------------
251  *           2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
252  *
253  *
254  *  We define pa_const as: pa_const =  2 * (max_th - min_th) * maxp_inv. Then:
255  *
256  *
257  *                     avg - min_th
258  *     pa = -----------------------------------
259  *           pa_const - count * (avg - min_th)
260  */
261 
262 /**
263  * @brief make a decision to drop or enqueue a packet based on mark probability
264  *        criteria
265  *
266  * @param red_cfg [in] config pointer to structure defining RED parameters
267  * @param red [in,out] data pointer to RED runtime data
268  *
269  * @return operation status
270  * @retval 0 enqueue the packet
271  * @retval 1 drop the packet
272  */
273 static inline int
274 __rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
275 {
276 	uint32_t pa_num = 0;    /* numerator of drop-probability */
277 	uint32_t pa_den = 0;    /* denominator of drop-probability */
278 	uint32_t pa_num_count = 0;
279 
280 	pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
281 
282 	pa_num_count = red->count * pa_num;
283 
284 	if (red_cfg->pa_const <= pa_num_count)
285 		return 1;
286 
287 	pa_den = red_cfg->pa_const - pa_num_count;
288 
289 	/* If drop, generate and save random number to be used next time */
290 	if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
291 		rte_red_rand_val = rte_fast_rand();
292 
293 		return 1;
294 	}
295 
296 	/* No drop */
297 	return 0;
298 }
299 
300 /**
301  * @brief Decides if new packet should be enqueued or dropped in queue non-empty case
302  *
303  * @param red_cfg [in] config pointer to a RED configuration parameter structure
304  * @param red [in,out] data pointer to RED runtime data
305  * @param q [in] current queue size (measured in packets)
306  *
307  * @return Operation status
308  * @retval 0 enqueue the packet
309  * @retval 1 drop the packet based on max threshold criterion
310  * @retval 2 drop the packet based on mark probability criterion
311  */
312 static inline int
313 rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
314 	struct rte_red *red,
315 	const unsigned q)
316 {
317 	RTE_ASSERT(red_cfg != NULL);
318 	RTE_ASSERT(red != NULL);
319 
320 	/**
321 	* EWMA filter (Sally Floyd and Van Jacobson):
322 	*    avg = (1 - wq) * avg + wq * q
323 	*    avg = avg + q * wq - avg * wq
324 	*
325 	* We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
326 	*    avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
327 	*
328 	* By using shift left/right operations, we get:
329 	*    avg_s = avg_s + (q << N) - (avg_s >> n)
330 	*    avg_s += (q << N) - (avg_s >> n)
331 	*/
332 
333 	/* avg update */
334 	red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
335 
336 	/* avg < min_th: do not mark the packet  */
337 	if (red->avg < red_cfg->min_th) {
338 		red->count ++;
339 		return 0;
340 	}
341 
342 	/* min_th <= avg < max_th: mark the packet with pa probability */
343 	if (red->avg < red_cfg->max_th) {
344 		if (!__rte_red_drop(red_cfg, red)) {
345 			red->count ++;
346 			return 0;
347 		}
348 
349 		red->count = 0;
350 		return 2;
351 	}
352 
353 	/* max_th <= avg: always mark the packet */
354 	red->count = 0;
355 	return 1;
356 }
357 
358 /**
359  * @brief Decides if new packet should be enqueued or dropped
360  * Updates run time data based on new queue size value.
361  * Based on new queue average and RED configuration parameters
362  * gives verdict whether to enqueue or drop the packet.
363  *
364  * @param red_cfg [in] config pointer to a RED configuration parameter structure
365  * @param red [in,out] data pointer to RED runtime data
366  * @param q [in] updated queue size in packets
367  * @param time [in] current time stamp
368  *
369  * @return Operation status
370  * @retval 0 enqueue the packet
371  * @retval 1 drop the packet based on max threshold criteria
372  * @retval 2 drop the packet based on mark probability criteria
373  */
374 static inline int
375 rte_red_enqueue(const struct rte_red_config *red_cfg,
376 	struct rte_red *red,
377 	const unsigned q,
378 	const uint64_t time)
379 {
380 	RTE_ASSERT(red_cfg != NULL);
381 	RTE_ASSERT(red != NULL);
382 
383 	if (q != 0) {
384 		return rte_red_enqueue_nonempty(red_cfg, red, q);
385 	} else {
386 		return rte_red_enqueue_empty(red_cfg, red, time);
387 	}
388 }
389 
390 /**
391  * @brief Callback to records time that queue became empty
392  *
393  * @param red [in,out] data pointer to RED runtime data
394  * @param time [in] current time stamp
395  */
396 static inline void
397 rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
398 {
399 	red->q_time = time;
400 }
401 
402 #ifdef __cplusplus
403 }
404 #endif
405 
406 #endif /* __RTE_RED_H_INCLUDED__ */
407