xref: /dpdk/lib/sched/rte_red.h (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #ifndef __RTE_RED_H_INCLUDED__
6 #define __RTE_RED_H_INCLUDED__
7 
8 #ifdef __cplusplus
9 extern "C" {
10 #endif
11 
12 /**
13  * @file
14  * RTE Random Early Detection (RED)
15  *
16  *
17  ***/
18 
19 #include <stdint.h>
20 #include <limits.h>
21 #include <rte_common.h>
22 #include <rte_debug.h>
23 #include <rte_cycles.h>
24 #include <rte_branch_prediction.h>
25 
26 #define RTE_RED_SCALING                     10         /**< Fraction size for fixed-point */
27 #define RTE_RED_S                           (1 << 22)  /**< Packet size multiplied by number of leaf queues */
28 #define RTE_RED_MAX_TH_MAX                  1023       /**< Max threshold limit in fixed point format */
29 #define RTE_RED_WQ_LOG2_MIN                 1          /**< Min inverse filter weight value */
30 #define RTE_RED_WQ_LOG2_MAX                 12         /**< Max inverse filter weight value */
31 #define RTE_RED_MAXP_INV_MIN                1          /**< Min inverse mark probability value */
32 #define RTE_RED_MAXP_INV_MAX                255        /**< Max inverse mark probability value */
33 #define RTE_RED_2POW16                      (1<<16)    /**< 2 power 16 */
34 #define RTE_RED_INT16_NBITS                 (sizeof(uint16_t) * CHAR_BIT)
35 #define RTE_RED_WQ_LOG2_NUM                 (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1)
36 
37 /**
38  * Externs
39  *
40  */
41 extern uint32_t rte_red_rand_val;
42 extern uint32_t rte_red_rand_seed;
43 extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM];
44 extern uint16_t rte_red_pow2_frac_inv[16];
45 
46 /**
47  * RED configuration parameters passed by user
48  *
49  */
50 struct rte_red_params {
51 	uint16_t min_th;   /**< Minimum threshold for queue (max_th) */
52 	uint16_t max_th;   /**< Maximum threshold for queue (max_th) */
53 	uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */
54 	uint16_t wq_log2;  /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */
55 };
56 
57 /**
58  * RED configuration parameters
59  */
60 struct rte_red_config {
61 	uint32_t min_th;   /**< min_th scaled in fixed-point format */
62 	uint32_t max_th;   /**< max_th scaled in fixed-point format */
63 	uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */
64 	uint8_t maxp_inv;  /**< maxp_inv */
65 	uint8_t wq_log2;   /**< wq_log2 */
66 };
67 
68 /**
69  * RED run-time data
70  */
71 struct rte_red {
72 	uint32_t avg;      /**< Average queue size (avg), scaled in fixed-point format */
73 	uint32_t count;    /**< Number of packets since last marked packet (count) */
74 	uint64_t q_time;   /**< Start of the queue idle time (q_time) */
75 };
76 
77 /**
78  * @brief Initialises run-time data
79  *
80  * @param red [in,out] data pointer to RED runtime data
81  *
82  * @return Operation status
83  * @retval 0 success
84  * @retval !0 error
85  */
86 int
87 rte_red_rt_data_init(struct rte_red *red);
88 
89 /**
90  * @brief Configures a single RED configuration parameter structure.
91  *
92  * @param red_cfg [in,out] config pointer to a RED configuration parameter structure
93  * @param wq_log2 [in]  log2 of the filter weight, valid range is:
94  *             RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX
95  * @param min_th [in] queue minimum threshold in number of packets
96  * @param max_th [in] queue maximum threshold in number of packets
97  * @param maxp_inv [in] inverse maximum mark probability
98  *
99  * @return Operation status
100  * @retval 0 success
101  * @retval !0 error
102  */
103 int
104 rte_red_config_init(struct rte_red_config *red_cfg,
105 	const uint16_t wq_log2,
106 	const uint16_t min_th,
107 	const uint16_t max_th,
108 	const uint16_t maxp_inv);
109 
110 /**
111  * @brief Generate random number for RED
112  *
113  * Implementation based on:
114  * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
115  *
116  * 10 bit shift has been found through empirical tests (was 16).
117  *
118  * @return Random number between 0 and (2^22 - 1)
119  */
120 static inline uint32_t
121 rte_fast_rand(void)
122 {
123 	rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011;
124 	return rte_red_rand_seed >> 10;
125 }
126 
127 /**
128  * @brief calculate factor to scale average queue size when queue
129  *        becomes empty
130  *
131  * @param wq_log2 [in] where EWMA filter weight wq = 1/(2 ^ wq_log2)
132  * @param m [in] exponent in the computed value (1 - wq) ^ m
133  *
134  * @return computed value
135  * @retval ((1 - wq) ^ m) scaled in fixed-point format
136  */
137 static inline uint16_t
138 __rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
139 {
140 	uint32_t n = 0;
141 	uint32_t f = 0;
142 
143 	/**
144 	 * Basic math tells us that:
145 	 *   a^b = 2^(b * log2(a) )
146 	 *
147 	 * in our case:
148 	 *   a = (1-Wq)
149 	 *   b = m
150 	 *  Wq = 1/ (2^log2n)
151 	 *
152 	 * So we are computing this equation:
153 	 *   factor = 2 ^ ( m * log2(1-Wq))
154 	 *
155 	 * First we are computing:
156 	 *    n = m * log2(1-Wq)
157 	 *
158 	 * To avoid dealing with signed numbers log2 values are positive
159 	 * but they should be negative because (1-Wq) is always < 1.
160 	 * Contents of log2 table values are also scaled for precision.
161 	 */
162 
163 	n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN];
164 
165 	/**
166 	 * The tricky part is computing 2^n, for this I split n into
167 	 * integer part and fraction part.
168 	 *   f - is fraction part of n
169 	 *   n - is integer part of original n
170 	 *
171 	 * Now using basic math we compute 2^n:
172 	 *   2^(f+n) = 2^f * 2^n
173 	 *   2^f - we use lookup table
174 	 *   2^n - can be replaced with bit shift right operations
175 	 */
176 
177 	f = (n >> 6) & 0xf;
178 	n >>= 10;
179 
180 	if (n < RTE_RED_SCALING)
181 		return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n);
182 
183 	return 0;
184 }
185 
186 /**
187  * @brief Updates queue average in condition when queue is empty
188  *
189  * Note: packet is never dropped in this particular case.
190  *
191  * @param red_cfg [in] config pointer to a RED configuration parameter structure
192  * @param red [in,out] data pointer to RED runtime data
193  * @param time [in] current time stamp
194  *
195  * @return Operation status
196  * @retval 0 enqueue the packet
197  * @retval 1 drop the packet based on max threshold criterion
198  * @retval 2 drop the packet based on mark probability criterion
199  */
200 static inline int
201 rte_red_enqueue_empty(const struct rte_red_config *red_cfg,
202 	struct rte_red *red,
203 	const uint64_t time)
204 {
205 	uint64_t time_diff = 0, m = 0;
206 
207 	RTE_ASSERT(red_cfg != NULL);
208 	RTE_ASSERT(red != NULL);
209 
210 	red->count ++;
211 
212 	/**
213 	 * We compute avg but we don't compare avg against
214 	 *  min_th or max_th, nor calculate drop probability
215 	 */
216 	time_diff = time - red->q_time;
217 
218 	/**
219 	 * m is the number of packets that might have arrived while the queue was empty.
220 	 * In this case we have time stamps provided by scheduler in byte units (bytes
221 	 * transmitted on network port). Such time stamp translates into time units as
222 	 * port speed is fixed but such approach simplifies the code.
223 	 */
224 	m = time_diff / RTE_RED_S;
225 
226 	/**
227 	 * Check that m will fit into 16-bit unsigned integer
228 	 */
229 	if (m >= RTE_RED_2POW16) {
230 		red->avg = 0;
231 	} else {
232 		red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m);
233 	}
234 
235 	return 0;
236 }
237 
238 /**
239  *  Drop probability (Sally Floyd and Van Jacobson):
240  *
241  *     pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th)
242  *     pa = pb / (2 - count * pb)
243  *
244  *
245  *                 (1 / maxp_inv) * (avg - min_th)
246  *                ---------------------------------
247  *                         max_th - min_th
248  *     pa = -----------------------------------------------
249  *                count * (1 / maxp_inv) * (avg - min_th)
250  *           2 - -----------------------------------------
251  *                          max_th - min_th
252  *
253  *
254  *                                  avg - min_th
255  *     pa = -----------------------------------------------------------
256  *           2 * (max_th - min_th) * maxp_inv - count * (avg - min_th)
257  *
258  *
259  *  We define pa_const as: pa_const =  2 * (max_th - min_th) * maxp_inv. Then:
260  *
261  *
262  *                     avg - min_th
263  *     pa = -----------------------------------
264  *           pa_const - count * (avg - min_th)
265  */
266 
267 /**
268  * @brief make a decision to drop or enqueue a packet based on mark probability
269  *        criteria
270  *
271  * @param red_cfg [in] config pointer to structure defining RED parameters
272  * @param red [in,out] data pointer to RED runtime data
273  *
274  * @return operation status
275  * @retval 0 enqueue the packet
276  * @retval 1 drop the packet
277  */
278 static inline int
279 __rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red)
280 {
281 	uint32_t pa_num = 0;    /* numerator of drop-probability */
282 	uint32_t pa_den = 0;    /* denominator of drop-probability */
283 	uint32_t pa_num_count = 0;
284 
285 	pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2);
286 
287 	pa_num_count = red->count * pa_num;
288 
289 	if (red_cfg->pa_const <= pa_num_count)
290 		return 1;
291 
292 	pa_den = red_cfg->pa_const - pa_num_count;
293 
294 	/* If drop, generate and save random number to be used next time */
295 	if (unlikely((rte_red_rand_val % pa_den) < pa_num)) {
296 		rte_red_rand_val = rte_fast_rand();
297 
298 		return 1;
299 	}
300 
301 	/* No drop */
302 	return 0;
303 }
304 
305 /**
306  * @brief Decides if new packet should be enqeued or dropped in queue non-empty case
307  *
308  * @param red_cfg [in] config pointer to a RED configuration parameter structure
309  * @param red [in,out] data pointer to RED runtime data
310  * @param q [in] current queue size (measured in packets)
311  *
312  * @return Operation status
313  * @retval 0 enqueue the packet
314  * @retval 1 drop the packet based on max threshold criterion
315  * @retval 2 drop the packet based on mark probability criterion
316  */
317 static inline int
318 rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg,
319 	struct rte_red *red,
320 	const unsigned q)
321 {
322 	RTE_ASSERT(red_cfg != NULL);
323 	RTE_ASSERT(red != NULL);
324 
325 	/**
326 	* EWMA filter (Sally Floyd and Van Jacobson):
327 	*    avg = (1 - wq) * avg + wq * q
328 	*    avg = avg + q * wq - avg * wq
329 	*
330 	* We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get:
331 	*    avg_s = avg_s + q * 2^N - avg_s * 2^(-n)
332 	*
333 	* By using shift left/right operations, we get:
334 	*    avg_s = avg_s + (q << N) - (avg_s >> n)
335 	*    avg_s += (q << N) - (avg_s >> n)
336 	*/
337 
338 	/* avg update */
339 	red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2);
340 
341 	/* avg < min_th: do not mark the packet  */
342 	if (red->avg < red_cfg->min_th) {
343 		red->count ++;
344 		return 0;
345 	}
346 
347 	/* min_th <= avg < max_th: mark the packet with pa probability */
348 	if (red->avg < red_cfg->max_th) {
349 		if (!__rte_red_drop(red_cfg, red)) {
350 			red->count ++;
351 			return 0;
352 		}
353 
354 		red->count = 0;
355 		return 2;
356 	}
357 
358 	/* max_th <= avg: always mark the packet */
359 	red->count = 0;
360 	return 1;
361 }
362 
363 /**
364  * @brief Decides if new packet should be enqeued or dropped
365  * Updates run time data based on new queue size value.
366  * Based on new queue average and RED configuration parameters
367  * gives verdict whether to enqueue or drop the packet.
368  *
369  * @param red_cfg [in] config pointer to a RED configuration parameter structure
370  * @param red [in,out] data pointer to RED runtime data
371  * @param q [in] updated queue size in packets
372  * @param time [in] current time stamp
373  *
374  * @return Operation status
375  * @retval 0 enqueue the packet
376  * @retval 1 drop the packet based on max threshold criteria
377  * @retval 2 drop the packet based on mark probability criteria
378  */
379 static inline int
380 rte_red_enqueue(const struct rte_red_config *red_cfg,
381 	struct rte_red *red,
382 	const unsigned q,
383 	const uint64_t time)
384 {
385 	RTE_ASSERT(red_cfg != NULL);
386 	RTE_ASSERT(red != NULL);
387 
388 	if (q != 0) {
389 		return rte_red_enqueue_nonempty(red_cfg, red, q);
390 	} else {
391 		return rte_red_enqueue_empty(red_cfg, red, time);
392 	}
393 }
394 
395 /**
396  * @brief Callback to records time that queue became empty
397  *
398  * @param red [in,out] data pointer to RED runtime data
399  * @param time [in] current time stamp
400  */
401 static inline void
402 rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time)
403 {
404 	red->q_time = time;
405 }
406 
407 #ifdef __cplusplus
408 }
409 #endif
410 
411 #endif /* __RTE_RED_H_INCLUDED__ */
412