xref: /dpdk/lib/sched/rte_pie.h (revision 719834a6849e1daf4a70ff7742bbcc3ae7e25607)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4 
5 #ifndef __RTE_PIE_H_INCLUDED__
6 #define __RTE_PIE_H_INCLUDED__
7 
8 /**
9  * @file
10  * Proportional Integral controller Enhanced (PIE)
11  */
12 
13 #include <stdint.h>
14 
15 #include <rte_random.h>
16 #include <rte_debug.h>
17 #include <rte_cycles.h>
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
24 				     * to start measurement cycle (bytes)
25 				     */
26 #define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
27 #define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
28 #define RTE_BETA           1.25    /**< Weights in drop probability calculations */
29 #define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
30 
31 
32 /**
33  * PIE configuration parameters passed by user
34  */
35 struct rte_pie_params {
36 	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
37 	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
38 	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
39 	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
40 };
41 
42 /**
43  * PIE configuration parameters
44  */
45 struct rte_pie_config {
46 	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
47 	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
48 	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
49 	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
50 };
51 
52 /**
53  * PIE run-time data
54  */
55 struct rte_pie {
56 	uint16_t active;               /**< Flag for activating/deactivating pie */
57 	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
58 	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
59 	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
60 	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
61 	uint64_t qlen;                 /**< Queue length (packets count) */
62 	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
63 	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
64 	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
65 	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
66 	double drop_prob;              /**< Current packet drop probability */
67 	double accu_prob;              /**< Accumulated packet drop probability */
68 };
69 
70 /**
71  * @brief Initialises run-time data
72  *
73  * @param pie [in,out] data pointer to PIE runtime data
74  *
75  * @return Operation status
76  * @retval 0 success
77  * @retval !0 error
78  */
79 int
80 rte_pie_rt_data_init(struct rte_pie *pie);
81 
82 /**
83  * @brief Configures a single PIE configuration parameter structure.
84  *
85  * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
86  * @param qdelay_ref [in]  latency target(milliseconds)
87  * @param dp_update_interval [in] update interval for drop probability (milliseconds)
88  * @param max_burst [in] maximum burst allowance (milliseconds)
89  * @param tailq_th [in] tail drop threshold for the queue (number of packets)
90  *
91  * @return Operation status
92  * @retval 0 success
93  * @retval !0 error
94  */
95 int
96 rte_pie_config_init(struct rte_pie_config *pie_cfg,
97 	const uint16_t qdelay_ref,
98 	const uint16_t dp_update_interval,
99 	const uint16_t max_burst,
100 	const uint16_t tailq_th);
101 
102 /**
103  * @brief Decides packet enqueue when queue is empty
104  *
105  * Note: packet is never dropped in this particular case.
106  *
107  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
108  * @param pie [in, out] data pointer to PIE runtime data
109  * @param pkt_len [in] packet length in bytes
110  *
111  * @return Operation status
112  * @retval 0 enqueue the packet
113  * @retval !0 drop the packet
114  */
115 static int
116 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
117 	struct rte_pie *pie,
118 	uint32_t pkt_len)
119 {
120 	RTE_ASSERT(pkt_len != 0);
121 
122 	/* Update the PIE qlen parameter */
123 	pie->qlen++;
124 	pie->qlen_bytes += pkt_len;
125 
126 	/**
127 	 * If the queue has been idle for a while, turn off PIE and Reset counters
128 	 */
129 	if ((pie->active == 1) &&
130 		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
131 		pie->active =  0;
132 		pie->in_measurement = 0;
133 	}
134 
135 	return 0;
136 }
137 
138 /**
139  * @brief make a decision to drop or enqueue a packet based on probability
140  *        criteria
141  *
142  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
143  * @param pie [in, out] data pointer to PIE runtime data
144  * @param time [in] current time (measured in cpu cycles)
145  */
146 static void
147 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
148 	struct rte_pie *pie, uint64_t time)
149 {
150 	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
151 
152 	/* Note: can be implemented using integer multiply.
153 	 * DQ_THRESHOLD is power of 2 value.
154 	 */
155 	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
156 
157 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
158 		RTE_BETA * (current_qdelay - pie->qdelay_old);
159 
160 	if (pie->drop_prob < 0.000001)
161 		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
162 	else if (pie->drop_prob < 0.00001)
163 		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
164 	else if (pie->drop_prob < 0.0001)
165 		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
166 	else if (pie->drop_prob < 0.001)
167 		p = p * 0.03125;                    /* (1/32) = 0.03125   */
168 	else if (pie->drop_prob < 0.01)
169 		p = p * 0.125;                      /* (1/8) = 0.125    */
170 	else if (pie->drop_prob < 0.1)
171 		p = p * 0.5;                        /* (1/2) = 0.5    */
172 
173 	if (pie->drop_prob >= 0.1 && p > 0.02)
174 		p = 0.02;
175 
176 	pie->drop_prob += p;
177 
178 	double qdelay = qdelay_ref * 0.5;
179 
180 	/*  Exponentially decay drop prob when congestion goes away  */
181 	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
182 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
183 
184 	/* Bound drop probability */
185 	if (pie->drop_prob < 0)
186 		pie->drop_prob = 0;
187 	if (pie->drop_prob > 1)
188 		pie->drop_prob = 1;
189 
190 	pie->qdelay_old = current_qdelay;
191 	pie->last_measurement = time;
192 
193 	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
194 
195 	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
196 }
197 
198 /**
199  * @brief make a decision to drop or enqueue a packet based on probability
200  *        criteria
201  *
202  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
203  * @param pie [in, out] data pointer to PIE runtime data
204  *
205  * @return operation status
206  * @retval 0 enqueue the packet
207  * @retval 1 drop the packet
208  */
209 static inline int
210 _rte_pie_drop(const struct rte_pie_config *pie_cfg,
211 	struct rte_pie *pie)
212 {
213 	uint64_t qdelay = pie_cfg->qdelay_ref / 2;
214 
215 	/* PIE is active but the queue is not congested: return 0 */
216 	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
217 		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
218 		return 0;
219 
220 	if (pie->drop_prob == 0)
221 		pie->accu_prob = 0;
222 
223 	/* For practical reasons, drop probability can be further scaled according
224 	 * to packet size, but one needs to set a bound to avoid unnecessary bias
225 	 * Random drop
226 	 */
227 	pie->accu_prob += pie->drop_prob;
228 
229 	if (pie->accu_prob < 0.85)
230 		return 0;
231 
232 	if (pie->accu_prob >= 8.5)
233 		return 1;
234 
235 	if (rte_drand() < pie->drop_prob) {
236 		pie->accu_prob = 0;
237 		return 1;
238 	}
239 
240 	/* No drop */
241 	return 0;
242 }
243 
244 /**
245  * @brief Decides if new packet should be enqueued or dropped for non-empty queue
246  *
247  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
248  * @param pie [in,out] data pointer to PIE runtime data
249  * @param pkt_len [in] packet length in bytes
250  * @param time [in] current time (measured in cpu cycles)
251  *
252  * @return Operation status
253  * @retval 0 enqueue the packet
254  * @retval 1 drop the packet based on max threshold criterion
255  * @retval 2 drop the packet based on mark probability criterion
256  */
257 static inline int
258 rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
259 	struct rte_pie *pie,
260 	uint32_t pkt_len,
261 	const uint64_t time)
262 {
263 	/* Check queue space against the tail drop threshold */
264 	if (pie->qlen >= pie_cfg->tailq_th) {
265 
266 		pie->accu_prob = 0;
267 		return 1;
268 	}
269 
270 	if (pie->active) {
271 		/* Update drop probability after certain interval */
272 		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
273 			_calc_drop_probability(pie_cfg, pie, time);
274 
275 		/* Decide whether packet to be dropped or enqueued */
276 		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
277 			return 2;
278 	}
279 
280 	/* When queue occupancy is over a certain threshold, turn on PIE */
281 	if ((pie->active == 0) &&
282 		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
283 		pie->active = 1;
284 		pie->qdelay_old = 0;
285 		pie->drop_prob = 0;
286 		pie->in_measurement = 1;
287 		pie->departed_bytes_count = 0;
288 		pie->avg_dq_time = 0;
289 		pie->last_measurement = time;
290 		pie->burst_allowance = pie_cfg->max_burst;
291 		pie->accu_prob = 0;
292 		pie->start_measurement = time;
293 	}
294 
295 	/* when queue has been idle for a while, turn off PIE and Reset counters */
296 	if (pie->active == 1 &&
297 		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
298 		pie->active =  0;
299 		pie->in_measurement = 0;
300 	}
301 
302 	/* Update PIE qlen parameter */
303 	pie->qlen++;
304 	pie->qlen_bytes += pkt_len;
305 
306 	/* No drop */
307 	return 0;
308 }
309 
310 /**
311  * @brief Decides if new packet should be enqueued or dropped
312  * Updates run time data and gives verdict whether to enqueue or drop the packet.
313  *
314  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
315  * @param pie [in,out] data pointer to PIE runtime data
316  * @param qlen [in] queue length
317  * @param pkt_len [in] packet length in bytes
318  * @param time [in] current time stamp (measured in cpu cycles)
319  *
320  * @return Operation status
321  * @retval 0 enqueue the packet
322  * @retval 1 drop the packet based on drop probability criteria
323  */
324 static inline int
325 rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
326 	struct rte_pie *pie,
327 	const unsigned int qlen,
328 	uint32_t pkt_len,
329 	const uint64_t time)
330 {
331 	RTE_ASSERT(pie_cfg != NULL);
332 	RTE_ASSERT(pie != NULL);
333 
334 	if (qlen != 0)
335 		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
336 	else
337 		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
338 }
339 
340 /**
341  * @brief PIE rate estimation method
342  * Called on each packet departure.
343  *
344  * @param pie [in] data pointer to PIE runtime data
345  * @param pkt_len [in] packet length in bytes
346  * @param time [in] current time stamp in cpu cycles
347  */
348 static inline void
349 rte_pie_dequeue(struct rte_pie *pie,
350 	uint32_t pkt_len,
351 	uint64_t time)
352 {
353 	/* Dequeue rate estimation */
354 	if (pie->in_measurement) {
355 		pie->departed_bytes_count += pkt_len;
356 
357 		/* Start a new measurement cycle when enough packets */
358 		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
359 			uint64_t dq_time = time - pie->start_measurement;
360 
361 			if (pie->avg_dq_time == 0)
362 				pie->avg_dq_time = dq_time;
363 			else
364 				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
365 					* (1 - RTE_DQ_WEIGHT);
366 
367 			pie->in_measurement = 0;
368 		}
369 	}
370 
371 	/* Start measurement cycle when enough data in the queue */
372 	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
373 		pie->in_measurement = 1;
374 		pie->start_measurement = time;
375 		pie->departed_bytes_count = 0;
376 	}
377 }
378 
379 #ifdef __cplusplus
380 }
381 #endif
382 
383 #endif /* __RTE_PIE_H_INCLUDED__ */
384