xref: /dpdk/lib/sched/rte_pie.h (revision 02d36ef6a9528e0f4a3403956e66bcea5fadbf8c)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4 
5 #ifndef __RTE_PIE_H_INCLUDED__
6 #define __RTE_PIE_H_INCLUDED__
7 
8 #ifdef __cplusplus
9 extern "C" {
10 #endif
11 
12 /**
13  * @file
14  * Proportional Integral controller Enhanced (PIE)
15  **/
16 
17 #include <stdint.h>
18 
19 #include <rte_compat.h>
20 #include <rte_random.h>
21 #include <rte_debug.h>
22 #include <rte_cycles.h>
23 
24 #define RTE_DQ_THRESHOLD   16384   /**< Queue length threshold (2^14)
25 				     * to start measurement cycle (bytes)
26 				     */
27 #define RTE_DQ_WEIGHT      0.25    /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */
28 #define RTE_ALPHA          0.125   /**< Weights in drop probability calculations */
29 #define RTE_BETA           1.25    /**< Weights in drop probability calculations */
30 #define RTE_RAND_MAX      ~0LLU    /**< Max value of the random number */
31 
32 
33 /**
34  * PIE configuration parameters passed by user
35  *
36  */
37 struct rte_pie_params {
38 	uint16_t qdelay_ref;           /**< Latency Target (milliseconds) */
39 	uint16_t dp_update_interval;   /**< Update interval for drop probability (milliseconds) */
40 	uint16_t max_burst;            /**< Max Burst Allowance (milliseconds) */
41 	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
42 };
43 
44 /**
45  * PIE configuration parameters
46  *
47  */
48 struct rte_pie_config {
49 	uint64_t qdelay_ref;           /**< Latency Target (in CPU cycles.) */
50 	uint64_t dp_update_interval;   /**< Update interval for drop probability (in CPU cycles) */
51 	uint64_t max_burst;            /**< Max Burst Allowance (in CPU cycles.) */
52 	uint16_t tailq_th;             /**< Tailq drop threshold (packet counts) */
53 };
54 
55 /**
56  * PIE run-time data
57  */
58 struct rte_pie {
59 	uint16_t active;               /**< Flag for activating/deactivating pie */
60 	uint16_t in_measurement;       /**< Flag for activation of measurement cycle */
61 	uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */
62 	uint64_t start_measurement;    /**< Time to start to measurement cycle (in cpu cycles) */
63 	uint64_t last_measurement;     /**< Time of last measurement (in cpu cycles) */
64 	uint64_t qlen;                 /**< Queue length (packets count) */
65 	uint64_t qlen_bytes;           /**< Queue length (bytes count) */
66 	uint64_t avg_dq_time;          /**< Time averaged dequeue rate (in cpu cycles) */
67 	uint32_t burst_allowance;      /**< Current burst allowance (bytes) */
68 	uint64_t qdelay_old;           /**< Old queue delay (bytes) */
69 	double drop_prob;              /**< Current packet drop probability */
70 	double accu_prob;              /**< Accumulated packet drop probability */
71 };
72 
73 /**
74  * @brief Initialises run-time data
75  *
76  * @param pie [in,out] data pointer to PIE runtime data
77  *
78  * @return Operation status
79  * @retval 0 success
80  * @retval !0 error
81  */
82 int
83 __rte_experimental
84 rte_pie_rt_data_init(struct rte_pie *pie);
85 
86 /**
87  * @brief Configures a single PIE configuration parameter structure.
88  *
89  * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure
90  * @param qdelay_ref [in]  latency target(milliseconds)
91  * @param dp_update_interval [in] update interval for drop probability (milliseconds)
92  * @param max_burst [in] maximum burst allowance (milliseconds)
93  * @param tailq_th [in] tail drop threshold for the queue (number of packets)
94  *
95  * @return Operation status
96  * @retval 0 success
97  * @retval !0 error
98  */
99 int
100 __rte_experimental
101 rte_pie_config_init(struct rte_pie_config *pie_cfg,
102 	const uint16_t qdelay_ref,
103 	const uint16_t dp_update_interval,
104 	const uint16_t max_burst,
105 	const uint16_t tailq_th);
106 
107 /**
108  * @brief Decides packet enqueue when queue is empty
109  *
110  * Note: packet is never dropped in this particular case.
111  *
112  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
113  * @param pie [in, out] data pointer to PIE runtime data
114  * @param pkt_len [in] packet length in bytes
115  *
116  * @return Operation status
117  * @retval 0 enqueue the packet
118  * @retval !0 drop the packet
119  */
120 static int
121 __rte_experimental
122 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg,
123 	struct rte_pie *pie,
124 	uint32_t pkt_len)
125 {
126 	RTE_ASSERT(pkt_len != 0);
127 
128 	/* Update the PIE qlen parameter */
129 	pie->qlen++;
130 	pie->qlen_bytes += pkt_len;
131 
132 	/**
133 	 * If the queue has been idle for a while, turn off PIE and Reset counters
134 	 */
135 	if ((pie->active == 1) &&
136 		(pie->qlen < (pie_cfg->tailq_th * 0.1))) {
137 		pie->active =  0;
138 		pie->in_measurement = 0;
139 	}
140 
141 	return 0;
142 }
143 
144 /**
145  * @brief make a decision to drop or enqueue a packet based on probability
146  *        criteria
147  *
148  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
149  * @param pie [in, out] data pointer to PIE runtime data
150  * @param time [in] current time (measured in cpu cycles)
151  */
152 static void
153 __rte_experimental
154 _calc_drop_probability(const struct rte_pie_config *pie_cfg,
155 	struct rte_pie *pie, uint64_t time)
156 {
157 	uint64_t qdelay_ref = pie_cfg->qdelay_ref;
158 
159 	/* Note: can be implemented using integer multiply.
160 	 * DQ_THRESHOLD is power of 2 value.
161 	 */
162 	uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14);
163 
164 	double p = RTE_ALPHA * (current_qdelay - qdelay_ref) +
165 		RTE_BETA * (current_qdelay - pie->qdelay_old);
166 
167 	if (pie->drop_prob < 0.000001)
168 		p = p * 0.00048828125;              /* (1/2048) = 0.00048828125 */
169 	else if (pie->drop_prob < 0.00001)
170 		p = p * 0.001953125;                /* (1/512) = 0.001953125  */
171 	else if (pie->drop_prob < 0.0001)
172 		p = p * 0.0078125;                  /* (1/128) = 0.0078125  */
173 	else if (pie->drop_prob < 0.001)
174 		p = p * 0.03125;                    /* (1/32) = 0.03125   */
175 	else if (pie->drop_prob < 0.01)
176 		p = p * 0.125;                      /* (1/8) = 0.125    */
177 	else if (pie->drop_prob < 0.1)
178 		p = p * 0.5;                        /* (1/2) = 0.5    */
179 
180 	if (pie->drop_prob >= 0.1 && p > 0.02)
181 		p = 0.02;
182 
183 	pie->drop_prob += p;
184 
185 	double qdelay = qdelay_ref * 0.5;
186 
187 	/*  Exponentially decay drop prob when congestion goes away  */
188 	if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay)
189 		pie->drop_prob *= 0.98;     /* 1 - 1/64 is sufficient */
190 
191 	/* Bound drop probability */
192 	if (pie->drop_prob < 0)
193 		pie->drop_prob = 0;
194 	if (pie->drop_prob > 1)
195 		pie->drop_prob = 1;
196 
197 	pie->qdelay_old = current_qdelay;
198 	pie->last_measurement = time;
199 
200 	uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval;
201 
202 	pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0;
203 }
204 
205 /**
206  * @brief make a decision to drop or enqueue a packet based on probability
207  *        criteria
208  *
209  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
210  * @param pie [in, out] data pointer to PIE runtime data
211  *
212  * @return operation status
213  * @retval 0 enqueue the packet
214  * @retval 1 drop the packet
215  */
216 static inline int
217 __rte_experimental
218 _rte_pie_drop(const struct rte_pie_config *pie_cfg,
219 	struct rte_pie *pie)
220 {
221 	uint64_t qdelay = pie_cfg->qdelay_ref / 2;
222 
223 	/* PIE is active but the queue is not congested: return 0 */
224 	if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) ||
225 		(pie->qlen <= (pie_cfg->tailq_th * 0.1)))
226 		return 0;
227 
228 	if (pie->drop_prob == 0)
229 		pie->accu_prob = 0;
230 
231 	/* For practical reasons, drop probability can be further scaled according
232 	 * to packet size, but one needs to set a bound to avoid unnecessary bias
233 	 * Random drop
234 	 */
235 	pie->accu_prob += pie->drop_prob;
236 
237 	if (pie->accu_prob < 0.85)
238 		return 0;
239 
240 	if (pie->accu_prob >= 8.5)
241 		return 1;
242 
243 	if (rte_drand() < pie->drop_prob) {
244 		pie->accu_prob = 0;
245 		return 1;
246 	}
247 
248 	/* No drop */
249 	return 0;
250 }
251 
252 /**
253  * @brief Decides if new packet should be enqueued or dropped for non-empty queue
254  *
255  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
256  * @param pie [in,out] data pointer to PIE runtime data
257  * @param pkt_len [in] packet length in bytes
258  * @param time [in] current time (measured in cpu cycles)
259  *
260  * @return Operation status
261  * @retval 0 enqueue the packet
262  * @retval 1 drop the packet based on max threshold criterion
263  * @retval 2 drop the packet based on mark probability criterion
264  */
265 static inline int
266 __rte_experimental
267 rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg,
268 	struct rte_pie *pie,
269 	uint32_t pkt_len,
270 	const uint64_t time)
271 {
272 	/* Check queue space against the tail drop threshold */
273 	if (pie->qlen >= pie_cfg->tailq_th) {
274 
275 		pie->accu_prob = 0;
276 		return 1;
277 	}
278 
279 	if (pie->active) {
280 		/* Update drop probability after certain interval */
281 		if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval)
282 			_calc_drop_probability(pie_cfg, pie, time);
283 
284 		/* Decide whether packet to be dropped or enqueued */
285 		if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0)
286 			return 2;
287 	}
288 
289 	/* When queue occupancy is over a certain threshold, turn on PIE */
290 	if ((pie->active == 0) &&
291 		(pie->qlen >= (pie_cfg->tailq_th * 0.1))) {
292 		pie->active = 1;
293 		pie->qdelay_old = 0;
294 		pie->drop_prob = 0;
295 		pie->in_measurement = 1;
296 		pie->departed_bytes_count = 0;
297 		pie->avg_dq_time = 0;
298 		pie->last_measurement = time;
299 		pie->burst_allowance = pie_cfg->max_burst;
300 		pie->accu_prob = 0;
301 		pie->start_measurement = time;
302 	}
303 
304 	/* when queue has been idle for a while, turn off PIE and Reset counters */
305 	if (pie->active == 1 &&
306 		pie->qlen < (pie_cfg->tailq_th * 0.1)) {
307 		pie->active =  0;
308 		pie->in_measurement = 0;
309 	}
310 
311 	/* Update PIE qlen parameter */
312 	pie->qlen++;
313 	pie->qlen_bytes += pkt_len;
314 
315 	/* No drop */
316 	return 0;
317 }
318 
319 /**
320  * @brief Decides if new packet should be enqueued or dropped
321  * Updates run time data and gives verdict whether to enqueue or drop the packet.
322  *
323  * @param pie_cfg [in] config pointer to a PIE configuration parameter structure
324  * @param pie [in,out] data pointer to PIE runtime data
325  * @param qlen [in] queue length
326  * @param pkt_len [in] packet length in bytes
327  * @param time [in] current time stamp (measured in cpu cycles)
328  *
329  * @return Operation status
330  * @retval 0 enqueue the packet
331  * @retval 1 drop the packet based on drop probability criteria
332  */
333 static inline int
334 __rte_experimental
335 rte_pie_enqueue(const struct rte_pie_config *pie_cfg,
336 	struct rte_pie *pie,
337 	const unsigned int qlen,
338 	uint32_t pkt_len,
339 	const uint64_t time)
340 {
341 	RTE_ASSERT(pie_cfg != NULL);
342 	RTE_ASSERT(pie != NULL);
343 
344 	if (qlen != 0)
345 		return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time);
346 	else
347 		return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len);
348 }
349 
350 /**
351  * @brief PIE rate estimation method
352  * Called on each packet departure.
353  *
354  * @param pie [in] data pointer to PIE runtime data
355  * @param pkt_len [in] packet length in bytes
356  * @param time [in] current time stamp in cpu cycles
357  */
358 static inline void
359 __rte_experimental
360 rte_pie_dequeue(struct rte_pie *pie,
361 	uint32_t pkt_len,
362 	uint64_t time)
363 {
364 	/* Dequeue rate estimation */
365 	if (pie->in_measurement) {
366 		pie->departed_bytes_count += pkt_len;
367 
368 		/* Start a new measurement cycle when enough packets */
369 		if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) {
370 			uint64_t dq_time = time - pie->start_measurement;
371 
372 			if (pie->avg_dq_time == 0)
373 				pie->avg_dq_time = dq_time;
374 			else
375 				pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time
376 					* (1 - RTE_DQ_WEIGHT);
377 
378 			pie->in_measurement = 0;
379 		}
380 	}
381 
382 	/* Start measurement cycle when enough data in the queue */
383 	if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) {
384 		pie->in_measurement = 1;
385 		pie->start_measurement = time;
386 		pie->departed_bytes_count = 0;
387 	}
388 }
389 
390 #ifdef __cplusplus
391 }
392 #endif
393 
394 #endif /* __RTE_PIE_H_INCLUDED__ */
395