1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020 Intel Corporation 3 */ 4 5 #ifndef __RTE_PIE_H_INCLUDED__ 6 #define __RTE_PIE_H_INCLUDED__ 7 8 /** 9 * @file 10 * Proportional Integral controller Enhanced (PIE) 11 */ 12 13 #include <stdint.h> 14 15 #include <rte_random.h> 16 #include <rte_debug.h> 17 #include <rte_cycles.h> 18 19 #ifdef __cplusplus 20 extern "C" { 21 #endif 22 23 #define RTE_DQ_THRESHOLD 16384 /**< Queue length threshold (2^14) 24 * to start measurement cycle (bytes) 25 */ 26 #define RTE_DQ_WEIGHT 0.25 /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */ 27 #define RTE_ALPHA 0.125 /**< Weights in drop probability calculations */ 28 #define RTE_BETA 1.25 /**< Weights in drop probability calculations */ 29 #define RTE_RAND_MAX ~0LLU /**< Max value of the random number */ 30 31 32 /** 33 * PIE configuration parameters passed by user 34 */ 35 struct rte_pie_params { 36 uint16_t qdelay_ref; /**< Latency Target (milliseconds) */ 37 uint16_t dp_update_interval; /**< Update interval for drop probability (milliseconds) */ 38 uint16_t max_burst; /**< Max Burst Allowance (milliseconds) */ 39 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 40 }; 41 42 /** 43 * PIE configuration parameters 44 */ 45 struct rte_pie_config { 46 uint64_t qdelay_ref; /**< Latency Target (in CPU cycles.) */ 47 uint64_t dp_update_interval; /**< Update interval for drop probability (in CPU cycles) */ 48 uint64_t max_burst; /**< Max Burst Allowance (in CPU cycles.) */ 49 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 50 }; 51 52 /** 53 * PIE run-time data 54 */ 55 struct rte_pie { 56 uint16_t active; /**< Flag for activating/deactivating pie */ 57 uint16_t in_measurement; /**< Flag for activation of measurement cycle */ 58 uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */ 59 uint64_t start_measurement; /**< Time to start to measurement cycle (in cpu cycles) */ 60 uint64_t last_measurement; /**< Time of last measurement (in cpu cycles) */ 61 uint64_t qlen; /**< Queue length (packets count) */ 62 uint64_t qlen_bytes; /**< Queue length (bytes count) */ 63 uint64_t avg_dq_time; /**< Time averaged dequeue rate (in cpu cycles) */ 64 uint32_t burst_allowance; /**< Current burst allowance (bytes) */ 65 uint64_t qdelay_old; /**< Old queue delay (bytes) */ 66 double drop_prob; /**< Current packet drop probability */ 67 double accu_prob; /**< Accumulated packet drop probability */ 68 }; 69 70 /** 71 * @brief Initialises run-time data 72 * 73 * @param pie [in,out] data pointer to PIE runtime data 74 * 75 * @return Operation status 76 * @retval 0 success 77 * @retval !0 error 78 */ 79 int 80 rte_pie_rt_data_init(struct rte_pie *pie); 81 82 /** 83 * @brief Configures a single PIE configuration parameter structure. 84 * 85 * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure 86 * @param qdelay_ref [in] latency target(milliseconds) 87 * @param dp_update_interval [in] update interval for drop probability (milliseconds) 88 * @param max_burst [in] maximum burst allowance (milliseconds) 89 * @param tailq_th [in] tail drop threshold for the queue (number of packets) 90 * 91 * @return Operation status 92 * @retval 0 success 93 * @retval !0 error 94 */ 95 int 96 rte_pie_config_init(struct rte_pie_config *pie_cfg, 97 const uint16_t qdelay_ref, 98 const uint16_t dp_update_interval, 99 const uint16_t max_burst, 100 const uint16_t tailq_th); 101 102 /** 103 * @brief Decides packet enqueue when queue is empty 104 * 105 * Note: packet is never dropped in this particular case. 106 * 107 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 108 * @param pie [in, out] data pointer to PIE runtime data 109 * @param pkt_len [in] packet length in bytes 110 * 111 * @return Operation status 112 * @retval 0 enqueue the packet 113 * @retval !0 drop the packet 114 */ 115 static int 116 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg, 117 struct rte_pie *pie, 118 uint32_t pkt_len) 119 { 120 RTE_ASSERT(pkt_len != 0); 121 122 /* Update the PIE qlen parameter */ 123 pie->qlen++; 124 pie->qlen_bytes += pkt_len; 125 126 /** 127 * If the queue has been idle for a while, turn off PIE and Reset counters 128 */ 129 if ((pie->active == 1) && 130 (pie->qlen < (pie_cfg->tailq_th * 0.1))) { 131 pie->active = 0; 132 pie->in_measurement = 0; 133 } 134 135 return 0; 136 } 137 138 /** 139 * @brief make a decision to drop or enqueue a packet based on probability 140 * criteria 141 * 142 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 143 * @param pie [in, out] data pointer to PIE runtime data 144 * @param time [in] current time (measured in cpu cycles) 145 */ 146 static void 147 _calc_drop_probability(const struct rte_pie_config *pie_cfg, 148 struct rte_pie *pie, uint64_t time) 149 { 150 uint64_t qdelay_ref = pie_cfg->qdelay_ref; 151 152 /* Note: can be implemented using integer multiply. 153 * DQ_THRESHOLD is power of 2 value. 154 */ 155 uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14); 156 157 double p = RTE_ALPHA * (current_qdelay - qdelay_ref) + 158 RTE_BETA * (current_qdelay - pie->qdelay_old); 159 160 if (pie->drop_prob < 0.000001) 161 p = p * 0.00048828125; /* (1/2048) = 0.00048828125 */ 162 else if (pie->drop_prob < 0.00001) 163 p = p * 0.001953125; /* (1/512) = 0.001953125 */ 164 else if (pie->drop_prob < 0.0001) 165 p = p * 0.0078125; /* (1/128) = 0.0078125 */ 166 else if (pie->drop_prob < 0.001) 167 p = p * 0.03125; /* (1/32) = 0.03125 */ 168 else if (pie->drop_prob < 0.01) 169 p = p * 0.125; /* (1/8) = 0.125 */ 170 else if (pie->drop_prob < 0.1) 171 p = p * 0.5; /* (1/2) = 0.5 */ 172 173 if (pie->drop_prob >= 0.1 && p > 0.02) 174 p = 0.02; 175 176 pie->drop_prob += p; 177 178 double qdelay = qdelay_ref * 0.5; 179 180 /* Exponentially decay drop prob when congestion goes away */ 181 if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay) 182 pie->drop_prob *= 0.98; /* 1 - 1/64 is sufficient */ 183 184 /* Bound drop probability */ 185 if (pie->drop_prob < 0) 186 pie->drop_prob = 0; 187 if (pie->drop_prob > 1) 188 pie->drop_prob = 1; 189 190 pie->qdelay_old = current_qdelay; 191 pie->last_measurement = time; 192 193 uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval; 194 195 pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 196 } 197 198 /** 199 * @brief make a decision to drop or enqueue a packet based on probability 200 * criteria 201 * 202 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 203 * @param pie [in, out] data pointer to PIE runtime data 204 * 205 * @return operation status 206 * @retval 0 enqueue the packet 207 * @retval 1 drop the packet 208 */ 209 static inline int 210 _rte_pie_drop(const struct rte_pie_config *pie_cfg, 211 struct rte_pie *pie) 212 { 213 uint64_t qdelay = pie_cfg->qdelay_ref / 2; 214 215 /* PIE is active but the queue is not congested: return 0 */ 216 if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) || 217 (pie->qlen <= (pie_cfg->tailq_th * 0.1))) 218 return 0; 219 220 if (pie->drop_prob == 0) 221 pie->accu_prob = 0; 222 223 /* For practical reasons, drop probability can be further scaled according 224 * to packet size, but one needs to set a bound to avoid unnecessary bias 225 * Random drop 226 */ 227 pie->accu_prob += pie->drop_prob; 228 229 if (pie->accu_prob < 0.85) 230 return 0; 231 232 if (pie->accu_prob >= 8.5) 233 return 1; 234 235 if (rte_drand() < pie->drop_prob) { 236 pie->accu_prob = 0; 237 return 1; 238 } 239 240 /* No drop */ 241 return 0; 242 } 243 244 /** 245 * @brief Decides if new packet should be enqueued or dropped for non-empty queue 246 * 247 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 248 * @param pie [in,out] data pointer to PIE runtime data 249 * @param pkt_len [in] packet length in bytes 250 * @param time [in] current time (measured in cpu cycles) 251 * 252 * @return Operation status 253 * @retval 0 enqueue the packet 254 * @retval 1 drop the packet based on max threshold criterion 255 * @retval 2 drop the packet based on mark probability criterion 256 */ 257 static inline int 258 rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg, 259 struct rte_pie *pie, 260 uint32_t pkt_len, 261 const uint64_t time) 262 { 263 /* Check queue space against the tail drop threshold */ 264 if (pie->qlen >= pie_cfg->tailq_th) { 265 266 pie->accu_prob = 0; 267 return 1; 268 } 269 270 if (pie->active) { 271 /* Update drop probability after certain interval */ 272 if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval) 273 _calc_drop_probability(pie_cfg, pie, time); 274 275 /* Decide whether packet to be dropped or enqueued */ 276 if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0) 277 return 2; 278 } 279 280 /* When queue occupancy is over a certain threshold, turn on PIE */ 281 if ((pie->active == 0) && 282 (pie->qlen >= (pie_cfg->tailq_th * 0.1))) { 283 pie->active = 1; 284 pie->qdelay_old = 0; 285 pie->drop_prob = 0; 286 pie->in_measurement = 1; 287 pie->departed_bytes_count = 0; 288 pie->avg_dq_time = 0; 289 pie->last_measurement = time; 290 pie->burst_allowance = pie_cfg->max_burst; 291 pie->accu_prob = 0; 292 pie->start_measurement = time; 293 } 294 295 /* when queue has been idle for a while, turn off PIE and Reset counters */ 296 if (pie->active == 1 && 297 pie->qlen < (pie_cfg->tailq_th * 0.1)) { 298 pie->active = 0; 299 pie->in_measurement = 0; 300 } 301 302 /* Update PIE qlen parameter */ 303 pie->qlen++; 304 pie->qlen_bytes += pkt_len; 305 306 /* No drop */ 307 return 0; 308 } 309 310 /** 311 * @brief Decides if new packet should be enqueued or dropped 312 * Updates run time data and gives verdict whether to enqueue or drop the packet. 313 * 314 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 315 * @param pie [in,out] data pointer to PIE runtime data 316 * @param qlen [in] queue length 317 * @param pkt_len [in] packet length in bytes 318 * @param time [in] current time stamp (measured in cpu cycles) 319 * 320 * @return Operation status 321 * @retval 0 enqueue the packet 322 * @retval 1 drop the packet based on drop probability criteria 323 */ 324 static inline int 325 rte_pie_enqueue(const struct rte_pie_config *pie_cfg, 326 struct rte_pie *pie, 327 const unsigned int qlen, 328 uint32_t pkt_len, 329 const uint64_t time) 330 { 331 RTE_ASSERT(pie_cfg != NULL); 332 RTE_ASSERT(pie != NULL); 333 334 if (qlen != 0) 335 return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time); 336 else 337 return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); 338 } 339 340 /** 341 * @brief PIE rate estimation method 342 * Called on each packet departure. 343 * 344 * @param pie [in] data pointer to PIE runtime data 345 * @param pkt_len [in] packet length in bytes 346 * @param time [in] current time stamp in cpu cycles 347 */ 348 static inline void 349 rte_pie_dequeue(struct rte_pie *pie, 350 uint32_t pkt_len, 351 uint64_t time) 352 { 353 /* Dequeue rate estimation */ 354 if (pie->in_measurement) { 355 pie->departed_bytes_count += pkt_len; 356 357 /* Start a new measurement cycle when enough packets */ 358 if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) { 359 uint64_t dq_time = time - pie->start_measurement; 360 361 if (pie->avg_dq_time == 0) 362 pie->avg_dq_time = dq_time; 363 else 364 pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time 365 * (1 - RTE_DQ_WEIGHT); 366 367 pie->in_measurement = 0; 368 } 369 } 370 371 /* Start measurement cycle when enough data in the queue */ 372 if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) { 373 pie->in_measurement = 1; 374 pie->start_measurement = time; 375 pie->departed_bytes_count = 0; 376 } 377 } 378 379 #ifdef __cplusplus 380 } 381 #endif 382 383 #endif /* __RTE_PIE_H_INCLUDED__ */ 384