1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020 Intel Corporation 3 */ 4 5 #ifndef __RTE_PIE_H_INCLUDED__ 6 #define __RTE_PIE_H_INCLUDED__ 7 8 #ifdef __cplusplus 9 extern "C" { 10 #endif 11 12 /** 13 * @file 14 * Proportional Integral controller Enhanced (PIE) 15 **/ 16 17 #include <stdint.h> 18 19 #include <rte_random.h> 20 #include <rte_debug.h> 21 #include <rte_cycles.h> 22 23 #define RTE_DQ_THRESHOLD 16384 /**< Queue length threshold (2^14) 24 * to start measurement cycle (bytes) 25 */ 26 #define RTE_DQ_WEIGHT 0.25 /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */ 27 #define RTE_ALPHA 0.125 /**< Weights in drop probability calculations */ 28 #define RTE_BETA 1.25 /**< Weights in drop probability calculations */ 29 #define RTE_RAND_MAX ~0LLU /**< Max value of the random number */ 30 31 32 /** 33 * PIE configuration parameters passed by user 34 * 35 */ 36 struct rte_pie_params { 37 uint16_t qdelay_ref; /**< Latency Target (milliseconds) */ 38 uint16_t dp_update_interval; /**< Update interval for drop probability (milliseconds) */ 39 uint16_t max_burst; /**< Max Burst Allowance (milliseconds) */ 40 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 41 }; 42 43 /** 44 * PIE configuration parameters 45 * 46 */ 47 struct rte_pie_config { 48 uint64_t qdelay_ref; /**< Latency Target (in CPU cycles.) */ 49 uint64_t dp_update_interval; /**< Update interval for drop probability (in CPU cycles) */ 50 uint64_t max_burst; /**< Max Burst Allowance (in CPU cycles.) */ 51 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 52 }; 53 54 /** 55 * PIE run-time data 56 */ 57 struct rte_pie { 58 uint16_t active; /**< Flag for activating/deactivating pie */ 59 uint16_t in_measurement; /**< Flag for activation of measurement cycle */ 60 uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */ 61 uint64_t start_measurement; /**< Time to start to measurement cycle (in cpu cycles) */ 62 uint64_t last_measurement; /**< Time of last measurement (in cpu cycles) */ 63 uint64_t qlen; /**< Queue length (packets count) */ 64 uint64_t qlen_bytes; /**< Queue length (bytes count) */ 65 uint64_t avg_dq_time; /**< Time averaged dequeue rate (in cpu cycles) */ 66 uint32_t burst_allowance; /**< Current burst allowance (bytes) */ 67 uint64_t qdelay_old; /**< Old queue delay (bytes) */ 68 double drop_prob; /**< Current packet drop probability */ 69 double accu_prob; /**< Accumulated packet drop probability */ 70 }; 71 72 /** 73 * @brief Initialises run-time data 74 * 75 * @param pie [in,out] data pointer to PIE runtime data 76 * 77 * @return Operation status 78 * @retval 0 success 79 * @retval !0 error 80 */ 81 int 82 __rte_experimental 83 rte_pie_rt_data_init(struct rte_pie *pie); 84 85 /** 86 * @brief Configures a single PIE configuration parameter structure. 87 * 88 * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure 89 * @param qdelay_ref [in] latency target(milliseconds) 90 * @param dp_update_interval [in] update interval for drop probability (milliseconds) 91 * @param max_burst [in] maximum burst allowance (milliseconds) 92 * @param tailq_th [in] tail drop threshold for the queue (number of packets) 93 * 94 * @return Operation status 95 * @retval 0 success 96 * @retval !0 error 97 */ 98 int 99 __rte_experimental 100 rte_pie_config_init(struct rte_pie_config *pie_cfg, 101 const uint16_t qdelay_ref, 102 const uint16_t dp_update_interval, 103 const uint16_t max_burst, 104 const uint16_t tailq_th); 105 106 /** 107 * @brief Decides packet enqueue when queue is empty 108 * 109 * Note: packet is never dropped in this particular case. 110 * 111 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 112 * @param pie [in, out] data pointer to PIE runtime data 113 * @param pkt_len [in] packet length in bytes 114 * 115 * @return Operation status 116 * @retval 0 enqueue the packet 117 * @retval !0 drop the packet 118 */ 119 static int 120 __rte_experimental 121 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg, 122 struct rte_pie *pie, 123 uint32_t pkt_len) 124 { 125 RTE_ASSERT(pkt_len != 0); 126 127 /* Update the PIE qlen parameter */ 128 pie->qlen++; 129 pie->qlen_bytes += pkt_len; 130 131 /** 132 * If the queue has been idle for a while, turn off PIE and Reset counters 133 */ 134 if ((pie->active == 1) && 135 (pie->qlen < (pie_cfg->tailq_th * 0.1))) { 136 pie->active = 0; 137 pie->in_measurement = 0; 138 } 139 140 return 0; 141 } 142 143 /** 144 * @brief make a decision to drop or enqueue a packet based on probability 145 * criteria 146 * 147 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 148 * @param pie [in, out] data pointer to PIE runtime data 149 * @param time [in] current time (measured in cpu cycles) 150 */ 151 static void 152 __rte_experimental 153 _calc_drop_probability(const struct rte_pie_config *pie_cfg, 154 struct rte_pie *pie, uint64_t time) 155 { 156 uint64_t qdelay_ref = pie_cfg->qdelay_ref; 157 158 /* Note: can be implemented using integer multiply. 159 * DQ_THRESHOLD is power of 2 value. 160 */ 161 uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14); 162 163 double p = RTE_ALPHA * (current_qdelay - qdelay_ref) + 164 RTE_BETA * (current_qdelay - pie->qdelay_old); 165 166 if (pie->drop_prob < 0.000001) 167 p = p * 0.00048828125; /* (1/2048) = 0.00048828125 */ 168 else if (pie->drop_prob < 0.00001) 169 p = p * 0.001953125; /* (1/512) = 0.001953125 */ 170 else if (pie->drop_prob < 0.0001) 171 p = p * 0.0078125; /* (1/128) = 0.0078125 */ 172 else if (pie->drop_prob < 0.001) 173 p = p * 0.03125; /* (1/32) = 0.03125 */ 174 else if (pie->drop_prob < 0.01) 175 p = p * 0.125; /* (1/8) = 0.125 */ 176 else if (pie->drop_prob < 0.1) 177 p = p * 0.5; /* (1/2) = 0.5 */ 178 179 if (pie->drop_prob >= 0.1 && p > 0.02) 180 p = 0.02; 181 182 pie->drop_prob += p; 183 184 double qdelay = qdelay_ref * 0.5; 185 186 /* Exponentially decay drop prob when congestion goes away */ 187 if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay) 188 pie->drop_prob *= 0.98; /* 1 - 1/64 is sufficient */ 189 190 /* Bound drop probability */ 191 if (pie->drop_prob < 0) 192 pie->drop_prob = 0; 193 if (pie->drop_prob > 1) 194 pie->drop_prob = 1; 195 196 pie->qdelay_old = current_qdelay; 197 pie->last_measurement = time; 198 199 uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval; 200 201 pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 202 } 203 204 /** 205 * @brief make a decision to drop or enqueue a packet based on probability 206 * criteria 207 * 208 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 209 * @param pie [in, out] data pointer to PIE runtime data 210 * 211 * @return operation status 212 * @retval 0 enqueue the packet 213 * @retval 1 drop the packet 214 */ 215 static inline int 216 __rte_experimental 217 _rte_pie_drop(const struct rte_pie_config *pie_cfg, 218 struct rte_pie *pie) 219 { 220 uint64_t rand_value; 221 double qdelay = pie_cfg->qdelay_ref * 0.5; 222 223 /* PIE is active but the queue is not congested: return 0 */ 224 if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) || 225 (pie->qlen <= (pie_cfg->tailq_th * 0.1))) 226 return 0; 227 228 if (pie->drop_prob == 0) 229 pie->accu_prob = 0; 230 231 /* For practical reasons, drop probability can be further scaled according 232 * to packet size, but one needs to set a bound to avoid unnecessary bias 233 * Random drop 234 */ 235 pie->accu_prob += pie->drop_prob; 236 237 if (pie->accu_prob < 0.85) 238 return 0; 239 240 if (pie->accu_prob >= 8.5) 241 return 1; 242 243 rand_value = rte_rand()/RTE_RAND_MAX; 244 245 if ((double)rand_value < pie->drop_prob) { 246 pie->accu_prob = 0; 247 return 1; 248 } 249 250 /* No drop */ 251 return 0; 252 } 253 254 /** 255 * @brief Decides if new packet should be enqeued or dropped for non-empty queue 256 * 257 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 258 * @param pie [in,out] data pointer to PIE runtime data 259 * @param pkt_len [in] packet length in bytes 260 * @param time [in] current time (measured in cpu cycles) 261 * 262 * @return Operation status 263 * @retval 0 enqueue the packet 264 * @retval 1 drop the packet based on max threshold criterion 265 * @retval 2 drop the packet based on mark probability criterion 266 */ 267 static inline int 268 __rte_experimental 269 rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg, 270 struct rte_pie *pie, 271 uint32_t pkt_len, 272 const uint64_t time) 273 { 274 /* Check queue space against the tail drop threshold */ 275 if (pie->qlen >= pie_cfg->tailq_th) { 276 277 pie->accu_prob = 0; 278 return 1; 279 } 280 281 if (pie->active) { 282 /* Update drop probability after certain interval */ 283 if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval) 284 _calc_drop_probability(pie_cfg, pie, time); 285 286 /* Decide whether packet to be dropped or enqueued */ 287 if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0) 288 return 2; 289 } 290 291 /* When queue occupancy is over a certain threshold, turn on PIE */ 292 if ((pie->active == 0) && 293 (pie->qlen >= (pie_cfg->tailq_th * 0.1))) { 294 pie->active = 1; 295 pie->qdelay_old = 0; 296 pie->drop_prob = 0; 297 pie->in_measurement = 1; 298 pie->departed_bytes_count = 0; 299 pie->avg_dq_time = 0; 300 pie->last_measurement = time; 301 pie->burst_allowance = pie_cfg->max_burst; 302 pie->accu_prob = 0; 303 pie->start_measurement = time; 304 } 305 306 /* when queue has been idle for a while, turn off PIE and Reset counters */ 307 if (pie->active == 1 && 308 pie->qlen < (pie_cfg->tailq_th * 0.1)) { 309 pie->active = 0; 310 pie->in_measurement = 0; 311 } 312 313 /* Update PIE qlen parameter */ 314 pie->qlen++; 315 pie->qlen_bytes += pkt_len; 316 317 /* No drop */ 318 return 0; 319 } 320 321 /** 322 * @brief Decides if new packet should be enqeued or dropped 323 * Updates run time data and gives verdict whether to enqueue or drop the packet. 324 * 325 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 326 * @param pie [in,out] data pointer to PIE runtime data 327 * @param qlen [in] queue length 328 * @param pkt_len [in] packet length in bytes 329 * @param time [in] current time stamp (measured in cpu cycles) 330 * 331 * @return Operation status 332 * @retval 0 enqueue the packet 333 * @retval 1 drop the packet based on drop probility criteria 334 */ 335 static inline int 336 __rte_experimental 337 rte_pie_enqueue(const struct rte_pie_config *pie_cfg, 338 struct rte_pie *pie, 339 const unsigned int qlen, 340 uint32_t pkt_len, 341 const uint64_t time) 342 { 343 RTE_ASSERT(pie_cfg != NULL); 344 RTE_ASSERT(pie != NULL); 345 346 if (qlen != 0) 347 return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time); 348 else 349 return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); 350 } 351 352 /** 353 * @brief PIE rate estimation method 354 * Called on each packet departure. 355 * 356 * @param pie [in] data pointer to PIE runtime data 357 * @param pkt_len [in] packet length in bytes 358 * @param time [in] current time stamp in cpu cycles 359 */ 360 static inline void 361 __rte_experimental 362 rte_pie_dequeue(struct rte_pie *pie, 363 uint32_t pkt_len, 364 uint64_t time) 365 { 366 /* Dequeue rate estimation */ 367 if (pie->in_measurement) { 368 pie->departed_bytes_count += pkt_len; 369 370 /* Start a new measurement cycle when enough packets */ 371 if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) { 372 uint64_t dq_time = time - pie->start_measurement; 373 374 if (pie->avg_dq_time == 0) 375 pie->avg_dq_time = dq_time; 376 else 377 pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time 378 * (1 - RTE_DQ_WEIGHT); 379 380 pie->in_measurement = 0; 381 } 382 } 383 384 /* Start measurement cycle when enough data in the queue */ 385 if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) { 386 pie->in_measurement = 1; 387 pie->start_measurement = time; 388 pie->departed_bytes_count = 0; 389 } 390 } 391 392 #ifdef __cplusplus 393 } 394 #endif 395 396 #endif /* __RTE_PIE_H_INCLUDED__ */ 397