1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020 Intel Corporation 3 */ 4 5 #ifndef __RTE_PIE_H_INCLUDED__ 6 #define __RTE_PIE_H_INCLUDED__ 7 8 #ifdef __cplusplus 9 extern "C" { 10 #endif 11 12 /** 13 * @file 14 * Proportional Integral controller Enhanced (PIE) 15 **/ 16 17 #include <stdint.h> 18 19 #include <rte_random.h> 20 #include <rte_debug.h> 21 #include <rte_cycles.h> 22 23 #define RTE_DQ_THRESHOLD 16384 /**< Queue length threshold (2^14) 24 * to start measurement cycle (bytes) 25 */ 26 #define RTE_DQ_WEIGHT 0.25 /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */ 27 #define RTE_ALPHA 0.125 /**< Weights in drop probability calculations */ 28 #define RTE_BETA 1.25 /**< Weights in drop probability calculations */ 29 #define RTE_RAND_MAX ~0LLU /**< Max value of the random number */ 30 31 32 /** 33 * PIE configuration parameters passed by user 34 * 35 */ 36 struct rte_pie_params { 37 uint16_t qdelay_ref; /**< Latency Target (milliseconds) */ 38 uint16_t dp_update_interval; /**< Update interval for drop probability (milliseconds) */ 39 uint16_t max_burst; /**< Max Burst Allowance (milliseconds) */ 40 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 41 }; 42 43 /** 44 * PIE configuration parameters 45 * 46 */ 47 struct rte_pie_config { 48 uint64_t qdelay_ref; /**< Latency Target (in CPU cycles.) */ 49 uint64_t dp_update_interval; /**< Update interval for drop probability (in CPU cycles) */ 50 uint64_t max_burst; /**< Max Burst Allowance (in CPU cycles.) */ 51 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 52 }; 53 54 /** 55 * PIE run-time data 56 */ 57 struct rte_pie { 58 uint16_t active; /**< Flag for activating/deactivating pie */ 59 uint16_t in_measurement; /**< Flag for activation of measurement cycle */ 60 uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */ 61 uint64_t start_measurement; /**< Time to start to measurement cycle (in cpu cycles) */ 62 uint64_t last_measurement; /**< Time of last measurement (in cpu cycles) */ 63 uint64_t qlen; /**< Queue length (packets count) */ 64 uint64_t qlen_bytes; /**< Queue length (bytes count) */ 65 uint64_t avg_dq_time; /**< Time averaged dequeue rate (in cpu cycles) */ 66 uint32_t burst_allowance; /**< Current burst allowance (bytes) */ 67 uint64_t qdelay_old; /**< Old queue delay (bytes) */ 68 double drop_prob; /**< Current packet drop probability */ 69 double accu_prob; /**< Accumulated packet drop probability */ 70 }; 71 72 /** 73 * @brief Initialises run-time data 74 * 75 * @param pie [in,out] data pointer to PIE runtime data 76 * 77 * @return Operation status 78 * @retval 0 success 79 * @retval !0 error 80 */ 81 int 82 __rte_experimental 83 rte_pie_rt_data_init(struct rte_pie *pie); 84 85 /** 86 * @brief Configures a single PIE configuration parameter structure. 87 * 88 * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure 89 * @param qdelay_ref [in] latency target(milliseconds) 90 * @param dp_update_interval [in] update interval for drop probability (milliseconds) 91 * @param max_burst [in] maximum burst allowance (milliseconds) 92 * @param tailq_th [in] tail drop threshold for the queue (number of packets) 93 * 94 * @return Operation status 95 * @retval 0 success 96 * @retval !0 error 97 */ 98 int 99 __rte_experimental 100 rte_pie_config_init(struct rte_pie_config *pie_cfg, 101 const uint16_t qdelay_ref, 102 const uint16_t dp_update_interval, 103 const uint16_t max_burst, 104 const uint16_t tailq_th); 105 106 /** 107 * @brief Decides packet enqueue when queue is empty 108 * 109 * Note: packet is never dropped in this particular case. 110 * 111 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 112 * @param pie [in, out] data pointer to PIE runtime data 113 * @param pkt_len [in] packet length in bytes 114 * 115 * @return Operation status 116 * @retval 0 enqueue the packet 117 * @retval !0 drop the packet 118 */ 119 static int 120 __rte_experimental 121 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg, 122 struct rte_pie *pie, 123 uint32_t pkt_len) 124 { 125 RTE_ASSERT(pkt_len != 0); 126 127 /* Update the PIE qlen parameter */ 128 pie->qlen++; 129 pie->qlen_bytes += pkt_len; 130 131 /** 132 * If the queue has been idle for a while, turn off PIE and Reset counters 133 */ 134 if ((pie->active == 1) && 135 (pie->qlen < (pie_cfg->tailq_th * 0.1))) { 136 pie->active = 0; 137 pie->in_measurement = 0; 138 } 139 140 return 0; 141 } 142 143 /** 144 * @brief make a decision to drop or enqueue a packet based on probability 145 * criteria 146 * 147 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 148 * @param pie [in, out] data pointer to PIE runtime data 149 * @param time [in] current time (measured in cpu cycles) 150 */ 151 static void 152 __rte_experimental 153 _calc_drop_probability(const struct rte_pie_config *pie_cfg, 154 struct rte_pie *pie, uint64_t time) 155 { 156 uint64_t qdelay_ref = pie_cfg->qdelay_ref; 157 158 /* Note: can be implemented using integer multiply. 159 * DQ_THRESHOLD is power of 2 value. 160 */ 161 uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14); 162 163 double p = RTE_ALPHA * (current_qdelay - qdelay_ref) + 164 RTE_BETA * (current_qdelay - pie->qdelay_old); 165 166 if (pie->drop_prob < 0.000001) 167 p = p * 0.00048828125; /* (1/2048) = 0.00048828125 */ 168 else if (pie->drop_prob < 0.00001) 169 p = p * 0.001953125; /* (1/512) = 0.001953125 */ 170 else if (pie->drop_prob < 0.0001) 171 p = p * 0.0078125; /* (1/128) = 0.0078125 */ 172 else if (pie->drop_prob < 0.001) 173 p = p * 0.03125; /* (1/32) = 0.03125 */ 174 else if (pie->drop_prob < 0.01) 175 p = p * 0.125; /* (1/8) = 0.125 */ 176 else if (pie->drop_prob < 0.1) 177 p = p * 0.5; /* (1/2) = 0.5 */ 178 179 if (pie->drop_prob >= 0.1 && p > 0.02) 180 p = 0.02; 181 182 pie->drop_prob += p; 183 184 double qdelay = qdelay_ref * 0.5; 185 186 /* Exponentially decay drop prob when congestion goes away */ 187 if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay) 188 pie->drop_prob *= 0.98; /* 1 - 1/64 is sufficient */ 189 190 /* Bound drop probability */ 191 if (pie->drop_prob < 0) 192 pie->drop_prob = 0; 193 if (pie->drop_prob > 1) 194 pie->drop_prob = 1; 195 196 pie->qdelay_old = current_qdelay; 197 pie->last_measurement = time; 198 199 uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval; 200 201 pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 202 } 203 204 /** 205 * @brief make a decision to drop or enqueue a packet based on probability 206 * criteria 207 * 208 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 209 * @param pie [in, out] data pointer to PIE runtime data 210 * 211 * @return operation status 212 * @retval 0 enqueue the packet 213 * @retval 1 drop the packet 214 */ 215 static inline int 216 __rte_experimental 217 _rte_pie_drop(const struct rte_pie_config *pie_cfg, 218 struct rte_pie *pie) 219 { 220 uint64_t qdelay = pie_cfg->qdelay_ref / 2; 221 222 /* PIE is active but the queue is not congested: return 0 */ 223 if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) || 224 (pie->qlen <= (pie_cfg->tailq_th * 0.1))) 225 return 0; 226 227 if (pie->drop_prob == 0) 228 pie->accu_prob = 0; 229 230 /* For practical reasons, drop probability can be further scaled according 231 * to packet size, but one needs to set a bound to avoid unnecessary bias 232 * Random drop 233 */ 234 pie->accu_prob += pie->drop_prob; 235 236 if (pie->accu_prob < 0.85) 237 return 0; 238 239 if (pie->accu_prob >= 8.5) 240 return 1; 241 242 if (rte_drand() < pie->drop_prob) { 243 pie->accu_prob = 0; 244 return 1; 245 } 246 247 /* No drop */ 248 return 0; 249 } 250 251 /** 252 * @brief Decides if new packet should be enqueued or dropped for non-empty queue 253 * 254 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 255 * @param pie [in,out] data pointer to PIE runtime data 256 * @param pkt_len [in] packet length in bytes 257 * @param time [in] current time (measured in cpu cycles) 258 * 259 * @return Operation status 260 * @retval 0 enqueue the packet 261 * @retval 1 drop the packet based on max threshold criterion 262 * @retval 2 drop the packet based on mark probability criterion 263 */ 264 static inline int 265 __rte_experimental 266 rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg, 267 struct rte_pie *pie, 268 uint32_t pkt_len, 269 const uint64_t time) 270 { 271 /* Check queue space against the tail drop threshold */ 272 if (pie->qlen >= pie_cfg->tailq_th) { 273 274 pie->accu_prob = 0; 275 return 1; 276 } 277 278 if (pie->active) { 279 /* Update drop probability after certain interval */ 280 if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval) 281 _calc_drop_probability(pie_cfg, pie, time); 282 283 /* Decide whether packet to be dropped or enqueued */ 284 if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0) 285 return 2; 286 } 287 288 /* When queue occupancy is over a certain threshold, turn on PIE */ 289 if ((pie->active == 0) && 290 (pie->qlen >= (pie_cfg->tailq_th * 0.1))) { 291 pie->active = 1; 292 pie->qdelay_old = 0; 293 pie->drop_prob = 0; 294 pie->in_measurement = 1; 295 pie->departed_bytes_count = 0; 296 pie->avg_dq_time = 0; 297 pie->last_measurement = time; 298 pie->burst_allowance = pie_cfg->max_burst; 299 pie->accu_prob = 0; 300 pie->start_measurement = time; 301 } 302 303 /* when queue has been idle for a while, turn off PIE and Reset counters */ 304 if (pie->active == 1 && 305 pie->qlen < (pie_cfg->tailq_th * 0.1)) { 306 pie->active = 0; 307 pie->in_measurement = 0; 308 } 309 310 /* Update PIE qlen parameter */ 311 pie->qlen++; 312 pie->qlen_bytes += pkt_len; 313 314 /* No drop */ 315 return 0; 316 } 317 318 /** 319 * @brief Decides if new packet should be enqueued or dropped 320 * Updates run time data and gives verdict whether to enqueue or drop the packet. 321 * 322 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 323 * @param pie [in,out] data pointer to PIE runtime data 324 * @param qlen [in] queue length 325 * @param pkt_len [in] packet length in bytes 326 * @param time [in] current time stamp (measured in cpu cycles) 327 * 328 * @return Operation status 329 * @retval 0 enqueue the packet 330 * @retval 1 drop the packet based on drop probability criteria 331 */ 332 static inline int 333 __rte_experimental 334 rte_pie_enqueue(const struct rte_pie_config *pie_cfg, 335 struct rte_pie *pie, 336 const unsigned int qlen, 337 uint32_t pkt_len, 338 const uint64_t time) 339 { 340 RTE_ASSERT(pie_cfg != NULL); 341 RTE_ASSERT(pie != NULL); 342 343 if (qlen != 0) 344 return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time); 345 else 346 return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); 347 } 348 349 /** 350 * @brief PIE rate estimation method 351 * Called on each packet departure. 352 * 353 * @param pie [in] data pointer to PIE runtime data 354 * @param pkt_len [in] packet length in bytes 355 * @param time [in] current time stamp in cpu cycles 356 */ 357 static inline void 358 __rte_experimental 359 rte_pie_dequeue(struct rte_pie *pie, 360 uint32_t pkt_len, 361 uint64_t time) 362 { 363 /* Dequeue rate estimation */ 364 if (pie->in_measurement) { 365 pie->departed_bytes_count += pkt_len; 366 367 /* Start a new measurement cycle when enough packets */ 368 if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) { 369 uint64_t dq_time = time - pie->start_measurement; 370 371 if (pie->avg_dq_time == 0) 372 pie->avg_dq_time = dq_time; 373 else 374 pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time 375 * (1 - RTE_DQ_WEIGHT); 376 377 pie->in_measurement = 0; 378 } 379 } 380 381 /* Start measurement cycle when enough data in the queue */ 382 if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) { 383 pie->in_measurement = 1; 384 pie->start_measurement = time; 385 pie->departed_bytes_count = 0; 386 } 387 } 388 389 #ifdef __cplusplus 390 } 391 #endif 392 393 #endif /* __RTE_PIE_H_INCLUDED__ */ 394