1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020 Intel Corporation 3 */ 4 5 #ifndef __RTE_PIE_H_INCLUDED__ 6 #define __RTE_PIE_H_INCLUDED__ 7 8 #ifdef __cplusplus 9 extern "C" { 10 #endif 11 12 /** 13 * @file 14 * Proportional Integral controller Enhanced (PIE) 15 **/ 16 17 #include <stdint.h> 18 19 #include <rte_compat.h> 20 #include <rte_random.h> 21 #include <rte_debug.h> 22 #include <rte_cycles.h> 23 24 #define RTE_DQ_THRESHOLD 16384 /**< Queue length threshold (2^14) 25 * to start measurement cycle (bytes) 26 */ 27 #define RTE_DQ_WEIGHT 0.25 /**< Weight (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */ 28 #define RTE_ALPHA 0.125 /**< Weights in drop probability calculations */ 29 #define RTE_BETA 1.25 /**< Weights in drop probability calculations */ 30 #define RTE_RAND_MAX ~0LLU /**< Max value of the random number */ 31 32 33 /** 34 * PIE configuration parameters passed by user 35 * 36 */ 37 struct rte_pie_params { 38 uint16_t qdelay_ref; /**< Latency Target (milliseconds) */ 39 uint16_t dp_update_interval; /**< Update interval for drop probability (milliseconds) */ 40 uint16_t max_burst; /**< Max Burst Allowance (milliseconds) */ 41 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 42 }; 43 44 /** 45 * PIE configuration parameters 46 * 47 */ 48 struct rte_pie_config { 49 uint64_t qdelay_ref; /**< Latency Target (in CPU cycles.) */ 50 uint64_t dp_update_interval; /**< Update interval for drop probability (in CPU cycles) */ 51 uint64_t max_burst; /**< Max Burst Allowance (in CPU cycles.) */ 52 uint16_t tailq_th; /**< Tailq drop threshold (packet counts) */ 53 }; 54 55 /** 56 * PIE run-time data 57 */ 58 struct rte_pie { 59 uint16_t active; /**< Flag for activating/deactivating pie */ 60 uint16_t in_measurement; /**< Flag for activation of measurement cycle */ 61 uint32_t departed_bytes_count; /**< Number of bytes departed in current measurement cycle */ 62 uint64_t start_measurement; /**< Time to start to measurement cycle (in cpu cycles) */ 63 uint64_t last_measurement; /**< Time of last measurement (in cpu cycles) */ 64 uint64_t qlen; /**< Queue length (packets count) */ 65 uint64_t qlen_bytes; /**< Queue length (bytes count) */ 66 uint64_t avg_dq_time; /**< Time averaged dequeue rate (in cpu cycles) */ 67 uint32_t burst_allowance; /**< Current burst allowance (bytes) */ 68 uint64_t qdelay_old; /**< Old queue delay (bytes) */ 69 double drop_prob; /**< Current packet drop probability */ 70 double accu_prob; /**< Accumulated packet drop probability */ 71 }; 72 73 /** 74 * @brief Initialises run-time data 75 * 76 * @param pie [in,out] data pointer to PIE runtime data 77 * 78 * @return Operation status 79 * @retval 0 success 80 * @retval !0 error 81 */ 82 int 83 __rte_experimental 84 rte_pie_rt_data_init(struct rte_pie *pie); 85 86 /** 87 * @brief Configures a single PIE configuration parameter structure. 88 * 89 * @param pie_cfg [in,out] config pointer to a PIE configuration parameter structure 90 * @param qdelay_ref [in] latency target(milliseconds) 91 * @param dp_update_interval [in] update interval for drop probability (milliseconds) 92 * @param max_burst [in] maximum burst allowance (milliseconds) 93 * @param tailq_th [in] tail drop threshold for the queue (number of packets) 94 * 95 * @return Operation status 96 * @retval 0 success 97 * @retval !0 error 98 */ 99 int 100 __rte_experimental 101 rte_pie_config_init(struct rte_pie_config *pie_cfg, 102 const uint16_t qdelay_ref, 103 const uint16_t dp_update_interval, 104 const uint16_t max_burst, 105 const uint16_t tailq_th); 106 107 /** 108 * @brief Decides packet enqueue when queue is empty 109 * 110 * Note: packet is never dropped in this particular case. 111 * 112 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 113 * @param pie [in, out] data pointer to PIE runtime data 114 * @param pkt_len [in] packet length in bytes 115 * 116 * @return Operation status 117 * @retval 0 enqueue the packet 118 * @retval !0 drop the packet 119 */ 120 static int 121 __rte_experimental 122 rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg, 123 struct rte_pie *pie, 124 uint32_t pkt_len) 125 { 126 RTE_ASSERT(pkt_len != 0); 127 128 /* Update the PIE qlen parameter */ 129 pie->qlen++; 130 pie->qlen_bytes += pkt_len; 131 132 /** 133 * If the queue has been idle for a while, turn off PIE and Reset counters 134 */ 135 if ((pie->active == 1) && 136 (pie->qlen < (pie_cfg->tailq_th * 0.1))) { 137 pie->active = 0; 138 pie->in_measurement = 0; 139 } 140 141 return 0; 142 } 143 144 /** 145 * @brief make a decision to drop or enqueue a packet based on probability 146 * criteria 147 * 148 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 149 * @param pie [in, out] data pointer to PIE runtime data 150 * @param time [in] current time (measured in cpu cycles) 151 */ 152 static void 153 __rte_experimental 154 _calc_drop_probability(const struct rte_pie_config *pie_cfg, 155 struct rte_pie *pie, uint64_t time) 156 { 157 uint64_t qdelay_ref = pie_cfg->qdelay_ref; 158 159 /* Note: can be implemented using integer multiply. 160 * DQ_THRESHOLD is power of 2 value. 161 */ 162 uint64_t current_qdelay = pie->qlen * (pie->avg_dq_time >> 14); 163 164 double p = RTE_ALPHA * (current_qdelay - qdelay_ref) + 165 RTE_BETA * (current_qdelay - pie->qdelay_old); 166 167 if (pie->drop_prob < 0.000001) 168 p = p * 0.00048828125; /* (1/2048) = 0.00048828125 */ 169 else if (pie->drop_prob < 0.00001) 170 p = p * 0.001953125; /* (1/512) = 0.001953125 */ 171 else if (pie->drop_prob < 0.0001) 172 p = p * 0.0078125; /* (1/128) = 0.0078125 */ 173 else if (pie->drop_prob < 0.001) 174 p = p * 0.03125; /* (1/32) = 0.03125 */ 175 else if (pie->drop_prob < 0.01) 176 p = p * 0.125; /* (1/8) = 0.125 */ 177 else if (pie->drop_prob < 0.1) 178 p = p * 0.5; /* (1/2) = 0.5 */ 179 180 if (pie->drop_prob >= 0.1 && p > 0.02) 181 p = 0.02; 182 183 pie->drop_prob += p; 184 185 double qdelay = qdelay_ref * 0.5; 186 187 /* Exponentially decay drop prob when congestion goes away */ 188 if ((double)current_qdelay < qdelay && pie->qdelay_old < qdelay) 189 pie->drop_prob *= 0.98; /* 1 - 1/64 is sufficient */ 190 191 /* Bound drop probability */ 192 if (pie->drop_prob < 0) 193 pie->drop_prob = 0; 194 if (pie->drop_prob > 1) 195 pie->drop_prob = 1; 196 197 pie->qdelay_old = current_qdelay; 198 pie->last_measurement = time; 199 200 uint64_t burst_allowance = pie->burst_allowance - pie_cfg->dp_update_interval; 201 202 pie->burst_allowance = (burst_allowance > 0) ? burst_allowance : 0; 203 } 204 205 /** 206 * @brief make a decision to drop or enqueue a packet based on probability 207 * criteria 208 * 209 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 210 * @param pie [in, out] data pointer to PIE runtime data 211 * 212 * @return operation status 213 * @retval 0 enqueue the packet 214 * @retval 1 drop the packet 215 */ 216 static inline int 217 __rte_experimental 218 _rte_pie_drop(const struct rte_pie_config *pie_cfg, 219 struct rte_pie *pie) 220 { 221 uint64_t qdelay = pie_cfg->qdelay_ref / 2; 222 223 /* PIE is active but the queue is not congested: return 0 */ 224 if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) || 225 (pie->qlen <= (pie_cfg->tailq_th * 0.1))) 226 return 0; 227 228 if (pie->drop_prob == 0) 229 pie->accu_prob = 0; 230 231 /* For practical reasons, drop probability can be further scaled according 232 * to packet size, but one needs to set a bound to avoid unnecessary bias 233 * Random drop 234 */ 235 pie->accu_prob += pie->drop_prob; 236 237 if (pie->accu_prob < 0.85) 238 return 0; 239 240 if (pie->accu_prob >= 8.5) 241 return 1; 242 243 if (rte_drand() < pie->drop_prob) { 244 pie->accu_prob = 0; 245 return 1; 246 } 247 248 /* No drop */ 249 return 0; 250 } 251 252 /** 253 * @brief Decides if new packet should be enqueued or dropped for non-empty queue 254 * 255 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 256 * @param pie [in,out] data pointer to PIE runtime data 257 * @param pkt_len [in] packet length in bytes 258 * @param time [in] current time (measured in cpu cycles) 259 * 260 * @return Operation status 261 * @retval 0 enqueue the packet 262 * @retval 1 drop the packet based on max threshold criterion 263 * @retval 2 drop the packet based on mark probability criterion 264 */ 265 static inline int 266 __rte_experimental 267 rte_pie_enqueue_nonempty(const struct rte_pie_config *pie_cfg, 268 struct rte_pie *pie, 269 uint32_t pkt_len, 270 const uint64_t time) 271 { 272 /* Check queue space against the tail drop threshold */ 273 if (pie->qlen >= pie_cfg->tailq_th) { 274 275 pie->accu_prob = 0; 276 return 1; 277 } 278 279 if (pie->active) { 280 /* Update drop probability after certain interval */ 281 if ((time - pie->last_measurement) >= pie_cfg->dp_update_interval) 282 _calc_drop_probability(pie_cfg, pie, time); 283 284 /* Decide whether packet to be dropped or enqueued */ 285 if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == 0) 286 return 2; 287 } 288 289 /* When queue occupancy is over a certain threshold, turn on PIE */ 290 if ((pie->active == 0) && 291 (pie->qlen >= (pie_cfg->tailq_th * 0.1))) { 292 pie->active = 1; 293 pie->qdelay_old = 0; 294 pie->drop_prob = 0; 295 pie->in_measurement = 1; 296 pie->departed_bytes_count = 0; 297 pie->avg_dq_time = 0; 298 pie->last_measurement = time; 299 pie->burst_allowance = pie_cfg->max_burst; 300 pie->accu_prob = 0; 301 pie->start_measurement = time; 302 } 303 304 /* when queue has been idle for a while, turn off PIE and Reset counters */ 305 if (pie->active == 1 && 306 pie->qlen < (pie_cfg->tailq_th * 0.1)) { 307 pie->active = 0; 308 pie->in_measurement = 0; 309 } 310 311 /* Update PIE qlen parameter */ 312 pie->qlen++; 313 pie->qlen_bytes += pkt_len; 314 315 /* No drop */ 316 return 0; 317 } 318 319 /** 320 * @brief Decides if new packet should be enqueued or dropped 321 * Updates run time data and gives verdict whether to enqueue or drop the packet. 322 * 323 * @param pie_cfg [in] config pointer to a PIE configuration parameter structure 324 * @param pie [in,out] data pointer to PIE runtime data 325 * @param qlen [in] queue length 326 * @param pkt_len [in] packet length in bytes 327 * @param time [in] current time stamp (measured in cpu cycles) 328 * 329 * @return Operation status 330 * @retval 0 enqueue the packet 331 * @retval 1 drop the packet based on drop probability criteria 332 */ 333 static inline int 334 __rte_experimental 335 rte_pie_enqueue(const struct rte_pie_config *pie_cfg, 336 struct rte_pie *pie, 337 const unsigned int qlen, 338 uint32_t pkt_len, 339 const uint64_t time) 340 { 341 RTE_ASSERT(pie_cfg != NULL); 342 RTE_ASSERT(pie != NULL); 343 344 if (qlen != 0) 345 return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, time); 346 else 347 return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); 348 } 349 350 /** 351 * @brief PIE rate estimation method 352 * Called on each packet departure. 353 * 354 * @param pie [in] data pointer to PIE runtime data 355 * @param pkt_len [in] packet length in bytes 356 * @param time [in] current time stamp in cpu cycles 357 */ 358 static inline void 359 __rte_experimental 360 rte_pie_dequeue(struct rte_pie *pie, 361 uint32_t pkt_len, 362 uint64_t time) 363 { 364 /* Dequeue rate estimation */ 365 if (pie->in_measurement) { 366 pie->departed_bytes_count += pkt_len; 367 368 /* Start a new measurement cycle when enough packets */ 369 if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) { 370 uint64_t dq_time = time - pie->start_measurement; 371 372 if (pie->avg_dq_time == 0) 373 pie->avg_dq_time = dq_time; 374 else 375 pie->avg_dq_time = dq_time * RTE_DQ_WEIGHT + pie->avg_dq_time 376 * (1 - RTE_DQ_WEIGHT); 377 378 pie->in_measurement = 0; 379 } 380 } 381 382 /* Start measurement cycle when enough data in the queue */ 383 if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie->in_measurement == 0)) { 384 pie->in_measurement = 1; 385 pie->start_measurement = time; 386 pie->departed_bytes_count = 0; 387 } 388 } 389 390 #ifdef __cplusplus 391 } 392 #endif 393 394 #endif /* __RTE_PIE_H_INCLUDED__ */ 395