1 /* $NetBSD: altq_red.c,v 1.36 2025/01/08 13:00:04 joe Exp $ */ 2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */ 3 4 /* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 /* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.36 2025/01/08 13:00:04 joe Exp $"); 65 66 #ifdef _KERNEL_OPT 67 #include "opt_altq.h" 68 #include "opt_inet.h" 69 #include "pf.h" 70 #endif 71 72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 73 74 #include <sys/param.h> 75 #include <sys/malloc.h> 76 #include <sys/mbuf.h> 77 #include <sys/socket.h> 78 #include <sys/systm.h> 79 #include <sys/errno.h> 80 #include <sys/kauth.h> 81 #if 1 /* ALTQ3_COMPAT */ 82 #include <sys/sockio.h> 83 #include <sys/proc.h> 84 #include <sys/kernel.h> 85 #ifdef ALTQ_FLOWVALVE 86 #include <sys/queue.h> 87 #include <sys/time.h> 88 #endif 89 #endif /* ALTQ3_COMPAT */ 90 #include <sys/cprng.h> 91 92 #include <net/if.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_systm.h> 96 #include <netinet/ip.h> 97 #ifdef INET6 98 #include <netinet/ip6.h> 99 #endif 100 101 #if NPF > 0 102 #include <net/pfvar.h> 103 #endif 104 #include <altq/altq.h> 105 #include <altq/altq_red.h> 106 #ifdef ALTQ3_COMPAT 107 #include <altq/altq_conf.h> 108 #ifdef ALTQ_FLOWVALVE 109 #include <altq/altq_flowvalve.h> 110 #endif 111 #endif 112 113 /* 114 * ALTQ/RED (Random Early Detection) implementation using 32-bit 115 * fixed-point calculation. 116 * 117 * written by kjc using the ns code as a reference. 118 * you can learn more about red and ns from Sally's home page at 119 * http://www-nrg.ee.lbl.gov/floyd/ 120 * 121 * most of the red parameter values are fixed in this implementation 122 * to prevent fixed-point overflow/underflow. 123 * if you change the parameters, watch out for overflow/underflow! 124 * 125 * the parameters used are recommended values by Sally. 
126 * the corresponding ns config looks: 127 * q_weight=0.00195 128 * minthresh=5 maxthresh=15 queue-size=60 129 * linterm=30 130 * dropmech=drop-tail 131 * bytes=false (can't be handled by 32-bit fixed-point) 132 * doubleq=false dqthresh=false 133 * wait=true 134 */ 135 /* 136 * alternative red parameters for a slow link. 137 * 138 * assume the queue length becomes from zero to L and keeps L, it takes 139 * N packets for q_avg to reach 63% of L. 140 * when q_weight is 0.002, N is about 500 packets. 141 * for a slow link like dial-up, 500 packets takes more than 1 minute! 142 * when q_weight is 0.008, N is about 127 packets. 143 * when q_weight is 0.016, N is about 63 packets. 144 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 145 * are allowed for 0.016. 146 * see Sally's paper for more details. 147 */ 148 /* normal red parameters */ 149 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 150 /* q_weight = 0.00195 */ 151 152 /* red parameters for a slow link */ 153 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 154 /* q_weight = 0.0078125 */ 155 156 /* red parameters for a very slow link (e.g., dialup) */ 157 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 158 /* q_weight = 0.015625 */ 159 160 /* fixed-point uses 12-bit decimal places */ 161 #define FP_SHIFT 12 /* fixed-point shift */ 162 163 /* red parameters for drop probability */ 164 #define INV_P_MAX 10 /* inverse of max drop probability */ 165 #define TH_MIN 5 /* min threshold */ 166 #define TH_MAX 15 /* max threshold */ 167 168 #define RED_LIMIT 60 /* default max queue length */ 169 #define RED_STATS /* collect statistics */ 170 171 /* 172 * our default policy for forced-drop is drop-tail. 173 * (in altq-1.1.2 or earlier, the default was random-drop. 174 * but it makes more sense to punish the cause of the surge.) 175 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 
176 */ 177 178 #ifdef ALTQ3_COMPAT 179 #ifdef ALTQ_FLOWVALVE 180 /* 181 * flow-valve is an extension to protect red from unresponsive flows 182 * and to promote end-to-end congestion control. 183 * flow-valve observes the average drop rates of the flows that have 184 * experienced packet drops in the recent past. 185 * when the average drop rate exceeds the threshold, the flow is 186 * blocked by the flow-valve. the trapped flow should back off 187 * exponentially to escape from the flow-valve. 188 */ 189 #ifdef RED_RANDOM_DROP 190 #error "random-drop can't be used with flow-valve!" 191 #endif 192 #endif /* ALTQ_FLOWVALVE */ 193 194 /* red_list keeps all red_queue_t's allocated. */ 195 static red_queue_t *red_list = NULL; 196 197 #endif /* ALTQ3_COMPAT */ 198 199 /* default red parameter values */ 200 static int default_th_min = TH_MIN; 201 static int default_th_max = TH_MAX; 202 static int default_inv_pmax = INV_P_MAX; 203 204 #ifdef ALTQ3_COMPAT 205 /* internal function prototypes */ 206 static int red_enqueue(struct ifaltq *, struct mbuf *); 207 static struct mbuf *red_dequeue(struct ifaltq *, int); 208 static int red_request(struct ifaltq *, int, void *); 209 static void red_purgeq(red_queue_t *); 210 static int red_detach(red_queue_t *); 211 #ifdef ALTQ_FLOWVALVE 212 static inline struct fve *flowlist_lookup(struct flowvalve *, 213 struct altq_pktattr *, struct timeval *); 214 static inline struct fve *flowlist_reclaim(struct flowvalve *, 215 struct altq_pktattr *); 216 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *); 217 static inline int fv_p2f(struct flowvalve *, int); 218 static struct flowvalve *fv_alloc(struct red *); 219 static void fv_destroy(struct flowvalve *); 220 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, 221 struct fve **); 222 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, 223 struct fve *); 224 #endif 225 #endif /* ALTQ3_COMPAT */ 226 227 /* 228 * red support routines 229 
 */

/*
 * Allocate and initialize a red_t state block.
 *
 * Any parameter passed as 0 selects the built-in default (W_WEIGHT,
 * default_inv_pmax, default_th_min, default_th_max, 800 usec/pkt).
 * When weight is defaulted, it is additionally adapted to the link
 * speed derived from pkttime (slower links get a larger EWMA gain so
 * the average converges in a reasonable number of packets).
 *
 * Returns the new state block, or NULL on allocation failure.
 * The caller owns the result and must release it with red_destroy().
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
{
	red_t	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	/* NOTE(review): M_WAITOK malloc does not fail on NetBSD, so the
	 * NULL check below is defensive dead code kept for safety. */
	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return NULL;

	rp->red_avg = 0;
	rp->red_idle = 1;

	/* fill in defaults for any zero parameter */
	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *   probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared/refcounted per weight value) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
#endif /* ALTQ3_COMPAT */
	return rp;
}

/*
 * Release a red_t state block allocated by red_alloc(), dropping the
 * reference on the shared weight table (and the flowvalve, if any).
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}

/*
 * Copy the current counters into *sp.  q_avg is unscaled from the
 * internal fixed-point representation (avg is kept scaled by the
 * EWMA weight; see red_addq()).
 */
void
red_getstats(red_t *rp, struct redstats *sp)
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}

/*
 * RED enqueue decision.
 *
 * Updates the average queue length estimator, then decides whether to
 * enqueue m, ECN-mark it, or drop (early drop between the thresholds,
 * forced drop above th_max or at the hard queue limit).
 *
 * Returns 0 when the packet was queued, -1 when it was dropped
 * (m has been freed in that case).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return -1;
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return -1;
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return 0;
}

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 *
 * Returns 1 to drop/mark the packet, 0 to pass it.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int	d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return 1;

	/*
	 * now the range of d is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((cprng_fast32() % d) < fp_len) {
		/* drop or mark */
		return 1;
	}
	/* no drop/mark */
	return 0;
}

/*
 * try to mark CE bit to the packet.
 *    returns 1 if successfully marked, 0 otherwise.
 *
 * The packet header location is taken from the ALTQ mbuf tag (or, with
 * ALTQ3_COMPAT, from pktattr) and validated to lie within the mbuf
 * chain before being written.  IPv4 marks update the header checksum
 * incrementally per RFC 1624.
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf	*m0;
	struct m_tag	*t;
	struct altq_tag	*at;
	void		*hdr;
	int		 af;

	t = m_tag_find(m, PACKET_TAG_ALTQ_QID);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		/* NOTE(review): t+1 can never be NULL when t != NULL;
		 * this check is unreachable and kept only verbatim. */
		if (at == NULL)
			return 0;
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return 0;

	if (af != AF_INET && af != AF_INET6)
		return 0;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)hdr >= m0->m_data) &&
		    ((char *)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return 0;
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return 0;	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return 0;	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return 1;	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return 1;
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return 0;	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return 0;	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return 1;	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return 1;
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return 0;
}

/*
 * Dequeue one packet for RED, tracking the idle state: when the queue
 * drains empty, record the time so red_addq() can later decay the
 * average over the idle period.
 *
 * Returns the dequeued mbuf, or NULL if the queue is empty.
 */
struct mbuf *
red_getq(red_t *rp, class_queue_t *q)
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->red_idle == 0) {
			rp->red_idle = 1;
			microtime(&rp->red_last);
		}
		return NULL;
	}

	rp->red_idle = 0;
	return m;
}

/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
644 */ 645 static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 646 647 struct wtab * 648 wtab_alloc(int weight) 649 { 650 struct wtab *w; 651 int i; 652 653 for (w = wtab_list; w != NULL; w = w->w_next) 654 if (w->w_weight == weight) { 655 w->w_refcount++; 656 return w; 657 } 658 659 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO); 660 if (w == NULL) 661 panic("wtab_alloc: malloc failed!"); 662 w->w_weight = weight; 663 w->w_refcount = 1; 664 w->w_next = wtab_list; 665 wtab_list = w; 666 667 /* initialize the weight table */ 668 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 669 for (i = 1; i < 32; i++) { 670 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 671 if (w->w_tab[i] == 0 && w->w_param_max == 0) 672 w->w_param_max = 1 << i; 673 } 674 675 return w; 676 } 677 678 int 679 wtab_destroy(struct wtab *w) 680 { 681 struct wtab *prev; 682 683 if (--w->w_refcount > 0) 684 return 0; 685 686 if (wtab_list == w) 687 wtab_list = w->w_next; 688 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 689 if (prev->w_next == w) { 690 prev->w_next = w->w_next; 691 break; 692 } 693 694 free(w, M_DEVBUF); 695 return 0; 696 } 697 698 int32_t 699 pow_w(struct wtab *w, int n) 700 { 701 int i, bit; 702 int32_t val; 703 704 if (n >= w->w_param_max) 705 return 0; 706 707 val = 1 << FP_SHIFT; 708 if (n <= 0) 709 return val; 710 711 bit = 1; 712 i = 0; 713 while (n) { 714 if (n & bit) { 715 val = (val * w->w_tab[i]) >> FP_SHIFT; 716 n &= ~bit; 717 } 718 i++; 719 bit <<= 1; 720 } 721 return val; 722 } 723 724 #ifdef ALTQ3_COMPAT 725 /* 726 * red device interface 727 */ 728 altqdev_decl(red); 729 730 int 731 redopen(dev_t dev, int flag, int fmt, 732 struct lwp *l) 733 { 734 /* everything will be done when the queueing scheme is attached. 
*/ 735 return 0; 736 } 737 738 int 739 redclose(dev_t dev, int flag, int fmt, 740 struct lwp *l) 741 { 742 red_queue_t *rqp; 743 int err, error = 0; 744 745 while ((rqp = red_list) != NULL) { 746 /* destroy all */ 747 err = red_detach(rqp); 748 if (err != 0 && error == 0) 749 error = err; 750 } 751 752 return error; 753 } 754 755 int 756 redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag, 757 struct lwp *l) 758 { 759 red_queue_t *rqp; 760 struct red_interface *ifacep; 761 struct ifnet *ifp; 762 int error = 0; 763 764 /* check super-user privilege */ 765 switch (cmd) { 766 case RED_GETSTATS: 767 break; 768 default: 769 if ((error = kauth_authorize_network(l->l_cred, 770 KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL, 771 NULL, NULL)) != 0) 772 return error; 773 break; 774 } 775 776 switch (cmd) { 777 778 case RED_ENABLE: 779 ifacep = (struct red_interface *)addr; 780 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 781 error = EBADF; 782 break; 783 } 784 error = altq_enable(rqp->rq_ifq); 785 break; 786 787 case RED_DISABLE: 788 ifacep = (struct red_interface *)addr; 789 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 790 error = EBADF; 791 break; 792 } 793 error = altq_disable(rqp->rq_ifq); 794 break; 795 796 case RED_IF_ATTACH: 797 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 798 if (ifp == NULL) { 799 error = ENXIO; 800 break; 801 } 802 803 /* allocate and initialize red_queue_t */ 804 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO); 805 if (rqp == NULL) { 806 error = ENOMEM; 807 break; 808 } 809 810 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, 811 M_WAITOK|M_ZERO); 812 if (rqp->rq_q == NULL) { 813 free(rqp, M_DEVBUF); 814 error = ENOMEM; 815 break; 816 } 817 818 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 819 if (rqp->rq_red == NULL) { 820 free(rqp->rq_q, M_DEVBUF); 821 free(rqp, M_DEVBUF); 822 error = ENOMEM; 823 break; 824 } 825 826 rqp->rq_ifq = &ifp->if_snd; 827 qtail(rqp->rq_q) = 
NULL; 828 qlen(rqp->rq_q) = 0; 829 qlimit(rqp->rq_q) = RED_LIMIT; 830 qtype(rqp->rq_q) = Q_RED; 831 832 /* 833 * set RED to this ifnet structure. 834 */ 835 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 836 red_enqueue, red_dequeue, red_request, 837 NULL, NULL); 838 if (error) { 839 red_destroy(rqp->rq_red); 840 free(rqp->rq_q, M_DEVBUF); 841 free(rqp, M_DEVBUF); 842 break; 843 } 844 845 /* add this state to the red list */ 846 rqp->rq_next = red_list; 847 red_list = rqp; 848 break; 849 850 case RED_IF_DETACH: 851 ifacep = (struct red_interface *)addr; 852 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 853 error = EBADF; 854 break; 855 } 856 error = red_detach(rqp); 857 break; 858 859 case RED_GETSTATS: 860 do { 861 struct red_stats *q_stats; 862 red_t *rp; 863 864 q_stats = (struct red_stats *)addr; 865 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 866 ALTQT_RED)) == NULL) { 867 error = EBADF; 868 break; 869 } 870 871 q_stats->q_len = qlen(rqp->rq_q); 872 q_stats->q_limit = qlimit(rqp->rq_q); 873 874 rp = rqp->rq_red; 875 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 876 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 877 q_stats->drop_cnt = rp->red_stats.drop_cnt; 878 q_stats->drop_forced = rp->red_stats.drop_forced; 879 q_stats->drop_unforced = rp->red_stats.drop_unforced; 880 q_stats->marked_packets = rp->red_stats.marked_packets; 881 882 q_stats->weight = rp->red_weight; 883 q_stats->inv_pmax = rp->red_inv_pmax; 884 q_stats->th_min = rp->red_thmin; 885 q_stats->th_max = rp->red_thmax; 886 887 #ifdef ALTQ_FLOWVALVE 888 if (rp->red_flowvalve != NULL) { 889 struct flowvalve *fv = rp->red_flowvalve; 890 q_stats->fv_flows = fv->fv_flows; 891 q_stats->fv_pass = fv->fv_stats.pass; 892 q_stats->fv_predrop = fv->fv_stats.predrop; 893 q_stats->fv_alloc = fv->fv_stats.alloc; 894 q_stats->fv_escape = fv->fv_stats.escape; 895 } else { 896 #endif /* ALTQ_FLOWVALVE */ 897 q_stats->fv_flows = 0; 898 q_stats->fv_pass = 0; 899 q_stats->fv_predrop = 0; 
900 q_stats->fv_alloc = 0; 901 q_stats->fv_escape = 0; 902 #ifdef ALTQ_FLOWVALVE 903 } 904 #endif /* ALTQ_FLOWVALVE */ 905 } while (/*CONSTCOND*/ 0); 906 break; 907 908 case RED_CONFIG: 909 do { 910 struct red_conf *fc; 911 red_t *new; 912 int s, limit; 913 914 fc = (struct red_conf *)addr; 915 if ((rqp = altq_lookup(fc->iface.red_ifname, 916 ALTQT_RED)) == NULL) { 917 error = EBADF; 918 break; 919 } 920 new = red_alloc(fc->red_weight, 921 fc->red_inv_pmax, 922 fc->red_thmin, 923 fc->red_thmax, 924 fc->red_flags, 925 fc->red_pkttime); 926 if (new == NULL) { 927 error = ENOMEM; 928 break; 929 } 930 931 s = splnet(); 932 red_purgeq(rqp); 933 limit = fc->red_limit; 934 if (limit < fc->red_thmax) 935 limit = fc->red_thmax; 936 qlimit(rqp->rq_q) = limit; 937 fc->red_limit = limit; /* write back the new value */ 938 939 red_destroy(rqp->rq_red); 940 rqp->rq_red = new; 941 942 splx(s); 943 944 /* write back new values */ 945 fc->red_limit = limit; 946 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 947 fc->red_thmin = rqp->rq_red->red_thmin; 948 fc->red_thmax = rqp->rq_red->red_thmax; 949 950 } while (/*CONSTCOND*/ 0); 951 break; 952 953 case RED_SETDEFAULTS: 954 do { 955 struct redparams *rp; 956 957 rp = (struct redparams *)addr; 958 959 default_th_min = rp->th_min; 960 default_th_max = rp->th_max; 961 default_inv_pmax = rp->inv_pmax; 962 } while (/*CONSTCOND*/ 0); 963 break; 964 965 default: 966 error = EINVAL; 967 break; 968 } 969 return error; 970 } 971 972 static int 973 red_detach(red_queue_t *rqp) 974 { 975 red_queue_t *tmp; 976 int error = 0; 977 978 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 979 altq_disable(rqp->rq_ifq); 980 981 if ((error = altq_detach(rqp->rq_ifq))) 982 return error; 983 984 if (red_list == rqp) 985 red_list = rqp->rq_next; 986 else { 987 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 988 if (tmp->rq_next == rqp) { 989 tmp->rq_next = rqp->rq_next; 990 break; 991 } 992 if (tmp == NULL) 993 printf("red_detach: no state found in red_list!\n"); 994 } 
995 996 red_destroy(rqp->rq_red); 997 free(rqp->rq_q, M_DEVBUF); 998 free(rqp, M_DEVBUF); 999 return error; 1000 } 1001 1002 /* 1003 * enqueue routine: 1004 * 1005 * returns: 0 when successfully queued. 1006 * ENOBUFS when drop occurs. 1007 */ 1008 static int 1009 red_enqueue(struct ifaltq *ifq, struct mbuf *m) 1010 { 1011 struct altq_pktattr pktattr; 1012 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1013 1014 pktattr.pattr_class = m->m_pkthdr.pattr_class; 1015 pktattr.pattr_af = m->m_pkthdr.pattr_af; 1016 pktattr.pattr_hdr = m->m_pkthdr.pattr_hdr; 1017 1018 if (red_addq(rqp->rq_red, rqp->rq_q, m, &pktattr) < 0) 1019 return ENOBUFS; 1020 ifq->ifq_len++; 1021 return 0; 1022 } 1023 1024 /* 1025 * dequeue routine: 1026 * must be called in splnet. 1027 * 1028 * returns: mbuf dequeued. 1029 * NULL when no packet is available in the queue. 1030 */ 1031 1032 static struct mbuf * 1033 red_dequeue(struct ifaltq *ifq, int op) 1034 { 1035 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1036 struct mbuf *m; 1037 1038 if (op == ALTDQ_POLL) 1039 return qhead(rqp->rq_q); 1040 1041 /* op == ALTDQ_REMOVE */ 1042 m = red_getq(rqp->rq_red, rqp->rq_q); 1043 if (m != NULL) 1044 ifq->ifq_len--; 1045 return m; 1046 } 1047 1048 static int 1049 red_request(struct ifaltq *ifq, int req, void *arg) 1050 { 1051 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1052 1053 switch (req) { 1054 case ALTRQ_PURGE: 1055 red_purgeq(rqp); 1056 break; 1057 } 1058 return 0; 1059 } 1060 1061 static void 1062 red_purgeq(red_queue_t *rqp) 1063 { 1064 _flushq(rqp->rq_q); 1065 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 1066 rqp->rq_ifq->ifq_len = 0; 1067 } 1068 1069 #ifdef ALTQ_FLOWVALVE 1070 1071 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1072 #define FV_PSCALE(x) ((x) << FV_PSHIFT) 1073 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1074 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 1075 #define FV_FSCALE(x) ((x) << FV_FSHIFT) 1076 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1077 
#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */

#define	FV_N			10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
#define	FV_TTHRESH		3  /* time threshold to delete fve */
#define	FV_ALPHA		5  /* extra packet count */

#define	FV_STATS

/* flowvalve timestamps use the cheaper cached time, not microtime() */
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

/* precomputed Brtt(p) values, indexed by scaled drop rate */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
/*
 * Look up the flowvalve entry for the flow this packet belongs to.
 *
 * Flows are keyed by (af, src addr, dst addr).  The list is kept in LRU
 * order by flowlist_move_to_head(), so once we hit an unused entry
 * (lastdrop == 0) or one older than the FV_TTHRESH cutoff, everything
 * after it is at least as old and the scan can stop early; the stale
 * entry is lazily invalidated by zeroing its timestamp.
 *
 * Returns the matching fve, or NULL when there is no match (in which
 * case fv->fv_flows is refreshed with the count of entries scanned).
 * NOTE(review): fv_flows is only updated on a miss, and only counts the
 * entries visited before the early break — an approximation by design.
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return NULL;

	/* entries last dropped before this time are considered dead */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;		/* unused entry; rest are too */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: lazily invalidate and stop */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return fve;
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;		/* unused entry; rest are too */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: lazily invalidate and stop */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return fve;
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return NULL;
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return NULL;
}

/*
 * Recycle the least-recently-used flowvalve entry for a new flow.
 *
 * Takes the entry at the tail of the LRU list, re-keys it with the
 * packet's address pair, and resets its penalty state to Green with
 * zeroed rate estimates.  The caller (fv_dropbyred) subsequently moves
 * the entry to the head of the list and stamps fve_lastdrop.
 * NOTE(review): an af other than AF_INET/AF_INET6 leaves the old flow
 * key in place — callers only reach here via flowlist_lookup(), which
 * rejects unknown protocols first.
 */
static inline struct fve *
flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0.0;
	fve->fve_f = 0.0;
	/* pretend one packet has already been seen for the f estimator */
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return fve;
}

/*
 * Promote an entry to the head (most-recently-used end) of the LRU list.
 */
static inline void
flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

/*
 * allocate flowvalve structure
 *
 * Allocates the flowvalve, its fixed-size array of FV_FLOWLISTSIZE flow
 * entries (threaded onto the LRU list), and the drop-rate-to-fraction
 * lookup table derived from brtt_tab and this queue's RED parameters.
 * Returns NULL on allocation failure; on success the caller owns the
 * result and releases it with fv_destroy().
 * NOTE(review): the allocations use M_WAITOK, which on NetBSD sleeps
 * rather than returning NULL, so the failure paths look unreachable —
 * kept as written; confirm against malloc(9) before relying on that.
 */
static struct flowvalve *
fv_alloc(struct red *rp)
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
	if (fv == NULL)
		return NULL;

	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
	    M_WAITOK|M_ZERO);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return NULL;
	}

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;	/* mark entry unused */
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return NULL;
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * Note: entry 0 is left uninitialized; fv_p2f() never reads it.
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return fv;
}

/*
 * Release everything allocated by fv_alloc().
 */
static void
fv_destroy(struct flowvalve *fv)
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}

/*
 * Map a scaled drop rate p to the bandwidth-fraction threshold f via the
 * precomputed p2f table.  p saturates at BRTT_PMAX (last table entry);
 * values whose BRTT_MASK bits are all zero use the smallest real entry
 * (index 1) since index 0 is a placeholder.
 */
static inline int
fv_p2f(struct flowvalve *fv, int p)
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return f;
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 */
/*
 * Flowvalve admission check.  Updates the flow's bandwidth-fraction
 * estimate f (EWMA over FV_N-packet windows), runs the overpumping test
 * (Green -> Red when f exceeds the Brtt-derived threshold for the
 * flow's drop rate p), and decays p.  A Red flow is pre-dropped unless
 * it has been drop-free for more than FV_BACKOFFTHRESH seconds, in
 * which case it is forgiven back to Green.  On a hit, the fve is also
 * returned through *fcache so fv_dropbyred() can skip a second lookup;
 * *fcache is left untouched on a miss.
 */
static int
fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct fve **fcache)
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;		/* per-interface packet sequence number */
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return 0;

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (all in FP_SHIFT fixed point; FV_FUNSCALE applies Wf)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: only bother once p is above the pmax-derived
	 * threshold; a flow taking more bandwidth than Brtt(p) allows
	 * is not backing off like a conformant TCP would.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow: pre-drop and keep it hot in LRU */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return 1;
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (fv_dropbyred() adds the Wp term back when RED really drops)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return 0;
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
/*
 * Record a RED drop against the packet's flow.  Uses the fve cached by
 * fv_checkflow() when available, otherwise looks the flow up and, on a
 * miss, reclaims the LRU entry for it.  The entry is promoted to the
 * head of the LRU list, its drop-rate estimate p is bumped, and the
 * drop time is stamped for the backoff test in fv_checkflow().
 */
static void
fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct fve *fcache)
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 *	      in fv_checkflow() and calculate
	 *	p = Wp + (1 - Wp) * p
	 * (fv_checkflow() already applied the (1 - Wp) decay; adding the
	 * fixed-point 1 here completes the EWMA drop-rate update)
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

/* loadable-module glue: device switch entry points for /dev/altq/red */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
#endif /* ALTQ3_COMPAT */

#endif /* ALTQ_RED */