1 /* $NetBSD: altq_red.c,v 1.26 2007/03/26 22:43:19 hubertf Exp $ */ 2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */ 3 4 /* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 /* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.26 2007/03/26 22:43:19 hubertf Exp $"); 65 66 #ifdef _KERNEL_OPT 67 #include "opt_altq.h" 68 #include "opt_inet.h" 69 #include "pf.h" 70 #endif 71 72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 73 74 #include <sys/param.h> 75 #include <sys/malloc.h> 76 #include <sys/mbuf.h> 77 #include <sys/socket.h> 78 #include <sys/systm.h> 79 #include <sys/errno.h> 80 #include <sys/kauth.h> 81 #if 1 /* ALTQ3_COMPAT */ 82 #include <sys/sockio.h> 83 #include <sys/proc.h> 84 #include <sys/kernel.h> 85 #ifdef ALTQ_FLOWVALVE 86 #include <sys/queue.h> 87 #include <sys/time.h> 88 #endif 89 #endif /* ALTQ3_COMPAT */ 90 91 #include <net/if.h> 92 93 #include <netinet/in.h> 94 #include <netinet/in_systm.h> 95 #include <netinet/ip.h> 96 #ifdef INET6 97 #include <netinet/ip6.h> 98 #endif 99 100 #if NPF > 0 101 #include <net/pfvar.h> 102 #endif 103 #include <altq/altq.h> 104 #include <altq/altq_red.h> 105 #ifdef ALTQ3_COMPAT 106 #include <altq/altq_conf.h> 107 #ifdef ALTQ_FLOWVALVE 108 #include <altq/altq_flowvalve.h> 109 #endif 110 #endif 111 112 /* 113 * ALTQ/RED (Random Early Detection) implementation using 32-bit 114 * fixed-point calculation. 115 * 116 * written by kjc using the ns code as a reference. 117 * you can learn more about red and ns from Sally's home page at 118 * http://www-nrg.ee.lbl.gov/floyd/ 119 * 120 * most of the red parameter values are fixed in this implementation 121 * to prevent fixed-point overflow/underflow. 122 * if you change the parameters, watch out for overflow/underflow! 123 * 124 * the parameters used are recommended values by Sally. 125 * the corresponding ns config looks: 126 * q_weight=0.00195 127 * minthresh=5 maxthresh=15 queue-size=60 128 * linterm=30 129 * dropmech=drop-tail 130 * bytes=false (can't be handled by 32-bit fixed-point) 131 * doubleq=false dqthresh=false 132 * wait=true 133 */ 134 /* 135 * alternative red parameters for a slow link. 136 * 137 * assume the queue length becomes from zero to L and keeps L, it takes 138 * N packets for q_avg to reach 63% of L. 139 * when q_weight is 0.002, N is about 500 packets. 140 * for a slow link like dial-up, 500 packets takes more than 1 minute! 141 * when q_weight is 0.008, N is about 127 packets. 142 * when q_weight is 0.016, N is about 63 packets. 143 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 144 * are allowed for 0.016. 145 * see Sally's paper for more details. 146 */ 147 /* normal red parameters */ 148 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 149 /* q_weight = 0.00195 */ 150 151 /* red parameters for a slow link */ 152 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 153 /* q_weight = 0.0078125 */ 154 155 /* red parameters for a very slow link (e.g., dialup) */ 156 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 157 /* q_weight = 0.015625 */ 158 159 /* fixed-point uses 12-bit decimal places */ 160 #define FP_SHIFT 12 /* fixed-point shift */ 161 162 /* red parameters for drop probability */ 163 #define INV_P_MAX 10 /* inverse of max drop probability */ 164 #define TH_MIN 5 /* min threshold */ 165 #define TH_MAX 15 /* max threshold */ 166 167 #define RED_LIMIT 60 /* default max queue lenght */ 168 #define RED_STATS /* collect statistics */ 169 170 /* 171 * our default policy for forced-drop is drop-tail. 172 * (in altq-1.1.2 or earlier, the default was random-drop. 173 * but it makes more sense to punish the cause of the surge.) 174 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 175 */ 176 177 #ifdef ALTQ3_COMPAT 178 #ifdef ALTQ_FLOWVALVE 179 /* 180 * flow-valve is an extention to protect red from unresponsive flows 181 * and to promote end-to-end congestion control. 182 * flow-valve observes the average drop rates of the flows that have 183 * experienced packet drops in the recent past. 184 * when the average drop rate exceeds the threshold, the flow is 185 * blocked by the flow-valve. the trapped flow should back off 186 * exponentially to escape from the flow-valve. 187 */ 188 #ifdef RED_RANDOM_DROP 189 #error "random-drop can't be used with flow-valve!" 190 #endif 191 #endif /* ALTQ_FLOWVALVE */ 192 193 /* red_list keeps all red_queue_t's allocated. */ 194 static red_queue_t *red_list = NULL; 195 196 #endif /* ALTQ3_COMPAT */ 197 198 /* default red parameter values */ 199 static int default_th_min = TH_MIN; 200 static int default_th_max = TH_MAX; 201 static int default_inv_pmax = INV_P_MAX; 202 203 #ifdef ALTQ3_COMPAT 204 /* internal function prototypes */ 205 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 206 static struct mbuf *red_dequeue(struct ifaltq *, int); 207 static int red_request(struct ifaltq *, int, void *); 208 static void red_purgeq(red_queue_t *); 209 static int red_detach(red_queue_t *); 210 #ifdef ALTQ_FLOWVALVE 211 static inline struct fve *flowlist_lookup(struct flowvalve *, 212 struct altq_pktattr *, struct timeval *); 213 static inline struct fve *flowlist_reclaim(struct flowvalve *, 214 struct altq_pktattr *); 215 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *); 216 static inline int fv_p2f(struct flowvalve *, int); 217 static struct flowvalve *fv_alloc(struct red *); 218 static void fv_destroy(struct flowvalve *); 219 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, 220 struct fve **); 221 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, 222 struct fve *); 223 #endif 224 #endif /* ALTQ3_COMPAT */ 225 226 /* 227 * red support routines 228 */ 229 red_t * 230 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, 231 int pkttime) 232 { 233 red_t *rp; 234 int w, i; 235 int npkts_per_sec; 236 237 rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO); 238 if (rp == NULL) 239 return (NULL); 240 241 rp->red_avg = 0; 242 rp->red_idle = 1; 243 244 if (weight == 0) 245 rp->red_weight = W_WEIGHT; 246 else 247 rp->red_weight = weight; 248 if (inv_pmax == 0) 249 rp->red_inv_pmax = default_inv_pmax; 250 else 251 rp->red_inv_pmax = inv_pmax; 252 if (th_min == 0) 253 rp->red_thmin = default_th_min; 254 else 255 rp->red_thmin = th_min; 256 if (th_max == 0) 257 rp->red_thmax = default_th_max; 258 else 259 rp->red_thmax = th_max; 260 261 rp->red_flags = flags; 262 263 if (pkttime == 0) 264 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 265 rp->red_pkttime = 800; 266 else 267 rp->red_pkttime = pkttime; 268 269 if (weight == 0) { 270 /* when the link is very slow, adjust red parameters */ 271 npkts_per_sec = 1000000 / rp->red_pkttime; 272 if (npkts_per_sec < 50) { 273 /* up to about 400Kbps */ 274 rp->red_weight = W_WEIGHT_2; 275 } else if (npkts_per_sec < 300) { 276 /* up to about 2.4Mbps */ 277 rp->red_weight = W_WEIGHT_1; 278 } 279 } 280 281 /* calculate wshift. weight must be power of 2 */ 282 w = rp->red_weight; 283 for (i = 0; w > 1; i++) 284 w = w >> 1; 285 rp->red_wshift = i; 286 w = 1 << rp->red_wshift; 287 if (w != rp->red_weight) { 288 printf("invalid weight value %d for red! use %d\n", 289 rp->red_weight, w); 290 rp->red_weight = w; 291 } 292 293 /* 294 * thmin_s and thmax_s are scaled versions of th_min and th_max 295 * to be compared with avg. 296 */ 297 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 298 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 299 300 /* 301 * precompute probability denominator 302 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 303 */ 304 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 305 * rp->red_inv_pmax) << FP_SHIFT; 306 307 /* allocate weight table */ 308 rp->red_wtab = wtab_alloc(rp->red_weight); 309 310 microtime(&rp->red_last); 311 #ifdef ALTQ3_COMPAT 312 #ifdef ALTQ_FLOWVALVE 313 if (flags & REDF_FLOWVALVE) 314 rp->red_flowvalve = fv_alloc(rp); 315 /* if fv_alloc failes, flowvalve is just disabled */ 316 #endif 317 #endif /* ALTQ3_COMPAT */ 318 return (rp); 319 } 320 321 void 322 red_destroy(red_t *rp) 323 { 324 #ifdef ALTQ3_COMPAT 325 #ifdef ALTQ_FLOWVALVE 326 if (rp->red_flowvalve != NULL) 327 fv_destroy(rp->red_flowvalve); 328 #endif 329 #endif /* ALTQ3_COMPAT */ 330 wtab_destroy(rp->red_wtab); 331 free(rp, M_DEVBUF); 332 } 333 334 void 335 red_getstats(red_t *rp, struct redstats *sp) 336 { 337 sp->q_avg = rp->red_avg >> rp->red_wshift; 338 sp->xmit_cnt = rp->red_stats.xmit_cnt; 339 sp->drop_cnt = rp->red_stats.drop_cnt; 340 sp->drop_forced = rp->red_stats.drop_forced; 341 sp->drop_unforced = rp->red_stats.drop_unforced; 342 sp->marked_packets = rp->red_stats.marked_packets; 343 } 344 345 int 346 red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, 347 struct altq_pktattr *pktattr) 348 { 349 int avg, droptype; 350 int n; 351 #ifdef ALTQ3_COMPAT 352 #ifdef ALTQ_FLOWVALVE 353 struct fve *fve = NULL; 354 355 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 356 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 357 m_freem(m); 358 return (-1); 359 } 360 #endif 361 #endif /* ALTQ3_COMPAT */ 362 363 avg = rp->red_avg; 364 365 /* 366 * if we were idle, we pretend that n packets arrived during 367 * the idle period. 368 */ 369 if (rp->red_idle) { 370 struct timeval now; 371 int t; 372 373 rp->red_idle = 0; 374 microtime(&now); 375 t = (now.tv_sec - rp->red_last.tv_sec); 376 if (t > 60) { 377 /* 378 * being idle for more than 1 minute, set avg to zero. 379 * this prevents t from overflow. 380 */ 381 avg = 0; 382 } else { 383 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 384 n = t / rp->red_pkttime - 1; 385 386 /* the following line does (avg = (1 - Wq)^n * avg) */ 387 if (n > 0) 388 avg = (avg >> FP_SHIFT) * 389 pow_w(rp->red_wtab, n); 390 } 391 } 392 393 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 394 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 395 rp->red_avg = avg; /* save the new value */ 396 397 /* 398 * red_count keeps a tally of arriving traffic that has not 399 * been dropped. 400 */ 401 rp->red_count++; 402 403 /* see if we drop early */ 404 droptype = DTYPE_NODROP; 405 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 406 if (avg >= rp->red_thmax_s) { 407 /* avg >= th_max: forced drop */ 408 droptype = DTYPE_FORCED; 409 } else if (rp->red_old == 0) { 410 /* first exceeds th_min */ 411 rp->red_count = 1; 412 rp->red_old = 1; 413 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 414 rp->red_probd, rp->red_count)) { 415 /* mark or drop by red */ 416 if ((rp->red_flags & REDF_ECN) && 417 mark_ecn(m, pktattr, rp->red_flags)) { 418 /* successfully marked. do not drop. */ 419 rp->red_count = 0; 420 #ifdef RED_STATS 421 rp->red_stats.marked_packets++; 422 #endif 423 } else { 424 /* unforced drop by red */ 425 droptype = DTYPE_EARLY; 426 } 427 } 428 } else { 429 /* avg < th_min */ 430 rp->red_old = 0; 431 } 432 433 /* 434 * if the queue length hits the hard limit, it's a forced drop. 435 */ 436 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 437 droptype = DTYPE_FORCED; 438 439 #ifdef RED_RANDOM_DROP 440 /* if successful or forced drop, enqueue this packet. */ 441 if (droptype != DTYPE_EARLY) 442 _addq(q, m); 443 #else 444 /* if successful, enqueue this packet. */ 445 if (droptype == DTYPE_NODROP) 446 _addq(q, m); 447 #endif 448 if (droptype != DTYPE_NODROP) { 449 if (droptype == DTYPE_EARLY) { 450 /* drop the incoming packet */ 451 #ifdef RED_STATS 452 rp->red_stats.drop_unforced++; 453 #endif 454 } else { 455 /* forced drop, select a victim packet in the queue. */ 456 #ifdef RED_RANDOM_DROP 457 m = _getq_random(q); 458 #endif 459 #ifdef RED_STATS 460 rp->red_stats.drop_forced++; 461 #endif 462 } 463 #ifdef RED_STATS 464 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 465 #endif 466 rp->red_count = 0; 467 #ifdef ALTQ3_COMPAT 468 #ifdef ALTQ_FLOWVALVE 469 if (rp->red_flowvalve != NULL) 470 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 471 #endif 472 #endif /* ALTQ3_COMPAT */ 473 m_freem(m); 474 return (-1); 475 } 476 /* successfully queued */ 477 #ifdef RED_STATS 478 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 479 #endif 480 return (0); 481 } 482 483 /* 484 * early-drop probability is calculated as follows: 485 * prob = p_max * (avg - th_min) / (th_max - th_min) 486 * prob_a = prob / (2 - count*prob) 487 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 488 * here prob_a increases as successive undrop count increases. 489 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 490 * becomes 1 when (count >= (2 / prob))). 491 */ 492 int 493 drop_early(int fp_len, int fp_probd, int count) 494 { 495 int d; /* denominator of drop-probability */ 496 497 d = fp_probd - count * fp_len; 498 if (d <= 0) 499 /* count exceeds the hard limit: drop or mark */ 500 return (1); 501 502 /* 503 * now the range of d is [1..600] in fixed-point. (when 504 * th_max-th_min=10 and p_max=1/30) 505 * drop probability = (avg - TH_MIN) / d 506 */ 507 508 if ((arc4random() % d) < fp_len) { 509 /* drop or mark */ 510 return (1); 511 } 512 /* no drop/mark */ 513 return (0); 514 } 515 516 /* 517 * try to mark CE bit to the packet. 518 * returns 1 if successfully marked, 0 otherwise. 519 */ 520 int 521 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 522 { 523 struct mbuf *m0; 524 struct m_tag *t; 525 struct altq_tag *at; 526 void *hdr; 527 int af; 528 529 t = m_tag_find(m, PACKET_TAG_PF_QID, NULL); 530 if (t != NULL) { 531 at = (struct altq_tag *)(t + 1); 532 if (at == NULL) 533 return (0); 534 af = at->af; 535 hdr = at->hdr; 536 #ifdef ALTQ3_COMPAT 537 } else if (pktattr != NULL) { 538 af = pktattr->pattr_af; 539 hdr = pktattr->pattr_hdr; 540 #endif /* ALTQ3_COMPAT */ 541 } else 542 return (0); 543 544 if (af != AF_INET && af != AF_INET6) 545 return (0); 546 547 /* verify that pattr_hdr is within the mbuf data */ 548 for (m0 = m; m0 != NULL; m0 = m0->m_next) 549 if (((char *)hdr >= m0->m_data) && 550 ((char *)hdr < m0->m_data + m0->m_len)) 551 break; 552 if (m0 == NULL) { 553 /* ick, tag info is stale */ 554 return (0); 555 } 556 557 switch (af) { 558 case AF_INET: 559 if (flags & REDF_ECN4) { 560 struct ip *ip = hdr; 561 u_int8_t otos; 562 int sum; 563 564 if (ip->ip_v != 4) 565 return (0); /* version mismatch! */ 566 567 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 568 return (0); /* not-ECT */ 569 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 570 return (1); /* already marked */ 571 572 /* 573 * ecn-capable but not marked, 574 * mark CE and update checksum 575 */ 576 otos = ip->ip_tos; 577 ip->ip_tos |= IPTOS_ECN_CE; 578 /* 579 * update checksum (from RFC1624) 580 * HC' = ~(~HC + ~m + m') 581 */ 582 sum = ~ntohs(ip->ip_sum) & 0xffff; 583 sum += (~otos & 0xffff) + ip->ip_tos; 584 sum = (sum >> 16) + (sum & 0xffff); 585 sum += (sum >> 16); /* add carry */ 586 ip->ip_sum = htons(~sum & 0xffff); 587 return (1); 588 } 589 break; 590 #ifdef INET6 591 case AF_INET6: 592 if (flags & REDF_ECN6) { 593 struct ip6_hdr *ip6 = hdr; 594 u_int32_t flowlabel; 595 596 flowlabel = ntohl(ip6->ip6_flow); 597 if ((flowlabel >> 28) != 6) 598 return (0); /* version mismatch! */ 599 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 600 (IPTOS_ECN_NOTECT << 20)) 601 return (0); /* not-ECT */ 602 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 603 (IPTOS_ECN_CE << 20)) 604 return (1); /* already marked */ 605 /* 606 * ecn-capable but not marked, mark CE 607 */ 608 flowlabel |= (IPTOS_ECN_CE << 20); 609 ip6->ip6_flow = htonl(flowlabel); 610 return (1); 611 } 612 break; 613 #endif /* INET6 */ 614 } 615 616 /* not marked */ 617 return (0); 618 } 619 620 struct mbuf * 621 red_getq(red_t *rp, class_queue_t *q) 622 { 623 struct mbuf *m; 624 625 if ((m = _getq(q)) == NULL) { 626 if (rp->red_idle == 0) { 627 rp->red_idle = 1; 628 microtime(&rp->red_last); 629 } 630 return NULL; 631 } 632 633 rp->red_idle = 0; 634 return (m); 635 } 636 637 /* 638 * helper routine to calibrate avg during idle. 639 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 640 * here Wq = 1/weight and the code assumes Wq is close to zero. 641 * 642 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 643 */ 644 static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 645 646 struct wtab * 647 wtab_alloc(int weight) 648 { 649 struct wtab *w; 650 int i; 651 652 for (w = wtab_list; w != NULL; w = w->w_next) 653 if (w->w_weight == weight) { 654 w->w_refcount++; 655 return (w); 656 } 657 658 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO); 659 if (w == NULL) 660 panic("wtab_alloc: malloc failed!"); 661 w->w_weight = weight; 662 w->w_refcount = 1; 663 w->w_next = wtab_list; 664 wtab_list = w; 665 666 /* initialize the weight table */ 667 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 668 for (i = 1; i < 32; i++) { 669 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 670 if (w->w_tab[i] == 0 && w->w_param_max == 0) 671 w->w_param_max = 1 << i; 672 } 673 674 return (w); 675 } 676 677 int 678 wtab_destroy(struct wtab *w) 679 { 680 struct wtab *prev; 681 682 if (--w->w_refcount > 0) 683 return (0); 684 685 if (wtab_list == w) 686 wtab_list = w->w_next; 687 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 688 if (prev->w_next == w) { 689 prev->w_next = w->w_next; 690 break; 691 } 692 693 free(w, M_DEVBUF); 694 return (0); 695 } 696 697 int32_t 698 pow_w(struct wtab *w, int n) 699 { 700 int i, bit; 701 int32_t val; 702 703 if (n >= w->w_param_max) 704 return (0); 705 706 val = 1 << FP_SHIFT; 707 if (n <= 0) 708 return (val); 709 710 bit = 1; 711 i = 0; 712 while (n) { 713 if (n & bit) { 714 val = (val * w->w_tab[i]) >> FP_SHIFT; 715 n &= ~bit; 716 } 717 i++; 718 bit <<= 1; 719 } 720 return (val); 721 } 722 723 #ifdef ALTQ3_COMPAT 724 /* 725 * red device interface 726 */ 727 altqdev_decl(red); 728 729 int 730 redopen(dev_t dev, int flag, int fmt, 731 struct lwp *l) 732 { 733 /* everything will be done when the queueing scheme is attached. */ 734 return 0; 735 } 736 737 int 738 redclose(dev_t dev, int flag, int fmt, 739 struct lwp *l) 740 { 741 red_queue_t *rqp; 742 int err, error = 0; 743 744 while ((rqp = red_list) != NULL) { 745 /* destroy all */ 746 err = red_detach(rqp); 747 if (err != 0 && error == 0) 748 error = err; 749 } 750 751 return error; 752 } 753 754 int 755 redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag, 756 struct lwp *l) 757 { 758 red_queue_t *rqp; 759 struct red_interface *ifacep; 760 struct ifnet *ifp; 761 struct proc *p = l->l_proc; 762 int error = 0; 763 764 /* check super-user privilege */ 765 switch (cmd) { 766 case RED_GETSTATS: 767 break; 768 default: 769 #if (__FreeBSD_version > 400000) 770 if ((error = suser(p)) != 0) 771 #else 772 if ((error = kauth_authorize_network(p->p_cred, 773 KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL, 774 NULL, NULL)) != 0) 775 #endif 776 return (error); 777 break; 778 } 779 780 switch (cmd) { 781 782 case RED_ENABLE: 783 ifacep = (struct red_interface *)addr; 784 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 785 error = EBADF; 786 break; 787 } 788 error = altq_enable(rqp->rq_ifq); 789 break; 790 791 case RED_DISABLE: 792 ifacep = (struct red_interface *)addr; 793 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 794 error = EBADF; 795 break; 796 } 797 error = altq_disable(rqp->rq_ifq); 798 break; 799 800 case RED_IF_ATTACH: 801 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 802 if (ifp == NULL) { 803 error = ENXIO; 804 break; 805 } 806 807 /* allocate and initialize red_queue_t */ 808 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO); 809 if (rqp == NULL) { 810 error = ENOMEM; 811 break; 812 } 813 814 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, 815 M_WAITOK|M_ZERO); 816 if (rqp->rq_q == NULL) { 817 free(rqp, M_DEVBUF); 818 error = ENOMEM; 819 break; 820 } 821 822 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 823 if (rqp->rq_red == NULL) { 824 free(rqp->rq_q, M_DEVBUF); 825 free(rqp, M_DEVBUF); 826 error = ENOMEM; 827 break; 828 } 829 830 rqp->rq_ifq = &ifp->if_snd; 831 qtail(rqp->rq_q) = NULL; 832 qlen(rqp->rq_q) = 0; 833 qlimit(rqp->rq_q) = RED_LIMIT; 834 qtype(rqp->rq_q) = Q_RED; 835 836 /* 837 * set RED to this ifnet structure. 838 */ 839 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 840 red_enqueue, red_dequeue, red_request, 841 NULL, NULL); 842 if (error) { 843 red_destroy(rqp->rq_red); 844 free(rqp->rq_q, M_DEVBUF); 845 free(rqp, M_DEVBUF); 846 break; 847 } 848 849 /* add this state to the red list */ 850 rqp->rq_next = red_list; 851 red_list = rqp; 852 break; 853 854 case RED_IF_DETACH: 855 ifacep = (struct red_interface *)addr; 856 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 857 error = EBADF; 858 break; 859 } 860 error = red_detach(rqp); 861 break; 862 863 case RED_GETSTATS: 864 do { 865 struct red_stats *q_stats; 866 red_t *rp; 867 868 q_stats = (struct red_stats *)addr; 869 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 870 ALTQT_RED)) == NULL) { 871 error = EBADF; 872 break; 873 } 874 875 q_stats->q_len = qlen(rqp->rq_q); 876 q_stats->q_limit = qlimit(rqp->rq_q); 877 878 rp = rqp->rq_red; 879 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 880 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 881 q_stats->drop_cnt = rp->red_stats.drop_cnt; 882 q_stats->drop_forced = rp->red_stats.drop_forced; 883 q_stats->drop_unforced = rp->red_stats.drop_unforced; 884 q_stats->marked_packets = rp->red_stats.marked_packets; 885 886 q_stats->weight = rp->red_weight; 887 q_stats->inv_pmax = rp->red_inv_pmax; 888 q_stats->th_min = rp->red_thmin; 889 q_stats->th_max = rp->red_thmax; 890 891 #ifdef ALTQ_FLOWVALVE 892 if (rp->red_flowvalve != NULL) { 893 struct flowvalve *fv = rp->red_flowvalve; 894 q_stats->fv_flows = fv->fv_flows; 895 q_stats->fv_pass = fv->fv_stats.pass; 896 q_stats->fv_predrop = fv->fv_stats.predrop; 897 q_stats->fv_alloc = fv->fv_stats.alloc; 898 q_stats->fv_escape = fv->fv_stats.escape; 899 } else { 900 #endif /* ALTQ_FLOWVALVE */ 901 q_stats->fv_flows = 0; 902 q_stats->fv_pass = 0; 903 q_stats->fv_predrop = 0; 904 q_stats->fv_alloc = 0; 905 q_stats->fv_escape = 0; 906 #ifdef ALTQ_FLOWVALVE 907 } 908 #endif /* ALTQ_FLOWVALVE */ 909 } while (/*CONSTCOND*/ 0); 910 break; 911 912 case RED_CONFIG: 913 do { 914 struct red_conf *fc; 915 red_t *new; 916 int s, limit; 917 918 fc = (struct red_conf *)addr; 919 if ((rqp = altq_lookup(fc->iface.red_ifname, 920 ALTQT_RED)) == NULL) { 921 error = EBADF; 922 break; 923 } 924 new = red_alloc(fc->red_weight, 925 fc->red_inv_pmax, 926 fc->red_thmin, 927 fc->red_thmax, 928 fc->red_flags, 929 fc->red_pkttime); 930 if (new == NULL) { 931 error = ENOMEM; 932 break; 933 } 934 935 s = splnet(); 936 red_purgeq(rqp); 937 limit = fc->red_limit; 938 if (limit < fc->red_thmax) 939 limit = fc->red_thmax; 940 qlimit(rqp->rq_q) = limit; 941 fc->red_limit = limit; /* write back the new value */ 942 943 red_destroy(rqp->rq_red); 944 rqp->rq_red = new; 945 946 splx(s); 947 948 /* write back new values */ 949 fc->red_limit = limit; 950 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 951 fc->red_thmin = rqp->rq_red->red_thmin; 952 fc->red_thmax = rqp->rq_red->red_thmax; 953 954 } while (/*CONSTCOND*/ 0); 955 break; 956 957 case RED_SETDEFAULTS: 958 do { 959 struct redparams *rp; 960 961 rp = (struct redparams *)addr; 962 963 default_th_min = rp->th_min; 964 default_th_max = rp->th_max; 965 default_inv_pmax = rp->inv_pmax; 966 } while (/*CONSTCOND*/ 0); 967 break; 968 969 default: 970 error = EINVAL; 971 break; 972 } 973 return error; 974 } 975 976 static int 977 red_detach(red_queue_t *rqp) 978 { 979 red_queue_t *tmp; 980 int error = 0; 981 982 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 983 altq_disable(rqp->rq_ifq); 984 985 if ((error = altq_detach(rqp->rq_ifq))) 986 return (error); 987 988 if (red_list == rqp) 989 red_list = rqp->rq_next; 990 else { 991 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 992 if (tmp->rq_next == rqp) { 993 tmp->rq_next = rqp->rq_next; 994 break; 995 } 996 if (tmp == NULL) 997 printf("red_detach: no state found in red_list!\n"); 998 } 999 1000 red_destroy(rqp->rq_red); 1001 free(rqp->rq_q, M_DEVBUF); 1002 free(rqp, M_DEVBUF); 1003 return (error); 1004 } 1005 1006 /* 1007 * enqueue routine: 1008 * 1009 * returns: 0 when successfully queued. 1010 * ENOBUFS when drop occurs. 1011 */ 1012 static int 1013 red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) 1014 { 1015 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1016 1017 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) 1018 return ENOBUFS; 1019 ifq->ifq_len++; 1020 return 0; 1021 } 1022 1023 /* 1024 * dequeue routine: 1025 * must be called in splnet. 1026 * 1027 * returns: mbuf dequeued. 1028 * NULL when no packet is available in the queue. 1029 */ 1030 1031 static struct mbuf * 1032 red_dequeue(struct ifaltq *ifq, int op) 1033 { 1034 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1035 struct mbuf *m; 1036 1037 if (op == ALTDQ_POLL) 1038 return qhead(rqp->rq_q); 1039 1040 /* op == ALTDQ_REMOVE */ 1041 m = red_getq(rqp->rq_red, rqp->rq_q); 1042 if (m != NULL) 1043 ifq->ifq_len--; 1044 return (m); 1045 } 1046 1047 static int 1048 red_request(struct ifaltq *ifq, int req, void *arg) 1049 { 1050 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1051 1052 switch (req) { 1053 case ALTRQ_PURGE: 1054 red_purgeq(rqp); 1055 break; 1056 } 1057 return (0); 1058 } 1059 1060 static void 1061 red_purgeq(red_queue_t *rqp) 1062 { 1063 _flushq(rqp->rq_q); 1064 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 1065 rqp->rq_ifq->ifq_len = 0; 1066 } 1067 1068 #ifdef ALTQ_FLOWVALVE 1069 1070 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1071 #define FV_PSCALE(x) ((x) << FV_PSHIFT) 1072 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1073 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 1074 #define FV_FSCALE(x) ((x) << FV_FSHIFT) 1075 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1076 1077 #define FV_TIMER (3 * hz) /* timer value for garbage collector */ 1078 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ 1079 1080 #define FV_N 10 /* update fve_f every FV_N packets */ 1081 1082 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ 1083 #define FV_TTHRESH 3 /* time threshold to delete fve */ 1084 #define FV_ALPHA 5 /* extra packet count */ 1085 1086 #define FV_STATS 1087 1088 #if (__FreeBSD_version > 300000) || defined(__HAVE_TIMECOUNTER) 1089 #define FV_TIMESTAMP(tp) getmicrotime(tp) 1090 #else 1091 #define FV_TIMESTAMP(tp) { (*(tp)) = time; } 1092 #endif 1093 1094 /* 1095 * Brtt table: 127 entry table to convert drop rate (p) to 1096 * the corresponding bandwidth fraction (f) 1097 * the following equation is implemented to use scaled values, 1098 * fve_p and fve_f, in the fixed point format. 1099 * 1100 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) 1101 * f = Brtt(p) / (max_th + alpha) 1102 */ 1103 #define BRTT_SIZE 128 1104 #define BRTT_SHIFT 12 1105 #define BRTT_MASK 0x0007f000 1106 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) 1107 1108 const int brtt_tab[BRTT_SIZE] = { 1109 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 1110 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 1111 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 1112 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 1113 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 1114 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 1115 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 1116 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 1117 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 1118 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 1119 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 1120 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 1121 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, 1122 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 1123 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 1124 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 1125 }; 1126 1127 static inline struct fve * 1128 flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr, 1129 struct timeval *now) 1130 { 1131 struct fve *fve; 1132 int flows; 1133 struct ip *ip; 1134 #ifdef INET6 1135 struct ip6_hdr *ip6; 1136 #endif 1137 struct timeval tthresh; 1138 1139 if (pktattr == NULL) 1140 return (NULL); 1141 1142 tthresh.tv_sec = now->tv_sec - FV_TTHRESH; 1143 flows = 0; 1144 /* 1145 * search the flow list 1146 */ 1147 switch (pktattr->pattr_af) { 1148 case AF_INET: 1149 ip = (struct ip *)pktattr->pattr_hdr; 1150 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1151 if (fve->fve_lastdrop.tv_sec == 0) 1152 break; 1153 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1154 fve->fve_lastdrop.tv_sec = 0; 1155 break; 1156 } 1157 if (fve->fve_flow.flow_af == AF_INET && 1158 fve->fve_flow.flow_ip.ip_src.s_addr == 1159 ip->ip_src.s_addr && 1160 fve->fve_flow.flow_ip.ip_dst.s_addr == 1161 ip->ip_dst.s_addr) 1162 return (fve); 1163 flows++; 1164 } 1165 break; 1166 #ifdef INET6 1167 case AF_INET6: 1168 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1169 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1170 if (fve->fve_lastdrop.tv_sec == 0) 1171 break; 1172 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1173 fve->fve_lastdrop.tv_sec = 0; 1174 break; 1175 } 1176 if (fve->fve_flow.flow_af == AF_INET6 && 1177 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, 1178 &ip6->ip6_src) && 1179 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, 1180 &ip6->ip6_dst)) 1181 return (fve); 1182 flows++; 1183 } 1184 break; 1185 #endif /* INET6 */ 1186 1187 default: 1188 /* unknown protocol. no drop. */ 1189 return (NULL); 1190 } 1191 fv->fv_flows = flows; /* save the number of active fve's */ 1192 return (NULL); 1193 } 1194 1195 static inline struct fve * 1196 flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr) 1197 { 1198 struct fve *fve; 1199 struct ip *ip; 1200 #ifdef INET6 1201 struct ip6_hdr *ip6; 1202 #endif 1203 1204 /* 1205 * get an entry from the tail of the LRU list. 1206 */ 1207 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); 1208 1209 switch (pktattr->pattr_af) { 1210 case AF_INET: 1211 ip = (struct ip *)pktattr->pattr_hdr; 1212 fve->fve_flow.flow_af = AF_INET; 1213 fve->fve_flow.flow_ip.ip_src = ip->ip_src; 1214 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; 1215 break; 1216 #ifdef INET6 1217 case AF_INET6: 1218 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1219 fve->fve_flow.flow_af = AF_INET6; 1220 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; 1221 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; 1222 break; 1223 #endif 1224 } 1225 1226 fve->fve_state = Green; 1227 fve->fve_p = 0.0; 1228 fve->fve_f = 0.0; 1229 fve->fve_ifseq = fv->fv_ifseq - 1; 1230 fve->fve_count = 0; 1231 1232 fv->fv_flows++; 1233 #ifdef FV_STATS 1234 fv->fv_stats.alloc++; 1235 #endif 1236 return (fve); 1237 } 1238 1239 static inline void 1240 flowlist_move_to_head(struct flowvalve *fv, struct fve *fve) 1241 { 1242 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { 1243 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); 1244 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); 1245 } 1246 } 1247 1248 /* 1249 * allocate flowvalve structure 1250 */ 1251 static struct flowvalve * 1252 fv_alloc(struct red *rp) 1253 { 1254 struct flowvalve *fv; 1255 struct fve *fve; 1256 int i, num; 1257 1258 num = FV_FLOWLISTSIZE; 1259 fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO); 1260 if (fv == NULL) 1261 return (NULL); 1262 1263 fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF, 1264 M_WAITOK|M_ZERO); 1265 if (fv->fv_fves == NULL) { 1266 free(fv, M_DEVBUF); 1267 return (NULL); 1268 } 1269 1270 fv->fv_flows = 0; 1271 TAILQ_INIT(&fv->fv_flowlist); 1272 for (i = 0; i < num; i++) { 1273 fve = &fv->fv_fves[i]; 1274 fve->fve_lastdrop.tv_sec = 0; 1275 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); 1276 } 1277 1278 /* initialize drop rate threshold in scaled fixed-point */ 1279 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; 1280 1281 /* initialize drop rate to fraction table */ 1282 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK); 1283 if (fv->fv_p2ftab == NULL) { 1284 free(fv->fv_fves, M_DEVBUF); 1285 free(fv, M_DEVBUF); 1286 return (NULL); 1287 } 1288 /* 1289 * create the p2f table. 1290 * (shift is used to keep the precision) 1291 */ 1292 for (i = 1; i < BRTT_SIZE; i++) { 1293 int f; 1294 1295 f = brtt_tab[i] << 8; 1296 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; 1297 } 1298 1299 return (fv); 1300 } 1301 1302 static void 1303 fv_destroy(struct flowvalve *fv) 1304 { 1305 free(fv->fv_p2ftab, M_DEVBUF); 1306 free(fv->fv_fves, M_DEVBUF); 1307 free(fv, M_DEVBUF); 1308 } 1309 1310 static inline int 1311 fv_p2f(struct flowvalve *fv, int p) 1312 { 1313 int val, f; 1314 1315 if (p >= BRTT_PMAX) 1316 f = fv->fv_p2ftab[BRTT_SIZE-1]; 1317 else if ((val = (p & BRTT_MASK))) 1318 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; 1319 else 1320 f = fv->fv_p2ftab[1]; 1321 return (f); 1322 } 1323 1324 /* 1325 * check if an arriving packet should be pre-dropped. 1326 * called from red_addq() when a packet arrives. 1327 * returns 1 when the packet should be pre-dropped. 1328 * should be called in splnet. 1329 */ 1330 static int 1331 fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr, 1332 struct fve **fcache) 1333 { 1334 struct fve *fve; 1335 struct timeval now; 1336 1337 fv->fv_ifseq++; 1338 FV_TIMESTAMP(&now); 1339 1340 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1341 /* no matching entry in the flowlist */ 1342 return (0); 1343 1344 *fcache = fve; 1345 1346 /* update fraction f for every FV_N packets */ 1347 if (++fve->fve_count == FV_N) { 1348 /* 1349 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f 1350 */ 1351 fve->fve_f = 1352 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) 1353 + fve->fve_f - FV_FUNSCALE(fve->fve_f); 1354 fve->fve_ifseq = fv->fv_ifseq; 1355 fve->fve_count = 0; 1356 } 1357 1358 /* 1359 * overpumping test 1360 */ 1361 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { 1362 int fthresh; 1363 1364 /* calculate a threshold */ 1365 fthresh = fv_p2f(fv, fve->fve_p); 1366 if (fve->fve_f > fthresh) 1367 fve->fve_state = Red; 1368 } 1369 1370 if (fve->fve_state == Red) { 1371 /* 1372 * backoff test 1373 */ 1374 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { 1375 /* no drop for at least FV_BACKOFFTHRESH sec */ 1376 fve->fve_p = 0; 1377 fve->fve_state = Green; 1378 #ifdef FV_STATS 1379 fv->fv_stats.escape++; 1380 #endif 1381 } else { 1382 /* block this flow */ 1383 flowlist_move_to_head(fv, fve); 1384 fve->fve_lastdrop = now; 1385 #ifdef FV_STATS 1386 fv->fv_stats.predrop++; 1387 #endif 1388 return (1); 1389 } 1390 } 1391 1392 /* 1393 * p = (1 - Wp) * p 1394 */ 1395 fve->fve_p -= FV_PUNSCALE(fve->fve_p); 1396 if (fve->fve_p < 0) 1397 fve->fve_p = 0; 1398 #ifdef FV_STATS 1399 fv->fv_stats.pass++; 1400 #endif 1401 return (0); 1402 } 1403 1404 /* 1405 * called from red_addq when a packet is dropped by red. 1406 * should be called in splnet. 1407 */ 1408 static void 1409 fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr, 1410 struct fve *fcache) 1411 { 1412 struct fve *fve; 1413 struct timeval now; 1414 1415 if (pktattr == NULL) 1416 return; 1417 FV_TIMESTAMP(&now); 1418 1419 if (fcache != NULL) 1420 /* the fve of this packet is already cached */ 1421 fve = fcache; 1422 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1423 fve = flowlist_reclaim(fv, pktattr); 1424 1425 flowlist_move_to_head(fv, fve); 1426 1427 /* 1428 * update p: the following line cancels the update 1429 * in fv_checkflow() and calculate 1430 * p = Wp + (1 - Wp) * p 1431 */ 1432 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; 1433 1434 fve->fve_lastdrop = now; 1435 } 1436 1437 #endif /* ALTQ_FLOWVALVE */ 1438 1439 #ifdef KLD_MODULE 1440 1441 static struct altqsw red_sw = 1442 {"red", redopen, redclose, redioctl}; 1443 1444 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); 1445 MODULE_VERSION(altq_red, 1); 1446 1447 #endif /* KLD_MODULE */ 1448 #endif /* ALTQ3_COMPAT */ 1449 1450 #endif /* ALTQ_RED */ 1451