1 /* $NetBSD: altq_red.c,v 1.30 2016/04/20 08:58:48 knakahara Exp $ */ 2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */ 3 4 /* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 /* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.30 2016/04/20 08:58:48 knakahara Exp $"); 65 66 #ifdef _KERNEL_OPT 67 #include "opt_altq.h" 68 #include "opt_inet.h" 69 #include "pf.h" 70 #endif 71 72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 73 74 #include <sys/param.h> 75 #include <sys/malloc.h> 76 #include <sys/mbuf.h> 77 #include <sys/socket.h> 78 #include <sys/systm.h> 79 #include <sys/errno.h> 80 #include <sys/kauth.h> 81 #if 1 /* ALTQ3_COMPAT */ 82 #include <sys/sockio.h> 83 #include <sys/proc.h> 84 #include <sys/kernel.h> 85 #ifdef ALTQ_FLOWVALVE 86 #include <sys/queue.h> 87 #include <sys/time.h> 88 #endif 89 #endif /* ALTQ3_COMPAT */ 90 #include <sys/cprng.h> 91 92 #include <net/if.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_systm.h> 96 #include <netinet/ip.h> 97 #ifdef INET6 98 #include <netinet/ip6.h> 99 #endif 100 101 #if NPF > 0 102 #include <net/pfvar.h> 103 #endif 104 #include <altq/altq.h> 105 #include <altq/altq_red.h> 106 #ifdef ALTQ3_COMPAT 107 #include <altq/altq_conf.h> 108 #ifdef ALTQ_FLOWVALVE 109 #include <altq/altq_flowvalve.h> 110 #endif 111 #endif 112 113 /* 114 * ALTQ/RED (Random Early Detection) implementation using 32-bit 115 * fixed-point calculation. 116 * 117 * written by kjc using the ns code as a reference. 118 * you can learn more about red and ns from Sally's home page at 119 * http://www-nrg.ee.lbl.gov/floyd/ 120 * 121 * most of the red parameter values are fixed in this implementation 122 * to prevent fixed-point overflow/underflow. 123 * if you change the parameters, watch out for overflow/underflow! 124 * 125 * the parameters used are recommended values by Sally. 
126 * the corresponding ns config looks: 127 * q_weight=0.00195 128 * minthresh=5 maxthresh=15 queue-size=60 129 * linterm=30 130 * dropmech=drop-tail 131 * bytes=false (can't be handled by 32-bit fixed-point) 132 * doubleq=false dqthresh=false 133 * wait=true 134 */ 135 /* 136 * alternative red parameters for a slow link. 137 * 138 * assume the queue length becomes from zero to L and keeps L, it takes 139 * N packets for q_avg to reach 63% of L. 140 * when q_weight is 0.002, N is about 500 packets. 141 * for a slow link like dial-up, 500 packets takes more than 1 minute! 142 * when q_weight is 0.008, N is about 127 packets. 143 * when q_weight is 0.016, N is about 63 packets. 144 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 145 * are allowed for 0.016. 146 * see Sally's paper for more details. 147 */ 148 /* normal red parameters */ 149 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 150 /* q_weight = 0.00195 */ 151 152 /* red parameters for a slow link */ 153 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 154 /* q_weight = 0.0078125 */ 155 156 /* red parameters for a very slow link (e.g., dialup) */ 157 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 158 /* q_weight = 0.015625 */ 159 160 /* fixed-point uses 12-bit decimal places */ 161 #define FP_SHIFT 12 /* fixed-point shift */ 162 163 /* red parameters for drop probability */ 164 #define INV_P_MAX 10 /* inverse of max drop probability */ 165 #define TH_MIN 5 /* min threshold */ 166 #define TH_MAX 15 /* max threshold */ 167 168 #define RED_LIMIT 60 /* default max queue lenght */ 169 #define RED_STATS /* collect statistics */ 170 171 /* 172 * our default policy for forced-drop is drop-tail. 173 * (in altq-1.1.2 or earlier, the default was random-drop. 174 * but it makes more sense to punish the cause of the surge.) 175 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 
176 */ 177 178 #ifdef ALTQ3_COMPAT 179 #ifdef ALTQ_FLOWVALVE 180 /* 181 * flow-valve is an extention to protect red from unresponsive flows 182 * and to promote end-to-end congestion control. 183 * flow-valve observes the average drop rates of the flows that have 184 * experienced packet drops in the recent past. 185 * when the average drop rate exceeds the threshold, the flow is 186 * blocked by the flow-valve. the trapped flow should back off 187 * exponentially to escape from the flow-valve. 188 */ 189 #ifdef RED_RANDOM_DROP 190 #error "random-drop can't be used with flow-valve!" 191 #endif 192 #endif /* ALTQ_FLOWVALVE */ 193 194 /* red_list keeps all red_queue_t's allocated. */ 195 static red_queue_t *red_list = NULL; 196 197 #endif /* ALTQ3_COMPAT */ 198 199 /* default red parameter values */ 200 static int default_th_min = TH_MIN; 201 static int default_th_max = TH_MAX; 202 static int default_inv_pmax = INV_P_MAX; 203 204 #ifdef ALTQ3_COMPAT 205 /* internal function prototypes */ 206 static int red_enqueue(struct ifaltq *, struct mbuf *); 207 static struct mbuf *red_dequeue(struct ifaltq *, int); 208 static int red_request(struct ifaltq *, int, void *); 209 static void red_purgeq(red_queue_t *); 210 static int red_detach(red_queue_t *); 211 #ifdef ALTQ_FLOWVALVE 212 static inline struct fve *flowlist_lookup(struct flowvalve *, 213 struct altq_pktattr *, struct timeval *); 214 static inline struct fve *flowlist_reclaim(struct flowvalve *, 215 struct altq_pktattr *); 216 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *); 217 static inline int fv_p2f(struct flowvalve *, int); 218 static struct flowvalve *fv_alloc(struct red *); 219 static void fv_destroy(struct flowvalve *); 220 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, 221 struct fve **); 222 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, 223 struct fve *); 224 #endif 225 #endif /* ALTQ3_COMPAT */ 226 227 /* 228 * red support routines 229 
*/ 230 red_t * 231 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, 232 int pkttime) 233 { 234 red_t *rp; 235 int w, i; 236 int npkts_per_sec; 237 238 rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO); 239 if (rp == NULL) 240 return (NULL); 241 242 rp->red_avg = 0; 243 rp->red_idle = 1; 244 245 if (weight == 0) 246 rp->red_weight = W_WEIGHT; 247 else 248 rp->red_weight = weight; 249 if (inv_pmax == 0) 250 rp->red_inv_pmax = default_inv_pmax; 251 else 252 rp->red_inv_pmax = inv_pmax; 253 if (th_min == 0) 254 rp->red_thmin = default_th_min; 255 else 256 rp->red_thmin = th_min; 257 if (th_max == 0) 258 rp->red_thmax = default_th_max; 259 else 260 rp->red_thmax = th_max; 261 262 rp->red_flags = flags; 263 264 if (pkttime == 0) 265 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 266 rp->red_pkttime = 800; 267 else 268 rp->red_pkttime = pkttime; 269 270 if (weight == 0) { 271 /* when the link is very slow, adjust red parameters */ 272 npkts_per_sec = 1000000 / rp->red_pkttime; 273 if (npkts_per_sec < 50) { 274 /* up to about 400Kbps */ 275 rp->red_weight = W_WEIGHT_2; 276 } else if (npkts_per_sec < 300) { 277 /* up to about 2.4Mbps */ 278 rp->red_weight = W_WEIGHT_1; 279 } 280 } 281 282 /* calculate wshift. weight must be power of 2 */ 283 w = rp->red_weight; 284 for (i = 0; w > 1; i++) 285 w = w >> 1; 286 rp->red_wshift = i; 287 w = 1 << rp->red_wshift; 288 if (w != rp->red_weight) { 289 printf("invalid weight value %d for red! use %d\n", 290 rp->red_weight, w); 291 rp->red_weight = w; 292 } 293 294 /* 295 * thmin_s and thmax_s are scaled versions of th_min and th_max 296 * to be compared with avg. 
297 */ 298 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 299 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 300 301 /* 302 * precompute probability denominator 303 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 304 */ 305 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 306 * rp->red_inv_pmax) << FP_SHIFT; 307 308 /* allocate weight table */ 309 rp->red_wtab = wtab_alloc(rp->red_weight); 310 311 microtime(&rp->red_last); 312 #ifdef ALTQ3_COMPAT 313 #ifdef ALTQ_FLOWVALVE 314 if (flags & REDF_FLOWVALVE) 315 rp->red_flowvalve = fv_alloc(rp); 316 /* if fv_alloc failes, flowvalve is just disabled */ 317 #endif 318 #endif /* ALTQ3_COMPAT */ 319 return (rp); 320 } 321 322 void 323 red_destroy(red_t *rp) 324 { 325 #ifdef ALTQ3_COMPAT 326 #ifdef ALTQ_FLOWVALVE 327 if (rp->red_flowvalve != NULL) 328 fv_destroy(rp->red_flowvalve); 329 #endif 330 #endif /* ALTQ3_COMPAT */ 331 wtab_destroy(rp->red_wtab); 332 free(rp, M_DEVBUF); 333 } 334 335 void 336 red_getstats(red_t *rp, struct redstats *sp) 337 { 338 sp->q_avg = rp->red_avg >> rp->red_wshift; 339 sp->xmit_cnt = rp->red_stats.xmit_cnt; 340 sp->drop_cnt = rp->red_stats.drop_cnt; 341 sp->drop_forced = rp->red_stats.drop_forced; 342 sp->drop_unforced = rp->red_stats.drop_unforced; 343 sp->marked_packets = rp->red_stats.marked_packets; 344 } 345 346 int 347 red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, 348 struct altq_pktattr *pktattr) 349 { 350 int avg, droptype; 351 int n; 352 #ifdef ALTQ3_COMPAT 353 #ifdef ALTQ_FLOWVALVE 354 struct fve *fve = NULL; 355 356 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 357 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 358 m_freem(m); 359 return (-1); 360 } 361 #endif 362 #endif /* ALTQ3_COMPAT */ 363 364 avg = rp->red_avg; 365 366 /* 367 * if we were idle, we pretend that n packets arrived during 368 * the idle period. 
369 */ 370 if (rp->red_idle) { 371 struct timeval now; 372 int t; 373 374 rp->red_idle = 0; 375 microtime(&now); 376 t = (now.tv_sec - rp->red_last.tv_sec); 377 if (t > 60) { 378 /* 379 * being idle for more than 1 minute, set avg to zero. 380 * this prevents t from overflow. 381 */ 382 avg = 0; 383 } else { 384 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 385 n = t / rp->red_pkttime - 1; 386 387 /* the following line does (avg = (1 - Wq)^n * avg) */ 388 if (n > 0) 389 avg = (avg >> FP_SHIFT) * 390 pow_w(rp->red_wtab, n); 391 } 392 } 393 394 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 395 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 396 rp->red_avg = avg; /* save the new value */ 397 398 /* 399 * red_count keeps a tally of arriving traffic that has not 400 * been dropped. 401 */ 402 rp->red_count++; 403 404 /* see if we drop early */ 405 droptype = DTYPE_NODROP; 406 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 407 if (avg >= rp->red_thmax_s) { 408 /* avg >= th_max: forced drop */ 409 droptype = DTYPE_FORCED; 410 } else if (rp->red_old == 0) { 411 /* first exceeds th_min */ 412 rp->red_count = 1; 413 rp->red_old = 1; 414 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 415 rp->red_probd, rp->red_count)) { 416 /* mark or drop by red */ 417 if ((rp->red_flags & REDF_ECN) && 418 mark_ecn(m, pktattr, rp->red_flags)) { 419 /* successfully marked. do not drop. */ 420 rp->red_count = 0; 421 #ifdef RED_STATS 422 rp->red_stats.marked_packets++; 423 #endif 424 } else { 425 /* unforced drop by red */ 426 droptype = DTYPE_EARLY; 427 } 428 } 429 } else { 430 /* avg < th_min */ 431 rp->red_old = 0; 432 } 433 434 /* 435 * if the queue length hits the hard limit, it's a forced drop. 436 */ 437 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 438 droptype = DTYPE_FORCED; 439 440 #ifdef RED_RANDOM_DROP 441 /* if successful or forced drop, enqueue this packet. 
*/ 442 if (droptype != DTYPE_EARLY) 443 _addq(q, m); 444 #else 445 /* if successful, enqueue this packet. */ 446 if (droptype == DTYPE_NODROP) 447 _addq(q, m); 448 #endif 449 if (droptype != DTYPE_NODROP) { 450 if (droptype == DTYPE_EARLY) { 451 /* drop the incoming packet */ 452 #ifdef RED_STATS 453 rp->red_stats.drop_unforced++; 454 #endif 455 } else { 456 /* forced drop, select a victim packet in the queue. */ 457 #ifdef RED_RANDOM_DROP 458 m = _getq_random(q); 459 #endif 460 #ifdef RED_STATS 461 rp->red_stats.drop_forced++; 462 #endif 463 } 464 #ifdef RED_STATS 465 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 466 #endif 467 rp->red_count = 0; 468 #ifdef ALTQ3_COMPAT 469 #ifdef ALTQ_FLOWVALVE 470 if (rp->red_flowvalve != NULL) 471 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 472 #endif 473 #endif /* ALTQ3_COMPAT */ 474 m_freem(m); 475 return (-1); 476 } 477 /* successfully queued */ 478 #ifdef RED_STATS 479 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 480 #endif 481 return (0); 482 } 483 484 /* 485 * early-drop probability is calculated as follows: 486 * prob = p_max * (avg - th_min) / (th_max - th_min) 487 * prob_a = prob / (2 - count*prob) 488 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 489 * here prob_a increases as successive undrop count increases. 490 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 491 * becomes 1 when (count >= (2 / prob))). 492 */ 493 int 494 drop_early(int fp_len, int fp_probd, int count) 495 { 496 int d; /* denominator of drop-probability */ 497 498 d = fp_probd - count * fp_len; 499 if (d <= 0) 500 /* count exceeds the hard limit: drop or mark */ 501 return (1); 502 503 /* 504 * now the range of d is [1..600] in fixed-point. 
(when 505 * th_max-th_min=10 and p_max=1/30) 506 * drop probability = (avg - TH_MIN) / d 507 */ 508 509 if ((cprng_fast32() % d) < fp_len) { 510 /* drop or mark */ 511 return (1); 512 } 513 /* no drop/mark */ 514 return (0); 515 } 516 517 /* 518 * try to mark CE bit to the packet. 519 * returns 1 if successfully marked, 0 otherwise. 520 */ 521 int 522 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 523 { 524 struct mbuf *m0; 525 struct m_tag *t; 526 struct altq_tag *at; 527 void *hdr; 528 int af; 529 530 t = m_tag_find(m, PACKET_TAG_ALTQ_QID, NULL); 531 if (t != NULL) { 532 at = (struct altq_tag *)(t + 1); 533 if (at == NULL) 534 return (0); 535 af = at->af; 536 hdr = at->hdr; 537 #ifdef ALTQ3_COMPAT 538 } else if (pktattr != NULL) { 539 af = pktattr->pattr_af; 540 hdr = pktattr->pattr_hdr; 541 #endif /* ALTQ3_COMPAT */ 542 } else 543 return (0); 544 545 if (af != AF_INET && af != AF_INET6) 546 return (0); 547 548 /* verify that pattr_hdr is within the mbuf data */ 549 for (m0 = m; m0 != NULL; m0 = m0->m_next) 550 if (((char *)hdr >= m0->m_data) && 551 ((char *)hdr < m0->m_data + m0->m_len)) 552 break; 553 if (m0 == NULL) { 554 /* ick, tag info is stale */ 555 return (0); 556 } 557 558 switch (af) { 559 case AF_INET: 560 if (flags & REDF_ECN4) { 561 struct ip *ip = hdr; 562 u_int8_t otos; 563 int sum; 564 565 if (ip->ip_v != 4) 566 return (0); /* version mismatch! 
*/ 567 568 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 569 return (0); /* not-ECT */ 570 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 571 return (1); /* already marked */ 572 573 /* 574 * ecn-capable but not marked, 575 * mark CE and update checksum 576 */ 577 otos = ip->ip_tos; 578 ip->ip_tos |= IPTOS_ECN_CE; 579 /* 580 * update checksum (from RFC1624) 581 * HC' = ~(~HC + ~m + m') 582 */ 583 sum = ~ntohs(ip->ip_sum) & 0xffff; 584 sum += (~otos & 0xffff) + ip->ip_tos; 585 sum = (sum >> 16) + (sum & 0xffff); 586 sum += (sum >> 16); /* add carry */ 587 ip->ip_sum = htons(~sum & 0xffff); 588 return (1); 589 } 590 break; 591 #ifdef INET6 592 case AF_INET6: 593 if (flags & REDF_ECN6) { 594 struct ip6_hdr *ip6 = hdr; 595 u_int32_t flowlabel; 596 597 flowlabel = ntohl(ip6->ip6_flow); 598 if ((flowlabel >> 28) != 6) 599 return (0); /* version mismatch! */ 600 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 601 (IPTOS_ECN_NOTECT << 20)) 602 return (0); /* not-ECT */ 603 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 604 (IPTOS_ECN_CE << 20)) 605 return (1); /* already marked */ 606 /* 607 * ecn-capable but not marked, mark CE 608 */ 609 flowlabel |= (IPTOS_ECN_CE << 20); 610 ip6->ip6_flow = htonl(flowlabel); 611 return (1); 612 } 613 break; 614 #endif /* INET6 */ 615 } 616 617 /* not marked */ 618 return (0); 619 } 620 621 struct mbuf * 622 red_getq(red_t *rp, class_queue_t *q) 623 { 624 struct mbuf *m; 625 626 if ((m = _getq(q)) == NULL) { 627 if (rp->red_idle == 0) { 628 rp->red_idle = 1; 629 microtime(&rp->red_last); 630 } 631 return NULL; 632 } 633 634 rp->red_idle = 0; 635 return (m); 636 } 637 638 /* 639 * helper routine to calibrate avg during idle. 640 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 641 * here Wq = 1/weight and the code assumes Wq is close to zero. 642 * 643 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 
644 */ 645 static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 646 647 struct wtab * 648 wtab_alloc(int weight) 649 { 650 struct wtab *w; 651 int i; 652 653 for (w = wtab_list; w != NULL; w = w->w_next) 654 if (w->w_weight == weight) { 655 w->w_refcount++; 656 return (w); 657 } 658 659 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO); 660 if (w == NULL) 661 panic("wtab_alloc: malloc failed!"); 662 w->w_weight = weight; 663 w->w_refcount = 1; 664 w->w_next = wtab_list; 665 wtab_list = w; 666 667 /* initialize the weight table */ 668 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 669 for (i = 1; i < 32; i++) { 670 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 671 if (w->w_tab[i] == 0 && w->w_param_max == 0) 672 w->w_param_max = 1 << i; 673 } 674 675 return (w); 676 } 677 678 int 679 wtab_destroy(struct wtab *w) 680 { 681 struct wtab *prev; 682 683 if (--w->w_refcount > 0) 684 return (0); 685 686 if (wtab_list == w) 687 wtab_list = w->w_next; 688 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 689 if (prev->w_next == w) { 690 prev->w_next = w->w_next; 691 break; 692 } 693 694 free(w, M_DEVBUF); 695 return (0); 696 } 697 698 int32_t 699 pow_w(struct wtab *w, int n) 700 { 701 int i, bit; 702 int32_t val; 703 704 if (n >= w->w_param_max) 705 return (0); 706 707 val = 1 << FP_SHIFT; 708 if (n <= 0) 709 return (val); 710 711 bit = 1; 712 i = 0; 713 while (n) { 714 if (n & bit) { 715 val = (val * w->w_tab[i]) >> FP_SHIFT; 716 n &= ~bit; 717 } 718 i++; 719 bit <<= 1; 720 } 721 return (val); 722 } 723 724 #ifdef ALTQ3_COMPAT 725 /* 726 * red device interface 727 */ 728 altqdev_decl(red); 729 730 int 731 redopen(dev_t dev, int flag, int fmt, 732 struct lwp *l) 733 { 734 /* everything will be done when the queueing scheme is attached. 
*/ 735 return 0; 736 } 737 738 int 739 redclose(dev_t dev, int flag, int fmt, 740 struct lwp *l) 741 { 742 red_queue_t *rqp; 743 int err, error = 0; 744 745 while ((rqp = red_list) != NULL) { 746 /* destroy all */ 747 err = red_detach(rqp); 748 if (err != 0 && error == 0) 749 error = err; 750 } 751 752 return error; 753 } 754 755 int 756 redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag, 757 struct lwp *l) 758 { 759 red_queue_t *rqp; 760 struct red_interface *ifacep; 761 struct ifnet *ifp; 762 struct proc *p = l->l_proc; 763 int error = 0; 764 765 /* check super-user privilege */ 766 switch (cmd) { 767 case RED_GETSTATS: 768 break; 769 default: 770 #if (__FreeBSD_version > 400000) 771 if ((error = suser(p)) != 0) 772 #else 773 if ((error = kauth_authorize_network(p->p_cred, 774 KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL, 775 NULL, NULL)) != 0) 776 #endif 777 return (error); 778 break; 779 } 780 781 switch (cmd) { 782 783 case RED_ENABLE: 784 ifacep = (struct red_interface *)addr; 785 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 786 error = EBADF; 787 break; 788 } 789 error = altq_enable(rqp->rq_ifq); 790 break; 791 792 case RED_DISABLE: 793 ifacep = (struct red_interface *)addr; 794 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 795 error = EBADF; 796 break; 797 } 798 error = altq_disable(rqp->rq_ifq); 799 break; 800 801 case RED_IF_ATTACH: 802 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 803 if (ifp == NULL) { 804 error = ENXIO; 805 break; 806 } 807 808 /* allocate and initialize red_queue_t */ 809 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO); 810 if (rqp == NULL) { 811 error = ENOMEM; 812 break; 813 } 814 815 rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF, 816 M_WAITOK|M_ZERO); 817 if (rqp->rq_q == NULL) { 818 free(rqp, M_DEVBUF); 819 error = ENOMEM; 820 break; 821 } 822 823 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 824 if (rqp->rq_red == NULL) { 825 free(rqp->rq_q, 
M_DEVBUF); 826 free(rqp, M_DEVBUF); 827 error = ENOMEM; 828 break; 829 } 830 831 rqp->rq_ifq = &ifp->if_snd; 832 qtail(rqp->rq_q) = NULL; 833 qlen(rqp->rq_q) = 0; 834 qlimit(rqp->rq_q) = RED_LIMIT; 835 qtype(rqp->rq_q) = Q_RED; 836 837 /* 838 * set RED to this ifnet structure. 839 */ 840 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 841 red_enqueue, red_dequeue, red_request, 842 NULL, NULL); 843 if (error) { 844 red_destroy(rqp->rq_red); 845 free(rqp->rq_q, M_DEVBUF); 846 free(rqp, M_DEVBUF); 847 break; 848 } 849 850 /* add this state to the red list */ 851 rqp->rq_next = red_list; 852 red_list = rqp; 853 break; 854 855 case RED_IF_DETACH: 856 ifacep = (struct red_interface *)addr; 857 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 858 error = EBADF; 859 break; 860 } 861 error = red_detach(rqp); 862 break; 863 864 case RED_GETSTATS: 865 do { 866 struct red_stats *q_stats; 867 red_t *rp; 868 869 q_stats = (struct red_stats *)addr; 870 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 871 ALTQT_RED)) == NULL) { 872 error = EBADF; 873 break; 874 } 875 876 q_stats->q_len = qlen(rqp->rq_q); 877 q_stats->q_limit = qlimit(rqp->rq_q); 878 879 rp = rqp->rq_red; 880 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 881 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 882 q_stats->drop_cnt = rp->red_stats.drop_cnt; 883 q_stats->drop_forced = rp->red_stats.drop_forced; 884 q_stats->drop_unforced = rp->red_stats.drop_unforced; 885 q_stats->marked_packets = rp->red_stats.marked_packets; 886 887 q_stats->weight = rp->red_weight; 888 q_stats->inv_pmax = rp->red_inv_pmax; 889 q_stats->th_min = rp->red_thmin; 890 q_stats->th_max = rp->red_thmax; 891 892 #ifdef ALTQ_FLOWVALVE 893 if (rp->red_flowvalve != NULL) { 894 struct flowvalve *fv = rp->red_flowvalve; 895 q_stats->fv_flows = fv->fv_flows; 896 q_stats->fv_pass = fv->fv_stats.pass; 897 q_stats->fv_predrop = fv->fv_stats.predrop; 898 q_stats->fv_alloc = fv->fv_stats.alloc; 899 q_stats->fv_escape = 
fv->fv_stats.escape; 900 } else { 901 #endif /* ALTQ_FLOWVALVE */ 902 q_stats->fv_flows = 0; 903 q_stats->fv_pass = 0; 904 q_stats->fv_predrop = 0; 905 q_stats->fv_alloc = 0; 906 q_stats->fv_escape = 0; 907 #ifdef ALTQ_FLOWVALVE 908 } 909 #endif /* ALTQ_FLOWVALVE */ 910 } while (/*CONSTCOND*/ 0); 911 break; 912 913 case RED_CONFIG: 914 do { 915 struct red_conf *fc; 916 red_t *new; 917 int s, limit; 918 919 fc = (struct red_conf *)addr; 920 if ((rqp = altq_lookup(fc->iface.red_ifname, 921 ALTQT_RED)) == NULL) { 922 error = EBADF; 923 break; 924 } 925 new = red_alloc(fc->red_weight, 926 fc->red_inv_pmax, 927 fc->red_thmin, 928 fc->red_thmax, 929 fc->red_flags, 930 fc->red_pkttime); 931 if (new == NULL) { 932 error = ENOMEM; 933 break; 934 } 935 936 s = splnet(); 937 red_purgeq(rqp); 938 limit = fc->red_limit; 939 if (limit < fc->red_thmax) 940 limit = fc->red_thmax; 941 qlimit(rqp->rq_q) = limit; 942 fc->red_limit = limit; /* write back the new value */ 943 944 red_destroy(rqp->rq_red); 945 rqp->rq_red = new; 946 947 splx(s); 948 949 /* write back new values */ 950 fc->red_limit = limit; 951 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 952 fc->red_thmin = rqp->rq_red->red_thmin; 953 fc->red_thmax = rqp->rq_red->red_thmax; 954 955 } while (/*CONSTCOND*/ 0); 956 break; 957 958 case RED_SETDEFAULTS: 959 do { 960 struct redparams *rp; 961 962 rp = (struct redparams *)addr; 963 964 default_th_min = rp->th_min; 965 default_th_max = rp->th_max; 966 default_inv_pmax = rp->inv_pmax; 967 } while (/*CONSTCOND*/ 0); 968 break; 969 970 default: 971 error = EINVAL; 972 break; 973 } 974 return error; 975 } 976 977 static int 978 red_detach(red_queue_t *rqp) 979 { 980 red_queue_t *tmp; 981 int error = 0; 982 983 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 984 altq_disable(rqp->rq_ifq); 985 986 if ((error = altq_detach(rqp->rq_ifq))) 987 return (error); 988 989 if (red_list == rqp) 990 red_list = rqp->rq_next; 991 else { 992 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 993 if 
(tmp->rq_next == rqp) { 994 tmp->rq_next = rqp->rq_next; 995 break; 996 } 997 if (tmp == NULL) 998 printf("red_detach: no state found in red_list!\n"); 999 } 1000 1001 red_destroy(rqp->rq_red); 1002 free(rqp->rq_q, M_DEVBUF); 1003 free(rqp, M_DEVBUF); 1004 return (error); 1005 } 1006 1007 /* 1008 * enqueue routine: 1009 * 1010 * returns: 0 when successfully queued. 1011 * ENOBUFS when drop occurs. 1012 */ 1013 static int 1014 red_enqueue(struct ifaltq *ifq, struct mbuf *m) 1015 { 1016 struct altq_pktattr pktattr; 1017 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1018 1019 pktattr.pattr_class = m->m_pkthdr.pattr_class; 1020 pktattr.pattr_af = m->m_pkthdr.pattr_af; 1021 pktattr.pattr_hdr = m->m_pkthdr.pattr_hdr; 1022 1023 if (red_addq(rqp->rq_red, rqp->rq_q, m, &pktattr) < 0) 1024 return ENOBUFS; 1025 ifq->ifq_len++; 1026 return 0; 1027 } 1028 1029 /* 1030 * dequeue routine: 1031 * must be called in splnet. 1032 * 1033 * returns: mbuf dequeued. 1034 * NULL when no packet is available in the queue. 
1035 */ 1036 1037 static struct mbuf * 1038 red_dequeue(struct ifaltq *ifq, int op) 1039 { 1040 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1041 struct mbuf *m; 1042 1043 if (op == ALTDQ_POLL) 1044 return qhead(rqp->rq_q); 1045 1046 /* op == ALTDQ_REMOVE */ 1047 m = red_getq(rqp->rq_red, rqp->rq_q); 1048 if (m != NULL) 1049 ifq->ifq_len--; 1050 return (m); 1051 } 1052 1053 static int 1054 red_request(struct ifaltq *ifq, int req, void *arg) 1055 { 1056 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1057 1058 switch (req) { 1059 case ALTRQ_PURGE: 1060 red_purgeq(rqp); 1061 break; 1062 } 1063 return (0); 1064 } 1065 1066 static void 1067 red_purgeq(red_queue_t *rqp) 1068 { 1069 _flushq(rqp->rq_q); 1070 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 1071 rqp->rq_ifq->ifq_len = 0; 1072 } 1073 1074 #ifdef ALTQ_FLOWVALVE 1075 1076 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1077 #define FV_PSCALE(x) ((x) << FV_PSHIFT) 1078 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1079 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 1080 #define FV_FSCALE(x) ((x) << FV_FSHIFT) 1081 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1082 1083 #define FV_TIMER (3 * hz) /* timer value for garbage collector */ 1084 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ 1085 1086 #define FV_N 10 /* update fve_f every FV_N packets */ 1087 1088 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ 1089 #define FV_TTHRESH 3 /* time threshold to delete fve */ 1090 #define FV_ALPHA 5 /* extra packet count */ 1091 1092 #define FV_STATS 1093 1094 #define FV_TIMESTAMP(tp) getmicrotime(tp) 1095 1096 /* 1097 * Brtt table: 127 entry table to convert drop rate (p) to 1098 * the corresponding bandwidth fraction (f) 1099 * the following equation is implemented to use scaled values, 1100 * fve_p and fve_f, in the fixed point format. 
 *
 *	Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *	f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

/* precomputed Brtt(p) values, indexed by the scaled drop rate */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

/*
 * flowlist_lookup:
 *	find the flow entry matching the src/dst addresses of the packet
 *	described by pktattr.  returns the entry, or NULL when no match
 *	(or when pktattr is NULL / of an unknown address family).
 *	as a side effect, fv_flows is updated with the number of entries
 *	scanned before an unused/expired one was reached.
 *	NOTE(review): the early break on a zero or expired fve_lastdrop
 *	appears to rely on the list being kept in LRU order with unused
 *	entries at the tail -- confirm against the callers.
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries whose last drop is older than this are considered stale */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			/* a zero timestamp marks an unused entry */
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			/* expire a stale entry and stop the scan */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			/* a zero timestamp marks an unused entry */
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			/* expire a stale entry and stop the scan */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

/*
 * flowlist_reclaim:
 *	recycle the least-recently-used flow entry for the flow described
 *	by pktattr.
 */
static inline struct fve *
flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
1208 */ 1209 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); 1210 1211 switch (pktattr->pattr_af) { 1212 case AF_INET: 1213 ip = (struct ip *)pktattr->pattr_hdr; 1214 fve->fve_flow.flow_af = AF_INET; 1215 fve->fve_flow.flow_ip.ip_src = ip->ip_src; 1216 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; 1217 break; 1218 #ifdef INET6 1219 case AF_INET6: 1220 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1221 fve->fve_flow.flow_af = AF_INET6; 1222 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; 1223 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; 1224 break; 1225 #endif 1226 } 1227 1228 fve->fve_state = Green; 1229 fve->fve_p = 0.0; 1230 fve->fve_f = 0.0; 1231 fve->fve_ifseq = fv->fv_ifseq - 1; 1232 fve->fve_count = 0; 1233 1234 fv->fv_flows++; 1235 #ifdef FV_STATS 1236 fv->fv_stats.alloc++; 1237 #endif 1238 return (fve); 1239 } 1240 1241 static inline void 1242 flowlist_move_to_head(struct flowvalve *fv, struct fve *fve) 1243 { 1244 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { 1245 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); 1246 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); 1247 } 1248 } 1249 1250 /* 1251 * allocate flowvalve structure 1252 */ 1253 static struct flowvalve * 1254 fv_alloc(struct red *rp) 1255 { 1256 struct flowvalve *fv; 1257 struct fve *fve; 1258 int i, num; 1259 1260 num = FV_FLOWLISTSIZE; 1261 fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO); 1262 if (fv == NULL) 1263 return (NULL); 1264 1265 fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF, 1266 M_WAITOK|M_ZERO); 1267 if (fv->fv_fves == NULL) { 1268 free(fv, M_DEVBUF); 1269 return (NULL); 1270 } 1271 1272 fv->fv_flows = 0; 1273 TAILQ_INIT(&fv->fv_flowlist); 1274 for (i = 0; i < num; i++) { 1275 fve = &fv->fv_fves[i]; 1276 fve->fve_lastdrop.tv_sec = 0; 1277 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); 1278 } 1279 1280 /* initialize drop rate threshold in scaled fixed-point */ 1281 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; 1282 1283 /* initialize drop 
rate to fraction table */ 1284 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK); 1285 if (fv->fv_p2ftab == NULL) { 1286 free(fv->fv_fves, M_DEVBUF); 1287 free(fv, M_DEVBUF); 1288 return (NULL); 1289 } 1290 /* 1291 * create the p2f table. 1292 * (shift is used to keep the precision) 1293 */ 1294 for (i = 1; i < BRTT_SIZE; i++) { 1295 int f; 1296 1297 f = brtt_tab[i] << 8; 1298 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; 1299 } 1300 1301 return (fv); 1302 } 1303 1304 static void 1305 fv_destroy(struct flowvalve *fv) 1306 { 1307 free(fv->fv_p2ftab, M_DEVBUF); 1308 free(fv->fv_fves, M_DEVBUF); 1309 free(fv, M_DEVBUF); 1310 } 1311 1312 static inline int 1313 fv_p2f(struct flowvalve *fv, int p) 1314 { 1315 int val, f; 1316 1317 if (p >= BRTT_PMAX) 1318 f = fv->fv_p2ftab[BRTT_SIZE-1]; 1319 else if ((val = (p & BRTT_MASK))) 1320 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; 1321 else 1322 f = fv->fv_p2ftab[1]; 1323 return (f); 1324 } 1325 1326 /* 1327 * check if an arriving packet should be pre-dropped. 1328 * called from red_addq() when a packet arrives. 1329 * returns 1 when the packet should be pre-dropped. 1330 * should be called in splnet. 
1331 */ 1332 static int 1333 fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr, 1334 struct fve **fcache) 1335 { 1336 struct fve *fve; 1337 struct timeval now; 1338 1339 fv->fv_ifseq++; 1340 FV_TIMESTAMP(&now); 1341 1342 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1343 /* no matching entry in the flowlist */ 1344 return (0); 1345 1346 *fcache = fve; 1347 1348 /* update fraction f for every FV_N packets */ 1349 if (++fve->fve_count == FV_N) { 1350 /* 1351 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f 1352 */ 1353 fve->fve_f = 1354 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) 1355 + fve->fve_f - FV_FUNSCALE(fve->fve_f); 1356 fve->fve_ifseq = fv->fv_ifseq; 1357 fve->fve_count = 0; 1358 } 1359 1360 /* 1361 * overpumping test 1362 */ 1363 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { 1364 int fthresh; 1365 1366 /* calculate a threshold */ 1367 fthresh = fv_p2f(fv, fve->fve_p); 1368 if (fve->fve_f > fthresh) 1369 fve->fve_state = Red; 1370 } 1371 1372 if (fve->fve_state == Red) { 1373 /* 1374 * backoff test 1375 */ 1376 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { 1377 /* no drop for at least FV_BACKOFFTHRESH sec */ 1378 fve->fve_p = 0; 1379 fve->fve_state = Green; 1380 #ifdef FV_STATS 1381 fv->fv_stats.escape++; 1382 #endif 1383 } else { 1384 /* block this flow */ 1385 flowlist_move_to_head(fv, fve); 1386 fve->fve_lastdrop = now; 1387 #ifdef FV_STATS 1388 fv->fv_stats.predrop++; 1389 #endif 1390 return (1); 1391 } 1392 } 1393 1394 /* 1395 * p = (1 - Wp) * p 1396 */ 1397 fve->fve_p -= FV_PUNSCALE(fve->fve_p); 1398 if (fve->fve_p < 0) 1399 fve->fve_p = 0; 1400 #ifdef FV_STATS 1401 fv->fv_stats.pass++; 1402 #endif 1403 return (0); 1404 } 1405 1406 /* 1407 * called from red_addq when a packet is dropped by red. 1408 * should be called in splnet. 
1409 */ 1410 static void 1411 fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr, 1412 struct fve *fcache) 1413 { 1414 struct fve *fve; 1415 struct timeval now; 1416 1417 if (pktattr == NULL) 1418 return; 1419 FV_TIMESTAMP(&now); 1420 1421 if (fcache != NULL) 1422 /* the fve of this packet is already cached */ 1423 fve = fcache; 1424 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1425 fve = flowlist_reclaim(fv, pktattr); 1426 1427 flowlist_move_to_head(fv, fve); 1428 1429 /* 1430 * update p: the following line cancels the update 1431 * in fv_checkflow() and calculate 1432 * p = Wp + (1 - Wp) * p 1433 */ 1434 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; 1435 1436 fve->fve_lastdrop = now; 1437 } 1438 1439 #endif /* ALTQ_FLOWVALVE */ 1440 1441 #ifdef KLD_MODULE 1442 1443 static struct altqsw red_sw = 1444 {"red", redopen, redclose, redioctl}; 1445 1446 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); 1447 MODULE_VERSION(altq_red, 1); 1448 1449 #endif /* KLD_MODULE */ 1450 #endif /* ALTQ3_COMPAT */ 1451 1452 #endif /* ALTQ_RED */ 1453