1 /* $NetBSD: altq_red.c,v 1.5 2001/11/12 23:14:22 lukem Exp $ */ 2 /* $KAME: altq_red.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $ */ 3 4 /* 5 * Copyright (C) 1997-2000 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 /* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.5 2001/11/12 23:14:22 lukem Exp $"); 65 66 #if defined(__FreeBSD__) || defined(__NetBSD__) 67 #include "opt_altq.h" 68 #if (__FreeBSD__ != 2) 69 #include "opt_inet.h" 70 #ifdef __FreeBSD__ 71 #include "opt_inet6.h" 72 #endif 73 #endif 74 #endif /* __FreeBSD__ || __NetBSD__ */ 75 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 76 77 #include <sys/param.h> 78 #include <sys/malloc.h> 79 #include <sys/mbuf.h> 80 #include <sys/socket.h> 81 #include <sys/sockio.h> 82 #include <sys/systm.h> 83 #include <sys/proc.h> 84 #include <sys/errno.h> 85 #include <sys/kernel.h> 86 #ifdef ALTQ_FLOWVALVE 87 #include <sys/queue.h> 88 #include <sys/time.h> 89 #endif 90 91 #include <net/if.h> 92 #include <net/if_types.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_systm.h> 96 #include <netinet/ip.h> 97 #ifdef INET6 98 #include <netinet/ip6.h> 99 #endif 100 101 #include <altq/altq.h> 102 #include <altq/altq_conf.h> 103 #include <altq/altq_red.h> 104 #ifdef ALTQ_FLOWVALVE 105 #include <altq/altq_flowvalve.h> 106 #endif 107 108 /* 109 * ALTQ/RED (Random Early Detection) implementation using 32-bit 110 * fixed-point calculation. 111 * 112 * written by kjc using the ns code as a reference. 113 * you can learn more about red and ns from Sally's home page at 114 * http://www-nrg.ee.lbl.gov/floyd/ 115 * 116 * most of the red parameter values are fixed in this implementation 117 * to prevent fixed-point overflow/underflow. 118 * if you change the parameters, watch out for overflow/underflow! 119 * 120 * the parameters used are recommended values by Sally. 121 * the corresponding ns config looks: 122 * q_weight=0.00195 123 * minthresh=5 maxthresh=15 queue-size=60 124 * linterm=30 125 * dropmech=drop-tail 126 * bytes=false (can't be handled by 32-bit fixed-point) 127 * doubleq=false dqthresh=false 128 * wait=true 129 */ 130 /* 131 * alternative red parameters for a slow link. 132 * 133 * assume the queue length becomes from zero to L and keeps L, it takes 134 * N packets for q_avg to reach 63% of L. 135 * when q_weight is 0.002, N is about 500 packets. 136 * for a slow link like dial-up, 500 packets takes more than 1 minute! 137 * when q_weight is 0.008, N is about 127 packets. 138 * when q_weight is 0.016, N is about 63 packets. 139 * bursts of 50 packets are allowd for 0.002, bursts of 25 packets 140 * are allowed for 0.016. 141 * see Sally's paper for more details. 142 */ 143 /* normal red parameters */ 144 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 145 /* q_weight = 0.00195 */ 146 147 /* red parameters for a slow link */ 148 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 149 /* q_weight = 0.0078125 */ 150 151 /* red parameters for a very slow link (e.g., dialup) */ 152 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 153 /* q_weight = 0.015625 */ 154 155 /* fixed-point uses 12-bit decimal places */ 156 #define FP_SHIFT 12 /* fixed-point shift */ 157 158 /* red parameters for drop probability */ 159 #define INV_P_MAX 10 /* inverse of max drop probability */ 160 #define TH_MIN 5 /* min threshold */ 161 #define TH_MAX 15 /* max threshold */ 162 163 #define RED_LIMIT 60 /* default max queue lenght */ 164 #define RED_STATS /* collect statistics */ 165 166 /* 167 * our default policy for forced-drop is drop-tail. 168 * (in altq-1.1.2 or earlier, the default was random-drop. 169 * but it makes more sense to punish the cause of the surge.) 170 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 171 */ 172 173 #ifdef ALTQ_FLOWVALVE 174 /* 175 * flow-valve is an extension to protect red from unresponsive flows 176 * and to promote end-to-end congestion control. 177 * flow-valve observes the average drop rates of the flows that have 178 * experienced packet drops in the recent past. 179 * when the average drop rate exceeds the threshold, the flow is 180 * blocked by the flow-valve. the trapped flow should back off 181 * exponentially to escape from the flow-valve. 182 */ 183 #ifdef RED_RANDOM_DROP 184 #error "random-drop can't be used with flow-valve!" 185 #endif 186 #endif /* ALTQ_FLOWVALVE */ 187 188 /* red_list keeps all red_queue_t's allocated. */ 189 static red_queue_t *red_list = NULL; 190 191 /* default red parameter values */ 192 static int default_th_min = TH_MIN; 193 static int default_th_max = TH_MAX; 194 static int default_inv_pmax = INV_P_MAX; 195 196 /* internal function prototypes */ 197 static int red_enqueue __P((struct ifaltq *, struct mbuf *, 198 struct altq_pktattr *)); 199 static struct mbuf *red_dequeue __P((struct ifaltq *, int)); 200 static int red_request __P((struct ifaltq *, int, void *)); 201 static void red_purgeq __P((red_queue_t *)); 202 static int red_detach __P((red_queue_t *)); 203 #ifdef ALTQ_FLOWVALVE 204 static __inline struct fve *flowlist_lookup __P((struct flowvalve *, 205 struct altq_pktattr *, struct timeval *)); 206 static __inline struct fve *flowlist_reclaim __P((struct flowvalve *, 207 struct altq_pktattr *)); 208 static __inline void flowlist_move_to_head __P((struct flowvalve *, 209 struct fve *)); 210 static __inline int fv_p2f __P((struct flowvalve *, int)); 211 static struct flowvalve *fv_alloc __P((struct red *)); 212 static void fv_destroy __P((struct flowvalve *)); 213 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *, 214 struct fve **)); 215 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *, 216 struct fve *)); 217 #endif 218 219 /* 220 * red device interface 221 */ 222 altqdev_decl(red); 223 224 int 225 redopen(dev, flag, fmt, p) 226 dev_t dev; 227 int flag, fmt; 228 struct proc *p; 229 { 230 /* everything will be done when the queueing scheme is attached. */ 231 return 0; 232 } 233 234 int 235 redclose(dev, flag, fmt, p) 236 dev_t dev; 237 int flag, fmt; 238 struct proc *p; 239 { 240 red_queue_t *rqp; 241 int err, error = 0; 242 243 while ((rqp = red_list) != NULL) { 244 /* destroy all */ 245 err = red_detach(rqp); 246 if (err != 0 && error == 0) 247 error = err; 248 } 249 250 return error; 251 } 252 253 int 254 redioctl(dev, cmd, addr, flag, p) 255 dev_t dev; 256 ioctlcmd_t cmd; 257 caddr_t addr; 258 int flag; 259 struct proc *p; 260 { 261 red_queue_t *rqp; 262 struct red_interface *ifacep; 263 struct ifnet *ifp; 264 int error = 0; 265 266 /* check super-user privilege */ 267 switch (cmd) { 268 case RED_GETSTATS: 269 break; 270 default: 271 #if (__FreeBSD_version > 400000) 272 if ((error = suser(p)) != 0) 273 #else 274 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 275 #endif 276 return (error); 277 break; 278 } 279 280 switch (cmd) { 281 282 case RED_ENABLE: 283 ifacep = (struct red_interface *)addr; 284 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 285 error = EBADF; 286 break; 287 } 288 error = altq_enable(rqp->rq_ifq); 289 break; 290 291 case RED_DISABLE: 292 ifacep = (struct red_interface *)addr; 293 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 294 error = EBADF; 295 break; 296 } 297 error = altq_disable(rqp->rq_ifq); 298 break; 299 300 case RED_IF_ATTACH: 301 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 302 if (ifp == NULL) { 303 error = ENXIO; 304 break; 305 } 306 307 /* allocate and initialize red_queue_t */ 308 MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK); 309 if (rqp == NULL) { 310 error = ENOMEM; 311 break; 312 } 313 bzero(rqp, sizeof(red_queue_t)); 314 315 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t), 316 M_DEVBUF, M_WAITOK); 317 if (rqp->rq_q == NULL) { 318 FREE(rqp, M_DEVBUF); 319 error = ENOMEM; 320 break; 321 } 322 bzero(rqp->rq_q, sizeof(class_queue_t)); 323 324 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 325 if (rqp->rq_red == NULL) { 326 FREE(rqp->rq_q, M_DEVBUF); 327 FREE(rqp, M_DEVBUF); 328 error = ENOMEM; 329 break; 330 } 331 332 rqp->rq_ifq = &ifp->if_snd; 333 qtail(rqp->rq_q) = NULL; 334 qlen(rqp->rq_q) = 0; 335 qlimit(rqp->rq_q) = RED_LIMIT; 336 qtype(rqp->rq_q) = Q_RED; 337 338 /* 339 * set RED to this ifnet structure. 340 */ 341 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 342 red_enqueue, red_dequeue, red_request, 343 NULL, NULL); 344 if (error) { 345 red_destroy(rqp->rq_red); 346 FREE(rqp->rq_q, M_DEVBUF); 347 FREE(rqp, M_DEVBUF); 348 break; 349 } 350 351 /* add this state to the red list */ 352 rqp->rq_next = red_list; 353 red_list = rqp; 354 break; 355 356 case RED_IF_DETACH: 357 ifacep = (struct red_interface *)addr; 358 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 359 error = EBADF; 360 break; 361 } 362 error = red_detach(rqp); 363 break; 364 365 case RED_GETSTATS: 366 do { 367 struct red_stats *q_stats; 368 red_t *rp; 369 370 q_stats = (struct red_stats *)addr; 371 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 372 ALTQT_RED)) == NULL) { 373 error = EBADF; 374 break; 375 } 376 377 q_stats->q_len = qlen(rqp->rq_q); 378 q_stats->q_limit = qlimit(rqp->rq_q); 379 380 rp = rqp->rq_red; 381 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 382 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 383 q_stats->drop_cnt = rp->red_stats.drop_cnt; 384 q_stats->drop_forced = rp->red_stats.drop_forced; 385 q_stats->drop_unforced = rp->red_stats.drop_unforced; 386 q_stats->marked_packets = rp->red_stats.marked_packets; 387 388 q_stats->weight = rp->red_weight; 389 q_stats->inv_pmax = rp->red_inv_pmax; 390 q_stats->th_min = rp->red_thmin; 391 q_stats->th_max = rp->red_thmax; 392 393 #ifdef ALTQ_FLOWVALVE 394 if (rp->red_flowvalve != NULL) { 395 struct flowvalve *fv = rp->red_flowvalve; 396 q_stats->fv_flows = fv->fv_flows; 397 q_stats->fv_pass = fv->fv_stats.pass; 398 q_stats->fv_predrop = fv->fv_stats.predrop; 399 q_stats->fv_alloc = fv->fv_stats.alloc; 400 q_stats->fv_escape = fv->fv_stats.escape; 401 } else { 402 #endif /* ALTQ_FLOWVALVE */ 403 q_stats->fv_flows = 0; 404 q_stats->fv_pass = 0; 405 q_stats->fv_predrop = 0; 406 q_stats->fv_alloc = 0; 407 q_stats->fv_escape = 0; 408 #ifdef ALTQ_FLOWVALVE 409 } 410 #endif /* ALTQ_FLOWVALVE */ 411 } while (0); 412 break; 413 414 case RED_CONFIG: 415 do { 416 struct red_conf *fc; 417 red_t *new; 418 int s, limit; 419 420 fc = (struct red_conf *)addr; 421 if ((rqp = altq_lookup(fc->iface.red_ifname, 422 ALTQT_RED)) == NULL) { 423 error = EBADF; 424 break; 425 } 426 new = red_alloc(fc->red_weight, 427 fc->red_inv_pmax, 428 fc->red_thmin, 429 fc->red_thmax, 430 fc->red_flags, 431 fc->red_pkttime); 432 if (new == NULL) { 433 error = ENOMEM; 434 break; 435 } 436 437 s = splnet(); 438 red_purgeq(rqp); 439 limit = fc->red_limit; 440 if (limit < fc->red_thmax) 441 limit = fc->red_thmax; 442 qlimit(rqp->rq_q) = limit; 443 fc->red_limit = limit; /* write back the new value */ 444 445 red_destroy(rqp->rq_red); 446 rqp->rq_red = new; 447 448 splx(s); 449 450 /* write back new values */ 451 fc->red_limit = limit; 452 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 453 fc->red_thmin = rqp->rq_red->red_thmin; 454 fc->red_thmax = rqp->rq_red->red_thmax; 455 456 } while (0); 457 break; 458 459 case RED_SETDEFAULTS: 460 do { 461 struct redparams *rp; 462 463 rp = (struct redparams *)addr; 464 465 default_th_min = rp->th_min; 466 default_th_max = rp->th_max; 467 default_inv_pmax = rp->inv_pmax; 468 } while (0); 469 break; 470 471 default: 472 error = EINVAL; 473 break; 474 } 475 return error; 476 } 477 478 static int 479 red_detach(rqp) 480 red_queue_t *rqp; 481 { 482 red_queue_t *tmp; 483 int error = 0; 484 485 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 486 altq_disable(rqp->rq_ifq); 487 488 if ((error = altq_detach(rqp->rq_ifq))) 489 return (error); 490 491 if (red_list == rqp) 492 red_list = rqp->rq_next; 493 else { 494 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 495 if (tmp->rq_next == rqp) { 496 tmp->rq_next = rqp->rq_next; 497 break; 498 } 499 if (tmp == NULL) 500 printf("red_detach: no state found in red_list!\n"); 501 } 502 503 red_destroy(rqp->rq_red); 504 FREE(rqp->rq_q, M_DEVBUF); 505 FREE(rqp, M_DEVBUF); 506 return (error); 507 } 508 509 /* 510 * red support routines 511 */ 512 513 red_t * 514 red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime) 515 int weight, inv_pmax, th_min, th_max; 516 int flags, pkttime; 517 { 518 red_t *rp; 519 int w, i; 520 int npkts_per_sec; 521 522 MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK); 523 if (rp == NULL) 524 return (NULL); 525 bzero(rp, sizeof(red_t)); 526 527 rp->red_avg = 0; 528 rp->red_idle = 1; 529 530 if (weight == 0) 531 rp->red_weight = W_WEIGHT; 532 else 533 rp->red_weight = weight; 534 if (inv_pmax == 0) 535 rp->red_inv_pmax = default_inv_pmax; 536 else 537 rp->red_inv_pmax = inv_pmax; 538 if (th_min == 0) 539 rp->red_thmin = default_th_min; 540 else 541 rp->red_thmin = th_min; 542 if (th_max == 0) 543 rp->red_thmax = default_th_max; 544 else 545 rp->red_thmax = th_max; 546 547 rp->red_flags = flags; 548 549 if (pkttime == 0) 550 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 551 rp->red_pkttime = 800; 552 else 553 rp->red_pkttime = pkttime; 554 555 if (weight == 0) { 556 /* when the link is very slow, adjust red parameters */ 557 npkts_per_sec = 1000000 / rp->red_pkttime; 558 if (npkts_per_sec < 50) { 559 /* up to about 400Kbps */ 560 rp->red_weight = W_WEIGHT_2; 561 } else if (npkts_per_sec < 300) { 562 /* up to about 2.4Mbps */ 563 rp->red_weight = W_WEIGHT_1; 564 } 565 } 566 567 /* calculate wshift. weight must be power of 2 */ 568 w = rp->red_weight; 569 for (i = 0; w > 1; i++) 570 w = w >> 1; 571 rp->red_wshift = i; 572 w = 1 << rp->red_wshift; 573 if (w != rp->red_weight) { 574 printf("invalid weight value %d for red! use %d\n", 575 rp->red_weight, w); 576 rp->red_weight = w; 577 } 578 579 /* 580 * thmin_s and thmax_s are scaled versions of th_min and th_max 581 * to be compared with avg. 582 */ 583 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 584 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 585 586 /* 587 * precompute probability denominator 588 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 589 */ 590 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 591 * rp->red_inv_pmax) << FP_SHIFT; 592 593 /* allocate weight table */ 594 rp->red_wtab = wtab_alloc(rp->red_weight); 595 596 microtime(&rp->red_last); 597 #ifdef ALTQ_FLOWVALVE 598 if (flags & REDF_FLOWVALVE) 599 rp->red_flowvalve = fv_alloc(rp); 600 /* if fv_alloc failes, flowvalve is just disabled */ 601 #endif 602 return (rp); 603 } 604 605 void 606 red_destroy(rp) 607 red_t *rp; 608 { 609 #ifdef ALTQ_FLOWVALVE 610 if (rp->red_flowvalve != NULL) 611 fv_destroy(rp->red_flowvalve); 612 #endif 613 wtab_destroy(rp->red_wtab); 614 FREE(rp, M_DEVBUF); 615 } 616 617 void 618 red_getstats(rp, sp) 619 red_t *rp; 620 struct redstats *sp; 621 { 622 sp->q_avg = rp->red_avg >> rp->red_wshift; 623 sp->xmit_cnt = rp->red_stats.xmit_cnt; 624 sp->drop_cnt = rp->red_stats.drop_cnt; 625 sp->drop_forced = rp->red_stats.drop_forced; 626 sp->drop_unforced = rp->red_stats.drop_unforced; 627 sp->marked_packets = rp->red_stats.marked_packets; 628 } 629 630 /* 631 * enqueue routine: 632 * 633 * returns: 0 when successfully queued. 634 * ENOBUFS when drop occurs. 635 */ 636 static int 637 red_enqueue(ifq, m, pktattr) 638 struct ifaltq *ifq; 639 struct mbuf *m; 640 struct altq_pktattr *pktattr; 641 { 642 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 643 644 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) 645 return ENOBUFS; 646 ifq->ifq_len++; 647 return 0; 648 } 649 650 int 651 red_addq(rp, q, m, pktattr) 652 red_t *rp; 653 class_queue_t *q; 654 struct mbuf *m; 655 struct altq_pktattr *pktattr; 656 { 657 int avg, droptype; 658 int n; 659 #ifdef ALTQ_FLOWVALVE 660 struct fve *fve = NULL; 661 662 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 663 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 664 m_freem(m); 665 return (-1); 666 } 667 #endif 668 669 avg = rp->red_avg; 670 671 /* 672 * if we were idle, we pretend that n packets arrived during 673 * the idle period. 674 */ 675 if (rp->red_idle) { 676 struct timeval now; 677 int t; 678 679 rp->red_idle = 0; 680 microtime(&now); 681 t = (now.tv_sec - rp->red_last.tv_sec); 682 if (t > 60) { 683 /* 684 * being idle for more than 1 minute, set avg to zero. 685 * this prevents t from overflow. 686 */ 687 avg = 0; 688 } else { 689 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 690 n = t / rp->red_pkttime - 1; 691 692 /* the following line does (avg = (1 - Wq)^n * avg) */ 693 if (n > 0) 694 avg = (avg >> FP_SHIFT) * 695 pow_w(rp->red_wtab, n); 696 } 697 } 698 699 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 700 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 701 rp->red_avg = avg; /* save the new value */ 702 703 /* 704 * red_count keeps a tally of arriving traffic that has not 705 * been dropped. 706 */ 707 rp->red_count++; 708 709 /* see if we drop early */ 710 droptype = DTYPE_NODROP; 711 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 712 if (avg >= rp->red_thmax_s) { 713 /* avg >= th_max: forced drop */ 714 droptype = DTYPE_FORCED; 715 } else if (rp->red_old == 0) { 716 /* first exceeds th_min */ 717 rp->red_count = 1; 718 rp->red_old = 1; 719 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 720 rp->red_probd, rp->red_count)) { 721 /* mark or drop by red */ 722 if ((rp->red_flags & REDF_ECN) && 723 mark_ecn(m, pktattr, rp->red_flags)) { 724 /* successfully marked. do not drop. */ 725 rp->red_count = 0; 726 #ifdef RED_STATS 727 rp->red_stats.marked_packets++; 728 #endif 729 } else { 730 /* unforced drop by red */ 731 droptype = DTYPE_EARLY; 732 } 733 } 734 } else { 735 /* avg < th_min */ 736 rp->red_old = 0; 737 } 738 739 /* 740 * if the queue length hits the hard limit, it's a forced drop. 741 */ 742 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 743 droptype = DTYPE_FORCED; 744 745 #ifdef RED_RANDOM_DROP 746 /* if successful or forced drop, enqueue this packet. */ 747 if (droptype != DTYPE_EARLY) 748 _addq(q, m); 749 #else 750 /* if successful, enqueue this packet. */ 751 if (droptype == DTYPE_NODROP) 752 _addq(q, m); 753 #endif 754 if (droptype != DTYPE_NODROP) { 755 if (droptype == DTYPE_EARLY) { 756 /* drop the incoming packet */ 757 #ifdef RED_STATS 758 rp->red_stats.drop_unforced++; 759 #endif 760 } else { 761 /* forced drop, select a victim packet in the queue. */ 762 #ifdef RED_RANDOM_DROP 763 m = _getq_random(q); 764 #endif 765 #ifdef RED_STATS 766 rp->red_stats.drop_forced++; 767 #endif 768 } 769 #ifdef RED_STATS 770 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 771 #endif 772 rp->red_count = 0; 773 #ifdef ALTQ_FLOWVALVE 774 if (rp->red_flowvalve != NULL) 775 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 776 #endif 777 m_freem(m); 778 return (-1); 779 } 780 /* successfully queued */ 781 #ifdef RED_STATS 782 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 783 #endif 784 return (0); 785 } 786 787 /* 788 * early-drop probability is calculated as follows: 789 * prob = p_max * (avg - th_min) / (th_max - th_min) 790 * prob_a = prob / (2 - count*prob) 791 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 792 * here prob_a increases as successive undrop count increases. 793 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 794 * becomes 1 when (count >= (2 / prob))). 795 */ 796 int 797 drop_early(fp_len, fp_probd, count) 798 int fp_len; /* (avg - TH_MIN) in fixed-point */ 799 int fp_probd; /* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ 800 int count; /* how many successive undropped packets */ 801 { 802 int d; /* denominator of drop-probability */ 803 804 d = fp_probd - count * fp_len; 805 if (d <= 0) 806 /* count exceeds the hard limit: drop or mark */ 807 return (1); 808 809 /* 810 * now the range of d is [1..600] in fixed-point. (when 811 * th_max-th_min=10 and p_max=1/30) 812 * drop probability = (avg - TH_MIN) / d 813 */ 814 815 if ((random() % d) < fp_len) { 816 /* drop or mark */ 817 return (1); 818 } 819 /* no drop/mark */ 820 return (0); 821 } 822 823 /* 824 * try to mark CE bit to the packet. 825 * returns 1 if successfully marked, 0 otherwise. 826 */ 827 int 828 mark_ecn(m, pktattr, flags) 829 struct mbuf *m; 830 struct altq_pktattr *pktattr; 831 int flags; 832 { 833 struct mbuf *m0; 834 835 if (pktattr == NULL || 836 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 837 return (0); 838 839 /* verify that pattr_hdr is within the mbuf data */ 840 for (m0 = m; m0 != NULL; m0 = m0->m_next) 841 if ((pktattr->pattr_hdr >= m0->m_data) && 842 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 843 break; 844 if (m0 == NULL) { 845 /* ick, pattr_hdr is stale */ 846 pktattr->pattr_af = AF_UNSPEC; 847 return (0); 848 } 849 850 switch (pktattr->pattr_af) { 851 case AF_INET: 852 if (flags & REDF_ECN4) { 853 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 854 855 if (ip->ip_v != 4) 856 return (0); /* version mismatch! */ 857 if (ip->ip_tos & IPTOS_ECT) { 858 /* ECN-capable, mark ECN bit. */ 859 if ((ip->ip_tos & IPTOS_CE) == 0) { 860 #if (IPTOS_CE == 0x01) 861 u_short sum; 862 863 ip->ip_tos |= IPTOS_CE; 864 /* 865 * optimized version when IPTOS_CE 866 * is 0x01. 867 * HC' = HC -1 when HC > 0 868 * = 0xfffe when HC = 0 869 */ 870 sum = ntohs(ip->ip_sum); 871 if (sum == 0) 872 sum = 0xfffe; 873 else 874 sum -= 1; 875 ip->ip_sum = htons(sum); 876 #else /* IPTOS_CE != 0x01 */ 877 long sum; 878 879 ip->ip_tos |= IPTOS_CE; 880 /* 881 * update checksum (from RFC1624) 882 * HC' = ~(~HC + ~m + m') 883 */ 884 sum = ~ntohs(ip->ip_sum) & 0xffff; 885 sum += 0xffff + IPTOS_CE; 886 sum = (sum >> 16) + (sum & 0xffff); 887 sum += (sum >> 16); /* add carry */ 888 889 ip->ip_sum = htons(~sum & 0xffff); 890 #endif /* IPTOS_CE != 0x01 */ 891 } 892 return (1); 893 } 894 } 895 break; 896 #ifdef INET6 897 case AF_INET6: 898 if (flags & REDF_ECN6) { 899 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 900 u_int32_t flowlabel; 901 902 flowlabel = ntohl(ip6->ip6_flow); 903 if ((flowlabel >> 28) != 6) 904 return (0); /* version mismatch! */ 905 if (flowlabel & (IPTOS_ECT << 20)) { 906 /* ECN-capable, mark ECN bit. */ 907 flowlabel |= (IPTOS_CE << 20); 908 ip6->ip6_flow = htonl(flowlabel); 909 return (1); 910 } 911 } 912 break; 913 #endif /* INET6 */ 914 } 915 916 /* not marked */ 917 return (0); 918 } 919 920 /* 921 * dequeue routine: 922 * must be called in splnet. 923 * 924 * returns: mbuf dequeued. 925 * NULL when no packet is available in the queue. 926 */ 927 928 static struct mbuf * 929 red_dequeue(ifq, op) 930 struct ifaltq *ifq; 931 int op; 932 { 933 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 934 struct mbuf *m; 935 936 if (op == ALTDQ_POLL) 937 return qhead(rqp->rq_q); 938 939 /* op == ALTDQ_REMOVE */ 940 m = red_getq(rqp->rq_red, rqp->rq_q); 941 if (m != NULL) 942 ifq->ifq_len--; 943 return (m); 944 } 945 946 struct mbuf * 947 red_getq(rp, q) 948 red_t *rp; 949 class_queue_t *q; 950 { 951 struct mbuf *m; 952 953 if ((m = _getq(q)) == NULL) { 954 if (rp->red_idle == 0) { 955 rp->red_idle = 1; 956 microtime(&rp->red_last); 957 } 958 return NULL; 959 } 960 961 rp->red_idle = 0; 962 return (m); 963 } 964 965 static int 966 red_request(ifq, req, arg) 967 struct ifaltq *ifq; 968 int req; 969 void *arg; 970 { 971 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 972 973 switch (req) { 974 case ALTRQ_PURGE: 975 red_purgeq(rqp); 976 break; 977 } 978 return (0); 979 } 980 981 static void 982 red_purgeq(rqp) 983 red_queue_t *rqp; 984 { 985 _flushq(rqp->rq_q); 986 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 987 rqp->rq_ifq->ifq_len = 0; 988 } 989 990 991 /* 992 * helper routine to calibrate avg during idle. 993 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 994 * here Wq = 1/weight and the code assumes Wq is close to zero. 995 * 996 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 997 */ 998 static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 999 1000 struct wtab * 1001 wtab_alloc(weight) 1002 int weight; 1003 { 1004 struct wtab *w; 1005 int i; 1006 1007 for (w = wtab_list; w != NULL; w = w->w_next) 1008 if (w->w_weight == weight) { 1009 w->w_refcount++; 1010 return (w); 1011 } 1012 1013 MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK); 1014 if (w == NULL) 1015 panic("wtab_alloc: malloc failed!"); 1016 bzero(w, sizeof(struct wtab)); 1017 w->w_weight = weight; 1018 w->w_refcount = 1; 1019 w->w_next = wtab_list; 1020 wtab_list = w; 1021 1022 /* initialize the weight table */ 1023 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 1024 for (i = 1; i < 32; i++) { 1025 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 1026 if (w->w_tab[i] == 0 && w->w_param_max == 0) 1027 w->w_param_max = 1 << i; 1028 } 1029 1030 return (w); 1031 } 1032 1033 int 1034 wtab_destroy(w) 1035 struct wtab *w; 1036 { 1037 struct wtab *prev; 1038 1039 if (--w->w_refcount > 0) 1040 return (0); 1041 1042 if (wtab_list == w) 1043 wtab_list = w->w_next; 1044 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 1045 if (prev->w_next == w) { 1046 prev->w_next = w->w_next; 1047 break; 1048 } 1049 1050 FREE(w, M_DEVBUF); 1051 return (0); 1052 } 1053 1054 int32_t 1055 pow_w(w, n) 1056 struct wtab *w; 1057 int n; 1058 { 1059 int i, bit; 1060 int32_t val; 1061 1062 if (n >= w->w_param_max) 1063 return (0); 1064 1065 val = 1 << FP_SHIFT; 1066 if (n <= 0) 1067 return (val); 1068 1069 bit = 1; 1070 i = 0; 1071 while (n) { 1072 if (n & bit) { 1073 val = (val * w->w_tab[i]) >> FP_SHIFT; 1074 n &= ~bit; 1075 } 1076 i++; 1077 bit <<= 1; 1078 } 1079 return (val); 1080 } 1081 1082 #ifdef ALTQ_FLOWVALVE 1083 1084 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1085 #define FV_PSCALE(x) ((x) << FV_PSHIFT) 1086 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1087 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 1088 #define FV_FSCALE(x) ((x) << FV_FSHIFT) 1089 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1090 1091 #define FV_TIMER (3 * hz) /* timer value for garbage collector */ 1092 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ 1093 1094 #define FV_N 10 /* update fve_f every FV_N packets */ 1095 1096 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ 1097 #define FV_TTHRESH 3 /* time threshold to delete fve */ 1098 #define FV_ALPHA 5 /* extra packet count */ 1099 1100 #define FV_STATS 1101 1102 #if (__FreeBSD_version > 300000) 1103 #define FV_TIMESTAMP(tp) getmicrotime(tp) 1104 #else 1105 #define FV_TIMESTAMP(tp) { (*(tp)) = time; } 1106 #endif 1107 1108 /* 1109 * Brtt table: 127 entry table to convert drop rate (p) to 1110 * the corresponding bandwidth fraction (f) 1111 * the following equation is implemented to use scaled values, 1112 * fve_p and fve_f, in the fixed point format. 1113 * 1114 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) 1115 * f = Brtt(p) / (max_th + alpha) 1116 */ 1117 #define BRTT_SIZE 128 1118 #define BRTT_SHIFT 12 1119 #define BRTT_MASK 0x0007f000 1120 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) 1121 1122 const int brtt_tab[BRTT_SIZE] = { 1123 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 1124 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 1125 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 1126 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 1127 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 1128 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 1129 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 1130 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 1131 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 1132 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 1133 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 1134 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 1135 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, 1136 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 1137 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 1138 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 1139 }; 1140 1141 static __inline struct fve * 1142 flowlist_lookup(fv, pktattr, now) 1143 struct flowvalve *fv; 1144 struct altq_pktattr *pktattr; 1145 struct timeval *now; 1146 { 1147 struct fve *fve; 1148 int flows; 1149 struct ip *ip; 1150 #ifdef INET6 1151 struct ip6_hdr *ip6; 1152 #endif 1153 struct timeval tthresh; 1154 1155 if (pktattr == NULL) 1156 return (NULL); 1157 1158 tthresh.tv_sec = now->tv_sec - FV_TTHRESH; 1159 flows = 0; 1160 /* 1161 * search the flow list 1162 */ 1163 switch (pktattr->pattr_af) { 1164 case AF_INET: 1165 ip = (struct ip *)pktattr->pattr_hdr; 1166 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1167 if (fve->fve_lastdrop.tv_sec == 0) 1168 break; 1169 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1170 fve->fve_lastdrop.tv_sec = 0; 1171 break; 1172 } 1173 if (fve->fve_flow.flow_af == AF_INET && 1174 fve->fve_flow.flow_ip.ip_src.s_addr == 1175 ip->ip_src.s_addr && 1176 fve->fve_flow.flow_ip.ip_dst.s_addr == 1177 ip->ip_dst.s_addr) 1178 return (fve); 1179 flows++; 1180 } 1181 break; 1182 #ifdef INET6 1183 case AF_INET6: 1184 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1185 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1186 if (fve->fve_lastdrop.tv_sec == 0) 1187 break; 1188 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1189 fve->fve_lastdrop.tv_sec = 0; 1190 break; 1191 } 1192 if (fve->fve_flow.flow_af == AF_INET6 && 1193 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, 1194 &ip6->ip6_src) && 1195 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, 1196 &ip6->ip6_dst)) 1197 return (fve); 1198 flows++; 1199 } 1200 break; 1201 #endif /* INET6 */ 1202 1203 default: 1204 /* unknown protocol. no drop. */ 1205 return (NULL); 1206 } 1207 fv->fv_flows = flows; /* save the number of active fve's */ 1208 return (NULL); 1209 } 1210 1211 static __inline struct fve * 1212 flowlist_reclaim(fv, pktattr) 1213 struct flowvalve *fv; 1214 struct altq_pktattr *pktattr; 1215 { 1216 struct fve *fve; 1217 struct ip *ip; 1218 #ifdef INET6 1219 struct ip6_hdr *ip6; 1220 #endif 1221 1222 /* 1223 * get an entry from the tail of the LRU list. 1224 */ 1225 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); 1226 1227 switch (pktattr->pattr_af) { 1228 case AF_INET: 1229 ip = (struct ip *)pktattr->pattr_hdr; 1230 fve->fve_flow.flow_af = AF_INET; 1231 fve->fve_flow.flow_ip.ip_src = ip->ip_src; 1232 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; 1233 break; 1234 #ifdef INET6 1235 case AF_INET6: 1236 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1237 fve->fve_flow.flow_af = AF_INET6; 1238 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; 1239 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; 1240 break; 1241 #endif 1242 } 1243 1244 fve->fve_state = Green; 1245 fve->fve_p = 0.0; 1246 fve->fve_f = 0.0; 1247 fve->fve_ifseq = fv->fv_ifseq - 1; 1248 fve->fve_count = 0; 1249 1250 fv->fv_flows++; 1251 #ifdef FV_STATS 1252 fv->fv_stats.alloc++; 1253 #endif 1254 return (fve); 1255 } 1256 1257 static __inline void 1258 flowlist_move_to_head(fv, fve) 1259 struct flowvalve *fv; 1260 struct fve *fve; 1261 { 1262 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { 1263 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); 1264 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); 1265 } 1266 } 1267 1268 /* 1269 * allocate flowvalve structure 1270 */ 1271 static struct flowvalve * 1272 fv_alloc(rp) 1273 struct red *rp; 1274 { 1275 struct flowvalve *fv; 1276 struct fve *fve; 1277 int i, num; 1278 1279 num = FV_FLOWLISTSIZE; 1280 MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve), 1281 M_DEVBUF, M_WAITOK); 1282 if (fv == NULL) 1283 return (NULL); 1284 bzero(fv, sizeof(struct flowvalve)); 1285 1286 MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num, 1287 M_DEVBUF, M_WAITOK); 1288 if (fv->fv_fves == NULL) { 1289 FREE(fv, M_DEVBUF); 1290 return (NULL); 1291 } 1292 bzero(fv->fv_fves, sizeof(struct fve) * num); 1293 1294 fv->fv_flows = 0; 1295 TAILQ_INIT(&fv->fv_flowlist); 1296 for (i = 0; i < num; i++) { 1297 fve = &fv->fv_fves[i]; 1298 fve->fve_lastdrop.tv_sec = 0; 1299 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); 1300 } 1301 1302 /* initialize drop rate threshold in scaled fixed-point */ 1303 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; 1304 1305 /* initialize drop rate to fraction table */ 1306 MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE, 1307 M_DEVBUF, M_WAITOK); 1308 if (fv->fv_p2ftab == NULL) { 1309 FREE(fv->fv_fves, M_DEVBUF); 1310 FREE(fv, M_DEVBUF); 1311 return (NULL); 1312 } 1313 /* 1314 * create the p2f table. 1315 * (shift is used to keep the precision) 1316 */ 1317 for (i = 1; i < BRTT_SIZE; i++) { 1318 int f; 1319 1320 f = brtt_tab[i] << 8; 1321 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; 1322 } 1323 1324 return (fv); 1325 } 1326 1327 static void fv_destroy(fv) 1328 struct flowvalve *fv; 1329 { 1330 FREE(fv->fv_p2ftab, M_DEVBUF); 1331 FREE(fv->fv_fves, M_DEVBUF); 1332 FREE(fv, M_DEVBUF); 1333 } 1334 1335 static __inline int 1336 fv_p2f(fv, p) 1337 struct flowvalve *fv; 1338 int p; 1339 { 1340 int val, f; 1341 1342 if (p >= BRTT_PMAX) 1343 f = fv->fv_p2ftab[BRTT_SIZE-1]; 1344 else if ((val = (p & BRTT_MASK))) 1345 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; 1346 else 1347 f = fv->fv_p2ftab[1]; 1348 return (f); 1349 } 1350 1351 /* 1352 * check if an arriving packet should be pre-dropped. 1353 * called from red_addq() when a packet arrives. 1354 * returns 1 when the packet should be pre-dropped. 1355 * should be called in splnet. 1356 */ 1357 static int 1358 fv_checkflow(fv, pktattr, fcache) 1359 struct flowvalve *fv; 1360 struct altq_pktattr *pktattr; 1361 struct fve **fcache; 1362 { 1363 struct fve *fve; 1364 struct timeval now; 1365 1366 fv->fv_ifseq++; 1367 FV_TIMESTAMP(&now); 1368 1369 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1370 /* no matching entry in the flowlist */ 1371 return (0); 1372 1373 *fcache = fve; 1374 1375 /* update fraction f for every FV_N packets */ 1376 if (++fve->fve_count == FV_N) { 1377 /* 1378 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f 1379 */ 1380 fve->fve_f = 1381 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) 1382 + fve->fve_f - FV_FUNSCALE(fve->fve_f); 1383 fve->fve_ifseq = fv->fv_ifseq; 1384 fve->fve_count = 0; 1385 } 1386 1387 /* 1388 * overpumping test 1389 */ 1390 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { 1391 int fthresh; 1392 1393 /* calculate a threshold */ 1394 fthresh = fv_p2f(fv, fve->fve_p); 1395 if (fve->fve_f > fthresh) 1396 fve->fve_state = Red; 1397 } 1398 1399 if (fve->fve_state == Red) { 1400 /* 1401 * backoff test 1402 */ 1403 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { 1404 /* no drop for at least FV_BACKOFFTHRESH sec */ 1405 fve->fve_p = 0; 1406 fve->fve_state = Green; 1407 #ifdef FV_STATS 1408 fv->fv_stats.escape++; 1409 #endif 1410 } else { 1411 /* block this flow */ 1412 flowlist_move_to_head(fv, fve); 1413 fve->fve_lastdrop = now; 1414 #ifdef FV_STATS 1415 fv->fv_stats.predrop++; 1416 #endif 1417 return (1); 1418 } 1419 } 1420 1421 /* 1422 * p = (1 - Wp) * p 1423 */ 1424 fve->fve_p -= FV_PUNSCALE(fve->fve_p); 1425 if (fve->fve_p < 0) 1426 fve->fve_p = 0; 1427 #ifdef FV_STATS 1428 fv->fv_stats.pass++; 1429 #endif 1430 return (0); 1431 } 1432 1433 /* 1434 * called from red_addq when a packet is dropped by red. 1435 * should be called in splnet. 1436 */ 1437 static void fv_dropbyred(fv, pktattr, fcache) 1438 struct flowvalve *fv; 1439 struct altq_pktattr *pktattr; 1440 struct fve *fcache; 1441 { 1442 struct fve *fve; 1443 struct timeval now; 1444 1445 if (pktattr == NULL) 1446 return; 1447 FV_TIMESTAMP(&now); 1448 1449 if (fcache != NULL) 1450 /* the fve of this packet is already cached */ 1451 fve = fcache; 1452 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1453 fve = flowlist_reclaim(fv, pktattr); 1454 1455 flowlist_move_to_head(fv, fve); 1456 1457 /* 1458 * update p: the following line cancels the update 1459 * in fv_checkflow() and calculate 1460 * p = Wp + (1 - Wp) * p 1461 */ 1462 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; 1463 1464 fve->fve_lastdrop = now; 1465 } 1466 1467 #endif /* ALTQ_FLOWVALVE */ 1468 1469 #ifdef KLD_MODULE 1470 1471 static struct altqsw red_sw = 1472 {"red", redopen, redclose, redioctl}; 1473 1474 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); 1475 1476 #endif /* KLD_MODULE */ 1477 1478 #endif /* ALTQ_RED */ 1479