1 /* $NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $ */ 2 /* $KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $ */ 3 4 /* 5 * Copyright (C) 1997-2000 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 /* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $"); 65 66 #if defined(__FreeBSD__) || defined(__NetBSD__) 67 #include "opt_altq.h" 68 #if (__FreeBSD__ != 2) 69 #include "opt_inet.h" 70 #ifdef __FreeBSD__ 71 #include "opt_inet6.h" 72 #endif 73 #endif 74 #endif /* __FreeBSD__ || __NetBSD__ */ 75 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 76 77 #include <sys/param.h> 78 #include <sys/malloc.h> 79 #include <sys/mbuf.h> 80 #include <sys/socket.h> 81 #include <sys/sockio.h> 82 #include <sys/systm.h> 83 #include <sys/proc.h> 84 #include <sys/errno.h> 85 #include <sys/kernel.h> 86 #ifdef ALTQ_FLOWVALVE 87 #include <sys/queue.h> 88 #include <sys/time.h> 89 #endif 90 91 #include <net/if.h> 92 #include <net/if_types.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_systm.h> 96 #include <netinet/ip.h> 97 #ifdef INET6 98 #include <netinet/ip6.h> 99 #endif 100 101 #include <altq/altq.h> 102 #include <altq/altq_conf.h> 103 #include <altq/altq_red.h> 104 #ifdef ALTQ_FLOWVALVE 105 #include <altq/altq_flowvalve.h> 106 #endif 107 108 /* 109 * ALTQ/RED (Random Early Detection) implementation using 32-bit 110 * fixed-point calculation. 111 * 112 * written by kjc using the ns code as a reference. 113 * you can learn more about red and ns from Sally's home page at 114 * http://www-nrg.ee.lbl.gov/floyd/ 115 * 116 * most of the red parameter values are fixed in this implementation 117 * to prevent fixed-point overflow/underflow. 118 * if you change the parameters, watch out for overflow/underflow! 119 * 120 * the parameters used are recommended values by Sally. 121 * the corresponding ns config looks: 122 * q_weight=0.00195 123 * minthresh=5 maxthresh=15 queue-size=60 124 * linterm=30 125 * dropmech=drop-tail 126 * bytes=false (can't be handled by 32-bit fixed-point) 127 * doubleq=false dqthresh=false 128 * wait=true 129 */ 130 /* 131 * alternative red parameters for a slow link. 
132 * 133 * assume the queue length becomes from zero to L and keeps L, it takes 134 * N packets for q_avg to reach 63% of L. 135 * when q_weight is 0.002, N is about 500 packets. 136 * for a slow link like dial-up, 500 packets takes more than 1 minute! 137 * when q_weight is 0.008, N is about 127 packets. 138 * when q_weight is 0.016, N is about 63 packets. 139 * bursts of 50 packets are allowd for 0.002, bursts of 25 packets 140 * are allowed for 0.016. 141 * see Sally's paper for more details. 142 */ 143 /* normal red parameters */ 144 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 145 /* q_weight = 0.00195 */ 146 147 /* red parameters for a slow link */ 148 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 149 /* q_weight = 0.0078125 */ 150 151 /* red parameters for a very slow link (e.g., dialup) */ 152 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 153 /* q_weight = 0.015625 */ 154 155 /* fixed-point uses 12-bit decimal places */ 156 #define FP_SHIFT 12 /* fixed-point shift */ 157 158 /* red parameters for drop probability */ 159 #define INV_P_MAX 10 /* inverse of max drop probability */ 160 #define TH_MIN 5 /* min threshold */ 161 #define TH_MAX 15 /* max threshold */ 162 163 #define RED_LIMIT 60 /* default max queue length */ 164 165 /* 166 * our default policy for forced-drop is drop-tail. 167 * (in altq-1.1.2 or earlier, the default was random-drop. 168 * but it makes more sense to punish the cause of the surge.) 169 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 170 */ 171 172 #ifdef ALTQ_FLOWVALVE 173 /* 174 * flow-valve is an extension to protect red from unresponsive flows 175 * and to promote end-to-end congestion control. 176 * flow-valve observes the average drop rates of the flows that have 177 * experienced packet drops in the recent past. 178 * when the average drop rate exceeds the threshold, the flow is 179 * blocked by the flow-valve. 
the trapped flow should back off 180 * exponentially to escape from the flow-valve. 181 */ 182 #ifdef RED_RANDOM_DROP 183 #error "random-drop can't be used with flow-valve!" 184 #endif 185 #endif /* ALTQ_FLOWVALVE */ 186 187 /* red_list keeps all red_queue_t's allocated. */ 188 static red_queue_t *red_list = NULL; 189 190 /* default red parameter values */ 191 static int default_th_min = TH_MIN; 192 static int default_th_max = TH_MAX; 193 static int default_inv_pmax = INV_P_MAX; 194 195 /* internal function prototypes */ 196 static int red_enqueue __P((struct ifaltq *, struct mbuf *, 197 struct altq_pktattr *)); 198 static struct mbuf *red_dequeue __P((struct ifaltq *, int)); 199 static int red_request __P((struct ifaltq *, int, void *)); 200 static void red_purgeq __P((red_queue_t *)); 201 static int red_detach __P((red_queue_t *)); 202 #ifdef ALTQ_FLOWVALVE 203 static __inline struct fve *flowlist_lookup __P((struct flowvalve *, 204 struct altq_pktattr *, struct timeval *)); 205 static __inline struct fve *flowlist_reclaim __P((struct flowvalve *, 206 struct altq_pktattr *)); 207 static __inline void flowlist_move_to_head __P((struct flowvalve *, 208 struct fve *)); 209 static __inline int fv_p2f __P((struct flowvalve *, int)); 210 static struct flowvalve *fv_alloc __P((struct red *)); 211 static void fv_destroy __P((struct flowvalve *)); 212 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *, 213 struct fve **)); 214 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *, 215 struct fve *)); 216 #endif 217 218 /* 219 * red device interface 220 */ 221 altqdev_decl(red); 222 223 int 224 redopen(dev, flag, fmt, p) 225 dev_t dev; 226 int flag, fmt; 227 struct proc *p; 228 { 229 /* everything will be done when the queueing scheme is attached. 
*/ 230 return 0; 231 } 232 233 int 234 redclose(dev, flag, fmt, p) 235 dev_t dev; 236 int flag, fmt; 237 struct proc *p; 238 { 239 red_queue_t *rqp; 240 int err, error = 0; 241 242 while ((rqp = red_list) != NULL) { 243 /* destroy all */ 244 err = red_detach(rqp); 245 if (err != 0 && error == 0) 246 error = err; 247 } 248 249 return error; 250 } 251 252 int 253 redioctl(dev, cmd, addr, flag, p) 254 dev_t dev; 255 ioctlcmd_t cmd; 256 caddr_t addr; 257 int flag; 258 struct proc *p; 259 { 260 red_queue_t *rqp; 261 struct red_interface *ifacep; 262 struct ifnet *ifp; 263 int error = 0; 264 265 /* check super-user privilege */ 266 switch (cmd) { 267 case RED_GETSTATS: 268 break; 269 default: 270 #if (__FreeBSD_version > 400000) 271 if ((error = suser(p)) != 0) 272 #else 273 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 274 #endif 275 return (error); 276 break; 277 } 278 279 switch (cmd) { 280 281 case RED_ENABLE: 282 ifacep = (struct red_interface *)addr; 283 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 284 error = EBADF; 285 break; 286 } 287 error = altq_enable(rqp->rq_ifq); 288 break; 289 290 case RED_DISABLE: 291 ifacep = (struct red_interface *)addr; 292 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 293 error = EBADF; 294 break; 295 } 296 error = altq_disable(rqp->rq_ifq); 297 break; 298 299 case RED_IF_ATTACH: 300 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 301 if (ifp == NULL) { 302 error = ENXIO; 303 break; 304 } 305 306 /* allocate and initialize red_queue_t */ 307 MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK); 308 if (rqp == NULL) { 309 error = ENOMEM; 310 break; 311 } 312 (void)memset(rqp, 0, sizeof(red_queue_t)); 313 314 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t), 315 M_DEVBUF, M_WAITOK); 316 if (rqp->rq_q == NULL) { 317 FREE(rqp, M_DEVBUF); 318 error = ENOMEM; 319 break; 320 } 321 (void)memset(rqp->rq_q, 0, sizeof(class_queue_t)); 322 323 rqp->rq_red = 
red_alloc(0, 0, 0, 0, 0, 0); 324 if (rqp->rq_red == NULL) { 325 FREE(rqp->rq_q, M_DEVBUF); 326 FREE(rqp, M_DEVBUF); 327 error = ENOMEM; 328 break; 329 } 330 331 rqp->rq_ifq = &ifp->if_snd; 332 qtail(rqp->rq_q) = NULL; 333 qlen(rqp->rq_q) = 0; 334 qlimit(rqp->rq_q) = RED_LIMIT; 335 qtype(rqp->rq_q) = Q_RED; 336 337 /* 338 * set RED to this ifnet structure. 339 */ 340 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 341 red_enqueue, red_dequeue, red_request, 342 NULL, NULL); 343 if (error) { 344 red_destroy(rqp->rq_red); 345 FREE(rqp->rq_q, M_DEVBUF); 346 FREE(rqp, M_DEVBUF); 347 break; 348 } 349 350 /* add this state to the red list */ 351 rqp->rq_next = red_list; 352 red_list = rqp; 353 break; 354 355 case RED_IF_DETACH: 356 ifacep = (struct red_interface *)addr; 357 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 358 error = EBADF; 359 break; 360 } 361 error = red_detach(rqp); 362 break; 363 364 case RED_GETSTATS: 365 do { 366 struct red_stats *q_stats; 367 red_t *rp; 368 369 q_stats = (struct red_stats *)addr; 370 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 371 ALTQT_RED)) == NULL) { 372 error = EBADF; 373 break; 374 } 375 376 q_stats->q_len = qlen(rqp->rq_q); 377 q_stats->q_limit = qlimit(rqp->rq_q); 378 379 rp = rqp->rq_red; 380 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 381 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 382 q_stats->drop_cnt = rp->red_stats.drop_cnt; 383 q_stats->drop_forced = rp->red_stats.drop_forced; 384 q_stats->drop_unforced = rp->red_stats.drop_unforced; 385 q_stats->marked_packets = rp->red_stats.marked_packets; 386 387 q_stats->weight = rp->red_weight; 388 q_stats->inv_pmax = rp->red_inv_pmax; 389 q_stats->th_min = rp->red_thmin; 390 q_stats->th_max = rp->red_thmax; 391 392 #ifdef ALTQ_FLOWVALVE 393 if (rp->red_flowvalve != NULL) { 394 struct flowvalve *fv = rp->red_flowvalve; 395 q_stats->fv_flows = fv->fv_flows; 396 q_stats->fv_pass = fv->fv_stats.pass; 397 q_stats->fv_predrop = fv->fv_stats.predrop; 398 
q_stats->fv_alloc = fv->fv_stats.alloc; 399 q_stats->fv_escape = fv->fv_stats.escape; 400 } else { 401 #endif /* ALTQ_FLOWVALVE */ 402 q_stats->fv_flows = 0; 403 q_stats->fv_pass = 0; 404 q_stats->fv_predrop = 0; 405 q_stats->fv_alloc = 0; 406 q_stats->fv_escape = 0; 407 #ifdef ALTQ_FLOWVALVE 408 } 409 #endif /* ALTQ_FLOWVALVE */ 410 } while (0); 411 break; 412 413 case RED_CONFIG: 414 do { 415 struct red_conf *fc; 416 red_t *new; 417 int s, limit; 418 419 fc = (struct red_conf *)addr; 420 if ((rqp = altq_lookup(fc->iface.red_ifname, 421 ALTQT_RED)) == NULL) { 422 error = EBADF; 423 break; 424 } 425 new = red_alloc(fc->red_weight, 426 fc->red_inv_pmax, 427 fc->red_thmin, 428 fc->red_thmax, 429 fc->red_flags, 430 fc->red_pkttime); 431 if (new == NULL) { 432 error = ENOMEM; 433 break; 434 } 435 436 s = splnet(); 437 red_purgeq(rqp); 438 limit = fc->red_limit; 439 if (limit < fc->red_thmax) 440 limit = fc->red_thmax; 441 qlimit(rqp->rq_q) = limit; 442 fc->red_limit = limit; /* write back the new value */ 443 444 red_destroy(rqp->rq_red); 445 rqp->rq_red = new; 446 447 splx(s); 448 449 /* write back new values */ 450 fc->red_limit = limit; 451 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 452 fc->red_thmin = rqp->rq_red->red_thmin; 453 fc->red_thmax = rqp->rq_red->red_thmax; 454 455 } while (0); 456 break; 457 458 case RED_SETDEFAULTS: 459 do { 460 struct redparams *rp; 461 462 rp = (struct redparams *)addr; 463 464 default_th_min = rp->th_min; 465 default_th_max = rp->th_max; 466 default_inv_pmax = rp->inv_pmax; 467 } while (0); 468 break; 469 470 default: 471 error = EINVAL; 472 break; 473 } 474 return error; 475 } 476 477 static int 478 red_detach(rqp) 479 red_queue_t *rqp; 480 { 481 red_queue_t *tmp; 482 int error = 0; 483 484 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 485 altq_disable(rqp->rq_ifq); 486 487 if ((error = altq_detach(rqp->rq_ifq))) 488 return (error); 489 490 if (red_list == rqp) 491 red_list = rqp->rq_next; 492 else { 493 for (tmp = red_list; tmp != NULL; 
tmp = tmp->rq_next) 494 if (tmp->rq_next == rqp) { 495 tmp->rq_next = rqp->rq_next; 496 break; 497 } 498 if (tmp == NULL) 499 printf("red_detach: no state found in red_list!\n"); 500 } 501 502 red_destroy(rqp->rq_red); 503 FREE(rqp->rq_q, M_DEVBUF); 504 FREE(rqp, M_DEVBUF); 505 return (error); 506 } 507 508 /* 509 * red support routines 510 */ 511 512 red_t * 513 red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime) 514 int weight, inv_pmax, th_min, th_max; 515 int flags, pkttime; 516 { 517 red_t *rp; 518 int w, i; 519 int npkts_per_sec; 520 521 MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK); 522 if (rp == NULL) 523 return (NULL); 524 (void)memset(rp, 0, sizeof(red_t)); 525 526 rp->red_avg = 0; 527 rp->red_idle = 1; 528 529 if (weight == 0) 530 rp->red_weight = W_WEIGHT; 531 else 532 rp->red_weight = weight; 533 if (inv_pmax == 0) 534 rp->red_inv_pmax = default_inv_pmax; 535 else 536 rp->red_inv_pmax = inv_pmax; 537 if (th_min == 0) 538 rp->red_thmin = default_th_min; 539 else 540 rp->red_thmin = th_min; 541 if (th_max == 0) 542 rp->red_thmax = default_th_max; 543 else 544 rp->red_thmax = th_max; 545 546 rp->red_flags = flags; 547 548 if (pkttime == 0) 549 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 550 rp->red_pkttime = 800; 551 else 552 rp->red_pkttime = pkttime; 553 554 if (weight == 0) { 555 /* when the link is very slow, adjust red parameters */ 556 npkts_per_sec = 1000000 / rp->red_pkttime; 557 if (npkts_per_sec < 50) { 558 /* up to about 400Kbps */ 559 rp->red_weight = W_WEIGHT_2; 560 } else if (npkts_per_sec < 300) { 561 /* up to about 2.4Mbps */ 562 rp->red_weight = W_WEIGHT_1; 563 } 564 } 565 566 /* calculate wshift. weight must be power of 2 */ 567 w = rp->red_weight; 568 for (i = 0; w > 1; i++) 569 w = w >> 1; 570 rp->red_wshift = i; 571 w = 1 << rp->red_wshift; 572 if (w != rp->red_weight) { 573 printf("invalid weight value %d for red! 
use %d\n", 574 rp->red_weight, w); 575 rp->red_weight = w; 576 } 577 578 /* 579 * thmin_s and thmax_s are scaled versions of th_min and th_max 580 * to be compared with avg. 581 */ 582 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 583 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 584 585 /* 586 * precompute probability denominator 587 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 588 */ 589 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 590 * rp->red_inv_pmax) << FP_SHIFT; 591 592 /* allocate weight table */ 593 rp->red_wtab = wtab_alloc(rp->red_weight); 594 595 microtime(&rp->red_last); 596 #ifdef ALTQ_FLOWVALVE 597 if (flags & REDF_FLOWVALVE) 598 rp->red_flowvalve = fv_alloc(rp); 599 /* if fv_alloc failes, flowvalve is just disabled */ 600 #endif 601 return (rp); 602 } 603 604 void 605 red_destroy(rp) 606 red_t *rp; 607 { 608 #ifdef ALTQ_FLOWVALVE 609 if (rp->red_flowvalve != NULL) 610 fv_destroy(rp->red_flowvalve); 611 #endif 612 wtab_destroy(rp->red_wtab); 613 FREE(rp, M_DEVBUF); 614 } 615 616 void 617 red_getstats(rp, sp) 618 red_t *rp; 619 struct redstats *sp; 620 { 621 sp->q_avg = rp->red_avg >> rp->red_wshift; 622 sp->xmit_cnt = rp->red_stats.xmit_cnt; 623 sp->drop_cnt = rp->red_stats.drop_cnt; 624 sp->drop_forced = rp->red_stats.drop_forced; 625 sp->drop_unforced = rp->red_stats.drop_unforced; 626 sp->marked_packets = rp->red_stats.marked_packets; 627 } 628 629 /* 630 * enqueue routine: 631 * 632 * returns: 0 when successfully queued. 633 * ENOBUFS when drop occurs. 
634 */ 635 static int 636 red_enqueue(ifq, m, pktattr) 637 struct ifaltq *ifq; 638 struct mbuf *m; 639 struct altq_pktattr *pktattr; 640 { 641 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 642 643 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) 644 return ENOBUFS; 645 ifq->ifq_len++; 646 return 0; 647 } 648 649 int 650 red_addq(rp, q, m, pktattr) 651 red_t *rp; 652 class_queue_t *q; 653 struct mbuf *m; 654 struct altq_pktattr *pktattr; 655 { 656 int avg, droptype; 657 int n; 658 #ifdef ALTQ_FLOWVALVE 659 struct fve *fve = NULL; 660 661 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 662 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 663 m_freem(m); 664 return (-1); 665 } 666 #endif 667 668 avg = rp->red_avg; 669 670 /* 671 * if we were idle, we pretend that n packets arrived during 672 * the idle period. 673 */ 674 if (rp->red_idle) { 675 struct timeval now; 676 int t; 677 678 rp->red_idle = 0; 679 microtime(&now); 680 t = (now.tv_sec - rp->red_last.tv_sec); 681 if (t > 60) { 682 /* 683 * being idle for more than 1 minute, set avg to zero. 684 * this prevents t from overflow. 685 */ 686 avg = 0; 687 } else { 688 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 689 n = t / rp->red_pkttime - 1; 690 691 /* the following line does (avg = (1 - Wq)^n * avg) */ 692 if (n > 0) 693 avg = (avg >> FP_SHIFT) * 694 pow_w(rp->red_wtab, n); 695 } 696 } 697 698 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 699 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 700 rp->red_avg = avg; /* save the new value */ 701 702 /* 703 * red_count keeps a tally of arriving traffic that has not 704 * been dropped. 
705 */ 706 rp->red_count++; 707 708 /* see if we drop early */ 709 droptype = DTYPE_NODROP; 710 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 711 if (avg >= rp->red_thmax_s) { 712 /* avg >= th_max: forced drop */ 713 droptype = DTYPE_FORCED; 714 } else if (rp->red_old == 0) { 715 /* first exceeds th_min */ 716 rp->red_count = 1; 717 rp->red_old = 1; 718 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 719 rp->red_probd, rp->red_count)) { 720 /* mark or drop by red */ 721 if ((rp->red_flags & REDF_ECN) && 722 mark_ecn(m, pktattr, rp->red_flags)) { 723 /* successfully marked. do not drop. */ 724 rp->red_count = 0; 725 #ifdef RED_STATS 726 rp->red_stats.marked_packets++; 727 #endif 728 } else { 729 /* unforced drop by red */ 730 droptype = DTYPE_EARLY; 731 } 732 } 733 } else { 734 /* avg < th_min */ 735 rp->red_old = 0; 736 } 737 738 /* 739 * if the queue length hits the hard limit, it's a forced drop. 740 */ 741 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 742 droptype = DTYPE_FORCED; 743 744 #ifdef RED_RANDOM_DROP 745 /* if successful or forced drop, enqueue this packet. */ 746 if (droptype != DTYPE_EARLY) 747 _addq(q, m); 748 #else 749 /* if successful, enqueue this packet. */ 750 if (droptype == DTYPE_NODROP) 751 _addq(q, m); 752 #endif 753 if (droptype != DTYPE_NODROP) { 754 if (droptype == DTYPE_EARLY) { 755 /* drop the incoming packet */ 756 #ifdef RED_STATS 757 rp->red_stats.drop_unforced++; 758 #endif 759 } else { 760 /* forced drop, select a victim packet in the queue. 
*/ 761 #ifdef RED_RANDOM_DROP 762 m = _getq_random(q); 763 #endif 764 #ifdef RED_STATS 765 rp->red_stats.drop_forced++; 766 #endif 767 } 768 #ifdef RED_STATS 769 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 770 #endif 771 rp->red_count = 0; 772 #ifdef ALTQ_FLOWVALVE 773 if (rp->red_flowvalve != NULL) 774 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 775 #endif 776 m_freem(m); 777 return (-1); 778 } 779 /* successfully queued */ 780 #ifdef RED_STATS 781 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 782 #endif 783 return (0); 784 } 785 786 /* 787 * early-drop probability is calculated as follows: 788 * prob = p_max * (avg - th_min) / (th_max - th_min) 789 * prob_a = prob / (2 - count*prob) 790 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 791 * here prob_a increases as successive undrop count increases. 792 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 793 * becomes 1 when (count >= (2 / prob))). 794 */ 795 int 796 drop_early(fp_len, fp_probd, count) 797 int fp_len; /* (avg - TH_MIN) in fixed-point */ 798 int fp_probd; /* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */ 799 int count; /* how many successive undropped packets */ 800 { 801 int d; /* denominator of drop-probability */ 802 803 d = fp_probd - count * fp_len; 804 if (d <= 0) 805 /* count exceeds the hard limit: drop or mark */ 806 return (1); 807 808 /* 809 * now the range of d is [1..600] in fixed-point. (when 810 * th_max-th_min=10 and p_max=1/30) 811 * drop probability = (avg - TH_MIN) / d 812 */ 813 814 if ((random() % d) < fp_len) { 815 /* drop or mark */ 816 return (1); 817 } 818 /* no drop/mark */ 819 return (0); 820 } 821 822 /* 823 * try to mark CE bit to the packet. 824 * returns 1 if successfully marked, 0 otherwise. 
825 */ 826 int 827 mark_ecn(m, pktattr, flags) 828 struct mbuf *m; 829 struct altq_pktattr *pktattr; 830 int flags; 831 { 832 struct mbuf *m0; 833 834 if (pktattr == NULL || 835 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 836 return (0); 837 838 /* verify that pattr_hdr is within the mbuf data */ 839 for (m0 = m; m0 != NULL; m0 = m0->m_next) 840 if ((pktattr->pattr_hdr >= m0->m_data) && 841 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 842 break; 843 if (m0 == NULL) { 844 /* ick, pattr_hdr is stale */ 845 pktattr->pattr_af = AF_UNSPEC; 846 return (0); 847 } 848 849 switch (pktattr->pattr_af) { 850 case AF_INET: 851 if (flags & REDF_ECN4) { 852 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 853 u_int8_t otos; 854 int sum; 855 856 if (ip->ip_v != 4) 857 return (0); /* version mismatch! */ 858 859 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 860 return (0); /* not-ECT */ 861 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 862 return (1); /* already marked */ 863 864 /* 865 * ecn-capable but not marked, 866 * mark CE and update checksum 867 */ 868 otos = ip->ip_tos; 869 ip->ip_tos |= IPTOS_ECN_CE; 870 /* 871 * update checksum (from RFC1624) 872 * HC' = ~(~HC + ~m + m') 873 */ 874 sum = ~ntohs(ip->ip_sum) & 0xffff; 875 sum += (~otos & 0xffff) + ip->ip_tos; 876 sum = (sum >> 16) + (sum & 0xffff); 877 sum += (sum >> 16); /* add carry */ 878 ip->ip_sum = htons(~sum & 0xffff); 879 return (1); 880 } 881 break; 882 #ifdef INET6 883 case AF_INET6: 884 if (flags & REDF_ECN6) { 885 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 886 u_int32_t flowlabel; 887 888 flowlabel = ntohl(ip6->ip6_flow); 889 if ((flowlabel >> 28) != 6) 890 return (0); /* version mismatch! 
*/ 891 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 892 (IPTOS_ECN_NOTECT << 20)) 893 return (0); /* not-ECT */ 894 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 895 (IPTOS_ECN_CE << 20)) 896 return (1); /* already marked */ 897 /* 898 * ecn-capable but not marked, mark CE 899 */ 900 flowlabel |= (IPTOS_ECN_CE << 20); 901 ip6->ip6_flow = htonl(flowlabel); 902 return (1); 903 } 904 break; 905 #endif /* INET6 */ 906 } 907 908 /* not marked */ 909 return (0); 910 } 911 912 /* 913 * dequeue routine: 914 * must be called in splnet. 915 * 916 * returns: mbuf dequeued. 917 * NULL when no packet is available in the queue. 918 */ 919 920 static struct mbuf * 921 red_dequeue(ifq, op) 922 struct ifaltq *ifq; 923 int op; 924 { 925 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 926 struct mbuf *m; 927 928 if (op == ALTDQ_POLL) 929 return qhead(rqp->rq_q); 930 931 /* op == ALTDQ_REMOVE */ 932 m = red_getq(rqp->rq_red, rqp->rq_q); 933 if (m != NULL) 934 ifq->ifq_len--; 935 return (m); 936 } 937 938 struct mbuf * 939 red_getq(rp, q) 940 red_t *rp; 941 class_queue_t *q; 942 { 943 struct mbuf *m; 944 945 if ((m = _getq(q)) == NULL) { 946 if (rp->red_idle == 0) { 947 rp->red_idle = 1; 948 microtime(&rp->red_last); 949 } 950 return NULL; 951 } 952 953 rp->red_idle = 0; 954 return (m); 955 } 956 957 static int 958 red_request(ifq, req, arg) 959 struct ifaltq *ifq; 960 int req; 961 void *arg; 962 { 963 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 964 965 switch (req) { 966 case ALTRQ_PURGE: 967 red_purgeq(rqp); 968 break; 969 } 970 return (0); 971 } 972 973 static void 974 red_purgeq(rqp) 975 red_queue_t *rqp; 976 { 977 _flushq(rqp->rq_q); 978 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 979 rqp->rq_ifq->ifq_len = 0; 980 } 981 982 983 /* 984 * helper routine to calibrate avg during idle. 985 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 986 * here Wq = 1/weight and the code assumes Wq is close to zero. 987 * 988 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 
989 */ 990 static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 991 992 struct wtab * 993 wtab_alloc(weight) 994 int weight; 995 { 996 struct wtab *w; 997 int i; 998 999 for (w = wtab_list; w != NULL; w = w->w_next) 1000 if (w->w_weight == weight) { 1001 w->w_refcount++; 1002 return (w); 1003 } 1004 1005 MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK); 1006 if (w == NULL) 1007 panic("wtab_alloc: malloc failed!"); 1008 (void)memset(w, 0, sizeof(struct wtab)); 1009 w->w_weight = weight; 1010 w->w_refcount = 1; 1011 w->w_next = wtab_list; 1012 wtab_list = w; 1013 1014 /* initialize the weight table */ 1015 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 1016 for (i = 1; i < 32; i++) { 1017 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 1018 if (w->w_tab[i] == 0 && w->w_param_max == 0) 1019 w->w_param_max = 1 << i; 1020 } 1021 1022 return (w); 1023 } 1024 1025 int 1026 wtab_destroy(w) 1027 struct wtab *w; 1028 { 1029 struct wtab *prev; 1030 1031 if (--w->w_refcount > 0) 1032 return (0); 1033 1034 if (wtab_list == w) 1035 wtab_list = w->w_next; 1036 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 1037 if (prev->w_next == w) { 1038 prev->w_next = w->w_next; 1039 break; 1040 } 1041 1042 FREE(w, M_DEVBUF); 1043 return (0); 1044 } 1045 1046 int32_t 1047 pow_w(w, n) 1048 struct wtab *w; 1049 int n; 1050 { 1051 int i, bit; 1052 int32_t val; 1053 1054 if (n >= w->w_param_max) 1055 return (0); 1056 1057 val = 1 << FP_SHIFT; 1058 if (n <= 0) 1059 return (val); 1060 1061 bit = 1; 1062 i = 0; 1063 while (n) { 1064 if (n & bit) { 1065 val = (val * w->w_tab[i]) >> FP_SHIFT; 1066 n &= ~bit; 1067 } 1068 i++; 1069 bit <<= 1; 1070 } 1071 return (val); 1072 } 1073 1074 #ifdef ALTQ_FLOWVALVE 1075 1076 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1077 #define FV_PSCALE(x) ((x) << FV_PSHIFT) 1078 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1079 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 
1080 #define FV_FSCALE(x) ((x) << FV_FSHIFT) 1081 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1082 1083 #define FV_TIMER (3 * hz) /* timer value for garbage collector */ 1084 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ 1085 1086 #define FV_N 10 /* update fve_f every FV_N packets */ 1087 1088 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ 1089 #define FV_TTHRESH 3 /* time threshold to delete fve */ 1090 #define FV_ALPHA 5 /* extra packet count */ 1091 1092 #if (__FreeBSD_version > 300000) 1093 #define FV_TIMESTAMP(tp) getmicrotime(tp) 1094 #else 1095 #define FV_TIMESTAMP(tp) { (*(tp)) = time; } 1096 #endif 1097 1098 /* 1099 * Brtt table: 127 entry table to convert drop rate (p) to 1100 * the corresponding bandwidth fraction (f) 1101 * the following equation is implemented to use scaled values, 1102 * fve_p and fve_f, in the fixed point format. 1103 * 1104 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) 1105 * f = Brtt(p) / (max_th + alpha) 1106 */ 1107 #define BRTT_SIZE 128 1108 #define BRTT_SHIFT 12 1109 #define BRTT_MASK 0x0007f000 1110 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) 1111 1112 const int brtt_tab[BRTT_SIZE] = { 1113 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 1114 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 1115 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 1116 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 1117 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 1118 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 1119 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 1120 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 1121 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 1122 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 1123 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 1124 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 1125 8594, 8354, 8122, 7898, 7682, 
7474, 7273, 7079, 1126 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 1127 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 1128 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 1129 }; 1130 1131 static __inline struct fve * 1132 flowlist_lookup(fv, pktattr, now) 1133 struct flowvalve *fv; 1134 struct altq_pktattr *pktattr; 1135 struct timeval *now; 1136 { 1137 struct fve *fve; 1138 int flows; 1139 struct ip *ip; 1140 #ifdef INET6 1141 struct ip6_hdr *ip6; 1142 #endif 1143 struct timeval tthresh; 1144 1145 if (pktattr == NULL) 1146 return (NULL); 1147 1148 tthresh.tv_sec = now->tv_sec - FV_TTHRESH; 1149 flows = 0; 1150 /* 1151 * search the flow list 1152 */ 1153 switch (pktattr->pattr_af) { 1154 case AF_INET: 1155 ip = (struct ip *)pktattr->pattr_hdr; 1156 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1157 if (fve->fve_lastdrop.tv_sec == 0) 1158 break; 1159 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1160 fve->fve_lastdrop.tv_sec = 0; 1161 break; 1162 } 1163 if (fve->fve_flow.flow_af == AF_INET && 1164 fve->fve_flow.flow_ip.ip_src.s_addr == 1165 ip->ip_src.s_addr && 1166 fve->fve_flow.flow_ip.ip_dst.s_addr == 1167 ip->ip_dst.s_addr) 1168 return (fve); 1169 flows++; 1170 } 1171 break; 1172 #ifdef INET6 1173 case AF_INET6: 1174 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1175 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1176 if (fve->fve_lastdrop.tv_sec == 0) 1177 break; 1178 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1179 fve->fve_lastdrop.tv_sec = 0; 1180 break; 1181 } 1182 if (fve->fve_flow.flow_af == AF_INET6 && 1183 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, 1184 &ip6->ip6_src) && 1185 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, 1186 &ip6->ip6_dst)) 1187 return (fve); 1188 flows++; 1189 } 1190 break; 1191 #endif /* INET6 */ 1192 1193 default: 1194 /* unknown protocol. no drop. 
*/ 1195 return (NULL); 1196 } 1197 fv->fv_flows = flows; /* save the number of active fve's */ 1198 return (NULL); 1199 } 1200 1201 static __inline struct fve * 1202 flowlist_reclaim(fv, pktattr) 1203 struct flowvalve *fv; 1204 struct altq_pktattr *pktattr; 1205 { 1206 struct fve *fve; 1207 struct ip *ip; 1208 #ifdef INET6 1209 struct ip6_hdr *ip6; 1210 #endif 1211 1212 /* 1213 * get an entry from the tail of the LRU list. 1214 */ 1215 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); 1216 1217 switch (pktattr->pattr_af) { 1218 case AF_INET: 1219 ip = (struct ip *)pktattr->pattr_hdr; 1220 fve->fve_flow.flow_af = AF_INET; 1221 fve->fve_flow.flow_ip.ip_src = ip->ip_src; 1222 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; 1223 break; 1224 #ifdef INET6 1225 case AF_INET6: 1226 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1227 fve->fve_flow.flow_af = AF_INET6; 1228 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; 1229 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; 1230 break; 1231 #endif 1232 } 1233 1234 fve->fve_state = Green; 1235 fve->fve_p = 0.0; 1236 fve->fve_f = 0.0; 1237 fve->fve_ifseq = fv->fv_ifseq - 1; 1238 fve->fve_count = 0; 1239 1240 fv->fv_flows++; 1241 #ifdef FV_STATS 1242 fv->fv_stats.alloc++; 1243 #endif 1244 return (fve); 1245 } 1246 1247 static __inline void 1248 flowlist_move_to_head(fv, fve) 1249 struct flowvalve *fv; 1250 struct fve *fve; 1251 { 1252 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { 1253 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); 1254 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); 1255 } 1256 } 1257 1258 /* 1259 * allocate flowvalve structure 1260 */ 1261 static struct flowvalve * 1262 fv_alloc(rp) 1263 struct red *rp; 1264 { 1265 struct flowvalve *fv; 1266 struct fve *fve; 1267 int i, num; 1268 1269 num = FV_FLOWLISTSIZE; 1270 MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve), 1271 M_DEVBUF, M_WAITOK); 1272 if (fv == NULL) 1273 return (NULL); 1274 (void)memset(fv, 0, sizeof(struct flowvalve)); 1275 1276 MALLOC(fv->fv_fves, struct 
fve *, sizeof(struct fve) * num, 1277 M_DEVBUF, M_WAITOK); 1278 if (fv->fv_fves == NULL) { 1279 FREE(fv, M_DEVBUF); 1280 return (NULL); 1281 } 1282 (void)memset(fv->fv_fves, 0, sizeof(struct fve) * num); 1283 1284 fv->fv_flows = 0; 1285 TAILQ_INIT(&fv->fv_flowlist); 1286 for (i = 0; i < num; i++) { 1287 fve = &fv->fv_fves[i]; 1288 fve->fve_lastdrop.tv_sec = 0; 1289 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); 1290 } 1291 1292 /* initialize drop rate threshold in scaled fixed-point */ 1293 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; 1294 1295 /* initialize drop rate to fraction table */ 1296 MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE, 1297 M_DEVBUF, M_WAITOK); 1298 if (fv->fv_p2ftab == NULL) { 1299 FREE(fv->fv_fves, M_DEVBUF); 1300 FREE(fv, M_DEVBUF); 1301 return (NULL); 1302 } 1303 /* 1304 * create the p2f table. 1305 * (shift is used to keep the precision) 1306 */ 1307 for (i = 1; i < BRTT_SIZE; i++) { 1308 int f; 1309 1310 f = brtt_tab[i] << 8; 1311 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; 1312 } 1313 1314 return (fv); 1315 } 1316 1317 static void fv_destroy(fv) 1318 struct flowvalve *fv; 1319 { 1320 FREE(fv->fv_p2ftab, M_DEVBUF); 1321 FREE(fv->fv_fves, M_DEVBUF); 1322 FREE(fv, M_DEVBUF); 1323 } 1324 1325 static __inline int 1326 fv_p2f(fv, p) 1327 struct flowvalve *fv; 1328 int p; 1329 { 1330 int val, f; 1331 1332 if (p >= BRTT_PMAX) 1333 f = fv->fv_p2ftab[BRTT_SIZE-1]; 1334 else if ((val = (p & BRTT_MASK))) 1335 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; 1336 else 1337 f = fv->fv_p2ftab[1]; 1338 return (f); 1339 } 1340 1341 /* 1342 * check if an arriving packet should be pre-dropped. 1343 * called from red_addq() when a packet arrives. 1344 * returns 1 when the packet should be pre-dropped. 1345 * should be called in splnet. 
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry so fv_dropbyred() can skip the second lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (fixed-point EWMA of this flow's share of the
		 * interface packet sequence)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop rate p exceeds the
	 * threshold is checked against the p-to-fraction table; if its
	 * bandwidth fraction f is also above the threshold, it turns Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay the drop-rate estimate on every passed packet;
	 * fv_dropbyred() cancels this decay when the packet is dropped)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no entry for this flow; take over the LRU tail entry */
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculate
	 *   p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

/* loadable-module glue: discipline switch entry for ALTQ */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ_RED */