/*	$NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $	*/
/*	$KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $	*/

/*
 * Copyright (C) 1997-2000
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 * Copyright (c) 1990-1994 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Computer Systems
 *	Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $");

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#ifdef ALTQ_FLOWVALVE
#include <sys/queue.h>
#include <sys/time.h>
#endif

#include <net/if.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_red.h>
#ifdef ALTQ_FLOWVALVE
#include <altq/altq_flowvalve.h>
#endif

/*
 * ALTQ/RED (Random Early Detection) implementation using 32-bit
 * fixed-point calculation.
 *
 * written by kjc using the ns code as a reference.
 * you can learn more about red and ns from Sally's home page at
 * http://www-nrg.ee.lbl.gov/floyd/
 *
 * most of the red parameter values are fixed in this implementation
 * to prevent fixed-point overflow/underflow.
 * if you change the parameters, watch out for overflow/underflow!
 *
 * the parameters used are recommended values by Sally.
 * the corresponding ns config looks like:
 *	q_weight=0.00195
 *	minthresh=5 maxthresh=15 queue-size=60
 *	linterm=30
 *	dropmech=drop-tail
 *	bytes=false (can't be handled by 32-bit fixed-point)
 *	doubleq=false dqthresh=false
 *	wait=true
 */
/*
 * alternative red parameters for a slow link.
 *
 * assume the queue length grows from zero to L and then stays at L;
 * it takes N packets for q_avg to reach 63% of L.
 * when q_weight is 0.002, N is about 500 packets.
 * for a slow link like dial-up, 500 packets takes more than 1 minute!
 * when q_weight is 0.008, N is about 127 packets.
 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
 * are allowed for 0.016.
 * see Sally's paper for more details.
 */
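/*
 * the time-constant numbers above can be reproduced numerically.
 * the following is a minimal standalone userland sketch (guarded out
 * of the kernel build; the function and values are illustrative, not
 * part of ALTQ): with a step input of queue length L, the EWMA after
 * n packets is (1 - (1 - q_weight)^n) * L, so we count the packets
 * needed to reach 63% of L.
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <stdio.h>

static int
ewma_rise_time(double q_weight)
{
	double avg = 0.0;	/* q_avg, with L normalized to 1 */
	int n = 0;

	while (avg < 0.63) {
		avg += q_weight * (1.0 - avg);
		n++;
	}
	return (n);
}

int
main(void)
{
	/* prints N of about 509, 127 and 64 -- roughly 1/q_weight */
	printf("q_weight=1/512: N=%d\n", ewma_rise_time(1.0 / 512));
	printf("q_weight=1/128: N=%d\n", ewma_rise_time(1.0 / 128));
	printf("q_weight=1/64:  N=%d\n", ewma_rise_time(1.0 / 64));
	return (0);
}
#endif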
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */

#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

/* default red parameter values */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;

/* internal function prototypes */
static int red_enqueue __P((struct ifaltq *, struct mbuf *,
			    struct altq_pktattr *));
static struct mbuf *red_dequeue __P((struct ifaltq *, int));
static int red_request __P((struct ifaltq *, int, void *));
static void red_purgeq __P((red_queue_t *));
static int red_detach __P((red_queue_t *));
#ifdef ALTQ_FLOWVALVE
static __inline struct fve *flowlist_lookup __P((struct flowvalve *,
			 struct altq_pktattr *, struct timeval *));
static __inline struct fve *flowlist_reclaim __P((struct flowvalve *,
						  struct altq_pktattr *));
static __inline void flowlist_move_to_head __P((struct flowvalve *,
						struct fve *));
static __inline int fv_p2f __P((struct flowvalve *, int));
static struct flowvalve *fv_alloc __P((struct red *));
static void fv_destroy __P((struct flowvalve *));
static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
			     struct fve **));
static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
			      struct fve *));
#endif

/*
 * red device interface
 */
altqdev_decl(red);

int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
	struct proc *p;
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

int
redclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
	struct proc *p;
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

int
redioctl(dev, cmd, addr, flag, p)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct proc *p;
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	int	error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}
		(void)memset(rqp, 0, sizeof(red_queue_t));

		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
		       M_DEVBUF, M_WAITOK);
		if (rqp->rq_q == NULL) {
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}
		(void)memset(rqp->rq_q, 0, sizeof(class_queue_t));

		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
				    red_enqueue, red_dequeue, red_request,
				    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
					     ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len	   = qlen(rqp->rq_q);
			q_stats->q_limit   = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			q_stats->q_avg	   = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
			q_stats->drop_forced   = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight		= rp->red_weight;
			q_stats->inv_pmax	= rp->red_inv_pmax;
			q_stats->th_min		= rp->red_thmin;
			q_stats->th_max		= rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows    = fv->fv_flows;
				q_stats->fv_pass     = fv->fv_stats.pass;
				q_stats->fv_predrop  = fv->fv_stats.predrop;
				q_stats->fv_alloc    = fv->fv_stats.alloc;
				q_stats->fv_escape   = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows    = 0;
				q_stats->fv_pass     = 0;
				q_stats->fv_predrop  = 0;
				q_stats->fv_alloc    = 0;
				q_stats->fv_escape   = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			new = red_alloc(fc->red_weight,
					fc->red_inv_pmax,
					fc->red_thmin,
					fc->red_thmax,
					fc->red_flags,
					fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	FREE(rqp->rq_q, M_DEVBUF);
	FREE(rqp, M_DEVBUF);
	return (error);
}

/*
 * red support routines
 */

red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int	weight, inv_pmax, th_min, th_max;
	int	flags, pkttime;
{
	red_t	*rp;
	int	w, i;
	int	npkts_per_sec;

	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
	if (rp == NULL)
		return (NULL);
	(void)memset(rp, 0, sizeof(red_t));

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		       rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
			 * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is simply disabled */
#endif
	return (rp);
}
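/*
 * a worked example of the scaling done in red_alloc(), as a minimal
 * standalone userland sketch (guarded out of the kernel build; the
 * constants simply mirror the defaults above): with weight 512,
 * wshift becomes 9, so avg carries 9+12 fraction bits and the
 * thresholds scale to thmin_s = 5 << 21 and thmax_s = 15 << 21.
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <stdio.h>

int
main(void)
{
	int weight = 512, inv_pmax = 10, th_min = 5, th_max = 15;
	int w, wshift, thmin_s, thmax_s, probd;

	/* wshift = log2(weight), computed the same way as red_alloc() */
	for (w = weight, wshift = 0; w > 1; wshift++)
		w >>= 1;

	thmin_s = th_min << (wshift + 12);	/* FP_SHIFT == 12 */
	thmax_s = th_max << (wshift + 12);
	probd = (2 * (th_max - th_min) * inv_pmax) << 12;

	/* prints: wshift=9 thmin_s=10485760 thmax_s=31457280 probd=819200 */
	printf("wshift=%d thmin_s=%d thmax_s=%d probd=%d\n",
	       wshift, thmin_s, thmax_s, probd);
	return (0);
}
#endif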

void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	FREE(rp, M_DEVBUF);
}

void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
red_enqueue(ifq, m, pktattr)
	struct ifaltq *ifq;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
		return ENOBUFS;
	ifq->ifq_len++;
	return 0;
}

int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
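/*
 * the estimator line "avg += (qlen(q) << FP_SHIFT) - (avg >> wshift)"
 * above is avg = (1 - Wq) * avg + Wq * qlen with avg scaled by
 * 2^(wshift + FP_SHIFT).  a minimal standalone userland sketch
 * (guarded out of the kernel build; values are illustrative)
 * comparing the fixed-point update against a floating-point reference:
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <stdio.h>

#define FP_SHIFT	12

int
main(void)
{
	int wshift = 9;			/* Wq = 1/512 */
	int avg_fp = 0;			/* scaled by 2^(wshift+FP_SHIFT) */
	double avg = 0.0, wq = 1.0 / 512;
	int i, qlen = 10;

	for (i = 0; i < 1000; i++) {
		/* the fixed-point estimator from red_addq() */
		avg_fp += (qlen << FP_SHIFT) - (avg_fp >> wshift);
		/* the floating-point EWMA it implements */
		avg += wq * ((double)qlen - avg);
	}
	/* both values track each other and approach qlen (= 10) */
	printf("fixed-point %.4f  float %.4f\n",
	       (double)avg_fp / (1 << (wshift + FP_SHIFT)), avg);
	return (0);
}
#endif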

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as the number of successive undropped packets
 * increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * and becomes 1 when (count >= (2 / prob)).)
 */
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of d is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((random() % d) < fp_len) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}
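/*
 * a numerical illustration of the formula above, as a minimal
 * standalone userland sketch (guarded out of the kernel build) using
 * the default parameters (th_min=5, th_max=15, p_max=1/30) and
 * avg=10: prob is 1/60, so prob_a starts near 1/120, equals prob at
 * count=60 and hits the hard limit at count=120.
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <stdio.h>

int
main(void)
{
	double avg = 10.0, th_min = 5.0, th_max = 15.0, inv_pmax = 30.0;
	int count;

	for (count = 0; count <= 120; count += 30) {
		double d = 2.0 * (th_max - th_min) * inv_pmax
		    - count * (avg - th_min);

		if (d <= 0)
			printf("count=%3d prob_a=1 (hard limit)\n", count);
		else
			printf("count=%3d prob_a=%.4f\n", count,
			       (avg - th_min) / d);
	}
	return (0);
}
#endif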

/*
 * try to set the CE bit in the packet.
 *    returns 1 if successfully marked, 0 otherwise.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked,  mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
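/*
 * the RFC 1624 incremental update above can be cross-checked against
 * a full recomputation.  a minimal standalone userland sketch
 * (guarded out of the kernel build; the header contents are made up
 * for illustration):
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <assert.h>
#include <stdio.h>

/* ones-complement checksum over n 16-bit words */
static unsigned int
cksum16(const unsigned short *p, int n)
{
	unsigned int sum = 0;

	while (n-- > 0) {
		sum += *p++;
		sum = (sum >> 16) + (sum & 0xffff);
	}
	return (~sum & 0xffff);
}

int
main(void)
{
	/* a made-up IPv4 header (10 words, TOS = ECT(1), cksum zeroed) */
	unsigned short hdr[10] = {
		0x4501, 0x0054, 0x1234, 0x4000, 0x4001, 0x0000,
		0xc0a8, 0x0001, 0xc0a8, 0x0002,
	};
	unsigned int sum, old, new, verify;

	hdr[5] = cksum16(hdr, 10);

	/* set the CE codepoint in the TOS byte (word 0, low byte) */
	old = hdr[0];
	new = old | 0x0003;		/* IPTOS_ECN_CE */
	hdr[0] = new;

	/* incremental update: HC' = ~(~HC + ~m + m') */
	sum = ~hdr[5] & 0xffff;
	sum += (~old & 0xffff) + new;
	sum = (sum >> 16) + (sum & 0xffff);
	sum += (sum >> 16);		/* add carry */
	hdr[5] = ~sum & 0xffff;

	/* cross-check against a full recomputation */
	verify = hdr[5];
	hdr[5] = 0;
	assert(cksum16(hdr, 10) == verify);
	printf("incremental and full checksums agree: 0x%04x\n", verify);
	return (0);
}
#endif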

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
red_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

struct mbuf *
red_getq(rp, q)
	red_t *rp;
	class_queue_t *q;
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->red_idle == 0) {
			rp->red_idle = 1;
			microtime(&rp->red_last);
		}
		return NULL;
	}

	rp->red_idle = 0;
	return (m);
}

static int
red_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}


/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
 */
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */

struct wtab *
wtab_alloc(weight)
	int weight;
{
	struct wtab *w;
	int i;

	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	(void)memset(w, 0, sizeof(struct wtab));
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}

int
wtab_destroy(w)
	struct wtab *w;
{
	struct wtab *prev;

	if (--w->w_refcount > 0)
		return (0);

	if (wtab_list == w)
		wtab_list = w->w_next;
	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
		if (prev->w_next == w) {
			prev->w_next = w->w_next;
			break;
		}

	FREE(w, M_DEVBUF);
	return (0);
}

int32_t
pow_w(w, n)
	struct wtab *w;
	int n;
{
	int	i, bit;
	int32_t	val;

	if (n >= w->w_param_max)
		return (0);

	val = 1 << FP_SHIFT;
	if (n <= 0)
		return (val);

	bit = 1;
	i = 0;
	while (n) {
		if (n & bit) {
			val = (val * w->w_tab[i]) >> FP_SHIFT;
			n &= ~bit;
		}
		i++;
		bit <<= 1;
	}
	return (val);
}
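/*
 * a cross-check of the table-driven exponentiation, as a minimal
 * standalone userland sketch (guarded out of the kernel build):
 * build w_tab as wtab_alloc() does, evaluate (1 - 1/512)^1000 the
 * way pow_w() does, and compare with a direct computation.
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <math.h>
#include <stdio.h>

#define FP_SHIFT	12

int
main(void)
{
	int weight = 512, n = 1000, i, bit;
	long w_tab[32], val;

	/* w_tab[i] = (1 - 1/weight)^(2^i) in fixed-point */
	w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++)
		w_tab[i] = (w_tab[i - 1] * w_tab[i - 1]) >> FP_SHIFT;

	/* binary exponentiation over the set bits of n, as in pow_w() */
	val = 1 << FP_SHIFT;
	for (i = 0, bit = 1; n != 0; i++, bit <<= 1)
		if (n & bit) {
			val = (val * w_tab[i]) >> FP_SHIFT;
			n &= ~bit;
		}

	/* both print about 0.14 */
	printf("fixed-point %.4f  direct %.4f\n",
	       (double)val / (1 << FP_SHIFT), pow(511.0 / 512, 1000));
	return (0);
}
#endif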

#ifdef ALTQ_FLOWVALVE

#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */

#define	FV_N			10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
#define	FV_TTHRESH		3  /* time threshold to delete fve */
#define	FV_ALPHA		5  /* extra packet count */

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
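/*
 * the table entries appear to be Brtt(p) scaled by
 * 2^(FP_SHIFT + FV_FSHIFT) = 2^17, evaluated at p = i/128 (one step
 * of BRTT_SHIFT in the FV_PSCALE'd fixed-point format).  a minimal
 * standalone userland sketch (guarded out of the kernel build) that
 * reproduces the entries; the exact rounding of whatever generated
 * the original table may differ slightly:
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	int i;

	for (i = 1; i < 128; i++) {
		double p = (double)i / 128;
		double brtt = 1.0 / (sqrt(4 * p / 3) +
		    fmin(1.0, 3 * sqrt(p * 6 / 8)) * p * (1 + 32 * p * p));

		/* i=1 prints about 1262010, matching the table above */
		printf("%d: %.0f\n", i, brtt * (1 << 17));
	}
	return (0);
}
#endif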

static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

static __inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0;
	fve->fve_f = 0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

static __inline void
flowlist_move_to_head(fv, fve)
	struct flowvalve *fv;
	struct fve *fve;
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	(void)memset(fv, 0, sizeof(struct flowvalve));

	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	(void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		FREE(fv->fv_fves, M_DEVBUF);
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}

static void
fv_destroy(fv)
	struct flowvalve *fv;
{
	FREE(fv->fv_p2ftab, M_DEVBUF);
	FREE(fv->fv_fves, M_DEVBUF);
	FREE(fv, M_DEVBUF);
}

static __inline int
fv_p2f(fv, p)
	struct flowvalve	*fv;
	int	p;
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
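/*
 * a numerical illustration of the fraction estimator above, as a
 * minimal standalone userland sketch (guarded out of the kernel
 * build): fve_f is scaled by 2^(FP_SHIFT + FV_FSHIFT), so feeding a
 * constant share of FV_N packets out of every 40 makes the estimate
 * converge to 0.25.
 */
#if 0	/* illustrative sketch, not compiled into the kernel */
#include <stdio.h>

#define FP_SHIFT	12
#define FV_FSHIFT	5	/* Wf = 1/32 */
#define FV_N		10

int
main(void)
{
	int f = 0;		/* fve_f analogue */
	int i, seqdiff = 40;	/* FV_N packets out of each 40 seen */

	for (i = 0; i < 500; i++)
		f += (FV_N << FP_SHIFT) / seqdiff - (f >> FV_FSHIFT);

	/* prints 0.2500 */
	printf("estimated fraction: %.4f\n",
	       (double)f / (1 << (FP_SHIFT + FV_FSHIFT)));
	return (0);
}
#endif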

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
static void
fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 *	      in fv_checkflow() and calculates
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ_RED */