xref: /openbsd-src/sys/net/fq_codel.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*
2  * Copyright (c) 2017 Mike Belopuhov
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /*
18  * Codel - The Controlled-Delay Active Queue Management algorithm
19  * IETF draft-ietf-aqm-codel-07
20  *
21  * Based on the algorithm by Kathleen Nichols and Van Jacobson with
22  * improvements from Dave Taht and Eric Dumazet.
23  */
24 
25 /*
26  * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
27  * IETF draft-ietf-aqm-fq-codel-06
28  *
29  * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
30  * Internet Architectures, Swinburne University of Technology, Melbourne,
31  * Australia.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/socket.h>
37 #include <sys/mbuf.h>
38 #include <sys/queue.h>
39 
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <net/pfvar.h>
43 #include <net/fq_codel.h>
44 
45 /* #define FQCODEL_DEBUG 1 */
46 
47 #ifdef FQCODEL_DEBUG
48 #define DPRINTF(x...)		printf(x)
49 #else
50 #define DPRINTF(x...)
51 #endif
52 
/*
 * Per-flow CoDel queue state: the packet FIFO itself plus the
 * variables that drive the drop-scheduling state machine.
 */
struct codel {
	struct mbuf_list q;		/* FIFO of queued packets */

	unsigned int	 dropping:1;	/* Dropping state */
	unsigned int	 backlog:31;	/* Number of bytes in the queue */

	unsigned short	 drops;		/* Free running counter of drops */
	unsigned short	 ldrops;	/* Value from the previous run */

	int64_t		 start;		/* The moment queue was above target */
	int64_t		 next;		/* Next interval */
	int64_t		 delay;		/* Delay incurred by the last packet */
};
66 
/*
 * CoDel configuration shared by all flow queues of one fq-codel
 * instance (see codel_initparams()).
 */
struct codel_params {
	int64_t		 target;	/* Acceptable queueing delay, ns */
	int64_t		 interval;	/* Initial observation interval, ns */
	int		 quantum;	/* Backlog floor before dropping, bytes */

	uint32_t	*intervals;	/* "100/sqrt(x)" table, possibly scaled */
};
74 
75 void		 codel_initparams(struct codel_params *, unsigned int,
76 		    unsigned int, int);
77 void		 codel_freeparams(struct codel_params *);
78 void		 codel_enqueue(struct codel *, int64_t, struct mbuf *);
79 struct mbuf	*codel_dequeue(struct codel *, struct codel_params *, int64_t,
80 		    struct mbuf_list *, uint64_t *, uint64_t *);
81 struct mbuf	*codel_commit(struct codel *, struct mbuf *);
82 void		 codel_purge(struct codel *, struct mbuf_list *ml);
83 
/*
 * One DRR flow: its CoDel queue plus round-robin scheduling state.
 */
struct flow {
	struct codel		 cd;		/* Per-flow CoDel queue */
	int			 active:1;	/* Linked on newq or oldq */
	int			 deficit:31;	/* Remaining DRR byte credit */
#ifdef FQCODEL_DEBUG
	uint16_t		 id;		/* Flow index, debug only */
#endif
	SIMPLEQ_ENTRY(flow)	 flowentry;	/* Linkage on newq/oldq */
};
SIMPLEQ_HEAD(flowq, flow);
94 
/*
 * State of a single fq-codel queueing discipline instance.
 */
struct fqcodel {
	struct flowq		 newq;		/* Flows not yet serviced */
	struct flowq		 oldq;		/* Flows in round-robin service */

	struct flow		*flows;		/* Array of nflows entries */
	unsigned int		 qlength;	/* Packets across all flows */

	struct ifnet		*ifp;		/* Attached interface */

	struct codel_params	 cparams;	/* Shared CoDel configuration */

	unsigned int		 nflows;	/* Number of flow queues */
	unsigned int		 qlimit;	/* Aggregate packet limit */
	int			 quantum;	/* DRR byte credit per round */

	unsigned int		 flags;
#define FQCF_FIXED_QUANTUM	  0x1		/* Quantum set by pf, not MTU */

	/* stats */
	struct fqcodel_pktcntr   xmit_cnt;	/* Transmitted packets/bytes */
	struct fqcodel_pktcntr 	 drop_cnt;	/* Dropped packets/bytes */
};
117 
118 unsigned int	 fqcodel_idx(unsigned int, const struct mbuf *);
119 void		*fqcodel_alloc(unsigned int, void *);
120 void		 fqcodel_free(unsigned int, void *);
121 struct mbuf	*fqcodel_if_enq(struct ifqueue *, struct mbuf *);
122 struct mbuf	*fqcodel_if_deq_begin(struct ifqueue *, void **);
123 void		 fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *);
124 void		 fqcodel_if_purge(struct ifqueue *, struct mbuf_list *);
125 
126 struct mbuf	*fqcodel_enq(struct fqcodel *, struct mbuf *);
127 struct mbuf	*fqcodel_deq_begin(struct fqcodel *, void **,
128 		    struct mbuf_list *);
129 void		 fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *);
130 void		 fqcodel_purge(struct fqcodel *, struct mbuf_list *);
131 
132 /*
133  * ifqueue glue.
134  */
135 
/* Operations vector wired into the generic ifqueue framework */
static const struct ifq_ops fqcodel_ops = {
	fqcodel_idx,
	fqcodel_if_enq,
	fqcodel_if_deq_begin,
	fqcodel_if_deq_commit,
	fqcodel_if_purge,
	fqcodel_alloc,
	fqcodel_free
};

const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;
147 
148 void		*fqcodel_pf_alloc(struct ifnet *);
149 int		 fqcodel_pf_addqueue(void *, struct pf_queuespec *);
150 void		 fqcodel_pf_free(void *);
151 int		 fqcodel_pf_qstats(struct pf_queuespec *, void *, int *);
152 unsigned int	 fqcodel_pf_qlength(void *);
153 struct mbuf *	 fqcodel_pf_enqueue(void *, struct mbuf *);
154 struct mbuf *	 fqcodel_pf_deq_begin(void *, void **, struct mbuf_list *);
155 void		 fqcodel_pf_deq_commit(void *, struct mbuf *, void *);
156 void		 fqcodel_pf_purge(void *, struct mbuf_list *);
157 
158 /*
159  * pf queue glue.
160  */
161 
/* Operations vector used when the discipline is driven from pf(4) */
static const struct pfq_ops fqcodel_pf_ops = {
	fqcodel_pf_alloc,
	fqcodel_pf_addqueue,
	fqcodel_pf_free,
	fqcodel_pf_qstats,
	fqcodel_pf_qlength,
	fqcodel_pf_enqueue,
	fqcodel_pf_deq_begin,
	fqcodel_pf_deq_commit,
	fqcodel_pf_purge
};

const struct pfq_ops * const pfq_fqcodel_ops = &fqcodel_pf_ops;
175 
/* Default aggregate queue depth, packets */
static const unsigned int fqcodel_qlimit = 1024;

/*
 * CoDel implementation
 */

/* Delay target, 5ms in ns */
static const int64_t codel_target = 5000000;

/* Grace period after last drop, 16 100ms intervals in ns */
static const int64_t codel_grace = 1600000000;
188 
/* First 399 "100 / sqrt(x)" intervals, ns precision */
190 static const uint32_t codel_intervals[] = {
191 	100000000, 70710678, 57735027, 50000000, 44721360, 40824829, 37796447,
192 	35355339,  33333333, 31622777, 30151134, 28867513, 27735010, 26726124,
193 	25819889,  25000000, 24253563, 23570226, 22941573, 22360680, 21821789,
194 	21320072,  20851441, 20412415, 20000000, 19611614, 19245009, 18898224,
195 	18569534,  18257419, 17960530, 17677670, 17407766, 17149859, 16903085,
196 	16666667,  16439899, 16222142, 16012815, 15811388, 15617376, 15430335,
197 	15249857,  15075567, 14907120, 14744196, 14586499, 14433757, 14285714,
198 	14142136,  14002801, 13867505, 13736056, 13608276, 13483997, 13363062,
199 	13245324,  13130643, 13018891, 12909944, 12803688, 12700013, 12598816,
200 	12500000,  12403473, 12309149, 12216944, 12126781, 12038585, 11952286,
201 	11867817,  11785113, 11704115, 11624764, 11547005, 11470787, 11396058,
202 	11322770,  11250879, 11180340, 11111111, 11043153, 10976426, 10910895,
203 	10846523,  10783277, 10721125, 10660036, 10599979, 10540926, 10482848,
204 	10425721,  10369517, 10314212, 10259784, 10206207, 10153462, 10101525,
205 	10050378,  10000000, 9950372,  9901475,  9853293,  9805807,  9759001,
206 	9712859,   9667365,  9622504,  9578263,  9534626,  9491580,  9449112,
207 	9407209,   9365858,  9325048,  9284767,  9245003,  9205746,  9166985,
208 	9128709,   9090909,  9053575,  9016696,  8980265,  8944272,  8908708,
209 	8873565,   8838835,  8804509,  8770580,  8737041,  8703883,  8671100,
210 	8638684,   8606630,  8574929,  8543577,  8512565,  8481889,  8451543,
211 	8421519,   8391814,  8362420,  8333333,  8304548,  8276059,  8247861,
212 	8219949,   8192319,  8164966,  8137885,  8111071,  8084521,  8058230,
213 	8032193,   8006408,  7980869,  7955573,  7930516,  7905694,  7881104,
214 	7856742,   7832604,  7808688,  7784989,  7761505,  7738232,  7715167,
215 	7692308,   7669650,  7647191,  7624929,  7602859,  7580980,  7559289,
216 	7537784,   7516460,  7495317,  7474351,  7453560,  7432941,  7412493,
217 	7392213,   7372098,  7352146,  7332356,  7312724,  7293250,  7273930,
218 	7254763,   7235746,  7216878,  7198158,  7179582,  7161149,  7142857,
219 	7124705,   7106691,  7088812,  7071068,  7053456,  7035975,  7018624,
220 	7001400,   6984303,  6967330,  6950480,  6933752,  6917145,  6900656,
221 	6884284,   6868028,  6851887,  6835859,  6819943,  6804138,  6788442,
222 	6772855,   6757374,  6741999,  6726728,  6711561,  6696495,  6681531,
223 	6666667,   6651901,  6637233,  6622662,  6608186,  6593805,  6579517,
224 	6565322,   6551218,  6537205,  6523281,  6509446,  6495698,  6482037,
225 	6468462,   6454972,  6441566,  6428243,  6415003,  6401844,  6388766,
226 	6375767,   6362848,  6350006,  6337243,  6324555,  6311944,  6299408,
227 	6286946,   6274558,  6262243,  6250000,  6237829,  6225728,  6213698,
228 	6201737,   6189845,  6178021,  6166264,  6154575,  6142951,  6131393,
229 	6119901,   6108472,  6097108,  6085806,  6074567,  6063391,  6052275,
230 	6041221,   6030227,  6019293,  6008418,  5997601,  5986843,  5976143,
231 	5965500,   5954913,  5944383,  5933908,  5923489,  5913124,  5902813,
232 	5892557,   5882353,  5872202,  5862104,  5852057,  5842062,  5832118,
233 	5822225,   5812382,  5802589,  5792844,  5783149,  5773503,  5763904,
234 	5754353,   5744850,  5735393,  5725983,  5716620,  5707301,  5698029,
235 	5688801,   5679618,  5670480,  5661385,  5652334,  5643326,  5634362,
236 	5625440,   5616560,  5607722,  5598925,  5590170,  5581456,  5572782,
237 	5564149,   5555556,  5547002,  5538488,  5530013,  5521576,  5513178,
238 	5504819,   5496497,  5488213,  5479966,  5471757,  5463584,  5455447,
239 	5447347,   5439283,  5431254,  5423261,  5415304,  5407381,  5399492,
240 	5391639,   5383819,  5376033,  5368281,  5360563,  5352877,  5345225,
241 	5337605,   5330018,  5322463,  5314940,  5307449,  5299989,  5292561,
242 	5285164,   5277798,  5270463,  5263158,  5255883,  5248639,  5241424,
243 	5234239,   5227084,  5219958,  5212860,  5205792,  5198752,  5191741,
244 	5184758,   5177804,  5170877,  5163978,  5157106,  5150262,  5143445,
245 	5136655,   5129892,  5123155,  5116445,  5109761,  5103104,  5096472,
246 	5089866,   5083286,  5076731,  5070201,  5063697,  5057217,  5050763,
247 	5044333,   5037927,  5031546,  5025189,  5018856,  5012547,  5006262
248 };
249 
250 void
251 codel_initparams(struct codel_params *cp, unsigned int target,
252     unsigned int interval, int quantum)
253 {
254 	uint64_t mult;
255 	unsigned int i;
256 
257 	/*
258 	 * Update observation intervals table according to the configured
259 	 * initial interval value.
260 	 */
261 	if (interval > codel_intervals[0]) {
262 		/* Select either specified target or 5% of an interval */
263 		cp->target = MAX(target, interval / 5);
264 		cp->interval = interval;
265 
266 		/* The coefficient is scaled up by a 1000 */
267 		mult = ((uint64_t)cp->interval * 1000) / codel_intervals[0];
268 
269 		/* Prepare table of intervals */
270 		cp->intervals = mallocarray(nitems(codel_intervals),
271 		    sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
272 		for (i = 0; i < nitems(codel_intervals); i++)
273 			cp->intervals[i] = ((uint64_t)codel_intervals[i] *
274 			    mult) / 1000;
275 	} else {
276 		cp->target = MAX(target, codel_target);
277 		cp->interval = codel_intervals[0];
278 		cp->intervals = (uint32_t *)codel_intervals;
279 	}
280 
281 	cp->quantum = quantum;
282 }
283 
284 void
285 codel_freeparams(struct codel_params *cp)
286 {
287 	if (cp->intervals != codel_intervals)
288 		free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
289 		    sizeof(codel_intervals[0]));
290 	cp->intervals = NULL;
291 }
292 
293 static inline void
294 codel_gettime(int64_t *now)
295 {
296 	struct timespec tv;
297 
298 	nanouptime(&tv);
299 	*now = tv.tv_sec * 1000000000LL + tv.tv_nsec;
300 }
301 
/* Current queue depth in bytes */
static inline unsigned int
codel_backlog(struct codel *cd)
{
	return (cd->backlog);
}
307 
/* Current queue depth in packets */
static inline unsigned int
codel_qlength(struct codel *cd)
{
	return (ml_len(&cd->q));
}
313 
/* Sojourn time of the most recently dequeued packet, ns */
static inline int64_t
codel_delay(struct codel *cd)
{
	return (cd->delay);
}
319 
320 void
321 codel_enqueue(struct codel *cd, int64_t now, struct mbuf *m)
322 {
323 	m->m_pkthdr.ph_timestamp = now;
324 
325 	ml_enqueue(&cd->q, m);
326 	cd->backlog += m->m_pkthdr.len;
327 }
328 
329 /*
330  * Select the next interval according to the number of drops
331  * in the current one relative to the provided timestamp.
332  */
333 static inline void
334 control_law(struct codel *cd, struct codel_params *cp, int64_t rts)
335 {
336 	unsigned int idx;
337 
338 	idx = min(cd->drops, nitems(codel_intervals) - 1);
339 	cd->next = rts + cp->intervals[idx];
340 }
341 
/*
 * Pick the next enqueued packet and determine the queueing delay
 * as well as whether or not it's a good candidate for dropping
 * from the queue.
 *
 * The decision whether to drop the packet or not is made based
 * on the queueing delay target of 5ms and on the current queue
 * length in bytes which shouldn't be less than the amount of data
 * that arrives in a typical interarrival time (MTU-sized packets
 * arriving spaced by the amount of time it takes to send such a
 * packet on the bottleneck).
 *
 * The packet is peeked at, not removed: the caller commits the
 * decision with codel_commit().  *drop is set to 1 when the packet
 * should be dropped.
 */
static inline struct mbuf *
codel_next_packet(struct codel *cd, struct codel_params *cp, int64_t now,
    int *drop)
{
	struct mbuf *m;

	*drop = 0;

	m = MBUF_LIST_FIRST(&cd->q);
	if (m == NULL) {
		KASSERT(cd->backlog == 0);
		/* Empty queue, reset interval */
		cd->start = 0;
		return (NULL);
	}

	if (now - m->m_pkthdr.ph_timestamp < cp->target ||
	    cd->backlog <= cp->quantum) {
		/*
		 * The minimum delay decreased below the target, reset
		 * the current observation interval.
		 */
		cd->start = 0;
		return (m);
	}

	if (cd->start == 0) {
		/*
		 * This is the first packet to be delayed for more than
		 * the target, start the first observation interval after
		 * a single RTT and see if the minimum delay goes below
		 * the target within the interval, otherwise punish the
		 * next packet.
		 */
		cd->start = now + cp->interval;
	} else if (now > cd->start) {
		/* Delay stayed above target past the interval end */
		*drop = 1;
	}
	return (m);
}
394 
/* States of the dequeue state machine driven by codel_dequeue() */
enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };

/*
 * Compute the next state from the current state, the candidate packet
 * (NULL when the queue drained) and the drop recommendation produced
 * by codel_next_packet().
 */
static inline int
codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
    int state)
{
	/* After the first drop, try to deliver the following packet */
	if (state == FIRSTDROP)
		return (ACCEPTING);

	if (cd->dropping) {
		/* No drop recommended: leave the dropping state */
		if (!drop)
			return (RECOVERY);
		else if (now >= cd->next)
			/* Scheduled drop time reached */
			return (state == DROPPING ? CONTROL : DROPPING);
	} else if (drop)
		return (FIRSTDROP);

	if (m == NULL)
		return (RECOVERY);

	return (ACCEPTING);
}
417 
/*
 * Run the CoDel state machine and return the packet to deliver, or
 * NULL if the queue is empty.  Packets dropped along the way are
 * placed on free_ml and accounted in *dpkts/*dbytes; the caller is
 * responsible for freeing them.  The returned packet is still on the
 * queue until codel_commit() removes it.
 */
struct mbuf *
codel_dequeue(struct codel *cd, struct codel_params *cp, int64_t now,
    struct mbuf_list *free_ml, uint64_t *dpkts, uint64_t *dbytes)
{
	struct mbuf *m;
	unsigned short delta;
	int drop, state, done = 0;

	state = INITIAL;

	while (!done) {
		m = codel_next_packet(cd, cp, now, &drop);
		state = codel_state_change(cd, now, m, drop, state);

		switch (state) {
		case FIRSTDROP:
			/* Enter the dropping state, dropping this packet */
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			cd->dropping = 1;

			/*
			 * If we're still within the grace period and not
			 * meeting our minimal delay target we treat this
			 * as a continuation of the previous observation
			 * interval and shrink it further.  Otherwise we
			 * start from the initial one.
			 */
			delta = cd->drops - cd->ldrops;
			if (delta > 1) {
				if (now < cd->next ||
				    now - cd->next < codel_grace)
					cd->drops = delta;
				else
					cd->drops = 1;
			} else
				cd->drops = 1;
			control_law(cd, cp, now);
			cd->ldrops = cd->drops;

			/* fetches the next packet and goes to ACCEPTING */
			break;
		case DROPPING:
			m = codel_commit(cd, m);
			ml_enqueue(free_ml, m);
			cd->drops++;

			*dpkts += 1;
			*dbytes += m->m_pkthdr.len;

			/* fetches the next packet and goes to CONTROL */
			break;
		case CONTROL:
			if (drop) {
				/* Reschedule relative to the old deadline */
				control_law(cd, cp, cd->next);
				continue;
			}
			/* FALLTHROUGH */
		case RECOVERY:
			cd->dropping = 0;
			/* FALLTHROUGH */
		case ACCEPTING:
			done = 1;
			break;
		}
	}

	/* Record the sojourn time of the packet being delivered */
	if (m != NULL)
		cd->delay = now - m->m_pkthdr.ph_timestamp;

	return (m);
}
493 
494 struct mbuf *
495 codel_commit(struct codel *cd, struct mbuf *m)
496 {
497 	struct mbuf *n;
498 
499 	n = ml_dequeue(&cd->q);
500 	if (m)
501 		KASSERT(n == m);
502 	KASSERT(n != NULL);
503 	KASSERT(cd->backlog >= n->m_pkthdr.len);
504 	cd->backlog -= n->m_pkthdr.len;
505 	return (n);
506 }
507 
508 void
509 codel_purge(struct codel *cd, struct mbuf_list *ml)
510 {
511 	ml_enlist(ml, &cd->q);
512 	cd->backlog = 0;
513 }
514 
515 /*
516  * FQ-CoDel implementation
517  */
518 
519 static inline struct flow *
520 classify_flow(struct fqcodel *fqc, struct mbuf *m)
521 {
522 	unsigned int index;
523 
524 	if (m->m_pkthdr.ph_flowid & M_FLOWID_VALID)
525 		index = (m->m_pkthdr.ph_flowid & M_FLOWID_MASK) % fqc->nflows;
526 	else
527 		index = arc4random_uniform(fqc->nflows);
528 
529 	DPRINTF("%s: %u\n", __func__, index);
530 
531 	return (&fqc->flows[index]);
532 }
533 
534 struct mbuf *
535 fqcodel_enq(struct fqcodel *fqc, struct mbuf *m)
536 {
537 	struct flow *flow;
538 	unsigned int backlog = 0;
539 	int64_t now;
540 	int i;
541 
542 	flow = classify_flow(fqc, m);
543 	if (flow == NULL)
544 		return (m);
545 
546 	codel_gettime(&now);
547 	codel_enqueue(&flow->cd, now, m);
548 	fqc->qlength++;
549 
550 	if (!flow->active) {
551 		SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
552 		flow->deficit = fqc->quantum;
553 		flow->active = 1;
554 		DPRINTF("%s: flow %u active deficit %d\n", __func__,
555 		    flow->id, flow->deficit);
556 	}
557 
558 	/*
559 	 * Check the limit for all queues and remove a packet
560 	 * from the longest one.
561 	 */
562 	if (fqc->qlength >= fqcodel_qlimit) {
563 		for (i = 0; i < fqc->nflows; i++) {
564 			if (codel_backlog(&fqc->flows[i].cd) > backlog) {
565 				flow = &fqc->flows[i];
566 				backlog = codel_backlog(&flow->cd);
567 			}
568 		}
569 
570 		KASSERT(flow != NULL);
571 		m = codel_commit(&flow->cd, NULL);
572 
573 		fqc->drop_cnt.packets++;
574 		fqc->drop_cnt.bytes += m->m_pkthdr.len;
575 
576 		fqc->qlength--;
577 
578 		DPRINTF("%s: dropping from flow %u\n", __func__,
579 		    flow->id);
580 		return (m);
581 	}
582 
583 	return (NULL);
584 }
585 
586 static inline struct flowq *
587 select_queue(struct fqcodel *fqc)
588 {
589 	struct flowq *fq = NULL;
590 
591 	if (!SIMPLEQ_EMPTY(&fqc->newq))
592 		fq = &fqc->newq;
593 	else if (!SIMPLEQ_EMPTY(&fqc->oldq))
594 		fq = &fqc->oldq;
595 	return (fq);
596 }
597 
/*
 * Deficit Round-Robin: return the first flow on the active lists
 * with a positive byte credit.  Flows that have exhausted their
 * deficit get a fresh quantum and are rotated to the tail of the
 * old queue.  Returns NULL when no active flow remains; *fq is set
 * to the list the returned flow lives on.
 */
static inline struct flow *
first_flow(struct fqcodel *fqc, struct flowq **fq)
{
	struct flow *flow;

	while ((*fq = select_queue(fqc)) != NULL) {
		while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
			if (flow->deficit <= 0) {
				flow->deficit += fqc->quantum;
				SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
				SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
				    flowentry);
				DPRINTF("%s: flow %u deficit %d\n", __func__,
				    flow->id, flow->deficit);
			} else
				return (flow);
		}
	}

	return (NULL);
}
619 
/*
 * Called when the current flow yielded no packet: rotate or retire
 * it, then pick the next serviceable flow.  A flow on the new queue
 * is given one more pass via the old queue while other old flows are
 * waiting; otherwise the flow is deactivated.
 */
static inline struct flow *
next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
{
	SIMPLEQ_REMOVE_HEAD(*fq, flowentry);

	if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
		/* Demote the drained new flow to the old queue */
		SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
		DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
		    flow->id, flow->deficit);
	} else {
		/* The flow ran dry, take it out of the rotation */
		flow->active = 0;
		DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
		    flow->id, flow->deficit);
	}

	return (first_flow(fqc, fq));
}
639 
/*
 * Pick the next packet to transmit across all flows.  Packets that
 * CoDel drops along the way are collected on free_ml for the caller
 * to dispose of.  On success the selected flow is stored in *cookiep
 * for the subsequent fqcodel_deq_commit().
 */
struct mbuf *
fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep,
    struct mbuf_list *free_ml)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct flowq *fq;
	struct flow *flow;
	struct mbuf *m;
	int64_t now;

	/* Track the interface MTU unless pf pinned the quantum */
	if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
		fqc->quantum = fqc->ifp->if_mtu + max_linkhdr;

	codel_gettime(&now);

	for (flow = first_flow(fqc, &fq); flow != NULL;
	     flow = next_flow(fqc, flow, &fq)) {
		m = codel_dequeue(&flow->cd, &fqc->cparams, now, &ml,
		    &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes);

		/* Account for the packets CoDel dropped from this flow */
		KASSERT(fqc->qlength >= ml_len(&ml));
		fqc->qlength -= ml_len(&ml);

		ml_enlist(free_ml, &ml);

		if (m != NULL) {
			/* Charge the flow for the bytes about to leave */
			flow->deficit -= m->m_pkthdr.len;
			DPRINTF("%s: flow %u deficit %d\n", __func__,
			    flow->id, flow->deficit);
			*cookiep = flow;
			return (m);
		}
	}

	return (NULL);
}
676 
677 void
678 fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie)
679 {
680 	struct flow *flow = cookie;
681 
682 	KASSERT(fqc->qlength > 0);
683 	fqc->qlength--;
684 
685 	fqc->xmit_cnt.packets++;
686 	fqc->xmit_cnt.bytes += m->m_pkthdr.len;
687 
688 	(void)codel_commit(&flow->cd, m);
689 }
690 
691 void
692 fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml)
693 {
694 	unsigned int i;
695 
696 	for (i = 0; i < fqc->nflows; i++)
697 		codel_purge(&fqc->flows[i].cd, ml);
698 	fqc->qlength = 0;
699 }
700 
701 struct mbuf *
702 fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m)
703 {
704 	return fqcodel_enq(ifq->ifq_q, m);
705 }
706 
707 struct mbuf *
708 fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep)
709 {
710 	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
711 	struct mbuf *m;
712 
713 	m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml);
714 	ifq_mfreeml(ifq, &free_ml);
715 	return (m);
716 }
717 
718 void
719 fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
720 {
721 	return fqcodel_deq_commit(ifq->ifq_q, m, cookie);
722 }
723 
724 void
725 fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml)
726 {
727 	return fqcodel_purge(ifq->ifq_q, ml);
728 }
729 
730 void *
731 fqcodel_pf_alloc(struct ifnet *ifp)
732 {
733 	struct fqcodel *fqc;
734 
735 	fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO);
736 
737 	SIMPLEQ_INIT(&fqc->newq);
738 	SIMPLEQ_INIT(&fqc->oldq);
739 
740 	return (fqc);
741 }
742 
/*
 * Configure an fq-codel instance from a pf queue specification.
 * Returns EINVAL when the number of flows is out of range.
 *
 * NOTE(review): ifp comes from qs->kif->pfik_ifp and is dereferenced
 * below without a NULL check, unlike in fqcodel_pf_qstats() — confirm
 * pf guarantees a non-NULL interface at addqueue time.
 */
int
fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel *fqc = arg;

	if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > M_FLOWID_MASK)
		return (EINVAL);

	fqc->nflows = qs->flowqueue.flows;
	fqc->quantum = qs->flowqueue.quantum;
	/* Fall back to the global default when no limit was given */
	if (qs->qlimit > 0)
		fqc->qlimit = qs->qlimit;
	else
		fqc->qlimit = fqcodel_qlimit;
	/* A configured quantum is pinned; otherwise it tracks the MTU */
	if (fqc->quantum > 0)
		fqc->flags |= FQCF_FIXED_QUANTUM;
	else
		fqc->quantum = ifp->if_mtu + max_linkhdr;

	codel_initparams(&fqc->cparams, qs->flowqueue.target,
	    qs->flowqueue.interval, fqc->quantum);

	fqc->flows = mallocarray(fqc->nflows, sizeof(struct flow),
	    M_DEVBUF, M_WAITOK | M_ZERO);

#ifdef FQCODEL_DEBUG
	{
		unsigned int i;

		for (i = 0; i < fqc->nflows; i++)
			fqc->flows[i].id = i;
	}
#endif

	fqc->ifp = ifp;

	DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums "
	    "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit,
	    fqc->quantum, fqc->cparams.target / 1000000,
	    fqc->cparams.interval / 1000000);

	return (0);
}
787 
788 void
789 fqcodel_pf_free(void *arg)
790 {
791 	struct fqcodel *fqc = arg;
792 
793 	codel_freeparams(&fqc->cparams);
794 	free(fqc->flows, M_DEVBUF, fqc->nflows * sizeof(struct flow));
795 	free(fqc, M_DEVBUF, sizeof(struct fqcodel));
796 }
797 
/*
 * Copy a snapshot of the queue statistics out to userland (pfctl).
 * *nbytes holds the user buffer size on entry and the number of
 * bytes written on successful return.
 */
int
fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = qs->kif->pfik_ifp;
	struct fqcodel_stats stats;
	struct fqcodel *fqc;
	int64_t delay;
	unsigned int i;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	memset(&stats, 0, sizeof(stats));

	/* XXX: multi-q? */
	fqc = ifq_q_enter(&ifp->if_snd, ifq_fqcodel_ops);
	if (fqc == NULL)
		return (EBADF);

	stats.xmit_cnt = fqc->xmit_cnt;
	stats.drop_cnt = fqc->drop_cnt;

	stats.qlength = ifq_len(&ifp->if_snd);
	stats.qlimit = fqc->qlimit;

	stats.flows = 0;
	stats.delaysum = stats.delaysumsq = 0;

	/* Aggregate delay stats over the currently backlogged flows */
	for (i = 0; i < fqc->nflows; i++) {
		if (codel_qlength(&fqc->flows[i].cd) == 0)
			continue;
		/* Scale down to microseconds to avoid overflows */
		delay = codel_delay(&fqc->flows[i].cd) / 1000;
		stats.delaysum += delay;
		stats.delaysumsq += delay * delay;
		stats.flows++;
	}

	ifq_q_leave(&ifp->if_snd, fqc);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}
848 
849 unsigned int
850 fqcodel_pf_qlength(void *fqc)
851 {
852 	return ((struct fqcodel *)fqc)->qlength;
853 }
854 
/* pf adaptor around fqcodel_enq() */
struct mbuf *
fqcodel_pf_enqueue(void *fqc, struct mbuf *m)
{
	return (fqcodel_enq(fqc, m));
}
860 
/* pf adaptor around fqcodel_deq_begin() */
struct mbuf *
fqcodel_pf_deq_begin(void *fqc, void **cookiep, struct mbuf_list *free_ml)
{
	return (fqcodel_deq_begin(fqc, cookiep, free_ml));
}
866 
/* pf adaptor around fqcodel_deq_commit() */
void
fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie)
{
	/*
	 * Plain call: ISO C (C11 6.8.6.4) forbids returning the value
	 * of a void expression from a void function.
	 */
	fqcodel_deq_commit(fqc, m, cookie);
}
872 
/* pf adaptor around fqcodel_purge() */
void
fqcodel_pf_purge(void *fqc, struct mbuf_list *ml)
{
	/*
	 * Plain call: ISO C (C11 6.8.6.4) forbids returning the value
	 * of a void expression from a void function.
	 */
	fqcodel_purge(fqc, ml);
}
878 
/* A single fq-codel instance services the whole interface: index 0 */
unsigned int
fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
{
	(void)nqueues;
	(void)m;

	return (0);
}
884 
/* Nothing to allocate here: the instance was built in fqcodel_pf_alloc */
void *
fqcodel_alloc(unsigned int idx, void *arg)
{
	(void)idx;

	return (arg);
}
891 
/* Release happens in fqcodel_pf_free; this hook is intentionally empty */
void
fqcodel_free(unsigned int idx, void *arg)
{
	(void)idx;
	(void)arg;
}
897