1 /*	$OpenBSD: hfsc.c,v 1.33 2016/09/15 02:00:18 dlg Exp $	*/
2 
3 /*
4  * Copyright (c) 2012-2013 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
6  *
7  * Permission to use, copy, modify, and distribute this software and
8  * its documentation is hereby granted (including for commercial or
9  * for-profit use), provided that both the copyright notice and this
10  * permission notice appear in all copies of the software, derivative
11  * works, or modified versions, and any portions thereof.
12  *
13  * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
14  * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
15  * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
16  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
21  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  *
28  * Carnegie Mellon encourages (but does not require) users of this
29  * software to return any improvements or extensions that they make,
30  * and to grant Carnegie Mellon the rights to redistribute these
31  * changes without encumbrance.
32  */
33 /*
34  * H-FSC is described in Proceedings of SIGCOMM'97,
35  * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
36  * Real-Time and Priority Service"
37  * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
38  *
39  * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
40  * when a class has an upperlimit, the fit-time is computed from the
41  * upperlimit service curve.  the link-sharing scheduler does not schedule
42  * a class whose fit-time exceeds the current time.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/malloc.h>
47 #include <sys/pool.h>
48 #include <sys/mbuf.h>
49 #include <sys/socket.h>
50 #include <sys/systm.h>
51 #include <sys/errno.h>
52 #include <sys/queue.h>
53 #include <sys/kernel.h>
54 #include <sys/timeout.h>
55 
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <netinet/in.h>
59 
60 #include <net/pfvar.h>
61 #include <net/hfsc.h>
62 
63 /* need to provide dummies for hfsc-less kernels to reduce the if.h horror */
64 #include "pf.h"
65 #if NPF > 0
66 /*
67  * kernel internal service curve representation
68  *	coordinates are given by 64 bit unsigned integers.
69  *	x-axis: unit is clock count.  the clock is hfsc_microuptime(),
70  *		i.e. microuptime() scaled by HFSC_CLK_SHIFT (the original ALTQ
71  *		code used the raw Pentium TSC).  virtual time uses this scale too.
72  *	y-axis: unit is byte.
73  *
74  *	the service curve parameters are converted to the internal
75  *	representation.
76  *	the slope values are scaled to avoid overflow.
77  *	the inverse slope values as well as the y-projection of the 1st
78  *	segment are kept in order to avoid 64-bit divide operations
79  *	that are expensive on 32-bit architectures.
80  *
81  *  note: the 64-bit axes never wrap around in normal operation.
82  *	x-axis doesn't wrap around for over 2000 years at HFSC_FREQ (256M ticks/s).
83  *      y-axis doesn't wrap around for over 4000 years with 1Gbps bandwidth.
84  */
85 
86 /* kernel internal representation of a service curve */
87 struct hfsc_internal_sc {
88 	u_int64_t	sm1;	/* scaled slope of the 1st segment */
89 	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
90 	u_int64_t	dx;	/* the x-projection of the 1st segment */
91 	u_int64_t	dy;	/* the y-projection of the 1st segment */
92 	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
93 	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
94 };
95 
96 /* runtime service curve */
97 struct hfsc_runtime_sc {
98 	u_int64_t	x;	/* current starting position on x-axis */
99 	u_int64_t	y;	/* current starting position on y-axis */
100 	u_int64_t	sm1;	/* scaled slope of the 1st segment */
101 	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
102 	u_int64_t	dx;	/* the x-projection of the 1st segment */
103 	u_int64_t	dy;	/* the y-projection of the 1st segment */
104 	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
105 	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
106 };
107 
108 struct hfsc_classq {
109 	struct mbuf_list q;	 /* Queue of packets */
110 	int		 qlimit; /* Queue limit */
111 };
112 
113 /* for TAILQ based ellist and actlist implementation */
114 struct hfsc_class;
115 TAILQ_HEAD(hfsc_eligible, hfsc_class);
116 TAILQ_HEAD(hfsc_active, hfsc_class);
117 #define	hfsc_actlist_last(s)		TAILQ_LAST(s, hfsc_active)
118 
119 struct hfsc_class {
120 	u_int		cl_id;		/* class id (just for debug) */
121 	u_int32_t	cl_handle;	/* class handle */
122 	int		cl_flags;	/* misc flags */
123 
124 	struct hfsc_class *cl_parent;	/* parent class */
125 	struct hfsc_class *cl_siblings;	/* sibling classes */
126 	struct hfsc_class *cl_children;	/* child classes */
127 
128 	struct hfsc_classq cl_q;	/* class queue structure */
129 /*	struct red	*cl_red;*/	/* RED state */
130 	struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
131 
132 	u_int64_t	cl_total;	/* total work in bytes */
133 	u_int64_t	cl_cumul;	/* cumulative work in bytes
134 					   done by real-time criteria */
135 	u_int64_t	cl_d;		/* deadline */
136 	u_int64_t	cl_e;		/* eligible time */
137 	u_int64_t	cl_vt;		/* virtual time */
138 	u_int64_t	cl_f;		/* time when this class will fit for
139 					   link-sharing, max(myf, cfmin) */
140 	u_int64_t	cl_myf;		/* my fit-time (as calculated from this
141 					   class's own upperlimit curve) */
142 	u_int64_t	cl_myfadj;	/* my fit-time adjustment
143 					   (to cancel history dependence) */
144 	u_int64_t	cl_cfmin;	/* earliest children's fit-time (used
145 					   with cl_myf to obtain cl_f) */
146 	u_int64_t	cl_cvtmin;	/* minimal virtual time among the
147 					   children fit for link-sharing
148 					   (monotonic within a period) */
149 	u_int64_t	cl_vtadj;	/* intra-period cumulative vt
150 					   adjustment */
151 	u_int64_t	cl_vtoff;	/* inter-period cumulative vt offset */
152 	u_int64_t	cl_cvtmax;	/* max child's vt in the last period */
153 
154 	u_int64_t	cl_initvt;	/* init virtual time (for debugging) */
155 
156 	struct hfsc_internal_sc *cl_rsc; /* internal real-time service curve */
157 	struct hfsc_internal_sc *cl_fsc; /* internal fair service curve */
158 	struct hfsc_internal_sc *cl_usc; /* internal upperlimit service curve */
159 	struct hfsc_runtime_sc   cl_deadline; /* deadline curve */
160 	struct hfsc_runtime_sc   cl_eligible; /* eligible curve */
161 	struct hfsc_runtime_sc   cl_virtual;  /* virtual curve */
162 	struct hfsc_runtime_sc   cl_ulimit;   /* upperlimit curve */
163 
164 	u_int		cl_vtperiod;	/* vt period sequence no */
165 	u_int		cl_parentperiod;  /* parent's vt period seqno */
166 	int		cl_nactive;	/* number of active children */
167 	struct hfsc_active	cl_actc; /* active children list */
168 
169 	TAILQ_ENTRY(hfsc_class) cl_actlist; /* active children list entry */
170 	TAILQ_ENTRY(hfsc_class) cl_ellist; /* eligible list entry */
171 
172 	struct {
173 		struct hfsc_pktcntr xmit_cnt;
174 		struct hfsc_pktcntr drop_cnt;
175 		u_int period;
176 	} cl_stats;
177 };
178 
179 /*
180  * hfsc interface state
181  */
182 struct hfsc_if {
183 	struct hfsc_if		*hif_next;	/* interface state list */
184 	struct hfsc_class	*hif_rootclass;		/* root class */
185 	struct hfsc_class	*hif_defaultclass;	/* default class */
186 	struct hfsc_class	**hif_class_tbl;
187 
188 	u_int64_t		hif_microtime;	/* time at deq_begin */
189 
190 	u_int	hif_allocated;			/* # of slots in hif_class_tbl */
191 	u_int	hif_classes;			/* # of classes in the tree */
192 	u_int	hif_classid;			/* class id sequence number */
193 
194 	struct hfsc_eligible hif_eligible;	/* eligible list */
195 	struct timeout hif_defer;	/* for queues that weren't ready */
196 };
197 
198 /*
199  * function prototypes
200  */
201 struct hfsc_class	*hfsc_class_create(struct hfsc_if *,
202 			    struct hfsc_sc *, struct hfsc_sc *,
203 			    struct hfsc_sc *, struct hfsc_class *, int,
204 			    int, int);
205 int			 hfsc_class_destroy(struct hfsc_if *,
206 			    struct hfsc_class *);
207 struct hfsc_class	*hfsc_nextclass(struct hfsc_class *);
208 
209 void		 hfsc_cl_purge(struct hfsc_if *, struct hfsc_class *,
210 		     struct mbuf_list *);
211 
212 void		 hfsc_deferred(void *);
213 void		 hfsc_update_cfmin(struct hfsc_class *);
214 void		 hfsc_set_active(struct hfsc_if *, struct hfsc_class *, int);
215 void		 hfsc_set_passive(struct hfsc_if *, struct hfsc_class *);
216 void		 hfsc_init_ed(struct hfsc_if *, struct hfsc_class *, int);
217 void		 hfsc_update_ed(struct hfsc_if *, struct hfsc_class *, int);
218 void		 hfsc_update_d(struct hfsc_class *, int);
219 void		 hfsc_init_vf(struct hfsc_class *, int);
220 void		 hfsc_update_vf(struct hfsc_class *, int, u_int64_t);
221 void		 hfsc_ellist_insert(struct hfsc_if *, struct hfsc_class *);
222 void		 hfsc_ellist_remove(struct hfsc_if *, struct hfsc_class *);
223 void		 hfsc_ellist_update(struct hfsc_if *, struct hfsc_class *);
224 struct hfsc_class	*hfsc_ellist_get_mindl(struct hfsc_if *, u_int64_t);
225 void		 hfsc_actlist_insert(struct hfsc_class *);
226 void		 hfsc_actlist_remove(struct hfsc_class *);
227 void		 hfsc_actlist_update(struct hfsc_class *);
228 
229 struct hfsc_class	*hfsc_actlist_firstfit(struct hfsc_class *,
230 				    u_int64_t);
231 
232 static __inline u_int64_t	seg_x2y(u_int64_t, u_int64_t);
233 static __inline u_int64_t	seg_y2x(u_int64_t, u_int64_t);
234 static __inline u_int64_t	m2sm(u_int);
235 static __inline u_int64_t	m2ism(u_int);
236 static __inline u_int64_t	d2dx(u_int);
237 static __inline u_int		sm2m(u_int64_t);
238 static __inline u_int		dx2d(u_int64_t);
239 
240 void		hfsc_sc2isc(struct hfsc_sc *, struct hfsc_internal_sc *);
241 void		hfsc_rtsc_init(struct hfsc_runtime_sc *,
242 		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);
243 u_int64_t	hfsc_rtsc_y2x(struct hfsc_runtime_sc *, u_int64_t);
244 u_int64_t	hfsc_rtsc_x2y(struct hfsc_runtime_sc *, u_int64_t);
245 void		hfsc_rtsc_min(struct hfsc_runtime_sc *,
246 		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);
247 
248 void		hfsc_getclstats(struct hfsc_class_stats *, struct hfsc_class *);
249 struct hfsc_class	*hfsc_clh2cph(struct hfsc_if *, u_int32_t);
250 
251 #define	HFSC_CLK_SHIFT		8
252 #define	HFSC_FREQ		(1000000 << HFSC_CLK_SHIFT)
253 #define	HFSC_CLK_PER_TICK	(HFSC_FREQ / hz)
254 #define	HFSC_HT_INFINITY	0xffffffffffffffffLL /* infinite time value */
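
/*
 * in these units, HFSC_FREQ is 1000000 << 8 = 256,000,000 ticks per
 * second, i.e. one tick is 1/256 of a microsecond, which is what
 * hfsc_microuptime() below returns.  with hz at 100, for example,
 * HFSC_CLK_PER_TICK works out to 2,560,000 ticks per kernel clock tick.
 */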
255 
256 struct pool	hfsc_class_pl, hfsc_internal_sc_pl;
257 
258 /*
259  * ifqueue glue.
260  */
261 
262 void		*hfsc_alloc(void *);
263 void		 hfsc_free(void *);
264 int		 hfsc_enq(struct ifqueue *, struct mbuf *);
265 struct mbuf	*hfsc_deq_begin(struct ifqueue *, void **);
266 void		 hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *);
267 void		 hfsc_purge(struct ifqueue *, struct mbuf_list *);
268 
269 const struct ifq_ops hfsc_ops = {
270         hfsc_alloc,
271         hfsc_free,
272         hfsc_enq,
273         hfsc_deq_begin,
274         hfsc_deq_commit,
275         hfsc_purge,
276 };
277 
278 const struct ifq_ops * const ifq_hfsc_ops = &hfsc_ops;
279 
280 u_int64_t
281 hfsc_microuptime(void)
282 {
283 	struct timeval tv;
284 
285 	microuptime(&tv);
286 	return (((u_int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec) <<
287 	    HFSC_CLK_SHIFT);
288 }
289 
290 static inline u_int
291 hfsc_more_slots(u_int current)
292 {
293 	u_int want = current * 2;
294 
295 	return (want > HFSC_MAX_CLASSES ? HFSC_MAX_CLASSES : want);
296 }
297 
298 static void
299 hfsc_grow_class_tbl(struct hfsc_if *hif, u_int howmany)
300 {
301 	struct hfsc_class **newtbl, **old;
302 	size_t oldlen = sizeof(void *) * hif->hif_allocated;
303 
304 	newtbl = mallocarray(howmany, sizeof(void *), M_DEVBUF,
305 	    M_WAITOK | M_ZERO);
306 	old = hif->hif_class_tbl;
307 
308 	memcpy(newtbl, old, oldlen);
309 	hif->hif_class_tbl = newtbl;
310 	hif->hif_allocated = howmany;
311 
312 	free(old, M_DEVBUF, oldlen);
313 }
314 
315 void
316 hfsc_initialize(void)
317 {
318 	pool_init(&hfsc_class_pl, sizeof(struct hfsc_class), 0,
319 	    IPL_NONE, PR_WAITOK, "hfscclass", NULL);
320 	pool_init(&hfsc_internal_sc_pl, sizeof(struct hfsc_internal_sc), 0,
321 	    IPL_NONE, PR_WAITOK, "hfscintsc", NULL);
322 }
323 
324 struct hfsc_if *
325 hfsc_pf_alloc(struct ifnet *ifp)
326 {
327 	struct hfsc_if *hif;
328 
329 	KASSERT(ifp != NULL);
330 
331 	hif = malloc(sizeof(*hif), M_DEVBUF, M_WAITOK | M_ZERO);
332 	TAILQ_INIT(&hif->hif_eligible);
333 	hif->hif_class_tbl = mallocarray(HFSC_DEFAULT_CLASSES, sizeof(void *),
334 	    M_DEVBUF, M_WAITOK | M_ZERO);
335 	hif->hif_allocated = HFSC_DEFAULT_CLASSES;
336 
337 	timeout_set(&hif->hif_defer, hfsc_deferred, ifp);
338 
339 	return (hif);
340 }
341 
342 int
343 hfsc_pf_addqueue(struct hfsc_if *hif, struct pf_queuespec *q)
344 {
345 	struct hfsc_class *cl, *parent;
346 	struct hfsc_sc rtsc, lssc, ulsc;
347 
348 	KASSERT(hif != NULL);
349 
350 	if (q->parent_qid == HFSC_NULLCLASS_HANDLE &&
351 	    hif->hif_rootclass == NULL)
352 		parent = NULL;
353 	else if ((parent = hfsc_clh2cph(hif, q->parent_qid)) == NULL)
354 		return (EINVAL);
355 
356 	if (q->qid == 0)
357 		return (EINVAL);
358 
359 	if (hfsc_clh2cph(hif, q->qid) != NULL)
360 		return (EBUSY);
361 
362 	rtsc.m1 = q->realtime.m1.absolute;
363 	rtsc.d  = q->realtime.d;
364 	rtsc.m2 = q->realtime.m2.absolute;
365 	lssc.m1 = q->linkshare.m1.absolute;
366 	lssc.d  = q->linkshare.d;
367 	lssc.m2 = q->linkshare.m2.absolute;
368 	ulsc.m1 = q->upperlimit.m1.absolute;
369 	ulsc.d  = q->upperlimit.d;
370 	ulsc.m2 = q->upperlimit.m2.absolute;
371 
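	/*
	 * a spec whose m1 and m2 are both zero means "curve not set";
	 * hfsc_class_create() only allocates an internal service curve
	 * for the realtime, linkshare or upperlimit parameters when one
	 * of the slopes is non-zero.
	 */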
372 	cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
373 	    parent, q->qlimit, q->flags, q->qid);
374 	if (cl == NULL)
375 		return (ENOMEM);
376 
377 	return (0);
378 }
379 
380 int
381 hfsc_pf_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes)
382 {
383 	struct ifnet *ifp = q->kif->pfik_ifp;
384 	struct hfsc_if *hif;
385 	struct hfsc_class *cl;
386 	struct hfsc_class_stats stats;
387 	int error = 0;
388 
389 	if (ifp == NULL)
390 		return (EBADF);
391 
392 	if (*nbytes < sizeof(stats))
393 		return (EINVAL);
394 
395 	hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
396 	if (hif == NULL)
397 		return (EBADF);
398 
399 	if ((cl = hfsc_clh2cph(hif, q->qid)) == NULL) {
400 		ifq_q_leave(&ifp->if_snd, hif);
401 		return (EINVAL);
402 	}
403 
404 	hfsc_getclstats(&stats, cl);
405 	ifq_q_leave(&ifp->if_snd, hif);
406 
407 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
408 		return (error);
409 
410 	*nbytes = sizeof(stats);
411 	return (0);
412 }
413 
414 void
415 hfsc_pf_free(struct hfsc_if *hif)
416 {
417 	hfsc_free(hif);
418 }
419 
420 void *
421 hfsc_alloc(void *q)
422 {
423 	struct hfsc_if *hif = q;
424 	KASSERT(hif != NULL);
425 
426 	timeout_add(&hif->hif_defer, 1);
427 	return (hif);
428 }
429 
430 void
431 hfsc_free(void *q)
432 {
433 	struct hfsc_if *hif = q;
434 	int i;
435 
436 	KERNEL_ASSERT_LOCKED();
437 
438 	timeout_del(&hif->hif_defer);
439 
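	/*
	 * sweep the whole class table; empty slots are NULL, which
	 * hfsc_class_destroy() accepts and treats as a no-op.
	 */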
440 	i = hif->hif_allocated;
441 	do
442 		hfsc_class_destroy(hif, hif->hif_class_tbl[--i]);
443 	while (i > 0);
444 
445 	free(hif->hif_class_tbl, M_DEVBUF, hif->hif_allocated * sizeof(void *));
446 	free(hif, M_DEVBUF, sizeof(*hif));
447 }
448 
449 void
450 hfsc_purge(struct ifqueue *ifq, struct mbuf_list *ml)
451 {
452 	struct hfsc_if		*hif = ifq->ifq_q;
453 	struct hfsc_class	*cl;
454 
455 	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
456 		hfsc_cl_purge(hif, cl, ml);
457 }
458 
459 struct hfsc_class *
460 hfsc_class_create(struct hfsc_if *hif, struct hfsc_sc *rsc,
461     struct hfsc_sc *fsc, struct hfsc_sc *usc, struct hfsc_class *parent,
462     int qlimit, int flags, int qid)
463 {
464 	struct hfsc_class *cl, *p;
465 	int i, s;
466 
467 	if (qlimit == 0)
468 		qlimit = HFSC_DEFAULT_QLIMIT;
469 
470 	if (hif->hif_classes >= hif->hif_allocated) {
471 		u_int newslots = hfsc_more_slots(hif->hif_allocated);
472 
473 		if (newslots == hif->hif_allocated)
474 			return (NULL);
475 		hfsc_grow_class_tbl(hif, newslots);
476 	}
477 
478 	cl = pool_get(&hfsc_class_pl, PR_WAITOK | PR_ZERO);
479 	TAILQ_INIT(&cl->cl_actc);
480 
481 	ml_init(&cl->cl_q.q);
482 	cl->cl_q.qlimit = qlimit;
483 	cl->cl_flags = flags;
484 
485 	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
486 		cl->cl_rsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
487 		hfsc_sc2isc(rsc, cl->cl_rsc);
488 		hfsc_rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
489 		hfsc_rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
490 	}
491 	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
492 		cl->cl_fsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
493 		hfsc_sc2isc(fsc, cl->cl_fsc);
494 		hfsc_rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
495 	}
496 	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
497 		cl->cl_usc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
498 		hfsc_sc2isc(usc, cl->cl_usc);
499 		hfsc_rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
500 	}
501 
502 	cl->cl_id = hif->hif_classid++;
503 	cl->cl_handle = qid;
504 	cl->cl_parent = parent;
505 
506 	s = splnet();
507 	hif->hif_classes++;
508 
509 	/*
510 	 * find a free slot in the class table.  if the slot matching
511 	 * the lower bits of qid is free, use this slot.  otherwise,
512 	 * use the first free slot.
513 	 */
514 	i = qid % hif->hif_allocated;
515 	if (hif->hif_class_tbl[i] == NULL)
516 		hif->hif_class_tbl[i] = cl;
517 	else {
518 		for (i = 0; i < hif->hif_allocated; i++)
519 			if (hif->hif_class_tbl[i] == NULL) {
520 				hif->hif_class_tbl[i] = cl;
521 				break;
522 			}
523 		if (i == hif->hif_allocated) {
524 			splx(s);
525 			goto err_ret;
526 		}
527 	}
528 
529 	if (flags & HFSC_DEFAULTCLASS)
530 		hif->hif_defaultclass = cl;
531 
532 	if (parent == NULL)
533 		hif->hif_rootclass = cl;
534 	else {
535 		/* add this class to the children list of the parent */
536 		if ((p = parent->cl_children) == NULL)
537 			parent->cl_children = cl;
538 		else {
539 			while (p->cl_siblings != NULL)
540 				p = p->cl_siblings;
541 			p->cl_siblings = cl;
542 		}
543 	}
544 	splx(s);
545 
546 	return (cl);
547 
548 err_ret:
549 	if (cl->cl_fsc != NULL)
550 		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
551 	if (cl->cl_rsc != NULL)
552 		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
553 	if (cl->cl_usc != NULL)
554 		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
555 	pool_put(&hfsc_class_pl, cl);
556 	return (NULL);
557 }
558 
559 int
560 hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
561 {
562 	int i, s;
563 
564 	if (cl == NULL)
565 		return (0);
566 
567 	if (cl->cl_children != NULL)
568 		return (EBUSY);
569 
570 	s = splnet();
571 	KASSERT(ml_empty(&cl->cl_q.q));
572 
573 	if (cl->cl_parent != NULL) {
574 		struct hfsc_class *p = cl->cl_parent->cl_children;
575 
576 		if (p == cl)
577 			cl->cl_parent->cl_children = cl->cl_siblings;
578 		else do {
579 			if (p->cl_siblings == cl) {
580 				p->cl_siblings = cl->cl_siblings;
581 				break;
582 			}
583 		} while ((p = p->cl_siblings) != NULL);
584 	}
585 
586 	for (i = 0; i < hif->hif_allocated; i++)
587 		if (hif->hif_class_tbl[i] == cl) {
588 			hif->hif_class_tbl[i] = NULL;
589 			break;
590 		}
591 
592 	hif->hif_classes--;
593 	splx(s);
594 
595 	KASSERT(TAILQ_EMPTY(&cl->cl_actc));
596 
597 	if (cl == hif->hif_rootclass)
598 		hif->hif_rootclass = NULL;
599 	if (cl == hif->hif_defaultclass)
600 		hif->hif_defaultclass = NULL;
601 
602 	if (cl->cl_usc != NULL)
603 		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
604 	if (cl->cl_fsc != NULL)
605 		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
606 	if (cl->cl_rsc != NULL)
607 		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
608 	pool_put(&hfsc_class_pl, cl);
609 
610 	return (0);
611 }
612 
613 /*
614  * hfsc_nextclass returns the next class in the tree.
615  *   usage:
616  *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
617  *		do_something;
618  */
619 struct hfsc_class *
620 hfsc_nextclass(struct hfsc_class *cl)
621 {
622 	if (cl->cl_children != NULL)
623 		cl = cl->cl_children;
624 	else if (cl->cl_siblings != NULL)
625 		cl = cl->cl_siblings;
626 	else {
627 		while ((cl = cl->cl_parent) != NULL)
628 			if (cl->cl_siblings) {
629 				cl = cl->cl_siblings;
630 				break;
631 			}
632 	}
633 
634 	return (cl);
635 }
636 
637 int
638 hfsc_enq(struct ifqueue *ifq, struct mbuf *m)
639 {
640 	struct hfsc_if *hif = ifq->ifq_q;
641 	struct hfsc_class *cl;
642 
643 	if ((cl = hfsc_clh2cph(hif, m->m_pkthdr.pf.qid)) == NULL ||
644 	    cl->cl_children != NULL) {
645 		cl = hif->hif_defaultclass;
646 		if (cl == NULL)
647 			return (ENOBUFS);
648 		cl->cl_pktattr = NULL;
649 	}
650 
651 	if (ml_len(&cl->cl_q.q) >= cl->cl_q.qlimit) {
652 		/* drop occurred.  mbuf needs to be freed */
653 		PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len);
654 		return (ENOBUFS);
655 	}
656 
657 	ml_enqueue(&cl->cl_q.q, m);
658 	m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
659 
660 	/* successfully queued. */
661 	if (ml_len(&cl->cl_q.q) == 1)
662 		hfsc_set_active(hif, cl, m->m_pkthdr.len);
663 
664 	return (0);
665 }
666 
667 struct mbuf *
668 hfsc_deq_begin(struct ifqueue *ifq, void **cookiep)
669 {
670 	struct hfsc_if *hif = ifq->ifq_q;
671 	struct hfsc_class *cl, *tcl;
672 	struct mbuf *m;
673 	u_int64_t cur_time;
674 
675 	cur_time = hfsc_microuptime();
676 
677 	/*
678 	 * if there are eligible classes, use real-time criteria.
679 	 * find the class with the minimum deadline among
680 	 * the eligible classes.
681 	 */
682 	cl = hfsc_ellist_get_mindl(hif, cur_time);
683 	if (cl == NULL) {
684 		/*
685 		 * use link-sharing criteria
686 		 * get the class with the minimum vt in the hierarchy
687 		 */
688 		cl = NULL;
689 		tcl = hif->hif_rootclass;
690 
691 		while (tcl != NULL && tcl->cl_children != NULL) {
692 			tcl = hfsc_actlist_firstfit(tcl, cur_time);
693 			if (tcl == NULL)
694 				continue;
695 
696 			/*
697 			 * update parent's cl_cvtmin.
698 			 * don't update if the new vt is smaller.
699 			 */
700 			if (tcl->cl_parent->cl_cvtmin < tcl->cl_vt)
701 				tcl->cl_parent->cl_cvtmin = tcl->cl_vt;
702 
703 			cl = tcl;
704 		}
705 		/* XXX HRTIMER plan hfsc_deferred precisely here. */
706 		if (cl == NULL)
707 			return (NULL);
708 	}
709 
710 	m = MBUF_LIST_FIRST(&cl->cl_q.q);
711 	KASSERT(m != NULL);
712 
713 	hif->hif_microtime = cur_time;
714 	*cookiep = cl;
715 	return (m);
716 }
717 
718 void
719 hfsc_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
720 {
721 	struct hfsc_if *hif = ifq->ifq_q;
722 	struct hfsc_class *cl = cookie;
723 	struct mbuf *m0;
724 	int next_len, realtime = 0;
725 	u_int64_t cur_time = hif->hif_microtime;
726 
727 	/* check if the class was scheduled by real-time criteria */
728 	if (cl->cl_rsc != NULL)
729 		realtime = (cl->cl_e <= cur_time);
730 
731 	m0 = ml_dequeue(&cl->cl_q.q);
732 	KASSERT(m == m0);
733 
734 	PKTCNTR_INC(&cl->cl_stats.xmit_cnt, m->m_pkthdr.len);
735 
736 	hfsc_update_vf(cl, m->m_pkthdr.len, cur_time);
737 	if (realtime)
738 		cl->cl_cumul += m->m_pkthdr.len;
739 
740 	if (ml_len(&cl->cl_q.q) > 0) {
741 		if (cl->cl_rsc != NULL) {
742 			/* update ed */
743 			m0 = MBUF_LIST_FIRST(&cl->cl_q.q);
744 			next_len = m0->m_pkthdr.len;
745 
746 			if (realtime)
747 				hfsc_update_ed(hif, cl, next_len);
748 			else
749 				hfsc_update_d(cl, next_len);
750 		}
751 	} else {
752 		/* the class becomes passive */
753 		hfsc_set_passive(hif, cl);
754 	}
755 }
756 
757 void
758 hfsc_deferred(void *arg)
759 {
760 	struct ifnet *ifp = arg;
761 	struct hfsc_if *hif;
762 	int s;
763 
764 	KERNEL_ASSERT_LOCKED();
765 	KASSERT(HFSC_ENABLED(&ifp->if_snd));
766 
767 	s = splnet();
768 	if (!IFQ_IS_EMPTY(&ifp->if_snd))
769 		if_start(ifp);
770 	splx(s);
771 
772 	hif = ifp->if_snd.ifq_q;
773 
774 	/* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
775 	timeout_add(&hif->hif_defer, 1);
776 }
777 
778 void
779 hfsc_cl_purge(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf_list *ml)
780 {
781 	struct mbuf *m;
782 
783 	if (ml_empty(&cl->cl_q.q))
784 		return;
785 
786 	MBUF_LIST_FOREACH(&cl->cl_q.q, m)
787 		PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len);
788 
789 	ml_enlist(ml, &cl->cl_q.q);
790 
791 	hfsc_update_vf(cl, 0, 0);	/* remove cl from the actlist */
792 	hfsc_set_passive(hif, cl);
793 }
794 
795 void
796 hfsc_set_active(struct hfsc_if *hif, struct hfsc_class *cl, int len)
797 {
798 	if (cl->cl_rsc != NULL)
799 		hfsc_init_ed(hif, cl, len);
800 	if (cl->cl_fsc != NULL)
801 		hfsc_init_vf(cl, len);
802 
803 	cl->cl_stats.period++;
804 }
805 
806 void
807 hfsc_set_passive(struct hfsc_if *hif, struct hfsc_class *cl)
808 {
809 	if (cl->cl_rsc != NULL)
810 		hfsc_ellist_remove(hif, cl);
811 
812 	/*
813 	 * the actlist is handled in hfsc_update_vf(); hfsc_update_vf(cl, 0, 0)
814 	 * must be called explicitly to remove a class from the actlist
815 	 */
816 }
817 
818 void
819 hfsc_init_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
820 {
821 	u_int64_t cur_time;
822 
823 	cur_time = hfsc_microuptime();
824 
825 	/* update the deadline curve */
826 	hfsc_rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
827 
828 	/*
829 	 * update the eligible curve.
830 	 * for concave, it is equal to the deadline curve.
831 	 * for convex, it is a linear curve with slope m2.
832 	 */
833 	cl->cl_eligible = cl->cl_deadline;
834 	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
835 		cl->cl_eligible.dx = 0;
836 		cl->cl_eligible.dy = 0;
837 	}
838 
839 	/* compute e and d */
840 	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
841 	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
842 
843 	hfsc_ellist_insert(hif, cl);
844 }
845 
846 void
847 hfsc_update_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
848 {
849 	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
850 	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
851 
852 	hfsc_ellist_update(hif, cl);
853 }
854 
855 void
856 hfsc_update_d(struct hfsc_class *cl, int next_len)
857 {
858 	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
859 }
860 
861 void
862 hfsc_init_vf(struct hfsc_class *cl, int len)
863 {
864 	struct hfsc_class *max_cl, *p;
865 	u_int64_t vt, f, cur_time;
866 	int go_active;
867 
868 	cur_time = 0;
869 	go_active = 1;
870 	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
871 		if (go_active && cl->cl_nactive++ == 0)
872 			go_active = 1;
873 		else
874 			go_active = 0;
875 
876 		if (go_active) {
877 			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc,
878 			    hfsc_active);
879 			if (max_cl != NULL) {
880 				/*
881 				 * set vt to the average of the min and max
882 				 * classes.  if the parent's period didn't
883 				 * change, don't decrease vt of the class.
884 				 */
885 				vt = max_cl->cl_vt;
886 				if (cl->cl_parent->cl_cvtmin != 0)
887 					vt = (cl->cl_parent->cl_cvtmin + vt)/2;
888 
889 				if (cl->cl_parent->cl_vtperiod !=
890 				    cl->cl_parentperiod || vt > cl->cl_vt)
891 					cl->cl_vt = vt;
892 			} else {
893 				/*
894 				 * first child for a new parent backlog period.
895 				 * add parent's cvtmax to vtoff of children
896 				 * to make a new vt (vtoff + vt) larger than
897 				 * the vt in the last period for all children.
898 				 */
899 				vt = cl->cl_parent->cl_cvtmax;
900 				for (p = cl->cl_parent->cl_children; p != NULL;
901 				     p = p->cl_siblings)
902 					p->cl_vtoff += vt;
903 				cl->cl_vt = 0;
904 				cl->cl_parent->cl_cvtmax = 0;
905 				cl->cl_parent->cl_cvtmin = 0;
906 			}
907 			cl->cl_initvt = cl->cl_vt;
908 
909 			/* update the virtual curve */
910 			vt = cl->cl_vt + cl->cl_vtoff;
911 			hfsc_rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt,
912 			    cl->cl_total);
913 			if (cl->cl_virtual.x == vt) {
914 				cl->cl_virtual.x -= cl->cl_vtoff;
915 				cl->cl_vtoff = 0;
916 			}
917 			cl->cl_vtadj = 0;
918 
919 			cl->cl_vtperiod++;  /* increment vt period */
920 			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
921 			if (cl->cl_parent->cl_nactive == 0)
922 				cl->cl_parentperiod++;
923 			cl->cl_f = 0;
924 
925 			hfsc_actlist_insert(cl);
926 
927 			if (cl->cl_usc != NULL) {
928 				/* class has upper limit curve */
929 				if (cur_time == 0)
930 					cur_time = hfsc_microuptime();
931 
932 				/* update the ulimit curve */
933 				hfsc_rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
934 				    cl->cl_total);
935 				/* compute myf */
936 				cl->cl_myf = hfsc_rtsc_y2x(&cl->cl_ulimit,
937 				    cl->cl_total);
938 				cl->cl_myfadj = 0;
939 			}
940 		}
941 
942 		if (cl->cl_myf > cl->cl_cfmin)
943 			f = cl->cl_myf;
944 		else
945 			f = cl->cl_cfmin;
946 		if (f != cl->cl_f) {
947 			cl->cl_f = f;
948 			hfsc_update_cfmin(cl->cl_parent);
949 		}
950 	}
951 }
952 
953 void
954 hfsc_update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
955 {
956 	u_int64_t f, myf_bound, delta;
957 	int go_passive;
958 
959 	go_passive = ml_empty(&cl->cl_q.q);
960 
961 	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
962 		cl->cl_total += len;
963 
964 		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
965 			continue;
966 
967 		if (go_passive && --cl->cl_nactive == 0)
968 			go_passive = 1;
969 		else
970 			go_passive = 0;
971 
972 		if (go_passive) {
973 			/* no more active child, going passive */
974 
975 			/* update cvtmax of the parent class */
976 			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
977 				cl->cl_parent->cl_cvtmax = cl->cl_vt;
978 
979 			/* remove this class from the vt list */
980 			hfsc_actlist_remove(cl);
981 
982 			hfsc_update_cfmin(cl->cl_parent);
983 
984 			continue;
985 		}
986 
987 		/*
988 		 * update vt and f
989 		 */
990 		cl->cl_vt = hfsc_rtsc_y2x(&cl->cl_virtual, cl->cl_total)
991 		    - cl->cl_vtoff + cl->cl_vtadj;
992 
993 		/*
994 		 * if vt of the class is smaller than cvtmin,
995 		 * the class was skipped in the past due to non-fit.
996 		 * if so, we need to adjust vtadj.
997 		 */
998 		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
999 			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
1000 			cl->cl_vt = cl->cl_parent->cl_cvtmin;
1001 		}
1002 
1003 		/* update the vt list */
1004 		hfsc_actlist_update(cl);
1005 
1006 		if (cl->cl_usc != NULL) {
1007 			cl->cl_myf = cl->cl_myfadj +
1008 			    hfsc_rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
1009 
1010 			/*
1011 			 * if myf lags behind by more than one clock tick
1012 			 * from the current time, adjust myfadj to prevent
1013 			 * a rate-limited class from going greedy.
1014 			 * in a steady state under rate-limiting, myf
1015 			 * fluctuates within one clock tick.
1016 			 */
1017 			myf_bound = cur_time - HFSC_CLK_PER_TICK;
1018 			if (cl->cl_myf < myf_bound) {
1019 				delta = cur_time - cl->cl_myf;
1020 				cl->cl_myfadj += delta;
1021 				cl->cl_myf += delta;
1022 			}
1023 		}
1024 
1025 		/* cl_f is max(cl_myf, cl_cfmin) */
1026 		if (cl->cl_myf > cl->cl_cfmin)
1027 			f = cl->cl_myf;
1028 		else
1029 			f = cl->cl_cfmin;
1030 		if (f != cl->cl_f) {
1031 			cl->cl_f = f;
1032 			hfsc_update_cfmin(cl->cl_parent);
1033 		}
1034 	}
1035 }
1036 
1037 void
1038 hfsc_update_cfmin(struct hfsc_class *cl)
1039 {
1040 	struct hfsc_class *p;
1041 	u_int64_t cfmin;
1042 
1043 	if (TAILQ_EMPTY(&cl->cl_actc)) {
1044 		cl->cl_cfmin = 0;
1045 		return;
1046 	}
1047 	cfmin = HFSC_HT_INFINITY;
1048 	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
1049 		if (p->cl_f == 0) {
1050 			cl->cl_cfmin = 0;
1051 			return;
1052 		}
1053 		if (p->cl_f < cfmin)
1054 			cfmin = p->cl_f;
1055 	}
1056 	cl->cl_cfmin = cfmin;
1057 }
1058 
1059 /*
1060  * the eligible list holds backlogged classes sorted by their eligible times.
1061  * there is one eligible list per interface.
1062  */
1063 void
1064 hfsc_ellist_insert(struct hfsc_if *hif, struct hfsc_class *cl)
1065 {
1066 	struct hfsc_class *p;
1067 
1068 	/* check the last entry first */
1069 	if ((p = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible)) == NULL ||
1070 	    p->cl_e <= cl->cl_e) {
1071 		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
1072 		return;
1073 	}
1074 
1075 	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
1076 		if (cl->cl_e < p->cl_e) {
1077 			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
1078 			return;
1079 		}
1080 	}
1081 }
1082 
1083 void
1084 hfsc_ellist_remove(struct hfsc_if *hif, struct hfsc_class *cl)
1085 {
1086 	TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
1087 }
1088 
1089 void
1090 hfsc_ellist_update(struct hfsc_if *hif, struct hfsc_class *cl)
1091 {
1092 	struct hfsc_class *p, *last;
1093 
1094 	/*
1095 	 * the eligible time of a class increases monotonically.
1096 	 * if the next entry has a larger eligible time, nothing to do.
1097 	 */
1098 	p = TAILQ_NEXT(cl, cl_ellist);
1099 	if (p == NULL || cl->cl_e <= p->cl_e)
1100 		return;
1101 
1102 	/* check the last entry */
1103 	last = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible);
1104 	if (last->cl_e <= cl->cl_e) {
1105 		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
1106 		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
1107 		return;
1108 	}
1109 
1110 	/*
1111 	 * the new position must be between the next entry
1112 	 * and the last entry
1113 	 */
1114 	while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
1115 		if (cl->cl_e < p->cl_e) {
1116 			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
1117 			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
1118 			return;
1119 		}
1120 	}
1121 }
1122 
1123 /* find the class with the minimum deadline among the eligible classes */
1124 struct hfsc_class *
1125 hfsc_ellist_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
1126 {
1127 	struct hfsc_class *p, *cl = NULL;
1128 
1129 	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
1130 		if (p->cl_e > cur_time)
1131 			break;
1132 		if (cl == NULL || p->cl_d < cl->cl_d)
1133 			cl = p;
1134 	}
1135 	return (cl);
1136 }
1137 
1138 /*
1139  * the active children list holds backlogged child classes sorted
1140  * by their virtual time.
1141  * each intermediate class has one active children list.
1142  */
1143 void
1144 hfsc_actlist_insert(struct hfsc_class *cl)
1145 {
1146 	struct hfsc_class *p;
1147 
1148 	/* check the last entry first */
1149 	if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active)) == NULL
1150 	    || p->cl_vt <= cl->cl_vt) {
1151 		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
1152 		return;
1153 	}
1154 
1155 	TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
1156 		if (cl->cl_vt < p->cl_vt) {
1157 			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
1158 			return;
1159 		}
1160 	}
1161 }
1162 
1163 void
1164 hfsc_actlist_remove(struct hfsc_class *cl)
1165 {
1166 	TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
1167 }
1168 
1169 void
1170 hfsc_actlist_update(struct hfsc_class *cl)
1171 {
1172 	struct hfsc_class *p, *last;
1173 
1174 	/*
1175 	 * the virtual time of a class increases monotonically during its
1176 	 * backlogged period.
1177 	 * if the next entry has a larger virtual time, nothing to do.
1178 	 */
1179 	p = TAILQ_NEXT(cl, cl_actlist);
1180 	if (p == NULL || cl->cl_vt < p->cl_vt)
1181 		return;
1182 
1183 	/* check the last entry */
1184 	last = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active);
1185 	if (last->cl_vt <= cl->cl_vt) {
1186 		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
1187 		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
1188 		return;
1189 	}
1190 
1191 	/*
1192 	 * the new position must be between the next entry
1193 	 * and the last entry
1194 	 */
1195 	while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
1196 		if (cl->cl_vt < p->cl_vt) {
1197 			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
1198 			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
1199 			return;
1200 		}
1201 	}
1202 }
1203 
1204 struct hfsc_class *
1205 hfsc_actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
1206 {
1207 	struct hfsc_class *p;
1208 
1209 	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist)
1210 		if (p->cl_f <= cur_time)
1211 			return (p);
1212 
1213 	return (NULL);
1214 }
1215 
1216 /*
1217  * service curve support functions
1218  *
1219  *  external service curve parameters
1220  *	m: bits/sec
1221  *	d: msec
1222  *  internal service curve parameters
1223  *	sm: (bytes/tsc_interval) << SM_SHIFT
1224  *	ism: (tsc_count/byte) << ISM_SHIFT
1225  *	dx: tsc_count
1226  *
1227  * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
1228  * we should be able to handle 100K-1Gbps linkspeed with 200MHz-1GHz CPU
1229  * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
1230  * digits in decimal using the following table.
1231  *
1232  *  bits/sec    100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
1233  *  ----------+-------------------------------------------------------
1234  *  bytes/nsec  12.5e-6    125e-6     1250e-6    12500e-6   125000e-6
1235  *  sm(500MHz)  25.0e-6    250e-6     2500e-6    25000e-6   250000e-6
1236  *  sm(200MHz)  62.5e-6    625e-6     6250e-6    62500e-6   625000e-6
1237  *
1238  *  nsec/byte   80000      8000       800        80         8
1239  *  ism(500MHz) 40000      4000       400        40         4
1240  *  ism(200MHz) 16000      1600       160        16         1.6
1241  */
1242 #define	SM_SHIFT	24
1243 #define	ISM_SHIFT	10
1244 
1245 #define	SM_MASK		((1LL << SM_SHIFT) - 1)
1246 #define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
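
/*
 * a worked example, assuming a 10Mbps slope: 10Mbps is 1,250,000 bytes/sec,
 * or 1,250,000 / HFSC_FREQ = 0.0048828125 bytes per tick, so
 * m2sm(10000000) == 0.0048828125 * 2^24 == 81920.  conversely that is
 * 204.8 ticks per byte, so m2ism(10000000) == 209715 (204.8 * 2^10,
 * truncated by the integer division).
 */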
1247 
1248 static __inline u_int64_t
1249 seg_x2y(u_int64_t x, u_int64_t sm)
1250 {
1251 	u_int64_t y;
1252 
1253 	/*
1254 	 * compute
1255 	 *	y = x * sm >> SM_SHIFT
1256 	 * but divide it for the upper and lower bits to avoid overflow
1257 	 */
1258 	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
1259 	return (y);
1260 }
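
/*
 * e.g. with sm = m2sm(10000000) = 81920 (10Mbps) and x = d2dx(50) =
 * 12,800,000 ticks (50 msec), seg_x2y() returns
 * (12,800,000 * 81920) >> 24 = 62,500 bytes, i.e. 10Mbps sustained for
 * 50 msec.  the rate and delay here are only for illustration.
 */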
1261 
1262 static __inline u_int64_t
1263 seg_y2x(u_int64_t y, u_int64_t ism)
1264 {
1265 	u_int64_t x;
1266 
1267 	if (y == 0)
1268 		x = 0;
1269 	else if (ism == HFSC_HT_INFINITY)
1270 		x = HFSC_HT_INFINITY;
1271 	else {
1272 		x = (y >> ISM_SHIFT) * ism
1273 		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
1274 	}
1275 	return (x);
1276 }
1277 
1278 static __inline u_int64_t
1279 m2sm(u_int m)
1280 {
1281 	u_int64_t sm;
1282 
1283 	sm = ((u_int64_t)m << SM_SHIFT) / 8 / HFSC_FREQ;
1284 	return (sm);
1285 }
1286 
1287 static __inline u_int64_t
1288 m2ism(u_int m)
1289 {
1290 	u_int64_t ism;
1291 
1292 	if (m == 0)
1293 		ism = HFSC_HT_INFINITY;
1294 	else
1295 		ism = ((u_int64_t)HFSC_FREQ << ISM_SHIFT) * 8 / m;
1296 	return (ism);
1297 }
1298 
1299 static __inline u_int64_t
1300 d2dx(u_int d)
1301 {
1302 	u_int64_t dx;
1303 
1304 	dx = ((u_int64_t)d * HFSC_FREQ) / 1000;
1305 	return (dx);
1306 }
1307 
1308 static __inline u_int
1309 sm2m(u_int64_t sm)
1310 {
1311 	u_int64_t m;
1312 
1313 	m = (sm * 8 * HFSC_FREQ) >> SM_SHIFT;
1314 	return ((u_int)m);
1315 }
1316 
1317 static __inline u_int
1318 dx2d(u_int64_t dx)
1319 {
1320 	u_int64_t d;
1321 
1322 	d = dx * 1000 / HFSC_FREQ;
1323 	return ((u_int)d);
1324 }
1325 
1326 void
1327 hfsc_sc2isc(struct hfsc_sc *sc, struct hfsc_internal_sc *isc)
1328 {
1329 	isc->sm1 = m2sm(sc->m1);
1330 	isc->ism1 = m2ism(sc->m1);
1331 	isc->dx = d2dx(sc->d);
1332 	isc->dy = seg_x2y(isc->dx, isc->sm1);
1333 	isc->sm2 = m2sm(sc->m2);
1334 	isc->ism2 = m2ism(sc->m2);
1335 }
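
/*
 * sketch of the whole conversion for an illustrative two-slope curve
 * (m1 = 20Mbps, d = 50 msec, m2 = 10Mbps):
 *	sm1 = 163840	ism1 = 104857	dx = 12,800,000	dy = 125,000
 *	sm2 = 81920	ism2 = 209715
 * the first segment lasts 12.8M ticks (50 msec) and carries 125,000
 * bytes (20Mbps for 50 msec); after that the slope drops to m2.
 */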
1336 
1337 /*
1338  * initialize the runtime service curve with the given internal
1339  * service curve starting at (x, y).
1340  */
1341 void
1342 hfsc_rtsc_init(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc * isc,
1343     u_int64_t x, u_int64_t y)
1344 {
1345 	rtsc->x =	x;
1346 	rtsc->y =	y;
1347 	rtsc->sm1 =	isc->sm1;
1348 	rtsc->ism1 =	isc->ism1;
1349 	rtsc->dx =	isc->dx;
1350 	rtsc->dy =	isc->dy;
1351 	rtsc->sm2 =	isc->sm2;
1352 	rtsc->ism2 =	isc->ism2;
1353 }
1354 
1355 /*
1356  * calculate the x-projection of the runtime service curve by the
1357  * given y-projection value
1358  */
1359 u_int64_t
1360 hfsc_rtsc_y2x(struct hfsc_runtime_sc *rtsc, u_int64_t y)
1361 {
1362 	u_int64_t x;
1363 
1364 	if (y < rtsc->y)
1365 		x = rtsc->x;
1366 	else if (y <= rtsc->y + rtsc->dy) {
1367 		/* x belongs to the 1st segment */
1368 		if (rtsc->dy == 0)
1369 			x = rtsc->x + rtsc->dx;
1370 		else
1371 			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
1372 	} else {
1373 		/* x belongs to the 2nd segment */
1374 		x = rtsc->x + rtsc->dx
1375 		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
1376 	}
1377 	return (x);
1378 }
1379 
1380 u_int64_t
1381 hfsc_rtsc_x2y(struct hfsc_runtime_sc *rtsc, u_int64_t x)
1382 {
1383 	u_int64_t y;
1384 
1385 	if (x <= rtsc->x)
1386 		y = rtsc->y;
1387 	else if (x <= rtsc->x + rtsc->dx)
1388 		/* y belongs to the 1st segment */
1389 		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
1390 	else
1391 		/* y belongs to the 2nd segment */
1392 		y = rtsc->y + rtsc->dy
1393 		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
1394 	return (y);
1395 }
1396 
1397 /*
1398  * update the runtime service curve by taking the minimum of the current
1399  * runtime service curve and the service curve starting at (x, y).
1400  */
1401 void
1402 hfsc_rtsc_min(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc *isc,
1403     u_int64_t x, u_int64_t y)
1404 {
1405 	u_int64_t y1, y2, dx, dy;
1406 
1407 	if (isc->sm1 <= isc->sm2) {
1408 		/* service curve is convex */
1409 		y1 = hfsc_rtsc_x2y(rtsc, x);
1410 		if (y1 < y)
1411 			/* the current rtsc is smaller */
1412 			return;
1413 		rtsc->x = x;
1414 		rtsc->y = y;
1415 		return;
1416 	}
1417 
1418 	/*
1419 	 * service curve is concave
1420 	 * compute the two y values of the current rtsc
1421 	 *	y1: at x
1422 	 *	y2: at (x + dx)
1423 	 */
1424 	y1 = hfsc_rtsc_x2y(rtsc, x);
1425 	if (y1 <= y) {
1426 		/* rtsc is below isc, no change to rtsc */
1427 		return;
1428 	}
1429 
1430 	y2 = hfsc_rtsc_x2y(rtsc, x + isc->dx);
1431 	if (y2 >= y + isc->dy) {
1432 		/* rtsc is above isc, replace rtsc by isc */
1433 		rtsc->x = x;
1434 		rtsc->y = y;
1435 		rtsc->dx = isc->dx;
1436 		rtsc->dy = isc->dy;
1437 		return;
1438 	}
1439 
1440 	/*
1441 	 * the two curves intersect
1442 	 * compute the offsets (dx, dy) using the reverse
1443 	 * function of seg_x2y()
1444 	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
1445 	 */
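	/*
	 * i.e. (dx * sm1) >> SM_SHIFT == ((dx * sm2) >> SM_SHIFT) + (y1 - y),
	 * which rearranges to dx == ((y1 - y) << SM_SHIFT) / (sm1 - sm2).
	 */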
1446 	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
1447 	/*
1448 	 * check if (x, y1) belongs to the 1st segment of rtsc.
1449 	 * if so, add the offset.
1450 	 */
1451 	if (rtsc->x + rtsc->dx > x)
1452 		dx += rtsc->x + rtsc->dx - x;
1453 	dy = seg_x2y(dx, isc->sm1);
1454 
1455 	rtsc->x = x;
1456 	rtsc->y = y;
1457 	rtsc->dx = dx;
1458 	rtsc->dy = dy;
1459 	return;
1460 }
1461 
1462 void
1463 hfsc_getclstats(struct hfsc_class_stats *sp, struct hfsc_class *cl)
1464 {
1465 	sp->class_id = cl->cl_id;
1466 	sp->class_handle = cl->cl_handle;
1467 
1468 	if (cl->cl_rsc != NULL) {
1469 		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
1470 		sp->rsc.d = dx2d(cl->cl_rsc->dx);
1471 		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
1472 	} else {
1473 		sp->rsc.m1 = 0;
1474 		sp->rsc.d = 0;
1475 		sp->rsc.m2 = 0;
1476 	}
1477 	if (cl->cl_fsc != NULL) {
1478 		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
1479 		sp->fsc.d = dx2d(cl->cl_fsc->dx);
1480 		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
1481 	} else {
1482 		sp->fsc.m1 = 0;
1483 		sp->fsc.d = 0;
1484 		sp->fsc.m2 = 0;
1485 	}
1486 	if (cl->cl_usc != NULL) {
1487 		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
1488 		sp->usc.d = dx2d(cl->cl_usc->dx);
1489 		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
1490 	} else {
1491 		sp->usc.m1 = 0;
1492 		sp->usc.d = 0;
1493 		sp->usc.m2 = 0;
1494 	}
1495 
1496 	sp->total = cl->cl_total;
1497 	sp->cumul = cl->cl_cumul;
1498 
1499 	sp->d = cl->cl_d;
1500 	sp->e = cl->cl_e;
1501 	sp->vt = cl->cl_vt;
1502 	sp->f = cl->cl_f;
1503 
1504 	sp->initvt = cl->cl_initvt;
1505 	sp->vtperiod = cl->cl_vtperiod;
1506 	sp->parentperiod = cl->cl_parentperiod;
1507 	sp->nactive = cl->cl_nactive;
1508 	sp->vtoff = cl->cl_vtoff;
1509 	sp->cvtmax = cl->cl_cvtmax;
1510 	sp->myf = cl->cl_myf;
1511 	sp->cfmin = cl->cl_cfmin;
1512 	sp->cvtmin = cl->cl_cvtmin;
1513 	sp->myfadj = cl->cl_myfadj;
1514 	sp->vtadj = cl->cl_vtadj;
1515 
1516 	sp->cur_time = hfsc_microuptime();
1517 	sp->machclk_freq = HFSC_FREQ;
1518 
1519 	sp->qlength = ml_len(&cl->cl_q.q);
1520 	sp->qlimit = cl->cl_q.qlimit;
1521 	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
1522 	sp->drop_cnt = cl->cl_stats.drop_cnt;
1523 	sp->period = cl->cl_stats.period;
1524 
1525 	sp->qtype = 0;
1526 }
1527 
1528 /* convert a class handle to the corresponding class pointer */
1529 struct hfsc_class *
1530 hfsc_clh2cph(struct hfsc_if *hif, u_int32_t chandle)
1531 {
1532 	int i;
1533 	struct hfsc_class *cl;
1534 
1535 	if (chandle == 0)
1536 		return (NULL);
1537 	/*
1538 	 * first, try the slot corresponding to the lower bits of the handle.
1539 	 * if it does not match, do the linear table search.
1540 	 */
1541 	i = chandle % hif->hif_allocated;
1542 	if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
1543 		return (cl);
1544 	for (i = 0; i < hif->hif_allocated; i++)
1545 		if ((cl = hif->hif_class_tbl[i]) != NULL &&
1546 		    cl->cl_handle == chandle)
1547 			return (cl);
1548 	return (NULL);
1549 }
1550 #endif
1551