xref: /netbsd-src/sys/altq/altq_cdnr.c (revision 0633b543c2bc91e93b550ec10f8042775c1b1b4c)
1 /*	$NetBSD: altq_cdnr.c,v 1.23 2025/01/08 13:00:04 joe Exp $	*/
2 /*	$KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $	*/
3 
4 /*
5  * Copyright (C) 1999-2002
6  *	Sony Computer Science Laboratories Inc.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: altq_cdnr.c,v 1.23 2025/01/08 13:00:04 joe Exp $");
32 
33 #ifdef _KERNEL_OPT
34 #include "opt_altq.h"
35 #include "opt_inet.h"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/errno.h>
46 #include <sys/kernel.h>
47 #include <sys/queue.h>
48 #include <sys/kauth.h>
49 #include <sys/cprng.h>
50 
51 #include <net/if.h>
52 #include <net/if_types.h>
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #ifdef INET6
57 #include <netinet/ip6.h>
58 #endif
59 
60 #include <altq/altq.h>
61 #include <altq/altq_conf.h>
62 #include <altq/altq_cdnr.h>
63 
64 #ifdef ALTQ3_COMPAT
65 /*
66  * diffserv traffic conditioning module
67  */
68 
/*
 * Global conditioner flag, initially 0.
 * NOTE(review): nothing in the visible part of this file reads or
 * writes it -- confirm who owns it.
 */
int altq_cdnr_enabled = 0;

/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
#ifdef ALTQ_CDNR

/*
 * tcb_list keeps every top-level conditioner allocated by top_create()
 * (one per interface a conditioner has been attached to).
 */
static LIST_HEAD(, top_cdnr) tcb_list;

/* packet input path, interface lookup and handle/block helpers */
static int altq_cdnr_input(struct mbuf *, int);
static struct top_cdnr *tcb_lookup(char *ifname);
static struct cdnr_block *cdnr_handle2cb(u_long);
static u_long cdnr_cb2handle(struct cdnr_block *);
static void *cdnr_cballoc(struct top_cdnr *, int,
       struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
static void cdnr_cbdestroy(void *);
/* validation, import and release of tc_action descriptions */
static int tca_verify_action(struct tc_action *);
static void tca_import_action(struct tc_action *, struct tc_action *);
static void tca_invalidate_action(struct tc_action *);

/* creation, destruction and per-packet input of each conditioner type */
static int generic_element_destroy(struct cdnr_block *);
static struct top_cdnr *top_create(struct ifaltq *);
static int top_destroy(struct top_cdnr *);
static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
static int element_destroy(struct cdnr_block *);
static void tb_import_profile(struct tbe *, struct tb_profile *);
static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
				  struct tc_action *, struct tc_action *);
static int tbm_destroy(struct tbmeter *);
static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
static struct trtcm *trtcm_create(struct top_cdnr *,
		  struct tb_profile *, struct tb_profile *,
		  struct tc_action *, struct tc_action *, struct tc_action *,
		  int);
static int trtcm_destroy(struct trtcm *);
static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
static struct tswtcm *tswtcm_create(struct top_cdnr *,
		  u_int32_t, u_int32_t, u_int32_t,
		  struct tc_action *, struct tc_action *, struct tc_action *);
static int tswtcm_destroy(struct tswtcm *);
static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);

/* handlers for the individual ioctl requests */
static int cdnrcmd_if_attach(char *);
static int cdnrcmd_if_detach(char *);
static int cdnrcmd_add_element(struct cdnr_add_element *);
static int cdnrcmd_delete_element(struct cdnr_delete_element *);
static int cdnrcmd_add_filter(struct cdnr_add_filter *);
static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
static int cdnrcmd_get_stats(struct cdnr_get_stats *);

/* declares the cdnr device entry points (cdnropen, cdnrclose, cdnrioctl) */
altqdev_decl(cdnr);
127 
128 /*
129  * top level input function called from ip_input.
130  * should be called before converting header fields to host-byte-order.
131  */
132 int
133 altq_cdnr_input(struct mbuf *m, int af)
134 {
135 	struct ifnet		*ifp;
136 	struct ip		*ip;
137 	struct top_cdnr		*top;
138 	struct tc_action	*tca;
139 	struct cdnr_block	*cb;
140 	struct cdnr_pktinfo	pktinfo;
141 
142 	ifp = m_get_rcvif_NOMPSAFE(m);
143 	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
144 		/* traffic conditioner is not enabled on this interface */
145 		return 1;
146 
147 	top = ifp->if_snd.altq_cdnr;
148 
149 	ip = mtod(m, struct ip *);
150 #ifdef INET6
151 	if (af == AF_INET6) {
152 		u_int32_t flowlabel;
153 
154 		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
155 		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
156 	} else
157 #endif
158 		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
159 	pktinfo.pkt_len = m_pktlen(m);
160 
161 	tca = NULL;
162 
163 	cb = acc_classify(&top->tc_classifier, m, af);
164 	if (cb != NULL)
165 		tca = &cb->cb_action;
166 
167 	if (tca == NULL)
168 		tca = &top->tc_block.cb_action;
169 
170 	while (1) {
171 		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
172 
173 		switch (tca->tca_code) {
174 		case TCACODE_PASS:
175 			return 1;
176 		case TCACODE_DROP:
177 			m_freem(m);
178 			return 0;
179 		case TCACODE_RETURN:
180 			return 0;
181 		case TCACODE_MARK:
182 #ifdef INET6
183 			if (af == AF_INET6) {
184 				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
185 				u_int32_t flowlabel;
186 
187 				flowlabel = ntohl(ip6->ip6_flow);
188 				flowlabel = (tca->tca_dscp << 20) |
189 					(flowlabel & ~(DSCP_MASK << 20));
190 				ip6->ip6_flow = htonl(flowlabel);
191 			} else
192 #endif
193 				ip->ip_tos = tca->tca_dscp |
194 					(ip->ip_tos & DSCP_CUMASK);
195 			return 1;
196 		case TCACODE_NEXT:
197 			cb = tca->tca_next;
198 			tca = (*cb->cb_input)(cb, &pktinfo);
199 			break;
200 		case TCACODE_NONE:
201 		default:
202 			return 1;
203 		}
204 	}
205 }
206 
207 static struct top_cdnr *
208 tcb_lookup(char *ifname)
209 {
210 	struct top_cdnr *top;
211 	struct ifnet *ifp;
212 
213 	if ((ifp = ifunit(ifname)) != NULL)
214 		LIST_FOREACH(top, &tcb_list, tc_next)
215 			if (top->tc_ifq->altq_ifp == ifp)
216 				return top;
217 	return NULL;
218 }
219 
220 static struct cdnr_block *
221 cdnr_handle2cb(u_long handle)
222 {
223 	struct cdnr_block *cb;
224 
225 	cb = (struct cdnr_block *)handle;
226 	if (handle != ALIGN(cb))
227 		return NULL;
228 
229 	if (cb == NULL || cb->cb_handle != handle)
230 		return NULL;
231 	return cb;
232 }
233 
234 static u_long
235 cdnr_cb2handle(struct cdnr_block *cb)
236 {
237 	return (cb->cb_handle);
238 }
239 
240 static void *
241 cdnr_cballoc(struct top_cdnr *top, int type, struct tc_action *(*input_func)(
242     struct cdnr_block *, struct cdnr_pktinfo *))
243 {
244 	struct cdnr_block *cb;
245 	int size;
246 
247 	switch (type) {
248 	case TCETYPE_TOP:
249 		size = sizeof(struct top_cdnr);
250 		break;
251 	case TCETYPE_ELEMENT:
252 		size = sizeof(struct cdnr_block);
253 		break;
254 	case TCETYPE_TBMETER:
255 		size = sizeof(struct tbmeter);
256 		break;
257 	case TCETYPE_TRTCM:
258 		size = sizeof(struct trtcm);
259 		break;
260 	case TCETYPE_TSWTCM:
261 		size = sizeof(struct tswtcm);
262 		break;
263 	default:
264 		return NULL;
265 	}
266 
267 	cb = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
268 	if (cb == NULL)
269 		return NULL;
270 
271 	cb->cb_len = size;
272 	cb->cb_type = type;
273 	cb->cb_ref = 0;
274 	cb->cb_handle = (u_long)cb;
275 	if (top == NULL)
276 		cb->cb_top = (struct top_cdnr *)cb;
277 	else
278 		cb->cb_top = top;
279 
280 	if (input_func != NULL) {
281 		/*
282 		 * if this cdnr has an action function,
283 		 * make tc_action to call itself.
284 		 */
285 		cb->cb_action.tca_code = TCACODE_NEXT;
286 		cb->cb_action.tca_next = cb;
287 		cb->cb_input = input_func;
288 	} else
289 		cb->cb_action.tca_code = TCACODE_NONE;
290 
291 	/* if this isn't top, register the element to the top level cdnr */
292 	if (top != NULL)
293 		LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
294 
295 	return ((void *)cb);
296 }
297 
298 static void
299 cdnr_cbdestroy(void *cblock)
300 {
301 	struct cdnr_block *cb = cblock;
302 
303 	/* delete filters belonging to this cdnr */
304 	acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
305 
306 	/* remove from the top level cdnr */
307 	if (cb->cb_top != cblock)
308 		LIST_REMOVE(cb, cb_next);
309 
310 	free(cb, M_DEVBUF);
311 }
312 
313 /*
314  * conditioner common destroy routine
315  */
316 static int
317 generic_element_destroy(struct cdnr_block *cb)
318 {
319 	int error = 0;
320 
321 	switch (cb->cb_type) {
322 	case TCETYPE_TOP:
323 		error = top_destroy((struct top_cdnr *)cb);
324 		break;
325 	case TCETYPE_ELEMENT:
326 		error = element_destroy(cb);
327 		break;
328 	case TCETYPE_TBMETER:
329 		error = tbm_destroy((struct tbmeter *)cb);
330 		break;
331 	case TCETYPE_TRTCM:
332 		error = trtcm_destroy((struct trtcm *)cb);
333 		break;
334 	case TCETYPE_TSWTCM:
335 		error = tswtcm_destroy((struct tswtcm *)cb);
336 		break;
337 	default:
338 		error = EINVAL;
339 	}
340 	return error;
341 }
342 
343 static int
344 tca_verify_action(struct tc_action *utca)
345 {
346 	switch (utca->tca_code) {
347 	case TCACODE_PASS:
348 	case TCACODE_DROP:
349 	case TCACODE_MARK:
350 		/* these are ok */
351 		break;
352 
353 	case TCACODE_HANDLE:
354 		/* verify handle value */
355 		if (cdnr_handle2cb(utca->tca_handle) == NULL)
356 			return -1;
357 		break;
358 
359 	case TCACODE_NONE:
360 	case TCACODE_RETURN:
361 	case TCACODE_NEXT:
362 	default:
363 		/* should not be passed from a user */
364 		return -1;
365 	}
366 	return 0;
367 }
368 
369 static void
370 tca_import_action(struct tc_action *ktca, struct tc_action *utca)
371 {
372 	struct cdnr_block *cb;
373 
374 	*ktca = *utca;
375 	if (ktca->tca_code == TCACODE_HANDLE) {
376 		cb = cdnr_handle2cb(ktca->tca_handle);
377 		if (cb == NULL) {
378 			ktca->tca_code = TCACODE_NONE;
379 			return;
380 		}
381 		ktca->tca_code = TCACODE_NEXT;
382 		ktca->tca_next = cb;
383 		cb->cb_ref++;
384 	} else if (ktca->tca_code == TCACODE_MARK) {
385 		ktca->tca_dscp &= DSCP_MASK;
386 	}
387 	return;
388 }
389 
390 static void
391 tca_invalidate_action(struct tc_action *tca)
392 {
393 	struct cdnr_block *cb;
394 
395 	if (tca->tca_code == TCACODE_NEXT) {
396 		cb = tca->tca_next;
397 		if (cb == NULL)
398 			return;
399 		cb->cb_ref--;
400 	}
401 	tca->tca_code = TCACODE_NONE;
402 }
403 
404 /*
405  * top level traffic conditioner
406  */
407 static struct top_cdnr *
408 top_create(struct ifaltq *ifq)
409 {
410 	struct top_cdnr *top;
411 
412 	if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
413 		return NULL;
414 
415 	top->tc_ifq = ifq;
416 	/* set default action for the top level conditioner */
417 	top->tc_block.cb_action.tca_code = TCACODE_PASS;
418 
419 	LIST_INSERT_HEAD(&tcb_list, top, tc_next);
420 
421 	ifq->altq_cdnr = top;
422 
423 	return top;
424 }
425 
426 static int
427 top_destroy(struct top_cdnr *top)
428 {
429 	struct cdnr_block *cb;
430 
431 	if (ALTQ_IS_CNDTNING(top->tc_ifq))
432 		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
433 	top->tc_ifq->altq_cdnr = NULL;
434 
435 	/*
436 	 * destroy all the conditioner elements belonging to this interface
437 	 */
438 	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
439 		while (cb != NULL && cb->cb_ref > 0)
440 			cb = LIST_NEXT(cb, cb_next);
441 		if (cb != NULL)
442 			generic_element_destroy(cb);
443 	}
444 
445 	LIST_REMOVE(top, tc_next);
446 
447 	cdnr_cbdestroy(top);
448 
449 	/* if there is no active conditioner, remove the input hook */
450 	if (altq_input != NULL) {
451 		LIST_FOREACH(top, &tcb_list, tc_next)
452 			if (ALTQ_IS_CNDTNING(top->tc_ifq))
453 				break;
454 		if (top == NULL)
455 			altq_input = NULL;
456 	}
457 
458 	return 0;
459 }
460 
461 /*
462  * simple tc elements without input function (e.g., dropper and makers).
463  */
464 static struct cdnr_block *
465 element_create(struct top_cdnr *top, struct tc_action *action)
466 {
467 	struct cdnr_block *cb;
468 
469 	if (tca_verify_action(action) < 0)
470 		return NULL;
471 
472 	if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
473 		return NULL;
474 
475 	tca_import_action(&cb->cb_action, action);
476 
477 	return cb;
478 }
479 
480 static int
481 element_destroy(struct cdnr_block *cb)
482 {
483 	if (cb->cb_ref > 0)
484 		return EBUSY;
485 
486 	tca_invalidate_action(&cb->cb_action);
487 
488 	cdnr_cbdestroy(cb);
489 	return 0;
490 }
491 
/*
 * internal representation of token bucket parameters
 *	rate: 	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 * In other words byte counts are kept as 64-bit values shifted left by
 * TB_SHIFT bits, and the unit of time is one machine clock tick.
 */
/* shift applied to byte counts */
#define	TB_SHIFT	32
/* widen x to 64 bits and convert it to the scaled representation */
#define	TB_SCALE(x)	((u_int64_t)(x) << TB_SHIFT)
/* convert a scaled value back */
#define	TB_UNSCALE(x)	((x) >> TB_SHIFT)
502 
503 static void
504 tb_import_profile(struct tbe *tb, struct tb_profile *profile)
505 {
506 	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
507 	tb->depth = TB_SCALE(profile->depth);
508 	if (tb->rate > 0)
509 		tb->filluptime = tb->depth / tb->rate;
510 	else
511 		tb->filluptime = 0xffffffffffffffffLL;
512 	tb->token = tb->depth;
513 	tb->last = read_machclk();
514 }
515 
516 /*
517  * simple token bucket meter
518  */
519 static struct tbmeter *
520 tbm_create(struct top_cdnr *top, struct tb_profile *profile,
521     struct tc_action *in_action, struct tc_action *out_action)
522 {
523 	struct tbmeter *tbm = NULL;
524 
525 	if (tca_verify_action(in_action) < 0
526 	    || tca_verify_action(out_action) < 0)
527 		return NULL;
528 
529 	if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
530 				tbm_input)) == NULL)
531 		return NULL;
532 
533 	tb_import_profile(&tbm->tb, profile);
534 
535 	tca_import_action(&tbm->in_action, in_action);
536 	tca_import_action(&tbm->out_action, out_action);
537 
538 	return tbm;
539 }
540 
541 static int
542 tbm_destroy(struct tbmeter *tbm)
543 {
544 	if (tbm->cdnrblk.cb_ref > 0)
545 		return EBUSY;
546 
547 	tca_invalidate_action(&tbm->in_action);
548 	tca_invalidate_action(&tbm->out_action);
549 
550 	cdnr_cbdestroy(tbm);
551 	return 0;
552 }
553 
554 static struct tc_action *
555 tbm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
556 {
557 	struct tbmeter *tbm = (struct tbmeter *)cb;
558 	u_int64_t	len;
559 	u_int64_t	interval, now;
560 
561 	len = TB_SCALE(pktinfo->pkt_len);
562 
563 	if (tbm->tb.token < len) {
564 		now = read_machclk();
565 		interval = now - tbm->tb.last;
566 		if (interval >= tbm->tb.filluptime)
567 			tbm->tb.token = tbm->tb.depth;
568 		else {
569 			tbm->tb.token += interval * tbm->tb.rate;
570 			if (tbm->tb.token > tbm->tb.depth)
571 				tbm->tb.token = tbm->tb.depth;
572 		}
573 		tbm->tb.last = now;
574 	}
575 
576 	if (tbm->tb.token < len) {
577 		PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
578 		return (&tbm->out_action);
579 	}
580 
581 	tbm->tb.token -= len;
582 	PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
583 	return (&tbm->in_action);
584 }
585 
586 /*
587  * two rate three color marker
588  * as described in draft-heinanen-diffserv-trtcm-01.txt
589  */
590 static struct trtcm *
591 trtcm_create(struct top_cdnr *top, struct tb_profile *cmtd_profile,
592     struct tb_profile *peak_profile, struct tc_action *green_action,
593     struct tc_action *yellow_action, struct tc_action *red_action,
594     int coloraware)
595 {
596 	struct trtcm *tcm = NULL;
597 
598 	if (tca_verify_action(green_action) < 0
599 	    || tca_verify_action(yellow_action) < 0
600 	    || tca_verify_action(red_action) < 0)
601 		return NULL;
602 
603 	if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
604 				trtcm_input)) == NULL)
605 		return NULL;
606 
607 	tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
608 	tb_import_profile(&tcm->peak_tb, peak_profile);
609 
610 	tca_import_action(&tcm->green_action, green_action);
611 	tca_import_action(&tcm->yellow_action, yellow_action);
612 	tca_import_action(&tcm->red_action, red_action);
613 
614 	/* set dscps to use */
615 	if (tcm->green_action.tca_code == TCACODE_MARK)
616 		tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
617 	else
618 		tcm->green_dscp = DSCP_AF11;
619 	if (tcm->yellow_action.tca_code == TCACODE_MARK)
620 		tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
621 	else
622 		tcm->yellow_dscp = DSCP_AF12;
623 	if (tcm->red_action.tca_code == TCACODE_MARK)
624 		tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
625 	else
626 		tcm->red_dscp = DSCP_AF13;
627 
628 	tcm->coloraware = coloraware;
629 
630 	return tcm;
631 }
632 
633 static int
634 trtcm_destroy(struct trtcm *tcm)
635 {
636 	if (tcm->cdnrblk.cb_ref > 0)
637 		return EBUSY;
638 
639 	tca_invalidate_action(&tcm->green_action);
640 	tca_invalidate_action(&tcm->yellow_action);
641 	tca_invalidate_action(&tcm->red_action);
642 
643 	cdnr_cbdestroy(tcm);
644 	return 0;
645 }
646 
647 static struct tc_action *
648 trtcm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
649 {
650 	struct trtcm *tcm = (struct trtcm *)cb;
651 	u_int64_t	len;
652 	u_int64_t	interval, now;
653 	u_int8_t	color;
654 
655 	len = TB_SCALE(pktinfo->pkt_len);
656 	if (tcm->coloraware) {
657 		color = pktinfo->pkt_dscp;
658 		if (color != tcm->yellow_dscp && color != tcm->red_dscp)
659 			color = tcm->green_dscp;
660 	} else {
661 		/* if color-blind, precolor it as green */
662 		color = tcm->green_dscp;
663 	}
664 
665 	now = read_machclk();
666 	if (tcm->cmtd_tb.token < len) {
667 		interval = now - tcm->cmtd_tb.last;
668 		if (interval >= tcm->cmtd_tb.filluptime)
669 			tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
670 		else {
671 			tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
672 			if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
673 				tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
674 		}
675 		tcm->cmtd_tb.last = now;
676 	}
677 	if (tcm->peak_tb.token < len) {
678 		interval = now - tcm->peak_tb.last;
679 		if (interval >= tcm->peak_tb.filluptime)
680 			tcm->peak_tb.token = tcm->peak_tb.depth;
681 		else {
682 			tcm->peak_tb.token += interval * tcm->peak_tb.rate;
683 			if (tcm->peak_tb.token > tcm->peak_tb.depth)
684 				tcm->peak_tb.token = tcm->peak_tb.depth;
685 		}
686 		tcm->peak_tb.last = now;
687 	}
688 
689 	if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
690 		pktinfo->pkt_dscp = tcm->red_dscp;
691 		PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
692 		return (&tcm->red_action);
693 	}
694 
695 	if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
696 		pktinfo->pkt_dscp = tcm->yellow_dscp;
697 		tcm->peak_tb.token -= len;
698 		PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
699 		return (&tcm->yellow_action);
700 	}
701 
702 	pktinfo->pkt_dscp = tcm->green_dscp;
703 	tcm->cmtd_tb.token -= len;
704 	tcm->peak_tb.token -= len;
705 	PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
706 	return (&tcm->green_action);
707 }
708 
709 /*
710  * time sliding window three color marker
711  * as described in draft-fang-diffserv-tc-tswtcm-00.txt
712  */
713 static struct tswtcm *
714 tswtcm_create(struct top_cdnr *top, u_int32_t cmtd_rate, u_int32_t peak_rate,
715     u_int32_t avg_interval, struct tc_action *green_action,
716     struct tc_action *yellow_action, struct tc_action *red_action)
717 {
718 	struct tswtcm *tsw;
719 
720 	if (tca_verify_action(green_action) < 0
721 	    || tca_verify_action(yellow_action) < 0
722 	    || tca_verify_action(red_action) < 0)
723 		return NULL;
724 
725 	if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
726 				tswtcm_input)) == NULL)
727 		return NULL;
728 
729 	tca_import_action(&tsw->green_action, green_action);
730 	tca_import_action(&tsw->yellow_action, yellow_action);
731 	tca_import_action(&tsw->red_action, red_action);
732 
733 	/* set dscps to use */
734 	if (tsw->green_action.tca_code == TCACODE_MARK)
735 		tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
736 	else
737 		tsw->green_dscp = DSCP_AF11;
738 	if (tsw->yellow_action.tca_code == TCACODE_MARK)
739 		tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
740 	else
741 		tsw->yellow_dscp = DSCP_AF12;
742 	if (tsw->red_action.tca_code == TCACODE_MARK)
743 		tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
744 	else
745 		tsw->red_dscp = DSCP_AF13;
746 
747 	/* convert rates from bits/sec to bytes/sec */
748 	tsw->cmtd_rate = cmtd_rate / 8;
749 	tsw->peak_rate = peak_rate / 8;
750 	tsw->avg_rate = 0;
751 
752 	/* timewin is converted from msec to machine clock unit */
753 	tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
754 
755 	return tsw;
756 }
757 
758 static int
759 tswtcm_destroy(struct tswtcm *tsw)
760 {
761 	if (tsw->cdnrblk.cb_ref > 0)
762 		return EBUSY;
763 
764 	tca_invalidate_action(&tsw->green_action);
765 	tca_invalidate_action(&tsw->yellow_action);
766 	tca_invalidate_action(&tsw->red_action);
767 
768 	cdnr_cbdestroy(tsw);
769 	return 0;
770 }
771 
772 static struct tc_action *
773 tswtcm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
774 {
775 	struct tswtcm	*tsw = (struct tswtcm *)cb;
776 	int		len;
777 	u_int32_t	avg_rate;
778 	u_int64_t	interval, now, tmp;
779 
780 	/*
781 	 * rate estimator
782 	 */
783 	len = pktinfo->pkt_len;
784 	now = read_machclk();
785 
786 	interval = now - tsw->t_front;
787 	/*
788 	 * calculate average rate:
789 	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
790 	 * pkt_len needs to be multiplied by machclk_freq in order to
791 	 * get (bytes/sec).
792 	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
793 	 * less than 32 bits, the following 64-bit operation has enough
794 	 * precision.
795 	 */
796 	tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
797 	       + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
798 	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
799 	tsw->t_front = now;
800 
801 	/*
802 	 * marker
803 	 */
804 	if (avg_rate > tsw->cmtd_rate) {
805 		u_int32_t randval = cprng_fast32() % avg_rate;
806 
807 		if (avg_rate > tsw->peak_rate) {
808 			if (randval < avg_rate - tsw->peak_rate) {
809 				/* mark red */
810 				pktinfo->pkt_dscp = tsw->red_dscp;
811 				PKTCNTR_ADD(&tsw->red_cnt, len);
812 				return (&tsw->red_action);
813 			} else if (randval < avg_rate - tsw->cmtd_rate)
814 				goto mark_yellow;
815 		} else {
816 			/* peak_rate >= avg_rate > cmtd_rate */
817 			if (randval < avg_rate - tsw->cmtd_rate) {
818 			mark_yellow:
819 				pktinfo->pkt_dscp = tsw->yellow_dscp;
820 				PKTCNTR_ADD(&tsw->yellow_cnt, len);
821 				return (&tsw->yellow_action);
822 			}
823 		}
824 	}
825 
826 	/* mark green */
827 	pktinfo->pkt_dscp = tsw->green_dscp;
828 	PKTCNTR_ADD(&tsw->green_cnt, len);
829 	return (&tsw->green_action);
830 }
831 
832 /*
833  * ioctl requests
834  */
835 static int
836 cdnrcmd_if_attach(char *ifname)
837 {
838 	struct ifnet *ifp;
839 	struct top_cdnr *top;
840 
841 	if ((ifp = ifunit(ifname)) == NULL)
842 		return EBADF;
843 
844 	if (ifp->if_snd.altq_cdnr != NULL)
845 		return EBUSY;
846 
847 	if ((top = top_create(&ifp->if_snd)) == NULL)
848 		return ENOMEM;
849 	return 0;
850 }
851 
852 static int
853 cdnrcmd_if_detach(char *ifname)
854 {
855 	struct top_cdnr *top;
856 
857 	if ((top = tcb_lookup(ifname)) == NULL)
858 		return EBADF;
859 
860 	return top_destroy(top);
861 }
862 
863 static int
864 cdnrcmd_add_element(struct cdnr_add_element *ap)
865 {
866 	struct top_cdnr *top;
867 	struct cdnr_block *cb;
868 
869 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
870 		return EBADF;
871 
872 	cb = element_create(top, &ap->action);
873 	if (cb == NULL)
874 		return EINVAL;
875 	/* return a class handle to the user */
876 	ap->cdnr_handle = cdnr_cb2handle(cb);
877 	return 0;
878 }
879 
880 static int
881 cdnrcmd_delete_element(struct cdnr_delete_element *ap)
882 {
883 	struct top_cdnr *top;
884 	struct cdnr_block *cb;
885 
886 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
887 		return EBADF;
888 
889 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
890 		return EINVAL;
891 
892 	if (cb->cb_type != TCETYPE_ELEMENT)
893 		return generic_element_destroy(cb);
894 
895 	return element_destroy(cb);
896 }
897 
898 static int
899 cdnrcmd_add_filter(struct cdnr_add_filter *ap)
900 {
901 	struct top_cdnr *top;
902 	struct cdnr_block *cb;
903 
904 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
905 		return EBADF;
906 
907 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
908 		return EINVAL;
909 
910 	return acc_add_filter(&top->tc_classifier, &ap->filter,
911 			      cb, &ap->filter_handle);
912 }
913 
914 static int
915 cdnrcmd_delete_filter(struct cdnr_delete_filter *ap)
916 {
917 	struct top_cdnr *top;
918 
919 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
920 		return EBADF;
921 
922 	return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
923 }
924 
925 static int
926 cdnrcmd_add_tbm(struct cdnr_add_tbmeter *ap)
927 {
928 	struct top_cdnr *top;
929 	struct tbmeter *tbm;
930 
931 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
932 		return EBADF;
933 
934 	tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
935 	if (tbm == NULL)
936 		return EINVAL;
937 	/* return a class handle to the user */
938 	ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
939 	return 0;
940 }
941 
942 static int
943 cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *ap)
944 {
945 	struct tbmeter *tbm;
946 
947 	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
948 		return EINVAL;
949 
950 	tb_import_profile(&tbm->tb, &ap->profile);
951 
952 	return 0;
953 }
954 
955 static int
956 cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *ap)
957 {
958 	struct tbmeter *tbm;
959 
960 	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
961 		return EINVAL;
962 
963 	ap->in_cnt = tbm->in_cnt;
964 	ap->out_cnt = tbm->out_cnt;
965 
966 	return 0;
967 }
968 
969 static int
970 cdnrcmd_add_trtcm(struct cdnr_add_trtcm *ap)
971 {
972 	struct top_cdnr *top;
973 	struct trtcm *tcm;
974 
975 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
976 		return EBADF;
977 
978 	tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
979 			   &ap->green_action, &ap->yellow_action,
980 			   &ap->red_action, ap->coloraware);
981 	if (tcm == NULL)
982 		return EINVAL;
983 
984 	/* return a class handle to the user */
985 	ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
986 	return 0;
987 }
988 
989 static int
990 cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *ap)
991 {
992 	struct trtcm *tcm;
993 
994 	if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
995 		return EINVAL;
996 
997 	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
998 	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
999 
1000 	return 0;
1001 }
1002 
1003 static int
1004 cdnrcmd_tcm_stats(struct cdnr_tcm_stats *ap)
1005 {
1006 	struct cdnr_block *cb;
1007 
1008 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1009 		return EINVAL;
1010 
1011 	if (cb->cb_type == TCETYPE_TRTCM) {
1012 		struct trtcm *tcm = (struct trtcm *)cb;
1013 
1014 		ap->green_cnt = tcm->green_cnt;
1015 		ap->yellow_cnt = tcm->yellow_cnt;
1016 		ap->red_cnt = tcm->red_cnt;
1017 	} else if (cb->cb_type == TCETYPE_TSWTCM) {
1018 		struct tswtcm *tsw = (struct tswtcm *)cb;
1019 
1020 		ap->green_cnt = tsw->green_cnt;
1021 		ap->yellow_cnt = tsw->yellow_cnt;
1022 		ap->red_cnt = tsw->red_cnt;
1023 	} else
1024 		return EINVAL;
1025 
1026 	return 0;
1027 }
1028 
1029 static int
1030 cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *ap)
1031 {
1032 	struct top_cdnr *top;
1033 	struct tswtcm *tsw;
1034 
1035 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1036 		return EBADF;
1037 
1038 	if (ap->cmtd_rate > ap->peak_rate)
1039 		return EINVAL;
1040 
1041 	tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
1042 			    ap->avg_interval, &ap->green_action,
1043 			    &ap->yellow_action, &ap->red_action);
1044 	if (tsw == NULL)
1045 		return EINVAL;
1046 
1047 	/* return a class handle to the user */
1048 	ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
1049 	return 0;
1050 }
1051 
1052 static int
1053 cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *ap)
1054 {
1055 	struct tswtcm *tsw;
1056 
1057 	if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1058 		return EINVAL;
1059 
1060 	if (ap->cmtd_rate > ap->peak_rate)
1061 		return EINVAL;
1062 
1063 	/* convert rates from bits/sec to bytes/sec */
1064 	tsw->cmtd_rate = ap->cmtd_rate / 8;
1065 	tsw->peak_rate = ap->peak_rate / 8;
1066 	tsw->avg_rate = 0;
1067 
1068 	/* timewin is converted from msec to machine clock unit */
1069 	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
1070 
1071 	return 0;
1072 }
1073 
1074 static int
1075 cdnrcmd_get_stats(struct cdnr_get_stats *ap)
1076 {
1077 	struct top_cdnr *top;
1078 	struct cdnr_block *cb;
1079 	struct tbmeter *tbm;
1080 	struct trtcm *tcm;
1081 	struct tswtcm *tsw;
1082 	struct tce_stats tce, *usp;
1083 	int error, n, nskip, nelements;
1084 
1085 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1086 		return EBADF;
1087 
1088 	/* copy action stats */
1089 	(void)memcpy(ap->cnts, top->tc_cnts, sizeof(ap->cnts));
1090 
1091 	/* stats for each element */
1092 	nelements = ap->nelements;
1093 	usp = ap->tce_stats;
1094 	if (nelements <= 0 || usp == NULL)
1095 		return 0;
1096 
1097 	nskip = ap->nskip;
1098 	n = 0;
1099 	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
1100 		if (nskip > 0) {
1101 			nskip--;
1102 			continue;
1103 		}
1104 
1105 		(void)memset(&tce, 0, sizeof(tce));
1106 		tce.tce_handle = cb->cb_handle;
1107 		tce.tce_type = cb->cb_type;
1108 		switch (cb->cb_type) {
1109 		case TCETYPE_TBMETER:
1110 			tbm = (struct tbmeter *)cb;
1111 			tce.tce_cnts[0] = tbm->in_cnt;
1112 			tce.tce_cnts[1] = tbm->out_cnt;
1113 			break;
1114 		case TCETYPE_TRTCM:
1115 			tcm = (struct trtcm *)cb;
1116 			tce.tce_cnts[0] = tcm->green_cnt;
1117 			tce.tce_cnts[1] = tcm->yellow_cnt;
1118 			tce.tce_cnts[2] = tcm->red_cnt;
1119 			break;
1120 		case TCETYPE_TSWTCM:
1121 			tsw = (struct tswtcm *)cb;
1122 			tce.tce_cnts[0] = tsw->green_cnt;
1123 			tce.tce_cnts[1] = tsw->yellow_cnt;
1124 			tce.tce_cnts[2] = tsw->red_cnt;
1125 			break;
1126 		default:
1127 			continue;
1128 		}
1129 
1130 		if ((error = copyout((void *)&tce, (void *)usp++,
1131 				     sizeof(tce))) != 0)
1132 			return error;
1133 
1134 		if (++n == nelements)
1135 			break;
1136 	}
1137 	ap->nelements = n;
1138 
1139 	return 0;
1140 }
1141 
1142 /*
1143  * conditioner device interface
1144  */
1145 int
1146 cdnropen(dev_t dev, int flag, int fmt,
1147     struct lwp *l)
1148 {
1149 	if (machclk_freq == 0)
1150 		init_machclk();
1151 
1152 	if (machclk_freq == 0) {
1153 		printf("cdnr: no CPU clock available!\n");
1154 		return ENXIO;
1155 	}
1156 
1157 	/* everything will be done when the queueing scheme is attached. */
1158 	return 0;
1159 }
1160 
1161 int
1162 cdnrclose(dev_t dev, int flag, int fmt,
1163     struct lwp *l)
1164 {
1165 	struct top_cdnr *top;
1166 	int err, error = 0;
1167 
1168 	while ((top = LIST_FIRST(&tcb_list)) != NULL) {
1169 		/* destroy all */
1170 		err = top_destroy(top);
1171 		if (err != 0 && error == 0)
1172 			error = err;
1173 	}
1174 	altq_input = NULL;
1175 
1176 	return error;
1177 }
1178 
1179 int
1180 cdnrioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
1181     struct lwp *l)
1182 {
1183 	struct top_cdnr *top;
1184 	struct cdnr_interface *ifacep;
1185 	int	s, error = 0;
1186 
1187 	/* check super-user privilege */
1188 	switch (cmd) {
1189 	case CDNR_GETSTATS:
1190 		break;
1191 	default:
1192 		if ((error = kauth_authorize_network(l->l_cred,
1193 		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_CDNR, NULL,
1194 		    NULL, NULL)) != 0)
1195 			return (error);
1196 		break;
1197 	}
1198 
1199 	s = splnet();
1200 	switch (cmd) {
1201 
1202 	case CDNR_IF_ATTACH:
1203 		ifacep = (struct cdnr_interface *)addr;
1204 		error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
1205 		break;
1206 
1207 	case CDNR_IF_DETACH:
1208 		ifacep = (struct cdnr_interface *)addr;
1209 		error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
1210 		break;
1211 
1212 	case CDNR_ENABLE:
1213 	case CDNR_DISABLE:
1214 		ifacep = (struct cdnr_interface *)addr;
1215 		if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
1216 			error = EBADF;
1217 			break;
1218 		}
1219 
1220 		switch (cmd) {
1221 
1222 		case CDNR_ENABLE:
1223 			ALTQ_SET_CNDTNING(top->tc_ifq);
1224 			if (altq_input == NULL)
1225 				altq_input = altq_cdnr_input;
1226 			break;
1227 
1228 		case CDNR_DISABLE:
1229 			ALTQ_CLEAR_CNDTNING(top->tc_ifq);
1230 			LIST_FOREACH(top, &tcb_list, tc_next)
1231 				if (ALTQ_IS_CNDTNING(top->tc_ifq))
1232 					break;
1233 			if (top == NULL)
1234 				altq_input = NULL;
1235 			break;
1236 		}
1237 		break;
1238 
1239 	case CDNR_ADD_ELEM:
1240 		error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
1241 		break;
1242 
1243 	case CDNR_DEL_ELEM:
1244 		error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
1245 		break;
1246 
1247 	case CDNR_ADD_TBM:
1248 		error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
1249 		break;
1250 
1251 	case CDNR_MOD_TBM:
1252 		error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
1253 		break;
1254 
1255 	case CDNR_TBM_STATS:
1256 		error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
1257 		break;
1258 
1259 	case CDNR_ADD_TCM:
1260 		error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
1261 		break;
1262 
1263 	case CDNR_MOD_TCM:
1264 		error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
1265 		break;
1266 
1267 	case CDNR_TCM_STATS:
1268 		error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
1269 		break;
1270 
1271 	case CDNR_ADD_FILTER:
1272 		error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
1273 		break;
1274 
1275 	case CDNR_DEL_FILTER:
1276 		error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
1277 		break;
1278 
1279 	case CDNR_GETSTATS:
1280 		error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
1281 		break;
1282 
1283 	case CDNR_ADD_TSW:
1284 		error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
1285 		break;
1286 
1287 	case CDNR_MOD_TSW:
1288 		error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
1289 		break;
1290 
1291 	default:
1292 		error = EINVAL;
1293 		break;
1294 	}
1295 	splx(s);
1296 
1297 	return error;
1298 }
1299 
1300 #ifdef KLD_MODULE
1301 
1302 static struct altqsw cdnr_sw =
1303 	{"cdnr", cdnropen, cdnrclose, cdnrioctl};
1304 
1305 ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
1306 
1307 #endif /* KLD_MODULE */
1308 
1309 #endif /* ALTQ3_COMPAT */
1310 #endif /* ALTQ_CDNR */
1311