xref: /netbsd-src/dist/pf/sbin/pfctl/pfctl_altq.c (revision c8da0e5fefd3800856b306200a18b2315c7fbb9f)
1 /*	$NetBSD: pfctl_altq.c,v 1.8 2008/06/18 09:06:26 yamt Exp $	*/
2 /*	$OpenBSD: pfctl_altq.c,v 1.92 2007/05/27 05:15:17 claudio Exp $	*/
3 
4 /*
5  * Copyright (c) 2002
6  *	Sony Computer Science Laboratories Inc.
7  * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
8  *
9  * Permission to use, copy, modify, and distribute this software for any
10  * purpose with or without fee is hereby granted, provided that the above
11  * copyright notice and this permission notice appear in all copies.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20  */
21 
22 #include <sys/types.h>
23 #include <sys/ioctl.h>
24 #include <sys/socket.h>
25 #ifdef __NetBSD__
26 #include <sys/param.h>
27 #include <sys/mbuf.h>
28 #endif
29 
30 #include <net/if.h>
31 #include <netinet/in.h>
32 #include <net/pfvar.h>
33 
34 #include <err.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 
43 #include <altq/altq.h>
44 #include <altq/altq_cbq.h>
45 #include <altq/altq_priq.h>
46 #include <altq/altq_hfsc.h>
47 
48 #include "pfctl_parser.h"
49 #include "pfctl.h"
50 
/* true when a service curve is absent or has both slopes (m1, m2) zero */
#define is_sc_null(sc)	(((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))

/* every altq/queue entry recorded by pfaltq_store(), in insertion order */
TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
/* scratch generalized service curves, rebuilt by eval_pfqueue_hfsc() */
LIST_HEAD(gen_sc, segment) rtsc, lssc;

/* lookups over the stored entries */
struct pf_altq	*qname_to_pfaltq(const char *, const char *);
u_int32_t	 qname_to_qid(const char *);

/* CBQ: per-queue evaluation, per-interface commit check, option printer */
static int	eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
static int	cbq_compute_idletime(struct pfctl *, struct pf_altq *);
static int	check_commit_cbq(int, int, struct pf_altq *);
static int	print_cbq_opts(const struct pf_altq *);

/* PRIQ: per-queue evaluation, per-interface commit check, option printer */
static int	eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
static int	check_commit_priq(int, int, struct pf_altq *);
static int	print_priq_opts(const struct pf_altq *);

/* HFSC: per-queue evaluation, per-interface commit check, option printer */
static int	eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
static int	check_commit_hfsc(int, int, struct pf_altq *);
static int	print_hfsc_opts(const struct pf_altq *,
		    const struct node_queue_opt *);

/* generalized service curve helpers used for HFSC admission control */
static void		 gsc_add_sc(struct gen_sc *, struct service_curve *);
static int		 is_gsc_under_sc(struct gen_sc *,
			     struct service_curve *);
static void		 gsc_destroy(struct gen_sc *);
static struct segment	*gsc_getentry(struct gen_sc *, double);
static int		 gsc_add_seg(struct gen_sc *, double, double, double,
			     double);
static double		 sc_x2y(struct service_curve *, double);

/* misc utilities */
u_int32_t	 getifspeed(char *);
u_long		 getifmtu(char *);
int		 eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
		     u_int32_t);
u_int32_t	 eval_bwspec(struct node_queue_bw *, u_int32_t);
void		 print_hfsc_sc(const char *, u_int, u_int, u_int,
		     const struct node_hfsc_sc *);
89 
90 void
91 pfaltq_store(struct pf_altq *a)
92 {
93 	struct pf_altq	*altq;
94 
95 	if ((altq = malloc(sizeof(*altq))) == NULL)
96 		err(1, "malloc");
97 	memcpy(altq, a, sizeof(struct pf_altq));
98 	TAILQ_INSERT_TAIL(&altqs, altq, entries);
99 }
100 
101 struct pf_altq *
102 pfaltq_lookup(const char *ifname)
103 {
104 	struct pf_altq	*altq;
105 
106 	TAILQ_FOREACH(altq, &altqs, entries) {
107 		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
108 		    altq->qname[0] == 0)
109 			return (altq);
110 	}
111 	return (NULL);
112 }
113 
114 struct pf_altq *
115 qname_to_pfaltq(const char *qname, const char *ifname)
116 {
117 	struct pf_altq	*altq;
118 
119 	TAILQ_FOREACH(altq, &altqs, entries) {
120 		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
121 		    strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
122 			return (altq);
123 	}
124 	return (NULL);
125 }
126 
127 u_int32_t
128 qname_to_qid(const char *qname)
129 {
130 	struct pf_altq	*altq;
131 
132 	/*
133 	 * We guarantee that same named queues on different interfaces
134 	 * have the same qid, so we do NOT need to limit matching on
135 	 * one interface!
136 	 */
137 
138 	TAILQ_FOREACH(altq, &altqs, entries) {
139 		if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
140 			return (altq->qid);
141 	}
142 	return (0);
143 }
144 
145 void
146 print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
147 	struct node_queue_opt *qopts)
148 {
149 	if (a->qname[0] != 0) {
150 		print_queue(a, level, bw, 1, qopts);
151 		return;
152 	}
153 
154 	printf("altq on %s ", a->ifname);
155 
156 	switch (a->scheduler) {
157 	case ALTQT_CBQ:
158 		if (!print_cbq_opts(a))
159 			printf("cbq ");
160 		break;
161 	case ALTQT_PRIQ:
162 		if (!print_priq_opts(a))
163 			printf("priq ");
164 		break;
165 	case ALTQT_HFSC:
166 		if (!print_hfsc_opts(a, qopts))
167 			printf("hfsc ");
168 		break;
169 	}
170 
171 	if (bw != NULL && bw->bw_percent > 0) {
172 		if (bw->bw_percent < 100)
173 			printf("bandwidth %u%% ", bw->bw_percent);
174 	} else
175 		printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
176 
177 	if (a->qlimit != DEFAULT_QLIMIT)
178 		printf("qlimit %u ", a->qlimit);
179 	printf("tbrsize %u ", a->tbrsize);
180 }
181 
182 void
183 print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
184     int print_interface, struct node_queue_opt *qopts)
185 {
186 	unsigned	i;
187 
188 	printf("queue ");
189 	for (i = 0; i < level; ++i)
190 		printf(" ");
191 	printf("%s ", a->qname);
192 	if (print_interface)
193 		printf("on %s ", a->ifname);
194 	if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
195 		if (bw != NULL && bw->bw_percent > 0) {
196 			if (bw->bw_percent < 100)
197 				printf("bandwidth %u%% ", bw->bw_percent);
198 		} else
199 			printf("bandwidth %s ", rate2str((double)a->bandwidth));
200 	}
201 	if (a->priority != DEFAULT_PRIORITY)
202 		printf("priority %u ", a->priority);
203 	if (a->qlimit != DEFAULT_QLIMIT)
204 		printf("qlimit %u ", a->qlimit);
205 	switch (a->scheduler) {
206 	case ALTQT_CBQ:
207 		print_cbq_opts(a);
208 		break;
209 	case ALTQT_PRIQ:
210 		print_priq_opts(a);
211 		break;
212 	case ALTQT_HFSC:
213 		print_hfsc_opts(a, qopts);
214 		break;
215 	}
216 }
217 
218 /*
219  * eval_pfaltq computes the discipline parameters.
220  */
221 int
222 eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
223     struct node_queue_opt *opts)
224 {
225 	u_int	rate, size, errors = 0;
226 
227 	if (bw->bw_absolute > 0)
228 		pa->ifbandwidth = bw->bw_absolute;
229 	else
230 		if ((rate = getifspeed(pa->ifname)) == 0) {
231 			fprintf(stderr, "interface %s does not know its bandwidth, "
232 			    "please specify an absolute bandwidth\n",
233 			    pa->ifname);
234 			errors++;
235 		} else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
236 			pa->ifbandwidth = rate;
237 
238 	errors += eval_queue_opts(pa, opts, pa->ifbandwidth);
239 
240 	/* if tbrsize is not specified, use heuristics */
241 	if (pa->tbrsize == 0) {
242 		rate = pa->ifbandwidth;
243 		if (rate <= 1 * 1000 * 1000)
244 			size = 1;
245 		else if (rate <= 10 * 1000 * 1000)
246 			size = 4;
247 		else if (rate <= 200 * 1000 * 1000)
248 			size = 8;
249 		else
250 			size = 24;
251 		size = size * getifmtu(pa->ifname);
252 		if (size > 0xffff)
253 			size = 0xffff;
254 		pa->tbrsize = size;
255 	}
256 	return (errors);
257 }
258 
259 /*
260  * check_commit_altq does consistency check for each interface
261  */
262 int
263 check_commit_altq(int dev, int opts)
264 {
265 	struct pf_altq	*altq;
266 	int		 error = 0;
267 
268 	/* call the discipline check for each interface. */
269 	TAILQ_FOREACH(altq, &altqs, entries) {
270 		if (altq->qname[0] == 0) {
271 			switch (altq->scheduler) {
272 			case ALTQT_CBQ:
273 				error = check_commit_cbq(dev, opts, altq);
274 				break;
275 			case ALTQT_PRIQ:
276 				error = check_commit_priq(dev, opts, altq);
277 				break;
278 			case ALTQT_HFSC:
279 				error = check_commit_hfsc(dev, opts, altq);
280 				break;
281 			default:
282 				break;
283 			}
284 		}
285 	}
286 	return (error);
287 }
288 
289 /*
290  * eval_pfqueue computes the queue parameters.
291  */
292 int
293 eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
294     struct node_queue_opt *opts)
295 {
296 	/* should be merged with expand_queue */
297 	struct pf_altq	*if_pa, *parent, *altq;
298 	u_int32_t	 bwsum;
299 	int		 error = 0;
300 
301 	/* find the corresponding interface and copy fields used by queues */
302 	if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
303 		fprintf(stderr, "altq not defined on %s\n", pa->ifname);
304 		return (1);
305 	}
306 	pa->scheduler = if_pa->scheduler;
307 	pa->ifbandwidth = if_pa->ifbandwidth;
308 
309 	if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
310 		fprintf(stderr, "queue %s already exists on interface %s\n",
311 		    pa->qname, pa->ifname);
312 		return (1);
313 	}
314 	pa->qid = qname_to_qid(pa->qname);
315 
316 	parent = NULL;
317 	if (pa->parent[0] != 0) {
318 		parent = qname_to_pfaltq(pa->parent, pa->ifname);
319 		if (parent == NULL) {
320 			fprintf(stderr, "parent %s not found for %s\n",
321 			    pa->parent, pa->qname);
322 			return (1);
323 		}
324 		pa->parent_qid = parent->qid;
325 	}
326 	if (pa->qlimit == 0)
327 		pa->qlimit = DEFAULT_QLIMIT;
328 
329 	if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
330 		pa->bandwidth = eval_bwspec(bw,
331 		    parent == NULL ? 0 : parent->bandwidth);
332 
333 		if (pa->bandwidth > pa->ifbandwidth) {
334 			fprintf(stderr, "bandwidth for %s higher than "
335 			    "interface\n", pa->qname);
336 			return (1);
337 		}
338 		/* check the sum of the child bandwidth is under parent's */
339 		if (parent != NULL) {
340 			if (pa->bandwidth > parent->bandwidth) {
341 				warnx("bandwidth for %s higher than parent",
342 				    pa->qname);
343 				return (1);
344 			}
345 			bwsum = 0;
346 			TAILQ_FOREACH(altq, &altqs, entries) {
347 				if (strncmp(altq->ifname, pa->ifname,
348 				    IFNAMSIZ) == 0 &&
349 				    altq->qname[0] != 0 &&
350 				    strncmp(altq->parent, pa->parent,
351 				    PF_QNAME_SIZE) == 0)
352 					bwsum += altq->bandwidth;
353 			}
354 			bwsum += pa->bandwidth;
355 			if (bwsum > parent->bandwidth) {
356 				warnx("the sum of the child bandwidth higher"
357 				    " than parent \"%s\"", parent->qname);
358 			}
359 		}
360 	}
361 
362 	if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
363 		return (1);
364 
365 	switch (pa->scheduler) {
366 	case ALTQT_CBQ:
367 		error = eval_pfqueue_cbq(pf, pa);
368 		break;
369 	case ALTQT_PRIQ:
370 		error = eval_pfqueue_priq(pf, pa);
371 		break;
372 	case ALTQT_HFSC:
373 		error = eval_pfqueue_hfsc(pf, pa);
374 		break;
375 	default:
376 		break;
377 	}
378 	return (error);
379 }
380 
381 /*
382  * CBQ support functions
383  */
384 #define	RM_FILTER_GAIN	5	/* log2 of gain, e.g., 5 => 31/32 */
385 #define	RM_NS_PER_SEC	(1000000000)
386 
387 static int
388 eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
389 {
390 	struct cbq_opts	*opts;
391 	u_int		 ifmtu;
392 
393 	if (pa->priority >= CBQ_MAXPRI) {
394 		warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
395 		return (-1);
396 	}
397 
398 	ifmtu = getifmtu(pa->ifname);
399 	opts = &pa->pq_u.cbq_opts;
400 
401 	if (opts->pktsize == 0) {	/* use default */
402 		opts->pktsize = ifmtu;
403 		if (opts->pktsize > MCLBYTES)	/* do what TCP does */
404 			opts->pktsize &= ~MCLBYTES;
405 	} else if (opts->pktsize > ifmtu)
406 		opts->pktsize = ifmtu;
407 	if (opts->maxpktsize == 0)	/* use default */
408 		opts->maxpktsize = ifmtu;
409 	else if (opts->maxpktsize > ifmtu)
410 		opts->pktsize = ifmtu;
411 
412 	if (opts->pktsize > opts->maxpktsize)
413 		opts->pktsize = opts->maxpktsize;
414 
415 	if (pa->parent[0] == 0)
416 		opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
417 
418 	cbq_compute_idletime(pf, pa);
419 	return (0);
420 }
421 
422 /*
423  * compute ns_per_byte, maxidle, minidle, and offtime
424  */
425 static int
426 cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
427 {
428 	struct cbq_opts	*opts;
429 	double		 maxidle_s, maxidle, minidle;
430 	double		 offtime, nsPerByte, ifnsPerByte, ptime, cptime;
431 	double		 z, g, f, gton, gtom;
432 	u_int		 minburst, maxburst;
433 
434 	opts = &pa->pq_u.cbq_opts;
435 	ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
436 	minburst = opts->minburst;
437 	maxburst = opts->maxburst;
438 
439 	if (pa->bandwidth == 0)
440 		f = 0.0001;	/* small enough? */
441 	else
442 		f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
443 
444 	nsPerByte = ifnsPerByte / f;
445 	ptime = (double)opts->pktsize * ifnsPerByte;
446 	cptime = ptime * (1.0 - f) / f;
447 
448 	if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
449 		/*
450 		 * this causes integer overflow in kernel!
451 		 * (bandwidth < 6Kbps when max_pkt_size=1500)
452 		 */
453 		if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0)
454 			warnx("queue bandwidth must be larger than %s",
455 			    rate2str(ifnsPerByte * (double)opts->maxpktsize /
456 			    (double)INT_MAX * (double)pa->ifbandwidth));
457 			fprintf(stderr, "cbq: queue %s is too slow!\n",
458 			    pa->qname);
459 		nsPerByte = (double)(INT_MAX / opts->maxpktsize);
460 	}
461 
462 	if (maxburst == 0) {  /* use default */
463 		if (cptime > 10.0 * 1000000)
464 			maxburst = 4;
465 		else
466 			maxburst = 16;
467 	}
468 	if (minburst == 0)  /* use default */
469 		minburst = 2;
470 	if (minburst > maxburst)
471 		minburst = maxburst;
472 
473 	z = (double)(1 << RM_FILTER_GAIN);
474 	g = (1.0 - 1.0 / z);
475 	gton = pow(g, (double)maxburst);
476 	gtom = pow(g, (double)(minburst-1));
477 	maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
478 	maxidle_s = (1.0 - g);
479 	if (maxidle > maxidle_s)
480 		maxidle = ptime * maxidle;
481 	else
482 		maxidle = ptime * maxidle_s;
483 	offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
484 	minidle = -((double)opts->maxpktsize * (double)nsPerByte);
485 
486 	/* scale parameters */
487 	maxidle = ((maxidle * 8.0) / nsPerByte) *
488 	    pow(2.0, (double)RM_FILTER_GAIN);
489 	offtime = (offtime * 8.0) / nsPerByte *
490 	    pow(2.0, (double)RM_FILTER_GAIN);
491 	minidle = ((minidle * 8.0) / nsPerByte) *
492 	    pow(2.0, (double)RM_FILTER_GAIN);
493 
494 	maxidle = maxidle / 1000.0;
495 	offtime = offtime / 1000.0;
496 	minidle = minidle / 1000.0;
497 
498 	opts->minburst = minburst;
499 	opts->maxburst = maxburst;
500 	opts->ns_per_byte = (u_int)nsPerByte;
501 	opts->maxidle = (u_int)fabs(maxidle);
502 	opts->minidle = (int)minidle;
503 	opts->offtime = (u_int)fabs(offtime);
504 
505 	return (0);
506 }
507 
508 static int
509 check_commit_cbq(int dev, int opts, struct pf_altq *pa)
510 {
511 	struct pf_altq	*altq;
512 	int		 root_class, default_class;
513 	int		 error = 0;
514 
515 	/*
516 	 * check if cbq has one root queue and one default queue
517 	 * for this interface
518 	 */
519 	root_class = default_class = 0;
520 	TAILQ_FOREACH(altq, &altqs, entries) {
521 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
522 			continue;
523 		if (altq->qname[0] == 0)  /* this is for interface */
524 			continue;
525 		if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
526 			root_class++;
527 		if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
528 			default_class++;
529 	}
530 	if (root_class != 1) {
531 		warnx("should have one root queue on %s", pa->ifname);
532 		error++;
533 	}
534 	if (default_class != 1) {
535 		warnx("should have one default queue on %s", pa->ifname);
536 		error++;
537 	}
538 	return (error);
539 }
540 
541 static int
542 print_cbq_opts(const struct pf_altq *a)
543 {
544 	const struct cbq_opts	*opts;
545 
546 	opts = &a->pq_u.cbq_opts;
547 	if (opts->flags) {
548 		printf("cbq(");
549 		if (opts->flags & CBQCLF_RED)
550 			printf(" red");
551 		if (opts->flags & CBQCLF_ECN)
552 			printf(" ecn");
553 		if (opts->flags & CBQCLF_RIO)
554 			printf(" rio");
555 		if (opts->flags & CBQCLF_CLEARDSCP)
556 			printf(" cleardscp");
557 		if (opts->flags & CBQCLF_FLOWVALVE)
558 			printf(" flowvalve");
559 #ifdef CBQCLF_BORROW
560 		if (opts->flags & CBQCLF_BORROW)
561 			printf(" borrow");
562 #endif
563 		if (opts->flags & CBQCLF_WRR)
564 			printf(" wrr");
565 		if (opts->flags & CBQCLF_EFFICIENT)
566 			printf(" efficient");
567 		if (opts->flags & CBQCLF_ROOTCLASS)
568 			printf(" root");
569 		if (opts->flags & CBQCLF_DEFCLASS)
570 			printf(" default");
571 		printf(" ) ");
572 
573 		return (1);
574 	} else
575 		return (0);
576 }
577 
578 /*
579  * PRIQ support functions
580  */
581 static int
582 eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
583 {
584 	struct pf_altq	*altq;
585 
586 	if (pa->priority >= PRIQ_MAXPRI) {
587 		warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
588 		return (-1);
589 	}
590 	/* the priority should be unique for the interface */
591 	TAILQ_FOREACH(altq, &altqs, entries) {
592 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
593 		    altq->qname[0] != 0 && altq->priority == pa->priority) {
594 			warnx("%s and %s have the same priority",
595 			    altq->qname, pa->qname);
596 			return (-1);
597 		}
598 	}
599 
600 	return (0);
601 }
602 
603 static int
604 check_commit_priq(int dev, int opts, struct pf_altq *pa)
605 {
606 	struct pf_altq	*altq;
607 	int		 default_class;
608 	int		 error = 0;
609 
610 	/*
611 	 * check if priq has one default class for this interface
612 	 */
613 	default_class = 0;
614 	TAILQ_FOREACH(altq, &altqs, entries) {
615 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
616 			continue;
617 		if (altq->qname[0] == 0)  /* this is for interface */
618 			continue;
619 		if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
620 			default_class++;
621 	}
622 	if (default_class != 1) {
623 		warnx("should have one default queue on %s", pa->ifname);
624 		error++;
625 	}
626 	return (error);
627 }
628 
629 static int
630 print_priq_opts(const struct pf_altq *a)
631 {
632 	const struct priq_opts	*opts;
633 
634 	opts = &a->pq_u.priq_opts;
635 
636 	if (opts->flags) {
637 		printf("priq(");
638 		if (opts->flags & PRCF_RED)
639 			printf(" red");
640 		if (opts->flags & PRCF_ECN)
641 			printf(" ecn");
642 		if (opts->flags & PRCF_RIO)
643 			printf(" rio");
644 		if (opts->flags & PRCF_CLEARDSCP)
645 			printf(" cleardscp");
646 		if (opts->flags & PRCF_DEFAULTCLASS)
647 			printf(" default");
648 		printf(" ) ");
649 
650 		return (1);
651 	} else
652 		return (0);
653 }
654 
655 /*
656  * HFSC support functions
657  */
658 static int
659 eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
660 {
661 	struct pf_altq		*altq, *parent;
662 	struct hfsc_opts	*opts;
663 	struct service_curve	 sc;
664 
665 	opts = &pa->pq_u.hfsc_opts;
666 
667 	if (pa->parent[0] == 0) {
668 		/* root queue */
669 		opts->lssc_m1 = pa->ifbandwidth;
670 		opts->lssc_m2 = pa->ifbandwidth;
671 		opts->lssc_d = 0;
672 		return (0);
673 	}
674 
675 	LIST_INIT(&rtsc);
676 	LIST_INIT(&lssc);
677 
678 	/* if link_share is not specified, use bandwidth */
679 	if (opts->lssc_m2 == 0)
680 		opts->lssc_m2 = pa->bandwidth;
681 
682 	if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
683 	    (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
684 	    (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
685 		warnx("m2 is zero for %s", pa->qname);
686 		return (-1);
687 	}
688 
689 	if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
690 	    (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
691 	    (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
692 		warnx("m1 must be zero for convex curve: %s", pa->qname);
693 		return (-1);
694 	}
695 
696 	/*
697 	 * admission control:
698 	 * for the real-time service curve, the sum of the service curves
699 	 * should not exceed 80% of the interface bandwidth.  20% is reserved
700 	 * not to over-commit the actual interface bandwidth.
701 	 * for the linkshare service curve, the sum of the child service
702 	 * curve should not exceed the parent service curve.
703 	 * for the upper-limit service curve, the assigned bandwidth should
704 	 * be smaller than the interface bandwidth, and the upper-limit should
705 	 * be larger than the real-time service curve when both are defined.
706 	 */
707 	parent = qname_to_pfaltq(pa->parent, pa->ifname);
708 	if (parent == NULL)
709 		errx(1, "parent %s not found for %s", pa->parent, pa->qname);
710 
711 	TAILQ_FOREACH(altq, &altqs, entries) {
712 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
713 			continue;
714 		if (altq->qname[0] == 0)  /* this is for interface */
715 			continue;
716 
717 		/* if the class has a real-time service curve, add it. */
718 		if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
719 			sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
720 			sc.d = altq->pq_u.hfsc_opts.rtsc_d;
721 			sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
722 			gsc_add_sc(&rtsc, &sc);
723 		}
724 
725 		if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
726 			continue;
727 
728 		/* if the class has a linkshare service curve, add it. */
729 		if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
730 			sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
731 			sc.d = altq->pq_u.hfsc_opts.lssc_d;
732 			sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
733 			gsc_add_sc(&lssc, &sc);
734 		}
735 	}
736 
737 	/* check the real-time service curve.  reserve 20% of interface bw */
738 	if (opts->rtsc_m2 != 0) {
739 		/* add this queue to the sum */
740 		sc.m1 = opts->rtsc_m1;
741 		sc.d = opts->rtsc_d;
742 		sc.m2 = opts->rtsc_m2;
743 		gsc_add_sc(&rtsc, &sc);
744 		/* compare the sum with 80% of the interface */
745 		sc.m1 = 0;
746 		sc.d = 0;
747 		sc.m2 = pa->ifbandwidth / 100 * 80;
748 		if (!is_gsc_under_sc(&rtsc, &sc)) {
749 			warnx("real-time sc exceeds 80%% of the interface "
750 			    "bandwidth (%s)", rate2str((double)sc.m2));
751 			goto err_ret;
752 		}
753 	}
754 
755 	/* check the linkshare service curve. */
756 	if (opts->lssc_m2 != 0) {
757 		/* add this queue to the child sum */
758 		sc.m1 = opts->lssc_m1;
759 		sc.d = opts->lssc_d;
760 		sc.m2 = opts->lssc_m2;
761 		gsc_add_sc(&lssc, &sc);
762 		/* compare the sum of the children with parent's sc */
763 		sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
764 		sc.d = parent->pq_u.hfsc_opts.lssc_d;
765 		sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
766 		if (!is_gsc_under_sc(&lssc, &sc)) {
767 			warnx("linkshare sc exceeds parent's sc");
768 			goto err_ret;
769 		}
770 	}
771 
772 	/* check the upper-limit service curve. */
773 	if (opts->ulsc_m2 != 0) {
774 		if (opts->ulsc_m1 > pa->ifbandwidth ||
775 		    opts->ulsc_m2 > pa->ifbandwidth) {
776 			warnx("upper-limit larger than interface bandwidth");
777 			goto err_ret;
778 		}
779 		if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
780 			warnx("upper-limit sc smaller than real-time sc");
781 			goto err_ret;
782 		}
783 	}
784 
785 	gsc_destroy(&rtsc);
786 	gsc_destroy(&lssc);
787 
788 	return (0);
789 
790 err_ret:
791 	gsc_destroy(&rtsc);
792 	gsc_destroy(&lssc);
793 	return (-1);
794 }
795 
796 static int
797 check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
798 {
799 	struct pf_altq	*altq, *def = NULL;
800 	int		 default_class;
801 	int		 error = 0;
802 
803 	/* check if hfsc has one default queue for this interface */
804 	default_class = 0;
805 	TAILQ_FOREACH(altq, &altqs, entries) {
806 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
807 			continue;
808 		if (altq->qname[0] == 0)  /* this is for interface */
809 			continue;
810 		if (altq->parent[0] == 0)  /* dummy root */
811 			continue;
812 		if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
813 			default_class++;
814 			def = altq;
815 		}
816 	}
817 	if (default_class != 1) {
818 		warnx("should have one default queue on %s", pa->ifname);
819 		return (1);
820 	}
821 	/* make sure the default queue is a leaf */
822 	TAILQ_FOREACH(altq, &altqs, entries) {
823 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
824 			continue;
825 		if (altq->qname[0] == 0)  /* this is for interface */
826 			continue;
827 		if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
828 			warnx("default queue is not a leaf");
829 			error++;
830 		}
831 	}
832 	return (error);
833 }
834 
835 static int
836 print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
837 {
838 	const struct hfsc_opts		*opts;
839 	const struct node_hfsc_sc	*rtsc, *lssc, *ulsc;
840 
841 	opts = &a->pq_u.hfsc_opts;
842 	if (qopts == NULL)
843 		rtsc = lssc = ulsc = NULL;
844 	else {
845 		rtsc = &qopts->data.hfsc_opts.realtime;
846 		lssc = &qopts->data.hfsc_opts.linkshare;
847 		ulsc = &qopts->data.hfsc_opts.upperlimit;
848 	}
849 
850 	if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
851 	    (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
852 	    opts->lssc_d != 0))) {
853 		printf("hfsc(");
854 		if (opts->flags & HFCF_RED)
855 			printf(" red");
856 		if (opts->flags & HFCF_ECN)
857 			printf(" ecn");
858 		if (opts->flags & HFCF_RIO)
859 			printf(" rio");
860 		if (opts->flags & HFCF_CLEARDSCP)
861 			printf(" cleardscp");
862 		if (opts->flags & HFCF_DEFAULTCLASS)
863 			printf(" default");
864 		if (opts->rtsc_m2 != 0)
865 			print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
866 			    opts->rtsc_m2, rtsc);
867 		if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
868 		    opts->lssc_d != 0))
869 			print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
870 			    opts->lssc_m2, lssc);
871 		if (opts->ulsc_m2 != 0)
872 			print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
873 			    opts->ulsc_m2, ulsc);
874 		printf(" ) ");
875 
876 		return (1);
877 	} else
878 		return (0);
879 }
880 
881 /*
882  * admission control using generalized service curve
883  */
884 #ifndef __NetBSD__
885 #define	INFINITY	HUGE_VAL  /* positive infinity defined in <math.h> */
886 #endif /* !__NetBSD__ */
887 
888 /* add a new service curve to a generalized service curve */
889 static void
890 gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
891 {
892 	if (is_sc_null(sc))
893 		return;
894 	if (sc->d != 0)
895 		gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
896 	gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2);
897 }
898 
899 /*
900  * check whether all points of a generalized service curve have
901  * their y-coordinates no larger than a given two-piece linear
902  * service curve.
903  */
904 static int
905 is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
906 {
907 	struct segment	*s, *last, *end;
908 	double		 y;
909 
910 	if (is_sc_null(sc)) {
911 		if (LIST_EMPTY(gsc))
912 			return (1);
913 		LIST_FOREACH(s, gsc, _next) {
914 			if (s->m != 0)
915 				return (0);
916 		}
917 		return (1);
918 	}
919 	/*
920 	 * gsc has a dummy entry at the end with x = INFINITY.
921 	 * loop through up to this dummy entry.
922 	 */
923 	end = gsc_getentry(gsc, INFINITY);
924 	if (end == NULL)
925 		return (1);
926 	last = NULL;
927 	for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
928 		if (s->y > sc_x2y(sc, s->x))
929 			return (0);
930 		last = s;
931 	}
932 	/* last now holds the real last segment */
933 	if (last == NULL)
934 		return (1);
935 	if (last->m > sc->m2)
936 		return (0);
937 	if (last->x < sc->d && last->m > sc->m1) {
938 		y = last->y + (sc->d - last->x) * last->m;
939 		if (y > sc_x2y(sc, sc->d))
940 			return (0);
941 	}
942 	return (1);
943 }
944 
945 static void
946 gsc_destroy(struct gen_sc *gsc)
947 {
948 	struct segment	*s;
949 
950 	while ((s = LIST_FIRST(gsc)) != NULL) {
951 		LIST_REMOVE(s, _next);
952 		free(s);
953 	}
954 }
955 
956 /*
957  * return a segment entry starting at x.
958  * if gsc has no entry starting at x, a new entry is created at x.
959  */
960 static struct segment *
961 gsc_getentry(struct gen_sc *gsc, double x)
962 {
963 	struct segment	*new, *prev, *s;
964 
965 	prev = NULL;
966 	LIST_FOREACH(s, gsc, _next) {
967 		if (s->x == x)
968 			return (s);	/* matching entry found */
969 		else if (s->x < x)
970 			prev = s;
971 		else
972 			break;
973 	}
974 
975 	/* we have to create a new entry */
976 	if ((new = calloc(1, sizeof(struct segment))) == NULL)
977 		return (NULL);
978 
979 	new->x = x;
980 	if (x == INFINITY || s == NULL)
981 		new->d = 0;
982 	else if (s->x == INFINITY)
983 		new->d = INFINITY;
984 	else
985 		new->d = s->x - x;
986 	if (prev == NULL) {
987 		/* insert the new entry at the head of the list */
988 		new->y = 0;
989 		new->m = 0;
990 		LIST_INSERT_HEAD(gsc, new, _next);
991 	} else {
992 		/*
993 		 * the start point intersects with the segment pointed by
994 		 * prev.  divide prev into 2 segments
995 		 */
996 		if (x == INFINITY) {
997 			prev->d = INFINITY;
998 			if (prev->m == 0)
999 				new->y = prev->y;
1000 			else
1001 				new->y = INFINITY;
1002 		} else {
1003 			prev->d = x - prev->x;
1004 			new->y = prev->d * prev->m + prev->y;
1005 		}
1006 		new->m = prev->m;
1007 		LIST_INSERT_AFTER(prev, new, _next);
1008 	}
1009 	return (new);
1010 }
1011 
1012 /* add a segment to a generalized service curve */
1013 static int
1014 gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
1015 {
1016 	struct segment	*start, *end, *s;
1017 	double		 x2;
1018 
1019 	if (d == INFINITY)
1020 		x2 = INFINITY;
1021 	else
1022 		x2 = x + d;
1023 	start = gsc_getentry(gsc, x);
1024 	end = gsc_getentry(gsc, x2);
1025 	if (start == NULL || end == NULL)
1026 		return (-1);
1027 
1028 	for (s = start; s != end; s = LIST_NEXT(s, _next)) {
1029 		s->m += m;
1030 		s->y += y + (s->x - x) * m;
1031 	}
1032 
1033 	end = gsc_getentry(gsc, INFINITY);
1034 	for (; s != end; s = LIST_NEXT(s, _next)) {
1035 		s->y += m * d;
1036 	}
1037 
1038 	return (0);
1039 }
1040 
1041 /* get y-projection of a service curve */
1042 static double
1043 sc_x2y(struct service_curve *sc, double x)
1044 {
1045 	double	y;
1046 
1047 	if (x <= (double)sc->d)
1048 		/* y belongs to the 1st segment */
1049 		y = x * (double)sc->m1;
1050 	else
1051 		/* y belongs to the 2nd segment */
1052 		y = (double)sc->d * (double)sc->m1
1053 			+ (x - (double)sc->d) * (double)sc->m2;
1054 	return (y);
1055 }
1056 
/*
 * misc utilities
 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a rate as a short string with a ' ', K, M or G prefix followed
 * by 'b', e.g. "100 b", "1.50Kb", "2Mb".  Two decimals are printed only
 * when the scaled value has a fractional part.
 *
 * The result points into a static ring of R2S_BUFS buffers, so up to
 * R2S_BUFS results may be in use at once (e.g. within one printf call)
 * before the oldest is overwritten.
 *
 * Scaling stops at G: rates of 1000G and above are printed as a larger
 * number of G instead of running off the end of the prefix table.
 */
char *
rate2str(double rate)
{
	char		*buf;
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];  /* ring buffer */
	static int	 idx = 0;
	int		 i;
	static const char unit[] = " KMG";
	/* index of the last real prefix; sizeof also counts the NUL */
	const int	 last = (int)sizeof(unit) - 2;

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	for (i = 0; rate >= 1000 && i < last; i++)
		rate /= 1000;

	if ((int)(rate * 100) % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1086 
1087 u_int32_t
1088 getifspeed(char *ifname)
1089 {
1090 #ifdef __NetBSD__
1091 	int			 s;
1092 	struct ifdatareq	 ifdr;
1093 	struct if_data		*ifrdat;
1094 
1095 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1096 		err(1, "getifspeed: socket");
1097 	memset(&ifdr, 0, sizeof(ifdr));
1098 	if (strlcpy(ifdr.ifdr_name, ifname, sizeof(ifdr.ifdr_name)) >=
1099 	    sizeof(ifdr.ifdr_name))
1100 		errx(1, "getifspeed: strlcpy");
1101 	if (ioctl(s, SIOCGIFDATA, &ifdr) == -1)
1102 		err(1, "getifspeed: SIOCGIFDATA");
1103 	ifrdat = &ifdr.ifdr_data;
1104 	if (close(s) == -1)
1105 		err(1, "getifspeed: close");
1106 	return ((u_int32_t)ifrdat->ifi_baudrate);
1107 #else
1108 	int		s;
1109 	struct ifreq	ifr;
1110 	struct if_data	ifrdat;
1111 
1112 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1113 		err(1, "socket");
1114 	bzero(&ifr, sizeof(ifr));
1115 	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1116 	    sizeof(ifr.ifr_name))
1117 		errx(1, "getifspeed: strlcpy");
1118 	ifr.ifr_data = (caddr_t)&ifrdat;
1119 	if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
1120 		err(1, "SIOCGIFDATA");
1121 	if (close(s))
1122 		err(1, "close");
1123 	return ((u_int32_t)ifrdat.ifi_baudrate);
1124 #endif /* !__NetBSD__ */
1125 }
1126 
1127 u_long
1128 getifmtu(char *ifname)
1129 {
1130 	int		s;
1131 	struct ifreq	ifr;
1132 
1133 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1134 		err(1, "socket");
1135 	bzero(&ifr, sizeof(ifr));
1136 	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1137 	    sizeof(ifr.ifr_name))
1138 		errx(1, "getifmtu: strlcpy");
1139 	if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1140 		err(1, "SIOCGIFMTU");
1141 	if (close(s) == -1)
1142 		err(1, "close");
1143 	if (ifr.ifr_mtu > 0)
1144 		return (ifr.ifr_mtu);
1145 	else {
1146 		warnx("could not get mtu for %s, assuming 1500", ifname);
1147 		return (1500);
1148 	}
1149 }
1150 
1151 int
1152 eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
1153     u_int32_t ref_bw)
1154 {
1155 	int	errors = 0;
1156 
1157 	switch (pa->scheduler) {
1158 	case ALTQT_CBQ:
1159 		pa->pq_u.cbq_opts = opts->data.cbq_opts;
1160 		break;
1161 	case ALTQT_PRIQ:
1162 		pa->pq_u.priq_opts = opts->data.priq_opts;
1163 		break;
1164 	case ALTQT_HFSC:
1165 		pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
1166 		if (opts->data.hfsc_opts.linkshare.used) {
1167 			pa->pq_u.hfsc_opts.lssc_m1 =
1168 			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
1169 			    ref_bw);
1170 			pa->pq_u.hfsc_opts.lssc_m2 =
1171 			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
1172 			    ref_bw);
1173 			pa->pq_u.hfsc_opts.lssc_d =
1174 			    opts->data.hfsc_opts.linkshare.d;
1175 		}
1176 		if (opts->data.hfsc_opts.realtime.used) {
1177 			pa->pq_u.hfsc_opts.rtsc_m1 =
1178 			    eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
1179 			    ref_bw);
1180 			pa->pq_u.hfsc_opts.rtsc_m2 =
1181 			    eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
1182 			    ref_bw);
1183 			pa->pq_u.hfsc_opts.rtsc_d =
1184 			    opts->data.hfsc_opts.realtime.d;
1185 		}
1186 		if (opts->data.hfsc_opts.upperlimit.used) {
1187 			pa->pq_u.hfsc_opts.ulsc_m1 =
1188 			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
1189 			    ref_bw);
1190 			pa->pq_u.hfsc_opts.ulsc_m2 =
1191 			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
1192 			    ref_bw);
1193 			pa->pq_u.hfsc_opts.ulsc_d =
1194 			    opts->data.hfsc_opts.upperlimit.d;
1195 		}
1196 		break;
1197 	default:
1198 		warnx("eval_queue_opts: unknown scheduler type %u",
1199 		    opts->qtype);
1200 		errors++;
1201 		break;
1202 	}
1203 
1204 	return (errors);
1205 }
1206 
1207 u_int32_t
1208 eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1209 {
1210 	if (bw->bw_absolute > 0)
1211 		return (bw->bw_absolute);
1212 
1213 	if (bw->bw_percent > 0)
1214 		return (ref_bw / 100 * bw->bw_percent);
1215 
1216 	return (0);
1217 }
1218 
1219 void
1220 print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1221     const struct node_hfsc_sc *sc)
1222 {
1223 	printf(" %s", scname);
1224 
1225 	if (d != 0) {
1226 		printf("(");
1227 		if (sc != NULL && sc->m1.bw_percent > 0)
1228 			printf("%u%%", sc->m1.bw_percent);
1229 		else
1230 			printf("%s", rate2str((double)m1));
1231 		printf(" %u", d);
1232 	}
1233 
1234 	if (sc != NULL && sc->m2.bw_percent > 0)
1235 		printf(" %u%%", sc->m2.bw_percent);
1236 	else
1237 		printf(" %s", rate2str((double)m2));
1238 
1239 	if (d != 0)
1240 		printf(")");
1241 }
1242