1 /* $NetBSD: pfctl_altq.c,v 1.11 2021/07/24 21:31:31 andvar Exp $ */
2 /* $OpenBSD: pfctl_altq.c,v 1.92 2007/05/27 05:15:17 claudio Exp $ */
3
4 /*
5 * Copyright (c) 2002
6 * Sony Computer Science Laboratories Inc.
7 * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
8 *
9 * Permission to use, copy, modify, and distribute this software for any
10 * purpose with or without fee is hereby granted, provided that the above
11 * copyright notice and this permission notice appear in all copies.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 */
21
22 #include <sys/types.h>
23 #include <sys/ioctl.h>
24 #include <sys/socket.h>
25 #ifdef __NetBSD__
26 #include <sys/param.h>
27 #include <sys/mbuf.h>
28 #endif
29
30 #include <net/if.h>
31 #include <netinet/in.h>
32 #include <net/pfvar.h>
33
34 #include <err.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42
43 #include <altq/altq.h>
44 #include <altq/altq_cbq.h>
45 #include <altq/altq_priq.h>
46 #include <altq/altq_hfsc.h>
47
48 #include "pfctl_parser.h"
49 #include "pfctl.h"
50
51 #define is_sc_null(sc) (((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
52
53 TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
54 LIST_HEAD(gen_sc, segment) rtsc, lssc;
55
56 struct pf_altq *qname_to_pfaltq(const char *, const char *);
57 u_int32_t qname_to_qid(const char *);
58
59 static int eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
60 static int cbq_compute_idletime(struct pfctl *, struct pf_altq *);
61 static int check_commit_cbq(int, int, struct pf_altq *);
62 static int print_cbq_opts(const struct pf_altq *);
63
64 static int eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
65 static int check_commit_priq(int, int, struct pf_altq *);
66 static int print_priq_opts(const struct pf_altq *);
67
68 static int eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
69 static int check_commit_hfsc(int, int, struct pf_altq *);
70 static int print_hfsc_opts(const struct pf_altq *,
71 const struct node_queue_opt *);
72
73 static void gsc_add_sc(struct gen_sc *, struct service_curve *);
74 static int is_gsc_under_sc(struct gen_sc *,
75 struct service_curve *);
76 static void gsc_destroy(struct gen_sc *);
77 static struct segment *gsc_getentry(struct gen_sc *, double);
78 static int gsc_add_seg(struct gen_sc *, double, double, double,
79 double);
80 static double sc_x2y(struct service_curve *, double);
81
82 u_int32_t getifspeed(char *);
83 u_long getifmtu(char *);
84 int eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
85 u_int32_t);
86 u_int32_t eval_bwspec(struct node_queue_bw *, u_int32_t);
87 void print_hfsc_sc(const char *, u_int, u_int, u_int,
88 const struct node_hfsc_sc *);
89
90 void
pfaltq_store(struct pf_altq * a)91 pfaltq_store(struct pf_altq *a)
92 {
93 struct pf_altq *altq;
94
95 if ((altq = malloc(sizeof(*altq))) == NULL)
96 err(1, "malloc");
97 memcpy(altq, a, sizeof(struct pf_altq));
98 TAILQ_INSERT_TAIL(&altqs, altq, entries);
99 }
100
101 struct pf_altq *
pfaltq_lookup(const char * ifname)102 pfaltq_lookup(const char *ifname)
103 {
104 struct pf_altq *altq;
105
106 TAILQ_FOREACH(altq, &altqs, entries) {
107 if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
108 altq->qname[0] == 0)
109 return (altq);
110 }
111 return (NULL);
112 }
113
114 struct pf_altq *
qname_to_pfaltq(const char * qname,const char * ifname)115 qname_to_pfaltq(const char *qname, const char *ifname)
116 {
117 struct pf_altq *altq;
118
119 TAILQ_FOREACH(altq, &altqs, entries) {
120 if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
121 strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
122 return (altq);
123 }
124 return (NULL);
125 }
126
127 u_int32_t
qname_to_qid(const char * qname)128 qname_to_qid(const char *qname)
129 {
130 struct pf_altq *altq;
131
132 /*
133 * We guarantee that same named queues on different interfaces
134 * have the same qid, so we do NOT need to limit matching on
135 * one interface!
136 */
137
138 TAILQ_FOREACH(altq, &altqs, entries) {
139 if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
140 return (altq->qid);
141 }
142 return (0);
143 }
144
145 void
print_altq(const struct pf_altq * a,unsigned level,struct node_queue_bw * bw,struct node_queue_opt * qopts)146 print_altq(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
147 struct node_queue_opt *qopts)
148 {
149 if (a->qname[0] != 0) {
150 print_queue(a, level, bw, 1, qopts);
151 return;
152 }
153
154 printf("altq on %s ", a->ifname);
155
156 switch (a->scheduler) {
157 case ALTQT_CBQ:
158 if (!print_cbq_opts(a))
159 printf("cbq ");
160 break;
161 case ALTQT_PRIQ:
162 if (!print_priq_opts(a))
163 printf("priq ");
164 break;
165 case ALTQT_HFSC:
166 if (!print_hfsc_opts(a, qopts))
167 printf("hfsc ");
168 break;
169 }
170
171 if (bw != NULL && bw->bw_percent > 0) {
172 if (bw->bw_percent < 100)
173 printf("bandwidth %u%% ", bw->bw_percent);
174 } else
175 printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
176
177 if (a->qlimit != DEFAULT_QLIMIT)
178 printf("qlimit %u ", a->qlimit);
179 printf("tbrsize %u ", a->tbrsize);
180 }
181
182 void
print_queue(const struct pf_altq * a,unsigned level,struct node_queue_bw * bw,int print_interface,struct node_queue_opt * qopts)183 print_queue(const struct pf_altq *a, unsigned level, struct node_queue_bw *bw,
184 int print_interface, struct node_queue_opt *qopts)
185 {
186 unsigned i;
187
188 printf("queue ");
189 for (i = 0; i < level; ++i)
190 printf(" ");
191 printf("%s ", a->qname);
192 if (print_interface)
193 printf("on %s ", a->ifname);
194 if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
195 if (bw != NULL && bw->bw_percent > 0) {
196 if (bw->bw_percent < 100)
197 printf("bandwidth %u%% ", bw->bw_percent);
198 } else
199 printf("bandwidth %s ", rate2str((double)a->bandwidth));
200 }
201 if (a->priority != DEFAULT_PRIORITY)
202 printf("priority %u ", a->priority);
203 if (a->qlimit != DEFAULT_QLIMIT)
204 printf("qlimit %u ", a->qlimit);
205 switch (a->scheduler) {
206 case ALTQT_CBQ:
207 print_cbq_opts(a);
208 break;
209 case ALTQT_PRIQ:
210 print_priq_opts(a);
211 break;
212 case ALTQT_HFSC:
213 print_hfsc_opts(a, qopts);
214 break;
215 }
216 }
217
218 /*
219 * eval_pfaltq computes the discipline parameters.
220 */
221 int
eval_pfaltq(struct pfctl * pf,struct pf_altq * pa,struct node_queue_bw * bw,struct node_queue_opt * opts)222 eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
223 struct node_queue_opt *opts)
224 {
225 u_int rate, size, errors = 0;
226
227 if (bw->bw_absolute > 0)
228 pa->ifbandwidth = bw->bw_absolute;
229 else
230 if ((rate = getifspeed(pa->ifname)) == 0) {
231 fprintf(stderr, "interface %s does not know its bandwidth, "
232 "please specify an absolute bandwidth\n",
233 pa->ifname);
234 errors++;
235 } else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
236 pa->ifbandwidth = rate;
237
238 errors += eval_queue_opts(pa, opts, pa->ifbandwidth);
239
240 /* if tbrsize is not specified, use heuristics */
241 if (pa->tbrsize == 0) {
242 rate = pa->ifbandwidth;
243 if (rate <= 1 * 1000 * 1000)
244 size = 1;
245 else if (rate <= 10 * 1000 * 1000)
246 size = 4;
247 else if (rate <= 200 * 1000 * 1000)
248 size = 8;
249 else
250 size = 24;
251 size = size * getifmtu(pa->ifname);
252 if (size > 0xffff)
253 size = 0xffff;
254 pa->tbrsize = size;
255 }
256 return (errors);
257 }
258
259 /*
260 * check_commit_altq does consistency check for each interface
261 */
262 int
check_commit_altq(int dev,int opts)263 check_commit_altq(int dev, int opts)
264 {
265 struct pf_altq *altq;
266 int error = 0;
267
268 /* call the discipline check for each interface. */
269 TAILQ_FOREACH(altq, &altqs, entries) {
270 if (altq->qname[0] == 0) {
271 switch (altq->scheduler) {
272 case ALTQT_CBQ:
273 error = check_commit_cbq(dev, opts, altq);
274 break;
275 case ALTQT_PRIQ:
276 error = check_commit_priq(dev, opts, altq);
277 break;
278 case ALTQT_HFSC:
279 error = check_commit_hfsc(dev, opts, altq);
280 break;
281 default:
282 break;
283 }
284 }
285 }
286 return (error);
287 }
288
289 /*
290 * eval_pfqueue computes the queue parameters.
291 */
292 int
eval_pfqueue(struct pfctl * pf,struct pf_altq * pa,struct node_queue_bw * bw,struct node_queue_opt * opts)293 eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
294 struct node_queue_opt *opts)
295 {
296 /* should be merged with expand_queue */
297 struct pf_altq *if_pa, *parent, *altq;
298 u_int32_t bwsum;
299 int error = 0;
300
301 /* find the corresponding interface and copy fields used by queues */
302 if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
303 fprintf(stderr, "altq not defined on %s\n", pa->ifname);
304 return (1);
305 }
306 pa->scheduler = if_pa->scheduler;
307 pa->ifbandwidth = if_pa->ifbandwidth;
308
309 if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
310 fprintf(stderr, "queue %s already exists on interface %s\n",
311 pa->qname, pa->ifname);
312 return (1);
313 }
314 pa->qid = qname_to_qid(pa->qname);
315
316 parent = NULL;
317 if (pa->parent[0] != 0) {
318 parent = qname_to_pfaltq(pa->parent, pa->ifname);
319 if (parent == NULL) {
320 fprintf(stderr, "parent %s not found for %s\n",
321 pa->parent, pa->qname);
322 return (1);
323 }
324 pa->parent_qid = parent->qid;
325 }
326 if (pa->qlimit == 0)
327 pa->qlimit = DEFAULT_QLIMIT;
328
329 if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
330 pa->bandwidth = eval_bwspec(bw,
331 parent == NULL ? 0 : parent->bandwidth);
332
333 if (pa->bandwidth > pa->ifbandwidth) {
334 fprintf(stderr, "bandwidth for %s higher than "
335 "interface\n", pa->qname);
336 return (1);
337 }
338 /* check the sum of the child bandwidth is under parent's */
339 if (parent != NULL) {
340 if (pa->bandwidth > parent->bandwidth) {
341 warnx("bandwidth for %s higher than parent",
342 pa->qname);
343 return (1);
344 }
345 bwsum = 0;
346 TAILQ_FOREACH(altq, &altqs, entries) {
347 if (strncmp(altq->ifname, pa->ifname,
348 IFNAMSIZ) == 0 &&
349 altq->qname[0] != 0 &&
350 strncmp(altq->parent, pa->parent,
351 PF_QNAME_SIZE) == 0)
352 bwsum += altq->bandwidth;
353 }
354 bwsum += pa->bandwidth;
355 if (bwsum > parent->bandwidth) {
356 warnx("the sum of the child bandwidth higher"
357 " than parent \"%s\"", parent->qname);
358 }
359 }
360 }
361
362 if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
363 return (1);
364
365 switch (pa->scheduler) {
366 case ALTQT_CBQ:
367 error = eval_pfqueue_cbq(pf, pa);
368 break;
369 case ALTQT_PRIQ:
370 error = eval_pfqueue_priq(pf, pa);
371 break;
372 case ALTQT_HFSC:
373 error = eval_pfqueue_hfsc(pf, pa);
374 break;
375 default:
376 break;
377 }
378 return (error);
379 }
380
381 /*
382 * CBQ support functions
383 */
384 #define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */
385 #define RM_NS_PER_SEC (1000000000)
386
387 static int
eval_pfqueue_cbq(struct pfctl * pf,struct pf_altq * pa)388 eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
389 {
390 struct cbq_opts *opts;
391 u_int ifmtu;
392
393 if (pa->priority >= CBQ_MAXPRI) {
394 warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
395 return (-1);
396 }
397
398 ifmtu = getifmtu(pa->ifname);
399 opts = &pa->pq_u.cbq_opts;
400
401 if (opts->pktsize == 0) { /* use default */
402 opts->pktsize = ifmtu;
403 if (opts->pktsize > MCLBYTES) /* do what TCP does */
404 opts->pktsize &= ~MCLBYTES;
405 } else if (opts->pktsize > ifmtu)
406 opts->pktsize = ifmtu;
407 if (opts->maxpktsize == 0) /* use default */
408 opts->maxpktsize = ifmtu;
409 else if (opts->maxpktsize > ifmtu)
410 opts->pktsize = ifmtu;
411
412 if (opts->pktsize > opts->maxpktsize)
413 opts->pktsize = opts->maxpktsize;
414
415 if (pa->parent[0] == 0)
416 opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
417
418 cbq_compute_idletime(pf, pa);
419 return (0);
420 }
421
422 /*
423 * compute ns_per_byte, maxidle, minidle, and offtime
424 */
425 static int
cbq_compute_idletime(struct pfctl * pf,struct pf_altq * pa)426 cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
427 {
428 struct cbq_opts *opts;
429 double maxidle_s, maxidle, minidle;
430 double offtime, nsPerByte, ifnsPerByte, ptime, cptime;
431 double z, g, f, gton, gtom;
432 u_int minburst, maxburst;
433
434 opts = &pa->pq_u.cbq_opts;
435 ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
436 minburst = opts->minburst;
437 maxburst = opts->maxburst;
438
439 if (pa->bandwidth == 0)
440 f = 0.0001; /* small enough? */
441 else
442 f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
443
444 nsPerByte = ifnsPerByte / f;
445 ptime = (double)opts->pktsize * ifnsPerByte;
446 cptime = ptime * (1.0 - f) / f;
447
448 if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
449 /*
450 * this causes integer overflow in kernel!
451 * (bandwidth < 6Kbps when max_pkt_size=1500)
452 */
453 if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0) {
454 warnx("queue bandwidth must be larger than %s",
455 rate2str(ifnsPerByte * (double)opts->maxpktsize /
456 (double)INT_MAX * (double)pa->ifbandwidth));
457 fprintf(stderr, "cbq: queue %s is too slow!\n",
458 pa->qname);
459 }
460 nsPerByte = (double)(INT_MAX / opts->maxpktsize);
461 }
462
463 if (maxburst == 0) { /* use default */
464 if (cptime > 10.0 * 1000000)
465 maxburst = 4;
466 else
467 maxburst = 16;
468 }
469 if (minburst == 0) /* use default */
470 minburst = 2;
471 if (minburst > maxburst)
472 minburst = maxburst;
473
474 z = (double)(1 << RM_FILTER_GAIN);
475 g = (1.0 - 1.0 / z);
476 gton = pow(g, (double)maxburst);
477 gtom = pow(g, (double)(minburst-1));
478 maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
479 maxidle_s = (1.0 - g);
480 if (maxidle > maxidle_s)
481 maxidle = ptime * maxidle;
482 else
483 maxidle = ptime * maxidle_s;
484 offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
485 minidle = -((double)opts->maxpktsize * (double)nsPerByte);
486
487 /* scale parameters */
488 maxidle = ((maxidle * 8.0) / nsPerByte) *
489 pow(2.0, (double)RM_FILTER_GAIN);
490 offtime = (offtime * 8.0) / nsPerByte *
491 pow(2.0, (double)RM_FILTER_GAIN);
492 minidle = ((minidle * 8.0) / nsPerByte) *
493 pow(2.0, (double)RM_FILTER_GAIN);
494
495 maxidle = maxidle / 1000.0;
496 offtime = offtime / 1000.0;
497 minidle = minidle / 1000.0;
498
499 opts->minburst = minburst;
500 opts->maxburst = maxburst;
501 opts->ns_per_byte = (u_int)nsPerByte;
502 opts->maxidle = (u_int)fabs(maxidle);
503 opts->minidle = (int)minidle;
504 opts->offtime = (u_int)fabs(offtime);
505
506 return (0);
507 }
508
509 static int
check_commit_cbq(int dev,int opts,struct pf_altq * pa)510 check_commit_cbq(int dev, int opts, struct pf_altq *pa)
511 {
512 struct pf_altq *altq;
513 int root_class, default_class;
514 int error = 0;
515
516 /*
517 * check if cbq has one root queue and one default queue
518 * for this interface
519 */
520 root_class = default_class = 0;
521 TAILQ_FOREACH(altq, &altqs, entries) {
522 if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
523 continue;
524 if (altq->qname[0] == 0) /* this is for interface */
525 continue;
526 if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
527 root_class++;
528 if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
529 default_class++;
530 }
531 if (root_class != 1) {
532 warnx("should have one root queue on %s", pa->ifname);
533 error++;
534 }
535 if (default_class != 1) {
536 warnx("should have one default queue on %s", pa->ifname);
537 error++;
538 }
539 return (error);
540 }
541
542 static int
print_cbq_opts(const struct pf_altq * a)543 print_cbq_opts(const struct pf_altq *a)
544 {
545 const struct cbq_opts *opts;
546
547 opts = &a->pq_u.cbq_opts;
548 if (opts->flags) {
549 printf("cbq(");
550 if (opts->flags & CBQCLF_RED)
551 printf(" red");
552 if (opts->flags & CBQCLF_ECN)
553 printf(" ecn");
554 if (opts->flags & CBQCLF_RIO)
555 printf(" rio");
556 if (opts->flags & CBQCLF_CLEARDSCP)
557 printf(" cleardscp");
558 if (opts->flags & CBQCLF_FLOWVALVE)
559 printf(" flowvalve");
560 #ifdef CBQCLF_BORROW
561 if (opts->flags & CBQCLF_BORROW)
562 printf(" borrow");
563 #endif
564 if (opts->flags & CBQCLF_WRR)
565 printf(" wrr");
566 if (opts->flags & CBQCLF_EFFICIENT)
567 printf(" efficient");
568 if (opts->flags & CBQCLF_ROOTCLASS)
569 printf(" root");
570 if (opts->flags & CBQCLF_DEFCLASS)
571 printf(" default");
572 printf(" ) ");
573
574 return (1);
575 } else
576 return (0);
577 }
578
579 /*
580 * PRIQ support functions
581 */
582 static int
eval_pfqueue_priq(struct pfctl * pf,struct pf_altq * pa)583 eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
584 {
585 struct pf_altq *altq;
586
587 if (pa->priority >= PRIQ_MAXPRI) {
588 warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
589 return (-1);
590 }
591 /* the priority should be unique for the interface */
592 TAILQ_FOREACH(altq, &altqs, entries) {
593 if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
594 altq->qname[0] != 0 && altq->priority == pa->priority) {
595 warnx("%s and %s have the same priority",
596 altq->qname, pa->qname);
597 return (-1);
598 }
599 }
600
601 return (0);
602 }
603
604 static int
check_commit_priq(int dev,int opts,struct pf_altq * pa)605 check_commit_priq(int dev, int opts, struct pf_altq *pa)
606 {
607 struct pf_altq *altq;
608 int default_class;
609 int error = 0;
610
611 /*
612 * check if priq has one default class for this interface
613 */
614 default_class = 0;
615 TAILQ_FOREACH(altq, &altqs, entries) {
616 if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
617 continue;
618 if (altq->qname[0] == 0) /* this is for interface */
619 continue;
620 if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
621 default_class++;
622 }
623 if (default_class != 1) {
624 warnx("should have one default queue on %s", pa->ifname);
625 error++;
626 }
627 return (error);
628 }
629
630 static int
print_priq_opts(const struct pf_altq * a)631 print_priq_opts(const struct pf_altq *a)
632 {
633 const struct priq_opts *opts;
634
635 opts = &a->pq_u.priq_opts;
636
637 if (opts->flags) {
638 printf("priq(");
639 if (opts->flags & PRCF_RED)
640 printf(" red");
641 if (opts->flags & PRCF_ECN)
642 printf(" ecn");
643 if (opts->flags & PRCF_RIO)
644 printf(" rio");
645 if (opts->flags & PRCF_CLEARDSCP)
646 printf(" cleardscp");
647 if (opts->flags & PRCF_DEFAULTCLASS)
648 printf(" default");
649 printf(" ) ");
650
651 return (1);
652 } else
653 return (0);
654 }
655
656 /*
657 * HFSC support functions
658 */
659 static int
eval_pfqueue_hfsc(struct pfctl * pf,struct pf_altq * pa)660 eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
661 {
662 struct pf_altq *altq, *parent;
663 struct hfsc_opts *opts;
664 struct service_curve sc;
665
666 opts = &pa->pq_u.hfsc_opts;
667
668 if (pa->parent[0] == 0) {
669 /* root queue */
670 opts->lssc_m1 = pa->ifbandwidth;
671 opts->lssc_m2 = pa->ifbandwidth;
672 opts->lssc_d = 0;
673 return (0);
674 }
675
676 LIST_INIT(&rtsc);
677 LIST_INIT(&lssc);
678
679 /* if link_share is not specified, use bandwidth */
680 if (opts->lssc_m2 == 0)
681 opts->lssc_m2 = pa->bandwidth;
682
683 if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
684 (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
685 (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
686 warnx("m2 is zero for %s", pa->qname);
687 return (-1);
688 }
689
690 if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
691 (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
692 (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
693 warnx("m1 must be zero for convex curve: %s", pa->qname);
694 return (-1);
695 }
696
697 /*
698 * admission control:
699 * for the real-time service curve, the sum of the service curves
700 * should not exceed 80% of the interface bandwidth. 20% is reserved
701 * not to over-commit the actual interface bandwidth.
702 * for the linkshare service curve, the sum of the child service
703 * curve should not exceed the parent service curve.
704 * for the upper-limit service curve, the assigned bandwidth should
705 * be smaller than the interface bandwidth, and the upper-limit should
706 * be larger than the real-time service curve when both are defined.
707 */
708 parent = qname_to_pfaltq(pa->parent, pa->ifname);
709 if (parent == NULL)
710 errx(1, "parent %s not found for %s", pa->parent, pa->qname);
711
712 TAILQ_FOREACH(altq, &altqs, entries) {
713 if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
714 continue;
715 if (altq->qname[0] == 0) /* this is for interface */
716 continue;
717
718 /* if the class has a real-time service curve, add it. */
719 if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
720 sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
721 sc.d = altq->pq_u.hfsc_opts.rtsc_d;
722 sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
723 gsc_add_sc(&rtsc, &sc);
724 }
725
726 if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
727 continue;
728
729 /* if the class has a linkshare service curve, add it. */
730 if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
731 sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
732 sc.d = altq->pq_u.hfsc_opts.lssc_d;
733 sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
734 gsc_add_sc(&lssc, &sc);
735 }
736 }
737
738 /* check the real-time service curve. reserve 20% of interface bw */
739 if (opts->rtsc_m2 != 0) {
740 /* add this queue to the sum */
741 sc.m1 = opts->rtsc_m1;
742 sc.d = opts->rtsc_d;
743 sc.m2 = opts->rtsc_m2;
744 gsc_add_sc(&rtsc, &sc);
745 /* compare the sum with 80% of the interface */
746 sc.m1 = 0;
747 sc.d = 0;
748 sc.m2 = pa->ifbandwidth / 100 * 80;
749 if (!is_gsc_under_sc(&rtsc, &sc)) {
750 warnx("real-time sc exceeds 80%% of the interface "
751 "bandwidth (%s)", rate2str((double)sc.m2));
752 goto err_ret;
753 }
754 }
755
756 /* check the linkshare service curve. */
757 if (opts->lssc_m2 != 0) {
758 /* add this queue to the child sum */
759 sc.m1 = opts->lssc_m1;
760 sc.d = opts->lssc_d;
761 sc.m2 = opts->lssc_m2;
762 gsc_add_sc(&lssc, &sc);
763 /* compare the sum of the children with parent's sc */
764 sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
765 sc.d = parent->pq_u.hfsc_opts.lssc_d;
766 sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
767 if (!is_gsc_under_sc(&lssc, &sc)) {
768 warnx("linkshare sc exceeds parent's sc");
769 goto err_ret;
770 }
771 }
772
773 /* check the upper-limit service curve. */
774 if (opts->ulsc_m2 != 0) {
775 if (opts->ulsc_m1 > pa->ifbandwidth ||
776 opts->ulsc_m2 > pa->ifbandwidth) {
777 warnx("upper-limit larger than interface bandwidth");
778 goto err_ret;
779 }
780 if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
781 warnx("upper-limit sc smaller than real-time sc");
782 goto err_ret;
783 }
784 }
785
786 gsc_destroy(&rtsc);
787 gsc_destroy(&lssc);
788
789 return (0);
790
791 err_ret:
792 gsc_destroy(&rtsc);
793 gsc_destroy(&lssc);
794 return (-1);
795 }
796
797 static int
check_commit_hfsc(int dev,int opts,struct pf_altq * pa)798 check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
799 {
800 struct pf_altq *altq, *def = NULL;
801 int default_class;
802 int error = 0;
803
804 /* check if hfsc has one default queue for this interface */
805 default_class = 0;
806 TAILQ_FOREACH(altq, &altqs, entries) {
807 if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
808 continue;
809 if (altq->qname[0] == 0) /* this is for interface */
810 continue;
811 if (altq->parent[0] == 0) /* dummy root */
812 continue;
813 if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
814 default_class++;
815 def = altq;
816 }
817 }
818 if (default_class != 1) {
819 warnx("should have one default queue on %s", pa->ifname);
820 return (1);
821 }
822 /* make sure the default queue is a leaf */
823 TAILQ_FOREACH(altq, &altqs, entries) {
824 if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
825 continue;
826 if (altq->qname[0] == 0) /* this is for interface */
827 continue;
828 if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
829 warnx("default queue is not a leaf");
830 error++;
831 }
832 }
833 return (error);
834 }
835
836 static int
print_hfsc_opts(const struct pf_altq * a,const struct node_queue_opt * qopts)837 print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
838 {
839 const struct hfsc_opts *opts;
840 const struct node_hfsc_sc *rtsc, *lssc, *ulsc;
841
842 opts = &a->pq_u.hfsc_opts;
843 if (qopts == NULL)
844 rtsc = lssc = ulsc = NULL;
845 else {
846 rtsc = &qopts->data.hfsc_opts.realtime;
847 lssc = &qopts->data.hfsc_opts.linkshare;
848 ulsc = &qopts->data.hfsc_opts.upperlimit;
849 }
850
851 if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
852 (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
853 opts->lssc_d != 0))) {
854 printf("hfsc(");
855 if (opts->flags & HFCF_RED)
856 printf(" red");
857 if (opts->flags & HFCF_ECN)
858 printf(" ecn");
859 if (opts->flags & HFCF_RIO)
860 printf(" rio");
861 if (opts->flags & HFCF_CLEARDSCP)
862 printf(" cleardscp");
863 if (opts->flags & HFCF_DEFAULTCLASS)
864 printf(" default");
865 if (opts->rtsc_m2 != 0)
866 print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
867 opts->rtsc_m2, rtsc);
868 if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
869 opts->lssc_d != 0))
870 print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
871 opts->lssc_m2, lssc);
872 if (opts->ulsc_m2 != 0)
873 print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
874 opts->ulsc_m2, ulsc);
875 printf(" ) ");
876
877 return (1);
878 } else
879 return (0);
880 }
881
882 /*
883 * admission control using generalized service curve
884 */
885
886 /* add a new service curve to a generalized service curve */
887 static void
gsc_add_sc(struct gen_sc * gsc,struct service_curve * sc)888 gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
889 {
890 if (is_sc_null(sc))
891 return;
892 if (sc->d != 0)
893 gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
894 gsc_add_seg(gsc, (double)sc->d, 0.0, HUGE_VAL, (double)sc->m2);
895 }
896
897 /*
898 * check whether all points of a generalized service curve have
899 * their y-coordinates no larger than a given two-piece linear
900 * service curve.
901 */
902 static int
is_gsc_under_sc(struct gen_sc * gsc,struct service_curve * sc)903 is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
904 {
905 struct segment *s, *last, *end;
906 double y;
907
908 if (is_sc_null(sc)) {
909 if (LIST_EMPTY(gsc))
910 return (1);
911 LIST_FOREACH(s, gsc, _next) {
912 if (s->m != 0)
913 return (0);
914 }
915 return (1);
916 }
917 /*
918 * gsc has a dummy entry at the end with x = HUGE_VAL.
919 * loop through up to this dummy entry.
920 */
921 end = gsc_getentry(gsc, HUGE_VAL);
922 if (end == NULL)
923 return (1);
924 last = NULL;
925 for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
926 if (s->y > sc_x2y(sc, s->x))
927 return (0);
928 last = s;
929 }
930 /* last now holds the real last segment */
931 if (last == NULL)
932 return (1);
933 if (last->m > sc->m2)
934 return (0);
935 if (last->x < sc->d && last->m > sc->m1) {
936 y = last->y + (sc->d - last->x) * last->m;
937 if (y > sc_x2y(sc, sc->d))
938 return (0);
939 }
940 return (1);
941 }
942
943 static void
gsc_destroy(struct gen_sc * gsc)944 gsc_destroy(struct gen_sc *gsc)
945 {
946 struct segment *s;
947
948 while ((s = LIST_FIRST(gsc)) != NULL) {
949 LIST_REMOVE(s, _next);
950 free(s);
951 }
952 }
953
954 /*
955 * return a segment entry starting at x.
956 * if gsc has no entry starting at x, a new entry is created at x.
957 */
958 static struct segment *
gsc_getentry(struct gen_sc * gsc,double x)959 gsc_getentry(struct gen_sc *gsc, double x)
960 {
961 struct segment *new, *prev, *s;
962
963 prev = NULL;
964 LIST_FOREACH(s, gsc, _next) {
965 if (s->x == x)
966 return (s); /* matching entry found */
967 else if (s->x < x)
968 prev = s;
969 else
970 break;
971 }
972
973 /* we have to create a new entry */
974 if ((new = calloc(1, sizeof(struct segment))) == NULL)
975 return (NULL);
976
977 new->x = x;
978 if (x == HUGE_VAL || s == NULL)
979 new->d = 0;
980 else if (s->x == HUGE_VAL)
981 new->d = HUGE_VAL;
982 else
983 new->d = s->x - x;
984 if (prev == NULL) {
985 /* insert the new entry at the head of the list */
986 new->y = 0;
987 new->m = 0;
988 LIST_INSERT_HEAD(gsc, new, _next);
989 } else {
990 /*
991 * the start point intersects with the segment pointed by
992 * prev. divide prev into 2 segments
993 */
994 if (x == HUGE_VAL) {
995 prev->d = HUGE_VAL;
996 if (prev->m == 0)
997 new->y = prev->y;
998 else
999 new->y = HUGE_VAL;
1000 } else {
1001 prev->d = x - prev->x;
1002 new->y = prev->d * prev->m + prev->y;
1003 }
1004 new->m = prev->m;
1005 LIST_INSERT_AFTER(prev, new, _next);
1006 }
1007 return (new);
1008 }
1009
1010 /* add a segment to a generalized service curve */
1011 static int
gsc_add_seg(struct gen_sc * gsc,double x,double y,double d,double m)1012 gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
1013 {
1014 struct segment *start, *end, *s;
1015 double x2;
1016
1017 if (d == HUGE_VAL)
1018 x2 = HUGE_VAL;
1019 else
1020 x2 = x + d;
1021 start = gsc_getentry(gsc, x);
1022 end = gsc_getentry(gsc, x2);
1023 if (start == NULL || end == NULL)
1024 return (-1);
1025
1026 for (s = start; s != end; s = LIST_NEXT(s, _next)) {
1027 s->m += m;
1028 s->y += y + (s->x - x) * m;
1029 }
1030
1031 end = gsc_getentry(gsc, HUGE_VAL);
1032 for (; s != end; s = LIST_NEXT(s, _next)) {
1033 s->y += m * d;
1034 }
1035
1036 return (0);
1037 }
1038
1039 /* get y-projection of a service curve */
1040 static double
sc_x2y(struct service_curve * sc,double x)1041 sc_x2y(struct service_curve *sc, double x)
1042 {
1043 double y;
1044
1045 if (x <= (double)sc->d)
1046 /* y belongs to the 1st segment */
1047 y = x * (double)sc->m1;
1048 else
1049 /* y belongs to the 2nd segment */
1050 y = (double)sc->d * (double)sc->m1
1051 + (x - (double)sc->d) * (double)sc->m2;
1052 return (y);
1053 }
1054
1055 /*
1056 * misc utilities
1057 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a rate as a short string such as "100 b", "1.50Kb" or "10Mb".
 *
 * The rate is divided by 1000 until it drops below 1000 or the largest
 * unit ('G') is reached; larger rates stay expressed in G.  Two decimals
 * are printed only when the scaled value has a fractional part at that
 * precision.
 *
 * The result lives in a static ring of R2S_BUFS buffers, so a returned
 * string is overwritten by the R2S_BUFS-th following call.
 */
char *
rate2str(double rate)
{
	static char		 r2sbuf[R2S_BUFS][RATESTR_MAX];	/* ring buffer */
	static int		 idx = 0;
	static const char	 unit[] = " KMG";
	/* index of the last real unit letter; unit[] also holds a NUL */
	const int		 last_unit = (int)(sizeof(unit) - 2);
	char			*buf;
	int			 i;

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	/*
	 * Stop at the last unit: stepping past it would select the
	 * terminating NUL of unit[] and truncate the output.
	 */
	for (i = 0; rate >= 1000 && i < last_unit; i++)
		rate /= 1000;

	if ((int)(rate * 100) % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1084
1085 u_int32_t
getifspeed(char * ifname)1086 getifspeed(char *ifname)
1087 {
1088 #ifdef __NetBSD__
1089 int s;
1090 struct ifdatareq ifdr;
1091 struct if_data *ifrdat;
1092
1093 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1094 err(1, "getifspeed: socket");
1095 memset(&ifdr, 0, sizeof(ifdr));
1096 if (strlcpy(ifdr.ifdr_name, ifname, sizeof(ifdr.ifdr_name)) >=
1097 sizeof(ifdr.ifdr_name))
1098 errx(1, "getifspeed: strlcpy");
1099 if (ioctl(s, SIOCGIFDATA, &ifdr) == -1)
1100 err(1, "getifspeed: SIOCGIFDATA");
1101 ifrdat = &ifdr.ifdr_data;
1102 if (close(s) == -1)
1103 err(1, "getifspeed: close");
1104 return ((u_int32_t)ifrdat->ifi_baudrate);
1105 #else
1106 int s;
1107 struct ifreq ifr;
1108 struct if_data ifrdat;
1109
1110 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1111 err(1, "socket");
1112 bzero(&ifr, sizeof(ifr));
1113 if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1114 sizeof(ifr.ifr_name))
1115 errx(1, "getifspeed: strlcpy");
1116 ifr.ifr_data = (caddr_t)&ifrdat;
1117 if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
1118 err(1, "SIOCGIFDATA");
1119 if (close(s))
1120 err(1, "close");
1121 return ((u_int32_t)ifrdat.ifi_baudrate);
1122 #endif /* !__NetBSD__ */
1123 }
1124
1125 u_long
getifmtu(char * ifname)1126 getifmtu(char *ifname)
1127 {
1128 int s;
1129 struct ifreq ifr;
1130
1131 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1132 err(1, "socket");
1133 bzero(&ifr, sizeof(ifr));
1134 if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1135 sizeof(ifr.ifr_name))
1136 errx(1, "getifmtu: strlcpy");
1137 if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1138 err(1, "SIOCGIFMTU");
1139 if (close(s) == -1)
1140 err(1, "close");
1141 if (ifr.ifr_mtu > 0)
1142 return (ifr.ifr_mtu);
1143 else {
1144 warnx("could not get mtu for %s, assuming 1500", ifname);
1145 return (1500);
1146 }
1147 }
1148
1149 int
eval_queue_opts(struct pf_altq * pa,struct node_queue_opt * opts,u_int32_t ref_bw)1150 eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
1151 u_int32_t ref_bw)
1152 {
1153 int errors = 0;
1154
1155 switch (pa->scheduler) {
1156 case ALTQT_CBQ:
1157 pa->pq_u.cbq_opts = opts->data.cbq_opts;
1158 break;
1159 case ALTQT_PRIQ:
1160 pa->pq_u.priq_opts = opts->data.priq_opts;
1161 break;
1162 case ALTQT_HFSC:
1163 pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
1164 if (opts->data.hfsc_opts.linkshare.used) {
1165 pa->pq_u.hfsc_opts.lssc_m1 =
1166 eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
1167 ref_bw);
1168 pa->pq_u.hfsc_opts.lssc_m2 =
1169 eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
1170 ref_bw);
1171 pa->pq_u.hfsc_opts.lssc_d =
1172 opts->data.hfsc_opts.linkshare.d;
1173 }
1174 if (opts->data.hfsc_opts.realtime.used) {
1175 pa->pq_u.hfsc_opts.rtsc_m1 =
1176 eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
1177 ref_bw);
1178 pa->pq_u.hfsc_opts.rtsc_m2 =
1179 eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
1180 ref_bw);
1181 pa->pq_u.hfsc_opts.rtsc_d =
1182 opts->data.hfsc_opts.realtime.d;
1183 }
1184 if (opts->data.hfsc_opts.upperlimit.used) {
1185 pa->pq_u.hfsc_opts.ulsc_m1 =
1186 eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
1187 ref_bw);
1188 pa->pq_u.hfsc_opts.ulsc_m2 =
1189 eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
1190 ref_bw);
1191 pa->pq_u.hfsc_opts.ulsc_d =
1192 opts->data.hfsc_opts.upperlimit.d;
1193 }
1194 break;
1195 default:
1196 warnx("eval_queue_opts: unknown scheduler type %u",
1197 opts->qtype);
1198 errors++;
1199 break;
1200 }
1201
1202 return (errors);
1203 }
1204
1205 u_int32_t
eval_bwspec(struct node_queue_bw * bw,u_int32_t ref_bw)1206 eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1207 {
1208 if (bw->bw_absolute > 0)
1209 return (bw->bw_absolute);
1210
1211 if (bw->bw_percent > 0)
1212 return (ref_bw / 100 * bw->bw_percent);
1213
1214 return (0);
1215 }
1216
1217 void
print_hfsc_sc(const char * scname,u_int m1,u_int d,u_int m2,const struct node_hfsc_sc * sc)1218 print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1219 const struct node_hfsc_sc *sc)
1220 {
1221 printf(" %s", scname);
1222
1223 if (d != 0) {
1224 printf("(");
1225 if (sc != NULL && sc->m1.bw_percent > 0)
1226 printf("%u%%", sc->m1.bw_percent);
1227 else
1228 printf("%s", rate2str((double)m1));
1229 printf(" %u", d);
1230 }
1231
1232 if (sc != NULL && sc->m2.bw_percent > 0)
1233 printf(" %u%%", sc->m2.bw_percent);
1234 else
1235 printf(" %s", rate2str((double)m2));
1236
1237 if (d != 0)
1238 printf(")");
1239 }
1240