1 /* $NetBSD: altq_cdnr.c,v 1.23 2025/01/08 13:00:04 joe Exp $ */ 2 /* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */ 3 4 /* 5 * Copyright (C) 1999-2002 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: altq_cdnr.c,v 1.23 2025/01/08 13:00:04 joe Exp $"); 32 33 #ifdef _KERNEL_OPT 34 #include "opt_altq.h" 35 #include "opt_inet.h" 36 #endif 37 38 #include <sys/param.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/sockio.h> 43 #include <sys/systm.h> 44 #include <sys/proc.h> 45 #include <sys/errno.h> 46 #include <sys/kernel.h> 47 #include <sys/queue.h> 48 #include <sys/kauth.h> 49 #include <sys/cprng.h> 50 51 #include <net/if.h> 52 #include <net/if_types.h> 53 #include <netinet/in.h> 54 #include <netinet/in_systm.h> 55 #include <netinet/ip.h> 56 #ifdef INET6 57 #include <netinet/ip6.h> 58 #endif 59 60 #include <altq/altq.h> 61 #include <altq/altq_conf.h> 62 #include <altq/altq_cdnr.h> 63 64 #ifdef ALTQ3_COMPAT 65 /* 66 * diffserv traffic conditioning module 67 */ 68 69 int altq_cdnr_enabled = 0; 70 71 /* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */ 72 #ifdef ALTQ_CDNR 73 74 /* cdnr_list keeps all cdnr's allocated. */ 75 static LIST_HEAD(, top_cdnr) tcb_list; 76 77 static int altq_cdnr_input(struct mbuf *, int); 78 static struct top_cdnr *tcb_lookup(char *ifname); 79 static struct cdnr_block *cdnr_handle2cb(u_long); 80 static u_long cdnr_cb2handle(struct cdnr_block *); 81 static void *cdnr_cballoc(struct top_cdnr *, int, 82 struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *)); 83 static void cdnr_cbdestroy(void *); 84 static int tca_verify_action(struct tc_action *); 85 static void tca_import_action(struct tc_action *, struct tc_action *); 86 static void tca_invalidate_action(struct tc_action *); 87 88 static int generic_element_destroy(struct cdnr_block *); 89 static struct top_cdnr *top_create(struct ifaltq *); 90 static int top_destroy(struct top_cdnr *); 91 static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *); 92 static int element_destroy(struct cdnr_block *); 93 static void tb_import_profile(struct tbe *, struct tb_profile *); 94 static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *, 95 struct tc_action *, struct tc_action *); 96 static int tbm_destroy(struct tbmeter *); 97 static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *); 98 static struct trtcm *trtcm_create(struct top_cdnr *, 99 struct tb_profile *, struct tb_profile *, 100 struct tc_action *, struct tc_action *, struct tc_action *, 101 int); 102 static int trtcm_destroy(struct trtcm *); 103 static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); 104 static struct tswtcm *tswtcm_create(struct top_cdnr *, 105 u_int32_t, u_int32_t, u_int32_t, 106 struct tc_action *, struct tc_action *, struct tc_action *); 107 static int tswtcm_destroy(struct tswtcm *); 108 static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *); 109 110 static int cdnrcmd_if_attach(char *); 111 static int cdnrcmd_if_detach(char *); 112 static int cdnrcmd_add_element(struct cdnr_add_element *); 113 static int cdnrcmd_delete_element(struct cdnr_delete_element *); 114 static int cdnrcmd_add_filter(struct cdnr_add_filter *); 115 static int cdnrcmd_delete_filter(struct cdnr_delete_filter *); 116 static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *); 117 static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *); 118 static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *); 119 static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *); 120 static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *); 121 static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *); 122 static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *); 123 static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *); 124 static int cdnrcmd_get_stats(struct cdnr_get_stats *); 125 126 altqdev_decl(cdnr); 127 128 /* 129 * top level input function called from ip_input. 130 * should be called before converting header fields to host-byte-order. 131 */ 132 int 133 altq_cdnr_input(struct mbuf *m, int af) 134 { 135 struct ifnet *ifp; 136 struct ip *ip; 137 struct top_cdnr *top; 138 struct tc_action *tca; 139 struct cdnr_block *cb; 140 struct cdnr_pktinfo pktinfo; 141 142 ifp = m_get_rcvif_NOMPSAFE(m); 143 if (!ALTQ_IS_CNDTNING(&ifp->if_snd)) 144 /* traffic conditioner is not enabled on this interface */ 145 return 1; 146 147 top = ifp->if_snd.altq_cdnr; 148 149 ip = mtod(m, struct ip *); 150 #ifdef INET6 151 if (af == AF_INET6) { 152 u_int32_t flowlabel; 153 154 flowlabel = ((struct ip6_hdr *)ip)->ip6_flow; 155 pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK; 156 } else 157 #endif 158 pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK; 159 pktinfo.pkt_len = m_pktlen(m); 160 161 tca = NULL; 162 163 cb = acc_classify(&top->tc_classifier, m, af); 164 if (cb != NULL) 165 tca = &cb->cb_action; 166 167 if (tca == NULL) 168 tca = &top->tc_block.cb_action; 169 170 while (1) { 171 PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len); 172 173 switch (tca->tca_code) { 174 case TCACODE_PASS: 175 return 1; 176 case TCACODE_DROP: 177 m_freem(m); 178 return 0; 179 case TCACODE_RETURN: 180 return 0; 181 case TCACODE_MARK: 182 #ifdef INET6 183 if (af == AF_INET6) { 184 struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; 185 u_int32_t flowlabel; 186 187 flowlabel = ntohl(ip6->ip6_flow); 188 flowlabel = (tca->tca_dscp << 20) | 189 (flowlabel & ~(DSCP_MASK << 20)); 190 ip6->ip6_flow = htonl(flowlabel); 191 } else 192 #endif 193 ip->ip_tos = tca->tca_dscp | 194 (ip->ip_tos & DSCP_CUMASK); 195 return 1; 196 case TCACODE_NEXT: 197 cb = tca->tca_next; 198 tca = (*cb->cb_input)(cb, &pktinfo); 199 break; 200 case TCACODE_NONE: 201 default: 202 return 1; 203 } 204 } 205 } 206 207 static struct top_cdnr * 208 tcb_lookup(char *ifname) 209 { 210 struct top_cdnr *top; 211 struct ifnet *ifp; 212 213 if ((ifp = ifunit(ifname)) != NULL) 214 LIST_FOREACH(top, &tcb_list, tc_next) 215 if (top->tc_ifq->altq_ifp == ifp) 216 return top; 217 return NULL; 218 } 219 220 static struct cdnr_block * 221 cdnr_handle2cb(u_long handle) 222 { 223 struct cdnr_block *cb; 224 225 cb = (struct cdnr_block *)handle; 226 if (handle != ALIGN(cb)) 227 return NULL; 228 229 if (cb == NULL || cb->cb_handle != handle) 230 return NULL; 231 return cb; 232 } 233 234 static u_long 235 cdnr_cb2handle(struct cdnr_block *cb) 236 { 237 return (cb->cb_handle); 238 } 239 240 static void * 241 cdnr_cballoc(struct top_cdnr *top, int type, struct tc_action *(*input_func)( 242 struct cdnr_block *, struct cdnr_pktinfo *)) 243 { 244 struct cdnr_block *cb; 245 int size; 246 247 switch (type) { 248 case TCETYPE_TOP: 249 size = sizeof(struct top_cdnr); 250 break; 251 case TCETYPE_ELEMENT: 252 size = sizeof(struct cdnr_block); 253 break; 254 case TCETYPE_TBMETER: 255 size = sizeof(struct tbmeter); 256 break; 257 case TCETYPE_TRTCM: 258 size = sizeof(struct trtcm); 259 break; 260 case TCETYPE_TSWTCM: 261 size = sizeof(struct tswtcm); 262 break; 263 default: 264 return NULL; 265 } 266 267 cb = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO); 268 if (cb == NULL) 269 return NULL; 270 271 cb->cb_len = size; 272 cb->cb_type = type; 273 cb->cb_ref = 0; 274 cb->cb_handle = (u_long)cb; 275 if (top == NULL) 276 cb->cb_top = (struct top_cdnr *)cb; 277 else 278 cb->cb_top = top; 279 280 if (input_func != NULL) { 281 /* 282 * if this cdnr has an action function, 283 * make tc_action to call itself. 284 */ 285 cb->cb_action.tca_code = TCACODE_NEXT; 286 cb->cb_action.tca_next = cb; 287 cb->cb_input = input_func; 288 } else 289 cb->cb_action.tca_code = TCACODE_NONE; 290 291 /* if this isn't top, register the element to the top level cdnr */ 292 if (top != NULL) 293 LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next); 294 295 return ((void *)cb); 296 } 297 298 static void 299 cdnr_cbdestroy(void *cblock) 300 { 301 struct cdnr_block *cb = cblock; 302 303 /* delete filters belonging to this cdnr */ 304 acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0); 305 306 /* remove from the top level cdnr */ 307 if (cb->cb_top != cblock) 308 LIST_REMOVE(cb, cb_next); 309 310 free(cb, M_DEVBUF); 311 } 312 313 /* 314 * conditioner common destroy routine 315 */ 316 static int 317 generic_element_destroy(struct cdnr_block *cb) 318 { 319 int error = 0; 320 321 switch (cb->cb_type) { 322 case TCETYPE_TOP: 323 error = top_destroy((struct top_cdnr *)cb); 324 break; 325 case TCETYPE_ELEMENT: 326 error = element_destroy(cb); 327 break; 328 case TCETYPE_TBMETER: 329 error = tbm_destroy((struct tbmeter *)cb); 330 break; 331 case TCETYPE_TRTCM: 332 error = trtcm_destroy((struct trtcm *)cb); 333 break; 334 case TCETYPE_TSWTCM: 335 error = tswtcm_destroy((struct tswtcm *)cb); 336 break; 337 default: 338 error = EINVAL; 339 } 340 return error; 341 } 342 343 static int 344 tca_verify_action(struct tc_action *utca) 345 { 346 switch (utca->tca_code) { 347 case TCACODE_PASS: 348 case TCACODE_DROP: 349 case TCACODE_MARK: 350 /* these are ok */ 351 break; 352 353 case TCACODE_HANDLE: 354 /* verify handle value */ 355 if (cdnr_handle2cb(utca->tca_handle) == NULL) 356 return -1; 357 break; 358 359 case TCACODE_NONE: 360 case TCACODE_RETURN: 361 case TCACODE_NEXT: 362 default: 363 /* should not be passed from a user */ 364 return -1; 365 } 366 return 0; 367 } 368 369 static void 370 tca_import_action(struct tc_action *ktca, struct tc_action *utca) 371 { 372 struct cdnr_block *cb; 373 374 *ktca = *utca; 375 if (ktca->tca_code == TCACODE_HANDLE) { 376 cb = cdnr_handle2cb(ktca->tca_handle); 377 if (cb == NULL) { 378 ktca->tca_code = TCACODE_NONE; 379 return; 380 } 381 ktca->tca_code = TCACODE_NEXT; 382 ktca->tca_next = cb; 383 cb->cb_ref++; 384 } else if (ktca->tca_code == TCACODE_MARK) { 385 ktca->tca_dscp &= DSCP_MASK; 386 } 387 return; 388 } 389 390 static void 391 tca_invalidate_action(struct tc_action *tca) 392 { 393 struct cdnr_block *cb; 394 395 if (tca->tca_code == TCACODE_NEXT) { 396 cb = tca->tca_next; 397 if (cb == NULL) 398 return; 399 cb->cb_ref--; 400 } 401 tca->tca_code = TCACODE_NONE; 402 } 403 404 /* 405 * top level traffic conditioner 406 */ 407 static struct top_cdnr * 408 top_create(struct ifaltq *ifq) 409 { 410 struct top_cdnr *top; 411 412 if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL) 413 return NULL; 414 415 top->tc_ifq = ifq; 416 /* set default action for the top level conditioner */ 417 top->tc_block.cb_action.tca_code = TCACODE_PASS; 418 419 LIST_INSERT_HEAD(&tcb_list, top, tc_next); 420 421 ifq->altq_cdnr = top; 422 423 return top; 424 } 425 426 static int 427 top_destroy(struct top_cdnr *top) 428 { 429 struct cdnr_block *cb; 430 431 if (ALTQ_IS_CNDTNING(top->tc_ifq)) 432 ALTQ_CLEAR_CNDTNING(top->tc_ifq); 433 top->tc_ifq->altq_cdnr = NULL; 434 435 /* 436 * destroy all the conditioner elements belonging to this interface 437 */ 438 while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) { 439 while (cb != NULL && cb->cb_ref > 0) 440 cb = LIST_NEXT(cb, cb_next); 441 if (cb != NULL) 442 generic_element_destroy(cb); 443 } 444 445 LIST_REMOVE(top, tc_next); 446 447 cdnr_cbdestroy(top); 448 449 /* if there is no active conditioner, remove the input hook */ 450 if (altq_input != NULL) { 451 LIST_FOREACH(top, &tcb_list, tc_next) 452 if (ALTQ_IS_CNDTNING(top->tc_ifq)) 453 break; 454 if (top == NULL) 455 altq_input = NULL; 456 } 457 458 return 0; 459 } 460 461 /* 462 * simple tc elements without input function (e.g., dropper and makers). 463 */ 464 static struct cdnr_block * 465 element_create(struct top_cdnr *top, struct tc_action *action) 466 { 467 struct cdnr_block *cb; 468 469 if (tca_verify_action(action) < 0) 470 return NULL; 471 472 if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL) 473 return NULL; 474 475 tca_import_action(&cb->cb_action, action); 476 477 return cb; 478 } 479 480 static int 481 element_destroy(struct cdnr_block *cb) 482 { 483 if (cb->cb_ref > 0) 484 return EBUSY; 485 486 tca_invalidate_action(&cb->cb_action); 487 488 cdnr_cbdestroy(cb); 489 return 0; 490 } 491 492 /* 493 * internal representation of token bucket parameters 494 * rate: byte_per_unittime << 32 495 * (((bits_per_sec) / 8) << 32) / machclk_freq 496 * depth: byte << 32 497 * 498 */ 499 #define TB_SHIFT 32 500 #define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT) 501 #define TB_UNSCALE(x) ((x) >> TB_SHIFT) 502 503 static void 504 tb_import_profile(struct tbe *tb, struct tb_profile *profile) 505 { 506 tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq; 507 tb->depth = TB_SCALE(profile->depth); 508 if (tb->rate > 0) 509 tb->filluptime = tb->depth / tb->rate; 510 else 511 tb->filluptime = 0xffffffffffffffffLL; 512 tb->token = tb->depth; 513 tb->last = read_machclk(); 514 } 515 516 /* 517 * simple token bucket meter 518 */ 519 static struct tbmeter * 520 tbm_create(struct top_cdnr *top, struct tb_profile *profile, 521 struct tc_action *in_action, struct tc_action *out_action) 522 { 523 struct tbmeter *tbm = NULL; 524 525 if (tca_verify_action(in_action) < 0 526 || tca_verify_action(out_action) < 0) 527 return NULL; 528 529 if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER, 530 tbm_input)) == NULL) 531 return NULL; 532 533 tb_import_profile(&tbm->tb, profile); 534 535 tca_import_action(&tbm->in_action, in_action); 536 tca_import_action(&tbm->out_action, out_action); 537 538 return tbm; 539 } 540 541 static int 542 tbm_destroy(struct tbmeter *tbm) 543 { 544 if (tbm->cdnrblk.cb_ref > 0) 545 return EBUSY; 546 547 tca_invalidate_action(&tbm->in_action); 548 tca_invalidate_action(&tbm->out_action); 549 550 cdnr_cbdestroy(tbm); 551 return 0; 552 } 553 554 static struct tc_action * 555 tbm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo) 556 { 557 struct tbmeter *tbm = (struct tbmeter *)cb; 558 u_int64_t len; 559 u_int64_t interval, now; 560 561 len = TB_SCALE(pktinfo->pkt_len); 562 563 if (tbm->tb.token < len) { 564 now = read_machclk(); 565 interval = now - tbm->tb.last; 566 if (interval >= tbm->tb.filluptime) 567 tbm->tb.token = tbm->tb.depth; 568 else { 569 tbm->tb.token += interval * tbm->tb.rate; 570 if (tbm->tb.token > tbm->tb.depth) 571 tbm->tb.token = tbm->tb.depth; 572 } 573 tbm->tb.last = now; 574 } 575 576 if (tbm->tb.token < len) { 577 PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len); 578 return (&tbm->out_action); 579 } 580 581 tbm->tb.token -= len; 582 PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len); 583 return (&tbm->in_action); 584 } 585 586 /* 587 * two rate three color marker 588 * as described in draft-heinanen-diffserv-trtcm-01.txt 589 */ 590 static struct trtcm * 591 trtcm_create(struct top_cdnr *top, struct tb_profile *cmtd_profile, 592 struct tb_profile *peak_profile, struct tc_action *green_action, 593 struct tc_action *yellow_action, struct tc_action *red_action, 594 int coloraware) 595 { 596 struct trtcm *tcm = NULL; 597 598 if (tca_verify_action(green_action) < 0 599 || tca_verify_action(yellow_action) < 0 600 || tca_verify_action(red_action) < 0) 601 return NULL; 602 603 if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM, 604 trtcm_input)) == NULL) 605 return NULL; 606 607 tb_import_profile(&tcm->cmtd_tb, cmtd_profile); 608 tb_import_profile(&tcm->peak_tb, peak_profile); 609 610 tca_import_action(&tcm->green_action, green_action); 611 tca_import_action(&tcm->yellow_action, yellow_action); 612 tca_import_action(&tcm->red_action, red_action); 613 614 /* set dscps to use */ 615 if (tcm->green_action.tca_code == TCACODE_MARK) 616 tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK; 617 else 618 tcm->green_dscp = DSCP_AF11; 619 if (tcm->yellow_action.tca_code == TCACODE_MARK) 620 tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK; 621 else 622 tcm->yellow_dscp = DSCP_AF12; 623 if (tcm->red_action.tca_code == TCACODE_MARK) 624 tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK; 625 else 626 tcm->red_dscp = DSCP_AF13; 627 628 tcm->coloraware = coloraware; 629 630 return tcm; 631 } 632 633 static int 634 trtcm_destroy(struct trtcm *tcm) 635 { 636 if (tcm->cdnrblk.cb_ref > 0) 637 return EBUSY; 638 639 tca_invalidate_action(&tcm->green_action); 640 tca_invalidate_action(&tcm->yellow_action); 641 tca_invalidate_action(&tcm->red_action); 642 643 cdnr_cbdestroy(tcm); 644 return 0; 645 } 646 647 static struct tc_action * 648 trtcm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo) 649 { 650 struct trtcm *tcm = (struct trtcm *)cb; 651 u_int64_t len; 652 u_int64_t interval, now; 653 u_int8_t color; 654 655 len = TB_SCALE(pktinfo->pkt_len); 656 if (tcm->coloraware) { 657 color = pktinfo->pkt_dscp; 658 if (color != tcm->yellow_dscp && color != tcm->red_dscp) 659 color = tcm->green_dscp; 660 } else { 661 /* if color-blind, precolor it as green */ 662 color = tcm->green_dscp; 663 } 664 665 now = read_machclk(); 666 if (tcm->cmtd_tb.token < len) { 667 interval = now - tcm->cmtd_tb.last; 668 if (interval >= tcm->cmtd_tb.filluptime) 669 tcm->cmtd_tb.token = tcm->cmtd_tb.depth; 670 else { 671 tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate; 672 if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth) 673 tcm->cmtd_tb.token = tcm->cmtd_tb.depth; 674 } 675 tcm->cmtd_tb.last = now; 676 } 677 if (tcm->peak_tb.token < len) { 678 interval = now - tcm->peak_tb.last; 679 if (interval >= tcm->peak_tb.filluptime) 680 tcm->peak_tb.token = tcm->peak_tb.depth; 681 else { 682 tcm->peak_tb.token += interval * tcm->peak_tb.rate; 683 if (tcm->peak_tb.token > tcm->peak_tb.depth) 684 tcm->peak_tb.token = tcm->peak_tb.depth; 685 } 686 tcm->peak_tb.last = now; 687 } 688 689 if (color == tcm->red_dscp || tcm->peak_tb.token < len) { 690 pktinfo->pkt_dscp = tcm->red_dscp; 691 PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len); 692 return (&tcm->red_action); 693 } 694 695 if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) { 696 pktinfo->pkt_dscp = tcm->yellow_dscp; 697 tcm->peak_tb.token -= len; 698 PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len); 699 return (&tcm->yellow_action); 700 } 701 702 pktinfo->pkt_dscp = tcm->green_dscp; 703 tcm->cmtd_tb.token -= len; 704 tcm->peak_tb.token -= len; 705 PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len); 706 return (&tcm->green_action); 707 } 708 709 /* 710 * time sliding window three color marker 711 * as described in draft-fang-diffserv-tc-tswtcm-00.txt 712 */ 713 static struct tswtcm * 714 tswtcm_create(struct top_cdnr *top, u_int32_t cmtd_rate, u_int32_t peak_rate, 715 u_int32_t avg_interval, struct tc_action *green_action, 716 struct tc_action *yellow_action, struct tc_action *red_action) 717 { 718 struct tswtcm *tsw; 719 720 if (tca_verify_action(green_action) < 0 721 || tca_verify_action(yellow_action) < 0 722 || tca_verify_action(red_action) < 0) 723 return NULL; 724 725 if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM, 726 tswtcm_input)) == NULL) 727 return NULL; 728 729 tca_import_action(&tsw->green_action, green_action); 730 tca_import_action(&tsw->yellow_action, yellow_action); 731 tca_import_action(&tsw->red_action, red_action); 732 733 /* set dscps to use */ 734 if (tsw->green_action.tca_code == TCACODE_MARK) 735 tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK; 736 else 737 tsw->green_dscp = DSCP_AF11; 738 if (tsw->yellow_action.tca_code == TCACODE_MARK) 739 tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK; 740 else 741 tsw->yellow_dscp = DSCP_AF12; 742 if (tsw->red_action.tca_code == TCACODE_MARK) 743 tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK; 744 else 745 tsw->red_dscp = DSCP_AF13; 746 747 /* convert rates from bits/sec to bytes/sec */ 748 tsw->cmtd_rate = cmtd_rate / 8; 749 tsw->peak_rate = peak_rate / 8; 750 tsw->avg_rate = 0; 751 752 /* timewin is converted from msec to machine clock unit */ 753 tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000; 754 755 return tsw; 756 } 757 758 static int 759 tswtcm_destroy(struct tswtcm *tsw) 760 { 761 if (tsw->cdnrblk.cb_ref > 0) 762 return EBUSY; 763 764 tca_invalidate_action(&tsw->green_action); 765 tca_invalidate_action(&tsw->yellow_action); 766 tca_invalidate_action(&tsw->red_action); 767 768 cdnr_cbdestroy(tsw); 769 return 0; 770 } 771 772 static struct tc_action * 773 tswtcm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo) 774 { 775 struct tswtcm *tsw = (struct tswtcm *)cb; 776 int len; 777 u_int32_t avg_rate; 778 u_int64_t interval, now, tmp; 779 780 /* 781 * rate estimator 782 */ 783 len = pktinfo->pkt_len; 784 now = read_machclk(); 785 786 interval = now - tsw->t_front; 787 /* 788 * calculate average rate: 789 * avg = (avg * timewin + pkt_len)/(timewin + interval) 790 * pkt_len needs to be multiplied by machclk_freq in order to 791 * get (bytes/sec). 792 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are 793 * less than 32 bits, the following 64-bit operation has enough 794 * precision. 795 */ 796 tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin 797 + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval); 798 tsw->avg_rate = avg_rate = (u_int32_t)tmp; 799 tsw->t_front = now; 800 801 /* 802 * marker 803 */ 804 if (avg_rate > tsw->cmtd_rate) { 805 u_int32_t randval = cprng_fast32() % avg_rate; 806 807 if (avg_rate > tsw->peak_rate) { 808 if (randval < avg_rate - tsw->peak_rate) { 809 /* mark red */ 810 pktinfo->pkt_dscp = tsw->red_dscp; 811 PKTCNTR_ADD(&tsw->red_cnt, len); 812 return (&tsw->red_action); 813 } else if (randval < avg_rate - tsw->cmtd_rate) 814 goto mark_yellow; 815 } else { 816 /* peak_rate >= avg_rate > cmtd_rate */ 817 if (randval < avg_rate - tsw->cmtd_rate) { 818 mark_yellow: 819 pktinfo->pkt_dscp = tsw->yellow_dscp; 820 PKTCNTR_ADD(&tsw->yellow_cnt, len); 821 return (&tsw->yellow_action); 822 } 823 } 824 } 825 826 /* mark green */ 827 pktinfo->pkt_dscp = tsw->green_dscp; 828 PKTCNTR_ADD(&tsw->green_cnt, len); 829 return (&tsw->green_action); 830 } 831 832 /* 833 * ioctl requests 834 */ 835 static int 836 cdnrcmd_if_attach(char *ifname) 837 { 838 struct ifnet *ifp; 839 struct top_cdnr *top; 840 841 if ((ifp = ifunit(ifname)) == NULL) 842 return EBADF; 843 844 if (ifp->if_snd.altq_cdnr != NULL) 845 return EBUSY; 846 847 if ((top = top_create(&ifp->if_snd)) == NULL) 848 return ENOMEM; 849 return 0; 850 } 851 852 static int 853 cdnrcmd_if_detach(char *ifname) 854 { 855 struct top_cdnr *top; 856 857 if ((top = tcb_lookup(ifname)) == NULL) 858 return EBADF; 859 860 return top_destroy(top); 861 } 862 863 static int 864 cdnrcmd_add_element(struct cdnr_add_element *ap) 865 { 866 struct top_cdnr *top; 867 struct cdnr_block *cb; 868 869 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 870 return EBADF; 871 872 cb = element_create(top, &ap->action); 873 if (cb == NULL) 874 return EINVAL; 875 /* return a class handle to the user */ 876 ap->cdnr_handle = cdnr_cb2handle(cb); 877 return 0; 878 } 879 880 static int 881 cdnrcmd_delete_element(struct cdnr_delete_element *ap) 882 { 883 struct top_cdnr *top; 884 struct cdnr_block *cb; 885 886 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 887 return EBADF; 888 889 if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) 890 return EINVAL; 891 892 if (cb->cb_type != TCETYPE_ELEMENT) 893 return generic_element_destroy(cb); 894 895 return element_destroy(cb); 896 } 897 898 static int 899 cdnrcmd_add_filter(struct cdnr_add_filter *ap) 900 { 901 struct top_cdnr *top; 902 struct cdnr_block *cb; 903 904 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 905 return EBADF; 906 907 if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) 908 return EINVAL; 909 910 return acc_add_filter(&top->tc_classifier, &ap->filter, 911 cb, &ap->filter_handle); 912 } 913 914 static int 915 cdnrcmd_delete_filter(struct cdnr_delete_filter *ap) 916 { 917 struct top_cdnr *top; 918 919 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 920 return EBADF; 921 922 return acc_delete_filter(&top->tc_classifier, ap->filter_handle); 923 } 924 925 static int 926 cdnrcmd_add_tbm(struct cdnr_add_tbmeter *ap) 927 { 928 struct top_cdnr *top; 929 struct tbmeter *tbm; 930 931 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 932 return EBADF; 933 934 tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action); 935 if (tbm == NULL) 936 return EINVAL; 937 /* return a class handle to the user */ 938 ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk); 939 return 0; 940 } 941 942 static int 943 cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *ap) 944 { 945 struct tbmeter *tbm; 946 947 if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) 948 return EINVAL; 949 950 tb_import_profile(&tbm->tb, &ap->profile); 951 952 return 0; 953 } 954 955 static int 956 cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *ap) 957 { 958 struct tbmeter *tbm; 959 960 if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) 961 return EINVAL; 962 963 ap->in_cnt = tbm->in_cnt; 964 ap->out_cnt = tbm->out_cnt; 965 966 return 0; 967 } 968 969 static int 970 cdnrcmd_add_trtcm(struct cdnr_add_trtcm *ap) 971 { 972 struct top_cdnr *top; 973 struct trtcm *tcm; 974 975 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 976 return EBADF; 977 978 tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile, 979 &ap->green_action, &ap->yellow_action, 980 &ap->red_action, ap->coloraware); 981 if (tcm == NULL) 982 return EINVAL; 983 984 /* return a class handle to the user */ 985 ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk); 986 return 0; 987 } 988 989 static int 990 cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *ap) 991 { 992 struct trtcm *tcm; 993 994 if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) 995 return EINVAL; 996 997 tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile); 998 tb_import_profile(&tcm->peak_tb, &ap->peak_profile); 999 1000 return 0; 1001 } 1002 1003 static int 1004 cdnrcmd_tcm_stats(struct cdnr_tcm_stats *ap) 1005 { 1006 struct cdnr_block *cb; 1007 1008 if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL) 1009 return EINVAL; 1010 1011 if (cb->cb_type == TCETYPE_TRTCM) { 1012 struct trtcm *tcm = (struct trtcm *)cb; 1013 1014 ap->green_cnt = tcm->green_cnt; 1015 ap->yellow_cnt = tcm->yellow_cnt; 1016 ap->red_cnt = tcm->red_cnt; 1017 } else if (cb->cb_type == TCETYPE_TSWTCM) { 1018 struct tswtcm *tsw = (struct tswtcm *)cb; 1019 1020 ap->green_cnt = tsw->green_cnt; 1021 ap->yellow_cnt = tsw->yellow_cnt; 1022 ap->red_cnt = tsw->red_cnt; 1023 } else 1024 return EINVAL; 1025 1026 return 0; 1027 } 1028 1029 static int 1030 cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *ap) 1031 { 1032 struct top_cdnr *top; 1033 struct tswtcm *tsw; 1034 1035 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 1036 return EBADF; 1037 1038 if (ap->cmtd_rate > ap->peak_rate) 1039 return EINVAL; 1040 1041 tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate, 1042 ap->avg_interval, &ap->green_action, 1043 &ap->yellow_action, &ap->red_action); 1044 if (tsw == NULL) 1045 return EINVAL; 1046 1047 /* return a class handle to the user */ 1048 ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk); 1049 return 0; 1050 } 1051 1052 static int 1053 cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *ap) 1054 { 1055 struct tswtcm *tsw; 1056 1057 if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL) 1058 return EINVAL; 1059 1060 if (ap->cmtd_rate > ap->peak_rate) 1061 return EINVAL; 1062 1063 /* convert rates from bits/sec to bytes/sec */ 1064 tsw->cmtd_rate = ap->cmtd_rate / 8; 1065 tsw->peak_rate = ap->peak_rate / 8; 1066 tsw->avg_rate = 0; 1067 1068 /* timewin is converted from msec to machine clock unit */ 1069 tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000; 1070 1071 return 0; 1072 } 1073 1074 static int 1075 cdnrcmd_get_stats(struct cdnr_get_stats *ap) 1076 { 1077 struct top_cdnr *top; 1078 struct cdnr_block *cb; 1079 struct tbmeter *tbm; 1080 struct trtcm *tcm; 1081 struct tswtcm *tsw; 1082 struct tce_stats tce, *usp; 1083 int error, n, nskip, nelements; 1084 1085 if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL) 1086 return EBADF; 1087 1088 /* copy action stats */ 1089 (void)memcpy(ap->cnts, top->tc_cnts, sizeof(ap->cnts)); 1090 1091 /* stats for each element */ 1092 nelements = ap->nelements; 1093 usp = ap->tce_stats; 1094 if (nelements <= 0 || usp == NULL) 1095 return 0; 1096 1097 nskip = ap->nskip; 1098 n = 0; 1099 LIST_FOREACH(cb, &top->tc_elements, cb_next) { 1100 if (nskip > 0) { 1101 nskip--; 1102 continue; 1103 } 1104 1105 (void)memset(&tce, 0, sizeof(tce)); 1106 tce.tce_handle = cb->cb_handle; 1107 tce.tce_type = cb->cb_type; 1108 switch (cb->cb_type) { 1109 case TCETYPE_TBMETER: 1110 tbm = (struct tbmeter *)cb; 1111 tce.tce_cnts[0] = tbm->in_cnt; 1112 tce.tce_cnts[1] = tbm->out_cnt; 1113 break; 1114 case TCETYPE_TRTCM: 1115 tcm = (struct trtcm *)cb; 1116 tce.tce_cnts[0] = tcm->green_cnt; 1117 tce.tce_cnts[1] = tcm->yellow_cnt; 1118 tce.tce_cnts[2] = tcm->red_cnt; 1119 break; 1120 case TCETYPE_TSWTCM: 1121 tsw = (struct tswtcm *)cb; 1122 tce.tce_cnts[0] = tsw->green_cnt; 1123 tce.tce_cnts[1] = tsw->yellow_cnt; 1124 tce.tce_cnts[2] = tsw->red_cnt; 1125 break; 1126 default: 1127 continue; 1128 } 1129 1130 if ((error = copyout((void *)&tce, (void *)usp++, 1131 sizeof(tce))) != 0) 1132 return error; 1133 1134 if (++n == nelements) 1135 break; 1136 } 1137 ap->nelements = n; 1138 1139 return 0; 1140 } 1141 1142 /* 1143 * conditioner device interface 1144 */ 1145 int 1146 cdnropen(dev_t dev, int flag, int fmt, 1147 struct lwp *l) 1148 { 1149 if (machclk_freq == 0) 1150 init_machclk(); 1151 1152 if (machclk_freq == 0) { 1153 printf("cdnr: no CPU clock available!\n"); 1154 return ENXIO; 1155 } 1156 1157 /* everything will be done when the queueing scheme is attached. */ 1158 return 0; 1159 } 1160 1161 int 1162 cdnrclose(dev_t dev, int flag, int fmt, 1163 struct lwp *l) 1164 { 1165 struct top_cdnr *top; 1166 int err, error = 0; 1167 1168 while ((top = LIST_FIRST(&tcb_list)) != NULL) { 1169 /* destroy all */ 1170 err = top_destroy(top); 1171 if (err != 0 && error == 0) 1172 error = err; 1173 } 1174 altq_input = NULL; 1175 1176 return error; 1177 } 1178 1179 int 1180 cdnrioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag, 1181 struct lwp *l) 1182 { 1183 struct top_cdnr *top; 1184 struct cdnr_interface *ifacep; 1185 int s, error = 0; 1186 1187 /* check super-user privilege */ 1188 switch (cmd) { 1189 case CDNR_GETSTATS: 1190 break; 1191 default: 1192 if ((error = kauth_authorize_network(l->l_cred, 1193 KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_CDNR, NULL, 1194 NULL, NULL)) != 0) 1195 return (error); 1196 break; 1197 } 1198 1199 s = splnet(); 1200 switch (cmd) { 1201 1202 case CDNR_IF_ATTACH: 1203 ifacep = (struct cdnr_interface *)addr; 1204 error = cdnrcmd_if_attach(ifacep->cdnr_ifname); 1205 break; 1206 1207 case CDNR_IF_DETACH: 1208 ifacep = (struct cdnr_interface *)addr; 1209 error = cdnrcmd_if_detach(ifacep->cdnr_ifname); 1210 break; 1211 1212 case CDNR_ENABLE: 1213 case CDNR_DISABLE: 1214 ifacep = (struct cdnr_interface *)addr; 1215 if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) { 1216 error = EBADF; 1217 break; 1218 } 1219 1220 switch (cmd) { 1221 1222 case CDNR_ENABLE: 1223 ALTQ_SET_CNDTNING(top->tc_ifq); 1224 if (altq_input == NULL) 1225 altq_input = altq_cdnr_input; 1226 break; 1227 1228 case CDNR_DISABLE: 1229 ALTQ_CLEAR_CNDTNING(top->tc_ifq); 1230 LIST_FOREACH(top, &tcb_list, tc_next) 1231 if (ALTQ_IS_CNDTNING(top->tc_ifq)) 1232 break; 1233 if (top == NULL) 1234 altq_input = NULL; 1235 break; 1236 } 1237 break; 1238 1239 case CDNR_ADD_ELEM: 1240 error = cdnrcmd_add_element((struct cdnr_add_element *)addr); 1241 break; 1242 1243 case CDNR_DEL_ELEM: 1244 error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr); 1245 break; 1246 1247 case CDNR_ADD_TBM: 1248 error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr); 1249 break; 1250 1251 case CDNR_MOD_TBM: 1252 error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr); 1253 break; 1254 1255 case CDNR_TBM_STATS: 1256 error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr); 1257 break; 1258 1259 case CDNR_ADD_TCM: 1260 error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr); 1261 break; 1262 1263 case CDNR_MOD_TCM: 1264 error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr); 1265 break; 1266 1267 case CDNR_TCM_STATS: 1268 error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr); 1269 break; 1270 1271 case CDNR_ADD_FILTER: 1272 error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr); 1273 break; 1274 1275 case CDNR_DEL_FILTER: 1276 error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr); 1277 break; 1278 1279 case CDNR_GETSTATS: 1280 error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr); 1281 break; 1282 1283 case CDNR_ADD_TSW: 1284 error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr); 1285 break; 1286 1287 case CDNR_MOD_TSW: 1288 error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr); 1289 break; 1290 1291 default: 1292 error = EINVAL; 1293 break; 1294 } 1295 splx(s); 1296 1297 return error; 1298 } 1299 1300 #ifdef KLD_MODULE 1301 1302 static struct altqsw cdnr_sw = 1303 {"cdnr", cdnropen, cdnrclose, cdnrioctl}; 1304 1305 ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw); 1306 1307 #endif /* KLD_MODULE */ 1308 1309 #endif /* ALTQ3_COMPAT */ 1310 #endif /* ALTQ_CDNR */ 1311