/*	$NetBSD: altq_subr.c,v 1.24 2007/10/19 12:16:36 ad Exp $	*/
/*	$KAME: altq_subr.c,v 1.24 2005/04/13 03:44:25 suz Exp $	*/

/*
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.24 2007/10/19 12:16:36 ad Exp $");

#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#include "pf.h"
#endif

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#if NPF > 0
#include <net/pfvar.h>
#endif
#include <altq/altq.h>
#ifdef ALTQ3_COMPAT
#include <altq/altq_conf.h>
#endif

/* machine dependent clock related includes */
#ifdef __FreeBSD__
#include "opt_cpu.h"	/* for FreeBSD-2.2.8 to get i586_ctr_freq */
#include <machine/clock.h>
#endif
#if defined(__i386__)
#include <machine/cpufunc.h>	/* for pentium tsc */
#include <machine/specialreg.h>	/* for CPUID_TSC */
#ifdef __FreeBSD__
#include <machine/md_var.h>	/* for cpu_feature */
#elif defined(__NetBSD__) || defined(__OpenBSD__)
#include <sys/cpu.h>		/* for cpu_feature */
#endif
#endif /* __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
int (*altq_input)(struct mbuf *, int) = NULL;
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout;

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
		    struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
		    struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag *ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(char *name, int type)
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(struct ifaltq *ifq, int type, void *discipline,
    int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *),
    struct mbuf *(*dequeue)(struct ifaltq *, int),
    int (*request)(struct ifaltq *, int, void *),
    void *clfier, void *(*classify)(void *, struct mbuf *, int))
{
	if (!ALTQ_IS_READY(ifq))
		return ENXIO;

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq))
			return EBUSY;
		if (ALTQ_IS_ATTACHED(ifq))
			return EEXIST;
	}
#endif
	ifq->altq_type = type;
	ifq->altq_disc = discipline;
	ifq->altq_enqueue = enqueue;
	ifq->altq_dequeue = dequeue;
	ifq->altq_request = request;
	ifq->altq_clfier = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_incref(type);
#endif
#endif
	return 0;
}

int
altq_detach(struct ifaltq *ifq)
{
	if (!ALTQ_IS_READY(ifq))
		return ENXIO;
	if (ALTQ_IS_ENABLED(ifq))
		return EBUSY;
	if (!ALTQ_IS_ATTACHED(ifq))
		return (0);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);
#endif
#endif

	ifq->altq_type = ALTQT_NONE;
	ifq->altq_disc = NULL;
	ifq->altq_enqueue = NULL;
	ifq->altq_dequeue = NULL;
	ifq->altq_request = NULL;
	ifq->altq_clfier = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	return 0;
}

int
altq_enable(struct ifaltq *ifq)
{
	int s;

	if (!ALTQ_IS_READY(ifq))
		return ENXIO;
	if (ALTQ_IS_ENABLED(ifq))
		return 0;

	s = splnet();
	IFQ_PURGE(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	return 0;
}

int
altq_disable(struct ifaltq *ifq)
{
	int s;

	if (!ALTQ_IS_ENABLED(ifq))
		return 0;

	s = splnet();
	IFQ_PURGE(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);
	return 0;
}
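
/*
 * Illustrative usage sketch only (not part of the original file): a
 * discipline typically attaches its state and callbacks and then enables
 * the queue.  "ALTQT_FOO" and the foo_* callbacks below are hypothetical
 * placeholder names, not real disciplines:
 *
 *	error = altq_attach(&ifp->if_snd, ALTQT_FOO, foo_state,
 *	    foo_enqueue, foo_dequeue, foo_request, NULL, NULL);
 *	if (error == 0)
 *		error = altq_enable(&ifp->if_snd);
 *	...
 *	(void)altq_disable(&ifp->if_snd);
 *	(void)altq_detach(&ifp->if_snd);
 */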
#ifdef ALTQ_DEBUG
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)

struct mbuf *
tbr_dequeue(struct ifaltq *ifq, int op)
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			IF_POLL(ifq, m);
		else
			IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr, *otbr;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no CPU clock available!\n");
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_WAITOK|M_ZERO);
	if (tbr == NULL)
		return (ENOMEM);

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	return (0);
}
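
/*
 * Worked example of the fixed-point scaling used above (numbers are
 * illustrative only): with profile->rate = 10,000,000 bps and
 * machclk_freq = 1,000,000,000 Hz,
 *
 *	tbr_rate  = ((10000000 / 8) << 32) / 1000000000
 *	          = (1250000 << 32) / 1e9
 *	          ~= 0.00125 bytes per machine-clock tick, in 32.32 fixed point
 *	tbr_depth = profile->depth << 32
 *
 * so tokens accumulate at tbr_rate per clock tick, each dequeued packet
 * subtracts TBR_SCALE(pktlen), and tbr_filluptime = depth / rate is the
 * number of clock ticks needed to refill an empty bucket.
 */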
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 */
static void
tbr_timeout(void *arg)
{
	struct ifnet *ifp;
	int active, s;

	active = 0;
	s = splnet();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
		if (!TBR_IS_ENABLED(&ifp->if_snd))
			continue;
		active++;
		if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
			(*ifp->if_start)(ifp);
	}
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
#if defined(__alpha__) && !defined(ALTQ_NOPCC)
	{
		/*
		 * XXX read out the machine dependent clock once a second
		 * to detect counter wrap-around.
		 */
		static u_int cnt;

		if (++cnt >= hz) {
			(void)read_machclk();
			cnt = 0;
		}
	}
#endif /* __alpha__ && !ALTQ_NOPCC */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr;

	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	return (0);
}

#if NPF > 0
/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	s = splnet();
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no CPU clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}
#endif /* NPF > 0 */
/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
		    ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
		    ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
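
/*
 * Note on the RFC 1624 incremental checksum update in write_dsfield()
 * (derivation only, no behaviour change): the TOS byte sits in a 16-bit
 * checksum word m = (hi << 8) | old, which becomes m' = (hi << 8) | dsfield.
 * Since the high byte hi is unchanged, it cancels in ones'-complement:
 *
 *	~m + m' = 0xffff - ((hi << 8) | old) + ((hi << 8) | dsfield)
 *	        = 0xffff - old + dsfield
 *	        = 0xff00 + (~old & 0xff) + dsfield
 *
 * which is exactly the constant folded into the sum above.
 */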

/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;
u_int32_t machclk_freq = 0;
u_int32_t machclk_per_tick = 0;

#ifdef __alpha__
#ifdef __FreeBSD__
extern u_int32_t cycles_per_sec;	/* alpha cpu clock frequency */
#elif defined(__NetBSD__) || defined(__OpenBSD__)
extern u_int64_t cycles_per_usec;	/* alpha cpu clock frequency */
#endif
#endif /* __alpha__ */

void
init_machclk(void)
{

	callout_init(&tbr_callout, 0);

	machclk_usepcc = 1;

#if (!defined(__i386__) && !defined(__alpha__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#ifdef __i386__
	/* check if TSC is available */
	if (machclk_usepcc == 1 && (cpu_feature & CPUID_TSC) == 0)
		machclk_usepcc = 0;
#endif

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz CPU clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#ifdef __i386__
#ifdef __FreeBSD__
#if (__FreeBSD_version > 300000)
	machclk_freq = tsc_freq;
#else
	machclk_freq = i586_ctr_freq;
#endif
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)curcpu()->ci_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
	machclk_freq = pentium_mhz * 1000000;
#endif
#elif defined(__alpha__)
#ifdef __FreeBSD__
	machclk_freq = cycles_per_sec;
#elif defined(__NetBSD__) || defined(__OpenBSD__)
	machclk_freq = (u_int32_t)(cycles_per_usec * 1000000);
#endif
#endif /* __alpha__ */

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int wait;
		struct timeval tv_start, tv_end;
		u_int64_t start, end, diff;
		int timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */
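
/*
 * Arithmetic note (illustrative): in the emulated case machclk_freq is
 * 1000000 << MACHCLK_SHIFT = 256,000,000, i.e. a 256MHz virtual clock,
 * and read_machclk() below returns microseconds since boot shifted left
 * by MACHCLK_SHIFT, so one emulated tick corresponds to 1/256 usec.
 */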
u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__i386__)
		val = rdtsc();
#elif defined(__alpha__)
		static u_int32_t last_pcc, upper;
		u_int32_t pcc;

		/*
		 * for alpha, make a 64bit counter value out of the 32bit
		 * alpha processor cycle counter.
		 * read_machclk must be called within a half of its
		 * wrap-around cycle (about 5 sec for 400MHz cpu) to properly
		 * detect a counter wrap-around.
		 * tbr_timeout calls read_machclk once a second.
		 */
		pcc = (u_int32_t)alpha_rpcc();
		if (pcc <= last_pcc)
			upper++;
		last_pcc = pcc;
		val = ((u_int64_t)upper << 32) + pcc;
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(struct mbuf *m, int af, struct flowinfo *flow,
    u_int32_t filt_bmask)
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
/*
 * extract port numbers from an IPv4 packet.
 */
static int
extract_ports4(struct mbuf *m, struct ip *ip, struct flowinfo_in *fin)
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)ip >= m0->m_data) &&
		    ((char *)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((char *)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, char *) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
	}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, char *) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
		/* get next header and header length */
		struct _opt6 *opt6;

		opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
		proto = opt6->opt6_nxt;
		off += 8 + (opt6->opt6_hlen * 4);
		if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
			fin->fi_gpi = opt6->ah_spi;
	}
		/* go to the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}
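
/*
 * Offset arithmetic above, by example (values are illustrative only):
 * for an IP header with ip_hl == 5 the transport header starts
 * (5 << 2) == 20 bytes past the IP header, so with the IP header at the
 * start of the mbuf data, off == 20 and the 4-byte port pair must fit
 * within m0->m_len >= off + 4 before the udphdr cast is taken.
 */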
#ifdef INET6
static int
extract_ports6(struct mbuf *m, struct ip6_hdr *ip6, struct flowinfo_in6 *fin6)
{
	struct mbuf *m0;
	int off;
	u_int8_t proto;

	fin6->fi6_gpi = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)ip6 >= m0->m_data) &&
		    ((char *)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((char *)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, char *) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
		}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, char *) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
		}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
		}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentation is not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */
int
acc_add_filter(struct acc_classifier *classifier, struct flow_filter *filter,
    void *class, u_long *phandle)
{
	struct acc_filter *afp, *prev, *tmp;
	int i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK|M_ZERO);
	if (afp == NULL)
		return (ENOMEM);

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
		    (struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

int
acc_delete_filter(struct acc_classifier *classifier, u_long handle)
{
	struct acc_filter *afp;
	int s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

	s = splnet();
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}
/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(struct acc_classifier *classifier, void *class, int all)
{
	struct acc_filter *afp;
	int i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(void *clfier, struct mbuf *m, int af)
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
			    f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
		    (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
			    f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
				    f_chain)
					if (apply_filter4(afp->f_fbmask,
					    &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
				    (struct flow_filter6 *)&afp->f_filter,
				    fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}

static int
apply_filter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}
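
/*
 * Matching example for the masked address comparison above (addresses
 * are illustrative only): a filter with fi_dst = 192.168.0.0 and
 * mask_dst = 255.255.0.0 matches a packet with fi_dst = 192.168.10.5,
 * because (192.168.10.5 & 255.255.0.0) == 192.168.0.0; with
 * mask_dst = 255.255.255.0 the same packet would not match.
 */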
/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}
/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(u_int32_t fbmask, struct flow_filter6 *filt,
    struct flowinfo_in6 *pkt)
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 * filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
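/*
 * For example (values are illustrative only), a filter stored in hash
 * bucket 5 with in-bucket id 0x0002c gets the handle
 * (5 << 20) | 0x0002c == 0x50002c, and ACC_GET_HINDEX() later recovers
 * bucket 5 from that handle.
 */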
static u_long
get_filt_handle(struct acc_classifier *classifier, int i)
{
	static u_long handle_number = 1;
	u_long handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(struct acc_classifier *classifier, u_long handle)
{
	struct acc_filter *afp;
	int i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(struct flow_filter *filt)
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}


/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in a LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
	TAILQ_ENTRY(ip4_frag) ip4f_chain;
	char	ip4f_valid;
	u_short ip4f_id;
	struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */


static void
ip4f_cache(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}

static int
ip4f_lookup(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(struct ip4_frag *fp)
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */