/*	$NetBSD: altq_subr.c,v 1.14 2006/04/23 06:46:40 christos Exp $	*/
/*	$KAME: altq_subr.c,v 1.11 2002/01/11 08:11:49 kjc Exp $	*/

/*
 * Copyright (C) 1997-2002
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.14 2006/04/23 06:46:40 christos Exp $");

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <altq/altq.h>
#include <altq/altq_conf.h>

/* machine dependent clock related includes */
#ifdef __FreeBSD__
#include "opt_cpu.h"	/* for FreeBSD-2.2.8 to get i586_ctr_freq */
#include <machine/clock.h>
#endif
#if defined(__i386__)
#include <machine/specialreg.h>		/* for CPUID_TSC */
#ifdef __FreeBSD__
#include <machine/md_var.h>		/* for cpu_feature */
#elif defined(__NetBSD__) || defined(__OpenBSD__)
#include <machine/cpu.h>		/* for cpu_feature */
#endif
#endif /* __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout __P((void *));
static int	extract_ports4 __P((struct mbuf *, struct ip *,
				    struct flowinfo_in *));
#ifdef INET6
static int	extract_ports6 __P((struct mbuf *, struct ip6_hdr *,
				    struct flowinfo_in6 *));
#endif
static int	apply_filter4 __P((u_int32_t, struct flow_filter *,
				   struct flowinfo_in *));
static int	apply_ppfilter4 __P((u_int32_t, struct flow_filter *,
				    struct flowinfo_in *));
#ifdef INET6
static int	apply_filter6 __P((u_int32_t, struct flow_filter6 *,
				   struct flowinfo_in6 *));
#endif
static int	apply_tosfilter4 __P((u_int32_t, struct flow_filter *,
				      struct flowinfo_in *));
static u_long	get_filt_handle __P((struct acc_classifier *, int));
static struct acc_filter *filth_to_filtp __P((struct acc_classifier *,
					      u_long));
static u_int32_t filt2fibmask __P((struct flow_filter *));

static void	ip4f_cache __P((struct ip *, struct flowinfo_in *));
static int	ip4f_lookup __P((struct ip *, struct flowinfo_in *));
static int	ip4f_init __P((void));
static struct	ip4_frag *ip4f_alloc __P((void));
static void	ip4f_free __P((struct ip4_frag *));

int (*altq_input) __P((struct mbuf *, int)) = NULL;
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout = CALLOUT_INITIALIZER;

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queuing type. */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;
	void *(*classify)(void *, struct mbuf *, int);
{
	if (!ALTQ_IS_READY(ifq))
		return ENXIO;
	if (ALTQ_IS_ENABLED(ifq))
		return EBUSY;
	if (ALTQ_IS_ATTACHED(ifq))
		return EEXIST;
	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
#ifdef ALTQ_KLD
	altq_module_incref(type);
#endif
	return 0;
}

int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	if (!ALTQ_IS_READY(ifq))
		return ENXIO;
	if (ALTQ_IS_ENABLED(ifq))
		return EBUSY;
	if (!ALTQ_IS_ATTACHED(ifq))
		return (0);

#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);
#endif
	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_enqueue  = NULL;
	ifq->altq_dequeue  = NULL;
	ifq->altq_request  = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	return 0;
}

int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	if (!ALTQ_IS_READY(ifq))
		return ENXIO;
	if (ALTQ_IS_ENABLED(ifq))
		return 0;

	s = splnet();
	IFQ_PURGE(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	return 0;
}

int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	if (!ALTQ_IS_ENABLED(ifq))
		return 0;

	s = splnet();
	IFQ_PURGE(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);
	return 0;
}

void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
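/*
 * Illustrative sketch (not part of the original code, guarded out):
 * how a queueing discipline would typically use the routines above
 * from its own attach path.  The "xyz" names and the ALTQ_EXAMPLES
 * guard are hypothetical; a real discipline supplies handlers
 * matching the altq_attach() prototypes and passes its own ALTQT_*
 * type constant, and enabling is normally a separate ioctl step.
 */
#ifdef ALTQ_EXAMPLES
static int
xyz_attach(struct ifaltq *ifq, void *state)
{
	int error;

	/* hook the discipline's handlers into the interface queue */
	error = altq_attach(ifq, ALTQT_NONE /* discipline's real type */,
	    state, xyz_enqueue, xyz_dequeue, xyz_request, NULL, NULL);
	if (error == 0)
		error = altq_enable(ifq);
	return (error);
}
#endif /* ALTQ_EXAMPLES */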
/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)

struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			IF_POLL(ifq, m);
		else
			IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no CPU clock available!\n");
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_WAITOK|M_ZERO);
	if (tbr == NULL)
		return (ENOMEM);

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	return (0);
}
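/*
 * Worked example of the fixed-point arithmetic above (an added
 * illustration): for profile->rate = 10 Mbps and machclk_freq = 1 GHz,
 * the byte rate is 10,000,000 / 8 = 1,250,000 bytes/sec, so
 *
 *	tbr_rate = (1,250,000 << 32) / 1,000,000,000 =~ 5,368,709
 *
 * i.e. 0.00125 bytes per machine-clock tick, kept with 32 bits of
 * fraction.  With profile->depth = 1500 bytes,
 *
 *	tbr_filluptime = (1500 << 32) / 5,368,709 =~ 1,200,000 ticks
 *
 * which is the expected 1.2 msec needed to accumulate one MTU worth
 * of tokens at 10 Mbps on a 1 GHz clock.
 */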
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 */
static void
tbr_timeout(arg)
	void *arg;
{
	struct ifnet *ifp;
	int active, s;

	active = 0;
	s = splnet();
#ifdef __FreeBSD__
#if (__FreeBSD_version < 300000)
	for (ifp = ifnet; ifp; ifp = ifp->if_next)
#else
	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_link.tqe_next)
#endif
#else /* !FreeBSD */
	for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_list.tqe_next)
#endif
	{
		if (!TBR_IS_ENABLED(&ifp->if_snd))
			continue;
		active++;
		if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
			(*ifp->if_start)(ifp);
	}
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
#if defined(__alpha__) && !defined(ALTQ_NOPCC)
	{
		/*
		 * XXX read out the machine dependent clock once a second
		 * to detect counter wrap-around.
		 */
		static u_int cnt;

		if (++cnt >= hz) {
			(void)read_machclk();
			cnt = 0;
		}
	}
#endif /* __alpha__ && !ALTQ_NOPCC */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr;

	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	return (0);
}


#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */
		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};

/*
 * extract port numbers from an ipv4 packet.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
		/* get next header and header length */
		struct _opt6 *opt6;

		opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
		proto = opt6->opt6_nxt;
		off += 8 + (opt6->opt6_hlen * 4);
		if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
			fin->fi_gpi = opt6->ah_spi;
		}
		/* go to the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int off;
	u_int8_t proto;

	fin6->fi6_gpi = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */
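/*
 * Note on the AH offset arithmetic used by both extractors above (an
 * added illustration): the AH length field counts 32-bit words minus
 * two, so an AH header with opt6_hlen = 4 (the common 96-bit ICV case)
 * occupies 8 + 4 * 4 = 24 bytes.  For an AH-protected TCP packet the
 * walk therefore starts at the end of the IP header, reads the AH
 * (proto becomes IPPROTO_TCP, off advances by 24), and picks up the
 * port numbers from the TCP header on the next pass.
 */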
/*
 * altq common classifier
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK|M_ZERO);
	if (afp == NULL)
		return (ENOMEM);

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

	s = splnet();
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}
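/*
 * Illustrative sketch (hypothetical, guarded out): installing and then
 * removing a filter with the two routines above.  The rule number,
 * address and port are made up; zeroed fields are wildcarded by
 * acc_add_filter(), and addresses/ports must be in network byte order
 * as noted earlier.
 */
#ifdef ALTQ_EXAMPLES
static int
example_filter(struct acc_classifier *clfier, void *class)
{
	struct flow_filter filt;
	u_long handle;
	int error;

	(void)memset(&filt, 0, sizeof(filt));
	filt.ff_ruleno = 10;
	filt.ff_flow.fi_family = AF_INET;
	filt.ff_flow.fi_proto = IPPROTO_TCP;
	filt.ff_flow.fi_dst.s_addr = htonl(0x0a000001);	/* 10.0.0.1 */
	filt.ff_flow.fi_dport = htons(80);

	if ((error = acc_add_filter(clfier, &filt, class, &handle)) != 0)
		return (error);
	/* ... later, remove the filter by the handle it returned. */
	return (acc_delete_filter(clfier, handle));
}
#endif /* ALTQ_EXAMPLES */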
/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
static int
apply_filter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}
/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter6 *filt;
	struct flowinfo_in6 *pkt;
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 * filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
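/*
 * example (added for illustration): with hash-table index 5 and
 * unique id 0x2a, get_filt_handle() below returns
 * (5 << 20) | 0x2a = 0x0050002a, and filth_to_filtp() later recovers
 * the bucket index from the handle via ACC_GET_HINDEX().
 */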
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int	i;
{
	static u_long handle_number = 1;
	u_long	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}


/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in a LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
	TAILQ_ENTRY(ip4_frag) ip4f_chain;
	char	ip4f_valid;
	u_short	ip4f_id;
	struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}
/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

void
write_dsfield(m, pktattr, dsfield)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	u_int8_t dsfield;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
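/*
 * Worked example for the RFC 1624 update above (added illustration):
 * the TOS byte shares its 16-bit checksum word with the
 * version/header-length byte.  For any byte x, (~x & 0xff) + x = 0xff,
 * so the unchanged high byte always contributes 0xff00 to ~m + m',
 * leaving only (~old & 0xff) + dsfield for the changed low byte.
 * E.g. rewriting TOS 0x00 to 0xb8 (the EF code point shifted into the
 * DSCP bits) adds 0xff00 + 0xff + 0xb8 to the complemented sum before
 * the carries are folded back in.
 */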
1517 */ 1518 #ifdef __i386__ 1519 #ifdef __FreeBSD__ 1520 #if (__FreeBSD_version > 300000) 1521 machclk_freq = tsc_freq; 1522 #else 1523 machclk_freq = i586_ctr_freq; 1524 #endif 1525 #elif defined(__NetBSD__) 1526 machclk_freq = (u_int32_t)curcpu()->ci_tsc_freq; 1527 #elif defined(__OpenBSD__) 1528 machclk_freq = pentium_mhz * 1000000; 1529 #endif 1530 #elif defined(__alpha__) 1531 #ifdef __FreeBSD__ 1532 machclk_freq = cycles_per_sec; 1533 #elif defined(__NetBSD__) || defined(__OpenBSD__) 1534 machclk_freq = (u_int32_t)(cycles_per_usec * 1000000); 1535 #endif 1536 #endif /* __alpha__ */ 1537 1538 /* 1539 * if we don't know the clock frequency, measure it. 1540 */ 1541 if (machclk_freq == 0) { 1542 static int wait; 1543 struct timeval tv_start, tv_end; 1544 u_int64_t start, end, diff; 1545 int timo; 1546 1547 microtime(&tv_start); 1548 start = read_machclk(); 1549 timo = hz; /* 1 sec */ 1550 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); 1551 microtime(&tv_end); 1552 end = read_machclk(); 1553 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 1554 + tv_end.tv_usec - tv_start.tv_usec; 1555 if (diff != 0) 1556 machclk_freq = (u_int)((end - start) * 1000000 / diff); 1557 } 1558 1559 machclk_per_tick = machclk_freq / hz; 1560 1561 #ifdef ALTQ_DEBUG 1562 printf("altq: CPU clock: %uHz\n", machclk_freq); 1563 #endif 1564 } 1565 1566 #ifdef __alpha__ 1567 /* 1568 * make a 64bit counter value out of the 32bit alpha processor cycle counter. 1569 * read_machclk must be called within a half of its wrap-around cycle 1570 * (about 5 sec for 400MHz CPU) to properly detect a counter wrap-around. 1571 * tbr_timeout calls read_machclk once a second. 1572 */ 1573 u_int64_t 1574 read_machclk(void) 1575 { 1576 static u_int32_t last_pcc, upper; 1577 u_int32_t pcc; 1578 1579 pcc = (u_int32_t)alpha_rpcc(); 1580 if (pcc <= last_pcc) 1581 upper++; 1582 last_pcc = pcc; 1583 return (((u_int64_t)upper << 32) + pcc); 1584 } 1585 #endif /* __alpha__ */ 1586 #else /* !i386 && !alpha */ 1587 /* use microtime() for now */ 1588 void 1589 init_machclk(void) 1590 { 1591 machclk_freq = 1000000 << MACHCLK_SHIFT; 1592 machclk_per_tick = machclk_freq / hz; 1593 printf("altq: emulate %uHz CPU clock\n", machclk_freq); 1594 } 1595 #endif /* !i386 && !alpha */ 1596