1 /* $NetBSD: ip_mroute.c,v 1.34 1996/10/13 02:03:06 christos Exp $ */ 2 3 /* 4 * IP multicast forwarding procedures 5 * 6 * Written by David Waitzman, BBN Labs, August 1988. 7 * Modified by Steve Deering, Stanford, February 1989. 8 * Modified by Mark J. Steiglitz, Stanford, May, 1991 9 * Modified by Van Jacobson, LBL, January 1993 10 * Modified by Ajit Thyagarajan, PARC, August 1993 11 * Modified by Bill Fenner, PARC, April 1994 12 * Modified by Charles M. Hannum, NetBSD, May 1995. 13 * 14 * MROUTING Revision: 1.2 15 */ 16 17 #include <sys/param.h> 18 #include <sys/systm.h> 19 #include <sys/mbuf.h> 20 #include <sys/socket.h> 21 #include <sys/socketvar.h> 22 #include <sys/protosw.h> 23 #include <sys/errno.h> 24 #include <sys/time.h> 25 #include <sys/kernel.h> 26 #include <sys/ioctl.h> 27 #include <sys/syslog.h> 28 #include <net/if.h> 29 #include <net/route.h> 30 #include <net/raw_cb.h> 31 #include <netinet/in.h> 32 #include <netinet/in_var.h> 33 #include <netinet/in_systm.h> 34 #include <netinet/ip.h> 35 #include <netinet/ip_var.h> 36 #include <netinet/in_pcb.h> 37 #include <netinet/udp.h> 38 #include <netinet/igmp.h> 39 #include <netinet/igmp_var.h> 40 #include <netinet/ip_mroute.h> 41 42 #include <machine/stdarg.h> 43 44 #define IP_MULTICASTOPTS 0 45 #define M_PULLUP(m, len) \ 46 do { \ 47 if ((m) && ((m)->m_flags & M_EXT || (m)->m_len < (len))) \ 48 (m) = m_pullup((m), (len)); \ 49 } while (0) 50 51 /* 52 * Globals. All but ip_mrouter and ip_mrtproto could be static, 53 * except for netstat or debugging purposes. 54 */ 55 struct socket *ip_mrouter = 0; 56 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 57 58 #define NO_RTE_FOUND 0x1 59 #define RTE_FOUND 0x2 60 61 #define MFCHASH(a, g) \ 62 ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ 63 ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash) 64 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl; 65 u_long mfchash; 66 67 u_char nexpire[MFCTBLSIZ]; 68 struct vif viftable[MAXVIFS]; 69 struct mrtstat mrtstat; 70 u_int mrtdebug = 0; /* debug level */ 71 #define DEBUG_MFC 0x02 72 #define DEBUG_FORWARD 0x04 73 #define DEBUG_EXPIRE 0x08 74 #define DEBUG_XMIT 0x10 75 u_int tbfdebug = 0; /* tbf debug level */ 76 #ifdef RSVP_ISI 77 u_int rsvpdebug = 0; /* rsvp debug level */ 78 extern struct socket *ip_rsvpd; 79 extern int rsvp_on; 80 #endif /* RSVP_ISI */ 81 82 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 83 #define UPCALL_EXPIRE 6 /* number of timeouts */ 84 85 /* 86 * Define the token bucket filter structures 87 */ 88 89 #define TBF_REPROCESS (hz / 100) /* 100x / second */ 90 91 static int get_sg_cnt __P((struct sioc_sg_req *)); 92 static int get_vif_cnt __P((struct sioc_vif_req *)); 93 static int ip_mrouter_init __P((struct socket *, struct mbuf *)); 94 static int get_version __P((struct mbuf *)); 95 static int set_assert __P((struct mbuf *)); 96 static int get_assert __P((struct mbuf *)); 97 static int add_vif __P((struct mbuf *)); 98 static int del_vif __P((struct mbuf *)); 99 static void update_mfc __P((struct mfcctl *, struct mfc *)); 100 static void expire_mfc __P((struct mfc *)); 101 static int add_mfc __P((struct mbuf *)); 102 #ifdef UPCALL_TIMING 103 static void collate __P((struct timeval *)); 104 #endif 105 static int del_mfc __P((struct mbuf *)); 106 static int socket_send __P((struct socket *, struct mbuf *, 107 struct sockaddr_in *)); 108 static void expire_upcalls __P((void *)); 109 #ifdef RSVP_ISI 110 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t)); 111 #else 112 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *)); 113 #endif 114 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *)); 115 static void encap_send __P((struct ip *, struct vif *, struct mbuf *)); 116 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *, 117 u_int32_t)); 118 static void tbf_queue __P((struct vif *, struct mbuf *)); 119 static void tbf_process_q __P((struct vif *)); 120 static void tbf_reprocess_q __P((void *)); 121 static int tbf_dq_sel __P((struct vif *, struct ip *)); 122 static void tbf_send_packet __P((struct vif *, struct mbuf *)); 123 static void tbf_update_tokens __P((struct vif *)); 124 static int priority __P((struct vif *, struct ip *)); 125 126 /* 127 * 'Interfaces' associated with decapsulator (so we can tell 128 * packets that went through it from ones that get reflected 129 * by a broken gateway). These interfaces are never linked into 130 * the system ifnet list & no routes point to them. I.e., packets 131 * can't be sent this way. They only exist as a placeholder for 132 * multicast source verification. 133 */ 134 #if 0 135 struct ifnet multicast_decap_if[MAXVIFS]; 136 #endif 137 138 #define ENCAP_TTL 64 139 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */ 140 141 /* prototype IP hdr for encapsulated packets */ 142 struct ip multicast_encap_iphdr = { 143 #if BYTE_ORDER == LITTLE_ENDIAN 144 sizeof(struct ip) >> 2, IPVERSION, 145 #else 146 IPVERSION, sizeof(struct ip) >> 2, 147 #endif 148 0, /* tos */ 149 sizeof(struct ip), /* total length */ 150 0, /* id */ 151 0, /* frag offset */ 152 ENCAP_TTL, ENCAP_PROTO, 153 0, /* checksum */ 154 }; 155 156 /* 157 * Private variables. 158 */ 159 static vifi_t numvifs = 0; 160 static int have_encap_tunnel = 0; 161 162 /* 163 * one-back cache used by ipip_input to locate a tunnel's vif 164 * given a datagram's src ip address. 165 */ 166 static struct in_addr last_encap_src; 167 static struct vif *last_encap_vif; 168 169 /* 170 * whether or not special PIM assert processing is enabled. 171 */ 172 static int pim_assert; 173 /* 174 * Rate limit for assert notification messages, in usec 175 */ 176 #define ASSERT_MSG_TIME 3000000 177 178 /* 179 * Find a route for a given origin IP address and Multicast group address 180 * Type of service parameter to be added in the future!!! 181 */ 182 183 #define MFCFIND(o, g, rt) { \ 184 register struct mfc *_rt; \ 185 (rt) = 0; \ 186 ++mrtstat.mrts_mfc_lookups; \ 187 for (_rt = mfchashtbl[MFCHASH(o, g)].lh_first; \ 188 _rt; _rt = _rt->mfc_hash.le_next) { \ 189 if (in_hosteq(_rt->mfc_origin, (o)) && \ 190 in_hosteq(_rt->mfc_mcastgrp, (g)) && \ 191 _rt->mfc_stall == 0) { \ 192 (rt) = _rt; \ 193 break; \ 194 } \ 195 } \ 196 if ((rt) == 0) \ 197 ++mrtstat.mrts_mfc_misses; \ 198 } 199 200 /* 201 * Macros to compute elapsed time efficiently 202 * Borrowed from Van Jacobson's scheduling code 203 */ 204 #define TV_DELTA(a, b, delta) { \ 205 register int xxs; \ 206 delta = (a).tv_usec - (b).tv_usec; \ 207 xxs = (a).tv_sec - (b).tv_sec; \ 208 switch (xxs) { \ 209 case 2: \ 210 delta += 1000000; \ 211 /* fall through */ \ 212 case 1: \ 213 delta += 1000000; \ 214 /* fall through */ \ 215 case 0: \ 216 break; \ 217 default: \ 218 delta += (1000000 * xxs); \ 219 break; \ 220 } \ 221 } 222 223 #ifdef UPCALL_TIMING 224 u_int32_t upcall_data[51]; 225 #endif /* UPCALL_TIMING */ 226 227 /* 228 * Handle MRT setsockopt commands to modify the multicast routing tables. 229 */ 230 int 231 ip_mrouter_set(so, optname, m) 232 struct socket *so; 233 int optname; 234 struct mbuf **m; 235 { 236 int error; 237 238 if (optname != MRT_INIT && so != ip_mrouter) 239 error = ENOPROTOOPT; 240 else 241 switch (optname) { 242 case MRT_INIT: 243 error = ip_mrouter_init(so, *m); 244 break; 245 case MRT_DONE: 246 error = ip_mrouter_done(); 247 break; 248 case MRT_ADD_VIF: 249 error = add_vif(*m); 250 break; 251 case MRT_DEL_VIF: 252 error = del_vif(*m); 253 break; 254 case MRT_ADD_MFC: 255 error = add_mfc(*m); 256 break; 257 case MRT_DEL_MFC: 258 error = del_mfc(*m); 259 break; 260 case MRT_ASSERT: 261 error = set_assert(*m); 262 break; 263 default: 264 error = ENOPROTOOPT; 265 break; 266 } 267 268 if (*m) 269 m_free(*m); 270 return (error); 271 } 272 273 /* 274 * Handle MRT getsockopt commands 275 */ 276 int 277 ip_mrouter_get(so, optname, m) 278 struct socket *so; 279 int optname; 280 struct mbuf **m; 281 { 282 int error; 283 284 if (so != ip_mrouter) 285 error = ENOPROTOOPT; 286 else { 287 *m = m_get(M_WAIT, MT_SOOPTS); 288 289 switch (optname) { 290 case MRT_VERSION: 291 error = get_version(*m); 292 break; 293 case MRT_ASSERT: 294 error = get_assert(*m); 295 break; 296 default: 297 error = ENOPROTOOPT; 298 break; 299 } 300 301 if (error) 302 m_free(*m); 303 } 304 305 return (error); 306 } 307 308 /* 309 * Handle ioctl commands to obtain information from the cache 310 */ 311 int 312 mrt_ioctl(so, cmd, data) 313 struct socket *so; 314 u_long cmd; 315 caddr_t data; 316 { 317 int error; 318 319 if (so != ip_mrouter) 320 error = EINVAL; 321 else 322 switch (cmd) { 323 case SIOCGETVIFCNT: 324 error = get_vif_cnt((struct sioc_vif_req *)data); 325 break; 326 case SIOCGETSGCNT: 327 error = get_sg_cnt((struct sioc_sg_req *)data); 328 break; 329 default: 330 error = EINVAL; 331 break; 332 } 333 334 return (error); 335 } 336 337 /* 338 * returns the packet, byte, rpf-failure count for the source group provided 339 */ 340 static int 341 get_sg_cnt(req) 342 register struct sioc_sg_req *req; 343 { 344 register struct mfc *rt; 345 int s; 346 347 s = splsoftnet(); 348 MFCFIND(req->src, req->grp, rt); 349 splx(s); 350 if (rt != 0) { 351 req->pktcnt = rt->mfc_pkt_cnt; 352 req->bytecnt = rt->mfc_byte_cnt; 353 req->wrong_if = rt->mfc_wrong_if; 354 } else 355 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 356 357 return (0); 358 } 359 360 /* 361 * returns the input and output packet and byte counts on the vif provided 362 */ 363 static int 364 get_vif_cnt(req) 365 register struct sioc_vif_req *req; 366 { 367 register vifi_t vifi = req->vifi; 368 369 if (vifi >= numvifs) 370 return (EINVAL); 371 372 req->icount = viftable[vifi].v_pkt_in; 373 req->ocount = viftable[vifi].v_pkt_out; 374 req->ibytes = viftable[vifi].v_bytes_in; 375 req->obytes = viftable[vifi].v_bytes_out; 376 377 return (0); 378 } 379 380 /* 381 * Enable multicast routing 382 */ 383 static int 384 ip_mrouter_init(so, m) 385 struct socket *so; 386 struct mbuf *m; 387 { 388 int *v; 389 390 if (mrtdebug) 391 log(LOG_DEBUG, 392 "ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 393 so->so_type, so->so_proto->pr_protocol); 394 395 if (so->so_type != SOCK_RAW || 396 so->so_proto->pr_protocol != IPPROTO_IGMP) 397 return (EOPNOTSUPP); 398 399 if (m == 0 || m->m_len < sizeof(int)) 400 return (EINVAL); 401 402 v = mtod(m, int *); 403 if (*v != 1) 404 return (EINVAL); 405 406 if (ip_mrouter != 0) 407 return (EADDRINUSE); 408 409 ip_mrouter = so; 410 411 mfchashtbl = hashinit(MFCTBLSIZ, M_MRTABLE, &mfchash); 412 bzero((caddr_t)nexpire, sizeof(nexpire)); 413 414 pim_assert = 0; 415 416 timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT); 417 418 if (mrtdebug) 419 log(LOG_DEBUG, "ip_mrouter_init\n"); 420 421 return (0); 422 } 423 424 /* 425 * Disable multicast routing 426 */ 427 int 428 ip_mrouter_done() 429 { 430 vifi_t vifi; 431 register struct vif *vifp; 432 int i; 433 int s; 434 435 s = splsoftnet(); 436 437 /* Clear out all the vifs currently in use. */ 438 for (vifi = 0; vifi < numvifs; vifi++) { 439 vifp = &viftable[vifi]; 440 if (!in_nullhost(vifp->v_lcl_addr)) 441 reset_vif(vifp); 442 } 443 444 numvifs = 0; 445 pim_assert = 0; 446 447 untimeout(expire_upcalls, (caddr_t)0); 448 449 /* 450 * Free all multicast forwarding cache entries. 451 */ 452 for (i = 0; i < MFCTBLSIZ; i++) { 453 register struct mfc *rt, *nrt; 454 455 for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) { 456 nrt = rt->mfc_hash.le_next; 457 458 expire_mfc(rt); 459 } 460 } 461 free(mfchashtbl, M_MRTABLE); 462 463 /* Reset de-encapsulation cache. */ 464 have_encap_tunnel = 0; 465 466 ip_mrouter = 0; 467 468 splx(s); 469 470 if (mrtdebug) 471 log(LOG_DEBUG, "ip_mrouter_done\n"); 472 473 return (0); 474 } 475 476 static int 477 get_version(m) 478 struct mbuf *m; 479 { 480 int *v = mtod(m, int *); 481 482 *v = 0x0305; /* XXX !!!! */ 483 m->m_len = sizeof(int); 484 return (0); 485 } 486 487 /* 488 * Set PIM assert processing global 489 */ 490 static int 491 set_assert(m) 492 struct mbuf *m; 493 { 494 int *i; 495 496 if (m == 0 || m->m_len < sizeof(int)) 497 return (EINVAL); 498 499 i = mtod(m, int *); 500 pim_assert = !!*i; 501 return (0); 502 } 503 504 /* 505 * Get PIM assert processing global 506 */ 507 static int 508 get_assert(m) 509 struct mbuf *m; 510 { 511 int *i = mtod(m, int *); 512 513 *i = pim_assert; 514 m->m_len = sizeof(int); 515 return (0); 516 } 517 518 static struct sockaddr_in sin = { sizeof(sin), AF_INET }; 519 520 /* 521 * Add a vif to the vif table 522 */ 523 static int 524 add_vif(m) 525 struct mbuf *m; 526 { 527 register struct vifctl *vifcp; 528 register struct vif *vifp; 529 struct ifaddr *ifa; 530 struct ifnet *ifp; 531 struct ifreq ifr; 532 int error, s; 533 534 if (m == 0 || m->m_len < sizeof(struct vifctl)) 535 return (EINVAL); 536 537 vifcp = mtod(m, struct vifctl *); 538 if (vifcp->vifc_vifi >= MAXVIFS) 539 return (EINVAL); 540 541 vifp = &viftable[vifcp->vifc_vifi]; 542 if (!in_nullhost(vifp->v_lcl_addr)) 543 return (EADDRINUSE); 544 545 /* Find the interface with an address in AF_INET family. */ 546 sin.sin_addr = vifcp->vifc_lcl_addr; 547 ifa = ifa_ifwithaddr(sintosa(&sin)); 548 if (ifa == 0) 549 return (EADDRNOTAVAIL); 550 551 if (vifcp->vifc_flags & VIFF_TUNNEL) { 552 if (vifcp->vifc_flags & VIFF_SRCRT) { 553 log(LOG_ERR, "Source routed tunnels not supported\n"); 554 return (EOPNOTSUPP); 555 } 556 557 /* Create a fake encapsulation interface. */ 558 ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK); 559 bzero(ifp, sizeof(*ifp)); 560 sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi); 561 562 /* Prepare cached route entry. */ 563 bzero(&vifp->v_route, sizeof(vifp->v_route)); 564 565 /* Tell ipip_input() to start looking at encapsulated packets. */ 566 have_encap_tunnel = 1; 567 } else { 568 /* Use the physical interface associated with the address. */ 569 ifp = ifa->ifa_ifp; 570 571 /* Make sure the interface supports multicast. */ 572 if ((ifp->if_flags & IFF_MULTICAST) == 0) 573 return (EOPNOTSUPP); 574 575 /* Enable promiscuous reception of all IP multicasts. */ 576 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in); 577 satosin(&ifr.ifr_addr)->sin_family = AF_INET; 578 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr; 579 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); 580 if (error) 581 return (error); 582 } 583 584 s = splsoftnet(); 585 586 /* Define parameters for the tbf structure. */ 587 vifp->tbf_q = 0; 588 vifp->tbf_t = &vifp->tbf_q; 589 microtime(&vifp->tbf_last_pkt_t); 590 vifp->tbf_n_tok = 0; 591 vifp->tbf_q_len = 0; 592 vifp->tbf_max_q_len = MAXQSIZE; 593 594 vifp->v_flags = vifcp->vifc_flags; 595 vifp->v_threshold = vifcp->vifc_threshold; 596 /* scaling up here allows division by 1024 in critical code */ 597 vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000; 598 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 599 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 600 vifp->v_ifp = ifp; 601 /* Initialize per vif pkt counters. */ 602 vifp->v_pkt_in = 0; 603 vifp->v_pkt_out = 0; 604 vifp->v_bytes_in = 0; 605 vifp->v_bytes_out = 0; 606 #ifdef RSVP_ISI 607 vifp->v_rsvp_on = 0; 608 vifp->v_rsvpd = 0; 609 #endif /* RSVP_ISI */ 610 611 splx(s); 612 613 /* Adjust numvifs up if the vifi is higher than numvifs. */ 614 if (numvifs <= vifcp->vifc_vifi) 615 numvifs = vifcp->vifc_vifi + 1; 616 617 if (mrtdebug) 618 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n", 619 vifcp->vifc_vifi, 620 ntohl(vifcp->vifc_lcl_addr.s_addr), 621 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 622 ntohl(vifcp->vifc_rmt_addr.s_addr), 623 vifcp->vifc_threshold, 624 vifcp->vifc_rate_limit); 625 626 return (0); 627 } 628 629 void 630 reset_vif(vifp) 631 register struct vif *vifp; 632 { 633 register struct mbuf *m, *n; 634 struct ifnet *ifp; 635 struct ifreq ifr; 636 637 for (m = vifp->tbf_q; m != 0; m = n) { 638 n = m->m_nextpkt; 639 m_freem(m); 640 } 641 642 if (vifp->v_flags & VIFF_TUNNEL) { 643 free(vifp->v_ifp, M_MRTABLE); 644 if (vifp == last_encap_vif) { 645 last_encap_vif = 0; 646 last_encap_src = zeroin_addr; 647 } 648 } else { 649 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in); 650 satosin(&ifr.ifr_addr)->sin_family = AF_INET; 651 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr; 652 ifp = vifp->v_ifp; 653 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 654 } 655 bzero((caddr_t)vifp, sizeof(*vifp)); 656 } 657 658 /* 659 * Delete a vif from the vif table 660 */ 661 static int 662 del_vif(m) 663 struct mbuf *m; 664 { 665 vifi_t *vifip; 666 register struct vif *vifp; 667 register vifi_t vifi; 668 int s; 669 670 if (m == 0 || m->m_len < sizeof(vifi_t)) 671 return (EINVAL); 672 673 vifip = mtod(m, vifi_t *); 674 if (*vifip >= numvifs) 675 return (EINVAL); 676 677 vifp = &viftable[*vifip]; 678 if (in_nullhost(vifp->v_lcl_addr)) 679 return (EADDRNOTAVAIL); 680 681 s = splsoftnet(); 682 683 reset_vif(vifp); 684 685 /* Adjust numvifs down */ 686 for (vifi = numvifs; vifi > 0; vifi--) 687 if (!in_nullhost(viftable[vifi-1].v_lcl_addr)) 688 break; 689 numvifs = vifi; 690 691 splx(s); 692 693 if (mrtdebug) 694 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs); 695 696 return (0); 697 } 698 699 static void 700 update_mfc(mfccp, rt) 701 struct mfcctl *mfccp; 702 struct mfc *rt; 703 { 704 vifi_t vifi; 705 706 rt->mfc_parent = mfccp->mfcc_parent; 707 for (vifi = 0; vifi < numvifs; vifi++) 708 rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi]; 709 rt->mfc_expire = 0; 710 rt->mfc_stall = 0; 711 } 712 713 static void 714 expire_mfc(rt) 715 struct mfc *rt; 716 { 717 struct rtdetq *rte, *nrte; 718 719 for (rte = rt->mfc_stall; rte != 0; rte = nrte) { 720 nrte = rte->next; 721 m_freem(rte->m); 722 free(rte, M_MRTABLE); 723 } 724 725 LIST_REMOVE(rt, mfc_hash); 726 free(rt, M_MRTABLE); 727 } 728 729 /* 730 * Add an mfc entry 731 */ 732 static int 733 add_mfc(m) 734 struct mbuf *m; 735 { 736 struct mfcctl *mfccp; 737 struct mfc *rt; 738 u_int32_t hash = 0; 739 struct rtdetq *rte, *nrte; 740 register u_short nstl; 741 int s; 742 743 if (m == 0 || m->m_len < sizeof(struct mfcctl)) 744 return (EINVAL); 745 746 mfccp = mtod(m, struct mfcctl *); 747 748 s = splsoftnet(); 749 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt); 750 751 /* If an entry already exists, just update the fields */ 752 if (rt) { 753 if (mrtdebug & DEBUG_MFC) 754 log(LOG_DEBUG,"add_mfc update o %x g %x p %x\n", 755 ntohl(mfccp->mfcc_origin.s_addr), 756 ntohl(mfccp->mfcc_mcastgrp.s_addr), 757 mfccp->mfcc_parent); 758 759 if (rt->mfc_expire) 760 nexpire[hash]--; 761 762 update_mfc(mfccp, rt); 763 764 splx(s); 765 return (0); 766 } 767 768 /* 769 * Find the entry for which the upcall was made and update 770 */ 771 nstl = 0; 772 hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); 773 for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) { 774 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 775 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && 776 rt->mfc_stall != 0) { 777 if (nstl++) 778 log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n", 779 "multiple kernel entries", 780 ntohl(mfccp->mfcc_origin.s_addr), 781 ntohl(mfccp->mfcc_mcastgrp.s_addr), 782 mfccp->mfcc_parent, rt->mfc_stall); 783 784 if (mrtdebug & DEBUG_MFC) 785 log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p\n", 786 ntohl(mfccp->mfcc_origin.s_addr), 787 ntohl(mfccp->mfcc_mcastgrp.s_addr), 788 mfccp->mfcc_parent, rt->mfc_stall); 789 790 if (rt->mfc_expire) 791 nexpire[hash]--; 792 793 /* free packets Qed at the end of this entry */ 794 for (rte = rt->mfc_stall; rte != 0; rte = nrte) { 795 nrte = rte->next; 796 #ifdef RSVP_ISI 797 ip_mdq(rte->m, rte->ifp, rt, -1); 798 #else 799 ip_mdq(rte->m, rte->ifp, rt); 800 #endif /* RSVP_ISI */ 801 m_freem(rte->m); 802 #ifdef UPCALL_TIMING 803 collate(&rte->t); 804 #endif /* UPCALL_TIMING */ 805 free(rte, M_MRTABLE); 806 } 807 808 update_mfc(mfccp, rt); 809 } 810 } 811 812 if (nstl == 0) { 813 /* 814 * No mfc; make a new one 815 */ 816 if (mrtdebug & DEBUG_MFC) 817 log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x\n", 818 ntohl(mfccp->mfcc_origin.s_addr), 819 ntohl(mfccp->mfcc_mcastgrp.s_addr), 820 mfccp->mfcc_parent); 821 822 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 823 if (rt == 0) { 824 splx(s); 825 return (ENOBUFS); 826 } 827 828 rt->mfc_origin = mfccp->mfcc_origin; 829 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 830 /* initialize pkt counters per src-grp */ 831 rt->mfc_pkt_cnt = 0; 832 rt->mfc_byte_cnt = 0; 833 rt->mfc_wrong_if = 0; 834 timerclear(&rt->mfc_last_assert); 835 update_mfc(mfccp, rt); 836 837 /* insert new entry at head of hash chain */ 838 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 839 } 840 841 splx(s); 842 return (0); 843 } 844 845 #ifdef UPCALL_TIMING 846 /* 847 * collect delay statistics on the upcalls 848 */ 849 static void collate(t) 850 register struct timeval *t; 851 { 852 register u_int32_t d; 853 register struct timeval tp; 854 register u_int32_t delta; 855 856 microtime(&tp); 857 858 if (timercmp(t, &tp, <)) { 859 TV_DELTA(tp, *t, delta); 860 861 d = delta >> 10; 862 if (d > 50) 863 d = 50; 864 865 ++upcall_data[d]; 866 } 867 } 868 #endif /* UPCALL_TIMING */ 869 870 /* 871 * Delete an mfc entry 872 */ 873 static int 874 del_mfc(m) 875 struct mbuf *m; 876 { 877 struct mfcctl *mfccp; 878 struct mfc *rt; 879 int s; 880 881 if (m == 0 || m->m_len < sizeof(struct mfcctl)) 882 return (EINVAL); 883 884 mfccp = mtod(m, struct mfcctl *); 885 886 if (mrtdebug & DEBUG_MFC) 887 log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n", 888 ntohl(mfccp->mfcc_origin.s_addr), 889 ntohl(mfccp->mfcc_mcastgrp.s_addr)); 890 891 s = splsoftnet(); 892 893 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt); 894 if (rt == 0) { 895 splx(s); 896 return (EADDRNOTAVAIL); 897 } 898 899 LIST_REMOVE(rt, mfc_hash); 900 free(rt, M_MRTABLE); 901 902 splx(s); 903 return (0); 904 } 905 906 static int 907 socket_send(s, mm, src) 908 struct socket *s; 909 struct mbuf *mm; 910 struct sockaddr_in *src; 911 { 912 if (s) { 913 if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) { 914 sorwakeup(s); 915 return (0); 916 } 917 } 918 m_freem(mm); 919 return (-1); 920 } 921 922 /* 923 * IP multicast forwarding function. This function assumes that the packet 924 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 925 * pointed to by "ifp", and the packet is to be relayed to other networks 926 * that have members of the packet's destination IP multicast group. 927 * 928 * The packet is returned unscathed to the caller, unless it is 929 * erroneous, in which case a non-zero return value tells the caller to 930 * discard it. 931 */ 932 933 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 934 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 935 936 int 937 #ifdef RSVP_ISI 938 ip_mforward(m, ifp, imo) 939 #else 940 ip_mforward(m, ifp) 941 #endif /* RSVP_ISI */ 942 struct mbuf *m; 943 struct ifnet *ifp; 944 #ifdef RSVP_ISI 945 struct ip_moptions *imo; 946 #endif /* RSVP_ISI */ 947 { 948 register struct ip *ip = mtod(m, struct ip *); 949 register struct mfc *rt; 950 register u_char *ipoptions; 951 static int srctun = 0; 952 register struct mbuf *mm; 953 int s; 954 #ifdef RSVP_ISI 955 register struct vif *vifp; 956 vifi_t vifi; 957 #endif /* RSVP_ISI */ 958 959 if (mrtdebug & DEBUG_FORWARD) 960 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n", 961 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); 962 963 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 964 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) { 965 /* 966 * Packet arrived via a physical interface or 967 * an encapuslated tunnel. 968 */ 969 } else { 970 /* 971 * Packet arrived through a source-route tunnel. 972 * Source-route tunnels are no longer supported. 973 */ 974 if ((srctun++ % 1000) == 0) 975 log(LOG_ERR, "ip_mforward: received source-routed packet from %x\n", 976 ntohl(ip->ip_src.s_addr)); 977 978 return (1); 979 } 980 981 #ifdef RSVP_ISI 982 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) { 983 if (ip->ip_ttl < 255) 984 ip->ip_ttl++; /* compensate for -1 in *_send routines */ 985 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 986 vifp = viftable + vifi; 987 printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n", 988 ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi, 989 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 990 vifp->v_ifp->if_xname); 991 } 992 return (ip_mdq(m, ifp, (struct mfc *)0, vifi)); 993 } 994 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 995 printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", 996 ntohl(ip->ip_src), ntohl(ip->ip_dst)); 997 } 998 #endif /* RSVP_ISI */ 999 1000 /* 1001 * Don't forward a packet with time-to-live of zero or one, 1002 * or a packet destined to a local-only group. 1003 */ 1004 if (ip->ip_ttl <= 1 || 1005 IN_LOCAL_GROUP(ip->ip_dst.s_addr)) 1006 return (0); 1007 1008 /* 1009 * Determine forwarding vifs from the forwarding cache table 1010 */ 1011 s = splsoftnet(); 1012 MFCFIND(ip->ip_src, ip->ip_dst, rt); 1013 1014 /* Entry exists, so forward if necessary */ 1015 if (rt != 0) { 1016 splx(s); 1017 #ifdef RSVP_ISI 1018 return (ip_mdq(m, ifp, rt, -1)); 1019 #else 1020 return (ip_mdq(m, ifp, rt)); 1021 #endif /* RSVP_ISI */ 1022 } else { 1023 /* 1024 * If we don't have a route for packet's origin, 1025 * Make a copy of the packet & 1026 * send message to routing daemon 1027 */ 1028 1029 register struct mbuf *mb0; 1030 register struct rtdetq *rte; 1031 register u_int32_t hash; 1032 int hlen = ip->ip_hl << 2; 1033 #ifdef UPCALL_TIMING 1034 struct timeval tp; 1035 1036 microtime(&tp); 1037 #endif /* UPCALL_TIMING */ 1038 1039 mrtstat.mrts_no_route++; 1040 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1041 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n", 1042 ntohl(ip->ip_src.s_addr), 1043 ntohl(ip->ip_dst.s_addr)); 1044 1045 /* 1046 * Allocate mbufs early so that we don't do extra work if we are 1047 * just going to fail anyway. Make sure to pullup the header so 1048 * that other people can't step on it. 1049 */ 1050 rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT); 1051 if (rte == 0) { 1052 splx(s); 1053 return (ENOBUFS); 1054 } 1055 mb0 = m_copy(m, 0, M_COPYALL); 1056 M_PULLUP(mb0, hlen); 1057 if (mb0 == 0) { 1058 free(rte, M_MRTABLE); 1059 splx(s); 1060 return (ENOBUFS); 1061 } 1062 1063 /* is there an upcall waiting for this packet? */ 1064 hash = MFCHASH(ip->ip_src, ip->ip_dst); 1065 for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) { 1066 if (in_hosteq(ip->ip_src, rt->mfc_origin) && 1067 in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && 1068 rt->mfc_stall != 0) 1069 break; 1070 } 1071 1072 if (rt == 0) { 1073 int i; 1074 struct igmpmsg *im; 1075 1076 /* no upcall, so make a new entry */ 1077 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1078 if (rt == 0) { 1079 free(rte, M_MRTABLE); 1080 m_freem(mb0); 1081 splx(s); 1082 return (ENOBUFS); 1083 } 1084 /* Make a copy of the header to send to the user level process */ 1085 mm = m_copy(m, 0, hlen); 1086 M_PULLUP(mm, hlen); 1087 if (mm == 0) { 1088 free(rte, M_MRTABLE); 1089 m_freem(mb0); 1090 free(rt, M_MRTABLE); 1091 splx(s); 1092 return (ENOBUFS); 1093 } 1094 1095 /* 1096 * Send message to routing daemon to install 1097 * a route into the kernel table 1098 */ 1099 sin.sin_addr = ip->ip_src; 1100 1101 im = mtod(mm, struct igmpmsg *); 1102 im->im_msgtype = IGMPMSG_NOCACHE; 1103 im->im_mbz = 0; 1104 1105 mrtstat.mrts_upcalls++; 1106 1107 if (socket_send(ip_mrouter, mm, &sin) < 0) { 1108 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); 1109 ++mrtstat.mrts_upq_sockfull; 1110 free(rte, M_MRTABLE); 1111 m_freem(mb0); 1112 free(rt, M_MRTABLE); 1113 splx(s); 1114 return (ENOBUFS); 1115 } 1116 1117 /* insert new entry at head of hash chain */ 1118 rt->mfc_origin = ip->ip_src; 1119 rt->mfc_mcastgrp = ip->ip_dst; 1120 rt->mfc_pkt_cnt = 0; 1121 rt->mfc_byte_cnt = 0; 1122 rt->mfc_wrong_if = 0; 1123 rt->mfc_expire = UPCALL_EXPIRE; 1124 nexpire[hash]++; 1125 for (i = 0; i < numvifs; i++) 1126 rt->mfc_ttls[i] = 0; 1127 rt->mfc_parent = -1; 1128 1129 /* link into table */ 1130 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 1131 /* Add this entry to the end of the queue */ 1132 rt->mfc_stall = rte; 1133 } else { 1134 /* determine if q has overflowed */ 1135 struct rtdetq **p; 1136 register int npkts = 0; 1137 1138 for (p = &rt->mfc_stall; *p != 0; p = &(*p)->next) 1139 if (++npkts > MAX_UPQ) { 1140 mrtstat.mrts_upq_ovflw++; 1141 free(rte, M_MRTABLE); 1142 m_freem(mb0); 1143 splx(s); 1144 return (0); 1145 } 1146 1147 /* Add this entry to the end of the queue */ 1148 *p = rte; 1149 } 1150 1151 rte->next = 0; 1152 rte->m = mb0; 1153 rte->ifp = ifp; 1154 #ifdef UPCALL_TIMING 1155 rte->t = tp; 1156 #endif /* UPCALL_TIMING */ 1157 1158 1159 splx(s); 1160 1161 return (0); 1162 } 1163 } 1164 1165 1166 /*ARGSUSED*/ 1167 static void 1168 expire_upcalls(v) 1169 void *v; 1170 { 1171 int i; 1172 int s; 1173 1174 s = splsoftnet(); 1175 1176 for (i = 0; i < MFCTBLSIZ; i++) { 1177 register struct mfc *rt, *nrt; 1178 1179 if (nexpire[i] == 0) 1180 continue; 1181 1182 for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) { 1183 nrt = rt->mfc_hash.le_next; 1184 1185 if (rt->mfc_expire == 0 || 1186 --rt->mfc_expire > 0) 1187 continue; 1188 nexpire[i]--; 1189 1190 ++mrtstat.mrts_cache_cleanups; 1191 if (mrtdebug & DEBUG_EXPIRE) 1192 log(LOG_DEBUG, 1193 "expire_upcalls: expiring (%x %x)\n", 1194 ntohl(rt->mfc_origin.s_addr), 1195 ntohl(rt->mfc_mcastgrp.s_addr)); 1196 1197 expire_mfc(rt); 1198 } 1199 } 1200 1201 splx(s); 1202 timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT); 1203 } 1204 1205 /* 1206 * Packet forwarding routine once entry in the cache is made 1207 */ 1208 static int 1209 #ifdef RSVP_ISI 1210 ip_mdq(m, ifp, rt, xmt_vif) 1211 #else 1212 ip_mdq(m, ifp, rt) 1213 #endif /* RSVP_ISI */ 1214 register struct mbuf *m; 1215 register struct ifnet *ifp; 1216 register struct mfc *rt; 1217 #ifdef RSVP_ISI 1218 register vifi_t xmt_vif; 1219 #endif /* RSVP_ISI */ 1220 { 1221 register struct ip *ip = mtod(m, struct ip *); 1222 register vifi_t vifi; 1223 register struct vif *vifp; 1224 register int plen = ntohs(ip->ip_len); 1225 1226 /* 1227 * Macro to send packet on vif. Since RSVP packets don't get counted on 1228 * input, they shouldn't get counted on output, so statistics keeping is 1229 * seperate. 1230 */ 1231 #define MC_SEND(ip,vifp,m) { \ 1232 if ((vifp)->v_flags & VIFF_TUNNEL) \ 1233 encap_send((ip), (vifp), (m)); \ 1234 else \ 1235 phyint_send((ip), (vifp), (m)); \ 1236 } 1237 1238 #ifdef RSVP_ISI 1239 /* 1240 * If xmt_vif is not -1, send on only the requested vif. 1241 * 1242 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs. 1243 */ 1244 if (xmt_vif < numvifs) { 1245 MC_SEND(ip, viftable + xmt_vif, m); 1246 return (1); 1247 } 1248 #endif /* RSVP_ISI */ 1249 1250 /* 1251 * Don't forward if it didn't arrive from the parent vif for its origin. 1252 */ 1253 vifi = rt->mfc_parent; 1254 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1255 /* came in the wrong interface */ 1256 if (mrtdebug & DEBUG_FORWARD) 1257 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1258 ifp, vifi, viftable[vifi].v_ifp); 1259 ++mrtstat.mrts_wrong_if; 1260 ++rt->mfc_wrong_if; 1261 /* 1262 * If we are doing PIM assert processing, and we are forwarding 1263 * packets on this interface, and it is a broadcast medium 1264 * interface (and not a tunnel), send a message to the routing daemon. 1265 */ 1266 if (pim_assert && rt->mfc_ttls[vifi] && 1267 (ifp->if_flags & IFF_BROADCAST) && 1268 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 1269 struct mbuf *mm; 1270 struct igmpmsg *im; 1271 int hlen = ip->ip_hl << 2; 1272 struct timeval now; 1273 register u_int32_t delta; 1274 1275 microtime(&now); 1276 1277 TV_DELTA(rt->mfc_last_assert, now, delta); 1278 1279 if (delta > ASSERT_MSG_TIME) { 1280 mm = m_copy(m, 0, hlen); 1281 M_PULLUP(mm, hlen); 1282 if (mm == 0) { 1283 return (ENOBUFS); 1284 } 1285 1286 rt->mfc_last_assert = now; 1287 1288 im = mtod(mm, struct igmpmsg *); 1289 im->im_msgtype = IGMPMSG_WRONGVIF; 1290 im->im_mbz = 0; 1291 im->im_vif = vifi; 1292 1293 sin.sin_addr = im->im_src; 1294 1295 socket_send(ip_mrouter, mm, &sin); 1296 } 1297 } 1298 return (0); 1299 } 1300 1301 /* If I sourced this packet, it counts as output, else it was input. */ 1302 if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) { 1303 viftable[vifi].v_pkt_out++; 1304 viftable[vifi].v_bytes_out += plen; 1305 } else { 1306 viftable[vifi].v_pkt_in++; 1307 viftable[vifi].v_bytes_in += plen; 1308 } 1309 rt->mfc_pkt_cnt++; 1310 rt->mfc_byte_cnt += plen; 1311 1312 /* 1313 * For each vif, decide if a copy of the packet should be forwarded. 1314 * Forward if: 1315 * - the ttl exceeds the vif's threshold 1316 * - there are group members downstream on interface 1317 */ 1318 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1319 if ((rt->mfc_ttls[vifi] > 0) && 1320 (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1321 vifp->v_pkt_out++; 1322 vifp->v_bytes_out += plen; 1323 MC_SEND(ip, vifp, m); 1324 } 1325 1326 return (0); 1327 } 1328 1329 #ifdef RSVP_ISI 1330 /* 1331 * check if a vif number is legal/ok. This is used by ip_output, to export 1332 * numvifs there, 1333 */ 1334 int 1335 legal_vif_num(vif) 1336 int vif; 1337 { 1338 if (vif >= 0 && vif < numvifs) 1339 return (1); 1340 else 1341 return (0); 1342 } 1343 #endif /* RSVP_ISI */ 1344 1345 static void 1346 phyint_send(ip, vifp, m) 1347 struct ip *ip; 1348 struct vif *vifp; 1349 struct mbuf *m; 1350 { 1351 register struct mbuf *mb_copy; 1352 register int hlen = ip->ip_hl << 2; 1353 1354 /* 1355 * Make a new reference to the packet; make sure that 1356 * the IP header is actually copied, not just referenced, 1357 * so that ip_output() only scribbles on the copy. 1358 */ 1359 mb_copy = m_copy(m, 0, M_COPYALL); 1360 M_PULLUP(mb_copy, hlen); 1361 if (mb_copy == 0) 1362 return; 1363 1364 if (vifp->v_rate_limit <= 0) 1365 tbf_send_packet(vifp, mb_copy); 1366 else 1367 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); 1368 } 1369 1370 static void 1371 encap_send(ip, vifp, m) 1372 register struct ip *ip; 1373 register struct vif *vifp; 1374 register struct mbuf *m; 1375 { 1376 register struct mbuf *mb_copy; 1377 register struct ip *ip_copy; 1378 register int i, len = ip->ip_len + sizeof(multicast_encap_iphdr); 1379 1380 /* 1381 * copy the old packet & pullup it's IP header into the 1382 * new mbuf so we can modify it. Try to fill the new 1383 * mbuf since if we don't the ethernet driver will. 1384 */ 1385 MGETHDR(mb_copy, M_DONTWAIT, MT_DATA); 1386 if (mb_copy == 0) 1387 return; 1388 mb_copy->m_data += max_linkhdr; 1389 mb_copy->m_pkthdr.len = len; 1390 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1391 1392 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == 0) { 1393 m_freem(mb_copy); 1394 return; 1395 } 1396 i = MHLEN - max_linkhdr; 1397 if (i > len) 1398 i = len; 1399 mb_copy = m_pullup(mb_copy, i); 1400 if (mb_copy == 0) 1401 return; 1402 1403 /* 1404 * fill in the encapsulating IP header. 1405 */ 1406 ip_copy = mtod(mb_copy, struct ip *); 1407 *ip_copy = multicast_encap_iphdr; 1408 ip_copy->ip_id = htons(ip_id++); 1409 ip_copy->ip_len = len; 1410 ip_copy->ip_src = vifp->v_lcl_addr; 1411 ip_copy->ip_dst = vifp->v_rmt_addr; 1412 1413 /* 1414 * turn the encapsulated IP header back into a valid one. 1415 */ 1416 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1417 --ip->ip_ttl; 1418 HTONS(ip->ip_len); 1419 HTONS(ip->ip_off); 1420 ip->ip_sum = 0; 1421 #if defined(LBL) && !defined(ultrix) && !defined(i386) 1422 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); 1423 #else 1424 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1425 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1426 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1427 #endif 1428 1429 if (vifp->v_rate_limit <= 0) 1430 tbf_send_packet(vifp, mb_copy); 1431 else 1432 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); 1433 } 1434 1435 /* 1436 * De-encapsulate a packet and feed it back through ip input (this 1437 * routine is called whenever IP gets a packet with proto type 1438 * ENCAP_PROTO and a local destination address). 1439 */ 1440 void 1441 #if __STDC__ 1442 ipip_input(struct mbuf *m, ...) 1443 #else 1444 ipip_input(m, va_alist) 1445 struct mbuf *m; 1446 va_dcl 1447 #endif 1448 { 1449 register int hlen; 1450 register struct ip *ip = mtod(m, struct ip *); 1451 register int s; 1452 register struct ifqueue *ifq; 1453 register struct vif *vifp; 1454 va_list ap; 1455 1456 va_start(ap, m); 1457 hlen = va_arg(ap, int); 1458 va_end(ap); 1459 1460 if (!have_encap_tunnel) { 1461 rip_input(m); 1462 return; 1463 } 1464 1465 /* 1466 * dump the packet if it's not to a multicast destination or if 1467 * we don't have an encapsulating tunnel with the source. 1468 * Note: This code assumes that the remote site IP address 1469 * uniquely identifies the tunnel (i.e., that this site has 1470 * at most one tunnel with the remote site). 1471 */ 1472 if (!IN_MULTICAST(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr)) { 1473 ++mrtstat.mrts_bad_tunnel; 1474 m_freem(m); 1475 return; 1476 } 1477 1478 if (!in_hosteq(ip->ip_src, last_encap_src)) { 1479 register struct vif *vife; 1480 1481 vifp = viftable; 1482 vife = vifp + numvifs; 1483 for (; vifp < vife; vifp++) 1484 if (vifp->v_flags & VIFF_TUNNEL && 1485 in_hosteq(vifp->v_rmt_addr, ip->ip_src)) 1486 break; 1487 if (vifp == vife) { 1488 mrtstat.mrts_cant_tunnel++; /*XXX*/ 1489 m_freem(m); 1490 if (mrtdebug) 1491 log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n", 1492 ntohl(ip->ip_src.s_addr)); 1493 return; 1494 } 1495 last_encap_vif = vifp; 1496 last_encap_src = ip->ip_src; 1497 } else 1498 vifp = last_encap_vif; 1499 1500 m->m_data += hlen; 1501 m->m_len -= hlen; 1502 m->m_pkthdr.len -= hlen; 1503 m->m_pkthdr.rcvif = vifp->v_ifp; 1504 ifq = &ipintrq; 1505 s = splimp(); 1506 if (IF_QFULL(ifq)) { 1507 IF_DROP(ifq); 1508 m_freem(m); 1509 } else { 1510 IF_ENQUEUE(ifq, m); 1511 /* 1512 * normally we would need a "schednetisr(NETISR_IP)" 1513 * here but we were called by ip_input and it is going 1514 * to loop back & try to dequeue the packet we just 1515 * queued as soon as we return so we avoid the 1516 * unnecessary software interrrupt. 1517 */ 1518 } 1519 splx(s); 1520 } 1521 1522 /* 1523 * Token bucket filter module 1524 */ 1525 static void 1526 tbf_control(vifp, m, ip, len) 1527 register struct vif *vifp; 1528 register struct mbuf *m; 1529 register struct ip *ip; 1530 register u_int32_t len; 1531 { 1532 1533 if (len > MAX_BKT_SIZE) { 1534 /* drop if packet is too large */ 1535 mrtstat.mrts_pkt2large++; 1536 m_freem(m); 1537 return; 1538 } 1539 1540 tbf_update_tokens(vifp); 1541 1542 /* 1543 * If there are enough tokens, and the queue is empty, send this packet 1544 * out immediately. Otherwise, try to insert it on this vif's queue. 1545 */ 1546 if (vifp->tbf_q_len == 0) { 1547 if (len <= vifp->tbf_n_tok) { 1548 vifp->tbf_n_tok -= len; 1549 tbf_send_packet(vifp, m); 1550 } else { 1551 /* queue packet and timeout till later */ 1552 tbf_queue(vifp, m); 1553 timeout(tbf_reprocess_q, vifp, TBF_REPROCESS); 1554 } 1555 } else { 1556 if (vifp->tbf_q_len >= vifp->tbf_max_q_len && 1557 !tbf_dq_sel(vifp, ip)) { 1558 /* queue length too much, and couldn't make room */ 1559 mrtstat.mrts_q_overflow++; 1560 m_freem(m); 1561 } else { 1562 /* queue length low enough, or made room */ 1563 tbf_queue(vifp, m); 1564 tbf_process_q(vifp); 1565 } 1566 } 1567 } 1568 1569 /* 1570 * adds a packet to the queue at the interface 1571 */ 1572 static void 1573 tbf_queue(vifp, m) 1574 register struct vif *vifp; 1575 register struct mbuf *m; 1576 { 1577 register int s = splsoftnet(); 1578 1579 /* insert at tail */ 1580 *vifp->tbf_t = m; 1581 vifp->tbf_t = &m->m_nextpkt; 1582 vifp->tbf_q_len++; 1583 1584 splx(s); 1585 } 1586 1587 1588 /* 1589 * processes the queue at the interface 1590 */ 1591 static void 1592 tbf_process_q(vifp) 1593 register struct vif *vifp; 1594 { 1595 register struct mbuf *m; 1596 register int len; 1597 register int s = splsoftnet(); 1598 1599 /* 1600 * Loop through the queue at the interface and send as many packets 1601 * as possible. 1602 */ 1603 for (m = vifp->tbf_q; 1604 m != 0; 1605 m = vifp->tbf_q) { 1606 len = mtod(m, struct ip *)->ip_len; 1607 1608 /* determine if the packet can be sent */ 1609 if (len <= vifp->tbf_n_tok) { 1610 /* if so, 1611 * reduce no of tokens, dequeue the packet, 1612 * send the packet. 1613 */ 1614 if ((vifp->tbf_q = m->m_nextpkt) == 0) 1615 vifp->tbf_t = &vifp->tbf_q; 1616 --vifp->tbf_q_len; 1617 1618 m->m_nextpkt = 0; 1619 vifp->tbf_n_tok -= len; 1620 tbf_send_packet(vifp, m); 1621 } else 1622 break; 1623 } 1624 splx(s); 1625 } 1626 1627 static void 1628 tbf_reprocess_q(arg) 1629 void *arg; 1630 { 1631 register struct vif *vifp = arg; 1632 1633 if (ip_mrouter == 0) 1634 return; 1635 1636 tbf_update_tokens(vifp); 1637 tbf_process_q(vifp); 1638 1639 if (vifp->tbf_q_len != 0) 1640 timeout(tbf_reprocess_q, vifp, TBF_REPROCESS); 1641 } 1642 1643 /* function that will selectively discard a member of the queue 1644 * based on the precedence value and the priority 1645 */ 1646 static int 1647 tbf_dq_sel(vifp, ip) 1648 register struct vif *vifp; 1649 register struct ip *ip; 1650 { 1651 register u_int p; 1652 register struct mbuf **mp, *m; 1653 register int s = splsoftnet(); 1654 1655 p = priority(vifp, ip); 1656 1657 for (mp = &vifp->tbf_q, m = *mp; 1658 m != 0; 1659 mp = &m->m_nextpkt, m = *mp) { 1660 if (p > priority(vifp, mtod(m, struct ip *))) { 1661 if ((*mp = m->m_nextpkt) == 0) 1662 vifp->tbf_t = mp; 1663 --vifp->tbf_q_len; 1664 1665 m_freem(m); 1666 mrtstat.mrts_drop_sel++; 1667 splx(s); 1668 return (1); 1669 } 1670 } 1671 splx(s); 1672 return (0); 1673 } 1674 1675 static void 1676 tbf_send_packet(vifp, m) 1677 register struct vif *vifp; 1678 register struct mbuf *m; 1679 { 1680 int error; 1681 int s = splsoftnet(); 1682 1683 if (vifp->v_flags & VIFF_TUNNEL) { 1684 /* If tunnel options */ 1685 ip_output(m, (struct mbuf *)0, &vifp->v_route, 1686 IP_FORWARDING, (struct ip_moptions *)0); 1687 } else { 1688 /* if physical interface option, extract the options and then send */ 1689 struct ip_moptions imo; 1690 1691 imo.imo_multicast_ifp = vifp->v_ifp; 1692 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 1693 imo.imo_multicast_loop = 1; 1694 #ifdef RSVP_ISI 1695 imo.imo_multicast_vif = -1; 1696 #endif 1697 1698 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1699 IP_FORWARDING|IP_MULTICASTOPTS, &imo); 1700 1701 if (mrtdebug & DEBUG_XMIT) 1702 log(LOG_DEBUG, "phyint_send on vif %d err %d\n", 1703 vifp-viftable, error); 1704 } 1705 splx(s); 1706 } 1707 1708 /* determine the current time and then 1709 * the elapsed time (between the last time and time now) 1710 * in milliseconds & update the no. of tokens in the bucket 1711 */ 1712 static void 1713 tbf_update_tokens(vifp) 1714 register struct vif *vifp; 1715 { 1716 struct timeval tp; 1717 register u_int32_t tm; 1718 register int s = splsoftnet(); 1719 1720 microtime(&tp); 1721 1722 TV_DELTA(tp, vifp->tbf_last_pkt_t, tm); 1723 1724 /* 1725 * This formula is actually 1726 * "time in seconds" * "bytes/second". 1727 * 1728 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 1729 * 1730 * The (1000/1024) was introduced in add_vif to optimize 1731 * this divide into a shift. 1732 */ 1733 vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192; 1734 vifp->tbf_last_pkt_t = tp; 1735 1736 if (vifp->tbf_n_tok > MAX_BKT_SIZE) 1737 vifp->tbf_n_tok = MAX_BKT_SIZE; 1738 1739 splx(s); 1740 } 1741 1742 static int 1743 priority(vifp, ip) 1744 register struct vif *vifp; 1745 register struct ip *ip; 1746 { 1747 register int prio; 1748 1749 /* temporary hack; may add general packet classifier some day */ 1750 1751 /* 1752 * The UDP port space is divided up into four priority ranges: 1753 * [0, 16384) : unclassified - lowest priority 1754 * [16384, 32768) : audio - highest priority 1755 * [32768, 49152) : whiteboard - medium priority 1756 * [49152, 65536) : video - low priority 1757 */ 1758 if (ip->ip_p == IPPROTO_UDP) { 1759 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 1760 1761 switch (ntohs(udp->uh_dport) & 0xc000) { 1762 case 0x4000: 1763 prio = 70; 1764 break; 1765 case 0x8000: 1766 prio = 60; 1767 break; 1768 case 0xc000: 1769 prio = 55; 1770 break; 1771 default: 1772 prio = 50; 1773 break; 1774 } 1775 1776 if (tbfdebug > 1) 1777 log(LOG_DEBUG, "port %x prio %d\n", ntohs(udp->uh_dport), prio); 1778 } else 1779 prio = 50; 1780 1781 1782 return (prio); 1783 } 1784 1785 /* 1786 * End of token bucket filter modifications 1787 */ 1788 1789 #ifdef RSVP_ISI 1790 1791 int 1792 ip_rsvp_vif_init(so, m) 1793 struct socket *so; 1794 struct mbuf *m; 1795 { 1796 int i; 1797 register int s; 1798 1799 if (rsvpdebug) 1800 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", 1801 so->so_type, so->so_proto->pr_protocol); 1802 1803 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1804 return (EOPNOTSUPP); 1805 1806 /* Check mbuf. */ 1807 if (m == 0 || m->m_len != sizeof(int)) { 1808 return (EINVAL); 1809 } 1810 i = *(mtod(m, int *)); 1811 1812 if (rsvpdebug) 1813 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on); 1814 1815 s = splsoftnet(); 1816 1817 /* Check vif. */ 1818 if (!legal_vif_num(i)) { 1819 splx(s); 1820 return (EADDRNOTAVAIL); 1821 } 1822 1823 /* Check if socket is available. */ 1824 if (viftable[i].v_rsvpd != 0) { 1825 splx(s); 1826 return (EADDRINUSE); 1827 } 1828 1829 viftable[i].v_rsvpd = so; 1830 /* This may seem silly, but we need to be sure we don't over-increment 1831 * the RSVP counter, in case something slips up. 1832 */ 1833 if (!viftable[i].v_rsvp_on) { 1834 viftable[i].v_rsvp_on = 1; 1835 rsvp_on++; 1836 } 1837 1838 splx(s); 1839 return (0); 1840 } 1841 1842 int 1843 ip_rsvp_vif_done(so, m) 1844 struct socket *so; 1845 struct mbuf *m; 1846 { 1847 int i; 1848 register int s; 1849 1850 if (rsvpdebug) 1851 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", 1852 so->so_type, so->so_proto->pr_protocol); 1853 1854 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1855 return (EOPNOTSUPP); 1856 1857 /* Check mbuf. */ 1858 if (m == 0 || m->m_len != sizeof(int)) { 1859 return (EINVAL); 1860 } 1861 i = *(mtod(m, int *)); 1862 1863 s = splsoftnet(); 1864 1865 /* Check vif. */ 1866 if (!legal_vif_num(i)) { 1867 splx(s); 1868 return (EADDRNOTAVAIL); 1869 } 1870 1871 if (rsvpdebug) 1872 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n", 1873 viftable[i].v_rsvpd, so); 1874 1875 viftable[i].v_rsvpd = 0; 1876 /* This may seem silly, but we need to be sure we don't over-decrement 1877 * the RSVP counter, in case something slips up. 1878 */ 1879 if (viftable[i].v_rsvp_on) { 1880 viftable[i].v_rsvp_on = 0; 1881 rsvp_on--; 1882 } 1883 1884 splx(s); 1885 return (0); 1886 } 1887 1888 void 1889 ip_rsvp_force_done(so) 1890 struct socket *so; 1891 { 1892 int vifi; 1893 register int s; 1894 1895 /* Don't bother if it is not the right type of socket. */ 1896 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1897 return; 1898 1899 s = splsoftnet(); 1900 1901 /* The socket may be attached to more than one vif...this 1902 * is perfectly legal. 1903 */ 1904 for (vifi = 0; vifi < numvifs; vifi++) { 1905 if (viftable[vifi].v_rsvpd == so) { 1906 viftable[vifi].v_rsvpd = 0; 1907 /* This may seem silly, but we need to be sure we don't 1908 * over-decrement the RSVP counter, in case something slips up. 1909 */ 1910 if (viftable[vifi].v_rsvp_on) { 1911 viftable[vifi].v_rsvp_on = 0; 1912 rsvp_on--; 1913 } 1914 } 1915 } 1916 1917 splx(s); 1918 return; 1919 } 1920 1921 void 1922 rsvp_input(m, ifp) 1923 struct mbuf *m; 1924 struct ifnet *ifp; 1925 { 1926 int vifi; 1927 register struct ip *ip = mtod(m, struct ip *); 1928 static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET }; 1929 register int s; 1930 1931 if (rsvpdebug) 1932 printf("rsvp_input: rsvp_on %d\n",rsvp_on); 1933 1934 /* Can still get packets with rsvp_on = 0 if there is a local member 1935 * of the group to which the RSVP packet is addressed. But in this 1936 * case we want to throw the packet away. 1937 */ 1938 if (!rsvp_on) { 1939 m_freem(m); 1940 return; 1941 } 1942 1943 /* If the old-style non-vif-associated socket is set, then use 1944 * it and ignore the new ones. 1945 */ 1946 if (ip_rsvpd != 0) { 1947 if (rsvpdebug) 1948 printf("rsvp_input: Sending packet up old-style socket\n"); 1949 rip_input(m); 1950 return; 1951 } 1952 1953 s = splsoftnet(); 1954 1955 if (rsvpdebug) 1956 printf("rsvp_input: check vifs\n"); 1957 1958 /* Find which vif the packet arrived on. */ 1959 for (vifi = 0; vifi < numvifs; vifi++) { 1960 if (viftable[vifi].v_ifp == ifp) 1961 break; 1962 } 1963 1964 if (vifi == numvifs) { 1965 /* Can't find vif packet arrived on. Drop packet. */ 1966 if (rsvpdebug) 1967 printf("rsvp_input: Can't find vif for packet...dropping it.\n"); 1968 m_freem(m); 1969 splx(s); 1970 return; 1971 } 1972 1973 if (rsvpdebug) 1974 printf("rsvp_input: check socket\n"); 1975 1976 if (viftable[vifi].v_rsvpd == 0) { 1977 /* drop packet, since there is no specific socket for this 1978 * interface */ 1979 if (rsvpdebug) 1980 printf("rsvp_input: No socket defined for vif %d\n",vifi); 1981 m_freem(m); 1982 splx(s); 1983 return; 1984 } 1985 1986 rsvp_src.sin_addr = ip->ip_src; 1987 1988 if (rsvpdebug && m) 1989 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", 1990 m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv)); 1991 1992 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) 1993 if (rsvpdebug) 1994 printf("rsvp_input: Failed to append to socket\n"); 1995 else 1996 if (rsvpdebug) 1997 printf("rsvp_input: send packet up\n"); 1998 1999 splx(s); 2000 } 2001 #endif /* RSVP_ISI */ 2002