1 /* $NetBSD: ip_mroute.c,v 1.56 2001/07/22 13:34:11 wiz Exp $ */ 2 3 /* 4 * IP multicast forwarding procedures 5 * 6 * Written by David Waitzman, BBN Labs, August 1988. 7 * Modified by Steve Deering, Stanford, February 1989. 8 * Modified by Mark J. Steiglitz, Stanford, May, 1991 9 * Modified by Van Jacobson, LBL, January 1993 10 * Modified by Ajit Thyagarajan, PARC, August 1993 11 * Modified by Bill Fenner, PARC, April 1994 12 * Modified by Charles M. Hannum, NetBSD, May 1995. 13 * 14 * MROUTING Revision: 1.2 15 */ 16 17 #include "opt_ipsec.h" 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/callout.h> 22 #include <sys/mbuf.h> 23 #include <sys/socket.h> 24 #include <sys/socketvar.h> 25 #include <sys/protosw.h> 26 #include <sys/errno.h> 27 #include <sys/time.h> 28 #include <sys/kernel.h> 29 #include <sys/ioctl.h> 30 #include <sys/syslog.h> 31 #include <net/if.h> 32 #include <net/route.h> 33 #include <net/raw_cb.h> 34 #include <netinet/in.h> 35 #include <netinet/in_var.h> 36 #include <netinet/in_systm.h> 37 #include <netinet/ip.h> 38 #include <netinet/ip_var.h> 39 #include <netinet/in_pcb.h> 40 #include <netinet/udp.h> 41 #include <netinet/igmp.h> 42 #include <netinet/igmp_var.h> 43 #include <netinet/ip_mroute.h> 44 #include <netinet/ip_encap.h> 45 46 #include <machine/stdarg.h> 47 48 #define IP_MULTICASTOPTS 0 49 #define M_PULLUP(m, len) \ 50 do { \ 51 if ((m) && ((m)->m_flags & M_EXT || (m)->m_len < (len))) \ 52 (m) = m_pullup((m), (len)); \ 53 } while (0) 54 55 /* 56 * Globals. All but ip_mrouter and ip_mrtproto could be static, 57 * except for netstat or debugging purposes. 58 */ 59 struct socket *ip_mrouter = 0; 60 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ 61 62 #define NO_RTE_FOUND 0x1 63 #define RTE_FOUND 0x2 64 65 #define MFCHASH(a, g) \ 66 ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ 67 ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash) 68 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl; 69 u_long mfchash; 70 71 u_char nexpire[MFCTBLSIZ]; 72 struct vif viftable[MAXVIFS]; 73 struct mrtstat mrtstat; 74 u_int mrtdebug = 0; /* debug level */ 75 #define DEBUG_MFC 0x02 76 #define DEBUG_FORWARD 0x04 77 #define DEBUG_EXPIRE 0x08 78 #define DEBUG_XMIT 0x10 79 u_int tbfdebug = 0; /* tbf debug level */ 80 #ifdef RSVP_ISI 81 u_int rsvpdebug = 0; /* rsvp debug level */ 82 extern struct socket *ip_rsvpd; 83 extern int rsvp_on; 84 #endif /* RSVP_ISI */ 85 86 /* vif attachment using sys/netinet/ip_encap.c */ 87 extern struct domain inetdomain; 88 static void vif_input __P((struct mbuf *, ...)); 89 static int vif_encapcheck __P((const struct mbuf *, int, int, void *)); 90 static struct protosw vif_protosw = 91 { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, 92 vif_input, rip_output, 0, rip_ctloutput, 93 rip_usrreq, 94 0, 0, 0, 0, 95 }; 96 97 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ 98 #define UPCALL_EXPIRE 6 /* number of timeouts */ 99 100 /* 101 * Define the token bucket filter structures 102 */ 103 104 #define TBF_REPROCESS (hz / 100) /* 100x / second */ 105 106 static int get_sg_cnt __P((struct sioc_sg_req *)); 107 static int get_vif_cnt __P((struct sioc_vif_req *)); 108 static int ip_mrouter_init __P((struct socket *, struct mbuf *)); 109 static int get_version __P((struct mbuf *)); 110 static int set_assert __P((struct mbuf *)); 111 static int get_assert __P((struct mbuf *)); 112 static int add_vif __P((struct mbuf *)); 113 static int del_vif __P((struct mbuf *)); 114 static void update_mfc __P((struct mfcctl *, struct mfc *)); 115 static void expire_mfc __P((struct mfc *)); 116 static int add_mfc __P((struct mbuf *)); 117 #ifdef UPCALL_TIMING 118 static void collate __P((struct timeval *)); 119 #endif 120 static int del_mfc __P((struct mbuf *)); 121 static int socket_send __P((struct socket *, struct mbuf *, 122 struct sockaddr_in *)); 123 static void expire_upcalls __P((void *)); 124 #ifdef RSVP_ISI 125 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t)); 126 #else 127 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *)); 128 #endif 129 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *)); 130 static void encap_send __P((struct ip *, struct vif *, struct mbuf *)); 131 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *, 132 u_int32_t)); 133 static void tbf_queue __P((struct vif *, struct mbuf *)); 134 static void tbf_process_q __P((struct vif *)); 135 static void tbf_reprocess_q __P((void *)); 136 static int tbf_dq_sel __P((struct vif *, struct ip *)); 137 static void tbf_send_packet __P((struct vif *, struct mbuf *)); 138 static void tbf_update_tokens __P((struct vif *)); 139 static int priority __P((struct vif *, struct ip *)); 140 141 /* 142 * 'Interfaces' associated with decapsulator (so we can tell 143 * packets that went through it from ones that get reflected 144 * by a broken gateway). These interfaces are never linked into 145 * the system ifnet list & no routes point to them. I.e., packets 146 * can't be sent this way. They only exist as a placeholder for 147 * multicast source verification. 148 */ 149 #if 0 150 struct ifnet multicast_decap_if[MAXVIFS]; 151 #endif 152 153 #define ENCAP_TTL 64 154 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */ 155 156 /* prototype IP hdr for encapsulated packets */ 157 struct ip multicast_encap_iphdr = { 158 #if BYTE_ORDER == LITTLE_ENDIAN 159 sizeof(struct ip) >> 2, IPVERSION, 160 #else 161 IPVERSION, sizeof(struct ip) >> 2, 162 #endif 163 0, /* tos */ 164 sizeof(struct ip), /* total length */ 165 0, /* id */ 166 0, /* frag offset */ 167 ENCAP_TTL, ENCAP_PROTO, 168 0, /* checksum */ 169 }; 170 171 /* 172 * Private variables. 173 */ 174 static vifi_t numvifs = 0; 175 static int have_encap_tunnel = 0; 176 177 static struct callout expire_upcalls_ch; 178 179 /* 180 * one-back cache used by mrt_ipip_input to locate a tunnel's vif 181 * given a datagram's src ip address. 182 */ 183 static struct in_addr last_encap_src; 184 static struct vif *last_encap_vif; 185 186 /* 187 * whether or not special PIM assert processing is enabled. 188 */ 189 static int pim_assert; 190 /* 191 * Rate limit for assert notification messages, in usec 192 */ 193 #define ASSERT_MSG_TIME 3000000 194 195 /* 196 * Find a route for a given origin IP address and Multicast group address 197 * Type of service parameter to be added in the future!!! 198 */ 199 200 #define MFCFIND(o, g, rt) { \ 201 struct mfc *_rt; \ 202 (rt) = 0; \ 203 ++mrtstat.mrts_mfc_lookups; \ 204 for (_rt = mfchashtbl[MFCHASH(o, g)].lh_first; \ 205 _rt; _rt = _rt->mfc_hash.le_next) { \ 206 if (in_hosteq(_rt->mfc_origin, (o)) && \ 207 in_hosteq(_rt->mfc_mcastgrp, (g)) && \ 208 _rt->mfc_stall == 0) { \ 209 (rt) = _rt; \ 210 break; \ 211 } \ 212 } \ 213 if ((rt) == 0) \ 214 ++mrtstat.mrts_mfc_misses; \ 215 } 216 217 /* 218 * Macros to compute elapsed time efficiently 219 * Borrowed from Van Jacobson's scheduling code 220 */ 221 #define TV_DELTA(a, b, delta) { \ 222 int xxs; \ 223 delta = (a).tv_usec - (b).tv_usec; \ 224 xxs = (a).tv_sec - (b).tv_sec; \ 225 switch (xxs) { \ 226 case 2: \ 227 delta += 1000000; \ 228 /* fall through */ \ 229 case 1: \ 230 delta += 1000000; \ 231 /* fall through */ \ 232 case 0: \ 233 break; \ 234 default: \ 235 delta += (1000000 * xxs); \ 236 break; \ 237 } \ 238 } 239 240 #ifdef UPCALL_TIMING 241 u_int32_t upcall_data[51]; 242 #endif /* UPCALL_TIMING */ 243 244 /* 245 * Handle MRT setsockopt commands to modify the multicast routing tables. 246 */ 247 int 248 ip_mrouter_set(so, optname, m) 249 struct socket *so; 250 int optname; 251 struct mbuf **m; 252 { 253 int error; 254 255 if (optname != MRT_INIT && so != ip_mrouter) 256 error = ENOPROTOOPT; 257 else 258 switch (optname) { 259 case MRT_INIT: 260 error = ip_mrouter_init(so, *m); 261 break; 262 case MRT_DONE: 263 error = ip_mrouter_done(); 264 break; 265 case MRT_ADD_VIF: 266 error = add_vif(*m); 267 break; 268 case MRT_DEL_VIF: 269 error = del_vif(*m); 270 break; 271 case MRT_ADD_MFC: 272 error = add_mfc(*m); 273 break; 274 case MRT_DEL_MFC: 275 error = del_mfc(*m); 276 break; 277 case MRT_ASSERT: 278 error = set_assert(*m); 279 break; 280 default: 281 error = ENOPROTOOPT; 282 break; 283 } 284 285 if (*m) 286 m_free(*m); 287 return (error); 288 } 289 290 /* 291 * Handle MRT getsockopt commands 292 */ 293 int 294 ip_mrouter_get(so, optname, m) 295 struct socket *so; 296 int optname; 297 struct mbuf **m; 298 { 299 int error; 300 301 if (so != ip_mrouter) 302 error = ENOPROTOOPT; 303 else { 304 *m = m_get(M_WAIT, MT_SOOPTS); 305 306 switch (optname) { 307 case MRT_VERSION: 308 error = get_version(*m); 309 break; 310 case MRT_ASSERT: 311 error = get_assert(*m); 312 break; 313 default: 314 error = ENOPROTOOPT; 315 break; 316 } 317 318 if (error) 319 m_free(*m); 320 } 321 322 return (error); 323 } 324 325 /* 326 * Handle ioctl commands to obtain information from the cache 327 */ 328 int 329 mrt_ioctl(so, cmd, data) 330 struct socket *so; 331 u_long cmd; 332 caddr_t data; 333 { 334 int error; 335 336 if (so != ip_mrouter) 337 error = EINVAL; 338 else 339 switch (cmd) { 340 case SIOCGETVIFCNT: 341 error = get_vif_cnt((struct sioc_vif_req *)data); 342 break; 343 case SIOCGETSGCNT: 344 error = get_sg_cnt((struct sioc_sg_req *)data); 345 break; 346 default: 347 error = EINVAL; 348 break; 349 } 350 351 return (error); 352 } 353 354 /* 355 * returns the packet, byte, rpf-failure count for the source group provided 356 */ 357 static int 358 get_sg_cnt(req) 359 struct sioc_sg_req *req; 360 { 361 struct mfc *rt; 362 int s; 363 364 s = splsoftnet(); 365 MFCFIND(req->src, req->grp, rt); 366 splx(s); 367 if (rt != 0) { 368 req->pktcnt = rt->mfc_pkt_cnt; 369 req->bytecnt = rt->mfc_byte_cnt; 370 req->wrong_if = rt->mfc_wrong_if; 371 } else 372 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; 373 374 return (0); 375 } 376 377 /* 378 * returns the input and output packet and byte counts on the vif provided 379 */ 380 static int 381 get_vif_cnt(req) 382 struct sioc_vif_req *req; 383 { 384 vifi_t vifi = req->vifi; 385 386 if (vifi >= numvifs) 387 return (EINVAL); 388 389 req->icount = viftable[vifi].v_pkt_in; 390 req->ocount = viftable[vifi].v_pkt_out; 391 req->ibytes = viftable[vifi].v_bytes_in; 392 req->obytes = viftable[vifi].v_bytes_out; 393 394 return (0); 395 } 396 397 /* 398 * Enable multicast routing 399 */ 400 static int 401 ip_mrouter_init(so, m) 402 struct socket *so; 403 struct mbuf *m; 404 { 405 int *v; 406 407 if (mrtdebug) 408 log(LOG_DEBUG, 409 "ip_mrouter_init: so_type = %d, pr_protocol = %d\n", 410 so->so_type, so->so_proto->pr_protocol); 411 412 if (so->so_type != SOCK_RAW || 413 so->so_proto->pr_protocol != IPPROTO_IGMP) 414 return (EOPNOTSUPP); 415 416 if (m == 0 || m->m_len < sizeof(int)) 417 return (EINVAL); 418 419 v = mtod(m, int *); 420 if (*v != 1) 421 return (EINVAL); 422 423 if (ip_mrouter != 0) 424 return (EADDRINUSE); 425 426 ip_mrouter = so; 427 428 mfchashtbl = 429 hashinit(MFCTBLSIZ, HASH_LIST, M_MRTABLE, M_WAITOK, &mfchash); 430 bzero((caddr_t)nexpire, sizeof(nexpire)); 431 432 pim_assert = 0; 433 434 callout_init(&expire_upcalls_ch); 435 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, 436 expire_upcalls, NULL); 437 438 if (mrtdebug) 439 log(LOG_DEBUG, "ip_mrouter_init\n"); 440 441 return (0); 442 } 443 444 /* 445 * Disable multicast routing 446 */ 447 int 448 ip_mrouter_done() 449 { 450 vifi_t vifi; 451 struct vif *vifp; 452 int i; 453 int s; 454 455 s = splsoftnet(); 456 457 /* Clear out all the vifs currently in use. */ 458 for (vifi = 0; vifi < numvifs; vifi++) { 459 vifp = &viftable[vifi]; 460 if (!in_nullhost(vifp->v_lcl_addr)) 461 reset_vif(vifp); 462 } 463 464 numvifs = 0; 465 pim_assert = 0; 466 467 callout_stop(&expire_upcalls_ch); 468 469 /* 470 * Free all multicast forwarding cache entries. 471 */ 472 for (i = 0; i < MFCTBLSIZ; i++) { 473 struct mfc *rt, *nrt; 474 475 for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) { 476 nrt = rt->mfc_hash.le_next; 477 478 expire_mfc(rt); 479 } 480 } 481 482 free(mfchashtbl, M_MRTABLE); 483 mfchashtbl = 0; 484 485 /* Reset de-encapsulation cache. */ 486 have_encap_tunnel = 0; 487 488 ip_mrouter = 0; 489 490 splx(s); 491 492 if (mrtdebug) 493 log(LOG_DEBUG, "ip_mrouter_done\n"); 494 495 return (0); 496 } 497 498 static int 499 get_version(m) 500 struct mbuf *m; 501 { 502 int *v = mtod(m, int *); 503 504 *v = 0x0305; /* XXX !!!! */ 505 m->m_len = sizeof(int); 506 return (0); 507 } 508 509 /* 510 * Set PIM assert processing global 511 */ 512 static int 513 set_assert(m) 514 struct mbuf *m; 515 { 516 int *i; 517 518 if (m == 0 || m->m_len < sizeof(int)) 519 return (EINVAL); 520 521 i = mtod(m, int *); 522 pim_assert = !!*i; 523 return (0); 524 } 525 526 /* 527 * Get PIM assert processing global 528 */ 529 static int 530 get_assert(m) 531 struct mbuf *m; 532 { 533 int *i = mtod(m, int *); 534 535 *i = pim_assert; 536 m->m_len = sizeof(int); 537 return (0); 538 } 539 540 static struct sockaddr_in sin = { sizeof(sin), AF_INET }; 541 542 /* 543 * Add a vif to the vif table 544 */ 545 static int 546 add_vif(m) 547 struct mbuf *m; 548 { 549 struct vifctl *vifcp; 550 struct vif *vifp; 551 struct ifaddr *ifa; 552 struct ifnet *ifp; 553 struct ifreq ifr; 554 int error, s; 555 556 if (m == 0 || m->m_len < sizeof(struct vifctl)) 557 return (EINVAL); 558 559 vifcp = mtod(m, struct vifctl *); 560 if (vifcp->vifc_vifi >= MAXVIFS) 561 return (EINVAL); 562 563 vifp = &viftable[vifcp->vifc_vifi]; 564 if (!in_nullhost(vifp->v_lcl_addr)) 565 return (EADDRINUSE); 566 567 /* Find the interface with an address in AF_INET family. */ 568 sin.sin_addr = vifcp->vifc_lcl_addr; 569 ifa = ifa_ifwithaddr(sintosa(&sin)); 570 if (ifa == 0) 571 return (EADDRNOTAVAIL); 572 573 if (vifcp->vifc_flags & VIFF_TUNNEL) { 574 if (vifcp->vifc_flags & VIFF_SRCRT) { 575 log(LOG_ERR, "Source routed tunnels not supported\n"); 576 return (EOPNOTSUPP); 577 } 578 579 /* attach this vif to decapsulator dispatch table */ 580 vifp->v_encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4, 581 vif_encapcheck, &vif_protosw, vifp); 582 if (!vifp->v_encap_cookie) 583 return (EINVAL); 584 585 /* Create a fake encapsulation interface. */ 586 ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK); 587 bzero(ifp, sizeof(*ifp)); 588 sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi); 589 590 /* Prepare cached route entry. */ 591 bzero(&vifp->v_route, sizeof(vifp->v_route)); 592 593 /* 594 * Tell mrt_ipip_input() to start looking at encapsulated 595 * packets. 596 */ 597 have_encap_tunnel = 1; 598 } else { 599 /* Use the physical interface associated with the address. */ 600 ifp = ifa->ifa_ifp; 601 602 /* Make sure the interface supports multicast. */ 603 if ((ifp->if_flags & IFF_MULTICAST) == 0) 604 return (EOPNOTSUPP); 605 606 /* Enable promiscuous reception of all IP multicasts. */ 607 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in); 608 satosin(&ifr.ifr_addr)->sin_family = AF_INET; 609 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr; 610 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); 611 if (error) 612 return (error); 613 } 614 615 s = splsoftnet(); 616 617 /* Define parameters for the tbf structure. */ 618 vifp->tbf_q = 0; 619 vifp->tbf_t = &vifp->tbf_q; 620 microtime(&vifp->tbf_last_pkt_t); 621 vifp->tbf_n_tok = 0; 622 vifp->tbf_q_len = 0; 623 vifp->tbf_max_q_len = MAXQSIZE; 624 625 vifp->v_flags = vifcp->vifc_flags; 626 vifp->v_threshold = vifcp->vifc_threshold; 627 /* scaling up here allows division by 1024 in critical code */ 628 vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000; 629 vifp->v_lcl_addr = vifcp->vifc_lcl_addr; 630 vifp->v_rmt_addr = vifcp->vifc_rmt_addr; 631 vifp->v_ifp = ifp; 632 /* Initialize per vif pkt counters. */ 633 vifp->v_pkt_in = 0; 634 vifp->v_pkt_out = 0; 635 vifp->v_bytes_in = 0; 636 vifp->v_bytes_out = 0; 637 638 callout_init(&vifp->v_repq_ch); 639 640 #ifdef RSVP_ISI 641 vifp->v_rsvp_on = 0; 642 vifp->v_rsvpd = 0; 643 #endif /* RSVP_ISI */ 644 645 splx(s); 646 647 /* Adjust numvifs up if the vifi is higher than numvifs. */ 648 if (numvifs <= vifcp->vifc_vifi) 649 numvifs = vifcp->vifc_vifi + 1; 650 651 if (mrtdebug) 652 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n", 653 vifcp->vifc_vifi, 654 ntohl(vifcp->vifc_lcl_addr.s_addr), 655 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", 656 ntohl(vifcp->vifc_rmt_addr.s_addr), 657 vifcp->vifc_threshold, 658 vifcp->vifc_rate_limit); 659 660 return (0); 661 } 662 663 void 664 reset_vif(vifp) 665 struct vif *vifp; 666 { 667 struct mbuf *m, *n; 668 struct ifnet *ifp; 669 struct ifreq ifr; 670 671 callout_stop(&vifp->v_repq_ch); 672 673 /* detach this vif from decapsulator dispatch table */ 674 encap_detach(vifp->v_encap_cookie); 675 vifp->v_encap_cookie = NULL; 676 677 for (m = vifp->tbf_q; m != 0; m = n) { 678 n = m->m_nextpkt; 679 m_freem(m); 680 } 681 682 if (vifp->v_flags & VIFF_TUNNEL) { 683 free(vifp->v_ifp, M_MRTABLE); 684 if (vifp == last_encap_vif) { 685 last_encap_vif = 0; 686 last_encap_src = zeroin_addr; 687 } 688 } else { 689 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in); 690 satosin(&ifr.ifr_addr)->sin_family = AF_INET; 691 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr; 692 ifp = vifp->v_ifp; 693 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); 694 } 695 bzero((caddr_t)vifp, sizeof(*vifp)); 696 } 697 698 /* 699 * Delete a vif from the vif table 700 */ 701 static int 702 del_vif(m) 703 struct mbuf *m; 704 { 705 vifi_t *vifip; 706 struct vif *vifp; 707 vifi_t vifi; 708 int s; 709 710 if (m == 0 || m->m_len < sizeof(vifi_t)) 711 return (EINVAL); 712 713 vifip = mtod(m, vifi_t *); 714 if (*vifip >= numvifs) 715 return (EINVAL); 716 717 vifp = &viftable[*vifip]; 718 if (in_nullhost(vifp->v_lcl_addr)) 719 return (EADDRNOTAVAIL); 720 721 s = splsoftnet(); 722 723 reset_vif(vifp); 724 725 /* Adjust numvifs down */ 726 for (vifi = numvifs; vifi > 0; vifi--) 727 if (!in_nullhost(viftable[vifi-1].v_lcl_addr)) 728 break; 729 numvifs = vifi; 730 731 splx(s); 732 733 if (mrtdebug) 734 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs); 735 736 return (0); 737 } 738 739 static void 740 update_mfc(mfccp, rt) 741 struct mfcctl *mfccp; 742 struct mfc *rt; 743 { 744 vifi_t vifi; 745 746 rt->mfc_parent = mfccp->mfcc_parent; 747 for (vifi = 0; vifi < numvifs; vifi++) 748 rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi]; 749 rt->mfc_expire = 0; 750 rt->mfc_stall = 0; 751 } 752 753 static void 754 expire_mfc(rt) 755 struct mfc *rt; 756 { 757 struct rtdetq *rte, *nrte; 758 759 for (rte = rt->mfc_stall; rte != 0; rte = nrte) { 760 nrte = rte->next; 761 m_freem(rte->m); 762 free(rte, M_MRTABLE); 763 } 764 765 LIST_REMOVE(rt, mfc_hash); 766 free(rt, M_MRTABLE); 767 } 768 769 /* 770 * Add an mfc entry 771 */ 772 static int 773 add_mfc(m) 774 struct mbuf *m; 775 { 776 struct mfcctl *mfccp; 777 struct mfc *rt; 778 u_int32_t hash = 0; 779 struct rtdetq *rte, *nrte; 780 u_short nstl; 781 int s; 782 783 if (m == 0 || m->m_len < sizeof(struct mfcctl)) 784 return (EINVAL); 785 786 mfccp = mtod(m, struct mfcctl *); 787 788 s = splsoftnet(); 789 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt); 790 791 /* If an entry already exists, just update the fields */ 792 if (rt) { 793 if (mrtdebug & DEBUG_MFC) 794 log(LOG_DEBUG,"add_mfc update o %x g %x p %x\n", 795 ntohl(mfccp->mfcc_origin.s_addr), 796 ntohl(mfccp->mfcc_mcastgrp.s_addr), 797 mfccp->mfcc_parent); 798 799 if (rt->mfc_expire) 800 nexpire[hash]--; 801 802 update_mfc(mfccp, rt); 803 804 splx(s); 805 return (0); 806 } 807 808 /* 809 * Find the entry for which the upcall was made and update 810 */ 811 nstl = 0; 812 hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); 813 for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) { 814 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && 815 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && 816 rt->mfc_stall != 0) { 817 if (nstl++) 818 log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n", 819 "multiple kernel entries", 820 ntohl(mfccp->mfcc_origin.s_addr), 821 ntohl(mfccp->mfcc_mcastgrp.s_addr), 822 mfccp->mfcc_parent, rt->mfc_stall); 823 824 if (mrtdebug & DEBUG_MFC) 825 log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p\n", 826 ntohl(mfccp->mfcc_origin.s_addr), 827 ntohl(mfccp->mfcc_mcastgrp.s_addr), 828 mfccp->mfcc_parent, rt->mfc_stall); 829 830 if (rt->mfc_expire) 831 nexpire[hash]--; 832 833 rte = rt->mfc_stall; 834 update_mfc(mfccp, rt); 835 836 /* free packets Qed at the end of this entry */ 837 for (; rte != 0; rte = nrte) { 838 nrte = rte->next; 839 #ifdef RSVP_ISI 840 ip_mdq(rte->m, rte->ifp, rt, -1); 841 #else 842 ip_mdq(rte->m, rte->ifp, rt); 843 #endif /* RSVP_ISI */ 844 m_freem(rte->m); 845 #ifdef UPCALL_TIMING 846 collate(&rte->t); 847 #endif /* UPCALL_TIMING */ 848 free(rte, M_MRTABLE); 849 } 850 } 851 } 852 853 if (nstl == 0) { 854 /* 855 * No mfc; make a new one 856 */ 857 if (mrtdebug & DEBUG_MFC) 858 log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x\n", 859 ntohl(mfccp->mfcc_origin.s_addr), 860 ntohl(mfccp->mfcc_mcastgrp.s_addr), 861 mfccp->mfcc_parent); 862 863 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 864 if (rt == 0) { 865 splx(s); 866 return (ENOBUFS); 867 } 868 869 rt->mfc_origin = mfccp->mfcc_origin; 870 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; 871 /* initialize pkt counters per src-grp */ 872 rt->mfc_pkt_cnt = 0; 873 rt->mfc_byte_cnt = 0; 874 rt->mfc_wrong_if = 0; 875 timerclear(&rt->mfc_last_assert); 876 update_mfc(mfccp, rt); 877 878 /* insert new entry at head of hash chain */ 879 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 880 } 881 882 splx(s); 883 return (0); 884 } 885 886 #ifdef UPCALL_TIMING 887 /* 888 * collect delay statistics on the upcalls 889 */ 890 static void collate(t) 891 struct timeval *t; 892 { 893 u_int32_t d; 894 struct timeval tp; 895 u_int32_t delta; 896 897 microtime(&tp); 898 899 if (timercmp(t, &tp, <)) { 900 TV_DELTA(tp, *t, delta); 901 902 d = delta >> 10; 903 if (d > 50) 904 d = 50; 905 906 ++upcall_data[d]; 907 } 908 } 909 #endif /* UPCALL_TIMING */ 910 911 /* 912 * Delete an mfc entry 913 */ 914 static int 915 del_mfc(m) 916 struct mbuf *m; 917 { 918 struct mfcctl *mfccp; 919 struct mfc *rt; 920 int s; 921 922 if (m == 0 || m->m_len < sizeof(struct mfcctl)) 923 return (EINVAL); 924 925 mfccp = mtod(m, struct mfcctl *); 926 927 if (mrtdebug & DEBUG_MFC) 928 log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n", 929 ntohl(mfccp->mfcc_origin.s_addr), 930 ntohl(mfccp->mfcc_mcastgrp.s_addr)); 931 932 s = splsoftnet(); 933 934 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt); 935 if (rt == 0) { 936 splx(s); 937 return (EADDRNOTAVAIL); 938 } 939 940 LIST_REMOVE(rt, mfc_hash); 941 free(rt, M_MRTABLE); 942 943 splx(s); 944 return (0); 945 } 946 947 static int 948 socket_send(s, mm, src) 949 struct socket *s; 950 struct mbuf *mm; 951 struct sockaddr_in *src; 952 { 953 if (s) { 954 if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) { 955 sorwakeup(s); 956 return (0); 957 } 958 } 959 m_freem(mm); 960 return (-1); 961 } 962 963 /* 964 * IP multicast forwarding function. This function assumes that the packet 965 * pointed to by "ip" has arrived on (or is about to be sent to) the interface 966 * pointed to by "ifp", and the packet is to be relayed to other networks 967 * that have members of the packet's destination IP multicast group. 968 * 969 * The packet is returned unscathed to the caller, unless it is 970 * erroneous, in which case a non-zero return value tells the caller to 971 * discard it. 972 */ 973 974 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ 975 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ 976 977 int 978 #ifdef RSVP_ISI 979 ip_mforward(m, ifp, imo) 980 #else 981 ip_mforward(m, ifp) 982 #endif /* RSVP_ISI */ 983 struct mbuf *m; 984 struct ifnet *ifp; 985 #ifdef RSVP_ISI 986 struct ip_moptions *imo; 987 #endif /* RSVP_ISI */ 988 { 989 struct ip *ip = mtod(m, struct ip *); 990 struct mfc *rt; 991 u_char *ipoptions; 992 static int srctun = 0; 993 struct mbuf *mm; 994 int s; 995 #ifdef RSVP_ISI 996 struct vif *vifp; 997 vifi_t vifi; 998 #endif /* RSVP_ISI */ 999 1000 /* 1001 * Clear any in-bound checksum flags for this packet. 1002 */ 1003 m->m_pkthdr.csum_flags = 0; 1004 1005 if (mrtdebug & DEBUG_FORWARD) 1006 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n", 1007 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); 1008 1009 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || 1010 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) { 1011 /* 1012 * Packet arrived via a physical interface or 1013 * an encapuslated tunnel. 1014 */ 1015 } else { 1016 /* 1017 * Packet arrived through a source-route tunnel. 1018 * Source-route tunnels are no longer supported. 1019 */ 1020 if ((srctun++ % 1000) == 0) 1021 log(LOG_ERR, "ip_mforward: received source-routed packet from %x\n", 1022 ntohl(ip->ip_src.s_addr)); 1023 1024 return (1); 1025 } 1026 1027 #ifdef RSVP_ISI 1028 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) { 1029 if (ip->ip_ttl < 255) 1030 ip->ip_ttl++; /* compensate for -1 in *_send routines */ 1031 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1032 vifp = viftable + vifi; 1033 printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n", 1034 ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi, 1035 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", 1036 vifp->v_ifp->if_xname); 1037 } 1038 return (ip_mdq(m, ifp, (struct mfc *)0, vifi)); 1039 } 1040 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { 1041 printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", 1042 ntohl(ip->ip_src), ntohl(ip->ip_dst)); 1043 } 1044 #endif /* RSVP_ISI */ 1045 1046 /* 1047 * Don't forward a packet with time-to-live of zero or one, 1048 * or a packet destined to a local-only group. 1049 */ 1050 if (ip->ip_ttl <= 1 || 1051 IN_LOCAL_GROUP(ip->ip_dst.s_addr)) 1052 return (0); 1053 1054 /* 1055 * Determine forwarding vifs from the forwarding cache table 1056 */ 1057 s = splsoftnet(); 1058 MFCFIND(ip->ip_src, ip->ip_dst, rt); 1059 1060 /* Entry exists, so forward if necessary */ 1061 if (rt != 0) { 1062 splx(s); 1063 #ifdef RSVP_ISI 1064 return (ip_mdq(m, ifp, rt, -1)); 1065 #else 1066 return (ip_mdq(m, ifp, rt)); 1067 #endif /* RSVP_ISI */ 1068 } else { 1069 /* 1070 * If we don't have a route for packet's origin, 1071 * Make a copy of the packet & 1072 * send message to routing daemon 1073 */ 1074 1075 struct mbuf *mb0; 1076 struct rtdetq *rte; 1077 u_int32_t hash; 1078 int hlen = ip->ip_hl << 2; 1079 #ifdef UPCALL_TIMING 1080 struct timeval tp; 1081 1082 microtime(&tp); 1083 #endif /* UPCALL_TIMING */ 1084 1085 mrtstat.mrts_no_route++; 1086 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) 1087 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n", 1088 ntohl(ip->ip_src.s_addr), 1089 ntohl(ip->ip_dst.s_addr)); 1090 1091 /* 1092 * Allocate mbufs early so that we don't do extra work if we are 1093 * just going to fail anyway. Make sure to pullup the header so 1094 * that other people can't step on it. 1095 */ 1096 rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT); 1097 if (rte == 0) { 1098 splx(s); 1099 return (ENOBUFS); 1100 } 1101 mb0 = m_copy(m, 0, M_COPYALL); 1102 M_PULLUP(mb0, hlen); 1103 if (mb0 == 0) { 1104 free(rte, M_MRTABLE); 1105 splx(s); 1106 return (ENOBUFS); 1107 } 1108 1109 /* is there an upcall waiting for this packet? */ 1110 hash = MFCHASH(ip->ip_src, ip->ip_dst); 1111 for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) { 1112 if (in_hosteq(ip->ip_src, rt->mfc_origin) && 1113 in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && 1114 rt->mfc_stall != 0) 1115 break; 1116 } 1117 1118 if (rt == 0) { 1119 int i; 1120 struct igmpmsg *im; 1121 1122 /* no upcall, so make a new entry */ 1123 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); 1124 if (rt == 0) { 1125 free(rte, M_MRTABLE); 1126 m_freem(mb0); 1127 splx(s); 1128 return (ENOBUFS); 1129 } 1130 /* Make a copy of the header to send to the user level process */ 1131 mm = m_copy(m, 0, hlen); 1132 M_PULLUP(mm, hlen); 1133 if (mm == 0) { 1134 free(rte, M_MRTABLE); 1135 m_freem(mb0); 1136 free(rt, M_MRTABLE); 1137 splx(s); 1138 return (ENOBUFS); 1139 } 1140 1141 /* 1142 * Send message to routing daemon to install 1143 * a route into the kernel table 1144 */ 1145 sin.sin_addr = ip->ip_src; 1146 1147 im = mtod(mm, struct igmpmsg *); 1148 im->im_msgtype = IGMPMSG_NOCACHE; 1149 im->im_mbz = 0; 1150 1151 mrtstat.mrts_upcalls++; 1152 1153 if (socket_send(ip_mrouter, mm, &sin) < 0) { 1154 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); 1155 ++mrtstat.mrts_upq_sockfull; 1156 free(rte, M_MRTABLE); 1157 m_freem(mb0); 1158 free(rt, M_MRTABLE); 1159 splx(s); 1160 return (ENOBUFS); 1161 } 1162 1163 /* insert new entry at head of hash chain */ 1164 rt->mfc_origin = ip->ip_src; 1165 rt->mfc_mcastgrp = ip->ip_dst; 1166 rt->mfc_pkt_cnt = 0; 1167 rt->mfc_byte_cnt = 0; 1168 rt->mfc_wrong_if = 0; 1169 rt->mfc_expire = UPCALL_EXPIRE; 1170 nexpire[hash]++; 1171 for (i = 0; i < numvifs; i++) 1172 rt->mfc_ttls[i] = 0; 1173 rt->mfc_parent = -1; 1174 1175 /* link into table */ 1176 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); 1177 /* Add this entry to the end of the queue */ 1178 rt->mfc_stall = rte; 1179 } else { 1180 /* determine if q has overflowed */ 1181 struct rtdetq **p; 1182 int npkts = 0; 1183 1184 for (p = &rt->mfc_stall; *p != 0; p = &(*p)->next) 1185 if (++npkts > MAX_UPQ) { 1186 mrtstat.mrts_upq_ovflw++; 1187 free(rte, M_MRTABLE); 1188 m_freem(mb0); 1189 splx(s); 1190 return (0); 1191 } 1192 1193 /* Add this entry to the end of the queue */ 1194 *p = rte; 1195 } 1196 1197 rte->next = 0; 1198 rte->m = mb0; 1199 rte->ifp = ifp; 1200 #ifdef UPCALL_TIMING 1201 rte->t = tp; 1202 #endif /* UPCALL_TIMING */ 1203 1204 1205 splx(s); 1206 1207 return (0); 1208 } 1209 } 1210 1211 1212 /*ARGSUSED*/ 1213 static void 1214 expire_upcalls(v) 1215 void *v; 1216 { 1217 int i; 1218 int s; 1219 1220 s = splsoftnet(); 1221 1222 for (i = 0; i < MFCTBLSIZ; i++) { 1223 struct mfc *rt, *nrt; 1224 1225 if (nexpire[i] == 0) 1226 continue; 1227 1228 for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) { 1229 nrt = rt->mfc_hash.le_next; 1230 1231 if (rt->mfc_expire == 0 || 1232 --rt->mfc_expire > 0) 1233 continue; 1234 nexpire[i]--; 1235 1236 ++mrtstat.mrts_cache_cleanups; 1237 if (mrtdebug & DEBUG_EXPIRE) 1238 log(LOG_DEBUG, 1239 "expire_upcalls: expiring (%x %x)\n", 1240 ntohl(rt->mfc_origin.s_addr), 1241 ntohl(rt->mfc_mcastgrp.s_addr)); 1242 1243 expire_mfc(rt); 1244 } 1245 } 1246 1247 splx(s); 1248 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, 1249 expire_upcalls, NULL); 1250 } 1251 1252 /* 1253 * Packet forwarding routine once entry in the cache is made 1254 */ 1255 static int 1256 #ifdef RSVP_ISI 1257 ip_mdq(m, ifp, rt, xmt_vif) 1258 #else 1259 ip_mdq(m, ifp, rt) 1260 #endif /* RSVP_ISI */ 1261 struct mbuf *m; 1262 struct ifnet *ifp; 1263 struct mfc *rt; 1264 #ifdef RSVP_ISI 1265 vifi_t xmt_vif; 1266 #endif /* RSVP_ISI */ 1267 { 1268 struct ip *ip = mtod(m, struct ip *); 1269 vifi_t vifi; 1270 struct vif *vifp; 1271 int plen = ntohs(ip->ip_len); 1272 1273 /* 1274 * Macro to send packet on vif. Since RSVP packets don't get counted on 1275 * input, they shouldn't get counted on output, so statistics keeping is 1276 * separate. 1277 */ 1278 #define MC_SEND(ip,vifp,m) { \ 1279 if ((vifp)->v_flags & VIFF_TUNNEL) \ 1280 encap_send((ip), (vifp), (m)); \ 1281 else \ 1282 phyint_send((ip), (vifp), (m)); \ 1283 } 1284 1285 #ifdef RSVP_ISI 1286 /* 1287 * If xmt_vif is not -1, send on only the requested vif. 1288 * 1289 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs. 1290 */ 1291 if (xmt_vif < numvifs) { 1292 MC_SEND(ip, viftable + xmt_vif, m); 1293 return (1); 1294 } 1295 #endif /* RSVP_ISI */ 1296 1297 /* 1298 * Don't forward if it didn't arrive from the parent vif for its origin. 1299 */ 1300 vifi = rt->mfc_parent; 1301 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { 1302 /* came in the wrong interface */ 1303 if (mrtdebug & DEBUG_FORWARD) 1304 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", 1305 ifp, vifi, viftable[vifi].v_ifp); 1306 ++mrtstat.mrts_wrong_if; 1307 ++rt->mfc_wrong_if; 1308 /* 1309 * If we are doing PIM assert processing, and we are forwarding 1310 * packets on this interface, and it is a broadcast medium 1311 * interface (and not a tunnel), send a message to the routing daemon. 1312 */ 1313 if (pim_assert && rt->mfc_ttls[vifi] && 1314 (ifp->if_flags & IFF_BROADCAST) && 1315 !(viftable[vifi].v_flags & VIFF_TUNNEL)) { 1316 struct mbuf *mm; 1317 struct igmpmsg *im; 1318 int hlen = ip->ip_hl << 2; 1319 struct timeval now; 1320 u_int32_t delta; 1321 1322 microtime(&now); 1323 1324 TV_DELTA(rt->mfc_last_assert, now, delta); 1325 1326 if (delta > ASSERT_MSG_TIME) { 1327 mm = m_copy(m, 0, hlen); 1328 M_PULLUP(mm, hlen); 1329 if (mm == 0) { 1330 return (ENOBUFS); 1331 } 1332 1333 rt->mfc_last_assert = now; 1334 1335 im = mtod(mm, struct igmpmsg *); 1336 im->im_msgtype = IGMPMSG_WRONGVIF; 1337 im->im_mbz = 0; 1338 im->im_vif = vifi; 1339 1340 sin.sin_addr = im->im_src; 1341 1342 socket_send(ip_mrouter, mm, &sin); 1343 } 1344 } 1345 return (0); 1346 } 1347 1348 /* If I sourced this packet, it counts as output, else it was input. */ 1349 if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) { 1350 viftable[vifi].v_pkt_out++; 1351 viftable[vifi].v_bytes_out += plen; 1352 } else { 1353 viftable[vifi].v_pkt_in++; 1354 viftable[vifi].v_bytes_in += plen; 1355 } 1356 rt->mfc_pkt_cnt++; 1357 rt->mfc_byte_cnt += plen; 1358 1359 /* 1360 * For each vif, decide if a copy of the packet should be forwarded. 1361 * Forward if: 1362 * - the ttl exceeds the vif's threshold 1363 * - there are group members downstream on interface 1364 */ 1365 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) 1366 if ((rt->mfc_ttls[vifi] > 0) && 1367 (ip->ip_ttl > rt->mfc_ttls[vifi])) { 1368 vifp->v_pkt_out++; 1369 vifp->v_bytes_out += plen; 1370 MC_SEND(ip, vifp, m); 1371 } 1372 1373 return (0); 1374 } 1375 1376 #ifdef RSVP_ISI 1377 /* 1378 * check if a vif number is legal/ok. This is used by ip_output, to export 1379 * numvifs there, 1380 */ 1381 int 1382 legal_vif_num(vif) 1383 int vif; 1384 { 1385 if (vif >= 0 && vif < numvifs) 1386 return (1); 1387 else 1388 return (0); 1389 } 1390 #endif /* RSVP_ISI */ 1391 1392 static void 1393 phyint_send(ip, vifp, m) 1394 struct ip *ip; 1395 struct vif *vifp; 1396 struct mbuf *m; 1397 { 1398 struct mbuf *mb_copy; 1399 int hlen = ip->ip_hl << 2; 1400 1401 /* 1402 * Make a new reference to the packet; make sure that 1403 * the IP header is actually copied, not just referenced, 1404 * so that ip_output() only scribbles on the copy. 1405 */ 1406 mb_copy = m_copy(m, 0, M_COPYALL); 1407 M_PULLUP(mb_copy, hlen); 1408 if (mb_copy == 0) 1409 return; 1410 1411 if (vifp->v_rate_limit <= 0) 1412 tbf_send_packet(vifp, mb_copy); 1413 else 1414 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); 1415 } 1416 1417 static void 1418 encap_send(ip, vifp, m) 1419 struct ip *ip; 1420 struct vif *vifp; 1421 struct mbuf *m; 1422 { 1423 struct mbuf *mb_copy; 1424 struct ip *ip_copy; 1425 int i, len = ip->ip_len + sizeof(multicast_encap_iphdr); 1426 1427 /* 1428 * copy the old packet & pullup it's IP header into the 1429 * new mbuf so we can modify it. Try to fill the new 1430 * mbuf since if we don't the ethernet driver will. 1431 */ 1432 MGETHDR(mb_copy, M_DONTWAIT, MT_DATA); 1433 if (mb_copy == 0) 1434 return; 1435 mb_copy->m_data += max_linkhdr; 1436 mb_copy->m_pkthdr.len = len; 1437 mb_copy->m_len = sizeof(multicast_encap_iphdr); 1438 1439 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == 0) { 1440 m_freem(mb_copy); 1441 return; 1442 } 1443 i = MHLEN - max_linkhdr; 1444 if (i > len) 1445 i = len; 1446 mb_copy = m_pullup(mb_copy, i); 1447 if (mb_copy == 0) 1448 return; 1449 1450 /* 1451 * fill in the encapsulating IP header. 1452 */ 1453 ip_copy = mtod(mb_copy, struct ip *); 1454 *ip_copy = multicast_encap_iphdr; 1455 ip_copy->ip_id = htons(ip_id++); 1456 ip_copy->ip_len = len; 1457 ip_copy->ip_src = vifp->v_lcl_addr; 1458 ip_copy->ip_dst = vifp->v_rmt_addr; 1459 1460 /* 1461 * turn the encapsulated IP header back into a valid one. 1462 */ 1463 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); 1464 --ip->ip_ttl; 1465 HTONS(ip->ip_len); 1466 HTONS(ip->ip_off); 1467 ip->ip_sum = 0; 1468 mb_copy->m_data += sizeof(multicast_encap_iphdr); 1469 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); 1470 mb_copy->m_data -= sizeof(multicast_encap_iphdr); 1471 1472 if (vifp->v_rate_limit <= 0) 1473 tbf_send_packet(vifp, mb_copy); 1474 else 1475 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); 1476 } 1477 1478 /* 1479 * De-encapsulate a packet and feed it back through ip input. 1480 */ 1481 static void 1482 #if __STDC__ 1483 vif_input(struct mbuf *m, ...) 1484 #else 1485 vif_input(m, va_alist) 1486 struct mbuf *m; 1487 va_dcl 1488 #endif 1489 { 1490 int off, proto; 1491 va_list ap; 1492 struct ip *ip; 1493 struct vif *vifp; 1494 int s; 1495 struct ifqueue *ifq; 1496 1497 va_start(ap, m); 1498 off = va_arg(ap, int); 1499 proto = va_arg(ap, int); 1500 va_end(ap); 1501 1502 vifp = (struct vif *)encap_getarg(m); 1503 if (!vifp || proto != AF_INET) { 1504 m_freem(m); 1505 mrtstat.mrts_bad_tunnel++; 1506 return; 1507 } 1508 1509 ip = mtod(m, struct ip *); 1510 1511 m_adj(m, off); 1512 m->m_pkthdr.rcvif = vifp->v_ifp; 1513 ifq = &ipintrq; 1514 s = splnet(); 1515 if (IF_QFULL(ifq)) { 1516 IF_DROP(ifq); 1517 m_freem(m); 1518 } else { 1519 IF_ENQUEUE(ifq, m); 1520 /* 1521 * normally we would need a "schednetisr(NETISR_IP)" 1522 * here but we were called by ip_input and it is going 1523 * to loop back & try to dequeue the packet we just 1524 * queued as soon as we return so we avoid the 1525 * unnecessary software interrrupt. 1526 */ 1527 } 1528 splx(s); 1529 } 1530 1531 /* 1532 * Check if the packet should be grabbed by us. 1533 */ 1534 static int 1535 vif_encapcheck(m, off, proto, arg) 1536 const struct mbuf *m; 1537 int off; 1538 int proto; 1539 void *arg; 1540 { 1541 struct vif *vifp; 1542 struct ip ip; 1543 1544 #ifdef DIAGNOSTIC 1545 if (!arg || proto != IPPROTO_IPV4) 1546 panic("unexpected arg in vif_encapcheck"); 1547 #endif 1548 1549 /* 1550 * do not grab the packet if it's not to a multicast destination or if 1551 * we don't have an encapsulating tunnel with the source. 1552 * Note: This code assumes that the remote site IP address 1553 * uniquely identifies the tunnel (i.e., that this site has 1554 * at most one tunnel with the remote site). 1555 */ 1556 1557 /* LINTED const cast */ 1558 m_copydata((struct mbuf *)m, off, sizeof(ip), (caddr_t)&ip); 1559 if (!IN_MULTICAST(ip.ip_dst.s_addr)) 1560 return 0; 1561 1562 /* LINTED const cast */ 1563 m_copydata((struct mbuf *)m, 0, sizeof(ip), (caddr_t)&ip); 1564 if (!in_hosteq(ip.ip_src, last_encap_src)) { 1565 vifp = (struct vif *)arg; 1566 if (vifp->v_flags & VIFF_TUNNEL && 1567 in_hosteq(vifp->v_rmt_addr, ip.ip_src)) 1568 ; 1569 else 1570 return 0; 1571 last_encap_vif = vifp; 1572 last_encap_src = ip.ip_src; 1573 } else 1574 vifp = last_encap_vif; 1575 1576 /* 32bit match, since we have checked ip_src only */ 1577 return 32; 1578 } 1579 1580 /* 1581 * Token bucket filter module 1582 */ 1583 static void 1584 tbf_control(vifp, m, ip, len) 1585 struct vif *vifp; 1586 struct mbuf *m; 1587 struct ip *ip; 1588 u_int32_t len; 1589 { 1590 1591 if (len > MAX_BKT_SIZE) { 1592 /* drop if packet is too large */ 1593 mrtstat.mrts_pkt2large++; 1594 m_freem(m); 1595 return; 1596 } 1597 1598 tbf_update_tokens(vifp); 1599 1600 /* 1601 * If there are enough tokens, and the queue is empty, send this packet 1602 * out immediately. Otherwise, try to insert it on this vif's queue. 1603 */ 1604 if (vifp->tbf_q_len == 0) { 1605 if (len <= vifp->tbf_n_tok) { 1606 vifp->tbf_n_tok -= len; 1607 tbf_send_packet(vifp, m); 1608 } else { 1609 /* queue packet and timeout till later */ 1610 tbf_queue(vifp, m); 1611 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS, 1612 tbf_reprocess_q, vifp); 1613 } 1614 } else { 1615 if (vifp->tbf_q_len >= vifp->tbf_max_q_len && 1616 !tbf_dq_sel(vifp, ip)) { 1617 /* queue length too much, and couldn't make room */ 1618 mrtstat.mrts_q_overflow++; 1619 m_freem(m); 1620 } else { 1621 /* queue length low enough, or made room */ 1622 tbf_queue(vifp, m); 1623 tbf_process_q(vifp); 1624 } 1625 } 1626 } 1627 1628 /* 1629 * adds a packet to the queue at the interface 1630 */ 1631 static void 1632 tbf_queue(vifp, m) 1633 struct vif *vifp; 1634 struct mbuf *m; 1635 { 1636 int s = splsoftnet(); 1637 1638 /* insert at tail */ 1639 *vifp->tbf_t = m; 1640 vifp->tbf_t = &m->m_nextpkt; 1641 vifp->tbf_q_len++; 1642 1643 splx(s); 1644 } 1645 1646 1647 /* 1648 * processes the queue at the interface 1649 */ 1650 static void 1651 tbf_process_q(vifp) 1652 struct vif *vifp; 1653 { 1654 struct mbuf *m; 1655 int len; 1656 int s = splsoftnet(); 1657 1658 /* 1659 * Loop through the queue at the interface and send as many packets 1660 * as possible. 1661 */ 1662 for (m = vifp->tbf_q; 1663 m != 0; 1664 m = vifp->tbf_q) { 1665 len = mtod(m, struct ip *)->ip_len; 1666 1667 /* determine if the packet can be sent */ 1668 if (len <= vifp->tbf_n_tok) { 1669 /* if so, 1670 * reduce no of tokens, dequeue the packet, 1671 * send the packet. 1672 */ 1673 if ((vifp->tbf_q = m->m_nextpkt) == 0) 1674 vifp->tbf_t = &vifp->tbf_q; 1675 --vifp->tbf_q_len; 1676 1677 m->m_nextpkt = 0; 1678 vifp->tbf_n_tok -= len; 1679 tbf_send_packet(vifp, m); 1680 } else 1681 break; 1682 } 1683 splx(s); 1684 } 1685 1686 static void 1687 tbf_reprocess_q(arg) 1688 void *arg; 1689 { 1690 struct vif *vifp = arg; 1691 1692 if (ip_mrouter == 0) 1693 return; 1694 1695 tbf_update_tokens(vifp); 1696 tbf_process_q(vifp); 1697 1698 if (vifp->tbf_q_len != 0) 1699 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS, 1700 tbf_reprocess_q, vifp); 1701 } 1702 1703 /* function that will selectively discard a member of the queue 1704 * based on the precedence value and the priority 1705 */ 1706 static int 1707 tbf_dq_sel(vifp, ip) 1708 struct vif *vifp; 1709 struct ip *ip; 1710 { 1711 u_int p; 1712 struct mbuf **mp, *m; 1713 int s = splsoftnet(); 1714 1715 p = priority(vifp, ip); 1716 1717 for (mp = &vifp->tbf_q, m = *mp; 1718 m != 0; 1719 mp = &m->m_nextpkt, m = *mp) { 1720 if (p > priority(vifp, mtod(m, struct ip *))) { 1721 if ((*mp = m->m_nextpkt) == 0) 1722 vifp->tbf_t = mp; 1723 --vifp->tbf_q_len; 1724 1725 m_freem(m); 1726 mrtstat.mrts_drop_sel++; 1727 splx(s); 1728 return (1); 1729 } 1730 } 1731 splx(s); 1732 return (0); 1733 } 1734 1735 static void 1736 tbf_send_packet(vifp, m) 1737 struct vif *vifp; 1738 struct mbuf *m; 1739 { 1740 int error; 1741 int s = splsoftnet(); 1742 1743 if (vifp->v_flags & VIFF_TUNNEL) { 1744 /* If tunnel options */ 1745 #ifdef IPSEC 1746 /* Don't lookup socket in forwading case */ 1747 (void)ipsec_setsocket(m, NULL); 1748 #endif 1749 ip_output(m, (struct mbuf *)0, &vifp->v_route, 1750 IP_FORWARDING, (struct ip_moptions *)0); 1751 } else { 1752 /* if physical interface option, extract the options and then send */ 1753 struct ip_moptions imo; 1754 1755 imo.imo_multicast_ifp = vifp->v_ifp; 1756 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; 1757 imo.imo_multicast_loop = 1; 1758 #ifdef RSVP_ISI 1759 imo.imo_multicast_vif = -1; 1760 #endif 1761 1762 #ifdef IPSEC 1763 /* Don't lookup socket in forwading case */ 1764 (void)ipsec_setsocket(m, NULL); 1765 #endif 1766 error = ip_output(m, (struct mbuf *)0, (struct route *)0, 1767 IP_FORWARDING|IP_MULTICASTOPTS, &imo); 1768 1769 if (mrtdebug & DEBUG_XMIT) 1770 log(LOG_DEBUG, "phyint_send on vif %ld err %d\n", 1771 (long)(vifp-viftable), error); 1772 } 1773 splx(s); 1774 } 1775 1776 /* determine the current time and then 1777 * the elapsed time (between the last time and time now) 1778 * in milliseconds & update the no. of tokens in the bucket 1779 */ 1780 static void 1781 tbf_update_tokens(vifp) 1782 struct vif *vifp; 1783 { 1784 struct timeval tp; 1785 u_int32_t tm; 1786 int s = splsoftnet(); 1787 1788 microtime(&tp); 1789 1790 TV_DELTA(tp, vifp->tbf_last_pkt_t, tm); 1791 1792 /* 1793 * This formula is actually 1794 * "time in seconds" * "bytes/second". 1795 * 1796 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) 1797 * 1798 * The (1000/1024) was introduced in add_vif to optimize 1799 * this divide into a shift. 1800 */ 1801 vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192; 1802 vifp->tbf_last_pkt_t = tp; 1803 1804 if (vifp->tbf_n_tok > MAX_BKT_SIZE) 1805 vifp->tbf_n_tok = MAX_BKT_SIZE; 1806 1807 splx(s); 1808 } 1809 1810 static int 1811 priority(vifp, ip) 1812 struct vif *vifp; 1813 struct ip *ip; 1814 { 1815 int prio; 1816 1817 /* temporary hack; may add general packet classifier some day */ 1818 1819 /* 1820 * The UDP port space is divided up into four priority ranges: 1821 * [0, 16384) : unclassified - lowest priority 1822 * [16384, 32768) : audio - highest priority 1823 * [32768, 49152) : whiteboard - medium priority 1824 * [49152, 65536) : video - low priority 1825 */ 1826 if (ip->ip_p == IPPROTO_UDP) { 1827 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); 1828 1829 switch (ntohs(udp->uh_dport) & 0xc000) { 1830 case 0x4000: 1831 prio = 70; 1832 break; 1833 case 0x8000: 1834 prio = 60; 1835 break; 1836 case 0xc000: 1837 prio = 55; 1838 break; 1839 default: 1840 prio = 50; 1841 break; 1842 } 1843 1844 if (tbfdebug > 1) 1845 log(LOG_DEBUG, "port %x prio %d\n", ntohs(udp->uh_dport), prio); 1846 } else 1847 prio = 50; 1848 1849 1850 return (prio); 1851 } 1852 1853 /* 1854 * End of token bucket filter modifications 1855 */ 1856 1857 #ifdef RSVP_ISI 1858 1859 int 1860 ip_rsvp_vif_init(so, m) 1861 struct socket *so; 1862 struct mbuf *m; 1863 { 1864 int i; 1865 int s; 1866 1867 if (rsvpdebug) 1868 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", 1869 so->so_type, so->so_proto->pr_protocol); 1870 1871 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1872 return (EOPNOTSUPP); 1873 1874 /* Check mbuf. */ 1875 if (m == 0 || m->m_len != sizeof(int)) { 1876 return (EINVAL); 1877 } 1878 i = *(mtod(m, int *)); 1879 1880 if (rsvpdebug) 1881 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on); 1882 1883 s = splsoftnet(); 1884 1885 /* Check vif. */ 1886 if (!legal_vif_num(i)) { 1887 splx(s); 1888 return (EADDRNOTAVAIL); 1889 } 1890 1891 /* Check if socket is available. */ 1892 if (viftable[i].v_rsvpd != 0) { 1893 splx(s); 1894 return (EADDRINUSE); 1895 } 1896 1897 viftable[i].v_rsvpd = so; 1898 /* This may seem silly, but we need to be sure we don't over-increment 1899 * the RSVP counter, in case something slips up. 1900 */ 1901 if (!viftable[i].v_rsvp_on) { 1902 viftable[i].v_rsvp_on = 1; 1903 rsvp_on++; 1904 } 1905 1906 splx(s); 1907 return (0); 1908 } 1909 1910 int 1911 ip_rsvp_vif_done(so, m) 1912 struct socket *so; 1913 struct mbuf *m; 1914 { 1915 int i; 1916 int s; 1917 1918 if (rsvpdebug) 1919 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", 1920 so->so_type, so->so_proto->pr_protocol); 1921 1922 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1923 return (EOPNOTSUPP); 1924 1925 /* Check mbuf. */ 1926 if (m == 0 || m->m_len != sizeof(int)) { 1927 return (EINVAL); 1928 } 1929 i = *(mtod(m, int *)); 1930 1931 s = splsoftnet(); 1932 1933 /* Check vif. */ 1934 if (!legal_vif_num(i)) { 1935 splx(s); 1936 return (EADDRNOTAVAIL); 1937 } 1938 1939 if (rsvpdebug) 1940 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n", 1941 viftable[i].v_rsvpd, so); 1942 1943 viftable[i].v_rsvpd = 0; 1944 /* This may seem silly, but we need to be sure we don't over-decrement 1945 * the RSVP counter, in case something slips up. 1946 */ 1947 if (viftable[i].v_rsvp_on) { 1948 viftable[i].v_rsvp_on = 0; 1949 rsvp_on--; 1950 } 1951 1952 splx(s); 1953 return (0); 1954 } 1955 1956 void 1957 ip_rsvp_force_done(so) 1958 struct socket *so; 1959 { 1960 int vifi; 1961 int s; 1962 1963 /* Don't bother if it is not the right type of socket. */ 1964 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) 1965 return; 1966 1967 s = splsoftnet(); 1968 1969 /* The socket may be attached to more than one vif...this 1970 * is perfectly legal. 1971 */ 1972 for (vifi = 0; vifi < numvifs; vifi++) { 1973 if (viftable[vifi].v_rsvpd == so) { 1974 viftable[vifi].v_rsvpd = 0; 1975 /* This may seem silly, but we need to be sure we don't 1976 * over-decrement the RSVP counter, in case something slips up. 1977 */ 1978 if (viftable[vifi].v_rsvp_on) { 1979 viftable[vifi].v_rsvp_on = 0; 1980 rsvp_on--; 1981 } 1982 } 1983 } 1984 1985 splx(s); 1986 return; 1987 } 1988 1989 void 1990 rsvp_input(m, ifp) 1991 struct mbuf *m; 1992 struct ifnet *ifp; 1993 { 1994 int vifi; 1995 struct ip *ip = mtod(m, struct ip *); 1996 static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET }; 1997 int s; 1998 1999 if (rsvpdebug) 2000 printf("rsvp_input: rsvp_on %d\n",rsvp_on); 2001 2002 /* Can still get packets with rsvp_on = 0 if there is a local member 2003 * of the group to which the RSVP packet is addressed. But in this 2004 * case we want to throw the packet away. 2005 */ 2006 if (!rsvp_on) { 2007 m_freem(m); 2008 return; 2009 } 2010 2011 /* If the old-style non-vif-associated socket is set, then use 2012 * it and ignore the new ones. 2013 */ 2014 if (ip_rsvpd != 0) { 2015 if (rsvpdebug) 2016 printf("rsvp_input: Sending packet up old-style socket\n"); 2017 rip_input(m); /*XXX*/ 2018 return; 2019 } 2020 2021 s = splsoftnet(); 2022 2023 if (rsvpdebug) 2024 printf("rsvp_input: check vifs\n"); 2025 2026 /* Find which vif the packet arrived on. */ 2027 for (vifi = 0; vifi < numvifs; vifi++) { 2028 if (viftable[vifi].v_ifp == ifp) 2029 break; 2030 } 2031 2032 if (vifi == numvifs) { 2033 /* Can't find vif packet arrived on. Drop packet. */ 2034 if (rsvpdebug) 2035 printf("rsvp_input: Can't find vif for packet...dropping it.\n"); 2036 m_freem(m); 2037 splx(s); 2038 return; 2039 } 2040 2041 if (rsvpdebug) 2042 printf("rsvp_input: check socket\n"); 2043 2044 if (viftable[vifi].v_rsvpd == 0) { 2045 /* drop packet, since there is no specific socket for this 2046 * interface */ 2047 if (rsvpdebug) 2048 printf("rsvp_input: No socket defined for vif %d\n",vifi); 2049 m_freem(m); 2050 splx(s); 2051 return; 2052 } 2053 2054 rsvp_src.sin_addr = ip->ip_src; 2055 2056 if (rsvpdebug && m) 2057 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", 2058 m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv)); 2059 2060 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) 2061 if (rsvpdebug) 2062 printf("rsvp_input: Failed to append to socket\n"); 2063 else 2064 if (rsvpdebug) 2065 printf("rsvp_input: send packet up\n"); 2066 2067 splx(s); 2068 } 2069 #endif /* RSVP_ISI */ 2070