/*	$NetBSD: ip_mroute.c,v 1.14 1995/04/13 06:34:00 cgd Exp $	*/

/*
 * Copyright (c) 1989 Stephen Deering
 * Copyright (c) 1992 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_mroute.c	7.4 (Berkeley) 11/19/92
 */

/*
 * Procedures for the kernel part of DVMRP,
 * a Distance-Vector Multicast Routing Protocol.
 * (See RFC-1075.)
 *
 * Written by David Waitzman, BBN Labs, August 1988.
 * Modified by Steve Deering, Stanford, February 1989.
 *
 * MROUTING 1.1
 */

#ifndef MROUTING
int	ip_mrtproto;				/* for netstat only */
#else

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/route.h>
#include <net/raw_cb.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <netinet/ip_mroute.h>

/* Static forwards */
static	int ip_mrouter_init __P((struct socket *));
static	int add_vif __P((struct vifctl *));
static	int del_vif __P((vifi_t *vifip));
static	int add_lgrp __P((struct lgrplctl *));
static	int del_lgrp __P((struct lgrplctl *));
static	int grplst_member __P((struct vif *, struct in_addr));
static	u_int32_t nethash __P((u_int32_t in));
static	int add_mrt __P((struct mrtctl *));
static	int del_mrt __P((struct in_addr *));
static	struct mrt *mrtfind __P((u_int32_t));
static	void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
static	void srcrt_send __P((struct ip *, struct vif *, struct mbuf *));
static	void encap_send __P((struct ip *, struct vif *, struct mbuf *));
static	void multiencap_decap __P((struct mbuf *, int hlen));

#define INSIZ sizeof(struct in_addr)
#define	same(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
#define	satosin(sa)	((struct sockaddr_in *)(sa))

/*
 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
 * except for netstat or debugging purposes.
 */
struct	socket *ip_mrouter = NULL;
int	ip_mrtproto = IGMP_DVMRP;		/* for netstat only */

struct	mrt *mrttable[MRTHASHSIZ];
struct	vif viftable[MAXVIFS];
struct	mrtstat	mrtstat;

/*
 * 'Interfaces' associated with decapsulator (so we can tell
 * packets that went through it from ones that get reflected
 * by a broken gateway).  These interfaces are never linked into
 * the system ifnet list & no routes point to them.  I.e., packets
 * can't be sent this way.  They only exist as a placeholder for
 * multicast source verification.
 */
struct ifnet multicast_decap_if[MAXVIFS];

#define	ENCAP_TTL	64
#define	ENCAP_PROTO	4

/* prototype IP hdr for encapsulated packets */
struct ip multicast_encap_iphdr = {
#if defined(ultrix) || defined(i386)
	sizeof(struct ip) >> 2, IPVERSION,
#else
	IPVERSION, sizeof(struct ip) >> 2,
#endif
	0,				/* tos */
	sizeof(struct ip),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, ENCAP_PROTO,
	0,				/* checksum */
};

/*
 * Private variables.
 */
static	vifi_t numvifs = 0;
static	struct mrt *cached_mrt = NULL;
static	u_int32_t cached_origin;
static	u_int32_t cached_originmask;

/* Protocol input routine that was installed before multiencap_decap. */
static void (*encap_oldrawip)();

/*
 * one-back cache used by multiencap_decap to locate a tunnel's vif
 * given a datagram's src ip address.
 */
static u_int32_t last_encap_src;
static struct vif *last_encap_vif;

/*
 * A simple hash function: returns MRTHASHMOD of the low-order octet of
 * the argument's network or subnet number.
 *
 * Trailing zero octets of the net number are shifted away first.  A net
 * number of zero has no non-zero octet at all, so the shift loop must
 * stop on zero; otherwise it would spin forever.
 */
static u_int32_t
nethash(n)
	u_int32_t n;
{
	struct in_addr in;

	in.s_addr = n;
	n = in_netof(in);
	while (n != 0 && (n & 0xff) == 0)
		n >>= 8;
	return (MRTHASHMOD(n));
}

/*
 * this is a direct-mapped cache used to speed the mapping from a
 * datagram source address to the associated multicast route.  Note
 * that unlike mrttable, the hash is on IP address, not IP net number.
 */
#define MSRCHASHSIZ 1024
#define MSRCHASH(a) ((((a) >> 20) ^ ((a) >> 10) ^ (a)) & (MSRCHASHSIZ - 1))
struct mrt *mrtsrchash[MSRCHASHSIZ];

/*
 * Find a route for a given origin IP address.
 *
 * The direct-mapped cache is consulted first; on a miss (empty slot or
 * an entry that does not cover the origin) mrtfind() is called and a
 * successful result is stored back into the cache slot.  "rt" is left
 * NULL when no route exists.  "o" is evaluated exactly once.
 */
#define MRTFIND(o, rt) do {						\
	register u_int32_t _mrorigin = (o);				\
	register u_int _mrhash = MSRCHASH(_mrorigin);			\
									\
	++mrtstat.mrts_mrt_lookups;					\
	(rt) = mrtsrchash[_mrhash];					\
	if ((rt) == NULL ||						\
	    (_mrorigin & (rt)->mrt_originmask.s_addr) !=		\
	    (rt)->mrt_origin.s_addr) {					\
		if (((rt) = mrtfind(_mrorigin)) != NULL)		\
			mrtsrchash[_mrhash] = (rt);			\
	}								\
} while (0)

/*
 * Slow-path route lookup: walk the mrttable hash chain selected by
 * nethash(origin) and return the first entry whose masked origin
 * matches, or NULL if there is none.  Counts a cache miss on every call.
 */
static struct mrt *
mrtfind(origin)
	u_int32_t origin;
{
	register struct mrt *rt;
	register u_int hash;

	mrtstat.mrts_mrt_misses++;

	hash = nethash(origin);
	for (rt = mrttable[hash]; rt != NULL; rt = rt->mrt_next) {
		if ((origin & rt->mrt_originmask.s_addr) ==
		    rt->mrt_origin.s_addr)
			return (rt);
	}
	return (NULL);
}

/*
 * Handle DVMRP setsockopt commands to modify the multicast routing tables.
 *
 * Every command except DVMRP_INIT must come from the socket that is
 * currently registered as the multicast router (EACCES otherwise).
 * Commands that carry an argument require "m" to be non-NULL and at
 * least as long as the argument structure (EINVAL otherwise).  Unknown
 * commands yield EOPNOTSUPP.  Returns 0 or an errno value.
 */
int
ip_mrouter_cmd(cmd, so, m)
	register int cmd;
	register struct socket *so;
	register struct mbuf *m;
{
	register int error = 0;

	if (cmd != DVMRP_INIT && so != ip_mrouter)
		return (EACCES);

	switch (cmd) {

	case DVMRP_INIT:
		error = ip_mrouter_init(so);
		break;

	case DVMRP_DONE:
		error = ip_mrouter_done();
		break;

	case DVMRP_ADD_VIF:
		if (m == NULL || m->m_len < sizeof(struct vifctl))
			error = EINVAL;
		else
			error = add_vif(mtod(m, struct vifctl *));
		break;

	case DVMRP_DEL_VIF:
		/* The argument is a vifi_t; validate against that type. */
		if (m == NULL || m->m_len < sizeof(vifi_t))
			error = EINVAL;
		else
			error = del_vif(mtod(m, vifi_t *));
		break;

	case DVMRP_ADD_LGRP:
		if (m == NULL || m->m_len < sizeof(struct lgrplctl))
			error = EINVAL;
		else
			error = add_lgrp(mtod(m, struct lgrplctl *));
		break;

	case DVMRP_DEL_LGRP:
		if (m == NULL || m->m_len < sizeof(struct lgrplctl))
			error = EINVAL;
		else
			error = del_lgrp(mtod(m, struct lgrplctl *));
		break;

	case DVMRP_ADD_MRT:
		if (m == NULL || m->m_len < sizeof(struct mrtctl))
			error = EINVAL;
		else
			error = add_mrt(mtod(m, struct mrtctl *));
		break;

	case DVMRP_DEL_MRT:
		if (m == NULL || m->m_len < sizeof(struct in_addr))
			error = EINVAL;
		else
			error = del_mrt(mtod(m, struct in_addr *));
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

/*
 * Enable multicast routing.
 *
 * Only a raw IGMP socket may become the multicast router
 * (EOPNOTSUPP otherwise), and only one router may be registered
 * at a time (EADDRINUSE otherwise).
 */
static int
ip_mrouter_init(so)
	register struct socket *so;
{
	if (so->so_type != SOCK_RAW ||
	    so->so_proto->pr_protocol != IPPROTO_IGMP)
		return (EOPNOTSUPP);

	if (ip_mrouter != NULL)
		return (EADDRINUSE);

	ip_mrouter = so;

	return (0);
}

/*
 * Disable multicast routing.
 *
 * Releases every vif and every multicast route, clears the lookup
 * caches and unregisters the router socket.  Always returns 0.
 */
int
ip_mrouter_done()
{
	register vifi_t vifi;
	register int i;
	register struct ifnet *ifp;
	register struct mrt *rt, *nrt;
	register int s;
	struct ifreq ifr;

	s = splnet();

	/*
	 * For each phyint in use, free its local group list and
	 * disable promiscuous reception of all IP multicasts.
	 */
	for (vifi = 0; vifi < numvifs; vifi++) {
		if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
		    !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
			if (viftable[vifi].v_lcl_grps)
				free(viftable[vifi].v_lcl_grps, M_MRTABLE);
			satosin(&ifr.ifr_addr)->sin_family = AF_INET;
			satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
			ifp = viftable[vifi].v_ifp;
			(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
		}
	}
	bzero((caddr_t)viftable, sizeof(viftable));
	numvifs = 0;

	/*
	 * The one-back decapsulation cache points into viftable;
	 * invalidate it now that the table has been wiped.
	 */
	last_encap_vif = NULL;
	last_encap_src = 0;

	/*
	 * Free any multicast route entries.  Each hash bucket is a
	 * linked chain, so every element must be released, not just
	 * the head.
	 */
	for (i = 0; i < MRTHASHSIZ; i++) {
		for (rt = mrttable[i]; rt != NULL; rt = nrt) {
			nrt = rt->mrt_next;
			free(rt, M_MRTABLE);
		}
	}
	bzero((caddr_t)mrttable, sizeof(mrttable));
	bzero((caddr_t)mrtsrchash, sizeof(mrtsrchash));
	cached_mrt = NULL;

	ip_mrouter = NULL;

	splx(s);
	return (0);
}

/*
 * Add a vif to the vif table.
 *
 * Returns EINVAL for an out-of-range index, EADDRINUSE if the slot is
 * occupied, EADDRNOTAVAIL if no interface owns the local address,
 * EOPNOTSUPP if a physical interface lacks multicast support, or the
 * error from the interface's SIOCADDMULTI ioctl.
 */
static int
add_vif(vifcp)
	register struct vifctl *vifcp;
{
	register struct vif *vifp;
	register struct ifaddr *ifa;
	register struct ifnet *ifp;
	struct ifreq ifr;
	register int error, i, s;
	static struct sockaddr_in sin = { sizeof(sin), AF_INET };

	/* Validate the index before forming a pointer into the table. */
	if (vifcp->vifc_vifi >= MAXVIFS)
		return (EINVAL);
	vifp = viftable + vifcp->vifc_vifi;
	if (vifp->v_lcl_addr.s_addr != 0)
		return (EADDRINUSE);

	/* Find the interface with an address in AF_INET family */
	sin.sin_addr = vifcp->vifc_lcl_addr;
	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
	if (ifa == 0)
		return (EADDRNOTAVAIL);
	ifp = ifa->ifa_ifp;

	if (vifcp->vifc_flags & VIFF_TUNNEL) {
		if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
			/*
			 * An encapsulating tunnel is wanted.  If we
			 * haven't done so already, put our decap routine
			 * in front of raw_input so we have a chance to
			 * decapsulate incoming packets.  Then set the
			 * arrival 'interface' to be the decapsulator.
			 */
			if (encap_oldrawip == 0) {
				extern struct protosw inetsw[];
				extern u_char ip_protox[];
				register int pr = ip_protox[ENCAP_PROTO];

				encap_oldrawip = inetsw[pr].pr_input;
				inetsw[pr].pr_input = multiencap_decap;
				for (i = 0; i < MAXVIFS; ++i) {
					multicast_decap_if[i].if_name =
					    "mdecap";
					multicast_decap_if[i].if_unit = i;
				}
			}
			ifp = &multicast_decap_if[vifcp->vifc_vifi];
		} else {
			/* Source-route tunnels have no arrival interface. */
			ifp = 0;
		}
	} else {
		/* Make sure the interface supports multicast */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/*
		 * Enable promiscuous reception of all
		 * IP multicasts from the if
		 */
		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
		s = splnet();
		error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
		splx(s);
		if (error)
			return (error);
	}

	s = splnet();
	vifp->v_flags = vifcp->vifc_flags;
	vifp->v_threshold = vifcp->vifc_threshold;
	vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
	vifp->v_ifp = ifp;
	vifp->v_rmt_addr = vifcp->vifc_rmt_addr;

	/*
	 * Adjust numvifs up if the vifi is higher than numvifs.  This is
	 * done inside the same protected region as the table update so
	 * the forwarding path never sees one without the other.
	 */
	if (numvifs <= vifcp->vifc_vifi)
		numvifs = vifcp->vifc_vifi + 1;
	splx(s);

	return (0);
}

/*
 * Delete a vif from the vif table.
 *
 * Returns EINVAL if the index is not below numvifs and EADDRNOTAVAIL
 * if the slot is not in use.
 */
static int
del_vif(vifip)
	register vifi_t *vifip;
{
	register struct vif *vifp;
	register struct ifnet *ifp;
	register int i, s;
	struct ifreq ifr;

	/* Validate the index before forming a pointer into the table. */
	if (*vifip >= numvifs)
		return (EINVAL);
	vifp = viftable + *vifip;
	if (vifp->v_lcl_addr.s_addr == 0)
		return (EADDRNOTAVAIL);

	s = splnet();

	if (!(vifp->v_flags & VIFF_TUNNEL)) {
		if (vifp->v_lcl_grps)
			free(vifp->v_lcl_grps, M_MRTABLE);
		satosin(&ifr.ifr_addr)->sin_family = AF_INET;
		satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
		ifp = vifp->v_ifp;
		(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
	}
	/* Drop the decapsulation cache entry if it refers to this vif. */
	if (vifp == last_encap_vif) {
		last_encap_vif = 0;
		last_encap_src = 0;
	}
	bzero((caddr_t)vifp, sizeof (*vifp));

	/* Adjust numvifs down */
	for (i = numvifs - 1; i >= 0; i--)
		if (viftable[i].v_lcl_addr.s_addr != 0)
			break;
	numvifs = i + 1;

	splx(s);
	return (0);
}

/*
 * Add the multicast group in the lgrpctl to the list of local multicast
 * group memberships associated with the vif indexed by gcp->lgc_vifi.
 *
 * Returns EINVAL for a bad index, EADDRNOTAVAIL if the vif is unused or
 * is a tunnel, and ENOBUFS if the group list cannot be grown.
 */
static int
add_lgrp(gcp)
	register struct lgrplctl *gcp;
{
	register struct vif *vifp;
	register int s;

	if (gcp->lgc_vifi >= numvifs)
		return (EINVAL);

	vifp = viftable + gcp->lgc_vifi;
	if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL))
		return (EADDRNOTAVAIL);

	/* If not enough space in existing list, allocate a larger one */
	s = splnet();
	if (vifp->v_lcl_grps_n + 1 >= vifp->v_lcl_grps_max) {
		register int num;
		register struct in_addr *ip;

		num = vifp->v_lcl_grps_max;
		if (num <= 0)
			num = 32;	/* initial number */
		else
			num += num;	/* double last number */
		ip = (struct in_addr *)malloc(num * sizeof(*ip),
		    M_MRTABLE, M_NOWAIT);
		if (ip == NULL) {
			splx(s);
			return (ENOBUFS);
		}

		bzero((caddr_t)ip, num * sizeof(*ip));	/* XXX paranoid */

		/* Carry the old entries over; there are none the first time. */
		if (vifp->v_lcl_grps != NULL) {
			bcopy((caddr_t)vifp->v_lcl_grps, (caddr_t)ip,
			    vifp->v_lcl_grps_n * sizeof(*ip));
			free(vifp->v_lcl_grps, M_MRTABLE);
		}

		vifp->v_lcl_grps_max = num;
		vifp->v_lcl_grps = ip;
	}

	vifp->v_lcl_grps[vifp->v_lcl_grps_n++] = gcp->lgc_gaddr;

	/* Keep the per-vif membership cache consistent with the list. */
	if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group)
		vifp->v_cached_result = 1;

	splx(s);
	return (0);
}

/*
 * Delete the local multicast group associated with the vif
 * indexed by gcp->lgc_vifi.
 *
 * Returns EINVAL for a bad index and EADDRNOTAVAIL if the vif is
 * unused, is a tunnel, or does not list the group.
 */
static int
del_lgrp(gcp)
	register struct lgrplctl *gcp;
{
	register struct vif *vifp;
	register int i, error, s;

	if (gcp->lgc_vifi >= numvifs)
		return (EINVAL);
	vifp = viftable + gcp->lgc_vifi;
	if (vifp->v_lcl_addr.s_addr == 0 || (vifp->v_flags & VIFF_TUNNEL))
		return (EADDRNOTAVAIL);

	s = splnet();

	/* Keep the per-vif membership cache consistent with the list. */
	if (gcp->lgc_gaddr.s_addr == vifp->v_cached_group)
		vifp->v_cached_result = 0;

	error = EADDRNOTAVAIL;
	for (i = 0; i < vifp->v_lcl_grps_n; ++i) {
		if (!same(&gcp->lgc_gaddr, &vifp->v_lcl_grps[i]))
			continue;

		/* Found it: close the gap by sliding the tail down one. */
		--vifp->v_lcl_grps_n;
		for (; i < vifp->v_lcl_grps_n; ++i)
			vifp->v_lcl_grps[i] = vifp->v_lcl_grps[i + 1];
		error = 0;
		break;
	}

	splx(s);
	return (error);
}

/*
 * Return 1 if gaddr is a member of the local group list for vifp.
 *
 * A one-entry cache (v_cached_group / v_cached_result) remembers the
 * outcome of the most recent lookup, positive or negative.
 */
static int
grplst_member(vifp, gaddr)
	register struct vif *vifp;
	struct in_addr gaddr;
{
	register int i, s;
	register int found;
	register u_int32_t addr;

	mrtstat.mrts_grp_lookups++;

	addr = gaddr.s_addr;
	if (addr == vifp->v_cached_group)
		return (vifp->v_cached_result);

	mrtstat.mrts_grp_misses++;

	found = 0;
	for (i = 0; i < vifp->v_lcl_grps_n; ++i)
		if (addr == vifp->v_lcl_grps[i].s_addr) {
			found = 1;
			break;
		}

	s = splnet();
	vifp->v_cached_group = addr;
	vifp->v_cached_result = found;
	splx(s);
	return (found);
}

/*
 * Add an mrt entry.
 *
 * If mrtfind() already returns a route covering the origin, that route's
 * parent, children and leaves are updated in place.  Otherwise a new
 * entry is allocated and linked at the head of its hash chain.
 * Returns ENOBUFS if the allocation fails.
 */
static int
add_mrt(mrtcp)
	register struct mrtctl *mrtcp;
{
	struct mrt *rt;
	u_int32_t hash;
	int s;

	/* Look up and modify under the same protection. */
	s = splnet();

	if ((rt = mrtfind(mrtcp->mrtc_origin.s_addr)) != NULL) {
		/* Just update the route */
		rt->mrt_parent = mrtcp->mrtc_parent;
		VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children);
		VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves);
		splx(s);
		return (0);
	}

	rt = (struct mrt *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
	if (rt == NULL) {
		splx(s);
		return (ENOBUFS);
	}

	/*
	 * insert new entry at head of hash chain
	 */
	rt->mrt_origin = mrtcp->mrtc_origin;
	rt->mrt_originmask = mrtcp->mrtc_originmask;
	rt->mrt_parent = mrtcp->mrtc_parent;
	VIFM_COPY(mrtcp->mrtc_children, rt->mrt_children);
	VIFM_COPY(mrtcp->mrtc_leaves, rt->mrt_leaves);
	/* link into table */
	hash = nethash(mrtcp->mrtc_origin.s_addr);
	rt->mrt_next = mrttable[hash];
	mrttable[hash] = rt;

	splx(s);
	return (0);
}

/*
 * Delete an mrt entry.
 *
 * The entry is matched on an exact origin address within its hash
 * chain.  Any reference to it in the source-address cache is cleared
 * before it is unlinked and freed.  Returns ESRCH if no entry matches.
 */
static int
del_mrt(origin)
	register struct in_addr *origin;
{
	register struct mrt *rt, *prev_rt;
	register u_int32_t hash = nethash(origin->s_addr);
	register struct mrt **cmrt, **cmrtend;
	register int s;

	/* Search and unlink under the same protection. */
	s = splnet();

	prev_rt = NULL;
	for (rt = mrttable[hash]; rt != NULL; prev_rt = rt, rt = rt->mrt_next)
		if (origin->s_addr == rt->mrt_origin.s_addr)
			break;
	if (rt == NULL) {
		splx(s);
		return (ESRCH);
	}

	/* Purge every cache slot that still points at this route. */
	cmrt = mrtsrchash;
	cmrtend = cmrt + MSRCHASHSIZ;
	for ( ; cmrt < cmrtend; ++cmrt)
		if (*cmrt == rt)
			*cmrt = 0;

	if (prev_rt == NULL)
		mrttable[hash] = rt->mrt_next;
	else
		prev_rt->mrt_next = rt->mrt_next;
	free(rt, M_MRTABLE);

	splx(s);
	return (0);
}

/*
 * IP multicast forwarding function. This function assumes that the packet
 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
 * pointed to by "ifp", and the packet is to be relayed to other networks
 * that have members of the packet's destination IP multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is tunneled
 * or erroneous, in which case a non-zero return value tells the caller to
 * discard it.
 */

#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */

int
ip_mforward(m, ifp)
	register struct mbuf *m;
	register struct ifnet *ifp;
{
	register struct ip *ip = mtod(m, struct ip *);
	register struct mrt *rt;
	register struct vif *vifp;
	register int vifi;
	register u_char *ipoptions;
	u_int32_t tunnel_src;

	if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
	    (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {
		/*
		 * Packet arrived via a physical interface or was
		 * decapsulated off an encapsulating tunnel.
		 * If ifp is one of the multicast_decap_if[]
		 * dummy interfaces, we know it arrived on an
		 * encapsulating tunnel, and we set tunnel_src to 1.
		 * We can detect the dummy interface easily since
		 * its output function is null.
		 */
		tunnel_src = (ifp->if_output == 0) ? 1 : 0;
	} else {
		/*
		 * Packet arrived through a tunnel.
		 *
		 * A tunneled packet has a single NOP option and a
		 * two-element loose-source-and-record-route (LSRR)
		 * option immediately following the fixed-size part of
		 * the IP header.  At this point in processing, the IP
		 * header should contain the following IP addresses:
		 *
		 * original source          - in the source address field
		 * destination group        - in the destination address field
		 * remote tunnel end-point  - in the first  element of LSRR
		 * one of this host's addrs - in the second element of LSRR
		 *
		 * NOTE: RFC-1075 would have the original source and
		 * remote tunnel end-point addresses swapped.  However,
		 * that could cause delivery of ICMP error messages to
		 * innocent applications on intermediate routing
		 * hosts!  Therefore, we hereby change the spec.
		 */

		/*
		 * Verify that the tunnel options are well-formed.
		 */
		if (ipoptions[0] != IPOPT_NOP ||
		    ipoptions[2] != 11 ||	/* LSRR option length   */
		    ipoptions[3] != 12 ||	/* LSRR address pointer */
		    (tunnel_src = *(u_int32_t *)(&ipoptions[4])) == 0) {
			mrtstat.mrts_bad_tunnel++;
			return (1);
		}

		/*
		 * Delete the tunnel options from the packet.
		 */
		ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions,
		    (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN)));
		m->m_len -= TUNNEL_LEN;
		ip->ip_len -= TUNNEL_LEN;
		ip->ip_hl -= TUNNEL_LEN >> 2;

		ifp = 0;
	}

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (ip->ip_ttl <= 1 ||
	    ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
		return ((int)tunnel_src);

	/*
	 * Don't forward if we don't have a route for the packet's origin.
	 */
	MRTFIND(ip->ip_src.s_addr, rt);
	if (rt == NULL) {
		mrtstat.mrts_no_route++;
		return ((int)tunnel_src);
	}

	/*
	 * Don't forward if it didn't arrive from the
	 * parent vif for its origin.
	 *
	 * Notes: v_ifp is zero for src route tunnels, multicast_decap_if
	 * for encapsulated tunnels and a real ifnet for non-tunnels so
	 * the first part of the if catches wrong physical interface or
	 * tunnel type; v_rmt_addr is zero for non-tunneled packets so
	 * the 2nd part catches both packets that arrive via a tunnel
	 * that shouldn't and packets that arrive via the wrong tunnel.
	 */
	vifi = rt->mrt_parent;
	if (viftable[vifi].v_ifp != ifp ||
	    (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) {
		/* came in the wrong interface */
		++mrtstat.mrts_wrong_if;
		return ((int)tunnel_src);
	}

	/*
	 * For each vif, decide if a copy of the packet should be forwarded.
	 * Forward if:
	 *		- the ttl exceeds the vif's threshold AND
	 *		- the vif is a child in the origin's route AND
	 *		- ( the vif is not a leaf in the origin's route OR
	 *		    the destination group has members on the vif )
	 *
	 * (This might be speeded up with some sort of cache -- someday.)
	 */
	for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) {
		if (ip->ip_ttl > vifp->v_threshold &&
		    VIFM_ISSET(vifi, rt->mrt_children) &&
		    (!VIFM_ISSET(vifi, rt->mrt_leaves) ||
		    grplst_member(vifp, ip->ip_dst))) {
			if (vifp->v_flags & VIFF_SRCRT)
				srcrt_send(ip, vifp, m);
			else if (vifp->v_flags & VIFF_TUNNEL)
				encap_send(ip, vifp, m);
			else
				phyint_send(ip, vifp, m);
		}
	}
	return ((int)tunnel_src);
}

/*
 * Send a copy of the packet out a physical interface.
 *
 * The copy is handed to ip_output() with multicast options naming the
 * vif's interface, a ttl one less than the packet's, and loopback
 * enabled.  Nothing is sent if the copy cannot be made; the result of
 * ip_output() is not examined.
 */
static void
phyint_send(ip, vifp, m)
	register struct ip *ip;
	register struct vif *vifp;
	register struct mbuf *m;
{
	register struct mbuf *mb_copy;
	struct ip_moptions simo;

	mb_copy = m_copy(m, 0, M_COPYALL);
	if (mb_copy == NULL)
		return;

	simo.imo_multicast_ifp = vifp->v_ifp;
	simo.imo_multicast_ttl = ip->ip_ttl - 1;
	simo.imo_multicast_loop = 1;

	(void)ip_output(mb_copy, NULL, NULL, IP_FORWARDING, &simo);
}

/*
 * Send a copy of the packet through a source-route tunnel.
 *
 * A new header mbuf is prepended that holds the base IP header followed
 * by a NOP and a two-element LSRR option; the copy's destination is
 * rewritten to the remote tunnel end-point.  Nothing is sent if the
 * options would not fit or if an mbuf cannot be obtained; the result of
 * ip_output() is not examined.
 */
static void
srcrt_send(ip, vifp, m)
	register struct ip *ip;
	register struct vif *vifp;
	register struct mbuf *m;
{
	register struct mbuf *mb_copy, *mb_opts;
	register struct ip *ip_copy;
	register u_char *cp;

	/*
	 * Make sure that adding the tunnel options won't exceed the
	 * maximum allowed number of option bytes.
	 */
	if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) {
		mrtstat.mrts_cant_tunnel++;
		return;
	}

	mb_copy = m_copy(m, 0, M_COPYALL);
	if (mb_copy == NULL)
		return;
	ip_copy = mtod(mb_copy, struct ip *);
	ip_copy->ip_ttl--;
	ip_copy->ip_dst = vifp->v_rmt_addr;	/* remote tunnel end-point */
	/*
	 * Adjust the ip header length to account for the tunnel options.
	 */
	ip_copy->ip_hl += TUNNEL_LEN >> 2;
	ip_copy->ip_len += TUNNEL_LEN;
	MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER);
	if (mb_opts == NULL) {
		m_freem(mb_copy);
		return;
	}
	/*
	 * 'Delete' the base ip header from the mb_copy chain
	 */
	mb_copy->m_len -= IP_HDR_LEN;
	mb_copy->m_data += IP_HDR_LEN;
	/*
	 * Make mb_opts be the new head of the packet chain.
	 * Any options of the packet were left in the old packet chain head
	 */
	mb_opts->m_next = mb_copy;
	mb_opts->m_len = IP_HDR_LEN + TUNNEL_LEN;
	mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN;
	mb_opts->m_pkthdr.rcvif = mb_copy->m_pkthdr.rcvif;
	/*
	 * Place the header at the tail of the mbuf's own data area.
	 * m_data starts at the packet-header data area, so the offset
	 * must be computed from MHLEN (via MH_ALIGN), not from MSIZE;
	 * an MSIZE-based offset lands beyond the end of the mbuf.
	 */
	MH_ALIGN(mb_opts, mb_opts->m_len);
	/*
	 * Copy the base ip header from the mb_copy chain to the new head mbuf
	 */
	bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), IP_HDR_LEN);
	/*
	 * Add the NOP and LSRR after the base ip header
	 */
	cp = mtod(mb_opts, u_char *) + IP_HDR_LEN;
	*cp++ = IPOPT_NOP;
	*cp++ = IPOPT_LSRR;
	*cp++ = 11;		/* LSRR option length */
	*cp++ = 8;		/* LSRR pointer to second element */
	*(u_int32_t *)cp = vifp->v_lcl_addr.s_addr;	/* local tunnel end-point */
	cp += 4;
	*(u_int32_t *)cp = ip->ip_dst.s_addr;		/* destination group */

	(void)ip_output(mb_opts, NULL, NULL, IP_FORWARDING, NULL);
}

/*
 * Send a copy of the packet through an encapsulating tunnel.
 *
 * The copy is prefixed with a fresh outer IP header built from
 * multicast_encap_iphdr, addressed from the vif's local address to its
 * remote address.  The inner header has its ttl decremented, its length
 * and offset converted with HTONS, and its checksum recomputed.
 * Nothing is sent if an mbuf cannot be obtained.
 */
static void
encap_send(ip, vifp, m)
	register struct ip *ip;
	register struct vif *vifp;
	register struct mbuf *m;
{
	register struct mbuf *mb_copy;
	register struct ip *ip_copy;
	register int i, len = ip->ip_len;

	/*
	 * copy the old packet & pullup its IP header into the
	 * new mbuf so we can modify it.  Try to fill the new
	 * mbuf since if we don't the ethernet driver will.
	 */
	MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
	if (mb_copy == NULL)
		return;
	mb_copy->m_data += 16;
	mb_copy->m_len = sizeof(multicast_encap_iphdr);
	if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
		m_freem(mb_copy);
		return;
	}
	i = MHLEN - 16;
	if (i > len)
		i = len;
	mb_copy = m_pullup(mb_copy, i);
	if (mb_copy == NULL)
		return;

	/*
	 * fill in the encapsulating IP header.
	 */
	ip_copy = mtod(mb_copy, struct ip *);
	*ip_copy = multicast_encap_iphdr;
	ip_copy->ip_id = htons(ip_id++);
	ip_copy->ip_len += len;
	ip_copy->ip_src = vifp->v_lcl_addr;
	ip_copy->ip_dst = vifp->v_rmt_addr;

	/*
	 * turn the encapsulated IP header back into a valid one.
	 */
	ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
	--ip->ip_ttl;
	HTONS(ip->ip_len);
	HTONS(ip->ip_off);
	ip->ip_sum = 0;
#if defined(LBL) && !defined(ultrix) && !defined(i386)
	ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
#else
	/* Temporarily step past the outer header to checksum the inner one. */
	mb_copy->m_data += sizeof(multicast_encap_iphdr);
	ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
	mb_copy->m_data -= sizeof(multicast_encap_iphdr);
#endif
	/*
	 * The packet header must be filled in regardless of which
	 * checksum routine was selected above.
	 */
	mb_copy->m_pkthdr.len = m->m_pkthdr.len + sizeof(multicast_encap_iphdr);
	mb_copy->m_pkthdr.rcvif = m->m_pkthdr.rcvif;

	ip_output(mb_copy, (struct mbuf *)0, (struct route *)0,
	    IP_FORWARDING, (struct ip_moptions *)0);
}

/*
 * De-encapsulate a packet and feed it back through ip input (this
 * routine is called whenever IP gets a packet with proto type
 * ENCAP_PROTO and a local destination address).
 *
 * Packets of any other protocol are passed to the input routine that
 * was installed before this one.  The mbuf is consumed in all cases.
 */
static void
multiencap_decap(m, hlen)
	register struct mbuf *m;
	int hlen;
{
	struct ifnet *ifp;
	register struct ip *ip = mtod(m, struct ip *);
	register int s;
	register struct ifqueue *ifq;
	register struct vif *vifp;

	if (ip->ip_p != ENCAP_PROTO) {
		(*encap_oldrawip)(m, hlen);
		return;
	}

	/*
	 * The inner IP header is examined below; make sure it is
	 * contiguous in the first mbuf before looking at it.
	 * m_pullup frees the chain on failure.
	 */
	if (m->m_len < hlen + (int)sizeof(struct ip)) {
		m = m_pullup(m, hlen + (int)sizeof(struct ip));
		if (m == NULL) {
			++mrtstat.mrts_bad_tunnel;
			return;
		}
		ip = mtod(m, struct ip *);
	}

	/*
	 * dump the packet if it's not to a multicast destination or if
	 * we don't have an encapsulating tunnel with the source.
	 * Note:  This code assumes that the remote site IP address
	 * uniquely identifies the tunnel (i.e., that this site has
	 * at most one tunnel with the remote site).
	 */
	if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
		++mrtstat.mrts_bad_tunnel;
		m_freem(m);
		return;
	}
	if (ip->ip_src.s_addr != last_encap_src) {
		register struct vif *vife;

		/* Cache miss: rescan the vif table for this remote address. */
		vifp = viftable;
		vife = vifp + numvifs;
		last_encap_src = ip->ip_src.s_addr;
		last_encap_vif = 0;
		for ( ; vifp < vife; ++vifp)
			if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
				if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
				    == VIFF_TUNNEL)
					last_encap_vif = vifp;
				break;
			}
	}
	if ((vifp = last_encap_vif) == 0) {
		mrtstat.mrts_cant_tunnel++; /*XXX*/
		m_freem(m);
		return;
	}
	ifp = vifp->v_ifp;
	/* Strip the outer header and mark the packet as from the decapsulator. */
	m->m_data += hlen;
	m->m_len -= hlen;
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len -= hlen;
	ifq = &ipintrq;
	s = splimp();
	if (IF_QFULL(ifq)) {
		IF_DROP(ifq);
		m_freem(m);
	} else {
		IF_ENQUEUE(ifq, m);
		/*
		 * normally we would need a "schednetisr(NETISR_IP)"
		 * here but we were called by ip_input and it is going
		 * to loop back & try to dequeue the packet we just
		 * queued as soon as we return so we avoid the
		 * unnecessary software interrupt.
		 */
	}
	splx(s);
}
#endif