1 /* $OpenBSD: if_gre.c,v 1.44 2008/06/26 05:42:20 ray Exp $ */ 2 /* $NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */ 3 4 /* 5 * Copyright (c) 1998 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Encapsulate L3 protocols into IP, per RFC 1701 and 1702. 37 * See gre(4) for more details. 38 * Also supported: IP in IP encapsulation (proto 55) per RFC 2004. 
39 */ 40 41 #include "gre.h" 42 #if NGRE > 0 43 44 #include "bpfilter.h" 45 46 #include <sys/param.h> 47 #include <sys/proc.h> 48 #include <sys/mbuf.h> 49 #include <sys/socket.h> 50 #include <sys/sockio.h> 51 #include <sys/kernel.h> 52 #include <sys/systm.h> 53 54 #include <net/if.h> 55 #include <net/if_types.h> 56 #include <net/netisr.h> 57 #include <net/route.h> 58 59 #ifdef INET 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/in_var.h> 63 #include <netinet/ip.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/if_ether.h> 66 #else 67 #error "if_gre used without inet" 68 #endif 69 70 #ifdef NETATALK 71 #include <netatalk/at.h> 72 #include <netatalk/at_var.h> 73 #include <netatalk/at_extern.h> 74 #endif 75 76 #if NBPFILTER > 0 77 #include <net/bpf.h> 78 #endif 79 80 #include <net/if_gre.h> 81 82 #ifndef GRE_RECURSION_LIMIT 83 #define GRE_RECURSION_LIMIT 3 /* How many levels of recursion allowed */ 84 #endif /* GRE_RECURSION_LIMIT */ 85 86 /* 87 * It is not easy to calculate the right value for a GRE MTU. 88 * We leave this task to the admin and use the same default that 89 * other vendors use. 90 */ 91 #define GREMTU 1476 92 93 int gre_clone_create(struct if_clone *, int); 94 int gre_clone_destroy(struct ifnet *); 95 96 struct gre_softc_head gre_softc_list; 97 struct if_clone gre_cloner = 98 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy); 99 100 /* 101 * We can control the acceptance of GRE and MobileIP packets by 102 * altering the sysctl net.inet.gre.allow and net.inet.mobileip.allow values 103 * respectively. Zero means drop them, all else is acceptance. We can also 104 * control acceptance of WCCPv1-style GRE packets through the 105 * net.inet.gre.wccp value, but be aware it depends upon normal GRE being 106 * allowed as well. 
107 * 108 */ 109 int gre_allow = 0; 110 int gre_wccp = 0; 111 int ip_mobile_allow = 0; 112 113 static void gre_compute_route(struct gre_softc *sc); 114 115 void 116 greattach(int n) 117 { 118 LIST_INIT(&gre_softc_list); 119 if_clone_attach(&gre_cloner); 120 } 121 122 int 123 gre_clone_create(struct if_clone *ifc, int unit) 124 { 125 struct gre_softc *sc; 126 int s; 127 128 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 129 if (!sc) 130 return (ENOMEM); 131 snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d", 132 ifc->ifc_name, unit); 133 sc->sc_if.if_softc = sc; 134 sc->sc_if.if_type = IFT_TUNNEL; 135 sc->sc_if.if_addrlen = 0; 136 sc->sc_if.if_hdrlen = 24; /* IP + GRE */ 137 sc->sc_if.if_mtu = GREMTU; 138 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 139 sc->sc_if.if_output = gre_output; 140 sc->sc_if.if_ioctl = gre_ioctl; 141 sc->sc_if.if_collisions = 0; 142 sc->sc_if.if_ierrors = 0; 143 sc->sc_if.if_oerrors = 0; 144 sc->sc_if.if_ipackets = 0; 145 sc->sc_if.if_opackets = 0; 146 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; 147 sc->g_proto = IPPROTO_GRE; 148 sc->sc_if.if_flags |= IFF_LINK0; 149 150 if_attach(&sc->sc_if); 151 if_alloc_sadl(&sc->sc_if); 152 153 #if NBPFILTER > 0 154 bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_NULL, 155 sizeof(u_int32_t)); 156 #endif 157 s = splnet(); 158 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list); 159 splx(s); 160 161 return (0); 162 } 163 164 int 165 gre_clone_destroy(struct ifnet *ifp) 166 { 167 struct gre_softc *sc = ifp->if_softc; 168 int s; 169 170 s = splnet(); 171 LIST_REMOVE(sc, sc_list); 172 splx(s); 173 174 if_detach(ifp); 175 176 free(sc, M_DEVBUF); 177 return (0); 178 } 179 180 /* 181 * The output routine. Takes a packet and encapsulates it in the protocol 182 * given by sc->g_proto. See also RFC 1701 and RFC 2004. 
183 */ 184 185 int 186 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 187 struct rtentry *rt) 188 { 189 int error = 0; 190 struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc); 191 struct greip *gh = NULL; 192 struct ip *inp = NULL; 193 u_int8_t ip_tos = 0; 194 u_int16_t etype = 0; 195 struct mobile_h mob_h; 196 struct m_tag *mtag; 197 198 if ((ifp->if_flags & IFF_UP) == 0 || 199 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { 200 m_freem(m); 201 error = ENETDOWN; 202 goto end; 203 } 204 205 /* Try to limit infinite recursion through misconfiguration. */ 206 for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag; 207 mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) { 208 if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) { 209 IF_DROP(&ifp->if_snd); 210 m_freem(m); 211 error = EIO; 212 goto end; 213 } 214 } 215 216 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT); 217 if (mtag == NULL) { 218 IF_DROP(&ifp->if_snd); 219 m_freem(m); 220 error = ENOBUFS; 221 goto end; 222 } 223 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 224 m_tag_prepend(m, mtag); 225 226 m->m_flags &= ~(M_BCAST|M_MCAST); 227 228 #if NBPFILTER >0 229 if (ifp->if_bpf) 230 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT); 231 #endif 232 233 if (sc->g_proto == IPPROTO_MOBILE) { 234 if (ip_mobile_allow == 0) { 235 IF_DROP(&ifp->if_snd); 236 m_freem(m); 237 error = EACCES; 238 goto end; 239 } 240 241 if (dst->sa_family == AF_INET) { 242 struct mbuf *m0; 243 int msiz; 244 245 /* 246 * Make sure the complete IP header (with options) 247 * is in the first mbuf. 
248 */ 249 if (m->m_len < sizeof(struct ip)) { 250 m = m_pullup(m, sizeof(struct ip)); 251 if (m == NULL) { 252 IF_DROP(&ifp->if_snd); 253 error = ENOBUFS; 254 goto end; 255 } else 256 inp = mtod(m, struct ip *); 257 258 if (m->m_len < inp->ip_hl << 2) { 259 m = m_pullup(m, inp->ip_hl << 2); 260 if (m == NULL) { 261 IF_DROP(&ifp->if_snd); 262 error = ENOBUFS; 263 goto end; 264 } 265 } 266 } 267 268 inp = mtod(m, struct ip *); 269 270 bzero(&mob_h, MOB_H_SIZ_L); 271 mob_h.proto = (inp->ip_p) << 8; 272 mob_h.odst = inp->ip_dst.s_addr; 273 inp->ip_dst.s_addr = sc->g_dst.s_addr; 274 275 /* 276 * If the packet comes from our host, we only change 277 * the destination address in the IP header. 278 * Otherwise we need to save and change the source. 279 */ 280 if (inp->ip_src.s_addr == sc->g_src.s_addr) { 281 msiz = MOB_H_SIZ_S; 282 } else { 283 mob_h.proto |= MOB_H_SBIT; 284 mob_h.osrc = inp->ip_src.s_addr; 285 inp->ip_src.s_addr = sc->g_src.s_addr; 286 msiz = MOB_H_SIZ_L; 287 } 288 289 HTONS(mob_h.proto); 290 mob_h.hcrc = gre_in_cksum((u_int16_t *) &mob_h, msiz); 291 292 /* Squeeze in the mobility header */ 293 if ((m->m_data - msiz) < m->m_pktdat) { 294 /* Need new mbuf */ 295 MGETHDR(m0, M_DONTWAIT, MT_HEADER); 296 if (m0 == NULL) { 297 IF_DROP(&ifp->if_snd); 298 m_freem(m); 299 error = ENOBUFS; 300 goto end; 301 } 302 M_MOVE_HDR(m0, m); 303 304 m0->m_len = msiz + (inp->ip_hl << 2); 305 m0->m_data += max_linkhdr; 306 m0->m_pkthdr.len = m->m_pkthdr.len + msiz; 307 m->m_data += inp->ip_hl << 2; 308 m->m_len -= inp->ip_hl << 2; 309 310 bcopy((caddr_t) inp, mtod(m0, caddr_t), 311 sizeof(struct ip)); 312 313 m0->m_next = m; 314 m = m0; 315 } else { /* we have some space left in the old one */ 316 m->m_data -= msiz; 317 m->m_len += msiz; 318 m->m_pkthdr.len += msiz; 319 bcopy(inp, mtod(m, caddr_t), 320 inp->ip_hl << 2); 321 } 322 323 /* Copy Mobility header */ 324 inp = mtod(m, struct ip *); 325 bcopy(&mob_h, (caddr_t)(inp + 1), (unsigned) msiz); 326 inp->ip_len = 
htons(ntohs(inp->ip_len) + msiz); 327 } else { /* AF_INET */ 328 IF_DROP(&ifp->if_snd); 329 m_freem(m); 330 error = EINVAL; 331 goto end; 332 } 333 } else if (sc->g_proto == IPPROTO_GRE) { 334 if (gre_allow == 0) { 335 IF_DROP(&ifp->if_snd); 336 m_freem(m); 337 error = EACCES; 338 goto end; 339 } 340 341 switch(dst->sa_family) { 342 case AF_INET: 343 if (m->m_len < sizeof(struct ip)) { 344 m = m_pullup(m, sizeof(struct ip)); 345 if (m == NULL) { 346 IF_DROP(&ifp->if_snd); 347 error = ENOBUFS; 348 goto end; 349 } 350 } 351 352 inp = mtod(m, struct ip *); 353 ip_tos = inp->ip_tos; 354 etype = ETHERTYPE_IP; 355 break; 356 #ifdef NETATALK 357 case AF_APPLETALK: 358 etype = ETHERTYPE_AT; 359 break; 360 #endif 361 #ifdef INET6 362 case AF_INET6: 363 etype = ETHERTYPE_IPV6; 364 break; 365 #endif 366 default: 367 IF_DROP(&ifp->if_snd); 368 m_freem(m); 369 error = EAFNOSUPPORT; 370 goto end; 371 } 372 373 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT); 374 } else { 375 IF_DROP(&ifp->if_snd); 376 m_freem(m); 377 error = EINVAL; 378 goto end; 379 } 380 381 if (m == NULL) { 382 IF_DROP(&ifp->if_snd); 383 error = ENOBUFS; 384 goto end; 385 } 386 387 gh = mtod(m, struct greip *); 388 if (sc->g_proto == IPPROTO_GRE) { 389 /* We don't support any GRE flags for now */ 390 391 bzero((void *) &gh->gi_g, sizeof(struct gre_h)); 392 gh->gi_ptype = htons(etype); 393 } 394 395 gh->gi_pr = sc->g_proto; 396 if (sc->g_proto != IPPROTO_MOBILE) { 397 gh->gi_src = sc->g_src; 398 gh->gi_dst = sc->g_dst; 399 ((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2; 400 ((struct ip *) gh)->ip_ttl = ip_defttl; 401 ((struct ip *) gh)->ip_tos = ip_tos; 402 gh->gi_len = htons(m->m_pkthdr.len); 403 } 404 405 ifp->if_opackets++; 406 ifp->if_obytes += m->m_pkthdr.len; 407 408 /* Send it off */ 409 error = ip_output(m, (void *)NULL, &sc->route, 0, (void *)NULL, (void *)NULL); 410 end: 411 if (error) 412 ifp->if_oerrors++; 413 return (error); 414 } 415 416 int 417 gre_ioctl(struct ifnet *ifp, u_long cmd, 
caddr_t data) 418 { 419 420 struct ifreq *ifr = (struct ifreq *) data; 421 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 422 struct gre_softc *sc = ifp->if_softc; 423 int s; 424 struct sockaddr_in si; 425 struct sockaddr *sa = NULL; 426 int error = 0; 427 struct proc *prc = curproc; /* XXX */ 428 429 s = splnet(); 430 switch(cmd) { 431 case SIOCSIFADDR: 432 ifp->if_flags |= IFF_UP; 433 break; 434 case SIOCSIFDSTADDR: 435 break; 436 case SIOCSIFFLAGS: 437 if ((ifr->ifr_flags & IFF_LINK0) != 0) 438 sc->g_proto = IPPROTO_GRE; 439 else 440 sc->g_proto = IPPROTO_MOBILE; 441 break; 442 case SIOCSIFMTU: 443 if (ifr->ifr_mtu < 576) { 444 error = EINVAL; 445 break; 446 } 447 ifp->if_mtu = ifr->ifr_mtu; 448 break; 449 case SIOCGIFMTU: 450 ifr->ifr_mtu = sc->sc_if.if_mtu; 451 break; 452 case SIOCADDMULTI: 453 case SIOCDELMULTI: 454 if (ifr == 0) { 455 error = EAFNOSUPPORT; 456 break; 457 } 458 switch (ifr->ifr_addr.sa_family) { 459 #ifdef INET 460 case AF_INET: 461 break; 462 #endif 463 #ifdef INET6 464 case AF_INET6: 465 break; 466 #endif 467 default: 468 error = EAFNOSUPPORT; 469 break; 470 } 471 break; 472 case GRESPROTO: 473 /* Check for superuser */ 474 if ((error = suser(prc, 0)) != 0) 475 break; 476 477 sc->g_proto = ifr->ifr_flags; 478 switch (sc->g_proto) { 479 case IPPROTO_GRE: 480 ifp->if_flags |= IFF_LINK0; 481 break; 482 case IPPROTO_MOBILE: 483 ifp->if_flags &= ~IFF_LINK0; 484 break; 485 default: 486 error = EPROTONOSUPPORT; 487 break; 488 } 489 break; 490 case GREGPROTO: 491 ifr->ifr_flags = sc->g_proto; 492 break; 493 case GRESADDRS: 494 case GRESADDRD: 495 /* Check for superuser */ 496 if ((error = suser(prc, 0)) != 0) 497 break; 498 499 /* 500 * set tunnel endpoints, compute a less specific route 501 * to the remote end and mark if as up 502 */ 503 sa = &ifr->ifr_addr; 504 if (cmd == GRESADDRS ) 505 sc->g_src = (satosin(sa))->sin_addr; 506 if (cmd == GRESADDRD ) 507 sc->g_dst = (satosin(sa))->sin_addr; 508 recompute: 509 if ((sc->g_src.s_addr != 
INADDR_ANY) && 510 (sc->g_dst.s_addr != INADDR_ANY)) { 511 if (sc->route.ro_rt != 0) { 512 /* free old route */ 513 RTFREE(sc->route.ro_rt); 514 sc->route.ro_rt = (struct rtentry *) 0; 515 } 516 gre_compute_route(sc); 517 ifp->if_flags |= IFF_UP; 518 } 519 break; 520 case GREGADDRS: 521 bzero(&si, sizeof(si)); 522 si.sin_family = AF_INET; 523 si.sin_len = sizeof(struct sockaddr_in); 524 si.sin_addr.s_addr = sc->g_src.s_addr; 525 sa = sintosa(&si); 526 ifr->ifr_addr = *sa; 527 break; 528 case GREGADDRD: 529 bzero(&si, sizeof(si)); 530 si.sin_family = AF_INET; 531 si.sin_len = sizeof(struct sockaddr_in); 532 si.sin_addr.s_addr = sc->g_dst.s_addr; 533 sa = sintosa(&si); 534 ifr->ifr_addr = *sa; 535 break; 536 case SIOCSLIFPHYADDR: 537 if ((error = suser(prc, 0)) != 0) 538 break; 539 if (lifr->addr.ss_family != AF_INET || 540 lifr->dstaddr.ss_family != AF_INET) { 541 error = EAFNOSUPPORT; 542 break; 543 } 544 if (lifr->addr.ss_len != sizeof(si) || 545 lifr->dstaddr.ss_len != sizeof(si)) { 546 error = EINVAL; 547 break; 548 } 549 sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr; 550 sc->g_dst = 551 (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr; 552 goto recompute; 553 case SIOCDIFPHYADDR: 554 if ((error = suser(prc, 0)) != 0) 555 break; 556 sc->g_src.s_addr = INADDR_ANY; 557 sc->g_dst.s_addr = INADDR_ANY; 558 break; 559 case SIOCGLIFPHYADDR: 560 if (sc->g_src.s_addr == INADDR_ANY || 561 sc->g_dst.s_addr == INADDR_ANY) { 562 error = EADDRNOTAVAIL; 563 break; 564 } 565 bzero(&si, sizeof(si)); 566 si.sin_family = AF_INET; 567 si.sin_len = sizeof(struct sockaddr_in); 568 si.sin_addr.s_addr = sc->g_src.s_addr; 569 memcpy(&lifr->addr, &si, sizeof(si)); 570 si.sin_addr.s_addr = sc->g_dst.s_addr; 571 memcpy(&lifr->dstaddr, &si, sizeof(si)); 572 break; 573 default: 574 error = ENOTTY; 575 } 576 577 splx(s); 578 return (error); 579 } 580 581 /* 582 * computes a route to our destination that is not the one 583 * which would be taken by ip_output(), as this 
one will loop back to 584 * us. If the interface is p2p as a--->b, then a routing entry exists 585 * If we now send a packet to b (e.g. ping b), this will come down here 586 * gets src=a, dst=b tacked on and would from ip_output() sent back to 587 * if_gre. 588 * Goal here is to compute a route to b that is less specific than 589 * a-->b. We know that this one exists as in normal operation we have 590 * at least a default route which matches. 591 */ 592 593 static void 594 gre_compute_route(struct gre_softc *sc) 595 { 596 struct route *ro; 597 u_int32_t a, b, c; 598 599 ro = &sc->route; 600 601 bzero(ro, sizeof(struct route)); 602 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sc->g_dst; 603 ro->ro_dst.sa_family = AF_INET; 604 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 605 606 /* 607 * toggle last bit, so our interface is not found, but a less 608 * specific route. I'd rather like to specify a shorter mask, 609 * but this is not possible. Should work though. XXX 610 * there is a simpler way ... 611 */ 612 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) { 613 a = ntohl(sc->g_dst.s_addr); 614 b = a & 0x01; 615 c = a & 0xfffffffe; 616 b = b ^ 0x01; 617 a = b | c; 618 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr.s_addr = htonl(a); 619 } 620 621 rtalloc(ro); 622 if (ro->ro_rt == 0) 623 return; 624 625 /* 626 * Check whether we just created a loop. An even more paranoid 627 * check would be against all GRE interfaces, but that would 628 * not allow people to link GRE tunnels. 629 */ 630 if (ro->ro_rt->rt_ifp == &sc->sc_if) { 631 RTFREE(ro->ro_rt); 632 ro->ro_rt = (struct rtentry *) 0; 633 return; 634 } 635 636 /* 637 * now change it back - else ip_output will just drop 638 * the route and search one to this interface ... 639 */ 640 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) 641 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sc->g_dst; 642 } 643 644 /* 645 * do a checksum of a buffer - much like in_cksum, which operates on 646 * mbufs. 
/*
 * Checksum a flat memory buffer, much like in_cksum() does for mbufs.
 *
 *	p	start of the buffer, read as 16-bit words
 *	len	length of the buffer in bytes
 *
 * The words are added up as stored; an odd trailing byte is counted as
 * the first byte of a word whose second byte is zero.  The carries are
 * folded back into the low 16 bits and the complement of the result is
 * returned.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	u_int nwords = len >> 1;
	u_int i;

	for (i = 0; i < nwords; i++)
		acc += p[i];

	if (len & 1) {
		union {
			u_short	w;
			u_char	c[2];
		} tail;

		tail.c[0] = *(u_char *)(p + nwords);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc >> 16) + (acc & 0xffff);
	acc += (acc >> 16);
	return (~acc);
}
#endif /* NGRE > 0 */