1 /* $OpenBSD: if_gre.c,v 1.45 2009/06/02 17:10:23 henning Exp $ */ 2 /* $NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */ 3 4 /* 5 * Copyright (c) 1998 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Heiko W.Rupp <hwr@pilhuhn.de> 10 * 11 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 /* 36 * Encapsulate L3 protocols into IP, per RFC 1701 and 1702. 37 * See gre(4) for more details. 38 * Also supported: IP in IP encapsulation (proto 55) per RFC 2004. 39 */ 40 41 #include "gre.h" 42 #if NGRE > 0 43 44 #include "bpfilter.h" 45 #include "pf.h" 46 47 #include <sys/param.h> 48 #include <sys/proc.h> 49 #include <sys/mbuf.h> 50 #include <sys/socket.h> 51 #include <sys/sockio.h> 52 #include <sys/kernel.h> 53 #include <sys/systm.h> 54 55 #include <net/if.h> 56 #include <net/if_types.h> 57 #include <net/netisr.h> 58 #include <net/route.h> 59 60 #ifdef INET 61 #include <netinet/in.h> 62 #include <netinet/in_systm.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/if_ether.h> 67 #else 68 #error "if_gre used without inet" 69 #endif 70 71 #ifdef NETATALK 72 #include <netatalk/at.h> 73 #include <netatalk/at_var.h> 74 #include <netatalk/at_extern.h> 75 #endif 76 77 #if NBPFILTER > 0 78 #include <net/bpf.h> 79 #endif 80 81 #if NPF > 0 82 #include <net/pfvar.h> 83 #endif 84 85 #include <net/if_gre.h> 86 87 #ifndef GRE_RECURSION_LIMIT 88 #define GRE_RECURSION_LIMIT 3 /* How many levels of recursion allowed */ 89 #endif /* GRE_RECURSION_LIMIT */ 90 91 /* 92 * It is not easy to calculate the right value for a GRE MTU. 93 * We leave this task to the admin and use the same default that 94 * other vendors use. 95 */ 96 #define GREMTU 1476 97 98 int gre_clone_create(struct if_clone *, int); 99 int gre_clone_destroy(struct ifnet *); 100 101 struct gre_softc_head gre_softc_list; 102 struct if_clone gre_cloner = 103 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy); 104 105 /* 106 * We can control the acceptance of GRE and MobileIP packets by 107 * altering the sysctl net.inet.gre.allow and net.inet.mobileip.allow values 108 * respectively. Zero means drop them, all else is acceptance. We can also 109 * control acceptance of WCCPv1-style GRE packets through the 110 * net.inet.gre.wccp value, but be aware it depends upon normal GRE being 111 * allowed as well. 112 * 113 */ 114 int gre_allow = 0; 115 int gre_wccp = 0; 116 int ip_mobile_allow = 0; 117 118 static void gre_compute_route(struct gre_softc *sc); 119 120 void 121 greattach(int n) 122 { 123 LIST_INIT(&gre_softc_list); 124 if_clone_attach(&gre_cloner); 125 } 126 127 int 128 gre_clone_create(struct if_clone *ifc, int unit) 129 { 130 struct gre_softc *sc; 131 int s; 132 133 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 134 if (!sc) 135 return (ENOMEM); 136 snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d", 137 ifc->ifc_name, unit); 138 sc->sc_if.if_softc = sc; 139 sc->sc_if.if_type = IFT_TUNNEL; 140 sc->sc_if.if_addrlen = 0; 141 sc->sc_if.if_hdrlen = 24; /* IP + GRE */ 142 sc->sc_if.if_mtu = GREMTU; 143 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 144 sc->sc_if.if_output = gre_output; 145 sc->sc_if.if_ioctl = gre_ioctl; 146 sc->sc_if.if_collisions = 0; 147 sc->sc_if.if_ierrors = 0; 148 sc->sc_if.if_oerrors = 0; 149 sc->sc_if.if_ipackets = 0; 150 sc->sc_if.if_opackets = 0; 151 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; 152 sc->g_proto = IPPROTO_GRE; 153 sc->sc_if.if_flags |= IFF_LINK0; 154 155 if_attach(&sc->sc_if); 156 if_alloc_sadl(&sc->sc_if); 157 158 #if NBPFILTER > 0 159 bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_NULL, 160 sizeof(u_int32_t)); 161 #endif 162 s = splnet(); 163 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list); 164 splx(s); 165 166 return (0); 167 } 168 169 int 170 gre_clone_destroy(struct ifnet *ifp) 171 { 172 struct gre_softc *sc = ifp->if_softc; 173 int s; 174 175 s = splnet(); 176 LIST_REMOVE(sc, sc_list); 177 splx(s); 178 179 if_detach(ifp); 180 181 free(sc, M_DEVBUF); 182 return (0); 183 } 184 185 /* 186 * The output routine. Takes a packet and encapsulates it in the protocol 187 * given by sc->g_proto. See also RFC 1701 and RFC 2004. 188 */ 189 190 int 191 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 192 struct rtentry *rt) 193 { 194 int error = 0; 195 struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc); 196 struct greip *gh = NULL; 197 struct ip *inp = NULL; 198 u_int8_t ip_tos = 0; 199 u_int16_t etype = 0; 200 struct mobile_h mob_h; 201 struct m_tag *mtag; 202 203 if ((ifp->if_flags & IFF_UP) == 0 || 204 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { 205 m_freem(m); 206 error = ENETDOWN; 207 goto end; 208 } 209 210 /* Try to limit infinite recursion through misconfiguration. */ 211 for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag; 212 mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) { 213 if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) { 214 IF_DROP(&ifp->if_snd); 215 m_freem(m); 216 error = EIO; 217 goto end; 218 } 219 } 220 221 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT); 222 if (mtag == NULL) { 223 IF_DROP(&ifp->if_snd); 224 m_freem(m); 225 error = ENOBUFS; 226 goto end; 227 } 228 bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 229 m_tag_prepend(m, mtag); 230 231 m->m_flags &= ~(M_BCAST|M_MCAST); 232 233 #if NBPFILTER >0 234 if (ifp->if_bpf) 235 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT); 236 #endif 237 238 if (sc->g_proto == IPPROTO_MOBILE) { 239 if (ip_mobile_allow == 0) { 240 IF_DROP(&ifp->if_snd); 241 m_freem(m); 242 error = EACCES; 243 goto end; 244 } 245 246 if (dst->sa_family == AF_INET) { 247 struct mbuf *m0; 248 int msiz; 249 250 /* 251 * Make sure the complete IP header (with options) 252 * is in the first mbuf. 253 */ 254 if (m->m_len < sizeof(struct ip)) { 255 m = m_pullup(m, sizeof(struct ip)); 256 if (m == NULL) { 257 IF_DROP(&ifp->if_snd); 258 error = ENOBUFS; 259 goto end; 260 } else 261 inp = mtod(m, struct ip *); 262 263 if (m->m_len < inp->ip_hl << 2) { 264 m = m_pullup(m, inp->ip_hl << 2); 265 if (m == NULL) { 266 IF_DROP(&ifp->if_snd); 267 error = ENOBUFS; 268 goto end; 269 } 270 } 271 } 272 273 inp = mtod(m, struct ip *); 274 275 bzero(&mob_h, MOB_H_SIZ_L); 276 mob_h.proto = (inp->ip_p) << 8; 277 mob_h.odst = inp->ip_dst.s_addr; 278 inp->ip_dst.s_addr = sc->g_dst.s_addr; 279 280 /* 281 * If the packet comes from our host, we only change 282 * the destination address in the IP header. 283 * Otherwise we need to save and change the source. 284 */ 285 if (inp->ip_src.s_addr == sc->g_src.s_addr) { 286 msiz = MOB_H_SIZ_S; 287 } else { 288 mob_h.proto |= MOB_H_SBIT; 289 mob_h.osrc = inp->ip_src.s_addr; 290 inp->ip_src.s_addr = sc->g_src.s_addr; 291 msiz = MOB_H_SIZ_L; 292 } 293 294 HTONS(mob_h.proto); 295 mob_h.hcrc = gre_in_cksum((u_int16_t *) &mob_h, msiz); 296 297 /* Squeeze in the mobility header */ 298 if ((m->m_data - msiz) < m->m_pktdat) { 299 /* Need new mbuf */ 300 MGETHDR(m0, M_DONTWAIT, MT_HEADER); 301 if (m0 == NULL) { 302 IF_DROP(&ifp->if_snd); 303 m_freem(m); 304 error = ENOBUFS; 305 goto end; 306 } 307 M_MOVE_HDR(m0, m); 308 309 m0->m_len = msiz + (inp->ip_hl << 2); 310 m0->m_data += max_linkhdr; 311 m0->m_pkthdr.len = m->m_pkthdr.len + msiz; 312 m->m_data += inp->ip_hl << 2; 313 m->m_len -= inp->ip_hl << 2; 314 315 bcopy((caddr_t) inp, mtod(m0, caddr_t), 316 sizeof(struct ip)); 317 318 m0->m_next = m; 319 m = m0; 320 } else { /* we have some space left in the old one */ 321 m->m_data -= msiz; 322 m->m_len += msiz; 323 m->m_pkthdr.len += msiz; 324 bcopy(inp, mtod(m, caddr_t), 325 inp->ip_hl << 2); 326 } 327 328 /* Copy Mobility header */ 329 inp = mtod(m, struct ip *); 330 bcopy(&mob_h, (caddr_t)(inp + 1), (unsigned) msiz); 331 inp->ip_len = htons(ntohs(inp->ip_len) + msiz); 332 } else { /* AF_INET */ 333 IF_DROP(&ifp->if_snd); 334 m_freem(m); 335 error = EINVAL; 336 goto end; 337 } 338 } else if (sc->g_proto == IPPROTO_GRE) { 339 if (gre_allow == 0) { 340 IF_DROP(&ifp->if_snd); 341 m_freem(m); 342 error = EACCES; 343 goto end; 344 } 345 346 switch(dst->sa_family) { 347 case AF_INET: 348 if (m->m_len < sizeof(struct ip)) { 349 m = m_pullup(m, sizeof(struct ip)); 350 if (m == NULL) { 351 IF_DROP(&ifp->if_snd); 352 error = ENOBUFS; 353 goto end; 354 } 355 } 356 357 inp = mtod(m, struct ip *); 358 ip_tos = inp->ip_tos; 359 etype = ETHERTYPE_IP; 360 break; 361 #ifdef NETATALK 362 case AF_APPLETALK: 363 etype = ETHERTYPE_AT; 364 break; 365 #endif 366 #ifdef INET6 367 case AF_INET6: 368 etype = ETHERTYPE_IPV6; 369 break; 370 #endif 371 default: 372 IF_DROP(&ifp->if_snd); 373 m_freem(m); 374 error = EAFNOSUPPORT; 375 goto end; 376 } 377 378 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT); 379 } else { 380 IF_DROP(&ifp->if_snd); 381 m_freem(m); 382 error = EINVAL; 383 goto end; 384 } 385 386 if (m == NULL) { 387 IF_DROP(&ifp->if_snd); 388 error = ENOBUFS; 389 goto end; 390 } 391 392 gh = mtod(m, struct greip *); 393 if (sc->g_proto == IPPROTO_GRE) { 394 /* We don't support any GRE flags for now */ 395 396 bzero((void *) &gh->gi_g, sizeof(struct gre_h)); 397 gh->gi_ptype = htons(etype); 398 } 399 400 gh->gi_pr = sc->g_proto; 401 if (sc->g_proto != IPPROTO_MOBILE) { 402 gh->gi_src = sc->g_src; 403 gh->gi_dst = sc->g_dst; 404 ((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2; 405 ((struct ip *) gh)->ip_ttl = ip_defttl; 406 ((struct ip *) gh)->ip_tos = ip_tos; 407 gh->gi_len = htons(m->m_pkthdr.len); 408 } 409 410 ifp->if_opackets++; 411 ifp->if_obytes += m->m_pkthdr.len; 412 413 #if NPF > 0 414 pf_pkt_addr_changed(m); 415 #endif 416 417 /* Send it off */ 418 error = ip_output(m, (void *)NULL, &sc->route, 0, (void *)NULL, (void *)NULL); 419 end: 420 if (error) 421 ifp->if_oerrors++; 422 return (error); 423 } 424 425 int 426 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 427 { 428 429 struct ifreq *ifr = (struct ifreq *) data; 430 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 431 struct gre_softc *sc = ifp->if_softc; 432 int s; 433 struct sockaddr_in si; 434 struct sockaddr *sa = NULL; 435 int error = 0; 436 struct proc *prc = curproc; /* XXX */ 437 438 s = splnet(); 439 switch(cmd) { 440 case SIOCSIFADDR: 441 ifp->if_flags |= IFF_UP; 442 break; 443 case SIOCSIFDSTADDR: 444 break; 445 case SIOCSIFFLAGS: 446 if ((ifr->ifr_flags & IFF_LINK0) != 0) 447 sc->g_proto = IPPROTO_GRE; 448 else 449 sc->g_proto = IPPROTO_MOBILE; 450 break; 451 case SIOCSIFMTU: 452 if (ifr->ifr_mtu < 576) { 453 error = EINVAL; 454 break; 455 } 456 ifp->if_mtu = ifr->ifr_mtu; 457 break; 458 case SIOCGIFMTU: 459 ifr->ifr_mtu = sc->sc_if.if_mtu; 460 break; 461 case SIOCADDMULTI: 462 case SIOCDELMULTI: 463 if (ifr == 0) { 464 error = EAFNOSUPPORT; 465 break; 466 } 467 switch (ifr->ifr_addr.sa_family) { 468 #ifdef INET 469 case AF_INET: 470 break; 471 #endif 472 #ifdef INET6 473 case AF_INET6: 474 break; 475 #endif 476 default: 477 error = EAFNOSUPPORT; 478 break; 479 } 480 break; 481 case GRESPROTO: 482 /* Check for superuser */ 483 if ((error = suser(prc, 0)) != 0) 484 break; 485 486 sc->g_proto = ifr->ifr_flags; 487 switch (sc->g_proto) { 488 case IPPROTO_GRE: 489 ifp->if_flags |= IFF_LINK0; 490 break; 491 case IPPROTO_MOBILE: 492 ifp->if_flags &= ~IFF_LINK0; 493 break; 494 default: 495 error = EPROTONOSUPPORT; 496 break; 497 } 498 break; 499 case GREGPROTO: 500 ifr->ifr_flags = sc->g_proto; 501 break; 502 case GRESADDRS: 503 case GRESADDRD: 504 /* Check for superuser */ 505 if ((error = suser(prc, 0)) != 0) 506 break; 507 508 /* 509 * set tunnel endpoints, compute a less specific route 510 * to the remote end and mark if as up 511 */ 512 sa = &ifr->ifr_addr; 513 if (cmd == GRESADDRS ) 514 sc->g_src = (satosin(sa))->sin_addr; 515 if (cmd == GRESADDRD ) 516 sc->g_dst = (satosin(sa))->sin_addr; 517 recompute: 518 if ((sc->g_src.s_addr != INADDR_ANY) && 519 (sc->g_dst.s_addr != INADDR_ANY)) { 520 if (sc->route.ro_rt != 0) { 521 /* free old route */ 522 RTFREE(sc->route.ro_rt); 523 sc->route.ro_rt = (struct rtentry *) 0; 524 } 525 gre_compute_route(sc); 526 ifp->if_flags |= IFF_UP; 527 } 528 break; 529 case GREGADDRS: 530 bzero(&si, sizeof(si)); 531 si.sin_family = AF_INET; 532 si.sin_len = sizeof(struct sockaddr_in); 533 si.sin_addr.s_addr = sc->g_src.s_addr; 534 sa = sintosa(&si); 535 ifr->ifr_addr = *sa; 536 break; 537 case GREGADDRD: 538 bzero(&si, sizeof(si)); 539 si.sin_family = AF_INET; 540 si.sin_len = sizeof(struct sockaddr_in); 541 si.sin_addr.s_addr = sc->g_dst.s_addr; 542 sa = sintosa(&si); 543 ifr->ifr_addr = *sa; 544 break; 545 case SIOCSLIFPHYADDR: 546 if ((error = suser(prc, 0)) != 0) 547 break; 548 if (lifr->addr.ss_family != AF_INET || 549 lifr->dstaddr.ss_family != AF_INET) { 550 error = EAFNOSUPPORT; 551 break; 552 } 553 if (lifr->addr.ss_len != sizeof(si) || 554 lifr->dstaddr.ss_len != sizeof(si)) { 555 error = EINVAL; 556 break; 557 } 558 sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr; 559 sc->g_dst = 560 (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr; 561 goto recompute; 562 case SIOCDIFPHYADDR: 563 if ((error = suser(prc, 0)) != 0) 564 break; 565 sc->g_src.s_addr = INADDR_ANY; 566 sc->g_dst.s_addr = INADDR_ANY; 567 break; 568 case SIOCGLIFPHYADDR: 569 if (sc->g_src.s_addr == INADDR_ANY || 570 sc->g_dst.s_addr == INADDR_ANY) { 571 error = EADDRNOTAVAIL; 572 break; 573 } 574 bzero(&si, sizeof(si)); 575 si.sin_family = AF_INET; 576 si.sin_len = sizeof(struct sockaddr_in); 577 si.sin_addr.s_addr = sc->g_src.s_addr; 578 memcpy(&lifr->addr, &si, sizeof(si)); 579 si.sin_addr.s_addr = sc->g_dst.s_addr; 580 memcpy(&lifr->dstaddr, &si, sizeof(si)); 581 break; 582 default: 583 error = ENOTTY; 584 } 585 586 splx(s); 587 return (error); 588 } 589 590 /* 591 * computes a route to our destination that is not the one 592 * which would be taken by ip_output(), as this one will loop back to 593 * us. If the interface is p2p as a--->b, then a routing entry exists 594 * If we now send a packet to b (e.g. ping b), this will come down here 595 * gets src=a, dst=b tacked on and would from ip_output() sent back to 596 * if_gre. 597 * Goal here is to compute a route to b that is less specific than 598 * a-->b. We know that this one exists as in normal operation we have 599 * at least a default route which matches. 600 */ 601 602 static void 603 gre_compute_route(struct gre_softc *sc) 604 { 605 struct route *ro; 606 u_int32_t a, b, c; 607 608 ro = &sc->route; 609 610 bzero(ro, sizeof(struct route)); 611 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sc->g_dst; 612 ro->ro_dst.sa_family = AF_INET; 613 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 614 615 /* 616 * toggle last bit, so our interface is not found, but a less 617 * specific route. I'd rather like to specify a shorter mask, 618 * but this is not possible. Should work though. XXX 619 * there is a simpler way ... 620 */ 621 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) { 622 a = ntohl(sc->g_dst.s_addr); 623 b = a & 0x01; 624 c = a & 0xfffffffe; 625 b = b ^ 0x01; 626 a = b | c; 627 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr.s_addr = htonl(a); 628 } 629 630 rtalloc(ro); 631 if (ro->ro_rt == 0) 632 return; 633 634 /* 635 * Check whether we just created a loop. An even more paranoid 636 * check would be against all GRE interfaces, but that would 637 * not allow people to link GRE tunnels. 638 */ 639 if (ro->ro_rt->rt_ifp == &sc->sc_if) { 640 RTFREE(ro->ro_rt); 641 ro->ro_rt = (struct rtentry *) 0; 642 return; 643 } 644 645 /* 646 * now change it back - else ip_output will just drop 647 * the route and search one to this interface ... 648 */ 649 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) 650 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sc->g_dst; 651 } 652 653 /* 654 * do a checksum of a buffer - much like in_cksum, which operates on 655 * mbufs. 656 */ 657 u_int16_t 658 gre_in_cksum(u_int16_t *p, u_int len) 659 { 660 u_int32_t sum = 0; 661 int nwords = len >> 1; 662 663 while (nwords-- != 0) 664 sum += *p++; 665 666 if (len & 1) { 667 union { 668 u_short w; 669 u_char c[2]; 670 } u; 671 u.c[0] = *(u_char *) p; 672 u.c[1] = 0; 673 sum += u.w; 674 } 675 676 /* end-around-carry */ 677 sum = (sum >> 16) + (sum & 0xffff); 678 sum += (sum >> 16); 679 return (~sum); 680 } 681 #endif 682