1 /* $NetBSD: if_gre.c,v 1.57 2005/05/20 16:23:05 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Heiko W.Rupp <hwr@pilhuhn.de> 9 * 10 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de> 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the NetBSD 23 * Foundation, Inc. and its contributors. 24 * 4. Neither the name of The NetBSD Foundation nor the names of its 25 * contributors may be used to endorse or promote products derived 26 * from this software without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 38 * POSSIBILITY OF SUCH DAMAGE. 39 */ 40 41 /* 42 * Encapsulate L3 protocols into IP 43 * See RFC 1701 and 1702 for more details. 44 * If_gre is compatible with Cisco GRE tunnels, so you can 45 * have a NetBSD box as the other end of a tunnel interface of a Cisco 46 * router. See gre(4) for more details. 47 * Also supported: IP in IP encaps (proto 55) as of RFC 2004 48 */ 49 50 #include <sys/cdefs.h> 51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.57 2005/05/20 16:23:05 christos Exp $"); 52 53 #include "opt_inet.h" 54 #include "opt_ns.h" 55 #include "bpfilter.h" 56 57 #ifdef INET 58 #include <sys/param.h> 59 #include <sys/malloc.h> 60 #include <sys/mbuf.h> 61 #include <sys/proc.h> 62 #include <sys/protosw.h> 63 #include <sys/socket.h> 64 #include <sys/ioctl.h> 65 #include <sys/queue.h> 66 #if __NetBSD__ 67 #include <sys/systm.h> 68 #endif 69 70 #include <machine/cpu.h> 71 72 #include <net/ethertypes.h> 73 #include <net/if.h> 74 #include <net/if_types.h> 75 #include <net/netisr.h> 76 #include <net/route.h> 77 78 #ifdef INET 79 #include <netinet/in.h> 80 #include <netinet/in_systm.h> 81 #include <netinet/in_var.h> 82 #include <netinet/ip.h> 83 #include <netinet/ip_var.h> 84 #else 85 #error "Huh? if_gre without inet?" 86 #endif 87 88 #ifdef NS 89 #include <netns/ns.h> 90 #include <netns/ns_if.h> 91 #endif 92 93 #ifdef NETATALK 94 #include <netatalk/at.h> 95 #include <netatalk/at_var.h> 96 #include <netatalk/at_extern.h> 97 #endif 98 99 #if NBPFILTER > 0 100 #include <sys/time.h> 101 #include <net/bpf.h> 102 #endif 103 104 #include <net/if_gre.h> 105 106 /* 107 * It is not easy to calculate the right value for a GRE MTU. 108 * We leave this task to the admin and use the same default that 109 * other vendors use. 110 */ 111 #define GREMTU 1476 112 113 struct gre_softc_head gre_softc_list; 114 int ip_gre_ttl = GRE_TTL; 115 116 int gre_clone_create __P((struct if_clone *, int)); 117 int gre_clone_destroy __P((struct ifnet *)); 118 119 struct if_clone gre_cloner = 120 IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy); 121 122 int gre_compute_route(struct gre_softc *sc); 123 124 int 125 gre_clone_create(ifc, unit) 126 struct if_clone *ifc; 127 int unit; 128 { 129 struct gre_softc *sc; 130 131 sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK); 132 memset(sc, 0, sizeof(struct gre_softc)); 133 134 snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d", 135 ifc->ifc_name, unit); 136 sc->sc_if.if_softc = sc; 137 sc->sc_if.if_type = IFT_TUNNEL; 138 sc->sc_if.if_addrlen = 0; 139 sc->sc_if.if_hdrlen = 24; /* IP + GRE */ 140 sc->sc_if.if_dlt = DLT_NULL; 141 sc->sc_if.if_mtu = GREMTU; 142 sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST; 143 sc->sc_if.if_output = gre_output; 144 sc->sc_if.if_ioctl = gre_ioctl; 145 sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; 146 sc->g_proto = IPPROTO_GRE; 147 sc->sc_if.if_flags |= IFF_LINK0; 148 if_attach(&sc->sc_if); 149 if_alloc_sadl(&sc->sc_if); 150 #if NBPFILTER > 0 151 bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t)); 152 #endif 153 LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list); 154 return (0); 155 } 156 157 int 158 gre_clone_destroy(ifp) 159 struct ifnet *ifp; 160 { 161 struct gre_softc *sc = ifp->if_softc; 162 163 LIST_REMOVE(sc, sc_list); 164 #if NBPFILTER > 0 165 bpfdetach(ifp); 166 #endif 167 if_detach(ifp); 168 free(sc, M_DEVBUF); 169 170 return (0); 171 } 172 173 /* 174 * The output routine. Takes a packet and encapsulates it in the protocol 175 * given by sc->g_proto. See also RFC 1701 and RFC 2004 176 */ 177 int 178 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 179 struct rtentry *rt) 180 { 181 int error = 0; 182 struct gre_softc *sc = ifp->if_softc; 183 struct greip *gh; 184 struct ip *ip; 185 u_int8_t ip_tos = 0; 186 u_int16_t etype = 0; 187 struct mobile_h mob_h; 188 189 if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 || 190 sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { 191 m_freem(m); 192 error = ENETDOWN; 193 goto end; 194 } 195 196 gh = NULL; 197 ip = NULL; 198 199 #if NBPFILTER >0 200 if (ifp->if_bpf) 201 bpf_mtap_af(ifp->if_bpf, dst->sa_family, m); 202 #endif 203 204 m->m_flags &= ~(M_BCAST|M_MCAST); 205 206 if (sc->g_proto == IPPROTO_MOBILE) { 207 if (dst->sa_family == AF_INET) { 208 struct mbuf *m0; 209 int msiz; 210 211 ip = mtod(m, struct ip *); 212 213 memset(&mob_h, 0, MOB_H_SIZ_L); 214 mob_h.proto = (ip->ip_p) << 8; 215 mob_h.odst = ip->ip_dst.s_addr; 216 ip->ip_dst.s_addr = sc->g_dst.s_addr; 217 218 /* 219 * If the packet comes from our host, we only change 220 * the destination address in the IP header. 221 * Else we also need to save and change the source 222 */ 223 if (in_hosteq(ip->ip_src, sc->g_src)) { 224 msiz = MOB_H_SIZ_S; 225 } else { 226 mob_h.proto |= MOB_H_SBIT; 227 mob_h.osrc = ip->ip_src.s_addr; 228 ip->ip_src.s_addr = sc->g_src.s_addr; 229 msiz = MOB_H_SIZ_L; 230 } 231 HTONS(mob_h.proto); 232 mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); 233 234 if ((m->m_data - msiz) < m->m_pktdat) { 235 /* need new mbuf */ 236 MGETHDR(m0, M_DONTWAIT, MT_HEADER); 237 if (m0 == NULL) { 238 IF_DROP(&ifp->if_snd); 239 m_freem(m); 240 error = ENOBUFS; 241 goto end; 242 } 243 m0->m_next = m; 244 m->m_data += sizeof(struct ip); 245 m->m_len -= sizeof(struct ip); 246 m0->m_pkthdr.len = m->m_pkthdr.len + msiz; 247 m0->m_len = msiz + sizeof(struct ip); 248 m0->m_data += max_linkhdr; 249 memcpy(mtod(m0, caddr_t), (caddr_t)ip, 250 sizeof(struct ip)); 251 m = m0; 252 } else { /* we have some space left in the old one */ 253 m->m_data -= msiz; 254 m->m_len += msiz; 255 m->m_pkthdr.len += msiz; 256 memmove(mtod(m, caddr_t), ip, 257 sizeof(struct ip)); 258 } 259 ip = mtod(m, struct ip *); 260 memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz); 261 ip->ip_len = htons(ntohs(ip->ip_len) + msiz); 262 } else { /* AF_INET */ 263 IF_DROP(&ifp->if_snd); 264 m_freem(m); 265 error = EINVAL; 266 goto end; 267 } 268 } else if (sc->g_proto == IPPROTO_GRE) { 269 #ifdef GRE_DEBUG 270 printf( "start gre_output/GRE, dst->sa_family=%d\n", 271 dst->sa_family ); 272 #endif 273 switch (dst->sa_family) { 274 case AF_INET: 275 ip = mtod(m, struct ip *); 276 ip_tos = ip->ip_tos; 277 etype = ETHERTYPE_IP; 278 break; 279 #ifdef NETATALK 280 case AF_APPLETALK: 281 etype = ETHERTYPE_ATALK; 282 break; 283 #endif 284 #ifdef NS 285 case AF_NS: 286 etype = ETHERTYPE_NS; 287 break; 288 #endif 289 #ifdef INET6 290 case AF_INET6: 291 etype = ETHERTYPE_IPV6; 292 break; 293 #endif 294 default: 295 IF_DROP(&ifp->if_snd); 296 m_freem(m); 297 error = EAFNOSUPPORT; 298 goto end; 299 } 300 M_PREPEND(m, sizeof(struct greip), M_DONTWAIT); 301 } else { 302 IF_DROP(&ifp->if_snd); 303 m_freem(m); 304 error = EINVAL; 305 goto end; 306 } 307 308 if (m == NULL) { /* impossible */ 309 IF_DROP(&ifp->if_snd); 310 error = ENOBUFS; 311 goto end; 312 } 313 314 gh = mtod(m, struct greip *); 315 if (sc->g_proto == IPPROTO_GRE) { 316 /* we don't have any GRE flags for now */ 317 318 memset((void *)&gh->gi_g, 0, sizeof(struct gre_h)); 319 gh->gi_ptype = htons(etype); 320 } 321 322 gh->gi_pr = sc->g_proto; 323 if (sc->g_proto != IPPROTO_MOBILE) { 324 gh->gi_src = sc->g_src; 325 gh->gi_dst = sc->g_dst; 326 ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2; 327 ((struct ip*)gh)->ip_ttl = ip_gre_ttl; 328 ((struct ip*)gh)->ip_tos = ip_tos; 329 gh->gi_len = htons(m->m_pkthdr.len); 330 } 331 332 ifp->if_opackets++; 333 ifp->if_obytes += m->m_pkthdr.len; 334 /* send it off */ 335 error = ip_output(m, NULL, &sc->route, 0, 336 (struct ip_moptions *)NULL, (struct socket *)NULL); 337 end: 338 if (error) 339 ifp->if_oerrors++; 340 return (error); 341 } 342 343 int 344 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 345 { 346 struct proc *p = curproc; /* XXX */ 347 struct ifreq *ifr = (struct ifreq *)data; 348 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 349 struct gre_softc *sc = ifp->if_softc; 350 int s; 351 struct sockaddr_in si; 352 struct sockaddr *sa = NULL; 353 int error; 354 355 error = 0; 356 357 s = splnet(); 358 switch (cmd) { 359 case SIOCSIFADDR: 360 ifp->if_flags |= IFF_UP; 361 break; 362 case SIOCSIFDSTADDR: 363 break; 364 case SIOCSIFFLAGS: 365 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 366 break; 367 if ((ifr->ifr_flags & IFF_LINK0) != 0) 368 sc->g_proto = IPPROTO_GRE; 369 else 370 sc->g_proto = IPPROTO_MOBILE; 371 break; 372 case SIOCSIFMTU: 373 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 374 break; 375 if (ifr->ifr_mtu < 576) { 376 error = EINVAL; 377 break; 378 } 379 ifp->if_mtu = ifr->ifr_mtu; 380 break; 381 case SIOCGIFMTU: 382 ifr->ifr_mtu = sc->sc_if.if_mtu; 383 break; 384 case SIOCADDMULTI: 385 case SIOCDELMULTI: 386 if (ifr == 0) { 387 error = EAFNOSUPPORT; 388 break; 389 } 390 switch (ifr->ifr_addr.sa_family) { 391 #ifdef INET 392 case AF_INET: 393 break; 394 #endif 395 #ifdef INET6 396 case AF_INET6: 397 break; 398 #endif 399 default: 400 error = EAFNOSUPPORT; 401 break; 402 } 403 break; 404 case GRESPROTO: 405 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 406 break; 407 sc->g_proto = ifr->ifr_flags; 408 switch (sc->g_proto) { 409 case IPPROTO_GRE: 410 ifp->if_flags |= IFF_LINK0; 411 break; 412 case IPPROTO_MOBILE: 413 ifp->if_flags &= ~IFF_LINK0; 414 break; 415 default: 416 error = EPROTONOSUPPORT; 417 break; 418 } 419 break; 420 case GREGPROTO: 421 ifr->ifr_flags = sc->g_proto; 422 break; 423 case GRESADDRS: 424 case GRESADDRD: 425 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 426 break; 427 /* 428 * set tunnel endpoints, compute a less specific route 429 * to the remote end and mark if as up 430 */ 431 sa = &ifr->ifr_addr; 432 if (cmd == GRESADDRS) 433 sc->g_src = (satosin(sa))->sin_addr; 434 if (cmd == GRESADDRD) 435 sc->g_dst = (satosin(sa))->sin_addr; 436 recompute: 437 if ((sc->g_src.s_addr != INADDR_ANY) && 438 (sc->g_dst.s_addr != INADDR_ANY)) { 439 if (sc->route.ro_rt != 0) /* free old route */ 440 RTFREE(sc->route.ro_rt); 441 if (gre_compute_route(sc) == 0) 442 ifp->if_flags |= IFF_RUNNING; 443 else 444 ifp->if_flags &= ~IFF_RUNNING; 445 } 446 break; 447 case GREGADDRS: 448 memset(&si, 0, sizeof(si)); 449 si.sin_family = AF_INET; 450 si.sin_len = sizeof(struct sockaddr_in); 451 si.sin_addr.s_addr = sc->g_src.s_addr; 452 sa = sintosa(&si); 453 ifr->ifr_addr = *sa; 454 break; 455 case GREGADDRD: 456 memset(&si, 0, sizeof(si)); 457 si.sin_family = AF_INET; 458 si.sin_len = sizeof(struct sockaddr_in); 459 si.sin_addr.s_addr = sc->g_dst.s_addr; 460 sa = sintosa(&si); 461 ifr->ifr_addr = *sa; 462 break; 463 case SIOCSLIFPHYADDR: 464 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 465 break; 466 if (lifr->addr.ss_family != AF_INET || 467 lifr->dstaddr.ss_family != AF_INET) { 468 error = EAFNOSUPPORT; 469 break; 470 } 471 if (lifr->addr.ss_len != sizeof(si) || 472 lifr->dstaddr.ss_len != sizeof(si)) { 473 error = EINVAL; 474 break; 475 } 476 sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr; 477 sc->g_dst = 478 (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr; 479 goto recompute; 480 case SIOCDIFPHYADDR: 481 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 482 break; 483 sc->g_src.s_addr = INADDR_ANY; 484 sc->g_dst.s_addr = INADDR_ANY; 485 break; 486 case SIOCGLIFPHYADDR: 487 if (sc->g_src.s_addr == INADDR_ANY || 488 sc->g_dst.s_addr == INADDR_ANY) { 489 error = EADDRNOTAVAIL; 490 break; 491 } 492 memset(&si, 0, sizeof(si)); 493 si.sin_family = AF_INET; 494 si.sin_len = sizeof(struct sockaddr_in); 495 si.sin_addr.s_addr = sc->g_src.s_addr; 496 memcpy(&lifr->addr, &si, sizeof(si)); 497 si.sin_addr.s_addr = sc->g_dst.s_addr; 498 memcpy(&lifr->dstaddr, &si, sizeof(si)); 499 break; 500 default: 501 error = EINVAL; 502 break; 503 } 504 505 splx(s); 506 return (error); 507 } 508 509 /* 510 * computes a route to our destination that is not the one 511 * which would be taken by ip_output(), as this one will loop back to 512 * us. If the interface is p2p as a--->b, then a routing entry exists 513 * If we now send a packet to b (e.g. ping b), this will come down here 514 * gets src=a, dst=b tacked on and would from ip_output() sent back to 515 * if_gre. 516 * Goal here is to compute a route to b that is less specific than 517 * a-->b. We know that this one exists as in normal operation we have 518 * at least a default route which matches. 519 */ 520 int 521 gre_compute_route(struct gre_softc *sc) 522 { 523 struct route *ro; 524 u_int32_t a, b, c; 525 526 ro = &sc->route; 527 528 memset(ro, 0, sizeof(struct route)); 529 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 530 ro->ro_dst.sa_family = AF_INET; 531 ro->ro_dst.sa_len = sizeof(ro->ro_dst); 532 533 /* 534 * toggle last bit, so our interface is not found, but a less 535 * specific route. I'd rather like to specify a shorter mask, 536 * but this is not possible. Should work though. XXX 537 * there is a simpler way ... 538 */ 539 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) { 540 a = ntohl(sc->g_dst.s_addr); 541 b = a & 0x01; 542 c = a & 0xfffffffe; 543 b = b ^ 0x01; 544 a = b | c; 545 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr 546 = htonl(a); 547 } 548 549 #ifdef DIAGNOSTIC 550 printf("%s: searching for a route to %s", sc->sc_if.if_xname, 551 inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); 552 #endif 553 554 rtalloc(ro); 555 556 /* 557 * check if this returned a route at all and this route is no 558 * recursion to ourself 559 */ 560 if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) { 561 #ifdef DIAGNOSTIC 562 if (ro->ro_rt == NULL) 563 printf(" - no route found!\n"); 564 else 565 printf(" - route loops back to ourself!\n"); 566 #endif 567 return EADDRNOTAVAIL; 568 } 569 570 /* 571 * now change it back - else ip_output will just drop 572 * the route and search one to this interface ... 573 */ 574 if ((sc->sc_if.if_flags & IFF_LINK1) == 0) 575 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; 576 577 #ifdef DIAGNOSTIC 578 printf(", choosing %s with gateway %s", ro->ro_rt->rt_ifp->if_xname, 579 inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr)); 580 printf("\n"); 581 #endif 582 583 return 0; 584 } 585 586 /* 587 * do a checksum of a buffer - much like in_cksum, which operates on 588 * mbufs. 589 */ 590 u_int16_t 591 gre_in_cksum(u_int16_t *p, u_int len) 592 { 593 u_int32_t sum = 0; 594 int nwords = len >> 1; 595 596 while (nwords-- != 0) 597 sum += *p++; 598 599 if (len & 1) { 600 union { 601 u_short w; 602 u_char c[2]; 603 } u; 604 u.c[0] = *(u_char *)p; 605 u.c[1] = 0; 606 sum += u.w; 607 } 608 609 /* end-around-carry */ 610 sum = (sum >> 16) + (sum & 0xffff); 611 sum += (sum >> 16); 612 return (~sum); 613 } 614 #endif 615 616 void greattach __P((int)); 617 618 /* ARGSUSED */ 619 void 620 greattach(count) 621 int count; 622 { 623 #ifdef INET 624 LIST_INIT(&gre_softc_list); 625 if_clone_attach(&gre_cloner); 626 #endif 627 } 628