xref: /netbsd-src/sys/net/if_gre.c (revision fd5cb0acea84d278e04e640d37ca2398f894991f)
1 /*	$NetBSD: if_gre.c,v 1.54 2004/12/06 02:59:23 christos Exp $ */
2 
3 /*
4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Heiko W.Rupp <hwr@pilhuhn.de>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Encapsulate L3 protocols into IP
41  * See RFC 1701 and 1702 for more details.
42  * If_gre is compatible with Cisco GRE tunnels, so you can
43  * have a NetBSD box as the other end of a tunnel interface of a Cisco
44  * router. See gre(4) for more details.
45  * Also supported:  IP in IP encaps (proto 55) as of RFC 2004
46  */
47 
48 #include <sys/cdefs.h>
49 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.54 2004/12/06 02:59:23 christos Exp $");
50 
51 #include "opt_inet.h"
52 #include "opt_ns.h"
53 #include "bpfilter.h"
54 
55 #ifdef INET
56 #include <sys/param.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/proc.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/ioctl.h>
63 #include <sys/queue.h>
64 #if __NetBSD__
65 #include <sys/systm.h>
66 #endif
67 
68 #include <machine/cpu.h>
69 
70 #include <net/ethertypes.h>
71 #include <net/if.h>
72 #include <net/if_types.h>
73 #include <net/netisr.h>
74 #include <net/route.h>
75 
76 #ifdef INET
77 #include <netinet/in.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/in_var.h>
80 #include <netinet/ip.h>
81 #include <netinet/ip_var.h>
82 #else
83 #error "Huh? if_gre without inet?"
84 #endif
85 
86 #ifdef NS
87 #include <netns/ns.h>
88 #include <netns/ns_if.h>
89 #endif
90 
91 #ifdef NETATALK
92 #include <netatalk/at.h>
93 #include <netatalk/at_var.h>
94 #include <netatalk/at_extern.h>
95 #endif
96 
97 #if NBPFILTER > 0
98 #include <sys/time.h>
99 #include <net/bpf.h>
100 #endif
101 
102 #include <net/if_gre.h>
103 
104 /*
105  * It is not easy to calculate the right value for a GRE MTU.
106  * We leave this task to the admin and use the same default that
107  * other vendors use.
108  */
109 #define GREMTU 1476
110 
111 struct gre_softc_head gre_softc_list;
112 int ip_gre_ttl = GRE_TTL;
113 
114 int	gre_clone_create __P((struct if_clone *, int));
115 int	gre_clone_destroy __P((struct ifnet *));
116 
117 struct if_clone gre_cloner =
118     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
119 
120 int gre_compute_route(struct gre_softc *sc);
121 
122 int
123 gre_clone_create(ifc, unit)
124 	struct if_clone *ifc;
125 	int unit;
126 {
127 	struct gre_softc *sc;
128 
129 	sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
130 	memset(sc, 0, sizeof(struct gre_softc));
131 
132 	snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
133 	    ifc->ifc_name, unit);
134 	sc->sc_if.if_softc = sc;
135 	sc->sc_if.if_type = IFT_TUNNEL;
136 	sc->sc_if.if_addrlen = 0;
137 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
138 	sc->sc_if.if_dlt = DLT_NULL;
139 	sc->sc_if.if_mtu = GREMTU;
140 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
141 	sc->sc_if.if_output = gre_output;
142 	sc->sc_if.if_ioctl = gre_ioctl;
143 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
144 	sc->g_proto = IPPROTO_GRE;
145 	sc->sc_if.if_flags |= IFF_LINK0;
146 	if_attach(&sc->sc_if);
147 	if_alloc_sadl(&sc->sc_if);
148 #if NBPFILTER > 0
149 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
150 #endif
151 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
152 	return (0);
153 }
154 
155 int
156 gre_clone_destroy(ifp)
157 	struct ifnet *ifp;
158 {
159 	struct gre_softc *sc = ifp->if_softc;
160 
161 	LIST_REMOVE(sc, sc_list);
162 #if NBPFILTER > 0
163 	bpfdetach(ifp);
164 #endif
165 	if_detach(ifp);
166 	free(sc, M_DEVBUF);
167 
168 	return (0);
169 }
170 
171 /*
172  * The output routine. Takes a packet and encapsulates it in the protocol
173  * given by sc->g_proto. See also RFC 1701 and RFC 2004
174  */
175 int
176 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
177 	   struct rtentry *rt)
178 {
179 	int error = 0;
180 	struct gre_softc *sc = ifp->if_softc;
181 	struct greip *gh;
182 	struct ip *ip;
183 	u_int16_t etype = 0;
184 	struct mobile_h mob_h;
185 
186 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
187 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
188 		m_freem(m);
189 		error = ENETDOWN;
190 		goto end;
191 	}
192 
193 	gh = NULL;
194 	ip = NULL;
195 
196 #if NBPFILTER >0
197 	if (ifp->if_bpf)
198 		bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
199 #endif
200 
201 	m->m_flags &= ~(M_BCAST|M_MCAST);
202 
203 	if (sc->g_proto == IPPROTO_MOBILE) {
204 		if (dst->sa_family == AF_INET) {
205 			struct mbuf *m0;
206 			int msiz;
207 
208 			ip = mtod(m, struct ip *);
209 
210 			memset(&mob_h, 0, MOB_H_SIZ_L);
211 			mob_h.proto = (ip->ip_p) << 8;
212 			mob_h.odst = ip->ip_dst.s_addr;
213 			ip->ip_dst.s_addr = sc->g_dst.s_addr;
214 
215 			/*
216 			 * If the packet comes from our host, we only change
217 			 * the destination address in the IP header.
218 			 * Else we also need to save and change the source
219 			 */
220 			if (in_hosteq(ip->ip_src, sc->g_src)) {
221 				msiz = MOB_H_SIZ_S;
222 			} else {
223 				mob_h.proto |= MOB_H_SBIT;
224 				mob_h.osrc = ip->ip_src.s_addr;
225 				ip->ip_src.s_addr = sc->g_src.s_addr;
226 				msiz = MOB_H_SIZ_L;
227 			}
228 			HTONS(mob_h.proto);
229 			mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
230 
231 			if ((m->m_data - msiz) < m->m_pktdat) {
232 				/* need new mbuf */
233 				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
234 				if (m0 == NULL) {
235 					IF_DROP(&ifp->if_snd);
236 					m_freem(m);
237 					error = ENOBUFS;
238 					goto end;
239 				}
240 				m0->m_next = m;
241 				m->m_data += sizeof(struct ip);
242 				m->m_len -= sizeof(struct ip);
243 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
244 				m0->m_len = msiz + sizeof(struct ip);
245 				m0->m_data += max_linkhdr;
246 				memcpy(mtod(m0, caddr_t), (caddr_t)ip,
247 				       sizeof(struct ip));
248 				m = m0;
249 			} else {  /* we have some space left in the old one */
250 				m->m_data -= msiz;
251 				m->m_len += msiz;
252 				m->m_pkthdr.len += msiz;
253 				memmove(mtod(m, caddr_t), ip,
254 					sizeof(struct ip));
255 			}
256 			ip = mtod(m, struct ip *);
257 			memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
258 			ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
259 		} else {  /* AF_INET */
260 			IF_DROP(&ifp->if_snd);
261 			m_freem(m);
262 			error = EINVAL;
263 			goto end;
264 		}
265 	} else if (sc->g_proto == IPPROTO_GRE) {
266 		switch (dst->sa_family) {
267 		case AF_INET:
268 			ip = mtod(m, struct ip *);
269 			etype = ETHERTYPE_IP;
270 			break;
271 #ifdef NETATALK
272 		case AF_APPLETALK:
273 			etype = ETHERTYPE_ATALK;
274 			break;
275 #endif
276 #ifdef NS
277 		case AF_NS:
278 			etype = ETHERTYPE_NS;
279 			break;
280 #endif
281 		default:
282 			IF_DROP(&ifp->if_snd);
283 			m_freem(m);
284 			error = EAFNOSUPPORT;
285 			goto end;
286 		}
287 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
288 	} else {
289 		IF_DROP(&ifp->if_snd);
290 		m_freem(m);
291 		error = EINVAL;
292 		goto end;
293 	}
294 
295 	if (m == NULL) {	/* impossible */
296 		IF_DROP(&ifp->if_snd);
297 		error = ENOBUFS;
298 		goto end;
299 	}
300 
301 	gh = mtod(m, struct greip *);
302 	if (sc->g_proto == IPPROTO_GRE) {
303 		/* we don't have any GRE flags for now */
304 
305 		memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
306 		gh->gi_ptype = htons(etype);
307 	}
308 
309 	gh->gi_pr = sc->g_proto;
310 	if (sc->g_proto != IPPROTO_MOBILE) {
311 		gh->gi_src = sc->g_src;
312 		gh->gi_dst = sc->g_dst;
313 		((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
314 		((struct ip*)gh)->ip_ttl = ip_gre_ttl;
315 		((struct ip*)gh)->ip_tos = ip->ip_tos;
316 		gh->gi_len = htons(m->m_pkthdr.len);
317 	}
318 
319 	ifp->if_opackets++;
320 	ifp->if_obytes += m->m_pkthdr.len;
321 	/* send it off */
322 	error = ip_output(m, NULL, &sc->route, 0,
323 	    (struct ip_moptions *)NULL, (struct socket *)NULL);
324   end:
325 	if (error)
326 		ifp->if_oerrors++;
327 	return (error);
328 }
329 
330 int
331 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
332 {
333 	struct proc *p = curproc;	/* XXX */
334 	struct ifreq *ifr = (struct ifreq *)data;
335 	struct if_laddrreq *lifr = (struct if_laddrreq *)data;
336 	struct gre_softc *sc = ifp->if_softc;
337 	int s;
338 	struct sockaddr_in si;
339 	struct sockaddr *sa = NULL;
340 	int error;
341 
342 	error = 0;
343 
344 	s = splnet();
345 	switch (cmd) {
346 	case SIOCSIFADDR:
347 		ifp->if_flags |= IFF_UP;
348 		break;
349 	case SIOCSIFDSTADDR:
350 		break;
351 	case SIOCSIFFLAGS:
352 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
353 			break;
354 		if ((ifr->ifr_flags & IFF_LINK0) != 0)
355 			sc->g_proto = IPPROTO_GRE;
356 		else
357 			sc->g_proto = IPPROTO_MOBILE;
358 		break;
359 	case SIOCSIFMTU:
360 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
361 			break;
362 		if (ifr->ifr_mtu < 576) {
363 			error = EINVAL;
364 			break;
365 		}
366 		ifp->if_mtu = ifr->ifr_mtu;
367 		break;
368 	case SIOCGIFMTU:
369 		ifr->ifr_mtu = sc->sc_if.if_mtu;
370 		break;
371 	case SIOCADDMULTI:
372 	case SIOCDELMULTI:
373 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
374 			break;
375 		if (ifr == 0) {
376 			error = EAFNOSUPPORT;
377 			break;
378 		}
379 		switch (ifr->ifr_addr.sa_family) {
380 #ifdef INET
381 		case AF_INET:
382 			break;
383 #endif
384 		default:
385 			error = EAFNOSUPPORT;
386 			break;
387 		}
388 		break;
389 	case GRESPROTO:
390 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
391 			break;
392 		sc->g_proto = ifr->ifr_flags;
393 		switch (sc->g_proto) {
394 		case IPPROTO_GRE:
395 			ifp->if_flags |= IFF_LINK0;
396 			break;
397 		case IPPROTO_MOBILE:
398 			ifp->if_flags &= ~IFF_LINK0;
399 			break;
400 		default:
401 			error = EPROTONOSUPPORT;
402 			break;
403 		}
404 		break;
405 	case GREGPROTO:
406 		ifr->ifr_flags = sc->g_proto;
407 		break;
408 	case GRESADDRS:
409 	case GRESADDRD:
410 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
411 			break;
412 		/*
413 		 * set tunnel endpoints, compute a less specific route
414 		 * to the remote end and mark if as up
415 		 */
416 		sa = &ifr->ifr_addr;
417 		if (cmd == GRESADDRS)
418 			sc->g_src = (satosin(sa))->sin_addr;
419 		if (cmd == GRESADDRD)
420 			sc->g_dst = (satosin(sa))->sin_addr;
421 	recompute:
422 		if ((sc->g_src.s_addr != INADDR_ANY) &&
423 		    (sc->g_dst.s_addr != INADDR_ANY)) {
424 			if (sc->route.ro_rt != 0) /* free old route */
425 				RTFREE(sc->route.ro_rt);
426 			if (gre_compute_route(sc) == 0)
427 				ifp->if_flags |= IFF_RUNNING;
428 			else
429 				ifp->if_flags &= ~IFF_RUNNING;
430 		}
431 		break;
432 	case GREGADDRS:
433 		memset(&si, 0, sizeof(si));
434 		si.sin_family = AF_INET;
435 		si.sin_len = sizeof(struct sockaddr_in);
436 		si.sin_addr.s_addr = sc->g_src.s_addr;
437 		sa = sintosa(&si);
438 		ifr->ifr_addr = *sa;
439 		break;
440 	case GREGADDRD:
441 		memset(&si, 0, sizeof(si));
442 		si.sin_family = AF_INET;
443 		si.sin_len = sizeof(struct sockaddr_in);
444 		si.sin_addr.s_addr = sc->g_dst.s_addr;
445 		sa = sintosa(&si);
446 		ifr->ifr_addr = *sa;
447 		break;
448 	case SIOCSLIFPHYADDR:
449 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
450 			break;
451 		if (lifr->addr.ss_family != AF_INET ||
452 		    lifr->dstaddr.ss_family != AF_INET) {
453 			error = EAFNOSUPPORT;
454 			break;
455 		}
456 		if (lifr->addr.ss_len != sizeof(si) ||
457 		    lifr->dstaddr.ss_len != sizeof(si)) {
458 			error = EINVAL;
459 			break;
460 		}
461 		sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr;
462 		sc->g_dst =
463 		    (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr;
464 		goto recompute;
465 	case SIOCDIFPHYADDR:
466 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
467 			break;
468 		sc->g_src.s_addr = INADDR_ANY;
469 		sc->g_dst.s_addr = INADDR_ANY;
470 		break;
471 	case SIOCGLIFPHYADDR:
472 		if (sc->g_src.s_addr == INADDR_ANY ||
473 		    sc->g_dst.s_addr == INADDR_ANY) {
474 			error = EADDRNOTAVAIL;
475 			break;
476 		}
477 		memset(&si, 0, sizeof(si));
478 		si.sin_family = AF_INET;
479 		si.sin_len = sizeof(struct sockaddr_in);
480 		si.sin_addr.s_addr = sc->g_src.s_addr;
481 		memcpy(&lifr->addr, &si, sizeof(si));
482 		si.sin_addr.s_addr = sc->g_dst.s_addr;
483 		memcpy(&lifr->dstaddr, &si, sizeof(si));
484 		break;
485 	default:
486 		error = EINVAL;
487 		break;
488 	}
489 
490 	splx(s);
491 	return (error);
492 }
493 
494 /*
495  * computes a route to our destination that is not the one
496  * which would be taken by ip_output(), as this one will loop back to
497  * us. If the interface is p2p as  a--->b, then a routing entry exists
498  * If we now send a packet to b (e.g. ping b), this will come down here
499  * gets src=a, dst=b tacked on and would from ip_output() sent back to
500  * if_gre.
501  * Goal here is to compute a route to b that is less specific than
502  * a-->b. We know that this one exists as in normal operation we have
503  * at least a default route which matches.
504  */
505 int
506 gre_compute_route(struct gre_softc *sc)
507 {
508 	struct route *ro;
509 	u_int32_t a, b, c;
510 
511 	ro = &sc->route;
512 
513 	memset(ro, 0, sizeof(struct route));
514 	((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
515 	ro->ro_dst.sa_family = AF_INET;
516 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
517 
518 	/*
519 	 * toggle last bit, so our interface is not found, but a less
520 	 * specific route. I'd rather like to specify a shorter mask,
521 	 * but this is not possible. Should work though. XXX
522 	 * there is a simpler way ...
523 	 */
524 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
525 		a = ntohl(sc->g_dst.s_addr);
526 		b = a & 0x01;
527 		c = a & 0xfffffffe;
528 		b = b ^ 0x01;
529 		a = b | c;
530 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
531 		    = htonl(a);
532 	}
533 
534 #ifdef DIAGNOSTIC
535 	printf("%s: searching for a route to %s", sc->sc_if.if_xname,
536 	    inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
537 #endif
538 
539 	rtalloc(ro);
540 
541 	/*
542 	 * check if this returned a route at all and this route is no
543 	 * recursion to ourself
544 	 */
545 	if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
546 #ifdef DIAGNOSTIC
547 		if (ro->ro_rt == NULL)
548 			printf(" - no route found!\n");
549 		else
550 			printf(" - route loops back to ourself!\n");
551 #endif
552 		return EADDRNOTAVAIL;
553 	}
554 
555 	/*
556 	 * now change it back - else ip_output will just drop
557 	 * the route and search one to this interface ...
558 	 */
559 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
560 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
561 
562 #ifdef DIAGNOSTIC
563 	printf(", choosing %s with gateway %s", ro->ro_rt->rt_ifp->if_xname,
564 	    inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
565 	printf("\n");
566 #endif
567 
568 	return 0;
569 }
570 
/*
 * Internet-style checksum over a flat buffer - the counterpart of
 * in_cksum(), which works on mbufs.
 *
 * p points to the data, len is its length in bytes.  The data is summed
 * as 16-bit words in memory order; an odd trailing byte is treated as the
 * first byte of a word whose second byte is zero.  The carries are folded
 * back in and the one's complement of the result is returned.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	const int nfull = len >> 1;
	u_int32_t acc = 0;
	int i;

	for (i = 0; i < nfull; i++)
		acc += p[i];

	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(const u_char *)(p + nfull);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return (u_int16_t)~acc;
}
599 #endif
600 
void	greattach(int);

/*
 * Pseudo-device attach routine.  When INET is configured it initialises
 * the softc list and registers the "gre" interface cloner; otherwise it
 * does nothing.  The count argument is not used.
 */
/* ARGSUSED */
void
greattach(int count)
{
#ifdef INET
	LIST_INIT(&gre_softc_list);
	if_clone_attach(&gre_cloner);
#endif
}
613