xref: /openbsd-src/sys/net/if_gre.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*      $OpenBSD: if_gre.c,v 1.17 2001/06/27 03:49:53 angelos Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *        This product includes software developed by the NetBSD
22  *        Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
42  * See gre(4) for more details.
43  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
44  */
45 
46 #include "gre.h"
47 #if NGRE > 0
48 
49 #include "bpfilter.h"
50 
51 #include <sys/param.h>
52 #include <sys/proc.h>
53 #include <sys/mbuf.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/kernel.h>
57 #include <sys/systm.h>
58 
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/netisr.h>
62 #include <net/route.h>
63 
64 #ifdef INET
65 #include <netinet/in.h>
66 #include <netinet/in_systm.h>
67 #include <netinet/in_var.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/if_ether.h>
71 #else
72 #error "if_gre used without inet"
73 #endif
74 
75 #ifdef NS
76 #include <netns/ns.h>
77 #include <netns/ns_if.h>
78 #endif
79 
80 #ifdef NETATALK
81 #include <netatalk/at.h>
82 #include <netatalk/at_var.h>
83 #include <netatalk/at_extern.h>
84 #endif
85 
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89 
90 #include <net/if_gre.h>
91 
/* How many levels of recursion are allowed; may be overridden at build time. */
#if !defined(GRE_RECURSION_LIMIT)
#define GRE_RECURSION_LIMIT	3
#endif /* GRE_RECURSION_LIMIT */

/*
 * XXX Default interface MTU.  This is below the standard MTU of 1500
 * bytes, allowing for headers, but we should possibly do path MTU
 * discovery before changing the interface state to up in order to find
 * the correct value.
 */
#define GREMTU 1450

/* All link-level flag bits; the ioctl handler uses them to pick the protocol. */
#define LINK_MASK (IFF_LINK0|IFF_LINK1|IFF_LINK2)

/* Softc array allocated by greattach() and the number of entries in it. */
struct gre_softc *gre = NULL;
int ngre = 0;

/*
 * Acceptance of GRE and MobileIP packets is controlled through the
 * sysctl values net.inet.gre.allow and net.inet.mobileip.allow
 * respectively.  Zero means drop them, anything else means accept.
 */
int gre_allow = 0;
int ip_mobile_allow = 0;

static void gre_compute_route(struct gre_softc *sc);
117 
118 void
119 greattach(n)
120 	int n;
121 {
122 	struct gre_softc *sc;
123 	int i;
124 
125 	ngre = n;
126 	gre = sc = malloc(ngre * sizeof(struct gre_softc), M_DEVBUF, M_WAIT);
127 	bzero(sc, ngre * sizeof(struct gre_softc));
128 	for (i = 0; i < ngre ; sc++) {
129 		snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname),
130 			 "gre%d", i++);
131 		sc->sc_if.if_softc = sc;
132 		sc->sc_if.if_type =  IFT_OTHER;
133 		sc->sc_if.if_addrlen = 4;
134 		sc->sc_if.if_hdrlen = 24; /* IP + GRE */
135 		sc->sc_if.if_mtu = GREMTU;
136 		sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
137 		sc->sc_if.if_output = gre_output;
138 		sc->sc_if.if_ioctl = gre_ioctl;
139 		sc->sc_if.if_collisions = 0;
140 		sc->sc_if.if_ierrors = 0;
141 		sc->sc_if.if_oerrors = 0;
142 		sc->sc_if.if_ipackets = 0;
143 		sc->sc_if.if_opackets = 0;
144 		sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
145 		sc->g_proto = IPPROTO_GRE;
146 
147 		if_attach(&sc->sc_if);
148 
149 #if NBPFILTER > 0
150 		bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_RAW,
151 			  sizeof(u_int32_t) );
152 #endif
153 	}
154 }
155 
156 /*
157  * The output routine. Takes a packet and encapsulates it in the protocol
158  * given by sc->g_proto. See also RFC 1701 and RFC 2004.
159  */
160 
161 int
162 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
163 	   struct rtentry *rt)
164 {
165 	int error = 0;
166 	struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc);
167 	struct greip *gh = NULL;
168 	struct ip *inp = NULL;
169 	u_short etype = 0;
170 	struct mobile_h mob_h;
171 	struct m_tag *mtag;
172 
173 	/* Try to limit infinite recursion through misconfiguration. */
174 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
175 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
176 		if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) {
177 			IF_DROP(&ifp->if_snd);
178 			m_freem(m);
179 			return (EIO);	/* Use the same as in if_gif.c */
180 		}
181 	}
182 
183 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT);
184 	if (mtag == NULL) {
185 		IF_DROP(&ifp->if_snd);
186 		m_freem(m);
187 		return (ENOBUFS);
188 	}
189 	bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
190 	m_tag_prepend(m, mtag);
191 
192 #if NBPFILTER >0
193 	if (ifp->if_bpf)
194 		bpf_mtap(ifp->if_bpf, m);
195 #endif
196 
197 	if (sc->g_proto == IPPROTO_MOBILE) {
198 		if (ip_mobile_allow == 0) {
199 			IF_DROP(&ifp->if_snd);
200 			m_freem(m);
201 			return (EACCES);
202 		}
203 
204 		if (dst->sa_family == AF_INET) {
205 			struct mbuf *m0;
206 			int msiz;
207 
208 			/*
209 			 * Make sure the complete IP header (with options)
210 			 * is in the first mbuf.
211 			 */
212 			if (m->m_len < sizeof(struct ip)) {
213 				m = m_pullup(m, sizeof(struct ip));
214 				if (m == NULL) {
215 					IF_DROP(&ifp->if_snd);
216 					return (ENOBUFS);
217 				} else
218 					inp = mtod(m, struct ip *);
219 
220 				if (m->m_len < inp->ip_hl << 2) {
221 					m = m_pullup(m,
222 					    sizeof(inp->ip_hl << 2));
223 					if (m == NULL) {
224 						IF_DROP(&ifp->if_snd);
225 						return (ENOBUFS);
226 					}
227 				}
228 			}
229 
230 			inp = mtod(m, struct ip *);
231 
232 			bzero(&mob_h, MOB_H_SIZ_L);
233 			mob_h.proto = (inp->ip_p) << 8;
234 			mob_h.odst = inp->ip_dst.s_addr;
235 			inp->ip_dst.s_addr = sc->g_dst.s_addr;
236 
237 			/*
238 			 * If the packet comes from our host, we only change
239 			 * the destination address in the IP header.
240 			 * Otherwise we need to save and change the source.
241 			 */
242 			if (inp->ip_src.s_addr == sc->g_src.s_addr) {
243 				msiz = MOB_H_SIZ_S;
244 			} else {
245 				mob_h.proto |= MOB_H_SBIT;
246 				mob_h.osrc = inp->ip_src.s_addr;
247 				inp->ip_src.s_addr = sc->g_src.s_addr;
248 				msiz = MOB_H_SIZ_L;
249 			}
250 
251 			HTONS(mob_h.proto);
252 			mob_h.hcrc = gre_in_cksum((u_short *) &mob_h, msiz);
253 
254 			/* Squeeze in the mobility header */
255 			if ((m->m_data - msiz) < m->m_pktdat) {
256 				/* Need new mbuf */
257 				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
258 				if (m0 == NULL) {
259 					IF_DROP(&ifp->if_snd);
260 					m_freem(m);
261 					return (ENOBUFS);
262 				}
263 				M_MOVE_HDR(m0, m);
264 
265 				m0->m_len = msiz + (inp->ip_hl << 2);
266 				m0->m_data += max_linkhdr;
267 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
268 				m->m_data += inp->ip_hl << 2;
269 				m->m_len -= inp->ip_hl << 2;
270 
271 				bcopy((caddr_t) inp, mtod(m0, caddr_t),
272 				    sizeof(struct ip));
273 
274 				m0->m_next = m;
275 				m = m0;
276 			} else {  /* we have some space left in the old one */
277 				m->m_data -= msiz;
278 				m->m_len += msiz;
279 				m->m_pkthdr.len += msiz;
280 				bcopy(inp, mtod(m, caddr_t),
281 				    inp->ip_hl << 2);
282 			}
283 
284 			/* Copy Mobility header */
285 			inp = mtod(m, struct ip *);
286 			bcopy(&mob_h, (caddr_t)(inp + 1), (unsigned) msiz);
287 			NTOHS(inp->ip_len);
288 			inp->ip_len += msiz;
289 		} else {  /* AF_INET */
290 			IF_DROP(&ifp->if_snd);
291 			m_freem(m);
292 			return (EINVAL);
293 		}
294 	} else if (sc->g_proto == IPPROTO_GRE) {
295 		if (gre_allow == 0) {
296 			IF_DROP(&ifp->if_snd);
297 			m_freem(m);
298 			return (EACCES);
299 		}
300 
301 		switch(dst->sa_family) {
302 		case AF_INET:
303 			if (m->m_len < sizeof(struct ip)) {
304 				m = m_pullup(m, sizeof(struct ip));
305 				if (m == NULL) {
306 					IF_DROP(&ifp->if_snd);
307 					return (ENOBUFS);
308 				}
309 			}
310 
311 			inp = mtod(m, struct ip *);
312 			etype = ETHERTYPE_IP;
313 			break;
314 #ifdef NETATALK
315 		case AF_APPLETALK:
316 			etype = ETHERTYPE_AT;
317 			break;
318 #endif
319 #ifdef NS
320 		case AF_NS:
321 			etype = ETHERTYPE_NS;
322 			break;
323 #endif
324 		default:
325 			IF_DROP(&ifp->if_snd);
326 			m_freem(m);
327 			return (EAFNOSUPPORT);
328 		}
329 
330 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
331 	} else {
332 		error = EINVAL;
333 		IF_DROP(&ifp->if_snd);
334 		m_freem(m);
335 		return (error);
336 	}
337 
338 	if (m == NULL) {
339 		IF_DROP(&ifp->if_snd);
340 		return (ENOBUFS);
341 	}
342 
343 	gh = mtod(m, struct greip *);
344 	if (sc->g_proto == IPPROTO_GRE) {
345 		/* We don't support any GRE flags for now */
346 
347 		bzero((void *) &gh->gi_g, sizeof(struct gre_h));
348 		gh->gi_ptype = htons(etype);
349 	}
350 
351 	gh->gi_pr = sc->g_proto;
352 	if (sc->g_proto != IPPROTO_MOBILE) {
353 		gh->gi_src = sc->g_src;
354 		gh->gi_dst = sc->g_dst;
355 		((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2;
356 		((struct ip *) gh)->ip_ttl = ip_defttl;
357 		((struct ip *) gh)->ip_tos = inp->ip_tos;
358 		gh->gi_len = m->m_pkthdr.len;
359 	}
360 
361 	ifp->if_opackets++;
362 	ifp->if_obytes += m->m_pkthdr.len;
363 
364 	/* Send it off */
365 	error = ip_output(m, NULL, &sc->route, 0, NULL, NULL);
366 	if (error)
367 		ifp->if_oerrors++;
368 	return (error);
369 }
370 
371 int
372 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
373 {
374 
375 	struct ifaddr *ifa = (struct ifaddr *) data;
376 	struct ifreq *ifr = (struct ifreq *) data;
377 	struct in_ifaddr *ia = (struct in_ifaddr *) data;
378 	struct gre_softc *sc = ifp->if_softc;
379 	int s;
380 	struct sockaddr_in si;
381 	struct sockaddr *sa = NULL;
382 	int error = 0;
383 	struct proc *prc = curproc;		/* XXX */
384 
385 	s = splimp();
386 	switch(cmd) {
387 	case SIOCSIFADDR:
388 	case SIOCSIFDSTADDR:
389 		/*
390 		 * set tunnel endpoints in case that we "only"
391 		 * have ip over ip encapsulation. This allows to
392 		 * set tunnel endpoints with ifconfig.
393 		 */
394 		if (ifa->ifa_addr->sa_family == AF_INET) {
395 			sa = ifa->ifa_addr;
396 			sc->g_src = (satosin(sa))->sin_addr;
397 			sc->g_dst = ia->ia_dstaddr.sin_addr;
398 			if ((sc->g_src.s_addr != INADDR_ANY) &&
399 			    (sc->g_dst.s_addr != INADDR_ANY)) {
400 				if (sc->route.ro_rt != 0) {
401 					/* free old route */
402 					RTFREE(sc->route.ro_rt);
403 					sc->route.ro_rt = (struct rtentry *) 0;
404 				}
405 
406 				gre_compute_route(sc);
407 				if (sc->route.ro_rt == 0) {
408 					sc->g_src.s_addr = INADDR_ANY;
409 					sc->g_dst.s_addr = INADDR_ANY;
410 					splx(s);
411 					return EIO; /* Is this is good ? */
412 				}
413 
414 				ifp->if_flags |= IFF_UP;
415 			}
416 		}
417 		break;
418 	case SIOCSIFFLAGS:
419 		if ((sc->g_dst.s_addr == INADDR_ANY) ||
420 		    (sc->g_src.s_addr == INADDR_ANY))
421 			ifp->if_flags &= ~IFF_UP;
422 
423 		switch(ifr->ifr_flags & LINK_MASK) {
424 			case IFF_LINK0:
425 				sc->g_proto = IPPROTO_GRE;
426 				ifp->if_flags |= IFF_LINK0;
427 				ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
428 				break;
429 			case IFF_LINK2:
430 				sc->g_proto = IPPROTO_MOBILE;
431 				ifp->if_flags |= IFF_LINK2;
432 				ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1);
433 				break;
434 		}
435 		break;
436 	case SIOCADDMULTI:
437 	case SIOCDELMULTI:
438 		if (ifr == 0) {
439 			error = EAFNOSUPPORT;
440 			break;
441 		}
442 		switch (ifr->ifr_addr.sa_family) {
443 #ifdef INET
444 		case AF_INET:
445 			break;
446 #endif
447 		default:
448 			error = EAFNOSUPPORT;
449 			break;
450 		}
451 		break;
452 	case GRESPROTO:
453 		/* Check for superuser */
454 		if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0)
455 			break;
456 
457 		sc->g_proto = ifr->ifr_flags;
458 		switch (sc->g_proto) {
459 		case IPPROTO_GRE :
460 			ifp->if_flags |= IFF_LINK0;
461 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
462 			break;
463 		case IPPROTO_MOBILE :
464 			ifp->if_flags |= IFF_LINK2;
465 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
466 			break;
467 		default:
468 			ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2);
469 		}
470 		break;
471 	case GREGPROTO:
472 		ifr->ifr_flags = sc->g_proto;
473 		break;
474 	case GRESADDRS:
475 	case GRESADDRD:
476 		/* Check for superuser */
477 		if ((error = suser(prc->p_ucred, &prc->p_acflag)) != 0)
478 			break;
479 
480 		/*
481 		 * set tunnel endpoints, compute a less specific route
482 		 * to the remote end and mark if as up
483 		 */
484 		sa = &ifr->ifr_addr;
485 		if (cmd == GRESADDRS )
486 			sc->g_src = (satosin(sa))->sin_addr;
487 		if (cmd == GRESADDRD )
488 			sc->g_dst = (satosin(sa))->sin_addr;
489 		if ((sc->g_src.s_addr != INADDR_ANY) &&
490 		    (sc->g_dst.s_addr != INADDR_ANY)) {
491 			if (sc->route.ro_rt != 0) {
492 				/* free old route */
493 				RTFREE(sc->route.ro_rt);
494 				sc->route.ro_rt = (struct rtentry *) 0;
495 			}
496 
497 			gre_compute_route(sc);
498 			if (sc->route.ro_rt == 0)
499 			{
500 				sc->g_src.s_addr = INADDR_ANY;
501 				sc->g_dst.s_addr = INADDR_ANY;
502 				splx(s);
503 				return EIO; /* Is this is good ? */
504 			}
505 			ifp->if_flags |= IFF_UP;
506 		}
507 		break;
508 	case GREGADDRS:
509 		si.sin_addr.s_addr = sc->g_src.s_addr;
510 		sa = sintosa(&si);
511 		ifr->ifr_addr = *sa;
512 		break;
513 	case GREGADDRD:
514 		si.sin_addr.s_addr = sc->g_dst.s_addr;
515 		sa = sintosa(&si);
516 		ifr->ifr_addr = *sa;
517 		break;
518 	default:
519 		error = EINVAL;
520 	}
521 
522 	splx(s);
523 	return (error);
524 }
525 
526 /*
527  * computes a route to our destination that is not the one
528  * which would be taken by ip_output(), as this one will loop back to
529  * us. If the interface is p2p as  a--->b, then a routing entry exists
530  * If we now send a packet to b (e.g. ping b), this will come down here
531  * gets src=a, dst=b tacked on and would from ip_ouput() sent back to
532  * if_gre.
533  * Goal here is to compute a route to b that is less specific than
534  * a-->b. We know that this one exists as in normal operation we have
535  * at least a default route which matches.
536  */
537 
538 static void
539 gre_compute_route(struct gre_softc *sc)
540 {
541 	struct route *ro;
542 	u_int32_t a, b, c;
543 
544 	ro = &sc->route;
545 
546 	bzero(ro, sizeof(struct route));
547 	((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sc->g_dst;
548 	ro->ro_dst.sa_family = AF_INET;
549 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
550 
551 	/*
552 	 * toggle last bit, so our interface is not found, but a less
553 	 * specific route. I'd rather like to specify a shorter mask,
554  	 * but this is not possible. Should work though. XXX
555 	 * there is a simpler way ...
556 	 */
557 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
558 		a = ntohl(sc->g_dst.s_addr);
559 		b = a & 0x01;
560 		c = a & 0xfffffffe;
561 		b = b ^ 0x01;
562 		a = b | c;
563 		((struct sockaddr_in *) &ro->ro_dst)->sin_addr.s_addr = htonl(a);
564 	}
565 
566 	rtalloc(ro);
567 	if (ro->ro_rt == 0)
568 		return;
569 
570 	/*
571 	 * Check whether we just created a loop. An even more paranoid
572 	 * check would be against all GRE interfaces, but that would
573 	 * not allow people to link GRE tunnels.
574 	 */
575 	if (ro->ro_rt->rt_ifp == &sc->sc_if) {
576 		RTFREE(ro->ro_rt);
577 		ro->ro_rt = (struct rtentry *) 0;
578 		return;
579 	}
580 
581 	/*
582 	 * now change it back - else ip_output will just drop
583 	 * the route and search one to this interface ...
584 	 */
585 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
586 		((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sc->g_dst;
587 }
588 
/*
 * Checksum a flat buffer - much like in_cksum, which operates on mbufs.
 *
 *	p	start of the buffer, read as 16-bit words
 *	len	length of the buffer in bytes; a trailing odd byte is
 *		treated as the first byte of a word whose second byte
 *		is zero
 *
 * Returns the one's complement of the folded 16-bit sum.
 */
u_short
gre_in_cksum(u_short *p, u_int len)
{
	const u_int pairs = len >> 1;
	u_int acc = 0;
	u_int i;

	/* Add up all complete 16-bit words. */
	for (i = 0; i < pairs; i++)
		acc += p[i];

	/* Pad a leftover byte to a full word and add that as well. */
	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *) (p + pairs);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);

	return ((u_short) ~acc);
}
617 #endif
618