xref: /netbsd-src/sys/net/if_gre.c (revision e55cffd8e520e9b03f18a1bd98bb04223e79f69f)
1 /*	$NetBSD: if_gre.c,v 1.18 2001/04/12 17:53:48 thorpej Exp $ */
2 
3 /*
4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Heiko W.Rupp <hwr@pilhuhn.de>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Encapsulate L3 protocols into IP
41  * See RFC 1701 and 1702 for more details.
42  * If_gre is compatible with Cisco GRE tunnels, so you can
43  * have a NetBSD box as the other end of a tunnel interface of a Cisco
44  * router. See gre(4) for more details.
45  * Also supported:  IP in IP encaps (proto 55) as of RFC 2004
46  */
47 
48 #include "gre.h"
49 #if NGRE > 0
50 
51 #include "opt_inet.h"
52 #include "opt_ns.h"
53 #include "bpfilter.h"
54 
55 #include <sys/param.h>
56 #include <sys/malloc.h>
57 #include <sys/mbuf.h>
58 #include <sys/proc.h>
59 #include <sys/protosw.h>
60 #include <sys/socket.h>
61 #include <sys/ioctl.h>
62 #include <sys/queue.h>
63 #if __NetBSD__
64 #include <sys/systm.h>
65 #endif
66 
67 #include <machine/cpu.h>
68 
69 #include <net/ethertypes.h>
70 #include <net/if.h>
71 #include <net/if_types.h>
72 #include <net/netisr.h>
73 #include <net/route.h>
74 
75 #ifdef INET
76 #include <netinet/in.h>
77 #include <netinet/in_systm.h>
78 #include <netinet/in_var.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip_var.h>
81 #else
82 #error "Huh? if_gre without inet?"
83 #endif
84 
85 #ifdef NS
86 #include <netns/ns.h>
87 #include <netns/ns_if.h>
88 #endif
89 
90 #ifdef NETATALK
91 #include <netatalk/at.h>
92 #include <netatalk/at_var.h>
93 #include <netatalk/at_extern.h>
94 #endif
95 
96 #if NBPFILTER > 0
97 #include <sys/time.h>
98 #include <net/bpf.h>
99 #endif
100 
101 #include <net/if_gre.h>
102 
#define GREMTU 1450	/* XXX this is below the standard MTU of
                         1500 Bytes, allowing for headers,
                         but we should possibly do path mtu discovery
                         before changing if state to up to find the
                         correct value */
/* Link-level flag bits examined by gre_ioctl() when handling SIOCSIFFLAGS. */
#define LINK_MASK (IFF_LINK0|IFF_LINK1|IFF_LINK2)

/*
 * List of all gre softc structures: initialized in greattach(), entries
 * are added in gre_clone_create() and removed in gre_clone_destroy().
 */
struct gre_softc_head gre_softc_list;

/* Create/destroy callbacks handed to the interface cloner below. */
int	gre_clone_create __P((struct if_clone *, int));
void	gre_clone_destroy __P((struct ifnet *));

/* Cloner descriptor for "gre" interfaces; registered in greattach(). */
struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);

/* (Re)computes the cached route stored in sc->route. */
void gre_compute_route(struct gre_softc *sc);
#ifdef DIAGNOSTIC
/* Debug helper: prints an IPv4 address in dotted-quad form. */
void gre_inet_ntoa(struct in_addr in);
#endif

/* Attach hook; the count argument is unused. */
void	greattach __P((int));
124 
125 /* ARGSUSED */
126 void
127 greattach(count)
128 	int count;
129 {
130 
131 	LIST_INIT(&gre_softc_list);
132 	if_clone_attach(&gre_cloner);
133 }
134 
135 int
136 gre_clone_create(ifc, unit)
137 	struct if_clone *ifc;
138 	int unit;
139 {
140 	struct gre_softc *sc;
141 
142 	sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
143 	memset(sc, 0, sizeof(struct gre_softc));
144 
145 	sprintf(sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit);
146 	sc->sc_if.if_softc = sc;
147 	sc->sc_if.if_type =  IFT_OTHER;
148 	sc->sc_if.if_addrlen = 4;
149 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
150 	sc->sc_if.if_dlt = DLT_NULL;
151 	sc->sc_if.if_mtu = GREMTU;
152 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
153 	sc->sc_if.if_output = gre_output;
154 	sc->sc_if.if_ioctl = gre_ioctl;
155 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
156 	sc->g_proto = IPPROTO_GRE;
157 	if_attach(&sc->sc_if);
158 	if_alloc_sadl(&sc->sc_if);
159 #if NBPFILTER > 0
160 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
161 #endif
162 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
163 	return (0);
164 }
165 
166 void
167 gre_clone_destroy(ifp)
168 	struct ifnet *ifp;
169 {
170 	struct gre_softc *sc = ifp->if_softc;
171 
172 	LIST_REMOVE(sc, sc_list);
173 #if NBPFILTER > 0
174 	bpfdetach(ifp);
175 #endif
176 	if_detach(ifp);
177 	free(sc, M_DEVBUF);
178 }
179 
180 /*
181  * The output routine. Takes a packet and encapsulates it in the protocol
182  * given by sc->g_proto. See also RFC 1701 and RFC 2004
183  */
184 
185 #if 0
186 struct ip ip_h;
187 #endif
188 struct mobile_h mob_h;
189 
190 int
191 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
192 	   struct rtentry *rt)
193 {
194 	int error = 0;
195 	struct gre_softc *sc = ifp->if_softc;
196 	struct greip *gh;
197 	struct ip *inp;
198 	u_char ttl, osrc;
199 	u_short etype = 0;
200 
201 
202 	gh = NULL;
203 	inp = NULL;
204 	osrc = 0;
205 
206 #if NBPFILTER >0
207 	if (ifp->if_bpf) {
208 		/* see comment of other if_foo.c files */
209 		struct mbuf m0;
210 		u_int32_t af = dst->sa_family;
211 
212 		m0.m_next = m;
213 		m0.m_len = 4;
214 		m0.m_data = (char *)&af;
215 
216 		bpf_mtap(ifp->if_bpf, &m0);
217 	}
218 #endif
219 
220 	ttl = 255;
221 
222 	if (sc->g_proto == IPPROTO_MOBILE) {
223 		if (dst->sa_family == AF_INET) {
224 			struct mbuf *m0;
225 			int msiz;
226 
227 			inp = mtod(m, struct ip *);
228 
229 			memset(&mob_h, 0, MOB_H_SIZ_L);
230 			mob_h.proto = (inp->ip_p) << 8;
231 			mob_h.odst = inp->ip_dst.s_addr;
232 			inp->ip_dst.s_addr = sc->g_dst.s_addr;
233 
234 			/*
235 			 * If the packet comes from our host, we only change
236 			 * the destination address in the IP header.
237 			 * Else we also need to save and change the source
238 			 */
239 			if (in_hosteq(inp->ip_src, sc->g_src)) {
240 				msiz = MOB_H_SIZ_S;
241 			} else {
242 				mob_h.proto |= MOB_H_SBIT;
243 				mob_h.osrc = inp->ip_src.s_addr;
244 				inp->ip_src.s_addr = sc->g_src.s_addr;
245 				msiz = MOB_H_SIZ_L;
246 			}
247 			HTONS(mob_h.proto);
248 			mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);
249 
250 			if ((m->m_data - msiz) < m->m_pktdat) {
251 				/* need new mbuf */
252 				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
253 				if (m0 == NULL) {
254 					IF_DROP(&ifp->if_snd);
255 					m_freem(m);
256 					return (ENOBUFS);
257 				}
258 				m0->m_next = m;
259 				m->m_data += sizeof(struct ip);
260 				m->m_len -= sizeof(struct ip);
261 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
262 				m0->m_len = msiz + sizeof(struct ip);
263 				m0->m_data += max_linkhdr;
264 				memcpy(mtod(m0, caddr_t), (caddr_t)inp,
265 				       sizeof(struct ip));
266 				m = m0;
267 			} else {  /* we have some spave left in the old one */
268 				m->m_data -= msiz;
269 				m->m_len += msiz;
270 				m->m_pkthdr.len += msiz;
271 				memmove(mtod(m, caddr_t), inp,
272 					sizeof(struct ip));
273 			}
274 			inp=mtod(m, struct ip *);
275 			memcpy((caddr_t)(inp + 1), &mob_h, (unsigned)msiz);
276 			NTOHS(inp->ip_len);
277 			inp->ip_len += msiz;
278 		} else {  /* AF_INET */
279 			IF_DROP(&ifp->if_snd);
280 			m_freem(m);
281 			return (EINVAL);
282 		}
283 	} else if (sc->g_proto == IPPROTO_GRE) {
284 		switch(dst->sa_family) {
285 		case AF_INET:
286 			inp = mtod(m, struct ip *);
287 			ttl = inp->ip_ttl;
288 			etype = ETHERTYPE_IP;
289 			break;
290 #ifdef NETATALK
291 		case AF_APPLETALK:
292 			etype = ETHERTYPE_ATALK;
293 			break;
294 #endif
295 #ifdef NS
296 		case AF_NS:
297 			etype = ETHERTYPE_NS;
298 			break;
299 #endif
300 		default:
301 			IF_DROP(&ifp->if_snd);
302 			m_freem(m);
303 			return (EAFNOSUPPORT);
304 		}
305 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
306 	} else {
307 		error = EINVAL;
308 		IF_DROP(&ifp->if_snd);
309 		m_freem(m);
310 		return (error);
311 	}
312 
313 
314 	if (m == NULL) {
315 		IF_DROP(&ifp->if_snd);
316 		return (ENOBUFS);
317 	}
318 
319 	gh = mtod(m, struct greip *);
320 	if (sc->g_proto == IPPROTO_GRE) {
321 		/* we don't have any GRE flags for now */
322 
323 		memset((void *)&gh->gi_g, 0, sizeof(struct gre_h));
324 		gh->gi_ptype = htons(etype);
325 	}
326 
327 	gh->gi_pr = sc->g_proto;
328 	if (sc->g_proto != IPPROTO_MOBILE) {
329 		gh->gi_src = sc->g_src;
330 		gh->gi_dst = sc->g_dst;
331 		((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2;
332 		((struct ip*)gh)->ip_ttl = ttl;
333 		((struct ip*)gh)->ip_tos = inp->ip_tos;
334 	    gh->gi_len = m->m_pkthdr.len;
335 	}
336 
337 	ifp->if_opackets++;
338 	ifp->if_obytes += m->m_pkthdr.len;
339 	/* send it off */
340 	error = ip_output(m, NULL, &sc->route, 0, NULL);
341 	if (error)
342 		ifp->if_oerrors++;
343 	return (error);
344 
345 }
346 
347 int
348 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
349 {
350 	struct proc *p = curproc;	/* XXX */
351 	struct ifaddr *ifa = (struct ifaddr *)data;
352 	struct ifreq *ifr = (struct ifreq *)data;
353 	struct in_ifaddr *ia = (struct in_ifaddr *)data;
354 	struct gre_softc *sc = ifp->if_softc;
355 	int s;
356 	struct sockaddr_in si;
357 	struct sockaddr *sa = NULL;
358 	int error;
359 
360 	error = 0;
361 
362 	s = splnet();
363 	switch(cmd) {
364 	case SIOCSIFADDR:
365 	case SIOCSIFDSTADDR:
366 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
367 			break;
368 		/*
369                  * set tunnel endpoints in case that we "only"
370                  * have ip over ip encapsulation. This allows to
371                  * set tunnel endpoints with ifconfig.
372                  */
373 		if (ifa->ifa_addr->sa_family == AF_INET) {
374 			sa = ifa->ifa_addr;
375 			sc->g_src = (satosin(sa))->sin_addr;
376 			sc->g_dst = ia->ia_dstaddr.sin_addr;
377 			if ((sc->g_src.s_addr != INADDR_ANY) &&
378 			    (sc->g_dst.s_addr != INADDR_ANY)) {
379 				if (sc->route.ro_rt != 0) /* free old route */
380 					RTFREE(sc->route.ro_rt);
381 				gre_compute_route(sc);
382 				ifp->if_flags |= IFF_UP;
383 			}
384 		}
385 		break;
386 	case SIOCSIFFLAGS:
387 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
388 			break;
389 		if ((sc->g_dst.s_addr == INADDR_ANY) ||
390 		    (sc->g_src.s_addr == INADDR_ANY))
391 			ifp->if_flags &= ~IFF_UP;
392 
393 		switch(ifr->ifr_flags & LINK_MASK) {
394 			case IFF_LINK0:
395 				sc->g_proto = IPPROTO_GRE;
396 				ifp->if_flags |= IFF_LINK0;
397 				ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
398 				break;
399 			case IFF_LINK2:
400 				sc->g_proto = IPPROTO_MOBILE;
401 				ifp->if_flags |= IFF_LINK2;
402 				ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1);
403 				break;
404 		}
405 		break;
406 	case SIOCSIFMTU:
407 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
408 			break;
409 		if (ifr->ifr_mtu > GREMTU || ifr->ifr_mtu < 576) {
410 			error = EINVAL;
411 			break;
412 		}
413 		ifp->if_mtu = ifr->ifr_mtu;
414 		break;
415 	case SIOCGIFMTU:
416 		ifr->ifr_mtu = sc->sc_if.if_mtu;
417 		break;
418 	case SIOCADDMULTI:
419 	case SIOCDELMULTI:
420 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
421 			break;
422 		if (ifr == 0) {
423 			error = EAFNOSUPPORT;
424 			break;
425 		}
426 		switch (ifr->ifr_addr.sa_family) {
427 #ifdef INET
428 		case AF_INET:
429 			break;
430 #endif
431 		default:
432 			error = EAFNOSUPPORT;
433 			break;
434 		}
435 		break;
436 	case GRESPROTO:
437 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
438 			break;
439 		sc->g_proto = ifr->ifr_flags;
440 		switch (sc->g_proto) {
441 		case IPPROTO_GRE :
442 			ifp->if_flags |= IFF_LINK0;
443 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
444 			break;
445 		case IPPROTO_MOBILE :
446 			ifp->if_flags |= IFF_LINK2;
447 			ifp->if_flags &= ~(IFF_LINK1|IFF_LINK2);
448 			break;
449 		default:
450 			ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2);
451 		}
452 		break;
453 	case GREGPROTO:
454 		ifr->ifr_flags = sc->g_proto;
455 		break;
456 	case GRESADDRS:
457 	case GRESADDRD:
458 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
459 			break;
460 		/*
461 	         * set tunnel endpoints, compute a less specific route
462 	         * to the remote end and mark if as up
463                  */
464 		sa = &ifr->ifr_addr;
465 		if (cmd == GRESADDRS )
466 			sc->g_src = (satosin(sa))->sin_addr;
467 		if (cmd == GRESADDRD )
468 			sc->g_dst = (satosin(sa))->sin_addr;
469 		if ((sc->g_src.s_addr != INADDR_ANY) &&
470 		    (sc->g_dst.s_addr != INADDR_ANY)) {
471 			if (sc->route.ro_rt != 0) /* free old route */
472 				RTFREE(sc->route.ro_rt);
473 			gre_compute_route(sc);
474 			ifp->if_flags |= IFF_UP;
475 		}
476 		break;
477 	case GREGADDRS:
478 		si.sin_addr.s_addr = sc->g_src.s_addr;
479 		sa = sintosa(&si);
480 		ifr->ifr_addr = *sa;
481 		break;
482 	case GREGADDRD:
483 		si.sin_addr.s_addr = sc->g_dst.s_addr;
484 		sa = sintosa(&si);
485 		ifr->ifr_addr = *sa;
486 		break;
487 	default:
488 		error = EINVAL;
489 	}
490 
491 	splx(s);
492 	return (error);
493 }
494 
/*
 * Computes a route to our destination that is not the one which would
 * be taken by ip_output(), as that one would loop back to us.
 * If the interface is p2p as a--->b, then a routing entry for b exists.
 * If we now send a packet to b (e.g. ping b), it comes down here, gets
 * src=a, dst=b tacked on, and would be sent back to if_gre by
 * ip_output().
 * The goal here is to compute a route to b that is less specific than
 * a-->b. We expect that one exists, as in normal operation we have
 * at least a default route which matches.
 */
506 
507 void
508 gre_compute_route(struct gre_softc *sc)
509 {
510 	struct route *ro;
511 	u_int32_t a, b, c;
512 
513 	ro = &sc->route;
514 
515 	memset(ro, 0, sizeof(struct route));
516 	((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
517 	ro->ro_dst.sa_family = AF_INET;
518 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
519 
520 	/*
521 	 * toggle last bit, so our interface is not found, but a less
522          * specific route. I'd rather like to specify a shorter mask,
523  	 * but this is not possible. Should work though. XXX
524 	 * there is a simpler way ...
525          */
526 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
527 		a = ntohl(sc->g_dst.s_addr);
528 		b = a & 0x01;
529 		c = a & 0xfffffffe;
530 		b = b ^ 0x01;
531 		a = b | c;
532 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
533 			= htonl(a);
534 	}
535 
536 #ifdef DIAGNOSTIC
537 	printf("%s: searching a route to ", sc->sc_if.if_xname);
538 	gre_inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr);
539 #endif
540 
541 	rtalloc(ro);
542 
543 	/*
544 	 * now change it back - else ip_output will just drop
545          * the route and search one to this interface ...
546          */
547 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
548 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
549 
550 #ifdef DIAGNOSTIC
551 	printf(", choosing %s with gateway ",ro->ro_rt->rt_ifp->if_xname);
552 	gre_inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr);
553 	printf("\n");
554 #endif
555 }
556 
557 /*
558  * do a checksum of a buffer - much like in_cksum, which operates on
559  * mbufs.
560  */
561 
/*
 * Compute the one's-complement checksum of a flat buffer.
 *
 * p	start of the buffer, read as 16-bit words in host byte order
 * len	length of the buffer in bytes
 *
 * All complete 16-bit words are summed; a trailing odd byte is treated as
 * the first byte of a word whose second byte is zero.  The carries are
 * folded back into the low 16 bits and the complement of the result is
 * returned.
 */
u_short
gre_in_cksum(u_short *p, u_int len)
{
	u_int acc = 0;
	u_int left;

	/* add up every complete 16-bit word */
	for (left = len; left > 1; left -= 2)
		acc += *p++;

	/* pad a trailing odd byte with a zero byte to a full word */
	if (left == 1) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)p;
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry: fold the upper half into the lower half */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return (~acc);
}
586 
587 
588 /* while testing ... */
589 #ifdef DIAGNOSTIC
/*
 * Print an IPv4 address in dotted-quad notation.
 *
 * in	the address; its four bytes are printed in memory order
 *
 * Despite the name nothing is returned: the text goes straight to
 * printf() and no trailing newline is added.
 */
void
gre_inet_ntoa(struct in_addr in)
{
	const unsigned char *octet = (const unsigned char *)&in;

	printf("%d.%d.%d.%d", octet[0], octet[1], octet[2], octet[3]);
}
599 
600 #endif
601 #endif
602 
603