xref: /netbsd-src/sys/net/if_gre.c (revision ce2c90c7c172d95d2402a5b3d96d8f8e6d138a21)
1 /*	$NetBSD: if_gre.c,v 1.69 2006/10/15 06:36:54 dyoung Exp $ */
2 
3 /*
4  * Copyright (c) 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Heiko W.Rupp <hwr@pilhuhn.de>
9  *
10  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *        This product includes software developed by the NetBSD
23  *        Foundation, Inc. and its contributors.
24  * 4. Neither the name of The NetBSD Foundation nor the names of its
25  *    contributors may be used to endorse or promote products derived
26  *    from this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40 
41 /*
42  * Encapsulate L3 protocols into IP
43  * See RFC 1701 and 1702 for more details.
44  * If_gre is compatible with Cisco GRE tunnels, so you can
45  * have a NetBSD box as the other end of a tunnel interface of a Cisco
46  * router. See gre(4) for more details.
47  * Also supported:  IP in IP encaps (proto 55) as of RFC 2004
48  */
49 
50 #include <sys/cdefs.h>
51 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.69 2006/10/15 06:36:54 dyoung Exp $");
52 
53 #include "opt_gre.h"
54 #include "opt_inet.h"
55 #include "bpfilter.h"
56 
57 #ifdef INET
58 #include <sys/param.h>
59 #include <sys/file.h>
60 #include <sys/filedesc.h>
61 #include <sys/malloc.h>
62 #include <sys/mbuf.h>
63 #include <sys/proc.h>
64 #include <sys/protosw.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/ioctl.h>
68 #include <sys/queue.h>
69 #if __NetBSD__
70 #include <sys/systm.h>
71 #include <sys/sysctl.h>
72 #include <sys/kauth.h>
73 #endif
74 
75 #include <sys/kthread.h>
76 
77 #include <machine/cpu.h>
78 
79 #include <net/ethertypes.h>
80 #include <net/if.h>
81 #include <net/if_types.h>
82 #include <net/netisr.h>
83 #include <net/route.h>
84 
85 #ifdef INET
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/in_var.h>
89 #include <netinet/ip.h>
90 #include <netinet/ip_var.h>
91 #else
92 #error "Huh? if_gre without inet?"
93 #endif
94 
95 
96 #ifdef NETATALK
97 #include <netatalk/at.h>
98 #include <netatalk/at_var.h>
99 #include <netatalk/at_extern.h>
100 #endif
101 
102 #if NBPFILTER > 0
103 #include <sys/time.h>
104 #include <net/bpf.h>
105 #endif
106 
107 #include <net/if_gre.h>
108 
109 /*
110  * It is not easy to calculate the right value for a GRE MTU.
111  * We leave this task to the admin and use the same default that
112  * other vendors use.
113  */
114 #define GREMTU 1476
115 
/*
 * Debug tracing.  In a kernel built with GRE_DEBUG, GRE_DPRINTF() prints
 * only for interfaces that have IFF_DEBUG set.  Without GRE_DEBUG it
 * expands to an empty statement and none of its arguments are evaluated.
 * At least one argument must follow the format string.
 */
#ifndef GRE_DEBUG
#define	GRE_DPRINTF(gsc, gfmt, ...)	do { } while (/*CONSTCOND*/0)
#else /* GRE_DEBUG */
#define	GRE_DPRINTF(gsc, gfmt, ...)				\
	do {							\
		if (((gsc)->sc_if.if_flags & IFF_DEBUG) != 0)	\
			printf(gfmt, __VA_ARGS__);		\
	} while (/*CONSTCOND*/0)
#endif /* GRE_DEBUG */
125 
126 struct gre_softc_head gre_softc_list;
127 int ip_gre_ttl = GRE_TTL;
128 
129 static int	gre_clone_create(struct if_clone *, int);
130 static int	gre_clone_destroy(struct ifnet *);
131 
132 static struct if_clone gre_cloner =
133     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
134 
135 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
136 			   struct rtentry *);
137 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
138 
139 static int	gre_compute_route(struct gre_softc *sc);
140 
141 static int gre_getsockname(struct socket *, struct mbuf *, struct lwp *);
142 static int gre_getpeername(struct socket *, struct mbuf *, struct lwp *);
143 static int gre_getnames(struct socket *, struct lwp *, struct sockaddr_in *,
144     struct sockaddr_in *);
145 
/*
 * Clear the "running" flag and wake up anyone sleeping on it
 * (see gre_join()).
 */
static void
gre_stop(int *running)
{

	*running = 0;
	wakeup(running);
}
152 
153 static void
154 gre_join(int *running)
155 {
156 	int s;
157 
158 	s = splnet();
159 	while (*running != 0) {
160 		splx(s);
161 		tsleep(running, PSOCK, "grejoin", 0);
162 		s = splnet();
163 	}
164 	splx(s);
165 }
166 
167 static void
168 gre_wakeup(struct gre_softc *sc)
169 {
170 	GRE_DPRINTF(sc, "%s: enter\n", __func__);
171 	sc->sc_waitchan = 1;
172 	wakeup(&sc->sc_waitchan);
173 }
174 
175 static int
176 gre_clone_create(struct if_clone *ifc, int unit)
177 {
178 	struct gre_softc *sc;
179 
180 	sc = malloc(sizeof(struct gre_softc), M_DEVBUF, M_WAITOK);
181 	memset(sc, 0, sizeof(struct gre_softc));
182 
183 	snprintf(sc->sc_if.if_xname, sizeof(sc->sc_if.if_xname), "%s%d",
184 	    ifc->ifc_name, unit);
185 	sc->sc_if.if_softc = sc;
186 	sc->sc_if.if_type = IFT_TUNNEL;
187 	sc->sc_if.if_addrlen = 0;
188 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
189 	sc->sc_if.if_dlt = DLT_NULL;
190 	sc->sc_if.if_mtu = GREMTU;
191 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
192 	sc->sc_if.if_output = gre_output;
193 	sc->sc_if.if_ioctl = gre_ioctl;
194 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
195 	sc->g_dstport = sc->g_srcport = 0;
196 	sc->g_proto = IPPROTO_GRE;
197 	sc->sc_snd.ifq_maxlen = 256;
198 	sc->sc_if.if_flags |= IFF_LINK0;
199 	if_attach(&sc->sc_if);
200 	if_alloc_sadl(&sc->sc_if);
201 #if NBPFILTER > 0
202 	bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int32_t));
203 #endif
204 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
205 	return (0);
206 }
207 
208 static int
209 gre_clone_destroy(struct ifnet *ifp)
210 {
211 	struct gre_softc *sc = ifp->if_softc;
212 
213 	LIST_REMOVE(sc, sc_list);
214 #if NBPFILTER > 0
215 	bpfdetach(ifp);
216 #endif
217 	if_detach(ifp);
218 	gre_wakeup(sc);
219 	gre_join(&sc->sc_thread);
220 	if (sc->sc_fp != NULL) {
221 		closef(sc->sc_fp, curlwp);
222 		sc->sc_fp = NULL;
223 	}
224 	free(sc, M_DEVBUF);
225 
226 	return (0);
227 }
228 
229 static void
230 gre_receive(struct socket *so __unused, caddr_t arg, int waitflag __unused)
231 {
232 	struct gre_softc *sc = (struct gre_softc *)arg;
233 
234 	GRE_DPRINTF(sc, "%s: enter\n", __func__);
235 
236 	gre_wakeup(sc);
237 }
238 
239 static void
240 gre_upcall_add(struct socket *so, caddr_t arg)
241 {
242 	/* XXX What if the kernel already set an upcall? */
243 	so->so_upcallarg = arg;
244 	so->so_upcall = gre_receive;
245 	so->so_rcv.sb_flags |= SB_UPCALL;
246 }
247 
248 static void
249 gre_upcall_remove(struct socket *so)
250 {
251 	/* XXX What if the kernel already set an upcall? */
252 	so->so_rcv.sb_flags &= ~SB_UPCALL;
253 	so->so_upcallarg = NULL;
254 	so->so_upcall = NULL;
255 }
256 
257 static void
258 gre_sodestroy(struct socket **sop)
259 {
260 	gre_upcall_remove(*sop);
261 	soshutdown(*sop, SHUT_RDWR);
262 	soclose(*sop);
263 	*sop = NULL;
264 }
265 
266 static struct mbuf *
267 gre_getsockmbuf(struct socket *so __unused)
268 {
269 	struct mbuf *m;
270 
271 	m = m_get(M_WAIT, MT_SONAME);
272 	if (m != NULL)
273 		MCLAIM(m, so->so_mowner);
274 	return m;
275 }
276 
277 static int
278 gre_socreate1(struct gre_softc *sc, struct lwp *l, struct gre_soparm *sp,
279     struct socket **sop)
280 {
281 	int rc;
282 	struct mbuf *m;
283 	struct sockaddr_in *sin;
284 	struct socket *so;
285 
286 	GRE_DPRINTF(sc, "%s: enter\n", __func__);
287 	rc = socreate(AF_INET, sop, SOCK_DGRAM, IPPROTO_UDP, l);
288 	if (rc != 0) {
289 		GRE_DPRINTF(sc, "%s: socreate failed\n", __func__);
290 		return rc;
291 	}
292 
293 	so = *sop;
294 
295 	gre_upcall_add(so, (caddr_t)sc);
296 	if ((m = gre_getsockmbuf(so)) == NULL) {
297 		rc = ENOBUFS;
298 		goto out;
299 	}
300 	sin = mtod(m, struct sockaddr_in *);
301 	sin->sin_len = m->m_len = sizeof(struct sockaddr_in);
302 	sin->sin_family = AF_INET;
303 	sin->sin_addr = sc->g_src;
304 	sin->sin_port = sc->g_srcport;
305 
306 	GRE_DPRINTF(sc, "%s: bind 0x%08" PRIx32 " port %d\n", __func__,
307 	    sin->sin_addr.s_addr, ntohs(sin->sin_port));
308 	if ((rc = sobind(so, m, l)) != 0) {
309 		GRE_DPRINTF(sc, "%s: sobind failed\n", __func__);
310 		goto out;
311 	}
312 
313 	if (sc->g_srcport == 0) {
314 		if ((rc = gre_getsockname(so, m, l)) != 0) {
315 			GRE_DPRINTF(sc, "%s: gre_getsockname failed\n",
316 			    __func__);
317 			goto out;
318 		}
319 		sc->g_srcport = sin->sin_port;
320 	}
321 
322 	sin->sin_addr = sc->g_dst;
323 	sin->sin_port = sc->g_dstport;
324 
325 	if ((rc = soconnect(so, m, l)) != 0) {
326 		GRE_DPRINTF(sc, "%s: soconnect failed\n", __func__);
327 		goto out;
328 	}
329 
330 	*mtod(m, int *) = ip_gre_ttl;
331 	rc = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, IPPROTO_IP, IP_TTL,
332 	    &m);
333 	m = NULL;
334 	if (rc != 0) {
335 		printf("%s: setopt ttl failed\n", __func__);
336 		rc = 0;
337 	}
338 out:
339 	m_freem(m);
340 
341 	if (rc != 0)
342 		gre_sodestroy(sop);
343 	else
344 		*sp = sc->sc_soparm;
345 
346 	return rc;
347 }
348 
349 static void
350 gre_thread1(struct gre_softc *sc, struct lwp *l)
351 {
352 	int flags, rc, s;
353 	const struct gre_h *gh;
354 	struct ifnet *ifp = &sc->sc_if;
355 	struct mbuf *m;
356 	struct socket *so = NULL;
357 	struct uio uio;
358 	struct gre_soparm sp;
359 
360 	GRE_DPRINTF(sc, "%s: enter\n", __func__);
361 	s = splnet();
362 
363 	sc->sc_waitchan = 1;
364 
365 	memset(&sp, 0, sizeof(sp));
366 	memset(&uio, 0, sizeof(uio));
367 
368 	ifp->if_flags |= IFF_RUNNING;
369 
370 	for (;;) {
371 		while (sc->sc_waitchan == 0) {
372 			splx(s);
373 			GRE_DPRINTF(sc, "%s: sleeping\n", __func__);
374 			tsleep(&sc->sc_waitchan, PSOCK, "grewait", 0);
375 			s = splnet();
376 		}
377 		sc->sc_waitchan = 0;
378 		GRE_DPRINTF(sc, "%s: awake\n", __func__);
379 		if ((ifp->if_flags & IFF_UP) != IFF_UP) {
380 			GRE_DPRINTF(sc, "%s: not up & running; exiting\n",
381 			    __func__);
382 			break;
383 		}
384 		if (sc->g_proto != IPPROTO_UDP) {
385 			GRE_DPRINTF(sc, "%s: not udp; exiting\n", __func__);
386 			break;
387 		}
388 		/* XXX optimize */
389 		if (so == NULL || memcmp(&sp, &sc->sc_soparm, sizeof(sp)) != 0){
390 			GRE_DPRINTF(sc, "%s: parameters changed\n", __func__);
391 
392 			if (sp.sp_fp != NULL) {
393 				FILE_UNUSE(sp.sp_fp, NULL);
394 				sp.sp_fp = NULL;
395 				so = NULL;
396 			} else if (so != NULL)
397 				gre_sodestroy(&so);
398 
399 			if (sc->sc_fp != NULL) {
400 				so = (struct socket *)sc->sc_fp->f_data;
401 				gre_upcall_add(so, (caddr_t)sc);
402 				sp = sc->sc_soparm;
403 				FILE_USE(sp.sp_fp);
404 			} else if (gre_socreate1(sc, l, &sp, &so) != 0)
405 				goto out;
406 		}
407 		for (;;) {
408 			flags = MSG_DONTWAIT;
409 			uio.uio_resid = 1000000;
410 			rc = (*so->so_receive)(so, NULL, &uio, &m, NULL,
411 			    &flags);
412 			/* TBD Back off if ECONNREFUSED (indicates
413 			 * ICMP Port Unreachable)?
414 			 */
415 			if (rc == EWOULDBLOCK) {
416 				GRE_DPRINTF(sc, "%s: so_receive EWOULDBLOCK\n",
417 				    __func__);
418 				break;
419 			} else if (rc != 0 || m == NULL) {
420 				GRE_DPRINTF(sc, "%s: rc %d m %p\n",
421 				    ifp->if_xname, rc, (void *)m);
422 				continue;
423 			} else
424 				GRE_DPRINTF(sc, "%s: so_receive ok\n",
425 				    __func__);
426 			if (m->m_len < sizeof(*gh) &&
427 			    (m = m_pullup(m, sizeof(*gh))) == NULL) {
428 				GRE_DPRINTF(sc, "%s: m_pullup failed\n",
429 				    __func__);
430 				continue;
431 			}
432 			gh = mtod(m, const struct gre_h *);
433 
434 			if (gre_input3(sc, m, 0, IPPROTO_GRE, gh) == 0) {
435 				GRE_DPRINTF(sc, "%s: dropping unsupported\n",
436 				    __func__);
437 				ifp->if_ierrors++;
438 				m_freem(m);
439 			}
440 		}
441 		for (;;) {
442 			IF_DEQUEUE(&sc->sc_snd, m);
443 			if (m == NULL)
444 				break;
445 			GRE_DPRINTF(sc, "%s: dequeue\n", __func__);
446 			if ((so->so_state & SS_ISCONNECTED) == 0) {
447 				GRE_DPRINTF(sc, "%s: not connected\n",
448 				    __func__);
449 				m_freem(m);
450 				continue;
451 			}
452 			rc = (*so->so_send)(so, NULL, NULL, m, NULL, 0, l);
453 			/* XXX handle ENOBUFS? */
454 			if (rc != 0)
455 				GRE_DPRINTF(sc, "%s: so_send failed\n",
456 				    __func__);
457 		}
458 		/* Give the software interrupt queues a chance to
459 		 * run, or else when I send a ping from gre0 to gre1 on
460 		 * the same host, gre0 will not wake for the reply.
461 		 */
462 		splx(s);
463 		s = splnet();
464 	}
465 	if (sp.sp_fp != NULL) {
466 		GRE_DPRINTF(sc, "%s: removing upcall\n", __func__);
467 		gre_upcall_remove(so);
468 		FILE_UNUSE(sp.sp_fp, NULL);
469 		sp.sp_fp = NULL;
470 	} else if (so != NULL)
471 		gre_sodestroy(&so);
472 out:
473 	GRE_DPRINTF(sc, "%s: stopping\n", __func__);
474 	if (sc->g_proto == IPPROTO_UDP)
475 		ifp->if_flags &= ~IFF_RUNNING;
476 	while (!IF_IS_EMPTY(&sc->sc_snd)) {
477 		IF_DEQUEUE(&sc->sc_snd, m);
478 		m_freem(m);
479 	}
480 	gre_stop(&sc->sc_thread);
481 	/* must not touch sc after this! */
482 	GRE_DPRINTF(sc, "%s: restore ipl\n", __func__);
483 	splx(s);
484 }
485 
486 static void
487 gre_thread(void *arg)
488 {
489 	struct gre_softc *sc = (struct gre_softc *)arg;
490 
491 	gre_thread1(sc, curlwp);
492 	/* must not touch sc after this! */
493 	kthread_exit(0);
494 }
495 
496 int
497 gre_input3(struct gre_softc *sc, struct mbuf *m, int hlen, u_char proto,
498     const struct gre_h *gh)
499 {
500 	u_int16_t flags;
501 #if NBPFILTER > 0
502 	u_int32_t af = AF_INET;		/* af passed to BPF tap */
503 #endif
504 	int s, isr;
505 	struct ifqueue *ifq;
506 
507 	sc->sc_if.if_ipackets++;
508 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
509 
510 	switch (proto) {
511 	case IPPROTO_GRE:
512 		hlen += sizeof(struct gre_h);
513 
514 		/* process GRE flags as packet can be of variable len */
515 		flags = ntohs(gh->flags);
516 
517 		/* Checksum & Offset are present */
518 		if ((flags & GRE_CP) | (flags & GRE_RP))
519 			hlen += 4;
520 		/* We don't support routing fields (variable length) */
521 		if (flags & GRE_RP)
522 			return (0);
523 		if (flags & GRE_KP)
524 			hlen += 4;
525 		if (flags & GRE_SP)
526 			hlen += 4;
527 
528 		switch (ntohs(gh->ptype)) { /* ethertypes */
529 		case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */
530 			ifq = &ipintrq;          /* we are in ip_input */
531 			isr = NETISR_IP;
532 			break;
533 #ifdef NETATALK
534 		case ETHERTYPE_ATALK:
535 			ifq = &atintrq1;
536 			isr = NETISR_ATALK;
537 #if NBPFILTER > 0
538 			af = AF_APPLETALK;
539 #endif
540 			break;
541 #endif
542 #ifdef INET6
543 		case ETHERTYPE_IPV6:
544 			GRE_DPRINTF(sc, "%s: IPv6 packet\n", __func__);
545 			ifq = &ip6intrq;
546 			isr = NETISR_IPV6;
547 #if NBPFILTER > 0
548 			af = AF_INET6;
549 #endif
550 			break;
551 #endif
552 		default:	   /* others not yet supported */
553 			printf("%s: unhandled ethertype 0x%04x\n", __func__,
554 			    ntohs(gh->ptype));
555 			return (0);
556 		}
557 		break;
558 	default:
559 		/* others not yet supported */
560 		return (0);
561 	}
562 
563 	if (hlen > m->m_pkthdr.len) {
564 		m_freem(m);
565 		sc->sc_if.if_ierrors++;
566 		return (EINVAL);
567 	}
568 	m_adj(m, hlen);
569 
570 #if NBPFILTER > 0
571 	if (sc->sc_if.if_bpf != NULL)
572 		bpf_mtap_af(sc->sc_if.if_bpf, af, m);
573 #endif /*NBPFILTER > 0*/
574 
575 	m->m_pkthdr.rcvif = &sc->sc_if;
576 
577 	s = splnet();		/* possible */
578 	if (IF_QFULL(ifq)) {
579 		IF_DROP(ifq);
580 		m_freem(m);
581 	} else {
582 		IF_ENQUEUE(ifq, m);
583 	}
584 	/* we need schednetisr since the address family may change */
585 	schednetisr(isr);
586 	splx(s);
587 
588 	return (1);	/* packet is done, no further processing needed */
589 }
590 
591 /*
592  * The output routine. Takes a packet and encapsulates it in the protocol
593  * given by sc->g_proto. See also RFC 1701 and RFC 2004
594  */
595 static int
596 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
597 	   struct rtentry *rt __unused)
598 {
599 	int error = 0, hlen;
600 	struct gre_softc *sc = ifp->if_softc;
601 	struct greip *gi;
602 	struct gre_h *gh;
603 	struct ip *eip, *ip;
604 	u_int8_t ip_tos = 0;
605 	u_int16_t etype = 0;
606 	struct mobile_h mob_h;
607 
608 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
609 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
610 		m_freem(m);
611 		error = ENETDOWN;
612 		goto end;
613 	}
614 
615 	gi = NULL;
616 	ip = NULL;
617 
618 #if NBPFILTER >0
619 	if (ifp->if_bpf)
620 		bpf_mtap_af(ifp->if_bpf, dst->sa_family, m);
621 #endif
622 
623 	m->m_flags &= ~(M_BCAST|M_MCAST);
624 
625 	switch (sc->g_proto) {
626 	case IPPROTO_MOBILE:
627 		if (dst->sa_family == AF_INET) {
628 			int msiz;
629 
630 			if (M_UNWRITABLE(m, sizeof(*ip)) &&
631 			    (m = m_pullup(m, sizeof(*ip))) == NULL) {
632 				error = ENOBUFS;
633 				goto end;
634 			}
635 			ip = mtod(m, struct ip *);
636 
637 			memset(&mob_h, 0, MOB_H_SIZ_L);
638 			mob_h.proto = (ip->ip_p) << 8;
639 			mob_h.odst = ip->ip_dst.s_addr;
640 			ip->ip_dst.s_addr = sc->g_dst.s_addr;
641 
642 			/*
643 			 * If the packet comes from our host, we only change
644 			 * the destination address in the IP header.
645 			 * Else we also need to save and change the source
646 			 */
647 			if (in_hosteq(ip->ip_src, sc->g_src)) {
648 				msiz = MOB_H_SIZ_S;
649 			} else {
650 				mob_h.proto |= MOB_H_SBIT;
651 				mob_h.osrc = ip->ip_src.s_addr;
652 				ip->ip_src.s_addr = sc->g_src.s_addr;
653 				msiz = MOB_H_SIZ_L;
654 			}
655 			HTONS(mob_h.proto);
656 			mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz);
657 
658 			M_PREPEND(m, msiz, M_DONTWAIT);
659 			if (m == NULL) {
660 				error = ENOBUFS;
661 				goto end;
662 			}
663 			/* XXX Assuming that ip does not dangle after
664 			 * M_PREPEND.  In practice, that's true, but
665 			 * that's in M_PREPEND's contract.
666 			 */
667 			memmove(mtod(m, caddr_t), ip, sizeof(*ip));
668 			ip = mtod(m, struct ip *);
669 			memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
670 			ip->ip_len = htons(ntohs(ip->ip_len) + msiz);
671 		} else {  /* AF_INET */
672 			IF_DROP(&ifp->if_snd);
673 			m_freem(m);
674 			error = EINVAL;
675 			goto end;
676 		}
677 		break;
678 	case IPPROTO_UDP:
679 	case IPPROTO_GRE:
680 		GRE_DPRINTF(sc, "%s: dst->sa_family=%d\n", __func__,
681 		    dst->sa_family);
682 		switch (dst->sa_family) {
683 		case AF_INET:
684 			ip = mtod(m, struct ip *);
685 			ip_tos = ip->ip_tos;
686 			etype = ETHERTYPE_IP;
687 			break;
688 #ifdef NETATALK
689 		case AF_APPLETALK:
690 			etype = ETHERTYPE_ATALK;
691 			break;
692 #endif
693 #ifdef INET6
694 		case AF_INET6:
695 			etype = ETHERTYPE_IPV6;
696 			break;
697 #endif
698 		default:
699 			IF_DROP(&ifp->if_snd);
700 			m_freem(m);
701 			error = EAFNOSUPPORT;
702 			goto end;
703 		}
704 		break;
705 	default:
706 		IF_DROP(&ifp->if_snd);
707 		m_freem(m);
708 		error = EINVAL;
709 		goto end;
710 	}
711 
712 	switch (sc->g_proto) {
713 	case IPPROTO_GRE:
714 		hlen = sizeof(struct greip);
715 		break;
716 	case IPPROTO_UDP:
717 		hlen = sizeof(struct gre_h);
718 		break;
719 	default:
720 		hlen = 0;
721 		break;
722 	}
723 
724 	M_PREPEND(m, hlen, M_DONTWAIT);
725 
726 	if (m == NULL) {
727 		IF_DROP(&ifp->if_snd);
728 		error = ENOBUFS;
729 		goto end;
730 	}
731 
732 	switch (sc->g_proto) {
733 	case IPPROTO_UDP:
734 		gh = mtod(m, struct gre_h *);
735 		memset(gh, 0, sizeof(*gh));
736 		gh->ptype = htons(etype);
737 		/* XXX Need to handle IP ToS.  Look at how I handle IP TTL. */
738 		break;
739 	case IPPROTO_GRE:
740 		gi = mtod(m, struct greip *);
741 		gh = &gi->gi_g;
742 		eip = &gi->gi_i;
743 		/* we don't have any GRE flags for now */
744 		memset(gh, 0, sizeof(*gh));
745 		gh->ptype = htons(etype);
746 		eip->ip_src = sc->g_src;
747 		eip->ip_dst = sc->g_dst;
748 		eip->ip_hl = (sizeof(struct ip)) >> 2;
749 		eip->ip_ttl = ip_gre_ttl;
750 		eip->ip_tos = ip_tos;
751 		eip->ip_len = htons(m->m_pkthdr.len);
752 		eip->ip_p = sc->g_proto;
753 		break;
754 	case IPPROTO_MOBILE:
755 		eip = mtod(m, struct ip *);
756 		eip->ip_p = sc->g_proto;
757 		break;
758 	default:
759 		error = EPROTONOSUPPORT;
760 		m_freem(m);
761 		goto end;
762 	}
763 
764 	ifp->if_opackets++;
765 	ifp->if_obytes += m->m_pkthdr.len;
766 
767 	/* send it off */
768 	if (sc->g_proto == IPPROTO_UDP) {
769 		if (IF_QFULL(&sc->sc_snd)) {
770 			IF_DROP(&sc->sc_snd);
771 			error = ENOBUFS;
772 			m_freem(m);
773 		} else {
774 			IF_ENQUEUE(&sc->sc_snd, m);
775 			gre_wakeup(sc);
776 			error = 0;
777 		}
778 	} else {
779 		error = ip_output(m, NULL, &sc->route, 0,
780 		    (struct ip_moptions *)NULL, (struct socket *)NULL);
781 	}
782   end:
783 	if (error)
784 		ifp->if_oerrors++;
785 	return (error);
786 }
787 
788 /* Must be called at IPL_NET. */
789 static int
790 gre_kick(struct gre_softc *sc)
791 {
792 	int rc;
793 	struct ifnet *ifp = &sc->sc_if;
794 
795 	if (sc->g_proto == IPPROTO_UDP && (ifp->if_flags & IFF_UP) == IFF_UP &&
796 	    !sc->sc_thread) {
797 		sc->sc_thread = 1;
798 		rc = kthread_create1(gre_thread, (void *)sc, NULL,
799 		    ifp->if_xname);
800 		if (rc != 0)
801 			gre_stop(&sc->sc_thread);
802 		return rc;
803 	} else {
804 		gre_wakeup(sc);
805 		return 0;
806 	}
807 }
808 
809 static int
810 gre_getname(struct socket *so, int req, struct mbuf *nam, struct lwp *l)
811 {
812 	int s, error;
813 
814 	s = splsoftnet();
815 	error = (*so->so_proto->pr_usrreq)(so, req, (struct mbuf *)0,
816 	    nam, (struct mbuf *)0, l);
817 	splx(s);
818 	return error;
819 }
820 
821 static int
822 gre_getsockname(struct socket *so, struct mbuf *nam, struct lwp *l)
823 {
824 	return gre_getname(so, PRU_SOCKADDR, nam, l);
825 }
826 
827 static int
828 gre_getpeername(struct socket *so, struct mbuf *nam, struct lwp *l)
829 {
830 	return gre_getname(so, PRU_PEERADDR, nam, l);
831 }
832 
833 static int
834 gre_getnames(struct socket *so, struct lwp *l, struct sockaddr_in *src,
835     struct sockaddr_in *dst)
836 {
837 	struct mbuf *m;
838 	struct sockaddr_in *sin;
839 	int rc;
840 
841 	if ((m = gre_getsockmbuf(so)) == NULL)
842 		return ENOBUFS;
843 
844 	sin = mtod(m, struct sockaddr_in *);
845 
846 	if ((rc = gre_getsockname(so, m, l)) != 0)
847 		goto out;
848 	if (sin->sin_family != AF_INET) {
849 		rc = EAFNOSUPPORT;
850 		goto out;
851 	}
852 	*src = *sin;
853 
854 	if ((rc = gre_getpeername(so, m, l)) != 0)
855 		goto out;
856 	if (sin->sin_family != AF_INET) {
857 		rc = EAFNOSUPPORT;
858 		goto out;
859 	}
860 	*dst = *sin;
861 
862 out:
863 	m_freem(m);
864 	return rc;
865 }
866 
867 static int
868 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
869 {
870 	u_char oproto;
871 	struct file *fp, *ofp;
872 	struct socket *so;
873 	struct sockaddr_in dst, src;
874 	struct proc *p = curproc;	/* XXX */
875 	struct lwp *l = curlwp;	/* XXX */
876 	struct ifreq *ifr = (struct ifreq *)data;
877 	struct if_laddrreq *lifr = (struct if_laddrreq *)data;
878 	struct gre_softc *sc = ifp->if_softc;
879 	int s;
880 	struct sockaddr_in si;
881 	struct sockaddr *sa = NULL;
882 	int error;
883 
884 	switch (cmd) {
885 	case SIOCSIFFLAGS:
886 	case SIOCSIFMTU:
887 	case GRESPROTO:
888 	case GRESADDRD:
889 	case GRESADDRS:
890 	case GRESSOCK:
891 	case GREDSOCK:
892 	case SIOCSLIFPHYADDR:
893 	case SIOCDIFPHYADDR:
894 		if ((error = kauth_authorize_generic(l->l_cred,
895 		    KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
896 			return (error);
897 		break;
898 	default:
899 		error = 0;
900 		break;
901 	}
902 
903 	s = splnet();
904 	switch (cmd) {
905 	case SIOCSIFADDR:
906 		ifp->if_flags |= IFF_UP;
907 		error = gre_kick(sc);
908 		break;
909 	case SIOCSIFDSTADDR:
910 		break;
911 	case SIOCSIFFLAGS:
912 		oproto = sc->g_proto;
913 		switch (ifr->ifr_flags & (IFF_LINK0|IFF_LINK2)) {
914 		case IFF_LINK0|IFF_LINK2:
915 			sc->g_proto = IPPROTO_UDP;
916 			if (oproto != IPPROTO_UDP)
917 				ifp->if_flags &= ~IFF_RUNNING;
918 			error = gre_kick(sc);
919 			break;
920 		case IFF_LINK0:
921 			sc->g_proto = IPPROTO_GRE;
922 			gre_wakeup(sc);
923 			goto recompute;
924 		case 0:
925 			sc->g_proto = IPPROTO_MOBILE;
926 			gre_wakeup(sc);
927 			goto recompute;
928 		}
929 		break;
930 	case SIOCSIFMTU:
931 		if (ifr->ifr_mtu < 576) {
932 			error = EINVAL;
933 			break;
934 		}
935 		ifp->if_mtu = ifr->ifr_mtu;
936 		break;
937 	case SIOCGIFMTU:
938 		ifr->ifr_mtu = sc->sc_if.if_mtu;
939 		break;
940 	case SIOCADDMULTI:
941 	case SIOCDELMULTI:
942 		if (ifr == 0) {
943 			error = EAFNOSUPPORT;
944 			break;
945 		}
946 		switch (ifr->ifr_addr.sa_family) {
947 #ifdef INET
948 		case AF_INET:
949 			break;
950 #endif
951 #ifdef INET6
952 		case AF_INET6:
953 			break;
954 #endif
955 		default:
956 			error = EAFNOSUPPORT;
957 			break;
958 		}
959 		break;
960 	case GRESPROTO:
961 		oproto = sc->g_proto;
962 		sc->g_proto = ifr->ifr_flags;
963 		switch (sc->g_proto) {
964 		case IPPROTO_UDP:
965 			ifp->if_flags |= IFF_LINK0|IFF_LINK2;
966 			if (oproto != IPPROTO_UDP)
967 				ifp->if_flags &= ~IFF_RUNNING;
968 			error = gre_kick(sc);
969 			break;
970 		case IPPROTO_GRE:
971 			ifp->if_flags |= IFF_LINK0;
972 			ifp->if_flags &= ~IFF_LINK2;
973 			goto recompute;
974 		case IPPROTO_MOBILE:
975 			ifp->if_flags &= ~(IFF_LINK0|IFF_LINK2);
976 			goto recompute;
977 		default:
978 			error = EPROTONOSUPPORT;
979 			break;
980 		}
981 		break;
982 	case GREGPROTO:
983 		ifr->ifr_flags = sc->g_proto;
984 		break;
985 	case GRESADDRS:
986 	case GRESADDRD:
987 		/*
988 		 * set tunnel endpoints, compute a less specific route
989 		 * to the remote end and mark if as up
990 		 */
991 		sa = &ifr->ifr_addr;
992 		if (cmd == GRESADDRS) {
993 			sc->g_src = (satosin(sa))->sin_addr;
994 			sc->g_srcport = satosin(sa)->sin_port;
995 		}
996 		if (cmd == GRESADDRD) {
997 			if (sc->g_proto == IPPROTO_UDP &&
998 			    satosin(sa)->sin_port == 0) {
999 				error = EINVAL;
1000 				break;
1001 			}
1002 			sc->g_dst = (satosin(sa))->sin_addr;
1003 			sc->g_dstport = satosin(sa)->sin_port;
1004 		}
1005 	recompute:
1006 		if (sc->g_proto == IPPROTO_UDP ||
1007 		    (sc->g_src.s_addr != INADDR_ANY &&
1008 		     sc->g_dst.s_addr != INADDR_ANY)) {
1009 			if (sc->sc_fp != NULL) {
1010 				closef(sc->sc_fp, l);
1011 				sc->sc_fp = NULL;
1012 			}
1013 			if (sc->route.ro_rt != NULL) {
1014 				RTFREE(sc->route.ro_rt);
1015 				sc->route.ro_rt = NULL;
1016 			}
1017 			if (sc->g_proto == IPPROTO_UDP)
1018 				error = gre_kick(sc);
1019 			else if (gre_compute_route(sc) == 0)
1020 				ifp->if_flags |= IFF_RUNNING;
1021 			else
1022 				ifp->if_flags &= ~IFF_RUNNING;
1023 		}
1024 		break;
1025 	case GREGADDRS:
1026 		memset(&si, 0, sizeof(si));
1027 		si.sin_family = AF_INET;
1028 		si.sin_len = sizeof(struct sockaddr_in);
1029 		si.sin_addr.s_addr = sc->g_src.s_addr;
1030 		sa = sintosa(&si);
1031 		ifr->ifr_addr = *sa;
1032 		break;
1033 	case GREGADDRD:
1034 		memset(&si, 0, sizeof(si));
1035 		si.sin_family = AF_INET;
1036 		si.sin_len = sizeof(struct sockaddr_in);
1037 		si.sin_addr.s_addr = sc->g_dst.s_addr;
1038 		sa = sintosa(&si);
1039 		ifr->ifr_addr = *sa;
1040 		break;
1041 	case GREDSOCK:
1042 		if (sc->g_proto != IPPROTO_UDP)
1043 			return EINVAL;
1044 		if (sc->sc_fp != NULL) {
1045 			closef(sc->sc_fp, l);
1046 			sc->sc_fp = NULL;
1047 			error = gre_kick(sc);
1048 		}
1049 		break;
1050 	case GRESSOCK:
1051 		if (sc->g_proto != IPPROTO_UDP)
1052 			return EINVAL;
1053 		/* getsock() will FILE_USE() the descriptor for us */
1054 		if ((error = getsock(p->p_fd, (int)ifr->ifr_value, &fp)) != 0)
1055 			break;
1056 		so = (struct socket *)fp->f_data;
1057 		if (so->so_type != SOCK_DGRAM) {
1058 			FILE_UNUSE(fp, NULL);
1059 			error = EINVAL;
1060 			break;
1061 		}
1062 		/* check address */
1063 		if ((error = gre_getnames(so, curlwp, &src, &dst)) != 0) {
1064 			FILE_UNUSE(fp, NULL);
1065 			break;
1066 		}
1067 
1068 		fp->f_count++;
1069 
1070 		ofp = sc->sc_fp;
1071 		sc->sc_fp = fp;
1072 		if ((error = gre_kick(sc)) != 0) {
1073 			closef(fp, l);
1074 			sc->sc_fp = ofp;
1075 			break;
1076 		}
1077 		sc->g_src = src.sin_addr;
1078 		sc->g_srcport = src.sin_port;
1079 		sc->g_dst = dst.sin_addr;
1080 		sc->g_dstport = dst.sin_port;
1081 		if (ofp != NULL)
1082 			closef(ofp, l);
1083 		break;
1084 	case SIOCSLIFPHYADDR:
1085 		if (lifr->addr.ss_family != AF_INET ||
1086 		    lifr->dstaddr.ss_family != AF_INET) {
1087 			error = EAFNOSUPPORT;
1088 			break;
1089 		}
1090 		if (lifr->addr.ss_len != sizeof(si) ||
1091 		    lifr->dstaddr.ss_len != sizeof(si)) {
1092 			error = EINVAL;
1093 			break;
1094 		}
1095 		sc->g_src = satosin(&lifr->addr)->sin_addr;
1096 		sc->g_dst = satosin(&lifr->dstaddr)->sin_addr;
1097 		sc->g_srcport = satosin(&lifr->addr)->sin_port;
1098 		sc->g_dstport = satosin(&lifr->dstaddr)->sin_port;
1099 		goto recompute;
1100 	case SIOCDIFPHYADDR:
1101 		sc->g_src.s_addr = INADDR_ANY;
1102 		sc->g_dst.s_addr = INADDR_ANY;
1103 		sc->g_srcport = 0;
1104 		sc->g_dstport = 0;
1105 		goto recompute;
1106 	case SIOCGLIFPHYADDR:
1107 		if (sc->g_src.s_addr == INADDR_ANY ||
1108 		    sc->g_dst.s_addr == INADDR_ANY) {
1109 			error = EADDRNOTAVAIL;
1110 			break;
1111 		}
1112 		memset(&si, 0, sizeof(si));
1113 		si.sin_family = AF_INET;
1114 		si.sin_len = sizeof(struct sockaddr_in);
1115 		si.sin_addr = sc->g_src;
1116 		if (sc->g_proto == IPPROTO_UDP)
1117 			si.sin_port = sc->g_srcport;
1118 		memcpy(&lifr->addr, &si, sizeof(si));
1119 		si.sin_addr = sc->g_dst;
1120 		if (sc->g_proto == IPPROTO_UDP)
1121 			si.sin_port = sc->g_dstport;
1122 		memcpy(&lifr->dstaddr, &si, sizeof(si));
1123 		break;
1124 	default:
1125 		error = EINVAL;
1126 		break;
1127 	}
1128 	splx(s);
1129 	return (error);
1130 }
1131 
1132 /*
1133  * computes a route to our destination that is not the one
1134  * which would be taken by ip_output(), as this one will loop back to
1135  * us. If the interface is p2p as  a--->b, then a routing entry exists
1136  * If we now send a packet to b (e.g. ping b), this will come down here
1137  * gets src=a, dst=b tacked on and would from ip_output() sent back to
1138  * if_gre.
1139  * Goal here is to compute a route to b that is less specific than
1140  * a-->b. We know that this one exists as in normal operation we have
1141  * at least a default route which matches.
1142  */
1143 static int
1144 gre_compute_route(struct gre_softc *sc)
1145 {
1146 	struct route *ro;
1147 	u_int32_t a, b, c;
1148 
1149 	ro = &sc->route;
1150 
1151 	memset(ro, 0, sizeof(struct route));
1152 	((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
1153 	ro->ro_dst.sa_family = AF_INET;
1154 	ro->ro_dst.sa_len = sizeof(ro->ro_dst);
1155 
1156 	/*
1157 	 * toggle last bit, so our interface is not found, but a less
1158 	 * specific route. I'd rather like to specify a shorter mask,
1159 	 * but this is not possible. Should work though. XXX
1160 	 * there is a simpler way ...
1161 	 */
1162 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0) {
1163 		a = ntohl(sc->g_dst.s_addr);
1164 		b = a & 0x01;
1165 		c = a & 0xfffffffe;
1166 		b = b ^ 0x01;
1167 		a = b | c;
1168 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr
1169 		    = htonl(a);
1170 	}
1171 
1172 #ifdef DIAGNOSTIC
1173 	printf("%s: searching for a route to %s", sc->sc_if.if_xname,
1174 	    inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr));
1175 #endif
1176 
1177 	rtalloc(ro);
1178 
1179 	/*
1180 	 * check if this returned a route at all and this route is no
1181 	 * recursion to ourself
1182 	 */
1183 	if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) {
1184 #ifdef DIAGNOSTIC
1185 		if (ro->ro_rt == NULL)
1186 			printf(" - no route found!\n");
1187 		else
1188 			printf(" - route loops back to ourself!\n");
1189 #endif
1190 		return EADDRNOTAVAIL;
1191 	}
1192 
1193 	/*
1194 	 * now change it back - else ip_output will just drop
1195 	 * the route and search one to this interface ...
1196 	 */
1197 	if ((sc->sc_if.if_flags & IFF_LINK1) == 0)
1198 		((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst;
1199 
1200 #ifdef DIAGNOSTIC
1201 	printf(", choosing %s with gateway %s\n", ro->ro_rt->rt_ifp->if_xname,
1202 	    inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr));
1203 #endif
1204 
1205 	return 0;
1206 }
1207 
/*
 * Internet checksum over a flat buffer - much like in_cksum(), which
 * operates on mbufs.
 *
 * Sums the `len' bytes at `p' as 16-bit words in memory order (a
 * trailing odd byte is padded with a zero byte), folds the carries back
 * into the low 16 bits, and returns the one's complement of the result.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	int i;
	int words = len >> 1;

	for (i = 0; i < words; i++)
		acc += p[i];

	/* Odd length: the last byte forms a word with a zero pad byte. */
	if ((len & 1) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)(p + words);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* end-around-carry */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);

	return (~acc);
}
1236 #endif
1237 
1238 void	greattach(int);
1239 
1240 /* ARGSUSED */
1241 void
1242 greattach(int count __unused)
1243 {
1244 #ifdef INET
1245 	LIST_INIT(&gre_softc_list);
1246 	if_clone_attach(&gre_cloner);
1247 #endif
1248 }
1249