xref: /openbsd-src/sys/net/if_gre.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*      $OpenBSD: if_gre.c,v 1.57 2011/07/12 15:23:50 jsg Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "gre.h"
42 #if NGRE > 0
43 
44 #include "bpfilter.h"
45 #include "pf.h"
46 
47 #include <sys/param.h>
48 #include <sys/proc.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/kernel.h>
53 #include <sys/systm.h>
54 
55 #include <net/if.h>
56 #include <net/if_types.h>
57 #include <net/netisr.h>
58 #include <net/route.h>
59 
60 #ifdef INET
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/if_ether.h>
67 #else
68 #error "if_gre used without inet"
69 #endif
70 
71 #if NBPFILTER > 0
72 #include <net/bpf.h>
73 #endif
74 
75 #if NPF > 0
76 #include <net/pfvar.h>
77 #endif
78 
79 #include <net/if_gre.h>
80 
81 #ifndef GRE_RECURSION_LIMIT
82 #define GRE_RECURSION_LIMIT	3   /* How many levels of recursion allowed */
83 #endif /* GRE_RECURSION_LIMIT */
84 
85 /*
86  * It is not easy to calculate the right value for a GRE MTU.
87  * We leave this task to the admin and use the same default that
88  * other vendors use.
89  */
90 #define GREMTU 1476
91 
/* Cloner callbacks: create and destroy one gre interface instance. */
int	gre_clone_create(struct if_clone *, int);
int	gre_clone_destroy(struct ifnet *);

/* List of all gre softcs; gre_clone_create() inserts, gre_clone_destroy() removes. */
struct gre_softc_head gre_softc_list;
/* Cloner for interfaces named "gre"; registered by greattach(). */
struct if_clone gre_cloner =
    IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
98 
99 /*
100  * We can control the acceptance of GRE and MobileIP packets by
101  * altering the sysctl net.inet.gre.allow and net.inet.mobileip.allow values
102  * respectively. Zero means drop them, all else is acceptance.  We can also
103  * control acceptance of WCCPv1-style GRE packets through the
104  * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
105  * allowed as well.
106  *
107  */
int gre_allow = 0;		/* gre_output() fails with EACCES for GRE while 0 */
int gre_wccp = 0;		/* WCCPv1-style GRE knob; not consulted in this part of the file */
int ip_mobile_allow = 0;	/* gre_output() fails with EACCES for MobileIP while 0 */

/* Keepalive timeout handlers and the link-state helper, defined further down. */
void gre_keepalive(void *);
void gre_send_keepalive(void *);
void gre_link_state(struct gre_softc *);
115 
116 void
117 greattach(int n)
118 {
119 	LIST_INIT(&gre_softc_list);
120 	if_clone_attach(&gre_cloner);
121 }
122 
123 int
124 gre_clone_create(struct if_clone *ifc, int unit)
125 {
126 	struct gre_softc *sc;
127 	int s;
128 
129 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
130 	if (!sc)
131 		return (ENOMEM);
132 	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
133 	    ifc->ifc_name, unit);
134 	sc->sc_if.if_softc = sc;
135 	sc->sc_if.if_type = IFT_TUNNEL;
136 	sc->sc_if.if_addrlen = 0;
137 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
138 	sc->sc_if.if_mtu = GREMTU;
139 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
140 	sc->sc_if.if_output = gre_output;
141 	sc->sc_if.if_ioctl = gre_ioctl;
142 	sc->sc_if.if_collisions = 0;
143 	sc->sc_if.if_ierrors = 0;
144 	sc->sc_if.if_oerrors = 0;
145 	sc->sc_if.if_ipackets = 0;
146 	sc->sc_if.if_opackets = 0;
147 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
148 	sc->g_proto = IPPROTO_GRE;
149 	sc->sc_if.if_flags |= IFF_LINK0;
150 	sc->sc_ka_state = GRE_STATE_UKNWN;
151 
152 	timeout_set(&sc->sc_ka_hold, gre_keepalive, sc);
153 	timeout_set(&sc->sc_ka_snd, gre_send_keepalive, sc);
154 
155 	if_attach(&sc->sc_if);
156 	if_alloc_sadl(&sc->sc_if);
157 
158 #if NBPFILTER > 0
159 	bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_NULL,
160 	    sizeof(u_int32_t));
161 #endif
162 	s = splnet();
163 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
164 	splx(s);
165 
166 	return (0);
167 }
168 
169 int
170 gre_clone_destroy(struct ifnet *ifp)
171 {
172 	struct gre_softc *sc = ifp->if_softc;
173 	int s;
174 
175 	s = splnet();
176 	timeout_del(&sc->sc_ka_snd);
177 	timeout_del(&sc->sc_ka_hold);
178 	LIST_REMOVE(sc, sc_list);
179 	splx(s);
180 
181 	if_detach(ifp);
182 
183 	free(sc, M_DEVBUF);
184 	return (0);
185 }
186 
187 /*
188  * The output routine. Takes a packet and encapsulates it in the protocol
189  * given by sc->g_proto. See also RFC 1701 and RFC 2004.
190  */
191 
192 int
193 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
194 	   struct rtentry *rt)
195 {
196 	int error = 0;
197 	struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc);
198 	struct greip *gh = NULL;
199 	struct ip *inp = NULL;
200 	u_int8_t ip_tos = 0;
201 	u_int16_t etype = 0;
202 	struct mobile_h mob_h;
203 	struct m_tag *mtag;
204 
205 	if ((ifp->if_flags & IFF_UP) == 0 ||
206 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
207 		m_freem(m);
208 		error = ENETDOWN;
209 		goto end;
210 	}
211 
212 #ifdef DIAGNOSTIC
213 	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.rdomain)) {
214 		printf("%s: trying to send packet on wrong domain. "
215 		    "if %d vs. mbuf %d, AF %d\n", ifp->if_xname,
216 		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.rdomain),
217 		    dst->sa_family);
218 	}
219 #endif
220 
221 	/* Try to limit infinite recursion through misconfiguration. */
222 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
223 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
224 		if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) {
225 			IF_DROP(&ifp->if_snd);
226 			m_freem(m);
227 			error = EIO;
228 			goto end;
229 		}
230 	}
231 
232 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT);
233 	if (mtag == NULL) {
234 		IF_DROP(&ifp->if_snd);
235 		m_freem(m);
236 		error = ENOBUFS;
237 		goto end;
238 	}
239 	bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
240 	m_tag_prepend(m, mtag);
241 
242 	m->m_flags &= ~(M_BCAST|M_MCAST);
243 
244 #if NBPFILTER > 0
245 	if (ifp->if_bpf)
246 		bpf_mtap_af(ifp->if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT);
247 #endif
248 
249 	if (sc->g_proto == IPPROTO_MOBILE) {
250 		if (ip_mobile_allow == 0) {
251 			IF_DROP(&ifp->if_snd);
252 			m_freem(m);
253 			error = EACCES;
254 			goto end;
255 		}
256 
257 		if (dst->sa_family == AF_INET) {
258 			struct mbuf *m0;
259 			int msiz;
260 
261 			/*
262 			 * Make sure the complete IP header (with options)
263 			 * is in the first mbuf.
264 			 */
265 			if (m->m_len < sizeof(struct ip)) {
266 				m = m_pullup(m, sizeof(struct ip));
267 				if (m == NULL) {
268 					IF_DROP(&ifp->if_snd);
269 					error = ENOBUFS;
270 					goto end;
271 				} else
272 					inp = mtod(m, struct ip *);
273 
274 				if (m->m_len < inp->ip_hl << 2) {
275 					m = m_pullup(m, inp->ip_hl << 2);
276 					if (m == NULL) {
277 						IF_DROP(&ifp->if_snd);
278 						error = ENOBUFS;
279 						goto end;
280 					}
281 				}
282 			}
283 
284 			inp = mtod(m, struct ip *);
285 
286 			bzero(&mob_h, MOB_H_SIZ_L);
287 			mob_h.proto = (inp->ip_p) << 8;
288 			mob_h.odst = inp->ip_dst.s_addr;
289 			inp->ip_dst.s_addr = sc->g_dst.s_addr;
290 
291 			/*
292 			 * If the packet comes from our host, we only change
293 			 * the destination address in the IP header.
294 			 * Otherwise we need to save and change the source.
295 			 */
296 			if (inp->ip_src.s_addr == sc->g_src.s_addr) {
297 				msiz = MOB_H_SIZ_S;
298 			} else {
299 				mob_h.proto |= MOB_H_SBIT;
300 				mob_h.osrc = inp->ip_src.s_addr;
301 				inp->ip_src.s_addr = sc->g_src.s_addr;
302 				msiz = MOB_H_SIZ_L;
303 			}
304 
305 			HTONS(mob_h.proto);
306 			mob_h.hcrc = gre_in_cksum((u_int16_t *) &mob_h, msiz);
307 
308 			/* Squeeze in the mobility header */
309 			if ((m->m_data - msiz) < m->m_pktdat) {
310 				/* Need new mbuf */
311 				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
312 				if (m0 == NULL) {
313 					IF_DROP(&ifp->if_snd);
314 					m_freem(m);
315 					error = ENOBUFS;
316 					goto end;
317 				}
318 				M_MOVE_HDR(m0, m);
319 
320 				m0->m_len = msiz + (inp->ip_hl << 2);
321 				m0->m_data += max_linkhdr;
322 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
323 				m->m_data += inp->ip_hl << 2;
324 				m->m_len -= inp->ip_hl << 2;
325 
326 				bcopy((caddr_t) inp, mtod(m0, caddr_t),
327 				    sizeof(struct ip));
328 
329 				m0->m_next = m;
330 				m = m0;
331 			} else {  /* we have some space left in the old one */
332 				m->m_data -= msiz;
333 				m->m_len += msiz;
334 				m->m_pkthdr.len += msiz;
335 				bcopy(inp, mtod(m, caddr_t),
336 				    inp->ip_hl << 2);
337 			}
338 
339 			/* Copy Mobility header */
340 			inp = mtod(m, struct ip *);
341 			bcopy(&mob_h, (caddr_t)(inp + 1), (unsigned) msiz);
342 			inp->ip_len = htons(ntohs(inp->ip_len) + msiz);
343 		} else {  /* AF_INET */
344 			IF_DROP(&ifp->if_snd);
345 			m_freem(m);
346 			error = EINVAL;
347 			goto end;
348 		}
349 	} else if (sc->g_proto == IPPROTO_GRE) {
350 		if (gre_allow == 0) {
351 			IF_DROP(&ifp->if_snd);
352 			m_freem(m);
353 			error = EACCES;
354 			goto end;
355 		}
356 
357 		switch(dst->sa_family) {
358 		case AF_INET:
359 			if (m->m_len < sizeof(struct ip)) {
360 				m = m_pullup(m, sizeof(struct ip));
361 				if (m == NULL) {
362 					IF_DROP(&ifp->if_snd);
363 					error = ENOBUFS;
364 					goto end;
365 				}
366 			}
367 
368 			inp = mtod(m, struct ip *);
369 			ip_tos = inp->ip_tos;
370 			etype = ETHERTYPE_IP;
371 			break;
372 #ifdef INET6
373 		case AF_INET6:
374 			etype = ETHERTYPE_IPV6;
375 			break;
376 #endif
377 #ifdef MPLS
378 		case AF_MPLS:
379 			if (m->m_flags & (M_BCAST | M_MCAST))
380 				etype = ETHERTYPE_MPLS_MCAST;
381 			else
382 				etype = ETHERTYPE_MPLS;
383 			break;
384 #endif
385 		default:
386 			IF_DROP(&ifp->if_snd);
387 			m_freem(m);
388 			error = EAFNOSUPPORT;
389 			goto end;
390 		}
391 
392 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
393 	} else {
394 		IF_DROP(&ifp->if_snd);
395 		m_freem(m);
396 		error = EINVAL;
397 		goto end;
398 	}
399 
400 	if (m == NULL) {
401 		IF_DROP(&ifp->if_snd);
402 		error = ENOBUFS;
403 		goto end;
404 	}
405 
406 	gh = mtod(m, struct greip *);
407 	if (sc->g_proto == IPPROTO_GRE) {
408 		/* We don't support any GRE flags for now */
409 
410 		bzero((void *) &gh->gi_g, sizeof(struct gre_h));
411 		gh->gi_ptype = htons(etype);
412 	}
413 
414 	gh->gi_pr = sc->g_proto;
415 	if (sc->g_proto != IPPROTO_MOBILE) {
416 		gh->gi_src = sc->g_src;
417 		gh->gi_dst = sc->g_dst;
418 		((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2;
419 		((struct ip *) gh)->ip_ttl = ip_defttl;
420 		((struct ip *) gh)->ip_tos = ip_tos;
421 		gh->gi_len = htons(m->m_pkthdr.len);
422 	}
423 
424 	ifp->if_opackets++;
425 	ifp->if_obytes += m->m_pkthdr.len;
426 
427 
428 	m->m_pkthdr.rdomain = sc->g_rtableid;
429 
430 #if NPF > 0
431 	pf_pkt_addr_changed(m);
432 #endif
433 
434 	/* Send it off */
435 	error = ip_output(m, (void *)NULL, &sc->route, 0, (void *)NULL, (void *)NULL);
436   end:
437 	if (error)
438 		ifp->if_oerrors++;
439 	return (error);
440 }
441 
442 int
443 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
444 {
445 
446 	struct ifreq *ifr = (struct ifreq *) data;
447 	struct if_laddrreq *lifr = (struct if_laddrreq *)data;
448 	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
449 	struct gre_softc *sc = ifp->if_softc;
450 	int s;
451 	struct sockaddr_in si;
452 	struct sockaddr *sa = NULL;
453 	int error = 0;
454 	struct proc *prc = curproc;		/* XXX */
455 
456 	s = splnet();
457 	switch(cmd) {
458 	case SIOCSIFADDR:
459 		ifp->if_flags |= IFF_UP;
460 		break;
461 	case SIOCSIFDSTADDR:
462 		break;
463 	case SIOCSIFFLAGS:
464 		if ((ifr->ifr_flags & IFF_LINK0) != 0)
465 			sc->g_proto = IPPROTO_GRE;
466 		else
467 			sc->g_proto = IPPROTO_MOBILE;
468 		break;
469 	case SIOCSIFMTU:
470 		if (ifr->ifr_mtu < 576) {
471 			error = EINVAL;
472 			break;
473 		}
474 		ifp->if_mtu = ifr->ifr_mtu;
475 		break;
476 	case SIOCGIFMTU:
477 		ifr->ifr_mtu = sc->sc_if.if_mtu;
478 		break;
479 	case SIOCADDMULTI:
480 	case SIOCDELMULTI:
481 		if (ifr == 0) {
482 			error = EAFNOSUPPORT;
483 			break;
484 		}
485 		switch (ifr->ifr_addr.sa_family) {
486 #ifdef INET
487 		case AF_INET:
488 			break;
489 #endif
490 #ifdef INET6
491 		case AF_INET6:
492 			break;
493 #endif
494 		default:
495 			error = EAFNOSUPPORT;
496 			break;
497 		}
498 		break;
499 	case GRESPROTO:
500 		/* Check for superuser */
501 		if ((error = suser(prc, 0)) != 0)
502 			break;
503 
504 		sc->g_proto = ifr->ifr_flags;
505 		switch (sc->g_proto) {
506 		case IPPROTO_GRE:
507 			ifp->if_flags |= IFF_LINK0;
508 			break;
509 		case IPPROTO_MOBILE:
510 			ifp->if_flags &= ~IFF_LINK0;
511 			break;
512 		default:
513 			error = EPROTONOSUPPORT;
514 			break;
515 		}
516 		break;
517 	case GREGPROTO:
518 		ifr->ifr_flags = sc->g_proto;
519 		break;
520 	case GRESADDRS:
521 	case GRESADDRD:
522 		/* Check for superuser */
523 		if ((error = suser(prc, 0)) != 0)
524 			break;
525 
526 		/*
527 		 * set tunnel endpoints and mark if as up
528 		 */
529 		sa = &ifr->ifr_addr;
530 		if (cmd == GRESADDRS )
531 			sc->g_src = (satosin(sa))->sin_addr;
532 		if (cmd == GRESADDRD )
533 			sc->g_dst = (satosin(sa))->sin_addr;
534 recompute:
535 		if ((sc->g_src.s_addr != INADDR_ANY) &&
536 		    (sc->g_dst.s_addr != INADDR_ANY)) {
537 			if (sc->route.ro_rt != 0)
538 				RTFREE(sc->route.ro_rt);
539 			/* ip_output() will do the lookup */
540 			bzero(&sc->route, sizeof(sc->route));
541 			ifp->if_flags |= IFF_UP;
542 		}
543 		break;
544 	case GREGADDRS:
545 		bzero(&si, sizeof(si));
546 		si.sin_family = AF_INET;
547 		si.sin_len = sizeof(struct sockaddr_in);
548 		si.sin_addr.s_addr = sc->g_src.s_addr;
549 		sa = sintosa(&si);
550 		ifr->ifr_addr = *sa;
551 		break;
552 	case GREGADDRD:
553 		bzero(&si, sizeof(si));
554 		si.sin_family = AF_INET;
555 		si.sin_len = sizeof(struct sockaddr_in);
556 		si.sin_addr.s_addr = sc->g_dst.s_addr;
557 		sa = sintosa(&si);
558 		ifr->ifr_addr = *sa;
559 		break;
560 	case SIOCSETKALIVE:
561 		if ((error = suser(prc, 0)) != 0)
562 			break;
563 		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
564 		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256) {
565 			error = EINVAL;
566 			break;
567 		}
568 		sc->sc_ka_timout = ikar->ikar_timeo;
569 		sc->sc_ka_cnt = ikar->ikar_cnt;
570 		if (sc->sc_ka_timout == 0 || sc->sc_ka_cnt == 0) {
571 			sc->sc_ka_timout = 0;
572 			sc->sc_ka_cnt = 0;
573 			sc->sc_ka_state = GRE_STATE_UKNWN;
574 			gre_link_state(sc);
575 			break;
576 		}
577 		if (!timeout_pending(&sc->sc_ka_snd)) {
578 			sc->sc_ka_holdmax = sc->sc_ka_cnt;
579 			timeout_add(&sc->sc_ka_snd, 1);
580 			timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout *
581 			    sc->sc_ka_cnt);
582 		}
583 		break;
584 	case SIOCGETKALIVE:
585 		ikar->ikar_timeo = sc->sc_ka_timout;
586 		ikar->ikar_cnt = sc->sc_ka_cnt;
587 		break;
588 	case SIOCSLIFPHYADDR:
589 		if ((error = suser(prc, 0)) != 0)
590 			break;
591 		if (lifr->addr.ss_family != AF_INET ||
592 		    lifr->dstaddr.ss_family != AF_INET) {
593 			error = EAFNOSUPPORT;
594 			break;
595 		}
596 		if (lifr->addr.ss_len != sizeof(si) ||
597 		    lifr->dstaddr.ss_len != sizeof(si)) {
598 			error = EINVAL;
599 			break;
600 		}
601 		sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr;
602 		sc->g_dst =
603 		    (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr;
604 		goto recompute;
605 	case SIOCDIFPHYADDR:
606 		if ((error = suser(prc, 0)) != 0)
607 			break;
608 		sc->g_src.s_addr = INADDR_ANY;
609 		sc->g_dst.s_addr = INADDR_ANY;
610 		break;
611 	case SIOCGLIFPHYADDR:
612 		if (sc->g_src.s_addr == INADDR_ANY ||
613 		    sc->g_dst.s_addr == INADDR_ANY) {
614 			error = EADDRNOTAVAIL;
615 			break;
616 		}
617 		bzero(&si, sizeof(si));
618 		si.sin_family = AF_INET;
619 		si.sin_len = sizeof(struct sockaddr_in);
620 		si.sin_addr.s_addr = sc->g_src.s_addr;
621 		memcpy(&lifr->addr, &si, sizeof(si));
622 		si.sin_addr.s_addr = sc->g_dst.s_addr;
623 		memcpy(&lifr->dstaddr, &si, sizeof(si));
624 		break;
625 	case SIOCSLIFPHYRTABLE:
626 		if ((error = suser(prc, 0)) != 0)
627 			break;
628 		if (ifr->ifr_rdomainid < 0 ||
629 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
630 		    !rtable_exists(ifr->ifr_rdomainid)) {
631 			error = EINVAL;
632 			break;
633 		}
634 		sc->g_rtableid = ifr->ifr_rdomainid;
635 		goto recompute;
636 	case SIOCGLIFPHYRTABLE:
637 		ifr->ifr_rdomainid = sc->g_rtableid;
638 		break;
639 	default:
640 		error = ENOTTY;
641 	}
642 
643 	splx(s);
644 	return (error);
645 }
646 
/*
 * Internet-style checksum over a flat buffer (in_cksum works on mbufs).
 *
 *   p   - start of the buffer, read as 16-bit words
 *   len - length of the buffer in bytes; an odd trailing byte is
 *         treated as the first byte of a word whose second byte is zero
 *
 * Returns the one's complement of the folded 16-bit sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	u_int words = len / 2;
	u_int i;

	for (i = 0; i < words; i++)
		acc += p[i];

	if ((len % 2) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		tail.c[0] = *(u_char *)(p + words);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* fold the carries back into the low 16 bits */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += (acc >> 16);
	return (~acc);
}
675 
676 void
677 gre_keepalive(void *arg)
678 {
679 	struct gre_softc *sc = arg;
680 
681 	if (!sc->sc_ka_timout)
682 		return;
683 
684 	sc->sc_ka_state = GRE_STATE_DOWN;
685 	gre_link_state(sc);
686 }
687 
688 void
689 gre_send_keepalive(void *arg)
690 {
691 	struct gre_softc *sc = arg;
692 	struct mbuf *m;
693 	struct ip *ip;
694 	struct gre_h *gh;
695 	struct sockaddr dst;
696 	int s;
697 
698 	if (sc->sc_ka_timout)
699 		timeout_add_sec(&sc->sc_ka_snd, sc->sc_ka_timout);
700 
701 	if (sc->g_proto != IPPROTO_GRE)
702 		return;
703 	if ((sc->sc_if.if_flags & IFF_UP) == 0 ||
704 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY)
705 		return;
706 
707 	MGETHDR(m, M_DONTWAIT, MT_DATA);
708 	if (m == NULL) {
709 		sc->sc_if.if_oerrors++;
710 		return;
711 	}
712 
713 	m->m_len = m->m_pkthdr.len = sizeof(*ip) + sizeof(*gh);
714 	MH_ALIGN(m, m->m_len);
715 
716 	/* build the ip header */
717 	ip = mtod(m, struct ip *);
718 
719 	ip->ip_v = IPVERSION;
720 	ip->ip_hl = sizeof(*ip) >> 2;
721 	ip->ip_tos = IPTOS_LOWDELAY;
722 	ip->ip_len = htons(m->m_pkthdr.len);
723 	ip->ip_id = htons(ip_randomid());
724 	ip->ip_off = htons(IP_DF);
725 	ip->ip_ttl = ip_defttl;
726 	ip->ip_p = IPPROTO_GRE;
727 	ip->ip_src.s_addr = sc->g_dst.s_addr;
728 	ip->ip_dst.s_addr = sc->g_src.s_addr;
729 	ip->ip_sum = 0;
730 	ip->ip_sum = in_cksum(m, sizeof(*ip));
731 
732 	gh = (struct gre_h *)(ip + 1);
733 	/* We don't support any GRE flags for now */
734 	bzero(gh, sizeof(*gh));
735 
736 	bzero(&dst, sizeof(dst));
737 	dst.sa_family = AF_INET;
738 
739 	s = splsoftnet();
740 	/* should we care about the error? */
741 	gre_output(&sc->sc_if, m, &dst, NULL);
742 	splx(s);
743 }
744 
745 void
746 gre_recv_keepalive(struct gre_softc *sc)
747 {
748 	if (!sc->sc_ka_timout)
749 		return;
750 
751 	/* link state flap dampening */
752 	switch (sc->sc_ka_state) {
753 	case GRE_STATE_UKNWN:
754 	case GRE_STATE_DOWN:
755 		sc->sc_ka_state = GRE_STATE_HOLD;
756 		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
757 		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
758 		    16 * sc->sc_ka_cnt);
759 		break;
760 	case GRE_STATE_HOLD:
761 		if (--sc->sc_ka_holdcnt < 1) {
762 			sc->sc_ka_state = GRE_STATE_UP;
763 			gre_link_state(sc);
764 		}
765 		break;
766 	case GRE_STATE_UP:
767 		sc->sc_ka_holdmax--;
768 		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_cnt);
769 		break;
770 	}
771 
772 	/* rescedule hold timer */
773 	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout * sc->sc_ka_cnt);
774 }
775 
776 void
777 gre_link_state(struct gre_softc *sc)
778 {
779 	struct ifnet *ifp = &sc->sc_if;
780 	int link_state = LINK_STATE_UNKNOWN;
781 
782 	if (sc->sc_ka_state == GRE_STATE_UP)
783 		link_state = LINK_STATE_UP;
784 	else if (sc->sc_ka_state != GRE_STATE_UKNWN)
785 		link_state = LINK_STATE_KALIVE_DOWN;
786 
787 	if (ifp->if_link_state != link_state) {
788 		ifp->if_link_state = link_state;
789 		if_link_state_change(ifp);
790 	}
791 }
792 #endif
793