xref: /openbsd-src/sys/net/if_gif.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: if_gif.c,v 1.131 2020/08/21 22:59:27 kn Exp $	*/
2 /*	$KAME: if_gif.c,v 1.43 2001/02/20 08:51:07 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/mbuf.h>
36 #include <sys/socket.h>
37 #include <sys/sockio.h>
38 #include <sys/syslog.h>
39 #include <sys/queue.h>
40 
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_types.h>
44 #include <net/route.h>
45 
46 #include <netinet/in.h>
47 #include <netinet/in_var.h>
48 #include <netinet/ip.h>
49 #include <netinet/ip_var.h>
50 #include <netinet/ip_ipip.h>
51 #include <netinet/ip_ecn.h>
52 
53 #ifdef INET6
54 #include <netinet6/in6_var.h>
55 #include <netinet/ip6.h>
56 #include <netinet6/ip6_var.h>
57 #endif /* INET6 */
58 
59 #include <net/if_gif.h>
60 
61 #include "bpfilter.h"
62 #if NBPFILTER > 0
63 #include <net/bpf.h>
64 #endif
65 
66 #ifdef MPLS
67 #include <netmpls/mpls.h>
68 #endif
69 
70 #include "pf.h"
71 #if NPF > 0
72 #include <net/pfvar.h>
73 #endif
74 
/* Interface MTU: value set at creation and the bounds SIOCSIFMTU enforces. */
#define GIF_MTU		1280	/* MTU given to a newly created interface */
#define GIF_MTU_MIN	1280	/* smallest MTU gif_ioctl() accepts */
#define GIF_MTU_MAX	8192	/* largest MTU gif_ioctl() accepts */
78 
79 union gif_addr {
80 	struct in6_addr		in6;
81 	struct in_addr		in4;
82 };
83 
84 struct gif_tunnel {
85 	TAILQ_ENTRY(gif_tunnel)	t_entry;
86 
87 	union gif_addr		t_src;
88 #define t_src4		t_src.in4
89 #define t_src6		t_src.in6
90 	union gif_addr		t_dst;
91 #define t_dst4		t_dst.in4
92 #define t_dst6		t_dst.in6
93 	u_int			t_rtableid;
94 
95 	sa_family_t		t_af;
96 };
97 
/* Tail queue head type used for the list of gif tunnels. */
TAILQ_HEAD(gif_list, gif_tunnel);

/* Three-way comparison of two tunnels; returns 0 when they match. */
static inline int	gif_cmp(const struct gif_tunnel *a,
			    const struct gif_tunnel *b);
102 
103 struct gif_softc {
104 	struct gif_tunnel	sc_tunnel; /* must be first */
105 	struct ifnet		sc_if;
106 	uint16_t		sc_df;
107 	int			sc_ttl;
108 	int			sc_txhprio;
109 	int			sc_rxhprio;
110 	int			sc_ecn;
111 };
112 
113 struct gif_list gif_list = TAILQ_HEAD_INITIALIZER(gif_list);
114 
115 void	gifattach(int);
116 int	gif_clone_create(struct if_clone *, int);
117 int	gif_clone_destroy(struct ifnet *);
118 
119 void	gif_start(struct ifnet *);
120 int	gif_ioctl(struct ifnet *, u_long, caddr_t);
121 int	gif_output(struct ifnet *, struct mbuf *, struct sockaddr *,
122 	    struct rtentry *);
123 int	gif_send(struct gif_softc *, struct mbuf *, uint8_t, uint8_t, uint8_t);
124 
125 int	gif_up(struct gif_softc *);
126 int	gif_down(struct gif_softc *);
127 int	gif_set_tunnel(struct gif_softc *, struct if_laddrreq *);
128 int	gif_get_tunnel(struct gif_softc *, struct if_laddrreq *);
129 int	gif_del_tunnel(struct gif_softc *);
130 int	in_gif_output(struct ifnet *, int, struct mbuf **);
131 int	in6_gif_output(struct ifnet *, int, struct mbuf **);
132 int	gif_input(struct gif_tunnel *, struct mbuf **, int *, int, int,
133 	    uint8_t);
134 
135 /*
136  * gif global variable definitions
137  */
138 struct if_clone gif_cloner =
139     IF_CLONE_INITIALIZER("gif", gif_clone_create, gif_clone_destroy);
140 
141 void
142 gifattach(int count)
143 {
144 	if_clone_attach(&gif_cloner);
145 }
146 
147 int
148 gif_clone_create(struct if_clone *ifc, int unit)
149 {
150 	struct gif_softc *sc;
151 	struct ifnet *ifp;
152 
153 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
154 	ifp = &sc->sc_if;
155 
156 	sc->sc_df = htons(0);
157 	sc->sc_ttl = ip_defttl;
158 	sc->sc_txhprio = IF_HDRPRIO_PAYLOAD;
159 	sc->sc_rxhprio = IF_HDRPRIO_PAYLOAD;
160 	sc->sc_ecn = ECN_ALLOWED;
161 
162 	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
163 	    "%s%d", ifc->ifc_name, unit);
164 
165 	ifp->if_mtu    = GIF_MTU;
166 	ifp->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
167 	ifp->if_xflags = IFXF_CLONED;
168 	ifp->if_ioctl  = gif_ioctl;
169 	ifp->if_start  = gif_start;
170 	ifp->if_output = gif_output;
171 	ifp->if_rtrequest = p2p_rtrequest;
172 	ifp->if_type   = IFT_GIF;
173 	ifp->if_softc = sc;
174 
175 	if_attach(ifp);
176 	if_alloc_sadl(ifp);
177 
178 #if NBPFILTER > 0
179 	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
180 #endif
181 
182 	NET_LOCK();
183 	TAILQ_INSERT_TAIL(&gif_list, &sc->sc_tunnel, t_entry);
184 	NET_UNLOCK();
185 
186 	return (0);
187 }
188 
189 int
190 gif_clone_destroy(struct ifnet *ifp)
191 {
192 	struct gif_softc *sc = ifp->if_softc;
193 
194 	NET_LOCK();
195 	if (ISSET(ifp->if_flags, IFF_RUNNING))
196 		gif_down(sc);
197 
198 	TAILQ_REMOVE(&gif_list, &sc->sc_tunnel, t_entry);
199 	NET_UNLOCK();
200 
201 	if_detach(ifp);
202 
203 	free(sc, M_DEVBUF, sizeof(*sc));
204 
205 	return (0);
206 }
207 
208 void
209 gif_start(struct ifnet *ifp)
210 {
211 	struct gif_softc *sc = ifp->if_softc;
212 	struct mbuf *m;
213 #if NBPFILTER > 0
214 	caddr_t if_bpf;
215 #endif
216 	uint8_t proto, ttl, tos;
217 	int ttloff, tttl;
218 
219 	tttl = sc->sc_ttl;
220 
221 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
222 #if NBPFILTER > 0
223 		if_bpf = ifp->if_bpf;
224 		if (if_bpf) {
225 			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m,
226 			    BPF_DIRECTION_OUT);
227 		}
228 #endif
229 
230 		switch (m->m_pkthdr.ph_family) {
231 		case AF_INET: {
232 			struct ip *ip;
233 
234 			m = m_pullup(m, sizeof(*ip));
235 			if (m == NULL)
236 				continue;
237 
238 			ip = mtod(m, struct ip *);
239 			tos = ip->ip_tos;
240 
241 			ttloff = offsetof(struct ip, ip_ttl);
242 			proto = IPPROTO_IPV4;
243 			break;
244 		}
245 #ifdef INET6
246 		case AF_INET6: {
247 			struct ip6_hdr *ip6;
248 
249 			m = m_pullup(m, sizeof(*ip6));
250 			if (m == NULL)
251 				continue;
252 
253 			ip6 = mtod(m, struct ip6_hdr *);
254 			tos = ntohl(ip6->ip6_flow >> 20);
255 
256 			ttloff = offsetof(struct ip6_hdr, ip6_hlim);
257 			proto = IPPROTO_IPV6;
258 			break;
259 		}
260 #endif
261 #ifdef MPLS
262 		case AF_MPLS: {
263 			uint32_t shim;
264 
265 			m = m_pullup(m, sizeof(shim));
266 			if (m == NULL)
267 				continue;
268 
269 			shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
270 			tos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
271 
272 			ttloff = 3;
273 
274 			proto = IPPROTO_MPLS;
275 			break;
276 		}
277 #endif
278 		default:
279 			unhandled_af(m->m_pkthdr.ph_family);
280 		}
281 
282 		if (tttl == -1) {
283 			KASSERT(m->m_len > ttloff);
284 
285 			ttl = *(m->m_data + ttloff);
286 		} else
287 			ttl = tttl;
288 
289 		switch (sc->sc_txhprio) {
290 		case IF_HDRPRIO_PAYLOAD:
291 			/* tos is already set */
292 			break;
293 		case IF_HDRPRIO_PACKET:
294 			tos = IFQ_PRIO2TOS(m->m_pkthdr.pf.prio);
295 			break;
296 		default:
297 			tos = IFQ_PRIO2TOS(sc->sc_txhprio);
298 			break;
299 		}
300 
301 		gif_send(sc, m, proto, ttl, tos);
302 	}
303 }
304 
305 int
306 gif_send(struct gif_softc *sc, struct mbuf *m,
307     uint8_t proto, uint8_t ttl, uint8_t itos)
308 {
309 	uint8_t otos;
310 
311 	m->m_flags &= ~(M_BCAST|M_MCAST);
312 	m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid;
313 
314 #if NPF > 0
315 	pf_pkt_addr_changed(m);
316 #endif
317 
318 	ip_ecn_ingress(sc->sc_ecn, &otos, &itos);
319 
320 	switch (sc->sc_tunnel.t_af) {
321 	case AF_INET: {
322 		struct ip *ip;
323 
324 		if (in_nullhost(sc->sc_tunnel.t_dst4))
325 			goto drop;
326 
327 		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
328 		if (m == NULL)
329 			return (-1);
330 
331 		ip = mtod(m, struct ip *);
332 		ip->ip_off = sc->sc_df;
333 		ip->ip_tos = otos;
334 		ip->ip_len = htons(m->m_pkthdr.len);
335 		ip->ip_ttl = ttl;
336 		ip->ip_p = proto;
337 		ip->ip_src = sc->sc_tunnel.t_src4;
338 		ip->ip_dst = sc->sc_tunnel.t_dst4;
339 
340 		ip_send(m);
341 		break;
342 	}
343 #ifdef INET6
344 	case AF_INET6: {
345 		struct ip6_hdr *ip6;
346 		int len = m->m_pkthdr.len;
347 		uint32_t flow;
348 
349 		if (IN6_IS_ADDR_UNSPECIFIED(&sc->sc_tunnel.t_dst6))
350 			goto drop;
351 
352 		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
353 		if (m == NULL)
354 			return (-1);
355 
356 		flow = otos << 20;
357 		if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
358 			flow |= m->m_pkthdr.ph_flowid;
359 
360 		ip6 = mtod(m, struct ip6_hdr *);
361 		ip6->ip6_flow = htonl(flow);
362 		ip6->ip6_vfc |= IPV6_VERSION;
363 		ip6->ip6_plen = htons(len);
364 		ip6->ip6_nxt = proto;
365 		ip6->ip6_hlim = ttl;
366 		ip6->ip6_src = sc->sc_tunnel.t_src6;
367 		ip6->ip6_dst = sc->sc_tunnel.t_dst6;
368 
369 		if (sc->sc_df)
370 			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
371 
372 		ip6_send(m);
373 		break;
374 	}
375 #endif
376 	default:
377 		m_freem(m);
378 		break;
379 	}
380 
381 	return (0);
382 
383 drop:
384 	m_freem(m);
385 	return (0);
386 }
387 
388 int
389 gif_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
390     struct rtentry *rt)
391 {
392 	struct m_tag *mtag;
393 	int error = 0;
394 
395 	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
396 		error = ENETDOWN;
397 		goto drop;
398 	}
399 
400 	switch (dst->sa_family) {
401 	case AF_INET:
402 #ifdef INET6
403 	case AF_INET6:
404 #endif
405 #ifdef MPLS
406 	case AF_MPLS:
407 #endif
408 		break;
409 	default:
410 		error = EAFNOSUPPORT;
411 		goto drop;
412 	}
413 
414 	/* Try to limit infinite recursion through misconfiguration. */
415 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
416 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
417 		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
418 		    sizeof(ifp->if_index)) == 0) {
419 			error = EIO;
420 			goto drop;
421 		}
422 	}
423 
424 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
425 	if (mtag == NULL) {
426 		error = ENOBUFS;
427 		goto drop;
428 	}
429 	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
430 	m_tag_prepend(m, mtag);
431 
432 	m->m_pkthdr.ph_family = dst->sa_family;
433 
434 	error = if_enqueue(ifp, m);
435 
436 	if (error)
437 		ifp->if_oerrors++;
438 	return (error);
439 
440 drop:
441 	m_freem(m);
442 	return (error);
443 }
444 
445 int
446 gif_up(struct gif_softc *sc)
447 {
448 	NET_ASSERT_LOCKED();
449 
450 	SET(sc->sc_if.if_flags, IFF_RUNNING);
451 
452 	return (0);
453 }
454 
455 int
456 gif_down(struct gif_softc *sc)
457 {
458 	NET_ASSERT_LOCKED();
459 
460 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
461 
462 	/* barrier? */
463 
464 	return (0);
465 }
466 
467 int
468 gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
469 {
470 	struct gif_softc *sc = ifp->if_softc;
471 	struct ifreq *ifr = (struct ifreq *)data;
472 	int error = 0;
473 
474 	switch (cmd) {
475 	case SIOCSIFADDR:
476 		SET(ifp->if_flags, IFF_UP);
477 		/* FALLTHROUGH */
478 	case SIOCSIFFLAGS:
479 		if (ISSET(ifp->if_flags, IFF_UP)) {
480 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
481 				error = gif_up(sc);
482 			else
483 				error = 0;
484 		} else {
485 			if (ISSET(ifp->if_flags, IFF_RUNNING))
486 				error = gif_down(sc);
487 		}
488 		break;
489 
490 	case SIOCADDMULTI:
491 	case SIOCDELMULTI:
492 		break;
493 
494 	case SIOCSLIFPHYADDR:
495 		error = gif_set_tunnel(sc, (struct if_laddrreq *)data);
496 		break;
497 	case SIOCGLIFPHYADDR:
498 		error = gif_get_tunnel(sc, (struct if_laddrreq *)data);
499 		break;
500 	case SIOCDIFPHYADDR:
501 		error = gif_del_tunnel(sc);
502 		break;
503 
504 	case SIOCSIFMTU:
505 		if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) {
506 			error = EINVAL;
507 			break;
508 		}
509 
510 		ifp->if_mtu = ifr->ifr_mtu;
511 		break;
512 
513 	case SIOCSLIFPHYRTABLE:
514 		if (ifr->ifr_rdomainid < 0 ||
515 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
516 		    !rtable_exists(ifr->ifr_rdomainid)) {
517 			error = EINVAL;
518 			break;
519 		}
520 		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
521 		break;
522 	case SIOCGLIFPHYRTABLE:
523 		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
524 		break;
525 
526 	case SIOCSLIFPHYTTL:
527 		if (ifr->ifr_ttl != -1 &&
528 		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
529 			error = EINVAL;
530 			break;
531 		}
532 
533 		/* commit */
534 		sc->sc_ttl = ifr->ifr_ttl;
535 		break;
536 	case SIOCGLIFPHYTTL:
537 		ifr->ifr_ttl = sc->sc_ttl;
538 		break;
539 
540 	case SIOCSLIFPHYDF:
541 		/* commit */
542 		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
543 		break;
544 	case SIOCGLIFPHYDF:
545 		ifr->ifr_df = sc->sc_df ? 1 : 0;
546 		break;
547 
548 	case SIOCSLIFPHYECN:
549 		sc->sc_ecn = ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
550 		break;
551 	case SIOCGLIFPHYECN:
552 		ifr->ifr_metric = (sc->sc_ecn == ECN_ALLOWED);
553 		break;
554 
555 	case SIOCSTXHPRIO:
556 		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
557 		if (error != 0)
558 			break;
559 
560 		sc->sc_txhprio = ifr->ifr_hdrprio;
561 		break;
562 	case SIOCGTXHPRIO:
563 		ifr->ifr_hdrprio = sc->sc_txhprio;
564 		break;
565 
566 	case SIOCSRXHPRIO:
567 		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
568 		if (error != 0)
569 			break;
570 
571 		sc->sc_rxhprio = ifr->ifr_hdrprio;
572 		break;
573 	case SIOCGRXHPRIO:
574 		ifr->ifr_hdrprio = sc->sc_rxhprio;
575 		break;
576 
577 	default:
578 		error = ENOTTY;
579 		break;
580 	}
581 
582 	return (error);
583 }
584 
585 int
586 gif_get_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
587 {
588 	struct gif_tunnel *tunnel = &sc->sc_tunnel;
589 	struct sockaddr *src = (struct sockaddr *)&req->addr;
590 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
591 	struct sockaddr_in *sin;
592 #ifdef INET6 /* ifconfig already embeds the scopeid */
593 	struct sockaddr_in6 *sin6;
594 #endif
595 
596 	switch (tunnel->t_af) {
597 	case AF_UNSPEC:
598 		return (EADDRNOTAVAIL);
599 	case AF_INET:
600 		sin = (struct sockaddr_in *)src;
601 		memset(sin, 0, sizeof(*sin));
602 		sin->sin_family = AF_INET;
603 		sin->sin_len = sizeof(*sin);
604 		sin->sin_addr = tunnel->t_src4;
605 
606 		sin = (struct sockaddr_in *)dst;
607 		memset(sin, 0, sizeof(*sin));
608 		sin->sin_family = AF_INET;
609 		sin->sin_len = sizeof(*sin);
610 		sin->sin_addr = tunnel->t_dst4;
611 
612 		break;
613 
614 #ifdef INET6
615 	case AF_INET6:
616 		sin6 = (struct sockaddr_in6 *)src;
617 		memset(sin6, 0, sizeof(*sin6));
618 		sin6->sin6_family = AF_INET6;
619 		sin6->sin6_len = sizeof(*sin6);
620 		in6_recoverscope(sin6, &tunnel->t_src6);
621 
622 		sin6 = (struct sockaddr_in6 *)dst;
623 		memset(sin6, 0, sizeof(*sin6));
624 		sin6->sin6_family = AF_INET6;
625 		sin6->sin6_len = sizeof(*sin6);
626 		in6_recoverscope(sin6, &tunnel->t_dst6);
627 
628 		break;
629 #endif
630 	default:
631 		return (EAFNOSUPPORT);
632 	}
633 
634 	return (0);
635 }
636 
637 int
638 gif_set_tunnel(struct gif_softc *sc, struct if_laddrreq *req)
639 {
640 	struct gif_tunnel *tunnel = &sc->sc_tunnel;
641 	struct sockaddr *src = (struct sockaddr *)&req->addr;
642 	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
643 	struct sockaddr_in *src4, *dst4;
644 #ifdef INET6
645 	struct sockaddr_in6 *src6, *dst6;
646 	int error;
647 #endif
648 
649 	/* sa_family and sa_len must be equal */
650 	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
651 		return (EINVAL);
652 
653 	/* validate */
654 	switch (dst->sa_family) {
655 	case AF_INET:
656 		if (dst->sa_len != sizeof(*dst4))
657 			return (EINVAL);
658 
659 		src4 = (struct sockaddr_in *)src;
660 		if (in_nullhost(src4->sin_addr) ||
661 		    IN_MULTICAST(src4->sin_addr.s_addr))
662 			return (EINVAL);
663 
664 		dst4 = (struct sockaddr_in *)dst;
665 		/* dst4 can be 0.0.0.0 */
666 		if (IN_MULTICAST(dst4->sin_addr.s_addr))
667 			return (EINVAL);
668 
669 		tunnel->t_src4 = src4->sin_addr;
670 		tunnel->t_dst4 = dst4->sin_addr;
671 
672 		break;
673 #ifdef INET6
674 	case AF_INET6:
675 		if (dst->sa_len != sizeof(*dst6))
676 			return (EINVAL);
677 
678 		src6 = (struct sockaddr_in6 *)src;
679 		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
680 		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
681 			return (EINVAL);
682 
683 		dst6 = (struct sockaddr_in6 *)dst;
684 		if (IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
685 			return (EINVAL);
686 
687 		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
688 		if (error != 0)
689 			return (error);
690 
691 		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
692 		if (error != 0)
693 			return (error);
694 
695 		break;
696 #endif
697 	default:
698 		return (EAFNOSUPPORT);
699 	}
700 
701 	/* commit */
702 	tunnel->t_af = dst->sa_family;
703 
704 	return (0);
705 }
706 
707 int
708 gif_del_tunnel(struct gif_softc *sc)
709 {
710 	/* commit */
711 	sc->sc_tunnel.t_af = AF_UNSPEC;
712 
713 	return (0);
714 }
715 
716 int
717 in_gif_input(struct mbuf **mp, int *offp, int proto, int af)
718 {
719 	struct mbuf *m = *mp;
720 	struct gif_tunnel key;
721 	struct ip *ip;
722 	int rv;
723 
724 	ip = mtod(m, struct ip *);
725 
726 	key.t_af = AF_INET;
727 	key.t_src4 = ip->ip_dst;
728 	key.t_dst4 = ip->ip_src;
729 
730 	rv = gif_input(&key, mp, offp, proto, af, ip->ip_tos);
731 	if (rv == -1)
732 		rv = ipip_input(mp, offp, proto, af);
733 
734 	return (rv);
735 }
736 
#ifdef INET6
/*
 * Input entry point for packets that arrived inside an IPv6 header.
 *
 * Builds a lookup key from the outer header, with source and
 * destination swapped so it reads from the local end's point of view,
 * and offers the packet to gif_input() together with the outer traffic
 * class.  If gif_input() answers -1 the packet is handed to
 * ipip_input() instead.
 */
int
in6_gif_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct ip6_hdr *oip6 = mtod(*mp, struct ip6_hdr *);
	struct gif_tunnel key;
	uint32_t word;
	int rv;

	key.t_af = AF_INET6;
	key.t_src6 = oip6->ip6_dst;
	key.t_dst6 = oip6->ip6_src;

	/* Host order first, then the traffic class sits at bits 20-27. */
	word = ntohl(oip6->ip6_flow);

	rv = gif_input(&key, mp, offp, proto, af, word >> 20);
	if (rv != -1)
		return (rv);

	return (ipip_input(mp, offp, proto, af));
}
#endif /* INET6 */
762 
763 struct gif_softc *
764 gif_find(const struct gif_tunnel *key)
765 {
766 	struct gif_tunnel *t;
767 	struct gif_softc *sc;
768 
769 	TAILQ_FOREACH(t, &gif_list, t_entry) {
770 		if (gif_cmp(key, t) != 0)
771 			continue;
772 
773 		sc = (struct gif_softc *)t;
774 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
775 			continue;
776 
777 		return (sc);
778 	}
779 
780 	return (NULL);
781 }
782 
783 int
784 gif_input(struct gif_tunnel *key, struct mbuf **mp, int *offp, int proto,
785     int af, uint8_t otos)
786 {
787 	struct mbuf *m = *mp;
788 	struct gif_softc *sc;
789 	struct ifnet *ifp;
790 	void (*input)(struct ifnet *, struct mbuf *);
791 	uint8_t itos;
792 	int rxhprio;
793 
794 	/* IP-in-IP header is caused by tunnel mode, so skip gif lookup */
795 	if (m->m_flags & M_TUNNEL) {
796 		m->m_flags &= ~M_TUNNEL;
797 		return (-1);
798 	}
799 
800 	key->t_rtableid = m->m_pkthdr.ph_rtableid;
801 
802 	sc = gif_find(key);
803 	if (sc == NULL) {
804 		memset(&key->t_dst, 0, sizeof(key->t_dst));
805 		sc = gif_find(key);
806 		if (sc == NULL)
807 			return (-1);
808 	}
809 
810 	m_adj(m, *offp); /* this is ours now */
811 
812 	ifp = &sc->sc_if;
813 	rxhprio = sc->sc_rxhprio;
814 
815 	switch (proto) {
816 	case IPPROTO_IPV4: {
817 		struct ip *ip;
818 
819 		m = *mp = m_pullup(m, sizeof(*ip));
820 		if (m == NULL)
821 			return (IPPROTO_DONE);
822 
823 		ip = mtod(m, struct ip *);
824 
825 		itos = ip->ip_tos;
826 		if (ip_ecn_egress(sc->sc_ecn, &otos, &itos) == 0)
827 			goto drop;
828 
829 		if (itos != ip->ip_tos)
830 			ip_tos_patch(ip, itos);
831 
832 		m->m_pkthdr.ph_family = AF_INET;
833 		input = ipv4_input;
834 		break;
835 	}
836 #ifdef INET6
837 	case IPPROTO_IPV6: {
838 		struct ip6_hdr *ip6;
839 
840 		m = *mp = m_pullup(m, sizeof(*ip6));
841 		if (m == NULL)
842 			return (IPPROTO_DONE);
843 
844 		ip6 = mtod(m, struct ip6_hdr *);
845 
846 		itos = ntohl(ip6->ip6_flow) >> 20;
847 		if (!ip_ecn_egress(sc->sc_ecn, &otos, &itos))
848 			goto drop;
849 
850 		CLR(ip6->ip6_flow, htonl(0xff << 20));
851 		SET(ip6->ip6_flow, htonl(itos << 20));
852 
853 		m->m_pkthdr.ph_family = AF_INET6;
854 		input = ipv6_input;
855 		break;
856 	}
857 #endif /* INET6 */
858 #ifdef MPLS
859 	case IPPROTO_MPLS: {
860 		uint32_t shim;
861 		m = *mp = m_pullup(m, sizeof(shim));
862 		if (m == NULL)
863 			return (IPPROTO_DONE);
864 
865 		shim = *mtod(m, uint32_t *) & MPLS_EXP_MASK;
866 		itos = (ntohl(shim) >> MPLS_EXP_OFFSET) << 5;
867 
868 		m->m_pkthdr.ph_family = AF_MPLS;
869 		input = mpls_input;
870 		break;
871 	}
872 #endif /* MPLS */
873 	default:
874 		return (-1);
875 	}
876 
877 	m->m_flags &= ~(M_MCAST|M_BCAST);
878 	m->m_pkthdr.ph_ifidx = ifp->if_index;
879 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
880 
881 	switch (rxhprio) {
882 	case IF_HDRPRIO_PACKET:
883 		/* nop */
884 		break;
885 	case IF_HDRPRIO_PAYLOAD:
886 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
887 		break;
888 	case IF_HDRPRIO_OUTER:
889 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
890 		break;
891 	default:
892 		m->m_pkthdr.pf.prio = rxhprio;
893 		break;
894 	}
895 
896 #if NPF > 0
897 	pf_pkt_addr_changed(m);
898 #endif
899 
900 	ifp->if_ipackets++;
901 	ifp->if_ibytes += m->m_pkthdr.len;
902 
903 #if NBPFILTER > 0
904 	{
905 		caddr_t if_bpf = ifp->if_bpf;
906 		if (if_bpf) {
907 			bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family,
908 			    m, BPF_DIRECTION_IN);
909 		}
910 	}
911 #endif
912 
913 	*mp = NULL;
914 	(*input)(ifp, m);
915 	return (IPPROTO_DONE);
916 
917  drop:
918 	m_freemp(mp);
919 	return (IPPROTO_DONE);
920 }
921 
922 static inline int
923 gif_ip_cmp(int af, const union gif_addr *a, const union gif_addr *b)
924 {
925 	switch (af) {
926 #ifdef INET6
927 	case AF_INET6:
928 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
929 #endif /* INET6 */
930 	case AF_INET:
931 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
932 	default:
933 		panic("%s: unsupported af %d\n", __func__, af);
934 	}
935 
936 	return (0);
937 }
938 
939 
940 static inline int
941 gif_cmp(const struct gif_tunnel *a, const struct gif_tunnel *b)
942 {
943 	int rv;
944 
945 	/* sort by routing table */
946 	if (a->t_rtableid > b->t_rtableid)
947 		return (1);
948 	if (a->t_rtableid < b->t_rtableid)
949 		return (-1);
950 
951 	/* sort by address */
952 	if (a->t_af > b->t_af)
953 		return (1);
954 	if (a->t_af < b->t_af)
955 		return (-1);
956 
957 	rv = gif_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
958 	if (rv != 0)
959 		return (rv);
960 
961 	rv = gif_ip_cmp(a->t_af, &a->t_src, &b->t_src);
962 	if (rv != 0)
963 		return (rv);
964 
965 	return (0);
966 }
967