xref: /netbsd-src/sys/net/if_gif.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: if_gif.c,v 1.154 2020/10/14 15:22:17 roy Exp $	*/
2 /*	$KAME: if_gif.c,v 1.76 2001/08/20 02:01:02 kjc Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: if_gif.c,v 1.154 2020/10/14 15:22:17 roy Exp $");
35 
36 #ifdef _KERNEL_OPT
37 #include "opt_inet.h"
38 #include "opt_net_mpsafe.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/atomic.h>
44 #include <sys/kernel.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/time.h>
51 #include <sys/socketvar.h>
52 #include <sys/syslog.h>
53 #include <sys/proc.h>
54 #include <sys/cpu.h>
55 #include <sys/intr.h>
56 #include <sys/kmem.h>
57 #include <sys/sysctl.h>
58 #include <sys/xcall.h>
59 #include <sys/device.h>
60 #include <sys/module.h>
61 #include <sys/mutex.h>
62 #include <sys/pserialize.h>
63 #include <sys/psref.h>
64 
65 #include <net/if.h>
66 #include <net/if_types.h>
67 #include <net/netisr.h>
68 #include <net/route.h>
69 #include <net/bpf.h>
70 
71 #include <netinet/in.h>
72 #include <netinet/in_systm.h>
73 #include <netinet/ip.h>
74 #ifdef	INET
75 #include <netinet/in_var.h>
76 #endif	/* INET */
77 #include <netinet/in_gif.h>
78 
79 #ifdef INET6
80 #ifndef INET
81 #include <netinet/in.h>
82 #endif
83 #include <netinet6/in6_var.h>
84 #include <netinet/ip6.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/in6_gif.h>
87 #endif /* INET6 */
88 
89 #include <netinet/ip_encap.h>
90 #include <net/if_gif.h>
91 
92 #include "ioconf.h"
93 
94 #ifdef NET_MPSAFE
95 #define GIF_MPSAFE	1
96 #endif
97 
98 /*
99  * gif global variable definitions
100  */
101 static struct {
102 	LIST_HEAD(gif_sclist, gif_softc) list;
103 	kmutex_t lock;
104 } gif_softcs __cacheline_aligned;
105 
106 struct psref_class *gv_psref_class __read_mostly;
107 
108 static int	gifattach0(struct gif_softc *);
109 static int	gif_output(struct ifnet *, struct mbuf *,
110 			   const struct sockaddr *, const struct rtentry *);
111 static void	gif_start(struct ifnet *);
112 static int	gif_transmit(struct ifnet *, struct mbuf *);
113 static int	gif_transmit_direct(struct gif_variant *, struct mbuf *);
114 static int	gif_ioctl(struct ifnet *, u_long, void *);
115 static int	gif_set_tunnel(struct ifnet *, struct sockaddr *,
116 			       struct sockaddr *);
117 static void	gif_delete_tunnel(struct ifnet *);
118 
119 static int	gif_clone_create(struct if_clone *, int);
120 static int	gif_clone_destroy(struct ifnet *);
121 static int	gif_check_nesting(struct ifnet *, struct mbuf *);
122 
123 static int	gif_encap_attach(struct gif_variant *);
124 static int	gif_encap_detach(struct gif_variant *);
125 
126 static void	gif_update_variant(struct gif_softc *, struct gif_variant *);
127 
128 static struct if_clone gif_cloner =
129     IF_CLONE_INITIALIZER("gif", gif_clone_create, gif_clone_destroy);
130 
131 #ifndef MAX_GIF_NEST
132 /*
133  * This macro controls the upper limitation on nesting of gif tunnels.
134  * Since, setting a large value to this macro with a careless configuration
135  * may introduce system crash, we don't allow any nestings by default.
136  * If you need to configure nested gif tunnels, you can define this macro
137  * in your kernel configuration file.  However, if you do so, please be
138  * careful to configure the tunnels so that it won't make a loop.
139  */
140 #define MAX_GIF_NEST 1
141 #endif
142 static int max_gif_nesting = MAX_GIF_NEST;
143 
144 static struct sysctllog *gif_sysctl;
145 
146 #ifdef INET6
147 static int
148 sysctl_gif_pmtu_global(SYSCTLFN_ARGS)
149 {
150 	int error, pmtu;
151 	struct sysctlnode node = *rnode;
152 
153 	pmtu = ip6_gif_pmtu;
154 	node.sysctl_data = &pmtu;
155 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
156 	if (error || newp == NULL)
157 		return error;
158 
159 	switch (pmtu) {
160 	case GIF_PMTU_MINMTU:
161 	case GIF_PMTU_OUTERMTU:
162 		ip6_gif_pmtu = pmtu;
163 		break;
164 	default:
165 		return EINVAL;
166 	}
167 
168 	return 0;
169 }
170 
171 static int
172 sysctl_gif_pmtu_perif(SYSCTLFN_ARGS)
173 {
174 	int error, pmtu;
175 	struct sysctlnode node = *rnode;
176 	struct gif_softc *sc = (struct gif_softc *)node.sysctl_data;
177 
178 	pmtu = sc->gif_pmtu;
179 	node.sysctl_data = &pmtu;
180 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
181 	if (error || newp == NULL)
182 		return error;
183 
184 	switch (pmtu) {
185 	case GIF_PMTU_SYSDEFAULT:
186 	case GIF_PMTU_MINMTU:
187 	case GIF_PMTU_OUTERMTU:
188 		sc->gif_pmtu = pmtu;
189 		break;
190 	default:
191 		return EINVAL;
192 	}
193 
194 	return 0;
195 }
196 #endif
197 
198 static void
199 gif_sysctl_setup(void)
200 {
201 	gif_sysctl = NULL;
202 
203 #ifdef INET
204 	/*
205 	 * Previously create "net.inet.ip" entry to avoid sysctl_createv error.
206 	 */
207 	sysctl_createv(NULL, 0, NULL, NULL,
208 		       CTLFLAG_PERMANENT,
209 		       CTLTYPE_NODE, "inet",
210 		       SYSCTL_DESCR("PF_INET related settings"),
211 		       NULL, 0, NULL, 0,
212 		       CTL_NET, PF_INET, CTL_EOL);
213 	sysctl_createv(NULL, 0, NULL, NULL,
214 		       CTLFLAG_PERMANENT,
215 		       CTLTYPE_NODE, "ip",
216 		       SYSCTL_DESCR("IPv4 related settings"),
217 		       NULL, 0, NULL, 0,
218 		       CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);
219 
220 	sysctl_createv(&gif_sysctl, 0, NULL, NULL,
221 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
222 		       CTLTYPE_INT, "gifttl",
223 		       SYSCTL_DESCR("Default TTL for a gif tunnel datagram"),
224 		       NULL, 0, &ip_gif_ttl, 0,
225 		       CTL_NET, PF_INET, IPPROTO_IP,
226 		       IPCTL_GIF_TTL, CTL_EOL);
227 #endif
228 #ifdef INET6
229 	/*
230 	 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error.
231 	 */
232 	sysctl_createv(NULL, 0, NULL, NULL,
233 		       CTLFLAG_PERMANENT,
234 		       CTLTYPE_NODE, "inet6",
235 		       SYSCTL_DESCR("PF_INET6 related settings"),
236 		       NULL, 0, NULL, 0,
237 		       CTL_NET, PF_INET6, CTL_EOL);
238 	sysctl_createv(NULL, 0, NULL, NULL,
239 		       CTLFLAG_PERMANENT,
240 		       CTLTYPE_NODE, "ip6",
241 		       SYSCTL_DESCR("IPv6 related settings"),
242 		       NULL, 0, NULL, 0,
243 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
244 
245 	sysctl_createv(&gif_sysctl, 0, NULL, NULL,
246 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
247 		       CTLTYPE_INT, "gifhlim",
248 		       SYSCTL_DESCR("Default hop limit for a gif tunnel datagram"),
249 		       NULL, 0, &ip6_gif_hlim, 0,
250 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
251 		       IPV6CTL_GIF_HLIM, CTL_EOL);
252 
253 	sysctl_createv(&gif_sysctl, 0, NULL, NULL,
254 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
255 		       CTLTYPE_INT, "gifpmtu",
256 		       SYSCTL_DESCR("Default Path MTU setting for gif tunnels"),
257 		       sysctl_gif_pmtu_global, 0, NULL, 0,
258 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
259 		       IPV6CTL_GIF_PMTU, CTL_EOL);
260 #endif
261 }
262 
/*
 * Create the per-interface sysctl leaf "net.interfaces.<ifname>.pmtu"
 * for the gif interface `sc' (INET6 kernels only; otherwise a no-op).
 *
 * clog: sysctl log the created nodes are recorded in.
 * sc:   softc whose gif_pmtu the leaf reads and writes through
 *       sysctl_gif_pmtu_perif().
 *
 * Failure to create the leaf is logged and otherwise ignored.
 */
static void
gif_perif_sysctl_setup(struct sysctllog **clog, struct gif_softc *sc)
{
#ifdef INET6
	const struct sysctlnode *cnode, *rnode;
	struct ifnet *ifp = &sc->gif_if;
	const char *ifname = ifp->if_xname;
	int rv;

	/*
	 * Give the setting its default before the node becomes visible,
	 * so the handler never reports (or later overwrites) a value
	 * other than the one in effect.
	 */
	sc->gif_pmtu = GIF_PMTU_SYSDEFAULT;

	/*
	 * Already created in sysctl_sndq_setup().
	 */
	sysctl_createv(clog, 0, NULL, &rnode,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "interfaces",
		       SYSCTL_DESCR("Per-interface controls"),
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &rnode, &rnode,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, ifname,
		       SYSCTL_DESCR("Interface controls"),
		       NULL, 0, NULL, 0,
		       CTL_CREATE, CTL_EOL);

	/*
	 * The leaf must be CTLFLAG_READWRITE: sysctl_gif_pmtu_perif()
	 * validates and stores new values, which is unreachable on a
	 * read-only node.
	 */
	rv = sysctl_createv(clog, 0, &rnode, &cnode,
			    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
			    CTLTYPE_INT, "pmtu",
			    SYSCTL_DESCR("Path MTU setting for this gif tunnel"),
			    sysctl_gif_pmtu_perif, 0, (void *)sc, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n", ifname);
#endif
}
300 
/*
 * Pseudo-device attach hook.  Deliberately empty: all initialization
 * is done by the module initialization code, gifinit(), below.
 */
/* ARGSUSED */
void
gifattach(int count)
{
}
310 
311 static void
312 gifinit(void)
313 {
314 
315 	mutex_init(&gif_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
316 	LIST_INIT(&gif_softcs.list);
317 	if_clone_attach(&gif_cloner);
318 
319 	gv_psref_class = psref_class_create("gifvar", IPL_SOFTNET);
320 
321 	gif_sysctl_setup();
322 }
323 
324 static int
325 gifdetach(void)
326 {
327 
328 	mutex_enter(&gif_softcs.lock);
329 	if (!LIST_EMPTY(&gif_softcs.list)) {
330 		mutex_exit(&gif_softcs.lock);
331 		return EBUSY;
332 	}
333 
334 	psref_class_destroy(gv_psref_class);
335 
336 	if_clone_detach(&gif_cloner);
337 	sysctl_teardown(&gif_sysctl);
338 	mutex_exit(&gif_softcs.lock);
339 	mutex_destroy(&gif_softcs.lock);
340 	return 0;
341 }
342 
343 static int
344 gif_clone_create(struct if_clone *ifc, int unit)
345 {
346 	struct gif_softc *sc;
347 	struct gif_variant *var;
348 	struct ifnet *ifp;
349 	int rv;
350 
351 	sc = kmem_zalloc(sizeof(struct gif_softc), KM_SLEEP);
352 
353 	if_initname(&sc->gif_if, ifc->ifc_name, unit);
354 
355 	rv = gifattach0(sc);
356 	if (rv != 0) {
357 		kmem_free(sc, sizeof(struct gif_softc));
358 		return rv;
359 	}
360 
361 	ifp = &sc->gif_if;
362 	gif_perif_sysctl_setup(&ifp->if_sysctl_log, sc);
363 
364 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
365 	var->gv_softc = sc;
366 	psref_target_init(&var->gv_psref, gv_psref_class);
367 
368 	sc->gif_var = var;
369 	mutex_init(&sc->gif_lock, MUTEX_DEFAULT, IPL_NONE);
370 	sc->gif_psz = pserialize_create();
371 
372 	sc->gif_ro_percpu = if_tunnel_alloc_ro_percpu();
373 	mutex_enter(&gif_softcs.lock);
374 	LIST_INSERT_HEAD(&gif_softcs.list, sc, gif_list);
375 	mutex_exit(&gif_softcs.lock);
376 	return 0;
377 }
378 
379 static int
380 gifattach0(struct gif_softc *sc)
381 {
382 	int rv;
383 
384 	sc->gif_if.if_addrlen = 0;
385 	sc->gif_if.if_mtu    = GIF_MTU;
386 	sc->gif_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
387 #ifdef GIF_MPSAFE
388 	sc->gif_if.if_extflags  |= IFEF_MPSAFE;
389 #endif
390 	sc->gif_if.if_ioctl  = gif_ioctl;
391 	sc->gif_if.if_output = gif_output;
392 	sc->gif_if.if_start = gif_start;
393 	sc->gif_if.if_transmit = gif_transmit;
394 	sc->gif_if.if_type   = IFT_GIF;
395 	sc->gif_if.if_dlt    = DLT_NULL;
396 	sc->gif_if.if_softc  = sc;
397 	IFQ_SET_READY(&sc->gif_if.if_snd);
398 	rv = if_initialize(&sc->gif_if);
399 	if (rv != 0)
400 		return rv;
401 
402 	sc->gif_if.if_link_state = LINK_STATE_DOWN;
403 	if_alloc_sadl(&sc->gif_if);
404 	bpf_attach(&sc->gif_if, DLT_NULL, sizeof(u_int));
405 	if_register(&sc->gif_if);
406 	return 0;
407 }
408 
409 static int
410 gif_clone_destroy(struct ifnet *ifp)
411 {
412 	struct gif_softc *sc = (void *) ifp;
413 	struct gif_variant *var;
414 
415 	LIST_REMOVE(sc, gif_list);
416 
417 	gif_delete_tunnel(&sc->gif_if);
418 	bpf_detach(ifp);
419 	if_detach(ifp);
420 
421 	if_tunnel_free_ro_percpu(sc->gif_ro_percpu);
422 
423 	pserialize_destroy(sc->gif_psz);
424 	mutex_destroy(&sc->gif_lock);
425 
426 	var = sc->gif_var;
427 	kmem_free(var, sizeof(*var));
428 	kmem_free(sc, sizeof(struct gif_softc));
429 
430 	return 0;
431 }
432 
433 #ifdef GIF_ENCAPCHECK
434 int
435 gif_encapcheck(struct mbuf *m, int off, int proto, void *arg)
436 {
437 	struct ip ip;
438 	struct gif_softc *sc;
439 	struct gif_variant *var;
440 	struct psref psref;
441 	int ret = 0;
442 
443 	sc = arg;
444 	if (sc == NULL)
445 		return 0;
446 
447 	if ((sc->gif_if.if_flags & IFF_UP) == 0)
448 		return 0;
449 
450 	var = gif_getref_variant(sc, &psref);
451 	/* no physical address */
452 	if (var->gv_psrc == NULL || var->gv_pdst == NULL)
453 		goto out;
454 
455 	switch (proto) {
456 #ifdef INET
457 	case IPPROTO_IPV4:
458 		break;
459 #endif
460 #ifdef INET6
461 	case IPPROTO_IPV6:
462 		break;
463 #endif
464 	default:
465 		goto out;
466 	}
467 
468 	/* Bail on short packets */
469 	KASSERT(m->m_flags & M_PKTHDR);
470 	if (m->m_pkthdr.len < sizeof(ip))
471 		goto  out;
472 
473 	m_copydata(m, 0, sizeof(ip), &ip);
474 
475 	switch (ip.ip_v) {
476 #ifdef INET
477 	case 4:
478 		if (var->gv_psrc->sa_family != AF_INET ||
479 		    var->gv_pdst->sa_family != AF_INET)
480 			goto out;
481 		ret = gif_encapcheck4(m, off, proto, var);
482 		break;
483 #endif
484 #ifdef INET6
485 	case 6:
486 		if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
487 			goto out;
488 		if (var->gv_psrc->sa_family != AF_INET6 ||
489 		    var->gv_pdst->sa_family != AF_INET6)
490 			goto out;
491 		ret = gif_encapcheck6(m, off, proto, var);
492 		break;
493 #endif
494 	default:
495 		goto out;
496 	}
497 
498 out:
499 	gif_putref_variant(var, &psref);
500 	return ret;
501 }
502 #endif
503 
504 /*
505  * gif may cause infinite recursion calls when misconfigured.
506  * We'll prevent this by introducing upper limit.
507  */
508 static int
509 gif_check_nesting(struct ifnet *ifp, struct mbuf *m)
510 {
511 
512 	return if_tunnel_check_nesting(ifp, m, max_gif_nesting);
513 }
514 
515 static int
516 gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
517     const struct rtentry *rt)
518 {
519 	struct gif_softc *sc = ifp->if_softc;
520 	struct gif_variant *var = NULL;
521 	struct psref psref;
522 	int error = 0;
523 
524 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
525 
526 	if ((error = gif_check_nesting(ifp, m)) != 0) {
527 		m_freem(m);
528 		goto end;
529 	}
530 
531 	if ((ifp->if_flags & IFF_UP) == 0) {
532 		m_freem(m);
533 		error = ENETDOWN;
534 		goto end;
535 	}
536 
537 	var = gif_getref_variant(sc, &psref);
538 	if (var->gv_psrc == NULL || var->gv_pdst == NULL) {
539 		m_freem(m);
540 		error = ENETDOWN;
541 		goto end;
542 	}
543 	/* XXX should we check if our outer source is legal? */
544 
545 	m->m_flags &= ~(M_BCAST | M_MCAST);
546 
547 	/* use DLT_NULL encapsulation here to pass inner af type */
548 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
549 	if (!m) {
550 		error = ENOBUFS;
551 		goto end;
552 	}
553 	*mtod(m, int *) = dst->sa_family;
554 
555 	/* Clear checksum-offload flags. */
556 	m->m_pkthdr.csum_flags = 0;
557 	m->m_pkthdr.csum_data = 0;
558 
559 	error = if_transmit_lock(ifp, m);
560 
561 end:
562 	if (var != NULL)
563 		gif_putref_variant(var, &psref);
564 	if (error)
565 		if_statinc(ifp, if_oerrors);
566 	return error;
567 }
568 
569 static void
570 gif_start(struct ifnet *ifp)
571 {
572 	struct gif_softc *sc;
573 	struct gif_variant *var;
574 	struct mbuf *m;
575 	struct psref psref;
576 	int family;
577 	int len;
578 	int error;
579 
580 	sc = ifp->if_softc;
581 	var = gif_getref_variant(sc, &psref);
582 
583 	KASSERT(var->gv_output != NULL);
584 
585 	/* output processing */
586 	while (1) {
587 		IFQ_DEQUEUE(&sc->gif_if.if_snd, m);
588 		if (m == NULL)
589 			break;
590 
591 		/* grab and chop off inner af type */
592 		if (sizeof(int) > m->m_len) {
593 			m = m_pullup(m, sizeof(int));
594 			if (!m) {
595 				if_statinc(ifp, if_oerrors);
596 				continue;
597 			}
598 		}
599 		family = *mtod(m, int *);
600 		bpf_mtap(ifp, m, BPF_D_OUT);
601 		m_adj(m, sizeof(int));
602 
603 		len = m->m_pkthdr.len;
604 
605 		error = var->gv_output(var, family, m);
606 		if (error)
607 			if_statinc(ifp, if_oerrors);
608 		else
609 			if_statadd2(ifp, if_opackets, 1, if_obytes, len);
610 	}
611 
612 	gif_putref_variant(var, &psref);
613 }
614 
615 static int
616 gif_transmit(struct ifnet *ifp, struct mbuf *m)
617 {
618 	struct gif_softc *sc;
619 	struct gif_variant *var;
620 	struct psref psref;
621 	int error;
622 
623 	sc = ifp->if_softc;
624 
625 	/* output processing */
626 	if (m == NULL)
627 		return EINVAL;
628 
629 	var = gif_getref_variant(sc, &psref);
630 	error = gif_transmit_direct(var, m);
631 	gif_putref_variant(var, &psref);
632 
633 	return error;
634 }
635 
636 static int
637 gif_transmit_direct(struct gif_variant *var, struct mbuf *m)
638 {
639 	struct ifnet *ifp = &var->gv_softc->gif_if;
640 	int error;
641 	int family;
642 	int len;
643 
644 	KASSERT(gif_heldref_variant(var));
645 	KASSERT(var->gv_output != NULL);
646 
647 	/* grab and chop off inner af type */
648 	if (sizeof(int) > m->m_len) {
649 		m = m_pullup(m, sizeof(int));
650 		if (!m) {
651 			if_statinc(ifp, if_oerrors);
652 			return ENOBUFS;
653 		}
654 	}
655 	family = *mtod(m, int *);
656 	bpf_mtap(ifp, m, BPF_D_OUT);
657 	m_adj(m, sizeof(int));
658 
659 	len = m->m_pkthdr.len;
660 
661 	error = var->gv_output(var, family, m);
662 	if (error)
663 		if_statinc(ifp, if_oerrors);
664 	else
665 		if_statadd2(ifp, if_opackets, 1, if_obytes, len);
666 
667 	return error;
668 }
669 
670 void
671 gif_input(struct mbuf *m, int af, struct ifnet *ifp)
672 {
673 	pktqueue_t *pktq;
674 	size_t pktlen;
675 
676 	if (ifp == NULL) {
677 		/* just in case */
678 		m_freem(m);
679 		return;
680 	}
681 
682 	m_set_rcvif(m, ifp);
683 	pktlen = m->m_pkthdr.len;
684 
685 	bpf_mtap_af(ifp, af, m, BPF_D_IN);
686 
687 	/*
688 	 * Put the packet to the network layer input queue according to the
689 	 * specified address family.  Note: we avoid direct call to the
690 	 * input function of the network layer in order to avoid recursion.
691 	 * This may be revisited in the future.
692 	 */
693 	switch (af) {
694 #ifdef INET
695 	case AF_INET:
696 		pktq = ip_pktq;
697 		break;
698 #endif
699 #ifdef INET6
700 	case AF_INET6:
701 		pktq = ip6_pktq;
702 		break;
703 #endif
704 	default:
705 		m_freem(m);
706 		return;
707 	}
708 
709 #ifdef GIF_MPSAFE
710 	const u_int h = curcpu()->ci_index;
711 #else
712 	const uint32_t h = pktq_rps_hash(m);
713 #endif
714 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
715 		if_statadd2(ifp, if_ibytes, pktlen, if_ipackets, 1);
716 	} else {
717 		m_freem(m);
718 	}
719 }
720 
721 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
722 static int
723 gif_ioctl(struct ifnet *ifp, u_long cmd, void *data)
724 {
725 	struct gif_softc *sc  = ifp->if_softc;
726 	struct ifreq     *ifr = (struct ifreq*)data;
727 	struct ifaddr    *ifa = (struct ifaddr*)data;
728 	int error = 0, size, bound;
729 	struct sockaddr *dst, *src;
730 	struct gif_variant *var;
731 	struct psref psref;
732 
733 	switch (cmd) {
734 	case SIOCINITIFADDR:
735 		ifp->if_flags |= IFF_UP;
736 		ifa->ifa_rtrequest = p2p_rtrequest;
737 		break;
738 
739 	case SIOCADDMULTI:
740 	case SIOCDELMULTI:
741 		switch (ifr->ifr_addr.sa_family) {
742 #ifdef INET
743 		case AF_INET:	/* IP supports Multicast */
744 			break;
745 #endif /* INET */
746 #ifdef INET6
747 		case AF_INET6:	/* IP6 supports Multicast */
748 			break;
749 #endif /* INET6 */
750 		default:  /* Other protocols doesn't support Multicast */
751 			error = EAFNOSUPPORT;
752 			break;
753 		}
754 		break;
755 
756 	case SIOCSIFMTU:
757 		if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX)
758 			return EINVAL;
759 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
760 			error = 0;
761 		break;
762 
763 #ifdef INET
764 	case SIOCSIFPHYADDR:
765 #endif
766 #ifdef INET6
767 	case SIOCSIFPHYADDR_IN6:
768 #endif /* INET6 */
769 	case SIOCSLIFPHYADDR:
770 		switch (cmd) {
771 #ifdef INET
772 		case SIOCSIFPHYADDR:
773 			src = (struct sockaddr *)
774 				&(((struct in_aliasreq *)data)->ifra_addr);
775 			dst = (struct sockaddr *)
776 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
777 			break;
778 #endif
779 #ifdef INET6
780 		case SIOCSIFPHYADDR_IN6:
781 			src = (struct sockaddr *)
782 				&(((struct in6_aliasreq *)data)->ifra_addr);
783 			dst = (struct sockaddr *)
784 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
785 			break;
786 #endif
787 		case SIOCSLIFPHYADDR:
788 			src = (struct sockaddr *)
789 				&(((struct if_laddrreq *)data)->addr);
790 			dst = (struct sockaddr *)
791 				&(((struct if_laddrreq *)data)->dstaddr);
792 			break;
793 		default:
794 			return EINVAL;
795 		}
796 
797 		/* sa_family must be equal */
798 		if (src->sa_family != dst->sa_family)
799 			return EINVAL;
800 
801 		/* validate sa_len */
802 		switch (src->sa_family) {
803 #ifdef INET
804 		case AF_INET:
805 			if (src->sa_len != sizeof(struct sockaddr_in))
806 				return EINVAL;
807 			break;
808 #endif
809 #ifdef INET6
810 		case AF_INET6:
811 			if (src->sa_len != sizeof(struct sockaddr_in6))
812 				return EINVAL;
813 			break;
814 #endif
815 		default:
816 			return EAFNOSUPPORT;
817 		}
818 		switch (dst->sa_family) {
819 #ifdef INET
820 		case AF_INET:
821 			if (dst->sa_len != sizeof(struct sockaddr_in))
822 				return EINVAL;
823 			break;
824 #endif
825 #ifdef INET6
826 		case AF_INET6:
827 			if (dst->sa_len != sizeof(struct sockaddr_in6))
828 				return EINVAL;
829 			break;
830 #endif
831 		default:
832 			return EAFNOSUPPORT;
833 		}
834 
835 		/* check sa_family looks sane for the cmd */
836 		switch (cmd) {
837 		case SIOCSIFPHYADDR:
838 			if (src->sa_family == AF_INET)
839 				break;
840 			return EAFNOSUPPORT;
841 #ifdef INET6
842 		case SIOCSIFPHYADDR_IN6:
843 			if (src->sa_family == AF_INET6)
844 				break;
845 			return EAFNOSUPPORT;
846 #endif /* INET6 */
847 		case SIOCSLIFPHYADDR:
848 			/* checks done in the above */
849 			break;
850 		}
851 
852 		/*
853 		 * calls gif_getref_variant() for other softcs to check
854 		 * address pair duplicattion
855 		 */
856 		bound = curlwp_bind();
857 		error = gif_set_tunnel(&sc->gif_if, src, dst);
858 		if (error == 0)
859 			if_link_state_change(&sc->gif_if, LINK_STATE_UP);
860 		curlwp_bindx(bound);
861 
862 		break;
863 
864 #ifdef SIOCDIFPHYADDR
865 	case SIOCDIFPHYADDR:
866 		bound = curlwp_bind();
867 		gif_delete_tunnel(&sc->gif_if);
868 		if_link_state_change(&sc->gif_if, LINK_STATE_DOWN);
869 		curlwp_bindx(bound);
870 		break;
871 #endif
872 
873 	case SIOCGIFPSRCADDR:
874 #ifdef INET6
875 	case SIOCGIFPSRCADDR_IN6:
876 #endif /* INET6 */
877 		bound = curlwp_bind();
878 		var = gif_getref_variant(sc, &psref);
879 		if (var->gv_psrc == NULL) {
880 			gif_putref_variant(var, &psref);
881 			curlwp_bindx(bound);
882 			error = EADDRNOTAVAIL;
883 			goto bad;
884 		}
885 		src = var->gv_psrc;
886 		switch (cmd) {
887 #ifdef INET
888 		case SIOCGIFPSRCADDR:
889 			dst = &ifr->ifr_addr;
890 			size = sizeof(ifr->ifr_addr);
891 			break;
892 #endif /* INET */
893 #ifdef INET6
894 		case SIOCGIFPSRCADDR_IN6:
895 			dst = (struct sockaddr *)
896 				&(((struct in6_ifreq *)data)->ifr_addr);
897 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
898 			break;
899 #endif /* INET6 */
900 		default:
901 			gif_putref_variant(var, &psref);
902 			curlwp_bindx(bound);
903 			error = EADDRNOTAVAIL;
904 			goto bad;
905 		}
906 		if (src->sa_len > size) {
907 			gif_putref_variant(var, &psref);
908 			curlwp_bindx(bound);
909 			return EINVAL;
910 		}
911 		memcpy(dst, src, src->sa_len);
912 		gif_putref_variant(var, &psref);
913 		curlwp_bindx(bound);
914 		break;
915 
916 	case SIOCGIFPDSTADDR:
917 #ifdef INET6
918 	case SIOCGIFPDSTADDR_IN6:
919 #endif /* INET6 */
920 		bound = curlwp_bind();
921 		var = gif_getref_variant(sc, &psref);
922 		if (var->gv_pdst == NULL) {
923 			gif_putref_variant(var, &psref);
924 			curlwp_bindx(bound);
925 			error = EADDRNOTAVAIL;
926 			goto bad;
927 		}
928 		src = var->gv_pdst;
929 		switch (cmd) {
930 #ifdef INET
931 		case SIOCGIFPDSTADDR:
932 			dst = &ifr->ifr_addr;
933 			size = sizeof(ifr->ifr_addr);
934 			break;
935 #endif /* INET */
936 #ifdef INET6
937 		case SIOCGIFPDSTADDR_IN6:
938 			dst = (struct sockaddr *)
939 				&(((struct in6_ifreq *)data)->ifr_addr);
940 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
941 			break;
942 #endif /* INET6 */
943 		default:
944 			gif_putref_variant(var, &psref);
945 			curlwp_bindx(bound);
946 			error = EADDRNOTAVAIL;
947 			goto bad;
948 		}
949 		if (src->sa_len > size) {
950 			gif_putref_variant(var, &psref);
951 			curlwp_bindx(bound);
952 			return EINVAL;
953 		}
954 		memcpy(dst, src, src->sa_len);
955 		gif_putref_variant(var, &psref);
956 		curlwp_bindx(bound);
957 		break;
958 
959 	case SIOCGLIFPHYADDR:
960 		bound = curlwp_bind();
961 		var = gif_getref_variant(sc, &psref);
962 		if (var->gv_psrc == NULL || var->gv_pdst == NULL) {
963 			gif_putref_variant(var, &psref);
964 			curlwp_bindx(bound);
965 			error = EADDRNOTAVAIL;
966 			goto bad;
967 		}
968 
969 		/* copy src */
970 		src = var->gv_psrc;
971 		dst = (struct sockaddr *)
972 			&(((struct if_laddrreq *)data)->addr);
973 		size = sizeof(((struct if_laddrreq *)data)->addr);
974 		if (src->sa_len > size) {
975 			gif_putref_variant(var, &psref);
976 			curlwp_bindx(bound);
977 			return EINVAL;
978 		}
979 		memcpy(dst, src, src->sa_len);
980 
981 		/* copy dst */
982 		src = var->gv_pdst;
983 		dst = (struct sockaddr *)
984 			&(((struct if_laddrreq *)data)->dstaddr);
985 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
986 		if (src->sa_len > size) {
987 			gif_putref_variant(var, &psref);
988 			curlwp_bindx(bound);
989 			return EINVAL;
990 		}
991 		memcpy(dst, src, src->sa_len);
992 		gif_putref_variant(var, &psref);
993 		curlwp_bindx(bound);
994 		break;
995 
996 	default:
997 		return ifioctl_common(ifp, cmd, data);
998 	}
999  bad:
1000 	return error;
1001 }
1002 
1003 static int
1004 gif_encap_attach(struct gif_variant *var)
1005 {
1006 	int error;
1007 
1008 	if (var == NULL || var->gv_psrc == NULL)
1009 		return EINVAL;
1010 
1011 	switch (var->gv_psrc->sa_family) {
1012 #ifdef INET
1013 	case AF_INET:
1014 		error = in_gif_attach(var);
1015 		break;
1016 #endif
1017 #ifdef INET6
1018 	case AF_INET6:
1019 		error = in6_gif_attach(var);
1020 		break;
1021 #endif
1022 	default:
1023 		error = EINVAL;
1024 		break;
1025 	}
1026 
1027 	return error;
1028 }
1029 
1030 static int
1031 gif_encap_detach(struct gif_variant *var)
1032 {
1033 	int error;
1034 
1035 	if (var == NULL || var->gv_psrc == NULL)
1036 		return EINVAL;
1037 
1038 	switch (var->gv_psrc->sa_family) {
1039 #ifdef INET
1040 	case AF_INET:
1041 		error = in_gif_detach(var);
1042 		break;
1043 #endif
1044 #ifdef INET6
1045 	case AF_INET6:
1046 		error = in6_gif_detach(var);
1047 		break;
1048 #endif
1049 	default:
1050 		error = EINVAL;
1051 		break;
1052 	}
1053 
1054 	return error;
1055 }
1056 
1057 static int
1058 gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
1059 {
1060 	struct gif_softc *sc = ifp->if_softc;
1061 	struct gif_softc *sc2;
1062 	struct gif_variant *ovar, *nvar;
1063 	struct sockaddr *osrc, *odst;
1064 	struct sockaddr *nsrc, *ndst;
1065 	int error;
1066 #ifndef GIF_MPSAFE
1067 	int s;
1068 
1069 	s = splsoftnet();
1070 #endif
1071 	error = encap_lock_enter();
1072 	if (error) {
1073 #ifndef GIF_MPSAFE
1074 		splx(s);
1075 #endif
1076 		return error;
1077 	}
1078 
1079 	nsrc = sockaddr_dup(src, M_WAITOK);
1080 	ndst = sockaddr_dup(dst, M_WAITOK);
1081 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1082 
1083 	mutex_enter(&sc->gif_lock);
1084 
1085 	ovar = sc->gif_var;
1086 
1087 	if ((ovar->gv_pdst && sockaddr_cmp(ovar->gv_pdst, dst) == 0) &&
1088 	    (ovar->gv_psrc && sockaddr_cmp(ovar->gv_psrc, src) == 0)) {
1089 		/* address and port pair not changed. */
1090 		error = 0;
1091 		goto out;
1092 	}
1093 
1094 	mutex_enter(&gif_softcs.lock);
1095 	LIST_FOREACH(sc2, &gif_softcs.list, gif_list) {
1096 		struct gif_variant *var2;
1097 		struct psref psref;
1098 
1099 		if (sc2 == sc)
1100 			continue;
1101 		var2 = gif_getref_variant(sc2, &psref);
1102 		if (!var2->gv_pdst || !var2->gv_psrc) {
1103 			gif_putref_variant(var2, &psref);
1104 			continue;
1105 		}
1106 		/* can't configure same pair of address onto two gifs */
1107 		if (sockaddr_cmp(var2->gv_pdst, dst) == 0 &&
1108 		    sockaddr_cmp(var2->gv_psrc, src) == 0) {
1109 			/* continue to use the old configuration. */
1110 			gif_putref_variant(var2, &psref);
1111 			mutex_exit(&gif_softcs.lock);
1112 			error =  EADDRNOTAVAIL;
1113 			goto out;
1114 		}
1115 		gif_putref_variant(var2, &psref);
1116 		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
1117 	}
1118 	mutex_exit(&gif_softcs.lock);
1119 
1120 	osrc = ovar->gv_psrc;
1121 	odst = ovar->gv_pdst;
1122 
1123 	*nvar = *ovar;
1124 	nvar->gv_psrc = nsrc;
1125 	nvar->gv_pdst = ndst;
1126 	nvar->gv_encap_cookie4 = NULL;
1127 	nvar->gv_encap_cookie6 = NULL;
1128 	error = gif_encap_attach(nvar);
1129 	if (error)
1130 		goto out;
1131 	psref_target_init(&nvar->gv_psref, gv_psref_class);
1132 	gif_update_variant(sc, nvar);
1133 
1134 	mutex_exit(&sc->gif_lock);
1135 
1136 	(void)gif_encap_detach(ovar);
1137 	encap_lock_exit();
1138 
1139 	if (osrc)
1140 		sockaddr_free(osrc);
1141 	if (odst)
1142 		sockaddr_free(odst);
1143 	kmem_free(ovar, sizeof(*ovar));
1144 
1145 #ifndef GIF_MPSAFE
1146 	splx(s);
1147 #endif
1148 	return 0;
1149 
1150  out:
1151 	sockaddr_free(nsrc);
1152 	sockaddr_free(ndst);
1153 	kmem_free(nvar, sizeof(*nvar));
1154 
1155 	mutex_exit(&sc->gif_lock);
1156 	encap_lock_exit();
1157 #ifndef GIF_MPSAFE
1158 	splx(s);
1159 #endif
1160 	return error;
1161 }
1162 
1163 static void
1164 gif_delete_tunnel(struct ifnet *ifp)
1165 {
1166 	struct gif_softc *sc = ifp->if_softc;
1167 	struct gif_variant *ovar, *nvar;
1168 	struct sockaddr *osrc, *odst;
1169 	int error;
1170 #ifndef GIF_MPSAFE
1171 	int s;
1172 
1173 	s = splsoftnet();
1174 #endif
1175 	error = encap_lock_enter();
1176 	if (error) {
1177 #ifndef GIF_MPSAFE
1178 		splx(s);
1179 #endif
1180 		return;
1181 	}
1182 
1183 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1184 
1185 	mutex_enter(&sc->gif_lock);
1186 
1187 	ovar = sc->gif_var;
1188 	osrc = ovar->gv_psrc;
1189 	odst = ovar->gv_pdst;
1190 	if (osrc == NULL || odst == NULL) {
1191 		/* address pair not changed. */
1192 		mutex_exit(&sc->gif_lock);
1193 		encap_lock_exit();
1194 		kmem_free(nvar, sizeof(*nvar));
1195 #ifndef GIF_MPSAFE
1196 		splx(s);
1197 #endif
1198 		return;
1199 	}
1200 
1201 	*nvar = *ovar;
1202 	nvar->gv_psrc = NULL;
1203 	nvar->gv_pdst = NULL;
1204 	nvar->gv_encap_cookie4 = NULL;
1205 	nvar->gv_encap_cookie6 = NULL;
1206 	nvar->gv_output = NULL;
1207 	psref_target_init(&nvar->gv_psref, gv_psref_class);
1208 	gif_update_variant(sc, nvar);
1209 
1210 	mutex_exit(&sc->gif_lock);
1211 
1212 	gif_encap_detach(ovar);
1213 	encap_lock_exit();
1214 
1215 	sockaddr_free(osrc);
1216 	sockaddr_free(odst);
1217 	kmem_free(ovar, sizeof(*ovar));
1218 
1219 #ifndef GIF_MPSAFE
1220 	splx(s);
1221 #endif
1222 }
1223 
1224 /*
1225  * gif_variant update API.
1226  *
1227  * Assumption:
1228  * reader side dereferences sc->gif_var in reader critical section only,
1229  * that is, all of reader sides do not reader the sc->gif_var after
1230  * pserialize_perform().
1231  */
1232 static void
1233 gif_update_variant(struct gif_softc *sc, struct gif_variant *nvar)
1234 {
1235 	struct ifnet *ifp = &sc->gif_if;
1236 	struct gif_variant *ovar = sc->gif_var;
1237 
1238 	KASSERT(mutex_owned(&sc->gif_lock));
1239 
1240 	atomic_store_release(&sc->gif_var, nvar);
1241 	pserialize_perform(sc->gif_psz);
1242 	psref_target_destroy(&ovar->gv_psref, gv_psref_class);
1243 
1244 	if (nvar->gv_psrc != NULL && nvar->gv_pdst != NULL)
1245 		ifp->if_flags |= IFF_RUNNING;
1246 	else
1247 		ifp->if_flags &= ~IFF_RUNNING;
1248 }
1249 
1250 /*
1251  * Module infrastructure
1252  */
1253 #include "if_module.h"
1254 
1255 IF_MODULE(MODULE_CLASS_DRIVER, gif, "ip_ecn")
1256