xref: /netbsd-src/sys/net/if_l2tp.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: if_l2tp.c,v 1.29 2018/06/26 06:48:02 msaitoh Exp $	*/
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * L2TPv3 kernel interface
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: if_l2tp.c,v 1.29 2018/06/26 06:48:02 msaitoh Exp $");
35 
36 #ifdef _KERNEL_OPT
37 #include "opt_inet.h"
38 #include "opt_net_mpsafe.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/mbuf.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/errno.h>
48 #include <sys/ioctl.h>
49 #include <sys/time.h>
50 #include <sys/syslog.h>
51 #include <sys/proc.h>
52 #include <sys/conf.h>
53 #include <sys/kauth.h>
54 #include <sys/cpu.h>
55 #include <sys/cprng.h>
56 #include <sys/intr.h>
57 #include <sys/kmem.h>
58 #include <sys/mutex.h>
59 #include <sys/atomic.h>
60 #include <sys/pserialize.h>
61 #include <sys/device.h>
62 #include <sys/module.h>
63 
64 #include <net/if.h>
65 #include <net/if_dl.h>
66 #include <net/if_ether.h>
67 #include <net/if_types.h>
68 #include <net/netisr.h>
69 #include <net/route.h>
70 #include <net/bpf.h>
71 #include <net/if_vlanvar.h>
72 
73 #include <netinet/in.h>
74 #include <netinet/in_systm.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip_encap.h>
77 #ifdef	INET
78 #include <netinet/in_var.h>
79 #include <netinet/in_l2tp.h>
80 #endif	/* INET */
81 #ifdef INET6
82 #include <netinet6/in6_l2tp.h>
83 #endif
84 
85 #include <net/if_l2tp.h>
86 
87 #include <net/if_vlanvar.h>
88 
89 /* TODO: IP_TCPMSS support */
90 #undef IP_TCPMSS
91 #ifdef IP_TCPMSS
92 #include <netinet/ip_tcpmss.h>
93 #endif
94 
95 /*
96  * l2tp global variable definitions
97  */
98 LIST_HEAD(l2tp_sclist, l2tp_softc);
99 static struct {
100 	struct l2tp_sclist list;
101 	kmutex_t lock;
102 } l2tp_softcs __cacheline_aligned;
103 
104 
105 #if !defined(L2TP_ID_HASH_SIZE)
106 #define L2TP_ID_HASH_SIZE 64
107 #endif
108 static struct {
109 	kmutex_t lock;
110 	struct pslist_head *lists;
111 	u_long mask;
112 } l2tp_hash __cacheline_aligned = {
113 	.lists = NULL,
114 };
115 
116 pserialize_t l2tp_psz __read_mostly;
117 struct psref_class *lv_psref_class __read_mostly;
118 
119 static void	l2tp_ro_init_pc(void *, void *, struct cpu_info *);
120 static void	l2tp_ro_fini_pc(void *, void *, struct cpu_info *);
121 
122 static int	l2tp_clone_create(struct if_clone *, int);
123 static int	l2tp_clone_destroy(struct ifnet *);
124 
125 struct if_clone l2tp_cloner =
126     IF_CLONE_INITIALIZER("l2tp", l2tp_clone_create, l2tp_clone_destroy);
127 
128 static int	l2tp_output(struct ifnet *, struct mbuf *,
129 		    const struct sockaddr *, const struct rtentry *);
130 static void	l2tpintr(struct l2tp_variant *);
131 
132 static void	l2tp_hash_init(void);
133 static int	l2tp_hash_fini(void);
134 
135 static void	l2tp_start(struct ifnet *);
136 static int	l2tp_transmit(struct ifnet *, struct mbuf *);
137 
138 static int	l2tp_set_tunnel(struct ifnet *, struct sockaddr *,
139 		    struct sockaddr *);
140 static void	l2tp_delete_tunnel(struct ifnet *);
141 
142 static int	id_hash_func(uint32_t, u_long);
143 
144 static void	l2tp_variant_update(struct l2tp_softc *, struct l2tp_variant *);
145 static int	l2tp_set_session(struct l2tp_softc *, uint32_t, uint32_t);
146 static int	l2tp_clear_session(struct l2tp_softc *);
147 static int	l2tp_set_cookie(struct l2tp_softc *, uint64_t, u_int, uint64_t, u_int);
148 static void	l2tp_clear_cookie(struct l2tp_softc *);
149 static void	l2tp_set_state(struct l2tp_softc *, int);
150 static int	l2tp_encap_attach(struct l2tp_variant *);
151 static int	l2tp_encap_detach(struct l2tp_variant *);
152 
153 #ifndef MAX_L2TP_NEST
154 /*
155  * This macro controls the upper limitation on nesting of l2tp tunnels.
156  * Since, setting a large value to this macro with a careless configuration
157  * may introduce system crash, we don't allow any nestings by default.
158  * If you need to configure nested l2tp tunnels, you can define this macro
159  * in your kernel configuration file.  However, if you do so, please be
160  * careful to configure the tunnels so that it won't make a loop.
161  */
162 /*
163  * XXX
164  * Currently, if in_l2tp_output recursively calls, it causes locking against
165  * myself of struct l2tp_ro->lr_lock. So, nested l2tp tunnels is prohibited.
166  */
167 #define MAX_L2TP_NEST 0
168 #endif
169 
170 static int max_l2tp_nesting = MAX_L2TP_NEST;
171 
172 /* ARGSUSED */
173 void
174 l2tpattach(int count)
175 {
176 	/*
177 	 * Nothing to do here, initialization is handled by the
178 	 * module initialization code in l2tpinit() below).
179 	 */
180 }
181 
182 static void
183 l2tpinit(void)
184 {
185 
186 	mutex_init(&l2tp_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
187 	LIST_INIT(&l2tp_softcs.list);
188 
189 	mutex_init(&l2tp_hash.lock, MUTEX_DEFAULT, IPL_NONE);
190 	l2tp_psz = pserialize_create();
191 	lv_psref_class = psref_class_create("l2tpvar", IPL_SOFTNET);
192 	if_clone_attach(&l2tp_cloner);
193 
194 	l2tp_hash_init();
195 }
196 
197 static int
198 l2tpdetach(void)
199 {
200 	int error;
201 
202 	mutex_enter(&l2tp_softcs.lock);
203 	if (!LIST_EMPTY(&l2tp_softcs.list)) {
204 		mutex_exit(&l2tp_softcs.lock);
205 		return EBUSY;
206 	}
207 	mutex_exit(&l2tp_softcs.lock);
208 
209 	error = l2tp_hash_fini();
210 	if (error)
211 		return error;
212 
213 	if_clone_detach(&l2tp_cloner);
214 	psref_class_destroy(lv_psref_class);
215 	pserialize_destroy(l2tp_psz);
216 	mutex_destroy(&l2tp_hash.lock);
217 
218 	mutex_destroy(&l2tp_softcs.lock);
219 
220 	return error;
221 }
222 
223 static int
224 l2tp_clone_create(struct if_clone *ifc, int unit)
225 {
226 	struct l2tp_softc *sc;
227 	struct l2tp_variant *var;
228 	int rv;
229 
230 	sc = kmem_zalloc(sizeof(struct l2tp_softc), KM_SLEEP);
231 	if_initname(&sc->l2tp_ec.ec_if, ifc->ifc_name, unit);
232 	rv = l2tpattach0(sc);
233 	if (rv != 0) {
234 		kmem_free(sc, sizeof(struct l2tp_softc));
235 		return rv;
236 	}
237 
238 	var = kmem_zalloc(sizeof(struct l2tp_variant), KM_SLEEP);
239 	var->lv_softc = sc;
240 	var->lv_state = L2TP_STATE_DOWN;
241 	var->lv_use_cookie = L2TP_COOKIE_OFF;
242 	psref_target_init(&var->lv_psref, lv_psref_class);
243 
244 	sc->l2tp_var = var;
245 	mutex_init(&sc->l2tp_lock, MUTEX_DEFAULT, IPL_NONE);
246 	PSLIST_ENTRY_INIT(sc, l2tp_hash);
247 
248 	sc->l2tp_ro_percpu = percpu_alloc(sizeof(struct l2tp_ro));
249 	percpu_foreach(sc->l2tp_ro_percpu, l2tp_ro_init_pc, NULL);
250 
251 	mutex_enter(&l2tp_softcs.lock);
252 	LIST_INSERT_HEAD(&l2tp_softcs.list, sc, l2tp_list);
253 	mutex_exit(&l2tp_softcs.lock);
254 
255 	return (0);
256 }
257 
258 int
259 l2tpattach0(struct l2tp_softc *sc)
260 {
261 	int rv;
262 
263 	sc->l2tp_ec.ec_if.if_addrlen = 0;
264 	sc->l2tp_ec.ec_if.if_mtu    = L2TP_MTU;
265 	sc->l2tp_ec.ec_if.if_flags  = IFF_POINTOPOINT|IFF_MULTICAST|IFF_SIMPLEX;
266 	sc->l2tp_ec.ec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE;
267 #ifdef NET_MPSAFE
268 	sc->l2tp_ec.ec_if.if_extflags |= IFEF_MPSAFE;
269 #endif
270 	sc->l2tp_ec.ec_if.if_ioctl  = l2tp_ioctl;
271 	sc->l2tp_ec.ec_if.if_output = l2tp_output;
272 	sc->l2tp_ec.ec_if.if_type   = IFT_L2TP;
273 	sc->l2tp_ec.ec_if.if_dlt    = DLT_NULL;
274 	sc->l2tp_ec.ec_if.if_start  = l2tp_start;
275 	sc->l2tp_ec.ec_if.if_transmit = l2tp_transmit;
276 	sc->l2tp_ec.ec_if._if_input = ether_input;
277 	IFQ_SET_READY(&sc->l2tp_ec.ec_if.if_snd);
278 	/* XXX
279 	 * It may improve performance to use if_initialize()/if_register()
280 	 * so that l2tp_input() calls if_input() instead of
281 	 * if_percpuq_enqueue(). However, that causes recursive softnet_lock
282 	 * when NET_MPSAFE is not set.
283 	 */
284 	rv = if_attach(&sc->l2tp_ec.ec_if);
285 	if (rv != 0)
286 		return rv;
287 	if_alloc_sadl(&sc->l2tp_ec.ec_if);
288 	bpf_attach(&sc->l2tp_ec.ec_if, DLT_EN10MB, sizeof(struct ether_header));
289 
290 	return 0;
291 }
292 
293 void
294 l2tp_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
295 {
296 	struct l2tp_ro *lro = p;
297 
298 	lro->lr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
299 }
300 
301 void
302 l2tp_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
303 {
304 	struct l2tp_ro *lro = p;
305 
306 	rtcache_free(&lro->lr_ro);
307 
308 	mutex_obj_free(lro->lr_lock);
309 }
310 
311 static int
312 l2tp_clone_destroy(struct ifnet *ifp)
313 {
314 	struct l2tp_variant *var;
315 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
316 	    l2tp_ec.ec_if);
317 
318 	l2tp_clear_session(sc);
319 	l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
320 	/*
321 	 * To avoid for l2tp_transmit() to access sc->l2tp_var after free it.
322 	 */
323 	mutex_enter(&sc->l2tp_lock);
324 	var = sc->l2tp_var;
325 	l2tp_variant_update(sc, NULL);
326 	mutex_exit(&sc->l2tp_lock);
327 
328 	mutex_enter(&l2tp_softcs.lock);
329 	LIST_REMOVE(sc, l2tp_list);
330 	mutex_exit(&l2tp_softcs.lock);
331 
332 	bpf_detach(ifp);
333 
334 	if_detach(ifp);
335 
336 	percpu_foreach(sc->l2tp_ro_percpu, l2tp_ro_fini_pc, NULL);
337 	percpu_free(sc->l2tp_ro_percpu, sizeof(struct l2tp_ro));
338 
339 	kmem_free(var, sizeof(struct l2tp_variant));
340 	mutex_destroy(&sc->l2tp_lock);
341 	kmem_free(sc, sizeof(struct l2tp_softc));
342 
343 	return 0;
344 }
345 
346 static int
347 l2tp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
348     const struct rtentry *rt)
349 {
350 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
351 	    l2tp_ec.ec_if);
352 	struct l2tp_variant *var;
353 	struct psref psref;
354 	int error = 0;
355 
356 	var = l2tp_getref_variant(sc, &psref);
357 	if (var == NULL) {
358 		m_freem(m);
359 		return ENETDOWN;
360 	}
361 
362 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
363 
364 	m->m_flags &= ~(M_BCAST|M_MCAST);
365 
366 	if ((ifp->if_flags & IFF_UP) == 0) {
367 		m_freem(m);
368 		error = ENETDOWN;
369 		goto end;
370 	}
371 
372 	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
373 		m_freem(m);
374 		error = ENETDOWN;
375 		goto end;
376 	}
377 
378 	/* XXX should we check if our outer source is legal? */
379 
380 	/* use DLT_NULL encapsulation here to pass inner af type */
381 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
382 	if (!m) {
383 		error = ENOBUFS;
384 		goto end;
385 	}
386 	*mtod(m, int *) = dst->sa_family;
387 
388 	IFQ_ENQUEUE(&ifp->if_snd, m, error);
389 	if (error)
390 		goto end;
391 
392 	/*
393 	 * direct call to avoid infinite loop at l2tpintr()
394 	 */
395 	l2tpintr(var);
396 
397 	error = 0;
398 
399 end:
400 	l2tp_putref_variant(var, &psref);
401 	if (error)
402 		ifp->if_oerrors++;
403 
404 	return error;
405 }
406 
407 static void
408 l2tpintr(struct l2tp_variant *var)
409 {
410 	struct l2tp_softc *sc;
411 	struct ifnet *ifp;
412 	struct mbuf *m;
413 	int error;
414 
415 	KASSERT(psref_held(&var->lv_psref, lv_psref_class));
416 
417 	sc = var->lv_softc;
418 	ifp = &sc->l2tp_ec.ec_if;
419 
420 	/* output processing */
421 	if (var->lv_my_sess_id == 0 || var->lv_peer_sess_id == 0) {
422 		IFQ_PURGE(&ifp->if_snd);
423 		return;
424 	}
425 
426 	for (;;) {
427 		IFQ_DEQUEUE(&ifp->if_snd, m);
428 		if (m == NULL)
429 			break;
430 		m->m_flags &= ~(M_BCAST|M_MCAST);
431 		bpf_mtap(ifp, m, BPF_D_OUT);
432 		switch (var->lv_psrc->sa_family) {
433 #ifdef INET
434 		case AF_INET:
435 			error = in_l2tp_output(var, m);
436 			break;
437 #endif
438 #ifdef INET6
439 		case AF_INET6:
440 			error = in6_l2tp_output(var, m);
441 			break;
442 #endif
443 		default:
444 			m_freem(m);
445 			error = ENETDOWN;
446 			break;
447 		}
448 
449 		if (error)
450 			ifp->if_oerrors++;
451 		else {
452 			ifp->if_opackets++;
453 			/*
454 			 * obytes is incremented at ether_output() or
455 			 * bridge_enqueue().
456 			 */
457 		}
458 	}
459 
460 }
461 
462 void
463 l2tp_input(struct mbuf *m, struct ifnet *ifp)
464 {
465 	vaddr_t addr;
466 
467 	KASSERT(ifp != NULL);
468 
469 	/*
470 	 * Currently, l2tp(4) supports only ethernet as inner protocol.
471 	 */
472 	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
473 		m_freem(m);
474 		return;
475 	}
476 
477 	/*
478 	 * If the head of the payload is not aligned, align it.
479 	 */
480 	addr = mtod(m, vaddr_t);
481 	if ((addr & 0x03) != 0x2) {
482 		/* copy and align head of payload */
483 		struct mbuf *m_head;
484 		int copy_length;
485 		u_int pad = roundup(sizeof(struct ether_header), 4)
486 			- sizeof(struct ether_header);
487 
488 #define L2TP_COPY_LENGTH		60
489 
490 		if (m->m_pkthdr.len < L2TP_COPY_LENGTH) {
491 			copy_length = m->m_pkthdr.len;
492 		} else {
493 			copy_length = L2TP_COPY_LENGTH;
494 		}
495 
496 		if (m->m_len < copy_length) {
497 			m = m_pullup(m, copy_length);
498 			if (m == NULL)
499 				return;
500 		}
501 
502 		MGETHDR(m_head, M_DONTWAIT, MT_HEADER);
503 		if (m_head == NULL) {
504 			m_freem(m);
505 			return;
506 		}
507 		M_MOVE_PKTHDR(m_head, m);
508 
509 		/*
510 		 * m_head should be:
511 		 *                             L2TP_COPY_LENGTH
512 		 *                          <-  + roundup(pad, 4) - pad ->
513 		 *   +-------+--------+-----+--------------+-------------+
514 		 *   | m_hdr | pkthdr | ... | ether header |   payload   |
515 		 *   +-------+--------+-----+--------------+-------------+
516 		 *                          ^              ^
517 		 *                          m_data         4 byte aligned
518 		 */
519 		MH_ALIGN(m_head, L2TP_COPY_LENGTH + roundup(pad, 4));
520 		m_head->m_data += pad;
521 
522 		memcpy(mtod(m_head, void *), mtod(m, void *), copy_length);
523 		m_head->m_len = copy_length;
524 		m->m_data += copy_length;
525 		m->m_len -= copy_length;
526 
527 		/* construct chain */
528 		if (m->m_len == 0) {
529 			m_head->m_next = m_free(m);
530 		} else {
531 			m_head->m_next = m;
532 		}
533 
534 		/* override m */
535 		m = m_head;
536 	}
537 
538 	m_set_rcvif(m, ifp);
539 
540 	/*
541 	 * bpf_mtap() and ifp->if_ipackets++ is done in if_input()
542 	 *
543 	 * obytes is incremented at ether_output() or bridge_enqueue().
544 	 */
545 	if_percpuq_enqueue(ifp->if_percpuq, m);
546 }
547 
548 void
549 l2tp_start(struct ifnet *ifp)
550 {
551 	struct psref psref;
552 	struct l2tp_variant *var;
553 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
554 	    l2tp_ec.ec_if);
555 
556 	var = l2tp_getref_variant(sc, &psref);
557 	if (var == NULL)
558 		return;
559 
560 	if (var->lv_psrc == NULL || var->lv_pdst == NULL)
561 		return;
562 
563 	l2tpintr(var);
564 	l2tp_putref_variant(var, &psref);
565 }
566 
567 int
568 l2tp_transmit(struct ifnet *ifp, struct mbuf *m)
569 {
570 	int error;
571 	struct psref psref;
572 	struct l2tp_variant *var;
573 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
574 	    l2tp_ec.ec_if);
575 
576 	var = l2tp_getref_variant(sc, &psref);
577 	if (var == NULL) {
578 		m_freem(m);
579 		return ENETDOWN;
580 	}
581 
582 	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
583 		m_freem(m);
584 		error = ENETDOWN;
585 		goto out;
586 	}
587 
588 	m->m_flags &= ~(M_BCAST|M_MCAST);
589 	bpf_mtap(ifp, m, BPF_D_OUT);
590 	switch (var->lv_psrc->sa_family) {
591 #ifdef INET
592 	case AF_INET:
593 		error = in_l2tp_output(var, m);
594 		break;
595 #endif
596 #ifdef INET6
597 	case AF_INET6:
598 		error = in6_l2tp_output(var, m);
599 		break;
600 #endif
601 	default:
602 		m_freem(m);
603 		error = ENETDOWN;
604 		break;
605 	}
606 
607 	if (error)
608 		ifp->if_oerrors++;
609 	else {
610 		ifp->if_opackets++;
611 		/*
612 		 * obytes is incremented at ether_output() or bridge_enqueue().
613 		 */
614 	}
615 
616 out:
617 	l2tp_putref_variant(var, &psref);
618 	return error;
619 }
620 
621 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
622 int
623 l2tp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
624 {
625 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
626 	    l2tp_ec.ec_if);
627 	struct l2tp_variant *var, *var_tmp;
628 	struct ifreq     *ifr = data;
629 	int error = 0, size;
630 	struct sockaddr *dst, *src;
631 	struct l2tp_req l2tpr;
632 	u_long mtu;
633 	int bound;
634 	struct psref psref;
635 
636 	switch (cmd) {
637 	case SIOCSIFADDR:
638 		ifp->if_flags |= IFF_UP;
639 		break;
640 
641 	case SIOCSIFDSTADDR:
642 		break;
643 
644 	case SIOCADDMULTI:
645 	case SIOCDELMULTI:
646 		switch (ifr->ifr_addr.sa_family) {
647 #ifdef INET
648 		case AF_INET:	/* IP supports Multicast */
649 			break;
650 #endif /* INET */
651 #ifdef INET6
652 		case AF_INET6:	/* IP6 supports Multicast */
653 			break;
654 #endif /* INET6 */
655 		default:  /* Other protocols doesn't support Multicast */
656 			error = EAFNOSUPPORT;
657 			break;
658 		}
659 		break;
660 
661 	case SIOCSIFMTU:
662 		mtu = ifr->ifr_mtu;
663 		if (mtu < L2TP_MTU_MIN || mtu > L2TP_MTU_MAX)
664 			return (EINVAL);
665 		ifp->if_mtu = mtu;
666 		break;
667 
668 #ifdef INET
669 	case SIOCSIFPHYADDR:
670 		src = (struct sockaddr *)
671 			&(((struct in_aliasreq *)data)->ifra_addr);
672 		dst = (struct sockaddr *)
673 			&(((struct in_aliasreq *)data)->ifra_dstaddr);
674 		if (src->sa_family != AF_INET || dst->sa_family != AF_INET)
675 			return EAFNOSUPPORT;
676 		else if (src->sa_len != sizeof(struct sockaddr_in)
677 		    || dst->sa_len != sizeof(struct sockaddr_in))
678 			return EINVAL;
679 
680 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
681 		break;
682 
683 #endif /* INET */
684 #ifdef INET6
685 	case SIOCSIFPHYADDR_IN6:
686 		src = (struct sockaddr *)
687 			&(((struct in6_aliasreq *)data)->ifra_addr);
688 		dst = (struct sockaddr *)
689 			&(((struct in6_aliasreq *)data)->ifra_dstaddr);
690 		if (src->sa_family != AF_INET6 || dst->sa_family != AF_INET6)
691 			return EAFNOSUPPORT;
692 		else if (src->sa_len != sizeof(struct sockaddr_in6)
693 		    || dst->sa_len != sizeof(struct sockaddr_in6))
694 			return EINVAL;
695 
696 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
697 		break;
698 
699 #endif /* INET6 */
700 	case SIOCSLIFPHYADDR:
701 		src = (struct sockaddr *)
702 			&(((struct if_laddrreq *)data)->addr);
703 		dst = (struct sockaddr *)
704 			&(((struct if_laddrreq *)data)->dstaddr);
705 		if (src->sa_family != dst->sa_family)
706 			return EINVAL;
707 		else if (src->sa_family == AF_INET
708 		    && src->sa_len != sizeof(struct sockaddr_in))
709 			return EINVAL;
710 		else if (src->sa_family == AF_INET6
711 		    && src->sa_len != sizeof(struct sockaddr_in6))
712 			return EINVAL;
713 		else if (dst->sa_family == AF_INET
714 		    && dst->sa_len != sizeof(struct sockaddr_in))
715 			return EINVAL;
716 		else if (dst->sa_family == AF_INET6
717 		    && dst->sa_len != sizeof(struct sockaddr_in6))
718 			return EINVAL;
719 
720 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
721 		break;
722 
723 	case SIOCDIFPHYADDR:
724 		l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
725 		break;
726 
727 	case SIOCGIFPSRCADDR:
728 #ifdef INET6
729 	case SIOCGIFPSRCADDR_IN6:
730 #endif /* INET6 */
731 		bound = curlwp_bind();
732 		var = l2tp_getref_variant(sc, &psref);
733 		if (var == NULL) {
734 			curlwp_bindx(bound);
735 			error = EADDRNOTAVAIL;
736 			goto bad;
737 		}
738 		if (var->lv_psrc == NULL) {
739 			l2tp_putref_variant(var, &psref);
740 			curlwp_bindx(bound);
741 			error = EADDRNOTAVAIL;
742 			goto bad;
743 		}
744 		src = var->lv_psrc;
745 		switch (cmd) {
746 #ifdef INET
747 		case SIOCGIFPSRCADDR:
748 			dst = &ifr->ifr_addr;
749 			size = sizeof(ifr->ifr_addr);
750 			break;
751 #endif /* INET */
752 #ifdef INET6
753 		case SIOCGIFPSRCADDR_IN6:
754 			dst = (struct sockaddr *)
755 				&(((struct in6_ifreq *)data)->ifr_addr);
756 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
757 			break;
758 #endif /* INET6 */
759 		default:
760 			l2tp_putref_variant(var, &psref);
761 			curlwp_bindx(bound);
762 			error = EADDRNOTAVAIL;
763 			goto bad;
764 		}
765 		if (src->sa_len > size) {
766 			l2tp_putref_variant(var, &psref);
767 			curlwp_bindx(bound);
768 			return EINVAL;
769 		}
770 		sockaddr_copy(dst, src->sa_len, src);
771 		l2tp_putref_variant(var, &psref);
772 		curlwp_bindx(bound);
773 		break;
774 
775 	case SIOCGIFPDSTADDR:
776 #ifdef INET6
777 	case SIOCGIFPDSTADDR_IN6:
778 #endif /* INET6 */
779 		bound = curlwp_bind();
780 		var = l2tp_getref_variant(sc, &psref);
781 		if (var == NULL) {
782 			curlwp_bindx(bound);
783 			error = EADDRNOTAVAIL;
784 			goto bad;
785 		}
786 		if (var->lv_pdst == NULL) {
787 			l2tp_putref_variant(var, &psref);
788 			curlwp_bindx(bound);
789 			error = EADDRNOTAVAIL;
790 			goto bad;
791 		}
792 		src = var->lv_pdst;
793 		switch (cmd) {
794 #ifdef INET
795 		case SIOCGIFPDSTADDR:
796 			dst = &ifr->ifr_addr;
797 			size = sizeof(ifr->ifr_addr);
798 			break;
799 #endif /* INET */
800 #ifdef INET6
801 		case SIOCGIFPDSTADDR_IN6:
802 			dst = (struct sockaddr *)
803 				&(((struct in6_ifreq *)data)->ifr_addr);
804 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
805 			break;
806 #endif /* INET6 */
807 		default:
808 			l2tp_putref_variant(var, &psref);
809 			curlwp_bindx(bound);
810 			error = EADDRNOTAVAIL;
811 			goto bad;
812 		}
813 		if (src->sa_len > size) {
814 			l2tp_putref_variant(var, &psref);
815 			curlwp_bindx(bound);
816 			return EINVAL;
817 		}
818 		sockaddr_copy(dst, src->sa_len, src);
819 		l2tp_putref_variant(var, &psref);
820 		curlwp_bindx(bound);
821 		break;
822 
823 	case SIOCGLIFPHYADDR:
824 		bound = curlwp_bind();
825 		var = l2tp_getref_variant(sc, &psref);
826 		if (var == NULL) {
827 			curlwp_bindx(bound);
828 			error = EADDRNOTAVAIL;
829 			goto bad;
830 		}
831 		if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
832 			l2tp_putref_variant(var, &psref);
833 			curlwp_bindx(bound);
834 			error = EADDRNOTAVAIL;
835 			goto bad;
836 		}
837 
838 		/* copy src */
839 		src = var->lv_psrc;
840 		dst = (struct sockaddr *)
841 			&(((struct if_laddrreq *)data)->addr);
842 		size = sizeof(((struct if_laddrreq *)data)->addr);
843 		if (src->sa_len > size) {
844 			l2tp_putref_variant(var, &psref);
845 			curlwp_bindx(bound);
846 			return EINVAL;
847                 }
848 		sockaddr_copy(dst, src->sa_len, src);
849 
850 		/* copy dst */
851 		src = var->lv_pdst;
852 		dst = (struct sockaddr *)
853 			&(((struct if_laddrreq *)data)->dstaddr);
854 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
855 		if (src->sa_len > size) {
856 			l2tp_putref_variant(var, &psref);
857 			curlwp_bindx(bound);
858 			return EINVAL;
859                 }
860 		sockaddr_copy(dst, src->sa_len, src);
861 		l2tp_putref_variant(var, &psref);
862 		curlwp_bindx(bound);
863 		break;
864 
865 	case SIOCSL2TPSESSION:
866 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
867 			break;
868 
869 		/* session id must not zero */
870 		if (l2tpr.my_sess_id == 0 || l2tpr.peer_sess_id == 0)
871 			return EINVAL;
872 
873 		bound = curlwp_bind();
874 		var_tmp = l2tp_lookup_session_ref(l2tpr.my_sess_id, &psref);
875 		if (var_tmp != NULL) {
876 			/* duplicate session id */
877 			log(LOG_WARNING, "%s: duplicate session id %" PRIu32 " of %s\n",
878 				sc->l2tp_ec.ec_if.if_xname, l2tpr.my_sess_id,
879 				var_tmp->lv_softc->l2tp_ec.ec_if.if_xname);
880 			psref_release(&psref, &var_tmp->lv_psref,
881 			    lv_psref_class);
882 			curlwp_bindx(bound);
883 			return EINVAL;
884 		}
885 		curlwp_bindx(bound);
886 
887 		error = l2tp_set_session(sc, l2tpr.my_sess_id, l2tpr.peer_sess_id);
888 		break;
889 	case SIOCDL2TPSESSION:
890 		l2tp_clear_session(sc);
891 		break;
892 	case SIOCSL2TPCOOKIE:
893 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
894 			break;
895 
896 		error = l2tp_set_cookie(sc, l2tpr.my_cookie, l2tpr.my_cookie_len,
897 		    l2tpr.peer_cookie, l2tpr.peer_cookie_len);
898 		break;
899 	case SIOCDL2TPCOOKIE:
900 		l2tp_clear_cookie(sc);
901 		break;
902 	case SIOCSL2TPSTATE:
903 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
904 			break;
905 
906 		l2tp_set_state(sc, l2tpr.state);
907 		break;
908 	case SIOCGL2TP:
909 		/* get L2TPV3 session info */
910 		memset(&l2tpr, 0, sizeof(l2tpr));
911 
912 		bound = curlwp_bind();
913 		var = l2tp_getref_variant(sc, &psref);
914 		if (var == NULL) {
915 			curlwp_bindx(bound);
916 			error = EADDRNOTAVAIL;
917 			goto bad;
918 		}
919 
920 		l2tpr.state = var->lv_state;
921 		l2tpr.my_sess_id = var->lv_my_sess_id;
922 		l2tpr.peer_sess_id = var->lv_peer_sess_id;
923 		l2tpr.my_cookie = var->lv_my_cookie;
924 		l2tpr.my_cookie_len = var->lv_my_cookie_len;
925 		l2tpr.peer_cookie = var->lv_peer_cookie;
926 		l2tpr.peer_cookie_len = var->lv_peer_cookie_len;
927 		l2tp_putref_variant(var, &psref);
928 		curlwp_bindx(bound);
929 
930 		error = copyout(&l2tpr, ifr->ifr_data, sizeof(l2tpr));
931 		break;
932 
933 	default:
934 		error =	ifioctl_common(ifp, cmd, data);
935 		break;
936 	}
937  bad:
938 	return error;
939 }
940 
941 static int
942 l2tp_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
943 {
944 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
945 	    l2tp_ec.ec_if);
946 	struct sockaddr *osrc, *odst;
947 	struct sockaddr *nsrc, *ndst;
948 	struct l2tp_variant *ovar, *nvar;
949 	int error;
950 
951 	nsrc = sockaddr_dup(src, M_WAITOK);
952 	ndst = sockaddr_dup(dst, M_WAITOK);
953 
954 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
955 
956 	error = encap_lock_enter();
957 	if (error)
958 		goto error;
959 
960 	mutex_enter(&sc->l2tp_lock);
961 
962 	ovar = sc->l2tp_var;
963 	osrc = ovar->lv_psrc;
964 	odst = ovar->lv_pdst;
965 	*nvar = *ovar;
966 	psref_target_init(&nvar->lv_psref, lv_psref_class);
967 	nvar->lv_psrc = nsrc;
968 	nvar->lv_pdst = ndst;
969 	error = l2tp_encap_attach(nvar);
970 	if (error) {
971 		mutex_exit(&sc->l2tp_lock);
972 		encap_lock_exit();
973 		goto error;
974 	}
975 	membar_producer();
976 	l2tp_variant_update(sc, nvar);
977 
978 	mutex_exit(&sc->l2tp_lock);
979 
980 	(void)l2tp_encap_detach(ovar);
981 	encap_lock_exit();
982 
983 	if (osrc)
984 		sockaddr_free(osrc);
985 	if (odst)
986 		sockaddr_free(odst);
987 	kmem_free(ovar, sizeof(*ovar));
988 
989 	return 0;
990 
991 error:
992 	sockaddr_free(nsrc);
993 	sockaddr_free(ndst);
994 	kmem_free(nvar, sizeof(*nvar));
995 
996 	return error;
997 }
998 
999 static void
1000 l2tp_delete_tunnel(struct ifnet *ifp)
1001 {
1002 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
1003 	    l2tp_ec.ec_if);
1004 	struct sockaddr *osrc, *odst;
1005 	struct l2tp_variant *ovar, *nvar;
1006 	int error;
1007 
1008 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1009 
1010 	error = encap_lock_enter();
1011 	if (error) {
1012 		kmem_free(nvar, sizeof(*nvar));
1013 		return;
1014 	}
1015 	mutex_enter(&sc->l2tp_lock);
1016 
1017 	ovar = sc->l2tp_var;
1018 	osrc = ovar->lv_psrc;
1019 	odst = ovar->lv_pdst;
1020 	*nvar = *ovar;
1021 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1022 	nvar->lv_psrc = NULL;
1023 	nvar->lv_pdst = NULL;
1024 	membar_producer();
1025 	l2tp_variant_update(sc, nvar);
1026 
1027 	mutex_exit(&sc->l2tp_lock);
1028 
1029 	(void)l2tp_encap_detach(ovar);
1030 	encap_lock_exit();
1031 
1032 	if (osrc)
1033 		sockaddr_free(osrc);
1034 	if (odst)
1035 		sockaddr_free(odst);
1036 	kmem_free(ovar, sizeof(*ovar));
1037 }
1038 
1039 static int
1040 id_hash_func(uint32_t id, u_long mask)
1041 {
1042 	uint32_t hash;
1043 
1044 	hash = (id >> 16) ^ id;
1045 	hash = (hash >> 4) ^ hash;
1046 
1047 	return hash & mask;
1048 }
1049 
1050 static void
1051 l2tp_hash_init(void)
1052 {
1053 
1054 	l2tp_hash.lists = hashinit(L2TP_ID_HASH_SIZE, HASH_PSLIST, true,
1055 	    &l2tp_hash.mask);
1056 }
1057 
1058 static int
1059 l2tp_hash_fini(void)
1060 {
1061 	int i;
1062 
1063 	mutex_enter(&l2tp_hash.lock);
1064 
1065 	for (i = 0; i < l2tp_hash.mask + 1; i++) {
1066 		if (PSLIST_WRITER_FIRST(&l2tp_hash.lists[i], struct l2tp_softc,
1067 			l2tp_hash) != NULL) {
1068 			mutex_exit(&l2tp_hash.lock);
1069 			return EBUSY;
1070 		}
1071 	}
1072 	for (i = 0; i < l2tp_hash.mask + 1; i++)
1073 		PSLIST_DESTROY(&l2tp_hash.lists[i]);
1074 
1075 	mutex_exit(&l2tp_hash.lock);
1076 
1077 	hashdone(l2tp_hash.lists, HASH_PSLIST, l2tp_hash.mask);
1078 
1079 	return 0;
1080 }
1081 
1082 static int
1083 l2tp_set_session(struct l2tp_softc *sc, uint32_t my_sess_id,
1084     uint32_t peer_sess_id)
1085 {
1086 	uint32_t idx;
1087 	struct l2tp_variant *nvar;
1088 	struct l2tp_variant *ovar;
1089 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1090 
1091 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1092 
1093 	mutex_enter(&sc->l2tp_lock);
1094 	ovar = sc->l2tp_var;
1095 	*nvar = *ovar;
1096 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1097 	nvar->lv_my_sess_id = my_sess_id;
1098 	nvar->lv_peer_sess_id = peer_sess_id;
1099 	membar_producer();
1100 
1101 	mutex_enter(&l2tp_hash.lock);
1102 	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
1103 		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
1104 		pserialize_perform(l2tp_psz);
1105 	}
1106 	mutex_exit(&l2tp_hash.lock);
1107 	PSLIST_ENTRY_DESTROY(sc, l2tp_hash);
1108 
1109 	l2tp_variant_update(sc, nvar);
1110 	mutex_exit(&sc->l2tp_lock);
1111 
1112 	idx = id_hash_func(nvar->lv_my_sess_id, l2tp_hash.mask);
1113 	if ((ifp->if_flags & IFF_DEBUG) != 0)
1114 		log(LOG_DEBUG, "%s: add hash entry: sess_id=%" PRIu32 ", idx=%" PRIu32 "\n",
1115 		    sc->l2tp_ec.ec_if.if_xname, nvar->lv_my_sess_id, idx);
1116 
1117 	PSLIST_ENTRY_INIT(sc, l2tp_hash);
1118 	mutex_enter(&l2tp_hash.lock);
1119 	PSLIST_WRITER_INSERT_HEAD(&l2tp_hash.lists[idx], sc, l2tp_hash);
1120 	mutex_exit(&l2tp_hash.lock);
1121 
1122 	kmem_free(ovar, sizeof(*ovar));
1123 	return 0;
1124 }
1125 
1126 static int
1127 l2tp_clear_session(struct l2tp_softc *sc)
1128 {
1129 	struct l2tp_variant *nvar;
1130 	struct l2tp_variant *ovar;
1131 
1132 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1133 
1134 	mutex_enter(&sc->l2tp_lock);
1135 	ovar = sc->l2tp_var;
1136 	*nvar = *ovar;
1137 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1138 	nvar->lv_my_sess_id = 0;
1139 	nvar->lv_peer_sess_id = 0;
1140 	membar_producer();
1141 
1142 	mutex_enter(&l2tp_hash.lock);
1143 	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
1144 		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
1145 		pserialize_perform(l2tp_psz);
1146 	}
1147 	mutex_exit(&l2tp_hash.lock);
1148 
1149 	l2tp_variant_update(sc, nvar);
1150 	mutex_exit(&sc->l2tp_lock);
1151 	kmem_free(ovar, sizeof(*ovar));
1152 	return 0;
1153 }
1154 
1155 struct l2tp_variant *
1156 l2tp_lookup_session_ref(uint32_t id, struct psref *psref)
1157 {
1158 	int idx;
1159 	int s;
1160 	struct l2tp_softc *sc;
1161 
1162 	idx = id_hash_func(id, l2tp_hash.mask);
1163 
1164 	s = pserialize_read_enter();
1165 	PSLIST_READER_FOREACH(sc, &l2tp_hash.lists[idx], struct l2tp_softc,
1166 	    l2tp_hash) {
1167 		struct l2tp_variant *var = sc->l2tp_var;
1168 		if (var == NULL)
1169 			continue;
1170 		if (var->lv_my_sess_id != id)
1171 			continue;
1172 		psref_acquire(psref, &var->lv_psref, lv_psref_class);
1173 		pserialize_read_exit(s);
1174 		return var;
1175 	}
1176 	pserialize_read_exit(s);
1177 	return NULL;
1178 }
1179 
1180 /*
1181  * l2tp_variant update API.
1182  *
1183  * Assumption:
1184  * reader side dereferences sc->l2tp_var in reader critical section only,
1185  * that is, all of reader sides do not reader the sc->l2tp_var after
1186  * pserialize_perform().
1187  */
1188 static void
1189 l2tp_variant_update(struct l2tp_softc *sc, struct l2tp_variant *nvar)
1190 {
1191 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1192 	struct l2tp_variant *ovar = sc->l2tp_var;
1193 
1194 	KASSERT(mutex_owned(&sc->l2tp_lock));
1195 
1196 	sc->l2tp_var = nvar;
1197 	pserialize_perform(l2tp_psz);
1198 	psref_target_destroy(&ovar->lv_psref, lv_psref_class);
1199 
1200 	/*
1201 	 * In the manual of atomic_swap_ptr(3), there is no mention if 2nd
1202 	 * argument is rewrite or not. So, use sc->l2tp_var instead of nvar.
1203 	 */
1204 	if (sc->l2tp_var != NULL) {
1205 		if (sc->l2tp_var->lv_psrc != NULL
1206 		    && sc->l2tp_var->lv_pdst != NULL)
1207 			ifp->if_flags |= IFF_RUNNING;
1208 		else
1209 			ifp->if_flags &= ~IFF_RUNNING;
1210 	}
1211 }
1212 
1213 static int
1214 l2tp_set_cookie(struct l2tp_softc *sc, uint64_t my_cookie, u_int my_cookie_len,
1215     uint64_t peer_cookie, u_int peer_cookie_len)
1216 {
1217 	struct l2tp_variant *nvar;
1218 
1219 	if (my_cookie == 0 || peer_cookie == 0)
1220 		return EINVAL;
1221 
1222 	if (my_cookie_len != 4 && my_cookie_len != 8
1223 	    && peer_cookie_len != 4 && peer_cookie_len != 8)
1224 		return EINVAL;
1225 
1226 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1227 
1228 	mutex_enter(&sc->l2tp_lock);
1229 
1230 	*nvar = *sc->l2tp_var;
1231 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1232 	nvar->lv_my_cookie = my_cookie;
1233 	nvar->lv_my_cookie_len = my_cookie_len;
1234 	nvar->lv_peer_cookie = peer_cookie;
1235 	nvar->lv_peer_cookie_len = peer_cookie_len;
1236 	nvar->lv_use_cookie = L2TP_COOKIE_ON;
1237 	membar_producer();
1238 	l2tp_variant_update(sc, nvar);
1239 
1240 	mutex_exit(&sc->l2tp_lock);
1241 
1242 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1243 	if ((ifp->if_flags & IFF_DEBUG) != 0) {
1244 		log(LOG_DEBUG,
1245 		    "%s: set cookie: "
1246 		    "local cookie_len=%u local cookie=%" PRIu64 ", "
1247 		    "remote cookie_len=%u remote cookie=%" PRIu64 "\n",
1248 		    ifp->if_xname, my_cookie_len, my_cookie,
1249 		    peer_cookie_len, peer_cookie);
1250 	}
1251 
1252 	return 0;
1253 }
1254 
1255 static void
1256 l2tp_clear_cookie(struct l2tp_softc *sc)
1257 {
1258 	struct l2tp_variant *nvar;
1259 
1260 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1261 
1262 	mutex_enter(&sc->l2tp_lock);
1263 
1264 	*nvar = *sc->l2tp_var;
1265 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1266 	nvar->lv_my_cookie = 0;
1267 	nvar->lv_my_cookie_len = 0;
1268 	nvar->lv_peer_cookie = 0;
1269 	nvar->lv_peer_cookie_len = 0;
1270 	nvar->lv_use_cookie = L2TP_COOKIE_OFF;
1271 	membar_producer();
1272 	l2tp_variant_update(sc, nvar);
1273 
1274 	mutex_exit(&sc->l2tp_lock);
1275 }
1276 
1277 static void
1278 l2tp_set_state(struct l2tp_softc *sc, int state)
1279 {
1280 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1281 	struct l2tp_variant *nvar;
1282 
1283 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1284 
1285 	mutex_enter(&sc->l2tp_lock);
1286 
1287 	*nvar = *sc->l2tp_var;
1288 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1289 	nvar->lv_state = state;
1290 	membar_producer();
1291 	l2tp_variant_update(sc, nvar);
1292 
1293 	if (nvar->lv_state == L2TP_STATE_UP) {
1294 		ifp->if_link_state = LINK_STATE_UP;
1295 	} else {
1296 		ifp->if_link_state = LINK_STATE_DOWN;
1297 	}
1298 
1299 	mutex_exit(&sc->l2tp_lock);
1300 
1301 #ifdef NOTYET
1302 	vlan_linkstate_notify(ifp, ifp->if_link_state);
1303 #endif
1304 }
1305 
1306 static int
1307 l2tp_encap_attach(struct l2tp_variant *var)
1308 {
1309 	int error;
1310 
1311 	if (var == NULL || var->lv_psrc == NULL)
1312 		return EINVAL;
1313 
1314 	switch (var->lv_psrc->sa_family) {
1315 #ifdef INET
1316 	case AF_INET:
1317 		error = in_l2tp_attach(var);
1318 		break;
1319 #endif
1320 #ifdef INET6
1321 	case AF_INET6:
1322 		error = in6_l2tp_attach(var);
1323 		break;
1324 #endif
1325 	default:
1326 		error = EINVAL;
1327 		break;
1328 	}
1329 
1330 	return error;
1331 }
1332 
1333 static int
1334 l2tp_encap_detach(struct l2tp_variant *var)
1335 {
1336 	int error;
1337 
1338 	if (var == NULL || var->lv_psrc == NULL)
1339 		return EINVAL;
1340 
1341 	switch (var->lv_psrc->sa_family) {
1342 #ifdef INET
1343 	case AF_INET:
1344 		error = in_l2tp_detach(var);
1345 		break;
1346 #endif
1347 #ifdef INET6
1348 	case AF_INET6:
1349 		error = in6_l2tp_detach(var);
1350 		break;
1351 #endif
1352 	default:
1353 		error = EINVAL;
1354 		break;
1355 	}
1356 
1357 	return error;
1358 }
1359 
1360 int
1361 l2tp_check_nesting(struct ifnet *ifp, struct mbuf *m)
1362 {
1363 
1364 	return if_tunnel_check_nesting(ifp, m, max_l2tp_nesting);
1365 }
1366 
1367 /*
1368  * Module infrastructure
1369  */
1370 #include "if_module.h"
1371 
1372 IF_MODULE(MODULE_CLASS_DRIVER, l2tp, "")
1373 
1374 
1375 /* TODO: IP_TCPMSS support */
1376 #ifdef IP_TCPMSS
1377 static int l2tp_need_tcpmss_clamp(struct ifnet *);
1378 #ifdef INET
1379 static struct mbuf *l2tp_tcpmss4_clamp(struct ifnet *, struct mbuf *);
1380 #endif
1381 #ifdef INET6
1382 static struct mbuf *l2tp_tcpmss6_clamp(struct ifnet *, struct mbuf *);
1383 #endif
1384 
1385 struct mbuf *
1386 l2tp_tcpmss_clamp(struct ifnet *ifp, struct mbuf *m)
1387 {
1388 	struct ether_header *eh;
1389 	struct ether_vlan_header evh;
1390 
1391 	if (!l2tp_need_tcpmss_clamp(ifp)) {
1392 		return m;
1393 	}
1394 
1395 	if (m->m_pkthdr.len < sizeof(evh)) {
1396 		m_freem(m);
1397 		return NULL;
1398 	}
1399 
1400 	/* save ether header */
1401 	m_copydata(m, 0, sizeof(evh), (void *)&evh);
1402 	eh = (struct ether_header *)&evh;
1403 
1404 	switch (ntohs(eh->ether_type)) {
1405 	case ETHERTYPE_VLAN: /* Ether + VLAN */
1406 		if (m->m_pkthdr.len <= sizeof(struct ether_vlan_header))
1407 			break;
1408 		m_adj(m, sizeof(struct ether_vlan_header));
1409 		switch (ntohs(evh.evl_proto)) {
1410 #ifdef INET
1411 		case ETHERTYPE_IP: /* Ether + VLAN + IPv4 */
1412 			m = l2tp_tcpmss4_clamp(ifp, m);
1413 			if (m == NULL)
1414 				return NULL;
1415 			break;
1416 #endif /* INET */
1417 #ifdef INET6
1418 		case ETHERTYPE_IPV6: /* Ether + VLAN + IPv6 */
1419 			m = l2tp_tcpmss6_clamp(ifp, m);
1420 			if (m == NULL)
1421 				return NULL;
1422 			break;
1423 #endif /* INET6 */
1424 		default:
1425 			break;
1426 		}
1427 
1428 		/* restore ether header */
1429 		M_PREPEND(m, sizeof(struct ether_vlan_header),
1430 		    M_DONTWAIT);
1431 		if (m == NULL)
1432 			return NULL;
1433 		*mtod(m, struct ether_vlan_header *) = evh;
1434 		break;
1435 
1436 #ifdef INET
1437 	case ETHERTYPE_IP: /* Ether + IPv4 */
1438 		if (m->m_pkthdr.len <= sizeof(struct ether_header))
1439 			break;
1440 		m_adj(m, sizeof(struct ether_header));
1441 		m = l2tp_tcpmss4_clamp(ifp, m);
1442 		if (m == NULL)
1443 			return NULL;
1444 		/* restore ether header */
1445 		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
1446 		if (m == NULL)
1447 			return NULL;
1448 		*mtod(m, struct ether_header *) = *eh;
1449 		break;
1450 #endif /* INET */
1451 
1452 #ifdef INET6
1453 	case ETHERTYPE_IPV6: /* Ether + IPv6 */
1454 		if (m->m_pkthdr.len <= sizeof(struct ether_header))
1455 			break;
1456 		m_adj(m, sizeof(struct ether_header));
1457 		m = l2tp_tcpmss6_clamp(ifp, m);
1458 		if (m == NULL)
1459 			return NULL;
1460 		/* restore ether header */
1461 		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
1462 		if (m == NULL)
1463 			return NULL;
1464 		*mtod(m, struct ether_header *) = *eh;
1465 		break;
1466 #endif /* INET6 */
1467 
1468 	default:
1469 		break;
1470 	}
1471 
1472 	return m;
1473 }
1474 
1475 static int
1476 l2tp_need_tcpmss_clamp(struct ifnet *ifp)
1477 {
1478 	int ret = 0;
1479 
1480 #ifdef INET
1481 	if (ifp->if_tcpmss != 0)
1482 		ret = 1;
1483 #endif
1484 
1485 #ifdef INET6
1486 	if (ifp->if_tcpmss6 != 0)
1487 		ret = 1;
1488 #endif
1489 
1490 	return ret;
1491 }
1492 
1493 #ifdef INET
1494 static struct mbuf *
1495 l2tp_tcpmss4_clamp(struct ifnet *ifp, struct mbuf *m)
1496 {
1497 
1498 	if (ifp->if_tcpmss != 0) {
1499 		return ip_tcpmss(m, (ifp->if_tcpmss < 0) ?
1500 			ifp->if_mtu - IP_TCPMSS_EXTLEN :
1501 			ifp->if_tcpmss);
1502 	}
1503 	return m;
1504 }
1505 #endif /* INET */
1506 
1507 #ifdef INET6
1508 static struct mbuf *
1509 l2tp_tcpmss6_clamp(struct ifnet *ifp, struct mbuf *m)
1510 {
1511 	int ip6hdrlen;
1512 
1513 	if (ifp->if_tcpmss6 != 0 &&
1514 	    ip6_tcpmss_applicable(m, &ip6hdrlen)) {
1515 		return ip6_tcpmss(m, ip6hdrlen,
1516 			(ifp->if_tcpmss6 < 0) ?
1517 			ifp->if_mtu - IP6_TCPMSS_EXTLEN :
1518 			ifp->if_tcpmss6);
1519 	}
1520 	return m;
1521 }
1522 #endif /* INET6 */
1523 
1524 #endif /* IP_TCPMSS */
1525