xref: /netbsd-src/sys/net/if_l2tp.c (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 /*	$NetBSD: if_l2tp.c,v 1.43 2020/02/01 12:54:50 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * L2TPv3 kernel interface
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: if_l2tp.c,v 1.43 2020/02/01 12:54:50 riastradh Exp $");
35 
36 #ifdef _KERNEL_OPT
37 #include "opt_inet.h"
38 #include "opt_net_mpsafe.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/mbuf.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/errno.h>
48 #include <sys/ioctl.h>
49 #include <sys/time.h>
50 #include <sys/syslog.h>
51 #include <sys/proc.h>
52 #include <sys/conf.h>
53 #include <sys/kauth.h>
54 #include <sys/cpu.h>
55 #include <sys/cprng.h>
56 #include <sys/intr.h>
57 #include <sys/kmem.h>
58 #include <sys/mutex.h>
59 #include <sys/atomic.h>
60 #include <sys/pserialize.h>
61 #include <sys/device.h>
62 #include <sys/module.h>
63 
64 #include <net/if.h>
65 #include <net/if_dl.h>
66 #include <net/if_ether.h>
67 #include <net/if_types.h>
68 #include <net/netisr.h>
69 #include <net/route.h>
70 #include <net/bpf.h>
71 #include <net/if_vlanvar.h>
72 
73 #include <netinet/in.h>
74 #include <netinet/in_systm.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip_encap.h>
77 #ifdef	INET
78 #include <netinet/in_var.h>
79 #include <netinet/in_l2tp.h>
80 #endif	/* INET */
81 #ifdef INET6
82 #include <netinet6/in6_l2tp.h>
83 #endif
84 
85 #include <net/if_l2tp.h>
86 
87 #include <net/if_vlanvar.h>
88 
89 /* TODO: IP_TCPMSS support */
90 #undef IP_TCPMSS
91 #ifdef IP_TCPMSS
92 #include <netinet/ip_tcpmss.h>
93 #endif
94 
95 /*
96  * l2tp global variable definitions
97  */
98 static struct {
99 	LIST_HEAD(l2tp_sclist, l2tp_softc) list;
100 	kmutex_t lock;
101 } l2tp_softcs __cacheline_aligned;
102 
103 
104 #if !defined(L2TP_ID_HASH_SIZE)
105 #define L2TP_ID_HASH_SIZE 64
106 #endif
107 static struct {
108 	kmutex_t lock;
109 	struct pslist_head *lists;
110 	u_long mask;
111 } l2tp_hash __cacheline_aligned = {
112 	.lists = NULL,
113 };
114 
115 pserialize_t l2tp_psz __read_mostly;
116 struct psref_class *lv_psref_class __read_mostly;
117 
118 static void	l2tp_ifq_init_pc(void *, void *, struct cpu_info *);
119 static void	l2tp_ifq_fini_pc(void *, void *, struct cpu_info *);
120 
121 static int	l2tp_clone_create(struct if_clone *, int);
122 static int	l2tp_clone_destroy(struct ifnet *);
123 
124 struct if_clone l2tp_cloner =
125     IF_CLONE_INITIALIZER("l2tp", l2tp_clone_create, l2tp_clone_destroy);
126 
127 static int	l2tp_tx_enqueue(struct l2tp_variant *, struct mbuf *);
128 static int	l2tp_output(struct ifnet *, struct mbuf *,
129 		    const struct sockaddr *, const struct rtentry *);
130 static void	l2tp_sendit(struct l2tp_variant *, struct mbuf *);
131 static void	l2tpintr(struct l2tp_variant *);
132 static void	l2tpintr_softint(void *);
133 
134 static void	l2tp_hash_init(void);
135 static int	l2tp_hash_fini(void);
136 
137 static void	l2tp_start(struct ifnet *);
138 static int	l2tp_transmit(struct ifnet *, struct mbuf *);
139 
140 static int	l2tp_set_tunnel(struct ifnet *, struct sockaddr *,
141 		    struct sockaddr *);
142 static void	l2tp_delete_tunnel(struct ifnet *);
143 
144 static int	id_hash_func(uint32_t, u_long);
145 
146 static void	l2tp_variant_update(struct l2tp_softc *, struct l2tp_variant *);
147 static int	l2tp_set_session(struct l2tp_softc *, uint32_t, uint32_t);
148 static int	l2tp_clear_session(struct l2tp_softc *);
149 static int	l2tp_set_cookie(struct l2tp_softc *, uint64_t, u_int, uint64_t, u_int);
150 static void	l2tp_clear_cookie(struct l2tp_softc *);
151 static void	l2tp_set_state(struct l2tp_softc *, int);
152 static int	l2tp_encap_attach(struct l2tp_variant *);
153 static int	l2tp_encap_detach(struct l2tp_variant *);
154 
155 static inline struct ifqueue *
156 l2tp_ifq_percpu_getref(percpu_t *pc)
157 {
158 
159 	return *(struct ifqueue **)percpu_getref(pc);
160 }
161 
/*
 * Release the percpu reference taken by l2tp_ifq_percpu_getref().
 */
static inline void
l2tp_ifq_percpu_putref(percpu_t *pc)
{

	percpu_putref(pc);
}
168 
169 #ifndef MAX_L2TP_NEST
170 /*
171  * This macro controls the upper limitation on nesting of l2tp tunnels.
172  * Since, setting a large value to this macro with a careless configuration
173  * may introduce system crash, we don't allow any nestings by default.
174  * If you need to configure nested l2tp tunnels, you can define this macro
175  * in your kernel configuration file.  However, if you do so, please be
176  * careful to configure the tunnels so that it won't make a loop.
177  */
178 /*
179  * XXX
180  * Currently, if in_l2tp_output recursively calls, it causes locking against
181  * myself of struct l2tp_ro->lr_lock. So, nested l2tp tunnels is prohibited.
182  */
183 #define MAX_L2TP_NEST 0
184 #endif
185 
186 static int max_l2tp_nesting = MAX_L2TP_NEST;
187 
/* ARGSUSED */
void
l2tpattach(int count)
{
	/*
	 * Intentionally empty: all real setup is performed by the
	 * module initialization path, see l2tpinit() below.
	 */
}
197 
/*
 * l2tpinit: module initialization.
 *
 * Sets up the global softc list, the pserialize/psref machinery and the
 * session-id hash table, then attaches the cloner so that l2tpN
 * interfaces can be created.
 */
static void
l2tpinit(void)
{

	mutex_init(&l2tp_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
	LIST_INIT(&l2tp_softcs.list);

	mutex_init(&l2tp_hash.lock, MUTEX_DEFAULT, IPL_NONE);
	l2tp_psz = pserialize_create();
	lv_psref_class = psref_class_create("l2tpvar", IPL_SOFTNET);
	if_clone_attach(&l2tp_cloner);

	l2tp_hash_init();
}
212 
/*
 * l2tpdetach: module teardown; undoes l2tpinit() in reverse order.
 *
 * Returns EBUSY, leaving all global state intact, while any l2tp
 * interface still exists or the session hash still has entries.
 */
static int
l2tpdetach(void)
{
	int error;

	/* Refuse to unload while any interface instance is alive. */
	mutex_enter(&l2tp_softcs.lock);
	if (!LIST_EMPTY(&l2tp_softcs.list)) {
		mutex_exit(&l2tp_softcs.lock);
		return EBUSY;
	}
	mutex_exit(&l2tp_softcs.lock);

	error = l2tp_hash_fini();
	if (error)
		return error;

	if_clone_detach(&l2tp_cloner);
	psref_class_destroy(lv_psref_class);
	pserialize_destroy(l2tp_psz);
	mutex_destroy(&l2tp_hash.lock);

	mutex_destroy(&l2tp_softcs.lock);

	/* error is 0 here; l2tp_hash_fini() succeeded above. */
	return error;
}
238 
/*
 * l2tp_clone_create: if_clone create handler; builds one l2tpN instance.
 *
 * Allocates the softc and its initial (DOWN, no-cookie) variant,
 * attaches the ifnet via l2tpattach0(), sets up the per-CPU route
 * cache, per-CPU transmit queues and the transmit softint, and links
 * the softc onto the global list.  Returns 0 on success or the error
 * from l2tpattach0().
 */
static int
l2tp_clone_create(struct if_clone *ifc, int unit)
{
	struct l2tp_softc *sc;
	struct l2tp_variant *var;
	int rv;
	u_int si_flags = SOFTINT_NET;
#ifdef NET_MPSAFE
	si_flags |= SOFTINT_MPSAFE;
#endif
	sc = kmem_zalloc(sizeof(struct l2tp_softc), KM_SLEEP);
	if_initname(&sc->l2tp_ec.ec_if, ifc->ifc_name, unit);
	rv = l2tpattach0(sc);
	if (rv != 0) {
		kmem_free(sc, sizeof(struct l2tp_softc));
		return rv;
	}

	/* Initial variant: tunnel down, no session, cookies disabled. */
	var = kmem_zalloc(sizeof(struct l2tp_variant), KM_SLEEP);
	var->lv_softc = sc;
	var->lv_state = L2TP_STATE_DOWN;
	var->lv_use_cookie = L2TP_COOKIE_OFF;
	psref_target_init(&var->lv_psref, lv_psref_class);

	sc->l2tp_var = var;
	mutex_init(&sc->l2tp_lock, MUTEX_DEFAULT, IPL_NONE);
	sc->l2tp_psz = pserialize_create();
	PSLIST_ENTRY_INIT(sc, l2tp_hash);

	sc->l2tp_ro_percpu = if_tunnel_alloc_ro_percpu();

	sc->l2tp_ifq_percpu = percpu_create(sizeof(struct ifqueue *),
	    l2tp_ifq_init_pc, l2tp_ifq_fini_pc, NULL);
	sc->l2tp_si = softint_establish(si_flags, l2tpintr_softint, sc);

	mutex_enter(&l2tp_softcs.lock);
	LIST_INSERT_HEAD(&l2tp_softcs.list, sc, l2tp_list);
	mutex_exit(&l2tp_softcs.lock);

	return (0);
}
280 
/*
 * l2tpattach0: initialize and attach the ifnet embedded in the softc.
 *
 * Fills in the interface callbacks/parameters, attaches the interface
 * and its bpf tap (DLT_EN10MB, since the inner protocol is Ethernet).
 * Returns 0 on success or the error from if_attach().
 */
int
l2tpattach0(struct l2tp_softc *sc)
{
	int rv;

	sc->l2tp_ec.ec_if.if_addrlen = 0;
	sc->l2tp_ec.ec_if.if_mtu    = L2TP_MTU;
	sc->l2tp_ec.ec_if.if_flags  = IFF_POINTOPOINT|IFF_MULTICAST|IFF_SIMPLEX;
	sc->l2tp_ec.ec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE;
#ifdef NET_MPSAFE
	sc->l2tp_ec.ec_if.if_extflags |= IFEF_MPSAFE;
#endif
	sc->l2tp_ec.ec_if.if_ioctl  = l2tp_ioctl;
	sc->l2tp_ec.ec_if.if_output = l2tp_output;
	sc->l2tp_ec.ec_if.if_type   = IFT_L2TP;
	sc->l2tp_ec.ec_if.if_dlt    = DLT_NULL;
	sc->l2tp_ec.ec_if.if_start  = l2tp_start;
	sc->l2tp_ec.ec_if.if_transmit = l2tp_transmit;
	sc->l2tp_ec.ec_if._if_input = ether_input;
	IFQ_SET_READY(&sc->l2tp_ec.ec_if.if_snd);

#ifdef MBUFTRACE
	/* Register mbuf owners so tx/rx mbufs can be attributed to us. */
	struct ethercom *ec = &sc->l2tp_ec;
	struct ifnet *ifp = &sc->l2tp_ec.ec_if;

	strlcpy(ec->ec_tx_mowner.mo_name, ifp->if_xname,
	    sizeof(ec->ec_tx_mowner.mo_name));
	strlcpy(ec->ec_tx_mowner.mo_descr, "tx",
	    sizeof(ec->ec_tx_mowner.mo_descr));
	strlcpy(ec->ec_rx_mowner.mo_name, ifp->if_xname,
	    sizeof(ec->ec_rx_mowner.mo_name));
	strlcpy(ec->ec_rx_mowner.mo_descr, "rx",
	    sizeof(ec->ec_rx_mowner.mo_descr));
	MOWNER_ATTACH(&ec->ec_tx_mowner);
	MOWNER_ATTACH(&ec->ec_rx_mowner);
	ifp->if_mowner = &ec->ec_tx_mowner;
#endif

	/* XXX
	 * It may improve performance to use if_initialize()/if_register()
	 * so that l2tp_input() calls if_input() instead of
	 * if_percpuq_enqueue(). However, that causes recursive softnet_lock
	 * when NET_MPSAFE is not set.
	 */
	rv = if_attach(&sc->l2tp_ec.ec_if);
	if (rv != 0)
		return rv;
	if_alloc_sadl(&sc->l2tp_ec.ec_if);
	bpf_attach(&sc->l2tp_ec.ec_if, DLT_EN10MB, sizeof(struct ether_header));

	return 0;
}
333 
334 void
335 l2tp_ifq_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
336 {
337 	struct ifqueue **ifqp = p;
338 
339 	*ifqp = kmem_zalloc(sizeof(**ifqp), KM_SLEEP);
340 	(*ifqp)->ifq_maxlen = IFQ_MAXLEN;
341 }
342 
343 void
344 l2tp_ifq_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
345 {
346 	struct ifqueue **ifqp = p;
347 
348 	kmem_free(*ifqp, sizeof(**ifqp));
349 }
350 
/*
 * l2tp_clone_destroy: if_clone destroy handler; tears down one l2tpN.
 *
 * The session and tunnel are cleared first; then the variant pointer
 * is nulled out under l2tp_lock so concurrent l2tp_transmit()/
 * l2tpintr_softint() callers can no longer obtain a reference before
 * the variant memory is freed below.
 */
static int
l2tp_clone_destroy(struct ifnet *ifp)
{
	struct l2tp_variant *var;
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);

	l2tp_clear_session(sc);
	l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
	/*
	 * To avoid for l2tp_transmit() and l2tpintr_softint() to access
	 * sc->l2tp_var after free it.
	 */
	mutex_enter(&sc->l2tp_lock);
	var = sc->l2tp_var;
	l2tp_variant_update(sc, NULL);
	mutex_exit(&sc->l2tp_lock);

	/* No new softint can be scheduled once the variant is NULL. */
	softint_disestablish(sc->l2tp_si);
	percpu_free(sc->l2tp_ifq_percpu, sizeof(struct ifqueue *));

	mutex_enter(&l2tp_softcs.lock);
	LIST_REMOVE(sc, l2tp_list);
	mutex_exit(&l2tp_softcs.lock);

	bpf_detach(ifp);

	if_detach(ifp);

	if_tunnel_free_ro_percpu(sc->l2tp_ro_percpu);

	kmem_free(var, sizeof(struct l2tp_variant));
	pserialize_destroy(sc->l2tp_psz);
	mutex_destroy(&sc->l2tp_lock);
	kmem_free(sc, sizeof(struct l2tp_softc));

	return 0;
}
389 
390 static int
391 l2tp_tx_enqueue(struct l2tp_variant *var, struct mbuf *m)
392 {
393 	struct l2tp_softc *sc;
394 	struct ifnet *ifp;
395 	struct ifqueue *ifq;
396 	int s;
397 
398 	KASSERT(psref_held(&var->lv_psref, lv_psref_class));
399 
400 	sc = var->lv_softc;
401 	ifp = &sc->l2tp_ec.ec_if;
402 
403 	s = splsoftnet();
404 	ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
405 	if (IF_QFULL(ifq)) {
406 		if_statinc(ifp, if_oerrors);
407 		l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);
408 		splx(s);
409 		m_freem(m);
410 		return ENOBUFS;
411 	}
412 
413 	IF_ENQUEUE(ifq, m);
414 	percpu_putref(sc->l2tp_ifq_percpu);
415 	softint_schedule(sc->l2tp_si);
416 	/* counter is incremented in l2tpintr() */
417 	splx(s);
418 	return 0;
419 }
420 
/*
 * l2tp_output: ifnet if_output handler.
 *
 * Validates that the interface is up and the tunnel endpoints are
 * configured, prepends the inner address family (DLT_NULL style) and
 * hands the mbuf to l2tp_tx_enqueue().  Frees the mbuf and returns
 * ENETDOWN/ENOBUFS on failure; "rt" is unused.
 */
static int
l2tp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
    const struct rtentry *rt)
{
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);
	struct l2tp_variant *var;
	struct psref psref;
	int error = 0;

	var = l2tp_getref_variant(sc, &psref);
	if (var == NULL) {
		m_freem(m);
		return ENETDOWN;
	}

	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);

	m->m_flags &= ~(M_BCAST|M_MCAST);

	if ((ifp->if_flags & IFF_UP) == 0) {
		m_freem(m);
		error = ENETDOWN;
		goto end;
	}

	/* Tunnel endpoints not configured yet. */
	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
		m_freem(m);
		error = ENETDOWN;
		goto end;
	}

	/* XXX should we check if our outer source is legal? */

	/* use DLT_NULL encapsulation here to pass inner af type */
	M_PREPEND(m, sizeof(int), M_DONTWAIT);
	if (!m) {
		/* M_PREPEND freed the chain on failure. */
		error = ENOBUFS;
		goto end;
	}
	*mtod(m, int *) = dst->sa_family;

	error = l2tp_tx_enqueue(var, m);
end:
	l2tp_putref_variant(var, &psref);
	if (error)
		if_statinc(ifp, if_oerrors);

	return error;
}
471 
/*
 * l2tp_sendit: encapsulate and transmit one mbuf.
 *
 * Dispatches to the IPv4 or IPv6 encapsulation routine based on the
 * outer source address family, taps bpf, and updates the output
 * counters.  Consumes the mbuf in all cases.  The caller must hold a
 * psref on "var".
 */
static void
l2tp_sendit(struct l2tp_variant *var, struct mbuf *m)
{
	int len;
	int error;
	struct l2tp_softc *sc;
	struct ifnet *ifp;

	KASSERT(psref_held(&var->lv_psref, lv_psref_class));

	sc = var->lv_softc;
	ifp = &sc->l2tp_ec.ec_if;

	/* Remember the length before the encap routine consumes m. */
	len = m->m_pkthdr.len;
	m->m_flags &= ~(M_BCAST|M_MCAST);
	bpf_mtap(ifp, m, BPF_D_OUT);

	switch (var->lv_psrc->sa_family) {
#ifdef INET
	case AF_INET:
		error = in_l2tp_output(var, m);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		error = in6_l2tp_output(var, m);
		break;
#endif
	default:
		m_freem(m);
		error = ENETDOWN;
		break;
	}
	if (error) {
		if_statinc(ifp, if_oerrors);
	} else {
		if_statadd2(ifp, if_opackets, 1, if_obytes, len);
	}
}
511 
/*
 * l2tpintr: drain queued output packets.
 *
 * Sends everything on this CPU's private queue; additionally, CPU 0
 * drains the shared if_snd queue.  If no session is configured the
 * queues are purged instead.  The caller must hold a psref on "var".
 */
static void
l2tpintr(struct l2tp_variant *var)
{
	struct l2tp_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ifqueue *ifq;
	u_int cpuid = cpu_index(curcpu());

	KASSERT(psref_held(&var->lv_psref, lv_psref_class));

	sc = var->lv_softc;
	ifp = &sc->l2tp_ec.ec_if;

	/* output processing */
	if (var->lv_my_sess_id == 0 || var->lv_peer_sess_id == 0) {
		/* No session established: drop everything queued. */
		ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
		IF_PURGE(ifq);
		l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);
		if (cpuid == 0)
			IFQ_PURGE(&ifp->if_snd);
		return;
	}

	/* Currently, l2tpintr() is always called in softint context. */
	ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
	for (;;) {
		IF_DEQUEUE(ifq, m);
		if (m != NULL)
			l2tp_sendit(var, m);
		else
			break;
	}
	l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);

	/* Only CPU 0 services the shared if_snd queue. */
	if (cpuid == 0) {
		for (;;) {
			IFQ_DEQUEUE(&ifp->if_snd, m);
			if (m != NULL)
				l2tp_sendit(var, m);
			else
				break;
		}
	}
}
557 
558 static void
559 l2tpintr_softint(void *arg)
560 {
561 	struct l2tp_variant *var;
562 	struct psref psref;
563 	struct l2tp_softc *sc = arg;
564 
565 	var = l2tp_getref_variant(sc, &psref);
566 	if (var == NULL)
567 		return;
568 
569 	l2tpintr(var);
570 	l2tp_putref_variant(var, &psref);
571 }
572 
/*
 * l2tp_input: deliver a decapsulated inner Ethernet frame to the stack.
 *
 * If the payload start is not at offset 2 modulo 4 (i.e. the data
 * following the 14-byte Ethernet header would be misaligned), the
 * head of the packet is copied into a freshly aligned mbuf first.
 * Consumes the mbuf; drops runt frames shorter than an Ethernet
 * header.
 */
void
l2tp_input(struct mbuf *m, struct ifnet *ifp)
{
	vaddr_t addr;

	KASSERT(ifp != NULL);

	/*
	 * Currently, l2tp(4) supports only ethernet as inner protocol.
	 */
	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
		m_freem(m);
		return;
	}

	/*
	 * If the head of the payload is not aligned, align it.
	 */
	addr = mtod(m, vaddr_t);
	if ((addr & 0x03) != 0x2) {
		/* copy and align head of payload */
		struct mbuf *m_head;
		int copy_length;
		u_int pad = roundup(sizeof(struct ether_header), 4)
			- sizeof(struct ether_header);

#define L2TP_COPY_LENGTH		60

		if (m->m_pkthdr.len < L2TP_COPY_LENGTH) {
			copy_length = m->m_pkthdr.len;
		} else {
			copy_length = L2TP_COPY_LENGTH;
		}

		/* Ensure the bytes to copy are contiguous. */
		if (m->m_len < copy_length) {
			m = m_pullup(m, copy_length);
			if (m == NULL)
				return;
		}

		MGETHDR(m_head, M_DONTWAIT, MT_HEADER);
		if (m_head == NULL) {
			m_freem(m);
			return;
		}
		m_move_pkthdr(m_head, m);

		/*
		 * m_head should be:
		 *                             L2TP_COPY_LENGTH
		 *                          <-  + roundup(pad, 4) - pad ->
		 *   +-------+--------+-----+--------------+-------------+
		 *   | m_hdr | pkthdr | ... | ether header |   payload   |
		 *   +-------+--------+-----+--------------+-------------+
		 *                          ^              ^
		 *                          m_data         4 byte aligned
		 */
		m_align(m_head, L2TP_COPY_LENGTH + roundup(pad, 4));
		m_head->m_data += pad;

		memcpy(mtod(m_head, void *), mtod(m, void *), copy_length);
		m_head->m_len = copy_length;
		m->m_data += copy_length;
		m->m_len -= copy_length;

		/* construct chain */
		if (m->m_len == 0) {
			/* Original first mbuf fully consumed; free it. */
			m_head->m_next = m_free(m);
		} else {
			m_head->m_next = m;
		}

		/* override m */
		m = m_head;
	}

	m_set_rcvif(m, ifp);

	/*
	 * bpf_mtap() and ifp->if_ipackets++ is done in if_input()
	 *
	 * obytes is incremented at ether_output() or bridge_enqueue().
	 */
	if_percpuq_enqueue(ifp->if_percpuq, m);
}
658 
659 void
660 l2tp_start(struct ifnet *ifp)
661 {
662 	struct psref psref;
663 	struct l2tp_variant *var;
664 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
665 	    l2tp_ec.ec_if);
666 
667 	var = l2tp_getref_variant(sc, &psref);
668 	if (var == NULL)
669 		return;
670 
671 	if (var->lv_psrc == NULL || var->lv_pdst == NULL)
672 		return;
673 
674 	kpreempt_disable();
675 	softint_schedule(sc->l2tp_si);
676 	kpreempt_enable();
677 	l2tp_putref_variant(var, &psref);
678 }
679 
680 int
681 l2tp_transmit(struct ifnet *ifp, struct mbuf *m)
682 {
683 	int error;
684 	struct psref psref;
685 	struct l2tp_variant *var;
686 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
687 	    l2tp_ec.ec_if);
688 
689 	var = l2tp_getref_variant(sc, &psref);
690 	if (var == NULL) {
691 		m_freem(m);
692 		return ENETDOWN;
693 	}
694 
695 	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
696 		m_freem(m);
697 		error = ENETDOWN;
698 		goto out;
699 	}
700 
701 	m->m_flags &= ~(M_BCAST|M_MCAST);
702 
703 	error = l2tp_tx_enqueue(var, m);
704 out:
705 	l2tp_putref_variant(var, &psref);
706 	return error;
707 }
708 
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
/*
 * l2tp_ioctl: interface ioctl handler.
 *
 * Handles address/MTU/multicast basics, tunnel endpoint get/set
 * (SIOC[GS]*PHYADDR*), and the l2tp-specific session, cookie and
 * state commands.  Read paths bind the LWP and take a psref on the
 * current variant; every exit path releases both.  Returns 0 or an
 * errno; unknown commands fall through to ifioctl_common().
 */
int
l2tp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);
	struct l2tp_variant *var, *var_tmp;
	struct ifreq     *ifr = data;
	int error = 0, size;
	struct sockaddr *dst, *src;
	struct l2tp_req l2tpr;
	u_long mtu;
	int bound;
	struct psref psref;

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		break;

	case SIOCSIFDSTADDR:
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:	/* IP supports Multicast */
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:	/* IP6 supports Multicast */
			break;
#endif /* INET6 */
		default:  /* Other protocols doesn't support Multicast */
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFMTU:
		mtu = ifr->ifr_mtu;
		if (mtu < L2TP_MTU_MIN || mtu > L2TP_MTU_MAX)
			return (EINVAL);
		ifp->if_mtu = mtu;
		break;

#ifdef INET
	case SIOCSIFPHYADDR:
		/* Set IPv4 tunnel endpoints; both must be AF_INET. */
		src = (struct sockaddr *)
			&(((struct in_aliasreq *)data)->ifra_addr);
		dst = (struct sockaddr *)
			&(((struct in_aliasreq *)data)->ifra_dstaddr);
		if (src->sa_family != AF_INET || dst->sa_family != AF_INET)
			return EAFNOSUPPORT;
		else if (src->sa_len != sizeof(struct sockaddr_in)
		    || dst->sa_len != sizeof(struct sockaddr_in))
			return EINVAL;

		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
		break;

#endif /* INET */
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
		/* Set IPv6 tunnel endpoints; both must be AF_INET6. */
		src = (struct sockaddr *)
			&(((struct in6_aliasreq *)data)->ifra_addr);
		dst = (struct sockaddr *)
			&(((struct in6_aliasreq *)data)->ifra_dstaddr);
		if (src->sa_family != AF_INET6 || dst->sa_family != AF_INET6)
			return EAFNOSUPPORT;
		else if (src->sa_len != sizeof(struct sockaddr_in6)
		    || dst->sa_len != sizeof(struct sockaddr_in6))
			return EINVAL;

		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
		break;

#endif /* INET6 */
	case SIOCSLIFPHYADDR:
		/* Generic set: families must match and lengths be sane. */
		src = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->addr);
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->dstaddr);
		if (src->sa_family != dst->sa_family)
			return EINVAL;
		else if (src->sa_family == AF_INET
		    && src->sa_len != sizeof(struct sockaddr_in))
			return EINVAL;
		else if (src->sa_family == AF_INET6
		    && src->sa_len != sizeof(struct sockaddr_in6))
			return EINVAL;
		else if (dst->sa_family == AF_INET
		    && dst->sa_len != sizeof(struct sockaddr_in))
			return EINVAL;
		else if (dst->sa_family == AF_INET6
		    && dst->sa_len != sizeof(struct sockaddr_in6))
			return EINVAL;

		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
		break;

	case SIOCDIFPHYADDR:
		l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
		break;

	case SIOCGIFPSRCADDR:
#ifdef INET6
	case SIOCGIFPSRCADDR_IN6:
#endif /* INET6 */
		/* Copy the outer source address out to the caller. */
		bound = curlwp_bind();
		var = l2tp_getref_variant(sc, &psref);
		if (var == NULL) {
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (var->lv_psrc == NULL) {
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = var->lv_psrc;
		switch (cmd) {
#ifdef INET
		case SIOCGIFPSRCADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPSRCADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (src->sa_len > size) {
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			return EINVAL;
		}
		sockaddr_copy(dst, src->sa_len, src);
		l2tp_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGIFPDSTADDR:
#ifdef INET6
	case SIOCGIFPDSTADDR_IN6:
#endif /* INET6 */
		/* Copy the outer destination address out to the caller. */
		bound = curlwp_bind();
		var = l2tp_getref_variant(sc, &psref);
		if (var == NULL) {
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (var->lv_pdst == NULL) {
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = var->lv_pdst;
		switch (cmd) {
#ifdef INET
		case SIOCGIFPDSTADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPDSTADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (src->sa_len > size) {
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			return EINVAL;
		}
		sockaddr_copy(dst, src->sa_len, src);
		l2tp_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGLIFPHYADDR:
		/* Copy both endpoints out in one if_laddrreq. */
		bound = curlwp_bind();
		var = l2tp_getref_variant(sc, &psref);
		if (var == NULL) {
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}

		/* copy src */
		src = var->lv_psrc;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->addr);
		size = sizeof(((struct if_laddrreq *)data)->addr);
		if (src->sa_len > size) {
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			return EINVAL;
                }
		sockaddr_copy(dst, src->sa_len, src);

		/* copy dst */
		src = var->lv_pdst;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->dstaddr);
		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
		if (src->sa_len > size) {
			l2tp_putref_variant(var, &psref);
			curlwp_bindx(bound);
			return EINVAL;
                }
		sockaddr_copy(dst, src->sa_len, src);
		l2tp_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCSL2TPSESSION:
		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
			break;

		/* session id must not zero */
		if (l2tpr.my_sess_id == 0 || l2tpr.peer_sess_id == 0)
			return EINVAL;

		/* Reject a local session id already used by another l2tpN. */
		bound = curlwp_bind();
		var_tmp = l2tp_lookup_session_ref(l2tpr.my_sess_id, &psref);
		if (var_tmp != NULL) {
			/* duplicate session id */
			log(LOG_WARNING, "%s: duplicate session id %" PRIu32 " of %s\n",
				sc->l2tp_ec.ec_if.if_xname, l2tpr.my_sess_id,
				var_tmp->lv_softc->l2tp_ec.ec_if.if_xname);
			psref_release(&psref, &var_tmp->lv_psref,
			    lv_psref_class);
			curlwp_bindx(bound);
			return EINVAL;
		}
		curlwp_bindx(bound);

		error = l2tp_set_session(sc, l2tpr.my_sess_id, l2tpr.peer_sess_id);
		break;
	case SIOCDL2TPSESSION:
		l2tp_clear_session(sc);
		break;
	case SIOCSL2TPCOOKIE:
		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
			break;

		error = l2tp_set_cookie(sc, l2tpr.my_cookie, l2tpr.my_cookie_len,
		    l2tpr.peer_cookie, l2tpr.peer_cookie_len);
		break;
	case SIOCDL2TPCOOKIE:
		l2tp_clear_cookie(sc);
		break;
	case SIOCSL2TPSTATE:
		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
			break;

		l2tp_set_state(sc, l2tpr.state);
		break;
	case SIOCGL2TP:
		/* get L2TPV3 session info */
		memset(&l2tpr, 0, sizeof(l2tpr));

		bound = curlwp_bind();
		var = l2tp_getref_variant(sc, &psref);
		if (var == NULL) {
			curlwp_bindx(bound);
			error = EADDRNOTAVAIL;
			goto bad;
		}

		l2tpr.state = var->lv_state;
		l2tpr.my_sess_id = var->lv_my_sess_id;
		l2tpr.peer_sess_id = var->lv_peer_sess_id;
		l2tpr.my_cookie = var->lv_my_cookie;
		l2tpr.my_cookie_len = var->lv_my_cookie_len;
		l2tpr.peer_cookie = var->lv_peer_cookie;
		l2tpr.peer_cookie_len = var->lv_peer_cookie_len;
		l2tp_putref_variant(var, &psref);
		curlwp_bindx(bound);

		error = copyout(&l2tpr, ifr->ifr_data, sizeof(l2tpr));
		break;

	default:
		error =	ifioctl_common(ifp, cmd, data);
		break;
	}
 bad:
	return error;
}
1028 
/*
 * l2tp_set_tunnel: install new outer source/destination addresses.
 *
 * Builds a new variant (copy of the current one with duplicated
 * addresses), attaches it to the encap framework and publishes it via
 * l2tp_variant_update() under l2tp_lock; the old variant and its
 * addresses are detached and freed afterwards.  Returns 0 or an errno
 * from encap_lock_enter()/l2tp_encap_attach().
 */
static int
l2tp_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
{
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);
	struct sockaddr *osrc, *odst;
	struct sockaddr *nsrc, *ndst;
	struct l2tp_variant *ovar, *nvar;
	int error;

	/* Allocate everything before taking any locks. */
	nsrc = sockaddr_dup(src, M_WAITOK);
	ndst = sockaddr_dup(dst, M_WAITOK);

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	error = encap_lock_enter();
	if (error)
		goto error;

	mutex_enter(&sc->l2tp_lock);

	ovar = sc->l2tp_var;
	osrc = ovar->lv_psrc;
	odst = ovar->lv_pdst;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_psrc = nsrc;
	nvar->lv_pdst = ndst;
	error = l2tp_encap_attach(nvar);
	if (error) {
		mutex_exit(&sc->l2tp_lock);
		encap_lock_exit();
		goto error;
	}
	/* Publish the new variant; readers switch over from here. */
	l2tp_variant_update(sc, nvar);

	mutex_exit(&sc->l2tp_lock);

	(void)l2tp_encap_detach(ovar);
	encap_lock_exit();

	/* Old variant is unreachable now; free its resources. */
	if (osrc)
		sockaddr_free(osrc);
	if (odst)
		sockaddr_free(odst);
	kmem_free(ovar, sizeof(*ovar));

	return 0;

error:
	sockaddr_free(nsrc);
	sockaddr_free(ndst);
	kmem_free(nvar, sizeof(*nvar));

	return error;
}
1085 
/*
 * l2tp_delete_tunnel: clear the outer source/destination addresses.
 *
 * Publishes a new variant with NULL endpoints (same pattern as
 * l2tp_set_tunnel()), detaches the old variant from the encap
 * framework and frees it along with its addresses.  Silently does
 * nothing if encap_lock_enter() fails.
 */
static void
l2tp_delete_tunnel(struct ifnet *ifp)
{
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);
	struct sockaddr *osrc, *odst;
	struct l2tp_variant *ovar, *nvar;
	int error;

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	error = encap_lock_enter();
	if (error) {
		kmem_free(nvar, sizeof(*nvar));
		return;
	}
	mutex_enter(&sc->l2tp_lock);

	ovar = sc->l2tp_var;
	osrc = ovar->lv_psrc;
	odst = ovar->lv_pdst;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_psrc = NULL;
	nvar->lv_pdst = NULL;
	l2tp_variant_update(sc, nvar);

	mutex_exit(&sc->l2tp_lock);

	(void)l2tp_encap_detach(ovar);
	encap_lock_exit();

	/* Old variant is unreachable now; free its resources. */
	if (osrc)
		sockaddr_free(osrc);
	if (odst)
		sockaddr_free(odst);
	kmem_free(ovar, sizeof(*ovar));
}
1124 
1125 static int
1126 id_hash_func(uint32_t id, u_long mask)
1127 {
1128 	uint32_t hash;
1129 
1130 	hash = (id >> 16) ^ id;
1131 	hash = (hash >> 4) ^ hash;
1132 
1133 	return hash & mask;
1134 }
1135 
/*
 * l2tp_hash_init: allocate the session-id hash table (pslist heads)
 * and record its mask in l2tp_hash.mask.
 */
static void
l2tp_hash_init(void)
{

	l2tp_hash.lists = hashinit(L2TP_ID_HASH_SIZE, HASH_PSLIST, true,
	    &l2tp_hash.mask);
}
1143 
/*
 * Free the session-ID hash table.  Returns EBUSY, leaving the table
 * intact, if any softc is still hashed (so module unload can abort);
 * otherwise destroys every bucket and returns 0.
 */
static int
l2tp_hash_fini(void)
{
	int i;

	mutex_enter(&l2tp_hash.lock);

	/* First pass: refuse teardown while any bucket is non-empty. */
	for (i = 0; i < l2tp_hash.mask + 1; i++) {
		if (PSLIST_WRITER_FIRST(&l2tp_hash.lists[i], struct l2tp_softc,
			l2tp_hash) != NULL) {
			mutex_exit(&l2tp_hash.lock);
			return EBUSY;
		}
	}
	/* Second pass: all buckets verified empty, destroy the heads. */
	for (i = 0; i < l2tp_hash.mask + 1; i++)
		PSLIST_DESTROY(&l2tp_hash.lists[i]);

	mutex_exit(&l2tp_hash.lock);

	hashdone(l2tp_hash.lists, HASH_PSLIST, l2tp_hash.mask);

	return 0;
}
1167 
/*
 * Set the local and peer session IDs of `sc' and (re)hash the softc
 * under the new local ID.  Publishes a new variant carrying the IDs,
 * removes the softc from its old hash bucket if it had a complete
 * session, and inserts it into the bucket for `my_sess_id'.
 * Sleeps (KM_SLEEP, pserialize_perform).  Always returns 0.
 */
static int
l2tp_set_session(struct l2tp_softc *sc, uint32_t my_sess_id,
    uint32_t peer_sess_id)
{
	uint32_t idx;
	struct l2tp_variant *nvar;
	struct l2tp_variant *ovar;
	struct ifnet *ifp = &sc->l2tp_ec.ec_if;

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	mutex_enter(&sc->l2tp_lock);
	ovar = sc->l2tp_var;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_my_sess_id = my_sess_id;
	nvar->lv_peer_sess_id = peer_sess_id;

	/*
	 * Unhash under the old IDs.  Only a softc with both session IDs
	 * non-zero was ever inserted; wait for lockless readers of the
	 * bucket to drain before the entry is destroyed below.
	 */
	mutex_enter(&l2tp_hash.lock);
	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
		pserialize_perform(l2tp_psz);
	}
	mutex_exit(&l2tp_hash.lock);
	/* Destroying an initialized, off-list entry is permitted. */
	PSLIST_ENTRY_DESTROY(sc, l2tp_hash);

	l2tp_variant_update(sc, nvar);
	mutex_exit(&sc->l2tp_lock);

	/*
	 * NOTE(review): nvar is dereferenced after sc->l2tp_lock is
	 * dropped; this appears to rely on ioctl-level serialization of
	 * configuration changes -- confirm.
	 */
	idx = id_hash_func(nvar->lv_my_sess_id, l2tp_hash.mask);
	if ((ifp->if_flags & IFF_DEBUG) != 0)
		log(LOG_DEBUG, "%s: add hash entry: sess_id=%" PRIu32 ", idx=%" PRIu32 "\n",
		    sc->l2tp_ec.ec_if.if_xname, nvar->lv_my_sess_id, idx);

	/* Rehash under the new local session ID. */
	PSLIST_ENTRY_INIT(sc, l2tp_hash);
	mutex_enter(&l2tp_hash.lock);
	PSLIST_WRITER_INSERT_HEAD(&l2tp_hash.lists[idx], sc, l2tp_hash);
	mutex_exit(&l2tp_hash.lock);

	kmem_free(ovar, sizeof(*ovar));
	return 0;
}
1210 
/*
 * Clear both session IDs of `sc' and remove it from the session hash
 * if it was inserted (i.e. both old IDs were non-zero).  Publishes a
 * new variant with zero IDs.  Sleeps (KM_SLEEP, pserialize_perform).
 * Always returns 0.
 */
static int
l2tp_clear_session(struct l2tp_softc *sc)
{
	struct l2tp_variant *nvar;
	struct l2tp_variant *ovar;

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	mutex_enter(&sc->l2tp_lock);
	ovar = sc->l2tp_var;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_my_sess_id = 0;
	nvar->lv_peer_sess_id = 0;

	/*
	 * Unhash; wait for lockless bucket readers to drain before the
	 * old variant is retired.
	 */
	mutex_enter(&l2tp_hash.lock);
	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
		pserialize_perform(l2tp_psz);
	}
	mutex_exit(&l2tp_hash.lock);

	l2tp_variant_update(sc, nvar);
	mutex_exit(&sc->l2tp_lock);
	kmem_free(ovar, sizeof(*ovar));
	return 0;
}
1238 
/*
 * Look up the tunnel variant whose local session ID is `id' and
 * acquire a psref on it.  Returns NULL when no interface matches.
 * On success the caller is responsible for releasing the reference
 * (psref_release) when done with the variant.
 */
struct l2tp_variant *
l2tp_lookup_session_ref(uint32_t id, struct psref *psref)
{
	int idx;
	int s;
	struct l2tp_softc *sc;

	idx = id_hash_func(id, l2tp_hash.mask);

	/* Lockless bucket walk inside a pserialize read section. */
	s = pserialize_read_enter();
	PSLIST_READER_FOREACH(sc, &l2tp_hash.lists[idx], struct l2tp_softc,
	    l2tp_hash) {
		struct l2tp_variant *var = atomic_load_consume(&sc->l2tp_var);
		if (var == NULL)
			continue;
		if (var->lv_my_sess_id != id)
			continue;
		/* Pin the variant before leaving the read section. */
		psref_acquire(psref, &var->lv_psref, lv_psref_class);
		pserialize_read_exit(s);
		return var;
	}
	pserialize_read_exit(s);
	return NULL;
}
1263 
1264 /*
1265  * l2tp_variant update API.
1266  *
1267  * Assumption:
1268  * reader side dereferences sc->l2tp_var in reader critical section only,
1269  * that is, all of reader sides do not reader the sc->l2tp_var after
1270  * pserialize_perform().
1271  */
static void
l2tp_variant_update(struct l2tp_softc *sc, struct l2tp_variant *nvar)
{
	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
	struct l2tp_variant *ovar = sc->l2tp_var;

	KASSERT(mutex_owned(&sc->l2tp_lock));

	/*
	 * Publish the new variant, wait for every reader of the old
	 * one to leave its critical section, then retire the old
	 * psref target (blocks until outstanding psrefs are released).
	 */
	atomic_store_release(&sc->l2tp_var, nvar);
	pserialize_perform(sc->l2tp_psz);
	psref_target_destroy(&ovar->lv_psref, lv_psref_class);

	/* IFF_RUNNING tracks whether both tunnel endpoints are set. */
	if (nvar != NULL) {
		if (nvar->lv_psrc != NULL && nvar->lv_pdst != NULL)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
	}
}
1291 
1292 static int
1293 l2tp_set_cookie(struct l2tp_softc *sc, uint64_t my_cookie, u_int my_cookie_len,
1294     uint64_t peer_cookie, u_int peer_cookie_len)
1295 {
1296 	struct l2tp_variant *nvar;
1297 
1298 	if (my_cookie == 0 || peer_cookie == 0)
1299 		return EINVAL;
1300 
1301 	if (my_cookie_len != 4 && my_cookie_len != 8
1302 	    && peer_cookie_len != 4 && peer_cookie_len != 8)
1303 		return EINVAL;
1304 
1305 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1306 
1307 	mutex_enter(&sc->l2tp_lock);
1308 
1309 	*nvar = *sc->l2tp_var;
1310 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1311 	nvar->lv_my_cookie = my_cookie;
1312 	nvar->lv_my_cookie_len = my_cookie_len;
1313 	nvar->lv_peer_cookie = peer_cookie;
1314 	nvar->lv_peer_cookie_len = peer_cookie_len;
1315 	nvar->lv_use_cookie = L2TP_COOKIE_ON;
1316 	l2tp_variant_update(sc, nvar);
1317 
1318 	mutex_exit(&sc->l2tp_lock);
1319 
1320 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1321 	if ((ifp->if_flags & IFF_DEBUG) != 0) {
1322 		log(LOG_DEBUG,
1323 		    "%s: set cookie: "
1324 		    "local cookie_len=%u local cookie=%" PRIu64 ", "
1325 		    "remote cookie_len=%u remote cookie=%" PRIu64 "\n",
1326 		    ifp->if_xname, my_cookie_len, my_cookie,
1327 		    peer_cookie_len, peer_cookie);
1328 	}
1329 
1330 	return 0;
1331 }
1332 
/*
 * Disable L2TPv3 cookies on `sc': publish a new variant with both
 * cookies zeroed and cookie checking switched off.  Sleeps (KM_SLEEP).
 */
static void
l2tp_clear_cookie(struct l2tp_softc *sc)
{
	struct l2tp_variant *nvar;

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	mutex_enter(&sc->l2tp_lock);

	/* Copy the live variant, scrub the cookie fields, swap it in. */
	*nvar = *sc->l2tp_var;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_my_cookie = 0;
	nvar->lv_my_cookie_len = 0;
	nvar->lv_peer_cookie = 0;
	nvar->lv_peer_cookie_len = 0;
	nvar->lv_use_cookie = L2TP_COOKIE_OFF;
	l2tp_variant_update(sc, nvar);

	mutex_exit(&sc->l2tp_lock);
}
1353 
/*
 * Set the tunnel state (L2TP_STATE_UP/DOWN) of `sc' by publishing a
 * new variant, and mirror it into the interface link state.
 * Sleeps (KM_SLEEP).
 */
static void
l2tp_set_state(struct l2tp_softc *sc, int state)
{
	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
	struct l2tp_variant *nvar;

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	mutex_enter(&sc->l2tp_lock);

	*nvar = *sc->l2tp_var;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_state = state;
	l2tp_variant_update(sc, nvar);

	/*
	 * NOTE(review): if_link_state is written directly rather than
	 * via if_link_state_change(), so no link-state notification is
	 * generated (cf. the NOTYET block below) -- confirm intended.
	 */
	if (nvar->lv_state == L2TP_STATE_UP) {
		ifp->if_link_state = LINK_STATE_UP;
	} else {
		ifp->if_link_state = LINK_STATE_DOWN;
	}

	mutex_exit(&sc->l2tp_lock);

#ifdef NOTYET
	vlan_linkstate_notify(ifp, ifp->if_link_state);
#endif
}
1381 
1382 static int
1383 l2tp_encap_attach(struct l2tp_variant *var)
1384 {
1385 	int error;
1386 
1387 	if (var == NULL || var->lv_psrc == NULL)
1388 		return EINVAL;
1389 
1390 	switch (var->lv_psrc->sa_family) {
1391 #ifdef INET
1392 	case AF_INET:
1393 		error = in_l2tp_attach(var);
1394 		break;
1395 #endif
1396 #ifdef INET6
1397 	case AF_INET6:
1398 		error = in6_l2tp_attach(var);
1399 		break;
1400 #endif
1401 	default:
1402 		error = EINVAL;
1403 		break;
1404 	}
1405 
1406 	return error;
1407 }
1408 
1409 static int
1410 l2tp_encap_detach(struct l2tp_variant *var)
1411 {
1412 	int error;
1413 
1414 	if (var == NULL || var->lv_psrc == NULL)
1415 		return EINVAL;
1416 
1417 	switch (var->lv_psrc->sa_family) {
1418 #ifdef INET
1419 	case AF_INET:
1420 		error = in_l2tp_detach(var);
1421 		break;
1422 #endif
1423 #ifdef INET6
1424 	case AF_INET6:
1425 		error = in6_l2tp_detach(var);
1426 		break;
1427 #endif
1428 	default:
1429 		error = EINVAL;
1430 		break;
1431 	}
1432 
1433 	return error;
1434 }
1435 
/*
 * Bound tunnel-in-tunnel recursion: defer to the generic tunnel
 * nesting check with this driver's limit (max_l2tp_nesting).
 */
int
l2tp_check_nesting(struct ifnet *ifp, struct mbuf *m)
{

	return if_tunnel_check_nesting(ifp, m, max_l2tp_nesting);
}
1442 
1443 /*
1444  * Module infrastructure
1445  */
1446 #include "if_module.h"
1447 
1448 IF_MODULE(MODULE_CLASS_DRIVER, l2tp, NULL)
1449 
1450 
1451 /* TODO: IP_TCPMSS support */
1452 #ifdef IP_TCPMSS
1453 static int l2tp_need_tcpmss_clamp(struct ifnet *);
1454 #ifdef INET
1455 static struct mbuf *l2tp_tcpmss4_clamp(struct ifnet *, struct mbuf *);
1456 #endif
1457 #ifdef INET6
1458 static struct mbuf *l2tp_tcpmss6_clamp(struct ifnet *, struct mbuf *);
1459 #endif
1460 
/*
 * Clamp the TCP MSS of a packet carried over the tunnel, for plain
 * Ethernet and single-tagged VLAN frames.  The Ethernet (and VLAN)
 * header is peeled off, the per-AF clamp helper rewrites the TCP MSS
 * option, and the header is prepended again.  Returns the (possibly
 * reallocated) mbuf chain, or NULL if the packet was consumed/freed.
 *
 * NOTE(review): this whole section is compiled only under IP_TCPMSS
 * (see the enclosing #ifdef); it is TODO code and not built today.
 */
struct mbuf *
l2tp_tcpmss_clamp(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	struct ether_vlan_header evh;

	if (!l2tp_need_tcpmss_clamp(ifp)) {
		return m;
	}

	/*
	 * NOTE(review): frames shorter than an ether_vlan_header are
	 * dropped even when they are untagged (only an ether_header is
	 * required below) -- confirm that such runts never matter here.
	 */
	if (m->m_pkthdr.len < sizeof(evh)) {
		m_freem(m);
		return NULL;
	}

	/* save ether header */
	m_copydata(m, 0, sizeof(evh), (void *)&evh);
	eh = (struct ether_header *)&evh;

	switch (ntohs(eh->ether_type)) {
	case ETHERTYPE_VLAN: /* Ether + VLAN */
		if (m->m_pkthdr.len <= sizeof(struct ether_vlan_header))
			break;
		m_adj(m, sizeof(struct ether_vlan_header));
		/* Dispatch on the encapsulated protocol inside the tag. */
		switch (ntohs(evh.evl_proto)) {
#ifdef INET
		case ETHERTYPE_IP: /* Ether + VLAN + IPv4 */
			m = l2tp_tcpmss4_clamp(ifp, m);
			if (m == NULL)
				return NULL;
			break;
#endif /* INET */
#ifdef INET6
		case ETHERTYPE_IPV6: /* Ether + VLAN + IPv6 */
			m = l2tp_tcpmss6_clamp(ifp, m);
			if (m == NULL)
				return NULL;
			break;
#endif /* INET6 */
		default:
			break;
		}

		/* restore ether header */
		M_PREPEND(m, sizeof(struct ether_vlan_header),
		    M_DONTWAIT);
		if (m == NULL)
			return NULL;
		*mtod(m, struct ether_vlan_header *) = evh;
		break;

#ifdef INET
	case ETHERTYPE_IP: /* Ether + IPv4 */
		if (m->m_pkthdr.len <= sizeof(struct ether_header))
			break;
		m_adj(m, sizeof(struct ether_header));
		m = l2tp_tcpmss4_clamp(ifp, m);
		if (m == NULL)
			return NULL;
		/* restore ether header */
		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
		if (m == NULL)
			return NULL;
		*mtod(m, struct ether_header *) = *eh;
		break;
#endif /* INET */

#ifdef INET6
	case ETHERTYPE_IPV6: /* Ether + IPv6 */
		if (m->m_pkthdr.len <= sizeof(struct ether_header))
			break;
		m_adj(m, sizeof(struct ether_header));
		m = l2tp_tcpmss6_clamp(ifp, m);
		if (m == NULL)
			return NULL;
		/* restore ether header */
		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
		if (m == NULL)
			return NULL;
		*mtod(m, struct ether_header *) = *eh;
		break;
#endif /* INET6 */

	default:
		break;
	}

	return m;
}
1550 
/*
 * Return non-zero when MSS clamping is configured on `ifp' for either
 * address family (if_tcpmss / if_tcpmss6 non-zero), zero otherwise.
 */
static int
l2tp_need_tcpmss_clamp(struct ifnet *ifp)
{

#ifdef INET
	if (ifp->if_tcpmss != 0)
		return 1;
#endif

#ifdef INET6
	if (ifp->if_tcpmss6 != 0)
		return 1;
#endif

	return 0;
}
1568 
1569 #ifdef INET
/*
 * Clamp the TCP MSS option of an IPv4 packet.  A negative if_tcpmss
 * means "derive from the MTU" (MTU minus IP_TCPMSS_EXTLEN); a positive
 * value is used as-is.  Returns the possibly-modified mbuf chain, or
 * the original chain untouched when clamping is not configured.
 */
static struct mbuf *
l2tp_tcpmss4_clamp(struct ifnet *ifp, struct mbuf *m)
{

	if (ifp->if_tcpmss != 0) {
		return ip_tcpmss(m, (ifp->if_tcpmss < 0) ?
			ifp->if_mtu - IP_TCPMSS_EXTLEN :
			ifp->if_tcpmss);
	}
	return m;
}
1581 #endif /* INET */
1582 
1583 #ifdef INET6
/*
 * Clamp the TCP MSS option of an IPv6 packet, but only when
 * ip6_tcpmss_applicable() says the packet carries a clampable TCP
 * segment (it also reports the IPv6 header length).  A negative
 * if_tcpmss6 means "derive from the MTU"; a positive value is used
 * as-is.  Returns the possibly-modified mbuf chain.
 */
static struct mbuf *
l2tp_tcpmss6_clamp(struct ifnet *ifp, struct mbuf *m)
{
	int ip6hdrlen;

	if (ifp->if_tcpmss6 != 0 &&
	    ip6_tcpmss_applicable(m, &ip6hdrlen)) {
		return ip6_tcpmss(m, ip6hdrlen,
			(ifp->if_tcpmss6 < 0) ?
			ifp->if_mtu - IP6_TCPMSS_EXTLEN :
			ifp->if_tcpmss6);
	}
	return m;
}
1598 #endif /* INET6 */
1599 
1600 #endif /* IP_TCPMSS */
1601