xref: /netbsd-src/sys/net/if_l2tp.c (revision 2dd295436a0082eb4f8d294f4aa73c223413d0f2)
1 /*	$NetBSD: if_l2tp.c,v 1.48 2022/09/03 02:47:59 thorpej Exp $	*/
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * L2TPv3 kernel interface
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: if_l2tp.c,v 1.48 2022/09/03 02:47:59 thorpej Exp $");
35 
36 #ifdef _KERNEL_OPT
37 #include "opt_inet.h"
38 #include "opt_net_mpsafe.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/mbuf.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/errno.h>
48 #include <sys/ioctl.h>
49 #include <sys/time.h>
50 #include <sys/syslog.h>
51 #include <sys/proc.h>
52 #include <sys/conf.h>
53 #include <sys/kauth.h>
54 #include <sys/cpu.h>
55 #include <sys/cprng.h>
56 #include <sys/intr.h>
57 #include <sys/kmem.h>
58 #include <sys/mutex.h>
59 #include <sys/atomic.h>
60 #include <sys/pserialize.h>
61 #include <sys/device.h>
62 #include <sys/module.h>
63 
64 #include <net/if.h>
65 #include <net/if_dl.h>
66 #include <net/if_ether.h>
67 #include <net/if_types.h>
68 #include <net/route.h>
69 #include <net/bpf.h>
70 #include <net/if_vlanvar.h>
71 
72 #include <netinet/in.h>
73 #include <netinet/in_systm.h>
74 #include <netinet/ip.h>
75 #include <netinet/ip_encap.h>
76 #ifdef	INET
77 #include <netinet/in_var.h>
78 #include <netinet/in_l2tp.h>
79 #endif	/* INET */
80 #ifdef INET6
81 #include <netinet6/in6_l2tp.h>
82 #endif
83 
84 #include <net/if_l2tp.h>
85 
86 #include <net/if_vlanvar.h>
87 
88 /* TODO: IP_TCPMSS support */
89 #undef IP_TCPMSS
90 #ifdef IP_TCPMSS
91 #include <netinet/ip_tcpmss.h>
92 #endif
93 
94 /*
95  * l2tp global variable definitions
96  */
97 static struct {
98 	LIST_HEAD(l2tp_sclist, l2tp_softc) list;
99 	kmutex_t lock;
100 } l2tp_softcs __cacheline_aligned;
101 
102 
103 #if !defined(L2TP_ID_HASH_SIZE)
104 #define L2TP_ID_HASH_SIZE 64
105 #endif
106 static struct {
107 	kmutex_t lock;
108 	struct pslist_head *lists;
109 	u_long mask;
110 } l2tp_hash __cacheline_aligned = {
111 	.lists = NULL,
112 };
113 
114 pserialize_t l2tp_psz __read_mostly;
115 struct psref_class *lv_psref_class __read_mostly;
116 
117 static void	l2tp_ifq_init_pc(void *, void *, struct cpu_info *);
118 static void	l2tp_ifq_fini_pc(void *, void *, struct cpu_info *);
119 
120 static int	l2tp_clone_create(struct if_clone *, int);
121 static int	l2tp_clone_destroy(struct ifnet *);
122 
123 struct if_clone l2tp_cloner =
124     IF_CLONE_INITIALIZER("l2tp", l2tp_clone_create, l2tp_clone_destroy);
125 
126 static int	l2tp_tx_enqueue(struct l2tp_variant *, struct mbuf *);
127 static int	l2tp_output(struct ifnet *, struct mbuf *,
128 		    const struct sockaddr *, const struct rtentry *);
129 static void	l2tp_sendit(struct l2tp_variant *, struct mbuf *);
130 static void	l2tpintr(struct l2tp_variant *);
131 static void	l2tpintr_softint(void *);
132 
133 static void	l2tp_hash_init(void);
134 static int	l2tp_hash_fini(void);
135 
136 static void	l2tp_start(struct ifnet *);
137 static int	l2tp_transmit(struct ifnet *, struct mbuf *);
138 
139 static int	l2tp_set_tunnel(struct ifnet *, struct sockaddr *,
140 		    struct sockaddr *);
141 static void	l2tp_delete_tunnel(struct ifnet *);
142 
143 static int	id_hash_func(uint32_t, u_long);
144 
145 static void	l2tp_variant_update(struct l2tp_softc *, struct l2tp_variant *);
146 static int	l2tp_set_session(struct l2tp_softc *, uint32_t, uint32_t);
147 static int	l2tp_clear_session(struct l2tp_softc *);
148 static int	l2tp_set_cookie(struct l2tp_softc *, uint64_t, u_int, uint64_t, u_int);
149 static void	l2tp_clear_cookie(struct l2tp_softc *);
150 static void	l2tp_set_state(struct l2tp_softc *, int);
151 static int	l2tp_encap_attach(struct l2tp_variant *);
152 static int	l2tp_encap_detach(struct l2tp_variant *);
153 
154 static inline struct ifqueue *
155 l2tp_ifq_percpu_getref(percpu_t *pc)
156 {
157 
158 	return *(struct ifqueue **)percpu_getref(pc);
159 }
160 
/*
 * Release the percpu reference taken by l2tp_ifq_percpu_getref().
 */
static inline void
l2tp_ifq_percpu_putref(percpu_t *pc)
{

	percpu_putref(pc);
}
167 
168 #ifndef MAX_L2TP_NEST
169 /*
170  * This macro controls the upper limitation on nesting of l2tp tunnels.
171  * Since, setting a large value to this macro with a careless configuration
172  * may introduce system crash, we don't allow any nestings by default.
173  * If you need to configure nested l2tp tunnels, you can define this macro
174  * in your kernel configuration file.  However, if you do so, please be
175  * careful to configure the tunnels so that it won't make a loop.
176  */
177 /*
178  * XXX
179  * Currently, if in_l2tp_output recursively calls, it causes locking against
180  * myself of struct l2tp_ro->lr_lock. So, nested l2tp tunnels is prohibited.
181  */
182 #define MAX_L2TP_NEST 0
183 #endif
184 
185 static int max_l2tp_nesting = MAX_L2TP_NEST;
186 
187 /* ARGSUSED */
188 void
189 l2tpattach(int count)
190 {
191 	/*
192 	 * Nothing to do here, initialization is handled by the
193 	 * module initialization code in l2tpinit() below).
194 	 */
195 }
196 
/*
 * Module-wide initialization: set up the softc registry, the psref /
 * pserialize machinery and the session-id hash, then register the
 * "l2tp" interface cloner so instances can be created.
 */
static void
l2tpinit(void)
{

	mutex_init(&l2tp_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
	LIST_INIT(&l2tp_softcs.list);

	mutex_init(&l2tp_hash.lock, MUTEX_DEFAULT, IPL_NONE);
	l2tp_psz = pserialize_create();
	/* IPL_SOFTNET: variant refs are taken from softint context too. */
	lv_psref_class = psref_class_create("l2tpvar", IPL_SOFTNET);
	if_clone_attach(&l2tp_cloner);

	l2tp_hash_init();
}
211 
212 static int
213 l2tpdetach(void)
214 {
215 	int error;
216 
217 	mutex_enter(&l2tp_softcs.lock);
218 	if (!LIST_EMPTY(&l2tp_softcs.list)) {
219 		mutex_exit(&l2tp_softcs.lock);
220 		return EBUSY;
221 	}
222 	mutex_exit(&l2tp_softcs.lock);
223 
224 	error = l2tp_hash_fini();
225 	if (error)
226 		return error;
227 
228 	if_clone_detach(&l2tp_cloner);
229 	psref_class_destroy(lv_psref_class);
230 	pserialize_destroy(l2tp_psz);
231 	mutex_destroy(&l2tp_hash.lock);
232 
233 	mutex_destroy(&l2tp_softcs.lock);
234 
235 	return error;
236 }
237 
/*
 * Cloner create hook: allocate and initialize one l2tp interface
 * instance (softc, initial DOWN variant, per-CPU tx queues, softint)
 * and link it onto the global softc list.
 */
static int
l2tp_clone_create(struct if_clone *ifc, int unit)
{
	struct l2tp_softc *sc;
	struct l2tp_variant *var;
	int rv;
	u_int si_flags = SOFTINT_NET;
#ifdef NET_MPSAFE
	si_flags |= SOFTINT_MPSAFE;
#endif
	sc = kmem_zalloc(sizeof(struct l2tp_softc), KM_SLEEP);
	if_initname(&sc->l2tp_ec.ec_if, ifc->ifc_name, unit);
	rv = l2tpattach0(sc);
	if (rv != 0) {
		kmem_free(sc, sizeof(struct l2tp_softc));
		return rv;
	}

	/* Initial variant: no tunnel endpoints, link DOWN, cookies off. */
	var = kmem_zalloc(sizeof(struct l2tp_variant), KM_SLEEP);
	var->lv_softc = sc;
	var->lv_state = L2TP_STATE_DOWN;
	var->lv_use_cookie = L2TP_COOKIE_OFF;
	psref_target_init(&var->lv_psref, lv_psref_class);

	sc->l2tp_var = var;
	mutex_init(&sc->l2tp_lock, MUTEX_DEFAULT, IPL_NONE);
	sc->l2tp_psz = pserialize_create();
	PSLIST_ENTRY_INIT(sc, l2tp_hash);

	sc->l2tp_ro_percpu = if_tunnel_alloc_ro_percpu();

	/* Per-CPU transmit queues drained by the softint below. */
	sc->l2tp_ifq_percpu = percpu_create(sizeof(struct ifqueue *),
	    l2tp_ifq_init_pc, l2tp_ifq_fini_pc, NULL);
	sc->l2tp_si = softint_establish(si_flags, l2tpintr_softint, sc);

	mutex_enter(&l2tp_softcs.lock);
	LIST_INSERT_HEAD(&l2tp_softcs.list, sc, l2tp_list);
	mutex_exit(&l2tp_softcs.lock);

	return (0);
}
279 
/*
 * Initialize the ifnet embedded in the softc and attach it to the
 * network stack (including bpf, with Ethernet framing since the inner
 * protocol is Ethernet).  Returns 0; kept int for the caller's
 * error-path convention.
 */
int
l2tpattach0(struct l2tp_softc *sc)
{

	sc->l2tp_ec.ec_if.if_addrlen = 0;
	sc->l2tp_ec.ec_if.if_mtu    = L2TP_MTU;
	sc->l2tp_ec.ec_if.if_flags  = IFF_POINTOPOINT|IFF_MULTICAST|IFF_SIMPLEX;
#ifdef NET_MPSAFE
	sc->l2tp_ec.ec_if.if_extflags = IFEF_MPSAFE;
#endif
	sc->l2tp_ec.ec_if.if_ioctl  = l2tp_ioctl;
	sc->l2tp_ec.ec_if.if_output = l2tp_output;
	sc->l2tp_ec.ec_if.if_type   = IFT_L2TP;
	sc->l2tp_ec.ec_if.if_dlt    = DLT_NULL;
	sc->l2tp_ec.ec_if.if_start  = l2tp_start;
	sc->l2tp_ec.ec_if.if_transmit = l2tp_transmit;
	/* Decapsulated frames are Ethernet; hand them to ether_input. */
	sc->l2tp_ec.ec_if._if_input = ether_input;
	IFQ_SET_READY(&sc->l2tp_ec.ec_if.if_snd);

#ifdef MBUFTRACE
	struct ethercom *ec = &sc->l2tp_ec;
	struct ifnet *ifp = &sc->l2tp_ec.ec_if;

	strlcpy(ec->ec_tx_mowner.mo_name, ifp->if_xname,
	    sizeof(ec->ec_tx_mowner.mo_name));
	strlcpy(ec->ec_tx_mowner.mo_descr, "tx",
	    sizeof(ec->ec_tx_mowner.mo_descr));
	strlcpy(ec->ec_rx_mowner.mo_name, ifp->if_xname,
	    sizeof(ec->ec_rx_mowner.mo_name));
	strlcpy(ec->ec_rx_mowner.mo_descr, "rx",
	    sizeof(ec->ec_rx_mowner.mo_descr));
	MOWNER_ATTACH(&ec->ec_tx_mowner);
	MOWNER_ATTACH(&ec->ec_rx_mowner);
	ifp->if_mowner = &ec->ec_tx_mowner;
#endif

	/* XXX
	 * It may improve performance to use if_initialize()/if_register()
	 * so that l2tp_input() calls if_input() instead of
	 * if_percpuq_enqueue(). However, that causes recursive softnet_lock
	 * when NET_MPSAFE is not set.
	 */
	if_attach(&sc->l2tp_ec.ec_if);
	if_link_state_change(&sc->l2tp_ec.ec_if, LINK_STATE_DOWN);
	if_alloc_sadl(&sc->l2tp_ec.ec_if);
	bpf_attach(&sc->l2tp_ec.ec_if, DLT_EN10MB, sizeof(struct ether_header));

	return 0;
}
329 
330 void
331 l2tp_ifq_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
332 {
333 	struct ifqueue **ifqp = p;
334 
335 	*ifqp = kmem_zalloc(sizeof(**ifqp), KM_SLEEP);
336 	(*ifqp)->ifq_maxlen = IFQ_MAXLEN;
337 }
338 
339 void
340 l2tp_ifq_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
341 {
342 	struct ifqueue **ifqp = p;
343 
344 	kmem_free(*ifqp, sizeof(**ifqp));
345 }
346 
/*
 * Cloner destroy hook: tear down one l2tp interface.  Ordering is
 * significant: the session and tunnel are cleared first, then the
 * variant is nulled under the softc lock so in-flight l2tp_transmit()
 * and l2tpintr_softint() callers can no longer obtain it, and only
 * after that are the softint and per-CPU queues destroyed.
 */
static int
l2tp_clone_destroy(struct ifnet *ifp)
{
	struct l2tp_variant *var;
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);

	l2tp_clear_session(sc);
	l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
	/*
	 * To avoid for l2tp_transmit() and l2tpintr_softint() to access
	 * sc->l2tp_var after free it.
	 */
	mutex_enter(&sc->l2tp_lock);
	var = sc->l2tp_var;
	l2tp_variant_update(sc, NULL);
	mutex_exit(&sc->l2tp_lock);

	/* No new softint runs can see the softc after this. */
	softint_disestablish(sc->l2tp_si);
	percpu_free(sc->l2tp_ifq_percpu, sizeof(struct ifqueue *));

	mutex_enter(&l2tp_softcs.lock);
	LIST_REMOVE(sc, l2tp_list);
	mutex_exit(&l2tp_softcs.lock);

	bpf_detach(ifp);

	if_detach(ifp);

	if_tunnel_free_ro_percpu(sc->l2tp_ro_percpu);

	kmem_free(var, sizeof(struct l2tp_variant));
	pserialize_destroy(sc->l2tp_psz);
	mutex_destroy(&sc->l2tp_lock);
	kmem_free(sc, sizeof(struct l2tp_softc));

	return 0;
}
385 
386 static int
387 l2tp_tx_enqueue(struct l2tp_variant *var, struct mbuf *m)
388 {
389 	struct l2tp_softc *sc;
390 	struct ifnet *ifp;
391 	struct ifqueue *ifq;
392 	int s;
393 
394 	KASSERT(psref_held(&var->lv_psref, lv_psref_class));
395 
396 	sc = var->lv_softc;
397 	ifp = &sc->l2tp_ec.ec_if;
398 
399 	s = splsoftnet();
400 	ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
401 	if (IF_QFULL(ifq)) {
402 		if_statinc(ifp, if_oerrors);
403 		l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);
404 		splx(s);
405 		m_freem(m);
406 		return ENOBUFS;
407 	}
408 
409 	IF_ENQUEUE(ifq, m);
410 	percpu_putref(sc->l2tp_ifq_percpu);
411 	softint_schedule(sc->l2tp_si);
412 	/* counter is incremented in l2tpintr() */
413 	splx(s);
414 	return 0;
415 }
416 
417 static int
418 l2tp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
419     const struct rtentry *rt)
420 {
421 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
422 	    l2tp_ec.ec_if);
423 	struct l2tp_variant *var;
424 	struct psref psref;
425 	int error = 0;
426 
427 	var = l2tp_getref_variant(sc, &psref);
428 	if (var == NULL) {
429 		m_freem(m);
430 		return ENETDOWN;
431 	}
432 
433 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
434 
435 	m->m_flags &= ~(M_BCAST|M_MCAST);
436 
437 	if ((ifp->if_flags & IFF_UP) == 0) {
438 		m_freem(m);
439 		error = ENETDOWN;
440 		goto end;
441 	}
442 
443 	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
444 		m_freem(m);
445 		error = ENETDOWN;
446 		goto end;
447 	}
448 
449 	/* XXX should we check if our outer source is legal? */
450 
451 	/* use DLT_NULL encapsulation here to pass inner af type */
452 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
453 	if (!m) {
454 		error = ENOBUFS;
455 		goto end;
456 	}
457 	*mtod(m, int *) = dst->sa_family;
458 
459 	error = l2tp_tx_enqueue(var, m);
460 end:
461 	l2tp_putref_variant(var, &psref);
462 	if (error)
463 		if_statinc(ifp, if_oerrors);
464 
465 	return error;
466 }
467 
/*
 * Encapsulate and transmit one packet over the tunnel, dispatching on
 * the outer source address family.  Consumes `m' and updates the
 * interface statistics.  Caller must hold a psref on `var'.
 */
static void
l2tp_sendit(struct l2tp_variant *var, struct mbuf *m)
{
	int len;
	int error;
	struct l2tp_softc *sc;
	struct ifnet *ifp;

	KASSERT(psref_held(&var->lv_psref, lv_psref_class));

	sc = var->lv_softc;
	ifp = &sc->l2tp_ec.ec_if;

	/* Record length before the output path consumes the mbuf. */
	len = m->m_pkthdr.len;
	m->m_flags &= ~(M_BCAST|M_MCAST);
	bpf_mtap(ifp, m, BPF_D_OUT);

	switch (var->lv_psrc->sa_family) {
#ifdef INET
	case AF_INET:
		error = in_l2tp_output(var, m);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		error = in6_l2tp_output(var, m);
		break;
#endif
	default:
		/* Unsupported outer address family: drop. */
		m_freem(m);
		error = ENETDOWN;
		break;
	}
	if (error) {
		if_statinc(ifp, if_oerrors);
	} else {
		if_statadd2(ifp, if_opackets, 1, if_obytes, len);
	}
}
507 
/*
 * Transmit interrupt body: drain this CPU's private queue, sending each
 * packet via l2tp_sendit().  If no session is configured, everything is
 * purged instead.  CPU 0 additionally services the legacy if_snd queue
 * (filled via the if_start path).  Caller must hold a psref on `var'.
 */
static void
l2tpintr(struct l2tp_variant *var)
{
	struct l2tp_softc *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ifqueue *ifq;
	u_int cpuid = cpu_index(curcpu());

	KASSERT(psref_held(&var->lv_psref, lv_psref_class));

	sc = var->lv_softc;
	ifp = &sc->l2tp_ec.ec_if;

	/* output processing */
	if (var->lv_my_sess_id == 0 || var->lv_peer_sess_id == 0) {
		/* No session established: drop all queued packets. */
		ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
		IF_PURGE(ifq);
		l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);
		if (cpuid == 0)
			IFQ_PURGE(&ifp->if_snd);
		return;
	}

	/* Currently, l2tpintr() is always called in softint context. */
	ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
	for (;;) {
		IF_DEQUEUE(ifq, m);
		if (m != NULL)
			l2tp_sendit(var, m);
		else
			break;
	}
	l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);

	/* Only CPU 0 drains the shared if_snd queue, so no lock is needed. */
	if (cpuid == 0) {
		for (;;) {
			IFQ_DEQUEUE(&ifp->if_snd, m);
			if (m != NULL)
				l2tp_sendit(var, m);
			else
				break;
		}
	}
}
553 
554 static void
555 l2tpintr_softint(void *arg)
556 {
557 	struct l2tp_variant *var;
558 	struct psref psref;
559 	struct l2tp_softc *sc = arg;
560 
561 	var = l2tp_getref_variant(sc, &psref);
562 	if (var == NULL)
563 		return;
564 
565 	l2tpintr(var);
566 	l2tp_putref_variant(var, &psref);
567 }
568 
/*
 * Deliver a decapsulated frame to the stack.  The inner protocol is
 * always Ethernet, so frames shorter than an Ethernet header are
 * dropped.  Because the 14-byte Ethernet header would otherwise leave
 * the following payload misaligned, the head of the packet is copied
 * into a fresh mbuf positioned so the payload after the header is
 * 4-byte aligned (data starting at offset 2 mod 4).  Consumes `m'.
 */
void
l2tp_input(struct mbuf *m, struct ifnet *ifp)
{
	vaddr_t addr;

	KASSERT(ifp != NULL);

	/*
	 * Currently, l2tp(4) supports only ethernet as inner protocol.
	 */
	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
		m_freem(m);
		return;
	}

	/*
	 * If the head of the payload is not aligned, align it.
	 */
	addr = mtod(m, vaddr_t);
	if ((addr & 0x03) != 0x2) {
		/* copy and align head of payload */
		struct mbuf *m_head;
		int copy_length;
		u_int pad = roundup(sizeof(struct ether_header), 4)
			- sizeof(struct ether_header);

#define L2TP_COPY_LENGTH		60

		if (m->m_pkthdr.len < L2TP_COPY_LENGTH) {
			copy_length = m->m_pkthdr.len;
		} else {
			copy_length = L2TP_COPY_LENGTH;
		}

		/* Make the bytes to be copied contiguous. */
		if (m->m_len < copy_length) {
			m = m_pullup(m, copy_length);
			if (m == NULL)
				return;
		}

		MGETHDR(m_head, M_DONTWAIT, MT_HEADER);
		if (m_head == NULL) {
			m_freem(m);
			return;
		}
		m_move_pkthdr(m_head, m);

		/*
		 * m_head should be:
		 *                             L2TP_COPY_LENGTH
		 *                          <-  + roundup(pad, 4) - pad ->
		 *   +-------+--------+-----+--------------+-------------+
		 *   | m_hdr | pkthdr | ... | ether header |   payload   |
		 *   +-------+--------+-----+--------------+-------------+
		 *                          ^              ^
		 *                          m_data         4 byte aligned
		 */
		m_align(m_head, L2TP_COPY_LENGTH + roundup(pad, 4));
		m_head->m_data += pad;

		memcpy(mtod(m_head, void *), mtod(m, void *), copy_length);
		m_head->m_len = copy_length;
		m->m_data += copy_length;
		m->m_len -= copy_length;

		/* construct chain */
		if (m->m_len == 0) {
			/* m was fully copied; free it, keep its successor. */
			m_head->m_next = m_free(m);
		} else {
			m_head->m_next = m;
		}

		/* override m */
		m = m_head;
	}

	m_set_rcvif(m, ifp);

	/*
	 * bpf_mtap() and ifp->if_ipackets++ is done in if_input()
	 *
	 * obytes is incremented at ether_output() or bridge_enqueue().
	 */
	if_percpuq_enqueue(ifp->if_percpuq, m);
}
654 
655 void
656 l2tp_start(struct ifnet *ifp)
657 {
658 	struct psref psref;
659 	struct l2tp_variant *var;
660 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
661 	    l2tp_ec.ec_if);
662 
663 	var = l2tp_getref_variant(sc, &psref);
664 	if (var == NULL)
665 		return;
666 
667 	if (var->lv_psrc == NULL || var->lv_pdst == NULL)
668 		return;
669 
670 	kpreempt_disable();
671 	softint_schedule(sc->l2tp_si);
672 	kpreempt_enable();
673 	l2tp_putref_variant(var, &psref);
674 }
675 
676 int
677 l2tp_transmit(struct ifnet *ifp, struct mbuf *m)
678 {
679 	int error;
680 	struct psref psref;
681 	struct l2tp_variant *var;
682 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
683 	    l2tp_ec.ec_if);
684 
685 	var = l2tp_getref_variant(sc, &psref);
686 	if (var == NULL) {
687 		m_freem(m);
688 		return ENETDOWN;
689 	}
690 
691 	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
692 		m_freem(m);
693 		error = ENETDOWN;
694 		goto out;
695 	}
696 
697 	m->m_flags &= ~(M_BCAST|M_MCAST);
698 
699 	error = l2tp_tx_enqueue(var, m);
700 out:
701 	l2tp_putref_variant(var, &psref);
702 	return error;
703 }
704 
705 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
706 int
707 l2tp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
708 {
709 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
710 	    l2tp_ec.ec_if);
711 	struct l2tp_variant *var, *var_tmp;
712 	struct ifreq     *ifr = data;
713 	int error = 0, size;
714 	struct sockaddr *dst, *src;
715 	struct l2tp_req l2tpr;
716 	u_long mtu;
717 	int bound;
718 	struct psref psref;
719 
720 	switch (cmd) {
721 	case SIOCSIFADDR:
722 		ifp->if_flags |= IFF_UP;
723 		break;
724 
725 	case SIOCSIFDSTADDR:
726 		break;
727 
728 	case SIOCADDMULTI:
729 	case SIOCDELMULTI:
730 		switch (ifr->ifr_addr.sa_family) {
731 #ifdef INET
732 		case AF_INET:	/* IP supports Multicast */
733 			break;
734 #endif /* INET */
735 #ifdef INET6
736 		case AF_INET6:	/* IP6 supports Multicast */
737 			break;
738 #endif /* INET6 */
739 		default:  /* Other protocols doesn't support Multicast */
740 			error = EAFNOSUPPORT;
741 			break;
742 		}
743 		break;
744 
745 	case SIOCSIFMTU:
746 		mtu = ifr->ifr_mtu;
747 		if (mtu < L2TP_MTU_MIN || mtu > L2TP_MTU_MAX)
748 			return (EINVAL);
749 		ifp->if_mtu = mtu;
750 		break;
751 
752 #ifdef INET
753 	case SIOCSIFPHYADDR:
754 		src = (struct sockaddr *)
755 			&(((struct in_aliasreq *)data)->ifra_addr);
756 		dst = (struct sockaddr *)
757 			&(((struct in_aliasreq *)data)->ifra_dstaddr);
758 		if (src->sa_family != AF_INET || dst->sa_family != AF_INET)
759 			return EAFNOSUPPORT;
760 		else if (src->sa_len != sizeof(struct sockaddr_in)
761 		    || dst->sa_len != sizeof(struct sockaddr_in))
762 			return EINVAL;
763 
764 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
765 		break;
766 
767 #endif /* INET */
768 #ifdef INET6
769 	case SIOCSIFPHYADDR_IN6:
770 		src = (struct sockaddr *)
771 			&(((struct in6_aliasreq *)data)->ifra_addr);
772 		dst = (struct sockaddr *)
773 			&(((struct in6_aliasreq *)data)->ifra_dstaddr);
774 		if (src->sa_family != AF_INET6 || dst->sa_family != AF_INET6)
775 			return EAFNOSUPPORT;
776 		else if (src->sa_len != sizeof(struct sockaddr_in6)
777 		    || dst->sa_len != sizeof(struct sockaddr_in6))
778 			return EINVAL;
779 
780 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
781 		break;
782 
783 #endif /* INET6 */
784 	case SIOCSLIFPHYADDR:
785 		src = (struct sockaddr *)
786 			&(((struct if_laddrreq *)data)->addr);
787 		dst = (struct sockaddr *)
788 			&(((struct if_laddrreq *)data)->dstaddr);
789 		if (src->sa_family != dst->sa_family)
790 			return EINVAL;
791 		else if (src->sa_family == AF_INET
792 		    && src->sa_len != sizeof(struct sockaddr_in))
793 			return EINVAL;
794 		else if (src->sa_family == AF_INET6
795 		    && src->sa_len != sizeof(struct sockaddr_in6))
796 			return EINVAL;
797 		else if (dst->sa_family == AF_INET
798 		    && dst->sa_len != sizeof(struct sockaddr_in))
799 			return EINVAL;
800 		else if (dst->sa_family == AF_INET6
801 		    && dst->sa_len != sizeof(struct sockaddr_in6))
802 			return EINVAL;
803 
804 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
805 		break;
806 
807 	case SIOCDIFPHYADDR:
808 		l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
809 		break;
810 
811 	case SIOCGIFPSRCADDR:
812 #ifdef INET6
813 	case SIOCGIFPSRCADDR_IN6:
814 #endif /* INET6 */
815 		bound = curlwp_bind();
816 		var = l2tp_getref_variant(sc, &psref);
817 		if (var == NULL) {
818 			curlwp_bindx(bound);
819 			error = EADDRNOTAVAIL;
820 			goto bad;
821 		}
822 		if (var->lv_psrc == NULL) {
823 			l2tp_putref_variant(var, &psref);
824 			curlwp_bindx(bound);
825 			error = EADDRNOTAVAIL;
826 			goto bad;
827 		}
828 		src = var->lv_psrc;
829 		switch (cmd) {
830 #ifdef INET
831 		case SIOCGIFPSRCADDR:
832 			dst = &ifr->ifr_addr;
833 			size = sizeof(ifr->ifr_addr);
834 			break;
835 #endif /* INET */
836 #ifdef INET6
837 		case SIOCGIFPSRCADDR_IN6:
838 			dst = (struct sockaddr *)
839 				&(((struct in6_ifreq *)data)->ifr_addr);
840 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
841 			break;
842 #endif /* INET6 */
843 		default:
844 			l2tp_putref_variant(var, &psref);
845 			curlwp_bindx(bound);
846 			error = EADDRNOTAVAIL;
847 			goto bad;
848 		}
849 		if (src->sa_len > size) {
850 			l2tp_putref_variant(var, &psref);
851 			curlwp_bindx(bound);
852 			return EINVAL;
853 		}
854 		sockaddr_copy(dst, src->sa_len, src);
855 		l2tp_putref_variant(var, &psref);
856 		curlwp_bindx(bound);
857 		break;
858 
859 	case SIOCGIFPDSTADDR:
860 #ifdef INET6
861 	case SIOCGIFPDSTADDR_IN6:
862 #endif /* INET6 */
863 		bound = curlwp_bind();
864 		var = l2tp_getref_variant(sc, &psref);
865 		if (var == NULL) {
866 			curlwp_bindx(bound);
867 			error = EADDRNOTAVAIL;
868 			goto bad;
869 		}
870 		if (var->lv_pdst == NULL) {
871 			l2tp_putref_variant(var, &psref);
872 			curlwp_bindx(bound);
873 			error = EADDRNOTAVAIL;
874 			goto bad;
875 		}
876 		src = var->lv_pdst;
877 		switch (cmd) {
878 #ifdef INET
879 		case SIOCGIFPDSTADDR:
880 			dst = &ifr->ifr_addr;
881 			size = sizeof(ifr->ifr_addr);
882 			break;
883 #endif /* INET */
884 #ifdef INET6
885 		case SIOCGIFPDSTADDR_IN6:
886 			dst = (struct sockaddr *)
887 				&(((struct in6_ifreq *)data)->ifr_addr);
888 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
889 			break;
890 #endif /* INET6 */
891 		default:
892 			l2tp_putref_variant(var, &psref);
893 			curlwp_bindx(bound);
894 			error = EADDRNOTAVAIL;
895 			goto bad;
896 		}
897 		if (src->sa_len > size) {
898 			l2tp_putref_variant(var, &psref);
899 			curlwp_bindx(bound);
900 			return EINVAL;
901 		}
902 		sockaddr_copy(dst, src->sa_len, src);
903 		l2tp_putref_variant(var, &psref);
904 		curlwp_bindx(bound);
905 		break;
906 
907 	case SIOCGLIFPHYADDR:
908 		bound = curlwp_bind();
909 		var = l2tp_getref_variant(sc, &psref);
910 		if (var == NULL) {
911 			curlwp_bindx(bound);
912 			error = EADDRNOTAVAIL;
913 			goto bad;
914 		}
915 		if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
916 			l2tp_putref_variant(var, &psref);
917 			curlwp_bindx(bound);
918 			error = EADDRNOTAVAIL;
919 			goto bad;
920 		}
921 
922 		/* copy src */
923 		src = var->lv_psrc;
924 		dst = (struct sockaddr *)
925 			&(((struct if_laddrreq *)data)->addr);
926 		size = sizeof(((struct if_laddrreq *)data)->addr);
927 		if (src->sa_len > size) {
928 			l2tp_putref_variant(var, &psref);
929 			curlwp_bindx(bound);
930 			return EINVAL;
931                 }
932 		sockaddr_copy(dst, src->sa_len, src);
933 
934 		/* copy dst */
935 		src = var->lv_pdst;
936 		dst = (struct sockaddr *)
937 			&(((struct if_laddrreq *)data)->dstaddr);
938 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
939 		if (src->sa_len > size) {
940 			l2tp_putref_variant(var, &psref);
941 			curlwp_bindx(bound);
942 			return EINVAL;
943                 }
944 		sockaddr_copy(dst, src->sa_len, src);
945 		l2tp_putref_variant(var, &psref);
946 		curlwp_bindx(bound);
947 		break;
948 
949 	case SIOCSL2TPSESSION:
950 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
951 			break;
952 
953 		/* session id must not zero */
954 		if (l2tpr.my_sess_id == 0 || l2tpr.peer_sess_id == 0)
955 			return EINVAL;
956 
957 		bound = curlwp_bind();
958 		var_tmp = l2tp_lookup_session_ref(l2tpr.my_sess_id, &psref);
959 		if (var_tmp != NULL) {
960 			/* duplicate session id */
961 			log(LOG_WARNING, "%s: duplicate session id %" PRIu32 " of %s\n",
962 				sc->l2tp_ec.ec_if.if_xname, l2tpr.my_sess_id,
963 				var_tmp->lv_softc->l2tp_ec.ec_if.if_xname);
964 			psref_release(&psref, &var_tmp->lv_psref,
965 			    lv_psref_class);
966 			curlwp_bindx(bound);
967 			return EINVAL;
968 		}
969 		curlwp_bindx(bound);
970 
971 		error = l2tp_set_session(sc, l2tpr.my_sess_id, l2tpr.peer_sess_id);
972 		break;
973 	case SIOCDL2TPSESSION:
974 		l2tp_clear_session(sc);
975 		break;
976 	case SIOCSL2TPCOOKIE:
977 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
978 			break;
979 
980 		error = l2tp_set_cookie(sc, l2tpr.my_cookie, l2tpr.my_cookie_len,
981 		    l2tpr.peer_cookie, l2tpr.peer_cookie_len);
982 		break;
983 	case SIOCDL2TPCOOKIE:
984 		l2tp_clear_cookie(sc);
985 		break;
986 	case SIOCSL2TPSTATE:
987 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
988 			break;
989 
990 		l2tp_set_state(sc, l2tpr.state);
991 		break;
992 	case SIOCGL2TP:
993 		/* get L2TPV3 session info */
994 		memset(&l2tpr, 0, sizeof(l2tpr));
995 
996 		bound = curlwp_bind();
997 		var = l2tp_getref_variant(sc, &psref);
998 		if (var == NULL) {
999 			curlwp_bindx(bound);
1000 			error = EADDRNOTAVAIL;
1001 			goto bad;
1002 		}
1003 
1004 		l2tpr.state = var->lv_state;
1005 		l2tpr.my_sess_id = var->lv_my_sess_id;
1006 		l2tpr.peer_sess_id = var->lv_peer_sess_id;
1007 		l2tpr.my_cookie = var->lv_my_cookie;
1008 		l2tpr.my_cookie_len = var->lv_my_cookie_len;
1009 		l2tpr.peer_cookie = var->lv_peer_cookie;
1010 		l2tpr.peer_cookie_len = var->lv_peer_cookie_len;
1011 		l2tp_putref_variant(var, &psref);
1012 		curlwp_bindx(bound);
1013 
1014 		error = copyout(&l2tpr, ifr->ifr_data, sizeof(l2tpr));
1015 		break;
1016 
1017 	default:
1018 		error =	ifioctl_common(ifp, cmd, data);
1019 		break;
1020 	}
1021  bad:
1022 	return error;
1023 }
1024 
/*
 * Install new tunnel endpoints by publishing a new variant that is a
 * copy of the current one with the new addresses.  Lock order is
 * encap_lock -> sc->l2tp_lock.  The old variant (and its addresses) is
 * freed only after the new one is visible; readers are expected to be
 * quiesced by l2tp_variant_update() — confirm against its definition.
 */
static int
l2tp_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
{
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);
	struct sockaddr *osrc, *odst;
	struct sockaddr *nsrc, *ndst;
	struct l2tp_variant *ovar, *nvar;
	int error;

	/* Allocate everything before taking any lock. */
	nsrc = sockaddr_dup(src, M_WAITOK);
	ndst = sockaddr_dup(dst, M_WAITOK);

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	error = encap_lock_enter();
	if (error)
		goto error;

	mutex_enter(&sc->l2tp_lock);

	ovar = sc->l2tp_var;
	osrc = ovar->lv_psrc;
	odst = ovar->lv_pdst;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_psrc = nsrc;
	nvar->lv_pdst = ndst;
	error = l2tp_encap_attach(nvar);
	if (error) {
		mutex_exit(&sc->l2tp_lock);
		encap_lock_exit();
		goto error;
	}
	l2tp_variant_update(sc, nvar);

	mutex_exit(&sc->l2tp_lock);

	(void)l2tp_encap_detach(ovar);
	encap_lock_exit();

	if (osrc)
		sockaddr_free(osrc);
	if (odst)
		sockaddr_free(odst);
	kmem_free(ovar, sizeof(*ovar));
	return 0;

error:
	sockaddr_free(nsrc);
	sockaddr_free(ndst);
	kmem_free(nvar, sizeof(*nvar));

	return error;
}
1080 
/*
 * Remove the tunnel endpoints: publish a new variant with NULL
 * addresses, detach the old encap entry and free the old variant and
 * addresses.  Failure to take the encap lock aborts silently (the
 * function returns void).
 */
static void
l2tp_delete_tunnel(struct ifnet *ifp)
{
	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
	    l2tp_ec.ec_if);
	struct sockaddr *osrc, *odst;
	struct l2tp_variant *ovar, *nvar;
	int error;

	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	error = encap_lock_enter();
	if (error) {
		kmem_free(nvar, sizeof(*nvar));
		return;
	}
	mutex_enter(&sc->l2tp_lock);

	ovar = sc->l2tp_var;
	osrc = ovar->lv_psrc;
	odst = ovar->lv_pdst;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_psrc = NULL;
	nvar->lv_pdst = NULL;
	l2tp_variant_update(sc, nvar);

	mutex_exit(&sc->l2tp_lock);

	(void)l2tp_encap_detach(ovar);
	encap_lock_exit();

	if (osrc)
		sockaddr_free(osrc);
	if (odst)
		sockaddr_free(odst);
	kmem_free(ovar, sizeof(*ovar));
}
1119 
/*
 * Map a 32-bit session id to a hash-bucket index in [0, mask].
 * XOR-folds the high half and then a 4-bit shift into the low bits so
 * all of the id influences the index.
 */
static int
id_hash_func(uint32_t id, u_long mask)
{
	uint32_t h = id;

	h ^= h >> 16;
	h ^= h >> 4;

	return h & mask;
}
1130 
1131 static void
1132 l2tp_hash_init(void)
1133 {
1134 
1135 	l2tp_hash.lists = hashinit(L2TP_ID_HASH_SIZE, HASH_PSLIST, true,
1136 	    &l2tp_hash.mask);
1137 }
1138 
1139 static int
1140 l2tp_hash_fini(void)
1141 {
1142 	int i;
1143 
1144 	mutex_enter(&l2tp_hash.lock);
1145 
1146 	for (i = 0; i < l2tp_hash.mask + 1; i++) {
1147 		if (PSLIST_WRITER_FIRST(&l2tp_hash.lists[i], struct l2tp_softc,
1148 			l2tp_hash) != NULL) {
1149 			mutex_exit(&l2tp_hash.lock);
1150 			return EBUSY;
1151 		}
1152 	}
1153 	for (i = 0; i < l2tp_hash.mask + 1; i++)
1154 		PSLIST_DESTROY(&l2tp_hash.lists[i]);
1155 
1156 	mutex_exit(&l2tp_hash.lock);
1157 
1158 	hashdone(l2tp_hash.lists, HASH_PSLIST, l2tp_hash.mask);
1159 
1160 	return 0;
1161 }
1162 
/*
 * Set the local and peer session ids of an l2tp interface and re-hash
 * the softc under the new local id.
 *
 * A copy of the current variant carrying the new ids is published via
 * l2tp_variant_update() (copy-on-write).  If the interface was already
 * hashed (both old ids nonzero), it is first removed from the id hash
 * with a pserialize sync so no reader can still see the entry.
 * Always returns 0.
 */
static int
l2tp_set_session(struct l2tp_softc *sc, uint32_t my_sess_id,
    uint32_t peer_sess_id)
{
	uint32_t idx;
	struct l2tp_variant *nvar;
	struct l2tp_variant *ovar;
	struct ifnet *ifp = &sc->l2tp_ec.ec_if;

	/* Allocate the replacement variant before taking any lock. */
	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	mutex_enter(&sc->l2tp_lock);
	ovar = sc->l2tp_var;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_my_sess_id = my_sess_id;
	nvar->lv_peer_sess_id = peer_sess_id;

	mutex_enter(&l2tp_hash.lock);
	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
		/* Unhash, then wait out readers of the old list entry. */
		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
		pserialize_perform(l2tp_psz);
	}
	mutex_exit(&l2tp_hash.lock);
	/*
	 * NOTE(review): the entry is destroyed unconditionally and
	 * re-initialized below; presumably it is always either freshly
	 * initialized or removed at this point -- confirm against the
	 * attach path.
	 */
	PSLIST_ENTRY_DESTROY(sc, l2tp_hash);

	l2tp_variant_update(sc, nvar);
	mutex_exit(&sc->l2tp_lock);

	/* Re-insert under the hash bucket of the new local session id. */
	idx = id_hash_func(nvar->lv_my_sess_id, l2tp_hash.mask);
	if ((ifp->if_flags & IFF_DEBUG) != 0)
		log(LOG_DEBUG, "%s: add hash entry: sess_id=%" PRIu32 ", idx=%" PRIu32 "\n",
		    sc->l2tp_ec.ec_if.if_xname, nvar->lv_my_sess_id, idx);

	PSLIST_ENTRY_INIT(sc, l2tp_hash);
	mutex_enter(&l2tp_hash.lock);
	PSLIST_WRITER_INSERT_HEAD(&l2tp_hash.lists[idx], sc, l2tp_hash);
	mutex_exit(&l2tp_hash.lock);

	/* l2tp_variant_update() drained all users of the old variant. */
	kmem_free(ovar, sizeof(*ovar));
	return 0;
}
1205 
/*
 * Clear both session ids of an l2tp interface.
 *
 * Publishes a copy of the current variant with both ids zeroed and, if
 * the interface was hashed (both old ids nonzero), removes it from the
 * id hash with a pserialize sync first so that no reader still sees
 * the entry.  Always returns 0.
 */
static int
l2tp_clear_session(struct l2tp_softc *sc)
{
	struct l2tp_variant *nvar;
	struct l2tp_variant *ovar;

	/* Allocate the replacement variant before taking any lock. */
	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);

	mutex_enter(&sc->l2tp_lock);
	ovar = sc->l2tp_var;
	*nvar = *ovar;
	psref_target_init(&nvar->lv_psref, lv_psref_class);
	nvar->lv_my_sess_id = 0;
	nvar->lv_peer_sess_id = 0;

	mutex_enter(&l2tp_hash.lock);
	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
		/* Unhash, then wait out readers of the old list entry. */
		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
		pserialize_perform(l2tp_psz);
	}
	mutex_exit(&l2tp_hash.lock);

	l2tp_variant_update(sc, nvar);
	mutex_exit(&sc->l2tp_lock);
	/* l2tp_variant_update() drained all users of the old variant. */
	kmem_free(ovar, sizeof(*ovar));
	return 0;
}
1233 
/*
 * Look up the l2tp variant whose local session id equals "id".
 *
 * Walks the pserialize-protected hash bucket for "id"; on a match,
 * acquires a psref on the variant (which the caller must release) and
 * returns it.  Returns NULL when no session matches.  Safe to call
 * without any lock.
 */
struct l2tp_variant *
l2tp_lookup_session_ref(uint32_t id, struct psref *psref)
{
	int idx;
	int s;
	struct l2tp_softc *sc;

	idx = id_hash_func(id, l2tp_hash.mask);

	s = pserialize_read_enter();
	PSLIST_READER_FOREACH(sc, &l2tp_hash.lists[idx], struct l2tp_softc,
	    l2tp_hash) {
		struct l2tp_variant *var = atomic_load_consume(&sc->l2tp_var);
		if (var == NULL)
			continue;
		if (var->lv_my_sess_id != id)
			continue;
		/* Pin the variant before leaving the read section. */
		psref_acquire(psref, &var->lv_psref, lv_psref_class);
		pserialize_read_exit(s);
		return var;
	}
	pserialize_read_exit(s);
	return NULL;
}
1258 
1259 /*
1260  * l2tp_variant update API.
1261  *
1262  * Assumption:
 * the reader side dereferences sc->l2tp_var inside a read critical
 * section only; that is, no reader accesses sc->l2tp_var after
 * pserialize_perform() returns.
1266  */
/*
 * Atomically publish nvar as sc->l2tp_var (copy-on-write update).
 *
 * After publishing, waits for all pserialize readers and psref holders
 * of the old variant, so the caller may free it on return; the caller
 * retains ownership of the old variant.  nvar may be NULL.  Also
 * refreshes IFF_RUNNING according to whether the new variant has both
 * tunnel addresses.  Must be called with sc->l2tp_lock held.
 */
static void
l2tp_variant_update(struct l2tp_softc *sc, struct l2tp_variant *nvar)
{
	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
	struct l2tp_variant *ovar = sc->l2tp_var;

	KASSERT(mutex_owned(&sc->l2tp_lock));

	/* Publish, then drain readers and references of the old variant. */
	atomic_store_release(&sc->l2tp_var, nvar);
	pserialize_perform(sc->l2tp_psz);
	psref_target_destroy(&ovar->lv_psref, lv_psref_class);

	if (nvar != NULL) {
		/* Interface is running only with both endpoints set. */
		if (nvar->lv_psrc != NULL && nvar->lv_pdst != NULL)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
	}
}
1286 
1287 static int
1288 l2tp_set_cookie(struct l2tp_softc *sc, uint64_t my_cookie, u_int my_cookie_len,
1289     uint64_t peer_cookie, u_int peer_cookie_len)
1290 {
1291 	struct l2tp_variant *nvar;
1292 
1293 	if (my_cookie == 0 || peer_cookie == 0)
1294 		return EINVAL;
1295 
1296 	if (my_cookie_len != 4 && my_cookie_len != 8
1297 	    && peer_cookie_len != 4 && peer_cookie_len != 8)
1298 		return EINVAL;
1299 
1300 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1301 
1302 	mutex_enter(&sc->l2tp_lock);
1303 
1304 	*nvar = *sc->l2tp_var;
1305 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1306 	nvar->lv_my_cookie = my_cookie;
1307 	nvar->lv_my_cookie_len = my_cookie_len;
1308 	nvar->lv_peer_cookie = peer_cookie;
1309 	nvar->lv_peer_cookie_len = peer_cookie_len;
1310 	nvar->lv_use_cookie = L2TP_COOKIE_ON;
1311 	l2tp_variant_update(sc, nvar);
1312 
1313 	mutex_exit(&sc->l2tp_lock);
1314 
1315 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1316 	if ((ifp->if_flags & IFF_DEBUG) != 0) {
1317 		log(LOG_DEBUG,
1318 		    "%s: set cookie: "
1319 		    "local cookie_len=%u local cookie=%" PRIu64 ", "
1320 		    "remote cookie_len=%u remote cookie=%" PRIu64 "\n",
1321 		    ifp->if_xname, my_cookie_len, my_cookie,
1322 		    peer_cookie_len, peer_cookie);
1323 	}
1324 
1325 	return 0;
1326 }
1327 
1328 static void
1329 l2tp_clear_cookie(struct l2tp_softc *sc)
1330 {
1331 	struct l2tp_variant *nvar;
1332 
1333 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1334 
1335 	mutex_enter(&sc->l2tp_lock);
1336 
1337 	*nvar = *sc->l2tp_var;
1338 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1339 	nvar->lv_my_cookie = 0;
1340 	nvar->lv_my_cookie_len = 0;
1341 	nvar->lv_peer_cookie = 0;
1342 	nvar->lv_peer_cookie_len = 0;
1343 	nvar->lv_use_cookie = L2TP_COOKIE_OFF;
1344 	l2tp_variant_update(sc, nvar);
1345 
1346 	mutex_exit(&sc->l2tp_lock);
1347 }
1348 
1349 static void
1350 l2tp_set_state(struct l2tp_softc *sc, int state)
1351 {
1352 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1353 	struct l2tp_variant *nvar;
1354 	int ostate;
1355 
1356 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1357 
1358 	mutex_enter(&sc->l2tp_lock);
1359 
1360 	*nvar = *sc->l2tp_var;
1361 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1362 	ostate = nvar->lv_state;
1363 	nvar->lv_state = state;
1364 	l2tp_variant_update(sc, nvar);
1365 	mutex_exit(&sc->l2tp_lock);
1366 
1367 	if (ostate != state) {
1368 		int lstate;
1369 
1370 		if (state == L2TP_STATE_UP)
1371 			lstate = LINK_STATE_UP;
1372 		else
1373 			lstate = LINK_STATE_DOWN;
1374 
1375 		if_link_state_change(ifp, lstate);
1376 	}
1377 
1378 #ifdef NOTYET
1379 	vlan_linkstate_notify(ifp, ifp->if_link_state);
1380 #endif
1381 }
1382 
1383 static int
1384 l2tp_encap_attach(struct l2tp_variant *var)
1385 {
1386 	int error;
1387 
1388 	if (var == NULL || var->lv_psrc == NULL)
1389 		return EINVAL;
1390 
1391 	switch (var->lv_psrc->sa_family) {
1392 #ifdef INET
1393 	case AF_INET:
1394 		error = in_l2tp_attach(var);
1395 		break;
1396 #endif
1397 #ifdef INET6
1398 	case AF_INET6:
1399 		error = in6_l2tp_attach(var);
1400 		break;
1401 #endif
1402 	default:
1403 		error = EINVAL;
1404 		break;
1405 	}
1406 
1407 	return error;
1408 }
1409 
1410 static int
1411 l2tp_encap_detach(struct l2tp_variant *var)
1412 {
1413 	int error;
1414 
1415 	if (var == NULL || var->lv_psrc == NULL)
1416 		return EINVAL;
1417 
1418 	switch (var->lv_psrc->sa_family) {
1419 #ifdef INET
1420 	case AF_INET:
1421 		error = in_l2tp_detach(var);
1422 		break;
1423 #endif
1424 #ifdef INET6
1425 	case AF_INET6:
1426 		error = in6_l2tp_detach(var);
1427 		break;
1428 #endif
1429 	default:
1430 		error = EINVAL;
1431 		break;
1432 	}
1433 
1434 	return error;
1435 }
1436 
/*
 * Guard against excessive tunnel-in-tunnel nesting on output: thin
 * wrapper that applies the l2tp nesting limit (max_l2tp_nesting) via
 * if_tunnel_check_nesting() and returns its result.
 */
int
l2tp_check_nesting(struct ifnet *ifp, struct mbuf *m)
{

	return if_tunnel_check_nesting(ifp, m, max_l2tp_nesting);
}
1443 
1444 /*
1445  * Module infrastructure
1446  */
1447 #include "if_module.h"
1448 
1449 IF_MODULE(MODULE_CLASS_DRIVER, l2tp, NULL)
1450 
1451 
1452 /* TODO: IP_TCPMSS support */
1453 #ifdef IP_TCPMSS
1454 static int l2tp_need_tcpmss_clamp(struct ifnet *);
1455 #ifdef INET
1456 static struct mbuf *l2tp_tcpmss4_clamp(struct ifnet *, struct mbuf *);
1457 #endif
1458 #ifdef INET6
1459 static struct mbuf *l2tp_tcpmss6_clamp(struct ifnet *, struct mbuf *);
1460 #endif
1461 
/*
 * Clamp the TCP MSS of segments carried in the l2tp ethernet payload.
 * Handles Ether+IPv4, Ether+IPv6, Ether+VLAN+IPv4 and Ether+VLAN+IPv6
 * frames.  Consumes the mbuf chain and returns the (possibly
 * modified) chain, or NULL when the chain was freed (frame too short,
 * or a clamp/prepend failure).
 */
struct mbuf *
l2tp_tcpmss_clamp(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	struct ether_vlan_header evh;

	if (!l2tp_need_tcpmss_clamp(ifp)) {
		return m;
	}

	/*
	 * NOTE(review): frames shorter than an ether_vlan_header are
	 * dropped even when untagged -- confirm this cannot hit
	 * legitimate small frames.
	 */
	if (m->m_pkthdr.len < sizeof(evh)) {
		m_freem(m);
		return NULL;
	}

	/* save ether header */
	m_copydata(m, 0, sizeof(evh), (void *)&evh);
	eh = (struct ether_header *)&evh;	/* eh aliases the evh copy */

	switch (ntohs(eh->ether_type)) {
	case ETHERTYPE_VLAN: /* Ether + VLAN */
		if (m->m_pkthdr.len <= sizeof(struct ether_vlan_header))
			break;
		/* Strip the link header, clamp, then restore it below. */
		m_adj(m, sizeof(struct ether_vlan_header));
		switch (ntohs(evh.evl_proto)) {
#ifdef INET
		case ETHERTYPE_IP: /* Ether + VLAN + IPv4 */
			m = l2tp_tcpmss4_clamp(ifp, m);
			if (m == NULL)
				return NULL;
			break;
#endif /* INET */
#ifdef INET6
		case ETHERTYPE_IPV6: /* Ether + VLAN + IPv6 */
			m = l2tp_tcpmss6_clamp(ifp, m);
			if (m == NULL)
				return NULL;
			break;
#endif /* INET6 */
		default:
			break;
		}

		/* restore ether header */
		M_PREPEND(m, sizeof(struct ether_vlan_header),
		    M_DONTWAIT);
		if (m == NULL)
			return NULL;
		*mtod(m, struct ether_vlan_header *) = evh;
		break;

#ifdef INET
	case ETHERTYPE_IP: /* Ether + IPv4 */
		if (m->m_pkthdr.len <= sizeof(struct ether_header))
			break;
		m_adj(m, sizeof(struct ether_header));
		m = l2tp_tcpmss4_clamp(ifp, m);
		if (m == NULL)
			return NULL;
		/* restore ether header */
		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
		if (m == NULL)
			return NULL;
		*mtod(m, struct ether_header *) = *eh;
		break;
#endif /* INET */

#ifdef INET6
	case ETHERTYPE_IPV6: /* Ether + IPv6 */
		if (m->m_pkthdr.len <= sizeof(struct ether_header))
			break;
		m_adj(m, sizeof(struct ether_header));
		m = l2tp_tcpmss6_clamp(ifp, m);
		if (m == NULL)
			return NULL;
		/* restore ether header */
		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
		if (m == NULL)
			return NULL;
		*mtod(m, struct ether_header *) = *eh;
		break;
#endif /* INET6 */

	default:
		break;
	}

	return m;
}
1551 
/*
 * Return nonzero when the interface has an MSS clamp configured for
 * IPv4 or IPv6.
 */
static int
l2tp_need_tcpmss_clamp(struct ifnet *ifp)
{

#ifdef INET
	if (ifp->if_tcpmss != 0)
		return 1;
#endif

#ifdef INET6
	if (ifp->if_tcpmss6 != 0)
		return 1;
#endif

	return 0;
}
1569 
#ifdef INET
/*
 * Apply the IPv4 MSS clamp: a negative if_tcpmss means "derive from
 * the MTU", otherwise use the configured value directly.
 */
static struct mbuf *
l2tp_tcpmss4_clamp(struct ifnet *ifp, struct mbuf *m)
{

	if (ifp->if_tcpmss == 0)
		return m;

	return ip_tcpmss(m, (ifp->if_tcpmss < 0) ?
	    ifp->if_mtu - IP_TCPMSS_EXTLEN : ifp->if_tcpmss);
}
#endif /* INET */
1583 
#ifdef INET6
/*
 * Apply the IPv6 MSS clamp when the packet is eligible: a negative
 * if_tcpmss6 means "derive from the MTU", otherwise use the
 * configured value directly.
 */
static struct mbuf *
l2tp_tcpmss6_clamp(struct ifnet *ifp, struct mbuf *m)
{
	int ip6hdrlen;

	if (ifp->if_tcpmss6 == 0 || !ip6_tcpmss_applicable(m, &ip6hdrlen))
		return m;

	return ip6_tcpmss(m, ip6hdrlen, (ifp->if_tcpmss6 < 0) ?
	    ifp->if_mtu - IP6_TCPMSS_EXTLEN : ifp->if_tcpmss6);
}
#endif /* INET6 */
1600 
1601 #endif /* IP_TCPMSS */
1602