xref: /netbsd-src/sys/net/if_l2tp.c (revision 122b5006ee1bd67145794b4cde92f4fe4781a5ec)
1 /*	$NetBSD: if_l2tp.c,v 1.47 2021/06/16 00:21:19 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * L2TPv3 kernel interface
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: if_l2tp.c,v 1.47 2021/06/16 00:21:19 riastradh Exp $");
35 
36 #ifdef _KERNEL_OPT
37 #include "opt_inet.h"
38 #include "opt_net_mpsafe.h"
39 #endif
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/mbuf.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/errno.h>
48 #include <sys/ioctl.h>
49 #include <sys/time.h>
50 #include <sys/syslog.h>
51 #include <sys/proc.h>
52 #include <sys/conf.h>
53 #include <sys/kauth.h>
54 #include <sys/cpu.h>
55 #include <sys/cprng.h>
56 #include <sys/intr.h>
57 #include <sys/kmem.h>
58 #include <sys/mutex.h>
59 #include <sys/atomic.h>
60 #include <sys/pserialize.h>
61 #include <sys/device.h>
62 #include <sys/module.h>
63 
64 #include <net/if.h>
65 #include <net/if_dl.h>
66 #include <net/if_ether.h>
67 #include <net/if_types.h>
68 #include <net/netisr.h>
69 #include <net/route.h>
70 #include <net/bpf.h>
71 #include <net/if_vlanvar.h>
72 
73 #include <netinet/in.h>
74 #include <netinet/in_systm.h>
75 #include <netinet/ip.h>
76 #include <netinet/ip_encap.h>
77 #ifdef	INET
78 #include <netinet/in_var.h>
79 #include <netinet/in_l2tp.h>
80 #endif	/* INET */
81 #ifdef INET6
82 #include <netinet6/in6_l2tp.h>
83 #endif
84 
85 #include <net/if_l2tp.h>
86 
87 #include <net/if_vlanvar.h>
88 
89 /* TODO: IP_TCPMSS support */
90 #undef IP_TCPMSS
91 #ifdef IP_TCPMSS
92 #include <netinet/ip_tcpmss.h>
93 #endif
94 
95 /*
96  * l2tp global variable definitions
97  */
98 static struct {
99 	LIST_HEAD(l2tp_sclist, l2tp_softc) list;
100 	kmutex_t lock;
101 } l2tp_softcs __cacheline_aligned;
102 
103 
104 #if !defined(L2TP_ID_HASH_SIZE)
105 #define L2TP_ID_HASH_SIZE 64
106 #endif
107 static struct {
108 	kmutex_t lock;
109 	struct pslist_head *lists;
110 	u_long mask;
111 } l2tp_hash __cacheline_aligned = {
112 	.lists = NULL,
113 };
114 
115 pserialize_t l2tp_psz __read_mostly;
116 struct psref_class *lv_psref_class __read_mostly;
117 
118 static void	l2tp_ifq_init_pc(void *, void *, struct cpu_info *);
119 static void	l2tp_ifq_fini_pc(void *, void *, struct cpu_info *);
120 
121 static int	l2tp_clone_create(struct if_clone *, int);
122 static int	l2tp_clone_destroy(struct ifnet *);
123 
124 struct if_clone l2tp_cloner =
125     IF_CLONE_INITIALIZER("l2tp", l2tp_clone_create, l2tp_clone_destroy);
126 
127 static int	l2tp_tx_enqueue(struct l2tp_variant *, struct mbuf *);
128 static int	l2tp_output(struct ifnet *, struct mbuf *,
129 		    const struct sockaddr *, const struct rtentry *);
130 static void	l2tp_sendit(struct l2tp_variant *, struct mbuf *);
131 static void	l2tpintr(struct l2tp_variant *);
132 static void	l2tpintr_softint(void *);
133 
134 static void	l2tp_hash_init(void);
135 static int	l2tp_hash_fini(void);
136 
137 static void	l2tp_start(struct ifnet *);
138 static int	l2tp_transmit(struct ifnet *, struct mbuf *);
139 
140 static int	l2tp_set_tunnel(struct ifnet *, struct sockaddr *,
141 		    struct sockaddr *);
142 static void	l2tp_delete_tunnel(struct ifnet *);
143 
144 static int	id_hash_func(uint32_t, u_long);
145 
146 static void	l2tp_variant_update(struct l2tp_softc *, struct l2tp_variant *);
147 static int	l2tp_set_session(struct l2tp_softc *, uint32_t, uint32_t);
148 static int	l2tp_clear_session(struct l2tp_softc *);
149 static int	l2tp_set_cookie(struct l2tp_softc *, uint64_t, u_int, uint64_t, u_int);
150 static void	l2tp_clear_cookie(struct l2tp_softc *);
151 static void	l2tp_set_state(struct l2tp_softc *, int);
152 static int	l2tp_encap_attach(struct l2tp_variant *);
153 static int	l2tp_encap_detach(struct l2tp_variant *);
154 
155 static inline struct ifqueue *
156 l2tp_ifq_percpu_getref(percpu_t *pc)
157 {
158 
159 	return *(struct ifqueue **)percpu_getref(pc);
160 }
161 
162 static inline void
163 l2tp_ifq_percpu_putref(percpu_t *pc)
164 {
165 
166 	percpu_putref(pc);
167 }
168 
169 #ifndef MAX_L2TP_NEST
/*
 * This macro controls the upper limit on nesting of l2tp tunnels.
 * Because setting a large value for this macro together with a careless
 * configuration may crash the system, nesting is not allowed by default.
 * If you need to configure nested l2tp tunnels, you can define this macro
 * in your kernel configuration file.  However, if you do so, please be
 * careful to configure the tunnels so that they do not form a loop.
 */
/*
 * XXX
 * Currently, if in_l2tp_output() is called recursively, it tries to take
 * the struct l2tp_ro->lr_lock that it already holds (locking against
 * itself).  So, nested l2tp tunnels are prohibited.
 */
183 #define MAX_L2TP_NEST 0
184 #endif
185 
186 static int max_l2tp_nesting = MAX_L2TP_NEST;
187 
/*
 * Attach entry point.  The count argument is not used.
 */
/* ARGSUSED */
void
l2tpattach(int count)
{
	/*
	 * Intentionally empty: initialization is handled by the module
	 * initialization code in l2tpinit() below.
	 */
}
197 
198 static void
199 l2tpinit(void)
200 {
201 
202 	mutex_init(&l2tp_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
203 	LIST_INIT(&l2tp_softcs.list);
204 
205 	mutex_init(&l2tp_hash.lock, MUTEX_DEFAULT, IPL_NONE);
206 	l2tp_psz = pserialize_create();
207 	lv_psref_class = psref_class_create("l2tpvar", IPL_SOFTNET);
208 	if_clone_attach(&l2tp_cloner);
209 
210 	l2tp_hash_init();
211 }
212 
213 static int
214 l2tpdetach(void)
215 {
216 	int error;
217 
218 	mutex_enter(&l2tp_softcs.lock);
219 	if (!LIST_EMPTY(&l2tp_softcs.list)) {
220 		mutex_exit(&l2tp_softcs.lock);
221 		return EBUSY;
222 	}
223 	mutex_exit(&l2tp_softcs.lock);
224 
225 	error = l2tp_hash_fini();
226 	if (error)
227 		return error;
228 
229 	if_clone_detach(&l2tp_cloner);
230 	psref_class_destroy(lv_psref_class);
231 	pserialize_destroy(l2tp_psz);
232 	mutex_destroy(&l2tp_hash.lock);
233 
234 	mutex_destroy(&l2tp_softcs.lock);
235 
236 	return error;
237 }
238 
239 static int
240 l2tp_clone_create(struct if_clone *ifc, int unit)
241 {
242 	struct l2tp_softc *sc;
243 	struct l2tp_variant *var;
244 	int rv;
245 	u_int si_flags = SOFTINT_NET;
246 #ifdef NET_MPSAFE
247 	si_flags |= SOFTINT_MPSAFE;
248 #endif
249 	sc = kmem_zalloc(sizeof(struct l2tp_softc), KM_SLEEP);
250 	if_initname(&sc->l2tp_ec.ec_if, ifc->ifc_name, unit);
251 	rv = l2tpattach0(sc);
252 	if (rv != 0) {
253 		kmem_free(sc, sizeof(struct l2tp_softc));
254 		return rv;
255 	}
256 
257 	var = kmem_zalloc(sizeof(struct l2tp_variant), KM_SLEEP);
258 	var->lv_softc = sc;
259 	var->lv_state = L2TP_STATE_DOWN;
260 	var->lv_use_cookie = L2TP_COOKIE_OFF;
261 	psref_target_init(&var->lv_psref, lv_psref_class);
262 
263 	sc->l2tp_var = var;
264 	mutex_init(&sc->l2tp_lock, MUTEX_DEFAULT, IPL_NONE);
265 	sc->l2tp_psz = pserialize_create();
266 	PSLIST_ENTRY_INIT(sc, l2tp_hash);
267 
268 	sc->l2tp_ro_percpu = if_tunnel_alloc_ro_percpu();
269 
270 	sc->l2tp_ifq_percpu = percpu_create(sizeof(struct ifqueue *),
271 	    l2tp_ifq_init_pc, l2tp_ifq_fini_pc, NULL);
272 	sc->l2tp_si = softint_establish(si_flags, l2tpintr_softint, sc);
273 
274 	mutex_enter(&l2tp_softcs.lock);
275 	LIST_INSERT_HEAD(&l2tp_softcs.list, sc, l2tp_list);
276 	mutex_exit(&l2tp_softcs.lock);
277 
278 	return (0);
279 }
280 
281 int
282 l2tpattach0(struct l2tp_softc *sc)
283 {
284 
285 	sc->l2tp_ec.ec_if.if_addrlen = 0;
286 	sc->l2tp_ec.ec_if.if_mtu    = L2TP_MTU;
287 	sc->l2tp_ec.ec_if.if_flags  = IFF_POINTOPOINT|IFF_MULTICAST|IFF_SIMPLEX;
288 #ifdef NET_MPSAFE
289 	sc->l2tp_ec.ec_if.if_extflags = IFEF_MPSAFE;
290 #endif
291 	sc->l2tp_ec.ec_if.if_ioctl  = l2tp_ioctl;
292 	sc->l2tp_ec.ec_if.if_output = l2tp_output;
293 	sc->l2tp_ec.ec_if.if_type   = IFT_L2TP;
294 	sc->l2tp_ec.ec_if.if_dlt    = DLT_NULL;
295 	sc->l2tp_ec.ec_if.if_start  = l2tp_start;
296 	sc->l2tp_ec.ec_if.if_transmit = l2tp_transmit;
297 	sc->l2tp_ec.ec_if._if_input = ether_input;
298 	IFQ_SET_READY(&sc->l2tp_ec.ec_if.if_snd);
299 
300 #ifdef MBUFTRACE
301 	struct ethercom *ec = &sc->l2tp_ec;
302 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
303 
304 	strlcpy(ec->ec_tx_mowner.mo_name, ifp->if_xname,
305 	    sizeof(ec->ec_tx_mowner.mo_name));
306 	strlcpy(ec->ec_tx_mowner.mo_descr, "tx",
307 	    sizeof(ec->ec_tx_mowner.mo_descr));
308 	strlcpy(ec->ec_rx_mowner.mo_name, ifp->if_xname,
309 	    sizeof(ec->ec_rx_mowner.mo_name));
310 	strlcpy(ec->ec_rx_mowner.mo_descr, "rx",
311 	    sizeof(ec->ec_rx_mowner.mo_descr));
312 	MOWNER_ATTACH(&ec->ec_tx_mowner);
313 	MOWNER_ATTACH(&ec->ec_rx_mowner);
314 	ifp->if_mowner = &ec->ec_tx_mowner;
315 #endif
316 
317 	/* XXX
318 	 * It may improve performance to use if_initialize()/if_register()
319 	 * so that l2tp_input() calls if_input() instead of
320 	 * if_percpuq_enqueue(). However, that causes recursive softnet_lock
321 	 * when NET_MPSAFE is not set.
322 	 */
323 	if_attach(&sc->l2tp_ec.ec_if);
324 	if_link_state_change(&sc->l2tp_ec.ec_if, LINK_STATE_DOWN);
325 	if_alloc_sadl(&sc->l2tp_ec.ec_if);
326 	bpf_attach(&sc->l2tp_ec.ec_if, DLT_EN10MB, sizeof(struct ether_header));
327 
328 	return 0;
329 }
330 
331 void
332 l2tp_ifq_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
333 {
334 	struct ifqueue **ifqp = p;
335 
336 	*ifqp = kmem_zalloc(sizeof(**ifqp), KM_SLEEP);
337 	(*ifqp)->ifq_maxlen = IFQ_MAXLEN;
338 }
339 
340 void
341 l2tp_ifq_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
342 {
343 	struct ifqueue **ifqp = p;
344 
345 	kmem_free(*ifqp, sizeof(**ifqp));
346 }
347 
348 static int
349 l2tp_clone_destroy(struct ifnet *ifp)
350 {
351 	struct l2tp_variant *var;
352 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
353 	    l2tp_ec.ec_if);
354 
355 	l2tp_clear_session(sc);
356 	l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
357 	/*
358 	 * To avoid for l2tp_transmit() and l2tpintr_softint() to access
359 	 * sc->l2tp_var after free it.
360 	 */
361 	mutex_enter(&sc->l2tp_lock);
362 	var = sc->l2tp_var;
363 	l2tp_variant_update(sc, NULL);
364 	mutex_exit(&sc->l2tp_lock);
365 
366 	softint_disestablish(sc->l2tp_si);
367 	percpu_free(sc->l2tp_ifq_percpu, sizeof(struct ifqueue *));
368 
369 	mutex_enter(&l2tp_softcs.lock);
370 	LIST_REMOVE(sc, l2tp_list);
371 	mutex_exit(&l2tp_softcs.lock);
372 
373 	bpf_detach(ifp);
374 
375 	if_detach(ifp);
376 
377 	if_tunnel_free_ro_percpu(sc->l2tp_ro_percpu);
378 
379 	kmem_free(var, sizeof(struct l2tp_variant));
380 	pserialize_destroy(sc->l2tp_psz);
381 	mutex_destroy(&sc->l2tp_lock);
382 	kmem_free(sc, sizeof(struct l2tp_softc));
383 
384 	return 0;
385 }
386 
387 static int
388 l2tp_tx_enqueue(struct l2tp_variant *var, struct mbuf *m)
389 {
390 	struct l2tp_softc *sc;
391 	struct ifnet *ifp;
392 	struct ifqueue *ifq;
393 	int s;
394 
395 	KASSERT(psref_held(&var->lv_psref, lv_psref_class));
396 
397 	sc = var->lv_softc;
398 	ifp = &sc->l2tp_ec.ec_if;
399 
400 	s = splsoftnet();
401 	ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
402 	if (IF_QFULL(ifq)) {
403 		if_statinc(ifp, if_oerrors);
404 		l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);
405 		splx(s);
406 		m_freem(m);
407 		return ENOBUFS;
408 	}
409 
410 	IF_ENQUEUE(ifq, m);
411 	percpu_putref(sc->l2tp_ifq_percpu);
412 	softint_schedule(sc->l2tp_si);
413 	/* counter is incremented in l2tpintr() */
414 	splx(s);
415 	return 0;
416 }
417 
418 static int
419 l2tp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
420     const struct rtentry *rt)
421 {
422 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
423 	    l2tp_ec.ec_if);
424 	struct l2tp_variant *var;
425 	struct psref psref;
426 	int error = 0;
427 
428 	var = l2tp_getref_variant(sc, &psref);
429 	if (var == NULL) {
430 		m_freem(m);
431 		return ENETDOWN;
432 	}
433 
434 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
435 
436 	m->m_flags &= ~(M_BCAST|M_MCAST);
437 
438 	if ((ifp->if_flags & IFF_UP) == 0) {
439 		m_freem(m);
440 		error = ENETDOWN;
441 		goto end;
442 	}
443 
444 	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
445 		m_freem(m);
446 		error = ENETDOWN;
447 		goto end;
448 	}
449 
450 	/* XXX should we check if our outer source is legal? */
451 
452 	/* use DLT_NULL encapsulation here to pass inner af type */
453 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
454 	if (!m) {
455 		error = ENOBUFS;
456 		goto end;
457 	}
458 	*mtod(m, int *) = dst->sa_family;
459 
460 	error = l2tp_tx_enqueue(var, m);
461 end:
462 	l2tp_putref_variant(var, &psref);
463 	if (error)
464 		if_statinc(ifp, if_oerrors);
465 
466 	return error;
467 }
468 
469 static void
470 l2tp_sendit(struct l2tp_variant *var, struct mbuf *m)
471 {
472 	int len;
473 	int error;
474 	struct l2tp_softc *sc;
475 	struct ifnet *ifp;
476 
477 	KASSERT(psref_held(&var->lv_psref, lv_psref_class));
478 
479 	sc = var->lv_softc;
480 	ifp = &sc->l2tp_ec.ec_if;
481 
482 	len = m->m_pkthdr.len;
483 	m->m_flags &= ~(M_BCAST|M_MCAST);
484 	bpf_mtap(ifp, m, BPF_D_OUT);
485 
486 	switch (var->lv_psrc->sa_family) {
487 #ifdef INET
488 	case AF_INET:
489 		error = in_l2tp_output(var, m);
490 		break;
491 #endif
492 #ifdef INET6
493 	case AF_INET6:
494 		error = in6_l2tp_output(var, m);
495 		break;
496 #endif
497 	default:
498 		m_freem(m);
499 		error = ENETDOWN;
500 		break;
501 	}
502 	if (error) {
503 		if_statinc(ifp, if_oerrors);
504 	} else {
505 		if_statadd2(ifp, if_opackets, 1, if_obytes, len);
506 	}
507 }
508 
509 static void
510 l2tpintr(struct l2tp_variant *var)
511 {
512 	struct l2tp_softc *sc;
513 	struct ifnet *ifp;
514 	struct mbuf *m;
515 	struct ifqueue *ifq;
516 	u_int cpuid = cpu_index(curcpu());
517 
518 	KASSERT(psref_held(&var->lv_psref, lv_psref_class));
519 
520 	sc = var->lv_softc;
521 	ifp = &sc->l2tp_ec.ec_if;
522 
523 	/* output processing */
524 	if (var->lv_my_sess_id == 0 || var->lv_peer_sess_id == 0) {
525 		ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
526 		IF_PURGE(ifq);
527 		l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);
528 		if (cpuid == 0)
529 			IFQ_PURGE(&ifp->if_snd);
530 		return;
531 	}
532 
533 	/* Currently, l2tpintr() is always called in softint context. */
534 	ifq = l2tp_ifq_percpu_getref(sc->l2tp_ifq_percpu);
535 	for (;;) {
536 		IF_DEQUEUE(ifq, m);
537 		if (m != NULL)
538 			l2tp_sendit(var, m);
539 		else
540 			break;
541 	}
542 	l2tp_ifq_percpu_putref(sc->l2tp_ifq_percpu);
543 
544 	if (cpuid == 0) {
545 		for (;;) {
546 			IFQ_DEQUEUE(&ifp->if_snd, m);
547 			if (m != NULL)
548 				l2tp_sendit(var, m);
549 			else
550 				break;
551 		}
552 	}
553 }
554 
555 static void
556 l2tpintr_softint(void *arg)
557 {
558 	struct l2tp_variant *var;
559 	struct psref psref;
560 	struct l2tp_softc *sc = arg;
561 
562 	var = l2tp_getref_variant(sc, &psref);
563 	if (var == NULL)
564 		return;
565 
566 	l2tpintr(var);
567 	l2tp_putref_variant(var, &psref);
568 }
569 
570 void
571 l2tp_input(struct mbuf *m, struct ifnet *ifp)
572 {
573 	vaddr_t addr;
574 
575 	KASSERT(ifp != NULL);
576 
577 	/*
578 	 * Currently, l2tp(4) supports only ethernet as inner protocol.
579 	 */
580 	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
581 		m_freem(m);
582 		return;
583 	}
584 
585 	/*
586 	 * If the head of the payload is not aligned, align it.
587 	 */
588 	addr = mtod(m, vaddr_t);
589 	if ((addr & 0x03) != 0x2) {
590 		/* copy and align head of payload */
591 		struct mbuf *m_head;
592 		int copy_length;
593 		u_int pad = roundup(sizeof(struct ether_header), 4)
594 			- sizeof(struct ether_header);
595 
596 #define L2TP_COPY_LENGTH		60
597 
598 		if (m->m_pkthdr.len < L2TP_COPY_LENGTH) {
599 			copy_length = m->m_pkthdr.len;
600 		} else {
601 			copy_length = L2TP_COPY_LENGTH;
602 		}
603 
604 		if (m->m_len < copy_length) {
605 			m = m_pullup(m, copy_length);
606 			if (m == NULL)
607 				return;
608 		}
609 
610 		MGETHDR(m_head, M_DONTWAIT, MT_HEADER);
611 		if (m_head == NULL) {
612 			m_freem(m);
613 			return;
614 		}
615 		m_move_pkthdr(m_head, m);
616 
617 		/*
618 		 * m_head should be:
619 		 *                             L2TP_COPY_LENGTH
620 		 *                          <-  + roundup(pad, 4) - pad ->
621 		 *   +-------+--------+-----+--------------+-------------+
622 		 *   | m_hdr | pkthdr | ... | ether header |   payload   |
623 		 *   +-------+--------+-----+--------------+-------------+
624 		 *                          ^              ^
625 		 *                          m_data         4 byte aligned
626 		 */
627 		m_align(m_head, L2TP_COPY_LENGTH + roundup(pad, 4));
628 		m_head->m_data += pad;
629 
630 		memcpy(mtod(m_head, void *), mtod(m, void *), copy_length);
631 		m_head->m_len = copy_length;
632 		m->m_data += copy_length;
633 		m->m_len -= copy_length;
634 
635 		/* construct chain */
636 		if (m->m_len == 0) {
637 			m_head->m_next = m_free(m);
638 		} else {
639 			m_head->m_next = m;
640 		}
641 
642 		/* override m */
643 		m = m_head;
644 	}
645 
646 	m_set_rcvif(m, ifp);
647 
648 	/*
649 	 * bpf_mtap() and ifp->if_ipackets++ is done in if_input()
650 	 *
651 	 * obytes is incremented at ether_output() or bridge_enqueue().
652 	 */
653 	if_percpuq_enqueue(ifp->if_percpuq, m);
654 }
655 
656 void
657 l2tp_start(struct ifnet *ifp)
658 {
659 	struct psref psref;
660 	struct l2tp_variant *var;
661 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
662 	    l2tp_ec.ec_if);
663 
664 	var = l2tp_getref_variant(sc, &psref);
665 	if (var == NULL)
666 		return;
667 
668 	if (var->lv_psrc == NULL || var->lv_pdst == NULL)
669 		return;
670 
671 	kpreempt_disable();
672 	softint_schedule(sc->l2tp_si);
673 	kpreempt_enable();
674 	l2tp_putref_variant(var, &psref);
675 }
676 
677 int
678 l2tp_transmit(struct ifnet *ifp, struct mbuf *m)
679 {
680 	int error;
681 	struct psref psref;
682 	struct l2tp_variant *var;
683 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
684 	    l2tp_ec.ec_if);
685 
686 	var = l2tp_getref_variant(sc, &psref);
687 	if (var == NULL) {
688 		m_freem(m);
689 		return ENETDOWN;
690 	}
691 
692 	if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
693 		m_freem(m);
694 		error = ENETDOWN;
695 		goto out;
696 	}
697 
698 	m->m_flags &= ~(M_BCAST|M_MCAST);
699 
700 	error = l2tp_tx_enqueue(var, m);
701 out:
702 	l2tp_putref_variant(var, &psref);
703 	return error;
704 }
705 
706 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
707 int
708 l2tp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
709 {
710 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
711 	    l2tp_ec.ec_if);
712 	struct l2tp_variant *var, *var_tmp;
713 	struct ifreq     *ifr = data;
714 	int error = 0, size;
715 	struct sockaddr *dst, *src;
716 	struct l2tp_req l2tpr;
717 	u_long mtu;
718 	int bound;
719 	struct psref psref;
720 
721 	switch (cmd) {
722 	case SIOCSIFADDR:
723 		ifp->if_flags |= IFF_UP;
724 		break;
725 
726 	case SIOCSIFDSTADDR:
727 		break;
728 
729 	case SIOCADDMULTI:
730 	case SIOCDELMULTI:
731 		switch (ifr->ifr_addr.sa_family) {
732 #ifdef INET
733 		case AF_INET:	/* IP supports Multicast */
734 			break;
735 #endif /* INET */
736 #ifdef INET6
737 		case AF_INET6:	/* IP6 supports Multicast */
738 			break;
739 #endif /* INET6 */
740 		default:  /* Other protocols doesn't support Multicast */
741 			error = EAFNOSUPPORT;
742 			break;
743 		}
744 		break;
745 
746 	case SIOCSIFMTU:
747 		mtu = ifr->ifr_mtu;
748 		if (mtu < L2TP_MTU_MIN || mtu > L2TP_MTU_MAX)
749 			return (EINVAL);
750 		ifp->if_mtu = mtu;
751 		break;
752 
753 #ifdef INET
754 	case SIOCSIFPHYADDR:
755 		src = (struct sockaddr *)
756 			&(((struct in_aliasreq *)data)->ifra_addr);
757 		dst = (struct sockaddr *)
758 			&(((struct in_aliasreq *)data)->ifra_dstaddr);
759 		if (src->sa_family != AF_INET || dst->sa_family != AF_INET)
760 			return EAFNOSUPPORT;
761 		else if (src->sa_len != sizeof(struct sockaddr_in)
762 		    || dst->sa_len != sizeof(struct sockaddr_in))
763 			return EINVAL;
764 
765 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
766 		break;
767 
768 #endif /* INET */
769 #ifdef INET6
770 	case SIOCSIFPHYADDR_IN6:
771 		src = (struct sockaddr *)
772 			&(((struct in6_aliasreq *)data)->ifra_addr);
773 		dst = (struct sockaddr *)
774 			&(((struct in6_aliasreq *)data)->ifra_dstaddr);
775 		if (src->sa_family != AF_INET6 || dst->sa_family != AF_INET6)
776 			return EAFNOSUPPORT;
777 		else if (src->sa_len != sizeof(struct sockaddr_in6)
778 		    || dst->sa_len != sizeof(struct sockaddr_in6))
779 			return EINVAL;
780 
781 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
782 		break;
783 
784 #endif /* INET6 */
785 	case SIOCSLIFPHYADDR:
786 		src = (struct sockaddr *)
787 			&(((struct if_laddrreq *)data)->addr);
788 		dst = (struct sockaddr *)
789 			&(((struct if_laddrreq *)data)->dstaddr);
790 		if (src->sa_family != dst->sa_family)
791 			return EINVAL;
792 		else if (src->sa_family == AF_INET
793 		    && src->sa_len != sizeof(struct sockaddr_in))
794 			return EINVAL;
795 		else if (src->sa_family == AF_INET6
796 		    && src->sa_len != sizeof(struct sockaddr_in6))
797 			return EINVAL;
798 		else if (dst->sa_family == AF_INET
799 		    && dst->sa_len != sizeof(struct sockaddr_in))
800 			return EINVAL;
801 		else if (dst->sa_family == AF_INET6
802 		    && dst->sa_len != sizeof(struct sockaddr_in6))
803 			return EINVAL;
804 
805 		error = l2tp_set_tunnel(&sc->l2tp_ec.ec_if, src, dst);
806 		break;
807 
808 	case SIOCDIFPHYADDR:
809 		l2tp_delete_tunnel(&sc->l2tp_ec.ec_if);
810 		break;
811 
812 	case SIOCGIFPSRCADDR:
813 #ifdef INET6
814 	case SIOCGIFPSRCADDR_IN6:
815 #endif /* INET6 */
816 		bound = curlwp_bind();
817 		var = l2tp_getref_variant(sc, &psref);
818 		if (var == NULL) {
819 			curlwp_bindx(bound);
820 			error = EADDRNOTAVAIL;
821 			goto bad;
822 		}
823 		if (var->lv_psrc == NULL) {
824 			l2tp_putref_variant(var, &psref);
825 			curlwp_bindx(bound);
826 			error = EADDRNOTAVAIL;
827 			goto bad;
828 		}
829 		src = var->lv_psrc;
830 		switch (cmd) {
831 #ifdef INET
832 		case SIOCGIFPSRCADDR:
833 			dst = &ifr->ifr_addr;
834 			size = sizeof(ifr->ifr_addr);
835 			break;
836 #endif /* INET */
837 #ifdef INET6
838 		case SIOCGIFPSRCADDR_IN6:
839 			dst = (struct sockaddr *)
840 				&(((struct in6_ifreq *)data)->ifr_addr);
841 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
842 			break;
843 #endif /* INET6 */
844 		default:
845 			l2tp_putref_variant(var, &psref);
846 			curlwp_bindx(bound);
847 			error = EADDRNOTAVAIL;
848 			goto bad;
849 		}
850 		if (src->sa_len > size) {
851 			l2tp_putref_variant(var, &psref);
852 			curlwp_bindx(bound);
853 			return EINVAL;
854 		}
855 		sockaddr_copy(dst, src->sa_len, src);
856 		l2tp_putref_variant(var, &psref);
857 		curlwp_bindx(bound);
858 		break;
859 
860 	case SIOCGIFPDSTADDR:
861 #ifdef INET6
862 	case SIOCGIFPDSTADDR_IN6:
863 #endif /* INET6 */
864 		bound = curlwp_bind();
865 		var = l2tp_getref_variant(sc, &psref);
866 		if (var == NULL) {
867 			curlwp_bindx(bound);
868 			error = EADDRNOTAVAIL;
869 			goto bad;
870 		}
871 		if (var->lv_pdst == NULL) {
872 			l2tp_putref_variant(var, &psref);
873 			curlwp_bindx(bound);
874 			error = EADDRNOTAVAIL;
875 			goto bad;
876 		}
877 		src = var->lv_pdst;
878 		switch (cmd) {
879 #ifdef INET
880 		case SIOCGIFPDSTADDR:
881 			dst = &ifr->ifr_addr;
882 			size = sizeof(ifr->ifr_addr);
883 			break;
884 #endif /* INET */
885 #ifdef INET6
886 		case SIOCGIFPDSTADDR_IN6:
887 			dst = (struct sockaddr *)
888 				&(((struct in6_ifreq *)data)->ifr_addr);
889 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
890 			break;
891 #endif /* INET6 */
892 		default:
893 			l2tp_putref_variant(var, &psref);
894 			curlwp_bindx(bound);
895 			error = EADDRNOTAVAIL;
896 			goto bad;
897 		}
898 		if (src->sa_len > size) {
899 			l2tp_putref_variant(var, &psref);
900 			curlwp_bindx(bound);
901 			return EINVAL;
902 		}
903 		sockaddr_copy(dst, src->sa_len, src);
904 		l2tp_putref_variant(var, &psref);
905 		curlwp_bindx(bound);
906 		break;
907 
908 	case SIOCGLIFPHYADDR:
909 		bound = curlwp_bind();
910 		var = l2tp_getref_variant(sc, &psref);
911 		if (var == NULL) {
912 			curlwp_bindx(bound);
913 			error = EADDRNOTAVAIL;
914 			goto bad;
915 		}
916 		if (var->lv_psrc == NULL || var->lv_pdst == NULL) {
917 			l2tp_putref_variant(var, &psref);
918 			curlwp_bindx(bound);
919 			error = EADDRNOTAVAIL;
920 			goto bad;
921 		}
922 
923 		/* copy src */
924 		src = var->lv_psrc;
925 		dst = (struct sockaddr *)
926 			&(((struct if_laddrreq *)data)->addr);
927 		size = sizeof(((struct if_laddrreq *)data)->addr);
928 		if (src->sa_len > size) {
929 			l2tp_putref_variant(var, &psref);
930 			curlwp_bindx(bound);
931 			return EINVAL;
932                 }
933 		sockaddr_copy(dst, src->sa_len, src);
934 
935 		/* copy dst */
936 		src = var->lv_pdst;
937 		dst = (struct sockaddr *)
938 			&(((struct if_laddrreq *)data)->dstaddr);
939 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
940 		if (src->sa_len > size) {
941 			l2tp_putref_variant(var, &psref);
942 			curlwp_bindx(bound);
943 			return EINVAL;
944                 }
945 		sockaddr_copy(dst, src->sa_len, src);
946 		l2tp_putref_variant(var, &psref);
947 		curlwp_bindx(bound);
948 		break;
949 
950 	case SIOCSL2TPSESSION:
951 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
952 			break;
953 
954 		/* session id must not zero */
955 		if (l2tpr.my_sess_id == 0 || l2tpr.peer_sess_id == 0)
956 			return EINVAL;
957 
958 		bound = curlwp_bind();
959 		var_tmp = l2tp_lookup_session_ref(l2tpr.my_sess_id, &psref);
960 		if (var_tmp != NULL) {
961 			/* duplicate session id */
962 			log(LOG_WARNING, "%s: duplicate session id %" PRIu32 " of %s\n",
963 				sc->l2tp_ec.ec_if.if_xname, l2tpr.my_sess_id,
964 				var_tmp->lv_softc->l2tp_ec.ec_if.if_xname);
965 			psref_release(&psref, &var_tmp->lv_psref,
966 			    lv_psref_class);
967 			curlwp_bindx(bound);
968 			return EINVAL;
969 		}
970 		curlwp_bindx(bound);
971 
972 		error = l2tp_set_session(sc, l2tpr.my_sess_id, l2tpr.peer_sess_id);
973 		break;
974 	case SIOCDL2TPSESSION:
975 		l2tp_clear_session(sc);
976 		break;
977 	case SIOCSL2TPCOOKIE:
978 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
979 			break;
980 
981 		error = l2tp_set_cookie(sc, l2tpr.my_cookie, l2tpr.my_cookie_len,
982 		    l2tpr.peer_cookie, l2tpr.peer_cookie_len);
983 		break;
984 	case SIOCDL2TPCOOKIE:
985 		l2tp_clear_cookie(sc);
986 		break;
987 	case SIOCSL2TPSTATE:
988 		if ((error = copyin(ifr->ifr_data, &l2tpr, sizeof(l2tpr))) != 0)
989 			break;
990 
991 		l2tp_set_state(sc, l2tpr.state);
992 		break;
993 	case SIOCGL2TP:
994 		/* get L2TPV3 session info */
995 		memset(&l2tpr, 0, sizeof(l2tpr));
996 
997 		bound = curlwp_bind();
998 		var = l2tp_getref_variant(sc, &psref);
999 		if (var == NULL) {
1000 			curlwp_bindx(bound);
1001 			error = EADDRNOTAVAIL;
1002 			goto bad;
1003 		}
1004 
1005 		l2tpr.state = var->lv_state;
1006 		l2tpr.my_sess_id = var->lv_my_sess_id;
1007 		l2tpr.peer_sess_id = var->lv_peer_sess_id;
1008 		l2tpr.my_cookie = var->lv_my_cookie;
1009 		l2tpr.my_cookie_len = var->lv_my_cookie_len;
1010 		l2tpr.peer_cookie = var->lv_peer_cookie;
1011 		l2tpr.peer_cookie_len = var->lv_peer_cookie_len;
1012 		l2tp_putref_variant(var, &psref);
1013 		curlwp_bindx(bound);
1014 
1015 		error = copyout(&l2tpr, ifr->ifr_data, sizeof(l2tpr));
1016 		break;
1017 
1018 	default:
1019 		error =	ifioctl_common(ifp, cmd, data);
1020 		break;
1021 	}
1022  bad:
1023 	return error;
1024 }
1025 
1026 static int
1027 l2tp_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
1028 {
1029 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
1030 	    l2tp_ec.ec_if);
1031 	struct sockaddr *osrc, *odst;
1032 	struct sockaddr *nsrc, *ndst;
1033 	struct l2tp_variant *ovar, *nvar;
1034 	int error;
1035 
1036 	nsrc = sockaddr_dup(src, M_WAITOK);
1037 	ndst = sockaddr_dup(dst, M_WAITOK);
1038 
1039 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1040 
1041 	error = encap_lock_enter();
1042 	if (error)
1043 		goto error;
1044 
1045 	mutex_enter(&sc->l2tp_lock);
1046 
1047 	ovar = sc->l2tp_var;
1048 	osrc = ovar->lv_psrc;
1049 	odst = ovar->lv_pdst;
1050 	*nvar = *ovar;
1051 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1052 	nvar->lv_psrc = nsrc;
1053 	nvar->lv_pdst = ndst;
1054 	error = l2tp_encap_attach(nvar);
1055 	if (error) {
1056 		mutex_exit(&sc->l2tp_lock);
1057 		encap_lock_exit();
1058 		goto error;
1059 	}
1060 	l2tp_variant_update(sc, nvar);
1061 
1062 	mutex_exit(&sc->l2tp_lock);
1063 
1064 	(void)l2tp_encap_detach(ovar);
1065 	encap_lock_exit();
1066 
1067 	if (osrc)
1068 		sockaddr_free(osrc);
1069 	if (odst)
1070 		sockaddr_free(odst);
1071 	kmem_free(ovar, sizeof(*ovar));
1072 	return 0;
1073 
1074 error:
1075 	sockaddr_free(nsrc);
1076 	sockaddr_free(ndst);
1077 	kmem_free(nvar, sizeof(*nvar));
1078 
1079 	return error;
1080 }
1081 
1082 static void
1083 l2tp_delete_tunnel(struct ifnet *ifp)
1084 {
1085 	struct l2tp_softc *sc = container_of(ifp, struct l2tp_softc,
1086 	    l2tp_ec.ec_if);
1087 	struct sockaddr *osrc, *odst;
1088 	struct l2tp_variant *ovar, *nvar;
1089 	int error;
1090 
1091 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1092 
1093 	error = encap_lock_enter();
1094 	if (error) {
1095 		kmem_free(nvar, sizeof(*nvar));
1096 		return;
1097 	}
1098 	mutex_enter(&sc->l2tp_lock);
1099 
1100 	ovar = sc->l2tp_var;
1101 	osrc = ovar->lv_psrc;
1102 	odst = ovar->lv_pdst;
1103 	*nvar = *ovar;
1104 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1105 	nvar->lv_psrc = NULL;
1106 	nvar->lv_pdst = NULL;
1107 	l2tp_variant_update(sc, nvar);
1108 
1109 	mutex_exit(&sc->l2tp_lock);
1110 
1111 	(void)l2tp_encap_detach(ovar);
1112 	encap_lock_exit();
1113 
1114 	if (osrc)
1115 		sockaddr_free(osrc);
1116 	if (odst)
1117 		sockaddr_free(odst);
1118 	kmem_free(ovar, sizeof(*ovar));
1119 }
1120 
/*
 * Map a session id to a hash bucket index.
 *
 * The upper 16 bits of the id are folded into the lower 16 bits, the
 * result is folded once more by 4 bits, and the outcome is masked with
 * mask.
 */
static int
id_hash_func(uint32_t id, u_long mask)
{
	uint32_t folded;

	folded = id ^ (id >> 16);
	folded ^= folded >> 4;

	return folded & mask;
}
1131 
1132 static void
1133 l2tp_hash_init(void)
1134 {
1135 
1136 	l2tp_hash.lists = hashinit(L2TP_ID_HASH_SIZE, HASH_PSLIST, true,
1137 	    &l2tp_hash.mask);
1138 }
1139 
1140 static int
1141 l2tp_hash_fini(void)
1142 {
1143 	int i;
1144 
1145 	mutex_enter(&l2tp_hash.lock);
1146 
1147 	for (i = 0; i < l2tp_hash.mask + 1; i++) {
1148 		if (PSLIST_WRITER_FIRST(&l2tp_hash.lists[i], struct l2tp_softc,
1149 			l2tp_hash) != NULL) {
1150 			mutex_exit(&l2tp_hash.lock);
1151 			return EBUSY;
1152 		}
1153 	}
1154 	for (i = 0; i < l2tp_hash.mask + 1; i++)
1155 		PSLIST_DESTROY(&l2tp_hash.lists[i]);
1156 
1157 	mutex_exit(&l2tp_hash.lock);
1158 
1159 	hashdone(l2tp_hash.lists, HASH_PSLIST, l2tp_hash.mask);
1160 
1161 	return 0;
1162 }
1163 
1164 static int
1165 l2tp_set_session(struct l2tp_softc *sc, uint32_t my_sess_id,
1166     uint32_t peer_sess_id)
1167 {
1168 	uint32_t idx;
1169 	struct l2tp_variant *nvar;
1170 	struct l2tp_variant *ovar;
1171 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1172 
1173 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1174 
1175 	mutex_enter(&sc->l2tp_lock);
1176 	ovar = sc->l2tp_var;
1177 	*nvar = *ovar;
1178 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1179 	nvar->lv_my_sess_id = my_sess_id;
1180 	nvar->lv_peer_sess_id = peer_sess_id;
1181 
1182 	mutex_enter(&l2tp_hash.lock);
1183 	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
1184 		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
1185 		pserialize_perform(l2tp_psz);
1186 	}
1187 	mutex_exit(&l2tp_hash.lock);
1188 	PSLIST_ENTRY_DESTROY(sc, l2tp_hash);
1189 
1190 	l2tp_variant_update(sc, nvar);
1191 	mutex_exit(&sc->l2tp_lock);
1192 
1193 	idx = id_hash_func(nvar->lv_my_sess_id, l2tp_hash.mask);
1194 	if ((ifp->if_flags & IFF_DEBUG) != 0)
1195 		log(LOG_DEBUG, "%s: add hash entry: sess_id=%" PRIu32 ", idx=%" PRIu32 "\n",
1196 		    sc->l2tp_ec.ec_if.if_xname, nvar->lv_my_sess_id, idx);
1197 
1198 	PSLIST_ENTRY_INIT(sc, l2tp_hash);
1199 	mutex_enter(&l2tp_hash.lock);
1200 	PSLIST_WRITER_INSERT_HEAD(&l2tp_hash.lists[idx], sc, l2tp_hash);
1201 	mutex_exit(&l2tp_hash.lock);
1202 
1203 	kmem_free(ovar, sizeof(*ovar));
1204 	return 0;
1205 }
1206 
1207 static int
1208 l2tp_clear_session(struct l2tp_softc *sc)
1209 {
1210 	struct l2tp_variant *nvar;
1211 	struct l2tp_variant *ovar;
1212 
1213 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1214 
1215 	mutex_enter(&sc->l2tp_lock);
1216 	ovar = sc->l2tp_var;
1217 	*nvar = *ovar;
1218 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1219 	nvar->lv_my_sess_id = 0;
1220 	nvar->lv_peer_sess_id = 0;
1221 
1222 	mutex_enter(&l2tp_hash.lock);
1223 	if (ovar->lv_my_sess_id > 0 && ovar->lv_peer_sess_id > 0) {
1224 		PSLIST_WRITER_REMOVE(sc, l2tp_hash);
1225 		pserialize_perform(l2tp_psz);
1226 	}
1227 	mutex_exit(&l2tp_hash.lock);
1228 
1229 	l2tp_variant_update(sc, nvar);
1230 	mutex_exit(&sc->l2tp_lock);
1231 	kmem_free(ovar, sizeof(*ovar));
1232 	return 0;
1233 }
1234 
1235 struct l2tp_variant *
1236 l2tp_lookup_session_ref(uint32_t id, struct psref *psref)
1237 {
1238 	int idx;
1239 	int s;
1240 	struct l2tp_softc *sc;
1241 
1242 	idx = id_hash_func(id, l2tp_hash.mask);
1243 
1244 	s = pserialize_read_enter();
1245 	PSLIST_READER_FOREACH(sc, &l2tp_hash.lists[idx], struct l2tp_softc,
1246 	    l2tp_hash) {
1247 		struct l2tp_variant *var = atomic_load_consume(&sc->l2tp_var);
1248 		if (var == NULL)
1249 			continue;
1250 		if (var->lv_my_sess_id != id)
1251 			continue;
1252 		psref_acquire(psref, &var->lv_psref, lv_psref_class);
1253 		pserialize_read_exit(s);
1254 		return var;
1255 	}
1256 	pserialize_read_exit(s);
1257 	return NULL;
1258 }
1259 
1260 /*
1261  * l2tp_variant update API.
1262  *
1263  * Assumption:
1264  * reader side dereferences sc->l2tp_var in reader critical section only,
1265  * that is, all of reader sides do not reader the sc->l2tp_var after
1266  * pserialize_perform().
1267  */
1268 static void
1269 l2tp_variant_update(struct l2tp_softc *sc, struct l2tp_variant *nvar)
1270 {
1271 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1272 	struct l2tp_variant *ovar = sc->l2tp_var;
1273 
1274 	KASSERT(mutex_owned(&sc->l2tp_lock));
1275 
1276 	atomic_store_release(&sc->l2tp_var, nvar);
1277 	pserialize_perform(sc->l2tp_psz);
1278 	psref_target_destroy(&ovar->lv_psref, lv_psref_class);
1279 
1280 	if (nvar != NULL) {
1281 		if (nvar->lv_psrc != NULL && nvar->lv_pdst != NULL)
1282 			ifp->if_flags |= IFF_RUNNING;
1283 		else
1284 			ifp->if_flags &= ~IFF_RUNNING;
1285 	}
1286 }
1287 
1288 static int
1289 l2tp_set_cookie(struct l2tp_softc *sc, uint64_t my_cookie, u_int my_cookie_len,
1290     uint64_t peer_cookie, u_int peer_cookie_len)
1291 {
1292 	struct l2tp_variant *nvar;
1293 
1294 	if (my_cookie == 0 || peer_cookie == 0)
1295 		return EINVAL;
1296 
1297 	if (my_cookie_len != 4 && my_cookie_len != 8
1298 	    && peer_cookie_len != 4 && peer_cookie_len != 8)
1299 		return EINVAL;
1300 
1301 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1302 
1303 	mutex_enter(&sc->l2tp_lock);
1304 
1305 	*nvar = *sc->l2tp_var;
1306 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1307 	nvar->lv_my_cookie = my_cookie;
1308 	nvar->lv_my_cookie_len = my_cookie_len;
1309 	nvar->lv_peer_cookie = peer_cookie;
1310 	nvar->lv_peer_cookie_len = peer_cookie_len;
1311 	nvar->lv_use_cookie = L2TP_COOKIE_ON;
1312 	l2tp_variant_update(sc, nvar);
1313 
1314 	mutex_exit(&sc->l2tp_lock);
1315 
1316 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1317 	if ((ifp->if_flags & IFF_DEBUG) != 0) {
1318 		log(LOG_DEBUG,
1319 		    "%s: set cookie: "
1320 		    "local cookie_len=%u local cookie=%" PRIu64 ", "
1321 		    "remote cookie_len=%u remote cookie=%" PRIu64 "\n",
1322 		    ifp->if_xname, my_cookie_len, my_cookie,
1323 		    peer_cookie_len, peer_cookie);
1324 	}
1325 
1326 	return 0;
1327 }
1328 
1329 static void
1330 l2tp_clear_cookie(struct l2tp_softc *sc)
1331 {
1332 	struct l2tp_variant *nvar;
1333 
1334 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1335 
1336 	mutex_enter(&sc->l2tp_lock);
1337 
1338 	*nvar = *sc->l2tp_var;
1339 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1340 	nvar->lv_my_cookie = 0;
1341 	nvar->lv_my_cookie_len = 0;
1342 	nvar->lv_peer_cookie = 0;
1343 	nvar->lv_peer_cookie_len = 0;
1344 	nvar->lv_use_cookie = L2TP_COOKIE_OFF;
1345 	l2tp_variant_update(sc, nvar);
1346 
1347 	mutex_exit(&sc->l2tp_lock);
1348 }
1349 
1350 static void
1351 l2tp_set_state(struct l2tp_softc *sc, int state)
1352 {
1353 	struct ifnet *ifp = &sc->l2tp_ec.ec_if;
1354 	struct l2tp_variant *nvar;
1355 	int ostate;
1356 
1357 	nvar = kmem_alloc(sizeof(*nvar), KM_SLEEP);
1358 
1359 	mutex_enter(&sc->l2tp_lock);
1360 
1361 	*nvar = *sc->l2tp_var;
1362 	psref_target_init(&nvar->lv_psref, lv_psref_class);
1363 	ostate = nvar->lv_state;
1364 	nvar->lv_state = state;
1365 	l2tp_variant_update(sc, nvar);
1366 	mutex_exit(&sc->l2tp_lock);
1367 
1368 	if (ostate != state) {
1369 		int lstate;
1370 
1371 		if (state == L2TP_STATE_UP)
1372 			lstate = LINK_STATE_UP;
1373 		else
1374 			lstate = LINK_STATE_DOWN;
1375 
1376 		if_link_state_change(ifp, lstate);
1377 	}
1378 
1379 #ifdef NOTYET
1380 	vlan_linkstate_notify(ifp, ifp->if_link_state);
1381 #endif
1382 }
1383 
1384 static int
1385 l2tp_encap_attach(struct l2tp_variant *var)
1386 {
1387 	int error;
1388 
1389 	if (var == NULL || var->lv_psrc == NULL)
1390 		return EINVAL;
1391 
1392 	switch (var->lv_psrc->sa_family) {
1393 #ifdef INET
1394 	case AF_INET:
1395 		error = in_l2tp_attach(var);
1396 		break;
1397 #endif
1398 #ifdef INET6
1399 	case AF_INET6:
1400 		error = in6_l2tp_attach(var);
1401 		break;
1402 #endif
1403 	default:
1404 		error = EINVAL;
1405 		break;
1406 	}
1407 
1408 	return error;
1409 }
1410 
1411 static int
1412 l2tp_encap_detach(struct l2tp_variant *var)
1413 {
1414 	int error;
1415 
1416 	if (var == NULL || var->lv_psrc == NULL)
1417 		return EINVAL;
1418 
1419 	switch (var->lv_psrc->sa_family) {
1420 #ifdef INET
1421 	case AF_INET:
1422 		error = in_l2tp_detach(var);
1423 		break;
1424 #endif
1425 #ifdef INET6
1426 	case AF_INET6:
1427 		error = in6_l2tp_detach(var);
1428 		break;
1429 #endif
1430 	default:
1431 		error = EINVAL;
1432 		break;
1433 	}
1434 
1435 	return error;
1436 }
1437 
1438 int
1439 l2tp_check_nesting(struct ifnet *ifp, struct mbuf *m)
1440 {
1441 
1442 	return if_tunnel_check_nesting(ifp, m, max_l2tp_nesting);
1443 }
1444 
1445 /*
1446  * Module infrastructure
1447  */
1448 #include "if_module.h"
1449 
1450 IF_MODULE(MODULE_CLASS_DRIVER, l2tp, NULL)
1451 
1452 
1453 /* TODO: IP_TCPMSS support */
1454 #ifdef IP_TCPMSS
1455 static int l2tp_need_tcpmss_clamp(struct ifnet *);
1456 #ifdef INET
1457 static struct mbuf *l2tp_tcpmss4_clamp(struct ifnet *, struct mbuf *);
1458 #endif
1459 #ifdef INET6
1460 static struct mbuf *l2tp_tcpmss6_clamp(struct ifnet *, struct mbuf *);
1461 #endif
1462 
1463 struct mbuf *
1464 l2tp_tcpmss_clamp(struct ifnet *ifp, struct mbuf *m)
1465 {
1466 	struct ether_header *eh;
1467 	struct ether_vlan_header evh;
1468 
1469 	if (!l2tp_need_tcpmss_clamp(ifp)) {
1470 		return m;
1471 	}
1472 
1473 	if (m->m_pkthdr.len < sizeof(evh)) {
1474 		m_freem(m);
1475 		return NULL;
1476 	}
1477 
1478 	/* save ether header */
1479 	m_copydata(m, 0, sizeof(evh), (void *)&evh);
1480 	eh = (struct ether_header *)&evh;
1481 
1482 	switch (ntohs(eh->ether_type)) {
1483 	case ETHERTYPE_VLAN: /* Ether + VLAN */
1484 		if (m->m_pkthdr.len <= sizeof(struct ether_vlan_header))
1485 			break;
1486 		m_adj(m, sizeof(struct ether_vlan_header));
1487 		switch (ntohs(evh.evl_proto)) {
1488 #ifdef INET
1489 		case ETHERTYPE_IP: /* Ether + VLAN + IPv4 */
1490 			m = l2tp_tcpmss4_clamp(ifp, m);
1491 			if (m == NULL)
1492 				return NULL;
1493 			break;
1494 #endif /* INET */
1495 #ifdef INET6
1496 		case ETHERTYPE_IPV6: /* Ether + VLAN + IPv6 */
1497 			m = l2tp_tcpmss6_clamp(ifp, m);
1498 			if (m == NULL)
1499 				return NULL;
1500 			break;
1501 #endif /* INET6 */
1502 		default:
1503 			break;
1504 		}
1505 
1506 		/* restore ether header */
1507 		M_PREPEND(m, sizeof(struct ether_vlan_header),
1508 		    M_DONTWAIT);
1509 		if (m == NULL)
1510 			return NULL;
1511 		*mtod(m, struct ether_vlan_header *) = evh;
1512 		break;
1513 
1514 #ifdef INET
1515 	case ETHERTYPE_IP: /* Ether + IPv4 */
1516 		if (m->m_pkthdr.len <= sizeof(struct ether_header))
1517 			break;
1518 		m_adj(m, sizeof(struct ether_header));
1519 		m = l2tp_tcpmss4_clamp(ifp, m);
1520 		if (m == NULL)
1521 			return NULL;
1522 		/* restore ether header */
1523 		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
1524 		if (m == NULL)
1525 			return NULL;
1526 		*mtod(m, struct ether_header *) = *eh;
1527 		break;
1528 #endif /* INET */
1529 
1530 #ifdef INET6
1531 	case ETHERTYPE_IPV6: /* Ether + IPv6 */
1532 		if (m->m_pkthdr.len <= sizeof(struct ether_header))
1533 			break;
1534 		m_adj(m, sizeof(struct ether_header));
1535 		m = l2tp_tcpmss6_clamp(ifp, m);
1536 		if (m == NULL)
1537 			return NULL;
1538 		/* restore ether header */
1539 		M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
1540 		if (m == NULL)
1541 			return NULL;
1542 		*mtod(m, struct ether_header *) = *eh;
1543 		break;
1544 #endif /* INET6 */
1545 
1546 	default:
1547 		break;
1548 	}
1549 
1550 	return m;
1551 }
1552 
/*
 * Tell whether TCP MSS clamping is configured on "ifp": returns 1 if
 * the IPv4 or the IPv6 setting is non-zero (each only considered when
 * the respective protocol is compiled in), 0 otherwise.
 */
static int
l2tp_need_tcpmss_clamp(struct ifnet *ifp)
{

#ifdef INET
	if (ifp->if_tcpmss != 0)
		return 1;
#endif

#ifdef INET6
	if (ifp->if_tcpmss6 != 0)
		return 1;
#endif

	return 0;
}
1570 
#ifdef INET
/*
 * Clamp the TCP MSS of an IPv4 packet if ifp->if_tcpmss is non-zero.
 * A negative setting selects a limit derived from the interface MTU
 * (if_mtu - IP_TCPMSS_EXTLEN); a positive one is used as is.  Returns
 * the mbuf from ip_tcpmss(), or "m" unchanged when clamping is off.
 */
static struct mbuf *
l2tp_tcpmss4_clamp(struct ifnet *ifp, struct mbuf *m)
{

	if (ifp->if_tcpmss == 0)
		return m;

	return ip_tcpmss(m, (ifp->if_tcpmss < 0) ?
	    ifp->if_mtu - IP_TCPMSS_EXTLEN : ifp->if_tcpmss);
}
#endif /* INET */
1584 
#ifdef INET6
/*
 * Clamp the TCP MSS of an IPv6 packet if ifp->if_tcpmss6 is non-zero
 * and ip6_tcpmss_applicable() accepts the packet.  A negative setting
 * selects a limit derived from the interface MTU
 * (if_mtu - IP6_TCPMSS_EXTLEN); a positive one is used as is.  Returns
 * the mbuf from ip6_tcpmss(), or "m" unchanged when nothing is done.
 */
static struct mbuf *
l2tp_tcpmss6_clamp(struct ifnet *ifp, struct mbuf *m)
{
	int hdrlen;

	if (ifp->if_tcpmss6 == 0 || !ip6_tcpmss_applicable(m, &hdrlen))
		return m;

	return ip6_tcpmss(m, hdrlen, (ifp->if_tcpmss6 < 0) ?
	    ifp->if_mtu - IP6_TCPMSS_EXTLEN : ifp->if_tcpmss6);
}
#endif /* INET6 */
1601 
1602 #endif /* IP_TCPMSS */
1603