xref: /netbsd-src/sys/net/if_ipsec.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$NetBSD: if_ipsec.c,v 1.31 2021/10/11 05:13:11 knakahara Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.31 2021/10/11 05:13:11 knakahara Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/atomic.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/errno.h>
44 #include <sys/ioctl.h>
45 #include <sys/time.h>
46 #include <sys/syslog.h>
47 #include <sys/cpu.h>
48 #include <sys/kmem.h>
49 #include <sys/mutex.h>
50 #include <sys/pserialize.h>
51 #include <sys/psref.h>
52 #include <sys/sysctl.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/route.h>
57 #include <net/bpf.h>
58 #include <net/pfkeyv2.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #ifdef	INET
64 #include <netinet/in_var.h>
65 #endif	/* INET */
66 
67 #ifdef INET6
68 #include <netinet6/in6_var.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_encap.h>
74 
75 #include <net/if_ipsec.h>
76 
77 #include <net/raw_cb.h>
78 #include <net/pfkeyv2.h>
79 
80 #include <netipsec/key.h>
81 #include <netipsec/keydb.h> /* for union sockaddr_union */
82 #include <netipsec/ipsec.h>
83 #include <netipsec/ipsecif.h>
84 
85 static int if_ipsec_clone_create(struct if_clone *, int);
86 static int if_ipsec_clone_destroy(struct ifnet *);
87 
88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
90 
91 static int if_ipsec_encap_attach(struct ipsec_variant *);
92 static int if_ipsec_encap_detach(struct ipsec_variant *);
93 static int if_ipsec_set_tunnel(struct ifnet *,
94     struct sockaddr *, struct sockaddr *);
95 static void if_ipsec_delete_tunnel(struct ifnet *);
96 static int if_ipsec_ensure_flags(struct ifnet *, u_short);
97 static void if_ipsec_attach0(struct ipsec_softc *);
98 
99 static int if_ipsec_update_variant(struct ipsec_softc *,
100     struct ipsec_variant *, struct ipsec_variant *);
101 
102 /* sadb_msg */
103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
104 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
106     struct sockaddr *, int, uint16_t);
107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
108     struct sockaddr *, int);
109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
110     struct sockaddr *, int);
111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
112     struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t,
113     struct sockaddr *, struct sockaddr *);
114 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
115 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
116 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
117 /* SPD */
118 static int if_ipsec_share_sp(struct ipsec_variant *);
119 static int if_ipsec_unshare_sp(struct ipsec_variant *);
120 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
121     in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
122 static inline int if_ipsec_del_sp0(struct secpolicy *);
123 static int if_ipsec_add_sp(struct ipsec_variant *,
124     struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
125 static void if_ipsec_del_sp(struct ipsec_variant *);
126 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
127     struct ipsec_variant *);
128 
129 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
130     in_port_t);
131 #define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target)			\
132 	if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
133 #define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target)			\
134 	if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
135 
136 /*
137  * ipsec global variable definitions
138  */
139 
/*
 * List of all ipsec(4) softcs together with the mutex protecting it.
 * Entries are inserted by if_ipsec_clone_create() and removed by
 * if_ipsec_clone_destroy().
 * This list is used in ioctl context only.
 */
static struct {
	LIST_HEAD(ipsec_sclist, ipsec_softc) list;
	kmutex_t lock;
} ipsec_softcs __cacheline_aligned;

/* psref class for struct ipsec_variant; created in ipsecifattach(). */
struct psref_class *iv_psref_class __read_mostly;

/* Cloner for "ipsec" interfaces; attached in ipsecifattach(). */
struct if_clone ipsec_cloner =
    IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
/* Limit handed to if_tunnel_check_nesting() by if_ipsec_check_nesting(). */
static int max_ipsec_nesting = MAX_IPSEC_NEST;

/* sysctl log for the global nodes created by if_ipsec_sysctl_setup(). */
static struct sysctllog *if_ipsec_sysctl;

/*
 * RPS hash function used when enqueueing received packets; set to
 * pktq_rps_hash_default in ipsecifattach() and exposed through the
 * "rps_hash" sysctl node.
 */
static pktq_rps_hash_func_t if_ipsec_pktq_rps_hash_p;
155 
156 #ifdef INET6
157 static int
158 sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)
159 {
160 	int error, pmtu;
161 	struct sysctlnode node = *rnode;
162 
163 	pmtu = ip6_ipsec_pmtu;
164 	node.sysctl_data = &pmtu;
165 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
166 	if (error || newp == NULL)
167 		return error;
168 
169 	switch (pmtu) {
170 	case IPSEC_PMTU_MINMTU:
171 	case IPSEC_PMTU_OUTERMTU:
172 		ip6_ipsec_pmtu = pmtu;
173 		break;
174 	default:
175 		return EINVAL;
176 	}
177 
178 	return 0;
179 }
180 
181 static int
182 sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)
183 {
184 	int error, pmtu;
185 	struct sysctlnode node = *rnode;
186 	struct ipsec_softc *sc = (struct ipsec_softc *)node.sysctl_data;
187 
188 	pmtu = sc->ipsec_pmtu;
189 	node.sysctl_data = &pmtu;
190 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
191 	if (error || newp == NULL)
192 		return error;
193 
194 	switch (pmtu) {
195 	case IPSEC_PMTU_SYSDEFAULT:
196 	case IPSEC_PMTU_MINMTU:
197 	case IPSEC_PMTU_OUTERMTU:
198 		sc->ipsec_pmtu = pmtu;
199 		break;
200 	default:
201 		return EINVAL;
202 	}
203 
204 	return 0;
205 }
206 #endif
207 
208 static void
209 if_ipsec_sysctl_setup(void)
210 {
211 	const struct sysctlnode *node = NULL;
212 
213 	if_ipsec_sysctl = NULL;
214 
215 #ifdef INET6
216 	/*
217 	 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error.
218 	 */
219 	sysctl_createv(NULL, 0, NULL, NULL,
220 		       CTLFLAG_PERMANENT,
221 		       CTLTYPE_NODE, "inet6",
222 		       SYSCTL_DESCR("PF_INET6 related settings"),
223 		       NULL, 0, NULL, 0,
224 		       CTL_NET, PF_INET6, CTL_EOL);
225 	sysctl_createv(NULL, 0, NULL, NULL,
226 		       CTLFLAG_PERMANENT,
227 		       CTLTYPE_NODE, "ip6",
228 		       SYSCTL_DESCR("IPv6 related settings"),
229 		       NULL, 0, NULL, 0,
230 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
231 
232 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
233 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
234 		       CTLTYPE_INT, "ipsecifhlim",
235 		       SYSCTL_DESCR("Default hop limit for a ipsec tunnel datagram"),
236 		       NULL, 0, &ip6_ipsec_hlim, 0,
237 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
238 		       IPV6CTL_IPSEC_HLIM, CTL_EOL);
239 
240 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
241 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
242 		       CTLTYPE_INT, "ipsecifpmtu",
243 		       SYSCTL_DESCR("Default Path MTU setting for ipsec tunnels"),
244 		       sysctl_if_ipsec_pmtu_global, 0, NULL, 0,
245 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
246 		       IPV6CTL_IPSEC_PMTU, CTL_EOL);
247 #endif
248 
249 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, &node,
250 	    CTLFLAG_PERMANENT,
251 	    CTLTYPE_NODE, "ipsecif",
252 	    SYSCTL_DESCR("ipsecif global control"),
253 	    NULL, 0, NULL, 0,
254 	    CTL_NET, CTL_CREATE, CTL_EOL);
255 
256 	sysctl_createv(&if_ipsec_sysctl, 0, &node, NULL,
257 	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
258 	    CTLTYPE_STRING, "rps_hash",
259 	    SYSCTL_DESCR("Interface rps hash function control"),
260 	    sysctl_pktq_rps_hash_handler, 0, (void *)&if_ipsec_pktq_rps_hash_p,
261 	    PKTQ_RPS_HASH_NAME_LEN,
262 	    CTL_CREATE, CTL_EOL);
263 }
264 
/*
 * Create the per-interface "pmtu" sysctl node for an ipsec(4) interface.
 *
 * clog: sysctl log the created nodes are recorded in.
 * sc:   softc of the interface; passed to the node as its sysctl_data and
 *       consumed by sysctl_if_ipsec_pmtu_perif().
 *
 * Only does work when INET6 is configured.  A failure is reported with
 * log() and is not fatal.
 */
static void
if_ipsec_perif_sysctl_setup(struct sysctllog **clog, struct ipsec_softc *sc)
{
#ifdef INET6
	const struct sysctlnode *rnode = NULL;
	struct ifnet *ifp = &sc->ipsec_if;
	const char *ifname = ifp->if_xname;
	int rv;

	/*
	 * Give the setting its default before the node that exposes it is
	 * created, so the handler never observes (or has a write clobbered
	 * by) a late initialisation.
	 */
	sc->ipsec_pmtu = IPSEC_PMTU_SYSDEFAULT;

	/*
	 * Already created in sysctl_sndq_setup(); looked up here to obtain
	 * the parent for the "pmtu" leaf.
	 */
	rv = sysctl_createv(clog, 0, NULL, &rnode,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, "interfaces",
			    SYSCTL_DESCR("Per-interface controls"),
			    NULL, 0, NULL, 0,
			    CTL_NET, CTL_CREATE, CTL_EOL);
	if (rv == 0)
		rv = sysctl_createv(clog, 0, &rnode, &rnode,
				    CTLFLAG_PERMANENT,
				    CTLTYPE_NODE, ifname,
				    SYSCTL_DESCR("Interface controls"),
				    NULL, 0, NULL, 0,
				    CTL_CREATE, CTL_EOL);

	/* Only attempt the leaf when its parent chain exists. */
	if (rv == 0)
		rv = sysctl_createv(clog, 0, &rnode, NULL,
				    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
				    CTLTYPE_INT, "pmtu",
				    SYSCTL_DESCR("Path MTU setting for this ipsec tunnel"),
				    sysctl_if_ipsec_pmtu_perif, 0, (void *)sc, 0,
				    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n", ifname);
#endif
}
302 
303 /* ARGSUSED */
304 void
305 ipsecifattach(int count)
306 {
307 
308 	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
309 	LIST_INIT(&ipsec_softcs.list);
310 
311 	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
312 
313 	if_ipsec_pktq_rps_hash_p = pktq_rps_hash_default;
314 	if_ipsec_sysctl_setup();
315 
316 	if_clone_attach(&ipsec_cloner);
317 }
318 
319 static int
320 if_ipsec_clone_create(struct if_clone *ifc, int unit)
321 {
322 	struct ipsec_softc *sc;
323 	struct ipsec_variant *var;
324 	struct ifnet *ifp;
325 
326 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
327 
328 	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
329 
330 	if_ipsec_attach0(sc);
331 
332 	ifp = &sc->ipsec_if;
333 	if_ipsec_perif_sysctl_setup(&ifp->if_sysctl_log, sc);
334 
335 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
336 	var->iv_softc = sc;
337 	psref_target_init(&var->iv_psref, iv_psref_class);
338 
339 	sc->ipsec_var = var;
340 	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
341 	sc->ipsec_psz = pserialize_create();
342 	sc->ipsec_ro_percpu = if_tunnel_alloc_ro_percpu();
343 
344 	mutex_enter(&ipsec_softcs.lock);
345 	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
346 	mutex_exit(&ipsec_softcs.lock);
347 	return 0;
348 }
349 
350 static void
351 if_ipsec_attach0(struct ipsec_softc *sc)
352 {
353 
354 	sc->ipsec_if.if_addrlen = 0;
355 	sc->ipsec_if.if_mtu    = IPSEC_MTU;
356 	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
357 	/* set ipsec(4) specific default flags. */
358 	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
359 	sc->ipsec_if.if_extflags = IFEF_MPSAFE;
360 	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
361 	sc->ipsec_if.if_output = if_ipsec_output;
362 	sc->ipsec_if.if_type   = IFT_IPSEC;
363 	sc->ipsec_if.if_dlt    = DLT_NULL;
364 	sc->ipsec_if.if_softc  = sc;
365 	IFQ_SET_READY(&sc->ipsec_if.if_snd);
366 	if_initialize(&sc->ipsec_if);
367 	sc->ipsec_if.if_link_state = LINK_STATE_DOWN;
368 	if_alloc_sadl(&sc->ipsec_if);
369 	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
370 	if_register(&sc->ipsec_if);
371 }
372 
373 static int
374 if_ipsec_clone_destroy(struct ifnet *ifp)
375 {
376 	struct ipsec_softc *sc = ifp->if_softc;
377 	struct ipsec_variant *var;
378 	int bound;
379 
380 	mutex_enter(&ipsec_softcs.lock);
381 	LIST_REMOVE(sc, ipsec_list);
382 	mutex_exit(&ipsec_softcs.lock);
383 
384 	bound = curlwp_bind();
385 	if_ipsec_delete_tunnel(&sc->ipsec_if);
386 	curlwp_bindx(bound);
387 
388 	bpf_detach(ifp);
389 	if_detach(ifp);
390 
391 	if_tunnel_free_ro_percpu(sc->ipsec_ro_percpu);
392 
393 	pserialize_destroy(sc->ipsec_psz);
394 	mutex_destroy(&sc->ipsec_lock);
395 
396 	var = sc->ipsec_var;
397 	kmem_free(var, sizeof(*var));
398 	kmem_free(sc, sizeof(*sc));
399 
400 	return 0;
401 }
402 
403 static inline bool
404 if_ipsec_nat_t(struct ipsec_softc *sc)
405 {
406 
407 	return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
408 }
409 
410 static inline bool
411 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
412 {
413 
414 	return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
415 }
416 
417 int
418 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
419 {
420 	uint8_t v;
421 	struct ipsec_softc *sc;
422 	struct ipsec_variant *var = NULL;
423 	struct psref psref;
424 	int ret = 0;
425 
426 	sc = arg;
427 	KASSERT(sc != NULL);
428 
429 	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
430 		goto out;
431 
432 	var = if_ipsec_getref_variant(sc, &psref);
433 	if (if_ipsec_variant_is_unconfigured(var))
434 		goto out;
435 
436 	switch (proto) {
437 	case IPPROTO_IPV4:
438 	case IPPROTO_IPV6:
439 		break;
440 	default:
441 		goto out;
442 	}
443 
444 	m_copydata(m, 0, sizeof(v), &v);
445 	v = (v >> 4) & 0xff;  /* Get the IP version number. */
446 
447 	switch (v) {
448 #ifdef INET
449 	case IPVERSION: {
450 		struct ip ip;
451 
452 		if (m->m_pkthdr.len < sizeof(ip))
453 			goto out;
454 
455 		m_copydata(m, 0, sizeof(ip), &ip);
456 		if (var->iv_psrc->sa_family != AF_INET ||
457 		    var->iv_pdst->sa_family != AF_INET)
458 			goto out;
459 		ret = ipsecif4_encap_func(m, &ip, var);
460 		break;
461 	}
462 #endif
463 #ifdef INET6
464 	case (IPV6_VERSION >> 4): {
465 		struct ip6_hdr ip6;
466 
467 		if (m->m_pkthdr.len < sizeof(ip6))
468 			goto out;
469 
470 		m_copydata(m, 0, sizeof(ip6), &ip6);
471 		if (var->iv_psrc->sa_family != AF_INET6 ||
472 		    var->iv_pdst->sa_family != AF_INET6)
473 			goto out;
474 		ret = ipsecif6_encap_func(m, &ip6, var);
475 		break;
476 	}
477 #endif
478 	default:
479 		goto out;
480 	}
481 
482 out:
483 	if (var != NULL)
484 		if_ipsec_putref_variant(var, &psref);
485 	return ret;
486 }
487 
488 /*
489  * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
490  * We'll prevent this by introducing upper limit.
491  */
492 static int
493 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
494 {
495 
496 	return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
497 }
498 
499 int
500 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
501     const struct rtentry *rt)
502 {
503 	struct ipsec_softc *sc = ifp->if_softc;
504 	struct ipsec_variant *var;
505 	struct psref psref;
506 	int error;
507 	int bound;
508 
509 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
510 
511 	error = if_ipsec_check_nesting(ifp, m);
512 	if (error) {
513 		m_freem(m);
514 		goto noref_end;
515 	}
516 
517 	if ((ifp->if_flags & IFF_UP) == 0) {
518 		m_freem(m);
519 		error = ENETDOWN;
520 		goto noref_end;
521 	}
522 
523 
524 	bound = curlwp_bind();
525 	var = if_ipsec_getref_variant(sc, &psref);
526 	if (if_ipsec_variant_is_unconfigured(var)) {
527 		m_freem(m);
528 		error = ENETDOWN;
529 		goto end;
530 	}
531 
532 	m->m_flags &= ~(M_BCAST|M_MCAST);
533 
534 	/* use DLT_NULL encapsulation here to pass inner af type */
535 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
536 	if (!m) {
537 		error = ENOBUFS;
538 		goto end;
539 	}
540 	*mtod(m, int *) = dst->sa_family;
541 
542 #if INET6
543 	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
544 	if (dst->sa_family == AF_INET6 &&
545 	    !if_ipsec_fwd_ipv6(sc)) {
546 		/*
547 		 * IPv6 packet is not allowed to forward,that is not error.
548 		 */
549 		error = 0;
550 		IF_DROP(&ifp->if_snd);
551 		m_freem(m);
552 		goto end;
553 	}
554 #endif
555 
556 	error = if_ipsec_out_direct(var, m, dst->sa_family);
557 
558 end:
559 	if_ipsec_putref_variant(var, &psref);
560 	curlwp_bindx(bound);
561 noref_end:
562 	if (error)
563 		if_statinc(ifp, if_oerrors);
564 
565 	return error;
566 }
567 
568 static inline int
569 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
570 {
571 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
572 	int error;
573 	int len;
574 
575 	KASSERT(if_ipsec_heldref_variant(var));
576 	KASSERT(var->iv_output != NULL);
577 
578 	len = m->m_pkthdr.len;
579 
580 	/* input DLT_NULL frame to BPF */
581 	bpf_mtap(ifp, m, BPF_D_OUT);
582 
583 	/* grab and chop off inner af type */
584 	/* XXX need pullup? */
585 	m_adj(m, sizeof(int));
586 
587 	error = var->iv_output(var, family, m);
588 	if (error)
589 		return error;
590 
591 	if_statadd2(ifp, if_opackets, 1, if_obytes, len);
592 
593 	return 0;
594 }
595 
596 void
597 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
598 {
599 
600 	KASSERT(ifp != NULL);
601 
602 	m_set_rcvif(m, ifp);
603 
604 	bpf_mtap_af(ifp, af, m, BPF_D_IN);
605 
606 	if_ipsec_in_enqueue(m, af, ifp);
607 
608 	return;
609 }
610 
611 static inline void
612 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
613 {
614 	pktqueue_t *pktq;
615 	int pktlen;
616 
617 	/*
618 	 * Put the packet to the network layer input queue according to the
619 	 * specified address family.
620 	 */
621 	switch (af) {
622 #ifdef INET
623 	case AF_INET:
624 		pktq = ip_pktq;
625 		break;
626 #endif
627 #ifdef INET6
628 	case AF_INET6:
629 		pktq = ip6_pktq;
630 		break;
631 #endif
632 	default:
633 		if_statinc(ifp, if_ierrors);
634 		m_freem(m);
635 		return;
636 	}
637 
638 	const uint32_t h = pktq_rps_hash(&if_ipsec_pktq_rps_hash_p, m);
639 	pktlen = m->m_pkthdr.len;
640 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
641 		if_statadd2(ifp, if_ibytes, pktlen, if_ipackets, 1);
642 	} else {
643 		if_statinc(ifp, if_iqdrops);
644 		m_freem(m);
645 	}
646 
647 	return;
648 }
649 
/*
 * Check that sa_len of addr matches its address family.
 *
 * Returns 0 when the length equals sizeof(struct sockaddr_in) for AF_INET
 * or sizeof(struct sockaddr_in6) for AF_INET6, EINVAL when it does not,
 * and EAFNOSUPPORT for any family that is not compiled in.
 */
static inline int
if_ipsec_check_salen(struct sockaddr *addr)
{
	int error = EAFNOSUPPORT;

#ifdef INET
	if (addr->sa_family == AF_INET) {
		error = (addr->sa_len != sizeof(struct sockaddr_in)) ?
		    EINVAL : 0;
	}
#endif /* INET */
#ifdef INET6
	if (addr->sa_family == AF_INET6) {
		error = (addr->sa_len != sizeof(struct sockaddr_in6)) ?
		    EINVAL : 0;
	}
#endif /* INET6 */

	return error;
}
673 
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
/*
 * ioctl handler of the ipsec(4) interface (installed as if_ioctl by
 * if_ipsec_attach0()).
 *
 * ifp:  the ipsec(4) interface.
 * cmd:  ioctl command.
 * data: command argument; its type depends on cmd.
 *
 * Handles interface address initialisation, multicast membership requests,
 * MTU changes, and setting, deleting and querying the tunnel endpoint
 * addresses (iv_psrc/iv_pdst of the current variant).  Every other command
 * is passed to ifioctl_common(); when that succeeds,
 * if_ipsec_ensure_flags() is called with the interface flags sampled on
 * entry.
 *
 * Returns 0 on success or an errno value.
 */
int
if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct ipsec_softc *sc  = ifp->if_softc;
	struct ipsec_variant *var = NULL;
	struct ifreq     *ifr = (struct ifreq*)data;
	struct ifaddr    *ifa = (struct ifaddr*)data;
	int error = 0, size;
	struct sockaddr *dst, *src;
	u_long mtu;
	/* Flags before this request; handed to if_ipsec_ensure_flags(). */
	u_short oflags = ifp->if_flags;
	int bound;
	struct psref psref;

	switch (cmd) {
	case SIOCINITIFADDR:
		ifp->if_flags |= IFF_UP;
		ifa->ifa_rtrequest = p2p_rtrequest;
		break;

	case SIOCSIFDSTADDR:
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:	/* IP supports Multicast */
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:	/* IP6 supports Multicast */
			break;
#endif /* INET6 */
		default:  /* Other protocols do not support Multicast */
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFMTU:
		mtu = ifr->ifr_mtu;
		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
			return EINVAL;
		/* ENETRESET from ifioctl_common() is treated as success. */
		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
			error = 0;
		break;

#ifdef INET
	case SIOCSIFPHYADDR:
#endif
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif /* INET6 */
	case SIOCSLIFPHYADDR:
		/* Locate the src/dst sockaddrs inside the request structure. */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_dstaddr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			src = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
			break;
#endif /* INET6 */
		case SIOCSLIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->addr);
			dst = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->dstaddr);
			break;
		default:
			return EINVAL;
		}

		/* sa_family must be equal */
		if (src->sa_family != dst->sa_family)
			return EINVAL;

		error = if_ipsec_check_salen(src);
		if (error)
			return error;
		error = if_ipsec_check_salen(dst);
		if (error)
			return error;

		/* check sa_family looks sane for the cmd */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			if (src->sa_family == AF_INET)
				break;
			return EAFNOSUPPORT;
#endif /* INET */
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			if (src->sa_family == AF_INET6)
				break;
			return EAFNOSUPPORT;
#endif /* INET6 */
		case SIOCSLIFPHYADDR:
			/* checks done in the above */
			break;
		}
		/*
		 * calls if_ipsec_getref_variant() for other softcs to check
		 * address pair duplication
		 */
		bound = curlwp_bind();
		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
		if (error)
			goto bad;
		if_link_state_change(&sc->ipsec_if, LINK_STATE_UP);
		curlwp_bindx(bound);
		break;

	case SIOCDIFPHYADDR:
		bound = curlwp_bind();
		if_ipsec_delete_tunnel(&sc->ipsec_if);
		if_link_state_change(&sc->ipsec_if, LINK_STATE_DOWN);
		curlwp_bindx(bound);
		break;

	case SIOCGIFPSRCADDR:
#ifdef INET6
	case SIOCGIFPSRCADDR_IN6:
#endif /* INET6 */
		/*
		 * Report the tunnel source address.  On error the variant
		 * reference and the LWP binding are released at "bad".
		 */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (var->iv_psrc == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = var->iv_psrc;
		switch (cmd) {
#ifdef INET
		case SIOCGIFPSRCADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPSRCADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		/* The stored address must fit into the caller's buffer. */
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGIFPDSTADDR:
#ifdef INET6
	case SIOCGIFPDSTADDR_IN6:
#endif /* INET6 */
		/* Same as above, for the tunnel destination address. */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (var->iv_pdst == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = var->iv_pdst;
		switch (cmd) {
#ifdef INET
		case SIOCGIFPDSTADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPDSTADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGLIFPHYADDR:
		/* Report both tunnel addresses in a struct if_laddrreq. */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (if_ipsec_variant_is_unconfigured(var)) {
			error = EADDRNOTAVAIL;
			goto bad;
		}

		/* copy src */
		src = var->iv_psrc;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->addr);
		size = sizeof(((struct if_laddrreq *)data)->addr);
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
		if (error)
			goto bad;

		/* copy dst */
		src = var->iv_pdst;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->dstaddr);
		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	default:
		error = ifioctl_common(ifp, cmd, data);
		if (!error) {
			bound = curlwp_bind();
			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
			if (error)
				goto bad;
			curlwp_bindx(bound);
		}
		break;
	}
	return error;

	/*
	 * Common error exit for paths that have bound the LWP: drop the
	 * variant reference if one was taken, then undo the binding.
	 */
bad:
	if (var != NULL)
		if_ipsec_putref_variant(var, &psref);
	curlwp_bindx(bound);

	return error;
}
942 
943 struct encap_funcs {
944 #ifdef INET
945 	int (*ef_inet)(struct ipsec_variant *);
946 #endif
947 #ifdef INET6
948 	int (*ef_inet6)(struct ipsec_variant *);
949 #endif
950 };
951 
952 static struct encap_funcs ipsec_encap_attach = {
953 #ifdef INET
954 	.ef_inet = ipsecif4_attach,
955 #endif
956 #ifdef INET6
957 	.ef_inet6 = &ipsecif6_attach,
958 #endif
959 };
960 
961 static struct encap_funcs ipsec_encap_detach = {
962 #ifdef INET
963 	.ef_inet = ipsecif4_detach,
964 #endif
965 #ifdef INET6
966 	.ef_inet6 = &ipsecif6_detach,
967 #endif
968 };
969 
970 static int
971 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
972 {
973 	int error;
974 
975 	KASSERT(var != NULL);
976 	KASSERT(if_ipsec_variant_is_configured(var));
977 
978 	switch (var->iv_psrc->sa_family) {
979 #ifdef INET
980 	case AF_INET:
981 		error = (funcs->ef_inet)(var);
982 		break;
983 #endif /* INET */
984 #ifdef INET6
985 	case AF_INET6:
986 		error = (funcs->ef_inet6)(var);
987 		break;
988 #endif /* INET6 */
989 	default:
990 		error = EINVAL;
991 		break;
992 	}
993 
994 	return error;
995 }
996 
997 static int
998 if_ipsec_encap_attach(struct ipsec_variant *var)
999 {
1000 
1001 	return if_ipsec_encap_common(var, &ipsec_encap_attach);
1002 }
1003 
1004 static int
1005 if_ipsec_encap_detach(struct ipsec_variant *var)
1006 {
1007 
1008 	return if_ipsec_encap_common(var, &ipsec_encap_detach);
1009 }
1010 
/*
 * Validate and set ipsec(4) I/F configurations.
 *     (1) validate
 *         (1-1) Check whether the argument src and dst address pair will
 *               change the configuration from the current src and dst
 *               address pair.
 *         (1-2) Check whether any other ipsec(4) I/F uses the same src and
 *               dst address pair as the arguments, except for NAT-T shared
 *               tunnels.
 *     (2) set
 *         (2-1) Create a variant for the new configuration.
 *         (2-2) Create a temporary "null" variant used to avoid accessing
 *               a dangling variant while SPs are deleted and added.
 *         (2-3) Swap the variant including its SPs.
 *         (2-4) Clean up the last configuration.
 *
 * Returns 0 on success (also when the configuration is unchanged), the
 * error of encap_lock_enter(), EINVAL for an unsupported address family,
 * EADDRNOTAVAIL when another interface already uses the address pair, or
 * the error of if_ipsec_encap_attach() / if_ipsec_update_variant().
 */
static int
if_ipsec_set_tunnel(struct ifnet *ifp,
    struct sockaddr *src, struct sockaddr *dst)
{
	struct ipsec_softc *sc = ifp->if_softc;
	struct ipsec_softc *sc2;
	struct ipsec_variant *ovar, *nvar, *nullvar;
	struct sockaddr *osrc, *odst;
	struct sockaddr *nsrc, *ndst;
	in_port_t nsport = 0, ndport = 0;
	int error;

	error = encap_lock_enter();
	if (error)
		return error;

	/*
	 * Allocate everything up front, before sc->ipsec_lock is taken.
	 * All four objects are released at "out" on every failure path.
	 */
	nsrc = sockaddr_dup(src, M_WAITOK);
	ndst = sockaddr_dup(dst, M_WAITOK);
	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);

	mutex_enter(&sc->ipsec_lock);

	ovar = sc->ipsec_var;

	/*
	 * Remember the requested ports separately and clear them in the
	 * duplicated addresses.
	 */
	switch(nsrc->sa_family) {
#ifdef INET
	case AF_INET:
		nsport = satosin(src)->sin_port;
		/*
		 * avoid confusing the SP when NAT-T is disabled,
		 * e.g.
		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
		 */
		satosin(nsrc)->sin_port = 0;
		ndport = satosin(dst)->sin_port;
		satosin(ndst)->sin_port = 0;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		nsport = satosin6(src)->sin6_port;
		satosin6(nsrc)->sin6_port = 0;
		ndport = satosin6(dst)->sin6_port;
		satosin6(ndst)->sin6_port = 0;
		break;
#endif /* INET6 */
	default:
		log(LOG_DEBUG,
		    "%s: Invalid address family: %d.\n",
		    __func__, src->sa_family);
		error = EINVAL;
		goto out;
	}

	/*
	 * (1-1) Check whether the argument src and dst address pair will
	 *       change the configuration from the current src and dst
	 *       address pair.
	 */
	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
		/* address and port pair not changed. */
		error = 0;
		goto out;
	}

	/*
	 * (1-2) Check whether any other ipsec(4) I/F uses the same src and
	 *       dst address pair as the arguments, except for NAT-T shared
	 *       tunnels.
	 */
	mutex_enter(&ipsec_softcs.lock);
	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
		struct ipsec_variant *var2;
		struct psref psref;

		if (sc2 == sc)
			continue;
		var2 = if_ipsec_getref_variant(sc2, &psref);
		if (if_ipsec_variant_is_unconfigured(var2)) {
			if_ipsec_putref_variant(var2, &psref);
			continue;
		}
		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
			if_ipsec_putref_variant(var2, &psref);
			continue; /* NAT-T shared tunnel */
		}
		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
			if_ipsec_putref_variant(var2, &psref);
			mutex_exit(&ipsec_softcs.lock);
			error = EADDRNOTAVAIL;
			goto out;
		}

		if_ipsec_putref_variant(var2, &psref);
		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
	}
	mutex_exit(&ipsec_softcs.lock);


	/* Keep the old addresses so they can be freed after the swap. */
	osrc = ovar->iv_psrc;
	odst = ovar->iv_pdst;

	/*
	 * (2-1) Create ipsec_variant for new configuration.
	 */
	if_ipsec_copy_variant(nvar, ovar);
	nvar->iv_psrc = nsrc;
	nvar->iv_pdst = ndst;
	nvar->iv_sport = nsport;
	nvar->iv_dport = ndport;
	nvar->iv_encap_cookie4 = NULL;
	nvar->iv_encap_cookie6 = NULL;
	psref_target_init(&nvar->iv_psref, iv_psref_class);
	error = if_ipsec_encap_attach(nvar);
	if (error)
		goto out;

	/*
	 * (2-2) Create temporary "null" variant.
	 */
	if_ipsec_copy_variant(nullvar, ovar);
	if_ipsec_clear_config(nullvar);
	psref_target_init(&nullvar->iv_psref, iv_psref_class);
	/*
	 * (2-3) Swap the variant including its SPs.
	 */
	error = if_ipsec_update_variant(sc, nvar, nullvar);
	if (error) {
		/* Undo the attach done in (2-1) before bailing out. */
		if_ipsec_encap_detach(nvar);
		goto out;
	}

	mutex_exit(&sc->ipsec_lock);

	/*
	 * (2-4) Clean up the last configuration.
	 */
	if (if_ipsec_variant_is_configured(ovar))
		if_ipsec_encap_detach(ovar);
	encap_lock_exit();

	/* nsrc, ndst and nvar stay in use; only the old objects are freed. */
	if (osrc != NULL)
		sockaddr_free(osrc);
	if (odst != NULL)
		sockaddr_free(odst);
	kmem_free(ovar, sizeof(*ovar));
	kmem_free(nullvar, sizeof(*nullvar));

	return 0;

	/*
	 * Failure (or "nothing changed") exit: the old variant stays in
	 * place and everything allocated above is released.
	 */
out:
	mutex_exit(&sc->ipsec_lock);
	encap_lock_exit();

	sockaddr_free(nsrc);
	sockaddr_free(ndst);
	kmem_free(nvar, sizeof(*nvar));
	kmem_free(nullvar, sizeof(*nullvar));

	return error;
}
1191 
1192 /*
1193  * Validate and delete ipsec(4) I/F configurations.
1194  *     (1) validate
1195  *         (1-1) Check current src and dst address pair are null,
1196  *               which means the ipsec(4) I/F is already done deletetunnel.
1197  *     (2) delete
1198  *         (2-1) Create variant for deleted status.
1199  *         (2-2) Create temporary "null" variant used to avoid to access
1200  *               dangling variant while SPs are deleted and added.
1201  *               NOTE:
1202  *               The contents of temporary "null" variant equal to the variant
1203  *               of (2-1), however two psref_target_destroy() synchronization
1204  *               points are necessary to avoid to access dangling variant
1205  *               while SPs are deleted and added. To implement that simply,
1206  *               we use the same manner as if_ipsec_set_tunnel(), that is,
1207  *               create extra "null" variant and use it temporarily.
1208  *         (2-3) Swap variant include its SPs.
1209  *         (2-4) Cleanup last configurations.
1210  */
1211 static void
1212 if_ipsec_delete_tunnel(struct ifnet *ifp)
1213 {
1214 	struct ipsec_softc *sc = ifp->if_softc;
1215 	struct ipsec_variant *ovar, *nvar, *nullvar;
1216 	struct sockaddr *osrc, *odst;
1217 	int error;
1218 
1219 	error = encap_lock_enter();
1220 	if (error)
1221 		return;
1222 
1223 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1224 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1225 
1226 	mutex_enter(&sc->ipsec_lock);
1227 
1228 	ovar = sc->ipsec_var;
1229 	osrc = ovar->iv_psrc;
1230 	odst = ovar->iv_pdst;
1231 	/*
1232 	 * (1-1) Check current src and dst address pair are null,
1233 	 *       which means the ipsec(4) I/F is already done deletetunnel.
1234 	 */
1235 	if (osrc == NULL || odst == NULL) {
1236 		/* address pair not changed. */
1237 		mutex_exit(&sc->ipsec_lock);
1238 		encap_lock_exit();
1239 		kmem_free(nvar, sizeof(*nvar));
1240 		kmem_free(nullvar, sizeof(*nullvar));
1241 		return;
1242 	}
1243 
1244 	/*
1245 	 * (2-1) Create variant for deleted status.
1246 	 */
1247 	if_ipsec_copy_variant(nvar, ovar);
1248 	if_ipsec_clear_config(nvar);
1249 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1250 
1251 	/*
1252 	 * (2-2) Create temporary "null" variant used to avoid to access
1253 	 *       dangling variant while SPs are deleted and added.
1254 	 */
1255 	if_ipsec_copy_variant(nullvar, ovar);
1256 	if_ipsec_clear_config(nullvar);
1257 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1258 	/*
1259 	 * (2-3) Swap variant include its SPs.
1260 	 */
1261 	/* if_ipsec_update_variant() does not fail when delete SP only. */
1262 	(void)if_ipsec_update_variant(sc, nvar, nullvar);
1263 
1264 	mutex_exit(&sc->ipsec_lock);
1265 
1266 	/*
1267 	 * (2-4) Cleanup last configurations.
1268 	 */
1269 	if (if_ipsec_variant_is_configured(ovar))
1270 		if_ipsec_encap_detach(ovar);
1271 	encap_lock_exit();
1272 
1273 	sockaddr_free(osrc);
1274 	sockaddr_free(odst);
1275 	kmem_free(ovar, sizeof(*ovar));
1276 	kmem_free(nullvar, sizeof(*nullvar));
1277 }
1278 
1279 /*
1280  * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1281  *     (1) check
1282  *         (1-1) Check flags are changed.
1283  *         (1-2) Check current src and dst address pair. If they are null,
1284  *               that means the ipsec(4) I/F is deletetunnel'ed, so it is
1285  *               not needed to update.
1286  *     (2) update
1287  *         (2-1) Create variant for new SPs.
1288  *         (2-2) Create temporary "null" variant used to avoid to access
1289  *               dangling variant while SPs are deleted and added.
1290  *               NOTE:
1291  *               There is the same problem as if_ipsec_delete_tunnel().
1292  *         (2-3) Swap variant include its SPs.
1293  *         (2-4) Cleanup unused configurations.
1294  *               NOTE: use the same encap_cookies.
1295  */
1296 static int
1297 if_ipsec_ensure_flags(struct ifnet *ifp, u_short oflags)
1298 {
1299 	struct ipsec_softc *sc = ifp->if_softc;
1300 	struct ipsec_variant *ovar, *nvar, *nullvar;
1301 	int error;
1302 
1303 	/*
1304 	 * (1) Check flags are changed.
1305 	 */
1306 	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1307 	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1308 		return 0; /* flags not changed. */
1309 
1310 	error = encap_lock_enter();
1311 	if (error)
1312 		return error;
1313 
1314 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1315 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1316 
1317 	mutex_enter(&sc->ipsec_lock);
1318 
1319 	ovar = sc->ipsec_var;
1320 	/*
1321 	 * (1-2) Check current src and dst address pair.
1322 	 */
1323 	if (if_ipsec_variant_is_unconfigured(ovar)) {
1324 		/* nothing to do */
1325 		mutex_exit(&sc->ipsec_lock);
1326 		encap_lock_exit();
1327 		kmem_free(nvar, sizeof(*nvar));
1328 		kmem_free(nullvar, sizeof(*nullvar));
1329 		return 0;
1330 	}
1331 
1332 	/*
1333 	 * (2-1) Create variant for new SPs.
1334 	 */
1335 	if_ipsec_copy_variant(nvar, ovar);
1336 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1337 	/*
1338 	 * (2-2) Create temporary "null" variant used to avoid to access
1339 	 *       dangling variant while SPs are deleted and added.
1340 	 */
1341 	if_ipsec_copy_variant(nullvar, ovar);
1342 	if_ipsec_clear_config(nullvar);
1343 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1344 	/*
1345 	 * (2-3) Swap variant include its SPs.
1346 	 */
1347 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1348 
1349 	mutex_exit(&sc->ipsec_lock);
1350 	encap_lock_exit();
1351 
1352 	/*
1353 	 * (2-4) Cleanup unused configurations.
1354 	 */
1355 	if (!error)
1356 		kmem_free(ovar, sizeof(*ovar));
1357 	else
1358 		kmem_free(nvar, sizeof(*ovar));
1359 	kmem_free(nullvar, sizeof(*nullvar));
1360 
1361 	return error;
1362 }
1363 
1364 /*
1365  * SPD management
1366  */
1367 
1368 /*
1369  * Share SP set with other NAT-T ipsec(4) I/F(s).
1370  *     Return 1, when "var" shares SP set.
1371  *     Return 0, when "var" cannot share SP set.
1372  *
1373  * NOTE:
1374  * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1375  * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1376  * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1377  * set_tunnel causes race.
1378  * Currently, (fortunately) encap_lock works as this global lock.
1379  */
1380 static int
1381 if_ipsec_share_sp(struct ipsec_variant *var)
1382 {
1383 	struct ipsec_softc *sc = var->iv_softc;
1384 	struct ipsec_softc *sc2;
1385 	struct ipsec_variant *var2;
1386 	struct psref psref;
1387 
1388 	KASSERT(encap_lock_held());
1389 	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1390 
1391 	mutex_enter(&ipsec_softcs.lock);
1392 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1393 		if (sc2 == sc)
1394 			continue;
1395 		var2 = if_ipsec_getref_variant(sc2, &psref);
1396 		if (if_ipsec_variant_is_unconfigured(var2)) {
1397 			if_ipsec_putref_variant(var2, &psref);
1398 			continue;
1399 		}
1400 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1401 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1402 			if_ipsec_putref_variant(var2, &psref);
1403 			continue;
1404 		}
1405 
1406 		break;
1407 	}
1408 	mutex_exit(&ipsec_softcs.lock);
1409 	if (sc2 == NULL)
1410 		return 0; /* not shared */
1411 
1412 	IV_SP_IN(var) = IV_SP_IN(var2);
1413 	IV_SP_IN6(var) = IV_SP_IN6(var2);
1414 	IV_SP_OUT(var) = IV_SP_OUT(var2);
1415 	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1416 
1417 	if_ipsec_putref_variant(var2, &psref);
1418 	return 1; /* shared */
1419 }
1420 
1421 /*
1422  * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1423  *     Return 1, when "var" shared SP set, and then unshare them.
1424  *     Return 0, when "var" did not share SP set.
1425  *
1426  * NOTE:
1427  * See if_ipsec_share_sp()'s note.
1428  */
1429 static int
1430 if_ipsec_unshare_sp(struct ipsec_variant *var)
1431 {
1432 	struct ipsec_softc *sc = var->iv_softc;
1433 	struct ipsec_softc *sc2;
1434 	struct ipsec_variant *var2;
1435 	struct psref psref;
1436 
1437 	KASSERT(encap_lock_held());
1438 
1439 	if (!var->iv_pdst || !var->iv_psrc)
1440 		return 0;
1441 
1442 	mutex_enter(&ipsec_softcs.lock);
1443 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1444 		if (sc2 == sc)
1445 			continue;
1446 		var2 = if_ipsec_getref_variant(sc2, &psref);
1447 		if (!var2->iv_pdst || !var2->iv_psrc) {
1448 			if_ipsec_putref_variant(var2, &psref);
1449 			continue;
1450 		}
1451 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1452 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1453 			if_ipsec_putref_variant(var2, &psref);
1454 			continue;
1455 		}
1456 
1457 		break;
1458 	}
1459 	mutex_exit(&ipsec_softcs.lock);
1460 	if (sc2 == NULL)
1461 		return 0; /* not shared */
1462 
1463 	IV_SP_IN(var) = NULL;
1464 	IV_SP_IN6(var) = NULL;
1465 	IV_SP_OUT(var) = NULL;
1466 	IV_SP_OUT6(var) = NULL;
1467 	if_ipsec_putref_variant(var2, &psref);
1468 	return 1; /* shared */
1469 }
1470 
1471 static inline void
1472 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align)
1473 {
1474 	struct mbuf *m;
1475 
1476 	MGET(m, M_WAIT, MT_DATA);
1477 	if (align) {
1478 		m->m_len = PFKEY_ALIGN8(len);
1479 		memset(mtod(m, void *), 0, m->m_len);
1480 	} else
1481 		m->m_len = len;
1482 	m_copyback(m, 0, len, data);
1483 	m_cat(m0, m);
1484 }
1485 
/*
 * Append "len" bytes of "data" to "m0", always with 8-byte alignment
 * padding (see if_ipsec_add_mbuf_optalign()).
 */
static inline void
if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
{
	const bool align = true;

	if_ipsec_add_mbuf_optalign(m0, data, len, align);
}
1492 
1493 static inline void
1494 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align)
1495 {
1496 
1497 	if (port == 0) {
1498 		if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align);
1499 	} else {
1500 		union sockaddr_union addrport_u;
1501 		struct sockaddr *addrport = &addrport_u.sa;
1502 
1503 		if_ipsec_set_addr_port(addrport, addr, port);
1504 		if_ipsec_add_mbuf_optalign(m0, addrport, addrport->sa_len, align);
1505 	}
1506 }
1507 
1508 static inline void
1509 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1510 {
1511 	struct mbuf *m;
1512 
1513 	if (len == 0)
1514 		return;
1515 
1516 	MGET(m, M_WAIT, MT_DATA);
1517 	m->m_len = len;
1518 	memset(mtod(m, void *), 0, m->m_len);
1519 	m_cat(m0, m);
1520 }
1521 
1522 static inline size_t
1523 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1524     int proto, uint16_t exttype)
1525 {
1526 	size_t size;
1527 
1528 	KASSERT(saaddr != NULL);
1529 	KASSERT(addr != NULL);
1530 
1531 	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1532 	saaddr->sadb_address_len = PFKEY_UNIT64(size);
1533 	saaddr->sadb_address_exttype = exttype;
1534 	saaddr->sadb_address_proto = proto;
1535 	switch (addr->sa_family) {
1536 #ifdef INET
1537 	case AF_INET:
1538 		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1539 		break;
1540 #endif /* INET */
1541 #ifdef INET6
1542 	case AF_INET6:
1543 		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1544 		break;
1545 #endif /* INET6 */
1546 	default:
1547 		log(LOG_DEBUG,
1548 		    "%s: Invalid address family: %d.\n",
1549 		    __func__, addr->sa_family);
1550 		break;
1551 	}
1552 	saaddr->sadb_address_reserved = 0;
1553 
1554 	return size;
1555 }
1556 
1557 static inline size_t
1558 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1559     int proto)
1560 {
1561 
1562 	return if_ipsec_set_sadb_addr(sasrc, src, proto,
1563 	    SADB_EXT_ADDRESS_SRC);
1564 }
1565 
1566 static inline size_t
1567 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1568     int proto)
1569 {
1570 
1571 	return if_ipsec_set_sadb_addr(sadst, dst, proto,
1572 	    SADB_EXT_ADDRESS_DST);
1573 }
1574 
1575 static inline size_t
1576 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1577     struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1578     uint8_t level, struct sockaddr *src, struct sockaddr *dst)
1579 {
1580 	size_t size;
1581 
1582 	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1583 
1584 	size = sizeof(*xpl);
1585 	if (policy == IPSEC_POLICY_IPSEC) {
1586 		size += PFKEY_ALIGN8(sizeof(*xisr));
1587 		if (src != NULL && dst != NULL)
1588 			size += PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1589 	}
1590 	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1591 	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1592 	xpl->sadb_x_policy_type = policy;
1593 	xpl->sadb_x_policy_dir = dir;
1594 	xpl->sadb_x_policy_reserved = 0;
1595 	xpl->sadb_x_policy_id = id;
1596 	xpl->sadb_x_policy_reserved2 = 0;
1597 
1598 	if (policy == IPSEC_POLICY_IPSEC) {
1599 		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1600 		if (src != NULL && dst != NULL)
1601 			xisr->sadb_x_ipsecrequest_len +=
1602 				PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1603 		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1604 		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1605 		xisr->sadb_x_ipsecrequest_level = level;
1606 		if (level == IPSEC_LEVEL_UNIQUE)
1607 			xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
1608 		else
1609 			xisr->sadb_x_ipsecrequest_reqid = 0;
1610 	}
1611 
1612 	return size;
1613 }
1614 
1615 static inline void
1616 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1617 {
1618 
1619 	KASSERT(msg != NULL);
1620 
1621 	msg->sadb_msg_version = PF_KEY_V2;
1622 	msg->sadb_msg_type = msgtype;
1623 	msg->sadb_msg_errno = 0;
1624 	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1625 	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1626 	msg->sadb_msg_reserved = 0;
1627 	msg->sadb_msg_seq = 0; /* XXXX */
1628 	msg->sadb_msg_pid = 0; /* XXXX */
1629 }
1630 
1631 static inline void
1632 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1633 {
1634 
1635 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1636 }
1637 
1638 static inline void
1639 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1640 {
1641 
1642 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1643 }
1644 
1645 static int
1646 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1647     in_port_t port)
1648 {
1649 	int error = 0;
1650 
1651 	sockaddr_copy(addrport, addr->sa_len, addr);
1652 
1653 	switch (addr->sa_family) {
1654 #ifdef INET
1655 	case AF_INET: {
1656 		struct sockaddr_in *sin = satosin(addrport);
1657 		sin->sin_port = port;
1658 		break;
1659 	}
1660 #endif /* INET */
1661 #ifdef INET6
1662 	case AF_INET6: {
1663 		struct sockaddr_in6 *sin6 = satosin6(addrport);
1664 		sin6->sin6_port = port;
1665 		break;
1666 	}
1667 #endif /* INET6 */
1668 	default:
1669 		log(LOG_DEBUG,
1670 		    "%s: Invalid address family: %d.\n",
1671 		    __func__, addr->sa_family);
1672 		error = EINVAL;
1673 	}
1674 
1675 	return error;
1676 }
1677 
1678 static struct secpolicy *
1679 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1680     struct sockaddr *dst, in_port_t dport,
1681     int dir, int proto, int level, u_int policy)
1682 {
1683 	struct sadb_msg msg;
1684 	struct sadb_address xsrc, xdst;
1685 	struct sadb_x_policy xpl;
1686 	struct sadb_x_ipsecrequest xisr;
1687 	size_t size;
1688 	size_t padlen;
1689 	uint16_t ext_msg_len = 0;
1690 	struct mbuf *m;
1691 
1692 	memset(&msg, 0, sizeof(msg));
1693 	memset(&xsrc, 0, sizeof(xsrc));
1694 	memset(&xdst, 0, sizeof(xdst));
1695 	memset(&xpl, 0, sizeof(xpl));
1696 	memset(&xisr, 0, sizeof(xisr));
1697 
1698 	MGETHDR(m, M_WAIT, MT_DATA);
1699 
1700 	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1701 	ext_msg_len += PFKEY_UNIT64(size);
1702 	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1703 	ext_msg_len += PFKEY_UNIT64(size);
1704 	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level, NULL, NULL);
1705 	ext_msg_len += PFKEY_UNIT64(size);
1706 	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1707 
1708 	/* build PF_KEY message */
1709 
1710 	m->m_len = sizeof(msg);
1711 	m_copyback(m, 0, sizeof(msg), &msg);
1712 
1713 	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1714 	/*
1715 	 * secpolicy.spidx.{src, dst} must not be set port number,
1716 	 * even if it is used for NAT-T.
1717 	 */
1718 	if_ipsec_add_mbuf_addr_port(m, src, 0, true);
1719 	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1720 		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1721 	if_ipsec_add_pad(m, padlen);
1722 
1723 	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1724 	/* ditto */
1725 	if_ipsec_add_mbuf_addr_port(m, dst, 0, true);
1726 	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1727 		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1728 	if_ipsec_add_pad(m, padlen);
1729 
1730 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1731 	padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl);
1732 	if (policy == IPSEC_POLICY_IPSEC) {
1733 		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1734 		padlen -= PFKEY_ALIGN8(sizeof(xisr));
1735 	}
1736 	if_ipsec_add_pad(m, padlen);
1737 
1738 	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
1739 	return key_kpi_spdadd(m);
1740 }
1741 
1742 static int
1743 if_ipsec_add_sp(struct ipsec_variant *var,
1744     struct sockaddr *src, in_port_t sport,
1745     struct sockaddr *dst, in_port_t dport)
1746 {
1747 	struct ipsec_softc *sc = var->iv_softc;
1748 	int level;
1749 	u_int v6policy;
1750 
1751 	/*
1752 	 * must delete sp before add it.
1753 	 */
1754 	KASSERT(IV_SP_IN(var) == NULL);
1755 	KASSERT(IV_SP_OUT(var) == NULL);
1756 	KASSERT(IV_SP_IN6(var) == NULL);
1757 	KASSERT(IV_SP_OUT6(var) == NULL);
1758 
1759 	/*
1760 	 * can be shared?
1761 	 */
1762 	if (if_ipsec_share_sp(var))
1763 		return 0;
1764 
1765 	if (if_ipsec_nat_t(sc))
1766 		level = IPSEC_LEVEL_REQUIRE;
1767 	else
1768 		level = IPSEC_LEVEL_UNIQUE;
1769 
1770 	if (if_ipsec_fwd_ipv6(sc))
1771 		v6policy = IPSEC_POLICY_IPSEC;
1772 	else
1773 		v6policy = IPSEC_POLICY_DISCARD;
1774 
1775 	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1776 	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1777 	if (IV_SP_IN(var) == NULL)
1778 		goto fail;
1779 	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1780 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1781 	if (IV_SP_OUT(var) == NULL)
1782 		goto fail;
1783 	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1784 	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy);
1785 	if (IV_SP_IN6(var) == NULL)
1786 		goto fail;
1787 	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1788 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy);
1789 	if (IV_SP_OUT6(var) == NULL)
1790 		goto fail;
1791 
1792 	return 0;
1793 
1794 fail:
1795 	if (IV_SP_IN6(var) != NULL) {
1796 		if_ipsec_del_sp0(IV_SP_IN6(var));
1797 		IV_SP_IN6(var) = NULL;
1798 	}
1799 	if (IV_SP_OUT(var) != NULL) {
1800 		if_ipsec_del_sp0(IV_SP_OUT(var));
1801 		IV_SP_OUT(var) = NULL;
1802 	}
1803 	if (IV_SP_IN(var) != NULL) {
1804 		if_ipsec_del_sp0(IV_SP_IN(var));
1805 		IV_SP_IN(var) = NULL;
1806 	}
1807 
1808 	return EEXIST;
1809 }
1810 
1811 static int
1812 if_ipsec_del_sp0(struct secpolicy *sp)
1813 {
1814 	struct sadb_msg msg;
1815 	struct sadb_x_policy xpl;
1816 	size_t size;
1817 	uint16_t ext_msg_len = 0;
1818 	int error;
1819 	struct mbuf *m;
1820 
1821 	if (sp == NULL)
1822 		return 0;
1823 
1824 	memset(&msg, 0, sizeof(msg));
1825 	memset(&xpl, 0, sizeof(xpl));
1826 
1827 	MGETHDR(m, M_WAIT, MT_DATA);
1828 
1829 	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL);
1830 	ext_msg_len += PFKEY_UNIT64(size);
1831 
1832 	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
1833 
1834 	m->m_len = sizeof(msg);
1835 	m_copyback(m, 0, sizeof(msg), &msg);
1836 
1837 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1838 
1839 	/*  unreference correspond to key_kpi_spdadd(). */
1840 	KEY_SP_UNREF(&sp);
1841 	error = key_kpi_spddelete2(m);
1842 	if (error != 0) {
1843 		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
1844 		    __func__, sp->id, error);
1845 	}
1846 	return error;
1847 }
1848 
1849 static void
1850 if_ipsec_del_sp(struct ipsec_variant *var)
1851 {
1852 
1853 	/* are the SPs shared? */
1854 	if (if_ipsec_unshare_sp(var))
1855 		return;
1856 
1857 	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
1858 	(void)if_ipsec_del_sp0(IV_SP_IN(var));
1859 	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
1860 	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
1861 	IV_SP_IN(var) = NULL;
1862 	IV_SP_IN6(var) = NULL;
1863 	IV_SP_OUT(var) = NULL;
1864 	IV_SP_OUT6(var) = NULL;
1865 }
1866 
1867 static int
1868 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
1869     struct ipsec_variant *nvar)
1870 {
1871 	in_port_t src_port = 0;
1872 	in_port_t dst_port = 0;
1873 	struct sockaddr *src;
1874 	struct sockaddr *dst;
1875 	int error = 0;
1876 
1877 	KASSERT(mutex_owned(&sc->ipsec_lock));
1878 
1879 	if_ipsec_del_sp(ovar);
1880 
1881 	src = nvar->iv_psrc;
1882 	dst = nvar->iv_pdst;
1883 	if (if_ipsec_nat_t(sc)) {
1884 		/* NAT-T enabled */
1885 		src_port = nvar->iv_sport;
1886 		dst_port = nvar->iv_dport;
1887 	}
1888 	if (src && dst)
1889 		error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
1890 
1891 	return error;
1892 }
1893 
1894 /*
1895  * ipsec_variant and its SPs update API.
1896  *
1897  * Assumption:
1898  * reader side dereferences sc->ipsec_var in reader critical section only,
1899  * that is, all of reader sides do not reader the sc->ipsec_var after
1900  * pserialize_perform().
1901  */
1902 static int
1903 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
1904     struct ipsec_variant *nullvar)
1905 {
1906 	struct ifnet *ifp = &sc->ipsec_if;
1907 	struct ipsec_variant *ovar = sc->ipsec_var;
1908 	int error;
1909 
1910 	KASSERT(mutex_owned(&sc->ipsec_lock));
1911 
1912 	/*
1913 	 * To keep consistency between ipsec(4) I/F settings and SPs,
1914 	 * we stop packet processing while replacing SPs, that is, we set
1915 	 * "null" config variant to sc->ipsec_var.
1916 	 */
1917 	atomic_store_release(&sc->ipsec_var, nullvar);
1918 	pserialize_perform(sc->ipsec_psz);
1919 	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
1920 
1921 	error = if_ipsec_replace_sp(sc, ovar, nvar);
1922 	if (!error)
1923 		atomic_store_release(&sc->ipsec_var, nvar);
1924 	else {
1925 		psref_target_init(&ovar->iv_psref, iv_psref_class);
1926 		atomic_store_release(&sc->ipsec_var, ovar); /* rollback */
1927 	}
1928 
1929 	pserialize_perform(sc->ipsec_psz);
1930 	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
1931 
1932 	if (if_ipsec_variant_is_configured(sc->ipsec_var))
1933 		ifp->if_flags |= IFF_RUNNING;
1934 	else
1935 		ifp->if_flags &= ~IFF_RUNNING;
1936 
1937 	return error;
1938 }
1939