xref: /netbsd-src/sys/net/if_ipsec.c (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
1 /*	$NetBSD: if_ipsec.c,v 1.25 2019/11/01 04:28:14 knakahara Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.25 2019/11/01 04:28:14 knakahara Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/mbuf.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/time.h>
45 #include <sys/syslog.h>
46 #include <sys/cpu.h>
47 #include <sys/kmem.h>
48 #include <sys/mutex.h>
49 #include <sys/pserialize.h>
50 #include <sys/psref.h>
51 #include <sys/sysctl.h>
52 
53 #include <net/if.h>
54 #include <net/if_types.h>
55 #include <net/route.h>
56 #include <net/bpf.h>
57 #include <net/pfkeyv2.h>
58 
59 #include <netinet/in.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/ip.h>
62 #ifdef	INET
63 #include <netinet/in_var.h>
64 #endif	/* INET */
65 
66 #ifdef INET6
67 #include <netinet6/in6_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #endif /* INET6 */
71 
72 #include <netinet/ip_encap.h>
73 
74 #include <net/if_ipsec.h>
75 
76 #include <net/raw_cb.h>
77 #include <net/pfkeyv2.h>
78 
79 #include <netipsec/key.h>
80 #include <netipsec/keydb.h> /* for union sockaddr_union */
81 #include <netipsec/ipsec.h>
82 #include <netipsec/ipsecif.h>
83 
84 static int if_ipsec_clone_create(struct if_clone *, int);
85 static int if_ipsec_clone_destroy(struct ifnet *);
86 
87 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
88 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
89 
90 static int if_ipsec_encap_attach(struct ipsec_variant *);
91 static int if_ipsec_encap_detach(struct ipsec_variant *);
92 static int if_ipsec_set_tunnel(struct ifnet *,
93     struct sockaddr *, struct sockaddr *);
94 static void if_ipsec_delete_tunnel(struct ifnet *);
95 static int if_ipsec_ensure_flags(struct ifnet *, u_short);
96 static void if_ipsec_attach0(struct ipsec_softc *);
97 
98 static int if_ipsec_update_variant(struct ipsec_softc *,
99     struct ipsec_variant *, struct ipsec_variant *);
100 
101 /* sadb_msg */
102 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
103 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
104 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
105     struct sockaddr *, int, uint16_t);
106 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
107     struct sockaddr *, int);
108 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
109     struct sockaddr *, int);
110 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
111     struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t,
112     struct sockaddr *, struct sockaddr *);
113 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
114 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
115 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
116 /* SPD */
117 static int if_ipsec_share_sp(struct ipsec_variant *);
118 static int if_ipsec_unshare_sp(struct ipsec_variant *);
119 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
120     in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
121 static inline int if_ipsec_del_sp0(struct secpolicy *);
122 static int if_ipsec_add_sp(struct ipsec_variant *,
123     struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
124 static void if_ipsec_del_sp(struct ipsec_variant *);
125 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
126     struct ipsec_variant *);
127 
128 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
129     in_port_t);
/*
 * Convenience wrappers around if_ipsec_set_addr_port().  "target" is passed
 * as the first argument, followed by the physical source (PSRC) or
 * destination (PDST) address of variant "var" and the matching port
 * (iv_sport or iv_dport).  They evaluate to the int returned by
 * if_ipsec_set_addr_port().
 */
130 #define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target)			\
131 	if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
132 #define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target)			\
133 	if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
134 
135 /*
136  * ipsec global variable definitions
137  */
138 
139 /* This list is used in ioctl context only. */
/*
 * Every softc made by if_ipsec_clone_create() is linked on "list" and
 * unlinked again by if_ipsec_clone_destroy().  "lock" is held around each
 * insertion, removal and traversal of the list in this file.
 */
140 static struct {
141 	LIST_HEAD(ipsec_sclist, ipsec_softc) list;
142 	kmutex_t lock;
143 } ipsec_softcs __cacheline_aligned;
144 
/*
 * psref class for ipsec_variant references.  Created in ipsecifattach() and
 * handed to psref_target_init() for every variant set up in this file.
 */
145 struct psref_class *iv_psref_class __read_mostly;
146 
/* Cloner for "ipsec" interfaces, attached in ipsecifattach(). */
147 struct if_clone ipsec_cloner =
148     IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
/* Limit passed to if_tunnel_check_nesting() by if_ipsec_check_nesting(). */
149 static int max_ipsec_nesting = MAX_IPSEC_NEST;
150 
/* sysctl log used for the global nodes made by if_ipsec_sysctl_setup(). */
151 static struct sysctllog *if_ipsec_sysctl;
152 
153 #ifdef INET6
154 static int
155 sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)
156 {
157 	int error, pmtu;
158 	struct sysctlnode node = *rnode;
159 
160 	pmtu = ip6_ipsec_pmtu;
161 	node.sysctl_data = &pmtu;
162 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
163 	if (error || newp == NULL)
164 		return error;
165 
166 	switch (pmtu) {
167 	case IPSEC_PMTU_MINMTU:
168 	case IPSEC_PMTU_OUTERMTU:
169 		ip6_ipsec_pmtu = pmtu;
170 		break;
171 	default:
172 		return EINVAL;
173 	}
174 
175 	return 0;
176 }
177 
178 static int
179 sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)
180 {
181 	int error, pmtu;
182 	struct sysctlnode node = *rnode;
183 	struct ipsec_softc *sc = (struct ipsec_softc *)node.sysctl_data;
184 
185 	pmtu = sc->ipsec_pmtu;
186 	node.sysctl_data = &pmtu;
187 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
188 	if (error || newp == NULL)
189 		return error;
190 
191 	switch (pmtu) {
192 	case IPSEC_PMTU_SYSDEFAULT:
193 	case IPSEC_PMTU_MINMTU:
194 	case IPSEC_PMTU_OUTERMTU:
195 		sc->ipsec_pmtu = pmtu;
196 		break;
197 	default:
198 		return EINVAL;
199 	}
200 
201 	return 0;
202 }
203 #endif
204 
205 static void
206 if_ipsec_sysctl_setup(void)
207 {
208 	if_ipsec_sysctl = NULL;
209 
210 #ifdef INET6
211 	/*
212 	 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error.
213 	 */
214 	sysctl_createv(NULL, 0, NULL, NULL,
215 		       CTLFLAG_PERMANENT,
216 		       CTLTYPE_NODE, "inet6",
217 		       SYSCTL_DESCR("PF_INET6 related settings"),
218 		       NULL, 0, NULL, 0,
219 		       CTL_NET, PF_INET6, CTL_EOL);
220 	sysctl_createv(NULL, 0, NULL, NULL,
221 		       CTLFLAG_PERMANENT,
222 		       CTLTYPE_NODE, "ip6",
223 		       SYSCTL_DESCR("IPv6 related settings"),
224 		       NULL, 0, NULL, 0,
225 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
226 
227 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
228 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
229 		       CTLTYPE_INT, "ipsecifhlim",
230 		       SYSCTL_DESCR("Default hop limit for a ipsec tunnel datagram"),
231 		       NULL, 0, &ip6_ipsec_hlim, 0,
232 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
233 		       IPV6CTL_IPSEC_HLIM, CTL_EOL);
234 
235 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
236 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
237 		       CTLTYPE_INT, "ipsecifpmtu",
238 		       SYSCTL_DESCR("Default Path MTU setting for ipsec tunnels"),
239 		       sysctl_if_ipsec_pmtu_global, 0, NULL, 0,
240 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
241 		       IPV6CTL_IPSEC_PMTU, CTL_EOL);
242 #endif
243 }
244 
/*
 * Create the per-interface "pmtu" sysctl node below the interface's own node
 * under "interfaces" and give sc->ipsec_pmtu its default value.
 *
 * clog: sysctl log that records the created nodes.
 * sc:   softc of the interface; also passed as the data of the "pmtu" node,
 *       where sysctl_if_ipsec_pmtu_perif() picks it up.
 *
 * The default is stored before any node is created, so the handler never
 * sees an unset value and the field is valid even if node creation fails.
 * Each sysctl_createv() result is checked: the parent pointer handed to the
 * next call is only used after the call that produces it has succeeded.
 * Failures are logged and otherwise ignored.
 *
 * Without INET6 this function does nothing.
 */
static void
if_ipsec_perif_sysctl_setup(struct sysctllog **clog, struct ipsec_softc *sc)
{
#ifdef INET6
	const struct sysctlnode *cnode, *rnode = NULL;
	struct ifnet *ifp = &sc->ipsec_if;
	const char *ifname = ifp->if_xname;
	int rv;

	sc->ipsec_pmtu = IPSEC_PMTU_SYSDEFAULT;

	/*
	 * Already created in sysctl_sndq_setup().
	 */
	rv = sysctl_createv(clog, 0, NULL, &rnode,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, "interfaces",
			    SYSCTL_DESCR("Per-interface controls"),
			    NULL, 0, NULL, 0,
			    CTL_NET, CTL_CREATE, CTL_EOL);
	if (rv != 0)
		goto fail;

	rv = sysctl_createv(clog, 0, &rnode, &rnode,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, ifname,
			    SYSCTL_DESCR("Interface controls"),
			    NULL, 0, NULL, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		goto fail;

	rv = sysctl_createv(clog, 0, &rnode, &cnode,
			    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
			    CTLTYPE_INT, "pmtu",
			    SYSCTL_DESCR("Path MTU setting for this ipsec tunnel"),
			    sysctl_if_ipsec_pmtu_perif, 0, (void *)sc, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		goto fail;

	return;

fail:
	log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n", ifname);
#endif
}
282 
283 /* ARGSUSED */
284 void
285 ipsecifattach(int count)
286 {
287 
288 	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
289 	LIST_INIT(&ipsec_softcs.list);
290 
291 	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
292 
293 	if_ipsec_sysctl_setup();
294 
295 	if_clone_attach(&ipsec_cloner);
296 }
297 
298 static int
299 if_ipsec_clone_create(struct if_clone *ifc, int unit)
300 {
301 	struct ipsec_softc *sc;
302 	struct ipsec_variant *var;
303 	struct ifnet *ifp;
304 
305 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
306 
307 	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
308 
309 	if_ipsec_attach0(sc);
310 
311 	ifp = &sc->ipsec_if;
312 	if_ipsec_perif_sysctl_setup(&ifp->if_sysctl_log, sc);
313 
314 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
315 	var->iv_softc = sc;
316 	psref_target_init(&var->iv_psref, iv_psref_class);
317 
318 	sc->ipsec_var = var;
319 	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
320 	sc->ipsec_psz = pserialize_create();
321 	sc->ipsec_ro_percpu = if_tunnel_alloc_ro_percpu();
322 
323 	mutex_enter(&ipsec_softcs.lock);
324 	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
325 	mutex_exit(&ipsec_softcs.lock);
326 	return 0;
327 }
328 
329 static void
330 if_ipsec_attach0(struct ipsec_softc *sc)
331 {
332 
333 	sc->ipsec_if.if_addrlen = 0;
334 	sc->ipsec_if.if_mtu    = IPSEC_MTU;
335 	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
336 	/* set ipsec(4) specific default flags. */
337 	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
338 	sc->ipsec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE;
339 	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
340 	sc->ipsec_if.if_output = if_ipsec_output;
341 	sc->ipsec_if.if_type   = IFT_IPSEC;
342 	sc->ipsec_if.if_dlt    = DLT_NULL;
343 	sc->ipsec_if.if_softc  = sc;
344 	IFQ_SET_READY(&sc->ipsec_if.if_snd);
345 	if_initialize(&sc->ipsec_if);
346 	if_alloc_sadl(&sc->ipsec_if);
347 	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
348 	if_register(&sc->ipsec_if);
349 }
350 
351 static int
352 if_ipsec_clone_destroy(struct ifnet *ifp)
353 {
354 	struct ipsec_softc *sc = ifp->if_softc;
355 	struct ipsec_variant *var;
356 	int bound;
357 
358 	mutex_enter(&ipsec_softcs.lock);
359 	LIST_REMOVE(sc, ipsec_list);
360 	mutex_exit(&ipsec_softcs.lock);
361 
362 	bound = curlwp_bind();
363 	if_ipsec_delete_tunnel(&sc->ipsec_if);
364 	curlwp_bindx(bound);
365 
366 	bpf_detach(ifp);
367 	if_detach(ifp);
368 
369 	if_tunnel_free_ro_percpu(sc->ipsec_ro_percpu);
370 
371 	pserialize_destroy(sc->ipsec_psz);
372 	mutex_destroy(&sc->ipsec_lock);
373 
374 	var = sc->ipsec_var;
375 	kmem_free(var, sizeof(*var));
376 	kmem_free(sc, sizeof(*sc));
377 
378 	return 0;
379 }
380 
381 static inline bool
382 if_ipsec_nat_t(struct ipsec_softc *sc)
383 {
384 
385 	return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
386 }
387 
388 static inline bool
389 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
390 {
391 
392 	return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
393 }
394 
395 int
396 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
397 {
398 	uint8_t v;
399 	struct ipsec_softc *sc;
400 	struct ipsec_variant *var = NULL;
401 	struct psref psref;
402 	int ret = 0;
403 
404 	sc = arg;
405 	KASSERT(sc != NULL);
406 
407 	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
408 		goto out;
409 
410 	var = if_ipsec_getref_variant(sc, &psref);
411 	if (if_ipsec_variant_is_unconfigured(var))
412 		goto out;
413 
414 	switch (proto) {
415 	case IPPROTO_IPV4:
416 	case IPPROTO_IPV6:
417 		break;
418 	default:
419 		goto out;
420 	}
421 
422 	m_copydata(m, 0, sizeof(v), &v);
423 	v = (v >> 4) & 0xff;  /* Get the IP version number. */
424 
425 	switch (v) {
426 #ifdef INET
427 	case IPVERSION: {
428 		struct ip ip;
429 
430 		if (m->m_pkthdr.len < sizeof(ip))
431 			goto out;
432 
433 		m_copydata(m, 0, sizeof(ip), &ip);
434 		if (var->iv_psrc->sa_family != AF_INET ||
435 		    var->iv_pdst->sa_family != AF_INET)
436 			goto out;
437 		ret = ipsecif4_encap_func(m, &ip, var);
438 		break;
439 	}
440 #endif
441 #ifdef INET6
442 	case (IPV6_VERSION >> 4): {
443 		struct ip6_hdr ip6;
444 
445 		if (m->m_pkthdr.len < sizeof(ip6))
446 			goto out;
447 
448 		m_copydata(m, 0, sizeof(ip6), &ip6);
449 		if (var->iv_psrc->sa_family != AF_INET6 ||
450 		    var->iv_pdst->sa_family != AF_INET6)
451 			goto out;
452 		ret = ipsecif6_encap_func(m, &ip6, var);
453 		break;
454 	}
455 #endif
456 	default:
457 		goto out;
458 	}
459 
460 out:
461 	if (var != NULL)
462 		if_ipsec_putref_variant(var, &psref);
463 	return ret;
464 }
465 
466 /*
467  * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
468  * We'll prevent this by introducing upper limit.
469  */
470 static int
471 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
472 {
473 
474 	return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
475 }
476 
477 int
478 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
479     const struct rtentry *rt)
480 {
481 	struct ipsec_softc *sc = ifp->if_softc;
482 	struct ipsec_variant *var;
483 	struct psref psref;
484 	int error;
485 	int bound;
486 
487 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
488 
489 	error = if_ipsec_check_nesting(ifp, m);
490 	if (error) {
491 		m_freem(m);
492 		goto noref_end;
493 	}
494 
495 	if ((ifp->if_flags & IFF_UP) == 0) {
496 		m_freem(m);
497 		error = ENETDOWN;
498 		goto noref_end;
499 	}
500 
501 
502 	bound = curlwp_bind();
503 	var = if_ipsec_getref_variant(sc, &psref);
504 	if (if_ipsec_variant_is_unconfigured(var)) {
505 		m_freem(m);
506 		error = ENETDOWN;
507 		goto end;
508 	}
509 
510 	m->m_flags &= ~(M_BCAST|M_MCAST);
511 
512 	/* use DLT_NULL encapsulation here to pass inner af type */
513 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
514 	if (!m) {
515 		error = ENOBUFS;
516 		goto end;
517 	}
518 	*mtod(m, int *) = dst->sa_family;
519 
520 #if INET6
521 	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
522 	if (dst->sa_family == AF_INET6 &&
523 	    !if_ipsec_fwd_ipv6(sc)) {
524 		/*
525 		 * IPv6 packet is not allowed to forward,that is not error.
526 		 */
527 		error = 0;
528 		IF_DROP(&ifp->if_snd);
529 		m_freem(m);
530 		goto end;
531 	}
532 #endif
533 
534 	error = if_ipsec_out_direct(var, m, dst->sa_family);
535 
536 end:
537 	if_ipsec_putref_variant(var, &psref);
538 	curlwp_bindx(bound);
539 noref_end:
540 	if (error)
541 		ifp->if_oerrors++;
542 
543 	return error;
544 }
545 
546 static inline int
547 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
548 {
549 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
550 	int error;
551 	int len;
552 
553 	KASSERT(if_ipsec_heldref_variant(var));
554 	KASSERT(var->iv_output != NULL);
555 
556 	len = m->m_pkthdr.len;
557 
558 	/* input DLT_NULL frame to BPF */
559 	bpf_mtap(ifp, m, BPF_D_OUT);
560 
561 	/* grab and chop off inner af type */
562 	/* XXX need pullup? */
563 	m_adj(m, sizeof(int));
564 
565 	error = var->iv_output(var, family, m);
566 	if (error)
567 		return error;
568 
569 	ifp->if_opackets++;
570 	ifp->if_obytes += len;
571 
572 	return 0;
573 }
574 
575 void
576 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
577 {
578 
579 	KASSERT(ifp != NULL);
580 
581 	m_set_rcvif(m, ifp);
582 
583 	bpf_mtap_af(ifp, af, m, BPF_D_IN);
584 
585 	if_ipsec_in_enqueue(m, af, ifp);
586 
587 	return;
588 }
589 
590 static inline void
591 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
592 {
593 	pktqueue_t *pktq;
594 	int pktlen;
595 
596 	/*
597 	 * Put the packet to the network layer input queue according to the
598 	 * specified address family.
599 	 */
600 	switch (af) {
601 #ifdef INET
602 	case AF_INET:
603 		pktq = ip_pktq;
604 		break;
605 #endif
606 #ifdef INET6
607 	case AF_INET6:
608 		pktq = ip6_pktq;
609 		break;
610 #endif
611 	default:
612 		ifp->if_ierrors++;
613 		m_freem(m);
614 		return;
615 	}
616 
617 #if 1
618 	const u_int h = curcpu()->ci_index;
619 #else
620 	const uint32_t h = pktq_rps_hash(m);
621 #endif
622 	pktlen = m->m_pkthdr.len;
623 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
624 		ifp->if_ibytes += pktlen;
625 		ifp->if_ipackets++;
626 	} else {
627 		ifp->if_iqdrops++;
628 		m_freem(m);
629 	}
630 
631 	return;
632 }
633 
634 static inline int
635 if_ipsec_check_salen(struct sockaddr *addr)
636 {
637 
638 	switch (addr->sa_family) {
639 #ifdef INET
640 	case AF_INET:
641 		if (addr->sa_len != sizeof(struct sockaddr_in))
642 			return EINVAL;
643 		break;
644 #endif /* INET */
645 #ifdef INET6
646 	case AF_INET6:
647 		if (addr->sa_len != sizeof(struct sockaddr_in6))
648 			return EINVAL;
649 		break;
650 #endif /* INET6 */
651 	default:
652 		return EAFNOSUPPORT;
653 	}
654 
655 	return 0;
656 }
657 
/*
 * ioctl handler installed as if_ioctl by if_ipsec_attach0().
 *
 * Handles address initialization, multicast add/delete (only for the
 * compiled-in AF_INET / AF_INET6), MTU changes limited to
 * IPSEC_MTU_MIN..IPSEC_MTU_MAX, and setting, deleting and reading the
 * tunnel's physical source and destination addresses.  Any other command is
 * passed to ifioctl_common(); if that succeeds, if_ipsec_ensure_flags() is
 * called with the interface flags sampled on entry (oflags).
 *
 * Returns 0 or an errno value.
 *
 * Every path that jumps to "bad" has already called curlwp_bind(); "bad"
 * releases the variant reference when one was taken (var != NULL) and then
 * undoes the binding.  Early validation failures return directly because
 * nothing has been acquired at that point.
 */
658 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
659 int
660 if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
661 {
662 	struct ipsec_softc *sc  = ifp->if_softc;
663 	struct ipsec_variant *var = NULL;
664 	struct ifreq     *ifr = (struct ifreq*)data;
665 	struct ifaddr    *ifa = (struct ifaddr*)data;
666 	int error = 0, size;
667 	struct sockaddr *dst, *src;
668 	u_long mtu;
669 	u_short oflags = ifp->if_flags;
670 	int bound;
671 	struct psref psref;
672 
673 	switch (cmd) {
674 	case SIOCINITIFADDR:
675 		ifp->if_flags |= IFF_UP;
676 		ifa->ifa_rtrequest = p2p_rtrequest;
677 		break;
678 
679 	case SIOCSIFDSTADDR:
680 		break;
681 
682 	case SIOCADDMULTI:
683 	case SIOCDELMULTI:
684 		switch (ifr->ifr_addr.sa_family) {
685 #ifdef INET
686 		case AF_INET:	/* IP supports Multicast */
687 			break;
688 #endif /* INET */
689 #ifdef INET6
690 		case AF_INET6:	/* IP6 supports Multicast */
691 			break;
692 #endif /* INET6 */
693 		default:  /* Other protocols doesn't support Multicast */
694 			error = EAFNOSUPPORT;
695 			break;
696 		}
697 		break;
698 
699 	case SIOCSIFMTU:
		/* range-check first; ENETRESET from ifioctl_common() is success */
700 		mtu = ifr->ifr_mtu;
701 		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
702 			return EINVAL;
703 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
704 			error = 0;
705 		break;
706 
707 #ifdef INET
708 	case SIOCSIFPHYADDR:
709 #endif
710 #ifdef INET6
711 	case SIOCSIFPHYADDR_IN6:
712 #endif /* INET6 */
713 	case SIOCSLIFPHYADDR:
		/* locate src/dst inside the request structure of this command */
714 		switch (cmd) {
715 #ifdef INET
716 		case SIOCSIFPHYADDR:
717 			src = (struct sockaddr *)
718 				&(((struct in_aliasreq *)data)->ifra_addr);
719 			dst = (struct sockaddr *)
720 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
721 			break;
722 #endif /* INET */
723 #ifdef INET6
724 		case SIOCSIFPHYADDR_IN6:
725 			src = (struct sockaddr *)
726 				&(((struct in6_aliasreq *)data)->ifra_addr);
727 			dst = (struct sockaddr *)
728 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
729 			break;
730 #endif /* INET6 */
731 		case SIOCSLIFPHYADDR:
732 			src = (struct sockaddr *)
733 				&(((struct if_laddrreq *)data)->addr);
734 			dst = (struct sockaddr *)
735 				&(((struct if_laddrreq *)data)->dstaddr);
736 			break;
737 		default:
738 			return EINVAL;
739 		}
740 
741 		/* sa_family must be equal */
742 		if (src->sa_family != dst->sa_family)
743 			return EINVAL;
744 
745 		error = if_ipsec_check_salen(src);
746 		if (error)
747 			return error;
748 		error = if_ipsec_check_salen(dst);
749 		if (error)
750 			return error;
751 
752 		/* check sa_family looks sane for the cmd */
753 		switch (cmd) {
754 #ifdef INET
755 		case SIOCSIFPHYADDR:
756 			if (src->sa_family == AF_INET)
757 				break;
758 			return EAFNOSUPPORT;
759 #endif /* INET */
760 #ifdef INET6
761 		case SIOCSIFPHYADDR_IN6:
762 			if (src->sa_family == AF_INET6)
763 				break;
764 			return EAFNOSUPPORT;
765 #endif /* INET6 */
766 		case SIOCSLIFPHYADDR:
767 			/* checks done in the above */
768 			break;
769 		}
770 		/*
771 		 * calls if_ipsec_getref_variant() for other softcs to check
772 		 * address pair duplication
773 		 */
774 		bound = curlwp_bind();
775 		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
776 		if (error)
777 			goto bad;
778 		curlwp_bindx(bound);
779 		break;
780 
781 	case SIOCDIFPHYADDR:
782 		bound = curlwp_bind();
783 		if_ipsec_delete_tunnel(&sc->ipsec_if);
784 		curlwp_bindx(bound);
785 		break;
786 
787 	case SIOCGIFPSRCADDR:
788 #ifdef INET6
789 	case SIOCGIFPSRCADDR_IN6:
790 #endif /* INET6 */
		/* report the physical source address (and port) of the tunnel */
791 		bound = curlwp_bind();
792 		var = if_ipsec_getref_variant(sc, &psref);
793 		if (var->iv_psrc == NULL) {
794 			error = EADDRNOTAVAIL;
795 			goto bad;
796 		}
797 		src = var->iv_psrc;
798 		switch (cmd) {
799 #ifdef INET
800 		case SIOCGIFPSRCADDR:
801 			dst = &ifr->ifr_addr;
802 			size = sizeof(ifr->ifr_addr);
803 			break;
804 #endif /* INET */
805 #ifdef INET6
806 		case SIOCGIFPSRCADDR_IN6:
807 			dst = (struct sockaddr *)
808 				&(((struct in6_ifreq *)data)->ifr_addr);
809 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
810 			break;
811 #endif /* INET6 */
812 		default:
813 			error = EADDRNOTAVAIL;
814 			goto bad;
815 		}
		/* the stored address must fit into the caller's buffer */
816 		if (src->sa_len > size) {
817 			error = EINVAL;
818 			goto bad;
819 		}
820 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
821 		if (error)
822 			goto bad;
823 		if_ipsec_putref_variant(var, &psref);
824 		curlwp_bindx(bound);
825 		break;
826 
827 	case SIOCGIFPDSTADDR:
828 #ifdef INET6
829 	case SIOCGIFPDSTADDR_IN6:
830 #endif /* INET6 */
		/* same as above, for the physical destination address */
831 		bound = curlwp_bind();
832 		var = if_ipsec_getref_variant(sc, &psref);
833 		if (var->iv_pdst == NULL) {
834 			error = EADDRNOTAVAIL;
835 			goto bad;
836 		}
837 		src = var->iv_pdst;
838 		switch (cmd) {
839 #ifdef INET
840 		case SIOCGIFPDSTADDR:
841 			dst = &ifr->ifr_addr;
842 			size = sizeof(ifr->ifr_addr);
843 			break;
844 #endif /* INET */
845 #ifdef INET6
846 		case SIOCGIFPDSTADDR_IN6:
847 			dst = (struct sockaddr *)
848 				&(((struct in6_ifreq *)data)->ifr_addr);
849 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
850 			break;
851 #endif /* INET6 */
852 		default:
853 			error = EADDRNOTAVAIL;
854 			goto bad;
855 		}
856 		if (src->sa_len > size) {
857 			error = EINVAL;
858 			goto bad;
859 		}
860 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
861 		if (error)
862 			goto bad;
863 		if_ipsec_putref_variant(var, &psref);
864 		curlwp_bindx(bound);
865 		break;
866 
867 	case SIOCGLIFPHYADDR:
		/* report both physical addresses through one if_laddrreq */
868 		bound = curlwp_bind();
869 		var = if_ipsec_getref_variant(sc, &psref);
870 		if (if_ipsec_variant_is_unconfigured(var)) {
871 			error = EADDRNOTAVAIL;
872 			goto bad;
873 		}
874 
875 		/* copy src */
876 		src = var->iv_psrc;
877 		dst = (struct sockaddr *)
878 			&(((struct if_laddrreq *)data)->addr);
879 		size = sizeof(((struct if_laddrreq *)data)->addr);
880 		if (src->sa_len > size) {
881 			error = EINVAL;
882 			goto bad;
883 		}
884 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
885 		if (error)
886 			goto bad;
887 
888 		/* copy dst */
889 		src = var->iv_pdst;
890 		dst = (struct sockaddr *)
891 			&(((struct if_laddrreq *)data)->dstaddr);
892 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
893 		if (src->sa_len > size) {
894 			error = EINVAL;
895 			goto bad;
896 		}
897 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
898 		if (error)
899 			goto bad;
900 		if_ipsec_putref_variant(var, &psref);
901 		curlwp_bindx(bound);
902 		break;
903 
904 	default:
		/* generic handling, then a follow-up with the pre-ioctl flags */
905 		error = ifioctl_common(ifp, cmd, data);
906 		if (!error) {
907 			bound = curlwp_bind();
908 			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
909 			if (error)
910 				goto bad;
911 			curlwp_bindx(bound);
912 		}
913 		break;
914 	}
915 	return error;
916 
	/* common error exit for paths that hold the LWP binding */
917 bad:
918 	if (var != NULL)
919 		if_ipsec_putref_variant(var, &psref);
920 	curlwp_bindx(bound);
921 
922 	return error;
923 }
924 
/*
 * Per address family operation table used by if_ipsec_encap_common().
 * Each member exists only when its address family is compiled in.
 */
struct encap_funcs {
#ifdef INET
	int (*ef_inet)(struct ipsec_variant *);		/* AF_INET tunnels */
#endif
#ifdef INET6
	int (*ef_inet6)(struct ipsec_variant *);	/* AF_INET6 tunnels */
#endif
};

/* Operations run by if_ipsec_encap_attach(). */
static struct encap_funcs ipsec_encap_attach = {
#ifdef INET
	.ef_inet = ipsecif4_attach,
#endif
#ifdef INET6
	.ef_inet6 = ipsecif6_attach,
#endif
};

/* Operations run by if_ipsec_encap_detach(). */
static struct encap_funcs ipsec_encap_detach = {
#ifdef INET
	.ef_inet = ipsecif4_detach,
#endif
#ifdef INET6
	.ef_inet6 = ipsecif6_detach,
#endif
};
951 
952 static int
953 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
954 {
955 	int error;
956 
957 	KASSERT(var != NULL);
958 	KASSERT(if_ipsec_variant_is_configured(var));
959 
960 	switch (var->iv_psrc->sa_family) {
961 #ifdef INET
962 	case AF_INET:
963 		error = (funcs->ef_inet)(var);
964 		break;
965 #endif /* INET */
966 #ifdef INET6
967 	case AF_INET6:
968 		error = (funcs->ef_inet6)(var);
969 		break;
970 #endif /* INET6 */
971 	default:
972 		error = EINVAL;
973 		break;
974 	}
975 
976 	return error;
977 }
978 
979 static int
980 if_ipsec_encap_attach(struct ipsec_variant *var)
981 {
982 
983 	return if_ipsec_encap_common(var, &ipsec_encap_attach);
984 }
985 
986 static int
987 if_ipsec_encap_detach(struct ipsec_variant *var)
988 {
989 
990 	return if_ipsec_encap_common(var, &ipsec_encap_detach);
991 }
992 
993 /*
994  * Validate and set ipsec(4) I/F configurations.
995  *     (1) validate
996  *         (1-1) Check the argument src and dst address pair will change
997  *               configuration from current src and dst address pair.
998  *         (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
999  *               with argument src and dst address pair, except for NAT-T shared
1000  *               tunnels.
1001  *     (2) set
1002  *         (2-1) Create variant for new configuration.
1003  *         (2-2) Create temporary "null" variant used to avoid to access
1004  *               dangling variant while SPs are deleted and added.
1005  *         (2-3) Swap variant include its SPs.
1006  *         (2-4) Cleanup last configurations.
 *
 * ifp: the ipsec(4) interface to configure.
 * src: new physical source address; its port is remembered separately
 *      (iv_sport) and zeroed in the stored copy.
 * dst: new physical destination address; handled like src (iv_dport).
 *
 * Returns 0 on success, including the case where the requested pair equals
 * the current configuration.  Otherwise returns the error from
 * encap_lock_enter(), EINVAL for an unsupported address family,
 * EADDRNOTAVAIL when another interface already uses the pair, or the error
 * from if_ipsec_encap_attach() / if_ipsec_update_variant().
 *
 * Locking: encap_lock_enter() is taken first, then sc->ipsec_lock; the
 * global ipsec_softcs.lock is held only while the softc list is walked.
 *
 * Memory: the duplicated addresses, nvar and nullvar are allocated before
 * the locks are taken.  On every path through "out" all four are freed.  On
 * success the old variant, its addresses and nullvar are freed; nvar and the
 * new addresses are kept (presumably installed by if_ipsec_update_variant(),
 * which is not visible here).
1007  */
1008 static int
1009 if_ipsec_set_tunnel(struct ifnet *ifp,
1010     struct sockaddr *src, struct sockaddr *dst)
1011 {
1012 	struct ipsec_softc *sc = ifp->if_softc;
1013 	struct ipsec_softc *sc2;
1014 	struct ipsec_variant *ovar, *nvar, *nullvar;
1015 	struct sockaddr *osrc, *odst;
1016 	struct sockaddr *nsrc, *ndst;
1017 	in_port_t nsport = 0, ndport = 0;
1018 	int error;
1019 
1020 	error = encap_lock_enter();
1021 	if (error)
1022 		return error;
1023 
1024 	nsrc = sockaddr_dup(src, M_WAITOK);
1025 	ndst = sockaddr_dup(dst, M_WAITOK);
1026 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1027 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1028 
1029 	mutex_enter(&sc->ipsec_lock);
1030 
1031 	ovar = sc->ipsec_var;
1032 
	/* split the ports off the duplicated addresses */
1033 	switch(nsrc->sa_family) {
1034 #ifdef INET
1035 	case AF_INET:
1036 		nsport = satosin(src)->sin_port;
1037 		/*
1038 		 * avoid confuse SP when NAT-T disabled,
1039 		 * e.g.
1040 		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
1041 		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
1042 		 */
1043 		satosin(nsrc)->sin_port = 0;
1044 		ndport = satosin(dst)->sin_port;
1045 		satosin(ndst)->sin_port = 0;
1046 		break;
1047 #endif /* INET */
1048 #ifdef INET6
1049 	case AF_INET6:
1050 		nsport = satosin6(src)->sin6_port;
1051 		satosin6(nsrc)->sin6_port = 0;
1052 		ndport = satosin6(dst)->sin6_port;
1053 		satosin6(ndst)->sin6_port = 0;
1054 		break;
1055 #endif /* INET6 */
1056 	default:
1057 		log(LOG_DEBUG,
1058 		    "%s: Invalid address family: %d.\n",
1059 		    __func__, src->sa_family);
1060 		error = EINVAL;
1061 		goto out;
1062 	}
1063 
1064 	/*
1065 	 * (1-1) Check the argument src and dst address pair will change
1066 	 *       configuration from current src and dst address pair.
1067 	 */
	/*
	 * NOTE(review): this check and the one in (1-2) compare against the
	 * caller's src/dst, which still carry their ports, whereas the
	 * addresses stored by this function have the port zeroed.  Confirm
	 * how sockaddr_cmp() treats the port, and whether nsrc/ndst were
	 * meant here.
	 */
1068 	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
1069 	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
1070 	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
1071 		/* address and port pair not changed. */
1072 		error = 0;
1073 		goto out;
1074 	}
1075 
1076 	/*
1077 	 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
1078 	 *       with argument src and dst address pair, except for NAT-T shared
1079 	 *       tunnels.
1080 	 */
1081 	mutex_enter(&ipsec_softcs.lock);
1082 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1083 		struct ipsec_variant *var2;
1084 		struct psref psref;
1085 
1086 		if (sc2 == sc)
1087 			continue;
		/* every exit from this iteration must drop the reference on var2 */
1088 		var2 = if_ipsec_getref_variant(sc2, &psref);
1089 		if (if_ipsec_variant_is_unconfigured(var2)) {
1090 			if_ipsec_putref_variant(var2, &psref);
1091 			continue;
1092 		}
1093 		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
1094 			if_ipsec_putref_variant(var2, &psref);
1095 			continue; /* NAT-T shared tunnel */
1096 		}
1097 		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
1098 		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
1099 			if_ipsec_putref_variant(var2, &psref);
1100 			mutex_exit(&ipsec_softcs.lock);
1101 			error = EADDRNOTAVAIL;
1102 			goto out;
1103 		}
1104 
1105 		if_ipsec_putref_variant(var2, &psref);
1106 		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
1107 	}
1108 	mutex_exit(&ipsec_softcs.lock);
1109 
1110 
	/* remember the old addresses so they can be freed after the swap */
1111 	osrc = ovar->iv_psrc;
1112 	odst = ovar->iv_pdst;
1113 
1114 	/*
1115 	 * (2-1) Create ipsec_variant for new configuration.
1116 	 */
1117 	if_ipsec_copy_variant(nvar, ovar);
1118 	nvar->iv_psrc = nsrc;
1119 	nvar->iv_pdst = ndst;
1120 	nvar->iv_sport = nsport;
1121 	nvar->iv_dport = ndport;
1122 	nvar->iv_encap_cookie4 = NULL;
1123 	nvar->iv_encap_cookie6 = NULL;
1124 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1125 	error = if_ipsec_encap_attach(nvar);
1126 	if (error)
1127 		goto out;
1128 
1129 	/*
1130 	 * (2-2) Create temporary "null" variant.
1131 	 */
1132 	if_ipsec_copy_variant(nullvar, ovar);
1133 	if_ipsec_clear_config(nullvar);
1134 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1135 	membar_producer();
1136 	/*
1137 	 * (2-3) Swap variant include its SPs.
1138 	 */
1139 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1140 	if (error) {
		/* undo the attach from (2-1) before the common cleanup */
1141 		if_ipsec_encap_detach(nvar);
1142 		goto out;
1143 	}
1144 
1145 	mutex_exit(&sc->ipsec_lock);
1146 
1147 	/*
1148 	 * (2-4) Cleanup last configurations.
1149 	 */
1150 	if (if_ipsec_variant_is_configured(ovar))
1151 		if_ipsec_encap_detach(ovar);
1152 	encap_lock_exit();
1153 
1154 	if (osrc != NULL)
1155 		sockaddr_free(osrc);
1156 	if (odst != NULL)
1157 		sockaddr_free(odst);
1158 	kmem_free(ovar, sizeof(*ovar));
1159 	kmem_free(nullvar, sizeof(*nullvar));
1160 
1161 	return 0;
1162 
	/* common exit for "nothing to do" and errors: nothing new was installed */
1163 out:
1164 	mutex_exit(&sc->ipsec_lock);
1165 	encap_lock_exit();
1166 
1167 	sockaddr_free(nsrc);
1168 	sockaddr_free(ndst);
1169 	kmem_free(nvar, sizeof(*nvar));
1170 	kmem_free(nullvar, sizeof(*nullvar));
1171 
1172 	return error;
1173 }
1174 
1175 /*
1176  * Validate and delete ipsec(4) I/F configurations.
1177  *     (1) validate
1178  *         (1-1) Check current src and dst address pair are null,
1179  *               which means the ipsec(4) I/F is already done deletetunnel.
1180  *     (2) delete
1181  *         (2-1) Create variant for deleted status.
1182  *         (2-2) Create temporary "null" variant used to avoid to access
1183  *               dangling variant while SPs are deleted and added.
1184  *               NOTE:
1185  *               The contents of temporary "null" variant equal to the variant
1186  *               of (2-1), however two psref_target_destroy() synchronization
1187  *               points are necessary to avoid to access dangling variant
1188  *               while SPs are deleted and added. To implement that simply,
1189  *               we use the same manner as if_ipsec_set_tunnel(), that is,
1190  *               create extra "null" variant and use it temporarily.
1191  *         (2-3) Swap variant include its SPs.
1192  *         (2-4) Cleanup last configurations.
1193  */
1194 static void
1195 if_ipsec_delete_tunnel(struct ifnet *ifp)
1196 {
1197 	struct ipsec_softc *sc = ifp->if_softc;
1198 	struct ipsec_variant *ovar, *nvar, *nullvar;
1199 	struct sockaddr *osrc, *odst;
1200 	int error;
1201 
1202 	error = encap_lock_enter();
1203 	if (error)
1204 		return;
1205 
1206 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1207 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1208 
1209 	mutex_enter(&sc->ipsec_lock);
1210 
1211 	ovar = sc->ipsec_var;
1212 	osrc = ovar->iv_psrc;
1213 	odst = ovar->iv_pdst;
1214 	/*
1215 	 * (1-1) Check current src and dst address pair are null,
1216 	 *       which means the ipsec(4) I/F is already done deletetunnel.
1217 	 */
1218 	if (osrc == NULL || odst == NULL) {
1219 		/* address pair not changed. */
1220 		mutex_exit(&sc->ipsec_lock);
1221 		encap_lock_exit();
1222 		kmem_free(nvar, sizeof(*nvar));
1223 		kmem_free(nullvar, sizeof(*nullvar));
1224 		return;
1225 	}
1226 
1227 	/*
1228 	 * (2-1) Create variant for deleted status.
1229 	 */
1230 	if_ipsec_copy_variant(nvar, ovar);
1231 	if_ipsec_clear_config(nvar);
1232 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1233 
1234 	/*
1235 	 * (2-2) Create temporary "null" variant used to avoid to access
1236 	 *       dangling variant while SPs are deleted and added.
1237 	 */
1238 	if_ipsec_copy_variant(nullvar, ovar);
1239 	if_ipsec_clear_config(nullvar);
1240 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1241 	membar_producer();
1242 	/*
1243 	 * (2-3) Swap variant include its SPs.
1244 	 */
1245 	/* if_ipsec_update_variant() does not fail when delete SP only. */
1246 	(void)if_ipsec_update_variant(sc, nvar, nullvar);
1247 
1248 	mutex_exit(&sc->ipsec_lock);
1249 
1250 	/*
1251 	 * (2-4) Cleanup last configurations.
1252 	 */
1253 	if (if_ipsec_variant_is_configured(ovar))
1254 		if_ipsec_encap_detach(ovar);
1255 	encap_lock_exit();
1256 
1257 	sockaddr_free(osrc);
1258 	sockaddr_free(odst);
1259 	kmem_free(ovar, sizeof(*ovar));
1260 	kmem_free(nullvar, sizeof(*nullvar));
1261 }
1262 
1263 /*
1264  * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1265  *     (1) check
1266  *         (1-1) Check flags are changed.
1267  *         (1-2) Check current src and dst address pair. If they are null,
1268  *               that means the ipsec(4) I/F is deletetunnel'ed, so it is
1269  *               not needed to update.
1270  *     (2) update
1271  *         (2-1) Create variant for new SPs.
1272  *         (2-2) Create temporary "null" variant used to avoid to access
1273  *               dangling variant while SPs are deleted and added.
1274  *               NOTE:
1275  *               There is the same problem as if_ipsec_delete_tunnel().
1276  *         (2-3) Swap variant include its SPs.
1277  *         (2-4) Cleanup unused configurations.
1278  *               NOTE: use the same encap_cookies.
1279  */
1280 static int
1281 if_ipsec_ensure_flags(struct ifnet *ifp, u_short oflags)
1282 {
1283 	struct ipsec_softc *sc = ifp->if_softc;
1284 	struct ipsec_variant *ovar, *nvar, *nullvar;
1285 	int error;
1286 
1287 	/*
1288 	 * (1) Check flags are changed.
1289 	 */
1290 	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1291 	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1292 		return 0; /* flags not changed. */
1293 
1294 	error = encap_lock_enter();
1295 	if (error)
1296 		return error;
1297 
1298 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1299 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1300 
1301 	mutex_enter(&sc->ipsec_lock);
1302 
1303 	ovar = sc->ipsec_var;
1304 	/*
1305 	 * (1-2) Check current src and dst address pair.
1306 	 */
1307 	if (if_ipsec_variant_is_unconfigured(ovar)) {
1308 		/* nothing to do */
1309 		mutex_exit(&sc->ipsec_lock);
1310 		encap_lock_exit();
1311 		kmem_free(nvar, sizeof(*nvar));
1312 		kmem_free(nullvar, sizeof(*nullvar));
1313 		return 0;
1314 	}
1315 
1316 	/*
1317 	 * (2-1) Create variant for new SPs.
1318 	 */
1319 	if_ipsec_copy_variant(nvar, ovar);
1320 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1321 	/*
1322 	 * (2-2) Create temporary "null" variant used to avoid to access
1323 	 *       dangling variant while SPs are deleted and added.
1324 	 */
1325 	if_ipsec_copy_variant(nullvar, ovar);
1326 	if_ipsec_clear_config(nullvar);
1327 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1328 	membar_producer();
1329 	/*
1330 	 * (2-3) Swap variant include its SPs.
1331 	 */
1332 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1333 
1334 	mutex_exit(&sc->ipsec_lock);
1335 	encap_lock_exit();
1336 
1337 	/*
1338 	 * (2-4) Cleanup unused configurations.
1339 	 */
1340 	if (!error)
1341 		kmem_free(ovar, sizeof(*ovar));
1342 	else
1343 		kmem_free(nvar, sizeof(*ovar));
1344 	kmem_free(nullvar, sizeof(*nullvar));
1345 
1346 	return error;
1347 }
1348 
1349 /*
1350  * SPD management
1351  */
1352 
1353 /*
1354  * Share SP set with other NAT-T ipsec(4) I/F(s).
1355  *     Return 1, when "var" shares SP set.
1356  *     Return 0, when "var" cannot share SP set.
1357  *
1358  * NOTE:
1359  * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1360  * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1361  * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1362  * set_tunnel causes race.
1363  * Currently, (fortunately) encap_lock works as this global lock.
1364  */
1365 static int
1366 if_ipsec_share_sp(struct ipsec_variant *var)
1367 {
1368 	struct ipsec_softc *sc = var->iv_softc;
1369 	struct ipsec_softc *sc2;
1370 	struct ipsec_variant *var2;
1371 	struct psref psref;
1372 
1373 	KASSERT(encap_lock_held());
1374 	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1375 
1376 	mutex_enter(&ipsec_softcs.lock);
1377 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1378 		if (sc2 == sc)
1379 			continue;
1380 		var2 = if_ipsec_getref_variant(sc2, &psref);
1381 		if (if_ipsec_variant_is_unconfigured(var2)) {
1382 			if_ipsec_putref_variant(var2, &psref);
1383 			continue;
1384 		}
1385 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1386 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1387 			if_ipsec_putref_variant(var2, &psref);
1388 			continue;
1389 		}
1390 
1391 		break;
1392 	}
1393 	mutex_exit(&ipsec_softcs.lock);
1394 	if (sc2 == NULL)
1395 		return 0; /* not shared */
1396 
1397 	IV_SP_IN(var) = IV_SP_IN(var2);
1398 	IV_SP_IN6(var) = IV_SP_IN6(var2);
1399 	IV_SP_OUT(var) = IV_SP_OUT(var2);
1400 	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1401 
1402 	if_ipsec_putref_variant(var2, &psref);
1403 	return 1; /* shared */
1404 }
1405 
1406 /*
1407  * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1408  *     Return 1, when "var" shared SP set, and then unshare them.
1409  *     Return 0, when "var" did not share SP set.
1410  *
1411  * NOTE:
1412  * See if_ipsec_share_sp()'s note.
1413  */
1414 static int
1415 if_ipsec_unshare_sp(struct ipsec_variant *var)
1416 {
1417 	struct ipsec_softc *sc = var->iv_softc;
1418 	struct ipsec_softc *sc2;
1419 	struct ipsec_variant *var2;
1420 	struct psref psref;
1421 
1422 	KASSERT(encap_lock_held());
1423 
1424 	if (!var->iv_pdst || !var->iv_psrc)
1425 		return 0;
1426 
1427 	mutex_enter(&ipsec_softcs.lock);
1428 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1429 		if (sc2 == sc)
1430 			continue;
1431 		var2 = if_ipsec_getref_variant(sc2, &psref);
1432 		if (!var2->iv_pdst || !var2->iv_psrc) {
1433 			if_ipsec_putref_variant(var2, &psref);
1434 			continue;
1435 		}
1436 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1437 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1438 			if_ipsec_putref_variant(var2, &psref);
1439 			continue;
1440 		}
1441 
1442 		break;
1443 	}
1444 	mutex_exit(&ipsec_softcs.lock);
1445 	if (sc2 == NULL)
1446 		return 0; /* not shared */
1447 
1448 	IV_SP_IN(var) = NULL;
1449 	IV_SP_IN6(var) = NULL;
1450 	IV_SP_OUT(var) = NULL;
1451 	IV_SP_OUT6(var) = NULL;
1452 	if_ipsec_putref_variant(var2, &psref);
1453 	return 1; /* shared */
1454 }
1455 
1456 static inline void
1457 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align)
1458 {
1459 	struct mbuf *m;
1460 
1461 	MGET(m, M_WAIT, MT_DATA);
1462 	if (align) {
1463 		m->m_len = PFKEY_ALIGN8(len);
1464 		memset(mtod(m, void *), 0, m->m_len);
1465 	} else
1466 		m->m_len = len;
1467 	m_copyback(m, 0, len, data);
1468 	m_cat(m0, m);
1469 }
1470 
1471 static inline void
1472 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
1473 {
1474 
1475 	if_ipsec_add_mbuf_optalign(m0, data, len, true);
1476 }
1477 
1478 static inline void
1479 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align)
1480 {
1481 
1482 	if (port == 0) {
1483 		if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align);
1484 	} else {
1485 		union sockaddr_union addrport_u;
1486 		struct sockaddr *addrport = &addrport_u.sa;
1487 
1488 		if_ipsec_set_addr_port(addrport, addr, port);
1489 		if_ipsec_add_mbuf_optalign(m0, addrport, addrport->sa_len, align);
1490 	}
1491 }
1492 
1493 static inline void
1494 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1495 {
1496 	struct mbuf *m;
1497 
1498 	if (len == 0)
1499 		return;
1500 
1501 	MGET(m, M_WAIT, MT_DATA);
1502 	m->m_len = len;
1503 	memset(mtod(m, void *), 0, m->m_len);
1504 	m_cat(m0, m);
1505 }
1506 
1507 static inline size_t
1508 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1509     int proto, uint16_t exttype)
1510 {
1511 	size_t size;
1512 
1513 	KASSERT(saaddr != NULL);
1514 	KASSERT(addr != NULL);
1515 
1516 	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1517 	saaddr->sadb_address_len = PFKEY_UNIT64(size);
1518 	saaddr->sadb_address_exttype = exttype;
1519 	saaddr->sadb_address_proto = proto;
1520 	switch (addr->sa_family) {
1521 #ifdef INET
1522 	case AF_INET:
1523 		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1524 		break;
1525 #endif /* INET */
1526 #ifdef INET6
1527 	case AF_INET6:
1528 		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1529 		break;
1530 #endif /* INET6 */
1531 	default:
1532 		log(LOG_DEBUG,
1533 		    "%s: Invalid address family: %d.\n",
1534 		    __func__, addr->sa_family);
1535 		break;
1536 	}
1537 	saaddr->sadb_address_reserved = 0;
1538 
1539 	return size;
1540 }
1541 
1542 static inline size_t
1543 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1544     int proto)
1545 {
1546 
1547 	return if_ipsec_set_sadb_addr(sasrc, src, proto,
1548 	    SADB_EXT_ADDRESS_SRC);
1549 }
1550 
1551 static inline size_t
1552 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1553     int proto)
1554 {
1555 
1556 	return if_ipsec_set_sadb_addr(sadst, dst, proto,
1557 	    SADB_EXT_ADDRESS_DST);
1558 }
1559 
1560 static inline size_t
1561 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1562     struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1563     uint8_t level, struct sockaddr *src, struct sockaddr *dst)
1564 {
1565 	size_t size;
1566 
1567 	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1568 
1569 	size = sizeof(*xpl);
1570 	if (policy == IPSEC_POLICY_IPSEC) {
1571 		size += PFKEY_ALIGN8(sizeof(*xisr));
1572 		if (src != NULL && dst != NULL)
1573 			size += PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1574 	}
1575 	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1576 	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1577 	xpl->sadb_x_policy_type = policy;
1578 	xpl->sadb_x_policy_dir = dir;
1579 	xpl->sadb_x_policy_reserved = 0;
1580 	xpl->sadb_x_policy_id = id;
1581 	xpl->sadb_x_policy_reserved2 = 0;
1582 
1583 	if (policy == IPSEC_POLICY_IPSEC) {
1584 		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1585 		if (src != NULL && dst != NULL)
1586 			xisr->sadb_x_ipsecrequest_len +=
1587 				PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1588 		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1589 		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1590 		xisr->sadb_x_ipsecrequest_level = level;
1591 		xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
1592 	}
1593 
1594 	return size;
1595 }
1596 
1597 static inline void
1598 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1599 {
1600 
1601 	KASSERT(msg != NULL);
1602 
1603 	msg->sadb_msg_version = PF_KEY_V2;
1604 	msg->sadb_msg_type = msgtype;
1605 	msg->sadb_msg_errno = 0;
1606 	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1607 	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1608 	msg->sadb_msg_reserved = 0;
1609 	msg->sadb_msg_seq = 0; /* XXXX */
1610 	msg->sadb_msg_pid = 0; /* XXXX */
1611 }
1612 
1613 static inline void
1614 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1615 {
1616 
1617 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1618 }
1619 
1620 static inline void
1621 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1622 {
1623 
1624 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1625 }
1626 
1627 static int
1628 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1629     in_port_t port)
1630 {
1631 	int error = 0;
1632 
1633 	sockaddr_copy(addrport, addr->sa_len, addr);
1634 
1635 	switch (addr->sa_family) {
1636 #ifdef INET
1637 	case AF_INET: {
1638 		struct sockaddr_in *sin = satosin(addrport);
1639 		sin->sin_port = port;
1640 		break;
1641 	}
1642 #endif /* INET */
1643 #ifdef INET6
1644 	case AF_INET6: {
1645 		struct sockaddr_in6 *sin6 = satosin6(addrport);
1646 		sin6->sin6_port = port;
1647 		break;
1648 	}
1649 #endif /* INET6 */
1650 	default:
1651 		log(LOG_DEBUG,
1652 		    "%s: Invalid address family: %d.\n",
1653 		    __func__, addr->sa_family);
1654 		error = EINVAL;
1655 	}
1656 
1657 	return error;
1658 }
1659 
1660 static struct secpolicy *
1661 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1662     struct sockaddr *dst, in_port_t dport,
1663     int dir, int proto, int level, u_int policy)
1664 {
1665 	struct sadb_msg msg;
1666 	struct sadb_address xsrc, xdst;
1667 	struct sadb_x_policy xpl;
1668 	struct sadb_x_ipsecrequest xisr;
1669 	size_t size;
1670 	size_t padlen;
1671 	uint16_t ext_msg_len = 0;
1672 	struct mbuf *m;
1673 
1674 	memset(&msg, 0, sizeof(msg));
1675 	memset(&xsrc, 0, sizeof(xsrc));
1676 	memset(&xdst, 0, sizeof(xdst));
1677 	memset(&xpl, 0, sizeof(xpl));
1678 	memset(&xisr, 0, sizeof(xisr));
1679 
1680 	MGETHDR(m, M_WAIT, MT_DATA);
1681 
1682 	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1683 	ext_msg_len += PFKEY_UNIT64(size);
1684 	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1685 	ext_msg_len += PFKEY_UNIT64(size);
1686 	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level, src, dst);
1687 	ext_msg_len += PFKEY_UNIT64(size);
1688 	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1689 
1690 	/* build PF_KEY message */
1691 
1692 	m->m_len = sizeof(msg);
1693 	m_copyback(m, 0, sizeof(msg), &msg);
1694 
1695 	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1696 	/*
1697 	 * secpolicy.spidx.{src, dst} must not be set port number,
1698 	 * even if it is used for NAT-T.
1699 	 */
1700 	if_ipsec_add_mbuf_addr_port(m, src, 0, true);
1701 	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1702 		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1703 	if_ipsec_add_pad(m, padlen);
1704 
1705 	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1706 	/* ditto */
1707 	if_ipsec_add_mbuf_addr_port(m, dst, 0, true);
1708 	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1709 		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1710 	if_ipsec_add_pad(m, padlen);
1711 
1712 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1713 	padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl);
1714 	if (policy == IPSEC_POLICY_IPSEC) {
1715 		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1716 		padlen -= PFKEY_ALIGN8(sizeof(xisr));
1717 	}
1718 	if_ipsec_add_pad(m, padlen);
1719 
1720 	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
1721 	return key_kpi_spdadd(m);
1722 }
1723 
1724 static int
1725 if_ipsec_add_sp(struct ipsec_variant *var,
1726     struct sockaddr *src, in_port_t sport,
1727     struct sockaddr *dst, in_port_t dport)
1728 {
1729 	struct ipsec_softc *sc = var->iv_softc;
1730 	int level;
1731 	u_int v6policy;
1732 
1733 	/*
1734 	 * must delete sp before add it.
1735 	 */
1736 	KASSERT(IV_SP_IN(var) == NULL);
1737 	KASSERT(IV_SP_OUT(var) == NULL);
1738 	KASSERT(IV_SP_IN6(var) == NULL);
1739 	KASSERT(IV_SP_OUT6(var) == NULL);
1740 
1741 	/*
1742 	 * can be shared?
1743 	 */
1744 	if (if_ipsec_share_sp(var))
1745 		return 0;
1746 
1747 	if (if_ipsec_nat_t(sc))
1748 		level = IPSEC_LEVEL_REQUIRE;
1749 	else
1750 		level = IPSEC_LEVEL_UNIQUE;
1751 
1752 	if (if_ipsec_fwd_ipv6(sc))
1753 		v6policy = IPSEC_POLICY_IPSEC;
1754 	else
1755 		v6policy = IPSEC_POLICY_DISCARD;
1756 
1757 	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1758 	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1759 	if (IV_SP_IN(var) == NULL)
1760 		goto fail;
1761 	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1762 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1763 	if (IV_SP_OUT(var) == NULL)
1764 		goto fail;
1765 	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1766 	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy);
1767 	if (IV_SP_IN6(var) == NULL)
1768 		goto fail;
1769 	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1770 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy);
1771 	if (IV_SP_OUT6(var) == NULL)
1772 		goto fail;
1773 
1774 	return 0;
1775 
1776 fail:
1777 	if (IV_SP_IN6(var) != NULL) {
1778 		if_ipsec_del_sp0(IV_SP_IN6(var));
1779 		IV_SP_IN6(var) = NULL;
1780 	}
1781 	if (IV_SP_OUT(var) != NULL) {
1782 		if_ipsec_del_sp0(IV_SP_OUT(var));
1783 		IV_SP_OUT(var) = NULL;
1784 	}
1785 	if (IV_SP_IN(var) != NULL) {
1786 		if_ipsec_del_sp0(IV_SP_IN(var));
1787 		IV_SP_IN(var) = NULL;
1788 	}
1789 
1790 	return EEXIST;
1791 }
1792 
1793 static int
1794 if_ipsec_del_sp0(struct secpolicy *sp)
1795 {
1796 	struct sadb_msg msg;
1797 	struct sadb_x_policy xpl;
1798 	size_t size;
1799 	uint16_t ext_msg_len = 0;
1800 	int error;
1801 	struct mbuf *m;
1802 
1803 	if (sp == NULL)
1804 		return 0;
1805 
1806 	memset(&msg, 0, sizeof(msg));
1807 	memset(&xpl, 0, sizeof(xpl));
1808 
1809 	MGETHDR(m, M_WAIT, MT_DATA);
1810 
1811 	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL);
1812 	ext_msg_len += PFKEY_UNIT64(size);
1813 
1814 	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
1815 
1816 	m->m_len = sizeof(msg);
1817 	m_copyback(m, 0, sizeof(msg), &msg);
1818 
1819 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1820 
1821 	/*  unreference correspond to key_kpi_spdadd(). */
1822 	KEY_SP_UNREF(&sp);
1823 	error = key_kpi_spddelete2(m);
1824 	if (error != 0) {
1825 		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
1826 		    __func__, sp->id, error);
1827 	}
1828 	return error;
1829 }
1830 
1831 static void
1832 if_ipsec_del_sp(struct ipsec_variant *var)
1833 {
1834 
1835 	/* are the SPs shared? */
1836 	if (if_ipsec_unshare_sp(var))
1837 		return;
1838 
1839 	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
1840 	(void)if_ipsec_del_sp0(IV_SP_IN(var));
1841 	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
1842 	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
1843 	IV_SP_IN(var) = NULL;
1844 	IV_SP_IN6(var) = NULL;
1845 	IV_SP_OUT(var) = NULL;
1846 	IV_SP_OUT6(var) = NULL;
1847 }
1848 
1849 static int
1850 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
1851     struct ipsec_variant *nvar)
1852 {
1853 	in_port_t src_port = 0;
1854 	in_port_t dst_port = 0;
1855 	struct sockaddr *src;
1856 	struct sockaddr *dst;
1857 	int error = 0;
1858 
1859 	KASSERT(mutex_owned(&sc->ipsec_lock));
1860 
1861 	if_ipsec_del_sp(ovar);
1862 
1863 	src = nvar->iv_psrc;
1864 	dst = nvar->iv_pdst;
1865 	if (if_ipsec_nat_t(sc)) {
1866 		/* NAT-T enabled */
1867 		src_port = nvar->iv_sport;
1868 		dst_port = nvar->iv_dport;
1869 	}
1870 	if (src && dst)
1871 		error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
1872 
1873 	return error;
1874 }
1875 
1876 /*
1877  * ipsec_variant and its SPs update API.
1878  *
1879  * Assumption:
1880  * reader side dereferences sc->ipsec_var in reader critical section only,
1881  * that is, all of reader sides do not reader the sc->ipsec_var after
1882  * pserialize_perform().
1883  */
1884 static int
1885 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
1886     struct ipsec_variant *nullvar)
1887 {
1888 	struct ifnet *ifp = &sc->ipsec_if;
1889 	struct ipsec_variant *ovar = sc->ipsec_var;
1890 	int error;
1891 
1892 	KASSERT(mutex_owned(&sc->ipsec_lock));
1893 
1894 	/*
1895 	 * To keep consistency between ipsec(4) I/F settings and SPs,
1896 	 * we stop packet processing while replacing SPs, that is, we set
1897 	 * "null" config variant to sc->ipsec_var.
1898 	 */
1899 	sc->ipsec_var = nullvar;
1900 	pserialize_perform(sc->ipsec_psz);
1901 	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
1902 
1903 	error = if_ipsec_replace_sp(sc, ovar, nvar);
1904 	if (!error)
1905 		sc->ipsec_var = nvar;
1906 	else {
1907 		sc->ipsec_var = ovar; /* rollback */
1908 		psref_target_init(&ovar->iv_psref, iv_psref_class);
1909 	}
1910 
1911 	pserialize_perform(sc->ipsec_psz);
1912 	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
1913 
1914 	if (if_ipsec_variant_is_configured(sc->ipsec_var))
1915 		ifp->if_flags |= IFF_RUNNING;
1916 	else
1917 		ifp->if_flags &= ~IFF_RUNNING;
1918 
1919 	return error;
1920 }
1921