xref: /netbsd-src/sys/net/if_ipsec.c (revision 627f7eb200a4419d89b531d55fccd2ee3ffdcde0)
1 /*	$NetBSD: if_ipsec.c,v 1.30 2020/10/14 18:48:05 roy Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.30 2020/10/14 18:48:05 roy Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/atomic.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/errno.h>
44 #include <sys/ioctl.h>
45 #include <sys/time.h>
46 #include <sys/syslog.h>
47 #include <sys/cpu.h>
48 #include <sys/kmem.h>
49 #include <sys/mutex.h>
50 #include <sys/pserialize.h>
51 #include <sys/psref.h>
52 #include <sys/sysctl.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/route.h>
57 #include <net/bpf.h>
58 #include <net/pfkeyv2.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #ifdef	INET
64 #include <netinet/in_var.h>
65 #endif	/* INET */
66 
67 #ifdef INET6
68 #include <netinet6/in6_var.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_encap.h>
74 
75 #include <net/if_ipsec.h>
76 
77 #include <net/raw_cb.h>
78 #include <net/pfkeyv2.h>
79 
80 #include <netipsec/key.h>
81 #include <netipsec/keydb.h> /* for union sockaddr_union */
82 #include <netipsec/ipsec.h>
83 #include <netipsec/ipsecif.h>
84 
85 static int if_ipsec_clone_create(struct if_clone *, int);
86 static int if_ipsec_clone_destroy(struct ifnet *);
87 
88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
90 
91 static int if_ipsec_encap_attach(struct ipsec_variant *);
92 static int if_ipsec_encap_detach(struct ipsec_variant *);
93 static int if_ipsec_set_tunnel(struct ifnet *,
94     struct sockaddr *, struct sockaddr *);
95 static void if_ipsec_delete_tunnel(struct ifnet *);
96 static int if_ipsec_ensure_flags(struct ifnet *, u_short);
97 static void if_ipsec_attach0(struct ipsec_softc *);
98 
99 static int if_ipsec_update_variant(struct ipsec_softc *,
100     struct ipsec_variant *, struct ipsec_variant *);
101 
102 /* sadb_msg */
103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
104 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
106     struct sockaddr *, int, uint16_t);
107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
108     struct sockaddr *, int);
109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
110     struct sockaddr *, int);
111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
112     struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t,
113     struct sockaddr *, struct sockaddr *);
114 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
115 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
116 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
117 /* SPD */
118 static int if_ipsec_share_sp(struct ipsec_variant *);
119 static int if_ipsec_unshare_sp(struct ipsec_variant *);
120 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
121     in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
122 static inline int if_ipsec_del_sp0(struct secpolicy *);
123 static int if_ipsec_add_sp(struct ipsec_variant *,
124     struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
125 static void if_ipsec_del_sp(struct ipsec_variant *);
126 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
127     struct ipsec_variant *);
128 
129 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
130     in_port_t);
/*
 * Convenience wrappers around if_ipsec_set_addr_port().  Each expands to a
 * single call and yields that call's int result: "target" is passed through
 * as the first argument, followed by the variant's physical source
 * (iv_psrc, iv_sport) or physical destination (iv_pdst, iv_dport) address
 * and port.  "var" is evaluated twice.
 */
#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target)			\
	if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
#define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target)			\
	if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
135 
136 /*
137  * ipsec global variable definitions
138  */
139 
/* This list is used in ioctl context only. */
static struct {
	/* every softc made by if_ipsec_clone_create(), until destroyed */
	LIST_HEAD(ipsec_sclist, ipsec_softc) list;
	/* held around every insertion, removal and walk of "list" */
	kmutex_t lock;
} ipsec_softcs __cacheline_aligned;

/* psref class used for struct ipsec_variant; created in ipsecifattach(). */
struct psref_class *iv_psref_class __read_mostly;

/* Cloner for "ipsec" interfaces; attached in ipsecifattach(). */
struct if_clone ipsec_cloner =
    IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
/* Limit handed to if_tunnel_check_nesting() by if_ipsec_check_nesting(). */
static int max_ipsec_nesting = MAX_IPSEC_NEST;

/* sysctl log receiving the global nodes made by if_ipsec_sysctl_setup(). */
static struct sysctllog *if_ipsec_sysctl;
153 
154 #ifdef INET6
155 static int
156 sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)
157 {
158 	int error, pmtu;
159 	struct sysctlnode node = *rnode;
160 
161 	pmtu = ip6_ipsec_pmtu;
162 	node.sysctl_data = &pmtu;
163 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
164 	if (error || newp == NULL)
165 		return error;
166 
167 	switch (pmtu) {
168 	case IPSEC_PMTU_MINMTU:
169 	case IPSEC_PMTU_OUTERMTU:
170 		ip6_ipsec_pmtu = pmtu;
171 		break;
172 	default:
173 		return EINVAL;
174 	}
175 
176 	return 0;
177 }
178 
179 static int
180 sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)
181 {
182 	int error, pmtu;
183 	struct sysctlnode node = *rnode;
184 	struct ipsec_softc *sc = (struct ipsec_softc *)node.sysctl_data;
185 
186 	pmtu = sc->ipsec_pmtu;
187 	node.sysctl_data = &pmtu;
188 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
189 	if (error || newp == NULL)
190 		return error;
191 
192 	switch (pmtu) {
193 	case IPSEC_PMTU_SYSDEFAULT:
194 	case IPSEC_PMTU_MINMTU:
195 	case IPSEC_PMTU_OUTERMTU:
196 		sc->ipsec_pmtu = pmtu;
197 		break;
198 	default:
199 		return EINVAL;
200 	}
201 
202 	return 0;
203 }
204 #endif
205 
206 static void
207 if_ipsec_sysctl_setup(void)
208 {
209 	if_ipsec_sysctl = NULL;
210 
211 #ifdef INET6
212 	/*
213 	 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error.
214 	 */
215 	sysctl_createv(NULL, 0, NULL, NULL,
216 		       CTLFLAG_PERMANENT,
217 		       CTLTYPE_NODE, "inet6",
218 		       SYSCTL_DESCR("PF_INET6 related settings"),
219 		       NULL, 0, NULL, 0,
220 		       CTL_NET, PF_INET6, CTL_EOL);
221 	sysctl_createv(NULL, 0, NULL, NULL,
222 		       CTLFLAG_PERMANENT,
223 		       CTLTYPE_NODE, "ip6",
224 		       SYSCTL_DESCR("IPv6 related settings"),
225 		       NULL, 0, NULL, 0,
226 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
227 
228 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
229 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
230 		       CTLTYPE_INT, "ipsecifhlim",
231 		       SYSCTL_DESCR("Default hop limit for a ipsec tunnel datagram"),
232 		       NULL, 0, &ip6_ipsec_hlim, 0,
233 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
234 		       IPV6CTL_IPSEC_HLIM, CTL_EOL);
235 
236 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
237 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
238 		       CTLTYPE_INT, "ipsecifpmtu",
239 		       SYSCTL_DESCR("Default Path MTU setting for ipsec tunnels"),
240 		       sysctl_if_ipsec_pmtu_global, 0, NULL, 0,
241 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
242 		       IPV6CTL_IPSEC_PMTU, CTL_EOL);
243 #endif
244 }
245 
/*
 * Create the per-interface "pmtu" sysctl node for an ipsec(4) interface
 * (a no-op without INET6).
 *
 *   clog - sysctl log that records the created nodes
 *   sc   - softc the node controls; also stored as the node's sysctl_data
 *
 * sc->ipsec_pmtu is set to IPSEC_PMTU_SYSDEFAULT before the node becomes
 * reachable, so the handler never exposes the zero-filled initial value.
 * Failure to create any node on the path is logged and otherwise ignored;
 * the interface then simply has no "pmtu" knob.
 */
static void
if_ipsec_perif_sysctl_setup(struct sysctllog **clog, struct ipsec_softc *sc)
{
#ifdef INET6
	const struct sysctlnode *cnode, *rnode;
	struct ifnet *ifp = &sc->ipsec_if;
	const char *ifname = ifp->if_xname;
	int rv;

	sc->ipsec_pmtu = IPSEC_PMTU_SYSDEFAULT;

	/*
	 * Already created in sysctl_sndq_setup().
	 * Each step uses the node returned by the previous one as its
	 * parent, so stop as soon as one of them fails.
	 */
	rv = sysctl_createv(clog, 0, NULL, &rnode,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, "interfaces",
			    SYSCTL_DESCR("Per-interface controls"),
			    NULL, 0, NULL, 0,
			    CTL_NET, CTL_CREATE, CTL_EOL);
	if (rv != 0)
		goto fail;

	rv = sysctl_createv(clog, 0, &rnode, &rnode,
			    CTLFLAG_PERMANENT,
			    CTLTYPE_NODE, ifname,
			    SYSCTL_DESCR("Interface controls"),
			    NULL, 0, NULL, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		goto fail;

	rv = sysctl_createv(clog, 0, &rnode, &cnode,
			    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
			    CTLTYPE_INT, "pmtu",
			    SYSCTL_DESCR("Path MTU setting for this ipsec tunnel"),
			    sysctl_if_ipsec_pmtu_perif, 0, (void *)sc, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		goto fail;

	return;

fail:
	log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n", ifname);
#endif
}
283 
284 /* ARGSUSED */
285 void
286 ipsecifattach(int count)
287 {
288 
289 	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
290 	LIST_INIT(&ipsec_softcs.list);
291 
292 	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
293 
294 	if_ipsec_sysctl_setup();
295 
296 	if_clone_attach(&ipsec_cloner);
297 }
298 
299 static int
300 if_ipsec_clone_create(struct if_clone *ifc, int unit)
301 {
302 	struct ipsec_softc *sc;
303 	struct ipsec_variant *var;
304 	struct ifnet *ifp;
305 
306 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
307 
308 	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
309 
310 	if_ipsec_attach0(sc);
311 
312 	ifp = &sc->ipsec_if;
313 	if_ipsec_perif_sysctl_setup(&ifp->if_sysctl_log, sc);
314 
315 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
316 	var->iv_softc = sc;
317 	psref_target_init(&var->iv_psref, iv_psref_class);
318 
319 	sc->ipsec_var = var;
320 	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
321 	sc->ipsec_psz = pserialize_create();
322 	sc->ipsec_ro_percpu = if_tunnel_alloc_ro_percpu();
323 
324 	mutex_enter(&ipsec_softcs.lock);
325 	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
326 	mutex_exit(&ipsec_softcs.lock);
327 	return 0;
328 }
329 
330 static void
331 if_ipsec_attach0(struct ipsec_softc *sc)
332 {
333 
334 	sc->ipsec_if.if_addrlen = 0;
335 	sc->ipsec_if.if_mtu    = IPSEC_MTU;
336 	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
337 	/* set ipsec(4) specific default flags. */
338 	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
339 	sc->ipsec_if.if_extflags = IFEF_MPSAFE;
340 	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
341 	sc->ipsec_if.if_output = if_ipsec_output;
342 	sc->ipsec_if.if_type   = IFT_IPSEC;
343 	sc->ipsec_if.if_dlt    = DLT_NULL;
344 	sc->ipsec_if.if_softc  = sc;
345 	IFQ_SET_READY(&sc->ipsec_if.if_snd);
346 	if_initialize(&sc->ipsec_if);
347 	sc->ipsec_if.if_link_state = LINK_STATE_DOWN;
348 	if_alloc_sadl(&sc->ipsec_if);
349 	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
350 	if_register(&sc->ipsec_if);
351 }
352 
353 static int
354 if_ipsec_clone_destroy(struct ifnet *ifp)
355 {
356 	struct ipsec_softc *sc = ifp->if_softc;
357 	struct ipsec_variant *var;
358 	int bound;
359 
360 	mutex_enter(&ipsec_softcs.lock);
361 	LIST_REMOVE(sc, ipsec_list);
362 	mutex_exit(&ipsec_softcs.lock);
363 
364 	bound = curlwp_bind();
365 	if_ipsec_delete_tunnel(&sc->ipsec_if);
366 	curlwp_bindx(bound);
367 
368 	bpf_detach(ifp);
369 	if_detach(ifp);
370 
371 	if_tunnel_free_ro_percpu(sc->ipsec_ro_percpu);
372 
373 	pserialize_destroy(sc->ipsec_psz);
374 	mutex_destroy(&sc->ipsec_lock);
375 
376 	var = sc->ipsec_var;
377 	kmem_free(var, sizeof(*var));
378 	kmem_free(sc, sizeof(*sc));
379 
380 	return 0;
381 }
382 
383 static inline bool
384 if_ipsec_nat_t(struct ipsec_softc *sc)
385 {
386 
387 	return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
388 }
389 
390 static inline bool
391 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
392 {
393 
394 	return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
395 }
396 
397 int
398 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
399 {
400 	uint8_t v;
401 	struct ipsec_softc *sc;
402 	struct ipsec_variant *var = NULL;
403 	struct psref psref;
404 	int ret = 0;
405 
406 	sc = arg;
407 	KASSERT(sc != NULL);
408 
409 	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
410 		goto out;
411 
412 	var = if_ipsec_getref_variant(sc, &psref);
413 	if (if_ipsec_variant_is_unconfigured(var))
414 		goto out;
415 
416 	switch (proto) {
417 	case IPPROTO_IPV4:
418 	case IPPROTO_IPV6:
419 		break;
420 	default:
421 		goto out;
422 	}
423 
424 	m_copydata(m, 0, sizeof(v), &v);
425 	v = (v >> 4) & 0xff;  /* Get the IP version number. */
426 
427 	switch (v) {
428 #ifdef INET
429 	case IPVERSION: {
430 		struct ip ip;
431 
432 		if (m->m_pkthdr.len < sizeof(ip))
433 			goto out;
434 
435 		m_copydata(m, 0, sizeof(ip), &ip);
436 		if (var->iv_psrc->sa_family != AF_INET ||
437 		    var->iv_pdst->sa_family != AF_INET)
438 			goto out;
439 		ret = ipsecif4_encap_func(m, &ip, var);
440 		break;
441 	}
442 #endif
443 #ifdef INET6
444 	case (IPV6_VERSION >> 4): {
445 		struct ip6_hdr ip6;
446 
447 		if (m->m_pkthdr.len < sizeof(ip6))
448 			goto out;
449 
450 		m_copydata(m, 0, sizeof(ip6), &ip6);
451 		if (var->iv_psrc->sa_family != AF_INET6 ||
452 		    var->iv_pdst->sa_family != AF_INET6)
453 			goto out;
454 		ret = ipsecif6_encap_func(m, &ip6, var);
455 		break;
456 	}
457 #endif
458 	default:
459 		goto out;
460 	}
461 
462 out:
463 	if (var != NULL)
464 		if_ipsec_putref_variant(var, &psref);
465 	return ret;
466 }
467 
468 /*
469  * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
470  * We'll prevent this by introducing upper limit.
471  */
472 static int
473 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
474 {
475 
476 	return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
477 }
478 
479 int
480 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
481     const struct rtentry *rt)
482 {
483 	struct ipsec_softc *sc = ifp->if_softc;
484 	struct ipsec_variant *var;
485 	struct psref psref;
486 	int error;
487 	int bound;
488 
489 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
490 
491 	error = if_ipsec_check_nesting(ifp, m);
492 	if (error) {
493 		m_freem(m);
494 		goto noref_end;
495 	}
496 
497 	if ((ifp->if_flags & IFF_UP) == 0) {
498 		m_freem(m);
499 		error = ENETDOWN;
500 		goto noref_end;
501 	}
502 
503 
504 	bound = curlwp_bind();
505 	var = if_ipsec_getref_variant(sc, &psref);
506 	if (if_ipsec_variant_is_unconfigured(var)) {
507 		m_freem(m);
508 		error = ENETDOWN;
509 		goto end;
510 	}
511 
512 	m->m_flags &= ~(M_BCAST|M_MCAST);
513 
514 	/* use DLT_NULL encapsulation here to pass inner af type */
515 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
516 	if (!m) {
517 		error = ENOBUFS;
518 		goto end;
519 	}
520 	*mtod(m, int *) = dst->sa_family;
521 
522 #if INET6
523 	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
524 	if (dst->sa_family == AF_INET6 &&
525 	    !if_ipsec_fwd_ipv6(sc)) {
526 		/*
527 		 * IPv6 packet is not allowed to forward,that is not error.
528 		 */
529 		error = 0;
530 		IF_DROP(&ifp->if_snd);
531 		m_freem(m);
532 		goto end;
533 	}
534 #endif
535 
536 	error = if_ipsec_out_direct(var, m, dst->sa_family);
537 
538 end:
539 	if_ipsec_putref_variant(var, &psref);
540 	curlwp_bindx(bound);
541 noref_end:
542 	if (error)
543 		if_statinc(ifp, if_oerrors);
544 
545 	return error;
546 }
547 
548 static inline int
549 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
550 {
551 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
552 	int error;
553 	int len;
554 
555 	KASSERT(if_ipsec_heldref_variant(var));
556 	KASSERT(var->iv_output != NULL);
557 
558 	len = m->m_pkthdr.len;
559 
560 	/* input DLT_NULL frame to BPF */
561 	bpf_mtap(ifp, m, BPF_D_OUT);
562 
563 	/* grab and chop off inner af type */
564 	/* XXX need pullup? */
565 	m_adj(m, sizeof(int));
566 
567 	error = var->iv_output(var, family, m);
568 	if (error)
569 		return error;
570 
571 	if_statadd2(ifp, if_opackets, 1, if_obytes, len);
572 
573 	return 0;
574 }
575 
576 void
577 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
578 {
579 
580 	KASSERT(ifp != NULL);
581 
582 	m_set_rcvif(m, ifp);
583 
584 	bpf_mtap_af(ifp, af, m, BPF_D_IN);
585 
586 	if_ipsec_in_enqueue(m, af, ifp);
587 
588 	return;
589 }
590 
591 static inline void
592 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
593 {
594 	pktqueue_t *pktq;
595 	int pktlen;
596 
597 	/*
598 	 * Put the packet to the network layer input queue according to the
599 	 * specified address family.
600 	 */
601 	switch (af) {
602 #ifdef INET
603 	case AF_INET:
604 		pktq = ip_pktq;
605 		break;
606 #endif
607 #ifdef INET6
608 	case AF_INET6:
609 		pktq = ip6_pktq;
610 		break;
611 #endif
612 	default:
613 		if_statinc(ifp, if_ierrors);
614 		m_freem(m);
615 		return;
616 	}
617 
618 #if 1
619 	const u_int h = curcpu()->ci_index;
620 #else
621 	const uint32_t h = pktq_rps_hash(m);
622 #endif
623 	pktlen = m->m_pkthdr.len;
624 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
625 		if_statadd2(ifp, if_ibytes, pktlen, if_ipackets, 1);
626 	} else {
627 		if_statinc(ifp, if_iqdrops);
628 		m_freem(m);
629 	}
630 
631 	return;
632 }
633 
634 static inline int
635 if_ipsec_check_salen(struct sockaddr *addr)
636 {
637 
638 	switch (addr->sa_family) {
639 #ifdef INET
640 	case AF_INET:
641 		if (addr->sa_len != sizeof(struct sockaddr_in))
642 			return EINVAL;
643 		break;
644 #endif /* INET */
645 #ifdef INET6
646 	case AF_INET6:
647 		if (addr->sa_len != sizeof(struct sockaddr_in6))
648 			return EINVAL;
649 		break;
650 #endif /* INET6 */
651 	default:
652 		return EAFNOSUPPORT;
653 	}
654 
655 	return 0;
656 }
657 
/*
 * ioctl handler of ipsec(4); installed as if_ioctl by if_ipsec_attach0().
 *
 * Handles address initialisation, multicast membership, MTU changes and
 * the requests that set, delete or read the tunnel endpoints ("physical"
 * source/destination address).  Every other command goes to
 * ifioctl_common(); if that succeeds, if_ipsec_ensure_flags() is called
 * with the interface flags that were in effect on entry.
 *
 *   ifp  - the ipsec(4) interface
 *   cmd  - ioctl command
 *   data - command argument; its type depends on cmd
 *
 * Returns 0 on success or an errno value.
 *
 * Paths that jump to "bad" must have called curlwp_bind() before; "bad"
 * releases the variant reference if one is held (var != NULL) and undoes
 * the binding.
 */
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
int
if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct ipsec_softc *sc  = ifp->if_softc;
	struct ipsec_variant *var = NULL;
	struct ifreq     *ifr = (struct ifreq*)data;
	struct ifaddr    *ifa = (struct ifaddr*)data;
	int error = 0, size;
	struct sockaddr *dst, *src;
	u_long mtu;
	/* flags on entry, handed to if_ipsec_ensure_flags() in the default case */
	u_short oflags = ifp->if_flags;
	int bound;
	struct psref psref;

	switch (cmd) {
	case SIOCINITIFADDR:
		/* assigning an address also marks the interface up */
		ifp->if_flags |= IFF_UP;
		ifa->ifa_rtrequest = p2p_rtrequest;
		break;

	case SIOCSIFDSTADDR:
		/* accepted, nothing to do */
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		switch (ifr->ifr_addr.sa_family) {
#ifdef INET
		case AF_INET:	/* IP supports Multicast */
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:	/* IP6 supports Multicast */
			break;
#endif /* INET6 */
		default:  /* Other protocols doesn't support Multicast */
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFMTU:
		mtu = ifr->ifr_mtu;
		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
			return EINVAL;
		/* ENETRESET from ifioctl_common() is treated as success */
		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
			error = 0;
		break;

#ifdef INET
	case SIOCSIFPHYADDR:
#endif
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif /* INET6 */
	case SIOCSLIFPHYADDR:
		/* locate the src/dst pair inside the command specific request */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in_aliasreq *)data)->ifra_dstaddr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			src = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_addr);
			dst = (struct sockaddr *)
				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
			break;
#endif /* INET6 */
		case SIOCSLIFPHYADDR:
			src = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->addr);
			dst = (struct sockaddr *)
				&(((struct if_laddrreq *)data)->dstaddr);
			break;
		default:
			return EINVAL;
		}

		/* sa_family must be equal */
		if (src->sa_family != dst->sa_family)
			return EINVAL;

		/* family must be supported and sa_len must match it */
		error = if_ipsec_check_salen(src);
		if (error)
			return error;
		error = if_ipsec_check_salen(dst);
		if (error)
			return error;

		/* check sa_family looks sane for the cmd */
		switch (cmd) {
#ifdef INET
		case SIOCSIFPHYADDR:
			if (src->sa_family == AF_INET)
				break;
			return EAFNOSUPPORT;
#endif /* INET */
#ifdef INET6
		case SIOCSIFPHYADDR_IN6:
			if (src->sa_family == AF_INET6)
				break;
			return EAFNOSUPPORT;
#endif /* INET6 */
		case SIOCSLIFPHYADDR:
			/* checks done in the above */
			break;
		}
		/*
		 * calls if_ipsec_getref_variant() for other softcs to check
		 * address pair duplication
		 */
		bound = curlwp_bind();
		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
		if (error)
			goto bad;	/* var is NULL here: only unbinds */
		if_link_state_change(&sc->ipsec_if, LINK_STATE_UP);
		curlwp_bindx(bound);
		break;

	case SIOCDIFPHYADDR:
		bound = curlwp_bind();
		if_ipsec_delete_tunnel(&sc->ipsec_if);
		if_link_state_change(&sc->ipsec_if, LINK_STATE_DOWN);
		curlwp_bindx(bound);
		break;

	case SIOCGIFPSRCADDR:
#ifdef INET6
	case SIOCGIFPSRCADDR_IN6:
#endif /* INET6 */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (var->iv_psrc == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = var->iv_psrc;
		/* dst/size: where the address goes and how much room is there */
		switch (cmd) {
#ifdef INET
		case SIOCGIFPSRCADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPSRCADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		/* stored address does not fit into the request */
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGIFPDSTADDR:
#ifdef INET6
	case SIOCGIFPDSTADDR_IN6:
#endif /* INET6 */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (var->iv_pdst == NULL) {
			error = EADDRNOTAVAIL;
			goto bad;
		}
		src = var->iv_pdst;
		/* dst/size: where the address goes and how much room is there */
		switch (cmd) {
#ifdef INET
		case SIOCGIFPDSTADDR:
			dst = &ifr->ifr_addr;
			size = sizeof(ifr->ifr_addr);
			break;
#endif /* INET */
#ifdef INET6
		case SIOCGIFPDSTADDR_IN6:
			dst = (struct sockaddr *)
				&(((struct in6_ifreq *)data)->ifr_addr);
			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
			break;
#endif /* INET6 */
		default:
			error = EADDRNOTAVAIL;
			goto bad;
		}
		/* stored address does not fit into the request */
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	case SIOCGLIFPHYADDR:
		/* returns both endpoints in one struct if_laddrreq */
		bound = curlwp_bind();
		var = if_ipsec_getref_variant(sc, &psref);
		if (if_ipsec_variant_is_unconfigured(var)) {
			error = EADDRNOTAVAIL;
			goto bad;
		}

		/* copy src */
		src = var->iv_psrc;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->addr);
		size = sizeof(((struct if_laddrreq *)data)->addr);
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
		if (error)
			goto bad;

		/* copy dst */
		src = var->iv_pdst;
		dst = (struct sockaddr *)
			&(((struct if_laddrreq *)data)->dstaddr);
		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
		if (src->sa_len > size) {
			error = EINVAL;
			goto bad;
		}
		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
		if (error)
			goto bad;
		if_ipsec_putref_variant(var, &psref);
		curlwp_bindx(bound);
		break;

	default:
		error = ifioctl_common(ifp, cmd, data);
		if (!error) {
			/* pass the flags seen on entry to if_ipsec_ensure_flags() */
			bound = curlwp_bind();
			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
			if (error)
				goto bad;
			curlwp_bindx(bound);
		}
		break;
	}
	return error;

bad:
	/* common error exit for paths that run with the LWP bound */
	if (var != NULL)
		if_ipsec_putref_variant(var, &psref);
	curlwp_bindx(bound);

	return error;
}
926 
/*
 * One operation on a variant, with a separate implementation per address
 * family of the tunnel endpoints.  if_ipsec_encap_common() picks the
 * member that matches the variant's source address family.
 */
struct encap_funcs {
#ifdef INET
	int (*ef_inet)(struct ipsec_variant *);		/* AF_INET */
#endif
#ifdef INET6
	int (*ef_inet6)(struct ipsec_variant *);	/* AF_INET6 */
#endif
};

/* Used by if_ipsec_encap_attach(). */
static struct encap_funcs ipsec_encap_attach = {
#ifdef INET
	.ef_inet = ipsecif4_attach,
#endif
#ifdef INET6
	.ef_inet6 = ipsecif6_attach,
#endif
};

/* Used by if_ipsec_encap_detach(). */
static struct encap_funcs ipsec_encap_detach = {
#ifdef INET
	.ef_inet = ipsecif4_detach,
#endif
#ifdef INET6
	.ef_inet6 = ipsecif6_detach,
#endif
};
953 
954 static int
955 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
956 {
957 	int error;
958 
959 	KASSERT(var != NULL);
960 	KASSERT(if_ipsec_variant_is_configured(var));
961 
962 	switch (var->iv_psrc->sa_family) {
963 #ifdef INET
964 	case AF_INET:
965 		error = (funcs->ef_inet)(var);
966 		break;
967 #endif /* INET */
968 #ifdef INET6
969 	case AF_INET6:
970 		error = (funcs->ef_inet6)(var);
971 		break;
972 #endif /* INET6 */
973 	default:
974 		error = EINVAL;
975 		break;
976 	}
977 
978 	return error;
979 }
980 
981 static int
982 if_ipsec_encap_attach(struct ipsec_variant *var)
983 {
984 
985 	return if_ipsec_encap_common(var, &ipsec_encap_attach);
986 }
987 
988 static int
989 if_ipsec_encap_detach(struct ipsec_variant *var)
990 {
991 
992 	return if_ipsec_encap_common(var, &ipsec_encap_detach);
993 }
994 
995 /*
996  * Validate and set ipsec(4) I/F configurations.
997  *     (1) validate
998  *         (1-1) Check the argument src and dst address pair will change
999  *               configuration from current src and dst address pair.
1000  *         (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
1001  *               with argument src and dst address pair, except for NAT-T shared
1002  *               tunnels.
1003  *     (2) set
1004  *         (2-1) Create variant for new configuration.
1005  *         (2-2) Create temporary "null" variant used to avoid to access
1006  *               dangling variant while SPs are deleted and added.
1007  *         (2-3) Swap variant include its SPs.
1008  *         (2-4) Cleanup last configurations.
1009  */
1010 static int
1011 if_ipsec_set_tunnel(struct ifnet *ifp,
1012     struct sockaddr *src, struct sockaddr *dst)
1013 {
1014 	struct ipsec_softc *sc = ifp->if_softc;
1015 	struct ipsec_softc *sc2;
1016 	struct ipsec_variant *ovar, *nvar, *nullvar;
1017 	struct sockaddr *osrc, *odst;
1018 	struct sockaddr *nsrc, *ndst;
1019 	in_port_t nsport = 0, ndport = 0;
1020 	int error;
1021 
1022 	error = encap_lock_enter();
1023 	if (error)
1024 		return error;
1025 
1026 	nsrc = sockaddr_dup(src, M_WAITOK);
1027 	ndst = sockaddr_dup(dst, M_WAITOK);
1028 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1029 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1030 
1031 	mutex_enter(&sc->ipsec_lock);
1032 
1033 	ovar = sc->ipsec_var;
1034 
1035 	switch(nsrc->sa_family) {
1036 #ifdef INET
1037 	case AF_INET:
1038 		nsport = satosin(src)->sin_port;
1039 		/*
1040 		 * avoid confuse SP when NAT-T disabled,
1041 		 * e.g.
1042 		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
1043 		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
1044 		 */
1045 		satosin(nsrc)->sin_port = 0;
1046 		ndport = satosin(dst)->sin_port;
1047 		satosin(ndst)->sin_port = 0;
1048 		break;
1049 #endif /* INET */
1050 #ifdef INET6
1051 	case AF_INET6:
1052 		nsport = satosin6(src)->sin6_port;
1053 		satosin6(nsrc)->sin6_port = 0;
1054 		ndport = satosin6(dst)->sin6_port;
1055 		satosin6(ndst)->sin6_port = 0;
1056 		break;
1057 #endif /* INET6 */
1058 	default:
1059 		log(LOG_DEBUG,
1060 		    "%s: Invalid address family: %d.\n",
1061 		    __func__, src->sa_family);
1062 		error = EINVAL;
1063 		goto out;
1064 	}
1065 
1066 	/*
1067 	 * (1-1) Check the argument src and dst address pair will change
1068 	 *       configuration from current src and dst address pair.
1069 	 */
1070 	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
1071 	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
1072 	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
1073 		/* address and port pair not changed. */
1074 		error = 0;
1075 		goto out;
1076 	}
1077 
1078 	/*
1079 	 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
1080 	 *       with argument src and dst address pair, except for NAT-T shared
1081 	 *       tunnels.
1082 	 */
1083 	mutex_enter(&ipsec_softcs.lock);
1084 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1085 		struct ipsec_variant *var2;
1086 		struct psref psref;
1087 
1088 		if (sc2 == sc)
1089 			continue;
1090 		var2 = if_ipsec_getref_variant(sc2, &psref);
1091 		if (if_ipsec_variant_is_unconfigured(var2)) {
1092 			if_ipsec_putref_variant(var2, &psref);
1093 			continue;
1094 		}
1095 		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
1096 			if_ipsec_putref_variant(var2, &psref);
1097 			continue; /* NAT-T shared tunnel */
1098 		}
1099 		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
1100 		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
1101 			if_ipsec_putref_variant(var2, &psref);
1102 			mutex_exit(&ipsec_softcs.lock);
1103 			error = EADDRNOTAVAIL;
1104 			goto out;
1105 		}
1106 
1107 		if_ipsec_putref_variant(var2, &psref);
1108 		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
1109 	}
1110 	mutex_exit(&ipsec_softcs.lock);
1111 
1112 
1113 	osrc = ovar->iv_psrc;
1114 	odst = ovar->iv_pdst;
1115 
1116 	/*
1117 	 * (2-1) Create ipsec_variant for new configuration.
1118 	 */
1119 	if_ipsec_copy_variant(nvar, ovar);
1120 	nvar->iv_psrc = nsrc;
1121 	nvar->iv_pdst = ndst;
1122 	nvar->iv_sport = nsport;
1123 	nvar->iv_dport = ndport;
1124 	nvar->iv_encap_cookie4 = NULL;
1125 	nvar->iv_encap_cookie6 = NULL;
1126 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1127 	error = if_ipsec_encap_attach(nvar);
1128 	if (error)
1129 		goto out;
1130 
1131 	/*
1132 	 * (2-2) Create temporary "null" variant.
1133 	 */
1134 	if_ipsec_copy_variant(nullvar, ovar);
1135 	if_ipsec_clear_config(nullvar);
1136 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1137 	/*
1138 	 * (2-3) Swap variant include its SPs.
1139 	 */
1140 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1141 	if (error) {
1142 		if_ipsec_encap_detach(nvar);
1143 		goto out;
1144 	}
1145 
1146 	mutex_exit(&sc->ipsec_lock);
1147 
1148 	/*
1149 	 * (2-4) Cleanup last configurations.
1150 	 */
1151 	if (if_ipsec_variant_is_configured(ovar))
1152 		if_ipsec_encap_detach(ovar);
1153 	encap_lock_exit();
1154 
1155 	if (osrc != NULL)
1156 		sockaddr_free(osrc);
1157 	if (odst != NULL)
1158 		sockaddr_free(odst);
1159 	kmem_free(ovar, sizeof(*ovar));
1160 	kmem_free(nullvar, sizeof(*nullvar));
1161 
1162 	return 0;
1163 
1164 out:
1165 	mutex_exit(&sc->ipsec_lock);
1166 	encap_lock_exit();
1167 
1168 	sockaddr_free(nsrc);
1169 	sockaddr_free(ndst);
1170 	kmem_free(nvar, sizeof(*nvar));
1171 	kmem_free(nullvar, sizeof(*nullvar));
1172 
1173 	return error;
1174 }
1175 
/*
 * Validate and delete ipsec(4) I/F configurations.
 *     (1) validate
 *         (1-1) Check whether the current src and dst address pair is null,
 *               which means deletetunnel has already been done for the
 *               ipsec(4) I/F.
 *     (2) delete
 *         (2-1) Create variant for deleted status.
 *         (2-2) Create temporary "null" variant used to avoid accessing a
 *               dangling variant while SPs are deleted and added.
 *               NOTE:
 *               The contents of the temporary "null" variant equal those of
 *               the variant of (2-1); however, two psref_target_destroy()
 *               synchronization points are necessary to avoid accessing a
 *               dangling variant while SPs are deleted and added. To
 *               implement that simply, we use the same manner as
 *               if_ipsec_set_tunnel(), that is, create an extra "null"
 *               variant and use it temporarily.
 *         (2-3) Swap the variant, including its SPs.
 *         (2-4) Clean up the last configurations.
 */
1195 static void
1196 if_ipsec_delete_tunnel(struct ifnet *ifp)
1197 {
1198 	struct ipsec_softc *sc = ifp->if_softc;
1199 	struct ipsec_variant *ovar, *nvar, *nullvar;
1200 	struct sockaddr *osrc, *odst;
1201 	int error;
1202 
1203 	error = encap_lock_enter();
1204 	if (error)
1205 		return;
1206 
1207 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1208 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1209 
1210 	mutex_enter(&sc->ipsec_lock);
1211 
1212 	ovar = sc->ipsec_var;
1213 	osrc = ovar->iv_psrc;
1214 	odst = ovar->iv_pdst;
1215 	/*
1216 	 * (1-1) Check current src and dst address pair are null,
1217 	 *       which means the ipsec(4) I/F is already done deletetunnel.
1218 	 */
1219 	if (osrc == NULL || odst == NULL) {
1220 		/* address pair not changed. */
1221 		mutex_exit(&sc->ipsec_lock);
1222 		encap_lock_exit();
1223 		kmem_free(nvar, sizeof(*nvar));
1224 		kmem_free(nullvar, sizeof(*nullvar));
1225 		return;
1226 	}
1227 
1228 	/*
1229 	 * (2-1) Create variant for deleted status.
1230 	 */
1231 	if_ipsec_copy_variant(nvar, ovar);
1232 	if_ipsec_clear_config(nvar);
1233 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1234 
1235 	/*
1236 	 * (2-2) Create temporary "null" variant used to avoid to access
1237 	 *       dangling variant while SPs are deleted and added.
1238 	 */
1239 	if_ipsec_copy_variant(nullvar, ovar);
1240 	if_ipsec_clear_config(nullvar);
1241 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1242 	/*
1243 	 * (2-3) Swap variant include its SPs.
1244 	 */
1245 	/* if_ipsec_update_variant() does not fail when delete SP only. */
1246 	(void)if_ipsec_update_variant(sc, nvar, nullvar);
1247 
1248 	mutex_exit(&sc->ipsec_lock);
1249 
1250 	/*
1251 	 * (2-4) Cleanup last configurations.
1252 	 */
1253 	if (if_ipsec_variant_is_configured(ovar))
1254 		if_ipsec_encap_detach(ovar);
1255 	encap_lock_exit();
1256 
1257 	sockaddr_free(osrc);
1258 	sockaddr_free(odst);
1259 	kmem_free(ovar, sizeof(*ovar));
1260 	kmem_free(nullvar, sizeof(*nullvar));
1261 }
1262 
/*
 * Check the IFF_NAT_T and IFF_FWD_IPV6 flags, and update SPs if needed.
 *     (1) check
 *         (1-1) Check whether the flags are changed.
 *         (1-2) Check the current src and dst address pair. If they are null,
 *               the ipsec(4) I/F is deletetunnel'ed, so no update is needed.
 *     (2) update
 *         (2-1) Create variant for new SPs.
 *         (2-2) Create temporary "null" variant used to avoid accessing a
 *               dangling variant while SPs are deleted and added.
 *               NOTE:
 *               There is the same problem as if_ipsec_delete_tunnel().
 *         (2-3) Swap the variant, including its SPs.
 *         (2-4) Clean up unused configurations.
 *               NOTE: use the same encap_cookies.
 */
1280 static int
1281 if_ipsec_ensure_flags(struct ifnet *ifp, u_short oflags)
1282 {
1283 	struct ipsec_softc *sc = ifp->if_softc;
1284 	struct ipsec_variant *ovar, *nvar, *nullvar;
1285 	int error;
1286 
1287 	/*
1288 	 * (1) Check flags are changed.
1289 	 */
1290 	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1291 	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1292 		return 0; /* flags not changed. */
1293 
1294 	error = encap_lock_enter();
1295 	if (error)
1296 		return error;
1297 
1298 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1299 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1300 
1301 	mutex_enter(&sc->ipsec_lock);
1302 
1303 	ovar = sc->ipsec_var;
1304 	/*
1305 	 * (1-2) Check current src and dst address pair.
1306 	 */
1307 	if (if_ipsec_variant_is_unconfigured(ovar)) {
1308 		/* nothing to do */
1309 		mutex_exit(&sc->ipsec_lock);
1310 		encap_lock_exit();
1311 		kmem_free(nvar, sizeof(*nvar));
1312 		kmem_free(nullvar, sizeof(*nullvar));
1313 		return 0;
1314 	}
1315 
1316 	/*
1317 	 * (2-1) Create variant for new SPs.
1318 	 */
1319 	if_ipsec_copy_variant(nvar, ovar);
1320 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1321 	/*
1322 	 * (2-2) Create temporary "null" variant used to avoid to access
1323 	 *       dangling variant while SPs are deleted and added.
1324 	 */
1325 	if_ipsec_copy_variant(nullvar, ovar);
1326 	if_ipsec_clear_config(nullvar);
1327 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1328 	/*
1329 	 * (2-3) Swap variant include its SPs.
1330 	 */
1331 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1332 
1333 	mutex_exit(&sc->ipsec_lock);
1334 	encap_lock_exit();
1335 
1336 	/*
1337 	 * (2-4) Cleanup unused configurations.
1338 	 */
1339 	if (!error)
1340 		kmem_free(ovar, sizeof(*ovar));
1341 	else
1342 		kmem_free(nvar, sizeof(*ovar));
1343 	kmem_free(nullvar, sizeof(*nullvar));
1344 
1345 	return error;
1346 }
1347 
1348 /*
1349  * SPD management
1350  */
1351 
/*
 * Share SP set with other NAT-T ipsec(4) I/F(s).
 *     Return 1, when "var" shares SP set.
 *     Return 0, when "var" cannot share SP set.
 *
 * NOTE:
 * if_ipsec_share_sp() and if_ipsec_unshare_sp() require a global lock
 * to exclude set_tunnel/delete_tunnel of other ipsec(4) I/Fs. E.g. when
 * ipsec0 and ipsec1 can share an SP set, running ipsec0's set_tunnel and
 * ipsec1's set_tunnel concurrently causes a race.
 * Currently, (fortunately) encap_lock works as this global lock.
 */
1364 static int
1365 if_ipsec_share_sp(struct ipsec_variant *var)
1366 {
1367 	struct ipsec_softc *sc = var->iv_softc;
1368 	struct ipsec_softc *sc2;
1369 	struct ipsec_variant *var2;
1370 	struct psref psref;
1371 
1372 	KASSERT(encap_lock_held());
1373 	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1374 
1375 	mutex_enter(&ipsec_softcs.lock);
1376 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1377 		if (sc2 == sc)
1378 			continue;
1379 		var2 = if_ipsec_getref_variant(sc2, &psref);
1380 		if (if_ipsec_variant_is_unconfigured(var2)) {
1381 			if_ipsec_putref_variant(var2, &psref);
1382 			continue;
1383 		}
1384 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1385 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1386 			if_ipsec_putref_variant(var2, &psref);
1387 			continue;
1388 		}
1389 
1390 		break;
1391 	}
1392 	mutex_exit(&ipsec_softcs.lock);
1393 	if (sc2 == NULL)
1394 		return 0; /* not shared */
1395 
1396 	IV_SP_IN(var) = IV_SP_IN(var2);
1397 	IV_SP_IN6(var) = IV_SP_IN6(var2);
1398 	IV_SP_OUT(var) = IV_SP_OUT(var2);
1399 	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1400 
1401 	if_ipsec_putref_variant(var2, &psref);
1402 	return 1; /* shared */
1403 }
1404 
1405 /*
1406  * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1407  *     Return 1, when "var" shared SP set, and then unshare them.
1408  *     Return 0, when "var" did not share SP set.
1409  *
1410  * NOTE:
1411  * See if_ipsec_share_sp()'s note.
1412  */
1413 static int
1414 if_ipsec_unshare_sp(struct ipsec_variant *var)
1415 {
1416 	struct ipsec_softc *sc = var->iv_softc;
1417 	struct ipsec_softc *sc2;
1418 	struct ipsec_variant *var2;
1419 	struct psref psref;
1420 
1421 	KASSERT(encap_lock_held());
1422 
1423 	if (!var->iv_pdst || !var->iv_psrc)
1424 		return 0;
1425 
1426 	mutex_enter(&ipsec_softcs.lock);
1427 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1428 		if (sc2 == sc)
1429 			continue;
1430 		var2 = if_ipsec_getref_variant(sc2, &psref);
1431 		if (!var2->iv_pdst || !var2->iv_psrc) {
1432 			if_ipsec_putref_variant(var2, &psref);
1433 			continue;
1434 		}
1435 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1436 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1437 			if_ipsec_putref_variant(var2, &psref);
1438 			continue;
1439 		}
1440 
1441 		break;
1442 	}
1443 	mutex_exit(&ipsec_softcs.lock);
1444 	if (sc2 == NULL)
1445 		return 0; /* not shared */
1446 
1447 	IV_SP_IN(var) = NULL;
1448 	IV_SP_IN6(var) = NULL;
1449 	IV_SP_OUT(var) = NULL;
1450 	IV_SP_OUT6(var) = NULL;
1451 	if_ipsec_putref_variant(var2, &psref);
1452 	return 1; /* shared */
1453 }
1454 
1455 static inline void
1456 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align)
1457 {
1458 	struct mbuf *m;
1459 
1460 	MGET(m, M_WAIT, MT_DATA);
1461 	if (align) {
1462 		m->m_len = PFKEY_ALIGN8(len);
1463 		memset(mtod(m, void *), 0, m->m_len);
1464 	} else
1465 		m->m_len = len;
1466 	m_copyback(m, 0, len, data);
1467 	m_cat(m0, m);
1468 }
1469 
1470 static inline void
1471 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
1472 {
1473 
1474 	if_ipsec_add_mbuf_optalign(m0, data, len, true);
1475 }
1476 
1477 static inline void
1478 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align)
1479 {
1480 
1481 	if (port == 0) {
1482 		if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align);
1483 	} else {
1484 		union sockaddr_union addrport_u;
1485 		struct sockaddr *addrport = &addrport_u.sa;
1486 
1487 		if_ipsec_set_addr_port(addrport, addr, port);
1488 		if_ipsec_add_mbuf_optalign(m0, addrport, addrport->sa_len, align);
1489 	}
1490 }
1491 
1492 static inline void
1493 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1494 {
1495 	struct mbuf *m;
1496 
1497 	if (len == 0)
1498 		return;
1499 
1500 	MGET(m, M_WAIT, MT_DATA);
1501 	m->m_len = len;
1502 	memset(mtod(m, void *), 0, m->m_len);
1503 	m_cat(m0, m);
1504 }
1505 
1506 static inline size_t
1507 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1508     int proto, uint16_t exttype)
1509 {
1510 	size_t size;
1511 
1512 	KASSERT(saaddr != NULL);
1513 	KASSERT(addr != NULL);
1514 
1515 	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1516 	saaddr->sadb_address_len = PFKEY_UNIT64(size);
1517 	saaddr->sadb_address_exttype = exttype;
1518 	saaddr->sadb_address_proto = proto;
1519 	switch (addr->sa_family) {
1520 #ifdef INET
1521 	case AF_INET:
1522 		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1523 		break;
1524 #endif /* INET */
1525 #ifdef INET6
1526 	case AF_INET6:
1527 		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1528 		break;
1529 #endif /* INET6 */
1530 	default:
1531 		log(LOG_DEBUG,
1532 		    "%s: Invalid address family: %d.\n",
1533 		    __func__, addr->sa_family);
1534 		break;
1535 	}
1536 	saaddr->sadb_address_reserved = 0;
1537 
1538 	return size;
1539 }
1540 
1541 static inline size_t
1542 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1543     int proto)
1544 {
1545 
1546 	return if_ipsec_set_sadb_addr(sasrc, src, proto,
1547 	    SADB_EXT_ADDRESS_SRC);
1548 }
1549 
1550 static inline size_t
1551 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1552     int proto)
1553 {
1554 
1555 	return if_ipsec_set_sadb_addr(sadst, dst, proto,
1556 	    SADB_EXT_ADDRESS_DST);
1557 }
1558 
1559 static inline size_t
1560 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1561     struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1562     uint8_t level, struct sockaddr *src, struct sockaddr *dst)
1563 {
1564 	size_t size;
1565 
1566 	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1567 
1568 	size = sizeof(*xpl);
1569 	if (policy == IPSEC_POLICY_IPSEC) {
1570 		size += PFKEY_ALIGN8(sizeof(*xisr));
1571 		if (src != NULL && dst != NULL)
1572 			size += PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1573 	}
1574 	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1575 	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1576 	xpl->sadb_x_policy_type = policy;
1577 	xpl->sadb_x_policy_dir = dir;
1578 	xpl->sadb_x_policy_reserved = 0;
1579 	xpl->sadb_x_policy_id = id;
1580 	xpl->sadb_x_policy_reserved2 = 0;
1581 
1582 	if (policy == IPSEC_POLICY_IPSEC) {
1583 		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1584 		if (src != NULL && dst != NULL)
1585 			xisr->sadb_x_ipsecrequest_len +=
1586 				PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1587 		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1588 		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1589 		xisr->sadb_x_ipsecrequest_level = level;
1590 		if (level == IPSEC_LEVEL_UNIQUE)
1591 			xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
1592 		else
1593 			xisr->sadb_x_ipsecrequest_reqid = 0;
1594 	}
1595 
1596 	return size;
1597 }
1598 
1599 static inline void
1600 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1601 {
1602 
1603 	KASSERT(msg != NULL);
1604 
1605 	msg->sadb_msg_version = PF_KEY_V2;
1606 	msg->sadb_msg_type = msgtype;
1607 	msg->sadb_msg_errno = 0;
1608 	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1609 	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1610 	msg->sadb_msg_reserved = 0;
1611 	msg->sadb_msg_seq = 0; /* XXXX */
1612 	msg->sadb_msg_pid = 0; /* XXXX */
1613 }
1614 
1615 static inline void
1616 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1617 {
1618 
1619 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1620 }
1621 
1622 static inline void
1623 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1624 {
1625 
1626 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1627 }
1628 
1629 static int
1630 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1631     in_port_t port)
1632 {
1633 	int error = 0;
1634 
1635 	sockaddr_copy(addrport, addr->sa_len, addr);
1636 
1637 	switch (addr->sa_family) {
1638 #ifdef INET
1639 	case AF_INET: {
1640 		struct sockaddr_in *sin = satosin(addrport);
1641 		sin->sin_port = port;
1642 		break;
1643 	}
1644 #endif /* INET */
1645 #ifdef INET6
1646 	case AF_INET6: {
1647 		struct sockaddr_in6 *sin6 = satosin6(addrport);
1648 		sin6->sin6_port = port;
1649 		break;
1650 	}
1651 #endif /* INET6 */
1652 	default:
1653 		log(LOG_DEBUG,
1654 		    "%s: Invalid address family: %d.\n",
1655 		    __func__, addr->sa_family);
1656 		error = EINVAL;
1657 	}
1658 
1659 	return error;
1660 }
1661 
1662 static struct secpolicy *
1663 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1664     struct sockaddr *dst, in_port_t dport,
1665     int dir, int proto, int level, u_int policy)
1666 {
1667 	struct sadb_msg msg;
1668 	struct sadb_address xsrc, xdst;
1669 	struct sadb_x_policy xpl;
1670 	struct sadb_x_ipsecrequest xisr;
1671 	size_t size;
1672 	size_t padlen;
1673 	uint16_t ext_msg_len = 0;
1674 	struct mbuf *m;
1675 
1676 	memset(&msg, 0, sizeof(msg));
1677 	memset(&xsrc, 0, sizeof(xsrc));
1678 	memset(&xdst, 0, sizeof(xdst));
1679 	memset(&xpl, 0, sizeof(xpl));
1680 	memset(&xisr, 0, sizeof(xisr));
1681 
1682 	MGETHDR(m, M_WAIT, MT_DATA);
1683 
1684 	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1685 	ext_msg_len += PFKEY_UNIT64(size);
1686 	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1687 	ext_msg_len += PFKEY_UNIT64(size);
1688 	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level, NULL, NULL);
1689 	ext_msg_len += PFKEY_UNIT64(size);
1690 	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1691 
1692 	/* build PF_KEY message */
1693 
1694 	m->m_len = sizeof(msg);
1695 	m_copyback(m, 0, sizeof(msg), &msg);
1696 
1697 	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1698 	/*
1699 	 * secpolicy.spidx.{src, dst} must not be set port number,
1700 	 * even if it is used for NAT-T.
1701 	 */
1702 	if_ipsec_add_mbuf_addr_port(m, src, 0, true);
1703 	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1704 		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1705 	if_ipsec_add_pad(m, padlen);
1706 
1707 	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1708 	/* ditto */
1709 	if_ipsec_add_mbuf_addr_port(m, dst, 0, true);
1710 	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1711 		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1712 	if_ipsec_add_pad(m, padlen);
1713 
1714 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1715 	padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl);
1716 	if (policy == IPSEC_POLICY_IPSEC) {
1717 		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1718 		padlen -= PFKEY_ALIGN8(sizeof(xisr));
1719 	}
1720 	if_ipsec_add_pad(m, padlen);
1721 
1722 	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
1723 	return key_kpi_spdadd(m);
1724 }
1725 
1726 static int
1727 if_ipsec_add_sp(struct ipsec_variant *var,
1728     struct sockaddr *src, in_port_t sport,
1729     struct sockaddr *dst, in_port_t dport)
1730 {
1731 	struct ipsec_softc *sc = var->iv_softc;
1732 	int level;
1733 	u_int v6policy;
1734 
1735 	/*
1736 	 * must delete sp before add it.
1737 	 */
1738 	KASSERT(IV_SP_IN(var) == NULL);
1739 	KASSERT(IV_SP_OUT(var) == NULL);
1740 	KASSERT(IV_SP_IN6(var) == NULL);
1741 	KASSERT(IV_SP_OUT6(var) == NULL);
1742 
1743 	/*
1744 	 * can be shared?
1745 	 */
1746 	if (if_ipsec_share_sp(var))
1747 		return 0;
1748 
1749 	if (if_ipsec_nat_t(sc))
1750 		level = IPSEC_LEVEL_REQUIRE;
1751 	else
1752 		level = IPSEC_LEVEL_UNIQUE;
1753 
1754 	if (if_ipsec_fwd_ipv6(sc))
1755 		v6policy = IPSEC_POLICY_IPSEC;
1756 	else
1757 		v6policy = IPSEC_POLICY_DISCARD;
1758 
1759 	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1760 	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1761 	if (IV_SP_IN(var) == NULL)
1762 		goto fail;
1763 	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1764 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1765 	if (IV_SP_OUT(var) == NULL)
1766 		goto fail;
1767 	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1768 	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy);
1769 	if (IV_SP_IN6(var) == NULL)
1770 		goto fail;
1771 	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1772 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy);
1773 	if (IV_SP_OUT6(var) == NULL)
1774 		goto fail;
1775 
1776 	return 0;
1777 
1778 fail:
1779 	if (IV_SP_IN6(var) != NULL) {
1780 		if_ipsec_del_sp0(IV_SP_IN6(var));
1781 		IV_SP_IN6(var) = NULL;
1782 	}
1783 	if (IV_SP_OUT(var) != NULL) {
1784 		if_ipsec_del_sp0(IV_SP_OUT(var));
1785 		IV_SP_OUT(var) = NULL;
1786 	}
1787 	if (IV_SP_IN(var) != NULL) {
1788 		if_ipsec_del_sp0(IV_SP_IN(var));
1789 		IV_SP_IN(var) = NULL;
1790 	}
1791 
1792 	return EEXIST;
1793 }
1794 
1795 static int
1796 if_ipsec_del_sp0(struct secpolicy *sp)
1797 {
1798 	struct sadb_msg msg;
1799 	struct sadb_x_policy xpl;
1800 	size_t size;
1801 	uint16_t ext_msg_len = 0;
1802 	int error;
1803 	struct mbuf *m;
1804 
1805 	if (sp == NULL)
1806 		return 0;
1807 
1808 	memset(&msg, 0, sizeof(msg));
1809 	memset(&xpl, 0, sizeof(xpl));
1810 
1811 	MGETHDR(m, M_WAIT, MT_DATA);
1812 
1813 	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL);
1814 	ext_msg_len += PFKEY_UNIT64(size);
1815 
1816 	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
1817 
1818 	m->m_len = sizeof(msg);
1819 	m_copyback(m, 0, sizeof(msg), &msg);
1820 
1821 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1822 
1823 	/*  unreference correspond to key_kpi_spdadd(). */
1824 	KEY_SP_UNREF(&sp);
1825 	error = key_kpi_spddelete2(m);
1826 	if (error != 0) {
1827 		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
1828 		    __func__, sp->id, error);
1829 	}
1830 	return error;
1831 }
1832 
1833 static void
1834 if_ipsec_del_sp(struct ipsec_variant *var)
1835 {
1836 
1837 	/* are the SPs shared? */
1838 	if (if_ipsec_unshare_sp(var))
1839 		return;
1840 
1841 	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
1842 	(void)if_ipsec_del_sp0(IV_SP_IN(var));
1843 	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
1844 	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
1845 	IV_SP_IN(var) = NULL;
1846 	IV_SP_IN6(var) = NULL;
1847 	IV_SP_OUT(var) = NULL;
1848 	IV_SP_OUT6(var) = NULL;
1849 }
1850 
1851 static int
1852 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
1853     struct ipsec_variant *nvar)
1854 {
1855 	in_port_t src_port = 0;
1856 	in_port_t dst_port = 0;
1857 	struct sockaddr *src;
1858 	struct sockaddr *dst;
1859 	int error = 0;
1860 
1861 	KASSERT(mutex_owned(&sc->ipsec_lock));
1862 
1863 	if_ipsec_del_sp(ovar);
1864 
1865 	src = nvar->iv_psrc;
1866 	dst = nvar->iv_pdst;
1867 	if (if_ipsec_nat_t(sc)) {
1868 		/* NAT-T enabled */
1869 		src_port = nvar->iv_sport;
1870 		dst_port = nvar->iv_dport;
1871 	}
1872 	if (src && dst)
1873 		error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
1874 
1875 	return error;
1876 }
1877 
/*
 * ipsec_variant and its SPs update API.
 *
 * Assumption:
 * The reader side dereferences sc->ipsec_var in a reader critical section
 * only, that is, no reader side reads the sc->ipsec_var after
 * pserialize_perform().
 */
1886 static int
1887 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
1888     struct ipsec_variant *nullvar)
1889 {
1890 	struct ifnet *ifp = &sc->ipsec_if;
1891 	struct ipsec_variant *ovar = sc->ipsec_var;
1892 	int error;
1893 
1894 	KASSERT(mutex_owned(&sc->ipsec_lock));
1895 
1896 	/*
1897 	 * To keep consistency between ipsec(4) I/F settings and SPs,
1898 	 * we stop packet processing while replacing SPs, that is, we set
1899 	 * "null" config variant to sc->ipsec_var.
1900 	 */
1901 	atomic_store_release(&sc->ipsec_var, nullvar);
1902 	pserialize_perform(sc->ipsec_psz);
1903 	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
1904 
1905 	error = if_ipsec_replace_sp(sc, ovar, nvar);
1906 	if (!error)
1907 		atomic_store_release(&sc->ipsec_var, nvar);
1908 	else {
1909 		psref_target_init(&ovar->iv_psref, iv_psref_class);
1910 		atomic_store_release(&sc->ipsec_var, ovar); /* rollback */
1911 	}
1912 
1913 	pserialize_perform(sc->ipsec_psz);
1914 	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
1915 
1916 	if (if_ipsec_variant_is_configured(sc->ipsec_var))
1917 		ifp->if_flags |= IFF_RUNNING;
1918 	else
1919 		ifp->if_flags &= ~IFF_RUNNING;
1920 
1921 	return error;
1922 }
1923