xref: /netbsd-src/sys/net/if_ipsec.c (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 /*	$NetBSD: if_ipsec.c,v 1.29 2020/03/13 02:43:31 knakahara Exp $  */
2 
3 /*
4  * Copyright (c) 2017 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.29 2020/03/13 02:43:31 knakahara Exp $");
31 
32 #ifdef _KERNEL_OPT
33 #include "opt_inet.h"
34 #endif
35 
36 #include <sys/param.h>
37 #include <sys/atomic.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/sockio.h>
43 #include <sys/errno.h>
44 #include <sys/ioctl.h>
45 #include <sys/time.h>
46 #include <sys/syslog.h>
47 #include <sys/cpu.h>
48 #include <sys/kmem.h>
49 #include <sys/mutex.h>
50 #include <sys/pserialize.h>
51 #include <sys/psref.h>
52 #include <sys/sysctl.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/route.h>
57 #include <net/bpf.h>
58 #include <net/pfkeyv2.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #ifdef	INET
64 #include <netinet/in_var.h>
65 #endif	/* INET */
66 
67 #ifdef INET6
68 #include <netinet6/in6_var.h>
69 #include <netinet/ip6.h>
70 #include <netinet6/ip6_var.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_encap.h>
74 
75 #include <net/if_ipsec.h>
76 
77 #include <net/raw_cb.h>
78 #include <net/pfkeyv2.h>
79 
80 #include <netipsec/key.h>
81 #include <netipsec/keydb.h> /* for union sockaddr_union */
82 #include <netipsec/ipsec.h>
83 #include <netipsec/ipsecif.h>
84 
85 static int if_ipsec_clone_create(struct if_clone *, int);
86 static int if_ipsec_clone_destroy(struct ifnet *);
87 
88 static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
89 static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
90 
91 static int if_ipsec_encap_attach(struct ipsec_variant *);
92 static int if_ipsec_encap_detach(struct ipsec_variant *);
93 static int if_ipsec_set_tunnel(struct ifnet *,
94     struct sockaddr *, struct sockaddr *);
95 static void if_ipsec_delete_tunnel(struct ifnet *);
96 static int if_ipsec_ensure_flags(struct ifnet *, u_short);
97 static void if_ipsec_attach0(struct ipsec_softc *);
98 
99 static int if_ipsec_update_variant(struct ipsec_softc *,
100     struct ipsec_variant *, struct ipsec_variant *);
101 
102 /* sadb_msg */
103 static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
104 static inline void if_ipsec_add_pad(struct mbuf *, size_t);
105 static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
106     struct sockaddr *, int, uint16_t);
107 static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
108     struct sockaddr *, int);
109 static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
110     struct sockaddr *, int);
111 static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
112     struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t,
113     struct sockaddr *, struct sockaddr *);
114 static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
115 static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
116 static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
117 /* SPD */
118 static int if_ipsec_share_sp(struct ipsec_variant *);
119 static int if_ipsec_unshare_sp(struct ipsec_variant *);
120 static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
121     in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
122 static inline int if_ipsec_del_sp0(struct secpolicy *);
123 static int if_ipsec_add_sp(struct ipsec_variant *,
124     struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
125 static void if_ipsec_del_sp(struct ipsec_variant *);
126 static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
127     struct ipsec_variant *);
128 
129 static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
130     in_port_t);
131 #define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target)			\
132 	if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
133 #define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target)			\
134 	if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
135 
136 /*
137  * ipsec global variable definitions
138  */
139 
140 /* This list is used in ioctl context only. */
141 static struct {
142 	LIST_HEAD(ipsec_sclist, ipsec_softc) list;
143 	kmutex_t lock;
144 } ipsec_softcs __cacheline_aligned;
145 
146 struct psref_class *iv_psref_class __read_mostly;
147 
148 struct if_clone ipsec_cloner =
149     IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
150 static int max_ipsec_nesting = MAX_IPSEC_NEST;
151 
152 static struct sysctllog *if_ipsec_sysctl;
153 
154 #ifdef INET6
155 static int
156 sysctl_if_ipsec_pmtu_global(SYSCTLFN_ARGS)
157 {
158 	int error, pmtu;
159 	struct sysctlnode node = *rnode;
160 
161 	pmtu = ip6_ipsec_pmtu;
162 	node.sysctl_data = &pmtu;
163 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
164 	if (error || newp == NULL)
165 		return error;
166 
167 	switch (pmtu) {
168 	case IPSEC_PMTU_MINMTU:
169 	case IPSEC_PMTU_OUTERMTU:
170 		ip6_ipsec_pmtu = pmtu;
171 		break;
172 	default:
173 		return EINVAL;
174 	}
175 
176 	return 0;
177 }
178 
179 static int
180 sysctl_if_ipsec_pmtu_perif(SYSCTLFN_ARGS)
181 {
182 	int error, pmtu;
183 	struct sysctlnode node = *rnode;
184 	struct ipsec_softc *sc = (struct ipsec_softc *)node.sysctl_data;
185 
186 	pmtu = sc->ipsec_pmtu;
187 	node.sysctl_data = &pmtu;
188 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
189 	if (error || newp == NULL)
190 		return error;
191 
192 	switch (pmtu) {
193 	case IPSEC_PMTU_SYSDEFAULT:
194 	case IPSEC_PMTU_MINMTU:
195 	case IPSEC_PMTU_OUTERMTU:
196 		sc->ipsec_pmtu = pmtu;
197 		break;
198 	default:
199 		return EINVAL;
200 	}
201 
202 	return 0;
203 }
204 #endif
205 
206 static void
207 if_ipsec_sysctl_setup(void)
208 {
209 	if_ipsec_sysctl = NULL;
210 
211 #ifdef INET6
212 	/*
213 	 * Previously create "net.inet6.ip6" entry to avoid sysctl_createv error.
214 	 */
215 	sysctl_createv(NULL, 0, NULL, NULL,
216 		       CTLFLAG_PERMANENT,
217 		       CTLTYPE_NODE, "inet6",
218 		       SYSCTL_DESCR("PF_INET6 related settings"),
219 		       NULL, 0, NULL, 0,
220 		       CTL_NET, PF_INET6, CTL_EOL);
221 	sysctl_createv(NULL, 0, NULL, NULL,
222 		       CTLFLAG_PERMANENT,
223 		       CTLTYPE_NODE, "ip6",
224 		       SYSCTL_DESCR("IPv6 related settings"),
225 		       NULL, 0, NULL, 0,
226 		       CTL_NET, PF_INET6, IPPROTO_IPV6, CTL_EOL);
227 
228 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
229 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
230 		       CTLTYPE_INT, "ipsecifhlim",
231 		       SYSCTL_DESCR("Default hop limit for a ipsec tunnel datagram"),
232 		       NULL, 0, &ip6_ipsec_hlim, 0,
233 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
234 		       IPV6CTL_IPSEC_HLIM, CTL_EOL);
235 
236 	sysctl_createv(&if_ipsec_sysctl, 0, NULL, NULL,
237 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
238 		       CTLTYPE_INT, "ipsecifpmtu",
239 		       SYSCTL_DESCR("Default Path MTU setting for ipsec tunnels"),
240 		       sysctl_if_ipsec_pmtu_global, 0, NULL, 0,
241 		       CTL_NET, PF_INET6, IPPROTO_IPV6,
242 		       IPV6CTL_IPSEC_PMTU, CTL_EOL);
243 #endif
244 }
245 
/*
 * Create the per-interface "pmtu" sysctl node below the interface's own
 * node under the "interfaces" node.
 *
 * clog: sysctl log that records the created nodes.
 * sc:   softc whose ipsec_pmtu the node exposes.
 *
 * Failure to create the "pmtu" leaf is logged and otherwise ignored.
 * Without INET6 this function does nothing.
 */
static void
if_ipsec_perif_sysctl_setup(struct sysctllog **clog, struct ipsec_softc *sc)
{
#ifdef INET6
	const struct sysctlnode *rnode;
	struct ifnet *ifp = &sc->ipsec_if;
	const char *ifname = ifp->if_xname;
	int rv;

	/*
	 * Set the default before the node exists: the handler reads and
	 * writes sc->ipsec_pmtu, so assigning it afterwards could expose a
	 * not-yet-initialized value or overwrite one stored via sysctl.
	 */
	sc->ipsec_pmtu = IPSEC_PMTU_SYSDEFAULT;

	/*
	 * Already created in sysctl_sndq_setup().
	 */
	sysctl_createv(clog, 0, NULL, &rnode,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "interfaces",
		       SYSCTL_DESCR("Per-interface controls"),
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &rnode, &rnode,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, ifname,
		       SYSCTL_DESCR("Interface controls"),
		       NULL, 0, NULL, 0,
		       CTL_CREATE, CTL_EOL);

	/* The created leaf itself is not needed, so no cnode is requested. */
	rv = sysctl_createv(clog, 0, &rnode, NULL,
			    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
			    CTLTYPE_INT, "pmtu",
			    SYSCTL_DESCR("Path MTU setting for this ipsec tunnel"),
			    sysctl_if_ipsec_pmtu_perif, 0, (void *)sc, 0,
			    CTL_CREATE, CTL_EOL);
	if (rv != 0)
		log(LOG_WARNING, "%s: could not attach sysctl node pmtu\n", ifname);
#endif
}
283 
284 /* ARGSUSED */
285 void
286 ipsecifattach(int count)
287 {
288 
289 	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
290 	LIST_INIT(&ipsec_softcs.list);
291 
292 	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
293 
294 	if_ipsec_sysctl_setup();
295 
296 	if_clone_attach(&ipsec_cloner);
297 }
298 
299 static int
300 if_ipsec_clone_create(struct if_clone *ifc, int unit)
301 {
302 	struct ipsec_softc *sc;
303 	struct ipsec_variant *var;
304 	struct ifnet *ifp;
305 
306 	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
307 
308 	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
309 
310 	if_ipsec_attach0(sc);
311 
312 	ifp = &sc->ipsec_if;
313 	if_ipsec_perif_sysctl_setup(&ifp->if_sysctl_log, sc);
314 
315 	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
316 	var->iv_softc = sc;
317 	psref_target_init(&var->iv_psref, iv_psref_class);
318 
319 	sc->ipsec_var = var;
320 	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
321 	sc->ipsec_psz = pserialize_create();
322 	sc->ipsec_ro_percpu = if_tunnel_alloc_ro_percpu();
323 
324 	mutex_enter(&ipsec_softcs.lock);
325 	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
326 	mutex_exit(&ipsec_softcs.lock);
327 	return 0;
328 }
329 
330 static void
331 if_ipsec_attach0(struct ipsec_softc *sc)
332 {
333 
334 	sc->ipsec_if.if_addrlen = 0;
335 	sc->ipsec_if.if_mtu    = IPSEC_MTU;
336 	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
337 	/* set ipsec(4) specific default flags. */
338 	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
339 	sc->ipsec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE;
340 	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
341 	sc->ipsec_if.if_output = if_ipsec_output;
342 	sc->ipsec_if.if_type   = IFT_IPSEC;
343 	sc->ipsec_if.if_dlt    = DLT_NULL;
344 	sc->ipsec_if.if_softc  = sc;
345 	IFQ_SET_READY(&sc->ipsec_if.if_snd);
346 	if_initialize(&sc->ipsec_if);
347 	if_alloc_sadl(&sc->ipsec_if);
348 	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
349 	if_register(&sc->ipsec_if);
350 }
351 
352 static int
353 if_ipsec_clone_destroy(struct ifnet *ifp)
354 {
355 	struct ipsec_softc *sc = ifp->if_softc;
356 	struct ipsec_variant *var;
357 	int bound;
358 
359 	mutex_enter(&ipsec_softcs.lock);
360 	LIST_REMOVE(sc, ipsec_list);
361 	mutex_exit(&ipsec_softcs.lock);
362 
363 	bound = curlwp_bind();
364 	if_ipsec_delete_tunnel(&sc->ipsec_if);
365 	curlwp_bindx(bound);
366 
367 	bpf_detach(ifp);
368 	if_detach(ifp);
369 
370 	if_tunnel_free_ro_percpu(sc->ipsec_ro_percpu);
371 
372 	pserialize_destroy(sc->ipsec_psz);
373 	mutex_destroy(&sc->ipsec_lock);
374 
375 	var = sc->ipsec_var;
376 	kmem_free(var, sizeof(*var));
377 	kmem_free(sc, sizeof(*sc));
378 
379 	return 0;
380 }
381 
382 static inline bool
383 if_ipsec_nat_t(struct ipsec_softc *sc)
384 {
385 
386 	return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0;
387 }
388 
389 static inline bool
390 if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
391 {
392 
393 	return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0;
394 }
395 
396 int
397 if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
398 {
399 	uint8_t v;
400 	struct ipsec_softc *sc;
401 	struct ipsec_variant *var = NULL;
402 	struct psref psref;
403 	int ret = 0;
404 
405 	sc = arg;
406 	KASSERT(sc != NULL);
407 
408 	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
409 		goto out;
410 
411 	var = if_ipsec_getref_variant(sc, &psref);
412 	if (if_ipsec_variant_is_unconfigured(var))
413 		goto out;
414 
415 	switch (proto) {
416 	case IPPROTO_IPV4:
417 	case IPPROTO_IPV6:
418 		break;
419 	default:
420 		goto out;
421 	}
422 
423 	m_copydata(m, 0, sizeof(v), &v);
424 	v = (v >> 4) & 0xff;  /* Get the IP version number. */
425 
426 	switch (v) {
427 #ifdef INET
428 	case IPVERSION: {
429 		struct ip ip;
430 
431 		if (m->m_pkthdr.len < sizeof(ip))
432 			goto out;
433 
434 		m_copydata(m, 0, sizeof(ip), &ip);
435 		if (var->iv_psrc->sa_family != AF_INET ||
436 		    var->iv_pdst->sa_family != AF_INET)
437 			goto out;
438 		ret = ipsecif4_encap_func(m, &ip, var);
439 		break;
440 	}
441 #endif
442 #ifdef INET6
443 	case (IPV6_VERSION >> 4): {
444 		struct ip6_hdr ip6;
445 
446 		if (m->m_pkthdr.len < sizeof(ip6))
447 			goto out;
448 
449 		m_copydata(m, 0, sizeof(ip6), &ip6);
450 		if (var->iv_psrc->sa_family != AF_INET6 ||
451 		    var->iv_pdst->sa_family != AF_INET6)
452 			goto out;
453 		ret = ipsecif6_encap_func(m, &ip6, var);
454 		break;
455 	}
456 #endif
457 	default:
458 		goto out;
459 	}
460 
461 out:
462 	if (var != NULL)
463 		if_ipsec_putref_variant(var, &psref);
464 	return ret;
465 }
466 
467 /*
468  * ipsec(4) I/F may cause infinite recursion calls when misconfigured.
469  * We'll prevent this by introducing upper limit.
470  */
471 static int
472 if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
473 {
474 
475 	return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
476 }
477 
478 int
479 if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
480     const struct rtentry *rt)
481 {
482 	struct ipsec_softc *sc = ifp->if_softc;
483 	struct ipsec_variant *var;
484 	struct psref psref;
485 	int error;
486 	int bound;
487 
488 	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
489 
490 	error = if_ipsec_check_nesting(ifp, m);
491 	if (error) {
492 		m_freem(m);
493 		goto noref_end;
494 	}
495 
496 	if ((ifp->if_flags & IFF_UP) == 0) {
497 		m_freem(m);
498 		error = ENETDOWN;
499 		goto noref_end;
500 	}
501 
502 
503 	bound = curlwp_bind();
504 	var = if_ipsec_getref_variant(sc, &psref);
505 	if (if_ipsec_variant_is_unconfigured(var)) {
506 		m_freem(m);
507 		error = ENETDOWN;
508 		goto end;
509 	}
510 
511 	m->m_flags &= ~(M_BCAST|M_MCAST);
512 
513 	/* use DLT_NULL encapsulation here to pass inner af type */
514 	M_PREPEND(m, sizeof(int), M_DONTWAIT);
515 	if (!m) {
516 		error = ENOBUFS;
517 		goto end;
518 	}
519 	*mtod(m, int *) = dst->sa_family;
520 
521 #if INET6
522 	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
523 	if (dst->sa_family == AF_INET6 &&
524 	    !if_ipsec_fwd_ipv6(sc)) {
525 		/*
526 		 * IPv6 packet is not allowed to forward,that is not error.
527 		 */
528 		error = 0;
529 		IF_DROP(&ifp->if_snd);
530 		m_freem(m);
531 		goto end;
532 	}
533 #endif
534 
535 	error = if_ipsec_out_direct(var, m, dst->sa_family);
536 
537 end:
538 	if_ipsec_putref_variant(var, &psref);
539 	curlwp_bindx(bound);
540 noref_end:
541 	if (error)
542 		if_statinc(ifp, if_oerrors);
543 
544 	return error;
545 }
546 
547 static inline int
548 if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
549 {
550 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
551 	int error;
552 	int len;
553 
554 	KASSERT(if_ipsec_heldref_variant(var));
555 	KASSERT(var->iv_output != NULL);
556 
557 	len = m->m_pkthdr.len;
558 
559 	/* input DLT_NULL frame to BPF */
560 	bpf_mtap(ifp, m, BPF_D_OUT);
561 
562 	/* grab and chop off inner af type */
563 	/* XXX need pullup? */
564 	m_adj(m, sizeof(int));
565 
566 	error = var->iv_output(var, family, m);
567 	if (error)
568 		return error;
569 
570 	if_statadd2(ifp, if_opackets, 1, if_obytes, len);
571 
572 	return 0;
573 }
574 
575 void
576 if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
577 {
578 
579 	KASSERT(ifp != NULL);
580 
581 	m_set_rcvif(m, ifp);
582 
583 	bpf_mtap_af(ifp, af, m, BPF_D_IN);
584 
585 	if_ipsec_in_enqueue(m, af, ifp);
586 
587 	return;
588 }
589 
590 static inline void
591 if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
592 {
593 	pktqueue_t *pktq;
594 	int pktlen;
595 
596 	/*
597 	 * Put the packet to the network layer input queue according to the
598 	 * specified address family.
599 	 */
600 	switch (af) {
601 #ifdef INET
602 	case AF_INET:
603 		pktq = ip_pktq;
604 		break;
605 #endif
606 #ifdef INET6
607 	case AF_INET6:
608 		pktq = ip6_pktq;
609 		break;
610 #endif
611 	default:
612 		if_statinc(ifp, if_ierrors);
613 		m_freem(m);
614 		return;
615 	}
616 
617 #if 1
618 	const u_int h = curcpu()->ci_index;
619 #else
620 	const uint32_t h = pktq_rps_hash(m);
621 #endif
622 	pktlen = m->m_pkthdr.len;
623 	if (__predict_true(pktq_enqueue(pktq, m, h))) {
624 		if_statadd2(ifp, if_ibytes, pktlen, if_ipackets, 1);
625 	} else {
626 		if_statinc(ifp, if_iqdrops);
627 		m_freem(m);
628 	}
629 
630 	return;
631 }
632 
633 static inline int
634 if_ipsec_check_salen(struct sockaddr *addr)
635 {
636 
637 	switch (addr->sa_family) {
638 #ifdef INET
639 	case AF_INET:
640 		if (addr->sa_len != sizeof(struct sockaddr_in))
641 			return EINVAL;
642 		break;
643 #endif /* INET */
644 #ifdef INET6
645 	case AF_INET6:
646 		if (addr->sa_len != sizeof(struct sockaddr_in6))
647 			return EINVAL;
648 		break;
649 #endif /* INET6 */
650 	default:
651 		return EAFNOSUPPORT;
652 	}
653 
654 	return 0;
655 }
656 
657 /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
658 int
659 if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
660 {
661 	struct ipsec_softc *sc  = ifp->if_softc;
662 	struct ipsec_variant *var = NULL;
663 	struct ifreq     *ifr = (struct ifreq*)data;
664 	struct ifaddr    *ifa = (struct ifaddr*)data;
665 	int error = 0, size;
666 	struct sockaddr *dst, *src;
667 	u_long mtu;
668 	u_short oflags = ifp->if_flags;
669 	int bound;
670 	struct psref psref;
671 
672 	switch (cmd) {
673 	case SIOCINITIFADDR:
674 		ifp->if_flags |= IFF_UP;
675 		ifa->ifa_rtrequest = p2p_rtrequest;
676 		break;
677 
678 	case SIOCSIFDSTADDR:
679 		break;
680 
681 	case SIOCADDMULTI:
682 	case SIOCDELMULTI:
683 		switch (ifr->ifr_addr.sa_family) {
684 #ifdef INET
685 		case AF_INET:	/* IP supports Multicast */
686 			break;
687 #endif /* INET */
688 #ifdef INET6
689 		case AF_INET6:	/* IP6 supports Multicast */
690 			break;
691 #endif /* INET6 */
692 		default:  /* Other protocols doesn't support Multicast */
693 			error = EAFNOSUPPORT;
694 			break;
695 		}
696 		break;
697 
698 	case SIOCSIFMTU:
699 		mtu = ifr->ifr_mtu;
700 		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
701 			return EINVAL;
702 		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
703 			error = 0;
704 		break;
705 
706 #ifdef INET
707 	case SIOCSIFPHYADDR:
708 #endif
709 #ifdef INET6
710 	case SIOCSIFPHYADDR_IN6:
711 #endif /* INET6 */
712 	case SIOCSLIFPHYADDR:
713 		switch (cmd) {
714 #ifdef INET
715 		case SIOCSIFPHYADDR:
716 			src = (struct sockaddr *)
717 				&(((struct in_aliasreq *)data)->ifra_addr);
718 			dst = (struct sockaddr *)
719 				&(((struct in_aliasreq *)data)->ifra_dstaddr);
720 			break;
721 #endif /* INET */
722 #ifdef INET6
723 		case SIOCSIFPHYADDR_IN6:
724 			src = (struct sockaddr *)
725 				&(((struct in6_aliasreq *)data)->ifra_addr);
726 			dst = (struct sockaddr *)
727 				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
728 			break;
729 #endif /* INET6 */
730 		case SIOCSLIFPHYADDR:
731 			src = (struct sockaddr *)
732 				&(((struct if_laddrreq *)data)->addr);
733 			dst = (struct sockaddr *)
734 				&(((struct if_laddrreq *)data)->dstaddr);
735 			break;
736 		default:
737 			return EINVAL;
738 		}
739 
740 		/* sa_family must be equal */
741 		if (src->sa_family != dst->sa_family)
742 			return EINVAL;
743 
744 		error = if_ipsec_check_salen(src);
745 		if (error)
746 			return error;
747 		error = if_ipsec_check_salen(dst);
748 		if (error)
749 			return error;
750 
751 		/* check sa_family looks sane for the cmd */
752 		switch (cmd) {
753 #ifdef INET
754 		case SIOCSIFPHYADDR:
755 			if (src->sa_family == AF_INET)
756 				break;
757 			return EAFNOSUPPORT;
758 #endif /* INET */
759 #ifdef INET6
760 		case SIOCSIFPHYADDR_IN6:
761 			if (src->sa_family == AF_INET6)
762 				break;
763 			return EAFNOSUPPORT;
764 #endif /* INET6 */
765 		case SIOCSLIFPHYADDR:
766 			/* checks done in the above */
767 			break;
768 		}
769 		/*
770 		 * calls if_ipsec_getref_variant() for other softcs to check
771 		 * address pair duplicattion
772 		 */
773 		bound = curlwp_bind();
774 		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
775 		if (error)
776 			goto bad;
777 		curlwp_bindx(bound);
778 		break;
779 
780 	case SIOCDIFPHYADDR:
781 		bound = curlwp_bind();
782 		if_ipsec_delete_tunnel(&sc->ipsec_if);
783 		curlwp_bindx(bound);
784 		break;
785 
786 	case SIOCGIFPSRCADDR:
787 #ifdef INET6
788 	case SIOCGIFPSRCADDR_IN6:
789 #endif /* INET6 */
790 		bound = curlwp_bind();
791 		var = if_ipsec_getref_variant(sc, &psref);
792 		if (var->iv_psrc == NULL) {
793 			error = EADDRNOTAVAIL;
794 			goto bad;
795 		}
796 		src = var->iv_psrc;
797 		switch (cmd) {
798 #ifdef INET
799 		case SIOCGIFPSRCADDR:
800 			dst = &ifr->ifr_addr;
801 			size = sizeof(ifr->ifr_addr);
802 			break;
803 #endif /* INET */
804 #ifdef INET6
805 		case SIOCGIFPSRCADDR_IN6:
806 			dst = (struct sockaddr *)
807 				&(((struct in6_ifreq *)data)->ifr_addr);
808 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
809 			break;
810 #endif /* INET6 */
811 		default:
812 			error = EADDRNOTAVAIL;
813 			goto bad;
814 		}
815 		if (src->sa_len > size) {
816 			error = EINVAL;
817 			goto bad;
818 		}
819 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
820 		if (error)
821 			goto bad;
822 		if_ipsec_putref_variant(var, &psref);
823 		curlwp_bindx(bound);
824 		break;
825 
826 	case SIOCGIFPDSTADDR:
827 #ifdef INET6
828 	case SIOCGIFPDSTADDR_IN6:
829 #endif /* INET6 */
830 		bound = curlwp_bind();
831 		var = if_ipsec_getref_variant(sc, &psref);
832 		if (var->iv_pdst == NULL) {
833 			error = EADDRNOTAVAIL;
834 			goto bad;
835 		}
836 		src = var->iv_pdst;
837 		switch (cmd) {
838 #ifdef INET
839 		case SIOCGIFPDSTADDR:
840 			dst = &ifr->ifr_addr;
841 			size = sizeof(ifr->ifr_addr);
842 			break;
843 #endif /* INET */
844 #ifdef INET6
845 		case SIOCGIFPDSTADDR_IN6:
846 			dst = (struct sockaddr *)
847 				&(((struct in6_ifreq *)data)->ifr_addr);
848 			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
849 			break;
850 #endif /* INET6 */
851 		default:
852 			error = EADDRNOTAVAIL;
853 			goto bad;
854 		}
855 		if (src->sa_len > size) {
856 			error = EINVAL;
857 			goto bad;
858 		}
859 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
860 		if (error)
861 			goto bad;
862 		if_ipsec_putref_variant(var, &psref);
863 		curlwp_bindx(bound);
864 		break;
865 
866 	case SIOCGLIFPHYADDR:
867 		bound = curlwp_bind();
868 		var = if_ipsec_getref_variant(sc, &psref);
869 		if (if_ipsec_variant_is_unconfigured(var)) {
870 			error = EADDRNOTAVAIL;
871 			goto bad;
872 		}
873 
874 		/* copy src */
875 		src = var->iv_psrc;
876 		dst = (struct sockaddr *)
877 			&(((struct if_laddrreq *)data)->addr);
878 		size = sizeof(((struct if_laddrreq *)data)->addr);
879 		if (src->sa_len > size) {
880 			error = EINVAL;
881 			goto bad;
882 		}
883 		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
884 		if (error)
885 			goto bad;
886 
887 		/* copy dst */
888 		src = var->iv_pdst;
889 		dst = (struct sockaddr *)
890 			&(((struct if_laddrreq *)data)->dstaddr);
891 		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
892 		if (src->sa_len > size) {
893 			error = EINVAL;
894 			goto bad;
895 		}
896 		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
897 		if (error)
898 			goto bad;
899 		if_ipsec_putref_variant(var, &psref);
900 		curlwp_bindx(bound);
901 		break;
902 
903 	default:
904 		error = ifioctl_common(ifp, cmd, data);
905 		if (!error) {
906 			bound = curlwp_bind();
907 			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
908 			if (error)
909 				goto bad;
910 			curlwp_bindx(bound);
911 		}
912 		break;
913 	}
914 	return error;
915 
916 bad:
917 	if (var != NULL)
918 		if_ipsec_putref_variant(var, &psref);
919 	curlwp_bindx(bound);
920 
921 	return error;
922 }
923 
924 struct encap_funcs {
925 #ifdef INET
926 	int (*ef_inet)(struct ipsec_variant *);
927 #endif
928 #ifdef INET6
929 	int (*ef_inet6)(struct ipsec_variant *);
930 #endif
931 };
932 
933 static struct encap_funcs ipsec_encap_attach = {
934 #ifdef INET
935 	.ef_inet = ipsecif4_attach,
936 #endif
937 #ifdef INET6
938 	.ef_inet6 = &ipsecif6_attach,
939 #endif
940 };
941 
942 static struct encap_funcs ipsec_encap_detach = {
943 #ifdef INET
944 	.ef_inet = ipsecif4_detach,
945 #endif
946 #ifdef INET6
947 	.ef_inet6 = &ipsecif6_detach,
948 #endif
949 };
950 
951 static int
952 if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
953 {
954 	int error;
955 
956 	KASSERT(var != NULL);
957 	KASSERT(if_ipsec_variant_is_configured(var));
958 
959 	switch (var->iv_psrc->sa_family) {
960 #ifdef INET
961 	case AF_INET:
962 		error = (funcs->ef_inet)(var);
963 		break;
964 #endif /* INET */
965 #ifdef INET6
966 	case AF_INET6:
967 		error = (funcs->ef_inet6)(var);
968 		break;
969 #endif /* INET6 */
970 	default:
971 		error = EINVAL;
972 		break;
973 	}
974 
975 	return error;
976 }
977 
978 static int
979 if_ipsec_encap_attach(struct ipsec_variant *var)
980 {
981 
982 	return if_ipsec_encap_common(var, &ipsec_encap_attach);
983 }
984 
985 static int
986 if_ipsec_encap_detach(struct ipsec_variant *var)
987 {
988 
989 	return if_ipsec_encap_common(var, &ipsec_encap_detach);
990 }
991 
992 /*
993  * Validate and set ipsec(4) I/F configurations.
994  *     (1) validate
995  *         (1-1) Check the argument src and dst address pair will change
996  *               configuration from current src and dst address pair.
997  *         (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
998  *               with argument src and dst address pair, except for NAT-T shared
999  *               tunnels.
1000  *     (2) set
1001  *         (2-1) Create variant for new configuration.
1002  *         (2-2) Create temporary "null" variant used to avoid to access
1003  *               dangling variant while SPs are deleted and added.
1004  *         (2-3) Swap variant include its SPs.
1005  *         (2-4) Cleanup last configurations.
1006  */
1007 static int
1008 if_ipsec_set_tunnel(struct ifnet *ifp,
1009     struct sockaddr *src, struct sockaddr *dst)
1010 {
1011 	struct ipsec_softc *sc = ifp->if_softc;
1012 	struct ipsec_softc *sc2;
1013 	struct ipsec_variant *ovar, *nvar, *nullvar;
1014 	struct sockaddr *osrc, *odst;
1015 	struct sockaddr *nsrc, *ndst;
1016 	in_port_t nsport = 0, ndport = 0;
1017 	int error;
1018 
1019 	error = encap_lock_enter();
1020 	if (error)
1021 		return error;
1022 
1023 	nsrc = sockaddr_dup(src, M_WAITOK);
1024 	ndst = sockaddr_dup(dst, M_WAITOK);
1025 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1026 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1027 
1028 	mutex_enter(&sc->ipsec_lock);
1029 
1030 	ovar = sc->ipsec_var;
1031 
1032 	switch(nsrc->sa_family) {
1033 #ifdef INET
1034 	case AF_INET:
1035 		nsport = satosin(src)->sin_port;
1036 		/*
1037 		 * avoid confuse SP when NAT-T disabled,
1038 		 * e.g.
1039 		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
1040 		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
1041 		 */
1042 		satosin(nsrc)->sin_port = 0;
1043 		ndport = satosin(dst)->sin_port;
1044 		satosin(ndst)->sin_port = 0;
1045 		break;
1046 #endif /* INET */
1047 #ifdef INET6
1048 	case AF_INET6:
1049 		nsport = satosin6(src)->sin6_port;
1050 		satosin6(nsrc)->sin6_port = 0;
1051 		ndport = satosin6(dst)->sin6_port;
1052 		satosin6(ndst)->sin6_port = 0;
1053 		break;
1054 #endif /* INET6 */
1055 	default:
1056 		log(LOG_DEBUG,
1057 		    "%s: Invalid address family: %d.\n",
1058 		    __func__, src->sa_family);
1059 		error = EINVAL;
1060 		goto out;
1061 	}
1062 
1063 	/*
1064 	 * (1-1) Check the argument src and dst address pair will change
1065 	 *       configuration from current src and dst address pair.
1066 	 */
1067 	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
1068 	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
1069 	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
1070 		/* address and port pair not changed. */
1071 		error = 0;
1072 		goto out;
1073 	}
1074 
1075 	/*
1076 	 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
1077 	 *       with argument src and dst address pair, except for NAT-T shared
1078 	 *       tunnels.
1079 	 */
1080 	mutex_enter(&ipsec_softcs.lock);
1081 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1082 		struct ipsec_variant *var2;
1083 		struct psref psref;
1084 
1085 		if (sc2 == sc)
1086 			continue;
1087 		var2 = if_ipsec_getref_variant(sc2, &psref);
1088 		if (if_ipsec_variant_is_unconfigured(var2)) {
1089 			if_ipsec_putref_variant(var2, &psref);
1090 			continue;
1091 		}
1092 		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
1093 			if_ipsec_putref_variant(var2, &psref);
1094 			continue; /* NAT-T shared tunnel */
1095 		}
1096 		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
1097 		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
1098 			if_ipsec_putref_variant(var2, &psref);
1099 			mutex_exit(&ipsec_softcs.lock);
1100 			error = EADDRNOTAVAIL;
1101 			goto out;
1102 		}
1103 
1104 		if_ipsec_putref_variant(var2, &psref);
1105 		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
1106 	}
1107 	mutex_exit(&ipsec_softcs.lock);
1108 
1109 
1110 	osrc = ovar->iv_psrc;
1111 	odst = ovar->iv_pdst;
1112 
1113 	/*
1114 	 * (2-1) Create ipsec_variant for new configuration.
1115 	 */
1116 	if_ipsec_copy_variant(nvar, ovar);
1117 	nvar->iv_psrc = nsrc;
1118 	nvar->iv_pdst = ndst;
1119 	nvar->iv_sport = nsport;
1120 	nvar->iv_dport = ndport;
1121 	nvar->iv_encap_cookie4 = NULL;
1122 	nvar->iv_encap_cookie6 = NULL;
1123 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1124 	error = if_ipsec_encap_attach(nvar);
1125 	if (error)
1126 		goto out;
1127 
1128 	/*
1129 	 * (2-2) Create temporary "null" variant.
1130 	 */
1131 	if_ipsec_copy_variant(nullvar, ovar);
1132 	if_ipsec_clear_config(nullvar);
1133 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1134 	/*
1135 	 * (2-3) Swap variant include its SPs.
1136 	 */
1137 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1138 	if (error) {
1139 		if_ipsec_encap_detach(nvar);
1140 		goto out;
1141 	}
1142 
1143 	mutex_exit(&sc->ipsec_lock);
1144 
1145 	/*
1146 	 * (2-4) Cleanup last configurations.
1147 	 */
1148 	if (if_ipsec_variant_is_configured(ovar))
1149 		if_ipsec_encap_detach(ovar);
1150 	encap_lock_exit();
1151 
1152 	if (osrc != NULL)
1153 		sockaddr_free(osrc);
1154 	if (odst != NULL)
1155 		sockaddr_free(odst);
1156 	kmem_free(ovar, sizeof(*ovar));
1157 	kmem_free(nullvar, sizeof(*nullvar));
1158 
1159 	return 0;
1160 
1161 out:
1162 	mutex_exit(&sc->ipsec_lock);
1163 	encap_lock_exit();
1164 
1165 	sockaddr_free(nsrc);
1166 	sockaddr_free(ndst);
1167 	kmem_free(nvar, sizeof(*nvar));
1168 	kmem_free(nullvar, sizeof(*nullvar));
1169 
1170 	return error;
1171 }
1172 
1173 /*
1174  * Validate and delete ipsec(4) I/F configurations.
1175  *     (1) validate
1176  *         (1-1) Check current src and dst address pair are null,
1177  *               which means the ipsec(4) I/F is already done deletetunnel.
1178  *     (2) delete
1179  *         (2-1) Create variant for deleted status.
1180  *         (2-2) Create temporary "null" variant used to avoid to access
1181  *               dangling variant while SPs are deleted and added.
1182  *               NOTE:
1183  *               The contents of temporary "null" variant equal to the variant
1184  *               of (2-1), however two psref_target_destroy() synchronization
1185  *               points are necessary to avoid to access dangling variant
1186  *               while SPs are deleted and added. To implement that simply,
1187  *               we use the same manner as if_ipsec_set_tunnel(), that is,
1188  *               create extra "null" variant and use it temporarily.
1189  *         (2-3) Swap variant include its SPs.
1190  *         (2-4) Cleanup last configurations.
1191  */
1192 static void
1193 if_ipsec_delete_tunnel(struct ifnet *ifp)
1194 {
1195 	struct ipsec_softc *sc = ifp->if_softc;
1196 	struct ipsec_variant *ovar, *nvar, *nullvar;
1197 	struct sockaddr *osrc, *odst;
1198 	int error;
1199 
1200 	error = encap_lock_enter();
1201 	if (error)
1202 		return;
1203 
1204 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1205 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1206 
1207 	mutex_enter(&sc->ipsec_lock);
1208 
1209 	ovar = sc->ipsec_var;
1210 	osrc = ovar->iv_psrc;
1211 	odst = ovar->iv_pdst;
1212 	/*
1213 	 * (1-1) Check current src and dst address pair are null,
1214 	 *       which means the ipsec(4) I/F is already done deletetunnel.
1215 	 */
1216 	if (osrc == NULL || odst == NULL) {
1217 		/* address pair not changed. */
1218 		mutex_exit(&sc->ipsec_lock);
1219 		encap_lock_exit();
1220 		kmem_free(nvar, sizeof(*nvar));
1221 		kmem_free(nullvar, sizeof(*nullvar));
1222 		return;
1223 	}
1224 
1225 	/*
1226 	 * (2-1) Create variant for deleted status.
1227 	 */
1228 	if_ipsec_copy_variant(nvar, ovar);
1229 	if_ipsec_clear_config(nvar);
1230 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1231 
1232 	/*
1233 	 * (2-2) Create temporary "null" variant used to avoid to access
1234 	 *       dangling variant while SPs are deleted and added.
1235 	 */
1236 	if_ipsec_copy_variant(nullvar, ovar);
1237 	if_ipsec_clear_config(nullvar);
1238 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1239 	/*
1240 	 * (2-3) Swap variant include its SPs.
1241 	 */
1242 	/* if_ipsec_update_variant() does not fail when delete SP only. */
1243 	(void)if_ipsec_update_variant(sc, nvar, nullvar);
1244 
1245 	mutex_exit(&sc->ipsec_lock);
1246 
1247 	/*
1248 	 * (2-4) Cleanup last configurations.
1249 	 */
1250 	if (if_ipsec_variant_is_configured(ovar))
1251 		if_ipsec_encap_detach(ovar);
1252 	encap_lock_exit();
1253 
1254 	sockaddr_free(osrc);
1255 	sockaddr_free(odst);
1256 	kmem_free(ovar, sizeof(*ovar));
1257 	kmem_free(nullvar, sizeof(*nullvar));
1258 }
1259 
1260 /*
1261  * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
1262  *     (1) check
1263  *         (1-1) Check flags are changed.
1264  *         (1-2) Check current src and dst address pair. If they are null,
1265  *               that means the ipsec(4) I/F is deletetunnel'ed, so it is
1266  *               not needed to update.
1267  *     (2) update
1268  *         (2-1) Create variant for new SPs.
1269  *         (2-2) Create temporary "null" variant used to avoid to access
1270  *               dangling variant while SPs are deleted and added.
1271  *               NOTE:
1272  *               There is the same problem as if_ipsec_delete_tunnel().
1273  *         (2-3) Swap variant include its SPs.
1274  *         (2-4) Cleanup unused configurations.
1275  *               NOTE: use the same encap_cookies.
1276  */
1277 static int
1278 if_ipsec_ensure_flags(struct ifnet *ifp, u_short oflags)
1279 {
1280 	struct ipsec_softc *sc = ifp->if_softc;
1281 	struct ipsec_variant *ovar, *nvar, *nullvar;
1282 	int error;
1283 
1284 	/*
1285 	 * (1) Check flags are changed.
1286 	 */
1287 	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
1288 	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
1289 		return 0; /* flags not changed. */
1290 
1291 	error = encap_lock_enter();
1292 	if (error)
1293 		return error;
1294 
1295 	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
1296 	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
1297 
1298 	mutex_enter(&sc->ipsec_lock);
1299 
1300 	ovar = sc->ipsec_var;
1301 	/*
1302 	 * (1-2) Check current src and dst address pair.
1303 	 */
1304 	if (if_ipsec_variant_is_unconfigured(ovar)) {
1305 		/* nothing to do */
1306 		mutex_exit(&sc->ipsec_lock);
1307 		encap_lock_exit();
1308 		kmem_free(nvar, sizeof(*nvar));
1309 		kmem_free(nullvar, sizeof(*nullvar));
1310 		return 0;
1311 	}
1312 
1313 	/*
1314 	 * (2-1) Create variant for new SPs.
1315 	 */
1316 	if_ipsec_copy_variant(nvar, ovar);
1317 	psref_target_init(&nvar->iv_psref, iv_psref_class);
1318 	/*
1319 	 * (2-2) Create temporary "null" variant used to avoid to access
1320 	 *       dangling variant while SPs are deleted and added.
1321 	 */
1322 	if_ipsec_copy_variant(nullvar, ovar);
1323 	if_ipsec_clear_config(nullvar);
1324 	psref_target_init(&nullvar->iv_psref, iv_psref_class);
1325 	/*
1326 	 * (2-3) Swap variant include its SPs.
1327 	 */
1328 	error = if_ipsec_update_variant(sc, nvar, nullvar);
1329 
1330 	mutex_exit(&sc->ipsec_lock);
1331 	encap_lock_exit();
1332 
1333 	/*
1334 	 * (2-4) Cleanup unused configurations.
1335 	 */
1336 	if (!error)
1337 		kmem_free(ovar, sizeof(*ovar));
1338 	else
1339 		kmem_free(nvar, sizeof(*ovar));
1340 	kmem_free(nullvar, sizeof(*nullvar));
1341 
1342 	return error;
1343 }
1344 
1345 /*
1346  * SPD management
1347  */
1348 
1349 /*
1350  * Share SP set with other NAT-T ipsec(4) I/F(s).
1351  *     Return 1, when "var" shares SP set.
1352  *     Return 0, when "var" cannot share SP set.
1353  *
1354  * NOTE:
1355  * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
1356  * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
1357  * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
1358  * set_tunnel causes race.
1359  * Currently, (fortunately) encap_lock works as this global lock.
1360  */
1361 static int
1362 if_ipsec_share_sp(struct ipsec_variant *var)
1363 {
1364 	struct ipsec_softc *sc = var->iv_softc;
1365 	struct ipsec_softc *sc2;
1366 	struct ipsec_variant *var2;
1367 	struct psref psref;
1368 
1369 	KASSERT(encap_lock_held());
1370 	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
1371 
1372 	mutex_enter(&ipsec_softcs.lock);
1373 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1374 		if (sc2 == sc)
1375 			continue;
1376 		var2 = if_ipsec_getref_variant(sc2, &psref);
1377 		if (if_ipsec_variant_is_unconfigured(var2)) {
1378 			if_ipsec_putref_variant(var2, &psref);
1379 			continue;
1380 		}
1381 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1382 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1383 			if_ipsec_putref_variant(var2, &psref);
1384 			continue;
1385 		}
1386 
1387 		break;
1388 	}
1389 	mutex_exit(&ipsec_softcs.lock);
1390 	if (sc2 == NULL)
1391 		return 0; /* not shared */
1392 
1393 	IV_SP_IN(var) = IV_SP_IN(var2);
1394 	IV_SP_IN6(var) = IV_SP_IN6(var2);
1395 	IV_SP_OUT(var) = IV_SP_OUT(var2);
1396 	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
1397 
1398 	if_ipsec_putref_variant(var2, &psref);
1399 	return 1; /* shared */
1400 }
1401 
1402 /*
1403  * Unshare SP set with other NAT-T ipsec(4) I/F(s).
1404  *     Return 1, when "var" shared SP set, and then unshare them.
1405  *     Return 0, when "var" did not share SP set.
1406  *
1407  * NOTE:
1408  * See if_ipsec_share_sp()'s note.
1409  */
1410 static int
1411 if_ipsec_unshare_sp(struct ipsec_variant *var)
1412 {
1413 	struct ipsec_softc *sc = var->iv_softc;
1414 	struct ipsec_softc *sc2;
1415 	struct ipsec_variant *var2;
1416 	struct psref psref;
1417 
1418 	KASSERT(encap_lock_held());
1419 
1420 	if (!var->iv_pdst || !var->iv_psrc)
1421 		return 0;
1422 
1423 	mutex_enter(&ipsec_softcs.lock);
1424 	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
1425 		if (sc2 == sc)
1426 			continue;
1427 		var2 = if_ipsec_getref_variant(sc2, &psref);
1428 		if (!var2->iv_pdst || !var2->iv_psrc) {
1429 			if_ipsec_putref_variant(var2, &psref);
1430 			continue;
1431 		}
1432 		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
1433 		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
1434 			if_ipsec_putref_variant(var2, &psref);
1435 			continue;
1436 		}
1437 
1438 		break;
1439 	}
1440 	mutex_exit(&ipsec_softcs.lock);
1441 	if (sc2 == NULL)
1442 		return 0; /* not shared */
1443 
1444 	IV_SP_IN(var) = NULL;
1445 	IV_SP_IN6(var) = NULL;
1446 	IV_SP_OUT(var) = NULL;
1447 	IV_SP_OUT6(var) = NULL;
1448 	if_ipsec_putref_variant(var2, &psref);
1449 	return 1; /* shared */
1450 }
1451 
1452 static inline void
1453 if_ipsec_add_mbuf_optalign(struct mbuf *m0, void *data, size_t len, bool align)
1454 {
1455 	struct mbuf *m;
1456 
1457 	MGET(m, M_WAIT, MT_DATA);
1458 	if (align) {
1459 		m->m_len = PFKEY_ALIGN8(len);
1460 		memset(mtod(m, void *), 0, m->m_len);
1461 	} else
1462 		m->m_len = len;
1463 	m_copyback(m, 0, len, data);
1464 	m_cat(m0, m);
1465 }
1466 
1467 static inline void
1468 if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
1469 {
1470 
1471 	if_ipsec_add_mbuf_optalign(m0, data, len, true);
1472 }
1473 
1474 static inline void
1475 if_ipsec_add_mbuf_addr_port(struct mbuf *m0, struct sockaddr *addr, in_port_t port, bool align)
1476 {
1477 
1478 	if (port == 0) {
1479 		if_ipsec_add_mbuf_optalign(m0, addr, addr->sa_len, align);
1480 	} else {
1481 		union sockaddr_union addrport_u;
1482 		struct sockaddr *addrport = &addrport_u.sa;
1483 
1484 		if_ipsec_set_addr_port(addrport, addr, port);
1485 		if_ipsec_add_mbuf_optalign(m0, addrport, addrport->sa_len, align);
1486 	}
1487 }
1488 
1489 static inline void
1490 if_ipsec_add_pad(struct mbuf *m0, size_t len)
1491 {
1492 	struct mbuf *m;
1493 
1494 	if (len == 0)
1495 		return;
1496 
1497 	MGET(m, M_WAIT, MT_DATA);
1498 	m->m_len = len;
1499 	memset(mtod(m, void *), 0, m->m_len);
1500 	m_cat(m0, m);
1501 }
1502 
1503 static inline size_t
1504 if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
1505     int proto, uint16_t exttype)
1506 {
1507 	size_t size;
1508 
1509 	KASSERT(saaddr != NULL);
1510 	KASSERT(addr != NULL);
1511 
1512 	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
1513 	saaddr->sadb_address_len = PFKEY_UNIT64(size);
1514 	saaddr->sadb_address_exttype = exttype;
1515 	saaddr->sadb_address_proto = proto;
1516 	switch (addr->sa_family) {
1517 #ifdef INET
1518 	case AF_INET:
1519 		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
1520 		break;
1521 #endif /* INET */
1522 #ifdef INET6
1523 	case AF_INET6:
1524 		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
1525 		break;
1526 #endif /* INET6 */
1527 	default:
1528 		log(LOG_DEBUG,
1529 		    "%s: Invalid address family: %d.\n",
1530 		    __func__, addr->sa_family);
1531 		break;
1532 	}
1533 	saaddr->sadb_address_reserved = 0;
1534 
1535 	return size;
1536 }
1537 
1538 static inline size_t
1539 if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
1540     int proto)
1541 {
1542 
1543 	return if_ipsec_set_sadb_addr(sasrc, src, proto,
1544 	    SADB_EXT_ADDRESS_SRC);
1545 }
1546 
1547 static inline size_t
1548 if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
1549     int proto)
1550 {
1551 
1552 	return if_ipsec_set_sadb_addr(sadst, dst, proto,
1553 	    SADB_EXT_ADDRESS_DST);
1554 }
1555 
1556 static inline size_t
1557 if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
1558     struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
1559     uint8_t level, struct sockaddr *src, struct sockaddr *dst)
1560 {
1561 	size_t size;
1562 
1563 	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
1564 
1565 	size = sizeof(*xpl);
1566 	if (policy == IPSEC_POLICY_IPSEC) {
1567 		size += PFKEY_ALIGN8(sizeof(*xisr));
1568 		if (src != NULL && dst != NULL)
1569 			size += PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1570 	}
1571 	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
1572 	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
1573 	xpl->sadb_x_policy_type = policy;
1574 	xpl->sadb_x_policy_dir = dir;
1575 	xpl->sadb_x_policy_reserved = 0;
1576 	xpl->sadb_x_policy_id = id;
1577 	xpl->sadb_x_policy_reserved2 = 0;
1578 
1579 	if (policy == IPSEC_POLICY_IPSEC) {
1580 		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
1581 		if (src != NULL && dst != NULL)
1582 			xisr->sadb_x_ipsecrequest_len +=
1583 				PFKEY_ALIGN8(src->sa_len + dst->sa_len);
1584 		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
1585 		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
1586 		xisr->sadb_x_ipsecrequest_level = level;
1587 		if (level == IPSEC_LEVEL_UNIQUE)
1588 			xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
1589 		else
1590 			xisr->sadb_x_ipsecrequest_reqid = 0;
1591 	}
1592 
1593 	return size;
1594 }
1595 
1596 static inline void
1597 if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
1598 {
1599 
1600 	KASSERT(msg != NULL);
1601 
1602 	msg->sadb_msg_version = PF_KEY_V2;
1603 	msg->sadb_msg_type = msgtype;
1604 	msg->sadb_msg_errno = 0;
1605 	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
1606 	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
1607 	msg->sadb_msg_reserved = 0;
1608 	msg->sadb_msg_seq = 0; /* XXXX */
1609 	msg->sadb_msg_pid = 0; /* XXXX */
1610 }
1611 
1612 static inline void
1613 if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
1614 {
1615 
1616 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
1617 }
1618 
1619 static inline void
1620 if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
1621 {
1622 
1623 	if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
1624 }
1625 
1626 static int
1627 if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
1628     in_port_t port)
1629 {
1630 	int error = 0;
1631 
1632 	sockaddr_copy(addrport, addr->sa_len, addr);
1633 
1634 	switch (addr->sa_family) {
1635 #ifdef INET
1636 	case AF_INET: {
1637 		struct sockaddr_in *sin = satosin(addrport);
1638 		sin->sin_port = port;
1639 		break;
1640 	}
1641 #endif /* INET */
1642 #ifdef INET6
1643 	case AF_INET6: {
1644 		struct sockaddr_in6 *sin6 = satosin6(addrport);
1645 		sin6->sin6_port = port;
1646 		break;
1647 	}
1648 #endif /* INET6 */
1649 	default:
1650 		log(LOG_DEBUG,
1651 		    "%s: Invalid address family: %d.\n",
1652 		    __func__, addr->sa_family);
1653 		error = EINVAL;
1654 	}
1655 
1656 	return error;
1657 }
1658 
1659 static struct secpolicy *
1660 if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
1661     struct sockaddr *dst, in_port_t dport,
1662     int dir, int proto, int level, u_int policy)
1663 {
1664 	struct sadb_msg msg;
1665 	struct sadb_address xsrc, xdst;
1666 	struct sadb_x_policy xpl;
1667 	struct sadb_x_ipsecrequest xisr;
1668 	size_t size;
1669 	size_t padlen;
1670 	uint16_t ext_msg_len = 0;
1671 	struct mbuf *m;
1672 
1673 	memset(&msg, 0, sizeof(msg));
1674 	memset(&xsrc, 0, sizeof(xsrc));
1675 	memset(&xdst, 0, sizeof(xdst));
1676 	memset(&xpl, 0, sizeof(xpl));
1677 	memset(&xisr, 0, sizeof(xisr));
1678 
1679 	MGETHDR(m, M_WAIT, MT_DATA);
1680 
1681 	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
1682 	ext_msg_len += PFKEY_UNIT64(size);
1683 	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
1684 	ext_msg_len += PFKEY_UNIT64(size);
1685 	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level, NULL, NULL);
1686 	ext_msg_len += PFKEY_UNIT64(size);
1687 	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
1688 
1689 	/* build PF_KEY message */
1690 
1691 	m->m_len = sizeof(msg);
1692 	m_copyback(m, 0, sizeof(msg), &msg);
1693 
1694 	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
1695 	/*
1696 	 * secpolicy.spidx.{src, dst} must not be set port number,
1697 	 * even if it is used for NAT-T.
1698 	 */
1699 	if_ipsec_add_mbuf_addr_port(m, src, 0, true);
1700 	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
1701 		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
1702 	if_ipsec_add_pad(m, padlen);
1703 
1704 	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
1705 	/* ditto */
1706 	if_ipsec_add_mbuf_addr_port(m, dst, 0, true);
1707 	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
1708 		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
1709 	if_ipsec_add_pad(m, padlen);
1710 
1711 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1712 	padlen = PFKEY_UNUNIT64(xpl.sadb_x_policy_len) - sizeof(xpl);
1713 	if (policy == IPSEC_POLICY_IPSEC) {
1714 		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
1715 		padlen -= PFKEY_ALIGN8(sizeof(xisr));
1716 	}
1717 	if_ipsec_add_pad(m, padlen);
1718 
1719 	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
1720 	return key_kpi_spdadd(m);
1721 }
1722 
1723 static int
1724 if_ipsec_add_sp(struct ipsec_variant *var,
1725     struct sockaddr *src, in_port_t sport,
1726     struct sockaddr *dst, in_port_t dport)
1727 {
1728 	struct ipsec_softc *sc = var->iv_softc;
1729 	int level;
1730 	u_int v6policy;
1731 
1732 	/*
1733 	 * must delete sp before add it.
1734 	 */
1735 	KASSERT(IV_SP_IN(var) == NULL);
1736 	KASSERT(IV_SP_OUT(var) == NULL);
1737 	KASSERT(IV_SP_IN6(var) == NULL);
1738 	KASSERT(IV_SP_OUT6(var) == NULL);
1739 
1740 	/*
1741 	 * can be shared?
1742 	 */
1743 	if (if_ipsec_share_sp(var))
1744 		return 0;
1745 
1746 	if (if_ipsec_nat_t(sc))
1747 		level = IPSEC_LEVEL_REQUIRE;
1748 	else
1749 		level = IPSEC_LEVEL_UNIQUE;
1750 
1751 	if (if_ipsec_fwd_ipv6(sc))
1752 		v6policy = IPSEC_POLICY_IPSEC;
1753 	else
1754 		v6policy = IPSEC_POLICY_DISCARD;
1755 
1756 	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1757 	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1758 	if (IV_SP_IN(var) == NULL)
1759 		goto fail;
1760 	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1761 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC);
1762 	if (IV_SP_OUT(var) == NULL)
1763 		goto fail;
1764 	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
1765 	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy);
1766 	if (IV_SP_IN6(var) == NULL)
1767 		goto fail;
1768 	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
1769 	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy);
1770 	if (IV_SP_OUT6(var) == NULL)
1771 		goto fail;
1772 
1773 	return 0;
1774 
1775 fail:
1776 	if (IV_SP_IN6(var) != NULL) {
1777 		if_ipsec_del_sp0(IV_SP_IN6(var));
1778 		IV_SP_IN6(var) = NULL;
1779 	}
1780 	if (IV_SP_OUT(var) != NULL) {
1781 		if_ipsec_del_sp0(IV_SP_OUT(var));
1782 		IV_SP_OUT(var) = NULL;
1783 	}
1784 	if (IV_SP_IN(var) != NULL) {
1785 		if_ipsec_del_sp0(IV_SP_IN(var));
1786 		IV_SP_IN(var) = NULL;
1787 	}
1788 
1789 	return EEXIST;
1790 }
1791 
1792 static int
1793 if_ipsec_del_sp0(struct secpolicy *sp)
1794 {
1795 	struct sadb_msg msg;
1796 	struct sadb_x_policy xpl;
1797 	size_t size;
1798 	uint16_t ext_msg_len = 0;
1799 	int error;
1800 	struct mbuf *m;
1801 
1802 	if (sp == NULL)
1803 		return 0;
1804 
1805 	memset(&msg, 0, sizeof(msg));
1806 	memset(&xpl, 0, sizeof(xpl));
1807 
1808 	MGETHDR(m, M_WAIT, MT_DATA);
1809 
1810 	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0, NULL, NULL);
1811 	ext_msg_len += PFKEY_UNIT64(size);
1812 
1813 	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
1814 
1815 	m->m_len = sizeof(msg);
1816 	m_copyback(m, 0, sizeof(msg), &msg);
1817 
1818 	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
1819 
1820 	/*  unreference correspond to key_kpi_spdadd(). */
1821 	KEY_SP_UNREF(&sp);
1822 	error = key_kpi_spddelete2(m);
1823 	if (error != 0) {
1824 		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
1825 		    __func__, sp->id, error);
1826 	}
1827 	return error;
1828 }
1829 
1830 static void
1831 if_ipsec_del_sp(struct ipsec_variant *var)
1832 {
1833 
1834 	/* are the SPs shared? */
1835 	if (if_ipsec_unshare_sp(var))
1836 		return;
1837 
1838 	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
1839 	(void)if_ipsec_del_sp0(IV_SP_IN(var));
1840 	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
1841 	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
1842 	IV_SP_IN(var) = NULL;
1843 	IV_SP_IN6(var) = NULL;
1844 	IV_SP_OUT(var) = NULL;
1845 	IV_SP_OUT6(var) = NULL;
1846 }
1847 
1848 static int
1849 if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
1850     struct ipsec_variant *nvar)
1851 {
1852 	in_port_t src_port = 0;
1853 	in_port_t dst_port = 0;
1854 	struct sockaddr *src;
1855 	struct sockaddr *dst;
1856 	int error = 0;
1857 
1858 	KASSERT(mutex_owned(&sc->ipsec_lock));
1859 
1860 	if_ipsec_del_sp(ovar);
1861 
1862 	src = nvar->iv_psrc;
1863 	dst = nvar->iv_pdst;
1864 	if (if_ipsec_nat_t(sc)) {
1865 		/* NAT-T enabled */
1866 		src_port = nvar->iv_sport;
1867 		dst_port = nvar->iv_dport;
1868 	}
1869 	if (src && dst)
1870 		error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
1871 
1872 	return error;
1873 }
1874 
1875 /*
1876  * ipsec_variant and its SPs update API.
1877  *
1878  * Assumption:
1879  * reader side dereferences sc->ipsec_var in reader critical section only,
1880  * that is, all of reader sides do not reader the sc->ipsec_var after
1881  * pserialize_perform().
1882  */
1883 static int
1884 if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
1885     struct ipsec_variant *nullvar)
1886 {
1887 	struct ifnet *ifp = &sc->ipsec_if;
1888 	struct ipsec_variant *ovar = sc->ipsec_var;
1889 	int error;
1890 
1891 	KASSERT(mutex_owned(&sc->ipsec_lock));
1892 
1893 	/*
1894 	 * To keep consistency between ipsec(4) I/F settings and SPs,
1895 	 * we stop packet processing while replacing SPs, that is, we set
1896 	 * "null" config variant to sc->ipsec_var.
1897 	 */
1898 	atomic_store_release(&sc->ipsec_var, nullvar);
1899 	pserialize_perform(sc->ipsec_psz);
1900 	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
1901 
1902 	error = if_ipsec_replace_sp(sc, ovar, nvar);
1903 	if (!error)
1904 		atomic_store_release(&sc->ipsec_var, nvar);
1905 	else {
1906 		psref_target_init(&ovar->iv_psref, iv_psref_class);
1907 		atomic_store_release(&sc->ipsec_var, ovar); /* rollback */
1908 	}
1909 
1910 	pserialize_perform(sc->ipsec_psz);
1911 	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
1912 
1913 	if (if_ipsec_variant_is_configured(sc->ipsec_var))
1914 		ifp->if_flags |= IFF_RUNNING;
1915 	else
1916 		ifp->if_flags &= ~IFF_RUNNING;
1917 
1918 	return error;
1919 }
1920